3

ファイルから行のブロックを抽出しているときに、いくつかの問題に直面しています。次の2つのファイルを検討してください

File-1
1.20/abc/this_is_test_1
perl/RRP/RRP-1.30/JEDI/JEDIExportSuccess2
exec perl/RRP/RRP-1.30/JEDI/CommonReq/confAbvExp
perl/LRP/BaseLibs/close-MMM
exec perl/LRP/BaseLibs/launchLRPCHURRTA("TYRE")
this/or/that

File-2
exec 1.20/setup/testird
exec 1.20/sql/temp/Test3
exec 1.20/setup/testxyz
exec 1.20/sql/fondle_opr_sql_labels
exec 1.20/setup/testird
exec 1.20/sql/temp/NEWTest
exec 1.20/setup/testxyz
exec 1.20/sql/fondle_opr_sql_xfer
exec 1.20/setup/testird
exec 1.20/sql/set_sec_not_0
exec 1.20/setup/testpqr
exec 1.20/sql/sql_ba_statuses_on_mult
exec perl/RRP/SetupReq/testdef_ijk
exec perl/RRP/RRP-1.30/JEDI/SetupReq/confAbvExp
exec perl/RRP/RRP-1.30/JEDI/JEDIExportSuccess1
exec perl/RRP/SetupReq/testdef_ijk
exec perl/RRP/RRP-1.30/JEDI/SetupReq/confAbvExp
exec perl/RRP/RRP-1.30/JEDI/JEDIExportSuccess2
exec perl/RRP/SetupReq/testdef_ijk
exec perl/RRP/RRP-1.30/JEDI/SetupReq/confAbvExp
exec perl/RRP/RRP-1.30/JEDI/JEDIExportSuccess3
exec 1.20/setup/testird
exec 1.20/sql/sqlmenu_purr_labl
exec 1.20/sql/est_time_at_non_drp_plc
exec 1.20/sql/half_Brd_Supply_mix_single
exec 1.20/setup/testird
exec 1.20/sql/temp/Test
exec 1.20/setup/testird
exec 1.20/sql/temp/Test2
exec perl/LRP/SetupReq/testird_LRP("LRP")
exec perl/BaseLibs/launch_client("LRP")
exec perl/LRP/LRP-classic-4.14/churrip/chorSingle
exec perl/LRP/BaseLibs/setupLRPMMMTab
exec perl/LRP/BaseLibs/launchMMM
exec perl/LRP/BaseLibs/launchLRPCHURRTA("TYRE")
#PAUSE Expand Churrip tree view & open all nodes
exec perl/LRP/LRP-classic-4.14/Corrugator/multipleSeriesWeb
exec perl/BaseLibs/ShutApp("Self Destruction System")
exec perl/LRP/BaseLibs/close-MMM
exec 1.20/setup/testmiddle
exec 1.20/sql/collective_reads
exec 1.20/setup/testinit
exec 1.20/abc/this_is_test_1
exec 1.20/abc/this_is_test_1
exec perl/LRP/SetupReq/abcDEF
exec perl/BaseLibs/launch_client("sqlC","LRP")
exec perl/LRP/LRP-perl-4.20/fireTrigger

次に、File-1のすべての行について、File-2から関連する行のブロックを抽出します。File-2のブロックは次のように定義されます

exec 1.20/setup/xxxxx
blah blah blah
blah blah blah
.
.
.
all lines till next setup line is found

例えば

exec 1.20/setup/testinit
exec 1.20/abc/this_is_test_1
exec 1.20/abc/this_is_test_1

また

exec perl/LRP/SetupReq/xxxxx
blah blah blah
blah blah blah
.
.
.
all lines till next setup line is found

例えば

exec perl/LRP/SetupReq/testird_LRP("LRP")
exec perl/BaseLibs/launch_client("LRP")
exec perl/LRP/LRP-classic-4.14/churrip/chorSingle
exec perl/LRP/BaseLibs/setupLRPMMMTab
exec perl/LRP/BaseLibs/launchMMM
exec perl/LRP/BaseLibs/launchLRPCHURRTA("TYRE")
#PAUSE Expand Churrip tree view & open all nodes
exec perl/LRP/LRP-classic-4.14/Corrugator/multipleSeriesWeb
exec perl/BaseLibs/ShutApp("Self Destruction System")
exec perl/LRP/BaseLibs/close-MMM

私はこれまで、次のスクリプトの助けを借りて、File-2から関連するブロックを抽出することができました

Shell Script
#set -x
# Usage: <script> TEST_LIST BATCH_FILE
FLBATCHLIST=$1          # file listing the wanted tests, one per line
BATCHFILE=$2            # batch file the blocks are extracted from

# Scratch directory; whatever an earlier run left in it is removed first.
TEMPDIR="/usr/tmp/tempBatchDir"
# ${TEMPDIR:?} aborts instead of expanding to "/*" should TEMPDIR ever be empty.
rm -rf -- "${TEMPDIR:?}"/*

# Intermediate files
WORKFILE="$TEMPDIR/failedTestList.txt"  # cleaned and regex-escaped copy of the test list
CPBATCHFILE="$TEMPDIR/orig.test"        # scratch copy of the batch file (consumed by createBatch)
TESTSETFILE="$TEMPDIR/testset.txt"      # the block currently being examined
TEMPFILE="$TEMPDIR/temp.txt"
DIFFFILE="$TEMPDIR/diff.txt"            # batch file minus the block just examined

#Output
FAILEDBATCH="$TEMPDIR/FailedBatch.test" # extracted blocks
LOGFILE="$TEMPDIR/log.txt"

# createBatch TESTNAME
#
# Walks $CPBATCHFILE block by block and appends every block containing a line
# that ends in TESTNAME to $FAILEDBATCH.  TESTNAME is used as a grep basic
# regular expression and is matched case-insensitively.  A block starts at a
# setup line and runs up to, but not including, the next setup line.
#
# Globals read:    CPBATCHFILE TESTSETFILE DIFFFILE FAILEDBATCH LOGFILE TEMPDIR
# Globals written: TESTNAME FOUND TSTFLLINES ABSTEST
# Side effects:    $CPBATCHFILE is consumed (left empty), so the caller must
#                  hand in a scratch copy of the batch file.
# Output:          a FATAL ERROR line on stdout (and in $LOGFILE) when no
#                  block contains TESTNAME.
createBatch ()
{
        TESTNAME=$1
        FOUND=0

        # Strip leading/trailing whitespace and drop blank lines so that every
        # remaining line is either a setup line or belongs to a block.
        sed -i 's/^[[:space:]]*//;s/[[:space:]]*$//g;/^$/d' "$CPBATCHFILE"

        while [ -s "$CPBATCHFILE" ]
        do
                # Copy the first block: print lines until a second setup line shows up.
                awk '/[Ss]etup.*[Tt]est/ || /perl\/[[:alpha:]]*\/[Ss]etup[rR]eq/{if(b) exit; else b=1}1' "$CPBATCHFILE" > "$TESTSETFILE"

                if grep -i -- "${TESTNAME}\$" "$TESTSETFILE" >> "$LOGFILE" 2>&1; then
                        echo "test found" >> "$LOGFILE"
                        cat "$TESTSETFILE" >> "$FAILEDBATCH"
                        FOUND=1
                fi

                # Drop the block just examined.  Skipping its line count from
                # the top (tail -n +K) also terminates when the batch file
                # lacks a trailing newline; subtracting two wc -l counts does
                # not, because wc -l then undercounts the batch file.
                TSTFLLINES=$(wc -l < "$TESTSETFILE")
                tail -n "+$((TSTFLLINES + 1))" "$CPBATCHFILE" > "$DIFFFILE"
                mv "$DIFFFILE" "$CPBATCHFILE"
        done
        echo "$CPBATCHFILE is empty" >> "$LOGFILE"

        if [ "$FOUND" -eq 0 ]; then
                printf '%s\n' "$TESTNAME" > "$TEMPDIR/test.txt"
                # Remove the regex escaping again so the message shows the plain name.
                ABSTEST=$(printf '%s\n' "$TESTNAME" | sed 's/\\//g')
                printf 'FATAL ERROR: Test "%s" not found in batch\n' "$ABSTEST" | tee -a "$LOGFILE"
        fi
}

####STARTS HERE####
mkdir -p "$TEMPDIR"
#cat  $TEMPDIR/test.txt
#FLBATCHLIST="$TEMPDIR/test.txt"
# Drop a leading "exec", leading/trailing whitespace and blank lines from the test list
sed 's/^[eE][xX][eE][cC]//g;s/^[[:space:]]*//;s/[[:space:]]*$//g;/^$/d' "$FLBATCHLIST" > "$WORKFILE"

# Escape '\', '/', '.' and '"' in the path names so grep takes them literally
sed -i 's/\([\/\.\"]\)/\\\1/g' "$WORKFILE"

# One test name per line.  Reading line by line (instead of word-splitting
# the output of cat) keeps names that contain spaces or glob characters
# intact.  The list is read on fd 3 so nothing inside the loop can consume it;
# the "|| [ -n ... ]" part picks up a last line without a trailing newline.
while IFS= read -r fltest <&3 || [ -n "$fltest" ]
do
        printf '%s\n' "$fltest" >> "$LOGFILE"
        # createBatch consumes its input, so every test gets a fresh copy.
        cp "$BATCHFILE" "$CPBATCHFILE"
        createBatch "$fltest"
done 3< "$WORKFILE"

# Make sure the result file exists even when no test was found.
: >> "$FAILEDBATCH"
# Convert the path separators from '/' to '\'
sed -i 's/\//\\/g' "$FAILEDBATCH"
## Deliver the result into the current directory
cp "$FAILEDBATCH" .

このスクリプトの問題は

  1. File-1の各行でFile-2をトラバースするので、少し時間がかかります。File-2を1回トラバースするだけのより良い解決策があるかどうか知りたかったのです。

  2. スクリプトは私の問題を解決しますが、行のブロックが重複しているファイルが残っています。重複する行のブロックを削除する方法があることを知りたかったのです。

これは、スクリプトを実行したときの出力です。

exec 1.20\setup\testinit
exec 1.20\abc\this_is_test_1
exec 1.20\abc\this_is_test_1
exec perl\RRP\SetupReq\testdef_ijk
exec perl\RRP\RRP-1.30\JEDI\SetupReq\confAbvExp
exec perl\RRP\RRP-1.30\JEDI\JEDIExportSuccess2
exec perl\RRP\SetupReq\testdef_ijk
exec perl\RRP\RRP-1.30\JEDI\SetupReq\confAbvExp
exec perl\RRP\RRP-1.30\JEDI\JEDIExportSuccess1
exec perl\RRP\SetupReq\testdef_ijk
exec perl\RRP\RRP-1.30\JEDI\SetupReq\confAbvExp
exec perl\RRP\RRP-1.30\JEDI\JEDIExportSuccess2
exec perl\RRP\SetupReq\testdef_ijk
exec perl\RRP\RRP-1.30\JEDI\SetupReq\confAbvExp
exec perl\RRP\RRP-1.30\JEDI\JEDIExportSuccess3
exec perl\LRP\SetupReq\testird_LRP("LRP")
exec perl\BaseLibs\launch_client("LRP")
exec perl\LRP\LRP-classic-4.14\churrip\chorSingle
exec perl\LRP\BaseLibs\setupLRPMMMTab
exec perl\LRP\BaseLibs\launchMMM
exec perl\LRP\BaseLibs\launchLRPCHURRTA("TYRE")
#PAUSE Expand Churrip tree view & open all nodes
exec perl\LRP\LRP-classic-4.14\Corrugator\multipleSeriesWeb
exec perl\BaseLibs\ShutApp("Self Destruction System")
exec perl\LRP\BaseLibs\close-MMM
exec perl\LRP\SetupReq\testird_LRP("LRP")
exec perl\BaseLibs\launch_client("LRP")
exec perl\LRP\LRP-classic-4.14\churrip\chorSingle
exec perl\LRP\BaseLibs\setupLRPMMMTab
exec perl\LRP\BaseLibs\launchMMM
exec perl\LRP\BaseLibs\launchLRPCHURRTA("TYRE")
#PAUSE Expand Churrip tree view & open all nodes
exec perl\LRP\LRP-classic-4.14\Corrugator\multipleSeriesWeb
exec perl\BaseLibs\ShutApp("Self Destruction System")
exec perl\LRP\BaseLibs\close-MMM

ネットで答えを探してみましたが、自分のニーズに合った答えが見つかりませんでした。

与えられたFile-1とFile-2これが私のスクリプトが出力することを期待するものです(私はFILE-1の各行に期待する出力をリストしました)

For line "1.20/abc/this_is_test_1" in FILE-1
Output
exec 1.20/setup/testinit
exec 1.20/abc/this_is_test_1
exec 1.20/abc/this_is_test_1

For line "perl/RRP/RRP-1.30/JEDI/JEDIExportSuccess2" in FILE-1
Output
exec perl/RRP/SetupReq/testdef_ijk
exec perl/RRP/RRP-1.30/JEDI/SetupReq/confAbvExp
exec perl/RRP/RRP-1.30/JEDI/JEDIExportSuccess2

For line "exec perl/RRP/RRP-1.30/JEDI/CommonReq/confAbvExp" in FILE-1
Output
do nothing as there is no line matching this is in FILE-2

For line "perl/LRP/BaseLibs/close-MMM" in FILE-1
Output
exec perl/LRP/SetupReq/testird_LRP("LRP")
exec perl/BaseLibs/launch_client("LRP")
exec perl/LRP/LRP-classic-4.14/churrip/chorSingle
exec perl/LRP/BaseLibs/setupLRPMMMTab
exec perl/LRP/BaseLibs/launchMMM
exec perl/LRP/BaseLibs/launchLRPCHURRTA("TYRE")
#PAUSE Expand Churrip tree view & open all nodes
exec perl/LRP/LRP-classic-4.14/Corrugator/multipleSeriesWeb
exec perl/BaseLibs/ShutApp("Self Destruction System")
exec perl/LRP/BaseLibs/close-MMM    

For line "exec perl/LRP/BaseLibs/launchLRPCHURRTA("TYRE")" in FILE-1
Output
Do nothing as it would generate the same block as line "perl/LRP/BaseLibs/close-MMM" in FILE-1 did

For Line "this/or/that" in FILE-1
Output
Do nothing as there is no line matching this is in FILE-2

したがって、最終的な出力は次のようになります(ブロックの順序は関係ありません)。

exec 1.20/setup/testinit
exec 1.20/abc/this_is_test_1
exec 1.20/abc/this_is_test_1

exec perl/RRP/SetupReq/testdef_ijk
exec perl/RRP/RRP-1.30/JEDI/SetupReq/confAbvExp
exec perl/RRP/RRP-1.30/JEDI/JEDIExportSuccess2

exec perl/LRP/SetupReq/testird_LRP("LRP")
exec perl/BaseLibs/launch_client("LRP")
exec perl/LRP/LRP-classic-4.14/churrip/chorSingle
exec perl/LRP/BaseLibs/setupLRPMMMTab
exec perl/LRP/BaseLibs/launchMMM
exec perl/LRP/BaseLibs/launchLRPCHURRTA("TYRE")
#PAUSE Expand Churrip tree view & open all nodes
exec perl/LRP/LRP-classic-4.14/Corrugator/multipleSeriesWeb
exec perl/BaseLibs/ShutApp("Self Destruction System")
exec perl/LRP/BaseLibs/close-MMM

誰かが私にどのように進めるかについてのいくつかの指針を与えることができれば本当に素晴らしいでしょう。そして、はい、私は言及するのを忘れました、これは宿題の質問ではありません:-)。

どうもありがとう

4

3 に答える 3

0

Provided that line order does not matter, you can remove duplicates from a file this way, from the command prompt:

sort filename | uniq

To find which lines are present in both files, I used a Perl script that created a hash (or associative array, if you will). Then I scanned through File A and added each line to the hash, using the line as the key and setting the value to 1. Then I did the same for File B, but setting the value to 2, and if the key already existed, I added 2 instead. The result is that each file is read only once, and at the end I knew that if a key had a value of 1, it existed only in File A; if it had a value of 2, it existed only in File B; and if it had a value of 3, it existed in both.

Edit: I found some Perl code lying around from a project, doing exactly what I described above. In this code, I was only after the differences, but it should be easy to modify it to your needs.

# Classify every item by the list(s) it occurs in, reading each list once.
# %found maps item => bit mask: 2 = seen in @qlist, 1 = seen in @xlist,
# 3 = seen in both.  The flags are OR-ed in rather than added, so an item
# that is repeated within one list still ends up with exactly 1, 2 or 3.
my %found;
foreach my $item (@qlist) { $found{$item} |= 2 };
foreach my $item (@xlist) { $found{$item} |= 1 };

foreach my $item (keys(%found))
{
  if    ($found{$item} == 3)
  {
    # It's in both files. Not doing anything.
  }
  elsif ($found{$item} == 2)
  {
    print "$item found in the QC-list, but not the x-list.\n";
  }
  elsif ($found{$item} == 1)
  {
    print "$item found in the x-list, but not the QC-list.\n";
  }
}
于 2012-12-04T22:36:58.983 に答える
0

The following assumes that the "setup" line is unique for each block. We use this line as a key to an associative array which keeps track of which blocks we have already printed.

The first line of script reads the first file into a variable called regex which collects the lines we want to match on from the first file (the idiom NR==FNR means when the line number of the current file is equal to the line number of all files collected, that is, it is true only when we are reading the first file from the argument list). The rest of the script is fairly straightforward, I should hope.

# Print every block of File-2 that contains a line listed in File-1, reading
# each file only once.  A block is a setup line plus everything up to the next
# setup line.  Blocks are keyed by their setup line, so of several blocks
# sharing one setup line only the first matching one is printed.
awk 'NR==FNR {
        # File-1: join all wanted lines into one alternation.
        if ($0 ~ /^[[:space:]]*$/) next     # an empty alternative would match every line
        gsub(/[][\\.^$*+?(){}|]/, "\\\\&")  # escape regex operators so the text matches literally
        regex = regex sep $0; sep = "|"
        next
    }
    /[Ss]etup.*[Tt]est/ || /perl\/[[:alpha:]]*\/[Ss]etup[Rr]eq/ {
        # A setup line opens a new block.  A bare /[Ss]etup/ is too loose: it
        # also hits lines such as .../JEDI/SetupReq/confAbvExp inside a block.
        label = $0; printing = 0; collected = nl = ""
    }
    { collected = collected nl $0; nl = RS }
    !printing && regex != "" && $0 ~ regex {
        # First wanted line of this block: emit what was collected so far.
        # collected already ends with the current line, hence the next.
        if (!printed[label]) { printed[label] = printing = 1; print collected }
        next
    }
    printing { print }' File-1 File-2

If the "setup" line is not necessarily unique, maybe you can use the value of collected as a key.

This should (I hope) be robust against multiple lines from File-1 matching the same block in File-2.

I know I hinted at a sed solution in a comment but this turned out to be the sort of problem where awk felt more natural. Of course, it could be done in Perl or Python or what have you just as well.

于 2012-12-06T12:36:43.763 に答える
0

Thanks @tripleee and @Jarmund for your suggestions. From your inputs I was finally able to figure out a solution to my problem. I took the hint about associative arrays to make a unique key for each block, so here is what I did:

  • take file-2 and convert each block into single line

    # Copy the first block of file-2: print lines until a second setup line shows up
    awk '/[Ss]etup.*[Tt]est/ || /perl\/[[:alpha:]]*\/[Ss]etup[Rr]eq/{if(b) exit; else b=1}1' file-2 > "$TESTSETFILE"
    # Join that block into a single line, dropping newlines and spaces
    sed ':a;N;$!ba;s/\n//g;s/ //g' "$TESTSETFILE" >> "$SINGLELINEFILE"

  • Now each line in this file is a unique entry

  • After this I use grep for each line in File-1 to find the respective block (which has now been converted into a single line)
  • Then I use awk or sort -u to keep only the unique entries in my solution file

Maybe this solution is not the best, but it is a lot faster than the previous one.

Here is my new script

# Usage: <script> TEST_LIST BATCH_FILE
FLBATCHLIST=$1          # file listing the wanted tests, one per line
BATCHFILE=$2            # batch file the blocks are extracted from

# Scratch directory; whatever an earlier run left in it is removed first.
TEMPDIR="./tempBatchdir"
# ${TEMPDIR:?} aborts instead of expanding to "/*" should TEMPDIR ever be empty.
rm -rf -- "${TEMPDIR:?}"/*

# Intermediate files
WORKFILE="$TEMPDIR/failedTestList.txt"   # cleaned and regex-escaped copy of the test list
CPBATCHFILE="$TEMPDIR/orig.test"         # scratch copy of the batch file (consumed by convertSingleLine)
TESTSETFILE="$TEMPDIR/testset.txt"       # the block currently being converted
DIFFFILE="$TEMPDIR/diff.txt"             # batch file minus the block just converted
SINGLELINEFILE="$TEMPDIR/singleline.txt" # every block of the batch file as one line
TEMPFILE="$TEMPDIR/temp.txt"             # scratch file for removing duplicate blocks
#Output
FAILEDBATCH="$TEMPDIR/FailedBatch.test"  # extracted blocks
LOGFILE="$TEMPDIR/log.txt"

# convertSingleLine
#
# Appends every block of $CPBATCHFILE to $SINGLELINEFILE as a single line with
# all spaces and newlines removed, each followed by a "**" separator line.
# A block starts at a setup line and runs up to, but not including, the next
# setup line.
#
# Globals read:    CPBATCHFILE TESTSETFILE DIFFFILE SINGLELINEFILE LOGFILE
# Globals written: TSTFLLINES
# Side effects:    $CPBATCHFILE is consumed (left empty), so the caller must
#                  hand in a scratch copy of the batch file.
convertSingleLine ()
{
        # Strip leading/trailing whitespace and drop blank lines so that every
        # remaining line is either a setup line or belongs to a block.
        sed -i 's/^[[:space:]]*//;s/[[:space:]]*$//g;/^$/d' "$CPBATCHFILE"

        while [ -s "$CPBATCHFILE" ]
        do
                # Copy the first block: print lines until a second setup line shows up.
                awk '/[Ss]etup.*[Tt]est/ || /perl\/[[:alpha:]]*\/[Ss]etup[Rr]eq/{if(b) exit; else b=1}1' "$CPBATCHFILE" > "$TESTSETFILE"

                # tr treats one-line blocks like all others; the sed N loop
                # used before quit on them before it removed the spaces.
                {
                        tr -d ' \n' < "$TESTSETFILE"
                        printf '\n**\n'
                } >> "$SINGLELINEFILE"

                # Drop the block just converted.  Skipping its line count from
                # the top (tail -n +K) also terminates when the batch file
                # lacks a trailing newline; subtracting two wc -l counts does
                # not, because wc -l then undercounts the batch file.
                TSTFLLINES=$(wc -l < "$TESTSETFILE")
                tail -n "+$((TSTFLLINES + 1))" "$CPBATCHFILE" > "$DIFFFILE"
                mv "$DIFFFILE" "$CPBATCHFILE"
        done
        echo "$CPBATCHFILE is empty" >> "$LOGFILE"
}

####STARTS HERE####
mkdir -p "$TEMPDIR"

# Drop a leading "exec", leading/trailing whitespace and blank lines from the test list
sed 's/^[eE][xX][eE][cC]//g;s/^[[:space:]]*//;s/[[:space:]]*$//g;/^$/d' "$FLBATCHLIST" > "$WORKFILE"
# Escape '\', '/', '.' and '"' in the path names so grep takes them literally
sed -i 's/\([\/\.\"]\)/\\\1/g' "$WORKFILE"

# Traverse the batch file once: every block becomes one line of $SINGLELINEFILE.
cp "$BATCHFILE" "$CPBATCHFILE"
convertSingleLine

# Make sure the result file exists even when the test list is empty.
: >> "$FAILEDBATCH"

# One test name per line.  Reading line by line (instead of word-splitting
# the output of cat) keeps names that contain spaces or glob characters
# intact.  The list is read on fd 3 so nothing inside the loop can consume it;
# the "|| [ -n ... ]" part picks up a last line without a trailing newline.
while IFS= read -r fltest <&3 || [ -n "$fltest" ]
do
        printf '%s\n' "$fltest" >> "$LOGFILE"
        # The blocks were stored with their spaces removed, so the name has
        # to be compared without spaces as well.
        pattern=$(printf '%s\n' "$fltest" | tr -d ' ')
        if grep -- "$pattern" "$SINGLELINEFILE" >> "$FAILEDBATCH"; then
                echo "TEST FOUND" >> "$LOGFILE"
        else
                # Remove the regex escaping again so the message shows the plain name.
                ABSTEST=$(printf '%s\n' "$fltest" | sed 's/\\//g')
                echo "FATAL ERROR: Test \"$ABSTEST\" not found in $BATCHFILE" | tee -a "$LOGFILE"
        fi
done 3< "$WORKFILE"

# Each block is a single line here, so dropping repeated lines drops repeated blocks.
awk '!x[$0]++' "$FAILEDBATCH" > "$TEMPFILE"
mv "$TEMPFILE" "$FAILEDBATCH"

# Split the blocks back into lines: start a new line before every "exec" and
# every "#", and put the space after "exec" back.
sed -i "s/exec/\\nexec /g;s/#/\\n#/g" "$FAILEDBATCH"
# Drop the empty first line this leaves behind and convert '/' to '\'.
sed -i '1d;s/\//\\/g' "$FAILEDBATCH"

Here is the output

$ crflbatch file-1 file-2
FATAL ERROR: Test "perl/RRP/RRP-1.30/JEDI/CommonReq/confAbvExp" not found in file-2
FATAL ERROR: Test "this/or/that" not found in file-2

$ cat tempBatchdir/FailedBatch.test
exec 1.20\setup\testinit
exec 1.20\abc\this_is_test_1
exec 1.20\abc\this_is_test_1

exec perl\RRP\SetupReq\testdef_ijk
exec perl\RRP\RRP-1.30\JEDI\SetupReq\confAbvExp
exec perl\RRP\RRP-1.30\JEDI\JEDIExportSuccess2

exec perl\LRP\SetupReq\testird_LRP("LRP")
exec perl\BaseLibs\launch_client("LRP")
exec perl\LRP\LRP-classic-4.14\churrip\chorSingle
exec perl\LRP\BaseLibs\setupLRPMMMTab
exec perl\LRP\BaseLibs\launchMMM
exec perl\LRP\BaseLibs\launchLRPCHURRTA("TYRE")
#PAUSEExpandChurriptreeview&openallnodes
exec perl\LRP\LRP-classic-4.14\Corrugator\multipleSeriesWeb
exec perl\BaseLibs\ShutApp("SelfDestructionSystem")
exec perl\LRP\BaseLibs\close-MMM
$
于 2012-12-11T03:57:38.930 に答える