Add file name for xargs and awk output

I have a directory full of .txt files, each of which has two columns and many rows (> 10000). For each of these files, I am trying to find the maximum value in the second column and print the corresponding entries from columns 1 and 2 to an output file. For this, I have a working awk command.

find ./ -name "*.txt" | xargs -I FILE awk '{if(max<$2){max=$2;datum=$1}}END{print datum, max}' FILE >> out.txt

However, I would also like to print the name of the corresponding input file with each pair of numbers. The result will look something like this:

file1.txt datum1 max1
file2.txt datum2 max2

To do this, I tried to draw inspiration from this similar question, "add the file name to the beginning of the file using find and sed", but I could not get a working solution. My best effort so far looks something like this:

find ./ -name "*.txt" | xargs -I FILE echo FILE | awk '{if(max<$2){max=$2;datum=$1}}END{print datum, max}' FILE >> out.txt

but I get the error:

awk: can't open file FILE
 source line number 1

If you want to keep your current approach, here are two ways to do it:
(1)

find ./ -name "*.txt" | xargs -I FILE -c "echo FILE ; awk '{if(max<$2){max=$2;datum=$1}}END{print datum, max}' FILE" >> out.txt  

(2)

find ./ -name "*.txt" -exec sh -c "echo {} && awk '{if(max<$2){max=$2;datum=$1}}END{print datum, max}' {}" \; >> out.txt

Use whichever you prefer (xargs or -exec is a matter of taste); the idea is the same.

+4
3

If all the .txt files are in the current directory, try (GNU awk):

awk '{if(max=="" || max<$2+0){max=$2;datum=$1}}ENDFILE{print FILENAME, datum, max; max=""}' *.txt

To search the current directory and all of its subdirectories for .txt files, try:

find . -name '*.txt' -exec awk '{if(max=="" || max<$2+0){max=$2;datum=$1}}ENDFILE{print FILENAME, datum, max; max=""}' {} +

Because find has its own -exec action, xargs is not needed here.

  • {if(max=="" || max<$2+0){max=$2;datum=$1}}

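    This keeps the largest value seen so far in column 2 (in max) together with the corresponding value from column 1 (in datum).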

  • ENDFILE{print FILENAME, datum, max; max=""}

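    At the end of each file, this prints the file name followed by the column 1 and column 2 values from the row with the largest column 2.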

    After printing, max is reset to the empty string so that the next file starts fresh.

Example. Consider a directory with these three files:

$ cat file1.txt
1       1
2       2
$ cat file2.txt
3       12
5       14
4       13
$ cat file3.txt
1       0
2       1

The command produces:

$ awk '{if(max=="" || max<$2+0){max=$2;datum=$1}}ENDFILE{print FILENAME, datum, max; max=""}' *.txt
file1.txt 2 2
file2.txt 5 14
file3.txt 2 1

BSD awk

If your awk does not support ENDFILE, try:

$ awk 'FNR==1 && NR>1{print f, datum, max; max=""} max=="" || max<$2+0{max=$2;datum=$1;f=FILENAME} END{print f, datum, max}' *.txt
file1.txt 2 2
file2.txt 5 14
file3.txt 2 1

Because a single awk process reads all of the files, no separate awk has to be started for each file.

  • FNR==1 && NR>1{print f, datum, max; max=""}

    Each time a new file starts, this prints the result for the previous file and resets max.

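    In awk, FNR is the number of lines read so far from the current file, and NR is the total number of lines read so far across all files. When FNR==1 && NR>1, we are on the first line of a file, that file is not the first one, and so a previous file has just finished.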

  • max=="" || max<$2+0{max=$2;datum=$1;f=FILENAME}

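    As before, this keeps the largest value of column 2 and the corresponding value from column 1. It also saves the current file name in f.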

  • END{print f, datum, max}

    After the last file has been read, this prints its file name, datum, and maximum.

+2

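If you have 10 000 files of 100 000 lines each, you will be waiting a while if you start a new awk for each and every file, because that means creating 10 000 processes: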

find . -name \*.txt -exec awk ....

I created some test files and, on my iMac, this takes nearly 5 minutes.

So I decided to put all those Intel cores, and the flash storage that Apple gave me, to good use with GNU Parallel.

So, if your machine has several cores - say 4 or 8, as on a Mac - you can keep all of them busy like this:

parallel --tag -q awk 'BEGIN{max=$2;d=$1} $2>max {max=$2;d=$1} END{print d,max}' ::: *.txt 

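That takes 1 minute 22 seconds, so nearly 4 times faster - not bad! But we can do better... The command above still starts a new awk for every file, i.e. 10 000 awks, even if 8 of them run at a time. It would be better to start just 8 awk processes and give each of them as many files as possible. Fortunately, GNU Parallel can do that for us with -X: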

parallel -X -q gawk 'BEGINFILE{max=$2;d=$1} $2>max {max=$2;d=$1} ENDFILE{print FILENAME,d,max}' ::: *.txt 

That takes 49 seconds. Note that I had to switch to gawk for ENDFILE/BEGINFILE and drop --tag, because awk now prints the file names itself.


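GNU Parallel and gawk can be installed on a Mac with homebrew, which is a package manager. macOS does not come with one, so you need to install it first.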

With homebrew, install GNU Parallel with:

brew install parallel

and gawk with:

brew install gawk

Alternatively, since GNU Parallel is just a Perl script and macOS already ships with Perl, you can install it without homebrew like this:

(wget -O - pi.dk/3 || curl pi.dk/3/ ) | bash

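Note that if your file names are longer than about 25 characters, the expanded *.txt may exceed the limit of 262 144 characters on the argument list, and you will get an "argument list too long" error. In that case, pass the file names on stdin instead: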

find . -name \*.txt -print0 | parallel -0 -X -q gawk 'BEGINFILE{max=$2;d=$1} $2>max {max=$2;d=$1} ENDFILE{print FILENAME,d,max}'
+2
find . -name '*.txt' | xargs -n 1 -I FILE awk '(FNR==1) || (max<$2){max=$2;datum=$1} END{print FILENAME, datum, max}' FILE >> out.txt

find . -name '*.txt' -exec awk '(FNR==1) || (max<$2){max=$2;datum=$1} END{print FILENAME, datum, max}' {} \; >> out.txt

(edited by OP for typos)

+1
source

Source: https://habr.com/ru/post/1691862/


All Articles