Awk
Revision as of 13:48, 10 October 2011 by Ekaanbj (talk | contribs) (→Print lines where first column value is bigger than 10)
basic
awk 'BEGIN {initializations} search_pattern1 {actions} search_pattern2 {actions} ... END {final actions}' file
useful
Remove the first 2 lines of a file.
awk 'NR > 2' /tmp/tmp
Get media information. Pick the line containing "Encoded date", replace every : with -, then print fields 6 and 7 joined by _ with .mov appended. Exit after the first match.
mediainfo *.mov | awk '/Encoded date/ { gsub(/:/,"-") ; print $6 "_" $7 ".mov" ;exit }'
remove xml comments
awk -v RS='<!--|-->' 'NR%2' file
print something between tags
awk -F'[<|>]' '/jndi/{print $3}'
The following simple commands are used most often.
awk 'pattern' file            # print lines matching pattern
awk '{action}' file           # take action for every line
awk 'pattern {action}' file   # take action for every line matching pattern
$0, $1, $2,...  Field variables ($0 is the whole record)
FILENAME  Name of input file
FS  Field separator; by default a single space, which splits fields on runs of whitespace. Can be modified.
NF  Number of fields in the current record
NR  Number of the current record (line number)
OFMT  Output format for numbers (default: "%.6g")
RS  Record Separator
ORS  Output Record Separator
By default, the input and output record separators, stored in the built-in variables RS and ORS respectively, are a newline.
print lines containing cat, dog and bird.
awk '/cat/ && /dog/ && /bird/' files
Print rows where the first column doesn't contain ABC
awk '$1 !~ /ABC/' file # print lines whose 1st field doesn't match ABC
Print row 5 and 9.
awk 'NR==5 || NR==9' "file"
Print rows 5 through 9 (inclusive).
awk 'NR>=5&&NR<=9' "file"
Print column 4 to the end of passwd. Field separator set to :
cat /etc/passwd | awk '
BEGIN { FS=":" }
{
    for (i=4; i<=NF; i++) printf("%s ", $i)
    printf("\n")    # newline at end of line
}
'
Summarize size of files.
find . -type f -printf '%s\n' | awk '{ a+=$1 } END { print a }'
Sum the 5th column (bytes) and print the total in MB.
awk '{ SUM += $5} END { print SUM/1024/1024 }'
Sum the 5th column (bytes) and print the total in GB.
awk '{ SUM += $5} END { print SUM/1024/1024/1024 }'
Print lines containing T2 in second column.
awk '$2 ~ /T2/' file
Print the second column of lines where the first column equals nodev
awk '$1 == "nodev" { print $2 }'
Print lines where first column value is bigger than 10
awk '$1 > 10'
Print lines whose 5th column matches the regexp, using , as the field delimiter (note: an unescaped . matches any character; write \. for a literal dot)
awk -F',' '$5 ~ /^10.[0-9]*.21.[0-9]*/'
Print lines where 4 column is empty
awk 'BEGIN {FS=","} $4=="" {print}'
awk one liners
FILE SPACING:

# double space a file
awk '1;{print ""}'
awk 'BEGIN{ORS="\n\n"};1'

# double space a file which already has blank lines in it. Output file
# should contain no more than one blank line between lines of text.
# NOTE: On Unix systems, DOS lines which have only CRLF (\r\n) are
# often treated as non-blank, and thus 'NF' alone will return TRUE.
awk 'NF{print $0 "\n"}'

# triple space a file
awk '1;{print "\n"}'

NUMBERING AND CALCULATIONS:

# precede each line by its line number FOR THAT FILE (left alignment).
# Using a tab (\t) instead of space will preserve margins.
awk '{print FNR "\t" $0}' files*

# precede each line by its line number FOR ALL FILES TOGETHER, with tab.
awk '{print NR "\t" $0}' files*

# number each line of a file (number on left, right-aligned)
# Double the percent signs if typing from the DOS command prompt.
awk '{printf("%5d : %s\n", NR,$0)}'

# number each line of file, but only print numbers if line is not blank
# Remember caveats about Unix treatment of \r (mentioned above)
awk 'NF{$0=++a " :" $0};{print}'
awk '{print (NF? ++a " :" :"") $0}'

# count lines (emulates "wc -l")
awk 'END{print NR}'

# print the sums of the fields of every line
awk '{s=0; for (i=1; i<=NF; i++) s=s+$i; print s}'

# add all fields in all lines and print the sum
awk '{for (i=1; i<=NF; i++) s=s+$i}; END{print s}'

# print every line after replacing each field with its absolute value
awk '{for (i=1; i<=NF; i++) if ($i < 0) $i = -$i; print }'
awk '{for (i=1; i<=NF; i++) $i = ($i < 0) ? -$i : $i; print }'

# print the total number of fields ("words") in all lines
awk '{ total = total + NF }; END {print total}' file

# print the number of lines that contain "Beth"
awk '/Beth/{n++}; END {print n+0}' file

# print the largest first field and the line that contains it
# Intended for finding the longest string in field #1
awk '$1 > max {max=$1; maxline=$0}; END{ print max, maxline}'

# print the number of fields in each line, followed by the line
awk '{ print NF ":" $0 } '

# print the last field of each line
awk '{ print $NF }'

# print the last field of the last line
awk '{ field = $NF }; END{ print field }'

# print every line with more than 4 fields
awk 'NF > 4'

# print every line where the value of the last field is > 4
awk '$NF > 4'

TEXT CONVERSION AND SUBSTITUTION:

# IN UNIX ENVIRONMENT: convert DOS newlines (CR/LF) to Unix format
awk '{sub(/\r$/,"");print}'   # assumes EACH line ends with Ctrl-M

# IN UNIX ENVIRONMENT: convert Unix newlines (LF) to DOS format
awk '{sub(/$/,"\r");print}'

# IN DOS ENVIRONMENT: convert Unix newlines (LF) to DOS format
awk 1

# IN DOS ENVIRONMENT: convert DOS newlines (CR/LF) to Unix format
# Cannot be done with DOS versions of awk, other than gawk:
gawk -v BINMODE="w" '1' infile >outfile

# Use "tr" instead.
tr -d \r <infile >outfile   # GNU tr version 1.22 or higher

# delete leading whitespace (spaces, tabs) from front of each line
# aligns all text flush left
awk '{sub(/^[ \t]+/, ""); print}'

# delete trailing whitespace (spaces, tabs) from end of each line
awk '{sub(/[ \t]+$/, "");print}'

# delete BOTH leading and trailing whitespace from each line
awk '{gsub(/^[ \t]+|[ \t]+$/,"");print}'
awk '{$1=$1;print}'           # also removes extra space between fields

# insert 5 blank spaces at beginning of each line (make page offset)
awk '{sub(/^/, "     ");print}'

# align all text flush right on a 79-column width
awk '{printf "%79s\n", $0}' file*

# center all text on a 79-character width
awk '{l=length();s=int((79-l)/2); printf "%"(s+l)"s\n",$0}' file*

# substitute (find and replace) "foo" with "bar" on each line
awk '{sub(/foo/,"bar");print}'           # replaces only 1st instance
gawk '{$0=gensub(/foo/,"bar",4);print}'  # replaces only 4th instance
awk '{gsub(/foo/,"bar");print}'          # replaces ALL instances in a line

# substitute "foo" with "bar" ONLY for lines which contain "baz"
awk '/baz/{gsub(/foo/, "bar")};{print}'

# substitute "foo" with "bar" EXCEPT for lines which contain "baz"
awk '!/baz/{gsub(/foo/, "bar")};{print}'

# change "scarlet" or "ruby" or "puce" to "red"
awk '{gsub(/scarlet|ruby|puce/, "red"); print}'

# reverse order of lines (emulates "tac")
awk '{a[i++]=$0} END {for (j=i-1; j>=0;) print a[j--] }' file*

# if a line ends with a backslash, append the next line to it
# (fails if there are multiple lines ending with backslash...)
awk '/\\$/ {sub(/\\$/,""); getline t; print $0 t; next}; 1' file*

# print and sort the login names of all users
awk -F ":" '{ print $1 | "sort" }' /etc/passwd

# print the first 2 fields, in opposite order, of every line
awk '{print $2, $1}' file

# switch the first 2 fields of every line
awk '{temp = $1; $1 = $2; $2 = temp; print}' file

# print every line, deleting the second field of that line
awk '{ $2 = ""; print }'

# print in reverse order the fields of every line
awk '{for (i=NF; i>0; i--) printf("%s ",$i);printf("\n")}' file

# remove duplicate, consecutive lines (emulates "uniq")
awk 'a !~ $0; {a=$0}'

# remove duplicate, nonconsecutive lines
awk '! a[$0]++'                     # most concise script
awk '!($0 in a) {a[$0];print}'      # most efficient script

# concatenate every 5 lines of input, using a comma separator
# between fields
awk 'ORS=NR%5?",":"\n"' file

SELECTIVE PRINTING OF CERTAIN LINES:

# print first 10 lines of file (emulates behavior of "head")
awk 'NR < 11'

# print first line of file (emulates "head -1")
awk 'NR>1{exit};1'

# print the last 2 lines of a file (emulates "tail -2")
awk '{y=x "\n" $0; x=$0};END{print y}'

# print the last line of a file (emulates "tail -1")
awk 'END{print}'

# print only lines which match regular expression (emulates "grep")
awk '/regex/'

# print only lines which do NOT match regex (emulates "grep -v")
awk '!/regex/'

# print the line immediately before a regex, but not the line
# containing the regex
awk '/regex/{print x};{x=$0}'
awk '/regex/{print (x=="" ? "match on line 1" : x)};{x=$0}'

# print the line immediately after a regex, but not the line
# containing the regex
awk '/regex/{getline;print}'

# grep for AAA and BBB and CCC (in any order)
awk '/AAA/ && /BBB/ && /CCC/'

# grep for AAA and BBB and CCC (in that order)
awk '/AAA.*BBB.*CCC/'

# print only lines of 65 characters or longer
awk 'length > 64'

# print only lines of less than 65 characters
awk 'length < 65'

# print section of file from regular expression to end of file
awk '/regex/,0'
awk '/regex/,EOF'

# print section of file based on line numbers (lines 8-12, inclusive)
awk 'NR==8,NR==12'

# print line number 52
awk 'NR==52'
awk 'NR==52 {print;exit}'          # more efficient on large files

# print section of file between two regular expressions (inclusive)
awk '/Iowa/,/Montana/'             # case sensitive

SELECTIVE DELETION OF CERTAIN LINES:

# delete ALL blank lines from a file (same as "grep '.' ")
awk NF
awk '/./'