# Text Manipulation

Master the power tools for processing text: grep, sed, and awk.

## 📋 Table of Contents

- [grep - Pattern Matching](#grep---pattern-matching)
- [sed - Stream Editor](#sed---stream-editor)
- [awk - Text Processing](#awk---text-processing)
- [Other Useful Tools](#other-useful-tools)
- [Summary](#summary)

## grep - Pattern Matching

### Basic Usage

```bash
# Search for pattern
grep "error" /var/log/syslog

# Case insensitive
grep -i "error" /var/log/syslog

# Recursive search
grep -r "TODO" ./src/

# Show line numbers
grep -n "error" logfile.txt

# Show context
grep -B 2 -A 2 "error" logfile.txt  # 2 lines before/after
grep -C 3 "error" logfile.txt        # 3 lines context

# Count matches
grep -c "error" logfile.txt

# Invert match (lines NOT matching)
grep -v "debug" logfile.txt

# Match whole word
grep -w "error" logfile.txt

# Multiple patterns
grep -E "error|warning|critical" logfile.txt
egrep "error|warning" logfile.txt
```

### Regular Expressions

```bash
# Start of line
grep "^ERROR" logfile.txt

# End of line
grep "failed$" logfile.txt

# Any character
grep "err.r" logfile.txt

# Zero or more
grep "err*" logfile.txt

# IP address pattern
grep -E "\b[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\b" logfile.txt

# Email pattern
grep -E "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}" file.txt
```

### ripgrep (Modern Alternative)

```bash
# Install ripgrep
sudo apt install ripgrep

# Search recursively (faster than grep)
rg "pattern" ./src/

# Ignore files
rg "pattern" --ignore-file .gitignore

# File type filter
rg "function" --type py
rg "class" -t js
```

## sed - Stream Editor

### Basic Substitution

```bash
# Replace first occurrence per line
sed 's/old/new/' file.txt

# Replace all occurrences per line
sed 's/old/new/g' file.txt

# Replace in place
sed -i 's/old/new/g' file.txt

# Replace with backup
sed -i.bak 's/old/new/g' file.txt

# Case insensitive
sed 's/old/new/gi' file.txt
```

### Advanced sed

```bash
# Delete lines matching pattern
sed '/pattern/d' file.txt

# Delete blank lines
sed '/^$/d' file.txt

# Delete lines 5-10
sed '5,10d' file.txt

# Print only matching lines
sed -n '/pattern/p' file.txt

# Multiple substitutions
sed 's/foo/bar/g; s/baz/qux/g' file.txt
sed -e 's/foo/bar/' -e 's/baz/qux/' file.txt

# Replace in specific lines
sed '10s/old/new/' file.txt        # Line 10 only
sed '10,20s/old/new/' file.txt     # Lines 10-20

# Insert line before match
sed '/pattern/i\New line before' file.txt

# Insert line after match
sed '/pattern/a\New line after' file.txt

# Replace between patterns
sed '/start/,/end/s/old/new/g' file.txt
```

### Practical Examples

```bash
# Remove trailing whitespace
sed 's/[[:space:]]*$//' file.txt

# Convert DOS to Unix line endings
sed 's/\r$//' file.txt

# Extract value from config
sed -n 's/^port=//p' config.txt

# Change config value
sed -i 's/^port=.*/port=8080/' config.txt
```

## awk - Text Processing

### Basic Usage

```bash
# Print specific columns
awk '{print $1}' file.txt           # First column
awk '{print $1, $3}' file.txt       # First and third
awk '{print $NF}' file.txt          # Last column

# Custom delimiter
awk -F ':' '{print $1}' /etc/passwd
awk -F ',' '{print $2}' data.csv

# Print with formatting
awk '{printf "%-20s %s\n", $1, $2}' file.txt
```

### Patterns and Conditions

```bash
# Match pattern
awk '/error/ {print}' logfile.txt

# Condition
awk '$3 > 100 {print $1, $3}' data.txt

# NR = line number
awk 'NR > 1 {print}' file.txt       # Skip header
awk 'NR == 5 {print}' file.txt      # Print line 5
awk 'NR >= 10 && NR <= 20' file.txt # Lines 10-20

# NF = number of fields
awk 'NF > 0 {print}' file.txt       # Non-empty lines
```

### Calculations

```bash
# Sum column
awk '{sum += $2} END {print sum}' data.txt

# Average
awk '{sum += $2; count++} END {print sum/count}' data.txt

# Max value
awk 'BEGIN {max=0} $2 > max {max=$2} END {print max}' data.txt

# Count occurrences
awk '{count[$1]++} END {for (key in count) print key, count[key]}' logfile.txt
```

### Practical Examples

```bash
# Disk usage by filesystem (use%, mount point)
df -h | awk 'NR>1 {print $5, $6}'

# Process memory usage
ps aux | awk '{sum += $4} END {print "Total Memory: " sum "%"}'

# Apache log analysis
awk '{print $1}' access.log | sort | uniq -c | sort -rn | head -10

# Count HTTP status codes
awk '{count[$9]++} END {for (code in count) print code, count[code]}' access.log
```

## Other Useful Tools

### cut

```bash
# Cut by delimiter
cut -d ':' -f 1 /etc/passwd

# Cut by character position
cut -c 1-10 file.txt
```

### sort and uniq

```bash
# Sort alphabetically
sort file.txt

# Sort numerically
sort -n numbers.txt

# Sort by column
sort -k 2 file.txt

# Reverse sort
sort -r file.txt

# Unique lines
sort file.txt | uniq

# Count unique
sort file.txt | uniq -c
```

### tr

```bash
# Replace characters
tr 'a-z' 'A-Z' < file.txt    # Lowercase to uppercase

# Delete characters
tr -d '[:space:]' < file.txt  # Remove whitespace

# Squeeze repeats
tr -s ' ' < file.txt          # Multiple spaces to single
```

### wc

```bash
# Count lines, words, characters
wc file.txt

# Lines only
wc -l file.txt

# Words only
wc -w file.txt
```

## Summary

| Task | Command |
|------|---------|
| Search | `grep "pattern" file` |
| Replace | `sed 's/old/new/g' file` |
| Extract columns | `awk '{print $1}' file` |
| Sort | `sort file` |
| Count | `wc -l file` |

> [!TIP]
> **Pro Tip**: Combine these tools with pipes for powerful one-liners:

```bash
awk '{print $1}' access.log | sort | uniq -c | sort -rn | head -10
```