#!/usr/bin/env perl
#
# Clean one or more unified-diff patch files of stealth whitespace.
# Only lines *added* by the patch ("+" lines inside a hunk) are touched:
#   - trailing spaces, tabs and carriage returns are removed,
#   - spaces that precede a tab are dropped or folded into tabs,
#   - blank added lines at the tail of a hunk are removed (and the hunk
#     header's "+" line count is adjusted to match).
# Files are rewritten in place.
# WARNING: this can be a highly destructive operation.  Use with caution.
#
# Usage: <script> [-width #] files...
#   -width N / -w N   warn about added lines wider than N columns
#                     (default 79; 0 disables the check)
#

use strict;
use warnings;
use bytes;
use File::Basename;

# Default options
my $max_width = 79;

# Program name, used as a prefix in diagnostics.
my $name = basename($0);

# Clean up space-tab sequences, either by removing spaces or
# replacing them with tabs.
#
# Takes one string and returns the cleaned string.  Spaces are buffered
# (counted in $nsp) until the next character is seen: if that character is
# a tab, the spaces are replaced by however many tabs reach the same tab
# stop; otherwise the spaces are emitted unchanged.  Tab stops are every
# 8 columns.
sub clean_space_tabs
{
    no bytes;			# Tab alignment depends on characters

    my ($li) = @_;
    my $lo   = '';
    my $pos  = 0;		# Column reached by what is already in $lo
    my $nsp  = 0;		# Spaces seen but not yet emitted

    for my $c (split //, $li) {
	if ($c eq "\t") {
	    # Column the tab lands on, counting the pending spaces...
	    my $npos = ($pos + $nsp + 8) & ~7;
	    # ...and the number of tab stops crossed to get there.
	    my $ntab = ($npos >> 3) - ($pos >> 3);
	    $lo .= "\t" x $ntab;
	    $pos = $npos;
	    $nsp = 0;
	} elsif ($c eq "\n" || $c eq "\r") {
	    $lo .= " " x $nsp;
	    $nsp = 0;
	    $lo .= $c;
	    $pos = 0;		# Line break resets the column
	} elsif ($c eq " ") {
	    $nsp++;
	} else {
	    $lo .= " " x $nsp;
	    $pos += $nsp;
	    $nsp = 0;
	    $lo .= $c;
	    $pos++;
	}
    }

    # Spaces at the very end of the string are kept as they are.
    $lo .= " " x $nsp;
    return $lo;
}

# Compute the visual width of a string
#
# Returns the widest line in the string, in columns, with tabs expanded
# to 8-column tab stops.  "\n" starts a new line; every other character
# counts as one column.
sub strwidth
{
    no bytes;			# Tab alignment depends on characters

    my ($li) = @_;
    my $pos  = 0;		# Column on the current line
    my $mlen = 0;		# Widest line seen so far

    for my $c (split //, $li) {
	if ($c eq "\t") {
	    $pos = ($pos + 8) & ~7;
	} elsif ($c eq "\n") {
	    $mlen = $pos if ($pos > $mlen);
	    $pos = 0;
	} else {
	    $pos++;
	}
    }

    $mlen = $pos if ($pos > $mlen);
    return $mlen;
}

# Parse a unified-diff hunk header ("@@ -start[,count] +start[,count] @@...").
#
# Returns (minus_start, minus_count, plus_start, plus_count, tail) where
# tail is whatever follows the closing "@@" (without the final newline),
# or the empty list if the line is not a hunk header.  An omitted count
# means 1, as in the unified diff format.
sub parse_hunk_header
{
    my ($line) = @_;

    return unless $line =~
	/^\@\@\s+\-([0-9]+)(?:,([0-9]+))?\s+\+([0-9]+)(?:,([0-9]+))?\s\@\@(.*)$/;

    return ($1, (defined($2) ? $2 : 1), $3, (defined($4) ? $4 : 1), $5);
}

# Finish a complete hunk.
#
# Takes the hunk's lines (header first) and returns the lines to output.
# Blank added lines ("+\n") at the tail of the hunk are dropped; removed
# ("-") lines do not stop the scan, the first context or non-blank added
# line does.  If anything was dropped, the header is regenerated with the
# "+" line count reduced accordingly.
sub finish_hunk
{
    my ($header, @body) = @_;
    my @kept = ();
    my $adj  = 0;		# Number of blank added lines dropped
    my $done = 0;

    for my $hl (reverse @body) {
	if (!$done && $hl eq "+\n") {
	    $adj++;		# Skip this line
	    next;
	}
	$done = 1 if ($hl =~ /^[ +]/);
	unshift(@kept, $hl);
    }

    if ($adj) {
	my ($mstart, $mlin, $pstart, $plin, $tail) =
	    parse_hunk_header($header);
	die "$name: internal error: lost hunk header\n"
	    unless defined($mstart);

	$header = sprintf('@@ -%d,%d +%d,%d @@%s',
			  $mstart, $mlin, $pstart, $plin - $adj,
			  $tail) . "\n";
    }

    return ($header, @kept);
}

# Read a patch from the filehandle $fh and clean its added lines.
#
# $f is the file name used in diagnostics; $max_width is the column limit
# for the "line too long" warning (0 disables it).  Diagnostics go to
# STDERR.
#
# Returns (\@lines, $changed): the complete output and a flag saying
# whether it differs from the input.  Returns the empty list if the patch
# is malformed (after reporting it).
sub clean_patch
{
    my ($fh, $f, $max_width) = @_;

    my @lines       = ();	# Output lines
    my @hunk_lines  = ();	# Current hunk, header first
    my $in_hunk     = 0;
    my $changed     = 0;
    my $lineno      = 0;
    my $minus_lines = 0;	# "-" side lines still expected in hunk
    my $plus_lines  = 0;	# "+" side lines still expected in hunk

    while (defined(my $line = <$fh>)) {
	$lineno++;

	if (!$in_hunk) {
	    my ($mstart, $mlin, $pstart, $plin) = parse_hunk_header($line);

	    if (defined($mstart) && ($mlin || $plin)) {
		$minus_lines = $mlin;
		$plus_lines  = $plin;
		$in_hunk     = 1;
		@hunk_lines  = ($line);
	    } else {
		# Not a header, or a header for an empty hunk: pass through.
		push(@lines, $line);
	    }
	    next;
	}

	# We're in a hunk

	if ($line =~ /^\+/) {
	    $plus_lines--;

	    my $text = substr($line, 1);
	    $text =~ s/[ \t\r]*$//; # Remove trailing spaces
	    $text = clean_space_tabs($text);

	    my $l_width = strwidth($text);
	    if ($max_width && $l_width > $max_width) {
		print STDERR
		    "$f:$lineno: adds line exceeds $max_width ",
		    "characters ($l_width)\n";
	    }

	    $changed = 1 if ('+'.$text ne $line);
	    push(@hunk_lines, '+'.$text);
	} elsif ($line =~ /^\-/) {
	    $minus_lines--;
	    push(@hunk_lines, $line);
	} elsif ($line =~ /^ /) {
	    $plus_lines--;
	    $minus_lines--;
	    push(@hunk_lines, $line);
	} elsif ($line =~ /^\\/) {
	    # "\ No newline at end of file" marker: belongs to the
	    # previous line and counts toward neither side.
	    push(@hunk_lines, $line);
	} else {
	    print STDERR "$name: $f: malformed patch\n";
	    return;
	}

	if ($plus_lines < 0 || $minus_lines < 0) {
	    print STDERR "$name: $f: malformed patch\n";
	    return;
	} elsif ($plus_lines == 0 && $minus_lines == 0) {
	    # End of a hunk.  Process this hunk.
	    my $before = scalar(@hunk_lines);
	    my @h = finish_hunk(@hunk_lines);
	    $changed = 1 if (scalar(@h) != $before);
	    push(@lines, @h);

	    @hunk_lines = ();
	    $in_hunk = 0;
	}
    }

    if ($in_hunk) {
	# Input ended in the middle of a hunk
	print STDERR "$name: $f: malformed patch\n";
	return;
    }

    return (\@lines, $changed);
}

# Clean a single patch file in place.
#
# Skips (with a message on STDERR) anything that is not a plain file,
# cannot be opened read/write, looks binary, or is a malformed patch.
# The file is only rewritten if cleaning changed something.  Dies if the
# rewrite itself fails, since the file may then be left inconsistent.
sub process_file
{
    my ($f, $max_width) = @_;

    print STDERR "$name: $f\n";

    if (! -f $f) {
	print STDERR "$f: not a file\n";
	return;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
	print STDERR "$name: Cannot open file: $f: $!\n";
	return;
    }

    # NOTE(review): with binmode and no decoding layer the strings handed
    # to clean_space_tabs()/strwidth() are presumably raw bytes, so a
    # multi-byte character would count once per byte -- confirm.
    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my $data;

    while ((read($fh, $data, 65536) || 0) > 0) {
	if ($data =~ /\0/) {
	    $is_binary = 1;
	    last;
	}
    }

    if ($is_binary) {
	print STDERR "$name: $f: binary file\n";
	close($fh);
	return;
    }

    if (!seek($fh, 0, 0)) {
	print STDERR "$name: Cannot rewind file: $f: $!\n";
	close($fh);
	return;
    }

    my ($lines, $changed) = clean_patch($fh, $f, $max_width);

    if (defined($lines) && $changed) {
	# Only write to the file if changed
	seek($fh, 0, 0)
	    or die "$name: Cannot rewind file: $f: $!\n";
	print {$fh} @$lines
	    or die "$name: Failed to write modified file: $f: $!\n";

	# tell() reports failure as -1, not undef.
	my $where = tell($fh);
	if ($where < 0 || !truncate($fh, $where)) {
	    die "$name: Failed to truncate modified file: $f: $!\n";
	}
    }

    close($fh)
	or print STDERR "$name: $f: close failed: $!\n";
    return;
}

# Command line: options start with "-", everything else is a file.
my @files = ();

while (defined(my $arg = shift(@ARGV))) {
    if ($arg =~ /^-/) {
	if (($arg eq '-width' || $arg eq '-w') &&
	    @ARGV && $ARGV[0] =~ /^[0-9]+$/) {
	    $max_width = shift(@ARGV)+0;
	} else {
	    # Unknown option, or -width without a numeric argument
	    print STDERR "Usage: $name [-width #] files...\n";
	    exit 1;
	}
    } else {
	push(@files, $arg);
    }
}

foreach my $f ( @files ) {
    process_file($f, $max_width);
}