#!/usr/bin/perl -w
#
# Clean one or more patch files (unified diff format) of stealth whitespace.
# Arguments that are not regular files are reported and skipped.
# WARNING: this can be a highly destructive operation.  Use with caution.
#
# For every line a patch *adds* this script:
#   - strips trailing spaces, tabs and carriage returns;
#   - removes spaces hidden in front of a tab (re-expressing them as tabs
#     where needed so the visual alignment is unchanged);
#   - drops blank added lines at the very end of a hunk and fixes up the
#     hunk header line count accordingly;
#   - warns on STDERR when the added line is wider than the -width limit.
# Removed lines, context lines and everything outside hunks are left alone.
# Files are rewritten in place, and only when something actually changed.
#

use strict;
use warnings;
use bytes;
use File::Basename;

# Default options
my $max_width = 79;             # 0 disables the line-width warning

my $name = basename($0);        # Program name used as message prefix

# Clean up space-tab sequences, either by removing spaces or
# replacing them with tabs.
#
# Takes one string (may contain several lines) and returns the cleaned
# string.  Runs of spaces that are followed by a tab are dropped and the
# number of tabs emitted is chosen so that the text after the tab lands
# on the same 8-column tab stop as before.  Spaces followed by anything
# else are kept unchanged.
sub clean_space_tabs($)
{
    no bytes;                   # Tab alignment depends on characters

    my ($li) = @_;
    my $lo   = '';
    my $pos  = 0;               # Output column after the text emitted so far
    my $nsp  = 0;               # Spaces seen but not yet emitted

    for (my $i = 0; $i < length($li); $i++) {
        my $c = substr($li, $i, 1);

        if ($c eq "\t") {
            # Tab stop reached by the pending spaces plus this tab ...
            my $npos = ($pos + $nsp + 8) & ~7;
            # ... and the number of tabs needed to get there without spaces
            my $ntab = ($npos >> 3) - ($pos >> 3);
            $lo .= "\t" x $ntab;
            $pos = $npos;
            $nsp = 0;
        } elsif ($c eq " ") {
            $nsp++;
        } else {
            # Any other character: pending spaces are real, flush them
            $lo .= " " x $nsp;
            $pos += $nsp;
            $nsp = 0;
            $lo .= $c;
            if ($c eq "\n" || $c eq "\r") {
                $pos = 0;       # Line terminators restart the column count
            } else {
                $pos++;
            }
        }
    }

    $lo .= " " x $nsp;          # Spaces at the very end of the input
    return $lo;
}

# Compute the visual width of a string
#
# Tabs advance to the next multiple of 8 columns, every other character
# counts as one column.  For a multi-line string the width of the widest
# line is returned.
sub strwidth($)
{
    no bytes;                   # Tab alignment depends on characters

    my ($li) = @_;
    my $pos  = 0;               # Column on the current line
    my $mlen = 0;               # Widest line seen so far

    for (my $i = 0; $i < length($li); $i++) {
        my $c = substr($li, $i, 1);

        if ($c eq "\t") {
            $pos = ($pos + 8) & ~7;
        } elsif ($c eq "\n") {
            $mlen = $pos if ($pos > $mlen);
            $pos = 0;
        } else {
            $pos++;
        }
    }

    $mlen = $pos if ($pos > $mlen);
    return $mlen;
}

# Print the usage message and exit with status 1.
sub _usage
{
    print STDERR "Usage: $name [-width #] files...\n";
    exit 1;
}

# Parse a unified-diff hunk header line.
#
# Returns ($mstart, $mlin, $pstart, $plin, $tail) on success, where $tail
# is whatever follows the closing "@@" (without the final newline), or the
# empty list if the line is not a hunk header.  The unified format omits
# the line count when it is 1 ("@@ -3 +3,2 @@"), so a missing count is
# reported as 1.
sub _parse_hunk_header
{
    my ($line) = @_;

    return () unless $line =~
        /^\@\@\s+\-([0-9]+)(?:,([0-9]+))?\s+\+([0-9]+)(?:,([0-9]+))?\s\@\@(.*)$/;

    my ($mstart, $mlin, $pstart, $plin, $tail) = ($1, $2, $3, $4, $5);
    $mlin = 1 unless defined($mlin);
    $plin = 1 unless defined($plin);

    return ($mstart, $mlin, $pstart, $plin, $tail);
}

# Finish a complete hunk: drop blank added lines from its end.
#
# Takes the hunk header line followed by the (already cleaned) hunk body
# lines.  Walking backwards, every "+\n" line is dropped until the first
# context or non-blank added line is met; removed lines and "\ No newline"
# markers are kept and do not stop the scan.  If anything was dropped, the
# header is regenerated with the reduced "+" line count.
#
# Returns ($adj, $header, @body) where $adj is the number of dropped lines.
sub _finish_hunk
{
    my ($header, @body) = @_;

    my @kept_rev = ();          # Surviving body lines, in reverse order
    my $adj      = 0;
    my $done     = 0;

    foreach my $hl (reverse @body) {
        if (!$done && $hl eq "+\n") {
            $adj++;             # Skip this line
            next;
        }
        $done = 1 if ($hl =~ /^[ +]/);
        push(@kept_rev, $hl);
    }

    if ($adj) {
        my ($mstart, $mlin, $pstart, $plin, $tail) =
            _parse_hunk_header($header);
        die "$name: internal error: unparsable hunk header: $header"
            unless defined($mstart);

        # $tail doesn't include the final newline
        $header = sprintf("\@\@ -%d,%d +%d,%d \@\@%s\n",
                          $mstart, $mlin, $pstart, $plin - $adj, $tail);
    }

    return ($adj, $header, reverse @kept_rev);
}

# Read a patch from filehandle $fh and clean the lines it adds.
#
# $f is the file name, used only in messages.  Width warnings and
# "malformed patch" errors are printed to STDERR.
#
# Returns ($ok, $changed, \@lines):
#   $ok      - false if the patch was malformed (output must be discarded)
#   $changed - true if the cleaned output differs from the input
#   \@lines  - the complete cleaned patch, one element per line
sub _clean_patch_stream
{
    my ($fh, $f) = @_;

    my @lines       = ();       # Finished output
    my @hunk_lines  = ();       # Header + body of the hunk being read
    my $in_hunk     = 0;
    my $changed     = 0;
    my $lineno      = 0;
    my $minus_lines = 0;        # "-"/context lines still expected
    my $plus_lines  = 0;        # "+"/context lines still expected

    while (defined(my $line = <$fh>)) {
        $lineno++;

        if (!$in_hunk) {
            my @hdr = _parse_hunk_header($line);
            if (@hdr && ($hdr[1] || $hdr[3])) {
                ($minus_lines, $plus_lines) = @hdr[1, 3];
                $in_hunk = 1;
                @hunk_lines = ($line);
            } else {
                # Not a header, or a degenerate header describing zero
                # lines: either way it is passed through untouched.
                push(@lines, $line);
            }
            next;
        }

        # We're in a hunk

        if ($line =~ /^\+/) {
            $plus_lines--;

            my $text = substr($line, 1);
            $text =~ s/[ \t\r]*$//;     # Remove trailing spaces
            $text = clean_space_tabs($text);

            my $l_width = strwidth($text);
            if ($max_width && $l_width > $max_width) {
                print STDERR
                    "$f:$lineno: adds line exceeds $max_width ",
                    "characters ($l_width)\n";
            }

            $changed = 1 if ('+'.$text ne $line);
            push(@hunk_lines, '+'.$text);
        } elsif ($line =~ /^\-/) {
            $minus_lines--;
            push(@hunk_lines, $line);
        } elsif ($line =~ /^ /) {
            $plus_lines--;
            $minus_lines--;
            push(@hunk_lines, $line);
        } elsif ($line =~ /^\\/) {
            # "\ No newline at end of file" marker: belongs to the previous
            # line and is not counted in the hunk header.
            push(@hunk_lines, $line);
            next;
        } else {
            print STDERR "$name: $f: malformed patch\n";
            return (0, 0, \@lines);
        }

        if ($plus_lines < 0 || $minus_lines < 0) {
            print STDERR "$name: $f: malformed patch\n";
            return (0, 0, \@lines);
        } elsif ($plus_lines == 0 && $minus_lines == 0) {
            # End of a hunk.  Process this hunk.
            my ($adj, @h) = _finish_hunk(@hunk_lines);
            $changed = 1 if $adj;

            push(@lines, @h);   # Transfer to the output array
            @hunk_lines = ();
            $in_hunk = 0;
        }
    }

    if ($in_hunk) {
        # Input ended before the hunk had as many lines as its header said
        print STDERR "$name: $f: malformed patch\n";
        return (0, 0, \@lines);
    }

    return (1, $changed, \@lines);
}

# Clean one patch file in place.
#
# Non-files, unreadable files, binary files and malformed patches are
# reported on STDERR and left untouched.  Dies if the file cannot be
# completely rewritten, since its contents are then in an unknown state.
sub _process_file
{
    my ($f) = @_;

    print STDERR "$name: $f\n";

    if (! -f $f) {
        print STDERR "$f: not a file\n";
        return;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
        print STDERR "$name: Cannot open file: $f: $!\n";
        return;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my $data;

    while (1) {
        my $got = read($fh, $data, 65536);
        last unless $got;       # 0 at end of file, undef on read error
        if ($data =~ /\0/) {
            $is_binary = 1;
            last;
        }
    }

    if ($is_binary) {
        print STDERR "$name: $f: binary file\n";
        close($fh);
        return;
    }

    seek($fh, 0, 0);

    my ($ok, $changed, $lines) = _clean_patch_stream($fh, $f);

    if ($ok && $changed) {
        # Only write to the file if changed
        seek($fh, 0, 0);
        print {$fh} @$lines
            or die "$name: Failed to write modified file: $f: $!\n";

        # The cleaned patch is never longer than the space it needs, but
        # it is usually shorter than the original: cut off the old tail.
        my $where = tell($fh);
        if (!defined($where) || $where < 0 || !truncate($fh, $where)) {
            die "$name: Failed to truncate modified file: $f: $!\n";
        }

        # Buffered write errors only show up when the handle is closed
        close($fh)
            or die "$name: Failed to write modified file: $f: $!\n";
        return;
    }

    close($fh);
    return;
}

# Parse the command line and clean every file named on it.
#
# Options:  -width N  (or -w N)   warn about added lines wider than N
#                                 columns; 0 disables the check.
sub main
{
    my @args  = @_;
    my @files = ();

    while (defined(my $arg = shift(@args))) {
        if ($arg =~ /^-/) {
            if ($arg eq '-width' || $arg eq '-w') {
                my $width = shift(@args);
                _usage() unless (defined($width) && $width =~ /^[0-9]+$/);
                $max_width = $width + 0;
            } else {
                _usage();
            }
        } else {
            push(@files, $arg);
        }
    }

    foreach my $f (@files) {
        _process_file($f);
    }

    return;
}

# Run only when executed as a script, so the subs can be loaded on their own
main(@ARGV) unless caller;