#!/usr/bin/perl -w
#
# Clean one or more text files of stealth whitespace.
# WARNING: this can be a highly destructive operation.  Use with caution.
#
# Usage: cleanfile [-width #] files...
#
# Each named file is rewritten in place with:
#   - trailing spaces, tabs and carriage returns removed from every line,
#   - spaces that immediately precede a tab removed (or turned into tabs),
#   - blank lines at the end of the file discarded.
# Lines wider than the -width limit (default 79, 0 disables the check) are
# reported on STDERR but are not modified.  Arguments that are not regular
# files, and files containing a zero byte, are reported and skipped.
#

use strict;
use warnings;
use bytes;
use File::Basename;

# Program name, used as a prefix in diagnostics
my $name = basename($0);

# Default options
my $max_width = 79;

# Clean up space-tab sequences, either by removing spaces or
# replacing them with tabs.
#
# Takes one string and returns the cleaned copy.  Spaces are held back in
# a counter until the next character is known: if it is a tab, the spaces
# are dropped and as many tabs are emitted as are needed to reach the tab
# stop the original text would have reached; otherwise the spaces are
# written out unchanged.  Tab stops are every 8 columns.
sub clean_space_tabs($)
{
    no bytes;                   # Tab alignment depends on characters

    my($li) = @_;
    my $lo  = '';               # Output built so far
    my $pos = 0;                # Column reached by the text in $lo
    my $nsp = 0;                # Spaces seen but not yet written to $lo

    for (my $i = 0; $i < length($li); $i++) {
        my $c = substr($li, $i, 1);

        if ($c eq "\t") {
            # Column the tab lands on, counting the pending spaces
            my $npos = ($pos + $nsp + 8) & ~7;
            # Number of tab stops between the current and the new column
            my $ntab = ($npos >> 3) - ($pos >> 3);
            $lo .= "\t" x $ntab;
            $pos = $npos;
            $nsp = 0;
        } elsif ($c eq " ") {
            $nsp++;
        } else {
            # Any other character: the pending spaces are real content
            $lo .= " " x $nsp;
            $nsp = 0;
            $lo .= $c;
            if ($c eq "\n" || $c eq "\r") {
                $pos = 0;       # Line terminators restart the column count
            } else {
                $pos += $nsp + 1;
            }
        }
    }

    # Spaces at the very end of the string are kept as they were
    $lo .= " " x $nsp;
    return $lo;
}

# Compute the visual width of a string.
#
# Returns the width of the widest line in the string.  A tab advances to
# the next multiple of 8 columns, a newline starts a new line, and every
# other character counts as one column.
sub strwidth($)
{
    no bytes;                   # Tab alignment depends on characters

    my($li) = @_;
    my $pos  = 0;               # Column on the current line
    my $mlen = 0;               # Widest line seen so far

    for (my $i = 0; $i < length($li); $i++) {
        my $c = substr($li, $i, 1);

        if ($c eq "\t") {
            $pos = ($pos + 8) & ~7;
        } elsif ($c eq "\n") {
            $mlen = $pos if ($pos > $mlen);
            $pos = 0;
        } else {
            $pos++;
        }
    }

    # Account for a final line that has no terminating newline
    $mlen = $pos if ($pos > $mlen);
    return $mlen;
}

# Print the usage message and exit with a failure status.
sub usage
{
    print STDERR "Usage: $name [-width #] files...\n";
    exit 1;
}

# Clean a single file in place.
#
# Problems that leave the file untouched (not a regular file, cannot be
# opened, looks binary, cannot be rewound) are reported on STDERR and the
# file is skipped.  A failure while the file is being rewritten is fatal,
# because the file may then be only partially updated.
sub clean_file
{
    my($f) = @_;

    print STDERR "$name: $f\n";

    if (! -f $f) {
        print STDERR "$f: not a file\n";
        return;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
        print STDERR "$name: Cannot open file: $f: $!\n";
        return;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my $data;

    while (read($fh, $data, 65536)) {
        if ($data =~ /\0/) {
            $is_binary = 1;
            last;
        }
    }

    if ($is_binary) {
        print STDERR "$name: $f: binary file\n";
        close($fh);             # Do not leave the handle open when skipping
        return;
    }

    if (!seek($fh, 0, 0)) {
        print STDERR "$name: Cannot rewind file: $f: $!\n";
        close($fh);
        return;
    }

    my $in_bytes    = 0;        # Bytes read from the file
    my $out_bytes   = 0;        # Bytes that will be written back
    my $blank_bytes = 0;        # Bytes held in @blanks
    my @blanks      = ();       # Blank lines not yet known to be kept
    my @lines       = ();       # Cleaned lines to write back
    my $lineno      = 0;

    while (defined(my $line = <$fh>)) {
        $lineno++;
        $in_bytes += length($line);
        $line =~ s/[ \t\r]*$//; # Remove trailing spaces
        $line = clean_space_tabs($line);

        if ($line eq "\n") {
            # Hold blank lines back; they are only kept if a non-blank
            # line follows them.
            push(@blanks, $line);
            $blank_bytes += length($line);
        } else {
            push(@lines, @blanks);
            $out_bytes += $blank_bytes;
            push(@lines, $line);
            $out_bytes += length($line);
            @blanks = ();
            $blank_bytes = 0;
        }

        my $l_width = strwidth($line);
        if ($max_width && $l_width > $max_width) {
            print STDERR
                "$f:$lineno: line exceeds $max_width characters ($l_width)\n";
        }
    }

    # Any blanks at the end of the file are discarded

    if ($in_bytes != $out_bytes) {
        # Only write to the file if changed.  Every cleanup step removes
        # bytes, so an unchanged byte count means unchanged content.

        # A failed rewind would append the cleaned text after the old text
        seek($fh, 0, 0)
            or die "$name: Failed to rewind modified file: $f: $!\n";

        print {$fh} @lines
            or die "$name: Failed to write modified file: $f: $!\n";

        # tell() reports failure as -1 rather than undef
        my $where = tell($fh);
        if (!defined($where) || $where < 0 || !truncate($fh, $where)) {
            die "$name: Failed to truncate modified file: $f: $!\n";
        }
    }

    # Buffered write errors only surface when the handle is closed
    close($fh)
        or die "$name: Failed to close file: $f: $!\n";

    return;
}

# Parse the command line, then clean every file named on it.
sub main
{
    my @args  = @_;
    my @files = ();

    while (defined(my $arg = shift(@args))) {
        if ($arg =~ /^-/) {
            if ($arg eq '-width' || $arg eq '-w') {
                my $width = shift(@args);
                usage() unless defined($width);   # Option given without a value
                $max_width = $width + 0;
            } else {
                usage();
            }
        } else {
            push(@files, $arg);
        }
    }

    foreach my $f (@files) {
        clean_file($f);
    }

    return 0;
}

# Run only when executed as a program, so that the helper routines can be
# loaded (for example by a test) without touching any files.
main(@ARGV) unless caller;

1;