1#!/usr/bin/env perl 2# 3# Copyright (c) 2014, Juniper Networks, Inc. 4# All rights reserved. 5# This SOFTWARE is licensed under the LICENSE provided in the 6# ../Copyright file. By downloading, installing, copying, or otherwise 7# using the SOFTWARE, you agree to be bound by the terms of that 8# LICENSE. 9# Phil Shafer, August 2014 10# 11# 12# xolint -- a lint for inspecting xo_emit format strings 13# 14# Yes, that's a long way to go for a pun. 15 16%vocabulary = (); 17 18sub main { 19 while ($ARGV[0] =~ /^-/) { 20 $_ = shift @ARGV; 21 $opt_cpp = 1 if /^-c/; 22 $opt_cflags .= shift @ARGV if /^-C/; 23 $opt_debug = 1 if /^-d/; 24 extract_docs() if /^-D/; 25 $opt_info = $opt_vocabulary = 1 if /^-I/; 26 $opt_print = 1 if /^-p/; 27 $opt_vocabulary = 1 if /^-V/; 28 extract_samples() if /^-X/; 29 } 30 31 for $file (@ARGV) { 32 parse_file($file); 33 } 34 35 if ($opt_info) { 36 print "static xo_info_t xo_info_table[] = {\n"; 37 for $name (sort(keys(%vocabulary))) { 38 print " { \"", $name, "\", \"type\", \"desc\" },\n"; 39 } 40 print "};\n"; 41 print "static int xo_info_count = " 42 . "(sizeof(xo_info_table) / sizeof(xo_info_table[0]));\n\n"; 43 print "#define XO_SET_INFO() \\\n"; 44 print " xo_set_info(NULL, xo_info_table, xo_info_count)\n"; 45 } elsif ($opt_vocabulary) { 46 for $name (sort(keys(%vocabulary))) { 47 print $name, "\n"; 48 } 49 } 50} 51 52sub extract_samples { 53 my $x = "\#" . "\@"; 54 my $cmd = "grep -B1 -i '$x Should be' $0 | grep xo_emit | sed 's/.*\#*\@//'"; 55 system($cmd); 56 exit(0); 57} 58 59sub extract_docs { 60 my $x = "\#" . "\@"; 61 my $cmd = "grep -B1 '$x' $0"; 62 open INPUT, "$cmd |"; 63 local @input = <INPUT>; 64 close INPUT; 65 my $ln, $new = 0, $first = 1, $need_nl; 66 67 for ($ln = 0; $ln <= $#input; $ln++) { 68 chomp($_ = $input[$ln]); 69 if (/^--/) { 70 $ln += 1; 71 $new = 1; 72 next; 73 } 74 if ($first) { 75 $new = 1; 76 $first = 0; 77 next; 78 } 79 80 s/\s*\#\@\s*//; 81 82 if ($new) { 83 if ($need_nl) { 84 print "\n\n"; 85 $need_nl = 0; 86 } 87 88 print "*** '$_'\n\n"; 89 print "The message \"$_\" can be caused by code like:\n\n"; 90 $new = 0; 91 92 } elsif (/xo_emit\s*\(/) { 93 s/^\s+//; 94 print " $_\n\n"; 95 96 } elsif (/^Should be/i) { 97 print "This code should be replaced with code like:\n\n"; 98 99 } else { 100 print "$_\n"; 101 $need_nl = 1; 102 } 103 } 104 105 exit(0); 106} 107 108sub parse_file { 109 local($file) = @_; 110 local($errors, $warnings, $info) = (0, 0, 0); 111 local $curfile = $file; 112 local $curln = 0; 113 114 if ($opt_cpp) { 115 die "no such file" unless -f $file; 116 open INPUT, "cpp $opt_cflags $file |"; 117 } else { 118 open INPUT, $file || die "cannot open input file '$file'"; 119 } 120 local @input = <INPUT>; 121 close INPUT; 122 123 local $ln, $rln, $line, $replay; 124 125 for ($ln = 0; $ln < $#input; $ln++) { 126 $line = $input[$ln]; 127 $curln += 1; 128 129 if ($line =~ /^\#/) { 130 my($num, $fn) = ($line =~ /\#\s*(\d+)\s+"(.+)"/); 131 ($curfile, $curln) = ($fn, $num) if $num; 132 next; 133 } 134 135 next unless $line =~ /xo_emit\(/; 136 137 @tokens = parse_tokens(); 138 print "token:\n '" . join("'\n '", @tokens) . "'\n" 139 if $opt_debug; 140 check_format($tokens[0]); 141 } 142 143 print $file . ": $errors errors, $warnings warnings, $info info\n" 144 unless $opt_vocabulary; 145} 146 147sub parse_tokens { 148 my $full = "$'"; 149 my @tokens = (); 150 my %pairs = ( "{" => "}", "[" => "]", "(" => ")" ); 151 my %quotes = ( "\"" => "\"", "'" => "'" ); 152 local @data = split(//, $full); 153 local @open = (); 154 local $current = ""; 155 my $quote = ""; 156 local $off = 0; 157 my $ch; 158 159 $replay = $curln . " " . $line; 160 $rln = $ln + 1; 161 162 for (;;) { 163 get_tokens() if $off > $#data; 164 die "out of data" if $off > $#data; 165 $ch = $data[$off++]; 166 167 print "'$ch' ($quote) ($#open) [" . join("", @open) . "]\n" 168 if $opt_debug; 169 170 last if $ch eq ";" && $#open < 0; 171 172 if ($ch eq "," && $quote eq "" && $#open < 0) { 173 print "[$current]\n" if $opt_debug; 174 push @tokens, $current; 175 $current = ""; 176 next; 177 } 178 179 next if $ch =~ /[ \t\n\r]/ && $quote eq "" && $#open < 0; 180 181 $current .= $ch; 182 183 if ($quote) { 184 if ($ch eq $quote) { 185 $quote = ""; 186 } 187 next; 188 } 189 if ($quotes{$ch}) { 190 $quote = $quotes{$ch}; 191 $current = substr($current, 0, -2) if $current =~ /""$/; 192 next; 193 } 194 195 if ($pairs{$ch}) { 196 push @open, $pairs{$ch}; 197 next; 198 } 199 200 if ($#open >= 0 && $ch eq $open[$#open]) { 201 pop @open; 202 next; 203 } 204 } 205 206 push @tokens, substr($current, 0, -1); 207 return @tokens; 208} 209 210sub get_tokens { 211 if ($ln + 1 < $#input) { 212 $line = $input[++$ln]; 213 $curln += 1; 214 $replay .= $curln . " " . $line; 215 @data = split(//, $line); 216 $off = 0; 217 } 218} 219 220sub check_format { 221 my($format) = @_; 222 223 return unless $format =~ /^".*"$/; 224 225 my @data = split(//, $format); 226 my $ch; 227 my $braces = 0; 228 local $count = 0; 229 my $content = ""; 230 my $off; 231 my $phase = 0; 232 my @build = (); 233 local $last, $prev = ""; 234 235 # Nukes quotes 236 pop @data; 237 shift @data; 238 239 for (;;) { 240 last if $off > $#data; 241 $ch = $data[$off++]; 242 243 if ($ch eq "\\") { 244 $ch = $data[$off++]; 245 $off += 1 if $ch eq "\\"; # double backslash: "\\/" 246 next; 247 } 248 249 if ($braces) { 250 if ($ch eq "}") { 251 check_field(@build); 252 $braces = 0; 253 @build = (); 254 $phase = 0; 255 next; 256 } elsif ($phase == 0 && $ch eq ":") { 257 $phase += 1; 258 next; 259 } elsif ($ch eq "/") { 260 $phase += 1; 261 next; 262 } 263 264 } else { 265 if ($ch eq "{") { 266 check_text($build[0]) if length($build[0]); 267 $braces = 1; 268 @build = (); 269 $last = $prev; 270 next; 271 } 272 } 273 274 $prev = $ch; 275 $build[$phase] .= $ch; 276 } 277 278 if ($braces) { 279 error("missing closing brace"); 280 check_field(@build); 281 } else { 282 check_text($build[0]) if length($build[0]); 283 } 284} 285 286sub check_text { 287 my($text) = @_; 288 289 print "checking text: [$text]\n" if $opt_debug; 290 291 #@ A percent sign appearing in text is a literal 292 #@ xo_emit("cost: %d", cost); 293 #@ Should be: 294 #@ xo_emit("{L:cost}: {:cost/%d}", cost); 295 #@ This can be a bit surprising and could be a field that was not 296 #@ properly converted to a libxo-style format string. 297 info("a percent sign appearing in text is a literal") if $text =~ /%/; 298} 299 300sub check_field { 301 my(@field) = @_; 302 print "checking field: [" . join("][", @field) . "]\n" if $opt_debug; 303 304 if ($opt_vocabulary) { 305 $vocabulary{$field[1]} = 1 306 if $field[1] && $field[0] !~ /[DELNPTUW\[\]]/; 307 return; 308 } 309 310 #@ Last character before field definition is a field type 311 #@ A common typo: 312 #@ xo_emit("{T:Min} T{:Max}"); 313 #@ Should be: 314 #@ xo_emit("{T:Min} {T:Max}"); 315 #@ Twiddling the "{" and the field role is a common typo. 316 info("last character before field definition is a field type ($last)") 317 if $last =~ /[DELNPTUVW\[\]]/ && $field[0] !~ /[DELNPTUVW\[\]]/; 318 319 #@ Encoding format uses different number of arguments 320 #@ xo_emit("{:name/%6.6s %%04d/%s}", name, number); 321 #@ Should be: 322 #@ xo_emit("{:name/%6.6s %04d/%s-%d}", name, number); 323 #@ Both format should consume the same number of arguments off the stack 324 my $cf = count_args($field[2]); 325 my $ce = count_args($field[3]); 326 warn("encoding format uses different number of arguments ($cf/$ce)") 327 if $ce >= 0 && $cf >= 0 && $ce != $cf; 328 329 #@ Only one field role can be used 330 #@ xo_emit("{LT:Max}"); 331 #@ Should be: 332 #@ xo_emit("{T:Max}"); 333 my(@roles) = ($field[0] !~ /([DELNPTUVW\[\]]).*([DELNPTUVW\[\]])/); 334 error("only one field role can be used (" . join(", ", @roles) . ")") 335 if $#roles > 0; 336 337 # Field is a note, label, or title 338 if ($field[0] =~ /[DLNT]/) { 339 340 #@ Potential missing slash after N, L, or T with format 341 #@ xo_emit("{T:%6.6s}\n", "Max"); 342 #@ should be: 343 #@ xo_emit("{T:/%6.6s}\n", "Max"); 344 #@ The "%6.6s" will be a literal, not a field format. While 345 #@ it's possibly valid, it's likely a missing "/". 346 info("potential missing slash after N, L, or T with format") 347 if $field[1] =~ /%/; 348 349 #@ Format cannot be given when content is present (roles: DNLT) 350 #@ xo_emit("{T:Max/%6.6s}", "Max"); 351 #@ Fields with the D, N, L, or T roles can't have both 352 #@ static literal content ("{T:Title}") and a 353 #@ format ("{T:/%s}"). 354 #@ This error will also occur when the content has a backslash 355 #@ in it, like "{N:Type of I/O}"; backslashes should be escaped, 356 #@ like "{N:Type of I\\/O}". Note the double backslash, one for 357 #@ handling 'C' strings, and one for libxo. 358 error("format cannot be given when content is present") 359 if $field[1] && $field[2]; 360 361 #@ An encoding format cannot be given (roles: DNLT) 362 #@ xo_emit("{T:Max//%s}", "Max"); 363 #@ Fields with the D, N, L, and T roles are not emitted in 364 #@ the 'encoding' style (JSON, XML), so an encoding format 365 #@ would make no sense. 366 error("encoding format cannot be given when content is present") 367 if $field[3]; 368 } 369 370 # A value field 371 if (length($field[0]) == 0 || $field[0] =~ /V/) { 372 373 #@ Value field must have a name (as content)") 374 #@ xo_emit("{:/%s}", "value"); 375 #@ Should be: 376 #@ xo_emit("{:tag-name/%s}", "value"); 377 #@ The field name is used for XML and JSON encodings. These 378 #@ tags names are static and must appear directly in the 379 #@ field descriptor. 380 error("value field must have a name (as content)") 381 unless $field[1]; 382 383 #@ Use hyphens, not underscores, for value field name 384 #@ xo_emit("{:no_under_scores}", "bad"); 385 #@ Should be: 386 #@ xo_emit("{:no-under-scores}", "bad"); 387 #@ Use of hyphens is traditional in XML, and the XOF_UNDERSCORES 388 #@ flag can be used to generate underscores in JSON, if desired. 389 #@ But the raw field name should use hyphens. 390 error("use hyphens, not underscores, for value field name") 391 if $field[1] =~ /_/; 392 393 #@ Value field name cannot start with digit 394 #@ xo_emit("{:10-gig/}"); 395 #@ Should be: 396 #@ xo_emit("{:ten-gig/}"); 397 #@ XML element names cannot start with a digit. 398 error("value field name cannot start with digit") 399 if $field[1] =~ /^[0-9]/; 400 401 #@ Value field name should be lower case 402 #@ xo_emit("{:WHY-ARE-YOU-SHOUTING}", "NO REASON"); 403 #@ Should be: 404 #@ xo_emit("{:why-are-you-shouting}", "no reason"); 405 #@ Lower case is more civilized. Even TLAs should be lower case 406 #@ to avoid scenarios where the differences between "XPath" and 407 #@ "Xpath" drive your users crazy. Lower case rules the seas. 408 error("value field name should be lower case") 409 if $field[1] =~ /[A-Z]/; 410 411 #@ Value field name should be longer than two characters 412 #@ xo_emit("{:x}", "mumble"); 413 #@ Should be: 414 #@ xo_emit("{:something-meaningful}", "mumble"); 415 #@ Field names should be descriptive, and it's hard to 416 #@ be descriptive in less than two characters. Consider 417 #@ your users and try to make something more useful. 418 #@ Note that this error often occurs when the field type 419 #@ is placed after the colon ("{:T/%20s}"), instead of before 420 #@ it ("{T:/20s}"). 421 error("value field name should be longer than two characters") 422 if $field[1] =~ /[A-Z]/; 423 424 #@ Value field name contains invalid character 425 #@ xo_emit("{:cost-in-$$/%u}", 15); 426 #@ Should be: 427 #@ xo_emit("{:cost-in-dollars/%u}", 15); 428 #@ An invalid character is often a sign of a typo, like "{:]}" 429 #@ instead of "{]:}". Field names are restricted to lower-case 430 #@ characters, digits, and hyphens. 431 error("value field name contains invalid character (" . $field[1] . ")") 432 unless $field[1] =~ /^[0-9a-z-]*$/; 433 } 434 435 # A decoration field 436 if ($field[0] =~ /D/) { 437 438 #@decoration field contains invalid character 439 #@ xo_emit("{D:not good}"); 440 #@ Should be: 441 #@ xo_emit("{D:((}{:good}{D:))}", "yes"); 442 #@ This is minor, but fields should use proper roles. Decoration 443 #@ fields are meant to hold puncuation and other characters used 444 #@ to decorate the content, typically to make it more readable 445 #@ to human readers. 446 warn("decoration field contains invalid character") 447 unless $field[1] =~ m:^[~!\@\#\$%^&\*\(\);\:\[\]\{\} ]+$:; 448 } 449 450 if ($field[0] =~ /[\[\]]/) { 451 #@ Anchor content should be decimal width 452 #@ xo_emit("{[:mumble}"); 453 #@ Should be: 454 #@ xo_emit("{[:32}"); 455 #@ Anchors need an integer value to specify the width of 456 #@ the set of anchored fields. The value can be positive 457 #@ (for left padding/right justification) or negative (for 458 #@ right padding/left justification) and can appear in 459 #@ either the start or stop anchor field descriptor. 460 error("anchor content should be decimal width") 461 if $field[1] && $field[1] !~ /^-?\d+$/ ; 462 463 #@ Anchor format should be "%d" 464 #@ xo_emit("{[:/%s}"); 465 #@ Should be: 466 #@ xo_emit("{[:/%d}"); 467 #@ Anchors only grok integer values, and if the value is not static, 468 #@ if must be in an 'int' argument, represented by the "%d" format. 469 #@ Anything else is an error. 470 error("anchor format should be \"%d\"") 471 if $field[2] && $field[2] ne "%d"; 472 473 #@ Anchor cannot have both format and encoding format") 474 #@ xo_emit("{[:32/%d}"); 475 #@ Should be: 476 #@ xo_emit("{[:32}"); 477 #@ Anchors can have a static value or argument for the width, 478 #@ but cannot have both. 479 error("anchor cannot have both format and encoding format") 480 if $field[1] && $field[2]; 481 } 482} 483 484sub count_args { 485 my($format) = @_; 486 487 return -1 unless $format; 488 489 my $in; 490 my($text, $ff, $fc, $rest); 491 for ($in = $format; $in; $in = $rest) { 492 ($text, $ff, $fc, $rest) = 493 ($in =~ /^([^%]*)(%[^%diouxXDOUeEfFgGaAcCsSp]*)([diouxXDOUeEfFgGaAcCsSp])(.*)$/); 494 unless ($ff) { 495 # Might be a "%%" 496 ($text, $ff, $rest) = ($in =~ /^([^%]*)(%%)(.*)$/); 497 if ($ff) { 498 check_text($text); 499 } else { 500 # Not sure what's going on here, but something's wrong... 501 error("invalid field format") if $in =~ /%/; 502 } 503 next; 504 } 505 506 check_text($text); 507 check_field_format($ff, $fc); 508 } 509 510 return 0; 511} 512 513sub check_field_format { 514 my($ff, $fc) = @_; 515 516 print "check_field_format: [$ff] [$fc]\n" if $opt_debug; 517 518 my(@chunks) = split(/\./, $ff); 519 520 #@ Max width only valid for strings 521 #@ xo_emit("{:tag/%2.4.6d}", 55); 522 #@ Should be: 523 #@ xo_emit("{:tag/%2.6d}", 55); 524 #@ libxo allows a true 'max width' in addition to the traditional 525 #@ printf-style 'max number of bytes to use for input'. But this 526 #@ is supported only for string values, since it makes no sense 527 #@ for non-strings. This error may occur from a typo, 528 #@ like "{:tag/%6..6d}" where only one period should be used. 529 error("max width only valid for strings") 530 if $#chunks >= 2 && $fc =~ /[sS]/; 531} 532 533sub error { 534 return if $opt_vocabulary; 535 print STDERR $curfile . ": " .$curln . ": error: " . join(" ", @_) . "\n"; 536 print STDERR $replay . "\n" if $opt_print; 537 $errors += 1; 538} 539 540sub warn { 541 return if $opt_vocabulary; 542 print STDERR $curfile . ": " .$curln . ": warning: " . join(" ", @_) . "\n"; 543 print STDERR $replay . "\n" if $opt_print; 544 $warnings += 1; 545} 546 547sub info { 548 return if $opt_vocabulary; 549 print STDERR $curfile . ": " .$curln . ": info: " . join(" ", @_) . "\n"; 550 print STDERR $replay . "\n" if $opt_print; 551 $info += 1; 552} 553 554main: { 555 main(); 556} 557