1#!/usr/bin/env perl 2# 3# Copyright (c) 2014, Juniper Networks, Inc. 4# All rights reserved. 5# This SOFTWARE is licensed under the LICENSE provided in the 6# ../Copyright file. By downloading, installing, copying, or otherwise 7# using the SOFTWARE, you agree to be bound by the terms of that 8# LICENSE. 9# Phil Shafer, August 2014 10# 11# 12# xolint -- a lint for inspecting xo_emit format strings 13# 14# Yes, that's a long way to go for a pun. 15 16%vocabulary = (); 17 18sub main { 19 while ($ARGV[0] =~ /^-/) { 20 $_ = shift @ARGV; 21 $opt_cpp = 1 if /^-c/; 22 $opt_cflags .= shift @ARGV if /^-C/; 23 $opt_debug = 1 if /^-d/; 24 extract_docs() if /^-D/; 25 $opt_info = $opt_vocabulary = 1 if /^-I/; 26 $opt_print = 1 if /^-p/; 27 $opt_vocabulary = 1 if /^-V/; 28 extract_samples() if /^-X/; 29 } 30 31 if ($#ARGV < 0) { 32 print STDERR "xolint [options] files ...\n"; 33 print STDERR " -c invoke 'cpp' on input\n"; 34 print STDERR " -C flags Pass flags to cpp\n"; 35 print STDERR " -d Show debug output\n"; 36 print STDERR " -D Extract xolint documentation\n"; 37 print STDERR " -I Print xo_info_t data\n"; 38 print STDERR " -p Print input data on errors\n"; 39 print STDERR " -V Print vocabulary (list of tags)\n"; 40 print STDERR " -X Print examples of invalid use\n"; 41 exit(1); 42 } 43 44 for $file (@ARGV) { 45 parse_file($file); 46 } 47 48 if ($opt_info) { 49 print "static xo_info_t xo_info_table[] = {\n"; 50 for $name (sort(keys(%vocabulary))) { 51 print " { \"", $name, "\", \"type\", \"desc\" },\n"; 52 } 53 print "};\n"; 54 print "static int xo_info_count = " 55 . "(sizeof(xo_info_table) / sizeof(xo_info_table[0]));\n\n"; 56 print "#define XO_SET_INFO() \\\n"; 57 print " xo_set_info(NULL, xo_info_table, xo_info_count)\n"; 58 } elsif ($opt_vocabulary) { 59 for $name (sort(keys(%vocabulary))) { 60 print $name, "\n"; 61 } 62 } 63} 64 65sub extract_samples { 66 my $x = "\#" . "\@"; 67 my $cmd = "grep -B1 -i '$x Should be' $0 | grep xo_emit | sed 's/.*\#*\@//'"; 68 system($cmd); 69 exit(0); 70} 71 72sub extract_docs { 73 my $x = "\#" . "\@"; 74 my $cmd = "grep -B1 '$x' $0"; 75 open INPUT, "$cmd |"; 76 local @input = <INPUT>; 77 close INPUT; 78 my $ln, $new = 0, $first = 1, $need_nl; 79 80 for ($ln = 0; $ln <= $#input; $ln++) { 81 chomp($_ = $input[$ln]); 82 if (/^--/) { 83 $ln += 1; 84 $new = 1; 85 next; 86 } 87 if ($first) { 88 $new = 1; 89 $first = 0; 90 next; 91 } 92 93 s/\s*\#\@\s*//; 94 95 if ($new) { 96 if ($need_nl) { 97 print "\n\n"; 98 $need_nl = 0; 99 } 100 101 print "*** '$_'\n\n"; 102 print "The message \"$_\" can be caused by code like:\n\n"; 103 $new = 0; 104 105 } elsif (/xo_emit\s*\(/) { 106 s/^\s+//; 107 print " $_\n\n"; 108 109 } elsif (/^Should be/i) { 110 print "This code should be replaced with code like:\n\n"; 111 112 } else { 113 print "$_\n"; 114 $need_nl = 1; 115 } 116 } 117 118 exit(0); 119} 120 121sub parse_file { 122 local($file) = @_; 123 local($errors, $warnings, $info) = (0, 0, 0); 124 local $curfile = $file; 125 local $curln = 0; 126 127 if ($opt_cpp) { 128 die "no such file" unless -f $file; 129 open INPUT, "cpp $opt_cflags $file |"; 130 } else { 131 open INPUT, $file || die "cannot open input file '$file'"; 132 } 133 local @input = <INPUT>; 134 close INPUT; 135 136 local $ln, $rln, $line, $replay; 137 138 for ($ln = 0; $ln < $#input; $ln++) { 139 $line = $input[$ln]; 140 $curln += 1; 141 142 if ($line =~ /^\#/) { 143 my($num, $fn) = ($line =~ /\#\s*(\d+)\s+"(.+)"/); 144 ($curfile, $curln) = ($fn, $num) if $num; 145 next; 146 } 147 148 next unless $line =~ /xo_emit\(/; 149 150 @tokens = parse_tokens(); 151 print "token:\n '" . join("'\n '", @tokens) . "'\n" 152 if $opt_debug; 153 check_format($tokens[0]); 154 } 155 156 print $file . ": $errors errors, $warnings warnings, $info info\n" 157 unless $opt_vocabulary; 158} 159 160sub parse_tokens { 161 my $full = "$'"; 162 my @tokens = (); 163 my %pairs = ( "{" => "}", "[" => "]", "(" => ")" ); 164 my %quotes = ( "\"" => "\"", "'" => "'" ); 165 local @data = split(//, $full); 166 local @open = (); 167 local $current = ""; 168 my $quote = ""; 169 local $off = 0; 170 my $ch; 171 172 $replay = $curln . " " . $line; 173 $rln = $ln + 1; 174 175 for (;;) { 176 get_tokens() if $off > $#data; 177 die "out of data" if $off > $#data; 178 $ch = $data[$off++]; 179 180 print "'$ch' ($quote) ($#open) [" . join("", @open) . "]\n" 181 if $opt_debug; 182 183 last if $ch eq ";" && $#open < 0; 184 185 if ($ch eq "," && $quote eq "" && $#open < 0) { 186 print "[$current]\n" if $opt_debug; 187 push @tokens, $current; 188 $current = ""; 189 next; 190 } 191 192 next if $ch =~ /[ \t\n\r]/ && $quote eq "" && $#open < 0; 193 194 $current .= $ch; 195 196 if ($quote) { 197 if ($ch eq $quote) { 198 $quote = ""; 199 } 200 next; 201 } 202 if ($quotes{$ch}) { 203 $quote = $quotes{$ch}; 204 $current = substr($current, 0, -2) if $current =~ /""$/; 205 next; 206 } 207 208 if ($pairs{$ch}) { 209 push @open, $pairs{$ch}; 210 next; 211 } 212 213 if ($#open >= 0 && $ch eq $open[$#open]) { 214 pop @open; 215 next; 216 } 217 } 218 219 push @tokens, substr($current, 0, -1); 220 return @tokens; 221} 222 223sub get_tokens { 224 if ($ln + 1 < $#input) { 225 $line = $input[++$ln]; 226 $curln += 1; 227 $replay .= $curln . " " . $line; 228 @data = split(//, $line); 229 $off = 0; 230 } 231} 232 233sub check_format { 234 my($format) = @_; 235 236 return unless $format =~ /^".*"$/; 237 238 my @data = split(//, $format); 239 my $ch; 240 my $braces = 0; 241 local $count = 0; 242 my $content = ""; 243 my $off; 244 my $phase = 0; 245 my @build = (); 246 local $last, $prev = ""; 247 248 # Nukes quotes 249 pop @data; 250 shift @data; 251 252 for (;;) { 253 last if $off > $#data; 254 $ch = $data[$off++]; 255 256 if ($ch eq "\\") { 257 $ch = $data[$off++]; 258 $off += 1 if $ch eq "\\"; # double backslash: "\\/" 259 next; 260 } 261 262 if ($braces) { 263 if ($ch eq "}") { 264 check_field(@build); 265 $braces = 0; 266 @build = (); 267 $phase = 0; 268 next; 269 } elsif ($phase == 0 && $ch eq ":") { 270 $phase += 1; 271 next; 272 } elsif ($ch eq "/") { 273 $phase += 1; 274 next; 275 } 276 277 } else { 278 if ($ch eq "{") { 279 check_text($build[0]) if length($build[0]); 280 $braces = 1; 281 @build = (); 282 $last = $prev; 283 next; 284 } 285 $prev = $ch; 286 } 287 288 $build[$phase] .= $ch; 289 } 290 291 if ($braces) { 292 error("missing closing brace"); 293 check_field(@build); 294 } else { 295 check_text($build[0]) if length($build[0]); 296 } 297} 298 299sub check_text { 300 my($text) = @_; 301 302 print "checking text: [$text]\n" if $opt_debug; 303 304 #@ A percent sign appearing in text is a literal 305 #@ xo_emit("cost: %d", cost); 306 #@ Should be: 307 #@ xo_emit("{L:cost}: {:cost/%d}", cost); 308 #@ This can be a bit surprising and could be a field that was not 309 #@ properly converted to a libxo-style format string. 310 info("a percent sign appearing in text is a literal") if $text =~ /%/; 311} 312 313sub check_field { 314 my(@field) = @_; 315 print "checking field: [" . join("][", @field) . "]\n" if $opt_debug; 316 317 if ($opt_vocabulary) { 318 $vocabulary{$field[1]} = 1 319 if $field[1] && $field[0] !~ /[DELNPTUW\[\]]/; 320 return; 321 } 322 323 #@ Last character before field definition is a field type 324 #@ A common typo: 325 #@ xo_emit("{T:Min} T{:Max}"); 326 #@ Should be: 327 #@ xo_emit("{T:Min} {T:Max}"); 328 #@ Twiddling the "{" and the field role is a common typo. 329 info("last character before field definition is a field type ($last)") 330 if $last =~ /[DELNPTUVW\[\]]/ && $field[0] !~ /[DELNPTUVW\[\]]/; 331 332 #@ Encoding format uses different number of arguments 333 #@ xo_emit("{:name/%6.6s %%04d/%s}", name, number); 334 #@ Should be: 335 #@ xo_emit("{:name/%6.6s %04d/%s-%d}", name, number); 336 #@ Both format should consume the same number of arguments off the stack 337 my $cf = count_args($field[2]); 338 my $ce = count_args($field[3]); 339 warn("encoding format uses different number of arguments ($cf/$ce)") 340 if $ce >= 0 && $cf >= 0 && $ce != $cf; 341 342 #@ Only one field role can be used 343 #@ xo_emit("{LT:Max}"); 344 #@ Should be: 345 #@ xo_emit("{T:Max}"); 346 my(@roles) = ($field[0] !~ /([DELNPTUVW\[\]]).*([DELNPTUVW\[\]])/); 347 error("only one field role can be used (" . join(", ", @roles) . ")") 348 if $#roles > 0; 349 350 # Field is a note, label, or title 351 if ($field[0] =~ /[DLNT]/) { 352 353 #@ Potential missing slash after N, L, or T with format 354 #@ xo_emit("{T:%6.6s}\n", "Max"); 355 #@ should be: 356 #@ xo_emit("{T:/%6.6s}\n", "Max"); 357 #@ The "%6.6s" will be a literal, not a field format. While 358 #@ it's possibly valid, it's likely a missing "/". 359 info("potential missing slash after N, L, or T with format") 360 if $field[1] =~ /%/; 361 362 #@ An encoding format cannot be given (roles: DNLT) 363 #@ xo_emit("{T:Max//%s}", "Max"); 364 #@ Fields with the D, N, L, and T roles are not emitted in 365 #@ the 'encoding' style (JSON, XML), so an encoding format 366 #@ would make no sense. 367 error("encoding format cannot be given when content is present") 368 if $field[3]; 369 } 370 371 # Field is a decoration, label, or title 372 if ($field[0] =~ /DLN/) { 373 #@ Format cannot be given when content is present (roles: DLN) 374 #@ xo_emit("{N:Max/%6.6s}", "Max"); 375 #@ Fields with the D, L, or N roles can't have both 376 #@ static literal content ("{L:Label}") and a 377 #@ format ("{L:/%s}"). 378 #@ This error will also occur when the content has a backslash 379 #@ in it, like "{N:Type of I/O}"; backslashes should be escaped, 380 #@ like "{N:Type of I\\/O}". Note the double backslash, one for 381 #@ handling 'C' strings, and one for libxo. 382 error("format cannot be given when content is present") 383 if $field[1] && $field[2]; 384 } 385 386 # A value field 387 if (length($field[0]) == 0 || $field[0] =~ /V/) { 388 389 #@ Value field must have a name (as content)") 390 #@ xo_emit("{:/%s}", "value"); 391 #@ Should be: 392 #@ xo_emit("{:tag-name/%s}", "value"); 393 #@ The field name is used for XML and JSON encodings. These 394 #@ tags names are static and must appear directly in the 395 #@ field descriptor. 396 error("value field must have a name (as content)") 397 unless $field[1]; 398 399 #@ Use hyphens, not underscores, for value field name 400 #@ xo_emit("{:no_under_scores}", "bad"); 401 #@ Should be: 402 #@ xo_emit("{:no-under-scores}", "bad"); 403 #@ Use of hyphens is traditional in XML, and the XOF_UNDERSCORES 404 #@ flag can be used to generate underscores in JSON, if desired. 405 #@ But the raw field name should use hyphens. 406 error("use hyphens, not underscores, for value field name") 407 if $field[1] =~ /_/; 408 409 #@ Value field name cannot start with digit 410 #@ xo_emit("{:10-gig/}"); 411 #@ Should be: 412 #@ xo_emit("{:ten-gig/}"); 413 #@ XML element names cannot start with a digit. 414 error("value field name cannot start with digit") 415 if $field[1] =~ /^[0-9]/; 416 417 #@ Value field name should be lower case 418 #@ xo_emit("{:WHY-ARE-YOU-SHOUTING}", "NO REASON"); 419 #@ Should be: 420 #@ xo_emit("{:why-are-you-shouting}", "no reason"); 421 #@ Lower case is more civilized. Even TLAs should be lower case 422 #@ to avoid scenarios where the differences between "XPath" and 423 #@ "Xpath" drive your users crazy. Lower case rules the seas. 424 error("value field name should be lower case") 425 if $field[1] =~ /[A-Z]/; 426 427 #@ Value field name should be longer than two characters 428 #@ xo_emit("{:x}", "mumble"); 429 #@ Should be: 430 #@ xo_emit("{:something-meaningful}", "mumble"); 431 #@ Field names should be descriptive, and it's hard to 432 #@ be descriptive in less than two characters. Consider 433 #@ your users and try to make something more useful. 434 #@ Note that this error often occurs when the field type 435 #@ is placed after the colon ("{:T/%20s}"), instead of before 436 #@ it ("{T:/20s}"). 437 error("value field name should be longer than two characters") 438 if $field[1] =~ /[A-Z]/; 439 440 #@ Value field name contains invalid character 441 #@ xo_emit("{:cost-in-$$/%u}", 15); 442 #@ Should be: 443 #@ xo_emit("{:cost-in-dollars/%u}", 15); 444 #@ An invalid character is often a sign of a typo, like "{:]}" 445 #@ instead of "{]:}". Field names are restricted to lower-case 446 #@ characters, digits, and hyphens. 447 error("value field name contains invalid character (" . $field[1] . ")") 448 unless $field[1] =~ /^[0-9a-z-]*$/; 449 } 450 451 # A decoration field 452 if ($field[0] =~ /D/) { 453 454 #@decoration field contains invalid character 455 #@ xo_emit("{D:not good}"); 456 #@ Should be: 457 #@ xo_emit("{D:((}{:good}{D:))}", "yes"); 458 #@ This is minor, but fields should use proper roles. Decoration 459 #@ fields are meant to hold puncuation and other characters used 460 #@ to decorate the content, typically to make it more readable 461 #@ to human readers. 462 warn("decoration field contains invalid character") 463 unless $field[1] =~ m:^[~!\@\#\$%^&\*\(\);\:\[\]\{\} ]+$:; 464 } 465 466 if ($field[0] =~ /[\[\]]/) { 467 #@ Anchor content should be decimal width 468 #@ xo_emit("{[:mumble}"); 469 #@ Should be: 470 #@ xo_emit("{[:32}"); 471 #@ Anchors need an integer value to specify the width of 472 #@ the set of anchored fields. The value can be positive 473 #@ (for left padding/right justification) or negative (for 474 #@ right padding/left justification) and can appear in 475 #@ either the start or stop anchor field descriptor. 476 error("anchor content should be decimal width") 477 if $field[1] && $field[1] !~ /^-?\d+$/ ; 478 479 #@ Anchor format should be "%d" 480 #@ xo_emit("{[:/%s}"); 481 #@ Should be: 482 #@ xo_emit("{[:/%d}"); 483 #@ Anchors only grok integer values, and if the value is not static, 484 #@ if must be in an 'int' argument, represented by the "%d" format. 485 #@ Anything else is an error. 486 error("anchor format should be \"%d\"") 487 if $field[2] && $field[2] ne "%d"; 488 489 #@ Anchor cannot have both format and encoding format") 490 #@ xo_emit("{[:32/%d}"); 491 #@ Should be: 492 #@ xo_emit("{[:32}"); 493 #@ Anchors can have a static value or argument for the width, 494 #@ but cannot have both. 495 error("anchor cannot have both format and encoding format") 496 if $field[1] && $field[2]; 497 } 498} 499 500sub count_args { 501 my($format) = @_; 502 503 return -1 unless $format; 504 505 my $in; 506 my($text, $ff, $fc, $rest); 507 for ($in = $format; $in; $in = $rest) { 508 ($text, $ff, $fc, $rest) = 509 ($in =~ /^([^%]*)(%[^%diouxXDOUeEfFgGaAcCsSp]*)([diouxXDOUeEfFgGaAcCsSp])(.*)$/); 510 unless ($ff) { 511 # Might be a "%%" 512 ($text, $ff, $rest) = ($in =~ /^([^%]*)(%%)(.*)$/); 513 if ($ff) { 514 check_text($text); 515 } else { 516 # Not sure what's going on here, but something's wrong... 517 error("invalid field format") if $in =~ /%/; 518 } 519 next; 520 } 521 522 check_text($text); 523 check_field_format($ff, $fc); 524 } 525 526 return 0; 527} 528 529sub check_field_format { 530 my($ff, $fc) = @_; 531 532 print "check_field_format: [$ff] [$fc]\n" if $opt_debug; 533 534 my(@chunks) = split(/\./, $ff); 535 536 #@ Max width only valid for strings 537 #@ xo_emit("{:tag/%2.4.6d}", 55); 538 #@ Should be: 539 #@ xo_emit("{:tag/%2.6d}", 55); 540 #@ libxo allows a true 'max width' in addition to the traditional 541 #@ printf-style 'max number of bytes to use for input'. But this 542 #@ is supported only for string values, since it makes no sense 543 #@ for non-strings. This error may occur from a typo, 544 #@ like "{:tag/%6..6d}" where only one period should be used. 545 error("max width only valid for strings") 546 if $#chunks >= 2 && $fc !~ /[sS]/; 547} 548 549sub error { 550 return if $opt_vocabulary; 551 print STDERR $curfile . ": " .$curln . ": error: " . join(" ", @_) . "\n"; 552 print STDERR $replay . "\n" if $opt_print; 553 $errors += 1; 554} 555 556sub warn { 557 return if $opt_vocabulary; 558 print STDERR $curfile . ": " .$curln . ": warning: " . join(" ", @_) . "\n"; 559 print STDERR $replay . "\n" if $opt_print; 560 $warnings += 1; 561} 562 563sub info { 564 return if $opt_vocabulary; 565 print STDERR $curfile . ": " .$curln . ": info: " . join(" ", @_) . "\n"; 566 print STDERR $replay . "\n" if $opt_print; 567 $info += 1; 568} 569 570main: { 571 main(); 572} 573