1#!/usr/bin/env perl 2# 3# Copyright (c) 2014, Juniper Networks, Inc. 4# All rights reserved. 5# This SOFTWARE is licensed under the LICENSE provided in the 6# ../Copyright file. By downloading, installing, copying, or otherwise 7# using the SOFTWARE, you agree to be bound by the terms of that 8# LICENSE. 9# Phil Shafer, August 2014 10# 11# 12# xolint -- a lint for inspecting xo_emit format strings 13# 14# Yes, that's a long way to go for a pun. 15 16%vocabulary = (); 17 18sub main { 19 while ($ARGV[0] =~ /^-/) { 20 $_ = shift @ARGV; 21 $opt_cpp = 1 if /^-c/; 22 $opt_cflags .= shift @ARGV if /^-C/; 23 $opt_debug = 1 if /^-d/; 24 extract_docs() if /^-D/; 25 $opt_info = $opt_vocabulary = 1 if /^-I/; 26 $opt_print = 1 if /^-p/; 27 $opt_vocabulary = 1 if /^-V/; 28 extract_samples() if /^-X/; 29 } 30 31 if ($#ARGV < 0) { 32 print STDERR "xolint [options] files ...\n"; 33 print STDERR " -c invoke 'cpp' on input\n"; 34 print STDERR " -C flags Pass flags to cpp\n"; 35 print STDERR " -d Show debug output\n"; 36 print STDERR " -D Extract xolint documentation\n"; 37 print STDERR " -I Print xo_info_t data\n"; 38 print STDERR " -p Print input data on errors\n"; 39 print STDERR " -V Print vocabulary (list of tags)\n"; 40 print STDERR " -X Print examples of invalid use\n"; 41 exit(1); 42 } 43 44 for $file (@ARGV) { 45 parse_file($file); 46 } 47 48 if ($opt_info) { 49 print "static xo_info_t xo_info_table[] = {\n"; 50 for $name (sort(keys(%vocabulary))) { 51 print " { \"", $name, "\", \"type\", \"desc\" },\n"; 52 } 53 print "};\n"; 54 print "static int xo_info_count = " 55 . "(sizeof(xo_info_table) / sizeof(xo_info_table[0]));\n\n"; 56 print "#define XO_SET_INFO() \\\n"; 57 print " xo_set_info(NULL, xo_info_table, xo_info_count)\n"; 58 } elsif ($opt_vocabulary) { 59 for $name (sort(keys(%vocabulary))) { 60 print $name, "\n"; 61 } 62 } 63} 64 65sub extract_samples { 66 my $x = "\#" . "\@"; 67 my $cmd = "grep -B1 -i '$x Should be' $0 | grep xo_emit | sed 's/.*\#*\@//'"; 68 system($cmd); 69 exit(0); 70} 71 72sub extract_docs { 73 my $x = "\#" . "\@"; 74 my $cmd = "grep -B1 '$x' $0"; 75 open INPUT, "$cmd |"; 76 local @input = <INPUT>; 77 close INPUT; 78 my $ln, $new = 0, $first = 1, $need_nl; 79 80 for ($ln = 0; $ln <= $#input; $ln++) { 81 chomp($_ = $input[$ln]); 82 if (/^--/) { 83 $ln += 1; 84 $new = 1; 85 next; 86 } 87 if ($first) { 88 $new = 1; 89 $first = 0; 90 next; 91 } 92 93 s/\s*\#\@\s*//; 94 95 if ($new) { 96 if ($need_nl) { 97 print "\n\n"; 98 $need_nl = 0; 99 } 100 101 print "*** '$_'\n\n"; 102 print "The message \"$_\" can be caused by code like:\n\n"; 103 $new = 0; 104 105 } elsif (/xo_emit\s*\(/) { 106 s/^\s+//; 107 print " $_\n\n"; 108 109 } elsif (/^Should be/i) { 110 print "This code should be replaced with code like:\n\n"; 111 112 } else { 113 print "$_\n"; 114 $need_nl = 1; 115 } 116 } 117 118 exit(0); 119} 120 121sub parse_file { 122 local($file) = @_; 123 local($errors, $warnings, $info) = (0, 0, 0); 124 local $curfile = $file; 125 local $curln = 0; 126 127 if ($opt_cpp) { 128 die "no such file" unless -f $file; 129 open INPUT, "cpp $opt_cflags $file |"; 130 } else { 131 open INPUT, $file || die "cannot open input file '$file'"; 132 } 133 local @input = <INPUT>; 134 close INPUT; 135 136 local $ln, $rln, $line, $replay; 137 138 for ($ln = 0; $ln < $#input; $ln++) { 139 $line = $input[$ln]; 140 $curln += 1; 141 142 if ($line =~ /^\#/) { 143 my($num, $fn) = ($line =~ /\#\s*(\d+)\s+"(.+)"/); 144 ($curfile, $curln) = ($fn, $num) if $num; 145 next; 146 } 147 148 next unless $line =~ /xo_emit\(/; 149 150 @tokens = parse_tokens(); 151 print "token:\n '" . join("'\n '", @tokens) . "'\n" 152 if $opt_debug; 153 check_format($tokens[0]); 154 } 155 156 print $file . ": $errors errors, $warnings warnings, $info info\n" 157 unless $opt_vocabulary; 158} 159 160sub parse_tokens { 161 my $full = "$'"; 162 my @tokens = (); 163 my %pairs = ( "{" => "}", "[" => "]", "(" => ")" ); 164 my %quotes = ( "\"" => "\"", "'" => "'" ); 165 local @data = split(//, $full); 166 local @open = (); 167 local $current = ""; 168 my $quote = ""; 169 local $off = 0; 170 my $ch; 171 172 $replay = $curln . " " . $line; 173 $rln = $ln + 1; 174 175 for (;;) { 176 get_tokens() if $off > $#data; 177 die "out of data" if $off > $#data; 178 $ch = $data[$off++]; 179 180 print "'$ch' ($quote) ($#open) [" . join("", @open) . "]\n" 181 if $opt_debug; 182 183 last if $ch eq ";" && $#open < 0; 184 185 if ($ch eq "," && $quote eq "" && $#open < 0) { 186 print "[$current]\n" if $opt_debug; 187 push @tokens, $current; 188 $current = ""; 189 next; 190 } 191 192 next if $ch =~ /[ \t\n\r]/ && $quote eq "" && $#open < 0; 193 194 $current .= $ch; 195 196 if ($quote) { 197 if ($ch eq $quote) { 198 $quote = ""; 199 } 200 next; 201 } 202 if ($quotes{$ch}) { 203 $quote = $quotes{$ch}; 204 $current = substr($current, 0, -2) if $current =~ /""$/; 205 next; 206 } 207 208 if ($pairs{$ch}) { 209 push @open, $pairs{$ch}; 210 next; 211 } 212 213 if ($#open >= 0 && $ch eq $open[$#open]) { 214 pop @open; 215 next; 216 } 217 } 218 219 push @tokens, substr($current, 0, -1); 220 return @tokens; 221} 222 223sub get_tokens { 224 if ($ln + 1 < $#input) { 225 $line = $input[++$ln]; 226 $curln += 1; 227 $replay .= $curln . " " . $line; 228 @data = split(//, $line); 229 $off = 0; 230 } 231} 232 233sub check_format { 234 my($format) = @_; 235 236 return unless $format =~ /^".*"$/; 237 238 my @data = split(//, $format); 239 my $ch; 240 my $braces = 0; 241 local $count = 0; 242 my $content = ""; 243 my $off; 244 my $phase = 0; 245 my @build = (); 246 local $last, $prev = ""; 247 248 # Nukes quotes 249 pop @data; 250 shift @data; 251 252 for (;;) { 253 last if $off > $#data; 254 $ch = $data[$off++]; 255 256 if ($ch eq "\\") { 257 $ch = $data[$off++]; 258 $off += 1 if $ch eq "\\"; # double backslash: "\\/" 259 next; 260 } 261 262 if ($braces) { 263 if ($ch eq "}") { 264 check_field(@build); 265 $braces = 0; 266 @build = (); 267 $phase = 0; 268 next; 269 } elsif ($phase == 0 && $ch eq ":") { 270 $phase += 1; 271 next; 272 } elsif ($ch eq "/") { 273 $phase += 1; 274 next; 275 } 276 277 } else { 278 if ($ch eq "{") { 279 check_text($build[0]) if length($build[0]); 280 $braces = 1; 281 @build = (); 282 $last = $prev; 283 next; 284 } 285 $prev = $ch; 286 } 287 288 $build[$phase] .= $ch; 289 } 290 291 if ($braces) { 292 error("missing closing brace"); 293 check_field(@build); 294 } else { 295 check_text($build[0]) if length($build[0]); 296 } 297} 298 299sub check_text { 300 my($text) = @_; 301 302 print "checking text: [$text]\n" if $opt_debug; 303 304 #@ A percent sign appearing in text is a literal 305 #@ xo_emit("cost: %d", cost); 306 #@ Should be: 307 #@ xo_emit("{L:cost}: {:cost/%d}", cost); 308 #@ This can be a bit surprising and could be a field that was not 309 #@ properly converted to a libxo-style format string. 310 info("a percent sign appearing in text is a literal") if $text =~ /%/; 311} 312 313sub check_field { 314 my(@field) = @_; 315 print "checking field: [" . join("][", @field) . "]\n" if $opt_debug; 316 317 if ($opt_vocabulary) { 318 $vocabulary{$field[1]} = 1 319 if $field[1] && $field[0] !~ /[DELNPTUW\[\]]/; 320 return; 321 } 322 323 #@ Last character before field definition is a field type 324 #@ A common typo: 325 #@ xo_emit("{T:Min} T{:Max}"); 326 #@ Should be: 327 #@ xo_emit("{T:Min} {T:Max}"); 328 #@ Twiddling the "{" and the field role is a common typo. 329 info("last character before field definition is a field type ($last)") 330 if $last =~ /[DELNPTUVW\[\]]/ && $field[0] !~ /[DELNPTUVW\[\]]/; 331 332 #@ Encoding format uses different number of arguments 333 #@ xo_emit("{:name/%6.6s %%04d/%s}", name, number); 334 #@ Should be: 335 #@ xo_emit("{:name/%6.6s %04d/%s-%d}", name, number); 336 #@ Both format should consume the same number of arguments off the stack 337 my $cf = count_args($field[2]); 338 my $ce = count_args($field[3]); 339 warn("encoding format uses different number of arguments ($cf/$ce)") 340 if $ce >= 0 && $cf >= 0 && $ce != $cf; 341 342 #@ Only one field role can be used 343 #@ xo_emit("{LT:Max}"); 344 #@ Should be: 345 #@ xo_emit("{T:Max}"); 346 my(@roles) = ($field[0] !~ /([DELNPTUVW\[\]]).*([DELNPTUVW\[\]])/); 347 error("only one field role can be used (" . join(", ", @roles) . ")") 348 if $#roles > 0; 349 350 # Field is a color, note, label, or title 351 if ($field[0] =~ /[CDLNT]/) { 352 353 #@ Potential missing slash after C, D, N, L, or T with format 354 #@ xo_emit("{T:%6.6s}\n", "Max"); 355 #@ should be: 356 #@ xo_emit("{T:/%6.6s}\n", "Max"); 357 #@ The "%6.6s" will be a literal, not a field format. While 358 #@ it's possibly valid, it's likely a missing "/". 359 info("potential missing slash after C, D, N, L, or T with format") 360 if $field[1] =~ /%/; 361 362 #@ An encoding format cannot be given (roles: DNLT) 363 #@ xo_emit("{T:Max//%s}", "Max"); 364 #@ Fields with the C, D, N, L, and T roles are not emitted in 365 #@ the 'encoding' style (JSON, XML), so an encoding format 366 #@ would make no sense. 367 error("encoding format cannot be given when content is present") 368 if $field[3]; 369 } 370 371 # Field is a color, decoration, label, or title 372 if ($field[0] =~ /[CDLN]/) { 373 #@ Format cannot be given when content is present (roles: CDLN) 374 #@ xo_emit("{N:Max/%6.6s}", "Max"); 375 #@ Fields with the C, D, L, or N roles can't have both 376 #@ static literal content ("{L:Label}") and a 377 #@ format ("{L:/%s}"). 378 #@ This error will also occur when the content has a backslash 379 #@ in it, like "{N:Type of I/O}"; backslashes should be escaped, 380 #@ like "{N:Type of I\\/O}". Note the double backslash, one for 381 #@ handling 'C' strings, and one for libxo. 382 error("format cannot be given when content is present") 383 if $field[1] && $field[2]; 384 } 385 386 # Field is a color/effect 387 if ($field[0] =~ /C/) { 388 if ($field[1]) { 389 my $val; 390 my @sub = split(/,/, $field[1]); 391 grep { s/^\s*//; s/\s*$//; } @sub; 392 393 for $val (@sub) { 394 if ($val =~ /^(default,black,red,green,yellow,blue,magenta,cyan,white)$/) { 395 396 #@ Field has color without fg- or bg- (role: C) 397 #@ xo_emit("{C:green}{:foo}{C:}", x); 398 #@ Should be: 399 #@ xo_emit("{C:fg-green}{:foo}{C:}", x); 400 #@ Colors must be prefixed by either "fg-" or "bg-". 401 error("Field has color without fg- or bg- (role: C)"); 402 403 } elsif ($val =~ /^(fg|bg)-(default|black|red|green|yellow|blue|magenta|cyan|white)$/) { 404 # color 405 } elsif ($val =~ /^(bold|underline)$/) { 406 } elsif ($val =~ /^(no-)?(bold|underline|inverse)$/) { 407 # effect 408 409 } elsif ($val =~ /^(reset|normal)$/) { 410 # effect also 411 } else { 412 #@ Field has invalid color or effect (role: C) 413 #@ xo_emit("{C:fg-purple,bold}{:foo}{C:gween}", x); 414 #@ Should be: 415 #@ xo_emit("{C:fg-red,bold}{:foo}{C:fg-green}", x); 416 #@ The list of colors and effects are limited. The 417 #@ set of colors includes default, black, red, green, 418 #@ yellow, blue, magenta, cyan, and white, which must 419 #@ be prefixed by either "fg-" or "bg-". Effects are 420 #@ limited to bold, no-bold, underline, no-underline, 421 #@ inverse, no-inverse, normal, and reset. Values must 422 #@ be separated by commas. 423 error("Field has invalid color or effect (role: C) ($val)"); 424 } 425 } 426 } 427 } 428 429 # A value field 430 if (length($field[0]) == 0 || $field[0] =~ /V/) { 431 432 #@ Value field must have a name (as content)") 433 #@ xo_emit("{:/%s}", "value"); 434 #@ Should be: 435 #@ xo_emit("{:tag-name/%s}", "value"); 436 #@ The field name is used for XML and JSON encodings. These 437 #@ tags names are static and must appear directly in the 438 #@ field descriptor. 439 error("value field must have a name (as content)") 440 unless $field[1]; 441 442 #@ Use hyphens, not underscores, for value field name 443 #@ xo_emit("{:no_under_scores}", "bad"); 444 #@ Should be: 445 #@ xo_emit("{:no-under-scores}", "bad"); 446 #@ Use of hyphens is traditional in XML, and the XOF_UNDERSCORES 447 #@ flag can be used to generate underscores in JSON, if desired. 448 #@ But the raw field name should use hyphens. 449 error("use hyphens, not underscores, for value field name") 450 if $field[1] =~ /_/; 451 452 #@ Value field name cannot start with digit 453 #@ xo_emit("{:10-gig/}"); 454 #@ Should be: 455 #@ xo_emit("{:ten-gig/}"); 456 #@ XML element names cannot start with a digit. 457 error("value field name cannot start with digit") 458 if $field[1] =~ /^[0-9]/; 459 460 #@ Value field name should be lower case 461 #@ xo_emit("{:WHY-ARE-YOU-SHOUTING}", "NO REASON"); 462 #@ Should be: 463 #@ xo_emit("{:why-are-you-shouting}", "no reason"); 464 #@ Lower case is more civilized. Even TLAs should be lower case 465 #@ to avoid scenarios where the differences between "XPath" and 466 #@ "Xpath" drive your users crazy. Lower case rules the seas. 467 error("value field name should be lower case") 468 if $field[1] =~ /[A-Z]/; 469 470 #@ Value field name should be longer than two characters 471 #@ xo_emit("{:x}", "mumble"); 472 #@ Should be: 473 #@ xo_emit("{:something-meaningful}", "mumble"); 474 #@ Field names should be descriptive, and it's hard to 475 #@ be descriptive in less than two characters. Consider 476 #@ your users and try to make something more useful. 477 #@ Note that this error often occurs when the field type 478 #@ is placed after the colon ("{:T/%20s}"), instead of before 479 #@ it ("{T:/20s}"). 480 error("value field name should be longer than two characters") 481 if $field[1] =~ /[A-Z]/; 482 483 #@ Value field name contains invalid character 484 #@ xo_emit("{:cost-in-$$/%u}", 15); 485 #@ Should be: 486 #@ xo_emit("{:cost-in-dollars/%u}", 15); 487 #@ An invalid character is often a sign of a typo, like "{:]}" 488 #@ instead of "{]:}". Field names are restricted to lower-case 489 #@ characters, digits, and hyphens. 490 error("value field name contains invalid character (" . $field[1] . ")") 491 unless $field[1] =~ /^[0-9a-z-]*$/; 492 } 493 494 # A decoration field 495 if ($field[0] =~ /D/) { 496 497 #@decoration field contains invalid character 498 #@ xo_emit("{D:not good}"); 499 #@ Should be: 500 #@ xo_emit("{D:((}{:good}{D:))}", "yes"); 501 #@ This is minor, but fields should use proper roles. Decoration 502 #@ fields are meant to hold punctuation and other characters used 503 #@ to decorate the content, typically to make it more readable 504 #@ to human readers. 505 warn("decoration field contains invalid character") 506 unless $field[1] =~ m:^[~!\@\#\$%^&\*\(\);\:\[\]\{\} ]+$:; 507 } 508 509 if ($field[0] =~ /[\[\]]/) { 510 #@ Anchor content should be decimal width 511 #@ xo_emit("{[:mumble}"); 512 #@ Should be: 513 #@ xo_emit("{[:32}"); 514 #@ Anchors need an integer value to specify the width of 515 #@ the set of anchored fields. The value can be positive 516 #@ (for left padding/right justification) or negative (for 517 #@ right padding/left justification) and can appear in 518 #@ either the start or stop anchor field descriptor. 519 error("anchor content should be decimal width") 520 if $field[1] && $field[1] !~ /^-?\d+$/ ; 521 522 #@ Anchor format should be "%d" 523 #@ xo_emit("{[:/%s}"); 524 #@ Should be: 525 #@ xo_emit("{[:/%d}"); 526 #@ Anchors only grok integer values, and if the value is not static, 527 #@ if must be in an 'int' argument, represented by the "%d" format. 528 #@ Anything else is an error. 529 error("anchor format should be \"%d\"") 530 if $field[2] && $field[2] ne "%d"; 531 532 #@ Anchor cannot have both format and encoding format") 533 #@ xo_emit("{[:32/%d}"); 534 #@ Should be: 535 #@ xo_emit("{[:32}"); 536 #@ Anchors can have a static value or argument for the width, 537 #@ but cannot have both. 538 error("anchor cannot have both format and encoding format") 539 if $field[1] && $field[2]; 540 } 541} 542 543sub count_args { 544 my($format) = @_; 545 546 return -1 unless $format; 547 548 my $in; 549 my($text, $ff, $fc, $rest); 550 for ($in = $format; $in; $in = $rest) { 551 ($text, $ff, $fc, $rest) = 552 ($in =~ /^([^%]*)(%[^%diouxXDOUeEfFgGaAcCsSp]*)([diouxXDOUeEfFgGaAcCsSp])(.*)$/); 553 unless ($ff) { 554 # Might be a "%%" 555 ($text, $ff, $rest) = ($in =~ /^([^%]*)(%%)(.*)$/); 556 if ($ff) { 557 check_text($text); 558 } else { 559 # Not sure what's going on here, but something's wrong... 560 error("invalid field format") if $in =~ /%/; 561 } 562 next; 563 } 564 565 check_text($text); 566 check_field_format($ff, $fc); 567 } 568 569 return 0; 570} 571 572sub check_field_format { 573 my($ff, $fc) = @_; 574 575 print "check_field_format: [$ff] [$fc]\n" if $opt_debug; 576 577 my(@chunks) = split(/\./, $ff); 578 579 #@ Max width only valid for strings 580 #@ xo_emit("{:tag/%2.4.6d}", 55); 581 #@ Should be: 582 #@ xo_emit("{:tag/%2.6d}", 55); 583 #@ libxo allows a true 'max width' in addition to the traditional 584 #@ printf-style 'max number of bytes to use for input'. But this 585 #@ is supported only for string values, since it makes no sense 586 #@ for non-strings. This error may occur from a typo, 587 #@ like "{:tag/%6..6d}" where only one period should be used. 588 error("max width only valid for strings") 589 if $#chunks >= 2 && $fc !~ /[sS]/; 590} 591 592sub error { 593 return if $opt_vocabulary; 594 print STDERR $curfile . ": " .$curln . ": error: " . join(" ", @_) . "\n"; 595 print STDERR $replay . "\n" if $opt_print; 596 $errors += 1; 597} 598 599sub warn { 600 return if $opt_vocabulary; 601 print STDERR $curfile . ": " .$curln . ": warning: " . join(" ", @_) . "\n"; 602 print STDERR $replay . "\n" if $opt_print; 603 $warnings += 1; 604} 605 606sub info { 607 return if $opt_vocabulary; 608 print STDERR $curfile . ": " .$curln . ": info: " . join(" ", @_) . "\n"; 609 print STDERR $replay . "\n" if $opt_print; 610 $info += 1; 611} 612 613main: { 614 main(); 615} 616