1#!/usr/bin/env perl 2# 3# Copyright (c) 2014, Juniper Networks, Inc. 4# All rights reserved. 5# This SOFTWARE is licensed under the LICENSE provided in the 6# ../Copyright file. By downloading, installing, copying, or otherwise 7# using the SOFTWARE, you agree to be bound by the terms of that 8# LICENSE. 9# Phil Shafer, August 2014 10# 11# 12# xolint -- a lint for inspecting xo_emit format strings 13# 14# Yes, that's a long way to go for a pun. 15 16%vocabulary = (); 17 18sub main { 19 while ($ARGV[0] =~ /^-/) { 20 $_ = shift @ARGV; 21 $opt_cpp = 1 if /^-c/; 22 $opt_cflags .= shift @ARGV if /^-C/; 23 $opt_debug = 1 if /^-d/; 24 extract_docs() if /^-D/; 25 $opt_info = $opt_vocabulary = 1 if /^-I/; 26 $opt_print = 1 if /^-p/; 27 $opt_vocabulary = 1 if /^-V/; 28 extract_samples() if /^-X/; 29 } 30 31 if ($#ARGV < 0) { 32 print STDERR "xolint [options] files ...\n"; 33 print STDERR " -c invoke 'cpp' on input\n"; 34 print STDERR " -C flags Pass flags to cpp\n"; 35 print STDERR " -d Show debug output\n"; 36 print STDERR " -D Extract xolint documentation\n"; 37 print STDERR " -I Print xo_info_t data\n"; 38 print STDERR " -p Print input data on errors\n"; 39 print STDERR " -V Print vocabulary (list of tags)\n"; 40 print STDERR " -X Print examples of invalid use\n"; 41 exit(1); 42 } 43 44 for $file (@ARGV) { 45 parse_file($file); 46 } 47 48 if ($opt_info) { 49 print "static xo_info_t xo_info_table[] = {\n"; 50 for $name (sort(keys(%vocabulary))) { 51 print " { \"", $name, "\", \"type\", \"desc\" },\n"; 52 } 53 print "};\n"; 54 print "static int xo_info_count = " 55 . "(sizeof(xo_info_table) / sizeof(xo_info_table[0]));\n\n"; 56 print "#define XO_SET_INFO() \\\n"; 57 print " xo_set_info(NULL, xo_info_table, xo_info_count)\n"; 58 } elsif ($opt_vocabulary) { 59 for $name (sort(keys(%vocabulary))) { 60 print $name, "\n"; 61 } 62 } 63} 64 65sub extract_samples { 66 my $x = "\#" . "\@"; 67 my $cmd = "grep -B1 -i '$x Should be' $0 | grep xo_emit | sed 's/.*\#*\@//'"; 68 system($cmd); 69 exit(0); 70} 71 72sub extract_docs { 73 my $x = "\#" . "\@"; 74 my $cmd = "grep -B1 '$x' $0"; 75 open INPUT, "$cmd |"; 76 local @input = <INPUT>; 77 close INPUT; 78 my $ln, $new = 0, $first = 1, $need_nl; 79 80 for ($ln = 0; $ln <= $#input; $ln++) { 81 chomp($_ = $input[$ln]); 82 if (/^--/) { 83 $ln += 1; 84 $new = 1; 85 next; 86 } 87 if ($first) { 88 $new = 1; 89 $first = 0; 90 next; 91 } 92 93 s/\s*\#\@\s*//; 94 95 if ($new) { 96 if ($need_nl) { 97 print "\n\n"; 98 $need_nl = 0; 99 } 100 101 $under = "+" x (length($_) + 2); 102 103 print "'$_'\n$under\n\n"; 104 print "The message \"$_\" can be caused by code like:\n"; 105 $new = 0; 106 107 } elsif (/xo_emit\s*\(/) { 108 s/^\s+//; 109 print "\n::\n\n $_\n\n"; 110 111 } elsif (/^Should be/i) { 112 print "This code should be replaced with code like:\n"; 113 114 } else { 115 print "$_\n"; 116 $need_nl = 1; 117 } 118 } 119 120 exit(0); 121} 122 123sub parse_file { 124 local($file) = @_; 125 local($errors, $warnings, $info) = (0, 0, 0); 126 local $curfile = $file; 127 local $curln = 0; 128 129 if ($opt_cpp) { 130 die "no such file" unless -f $file; 131 open INPUT, "cpp $opt_cflags $file |"; 132 } else { 133 open INPUT, $file || die "cannot open input file '$file'"; 134 } 135 local @input = <INPUT>; 136 close INPUT; 137 138 local $ln, $rln, $line, $replay; 139 140 for ($ln = 0; $ln < $#input; $ln++) { 141 $line = $input[$ln]; 142 $curln += 1; 143 144 if ($line =~ /^\#/) { 145 my($num, $fn) = ($line =~ /\#\s*(\d+)\s+"(.+)"/); 146 ($curfile, $curln) = ($fn, $num) if $num; 147 next; 148 } 149 150 next unless $line =~ /xo_emit\(/; 151 152 @tokens = parse_tokens(); 153 print "token:\n '" . join("'\n '", @tokens) . "'\n" 154 if $opt_debug; 155 check_format($tokens[0]); 156 } 157 158 print $file . ": $errors errors, $warnings warnings, $info info\n" 159 unless $opt_vocabulary; 160} 161 162sub parse_tokens { 163 my $full = "$'"; 164 my @tokens = (); 165 my %pairs = ( "{" => "}", "[" => "]", "(" => ")" ); 166 my %quotes = ( "\"" => "\"", "'" => "'" ); 167 local @data = split(//, $full); 168 local @open = (); 169 local $current = ""; 170 my $quote = ""; 171 local $off = 0; 172 my $ch; 173 174 $replay = $curln . " " . $line; 175 $rln = $ln + 1; 176 177 for (;;) { 178 get_tokens() if $off > $#data; 179 die "out of data" if $off > $#data; 180 $ch = $data[$off++]; 181 182 print "'$ch' ($quote) ($#open) [" . join("", @open) . "]\n" 183 if $opt_debug; 184 185 last if $ch eq ";" && $#open < 0; 186 187 if ($ch eq "," && $quote eq "" && $#open < 0) { 188 print "[$current]\n" if $opt_debug; 189 push @tokens, $current; 190 $current = ""; 191 next; 192 } 193 194 next if $ch =~ /[ \t\n\r]/ && $quote eq "" && $#open < 0; 195 196 $current .= $ch; 197 198 if ($quote) { 199 if ($ch eq $quote) { 200 $quote = ""; 201 } 202 next; 203 } 204 if ($quotes{$ch}) { 205 $quote = $quotes{$ch}; 206 $current = substr($current, 0, -2) if $current =~ /""$/; 207 next; 208 } 209 210 if ($pairs{$ch}) { 211 push @open, $pairs{$ch}; 212 next; 213 } 214 215 if ($#open >= 0 && $ch eq $open[$#open]) { 216 pop @open; 217 next; 218 } 219 } 220 221 push @tokens, substr($current, 0, -1); 222 return @tokens; 223} 224 225sub get_tokens { 226 if ($ln + 1 < $#input) { 227 $line = $input[++$ln]; 228 $curln += 1; 229 $replay .= $curln . " " . $line; 230 @data = split(//, $line); 231 $off = 0; 232 } 233} 234 235sub check_format { 236 my($format) = @_; 237 238 return unless $format =~ /^".*"$/; 239 240 my @data = split(//, $format); 241 my $ch; 242 my $braces = 0; 243 local $count = 0; 244 my $content = ""; 245 my $off; 246 my $phase = 0; 247 my @build = (); 248 local $last, $prev = ""; 249 250 # Nukes quotes 251 pop @data; 252 shift @data; 253 254 for (;;) { 255 last if $off > $#data; 256 $ch = $data[$off++]; 257 258 if ($ch eq "\\") { 259 $ch = $data[$off++]; 260 $off += 1 if $ch eq "\\"; # double backslash: "\\/" 261 next; 262 } 263 264 if ($braces) { 265 if ($ch eq "}") { 266 check_field(@build); 267 $braces = 0; 268 @build = (); 269 $phase = 0; 270 next; 271 } elsif ($phase == 0 && $ch eq ":") { 272 $phase += 1; 273 next; 274 } elsif ($ch eq "/") { 275 $phase += 1; 276 next; 277 } 278 279 } else { 280 if ($ch eq "{") { 281 check_text($build[0]) if length($build[0]); 282 $braces = 1; 283 @build = (); 284 $last = $prev; 285 next; 286 } 287 $prev = $ch; 288 } 289 290 $build[$phase] .= $ch; 291 } 292 293 if ($braces) { 294 error("missing closing brace"); 295 check_field(@build); 296 } else { 297 check_text($build[0]) if length($build[0]); 298 } 299} 300 301sub check_text { 302 my($text) = @_; 303 304 print "checking text: [$text]\n" if $opt_debug; 305 306 #@ A percent sign appearing in text is a literal 307 #@ xo_emit("cost: %d", cost); 308 #@ Should be: 309 #@ xo_emit("{L:cost}: {:cost/%d}", cost); 310 #@ This can be a bit surprising and could be a field that was not 311 #@ properly converted to a libxo-style format string. 312 info("a percent sign appearing in text is a literal") if $text =~ /%/; 313} 314 315%short = ( 316 # Roles 317 "color" => "C", 318 "decoration" => "D", 319 "error" => "E", 320 "label" => "L", 321 "note" => "N", 322 "padding" => "P", 323 "title" => "T", 324 "units" => "U", 325 "value" => "V", 326 "warning" => "W", 327 "start-anchor" => "[", 328 "stop-anchor" => "]", 329 # Modifiers 330 "colon" => "c", 331 "display" => "d", 332 "encoding" => "e", 333 "hn" => "h", 334 "hn-decimal" => "@", 335 "hn-space" => "@", 336 "hn-1000" => "@", 337 "humanize" => "h", 338 "key" => "k", 339 "leaf-list" => "l", 340 "no-quotes" => "n", 341 "quotes" => "q", 342 "trim" => "t", 343 "white" => "w", 344 ); 345 346sub check_field { 347 my(@field) = @_; 348 print "checking field: [" . join("][", @field) . "]\n" if $opt_debug; 349 350 if ($field[0] =~ /,/) { 351 # We have long names; deal with it by turning them into short names 352 my @parts = split(/,/, $field[0]); 353 my $new = ""; 354 for (my $i = 1; $i <= $#parts; $i++) { 355 my $v = $parts[$i]; 356 $v =~ s/^\s+//; 357 $v =~ s/\s+$//; 358 if ($short{$v} eq "@") { 359 # ignore; has no short version 360 } elsif ($short{$v}) { 361 $new .= $short{$v}; 362 } else { 363 #@ Unknown long name for role/modifier 364 #@ xo_emit("{,humanization:value}", value); 365 #@ Should be: 366 #@ xo_emit("{,humanize:value}", value); 367 #@ The hn-* modifiers (hn-decimal, hn-space, hn-1000) 368 #@ are only valid for fields with the {h:} modifier. 369 error("Unknown long name for role/modifier ($v)"); 370 } 371 } 372 373 $field[4] = substr($field[0], index($field[0], ",")); 374 $field[0] = $parts[0] . $new; 375 } 376 377 if ($opt_vocabulary) { 378 $vocabulary{$field[1]} = 1 379 if $field[1] && $field[0] !~ /[DELNPTUW\[\]]/; 380 return; 381 } 382 383 #@ Last character before field definition is a field type 384 #@ A common typo: 385 #@ xo_emit("{T:Min} T{:Max}"); 386 #@ Should be: 387 #@ xo_emit("{T:Min} {T:Max}"); 388 #@ Twiddling the "{" and the field role is a common typo. 389 info("last character before field definition is a field type ($last)") 390 if $last =~ /[DELNPTUVW\[\]]/ && $field[0] !~ /[DELNPTUVW\[\]]/; 391 392 #@ Encoding format uses different number of arguments 393 #@ xo_emit("{:name/%6.6s %%04d/%s}", name, number); 394 #@ Should be: 395 #@ xo_emit("{:name/%6.6s %04d/%s-%d}", name, number); 396 #@ Both format should consume the same number of arguments off the stack 397 my $cf = count_args($field[2]); 398 my $ce = count_args($field[3]); 399 warn("encoding format uses different number of arguments ($cf/$ce)") 400 if $ce >= 0 && $cf >= 0 && $ce != $cf; 401 402 #@ Only one field role can be used 403 #@ xo_emit("{LT:Max}"); 404 #@ Should be: 405 #@ xo_emit("{T:Max}"); 406 my(@roles) = ($field[0] !~ /([DELNPTUVW\[\]]).*([DELNPTUVW\[\]])/); 407 error("only one field role can be used (" . join(", ", @roles) . ")") 408 if $#roles > 0; 409 410 # Field is a color, note, label, or title 411 if ($field[0] =~ /[CDLNT]/) { 412 413 #@ Potential missing slash after C, D, N, L, or T with format 414 #@ xo_emit("{T:%6.6s}\n", "Max"); 415 #@ should be: 416 #@ xo_emit("{T:/%6.6s}\n", "Max"); 417 #@ The "%6.6s" will be a literal, not a field format. While 418 #@ it's possibly valid, it's likely a missing "/". 419 info("potential missing slash after C, D, N, L, or T with format") 420 if $field[1] =~ /%/; 421 422 #@ An encoding format cannot be given (roles: DNLT) 423 #@ xo_emit("{T:Max//%s}", "Max"); 424 #@ Fields with the C, D, N, L, and T roles are not emitted in 425 #@ the 'encoding' style (JSON, XML), so an encoding format 426 #@ would make no sense. 427 error("encoding format cannot be given when content is present") 428 if $field[3]; 429 } 430 431 # Field is a color, decoration, label, or title 432 if ($field[0] =~ /[CDLN]/) { 433 #@ Format cannot be given when content is present (roles: CDLN) 434 #@ xo_emit("{N:Max/%6.6s}", "Max"); 435 #@ Fields with the C, D, L, or N roles can't have both 436 #@ static literal content ("{L:Label}") and a 437 #@ format ("{L:/%s}"). 438 #@ This error will also occur when the content has a backslash 439 #@ in it, like "{N:Type of I/O}"; backslashes should be escaped, 440 #@ like "{N:Type of I\\/O}". Note the double backslash, one for 441 #@ handling 'C' strings, and one for libxo. 442 error("format cannot be given when content is present") 443 if $field[1] && $field[2]; 444 } 445 446 # Field is a color/effect 447 if ($field[0] =~ /C/) { 448 if ($field[1]) { 449 my $val; 450 my @sub = split(/,/, $field[1]); 451 grep { s/^\s*//; s/\s*$//; } @sub; 452 453 for $val (@sub) { 454 if ($val =~ /^(default,black,red,green,yellow,blue,magenta,cyan,white)$/) { 455 456 #@ Field has color without fg- or bg- (role: C) 457 #@ xo_emit("{C:green}{:foo}{C:}", x); 458 #@ Should be: 459 #@ xo_emit("{C:fg-green}{:foo}{C:}", x); 460 #@ Colors must be prefixed by either "fg-" or "bg-". 461 error("Field has color without fg- or bg- (role: C)"); 462 463 } elsif ($val =~ /^(fg|bg)-(default|black|red|green|yellow|blue|magenta|cyan|white)$/) { 464 # color 465 } elsif ($val =~ /^(bold|underline)$/) { 466 } elsif ($val =~ /^(no-)?(bold|underline|inverse)$/) { 467 # effect 468 469 } elsif ($val =~ /^(reset|normal)$/) { 470 # effect also 471 } else { 472 #@ Field has invalid color or effect (role: C) 473 #@ xo_emit("{C:fg-purple,bold}{:foo}{C:gween}", x); 474 #@ Should be: 475 #@ xo_emit("{C:fg-red,bold}{:foo}{C:fg-green}", x); 476 #@ The list of colors and effects are limited. The 477 #@ set of colors includes default, black, red, green, 478 #@ yellow, blue, magenta, cyan, and white, which must 479 #@ be prefixed by either "fg-" or "bg-". Effects are 480 #@ limited to bold, no-bold, underline, no-underline, 481 #@ inverse, no-inverse, normal, and reset. Values must 482 #@ be separated by commas. 483 error("Field has invalid color or effect (role: C) ($val)"); 484 } 485 } 486 } 487 } 488 489 # Humanized field 490 if ($field[0] =~ /h/) { 491 if (length($field[2]) == 0) { 492 #@ Field has humanize modifier but no format string 493 #@ xo_emit("{h:value}", value); 494 #@ Should be: 495 #@ xo_emit("{h:value/%d}", value); 496 #@ Humanization is only value for numbers, which are not 497 #@ likely to use the default format ("%s"). 498 error("Field has humanize modifier but no format string"); 499 } 500 } 501 502 # hn-* on non-humanize field 503 if ($field[0] !~ /h/) { 504 if ($field[4] =~ /,hn-/) { 505 #@ Field has hn-* modifier but not 'h' modifier 506 #@ xo_emit("{,hn-1000:value}", value); 507 #@ Should be: 508 #@ xo_emit("{h,hn-1000:value}", value); 509 #@ The hn-* modifiers (hn-decimal, hn-space, hn-1000) 510 #@ are only valid for fields with the {h:} modifier. 511 error("Field has hn-* modifier but not 'h' modifier"); 512 } 513 } 514 515 # A value field 516 if (length($field[0]) == 0 || $field[0] =~ /V/) { 517 518 #@ Value field must have a name (as content)") 519 #@ xo_emit("{:/%s}", "value"); 520 #@ Should be: 521 #@ xo_emit("{:tag-name/%s}", "value"); 522 #@ The field name is used for XML and JSON encodings. These 523 #@ tags names are static and must appear directly in the 524 #@ field descriptor. 525 error("value field must have a name (as content)") 526 unless $field[1]; 527 528 #@ Use hyphens, not underscores, for value field name 529 #@ xo_emit("{:no_under_scores}", "bad"); 530 #@ Should be: 531 #@ xo_emit("{:no-under-scores}", "bad"); 532 #@ Use of hyphens is traditional in XML, and the XOF_UNDERSCORES 533 #@ flag can be used to generate underscores in JSON, if desired. 534 #@ But the raw field name should use hyphens. 535 error("use hyphens, not underscores, for value field name") 536 if $field[1] =~ /_/; 537 538 #@ Value field name cannot start with digit 539 #@ xo_emit("{:10-gig/}"); 540 #@ Should be: 541 #@ xo_emit("{:ten-gig/}"); 542 #@ XML element names cannot start with a digit. 543 error("value field name cannot start with digit") 544 if $field[1] =~ /^[0-9]/; 545 546 #@ Value field name should be lower case 547 #@ xo_emit("{:WHY-ARE-YOU-SHOUTING}", "NO REASON"); 548 #@ Should be: 549 #@ xo_emit("{:why-are-you-shouting}", "no reason"); 550 #@ Lower case is more civilized. Even TLAs should be lower case 551 #@ to avoid scenarios where the differences between "XPath" and 552 #@ "Xpath" drive your users crazy. Lower case rules the seas. 553 error("value field name should be lower case") 554 if $field[1] =~ /[A-Z]/; 555 556 #@ Value field name should be longer than two characters 557 #@ xo_emit("{:x}", "mumble"); 558 #@ Should be: 559 #@ xo_emit("{:something-meaningful}", "mumble"); 560 #@ Field names should be descriptive, and it's hard to 561 #@ be descriptive in less than two characters. Consider 562 #@ your users and try to make something more useful. 563 #@ Note that this error often occurs when the field type 564 #@ is placed after the colon ("{:T/%20s}"), instead of before 565 #@ it ("{T:/20s}"). 566 error("value field name should be longer than two characters") 567 if $field[1] =~ /[A-Z]/; 568 569 #@ Value field name contains invalid character 570 #@ xo_emit("{:cost-in-$$/%u}", 15); 571 #@ Should be: 572 #@ xo_emit("{:cost-in-dollars/%u}", 15); 573 #@ An invalid character is often a sign of a typo, like "{:]}" 574 #@ instead of "{]:}". Field names are restricted to lower-case 575 #@ characters, digits, and hyphens. 576 error("value field name contains invalid character (" . $field[1] . ")") 577 unless $field[1] =~ /^[0-9a-z-]*$/; 578 } 579 580 # A decoration field 581 if ($field[0] =~ /D/) { 582 583 #@decoration field contains invalid character 584 #@ xo_emit("{D:not good}"); 585 #@ Should be: 586 #@ xo_emit("{D:((}{:good}{D:))}", "yes"); 587 #@ This is minor, but fields should use proper roles. Decoration 588 #@ fields are meant to hold punctuation and other characters used 589 #@ to decorate the content, typically to make it more readable 590 #@ to human readers. 591 warn("decoration field contains invalid character") 592 unless $field[1] =~ m:^[~!\@\#\$%^&\*\(\);\:\[\]\{\} ]+$:; 593 } 594 595 if ($field[0] =~ /[\[\]]/) { 596 #@ Anchor content should be decimal width 597 #@ xo_emit("{[:mumble}"); 598 #@ Should be: 599 #@ xo_emit("{[:32}"); 600 #@ Anchors need an integer value to specify the width of 601 #@ the set of anchored fields. The value can be positive 602 #@ (for left padding/right justification) or negative (for 603 #@ right padding/left justification) and can appear in 604 #@ either the start or stop anchor field descriptor. 605 error("anchor content should be decimal width") 606 if $field[1] && $field[1] !~ /^-?\d+$/ ; 607 608 #@ Anchor format should be "%d" 609 #@ xo_emit("{[:/%s}"); 610 #@ Should be: 611 #@ xo_emit("{[:/%d}"); 612 #@ Anchors only grok integer values, and if the value is not static, 613 #@ if must be in an 'int' argument, represented by the "%d" format. 614 #@ Anything else is an error. 615 error("anchor format should be \"%d\"") 616 if $field[2] && $field[2] ne "%d"; 617 618 #@ Anchor cannot have both format and encoding format") 619 #@ xo_emit("{[:32/%d}"); 620 #@ Should be: 621 #@ xo_emit("{[:32}"); 622 #@ Anchors can have a static value or argument for the width, 623 #@ but cannot have both. 624 error("anchor cannot have both format and encoding format") 625 if $field[1] && $field[2]; 626 } 627} 628 629sub count_args { 630 my($format) = @_; 631 632 return -1 unless $format; 633 634 my $in; 635 my($text, $ff, $fc, $rest); 636 for ($in = $format; $in; $in = $rest) { 637 ($text, $ff, $fc, $rest) = 638 ($in =~ /^([^%]*)(%[^%diouxXDOUeEfFgGaAcCsSp]*)([diouxXDOUeEfFgGaAcCsSp])(.*)$/); 639 unless ($ff) { 640 # Might be a "%%" 641 ($text, $ff, $rest) = ($in =~ /^([^%]*)(%%)(.*)$/); 642 if ($ff) { 643 check_text($text); 644 } else { 645 # Not sure what's going on here, but something's wrong... 646 error("invalid field format") if $in =~ /%/; 647 } 648 next; 649 } 650 651 check_text($text); 652 check_field_format($ff, $fc); 653 } 654 655 return 0; 656} 657 658sub check_field_format { 659 my($ff, $fc) = @_; 660 661 print "check_field_format: [$ff] [$fc]\n" if $opt_debug; 662 663 my(@chunks) = split(/\./, $ff); 664 665 #@ Max width only valid for strings 666 #@ xo_emit("{:tag/%2.4.6d}", 55); 667 #@ Should be: 668 #@ xo_emit("{:tag/%2.6d}", 55); 669 #@ libxo allows a true 'max width' in addition to the traditional 670 #@ printf-style 'max number of bytes to use for input'. But this 671 #@ is supported only for string values, since it makes no sense 672 #@ for non-strings. This error may occur from a typo, 673 #@ like "{:tag/%6..6d}" where only one period should be used. 674 error("max width only valid for strings") 675 if $#chunks >= 2 && $fc !~ /[sS]/; 676} 677 678sub error { 679 return if $opt_vocabulary; 680 print STDERR $curfile . ": " .$curln . ": error: " . join(" ", @_) . "\n"; 681 print STDERR $replay . "\n" if $opt_print; 682 $errors += 1; 683} 684 685sub warn { 686 return if $opt_vocabulary; 687 print STDERR $curfile . ": " .$curln . ": warning: " . join(" ", @_) . "\n"; 688 print STDERR $replay . "\n" if $opt_print; 689 $warnings += 1; 690} 691 692sub info { 693 return if $opt_vocabulary; 694 print STDERR $curfile . ": " .$curln . ": info: " . join(" ", @_) . "\n"; 695 print STDERR $replay . "\n" if $opt_print; 696 $info += 1; 697} 698 699main: { 700 main(); 701} 702