1#!/usr/bin/env perl 2# 3# Copyright (c) 2014, Juniper Networks, Inc. 4# All rights reserved. 5# This SOFTWARE is licensed under the LICENSE provided in the 6# ../Copyright file. By downloading, installing, copying, or otherwise 7# using the SOFTWARE, you agree to be bound by the terms of that 8# LICENSE. 9# Phil Shafer, August 2014 10# 11# 12# xolint -- a lint for inspecting xo_emit format strings 13# 14# Yes, that's a long way to go for a pun. 15 16%vocabulary = (); 17 18sub main { 19 while ($ARGV[0] =~ /^-/) { 20 $_ = shift @ARGV; 21 $opt_cpp = 1 if /^-c/; 22 $opt_cflags .= shift @ARGV if /^-C/; 23 $opt_debug = 1 if /^-d/; 24 extract_docs() if /^-D/; 25 $opt_info = $opt_vocabulary = 1 if /^-I/; 26 $opt_print = 1 if /^-p/; 27 $opt_vocabulary = 1 if /^-V/; 28 extract_samples() if /^-X/; 29 } 30 31 if ($#ARGV < 0) { 32 print STDERR "xolint [options] files ...\n"; 33 print STDERR " -c invoke 'cpp' on input\n"; 34 print STDERR " -C flags Pass flags to cpp\n"; 35 print STDERR " -d Show debug output\n"; 36 print STDERR " -D Extract xolint documentation\n"; 37 print STDERR " -I Print xo_info_t data\n"; 38 print STDERR " -p Print input data on errors\n"; 39 print STDERR " -V Print vocabulary (list of tags)\n"; 40 print STDERR " -X Print examples of invalid use\n"; 41 exit(1); 42 } 43 44 for $file (@ARGV) { 45 parse_file($file); 46 } 47 48 if ($opt_info) { 49 print "static xo_info_t xo_info_table[] = {\n"; 50 for $name (sort(keys(%vocabulary))) { 51 print " { \"", $name, "\", \"type\", \"desc\" },\n"; 52 } 53 print "};\n"; 54 print "static int xo_info_count = " 55 . "(sizeof(xo_info_table) / sizeof(xo_info_table[0]));\n\n"; 56 print "#define XO_SET_INFO() \\\n"; 57 print " xo_set_info(NULL, xo_info_table, xo_info_count)\n"; 58 } elsif ($opt_vocabulary) { 59 for $name (sort(keys(%vocabulary))) { 60 print $name, "\n"; 61 } 62 } 63} 64 65sub extract_samples { 66 my $x = "\#" . "\@"; 67 my $cmd = "grep -B1 -i '$x Should be' $0 | grep xo_emit | sed 's/.*\#*\@//'"; 68 system($cmd); 69 exit(0); 70} 71 72sub extract_docs { 73 my $x = "\#" . "\@"; 74 my $cmd = "grep -B1 '$x' $0"; 75 open INPUT, "$cmd |"; 76 local @input = <INPUT>; 77 close INPUT; 78 my $ln, $new = 0, $first = 1, $need_nl; 79 80 for ($ln = 0; $ln <= $#input; $ln++) { 81 chomp($_ = $input[$ln]); 82 if (/^--/) { 83 $ln += 1; 84 $new = 1; 85 next; 86 } 87 if ($first) { 88 $new = 1; 89 $first = 0; 90 next; 91 } 92 93 s/\s*\#\@\s*//; 94 95 if ($new) { 96 if ($need_nl) { 97 print "\n\n"; 98 $need_nl = 0; 99 } 100 101 print "*** '$_'\n\n"; 102 print "The message \"$_\" can be caused by code like:\n\n"; 103 $new = 0; 104 105 } elsif (/xo_emit\s*\(/) { 106 s/^\s+//; 107 print " $_\n\n"; 108 109 } elsif (/^Should be/i) { 110 print "This code should be replaced with code like:\n\n"; 111 112 } else { 113 print "$_\n"; 114 $need_nl = 1; 115 } 116 } 117 118 exit(0); 119} 120 121sub parse_file { 122 local($file) = @_; 123 local($errors, $warnings, $info) = (0, 0, 0); 124 local $curfile = $file; 125 local $curln = 0; 126 127 if ($opt_cpp) { 128 die "no such file" unless -f $file; 129 open INPUT, "cpp $opt_cflags $file |"; 130 } else { 131 open INPUT, $file || die "cannot open input file '$file'"; 132 } 133 local @input = <INPUT>; 134 close INPUT; 135 136 local $ln, $rln, $line, $replay; 137 138 for ($ln = 0; $ln < $#input; $ln++) { 139 $line = $input[$ln]; 140 $curln += 1; 141 142 if ($line =~ /^\#/) { 143 my($num, $fn) = ($line =~ /\#\s*(\d+)\s+"(.+)"/); 144 ($curfile, $curln) = ($fn, $num) if $num; 145 next; 146 } 147 148 next unless $line =~ /xo_emit\(/; 149 150 @tokens = parse_tokens(); 151 print "token:\n '" . join("'\n '", @tokens) . "'\n" 152 if $opt_debug; 153 check_format($tokens[0]); 154 } 155 156 print $file . ": $errors errors, $warnings warnings, $info info\n" 157 unless $opt_vocabulary; 158} 159 160sub parse_tokens { 161 my $full = "$'"; 162 my @tokens = (); 163 my %pairs = ( "{" => "}", "[" => "]", "(" => ")" ); 164 my %quotes = ( "\"" => "\"", "'" => "'" ); 165 local @data = split(//, $full); 166 local @open = (); 167 local $current = ""; 168 my $quote = ""; 169 local $off = 0; 170 my $ch; 171 172 $replay = $curln . " " . $line; 173 $rln = $ln + 1; 174 175 for (;;) { 176 get_tokens() if $off > $#data; 177 die "out of data" if $off > $#data; 178 $ch = $data[$off++]; 179 180 print "'$ch' ($quote) ($#open) [" . join("", @open) . "]\n" 181 if $opt_debug; 182 183 last if $ch eq ";" && $#open < 0; 184 185 if ($ch eq "," && $quote eq "" && $#open < 0) { 186 print "[$current]\n" if $opt_debug; 187 push @tokens, $current; 188 $current = ""; 189 next; 190 } 191 192 next if $ch =~ /[ \t\n\r]/ && $quote eq "" && $#open < 0; 193 194 $current .= $ch; 195 196 if ($quote) { 197 if ($ch eq $quote) { 198 $quote = ""; 199 } 200 next; 201 } 202 if ($quotes{$ch}) { 203 $quote = $quotes{$ch}; 204 $current = substr($current, 0, -2) if $current =~ /""$/; 205 next; 206 } 207 208 if ($pairs{$ch}) { 209 push @open, $pairs{$ch}; 210 next; 211 } 212 213 if ($#open >= 0 && $ch eq $open[$#open]) { 214 pop @open; 215 next; 216 } 217 } 218 219 push @tokens, substr($current, 0, -1); 220 return @tokens; 221} 222 223sub get_tokens { 224 if ($ln + 1 < $#input) { 225 $line = $input[++$ln]; 226 $curln += 1; 227 $replay .= $curln . " " . $line; 228 @data = split(//, $line); 229 $off = 0; 230 } 231} 232 233sub check_format { 234 my($format) = @_; 235 236 return unless $format =~ /^".*"$/; 237 238 my @data = split(//, $format); 239 my $ch; 240 my $braces = 0; 241 local $count = 0; 242 my $content = ""; 243 my $off; 244 my $phase = 0; 245 my @build = (); 246 local $last, $prev = ""; 247 248 # Nukes quotes 249 pop @data; 250 shift @data; 251 252 for (;;) { 253 last if $off > $#data; 254 $ch = $data[$off++]; 255 256 if ($ch eq "\\") { 257 $ch = $data[$off++]; 258 $off += 1 if $ch eq "\\"; # double backslash: "\\/" 259 next; 260 } 261 262 if ($braces) { 263 if ($ch eq "}") { 264 check_field(@build); 265 $braces = 0; 266 @build = (); 267 $phase = 0; 268 next; 269 } elsif ($phase == 0 && $ch eq ":") { 270 $phase += 1; 271 next; 272 } elsif ($ch eq "/") { 273 $phase += 1; 274 next; 275 } 276 277 } else { 278 if ($ch eq "{") { 279 check_text($build[0]) if length($build[0]); 280 $braces = 1; 281 @build = (); 282 $last = $prev; 283 next; 284 } 285 $prev = $ch; 286 } 287 288 $build[$phase] .= $ch; 289 } 290 291 if ($braces) { 292 error("missing closing brace"); 293 check_field(@build); 294 } else { 295 check_text($build[0]) if length($build[0]); 296 } 297} 298 299sub check_text { 300 my($text) = @_; 301 302 print "checking text: [$text]\n" if $opt_debug; 303 304 #@ A percent sign appearing in text is a literal 305 #@ xo_emit("cost: %d", cost); 306 #@ Should be: 307 #@ xo_emit("{L:cost}: {:cost/%d}", cost); 308 #@ This can be a bit surprising and could be a field that was not 309 #@ properly converted to a libxo-style format string. 310 info("a percent sign appearing in text is a literal") if $text =~ /%/; 311} 312 313%short = ( 314 # Roles 315 "color" => "C", 316 "decoration" => "D", 317 "error" => "E", 318 "label" => "L", 319 "note" => "N", 320 "padding" => "P", 321 "title" => "T", 322 "units" => "U", 323 "value" => "V", 324 "warning" => "W", 325 "start-anchor" => "[", 326 "stop-anchor" => "]", 327 # Modifiers 328 "colon" => "c", 329 "display" => "d", 330 "encoding" => "e", 331 "hn" => "h", 332 "hn-decimal" => "@", 333 "hn-space" => "@", 334 "hn-1000" => "@", 335 "humanize" => "h", 336 "key" => "k", 337 "leaf-list" => "l", 338 "no-quotes" => "n", 339 "quotes" => "q", 340 "trim" => "t", 341 "white" => "w", 342 ); 343 344sub check_field { 345 my(@field) = @_; 346 print "checking field: [" . join("][", @field) . "]\n" if $opt_debug; 347 348 if ($field[0] =~ /,/) { 349 # We have long names; deal with it by turning them into short names 350 my @parts = split(/,/, $field[0]); 351 my $new = ""; 352 for (my $i = 1; $i <= $#parts; $i++) { 353 my $v = $parts[$i]; 354 $v =~ s/^\s+//; 355 $v =~ s/\s+$//; 356 if ($short{$v} eq "@") { 357 # ignore; has no short version 358 } elsif ($short{$v}) { 359 $new .= $short{$v}; 360 } else { 361 #@ Unknown long name for role/modifier 362 #@ xo_emit("{,humanization:value}", value); 363 #@ Should be: 364 #@ xo_emit("{,humanize:value}", value); 365 #@ The hn-* modifiers (hn-decimal, hn-space, hn-1000) 366 #@ are only valid for fields with the {h:} modifier. 367 error("Unknown long name for role/modifier ($v)"); 368 } 369 } 370 371 $field[4] = substr($field[0], index($field[0], ",")); 372 $field[0] = $parts[0] . $new; 373 } 374 375 if ($opt_vocabulary) { 376 $vocabulary{$field[1]} = 1 377 if $field[1] && $field[0] !~ /[DELNPTUW\[\]]/; 378 return; 379 } 380 381 #@ Last character before field definition is a field type 382 #@ A common typo: 383 #@ xo_emit("{T:Min} T{:Max}"); 384 #@ Should be: 385 #@ xo_emit("{T:Min} {T:Max}"); 386 #@ Twiddling the "{" and the field role is a common typo. 387 info("last character before field definition is a field type ($last)") 388 if $last =~ /[DELNPTUVW\[\]]/ && $field[0] !~ /[DELNPTUVW\[\]]/; 389 390 #@ Encoding format uses different number of arguments 391 #@ xo_emit("{:name/%6.6s %%04d/%s}", name, number); 392 #@ Should be: 393 #@ xo_emit("{:name/%6.6s %04d/%s-%d}", name, number); 394 #@ Both format should consume the same number of arguments off the stack 395 my $cf = count_args($field[2]); 396 my $ce = count_args($field[3]); 397 warn("encoding format uses different number of arguments ($cf/$ce)") 398 if $ce >= 0 && $cf >= 0 && $ce != $cf; 399 400 #@ Only one field role can be used 401 #@ xo_emit("{LT:Max}"); 402 #@ Should be: 403 #@ xo_emit("{T:Max}"); 404 my(@roles) = ($field[0] !~ /([DELNPTUVW\[\]]).*([DELNPTUVW\[\]])/); 405 error("only one field role can be used (" . join(", ", @roles) . ")") 406 if $#roles > 0; 407 408 # Field is a color, note, label, or title 409 if ($field[0] =~ /[CDLNT]/) { 410 411 #@ Potential missing slash after C, D, N, L, or T with format 412 #@ xo_emit("{T:%6.6s}\n", "Max"); 413 #@ should be: 414 #@ xo_emit("{T:/%6.6s}\n", "Max"); 415 #@ The "%6.6s" will be a literal, not a field format. While 416 #@ it's possibly valid, it's likely a missing "/". 417 info("potential missing slash after C, D, N, L, or T with format") 418 if $field[1] =~ /%/; 419 420 #@ An encoding format cannot be given (roles: DNLT) 421 #@ xo_emit("{T:Max//%s}", "Max"); 422 #@ Fields with the C, D, N, L, and T roles are not emitted in 423 #@ the 'encoding' style (JSON, XML), so an encoding format 424 #@ would make no sense. 425 error("encoding format cannot be given when content is present") 426 if $field[3]; 427 } 428 429 # Field is a color, decoration, label, or title 430 if ($field[0] =~ /[CDLN]/) { 431 #@ Format cannot be given when content is present (roles: CDLN) 432 #@ xo_emit("{N:Max/%6.6s}", "Max"); 433 #@ Fields with the C, D, L, or N roles can't have both 434 #@ static literal content ("{L:Label}") and a 435 #@ format ("{L:/%s}"). 436 #@ This error will also occur when the content has a backslash 437 #@ in it, like "{N:Type of I/O}"; backslashes should be escaped, 438 #@ like "{N:Type of I\\/O}". Note the double backslash, one for 439 #@ handling 'C' strings, and one for libxo. 440 error("format cannot be given when content is present") 441 if $field[1] && $field[2]; 442 } 443 444 # Field is a color/effect 445 if ($field[0] =~ /C/) { 446 if ($field[1]) { 447 my $val; 448 my @sub = split(/,/, $field[1]); 449 grep { s/^\s*//; s/\s*$//; } @sub; 450 451 for $val (@sub) { 452 if ($val =~ /^(default,black,red,green,yellow,blue,magenta,cyan,white)$/) { 453 454 #@ Field has color without fg- or bg- (role: C) 455 #@ xo_emit("{C:green}{:foo}{C:}", x); 456 #@ Should be: 457 #@ xo_emit("{C:fg-green}{:foo}{C:}", x); 458 #@ Colors must be prefixed by either "fg-" or "bg-". 459 error("Field has color without fg- or bg- (role: C)"); 460 461 } elsif ($val =~ /^(fg|bg)-(default|black|red|green|yellow|blue|magenta|cyan|white)$/) { 462 # color 463 } elsif ($val =~ /^(bold|underline)$/) { 464 } elsif ($val =~ /^(no-)?(bold|underline|inverse)$/) { 465 # effect 466 467 } elsif ($val =~ /^(reset|normal)$/) { 468 # effect also 469 } else { 470 #@ Field has invalid color or effect (role: C) 471 #@ xo_emit("{C:fg-purple,bold}{:foo}{C:gween}", x); 472 #@ Should be: 473 #@ xo_emit("{C:fg-red,bold}{:foo}{C:fg-green}", x); 474 #@ The list of colors and effects are limited. The 475 #@ set of colors includes default, black, red, green, 476 #@ yellow, blue, magenta, cyan, and white, which must 477 #@ be prefixed by either "fg-" or "bg-". Effects are 478 #@ limited to bold, no-bold, underline, no-underline, 479 #@ inverse, no-inverse, normal, and reset. Values must 480 #@ be separated by commas. 481 error("Field has invalid color or effect (role: C) ($val)"); 482 } 483 } 484 } 485 } 486 487 # Humanized field 488 if ($field[0] =~ /h/) { 489 if (length($field[2]) == 0) { 490 #@ Field has humanize modifier but no format string 491 #@ xo_emit("{h:value}", value); 492 #@ Should be: 493 #@ xo_emit("{h:value/%d}", value); 494 #@ Humanization is only value for numbers, which are not 495 #@ likely to use the default format ("%s"). 496 error("Field has humanize modifier but no format string"); 497 } 498 } 499 500 # hn-* on non-humanize field 501 if ($field[0] !~ /h/) { 502 if ($field[4] =~ /,hn-/) { 503 #@ Field has hn-* modifier but not 'h' modifier 504 #@ xo_emit("{,hn-1000:value}", value); 505 #@ Should be: 506 #@ xo_emit("{h,hn-1000:value}", value); 507 #@ The hn-* modifiers (hn-decimal, hn-space, hn-1000) 508 #@ are only valid for fields with the {h:} modifier. 509 error("Field has hn-* modifier but not 'h' modifier"); 510 } 511 } 512 513 # A value field 514 if (length($field[0]) == 0 || $field[0] =~ /V/) { 515 516 #@ Value field must have a name (as content)") 517 #@ xo_emit("{:/%s}", "value"); 518 #@ Should be: 519 #@ xo_emit("{:tag-name/%s}", "value"); 520 #@ The field name is used for XML and JSON encodings. These 521 #@ tags names are static and must appear directly in the 522 #@ field descriptor. 523 error("value field must have a name (as content)") 524 unless $field[1]; 525 526 #@ Use hyphens, not underscores, for value field name 527 #@ xo_emit("{:no_under_scores}", "bad"); 528 #@ Should be: 529 #@ xo_emit("{:no-under-scores}", "bad"); 530 #@ Use of hyphens is traditional in XML, and the XOF_UNDERSCORES 531 #@ flag can be used to generate underscores in JSON, if desired. 532 #@ But the raw field name should use hyphens. 533 error("use hyphens, not underscores, for value field name") 534 if $field[1] =~ /_/; 535 536 #@ Value field name cannot start with digit 537 #@ xo_emit("{:10-gig/}"); 538 #@ Should be: 539 #@ xo_emit("{:ten-gig/}"); 540 #@ XML element names cannot start with a digit. 541 error("value field name cannot start with digit") 542 if $field[1] =~ /^[0-9]/; 543 544 #@ Value field name should be lower case 545 #@ xo_emit("{:WHY-ARE-YOU-SHOUTING}", "NO REASON"); 546 #@ Should be: 547 #@ xo_emit("{:why-are-you-shouting}", "no reason"); 548 #@ Lower case is more civilized. Even TLAs should be lower case 549 #@ to avoid scenarios where the differences between "XPath" and 550 #@ "Xpath" drive your users crazy. Lower case rules the seas. 551 error("value field name should be lower case") 552 if $field[1] =~ /[A-Z]/; 553 554 #@ Value field name should be longer than two characters 555 #@ xo_emit("{:x}", "mumble"); 556 #@ Should be: 557 #@ xo_emit("{:something-meaningful}", "mumble"); 558 #@ Field names should be descriptive, and it's hard to 559 #@ be descriptive in less than two characters. Consider 560 #@ your users and try to make something more useful. 561 #@ Note that this error often occurs when the field type 562 #@ is placed after the colon ("{:T/%20s}"), instead of before 563 #@ it ("{T:/20s}"). 564 error("value field name should be longer than two characters") 565 if $field[1] =~ /[A-Z]/; 566 567 #@ Value field name contains invalid character 568 #@ xo_emit("{:cost-in-$$/%u}", 15); 569 #@ Should be: 570 #@ xo_emit("{:cost-in-dollars/%u}", 15); 571 #@ An invalid character is often a sign of a typo, like "{:]}" 572 #@ instead of "{]:}". Field names are restricted to lower-case 573 #@ characters, digits, and hyphens. 574 error("value field name contains invalid character (" . $field[1] . ")") 575 unless $field[1] =~ /^[0-9a-z-]*$/; 576 } 577 578 # A decoration field 579 if ($field[0] =~ /D/) { 580 581 #@decoration field contains invalid character 582 #@ xo_emit("{D:not good}"); 583 #@ Should be: 584 #@ xo_emit("{D:((}{:good}{D:))}", "yes"); 585 #@ This is minor, but fields should use proper roles. Decoration 586 #@ fields are meant to hold punctuation and other characters used 587 #@ to decorate the content, typically to make it more readable 588 #@ to human readers. 589 warn("decoration field contains invalid character") 590 unless $field[1] =~ m:^[~!\@\#\$%^&\*\(\);\:\[\]\{\} ]+$:; 591 } 592 593 if ($field[0] =~ /[\[\]]/) { 594 #@ Anchor content should be decimal width 595 #@ xo_emit("{[:mumble}"); 596 #@ Should be: 597 #@ xo_emit("{[:32}"); 598 #@ Anchors need an integer value to specify the width of 599 #@ the set of anchored fields. The value can be positive 600 #@ (for left padding/right justification) or negative (for 601 #@ right padding/left justification) and can appear in 602 #@ either the start or stop anchor field descriptor. 603 error("anchor content should be decimal width") 604 if $field[1] && $field[1] !~ /^-?\d+$/ ; 605 606 #@ Anchor format should be "%d" 607 #@ xo_emit("{[:/%s}"); 608 #@ Should be: 609 #@ xo_emit("{[:/%d}"); 610 #@ Anchors only grok integer values, and if the value is not static, 611 #@ if must be in an 'int' argument, represented by the "%d" format. 612 #@ Anything else is an error. 613 error("anchor format should be \"%d\"") 614 if $field[2] && $field[2] ne "%d"; 615 616 #@ Anchor cannot have both format and encoding format") 617 #@ xo_emit("{[:32/%d}"); 618 #@ Should be: 619 #@ xo_emit("{[:32}"); 620 #@ Anchors can have a static value or argument for the width, 621 #@ but cannot have both. 622 error("anchor cannot have both format and encoding format") 623 if $field[1] && $field[2]; 624 } 625} 626 627sub count_args { 628 my($format) = @_; 629 630 return -1 unless $format; 631 632 my $in; 633 my($text, $ff, $fc, $rest); 634 for ($in = $format; $in; $in = $rest) { 635 ($text, $ff, $fc, $rest) = 636 ($in =~ /^([^%]*)(%[^%diouxXDOUeEfFgGaAcCsSp]*)([diouxXDOUeEfFgGaAcCsSp])(.*)$/); 637 unless ($ff) { 638 # Might be a "%%" 639 ($text, $ff, $rest) = ($in =~ /^([^%]*)(%%)(.*)$/); 640 if ($ff) { 641 check_text($text); 642 } else { 643 # Not sure what's going on here, but something's wrong... 644 error("invalid field format") if $in =~ /%/; 645 } 646 next; 647 } 648 649 check_text($text); 650 check_field_format($ff, $fc); 651 } 652 653 return 0; 654} 655 656sub check_field_format { 657 my($ff, $fc) = @_; 658 659 print "check_field_format: [$ff] [$fc]\n" if $opt_debug; 660 661 my(@chunks) = split(/\./, $ff); 662 663 #@ Max width only valid for strings 664 #@ xo_emit("{:tag/%2.4.6d}", 55); 665 #@ Should be: 666 #@ xo_emit("{:tag/%2.6d}", 55); 667 #@ libxo allows a true 'max width' in addition to the traditional 668 #@ printf-style 'max number of bytes to use for input'. But this 669 #@ is supported only for string values, since it makes no sense 670 #@ for non-strings. This error may occur from a typo, 671 #@ like "{:tag/%6..6d}" where only one period should be used. 672 error("max width only valid for strings") 673 if $#chunks >= 2 && $fc !~ /[sS]/; 674} 675 676sub error { 677 return if $opt_vocabulary; 678 print STDERR $curfile . ": " .$curln . ": error: " . join(" ", @_) . "\n"; 679 print STDERR $replay . "\n" if $opt_print; 680 $errors += 1; 681} 682 683sub warn { 684 return if $opt_vocabulary; 685 print STDERR $curfile . ": " .$curln . ": warning: " . join(" ", @_) . "\n"; 686 print STDERR $replay . "\n" if $opt_print; 687 $warnings += 1; 688} 689 690sub info { 691 return if $opt_vocabulary; 692 print STDERR $curfile . ": " .$curln . ": info: " . join(" ", @_) . "\n"; 693 print STDERR $replay . "\n" if $opt_print; 694 $info += 1; 695} 696 697main: { 698 main(); 699} 700