#!/usr/bin/env perl # # Copyright (c) 2014, Juniper Networks, Inc. # All rights reserved. # This SOFTWARE is licensed under the LICENSE provided in the # ../Copyright file. By downloading, installing, copying, or otherwise # using the SOFTWARE, you agree to be bound by the terms of that # LICENSE. # Phil Shafer, August 2014 # # # xolint -- a lint for inspecting xo_emit format strings # # Yes, that's a long way to go for a pun. %vocabulary = (); sub main { while ($ARGV[0] =~ /^-/) { $_ = shift @ARGV; $opt_cpp = 1 if /^-c/; $opt_cflags .= shift @ARGV if /^-C/; $opt_debug = 1 if /^-d/; extract_docs() if /^-D/; $opt_info = $opt_vocabulary = 1 if /^-I/; $opt_print = 1 if /^-p/; $opt_vocabulary = 1 if /^-V/; extract_samples() if /^-X/; } for $file (@ARGV) { parse_file($file); } if ($opt_info) { print "static xo_info_t xo_info_table[] = {\n"; for $name (sort(keys(%vocabulary))) { print " { \"", $name, "\", \"type\", \"desc\" },\n"; } print "};\n"; print "static int xo_info_count = " . "(sizeof(xo_info_table) / sizeof(xo_info_table[0]));\n\n"; print "#define XO_SET_INFO() \\\n"; print " xo_set_info(NULL, xo_info_table, xo_info_count)\n"; } elsif ($opt_vocabulary) { for $name (sort(keys(%vocabulary))) { print $name, "\n"; } } } sub extract_samples { my $x = "\#" . "\@"; my $cmd = "grep -B1 -i '$x Should be' $0 | grep xo_emit | sed 's/.*\#*\@//'"; system($cmd); exit(0); } sub extract_docs { my $x = "\#" . "\@"; my $cmd = "grep -B1 '$x' $0"; open INPUT, "$cmd |"; local @input = ; close INPUT; my $ln, $new = 0, $first = 1, $need_nl; for ($ln = 0; $ln <= $#input; $ln++) { chomp($_ = $input[$ln]); if (/^--/) { $ln += 1; $new = 1; next; } if ($first) { $new = 1; $first = 0; next; } s/\s*\#\@\s*//; if ($new) { if ($need_nl) { print "\n\n"; $need_nl = 0; } print "*** '$_'\n\n"; print "The message \"$_\" can be caused by code like:\n\n"; $new = 0; } elsif (/xo_emit\s*\(/) { s/^\s+//; print " $_\n\n"; } elsif (/^Should be/i) { print "This code should be replaced with code like:\n\n"; } else { print "$_\n"; $need_nl = 1; } } exit(0); } sub parse_file { local($file) = @_; local($errors, $warnings, $info) = (0, 0, 0); local $curfile = $file; local $curln = 0; if ($opt_cpp) { die "no such file" unless -f $file; open INPUT, "cpp $opt_cflags $file |"; } else { open INPUT, $file || die "cannot open input file '$file'"; } local @input = ; close INPUT; local $ln, $rln, $line, $replay; for ($ln = 0; $ln < $#input; $ln++) { $line = $input[$ln]; $curln += 1; if ($line =~ /^\#/) { my($num, $fn) = ($line =~ /\#\s*(\d+)\s+"(.+)"/); ($curfile, $curln) = ($fn, $num) if $num; next; } next unless $line =~ /xo_emit\(/; @tokens = parse_tokens(); print "token:\n '" . join("'\n '", @tokens) . "'\n" if $opt_debug; check_format($tokens[0]); } print $file . ": $errors errors, $warnings warnings, $info info\n" unless $opt_vocabulary; } sub parse_tokens { my $full = "$'"; my @tokens = (); my %pairs = ( "{" => "}", "[" => "]", "(" => ")" ); my %quotes = ( "\"" => "\"", "'" => "'" ); local @data = split(//, $full); local @open = (); local $current = ""; my $quote = ""; local $off = 0; my $ch; $replay = $curln . " " . $line; $rln = $ln + 1; for (;;) { get_tokens() if $off > $#data; die "out of data" if $off > $#data; $ch = $data[$off++]; print "'$ch' ($quote) ($#open) [" . join("", @open) . "]\n" if $opt_debug; last if $ch eq ";" && $#open < 0; if ($ch eq "," && $quote eq "" && $#open < 0) { print "[$current]\n" if $opt_debug; push @tokens, $current; $current = ""; next; } next if $ch =~ /[ \t\n\r]/ && $quote eq "" && $#open < 0; $current .= $ch; if ($quote) { if ($ch eq $quote) { $quote = ""; } next; } if ($quotes{$ch}) { $quote = $quotes{$ch}; $current = substr($current, 0, -2) if $current =~ /""$/; next; } if ($pairs{$ch}) { push @open, $pairs{$ch}; next; } if ($#open >= 0 && $ch eq $open[$#open]) { pop @open; next; } } push @tokens, substr($current, 0, -1); return @tokens; } sub get_tokens { if ($ln + 1 < $#input) { $line = $input[++$ln]; $curln += 1; $replay .= $curln . " " . $line; @data = split(//, $line); $off = 0; } } sub check_format { my($format) = @_; return unless $format =~ /^".*"$/; my @data = split(//, $format); my $ch; my $braces = 0; local $count = 0; my $content = ""; my $off; my $phase = 0; my @build = (); local $last, $prev = ""; # Nukes quotes pop @data; shift @data; for (;;) { last if $off > $#data; $ch = $data[$off++]; if ($ch eq "\\") { $ch = $data[$off++]; $off += 1 if $ch eq "\\"; # double backslash: "\\/" next; } if ($braces) { if ($ch eq "}") { check_field(@build); $braces = 0; @build = (); $phase = 0; next; } elsif ($phase == 0 && $ch eq ":") { $phase += 1; next; } elsif ($ch eq "/") { $phase += 1; next; } } else { if ($ch eq "{") { check_text($build[0]) if length($build[0]); $braces = 1; @build = (); $last = $prev; next; } } $prev = $ch; $build[$phase] .= $ch; } if ($braces) { error("missing closing brace"); check_field(@build); } else { check_text($build[0]) if length($build[0]); } } sub check_text { my($text) = @_; print "checking text: [$text]\n" if $opt_debug; #@ A percent sign appearing in text is a literal #@ xo_emit("cost: %d", cost); #@ Should be: #@ xo_emit("{L:cost}: {:cost/%d}", cost); #@ This can be a bit surprising and could be a field that was not #@ properly converted to a libxo-style format string. info("a percent sign appearing in text is a literal") if $text =~ /%/; } sub check_field { my(@field) = @_; print "checking field: [" . join("][", @field) . "]\n" if $opt_debug; if ($opt_vocabulary) { $vocabulary{$field[1]} = 1 if $field[1] && $field[0] !~ /[DELNPTUW\[\]]/; return; } #@ Last character before field definition is a field type #@ A common typo: #@ xo_emit("{T:Min} T{:Max}"); #@ Should be: #@ xo_emit("{T:Min} {T:Max}"); #@ Twiddling the "{" and the field role is a common typo. info("last character before field definition is a field type ($last)") if $last =~ /[DELNPTUVW\[\]]/ && $field[0] !~ /[DELNPTUVW\[\]]/; #@ Encoding format uses different number of arguments #@ xo_emit("{:name/%6.6s %%04d/%s}", name, number); #@ Should be: #@ xo_emit("{:name/%6.6s %04d/%s-%d}", name, number); #@ Both format should consume the same number of arguments off the stack my $cf = count_args($field[2]); my $ce = count_args($field[3]); warn("encoding format uses different number of arguments ($cf/$ce)") if $ce >= 0 && $cf >= 0 && $ce != $cf; #@ Only one field role can be used #@ xo_emit("{LT:Max}"); #@ Should be: #@ xo_emit("{T:Max}"); my(@roles) = ($field[0] !~ /([DELNPTUVW\[\]]).*([DELNPTUVW\[\]])/); error("only one field role can be used (" . join(", ", @roles) . ")") if $#roles > 0; # Field is a note, label, or title if ($field[0] =~ /[DLNT]/) { #@ Potential missing slash after N, L, or T with format #@ xo_emit("{T:%6.6s}\n", "Max"); #@ should be: #@ xo_emit("{T:/%6.6s}\n", "Max"); #@ The "%6.6s" will be a literal, not a field format. While #@ it's possibly valid, it's likely a missing "/". info("potential missing slash after N, L, or T with format") if $field[1] =~ /%/; #@ Format cannot be given when content is present (roles: DNLT) #@ xo_emit("{T:Max/%6.6s}", "Max"); #@ Fields with the D, N, L, or T roles can't have both #@ static literal content ("{T:Title}") and a #@ format ("{T:/%s}"). #@ This error will also occur when the content has a backslash #@ in it, like "{N:Type of I/O}"; backslashes should be escaped, #@ like "{N:Type of I\\/O}". Note the double backslash, one for #@ handling 'C' strings, and one for libxo. error("format cannot be given when content is present") if $field[1] && $field[2]; #@ An encoding format cannot be given (roles: DNLT) #@ xo_emit("{T:Max//%s}", "Max"); #@ Fields with the D, N, L, and T roles are not emitted in #@ the 'encoding' style (JSON, XML), so an encoding format #@ would make no sense. error("encoding format cannot be given when content is present") if $field[3]; } # A value field if (length($field[0]) == 0 || $field[0] =~ /V/) { #@ Value field must have a name (as content)") #@ xo_emit("{:/%s}", "value"); #@ Should be: #@ xo_emit("{:tag-name/%s}", "value"); #@ The field name is used for XML and JSON encodings. These #@ tags names are static and must appear directly in the #@ field descriptor. error("value field must have a name (as content)") unless $field[1]; #@ Use hyphens, not underscores, for value field name #@ xo_emit("{:no_under_scores}", "bad"); #@ Should be: #@ xo_emit("{:no-under-scores}", "bad"); #@ Use of hyphens is traditional in XML, and the XOF_UNDERSCORES #@ flag can be used to generate underscores in JSON, if desired. #@ But the raw field name should use hyphens. error("use hyphens, not underscores, for value field name") if $field[1] =~ /_/; #@ Value field name cannot start with digit #@ xo_emit("{:10-gig/}"); #@ Should be: #@ xo_emit("{:ten-gig/}"); #@ XML element names cannot start with a digit. error("value field name cannot start with digit") if $field[1] =~ /^[0-9]/; #@ Value field name should be lower case #@ xo_emit("{:WHY-ARE-YOU-SHOUTING}", "NO REASON"); #@ Should be: #@ xo_emit("{:why-are-you-shouting}", "no reason"); #@ Lower case is more civilized. Even TLAs should be lower case #@ to avoid scenarios where the differences between "XPath" and #@ "Xpath" drive your users crazy. Lower case rules the seas. error("value field name should be lower case") if $field[1] =~ /[A-Z]/; #@ Value field name should be longer than two characters #@ xo_emit("{:x}", "mumble"); #@ Should be: #@ xo_emit("{:something-meaningful}", "mumble"); #@ Field names should be descriptive, and it's hard to #@ be descriptive in less than two characters. Consider #@ your users and try to make something more useful. #@ Note that this error often occurs when the field type #@ is placed after the colon ("{:T/%20s}"), instead of before #@ it ("{T:/20s}"). error("value field name should be longer than two characters") if $field[1] =~ /[A-Z]/; #@ Value field name contains invalid character #@ xo_emit("{:cost-in-$$/%u}", 15); #@ Should be: #@ xo_emit("{:cost-in-dollars/%u}", 15); #@ An invalid character is often a sign of a typo, like "{:]}" #@ instead of "{]:}". Field names are restricted to lower-case #@ characters, digits, and hyphens. error("value field name contains invalid character (" . $field[1] . ")") unless $field[1] =~ /^[0-9a-z-]*$/; } # A decoration field if ($field[0] =~ /D/) { #@decoration field contains invalid character #@ xo_emit("{D:not good}"); #@ Should be: #@ xo_emit("{D:((}{:good}{D:))}", "yes"); #@ This is minor, but fields should use proper roles. Decoration #@ fields are meant to hold puncuation and other characters used #@ to decorate the content, typically to make it more readable #@ to human readers. warn("decoration field contains invalid character") unless $field[1] =~ m:^[~!\@\#\$%^&\*\(\);\:\[\]\{\} ]+$:; } if ($field[0] =~ /[\[\]]/) { #@ Anchor content should be decimal width #@ xo_emit("{[:mumble}"); #@ Should be: #@ xo_emit("{[:32}"); #@ Anchors need an integer value to specify the width of #@ the set of anchored fields. The value can be positive #@ (for left padding/right justification) or negative (for #@ right padding/left justification) and can appear in #@ either the start or stop anchor field descriptor. error("anchor content should be decimal width") if $field[1] && $field[1] !~ /^-?\d+$/ ; #@ Anchor format should be "%d" #@ xo_emit("{[:/%s}"); #@ Should be: #@ xo_emit("{[:/%d}"); #@ Anchors only grok integer values, and if the value is not static, #@ if must be in an 'int' argument, represented by the "%d" format. #@ Anything else is an error. error("anchor format should be \"%d\"") if $field[2] && $field[2] ne "%d"; #@ Anchor cannot have both format and encoding format") #@ xo_emit("{[:32/%d}"); #@ Should be: #@ xo_emit("{[:32}"); #@ Anchors can have a static value or argument for the width, #@ but cannot have both. error("anchor cannot have both format and encoding format") if $field[1] && $field[2]; } } sub count_args { my($format) = @_; return -1 unless $format; my $in; my($text, $ff, $fc, $rest); for ($in = $format; $in; $in = $rest) { ($text, $ff, $fc, $rest) = ($in =~ /^([^%]*)(%[^%diouxXDOUeEfFgGaAcCsSp]*)([diouxXDOUeEfFgGaAcCsSp])(.*)$/); unless ($ff) { # Might be a "%%" ($text, $ff, $rest) = ($in =~ /^([^%]*)(%%)(.*)$/); if ($ff) { check_text($text); } else { # Not sure what's going on here, but something's wrong... error("invalid field format") if $in =~ /%/; } next; } check_text($text); check_field_format($ff, $fc); } return 0; } sub check_field_format { my($ff, $fc) = @_; print "check_field_format: [$ff] [$fc]\n" if $opt_debug; my(@chunks) = split(/\./, $ff); #@ Max width only valid for strings #@ xo_emit("{:tag/%2.4.6d}", 55); #@ Should be: #@ xo_emit("{:tag/%2.6d}", 55); #@ libxo allows a true 'max width' in addition to the traditional #@ printf-style 'max number of bytes to use for input'. But this #@ is supported only for string values, since it makes no sense #@ for non-strings. This error may occur from a typo, #@ like "{:tag/%6..6d}" where only one period should be used. error("max width only valid for strings") if $#chunks >= 2 && $fc =~ /[sS]/; } sub error { return if $opt_vocabulary; print STDERR $curfile . ": " .$curln . ": error: " . join(" ", @_) . "\n"; print STDERR $replay . "\n" if $opt_print; $errors += 1; } sub warn { return if $opt_vocabulary; print STDERR $curfile . ": " .$curln . ": warning: " . join(" ", @_) . "\n"; print STDERR $replay . "\n" if $opt_print; $warnings += 1; } sub info { return if $opt_vocabulary; print STDERR $curfile . ": " .$curln . ": info: " . join(" ", @_) . "\n"; print STDERR $replay . "\n" if $opt_print; $info += 1; } main: { main(); }