xref: /freebsd/contrib/libxo/xolint/xolint.pl (revision ff0ba87247820afbdfdc1b307c803f7923d0e4d3)
1#!/usr/bin/env perl
2#
3# Copyright (c) 2014, Juniper Networks, Inc.
4# All rights reserved.
5# This SOFTWARE is licensed under the LICENSE provided in the
6# ../Copyright file. By downloading, installing, copying, or otherwise
7# using the SOFTWARE, you agree to be bound by the terms of that
8# LICENSE.
9# Phil Shafer, August 2014
10#
11#
12# xolint -- a lint for inspecting xo_emit format strings
13#
14# Yes, that's a long way to go for a pun.
15
16%vocabulary = ();
17
18sub main {
19    while ($ARGV[0] =~ /^-/) {
20	$_ = shift @ARGV;
21	$opt_cpp = 1 if /^-c/;
22	$opt_cflags .= shift @ARGV if /^-C/;
23	$opt_debug = 1 if /^-d/;
24	extract_docs() if /^-D/;
25	$opt_info = $opt_vocabulary = 1 if /^-I/;
26	$opt_print = 1 if /^-p/;
27	$opt_vocabulary = 1 if /^-V/;
28	extract_samples() if /^-X/;
29    }
30
31    for $file (@ARGV) {
32	parse_file($file);
33    }
34
35    if ($opt_info) {
36	print "static xo_info_t xo_info_table[] = {\n";
37	for $name (sort(keys(%vocabulary))) {
38	    print "    { \"", $name, "\", \"type\", \"desc\" },\n";
39	}
40	print "};\n";
41	print "static int xo_info_count = "
42	    . "(sizeof(xo_info_table) / sizeof(xo_info_table[0]));\n\n";
43	print "#define XO_SET_INFO() \\\n";
44	print "    xo_set_info(NULL, xo_info_table, xo_info_count)\n";
45    } elsif ($opt_vocabulary) {
46	for $name (sort(keys(%vocabulary))) {
47	    print $name, "\n";
48	}
49    }
50}
51
52sub extract_samples {
53    my $x = "\#" . "\@";
54    my $cmd = "grep -B1 -i '$x Should be' $0 | grep xo_emit | sed 's/.*\#*\@//'";
55    system($cmd);
56    exit(0);
57}
58
59sub extract_docs {
60    my $x = "\#" . "\@";
61    my $cmd = "grep -B1 '$x' $0";
62    open INPUT, "$cmd |";
63    local @input = <INPUT>;
64    close INPUT;
65    my $ln, $new = 0, $first = 1, $need_nl;
66
67    for ($ln = 0; $ln <= $#input; $ln++) {
68	chomp($_ = $input[$ln]);
69	if (/^--/) {
70	    $ln += 1;
71	    $new = 1;
72	    next;
73	}
74	if ($first) {
75	    $new = 1;
76	    $first = 0;
77	    next;
78	}
79
80	s/\s*\#\@\s*//;
81
82	if ($new) {
83	    if ($need_nl) {
84		print "\n\n";
85		$need_nl = 0;
86	    }
87
88	    print "*** '$_'\n\n";
89	    print "The message \"$_\" can be caused by code like:\n\n";
90	    $new = 0;
91
92	} elsif (/xo_emit\s*\(/) {
93	    s/^\s+//;
94	    print "    $_\n\n";
95
96	} elsif (/^Should be/i) {
97	    print "This code should be replaced with code like:\n\n";
98
99	} else {
100	    print "$_\n";
101	    $need_nl = 1;
102	}
103    }
104
105    exit(0);
106}
107
108sub parse_file {
109    local($file) = @_;
110    local($errors, $warnings, $info) = (0, 0, 0);
111    local $curfile = $file;
112    local $curln = 0;
113
114    if ($opt_cpp) {
115	die "no such file" unless -f $file;
116	open INPUT, "cpp $opt_cflags $file |";
117    } else {
118	open INPUT, $file || die "cannot open input file '$file'";
119    }
120    local @input = <INPUT>;
121    close INPUT;
122
123    local $ln, $rln, $line, $replay;
124
125    for ($ln = 0; $ln < $#input; $ln++) {
126	$line = $input[$ln];
127	$curln += 1;
128
129	if ($line =~ /^\#/) {
130	    my($num, $fn) = ($line =~ /\#\s*(\d+)\s+"(.+)"/);
131	    ($curfile, $curln) = ($fn, $num) if $num;
132	    next;
133	}
134
135	next unless $line =~ /xo_emit\(/;
136
137	@tokens = parse_tokens();
138	print "token:\n    '" . join("'\n    '", @tokens) . "'\n"
139	    if $opt_debug;
140	check_format($tokens[0]);
141    }
142
143    print $file . ": $errors errors, $warnings warnings, $info info\n"
144	unless $opt_vocabulary;
145}
146
147sub parse_tokens {
148    my $full = "$'";
149    my @tokens = ();
150    my %pairs = ( "{" => "}", "[" => "]", "(" => ")" );
151    my %quotes = ( "\"" => "\"", "'" => "'" );
152    local @data = split(//, $full);
153    local @open = ();
154    local $current = "";
155    my $quote = "";
156    local $off = 0;
157    my $ch;
158
159    $replay = $curln . "     " . $line;
160    $rln = $ln + 1;
161
162    for (;;) {
163	get_tokens() if $off > $#data;
164	die "out of data" if $off > $#data;
165	$ch = $data[$off++];
166
167	print "'$ch' ($quote) ($#open) [" . join("", @open) . "]\n"
168	    if $opt_debug;
169
170	last if $ch eq ";" && $#open < 0;
171
172	if ($ch eq "," && $quote eq "" && $#open < 0) {
173	    print "[$current]\n" if $opt_debug;
174	    push @tokens, $current;
175	    $current = "";
176	    next;
177	}
178
179	next if $ch =~ /[ \t\n\r]/ && $quote eq "" && $#open < 0;
180
181	$current .= $ch;
182
183	if ($quote) {
184	    if ($ch eq $quote) {
185		$quote = "";
186	    }
187	    next;
188	}
189	if ($quotes{$ch}) {
190	    $quote = $quotes{$ch};
191	    $current = substr($current, 0, -2) if $current =~ /""$/;
192	    next;
193	}
194
195	if ($pairs{$ch}) {
196	    push @open, $pairs{$ch};
197	    next;
198	}
199
200	if ($#open >= 0 && $ch eq $open[$#open]) {
201	    pop @open;
202	    next;
203	}
204    }
205
206    push @tokens, substr($current, 0, -1);
207    return @tokens;
208}
209
210sub get_tokens {
211    if ($ln + 1 < $#input) {
212	$line = $input[++$ln];
213	$curln += 1;
214	$replay .= $curln . "     " . $line;
215	@data = split(//, $line);
216	$off = 0;
217    }
218}
219
220sub check_format {
221    my($format) = @_;
222
223    return unless $format =~ /^".*"$/;
224
225    my @data = split(//, $format);
226    my $ch;
227    my $braces = 0;
228    local $count = 0;
229    my $content = "";
230    my $off;
231    my $phase = 0;
232    my @build = ();
233    local $last, $prev = "";
234
235    # Nukes quotes
236    pop @data;
237    shift @data;
238
239    for (;;) {
240	last if $off > $#data;
241	$ch = $data[$off++];
242
243	if ($ch eq "\\") {
244	    $ch = $data[$off++];
245	    $off += 1 if $ch eq "\\"; # double backslash: "\\/"
246	    next;
247	}
248
249	if ($braces) {
250	    if ($ch eq "}") {
251		check_field(@build);
252		$braces = 0;
253		@build = ();
254		$phase = 0;
255		next;
256	    } elsif ($phase == 0 && $ch eq ":") {
257		$phase += 1;
258		next;
259	    } elsif ($ch eq "/") {
260		$phase += 1;
261		next;
262	    }
263
264	} else {
265	    if ($ch eq "{") {
266		check_text($build[0]) if length($build[0]);
267		$braces = 1;
268		@build = ();
269		$last = $prev;
270		next;
271	    }
272	}
273
274	$prev = $ch;
275	$build[$phase] .= $ch;
276    }
277
278    if ($braces) {
279	error("missing closing brace");
280	check_field(@build);
281    } else {
282	check_text($build[0]) if length($build[0]);
283    }
284}
285
286sub check_text {
287    my($text) = @_;
288
289    print "checking text: [$text]\n" if $opt_debug;
290
291    #@ A percent sign appearing in text is a literal
292    #@     xo_emit("cost: %d", cost);
293    #@ Should be:
294    #@     xo_emit("{L:cost}: {:cost/%d}", cost);
295    #@ This can be a bit surprising and could be a field that was not
296    #@ properly converted to a libxo-style format string.
297    info("a percent sign appearing in text is a literal") if $text =~ /%/;
298}
299
300sub check_field {
301    my(@field) = @_;
302    print "checking field: [" . join("][", @field) . "]\n" if $opt_debug;
303
304    if ($opt_vocabulary) {
305	$vocabulary{$field[1]} = 1
306	    if $field[1] && $field[0] !~ /[DELNPTUW\[\]]/;
307	return;
308    }
309
310    #@ Last character before field definition is a field type
311    #@ A common typo:
312    #@     xo_emit("{T:Min} T{:Max}");
313    #@ Should be:
314    #@     xo_emit("{T:Min} {T:Max}");
315    #@ Twiddling the "{" and the field role is a common typo.
316    info("last character before field definition is a field type ($last)")
317	if $last =~ /[DELNPTUVW\[\]]/ && $field[0] !~ /[DELNPTUVW\[\]]/;
318
319    #@ Encoding format uses different number of arguments
320    #@     xo_emit("{:name/%6.6s %%04d/%s}", name, number);
321    #@ Should be:
322    #@     xo_emit("{:name/%6.6s %04d/%s-%d}", name, number);
323    #@ Both format should consume the same number of arguments off the stack
324    my $cf = count_args($field[2]);
325    my $ce = count_args($field[3]);
326    warn("encoding format uses different number of arguments ($cf/$ce)")
327	if $ce >= 0 && $cf >= 0 && $ce != $cf;
328
329    #@ Only one field role can be used
330    #@     xo_emit("{LT:Max}");
331    #@ Should be:
332    #@     xo_emit("{T:Max}");
333    my(@roles) = ($field[0] !~ /([DELNPTUVW\[\]]).*([DELNPTUVW\[\]])/);
334    error("only one field role can be used (" . join(", ", @roles) . ")")
335	if $#roles > 0;
336
337    # Field is a note, label, or title
338    if ($field[0] =~ /[DLNT]/) {
339
340	#@ Potential missing slash after N, L, or T with format
341	#@     xo_emit("{T:%6.6s}\n", "Max");
342	#@ should be:
343	#@     xo_emit("{T:/%6.6s}\n", "Max");
344	#@ The "%6.6s" will be a literal, not a field format.  While
345	#@ it's possibly valid, it's likely a missing "/".
346	info("potential missing slash after N, L, or T with format")
347	    if $field[1] =~ /%/;
348
349	#@ Format cannot be given when content is present (roles: DNLT)
350	#@    xo_emit("{T:Max/%6.6s}", "Max");
351	#@ Fields with the D, N, L, or T roles can't have both
352	#@ static literal content ("{T:Title}") and a
353	#@ format ("{T:/%s}").
354	#@ This error will also occur when the content has a backslash
355	#@ in it, like "{N:Type of I/O}"; backslashes should be escaped,
356	#@ like "{N:Type of I\\/O}".  Note the double backslash, one for
357	#@ handling 'C' strings, and one for libxo.
358	error("format cannot be given when content is present")
359	    if $field[1] && $field[2];
360
361	#@ An encoding format cannot be given (roles: DNLT)
362	#@    xo_emit("{T:Max//%s}", "Max");
363	#@ Fields with the D, N, L, and T roles are not emitted in
364	#@ the 'encoding' style (JSON, XML), so an encoding format
365	#@ would make no sense.
366	error("encoding format cannot be given when content is present")
367	    if $field[3];
368    }
369
370    # A value field
371    if (length($field[0]) == 0 || $field[0] =~ /V/) {
372
373	#@ Value field must have a name (as content)")
374	#@     xo_emit("{:/%s}", "value");
375	#@ Should be:
376	#@     xo_emit("{:tag-name/%s}", "value");
377	#@ The field name is used for XML and JSON encodings.  These
378	#@ tags names are static and must appear directly in the
379	#@ field descriptor.
380	error("value field must have a name (as content)")
381	    unless $field[1];
382
383	#@ Use hyphens, not underscores, for value field name
384	#@     xo_emit("{:no_under_scores}", "bad");
385	#@ Should be:
386	#@     xo_emit("{:no-under-scores}", "bad");
387	#@ Use of hyphens is traditional in XML, and the XOF_UNDERSCORES
388	#@ flag can be used to generate underscores in JSON, if desired.
389	#@ But the raw field name should use hyphens.
390	error("use hyphens, not underscores, for value field name")
391	    if $field[1] =~ /_/;
392
393	#@ Value field name cannot start with digit
394	#@     xo_emit("{:10-gig/}");
395	#@ Should be:
396	#@     xo_emit("{:ten-gig/}");
397	#@ XML element names cannot start with a digit.
398	error("value field name cannot start with digit")
399	    if $field[1] =~ /^[0-9]/;
400
401	#@ Value field name should be lower case
402	#@     xo_emit("{:WHY-ARE-YOU-SHOUTING}", "NO REASON");
403	#@ Should be:
404	#@     xo_emit("{:why-are-you-shouting}", "no reason");
405	#@ Lower case is more civilized.  Even TLAs should be lower case
406	#@ to avoid scenarios where the differences between "XPath" and
407	#@ "Xpath" drive your users crazy.  Lower case rules the seas.
408	error("value field name should be lower case")
409	    if $field[1] =~ /[A-Z]/;
410
411	#@ Value field name should be longer than two characters
412	#@     xo_emit("{:x}", "mumble");
413	#@ Should be:
414	#@     xo_emit("{:something-meaningful}", "mumble");
415	#@ Field names should be descriptive, and it's hard to
416	#@ be descriptive in less than two characters.  Consider
417	#@ your users and try to make something more useful.
418	#@ Note that this error often occurs when the field type
419	#@ is placed after the colon ("{:T/%20s}"), instead of before
420	#@ it ("{T:/20s}").
421	error("value field name should be longer than two characters")
422	    if $field[1] =~ /[A-Z]/;
423
424	#@ Value field name contains invalid character
425	#@     xo_emit("{:cost-in-$$/%u}", 15);
426	#@ Should be:
427	#@     xo_emit("{:cost-in-dollars/%u}", 15);
428	#@ An invalid character is often a sign of a typo, like "{:]}"
429	#@ instead of "{]:}".  Field names are restricted to lower-case
430	#@ characters, digits, and hyphens.
431	error("value field name contains invalid character (" . $field[1] . ")")
432	    unless $field[1] =~ /^[0-9a-z-]*$/;
433    }
434
435    # A decoration field
436    if ($field[0] =~ /D/) {
437
438	#@decoration field contains invalid character
439	#@     xo_emit("{D:not good}");
440	#@ Should be:
441	#@     xo_emit("{D:((}{:good}{D:))}", "yes");
442	#@ This is minor, but fields should use proper roles.  Decoration
443	#@ fields are meant to hold puncuation and other characters used
444	#@ to decorate the content, typically to make it more readable
445	#@ to human readers.
446	warn("decoration field contains invalid character")
447	    unless $field[1] =~ m:^[~!\@\#\$%^&\*\(\);\:\[\]\{\} ]+$:;
448    }
449
450    if ($field[0] =~ /[\[\]]/) {
451	#@ Anchor content should be decimal width
452	#@     xo_emit("{[:mumble}");
453	#@ Should be:
454	#@     xo_emit("{[:32}");
455	#@ Anchors need an integer value to specify the width of
456	#@ the set of anchored fields.  The value can be positive
457	#@ (for left padding/right justification) or negative (for
458	#@ right padding/left justification) and can appear in
459	#@ either the start or stop anchor field descriptor.
460	error("anchor content should be decimal width")
461	    if $field[1] && $field[1] !~ /^-?\d+$/ ;
462
463	#@ Anchor format should be "%d"
464	#@     xo_emit("{[:/%s}");
465	#@ Should be:
466	#@     xo_emit("{[:/%d}");
467	#@ Anchors only grok integer values, and if the value is not static,
468	#@ if must be in an 'int' argument, represented by the "%d" format.
469	#@ Anything else is an error.
470	error("anchor format should be \"%d\"")
471	    if $field[2] && $field[2] ne "%d";
472
473	#@ Anchor cannot have both format and encoding format")
474	#@     xo_emit("{[:32/%d}");
475	#@ Should be:
476	#@     xo_emit("{[:32}");
477	#@ Anchors can have a static value or argument for the width,
478	#@ but cannot have both.
479	error("anchor cannot have both format and encoding format")
480	    if $field[1] && $field[2];
481    }
482}
483
484sub count_args {
485    my($format) = @_;
486
487    return -1 unless $format;
488
489    my $in;
490    my($text, $ff, $fc, $rest);
491    for ($in = $format; $in; $in = $rest) {
492	($text, $ff, $fc, $rest) =
493	   ($in =~ /^([^%]*)(%[^%diouxXDOUeEfFgGaAcCsSp]*)([diouxXDOUeEfFgGaAcCsSp])(.*)$/);
494	unless ($ff) {
495	    # Might be a "%%"
496	    ($text, $ff, $rest) = ($in =~ /^([^%]*)(%%)(.*)$/);
497	    if ($ff) {
498		check_text($text);
499	    } else {
500		# Not sure what's going on here, but something's wrong...
501		error("invalid field format") if $in =~ /%/;
502	    }
503	    next;
504	}
505
506	check_text($text);
507	check_field_format($ff, $fc);
508    }
509
510    return 0;
511}
512
513sub check_field_format {
514    my($ff, $fc) = @_;
515
516    print "check_field_format: [$ff] [$fc]\n" if $opt_debug;
517
518    my(@chunks) = split(/\./, $ff);
519
520    #@ Max width only valid for strings
521    #@     xo_emit("{:tag/%2.4.6d}", 55);
522    #@ Should be:
523    #@     xo_emit("{:tag/%2.6d}", 55);
524    #@ libxo allows a true 'max width' in addition to the traditional
525    #@ printf-style 'max number of bytes to use for input'.  But this
526    #@ is supported only for string values, since it makes no sense
527    #@ for non-strings.  This error may occur from a typo,
528    #@ like "{:tag/%6..6d}" where only one period should be used.
529    error("max width only valid for strings")
530	if $#chunks >= 2 && $fc =~ /[sS]/;
531}
532
533sub error {
534    return if $opt_vocabulary;
535    print STDERR $curfile . ": " .$curln . ": error: " . join(" ", @_) . "\n";
536    print STDERR $replay . "\n" if $opt_print;
537    $errors += 1;
538}
539
540sub warn {
541    return if $opt_vocabulary;
542    print STDERR $curfile . ": " .$curln . ": warning: " . join(" ", @_) . "\n";
543    print STDERR $replay . "\n" if $opt_print;
544    $warnings += 1;
545}
546
547sub info {
548    return if $opt_vocabulary;
549    print STDERR $curfile . ": " .$curln . ": info: " . join(" ", @_) . "\n";
550    print STDERR $replay . "\n" if $opt_print;
551    $info += 1;
552}
553
554main: {
555    main();
556}
557