1 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
2 /* All Rights Reserved */
3
4
5 /*
6 * Copyright (c) 1980 Regents of the University of California.
7 * All rights reserved. The Berkeley software License Agreement
8 * specifies the terms and conditions for redistribution.
9 */
10
11 /*
12 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
13 * Use is subject to license terms.
14 */
15
16 /*
17 * Copyright (c) 2018, Joyent, Inc.
18 */
19
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <unistd.h>
30 #include <string.h>
31 #include <ctype.h>
32 #include <locale.h>
33
/*
 * Compile-time limits.
 */
#define	MAXSTK	100	/* Stack size (the stack is enlarged on demand) */
#define	MAXBR	100	/* Max number of bracket pairs known */
#define	MAXCMDS	500	/* Max number of commands known */

static int maxstk;	/* number of entries currently allocated for stk */
38
/*
 * One entry of the stack on which we remember what has been opened
 * but not yet closed.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number on which the thing was opened */
};

static struct stkstr *stk;	/* the stack itself, allocated in main */
static int stktop;		/* index of the top entry, -1 when empty */
49
/*
 * The kinds of opening and closing brackets.  Each entry pairs the
 * request that opens a construct with the request that closes it; the
 * list ends at the first entry whose opbr is NULL.  The first two
 * entries are reserved for size and font changes and are referred to
 * by index.
 */
#define	SZ	0	/* index of the size entry */
#define	FT	1	/* index of the font entry */

static struct brstr {
	char *opbr;
	char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* also \s */
	{ "ft", "ft" },		/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	/* end of list */
	{ NULL, NULL }
};
107
108 /*
109 * All commands known to nroff, plus macro packages.
110 * Used so we can complain about unrecognized commands.
111 */
112 static char *knowncmds[MAXCMDS] = {
113 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
114 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
115 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
116 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
117 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
118 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
119 "D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
120 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
121 "FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
122 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
123 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
124 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
125 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
126 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
127 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
128 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
129 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
130 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
131 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
132 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
133 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
134 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
135 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
136 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
137 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
138 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
139 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
140 "q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
141 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
142 "ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
143 "yr", 0
144 };
145
/*
 * State shared by the routines below.
 */
static char	line[256];	/* the current line */
static int	lineno;		/* current line number in input file */
static char	*cfilename;	/* name of current file */
static int	nfiles;		/* number of files to process */

static int	fflag;		/* -f: ignore \f */
static int	sflag;		/* -s: ignore \s */

static int	ncmds;		/* number of entries in use in knowncmds */
static int	slot;		/* slot in knowncmds found by binsrch */
154
/*
 * Forward declarations.  Every one is a full prototype, including the
 * functions that take no arguments, so the compiler can check each
 * call against its declaration.
 */
static void growstk(void);
static void usage(void);
static void process(FILE *f);
static void complain(int i);
static void prop(int i);
static void chkcmd(char *line, char *mac);
static void nomatch(char *mac);
static int eq(char *s1, char *s2);
static void pe(int lineno);
static void checkknown(char *mac);
static void addcmd(char *line);
static void addmac(char *mac);
static int binsrch(char *mac);
168
169 static void
growstk()170 growstk()
171 {
172 stktop++;
173 if (stktop >= maxstk) {
174 maxstk *= 2;
175 stk = (struct stkstr *)realloc(stk,
176 sizeof (struct stkstr) * maxstk);
177 }
178 }
179
180 int
main(argc,argv)181 main(argc, argv)
182 int argc;
183 char **argv;
184 {
185 FILE *f;
186 int i;
187 char *cp;
188 char b1[4];
189
190 (void) setlocale(LC_ALL, "");
191 #if !defined(TEXT_DOMAIN)
192 #define TEXT_DOMAIN "SYS_TEST"
193 #endif
194 (void) textdomain(TEXT_DOMAIN);
195 stk = (struct stkstr *)calloc(100, sizeof (struct stkstr));
196 maxstk = 100;
197 /* Figure out how many known commands there are */
198 while (knowncmds[ncmds])
199 ncmds++;
200 while (argc > 1 && argv[1][0] == '-') {
201 switch (argv[1][1]) {
202
203 /* -a: add pairs of macros */
204 case 'a':
205 i = strlen(argv[1]) - 2;
206 if (i % 6 != 0)
207 usage();
208 /* look for empty macro slots */
209 for (i = 0; br[i].opbr; i++)
210 ;
211 for (cp = argv[1]+3; cp[-1]; cp += 6) {
212 br[i].opbr = malloc(3);
213 (void) strncpy(br[i].opbr, cp, 2);
214 br[i].clbr = malloc(3);
215 (void) strncpy(br[i].clbr, cp+3, 2);
216 /* knows pairs are also known cmds */
217 addmac(br[i].opbr);
218 addmac(br[i].clbr);
219 i++;
220 }
221 break;
222
223 /* -c: add known commands */
224 case 'c':
225 i = strlen(argv[1]) - 2;
226 if (i % 3 != 0)
227 usage();
228 for (cp = argv[1]+3; cp[-1]; cp += 3) {
229 if (cp[2] && cp[2] != '.')
230 usage();
231 (void) strncpy(b1, cp, 2);
232 addmac(b1);
233 }
234 break;
235
236 /* -f: ignore font changes */
237 case 'f':
238 fflag = 1;
239 break;
240
241 /* -s: ignore size changes */
242 case 's':
243 sflag = 1;
244 break;
245 default:
246 usage();
247 }
248 argc--; argv++;
249 }
250
251 nfiles = argc - 1;
252
253 if (nfiles > 0) {
254 for (i = 1; i < argc; i++) {
255 cfilename = argv[i];
256 f = fopen(cfilename, "r");
257 if (f == NULL) {
258 perror(cfilename);
259 exit(1);
260 }
261 else
262 process(f);
263 }
264 } else {
265 cfilename = "stdin";
266 process(stdin);
267 }
268 return (0);
269 }
270
/*
 * usage: print the command synopsis and exit with status 1.
 */
static void
usage(void)
{
	const char *synopsis;

	synopsis = gettext("Usage: "
	    "checknr [ -fs ] [ -a.xx.yy.xx.yy...] [-c.xx.xx.xx...] "
	    "[ filename .. ]\n");
	(void) printf(synopsis);
	exit(1);
}
278
279 static void
process(FILE * f)280 process(FILE *f)
281 {
282 int i, n;
283 char mac[5]; /* The current macro or nroff command */
284 int pl;
285
286 stktop = -1;
287 for (lineno = 1; fgets(line, sizeof (line), f); lineno++) {
288 if (line[0] == '.') {
289 /*
290 * find and isolate the macro/command name.
291 */
292 (void) strncpy(mac, line+1, 4);
293 if (isspace(mac[0])) {
294 pe(lineno);
295 (void) printf(gettext("Empty command\n"));
296 } else if (isspace(mac[1])) {
297 mac[1] = 0;
298 } else if (isspace(mac[2])) {
299 mac[2] = 0;
300 } else if (mac[0] != '\\' || mac[1] != '\"') {
301 pe(lineno);
302 (void) printf(gettext("Command too long\n"));
303 }
304
305 /*
306 * Is it a known command?
307 */
308 checkknown(mac);
309
310 /*
311 * Should we add it?
312 */
313 if (eq(mac, "de"))
314 addcmd(line);
315
316 chkcmd(line, mac);
317 }
318
319 /*
320 * At this point we process the line looking
321 * for \s and \f.
322 */
323 for (i = 0; line[i]; i++)
324 if (line[i] == '\\' && (i == 0 || line[i-1] != '\\')) {
325 if (!sflag && line[++i] == 's') {
326 pl = line[++i];
327 if (isdigit(pl)) {
328 n = pl - '0';
329 pl = ' ';
330 } else
331 n = 0;
332 while (isdigit(line[++i]))
333 n = 10 * n + line[i] - '0';
334 i--;
335 if (n == 0) {
336 if (stk[stktop].opno == SZ) {
337 stktop--;
338 } else {
339 pe(lineno);
340 (void) printf(
341 gettext("unmatched \\s0\n"));
342 }
343 } else {
344 growstk();
345 stk[stktop].opno = SZ;
346 stk[stktop].pl = pl;
347 stk[stktop].parm = n;
348 stk[stktop].lno = lineno;
349 }
350 } else if (!fflag && line[i] == 'f') {
351 n = line[++i];
352 if (n == 'P') {
353 if (stk[stktop].opno == FT) {
354 stktop--;
355 } else {
356 pe(lineno);
357 (void) printf(
358 gettext("unmatched \\fP\n"));
359 }
360 } else {
361 growstk();
362 stk[stktop].opno = FT;
363 stk[stktop].pl = 1;
364 stk[stktop].parm = n;
365 stk[stktop].lno = lineno;
366 }
367 }
368 }
369 }
370 /*
371 * We've hit the end and look at all this stuff that hasn't been
372 * matched yet! Complain, complain.
373 */
374 for (i = stktop; i >= 0; i--) {
375 complain(i);
376 }
377 }
378
379 static void
complain(int i)380 complain(int i)
381 {
382 pe(stk[i].lno);
383 (void) printf(gettext("Unmatched "));
384 prop(i);
385 (void) printf("\n");
386 }
387
388 static void
prop(int i)389 prop(int i)
390 {
391 if (stk[i].pl == 0)
392 (void) printf(".%s", br[stk[i].opno].opbr);
393 else switch (stk[i].opno) {
394 case SZ:
395 (void) printf("\\s%c%d", stk[i].pl, stk[i].parm);
396 break;
397 case FT:
398 (void) printf("\\f%c", stk[i].parm);
399 break;
400 default:
401 (void) printf(gettext("Bug: stk[%d].opno = %d = .%s, .%s"),
402 i, stk[i].opno, br[stk[i].opno].opbr,
403 br[stk[i].opno].clbr);
404 }
405 }
406
407 /* ARGSUSED */
408 static void
chkcmd(char * line,char * mac)409 chkcmd(char *line, char *mac)
410 {
411 int i;
412
413 /*
414 * Check to see if it matches top of stack.
415 */
416 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
417 stktop--; /* OK. Pop & forget */
418 else {
419 /* No. Maybe it's an opener */
420 for (i = 0; br[i].opbr; i++) {
421 if (eq(mac, br[i].opbr)) {
422 /* Found. Push it. */
423 growstk();
424 stk[stktop].opno = i;
425 stk[stktop].pl = 0;
426 stk[stktop].parm = 0;
427 stk[stktop].lno = lineno;
428 break;
429 }
430 /*
431 * Maybe it's an unmatched closer.
432 * NOTE: this depends on the fact
433 * that none of the closers can be
434 * openers too.
435 */
436 if (eq(mac, br[i].clbr)) {
437 nomatch(mac);
438 break;
439 }
440 }
441 }
442 }
443
444 static void
nomatch(char * mac)445 nomatch(char *mac)
446 {
447 int i, j;
448
449 /*
450 * Look for a match further down on stack
451 * If we find one, it suggests that the stuff in
452 * between is supposed to match itself.
453 */
454 for (j = stktop; j >= 0; j--)
455 if (eq(mac, br[stk[j].opno].clbr)) {
456 /* Found. Make a good diagnostic. */
457 if (j == stktop-2) {
458 /*
459 * Check for special case \fx..\fR and don't
460 * complain.
461 */
462 if (stk[j+1].opno == FT &&
463 stk[j+1].parm != 'R' &&
464 stk[j+2].opno == FT &&
465 stk[j+2].parm == 'R') {
466 stktop = j -1;
467 return;
468 }
469 /*
470 * We have two unmatched frobs. Chances are
471 * they were intended to match, so we mention
472 * them together.
473 */
474 pe(stk[j+1].lno);
475 prop(j+1);
476 (void) printf(gettext(" does not match %d: "),
477 stk[j+2].lno);
478 prop(j+2);
479 (void) printf("\n");
480 } else for (i = j+1; i <= stktop; i++) {
481 complain(i);
482 }
483 stktop = j-1;
484 return;
485 }
486 /* Didn't find one. Throw this away. */
487 pe(lineno);
488 (void) printf(gettext("Unmatched .%s\n"), mac);
489 }
490
/*
 * eq: compare two strings.  Returns 1 when they are identical and 0
 * otherwise.
 */
static int
eq(char *s1, char *s2)
{
	if (strcmp(s1, s2) != 0)
		return (0);
	return (1);
}
497
498 /* print the first part of an error message, given the line number */
499 static void
pe(int lineno)500 pe(int lineno)
501 {
502 if (nfiles > 1)
503 (void) printf("%s: ", cfilename);
504 (void) printf("%d: ", lineno);
505 }
506
507 static void
checkknown(char * mac)508 checkknown(char *mac)
509 {
510
511 if (eq(mac, "."))
512 return;
513 if (binsrch(mac) >= 0)
514 return;
515 if (mac[0] == '\\' && mac[1] == '"') /* comments */
516 return;
517
518 pe(lineno);
519 (void) printf(gettext("Unknown command: .%s\n"), mac);
520 }
521
522 /*
523 * We have a .de xx line in "line". Add xx to the list of known commands.
524 */
525 static void
addcmd(char * line)526 addcmd(char *line)
527 {
528 char *mac;
529
530 /* grab the macro being defined */
531 mac = line+4;
532 while (isspace(*mac))
533 mac++;
534 if (*mac == 0) {
535 pe(lineno);
536 (void) printf(gettext("illegal define: %s\n"), line);
537 return;
538 }
539 mac[2] = 0;
540 if (isspace(mac[1]) || mac[1] == '\\')
541 mac[1] = 0;
542 if (ncmds >= MAXCMDS) {
543 (void) printf(gettext("Only %d known commands allowed\n"),
544 MAXCMDS);
545 exit(1);
546 }
547 addmac(mac);
548 }
549
550 /*
551 * Add mac to the list. We should really have some kind of tree
552 * structure here but this is a quick-and-dirty job and I just don't
553 * have time to mess with it. (I wonder if this will come back to haunt
554 * me someday?) Anyway, I claim that .de is fairly rare in user
555 * nroff programs, and the loop below is pretty fast.
556 */
557 static void
addmac(char * mac)558 addmac(char *mac)
559 {
560 char **src, **dest, **loc;
561
562 if (binsrch(mac) >= 0) { /* it's OK to redefine something */
563 #ifdef DEBUG
564 (void) printf("binsrch(%s) -> already in table\n", mac);
565 #endif
566 return;
567 }
568 /* binsrch sets slot as a side effect */
569 #ifdef DEBUG
570 printf("binsrch(%s) -> %d\n", mac, slot);
571 #endif
572 loc = &knowncmds[slot];
573 src = &knowncmds[ncmds-1];
574 dest = src+1;
575 while (dest > loc)
576 *dest-- = *src--;
577 *loc = malloc(3);
578 (void) strcpy(*loc, mac);
579 ncmds++;
580 #ifdef DEBUG
581 (void) printf("after: %s %s %s %s %s, %d cmds\n",
582 knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot],
583 knowncmds[slot+1], knowncmds[slot+2], ncmds);
584 #endif
585 }
586
587 /*
588 * Do a binary search in knowncmds for mac.
589 * If found, return the index. If not, return -1.
590 */
591 static int
binsrch(char * mac)592 binsrch(char *mac)
593 {
594 char *p; /* pointer to current cmd in list */
595 int d; /* difference if any */
596 int mid; /* mid point in binary search */
597 int top, bot; /* boundaries of bin search, inclusive */
598
599 top = ncmds-1;
600 bot = 0;
601 while (top >= bot) {
602 mid = (top+bot)/2;
603 p = knowncmds[mid];
604 d = p[0] - mac[0];
605 if (d == 0)
606 d = p[1] - mac[1];
607 if (d == 0)
608 return (mid);
609 if (d < 0)
610 bot = mid + 1;
611 else
612 top = mid - 1;
613 }
614 slot = bot; /* place it would have gone */
615 return (-1);
616 }
617