1 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
2 /* All Rights Reserved */
3
4
5 /*
6 * Copyright (c) 1980 Regents of the University of California.
7 * All rights reserved. The Berkeley software License Agreement
8 * specifies the terms and conditions for redistribution.
9 */
10
11 /*
12 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
13 * Use is subject to license terms.
14 */
15
16 #pragma ident "%Z%%M% %I% %E% SMI"
17
18 /*
19 * checknr: check an nroff/troff input file for matching macro calls.
20 * we also attempt to match size and font changes, but only the embedded
21 * kind. These must end in \s0 and \fP resp. Maybe more sophistication
22 * later but for now think of these restrictions as contributions to
23 * structured typesetting.
24 */
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <unistd.h>
28 #include <string.h>
29 #include <ctype.h>
30 #include <locale.h>
31
/* Fixed capacities used by the tables in this file. */
#define	MAXSTK	100	/* Stack size */
#define	MAXBR	100	/* Max number of bracket pairs known */
#define	MAXCMDS	500	/* Max number of commands known */

/* Number of stack entries currently allocated; zero until main() sets it. */
static int maxstk = 0;
36
/*
 * The stack on which we remember what we've seen so far.
 * Each entry describes one bracket that has been opened and not yet
 * closed; stk is the array of entries and stktop indexes the newest one.
 */
static struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-' or ' ' for \s; 1 for \f; 0 for a macro */
	int parm;	/* parm to size, font, etc */
	int lno;	/* input line number on which it was opened */
} *stk = NULL;
static int stktop = 0;
47
/*
 * The kinds of opening and closing brackets.
 *
 * Slots SZ and FT are reserved for size and font changes and are also
 * used for the embedded \s and \f escapes.  The table ends at the first
 * entry whose opbr is NULL; the remaining slots up to MAXBR are free.
 */
#define	SZ	0
#define	FT	1

static struct brstr {
	char *opbr;
	char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* also \s */
	{ "ft", "ft" },		/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	{ NULL, NULL }
};
105
106 /*
107 * All commands known to nroff, plus macro packages.
108 * Used so we can complain about unrecognized commands.
109 */
110 static char *knowncmds[MAXCMDS] = {
111 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
112 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
113 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
114 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
115 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
116 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
117 "D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
118 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
119 "FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
120 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
121 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
122 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
123 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
124 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
125 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
126 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
127 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
128 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
129 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
130 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
131 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
132 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
133 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
134 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
135 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
136 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
137 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
138 "q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
139 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
140 "ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
141 "yr", 0
142 };
143
/* Input position and buffer. */
static char *cfilename = NULL;	/* name of current file */
static int nfiles = 0;		/* number of files to process */
static int lineno = 0;		/* current line number in input file */
static char line[256];		/* the current line */

/* Command-line switches. */
static int fflag = 0;		/* -f: ignore \f */
static int sflag = 0;		/* -s: ignore \s */

/* Bookkeeping for the knowncmds table. */
static int ncmds = 0;		/* size of knowncmds */
static int slot = 0;		/* insertion point set by binsrch on a miss */
152
/*
 * Forward declarations of the file-local functions.  Every one is a
 * full prototype, including the parameterless ones, so the compiler can
 * check each call against the definition.
 */
static void growstk(void);
static void usage(void);
static void process(FILE *f);
static void complain(int i);
static void prop(int i);
static void chkcmd(char *line, char *mac);
static void nomatch(char *mac);
static int eq(char *s1, char *s2);
static void pe(int lineno);
static void checkknown(char *mac);
static void addcmd(char *line);
static void addmac(char *mac);
static int binsrch(char *mac);
166
167 static void
growstk()168 growstk()
169 {
170 stktop++;
171 if (stktop >= maxstk) {
172 maxstk *= 2;
173 stk = (struct stkstr *)realloc(stk,
174 sizeof (struct stkstr) * maxstk);
175 }
176 }
177
178 int
main(argc,argv)179 main(argc, argv)
180 int argc;
181 char **argv;
182 {
183 FILE *f;
184 int i;
185 char *cp;
186 char b1[4];
187
188 (void) setlocale(LC_ALL, "");
189 #if !defined(TEXT_DOMAIN)
190 #define TEXT_DOMAIN "SYS_TEST"
191 #endif
192 (void) textdomain(TEXT_DOMAIN);
193 stk = (struct stkstr *)calloc(sizeof (struct stkstr), 100);
194 maxstk = 100;
195 /* Figure out how many known commands there are */
196 while (knowncmds[ncmds])
197 ncmds++;
198 while (argc > 1 && argv[1][0] == '-') {
199 switch (argv[1][1]) {
200
201 /* -a: add pairs of macros */
202 case 'a':
203 i = strlen(argv[1]) - 2;
204 if (i % 6 != 0)
205 usage();
206 /* look for empty macro slots */
207 for (i = 0; br[i].opbr; i++)
208 ;
209 for (cp = argv[1]+3; cp[-1]; cp += 6) {
210 br[i].opbr = malloc(3);
211 (void) strncpy(br[i].opbr, cp, 2);
212 br[i].clbr = malloc(3);
213 (void) strncpy(br[i].clbr, cp+3, 2);
214 /* knows pairs are also known cmds */
215 addmac(br[i].opbr);
216 addmac(br[i].clbr);
217 i++;
218 }
219 break;
220
221 /* -c: add known commands */
222 case 'c':
223 i = strlen(argv[1]) - 2;
224 if (i % 3 != 0)
225 usage();
226 for (cp = argv[1]+3; cp[-1]; cp += 3) {
227 if (cp[2] && cp[2] != '.')
228 usage();
229 (void) strncpy(b1, cp, 2);
230 addmac(b1);
231 }
232 break;
233
234 /* -f: ignore font changes */
235 case 'f':
236 fflag = 1;
237 break;
238
239 /* -s: ignore size changes */
240 case 's':
241 sflag = 1;
242 break;
243 default:
244 usage();
245 }
246 argc--; argv++;
247 }
248
249 nfiles = argc - 1;
250
251 if (nfiles > 0) {
252 for (i = 1; i < argc; i++) {
253 cfilename = argv[i];
254 f = fopen(cfilename, "r");
255 if (f == NULL) {
256 perror(cfilename);
257 exit(1);
258 }
259 else
260 process(f);
261 }
262 } else {
263 cfilename = "stdin";
264 process(stdin);
265 }
266 return (0);
267 }
268
/*
 * usage: print the command synopsis on standard output and terminate
 * the program with exit status 1.
 */
static void
usage()
{
	(void) printf(gettext("Usage: "
	    "checknr [ -fs ] [ -a.xx.yy.xx.yy...] [-c.xx.xx.xx...] "
	    "[ filename .. ]\n"));
	exit(1);
}
276
277 static void
process(FILE * f)278 process(FILE *f)
279 {
280 int i, n;
281 char mac[5]; /* The current macro or nroff command */
282 int pl;
283
284 stktop = -1;
285 for (lineno = 1; fgets(line, sizeof (line), f); lineno++) {
286 if (line[0] == '.') {
287 /*
288 * find and isolate the macro/command name.
289 */
290 (void) strncpy(mac, line+1, 4);
291 if (isspace(mac[0])) {
292 pe(lineno);
293 (void) printf(gettext("Empty command\n"));
294 } else if (isspace(mac[1])) {
295 mac[1] = 0;
296 } else if (isspace(mac[2])) {
297 mac[2] = 0;
298 } else if (mac[0] != '\\' || mac[1] != '\"') {
299 pe(lineno);
300 (void) printf(gettext("Command too long\n"));
301 }
302
303 /*
304 * Is it a known command?
305 */
306 checkknown(mac);
307
308 /*
309 * Should we add it?
310 */
311 if (eq(mac, "de"))
312 addcmd(line);
313
314 chkcmd(line, mac);
315 }
316
317 /*
318 * At this point we process the line looking
319 * for \s and \f.
320 */
321 for (i = 0; line[i]; i++)
322 if (line[i] == '\\' && (i == 0 || line[i-1] != '\\')) {
323 if (!sflag && line[++i] == 's') {
324 pl = line[++i];
325 if (isdigit(pl)) {
326 n = pl - '0';
327 pl = ' ';
328 } else
329 n = 0;
330 while (isdigit(line[++i]))
331 n = 10 * n + line[i] - '0';
332 i--;
333 if (n == 0) {
334 if (stk[stktop].opno == SZ) {
335 stktop--;
336 } else {
337 pe(lineno);
338 (void) printf(
339 gettext("unmatched \\s0\n"));
340 }
341 } else {
342 growstk();
343 stk[stktop].opno = SZ;
344 stk[stktop].pl = pl;
345 stk[stktop].parm = n;
346 stk[stktop].lno = lineno;
347 }
348 } else if (!fflag && line[i] == 'f') {
349 n = line[++i];
350 if (n == 'P') {
351 if (stk[stktop].opno == FT) {
352 stktop--;
353 } else {
354 pe(lineno);
355 (void) printf(
356 gettext("unmatched \\fP\n"));
357 }
358 } else {
359 growstk();
360 stk[stktop].opno = FT;
361 stk[stktop].pl = 1;
362 stk[stktop].parm = n;
363 stk[stktop].lno = lineno;
364 }
365 }
366 }
367 }
368 /*
369 * We've hit the end and look at all this stuff that hasn't been
370 * matched yet! Complain, complain.
371 */
372 for (i = stktop; i >= 0; i--) {
373 complain(i);
374 }
375 }
376
377 static void
complain(int i)378 complain(int i)
379 {
380 pe(stk[i].lno);
381 (void) printf(gettext("Unmatched "));
382 prop(i);
383 (void) printf("\n");
384 }
385
386 static void
prop(int i)387 prop(int i)
388 {
389 if (stk[i].pl == 0)
390 (void) printf(".%s", br[stk[i].opno].opbr);
391 else switch (stk[i].opno) {
392 case SZ:
393 (void) printf("\\s%c%d", stk[i].pl, stk[i].parm);
394 break;
395 case FT:
396 (void) printf("\\f%c", stk[i].parm);
397 break;
398 default:
399 (void) printf(gettext("Bug: stk[%d].opno = %d = .%s, .%s"),
400 i, stk[i].opno, br[stk[i].opno].opbr,
401 br[stk[i].opno].clbr);
402 }
403 }
404
405 /* ARGSUSED */
406 static void
chkcmd(char * line,char * mac)407 chkcmd(char *line, char *mac)
408 {
409 int i;
410
411 /*
412 * Check to see if it matches top of stack.
413 */
414 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
415 stktop--; /* OK. Pop & forget */
416 else {
417 /* No. Maybe it's an opener */
418 for (i = 0; br[i].opbr; i++) {
419 if (eq(mac, br[i].opbr)) {
420 /* Found. Push it. */
421 growstk();
422 stk[stktop].opno = i;
423 stk[stktop].pl = 0;
424 stk[stktop].parm = 0;
425 stk[stktop].lno = lineno;
426 break;
427 }
428 /*
429 * Maybe it's an unmatched closer.
430 * NOTE: this depends on the fact
431 * that none of the closers can be
432 * openers too.
433 */
434 if (eq(mac, br[i].clbr)) {
435 nomatch(mac);
436 break;
437 }
438 }
439 }
440 }
441
442 static void
nomatch(char * mac)443 nomatch(char *mac)
444 {
445 int i, j;
446
447 /*
448 * Look for a match further down on stack
449 * If we find one, it suggests that the stuff in
450 * between is supposed to match itself.
451 */
452 for (j = stktop; j >= 0; j--)
453 if (eq(mac, br[stk[j].opno].clbr)) {
454 /* Found. Make a good diagnostic. */
455 if (j == stktop-2) {
456 /*
457 * Check for special case \fx..\fR and don't
458 * complain.
459 */
460 if (stk[j+1].opno == FT &&
461 stk[j+1].parm != 'R' &&
462 stk[j+2].opno == FT &&
463 stk[j+2].parm == 'R') {
464 stktop = j -1;
465 return;
466 }
467 /*
468 * We have two unmatched frobs. Chances are
469 * they were intended to match, so we mention
470 * them together.
471 */
472 pe(stk[j+1].lno);
473 prop(j+1);
474 (void) printf(gettext(" does not match %d: "),
475 stk[j+2].lno);
476 prop(j+2);
477 (void) printf("\n");
478 } else for (i = j+1; i <= stktop; i++) {
479 complain(i);
480 }
481 stktop = j-1;
482 return;
483 }
484 /* Didn't find one. Throw this away. */
485 pe(lineno);
486 (void) printf(gettext("Unmatched .%s\n"), mac);
487 }
488
/* eq: return 1 when s1 and s2 hold the same string, 0 otherwise. */
static int
eq(char *s1, char *s2)
{
	if (strcmp(s1, s2) != 0)
		return (0);
	return (1);
}
495
496 /* print the first part of an error message, given the line number */
497 static void
pe(int lineno)498 pe(int lineno)
499 {
500 if (nfiles > 1)
501 (void) printf("%s: ", cfilename);
502 (void) printf("%d: ", lineno);
503 }
504
505 static void
checkknown(char * mac)506 checkknown(char *mac)
507 {
508
509 if (eq(mac, "."))
510 return;
511 if (binsrch(mac) >= 0)
512 return;
513 if (mac[0] == '\\' && mac[1] == '"') /* comments */
514 return;
515
516 pe(lineno);
517 (void) printf(gettext("Unknown command: .%s\n"), mac);
518 }
519
520 /*
521 * We have a .de xx line in "line". Add xx to the list of known commands.
522 */
523 static void
addcmd(char * line)524 addcmd(char *line)
525 {
526 char *mac;
527
528 /* grab the macro being defined */
529 mac = line+4;
530 while (isspace(*mac))
531 mac++;
532 if (*mac == 0) {
533 pe(lineno);
534 (void) printf(gettext("illegal define: %s\n"), line);
535 return;
536 }
537 mac[2] = 0;
538 if (isspace(mac[1]) || mac[1] == '\\')
539 mac[1] = 0;
540 if (ncmds >= MAXCMDS) {
541 (void) printf(gettext("Only %d known commands allowed\n"),
542 MAXCMDS);
543 exit(1);
544 }
545 addmac(mac);
546 }
547
548 /*
549 * Add mac to the list. We should really have some kind of tree
550 * structure here but this is a quick-and-dirty job and I just don't
551 * have time to mess with it. (I wonder if this will come back to haunt
552 * me someday?) Anyway, I claim that .de is fairly rare in user
553 * nroff programs, and the loop below is pretty fast.
554 */
555 static void
addmac(char * mac)556 addmac(char *mac)
557 {
558 char **src, **dest, **loc;
559
560 if (binsrch(mac) >= 0) { /* it's OK to redefine something */
561 #ifdef DEBUG
562 (void) printf("binsrch(%s) -> already in table\n", mac);
563 #endif
564 return;
565 }
566 /* binsrch sets slot as a side effect */
567 #ifdef DEBUG
568 printf("binsrch(%s) -> %d\n", mac, slot);
569 #endif
570 loc = &knowncmds[slot];
571 src = &knowncmds[ncmds-1];
572 dest = src+1;
573 while (dest > loc)
574 *dest-- = *src--;
575 *loc = malloc(3);
576 (void) strcpy(*loc, mac);
577 ncmds++;
578 #ifdef DEBUG
579 (void) printf("after: %s %s %s %s %s, %d cmds\n",
580 knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot],
581 knowncmds[slot+1], knowncmds[slot+2], ncmds);
582 #endif
583 }
584
585 /*
586 * Do a binary search in knowncmds for mac.
587 * If found, return the index. If not, return -1.
588 */
589 static int
binsrch(char * mac)590 binsrch(char *mac)
591 {
592 char *p; /* pointer to current cmd in list */
593 int d; /* difference if any */
594 int mid; /* mid point in binary search */
595 int top, bot; /* boundaries of bin search, inclusive */
596
597 top = ncmds-1;
598 bot = 0;
599 while (top >= bot) {
600 mid = (top+bot)/2;
601 p = knowncmds[mid];
602 d = p[0] - mac[0];
603 if (d == 0)
604 d = p[1] - mac[1];
605 if (d == 0)
606 return (mid);
607 if (d < 0)
608 bot = mid + 1;
609 else
610 top = mid - 1;
611 }
612 slot = bot; /* place it would have gone */
613 return (-1);
614 }
615