xref: /illumos-gate/usr/src/cmd/checknr/checknr.c (revision e6d6c189fa3a95d7aa27bbe0aeacf7c1a6b57c8c)
1 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
2 /*	  All Rights Reserved  	*/
3 
4 
5 /*
6  * Copyright (c) 1980 Regents of the University of California.
7  * All rights reserved. The Berkeley software License Agreement
8  * specifies the terms and conditions for redistribution.
9  */
10 
11 /*
12  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
13  * Use is subject to license terms.
14  */
15 
16 /*
17  * Copyright (c) 2018, Joyent, Inc.
18  */
19 
20 /*
21  * checknr: check an nroff/troff input file for matching macro calls.
22  * we also attempt to match size and font changes, but only the embedded
23  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
24  * later but for now think of these restrictions as contributions to
25  * structured typesetting.
26  */
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <unistd.h>
30 #include <string.h>
31 #include <ctype.h>
32 #include <locale.h>
33 
#define	MAXSTK	100	/* Stack size */
/*
 * Number of entries currently allocated for stk; set in main() and
 * doubled by growstk() whenever the stack fills up.
 */
static	int	maxstk;
#define	MAXBR	100	/* Max number of bracket pairs known (size of br[]) */
#define	MAXCMDS	500	/* Max number of commands known (size of knowncmds[]) */
38 
/*
 * The stack on which we remember what we've seen so far.
 * One entry is pushed for every opening macro, \s size change or \f font
 * change, and popped again when the matching closer is seen.
 */
static struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro request */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the opener was seen on */
} *stk;
/* Index of the top entry in stk; process() resets it to -1 (empty). */
static int stktop;
49 
/*
 * Table of bracket pairs: each entry names a request that opens a
 * construct and the request that closes it.  The list ends at the first
 * entry whose opbr is NULL; the unused tail of the array is where the
 * -a option in main() appends user-supplied pairs.
 *
 * SZ and FT are the indices of the first two entries, which are also
 * used for the embedded \s and \f escapes.
 */
static struct brstr {
	char *opbr;
	char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define	SZ	0
	{ "sz",	"sz" },	/* also \s */
#define	FT	1
	{ "ft",	"ft" },	/* also \f */
	/* the -mm package */
	{ "AL",	"LE" },
	{ "AS",	"AE" },
	{ "BL",	"LE" },
	{ "BS",	"BE" },
	{ "DF",	"DE" },
	{ "DL",	"LE" },
	{ "DS",	"DE" },
	{ "FS",	"FE" },
	{ "ML",	"LE" },
	{ "NS",	"NE" },
	{ "RL",	"LE" },
	{ "VL",	"LE" },
	/* the -ms package */
	{ "AB",	"AE" },
	{ "BD",	"DE" },
	{ "CD",	"DE" },
	{ "DS",	"DE" },
	{ "FS",	"FE" },
	{ "ID",	"DE" },
	{ "KF",	"KE" },
	{ "KS",	"KE" },
	{ "LD",	"DE" },
	{ "LG",	"NL" },
	{ "QS",	"QE" },
	{ "RS",	"RE" },
	{ "SM",	"NL" },
	{ "XA",	"XE" },
	{ "XS",	"XE" },
	/* The -me package */
	{ "(b",	")b" },
	{ "(c",	")c" },
	{ "(d",	")d" },
	{ "(f",	")f" },
	{ "(l",	")l" },
	{ "(q",	")q" },
	{ "(x",	")x" },
	{ "(z",	")z" },
	/* Things needed by preprocessors */
	{ "EQ",	"EN" },
	{ "TS",	"TE" },
	/* Refer */
	{ "[",	"]" },
	/* end-of-table marker */
	{ NULL,	NULL }
};
107 
/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 *
 * The table must stay sorted on the first two characters of each name:
 * binsrch() does a binary search over it and addmac() inserts new names
 * at the position binsrch() reports.  The trailing 0 is only used by
 * main() to count the initial entries into ncmds.
 */
static char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};
145 
static	int	lineno;		/* current line number in input file */
/*
 * The current line, filled by fgets() in process(): at most 255
 * characters per read, so a longer input line arrives in several pieces.
 */
static	char	line[256];
static	char	*cfilename;	/* name of current file ("stdin" if none given) */
static	int	nfiles;		/* number of files to process; pe() prints the */
				/* file name only when this exceeds 1 */
static	int	fflag;		/* -f: ignore \f */
static	int	sflag;		/* -s: ignore \s */
static	int	ncmds;		/* number of entries in use in knowncmds */
static	int	slot;		/* insertion index in knowncmds, set by */
				/* binsrch() when the lookup fails */
154 
/*
 * Forward declarations of the file-local helpers.  Functions that take no
 * arguments are declared with (void) so that the compiler checks their
 * calls; an empty parameter list would declare them without a prototype.
 */
static void growstk(void);
static void usage(void);
static void process(FILE *f);
static void complain(int i);
static void prop(int i);
static void chkcmd(char *line, char *mac);
static void nomatch(char *mac);
static int eq(char *s1, char *s2);
static void pe(int lineno);
static void checkknown(char *mac);
static void addcmd(char *line);
static void addmac(char *mac);
static int binsrch(char *mac);
168 
169 static void
170 growstk()
171 {
172 	stktop++;
173 	if (stktop >= maxstk) {
174 		maxstk *= 2;
175 		stk = (struct stkstr *)realloc(stk,
176 		    sizeof (struct stkstr) * maxstk);
177 	}
178 }
179 
180 int
181 main(argc, argv)
182 int argc;
183 char **argv;
184 {
185 	FILE *f;
186 	int i;
187 	char *cp;
188 	char b1[4];
189 
190 	(void) setlocale(LC_ALL, "");
191 #if !defined(TEXT_DOMAIN)
192 #define	TEXT_DOMAIN	"SYS_TEST"
193 #endif
194 	(void) textdomain(TEXT_DOMAIN);
195 	stk = (struct stkstr *)calloc(100, sizeof (struct stkstr));
196 	maxstk = 100;
197 	/* Figure out how many known commands there are */
198 	while (knowncmds[ncmds])
199 		ncmds++;
200 	while (argc > 1 && argv[1][0] == '-') {
201 		switch (argv[1][1]) {
202 
203 		/* -a: add pairs of macros */
204 		case 'a':
205 			i = strlen(argv[1]) - 2;
206 			if (i % 6 != 0)
207 				usage();
208 			/* look for empty macro slots */
209 			for (i = 0; br[i].opbr; i++)
210 				;
211 			for (cp = argv[1]+3; cp[-1]; cp += 6) {
212 				br[i].opbr = malloc(3);
213 				(void) strncpy(br[i].opbr, cp, 2);
214 				br[i].clbr = malloc(3);
215 				(void) strncpy(br[i].clbr, cp+3, 2);
216 				/* knows pairs are also known cmds */
217 				addmac(br[i].opbr);
218 				addmac(br[i].clbr);
219 				i++;
220 			}
221 			break;
222 
223 		/* -c: add known commands */
224 		case 'c':
225 			i = strlen(argv[1]) - 2;
226 			if (i % 3 != 0)
227 				usage();
228 			for (cp = argv[1]+3; cp[-1]; cp += 3) {
229 				if (cp[2] && cp[2] != '.')
230 					usage();
231 				(void) strncpy(b1, cp, 2);
232 				addmac(b1);
233 			}
234 			break;
235 
236 		/* -f: ignore font changes */
237 		case 'f':
238 			fflag = 1;
239 			break;
240 
241 		/* -s: ignore size changes */
242 		case 's':
243 			sflag = 1;
244 			break;
245 		default:
246 			usage();
247 		}
248 		argc--; argv++;
249 	}
250 
251 	nfiles = argc - 1;
252 
253 	if (nfiles > 0) {
254 		for (i = 1; i < argc; i++) {
255 			cfilename = argv[i];
256 			f = fopen(cfilename, "r");
257 			if (f == NULL) {
258 				perror(cfilename);
259 				exit(1);
260 				}
261 			else
262 				process(f);
263 		}
264 	} else {
265 		cfilename = "stdin";
266 		process(stdin);
267 	}
268 	return (0);
269 }
270 
/*
 * usage: write the command synopsis to standard output and terminate
 * the program with exit status 1.  Does not return.
 */
static void
usage()
{
	(void) fputs(gettext("Usage: "
	    "checknr [ -fs ] [ -a.xx.yy.xx.yy...] [-c.xx.xx.xx...] "
	    "[ filename .. ]\n"), stdout);
	exit(1);
}
278 
279 static void
280 process(FILE *f)
281 {
282 	int i, n;
283 	char mac[5];	/* The current macro or nroff command */
284 	int pl;
285 
286 	stktop = -1;
287 	for (lineno = 1; fgets(line, sizeof (line), f); lineno++) {
288 		if (line[0] == '.') {
289 			/*
290 			 * find and isolate the macro/command name.
291 			 */
292 			(void) strncpy(mac, line+1, 4);
293 			if (isspace(mac[0])) {
294 				pe(lineno);
295 				(void) printf(gettext("Empty command\n"));
296 			} else if (isspace(mac[1])) {
297 				mac[1] = 0;
298 			} else if (isspace(mac[2])) {
299 				mac[2] = 0;
300 			} else if (mac[0] != '\\' || mac[1] != '\"') {
301 				pe(lineno);
302 				(void) printf(gettext("Command too long\n"));
303 			}
304 
305 			/*
306 			 * Is it a known command?
307 			 */
308 			checkknown(mac);
309 
310 			/*
311 			 * Should we add it?
312 			 */
313 			if (eq(mac, "de"))
314 				addcmd(line);
315 
316 			chkcmd(line, mac);
317 		}
318 
319 		/*
320 		 * At this point we process the line looking
321 		 * for \s and \f.
322 		 */
323 		for (i = 0; line[i]; i++)
324 			if (line[i] == '\\' && (i == 0 || line[i-1] != '\\')) {
325 				if (!sflag && line[++i] == 's') {
326 					pl = line[++i];
327 					if (isdigit(pl)) {
328 						n = pl - '0';
329 						pl = ' ';
330 					} else
331 						n = 0;
332 					while (isdigit(line[++i]))
333 						n = 10 * n + line[i] - '0';
334 					i--;
335 					if (n == 0) {
336 						if (stk[stktop].opno == SZ) {
337 							stktop--;
338 						} else {
339 							pe(lineno);
340 							(void) printf(
341 						gettext("unmatched \\s0\n"));
342 						}
343 					} else {
344 						growstk();
345 						stk[stktop].opno = SZ;
346 						stk[stktop].pl = pl;
347 						stk[stktop].parm = n;
348 						stk[stktop].lno = lineno;
349 					}
350 				} else if (!fflag && line[i] == 'f') {
351 					n = line[++i];
352 					if (n == 'P') {
353 						if (stk[stktop].opno == FT) {
354 							stktop--;
355 						} else {
356 							pe(lineno);
357 							(void) printf(
358 						gettext("unmatched \\fP\n"));
359 						}
360 					} else {
361 						growstk();
362 						stk[stktop].opno = FT;
363 						stk[stktop].pl = 1;
364 						stk[stktop].parm = n;
365 						stk[stktop].lno = lineno;
366 					}
367 				}
368 			}
369 	}
370 	/*
371 	 * We've hit the end and look at all this stuff that hasn't been
372 	 * matched yet!  Complain, complain.
373 	 */
374 	for (i = stktop; i >= 0; i--) {
375 		complain(i);
376 	}
377 }
378 
379 static void
380 complain(int i)
381 {
382 	pe(stk[i].lno);
383 	(void) printf(gettext("Unmatched "));
384 	prop(i);
385 	(void) printf("\n");
386 }
387 
388 static void
389 prop(int i)
390 {
391 	if (stk[i].pl == 0)
392 		(void) printf(".%s", br[stk[i].opno].opbr);
393 	else switch (stk[i].opno) {
394 	case SZ:
395 		(void) printf("\\s%c%d", stk[i].pl, stk[i].parm);
396 		break;
397 	case FT:
398 		(void) printf("\\f%c", stk[i].parm);
399 		break;
400 	default:
401 		(void) printf(gettext("Bug: stk[%d].opno = %d = .%s, .%s"),
402 			i, stk[i].opno, br[stk[i].opno].opbr,
403 			br[stk[i].opno].clbr);
404 	}
405 }
406 
407 /* ARGSUSED */
408 static void
409 chkcmd(char *line, char *mac)
410 {
411 	int i;
412 
413 	/*
414 	 * Check to see if it matches top of stack.
415 	 */
416 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
417 		stktop--;	/* OK. Pop & forget */
418 	else {
419 		/* No. Maybe it's an opener */
420 		for (i = 0; br[i].opbr; i++) {
421 			if (eq(mac, br[i].opbr)) {
422 				/* Found. Push it. */
423 				growstk();
424 				stk[stktop].opno = i;
425 				stk[stktop].pl = 0;
426 				stk[stktop].parm = 0;
427 				stk[stktop].lno = lineno;
428 				break;
429 			}
430 			/*
431 			 * Maybe it's an unmatched closer.
432 			 * NOTE: this depends on the fact
433 			 * that none of the closers can be
434 			 * openers too.
435 			 */
436 			if (eq(mac, br[i].clbr)) {
437 				nomatch(mac);
438 				break;
439 			}
440 		}
441 	}
442 }
443 
444 static void
445 nomatch(char *mac)
446 {
447 	int i, j;
448 
449 	/*
450 	 * Look for a match further down on stack
451 	 * If we find one, it suggests that the stuff in
452 	 * between is supposed to match itself.
453 	 */
454 	for (j = stktop; j >= 0; j--)
455 		if (eq(mac, br[stk[j].opno].clbr)) {
456 			/* Found.  Make a good diagnostic. */
457 			if (j == stktop-2) {
458 				/*
459 				 * Check for special case \fx..\fR and don't
460 				 * complain.
461 				 */
462 				if (stk[j+1].opno == FT &&
463 				    stk[j+1].parm != 'R' &&
464 				    stk[j+2].opno == FT &&
465 				    stk[j+2].parm == 'R') {
466 					stktop = j -1;
467 					return;
468 				}
469 				/*
470 				 * We have two unmatched frobs.  Chances are
471 				 * they were intended to match, so we mention
472 				 * them together.
473 				 */
474 				pe(stk[j+1].lno);
475 				prop(j+1);
476 				(void) printf(gettext(" does not match %d: "),
477 					stk[j+2].lno);
478 				prop(j+2);
479 				(void) printf("\n");
480 			} else for (i = j+1; i <= stktop; i++) {
481 				complain(i);
482 			}
483 			stktop = j-1;
484 			return;
485 		}
486 	/* Didn't find one.  Throw this away. */
487 	pe(lineno);
488 	(void) printf(gettext("Unmatched .%s\n"), mac);
489 }
490 
/* eq: return 1 when s1 and s2 hold the same string, 0 otherwise */
static int
eq(char *s1, char *s2)
{
	if (strcmp(s1, s2) != 0)
		return (0);
	return (1);
}
497 
498 /* print the first part of an error message, given the line number */
499 static void
500 pe(int lineno)
501 {
502 	if (nfiles > 1)
503 		(void) printf("%s: ", cfilename);
504 	(void) printf("%d: ", lineno);
505 }
506 
507 static void
508 checkknown(char *mac)
509 {
510 
511 	if (eq(mac, "."))
512 		return;
513 	if (binsrch(mac) >= 0)
514 		return;
515 	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
516 		return;
517 
518 	pe(lineno);
519 	(void) printf(gettext("Unknown command: .%s\n"), mac);
520 }
521 
522 /*
523  * We have a .de xx line in "line".  Add xx to the list of known commands.
524  */
525 static void
526 addcmd(char *line)
527 {
528 	char *mac;
529 
530 	/* grab the macro being defined */
531 	mac = line+4;
532 	while (isspace(*mac))
533 		mac++;
534 	if (*mac == 0) {
535 		pe(lineno);
536 		(void) printf(gettext("illegal define: %s\n"), line);
537 		return;
538 	}
539 	mac[2] = 0;
540 	if (isspace(mac[1]) || mac[1] == '\\')
541 		mac[1] = 0;
542 	if (ncmds >= MAXCMDS) {
543 		(void) printf(gettext("Only %d known commands allowed\n"),
544 		    MAXCMDS);
545 		exit(1);
546 	}
547 	addmac(mac);
548 }
549 
550 /*
551  * Add mac to the list.  We should really have some kind of tree
552  * structure here but this is a quick-and-dirty job and I just don't
553  * have time to mess with it.  (I wonder if this will come back to haunt
554  * me someday?)  Anyway, I claim that .de is fairly rare in user
555  * nroff programs, and the loop below is pretty fast.
556  */
557 static void
558 addmac(char *mac)
559 {
560 	char **src, **dest, **loc;
561 
562 	if (binsrch(mac) >= 0) {	/* it's OK to redefine something */
563 #ifdef DEBUG
564 		(void) printf("binsrch(%s) -> already in table\n", mac);
565 #endif
566 		return;
567 	}
568 	/* binsrch sets slot as a side effect */
569 #ifdef DEBUG
570 printf("binsrch(%s) -> %d\n", mac, slot);
571 #endif
572 	loc = &knowncmds[slot];
573 	src = &knowncmds[ncmds-1];
574 	dest = src+1;
575 	while (dest > loc)
576 		*dest-- = *src--;
577 	*loc = malloc(3);
578 	(void) strcpy(*loc, mac);
579 	ncmds++;
580 #ifdef DEBUG
581 	(void) printf("after: %s %s %s %s %s, %d cmds\n",
582 	    knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot],
583 	    knowncmds[slot+1], knowncmds[slot+2], ncmds);
584 #endif
585 }
586 
587 /*
588  * Do a binary search in knowncmds for mac.
589  * If found, return the index.  If not, return -1.
590  */
591 static int
592 binsrch(char *mac)
593 {
594 	char *p;	/* pointer to current cmd in list */
595 	int d;		/* difference if any */
596 	int mid;	/* mid point in binary search */
597 	int top, bot;	/* boundaries of bin search, inclusive */
598 
599 	top = ncmds-1;
600 	bot = 0;
601 	while (top >= bot) {
602 		mid = (top+bot)/2;
603 		p = knowncmds[mid];
604 		d = p[0] - mac[0];
605 		if (d == 0)
606 			d = p[1] - mac[1];
607 		if (d == 0)
608 			return (mid);
609 		if (d < 0)
610 			bot = mid + 1;
611 		else
612 			top = mid - 1;
613 	}
614 	slot = bot;	/* place it would have gone */
615 	return (-1);
616 }
617