/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/


/*
 * Copyright (c) 1980 Regents of the University of California.
 * All rights reserved. The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 */

/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2018, Joyent, Inc.
 */

/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * we also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
 * later but for now think of these restrictions as contributions to
 * structured typesetting.
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <locale.h>
#include <libintl.h>

#define	MAXSTK	100	/* Initial stack size (the stack grows on demand) */
static int maxstk;	/* current capacity of stk, in entries */
#define	MAXBR	100	/* Max number of bracket pairs known */
#define	MAXCMDS	500	/* Max number of commands known */

/*
 * The stack on which we remember what we've seen so far.
 */
static struct stkstr {
	int opno;	/* number of opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} *stk;
static int stktop;	/* index of the top entry, -1 when empty */

/*
 * The kinds of opening and closing brackets.
 * The table is terminated by an entry whose opbr is NULL; -a appends to it.
 */
#define	SZ	0	/* index of the "sz" entry; also used for \s */
#define	FT	1	/* index of the "ft" entry; also used for \f */
static struct brstr {
	char *opbr;
	char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },	/* also \s */
	{ "ft", "ft" },	/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	{ NULL, NULL }
};

/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 * binsrch() searches this table, so it must stay sorted on the first two
 * characters of each name; addmac() inserts new names in order.
 */
static char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};

static int	lineno;		/* current line number in input file */
static char	line[256];	/* the current line */
static char	*cfilename;	/* name of current file */
static int	nfiles;		/* number of files to process */
static int	fflag;		/* -f: ignore \f */
static int	sflag;		/* -s: ignore \s */
static int	ncmds;		/* size of knowncmds */
static int	slot;		/* slot in knowncmds found by binsrch */

static void growstk(void);
static void usage(void);
static void process(FILE *f);
static void complain(int i);
static void prop(int i);
static void chkcmd(char *line, char *mac);
static void nomatch(char *mac);
static int eq(char *s1, char *s2);
static void pe(int lno);
static void checkknown(char *mac);
static void addcmd(char *line);
static void addmac(char *mac);
static int binsrch(char *mac);
static void nomem(void);
static char *savemac(const char *name);
static int isend(int c);
static void push(int opno, int pl, int parm);
static void scanesc(void);

/* Report an allocation failure and exit. */
static void
nomem(void)
{
	perror("checknr");
	exit(1);
}

/*
 * Return a freshly allocated, NUL-terminated copy of at most the first
 * two characters of name.  Macro names are never longer than that here.
 */
static char *
savemac(const char *name)
{
	char *copy;
	int k;

	copy = malloc(3);
	if (copy == NULL)
		nomem();
	for (k = 0; k < 2 && name[k] != '\0'; k++)
		copy[k] = name[k];
	copy[k] = '\0';
	return (copy);
}

/* True if c ends a command name: white space or the end of the line. */
static int
isend(int c)
{
	return (c == '\0' || isspace((unsigned char)c));
}

/*
 * Make room for one more stack entry: advance stktop and double the
 * stack when it is full.  Exits if memory cannot be obtained; the old
 * stack pointer is not overwritten until the reallocation has succeeded.
 */
static void
growstk(void)
{
	stktop++;
	if (stktop >= maxstk) {
		int nmax = maxstk * 2;
		struct stkstr *nstk;

		nstk = realloc(stk, sizeof (*nstk) * nmax);
		if (nstk == NULL)
			nomem();
		stk = nstk;
		maxstk = nmax;
	}
}

/* Push a new entry, stamped with the current line number. */
static void
push(int opno, int pl, int parm)
{
	growstk();
	stk[stktop].opno = opno;
	stk[stktop].pl = pl;
	stk[stktop].parm = parm;
	stk[stktop].lno = lineno;
}

/*
 * Parse the leading option arguments, then check each named file, or
 * standard input when no file is named.
 */
int
main(int argc, char **argv)
{
	FILE *f;
	int i;
	char *cp;
	char b1[4];

	(void) setlocale(LC_ALL, "");
#if !defined(TEXT_DOMAIN)
#define	TEXT_DOMAIN "SYS_TEST"
#endif
	(void) textdomain(TEXT_DOMAIN);
	stk = calloc(MAXSTK, sizeof (*stk));
	if (stk == NULL)
		nomem();
	maxstk = MAXSTK;
	/* Figure out how many known commands there are */
	while (knowncmds[ncmds])
		ncmds++;
	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {

		/* -a: add pairs of macros */
		case 'a':
			/* each pair is six characters: ".xx.yy" */
			i = (int)strlen(argv[1]) - 2;
			if (i % 6 != 0)
				usage();
			/* look for empty macro slots */
			for (i = 0; br[i].opbr; i++)
				;
			for (cp = argv[1]+3; cp[-1]; cp += 6) {
				/* the last slot must stay as terminator */
				if (i >= MAXBR - 1) {
					(void) printf(gettext(
					    "Only %d bracket pairs allowed\n"),
					    MAXBR - 1);
					exit(1);
				}
				br[i].opbr = savemac(cp);
				br[i].clbr = savemac(cp+3);
				/* known pairs are also known cmds */
				addmac(br[i].opbr);
				addmac(br[i].clbr);
				i++;
			}
			break;

		/* -c: add known commands */
		case 'c':
			/* each command is three characters: ".xx" */
			i = (int)strlen(argv[1]) - 2;
			if (i % 3 != 0)
				usage();
			for (cp = argv[1]+3; cp[-1]; cp += 3) {
				if (cp[2] && cp[2] != '.')
					usage();
				b1[0] = cp[0];
				b1[1] = cp[1];
				b1[2] = '\0';
				addmac(b1);
			}
			break;

		/* -f: ignore font changes */
		case 'f':
			fflag = 1;
			break;

		/* -s: ignore size changes */
		case 's':
			sflag = 1;
			break;
		default:
			usage();
		}
		argc--; argv++;
	}

	nfiles = argc - 1;

	if (nfiles > 0) {
		for (i = 1; i < argc; i++) {
			cfilename = argv[i];
			f = fopen(cfilename, "r");
			if (f == NULL) {
				perror(cfilename);
				exit(1);
			}
			process(f);
			(void) fclose(f);
		}
	} else {
		cfilename = "stdin";
		process(stdin);
	}
	return (0);
}

/* Print the usage message and exit with status 1. */
static void
usage(void)
{
	(void) printf(gettext("Usage: \
checknr [ -fs ] [ -a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [ filename .. ]\n"));
	exit(1);
}

/*
 * Check one input stream.  Each line that starts with '.' is treated as
 * a macro/command line; every line is then scanned for \s and \f.
 * Whatever is still on the stack at end of input is reported as unmatched.
 */
static void
process(FILE *f)
{
	int i;
	char mac[5];	/* The current macro or nroff command */

	stktop = -1;
	for (lineno = 1; fgets(line, sizeof (line), f); lineno++) {
		if (line[0] == '.') {
			/*
			 * find and isolate the macro/command name.
			 * strncpy zero-pads short names but does not
			 * terminate a four character copy, so do that here.
			 */
			(void) strncpy(mac, line+1, 4);
			mac[4] = '\0';
			if (isend(mac[0])) {
				pe(lineno);
				(void) printf(gettext("Empty command\n"));
			} else if (isend(mac[1])) {
				mac[1] = 0;
			} else if (isend(mac[2])) {
				mac[2] = 0;
			} else if (mac[0] != '\\' || mac[1] != '\"') {
				pe(lineno);
				(void) printf(gettext("Command too long\n"));
			}

			/*
			 * Is it a known command?
			 */
			checkknown(mac);

			/*
			 * Should we add it?
			 */
			if (eq(mac, "de"))
				addcmd(line);

			chkcmd(line, mac);
		}

		/*
		 * At this point we process the line looking
		 * for \s and \f.
		 */
		scanesc();
	}
	/*
	 * We've hit the end and look at all this stuff that hasn't been
	 * matched yet!  Complain, complain.
	 */
	for (i = stktop; i >= 0; i--) {
		complain(i);
	}
}

/*
 * Scan the current line for embedded \s (size) and \f (font) escapes.
 * \s0 and \fP pop a matching entry from the stack, or are reported as
 * unmatched; any other size or font pushes a new entry.  A backslash that
 * directly follows another backslash does not start an escape.
 */
static void
scanesc(void)
{
	int i, n, pl;

	for (i = 0; line[i] != '\0'; i++) {
		if (line[i] != '\\' || (i > 0 && line[i-1] == '\\'))
			continue;
		/*
		 * Step onto the escape character unconditionally, so that
		 * -s does not stop \f from being seen, and stop at the
		 * terminator rather than scanning beyond it.
		 */
		if (line[++i] == '\0')
			break;
		if (!sflag && line[i] == 's') {
			n = 0;
			pl = (unsigned char)line[i+1];
			if (pl != '\0') {
				i++;
				if (isdigit(pl)) {
					n = pl - '0';
					pl = ' ';
				}
				/* look ahead so i stays on the last digit */
				while (isdigit((unsigned char)line[i+1]))
					n = 10 * n + (line[++i] - '0');
			}
			if (n == 0) {
				if (stktop >= 0 && stk[stktop].opno == SZ) {
					stktop--;
				} else {
					pe(lineno);
					(void) printf(
					    gettext("unmatched \\s0\n"));
				}
			} else {
				push(SZ, pl, n);
			}
		} else if (!fflag && line[i] == 'f') {
			n = (unsigned char)line[i+1];
			if (n != '\0')
				i++;
			if (n == 'P') {
				if (stktop >= 0 && stk[stktop].opno == FT) {
					stktop--;
				} else {
					pe(lineno);
					(void) printf(
					    gettext("unmatched \\fP\n"));
				}
			} else {
				push(FT, 1, n);
			}
		}
	}
}

/* Report stack entry i as unmatched. */
static void
complain(int i)
{
	pe(stk[i].lno);
	(void) printf(gettext("Unmatched "));
	prop(i);
	(void) printf("\n");
}

/*
 * Print stack entry i the way it appeared in the input: ".xx" for a
 * macro (pl == 0), otherwise the \s or \f escape.
 */
static void
prop(int i)
{
	if (stk[i].pl == 0)
		(void) printf(".%s", br[stk[i].opno].opbr);
	else switch (stk[i].opno) {
	case SZ:
		(void) printf("\\s%c%d", stk[i].pl, stk[i].parm);
		break;
	case FT:
		(void) printf("\\f%c", stk[i].parm);
		break;
	default:
		(void) printf(gettext("Bug: stk[%d].opno = %d = .%s, .%s"),
		    i, stk[i].opno, br[stk[i].opno].opbr,
		    br[stk[i].opno].clbr);
	}
}

/*
 * Match the command mac against the bracket table: pop if it closes the
 * top of the stack, push if it is an opener, and hand an out-of-place
 * closer to nomatch().  The line argument is not used.
 */
/* ARGSUSED */
static void
chkcmd(char *line, char *mac)
{
	int i;

	/*
	 * Check to see if it matches top of stack.
	 */
	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
		stktop--;	/* OK. Pop & forget */
	else {
		/* No. Maybe it's an opener */
		for (i = 0; br[i].opbr; i++) {
			if (eq(mac, br[i].opbr)) {
				/* Found. Push it. */
				push(i, 0, 0);
				break;
			}
			/*
			 * Maybe it's an unmatched closer.
			 * NOTE: this depends on the fact
			 * that none of the closers can be
			 * openers too.
			 */
			if (eq(mac, br[i].clbr)) {
				nomatch(mac);
				break;
			}
		}
	}
}

/*
 * The closer mac does not match the top of the stack.  Report the
 * mismatch and discard the stack entries it skips over.
 */
static void
nomatch(char *mac)
{
	int i, j;

	/*
	 * Look for a match further down on stack
	 * If we find one, it suggests that the stuff in
	 * between is supposed to match itself.
	 */
	for (j = stktop; j >= 0; j--)
		if (eq(mac, br[stk[j].opno].clbr)) {
			/* Found.  Make a good diagnostic. */
			if (j == stktop-2) {
				/*
				 * Check for special case \fx..\fR and don't
				 * complain.
				 */
				if (stk[j+1].opno == FT &&
				    stk[j+1].parm != 'R' &&
				    stk[j+2].opno == FT &&
				    stk[j+2].parm == 'R') {
					stktop = j -1;
					return;
				}
				/*
				 * We have two unmatched frobs.  Chances are
				 * they were intended to match, so we mention
				 * them together.
				 */
				pe(stk[j+1].lno);
				prop(j+1);
				(void) printf(gettext(" does not match %d: "),
				    stk[j+2].lno);
				prop(j+2);
				(void) printf("\n");
			} else for (i = j+1; i <= stktop; i++) {
				complain(i);
			}
			stktop = j-1;
			return;
		}
	/* Didn't find one.  Throw this away. */
	pe(lineno);
	(void) printf(gettext("Unmatched .%s\n"), mac);
}

/* eq: are two strings equal? */
static int
eq(char *s1, char *s2)
{
	return (strcmp(s1, s2) == 0);
}

/*
 * print the first part of an error message, given the line number.
 * The file name is included only when several files are being checked.
 */
static void
pe(int lno)
{
	if (nfiles > 1)
		(void) printf("%s: ", cfilename);
	(void) printf("%d: ", lno);
}

/*
 * Complain if mac is not a known command.  ".." and the start of a
 * comment (\") are accepted silently.
 */
static void
checkknown(char *mac)
{

	if (eq(mac, "."))
		return;
	if (binsrch(mac) >= 0)
		return;
	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
		return;

	pe(lineno);
	(void) printf(gettext("Unknown command: .%s\n"), mac);
}

/*
 * We have a .de xx line in "line".  Add xx to the list of known commands.
 * The name is cut to at most two characters in place, which also
 * truncates the line buffer at that point.
 */
static void
addcmd(char *line)
{
	char *mac;

	/*
	 * grab the macro being defined.  Start right after ".de" so that
	 * a line with nothing following it is never read past its end.
	 */
	mac = line+3;
	while (isspace((unsigned char)*mac))
		mac++;
	if (*mac == 0) {
		pe(lineno);
		(void) printf(gettext("illegal define: %s\n"), line);
		return;
	}
	/* only cut at mac[2] when it lies inside the string */
	if (mac[1] != 0)
		mac[2] = 0;
	if (isspace((unsigned char)mac[1]) || mac[1] == '\\')
		mac[1] = 0;
	/* addmac enforces the MAXCMDS limit */
	addmac(mac);
}

/*
 * Add mac to the list.  We should really have some kind of tree
 * structure here but this is a quick-and-dirty job and I just don't
 * have time to mess with it.  (I wonder if this will come back to haunt
 * me someday?)  Anyway, I claim that .de is fairly rare in user
 * nroff programs, and the loop below is pretty fast.
 *
 * A name that is already present is left alone.  Exits if the table is
 * full; the limit is checked here so that it covers every caller.
 */
static void
addmac(char *mac)
{
	char **loc;

	if (binsrch(mac) >= 0) {	/* it's OK to redefine something */
#ifdef DEBUG
		(void) printf("binsrch(%s) -> already in table\n", mac);
#endif
		return;
	}
	/* binsrch sets slot as a side effect */
#ifdef DEBUG
	(void) printf("binsrch(%s) -> %d\n", mac, slot);
#endif
	if (ncmds >= MAXCMDS) {
		(void) printf(gettext("Only %d known commands allowed\n"),
		    MAXCMDS);
		exit(1);
	}
	/* open a gap at slot by moving the tail up one place */
	loc = &knowncmds[slot];
	(void) memmove(loc + 1, loc, (size_t)(ncmds - slot) * sizeof (*loc));
	*loc = savemac(mac);
	ncmds++;
#ifdef DEBUG
	(void) printf("after: slot %d = %s, %d cmds\n",
	    slot, knowncmds[slot], ncmds);
#endif
}

/*
 * Do a binary search in knowncmds for mac.
 * If found, return the index.  If not, return -1.
 * Only the first two characters are compared.  On a miss, slot is set to
 * the index at which mac would have to be inserted.
 */
static int
binsrch(char *mac)
{
	char *p;	/* pointer to current cmd in list */
	int d;		/* difference if any */
	int mid;	/* mid point in binary search */
	int top, bot;	/* boundaries of bin search, inclusive */

	top = ncmds-1;
	bot = 0;
	while (top >= bot) {
		mid = (top+bot)/2;
		p = knowncmds[mid];
		d = p[0] - mac[0];
		if (d == 0)
			d = p[1] - mac[1];
		if (d == 0)
			return (mid);
		if (d < 0)
			bot = mid + 1;
		else
			top = mid - 1;
	}
	slot = bot;	/* place it would have gone */
	return (-1);
}