/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved  	*/


/*
 * Copyright (c) 1980 Regents of the University of California.
 * All rights reserved. The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 */

/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
#include <ctype.h>
#include <libintl.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define	MAXSTK	100	/* Initial stack size; growstk() doubles it */
#define	MAXBR	100	/* Max number of bracket pairs known */
#define	MAXCMDS	500	/* Max number of commands known */
#define	MACLEN	2	/* Longest macro/command name we store */
#define	MAXSIZE	10000	/* Stop accumulating a \s operand beyond this */

static int maxstk;	/* number of entries currently allocated in stk */

/*
 * The stack on which we remember what we've seen so far.
 */
static struct stkstr {
	int opno;	/* number of opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} *stk;
static int stktop;	/* index of the top entry; -1 when the stack is empty */

/*
 * The kinds of opening and closing brackets.
 * The table is terminated by a pair of NULL pointers; -a appends to it.
 * A pair listed twice (DS/DE, FS/FE) is harmless: chkcmd() stops at the
 * first match.
 */
static struct brstr {
	const char *opbr;
	const char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define	SZ	0
	{ "sz", "sz" },	/* also \s */
#define	FT	1
	{ "ft", "ft" },	/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	{ NULL, NULL }
};

/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 * The list must stay sorted on its first two characters, because
 * binsrch() does a binary search over it.
 */
static const char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
"yr", NULL
};

static int	lineno;		/* current line number in input file */
static char	line[256];	/* the current line */
static char	*cfilename;	/* name of current file */
static int	nfiles;		/* number of files to process */
static int	fflag;		/* -f: ignore \f */
static int	sflag;		/* -s: ignore \s */
static int	ncmds;		/* size of knowncmds */
static int	slot;		/* slot in knowncmds found by binsrch */

static void *xrealloc(void *old, size_t size);
static char *savemac(const char *mac);
static int endofname(int c);
static void growstk(void);
static void usage(void);
static void process(FILE *f);
static void chkescapes(void);
static void complain(int i);
static void prop(int i);
static void chkcmd(char *line, const char *mac);
static void nomatch(const char *mac);
static int eq(const char *s1, const char *s2);
static void pe(int lno);
static void checkknown(const char *mac);
static void addcmd(char *line);
static void addmac(const char *mac);
static int binsrch(const char *mac);

/*
 * realloc() wrapper that never returns NULL: on failure it reports the
 * error and exits with status 1.  Passing old == NULL allocates a fresh
 * buffer.
 */
static void *
xrealloc(void *old, size_t size)
{
	void *p;

	p = realloc(old, size);
	if (p == NULL) {
		perror("checknr");
		exit(1);
	}
	return (p);
}

/*
 * Return a freshly allocated, NUL-terminated copy of at most the first
 * MACLEN characters of mac.  The caller owns the result.
 */
static char *
savemac(const char *mac)
{
	char *copy;
	size_t len;

	copy = xrealloc(NULL, MACLEN + 1);
	for (len = 0; len < MACLEN && mac[len] != '\0'; len++)
		copy[len] = mac[len];
	copy[len] = '\0';
	return (copy);
}

/*
 * True when c ends a command name: white space, or the end of the string
 * (a final input line need not carry a newline).
 */
static int
endofname(int c)
{
	return (c == '\0' || isspace((unsigned char)c));
}

/*
 * Advance stktop to a fresh entry, doubling the stack when it is full.
 * The caller fills in stk[stktop] afterwards.
 */
static void
growstk(void)
{
	stktop++;
	if (stktop >= maxstk) {
		maxstk = (maxstk > 0) ? maxstk * 2 : MAXSTK;
		stk = xrealloc(stk, sizeof (struct stkstr) * maxstk);
	}
}

/*
 * Parse the options, then check each named file, or standard input when
 * no file is named.  Returns 0 once all input has been processed; exits
 * with status 1 on a usage error or when a file cannot be opened.
 */
int
main(int argc, char **argv)
{
	FILE *f;
	int i;
	char *cp;
	char *opener, *closer;

	(void) setlocale(LC_ALL, "");
#if !defined(TEXT_DOMAIN)
#define	TEXT_DOMAIN "SYS_TEST"
#endif
	(void) textdomain(TEXT_DOMAIN);
	stk = calloc(MAXSTK, sizeof (struct stkstr));
	if (stk == NULL) {
		perror("checknr");
		exit(1);
	}
	maxstk = MAXSTK;
	/* Figure out how many known commands there are */
	while (ncmds < MAXCMDS && knowncmds[ncmds])
		ncmds++;
	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {

		/* -a: add pairs of macros */
		case 'a':
			i = (int)strlen(argv[1]) - 2;
			if (i % 6 != 0)
				usage();
			/* look for empty macro slots */
			for (i = 0; i < MAXBR && br[i].opbr; i++)
				;
			for (cp = argv[1]+3; cp[-1]; cp += 6) {
				/* the last slot must stay a NULL terminator */
				if (i >= MAXBR - 1) {
					(void) printf(gettext(
					    "Only %d bracket pairs allowed\n"),
					    MAXBR - 1);
					exit(1);
				}
				opener = savemac(cp);
				closer = savemac(cp+3);
				br[i].opbr = opener;
				br[i].clbr = closer;
				/* known pairs are also known cmds */
				addmac(opener);
				addmac(closer);
				i++;
			}
			break;

		/* -c: add known commands */
		case 'c':
			i = (int)strlen(argv[1]) - 2;
			if (i % 3 != 0)
				usage();
			for (cp = argv[1]+3; cp[-1]; cp += 3) {
				if (cp[2] && cp[2] != '.')
					usage();
				/* addmac() keeps only the first two chars */
				addmac(cp);
			}
			break;

		/* -f: ignore font changes */
		case 'f':
			fflag = 1;
			break;

		/* -s: ignore size changes */
		case 's':
			sflag = 1;
			break;
		default:
			usage();
		}
		argc--; argv++;
	}

	nfiles = argc - 1;

	if (nfiles > 0) {
		for (i = 1; i < argc; i++) {
			cfilename = argv[i];
			f = fopen(cfilename, "r");
			if (f == NULL) {
				perror(cfilename);
				exit(1);
			}
			process(f);
			(void) fclose(f);
		}
	} else {
		cfilename = "stdin";
		process(stdin);
	}
	return (0);
}

/* Print the usage message and exit with status 1. */
static void
usage(void)
{
	(void) printf(gettext("Usage: \
checknr [ -fs ] [ -a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [ filename .. ]\n"));
	exit(1);
}

/*
 * Check one input stream.  Every line that starts with '.' is treated as
 * a command: its name is isolated, looked up, and matched against the
 * bracket stack.  Every line is then scanned for \s and \f escapes.
 * Whatever is still on the stack at end of input is reported as unmatched.
 *
 * Lines are read with fgets() into line[], so an input line longer than
 * the buffer is seen as several lines.
 */
static void
process(FILE *f)
{
	int i;
	char mac[5];	/* The current macro or nroff command */

	stktop = -1;
	for (lineno = 1; fgets(line, sizeof (line), f); lineno++) {
		if (line[0] == '.') {
			/*
			 * find and isolate the macro/command name.
			 * strncpy() does not terminate a full-length copy,
			 * so do it by hand.
			 */
			(void) strncpy(mac, line+1, 4);
			mac[4] = '\0';
			if (endofname(mac[0])) {
				pe(lineno);
				(void) printf(gettext("Empty command\n"));
			} else if (endofname(mac[1])) {
				mac[1] = 0;
			} else if (endofname(mac[2])) {
				mac[2] = 0;
			} else if (mac[0] != '\\' || mac[1] != '\"') {
				pe(lineno);
				(void) printf(gettext("Command too long\n"));
			}

			/*
			 * Is it a known command?
			 */
			checkknown(mac);

			/*
			 * Should we add it?
			 */
			if (eq(mac, "de"))
				addcmd(line);

			chkcmd(line, mac);
		}

		/*
		 * At this point we process the line looking
		 * for \s and \f.
		 */
		chkescapes();
	}
	/*
	 * We've hit the end and look at all this stuff that hasn't been
	 * matched yet!  Complain, complain.
	 */
	for (i = stktop; i >= 0; i--) {
		complain(i);
	}
}

/*
 * Scan the current line for embedded \s (size) and \f (font) escapes.
 * \s0 and \fP pop a matching entry off the stack; any other size or font
 * change pushes one.  A backslash preceded by another backslash is not
 * treated as the start of an escape.
 */
static void
chkescapes(void)
{
	int i, n, pl;

	for (i = 0; line[i] != '\0'; i++) {
		if (line[i] != '\\' || (i > 0 && line[i-1] == '\\'))
			continue;
		/*
		 * Step onto the escape name before testing any flag, so
		 * that -s does not also disable the \f check.
		 */
		if (line[++i] == '\0')
			break;		/* lone backslash ends the line */
		if (!sflag && line[i] == 's') {
			pl = line[++i];
			if (pl == '\0')
				break;	/* truncated \s */
			if (isdigit((unsigned char)pl)) {
				n = pl - '0';
				pl = ' ';
			} else
				n = 0;
			while (isdigit((unsigned char)line[++i])) {
				/* bounded, so a long digit run can't overflow */
				if (n < MAXSIZE)
					n = 10 * n + line[i] - '0';
			}
			i--;
			if (n == 0) {
				if (stktop >= 0 && stk[stktop].opno == SZ) {
					stktop--;
				} else {
					pe(lineno);
					(void) printf(
					    gettext("unmatched \\s0\n"));
				}
			} else {
				growstk();
				stk[stktop].opno = SZ;
				stk[stktop].pl = pl;
				stk[stktop].parm = n;
				stk[stktop].lno = lineno;
			}
		} else if (!fflag && line[i] == 'f') {
			n = line[++i];
			if (n == '\0')
				break;	/* truncated \f */
			if (n == 'P') {
				if (stktop >= 0 && stk[stktop].opno == FT) {
					stktop--;
				} else {
					pe(lineno);
					(void) printf(
					    gettext("unmatched \\fP\n"));
				}
			} else {
				growstk();
				stk[stktop].opno = FT;
				stk[stktop].pl = 1;
				stk[stktop].parm = n;
				stk[stktop].lno = lineno;
			}
		}
	}
}

/* Report stack entry i as unmatched. */
static void
complain(int i)
{
	pe(stk[i].lno);
	(void) printf(gettext("Unmatched "));
	prop(i);
	(void) printf("\n");
}

/*
 * Print stack entry i the way it appeared in the input: ".xx" for a
 * macro, "\s.." for a size change, "\f." for a font change.
 */
static void
prop(int i)
{
	if (stk[i].pl == 0)
		(void) printf(".%s", br[stk[i].opno].opbr);
	else switch (stk[i].opno) {
	case SZ:
		(void) printf("\\s%c%d", stk[i].pl, stk[i].parm);
		break;
	case FT:
		(void) printf("\\f%c", stk[i].parm);
		break;
	default:
		(void) printf(gettext("Bug: stk[%d].opno = %d = .%s, .%s"),
		    i, stk[i].opno, br[stk[i].opno].opbr,
		    br[stk[i].opno].clbr);
	}
}

/*
 * Match command mac against the bracket stack: pop when it closes the
 * top entry, push when it is an opener, and hand an unexpected closer to
 * nomatch().  The line argument is not used.
 */
/* ARGSUSED */
static void
chkcmd(char *line, const char *mac)
{
	int i;

	/*
	 * Check to see if it matches top of stack.
	 */
	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
		stktop--;	/* OK. Pop & forget */
	else {
		/* No. Maybe it's an opener */
		for (i = 0; br[i].opbr; i++) {
			if (eq(mac, br[i].opbr)) {
				/* Found. Push it. */
				growstk();
				stk[stktop].opno = i;
				stk[stktop].pl = 0;
				stk[stktop].parm = 0;
				stk[stktop].lno = lineno;
				break;
			}
			/*
			 * Maybe it's an unmatched closer.
			 * NOTE: this depends on the fact
			 * that none of the closers can be
			 * openers too.
			 */
			if (eq(mac, br[i].clbr)) {
				nomatch(mac);
				break;
			}
		}
	}
}

/*
 * Closer mac did not match the top of the stack.  Report the problem and
 * resynchronize the stack if a matching opener lies deeper down.
 */
static void
nomatch(const char *mac)
{
	int i, j;

	/*
	 * Look for a match further down on stack
	 * If we find one, it suggests that the stuff in
	 * between is supposed to match itself.
	 */
	for (j = stktop; j >= 0; j--)
		if (eq(mac, br[stk[j].opno].clbr)) {
			/* Found.  Make a good diagnostic. */
			if (j == stktop-2) {
				/*
				 * Check for special case \fx..\fR and don't
				 * complain.
				 */
				if (stk[j+1].opno == FT &&
				    stk[j+1].parm != 'R' &&
				    stk[j+2].opno == FT &&
				    stk[j+2].parm == 'R') {
					stktop = j -1;
					return;
				}
				/*
				 * We have two unmatched frobs.  Chances are
				 * they were intended to match, so we mention
				 * them together.
				 */
				pe(stk[j+1].lno);
				prop(j+1);
				(void) printf(gettext(" does not match %d: "),
				    stk[j+2].lno);
				prop(j+2);
				(void) printf("\n");
			} else for (i = j+1; i <= stktop; i++) {
				complain(i);
			}
			stktop = j-1;
			return;
		}
	/* Didn't find one.  Throw this away. */
	pe(lineno);
	(void) printf(gettext("Unmatched .%s\n"), mac);
}

/* eq: are two strings equal? */
static int
eq(const char *s1, const char *s2)
{
	return (strcmp(s1, s2) == 0);
}

/* print the first part of an error message, given the line number */
static void
pe(int lno)
{
	if (nfiles > 1)
		(void) printf("%s: ", cfilename);
	(void) printf("%d: ", lno);
}

/*
 * Complain when mac is neither a known command, the bare ".", nor the
 * start of a comment.
 */
static void
checkknown(const char *mac)
{

	if (eq(mac, "."))
		return;
	if (binsrch(mac) >= 0)
		return;
	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
		return;

	pe(lineno);
	(void) printf(gettext("Unknown command: .%s\n"), mac);
}

/*
 * We have a .de xx line in "line".  Add xx to the list of known commands.
 * The line is truncated in place just after the macro name.
 */
static void
addcmd(char *line)
{
	char *mac;

	/*
	 * grab the macro being defined.  Start right after ".de" rather
	 * than one character later, so a bare ".de" with no newline is
	 * never read beyond its terminator.
	 */
	mac = line+3;
	while (isspace((unsigned char)*mac))
		mac++;
	if (*mac == 0) {
		pe(lineno);
		(void) printf(gettext("illegal define: %s\n"), line);
		return;
	}
	/* cut the name at one or two characters without overrunning line */
	if (mac[1] == '\0' || isspace((unsigned char)mac[1]) ||
	    mac[1] == '\\')
		mac[1] = 0;
	else
		mac[2] = 0;
	addmac(mac);
}

/*
 * Add mac to the list.  We should really have some kind of tree
 * structure here but this is a quick-and-dirty job and I just don't
 * have time to mess with it.  (I wonder if this will come back to haunt
 * me someday?)  Anyway, I claim that .de is fairly rare in user
 * nroff programs, and the loop below is pretty fast.
 *
 * Only the first two characters of mac are kept.  Exits with status 1
 * when the table is full.
 */
static void
addmac(const char *mac)
{
	if (binsrch(mac) >= 0) {	/* it's OK to redefine something */
#ifdef DEBUG
		(void) printf("binsrch(%s) -> already in table\n", mac);
#endif
		return;
	}
	/* binsrch sets slot as a side effect */
#ifdef DEBUG
	(void) printf("binsrch(%s) -> %d\n", mac, slot);
#endif
	if (ncmds >= MAXCMDS) {
		(void) printf(gettext("Only %d known commands allowed\n"),
		    MAXCMDS);
		exit(1);
	}
	/* open a hole at slot by sliding the tail of the table up by one */
	(void) memmove(&knowncmds[slot + 1], &knowncmds[slot],
	    (size_t)(ncmds - slot) * sizeof (knowncmds[0]));
	knowncmds[slot] = savemac(mac);
	ncmds++;
#ifdef DEBUG
	(void) printf("after: slot %d = %s, %d cmds\n",
	    slot, knowncmds[slot], ncmds);
#endif
}

/*
 * Do a binary search in knowncmds for mac.
 * If found, return the index.  If not, return -1 and leave in "slot" the
 * index at which mac would have to be inserted.
 * Only the first two characters of each name take part in the comparison.
 */
static int
binsrch(const char *mac)
{
	const char *p;	/* pointer to current cmd in list */
	int d;		/* difference if any */
	int mid;	/* mid point in binary search */
	int top, bot;	/* boundaries of bin search, inclusive */

	top = ncmds-1;
	bot = 0;
	while (top >= bot) {
		mid = (top+bot)/2;
		p = knowncmds[mid];
		d = p[0] - mac[0];
		if (d == 0)
			d = p[1] - mac[1];
		if (d == 0)
			return (mid);
		if (d < 0)
			bot = mid + 1;
		else
			top = mid - 1;
	}
	slot = bot;	/* place it would have gone */
	return (-1);
}