1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <assert.h> 33 #include <errno.h> 34 #include <stdio.h> 35 #include <stdlib.h> 36 #include <string.h> 37 #include <locale.h> 38 #include <sys/varargs.h> 39 40 /* 41 * Deroff command -- strip troff, eqn, and Tbl sequences from a file. 42 * Has three flags argument, -w, to cause output one word per line 43 * rather than in the original format. 44 * -mm (or -ms) causes the corresponding macro's to be interpreted 45 * so that just sentences are output 46 * -ml also gets rid of lists. 47 * -i causes deroff to ignore .so and .nx commands. 48 * Deroff follows .so and .nx commands, removes contents of macro 49 * definitions, equations (both .EQ ... .EN and $...$), 50 * Tbl command sequences, and Troff backslash constructions. 51 * 52 * All input is through the C macro; the most recently read character 53 * is in c. 54 */ 55 56 #define C ((c = getc(infile)) == EOF ? eof() : \ 57 ((c == ldelim) && (filesp == files) ? skeqn() : c)) 58 #define C1 ((c = getc(infile)) == EOF ? eof() : c) 59 #define SKIP while (C != '\n') 60 #define SKIP_TO_COM SKIP; SKIP; pc = c; \ 61 while ((C != '.') || (pc != '\n') || \ 62 (C > 'Z')) { \ 63 pc = c; \ 64 } 65 66 #define YES 1 67 #define NO 0 68 #define MS 0 69 #define MM 1 70 #define ONE 1 71 #define TWO 2 72 73 #define NOCHAR -2 74 #define SPECIAL 0 75 #define APOS 1 76 #define DIGIT 2 77 #define LETTER 3 78 79 #define MAXLINESZ 512 80 81 static int wordflag = NO; 82 static int msflag = NO; 83 static int iflag = NO; 84 static int mac = MM; 85 static int disp = 0; 86 static int inmacro = NO; 87 static int intable = NO; 88 static int lindx; 89 static size_t linesize = MAXLINESZ; 90 91 static char chars[128]; /* SPECIAL, APOS, DIGIT, or LETTER */ 92 93 static char *line = NULL; 94 95 static char c; 96 static int pc; 97 static int ldelim = NOCHAR; 98 static int rdelim = NOCHAR; 99 100 static int argc; 101 static char **argv; 102 103 extern int optind; 104 extern char *optarg; 105 static char fname[50]; 106 static FILE *files[15]; 107 static FILE **filesp; 108 static FILE *infile; 109 110 static void backsl(void); 111 static void comline(void); 112 static char *copys(char *); 113 static int eof(void); 114 static void eqn(void); 115 static void fatal(const char *, ...); 116 static void fatal_msg(char *); 117 static void getfname(void); 118 static void macro(void); 119 static FILE *opn(char *); 120 static void putmac(char *, int); 121 static void putwords(int); 122 static void regline(int, int); 123 static void sce(void); 124 static int skeqn(void); 125 static void sdis(char, char); 126 static void stbl(void); 127 static void tbl(void); 128 static void usage(void); 129 static void work(void) __NORETURN; 130 131 int 132 main(int ac, char **av) 133 { 134 int i; 135 int errflg = 0; 136 int optchar; 137 138 (void) setlocale(LC_ALL, ""); 139 #if !defined(TEXT_DOMAIN) 140 #define TEXT_DOMAIN "SYS_TEST" 141 #endif 142 (void) textdomain(TEXT_DOMAIN); 143 argc = ac; 144 argv = av; 145 while ((optchar = getopt(argc, argv, "wim:")) != EOF) { 146 switch (optchar) { 147 case 'w': 148 wordflag = YES; 149 break; 150 case 'm': 151 msflag = YES; 152 if (*optarg == 'm') 153 mac = MM; 154 else if (*optarg == 's') 155 mac = MS; 156 else if (*optarg == 'l') 157 disp = 1; 158 else 159 errflg++; 160 break; 161 case 'i': 162 iflag = YES; 163 break; 164 case '?': 165 errflg++; 166 } 167 } 168 if (errflg) { 169 usage(); 170 return (1); 171 } 172 if (optind == argc) 173 infile = stdin; 174 else 175 infile = opn(argv[optind++]); 176 files[0] = infile; 177 filesp = &files[0]; 178 179 for (i = 'a'; i <= 'z'; ++i) 180 chars[i] = LETTER; 181 for (i = 'A'; i <= 'Z'; ++i) 182 chars[i] = LETTER; 183 for (i = '0'; i <= '9'; ++i) 184 chars[i] = DIGIT; 185 chars['\''] = APOS; 186 chars['&'] = APOS; 187 work(); 188 /* NOTREACHED */ 189 } 190 191 192 static int 193 skeqn(void) 194 { 195 while ((c = getc(infile)) != rdelim) { 196 if (c == EOF) { 197 c = eof(); 198 } else if (c == '"') { 199 while ((c = getc(infile)) != '"') { 200 if (c == EOF) { 201 c = eof(); 202 } else if (c == '\\') { 203 if ((c = getc(infile)) == EOF) { 204 c = eof(); 205 } 206 } 207 } 208 } 209 } 210 if (msflag) { 211 return (c = 'x'); 212 } 213 return (c = ' '); 214 } 215 216 217 /* Functions calling opn() should ensure 'p' is non-null */ 218 static FILE * 219 opn(char *p) 220 { 221 FILE *fd; 222 223 assert(p != NULL); 224 if ((fd = fopen(p, "r")) == NULL) 225 fatal(gettext("Cannot open file %s: %s\n"), p, strerror(errno)); 226 227 return (fd); 228 } 229 230 231 232 static int 233 eof(void) 234 { 235 if (infile != stdin) 236 (void) fclose(infile); 237 if (filesp > files) { 238 infile = *--filesp; 239 } else if (optind < argc) { 240 infile = opn(argv[optind++]); 241 } else { 242 exit(0); 243 } 244 245 return (C); 246 } 247 248 249 250 static void 251 getfname(void) 252 { 253 char *p; 254 struct chain { 255 struct chain *nextp; 256 char *datap; 257 }; 258 struct chain *q; 259 static struct chain *namechain = NULL; 260 261 while (C == ' ') 262 ; 263 264 for (p = fname; ((*p = c) != '\n') && (c != ' ') && (c != '\t') && 265 (c != '\\'); ++p) { 266 (void) C; 267 } 268 *p = '\0'; 269 while (c != '\n') { 270 (void) C; 271 } 272 273 /* see if this name has already been used */ 274 for (q = namechain; q; q = q->nextp) 275 if (strcmp(fname, q->datap) != 0) { 276 fname[0] = '\0'; 277 return; 278 } 279 280 q = (struct chain *)calloc(1, sizeof (*namechain)); 281 q->nextp = namechain; 282 q->datap = copys(fname); 283 namechain = q; 284 } 285 286 287 /* 288 * Functions calling fatal() should ensure 'format' and 289 * arguments are non-null. 290 */ 291 static void 292 fatal(const char *format, ...) 293 { 294 va_list alist; 295 296 assert(format != NULL); 297 (void) fputs(gettext("deroff: "), stderr); 298 va_start(alist, format); 299 (void) vfprintf(stderr, format, alist); 300 exit(1); 301 } 302 303 /* Functions calling fatal_msg() should ensure 's' is non-null */ 304 static void 305 fatal_msg(char *s) 306 { 307 assert(s != NULL); 308 (void) fprintf(stderr, gettext("deroff: %s\n"), s); 309 exit(1); 310 } 311 312 static void 313 usage(void) 314 { 315 (void) fputs(gettext( 316 "usage: deroff [ -w ] [ -m (m s l) ] [ -i ] " 317 "[ file ] ... \n"), stderr); 318 } 319 320 static void 321 work(void) 322 { 323 324 for (;;) { 325 if ((C == '.') || (c == '\'')) 326 comline(); 327 else 328 regline(NO, TWO); 329 } 330 } 331 332 333 static void 334 regline(int macline, int cnst) 335 { 336 337 if (line == NULL) { 338 if ((line = (char *)malloc(linesize * sizeof (char))) == NULL) { 339 fatal_msg(gettext("Cannot allocate memory")); 340 } 341 } 342 343 lindx = 0; 344 line[lindx] = c; 345 for (;;) { 346 if (c == '\\') { 347 line[lindx] = ' '; 348 backsl(); 349 if (c == '%') { /* no blank for hyphenation char */ 350 lindx--; 351 } 352 } 353 if (c == '\n') { 354 break; 355 } 356 /* 357 * We're just about to add another character to the line 358 * buffer so ensure we don't overrun it. 359 */ 360 if (++lindx >= linesize - 1) { 361 linesize = linesize * 2; 362 if ((line = (char *)realloc(line, 363 linesize * sizeof (char))) == NULL) { 364 fatal_msg(gettext("Cannot allocate memory")); 365 } 366 } 367 if (intable && (c == 'T')) { 368 line[lindx] = C; 369 if ((c == '{') || (c == '}')) { 370 line[lindx - 1] = ' '; 371 line[lindx] = C; 372 } 373 } else { 374 line[lindx] = C; 375 } 376 } 377 378 line[lindx] = '\0'; 379 380 if (line[0] != '\0') { 381 if (wordflag) { 382 putwords(macline); 383 } else if (macline) { 384 putmac(line, cnst); 385 } else { 386 (void) puts(line); 387 } 388 } 389 } 390 391 392 393 394 static void 395 putmac(char *s, int cnst) 396 { 397 char *t; 398 399 while (*s) { 400 while ((*s == ' ') || (*s == '\t')) { 401 (void) putchar(*s++); 402 } 403 for (t = s; (*t != ' ') && (*t != '\t') && (*t != '\0'); ++t) 404 ; 405 if (*s == '\"') 406 s++; 407 if ((t > s + cnst) && (chars[s[0]] == LETTER) && 408 (chars[s[1]] == LETTER)) { 409 while (s < t) { 410 if (*s == '\"') 411 s++; 412 else 413 (void) putchar(*s++); 414 } 415 } else { 416 s = t; 417 } 418 } 419 (void) putchar('\n'); 420 } 421 422 423 424 static void 425 putwords(int macline) /* break into words for -w option */ 426 { 427 char *p, *p1; 428 int i, nlet; 429 430 for (p1 = line; ; ) { 431 /* skip initial specials ampersands and apostrophes */ 432 while (chars[*p1] < DIGIT) { 433 if (*p1++ == '\0') 434 return; 435 } 436 nlet = 0; 437 for (p = p1; (i = chars[*p]) != SPECIAL; ++p) { 438 if (i == LETTER) 439 ++nlet; 440 } 441 442 if ((!macline && (nlet > 1)) /* MDM definition of word */ || 443 (macline && (nlet > 2) && (chars[p1[0]] == LETTER) && 444 (chars[p1[1]] == LETTER))) { 445 /* delete trailing ampersands and apostrophes */ 446 while ((p[-1] == '\'') || (p[-1] == '&')) { 447 --p; 448 } 449 while (p1 < p) { 450 (void) putchar(*p1++); 451 } 452 (void) putchar('\n'); 453 } else { 454 p1 = p; 455 } 456 } 457 } 458 459 460 461 static void 462 comline(void) 463 { 464 int c1, c2; 465 466 com: 467 while ((C == ' ') || (c == '\t')) 468 ; 469 comx: 470 if ((c1 = c) == '\n') 471 return; 472 c2 = C; 473 if ((c1 == '.') && (c2 != '.')) 474 inmacro = NO; 475 if (c2 == '\n') 476 return; 477 478 if ((c1 == 'E') && (c2 == 'Q') && (filesp == files)) { 479 eqn(); 480 } else if ((c1 == 'T') && ((c2 == 'S') || (c2 == 'C') || 481 (c2 == '&')) && (filesp == files)) { 482 if (msflag) { 483 stbl(); 484 } else { 485 tbl(); 486 } 487 } else if ((c1 == 'T') && (c2 == 'E')) { 488 intable = NO; 489 } else if (!inmacro && (c1 == 'd') && (c2 == 'e')) { 490 macro(); 491 } else if (!inmacro && (c1 == 'i') && (c2 == 'g')) { 492 macro(); 493 } else if (!inmacro && (c1 == 'a') && (c2 == 'm')) { 494 macro(); 495 } else if ((c1 == 's') && (c2 == 'o')) { 496 if (iflag) { 497 SKIP; 498 } else { 499 getfname(); 500 if (fname[0]) { 501 infile = *++filesp = opn(fname); 502 } 503 } 504 } else if ((c1 == 'n') && (c2 == 'x')) { 505 if (iflag) { 506 SKIP; 507 } else { 508 getfname(); 509 if (fname[0] == '\0') { 510 exit(0); 511 } 512 if (infile != stdin) { 513 (void) fclose(infile); 514 } 515 infile = *filesp = opn(fname); 516 } 517 } else if ((c1 == 'h') && (c2 == 'w')) { 518 SKIP; 519 } else if (msflag && (c1 == 'T') && (c2 == 'L')) { 520 SKIP_TO_COM; 521 goto comx; 522 } else if (msflag && (c1 == 'N') && (c2 == 'R')) { 523 SKIP; 524 } else if (msflag && (c1 == 'A') && ((c2 == 'U') || (c2 == 'I'))) { 525 if (mac == MM) { 526 SKIP; 527 } else { 528 SKIP_TO_COM; 529 goto comx; 530 } 531 } else if (msflag && (c1 == 'F') && (c2 == 'S')) { 532 SKIP_TO_COM; 533 goto comx; 534 } else if (msflag && (c1 == 'S') && (c2 == 'H')) { 535 SKIP_TO_COM; 536 goto comx; 537 } else if (msflag && (c1 == 'N') && (c2 == 'H')) { 538 SKIP_TO_COM; 539 goto comx; 540 } else if (msflag && (c1 == 'O') && (c2 == 'K')) { 541 SKIP_TO_COM; 542 goto comx; 543 } else if (msflag && (c1 == 'N') && (c2 == 'D')) { 544 SKIP; 545 } else if (msflag && (mac == MM) && (c1 == 'H') && 546 ((c2 == ' ') || (c2 == 'U'))) { 547 SKIP; 548 } else if (msflag && (mac == MM) && (c2 == 'L')) { 549 if (disp || (c1 == 'R')) { 550 sdis('L', 'E'); 551 } else { 552 SKIP; 553 (void) putchar('.'); 554 } 555 } else if (msflag && ((c1 == 'D') || (c1 == 'N') || 556 (c1 == 'K') || (c1 == 'P')) && (c2 == 'S')) { 557 sdis(c1, 'E'); /* removed RS-RE */ 558 } else if (msflag && (c1 == 'K' && c2 == 'F')) { 559 sdis(c1, 'E'); 560 } else if (msflag && (c1 == 'n') && (c2 == 'f')) { 561 sdis('f', 'i'); 562 } else if (msflag && (c1 == 'c') && (c2 == 'e')) { 563 sce(); 564 } else { 565 if ((c1 == '.') && (c2 == '.')) { 566 while (C == '.') 567 ; 568 } 569 ++inmacro; 570 if ((c1 <= 'Z') && msflag) { 571 regline(YES, ONE); 572 } else { 573 regline(YES, TWO); 574 } 575 --inmacro; 576 } 577 } 578 579 580 581 static void 582 macro(void) 583 { 584 if (msflag) { 585 /* look for .. */ 586 do { 587 SKIP; 588 } while ((C != '.') || (C != '.') || (C == '.')); 589 if (c != '\n') { 590 SKIP; 591 } 592 return; 593 } 594 SKIP; 595 inmacro = YES; 596 } 597 598 599 600 601 static void 602 sdis(char a1, char a2) 603 { 604 int c1, c2; 605 int eqnf; 606 int notdone = 1; 607 eqnf = 1; 608 SKIP; 609 while (notdone) { 610 while (C != '.') 611 SKIP; 612 if ((c1 = C) == '\n') 613 continue; 614 if ((c2 = C) == '\n') 615 continue; 616 if ((c1 == a1) && (c2 == a2)) { 617 SKIP; 618 if (eqnf) 619 (void) putchar('.'); 620 (void) putchar('\n'); 621 return; 622 } else if ((a1 == 'D') && (c1 == 'E') && (c2 == 'Q')) { 623 eqn(); 624 eqnf = 0; 625 } else { 626 SKIP; 627 } 628 } 629 } 630 631 static void 632 tbl(void) 633 { 634 while (C != '.') 635 ; 636 SKIP; 637 intable = YES; 638 } 639 640 static void 641 stbl(void) 642 { 643 while (C != '.') 644 ; 645 SKIP_TO_COM; 646 if ((c != 'T') || (C != 'E')) { 647 SKIP; 648 pc = c; 649 while ((C != '.') || (pc != '\n') || 650 (C != 'T') || (C != 'E')) { 651 pc = c; 652 } 653 } 654 } 655 656 static void 657 eqn(void) 658 { 659 int c1, c2; 660 int dflg; 661 int last; 662 663 last = 0; 664 dflg = 1; 665 SKIP; 666 667 for (;;) { 668 if ((C1 == '.') || (c == '\'')) { 669 while ((C1 == ' ') || (c == '\t')) 670 ; 671 if ((c == 'E') && (C1 == 'N')) { 672 SKIP; 673 if (msflag && dflg) { 674 (void) putchar('x'); 675 (void) putchar(' '); 676 if (last) { 677 (void) putchar('.'); 678 (void) putchar(' '); 679 } 680 } 681 return; 682 } 683 } else if (c == 'd') { /* look for delim */ 684 if ((C1 == 'e') && (C1 == 'l')) { 685 if ((C1 == 'i') && (C1 == 'm')) { 686 while (C1 == ' ') 687 ; 688 if (((c1 = c) == '\n') || 689 ((c2 = C1) == '\n') || 690 ((c1 == 'o') && (c2 == 'f') && 691 (C1 == 'f'))) { 692 ldelim = NOCHAR; 693 rdelim = NOCHAR; 694 } else { 695 ldelim = c1; 696 rdelim = c2; 697 } 698 } 699 dflg = 0; 700 } 701 } 702 703 if (c != '\n') { 704 while (C1 != '\n') { 705 if (c == '.') { 706 last = 1; 707 } else { 708 last = 0; 709 } 710 } 711 } 712 } 713 } 714 715 716 717 static void 718 backsl(void) /* skip over a complete backslash construction */ 719 { 720 int bdelim; 721 722 sw: switch (C) { 723 case '"': 724 SKIP; 725 return; 726 case 's': 727 if (C == '\\') { 728 backsl(); 729 } else { 730 while ((C >= '0') && (c <= '9')) 731 ; 732 (void) ungetc(c, infile); 733 c = '0'; 734 } 735 lindx--; 736 return; 737 738 case 'f': 739 case 'n': 740 case '*': 741 if (C != '(') 742 return; 743 /* FALLTHROUGH */ 744 745 case '(': 746 if (C != '\n') { 747 (void) C; 748 } 749 return; 750 751 case '$': 752 (void) C; /* discard argument number */ 753 return; 754 755 case 'b': 756 case 'x': 757 case 'v': 758 case 'h': 759 case 'w': 760 case 'o': 761 case 'l': 762 case 'L': 763 if ((bdelim = C) == '\n') 764 return; 765 while ((C != '\n') && (c != bdelim)) 766 if (c == '\\') 767 backsl(); 768 return; 769 770 case '\\': 771 if (inmacro) 772 goto sw; 773 default: 774 return; 775 } 776 } 777 778 779 780 781 static char * 782 copys(char *s) 783 { 784 char *t, *t0; 785 786 if ((t0 = t = calloc((unsigned)(strlen(s) + 1), sizeof (*t))) == NULL) 787 fatal_msg(gettext("Cannot allocate memory")); 788 789 while (*t++ = *s++) 790 ; 791 return (t0); 792 } 793 794 static void 795 sce(void) 796 { 797 char *ap; 798 int n, i; 799 char a[10]; 800 801 for (ap = a; C != '\n'; ap++) { 802 *ap = c; 803 if (ap == &a[9]) { 804 SKIP; 805 ap = a; 806 break; 807 } 808 } 809 if (ap != a) { 810 n = atoi(a); 811 } else { 812 n = 1; 813 } 814 for (i = 0; i < n; ) { 815 if (C == '.') { 816 if (C == 'c') { 817 if (C == 'e') { 818 while (C == ' ') 819 ; 820 if (c == '0') { 821 break; 822 } else { 823 SKIP; 824 } 825 } else { 826 SKIP; 827 } 828 } else { 829 SKIP; 830 } 831 } else { 832 SKIP; 833 i++; 834 } 835 } 836 } 837