1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #include <assert.h> 31 #include <errno.h> 32 #include <stdio.h> 33 #include <stdlib.h> 34 #include <string.h> 35 #include <locale.h> 36 #include <sys/varargs.h> 37 38 /* 39 * Deroff command -- strip troff, eqn, and Tbl sequences from a file. 40 * Has three flags argument, -w, to cause output one word per line 41 * rather than in the original format. 42 * -mm (or -ms) causes the corresponding macro's to be interpreted 43 * so that just sentences are output 44 * -ml also gets rid of lists. 45 * -i causes deroff to ignore .so and .nx commands. 46 * Deroff follows .so and .nx commands, removes contents of macro 47 * definitions, equations (both .EQ ... .EN and $...$), 48 * Tbl command sequences, and Troff backslash constructions. 49 * 50 * All input is through the C macro; the most recently read character 51 * is in c. 52 */ 53 54 #define C ((c = getc(infile)) == EOF ? eof() : \ 55 ((c == ldelim) && (filesp == files) ? skeqn() : c)) 56 #define C1 ((c = getc(infile)) == EOF ? eof() : c) 57 #define SKIP while (C != '\n') 58 #define SKIP_TO_COM SKIP; SKIP; pc = c; \ 59 while ((C != '.') || (pc != '\n') || \ 60 (C > 'Z')) { \ 61 pc = c; \ 62 } 63 64 #define YES 1 65 #define NO 0 66 #define MS 0 67 #define MM 1 68 #define ONE 1 69 #define TWO 2 70 71 #define NOCHAR -2 72 #define SPECIAL 0 73 #define APOS 1 74 #define DIGIT 2 75 #define LETTER 3 76 77 #define MAXLINESZ 512 78 79 static int wordflag = NO; 80 static int msflag = NO; 81 static int iflag = NO; 82 static int mac = MM; 83 static int disp = 0; 84 static int inmacro = NO; 85 static int intable = NO; 86 static int lindx; 87 static size_t linesize = MAXLINESZ; 88 89 static char chars[128]; /* SPECIAL, APOS, DIGIT, or LETTER */ 90 91 static char *line = NULL; 92 93 static char c; 94 static int pc; 95 static int ldelim = NOCHAR; 96 static int rdelim = NOCHAR; 97 98 static int argc; 99 static char **argv; 100 101 extern int optind; 102 extern char *optarg; 103 static char fname[50]; 104 static FILE *files[15]; 105 static FILE **filesp; 106 static FILE *infile; 107 108 static void backsl(void); 109 static void comline(void); 110 static char *copys(char *); 111 static int eof(void); 112 static void eqn(void); 113 static void fatal(const char *, ...); 114 static void fatal_msg(char *); 115 static void getfname(void); 116 static void macro(void); 117 static FILE *opn(char *); 118 static void putmac(char *, int); 119 static void putwords(int); 120 static void regline(int, int); 121 static void sce(void); 122 static int skeqn(void); 123 static void sdis(char, char); 124 static void stbl(void); 125 static void tbl(void); 126 static void usage(void); 127 static void work(void) __NORETURN; 128 129 int 130 main(int ac, char **av) 131 { 132 int i; 133 int errflg = 0; 134 int optchar; 135 136 (void) setlocale(LC_ALL, ""); 137 #if !defined(TEXT_DOMAIN) 138 #define TEXT_DOMAIN "SYS_TEST" 139 #endif 140 (void) textdomain(TEXT_DOMAIN); 141 argc = ac; 142 argv = av; 143 while ((optchar = getopt(argc, argv, "wim:")) != EOF) { 144 switch (optchar) { 145 case 'w': 146 wordflag = YES; 147 break; 148 case 'm': 149 msflag = YES; 150 if (*optarg == 'm') 151 mac = MM; 152 else if (*optarg == 's') 153 mac = MS; 154 else if (*optarg == 'l') 155 disp = 1; 156 else 157 errflg++; 158 break; 159 case 'i': 160 iflag = YES; 161 break; 162 case '?': 163 errflg++; 164 } 165 } 166 if (errflg) { 167 usage(); 168 return (1); 169 } 170 if (optind == argc) 171 infile = stdin; 172 else 173 infile = opn(argv[optind++]); 174 files[0] = infile; 175 filesp = &files[0]; 176 177 for (i = 'a'; i <= 'z'; ++i) 178 chars[i] = LETTER; 179 for (i = 'A'; i <= 'Z'; ++i) 180 chars[i] = LETTER; 181 for (i = '0'; i <= '9'; ++i) 182 chars[i] = DIGIT; 183 chars['\''] = APOS; 184 chars['&'] = APOS; 185 work(); 186 /* NOTREACHED */ 187 } 188 189 190 static int 191 skeqn(void) 192 { 193 while ((c = getc(infile)) != rdelim) { 194 if (c == EOF) { 195 c = eof(); 196 } else if (c == '"') { 197 while ((c = getc(infile)) != '"') { 198 if (c == EOF) { 199 c = eof(); 200 } else if (c == '\\') { 201 if ((c = getc(infile)) == EOF) { 202 c = eof(); 203 } 204 } 205 } 206 } 207 } 208 if (msflag) { 209 return (c = 'x'); 210 } 211 return (c = ' '); 212 } 213 214 215 /* Functions calling opn() should ensure 'p' is non-null */ 216 static FILE * 217 opn(char *p) 218 { 219 FILE *fd; 220 221 assert(p != NULL); 222 if ((fd = fopen(p, "r")) == NULL) 223 fatal(gettext("Cannot open file %s: %s\n"), p, strerror(errno)); 224 225 return (fd); 226 } 227 228 229 230 static int 231 eof(void) 232 { 233 if (infile != stdin) 234 (void) fclose(infile); 235 if (filesp > files) { 236 infile = *--filesp; 237 } else if (optind < argc) { 238 infile = opn(argv[optind++]); 239 } else { 240 exit(0); 241 } 242 243 return (C); 244 } 245 246 247 248 static void 249 getfname(void) 250 { 251 char *p; 252 struct chain { 253 struct chain *nextp; 254 char *datap; 255 }; 256 struct chain *q; 257 static struct chain *namechain = NULL; 258 259 while (C == ' ') 260 ; 261 262 for (p = fname; ((*p = c) != '\n') && (c != ' ') && (c != '\t') && 263 (c != '\\'); ++p) { 264 (void) C; 265 } 266 *p = '\0'; 267 while (c != '\n') { 268 (void) C; 269 } 270 271 /* see if this name has already been used */ 272 for (q = namechain; q; q = q->nextp) 273 if (strcmp(fname, q->datap) != 0) { 274 fname[0] = '\0'; 275 return; 276 } 277 278 q = (struct chain *)calloc(1, sizeof (*namechain)); 279 q->nextp = namechain; 280 q->datap = copys(fname); 281 namechain = q; 282 } 283 284 285 /* 286 * Functions calling fatal() should ensure 'format' and 287 * arguments are non-null. 288 */ 289 static void 290 fatal(const char *format, ...) 291 { 292 va_list alist; 293 294 assert(format != NULL); 295 (void) fputs(gettext("deroff: "), stderr); 296 va_start(alist, format); 297 (void) vfprintf(stderr, format, alist); 298 exit(1); 299 } 300 301 /* Functions calling fatal_msg() should ensure 's' is non-null */ 302 static void 303 fatal_msg(char *s) 304 { 305 assert(s != NULL); 306 (void) fprintf(stderr, gettext("deroff: %s\n"), s); 307 exit(1); 308 } 309 310 static void 311 usage(void) 312 { 313 (void) fputs(gettext( 314 "usage: deroff [ -w ] [ -m (m s l) ] [ -i ] " 315 "[ file ] ... \n"), stderr); 316 } 317 318 static void 319 work(void) 320 { 321 322 for (;;) { 323 if ((C == '.') || (c == '\'')) 324 comline(); 325 else 326 regline(NO, TWO); 327 } 328 } 329 330 331 static void 332 regline(int macline, int cnst) 333 { 334 335 if (line == NULL) { 336 if ((line = (char *)malloc(linesize * sizeof (char))) == NULL) { 337 fatal_msg(gettext("Cannot allocate memory")); 338 } 339 } 340 341 lindx = 0; 342 line[lindx] = c; 343 for (;;) { 344 if (c == '\\') { 345 line[lindx] = ' '; 346 backsl(); 347 if (c == '%') { /* no blank for hyphenation char */ 348 lindx--; 349 } 350 } 351 if (c == '\n') { 352 break; 353 } 354 /* 355 * We're just about to add another character to the line 356 * buffer so ensure we don't overrun it. 357 */ 358 if (++lindx >= linesize - 1) { 359 linesize = linesize * 2; 360 if ((line = (char *)realloc(line, 361 linesize * sizeof (char))) == NULL) { 362 fatal_msg(gettext("Cannot allocate memory")); 363 } 364 } 365 if (intable && (c == 'T')) { 366 line[lindx] = C; 367 if ((c == '{') || (c == '}')) { 368 line[lindx - 1] = ' '; 369 line[lindx] = C; 370 } 371 } else { 372 line[lindx] = C; 373 } 374 } 375 376 line[lindx] = '\0'; 377 378 if (line[0] != '\0') { 379 if (wordflag) { 380 putwords(macline); 381 } else if (macline) { 382 putmac(line, cnst); 383 } else { 384 (void) puts(line); 385 } 386 } 387 } 388 389 390 391 392 static void 393 putmac(char *s, int cnst) 394 { 395 char *t; 396 397 while (*s) { 398 while ((*s == ' ') || (*s == '\t')) { 399 (void) putchar(*s++); 400 } 401 for (t = s; (*t != ' ') && (*t != '\t') && (*t != '\0'); ++t) 402 ; 403 if (*s == '\"') 404 s++; 405 if ((t > s + cnst) && (chars[s[0]] == LETTER) && 406 (chars[s[1]] == LETTER)) { 407 while (s < t) { 408 if (*s == '\"') 409 s++; 410 else 411 (void) putchar(*s++); 412 } 413 } else { 414 s = t; 415 } 416 } 417 (void) putchar('\n'); 418 } 419 420 421 422 static void 423 putwords(int macline) /* break into words for -w option */ 424 { 425 char *p, *p1; 426 int i, nlet; 427 428 for (p1 = line; ; ) { 429 /* skip initial specials ampersands and apostrophes */ 430 while (chars[*p1] < DIGIT) { 431 if (*p1++ == '\0') 432 return; 433 } 434 nlet = 0; 435 for (p = p1; (i = chars[*p]) != SPECIAL; ++p) { 436 if (i == LETTER) 437 ++nlet; 438 } 439 440 if ((!macline && (nlet > 1)) /* MDM definition of word */ || 441 (macline && (nlet > 2) && (chars[p1[0]] == LETTER) && 442 (chars[p1[1]] == LETTER))) { 443 /* delete trailing ampersands and apostrophes */ 444 while ((p[-1] == '\'') || (p[-1] == '&')) { 445 --p; 446 } 447 while (p1 < p) { 448 (void) putchar(*p1++); 449 } 450 (void) putchar('\n'); 451 } else { 452 p1 = p; 453 } 454 } 455 } 456 457 458 459 static void 460 comline(void) 461 { 462 int c1, c2; 463 464 com: 465 while ((C == ' ') || (c == '\t')) 466 ; 467 comx: 468 if ((c1 = c) == '\n') 469 return; 470 c2 = C; 471 if ((c1 == '.') && (c2 != '.')) 472 inmacro = NO; 473 if (c2 == '\n') 474 return; 475 476 if ((c1 == 'E') && (c2 == 'Q') && (filesp == files)) { 477 eqn(); 478 } else if ((c1 == 'T') && ((c2 == 'S') || (c2 == 'C') || 479 (c2 == '&')) && (filesp == files)) { 480 if (msflag) { 481 stbl(); 482 } else { 483 tbl(); 484 } 485 } else if ((c1 == 'T') && (c2 == 'E')) { 486 intable = NO; 487 } else if (!inmacro && (c1 == 'd') && (c2 == 'e')) { 488 macro(); 489 } else if (!inmacro && (c1 == 'i') && (c2 == 'g')) { 490 macro(); 491 } else if (!inmacro && (c1 == 'a') && (c2 == 'm')) { 492 macro(); 493 } else if ((c1 == 's') && (c2 == 'o')) { 494 if (iflag) { 495 SKIP; 496 } else { 497 getfname(); 498 if (fname[0]) { 499 infile = *++filesp = opn(fname); 500 } 501 } 502 } else if ((c1 == 'n') && (c2 == 'x')) { 503 if (iflag) { 504 SKIP; 505 } else { 506 getfname(); 507 if (fname[0] == '\0') { 508 exit(0); 509 } 510 if (infile != stdin) { 511 (void) fclose(infile); 512 } 513 infile = *filesp = opn(fname); 514 } 515 } else if ((c1 == 'h') && (c2 == 'w')) { 516 SKIP; 517 } else if (msflag && (c1 == 'T') && (c2 == 'L')) { 518 SKIP_TO_COM; 519 goto comx; 520 } else if (msflag && (c1 == 'N') && (c2 == 'R')) { 521 SKIP; 522 } else if (msflag && (c1 == 'A') && ((c2 == 'U') || (c2 == 'I'))) { 523 if (mac == MM) { 524 SKIP; 525 } else { 526 SKIP_TO_COM; 527 goto comx; 528 } 529 } else if (msflag && (c1 == 'F') && (c2 == 'S')) { 530 SKIP_TO_COM; 531 goto comx; 532 } else if (msflag && (c1 == 'S') && (c2 == 'H')) { 533 SKIP_TO_COM; 534 goto comx; 535 } else if (msflag && (c1 == 'N') && (c2 == 'H')) { 536 SKIP_TO_COM; 537 goto comx; 538 } else if (msflag && (c1 == 'O') && (c2 == 'K')) { 539 SKIP_TO_COM; 540 goto comx; 541 } else if (msflag && (c1 == 'N') && (c2 == 'D')) { 542 SKIP; 543 } else if (msflag && (mac == MM) && (c1 == 'H') && 544 ((c2 == ' ') || (c2 == 'U'))) { 545 SKIP; 546 } else if (msflag && (mac == MM) && (c2 == 'L')) { 547 if (disp || (c1 == 'R')) { 548 sdis('L', 'E'); 549 } else { 550 SKIP; 551 (void) putchar('.'); 552 } 553 } else if (msflag && ((c1 == 'D') || (c1 == 'N') || 554 (c1 == 'K') || (c1 == 'P')) && (c2 == 'S')) { 555 sdis(c1, 'E'); /* removed RS-RE */ 556 } else if (msflag && (c1 == 'K' && c2 == 'F')) { 557 sdis(c1, 'E'); 558 } else if (msflag && (c1 == 'n') && (c2 == 'f')) { 559 sdis('f', 'i'); 560 } else if (msflag && (c1 == 'c') && (c2 == 'e')) { 561 sce(); 562 } else { 563 if ((c1 == '.') && (c2 == '.')) { 564 while (C == '.') 565 ; 566 } 567 ++inmacro; 568 if ((c1 <= 'Z') && msflag) { 569 regline(YES, ONE); 570 } else { 571 regline(YES, TWO); 572 } 573 --inmacro; 574 } 575 } 576 577 578 579 static void 580 macro(void) 581 { 582 if (msflag) { 583 /* look for .. */ 584 do { 585 SKIP; 586 } while ((C != '.') || (C != '.') || (C == '.')); 587 if (c != '\n') { 588 SKIP; 589 } 590 return; 591 } 592 SKIP; 593 inmacro = YES; 594 } 595 596 597 598 599 static void 600 sdis(char a1, char a2) 601 { 602 int c1, c2; 603 int eqnf; 604 int notdone = 1; 605 eqnf = 1; 606 SKIP; 607 while (notdone) { 608 while (C != '.') 609 SKIP; 610 if ((c1 = C) == '\n') 611 continue; 612 if ((c2 = C) == '\n') 613 continue; 614 if ((c1 == a1) && (c2 == a2)) { 615 SKIP; 616 if (eqnf) 617 (void) putchar('.'); 618 (void) putchar('\n'); 619 return; 620 } else if ((a1 == 'D') && (c1 == 'E') && (c2 == 'Q')) { 621 eqn(); 622 eqnf = 0; 623 } else { 624 SKIP; 625 } 626 } 627 } 628 629 static void 630 tbl(void) 631 { 632 while (C != '.') 633 ; 634 SKIP; 635 intable = YES; 636 } 637 638 static void 639 stbl(void) 640 { 641 while (C != '.') 642 ; 643 SKIP_TO_COM; 644 if ((c != 'T') || (C != 'E')) { 645 SKIP; 646 pc = c; 647 while ((C != '.') || (pc != '\n') || 648 (C != 'T') || (C != 'E')) { 649 pc = c; 650 } 651 } 652 } 653 654 static void 655 eqn(void) 656 { 657 int c1, c2; 658 int dflg; 659 int last; 660 661 last = 0; 662 dflg = 1; 663 SKIP; 664 665 for (;;) { 666 if ((C1 == '.') || (c == '\'')) { 667 while ((C1 == ' ') || (c == '\t')) 668 ; 669 if ((c == 'E') && (C1 == 'N')) { 670 SKIP; 671 if (msflag && dflg) { 672 (void) putchar('x'); 673 (void) putchar(' '); 674 if (last) { 675 (void) putchar('.'); 676 (void) putchar(' '); 677 } 678 } 679 return; 680 } 681 } else if (c == 'd') { /* look for delim */ 682 if ((C1 == 'e') && (C1 == 'l')) { 683 if ((C1 == 'i') && (C1 == 'm')) { 684 while (C1 == ' ') 685 ; 686 if (((c1 = c) == '\n') || 687 ((c2 = C1) == '\n') || 688 ((c1 == 'o') && (c2 == 'f') && 689 (C1 == 'f'))) { 690 ldelim = NOCHAR; 691 rdelim = NOCHAR; 692 } else { 693 ldelim = c1; 694 rdelim = c2; 695 } 696 } 697 dflg = 0; 698 } 699 } 700 701 if (c != '\n') { 702 while (C1 != '\n') { 703 if (c == '.') { 704 last = 1; 705 } else { 706 last = 0; 707 } 708 } 709 } 710 } 711 } 712 713 714 715 static void 716 backsl(void) /* skip over a complete backslash construction */ 717 { 718 int bdelim; 719 720 sw: switch (C) { 721 case '"': 722 SKIP; 723 return; 724 case 's': 725 if (C == '\\') { 726 backsl(); 727 } else { 728 while ((C >= '0') && (c <= '9')) 729 ; 730 (void) ungetc(c, infile); 731 c = '0'; 732 } 733 lindx--; 734 return; 735 736 case 'f': 737 case 'n': 738 case '*': 739 if (C != '(') 740 return; 741 /* FALLTHROUGH */ 742 743 case '(': 744 if (C != '\n') { 745 (void) C; 746 } 747 return; 748 749 case '$': 750 (void) C; /* discard argument number */ 751 return; 752 753 case 'b': 754 case 'x': 755 case 'v': 756 case 'h': 757 case 'w': 758 case 'o': 759 case 'l': 760 case 'L': 761 if ((bdelim = C) == '\n') 762 return; 763 while ((C != '\n') && (c != bdelim)) 764 if (c == '\\') 765 backsl(); 766 return; 767 768 case '\\': 769 if (inmacro) 770 goto sw; 771 default: 772 return; 773 } 774 } 775 776 777 778 779 static char * 780 copys(char *s) 781 { 782 char *t, *t0; 783 784 if ((t0 = t = calloc((unsigned)(strlen(s) + 1), sizeof (*t))) == NULL) 785 fatal_msg(gettext("Cannot allocate memory")); 786 787 while (*t++ = *s++) 788 ; 789 return (t0); 790 } 791 792 static void 793 sce(void) 794 { 795 char *ap; 796 int n, i; 797 char a[10]; 798 799 for (ap = a; C != '\n'; ap++) { 800 *ap = c; 801 if (ap == &a[9]) { 802 SKIP; 803 ap = a; 804 break; 805 } 806 } 807 if (ap != a) { 808 n = atoi(a); 809 } else { 810 n = 1; 811 } 812 for (i = 0; i < n; ) { 813 if (C == '.') { 814 if (C == 'c') { 815 if (C == 'e') { 816 while (C == ' ') 817 ; 818 if (c == '0') { 819 break; 820 } else { 821 SKIP; 822 } 823 } else { 824 SKIP; 825 } 826 } else { 827 SKIP; 828 } 829 } else { 830 SKIP; 831 i++; 832 } 833 } 834 } 835