/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */


/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <assert.h>
#include <errno.h>
#include <libintl.h>
#include <locale.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Deroff command -- strip troff, eqn, and Tbl sequences from a file.
 * Flag arguments:
 *	-w	output one word per line rather than in the original
 *		format.
 *	-mm (or -ms) causes the corresponding macros to be interpreted
 *		so that just sentences are output.
 *	-ml	also gets rid of lists.
 *	-i	causes deroff to ignore .so and .nx commands.
 * Unless -i is given, deroff follows .so and .nx commands.  It removes
 * the contents of macro definitions, equations (both .EQ ... .EN and
 * $...$), Tbl command sequences, and Troff backslash constructions.
 *
 * All input is through the C macro; the most recently read character
 * is in c.
 */

/*
 * C reads the next character into c, switching input files at end of
 * file (eof()) and skipping in-line equations (skeqn()) when the left
 * eqn delimiter is seen in the top-level file.  C1 is the same without
 * the delimiter check.  SKIP discards input through the next newline.
 * SKIP_TO_COM skips two lines and then everything up to a line that
 * begins with '.' followed by a character no greater than 'Z'.
 */
#define	C	((c = getc(infile)) == EOF ? eof() : \
		    ((c == ldelim) && (filesp == files) ? skeqn() : c))
#define	C1	((c = getc(infile)) == EOF ? eof() : c)
#define	SKIP	while (C != '\n')
#define	SKIP_TO_COM	SKIP; SKIP; pc = c; \
			while ((C != '.') || (pc != '\n') || \
			    (C > 'Z')) { \
				pc = c; \
			}

#define	YES 1
#define	NO 0
#define	MS 0
#define	MM 1
#define	ONE 1
#define	TWO 2

#define	NOCHAR -2
#define	SPECIAL 0
#define	APOS 1
#define	DIGIT 2
#define	LETTER 3

#define	MAXLINESZ	512
#define	MAXFNAMESZ	1024	/* longest .so/.nx file name, with NUL */
#define	MAXFILES	15	/* depth of the .so include stack */
#define	NCHARS		256	/* one class entry per unsigned char value */

/*
 * Class of a character taken from the input.  The cast keeps bytes
 * with the high bit set from producing a negative (out of bounds)
 * index; such bytes are never initialized and so classify as SPECIAL.
 */
#define	CHARTYPE(ch)	(chars[(unsigned char)(ch)])

static int wordflag = NO;
static int msflag = NO;
static int iflag = NO;
static int mac = MM;
static int disp = 0;
static int inmacro = NO;
static int intable = NO;
static int lindx;
static size_t linesize = MAXLINESZ;

static char chars[NCHARS];	/* SPECIAL, APOS, DIGIT, or LETTER */

static char *line = NULL;

/*
 * c must be an int: it receives the result of getc() and is compared
 * against EOF and against ldelim/rdelim, which may hold NOCHAR.
 */
static int c;
static int pc;
static int ldelim = NOCHAR;
static int rdelim = NOCHAR;

static int argc;
static char **argv;

extern int optind;
extern char *optarg;
static char fname[MAXFNAMESZ];
static FILE *files[MAXFILES];
static FILE **filesp;
static FILE *infile;

static void backsl(void);
static void comline(void);
static char *copys(char *);
static int eof(void);
static void eqn(void);
static void fatal(const char *, ...);
static void fatal_msg(char *);
static void getfname(void);
static void macro(void);
static FILE *opn(char *);
static void putmac(char *, int);
static void putwords(int);
static void regline(int, int);
static void sce(void);
static int skeqn(void);
static void sdis(char, char);
static void stbl(void);
static void tbl(void);
static void usage(void);
static void work(void);

/*
 * Parse the options, open the first input (stdin when no file operand
 * is given), build the character class table and process the input.
 * work() never returns; the program exits from eof() once all input
 * has been consumed.
 */
int
main(int ac, char **av)
{
	int i;
	int errflg = 0;
	int optchar;

	(void) setlocale(LC_ALL, "");
#if !defined(TEXT_DOMAIN)
#define	TEXT_DOMAIN "SYS_TEST"
#endif
	(void) textdomain(TEXT_DOMAIN);
	argc = ac;
	argv = av;
	while ((optchar = getopt(argc, argv, "wim:")) != EOF) {
		switch (optchar) {
		case 'w':
			wordflag = YES;
			break;
		case 'm':
			msflag = YES;
			if (*optarg == 'm')
				mac = MM;
			else if (*optarg == 's')
				mac = MS;
			else if (*optarg == 'l')
				disp = 1;
			else
				errflg++;
			break;
		case 'i':
			iflag = YES;
			break;
		case '?':
			errflg++;
			break;
		default:
			break;
		}
	}
	if (errflg)
		usage();
	if (optind == argc)
		infile = stdin;
	else
		infile = opn(argv[optind++]);
	files[0] = infile;
	filesp = &files[0];

	for (i = 'a'; i <= 'z'; ++i)
		chars[i] = LETTER;
	for (i = 'A'; i <= 'Z'; ++i)
		chars[i] = LETTER;
	for (i = '0'; i <= '9'; ++i)
		chars[i] = DIGIT;
	chars['\''] = APOS;
	chars['&'] = APOS;
	work();
	/* NOTREACHED */
	return (0);
}


/*
 * Skip an in-line equation: discard input up to the right delimiter,
 * treating double-quoted strings (with backslash escapes) as opaque.
 * Returns, and leaves in c, the placeholder that stands for the
 * equation: 'x' when macros are being interpreted, otherwise a blank.
 */
static int
skeqn(void)
{
	while ((c = getc(infile)) != rdelim) {
		if (c == EOF) {
			c = eof();
		} else if (c == '"') {
			while ((c = getc(infile)) != '"') {
				if (c == EOF) {
					c = eof();
				} else if (c == '\\') {
					if ((c = getc(infile)) == EOF) {
						c = eof();
					}
				}
			}
		}
	}
	if (msflag) {
		return (c = 'x');
	}
	return (c = ' ');
}


/*
 * Open 'p' for reading; print a message and exit on failure.
 * Functions calling opn() should ensure 'p' is non-null.
 */
static FILE *
opn(char *p)
{
	FILE *fd;

	assert(p != NULL);
	if ((fd = fopen(p, "r")) == NULL)
		fatal(gettext("Cannot open file %s: %s\n"), p, strerror(errno));

	return (fd);
}


/*
 * Called when the current input is exhausted.  Closes it (unless it is
 * stdin) and resumes the including file, or else opens the next file
 * operand; exits with status 0 when nothing is left.  Returns the next
 * character read from the new input.
 */
static int
eof(void)
{
	if (infile != stdin)
		(void) fclose(infile);
	if (filesp > files) {
		infile = *--filesp;
	} else if (optind < argc) {
		infile = opn(argv[optind++]);
	} else {
		exit(0);
	}

	return (C);
}


/*
 * Read the file name argument of a .so or .nx request into fname and
 * discard the rest of the request line.  A name that has been seen
 * before is rejected by leaving fname empty, so a file is never read
 * twice; new names are remembered on a list private to this function.
 */
static void
getfname(void)
{
	char *p;
	struct chain {
		struct chain *nextp;
		char *datap;
	};
	struct chain *q;
	static struct chain *namechain = NULL;

	while (C == ' ')
		;

	for (p = fname; (c != '\n') && (c != ' ') && (c != '\t') &&
	    (c != '\\'); ++p) {
		/* always leave room for the terminating NUL */
		if (p >= &fname[sizeof (fname) - 1])
			fatal_msg(gettext("File name too long"));
		*p = (char)c;
		(void) C;
	}
	*p = '\0';
	while (c != '\n') {
		(void) C;
	}

	/* see if this name has already been used */
	for (q = namechain; q != NULL; q = q->nextp) {
		if (strcmp(fname, q->datap) == 0) {
			fname[0] = '\0';
			return;
		}
	}

	if ((q = calloc(1, sizeof (*q))) == NULL)
		fatal_msg(gettext("Cannot allocate memory"));
	q->nextp = namechain;
	q->datap = copys(fname);
	namechain = q;
}


/*
 * Print a printf-style message prefixed with "deroff: " on stderr and
 * exit with status 1.
 * Functions calling fatal() should ensure 'format' and
 * arguments are non-null.
 */
static void
fatal(const char *format, ...)
{
	va_list alist;

	assert(format != NULL);
	(void) fputs(gettext("deroff: "), stderr);
	va_start(alist, format);
	(void) vfprintf(stderr, format, alist);
	va_end(alist);
	exit(1);
}

/*
 * Print 's' prefixed with "deroff: " on stderr and exit with status 1.
 * Functions calling fatal_msg() should ensure 's' is non-null.
 */
static void
fatal_msg(char *s)
{
	assert(s != NULL);
	(void) fprintf(stderr, gettext("deroff: %s\n"), s);
	exit(1);
}

/* Print the usage message on stderr and exit with status 1. */
static void
usage(void)
{
	(void) fputs(gettext(
	    "usage: deroff [ -w ] [ -m (m s l) ] [ -i ] "
	    "[ file ] ... \n"), stderr);
	exit(1);
}

/*
 * Main loop: a line starting with '.' or '\'' is a request line,
 * anything else is text.  Never returns; eof() ends the program.
 */
static void
work(void)
{

	for (;;) {
		if ((C == '.') || (c == '\''))
			comline();
		else
			regline(NO, TWO);
	}
}


/*
 * Collect the rest of the current line (its first character is already
 * in c) into the line buffer, replacing backslash constructions, and
 * write it out: word by word under -w, through putmac() when 'macline'
 * is set (with 'cnst' as the minimum word length), or verbatim.
 */
static void
regline(int macline, int cnst)
{

	if (line == NULL) {
		if ((line = malloc(linesize * sizeof (char))) == NULL) {
			fatal_msg(gettext("Cannot allocate memory"));
		}
	}

	lindx = 0;
	line[lindx] = (char)c;
	for (;;) {
		if (c == '\\') {
			line[lindx] = ' ';
			backsl();
			if (c == '%') {	/* no blank for hyphenation char */
				lindx--;
			}
		}
		if (c == '\n') {
			break;
		}
		/*
		 * backsl() may have stepped lindx back more than once;
		 * never let the next store land before the buffer.
		 */
		if (lindx < -1) {
			lindx = -1;
		}
		/*
		 * We're just about to add another character to the line
		 * buffer so ensure we don't overrun it.
		 */
		if ((size_t)++lindx >= linesize - 1) {
			linesize = linesize * 2;
			if ((line = realloc(line,
			    linesize * sizeof (char))) == NULL) {
				fatal_msg(gettext("Cannot allocate memory"));
			}
		}
		if (intable && (c == 'T')) {
			line[lindx] = (char)C;
			if ((c == '{') || (c == '}')) {
				/* blank out T{ and T} inside a table */
				line[lindx - 1] = ' ';
				line[lindx] = (char)C;
			}
		} else {
			line[lindx] = (char)C;
		}
	}

	if (lindx < 0) {
		lindx = 0;
	}
	line[lindx] = '\0';

	if (line[0] != '\0') {
		if (wordflag) {
			putwords(macline);
		} else if (macline) {
			putmac(line, cnst);
		} else {
			(void) puts(line);
		}
	}
}


/*
 * Print the arguments of a macro line.  White space is copied through;
 * a word is printed (without double quotes) only when it is longer
 * than 'cnst' characters and starts with two letters.  A newline ends
 * the output.
 */
static void
putmac(char *s, int cnst)
{
	char *t;

	while (*s) {
		while ((*s == ' ') || (*s == '\t')) {
			(void) putchar(*s++);
		}
		for (t = s; (*t != ' ') && (*t != '\t') && (*t != '\0'); ++t)
			;
		if (*s == '\"')
			s++;
		if ((t > s + cnst) && (CHARTYPE(s[0]) == LETTER) &&
		    (CHARTYPE(s[1]) == LETTER)) {
			while (s < t) {
				if (*s == '\"')
					s++;
				else
					(void) putchar(*s++);
			}
		} else {
			s = t;
		}
	}
	(void) putchar('\n');
}


/*
 * Break the line buffer into words for the -w option, one per output
 * line.  On text lines a word needs at least two letters; on macro
 * lines it needs at least three and must start with two letters.
 */
static void
putwords(int macline)
{
	char *p, *p1;
	int i, nlet;

	for (p1 = line; ; ) {
		/* skip initial specials ampersands and apostrophes */
		while (CHARTYPE(*p1) < DIGIT) {
			if (*p1++ == '\0')
				return;
		}
		nlet = 0;
		for (p = p1; (i = CHARTYPE(*p)) != SPECIAL; ++p) {
			if (i == LETTER)
				++nlet;
		}

		if ((!macline && (nlet > 1)) /* MDM definition of word */ ||
		    (macline && (nlet > 2) && (CHARTYPE(p1[0]) == LETTER) &&
		    (CHARTYPE(p1[1]) == LETTER))) {
			/* delete trailing ampersands and apostrophes */
			while ((p[-1] == '\'') || (p[-1] == '&')) {
				--p;
			}
			while (p1 < p) {
				(void) putchar(*p1++);
			}
			(void) putchar('\n');
		} else {
			p1 = p;
		}
	}
}


/*
 * Process a request line; the leading '.' or '\'' has been read.  The
 * two-character request name selects the action: equations, tables,
 * macro definitions, .so/.nx file switching, and (with -m) the ms/mm
 * macros whose text must be dropped.  Any other request has its
 * arguments passed through regline() as a macro line.
 */
static void
comline(void)
{
	int c1, c2;

	while ((C == ' ') || (c == '\t'))
		;
comx:
	if ((c1 = c) == '\n')
		return;
	c2 = C;
	if ((c1 == '.') && (c2 != '.'))
		inmacro = NO;
	if (c2 == '\n')
		return;

	if ((c1 == 'E') && (c2 == 'Q') && (filesp == files)) {
		eqn();
	} else if ((c1 == 'T') && ((c2 == 'S') || (c2 == 'C') ||
	    (c2 == '&')) && (filesp == files)) {
		if (msflag) {
			stbl();
		} else {
			tbl();
		}
	} else if ((c1 == 'T') && (c2 == 'E')) {
		intable = NO;
	} else if (!inmacro && (c1 == 'd') && (c2 == 'e')) {
		macro();
	} else if (!inmacro && (c1 == 'i') && (c2 == 'g')) {
		macro();
	} else if (!inmacro && (c1 == 'a') && (c2 == 'm')) {
		macro();
	} else if ((c1 == 's') && (c2 == 'o')) {
		if (iflag) {
			SKIP;
		} else {
			getfname();
			if (fname[0]) {
				/* the include stack has a fixed depth */
				if (filesp >= &files[MAXFILES - 1]) {
					fatal_msg(gettext(
					    "Too many nested .so files"));
				}
				infile = *++filesp = opn(fname);
			}
		}
	} else if ((c1 == 'n') && (c2 == 'x')) {
		if (iflag) {
			SKIP;
		} else {
			getfname();
			if (fname[0] == '\0') {
				exit(0);
			}
			if (infile != stdin) {
				(void) fclose(infile);
			}
			infile = *filesp = opn(fname);
		}
	} else if ((c1 == 'h') && (c2 == 'w')) {
		SKIP;
	} else if (msflag && (c1 == 'T') && (c2 == 'L')) {
		SKIP_TO_COM;
		goto comx;
	} else if (msflag && (c1 == 'N') && (c2 == 'R')) {
		SKIP;
	} else if (msflag && (c1 == 'A') && ((c2 == 'U') || (c2 == 'I'))) {
		if (mac == MM) {
			SKIP;
		} else {
			SKIP_TO_COM;
			goto comx;
		}
	} else if (msflag && (c1 == 'F') && (c2 == 'S')) {
		SKIP_TO_COM;
		goto comx;
	} else if (msflag && (c1 == 'S') && (c2 == 'H')) {
		SKIP_TO_COM;
		goto comx;
	} else if (msflag && (c1 == 'N') && (c2 == 'H')) {
		SKIP_TO_COM;
		goto comx;
	} else if (msflag && (c1 == 'O') && (c2 == 'K')) {
		SKIP_TO_COM;
		goto comx;
	} else if (msflag && (c1 == 'N') && (c2 == 'D')) {
		SKIP;
	} else if (msflag && (mac == MM) && (c1 == 'H') &&
	    ((c2 == ' ') || (c2 == 'U'))) {
		SKIP;
	} else if (msflag && (mac == MM) && (c2 == 'L')) {
		if (disp || (c1 == 'R')) {
			sdis('L', 'E');
		} else {
			SKIP;
			(void) putchar('.');
		}
	} else if (msflag && ((c1 == 'D') || (c1 == 'N') ||
	    (c1 == 'K') || (c1 == 'P')) && (c2 == 'S')) {
		sdis(c1, 'E');		/* removed RS-RE */
	} else if (msflag && (c1 == 'K' && c2 == 'F')) {
		sdis(c1, 'E');
	} else if (msflag && (c1 == 'n') && (c2 == 'f')) {
		sdis('f', 'i');
	} else if (msflag && (c1 == 'c') && (c2 == 'e')) {
		sce();
	} else {
		if ((c1 == '.') && (c2 == '.')) {
			while (C == '.')
				;
		}
		++inmacro;
		if ((c1 <= 'Z') && msflag) {
			regline(YES, ONE);
		} else {
			regline(YES, TWO);
		}
		--inmacro;
	}
}


/*
 * Handle a macro definition request (.de, .ig, .am).  With -m the
 * whole definition is discarded here, up to its terminating ".."
 * line; otherwise only the request line is skipped and inmacro is set
 * so that comline() recognizes the end of the definition.
 */
static void
macro(void)
{
	if (msflag) {
		/* look for .. */
		do {
			SKIP;
		} while ((C != '.') || (C != '.') || (C == '.'));
		if (c != '\n') {
			SKIP;
		}
		return;
	}
	SKIP;
	inmacro = YES;
}


/*
 * Skip a display: discard input up to and including the request line
 * whose two-character name is a1 a2.  A '.' is printed in its place
 * unless an equation was seen inside a .DS display, and a newline is
 * always printed.  .EQ inside a .DS display is handed to eqn().
 */
static void
sdis(char a1, char a2)
{
	int c1, c2;
	int eqnf;
	int notdone = 1;
	eqnf = 1;
	SKIP;
	while (notdone) {
		while (C != '.') {
			/*
			 * An empty line leaves us at the start of the
			 * next line already; skipping again would miss
			 * a request that follows it.
			 */
			if (c == '\n')
				continue;
			SKIP;
		}
		if ((c1 = C) == '\n')
			continue;
		if ((c2 = C) == '\n')
			continue;
		if ((c1 == a1) && (c2 == a2)) {
			SKIP;
			if (eqnf)
				(void) putchar('.');
			(void) putchar('\n');
			return;
		} else if ((a1 == 'D') && (c1 == 'E') && (c2 == 'Q')) {
			eqn();
			eqnf = 0;
		} else {
			SKIP;
		}
	}
}

/*
 * Start of a table without -m: skip input through the line holding the
 * first '.', then mark that we are inside a table.
 */
static void
tbl(void)
{
	while (C != '.')
		;
	SKIP;
	intable = YES;
}

/*
 * Start of a table with -m: skip input through the first '.', move on
 * to the next request line and, unless that request is .TE, keep
 * discarding input until a line beginning with ".TE" is found.
 */
static void
stbl(void)
{
	while (C != '.')
		;
	SKIP_TO_COM;
	if ((c != 'T') || (C != 'E')) {
		SKIP;
		pc = c;
		while ((C != '.') || (pc != '\n') ||
		    (C != 'T') || (C != 'E')) {
			pc = c;
		}
	}
}

/*
 * Skip a displayed equation up to its .EN line, reading with C1 so the
 * in-line delimiters are not interpreted.  A "delim" line inside the
 * equation sets ldelim/rdelim, or clears them for "delim off" or a
 * missing argument.  With -m an "x " placeholder is printed for the
 * equation unless a delim line was seen, followed by ". " when the
 * last equation line ended with a '.'.
 */
static void
eqn(void)
{
	int c1, c2;
	int dflg;
	int last;

	last = 0;
	dflg = 1;
	SKIP;

	for (;;) {
		if ((C1 == '.') || (c == '\'')) {
			while ((C1 == ' ') || (c == '\t'))
				;
			if ((c == 'E') && (C1 == 'N')) {
				SKIP;
				if (msflag && dflg) {
					(void) putchar('x');
					(void) putchar(' ');
					if (last) {
						(void) putchar('.');
						(void) putchar(' ');
					}
				}
				return;
			}
		} else if (c == 'd') {	/* look for delim */
			if ((C1 == 'e') && (C1 == 'l')) {
				if ((C1 == 'i') && (C1 == 'm')) {
					while (C1 == ' ')
						;
					if (((c1 = c) == '\n') ||
					    ((c2 = C1) == '\n') ||
					    ((c1 == 'o') && (c2 == 'f') &&
					    (C1 == 'f'))) {
						ldelim = NOCHAR;
						rdelim = NOCHAR;
					} else {
						ldelim = c1;
						rdelim = c2;
					}
				}
				dflg = 0;
			}
		}

		if (c != '\n') {
			while (C1 != '\n') {
				if (c == '.') {
					last = 1;
				} else {
					last = 0;
				}
			}
		}
	}
}


/*
 * Skip over a complete backslash construction; the backslash itself
 * has been read.  On return c holds the last character consumed.  The
 * \s case steps lindx back by one.
 */
static void
backsl(void)
{
	int bdelim;

sw:	switch (C) {
	case '"':
		SKIP;
		return;
	case 's':
		if (C == '\\') {
			backsl();
		} else {
			while ((C >= '0') && (c <= '9'))
				;
			(void) ungetc(c, infile);
			c = '0';
		}
		lindx--;
		return;

	case 'f':
	case 'n':
	case '*':
		if (C != '(')
			return;
		/* FALLTHROUGH */

	case '(':
		if (C != '\n') {
			(void) C;
		}
		return;

	case '$':
		(void) C;	/* discard argument number */
		return;

	case 'b':
	case 'x':
	case 'v':
	case 'h':
	case 'w':
	case 'o':
	case 'l':
	case 'L':
		/* argument is enclosed in a pair of delimiter characters */
		if ((bdelim = C) == '\n')
			return;
		while ((C != '\n') && (c != bdelim))
			if (c == '\\')
				backsl();
		return;

	case '\\':
		if (inmacro)
			goto sw;
		/* FALLTHROUGH */
	default:
		return;
	}
}


/*
 * Return a newly allocated copy of the string 's'; exits with a
 * message when memory cannot be allocated.
 */
static char *
copys(char *s)
{
	char *t, *t0;

	if ((t0 = t = calloc(strlen(s) + 1, sizeof (*t))) == NULL)
		fatal_msg(gettext("Cannot allocate memory"));

	while ((*t++ = *s++) != '\0')
		;
	return (t0);
}

/*
 * Handle .ce with -m: read the line count from the rest of the request
 * line (1 when it is absent or longer than the local buffer) and
 * discard that many following text lines.  Request lines do not count
 * toward the total, and a ".ce 0" ends the skipping early.
 */
static void
sce(void)
{
	char *ap;
	int n, i;
	char a[10];

	for (ap = a; C != '\n'; ap++) {
		*ap = (char)c;
		if (ap == &a[9]) {
			SKIP;
			ap = a;
			break;
		}
	}
	if (ap != a) {
		/* ap is at most &a[9] here, so the NUL stays in bounds */
		*ap = '\0';
		n = atoi(a);
	} else {
		n = 1;
	}
	for (i = 0; i < n; ) {
		if (C == '.') {
			if (C == 'c') {
				if (C == 'e') {
					while (C == ' ')
						;
					if (c == '0') {
						break;
					} else {
						SKIP;
					}
				} else {
					SKIP;
				}
			} else {
				SKIP;
			}
		} else {
			SKIP;
			i++;
		}
	}
}