1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 #include <assert.h>
31 #include <errno.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <locale.h>
36 #include <sys/varargs.h>
37
/*
 * Deroff command -- strip troff, eqn, and Tbl sequences from a file.
 * It accepts the following flags:
 *	-w causes output of one word per line rather than in the
 *	   original format.
 *	-mm (or -ms) causes the corresponding macros to be interpreted
 *	   so that just sentences are output.
 *	-ml also gets rid of lists.
 *	-i causes deroff to ignore .so and .nx commands.
 * Unless -i is given, deroff follows .so and .nx commands.  It removes
 * the contents of macro definitions, equations (both .EQ ... .EN and
 * $...$), Tbl command sequences, and Troff backslash constructions.
 *
 * All input is through the C macro; the most recently read character
 * is in c.
 */
53
/*
 * Input macros.  Each one reads from infile and leaves the character it
 * read in the global c.
 *
 * C	read one character; at end of file call eof(); when the character
 *	equals ldelim and input comes from the outermost file
 *	(filesp == files) call skeqn() instead of yielding the character.
 * C1	like C but without the delimiter check.
 * SKIP	loop header that reads up to and including the next newline.  It
 *	must be followed by ';' (or a statement) that forms the loop body.
 * SKIP_TO_COM
 *	run SKIP twice, then keep reading until a '.' is read while pc holds
 *	a newline and the character read after the '.' is not greater than
 *	'Z'.  Wrapped in do { } while (0) so that it behaves as a single
 *	statement wherever it is used.
 */
#define	C	((c = getc(infile)) == EOF ? eof() : \
		    ((c == ldelim) && (filesp == files) ? skeqn() : c))
#define	C1	((c = getc(infile)) == EOF ? eof() : c)
#define	SKIP	while (C != '\n')
#define	SKIP_TO_COM	do { \
				SKIP; \
				SKIP; \
				pc = c; \
				while ((C != '.') || (pc != '\n') || \
				    (C > 'Z')) { \
					pc = c; \
				} \
			} while (0)

/* boolean flag values */
#define	YES 1
#define	NO 0
/* values stored in mac (selected by -ms / -mm) */
#define	MS 0
#define	MM 1
/* values passed as the cnst argument of regline() */
#define	ONE 1
#define	TWO 2

/* initial value of ldelim/rdelim: no eqn delimiter is in effect */
#define	NOCHAR (-2)
/* character classes stored in chars[] */
#define	SPECIAL 0
#define	APOS 1
#define	DIGIT 2
#define	LETTER 3

/* initial size of the line buffer; regline() doubles it on demand */
#define	MAXLINESZ 512
78
79 static int wordflag = NO;
80 static int msflag = NO;
81 static int iflag = NO;
82 static int mac = MM;
83 static int disp = 0;
84 static int inmacro = NO;
85 static int intable = NO;
86 static int lindx;
87 static size_t linesize = MAXLINESZ;
88
89 static char chars[128]; /* SPECIAL, APOS, DIGIT, or LETTER */
90
91 static char *line = NULL;
92
93 static char c;
94 static int pc;
95 static int ldelim = NOCHAR;
96 static int rdelim = NOCHAR;
97
98 static int argc;
99 static char **argv;
100
101 extern int optind;
102 extern char *optarg;
103 static char fname[50];
104 static FILE *files[15];
105 static FILE **filesp;
106 static FILE *infile;
107
108 static void backsl(void);
109 static void comline(void);
110 static char *copys(char *);
111 static int eof(void);
112 static void eqn(void);
113 static void fatal(const char *, ...);
114 static void fatal_msg(char *);
115 static void getfname(void);
116 static void macro(void);
117 static FILE *opn(char *);
118 static void putmac(char *, int);
119 static void putwords(int);
120 static void regline(int, int);
121 static void sce(void);
122 static int skeqn(void);
123 static void sdis(char, char);
124 static void stbl(void);
125 static void tbl(void);
126 static void usage(void);
127 static void work(void) __NORETURN;
128
129 int
main(int ac,char ** av)130 main(int ac, char **av)
131 {
132 int i;
133 int errflg = 0;
134 int optchar;
135
136 (void) setlocale(LC_ALL, "");
137 #if !defined(TEXT_DOMAIN)
138 #define TEXT_DOMAIN "SYS_TEST"
139 #endif
140 (void) textdomain(TEXT_DOMAIN);
141 argc = ac;
142 argv = av;
143 while ((optchar = getopt(argc, argv, "wim:")) != EOF) {
144 switch (optchar) {
145 case 'w':
146 wordflag = YES;
147 break;
148 case 'm':
149 msflag = YES;
150 if (*optarg == 'm')
151 mac = MM;
152 else if (*optarg == 's')
153 mac = MS;
154 else if (*optarg == 'l')
155 disp = 1;
156 else
157 errflg++;
158 break;
159 case 'i':
160 iflag = YES;
161 break;
162 case '?':
163 errflg++;
164 }
165 }
166 if (errflg) {
167 usage();
168 return (1);
169 }
170 if (optind == argc)
171 infile = stdin;
172 else
173 infile = opn(argv[optind++]);
174 files[0] = infile;
175 filesp = &files[0];
176
177 for (i = 'a'; i <= 'z'; ++i)
178 chars[i] = LETTER;
179 for (i = 'A'; i <= 'Z'; ++i)
180 chars[i] = LETTER;
181 for (i = '0'; i <= '9'; ++i)
182 chars[i] = DIGIT;
183 chars['\''] = APOS;
184 chars['&'] = APOS;
185 work();
186 /* NOTREACHED */
187 }
188
189
190 static int
skeqn(void)191 skeqn(void)
192 {
193 while ((c = getc(infile)) != rdelim) {
194 if (c == EOF) {
195 c = eof();
196 } else if (c == '"') {
197 while ((c = getc(infile)) != '"') {
198 if (c == EOF) {
199 c = eof();
200 } else if (c == '\\') {
201 if ((c = getc(infile)) == EOF) {
202 c = eof();
203 }
204 }
205 }
206 }
207 }
208 if (msflag) {
209 return (c = 'x');
210 }
211 return (c = ' ');
212 }
213
214
/*
 * Open the file named 'p' for reading and return its stream.  On failure
 * the error is reported through fatal(), which terminates the program.
 * Callers must pass a non-null 'p'.
 */
static FILE *
opn(char *p)
{
	FILE *stream;

	assert(p != NULL);

	stream = fopen(p, "r");
	if (stream == NULL) {
		fatal(gettext("Cannot open file %s: %s\n"), p, strerror(errno));
	}

	return (stream);
}
227
228
229
230 static int
eof(void)231 eof(void)
232 {
233 if (infile != stdin)
234 (void) fclose(infile);
235 if (filesp > files) {
236 infile = *--filesp;
237 } else if (optind < argc) {
238 infile = opn(argv[optind++]);
239 } else {
240 exit(0);
241 }
242
243 return (C);
244 }
245
246
247
248 static void
getfname(void)249 getfname(void)
250 {
251 char *p;
252 struct chain {
253 struct chain *nextp;
254 char *datap;
255 };
256 struct chain *q;
257 static struct chain *namechain = NULL;
258
259 while (C == ' ')
260 ;
261
262 for (p = fname; ((*p = c) != '\n') && (c != ' ') && (c != '\t') &&
263 (c != '\\'); ++p) {
264 (void) C;
265 }
266 *p = '\0';
267 while (c != '\n') {
268 (void) C;
269 }
270
271 /* see if this name has already been used */
272 for (q = namechain; q; q = q->nextp)
273 if (strcmp(fname, q->datap) != 0) {
274 fname[0] = '\0';
275 return;
276 }
277
278 q = (struct chain *)calloc(1, sizeof (*namechain));
279 q->nextp = namechain;
280 q->datap = copys(fname);
281 namechain = q;
282 }
283
284
/*
 * Print "deroff: " followed by a printf-style formatted message on
 * stderr and exit with status 1.
 *
 * Functions calling fatal() should ensure 'format' and
 * arguments are non-null.
 */
static void
fatal(const char *format, ...)
{
	va_list alist;

	assert(format != NULL);
	(void) fputs(gettext("deroff: "), stderr);
	va_start(alist, format);
	(void) vfprintf(stderr, format, alist);
	/* every va_start needs a matching va_end in the same function */
	va_end(alist);
	exit(1);
}
300
/*
 * Print the message 's', prefixed with the program name, on stderr and
 * exit with status 1.  Callers must pass a non-null 's'.
 */
static void
fatal_msg(char *s)
{
	assert(s != NULL);

	(void) fprintf(stderr, gettext("deroff: %s\n"), s);

	exit(1);
}
309
/* Write the command synopsis to stderr. */
static void
usage(void)
{
	(void) fputs(gettext("usage: deroff [ -w ] [ -m (m s l) ] "
	    "[ -i ] [ file ] ... \n"), stderr);
}
317
318 static void
work(void)319 work(void)
320 {
321
322 for (;;) {
323 if ((C == '.') || (c == '\''))
324 comline();
325 else
326 regline(NO, TWO);
327 }
328 }
329
330
331 static void
regline(int macline,int cnst)332 regline(int macline, int cnst)
333 {
334
335 if (line == NULL) {
336 if ((line = (char *)malloc(linesize * sizeof (char))) == NULL) {
337 fatal_msg(gettext("Cannot allocate memory"));
338 }
339 }
340
341 lindx = 0;
342 line[lindx] = c;
343 for (;;) {
344 if (c == '\\') {
345 line[lindx] = ' ';
346 backsl();
347 if (c == '%') { /* no blank for hyphenation char */
348 lindx--;
349 }
350 }
351 if (c == '\n') {
352 break;
353 }
354 /*
355 * We're just about to add another character to the line
356 * buffer so ensure we don't overrun it.
357 */
358 if (++lindx >= linesize - 1) {
359 linesize = linesize * 2;
360 if ((line = (char *)realloc(line,
361 linesize * sizeof (char))) == NULL) {
362 fatal_msg(gettext("Cannot allocate memory"));
363 }
364 }
365 if (intable && (c == 'T')) {
366 line[lindx] = C;
367 if ((c == '{') || (c == '}')) {
368 line[lindx - 1] = ' ';
369 line[lindx] = C;
370 }
371 } else {
372 line[lindx] = C;
373 }
374 }
375
376 line[lindx] = '\0';
377
378 if (line[0] != '\0') {
379 if (wordflag) {
380 putwords(macline);
381 } else if (macline) {
382 putmac(line, cnst);
383 } else {
384 (void) puts(line);
385 }
386 }
387 }
388
389
390
391
392 static void
putmac(char * s,int cnst)393 putmac(char *s, int cnst)
394 {
395 char *t;
396
397 while (*s) {
398 while ((*s == ' ') || (*s == '\t')) {
399 (void) putchar(*s++);
400 }
401 for (t = s; (*t != ' ') && (*t != '\t') && (*t != '\0'); ++t)
402 ;
403 if (*s == '\"')
404 s++;
405 if ((t > s + cnst) && (chars[s[0]] == LETTER) &&
406 (chars[s[1]] == LETTER)) {
407 while (s < t) {
408 if (*s == '\"')
409 s++;
410 else
411 (void) putchar(*s++);
412 }
413 } else {
414 s = t;
415 }
416 }
417 (void) putchar('\n');
418 }
419
420
421
422 static void
putwords(int macline)423 putwords(int macline) /* break into words for -w option */
424 {
425 char *p, *p1;
426 int i, nlet;
427
428 for (p1 = line; ; ) {
429 /* skip initial specials ampersands and apostrophes */
430 while (chars[*p1] < DIGIT) {
431 if (*p1++ == '\0')
432 return;
433 }
434 nlet = 0;
435 for (p = p1; (i = chars[*p]) != SPECIAL; ++p) {
436 if (i == LETTER)
437 ++nlet;
438 }
439
440 if ((!macline && (nlet > 1)) /* MDM definition of word */ ||
441 (macline && (nlet > 2) && (chars[p1[0]] == LETTER) &&
442 (chars[p1[1]] == LETTER))) {
443 /* delete trailing ampersands and apostrophes */
444 while ((p[-1] == '\'') || (p[-1] == '&')) {
445 --p;
446 }
447 while (p1 < p) {
448 (void) putchar(*p1++);
449 }
450 (void) putchar('\n');
451 } else {
452 p1 = p;
453 }
454 }
455 }
456
457
458
459 static void
comline(void)460 comline(void)
461 {
462 int c1, c2;
463
464 com:
465 while ((C == ' ') || (c == '\t'))
466 ;
467 comx:
468 if ((c1 = c) == '\n')
469 return;
470 c2 = C;
471 if ((c1 == '.') && (c2 != '.'))
472 inmacro = NO;
473 if (c2 == '\n')
474 return;
475
476 if ((c1 == 'E') && (c2 == 'Q') && (filesp == files)) {
477 eqn();
478 } else if ((c1 == 'T') && ((c2 == 'S') || (c2 == 'C') ||
479 (c2 == '&')) && (filesp == files)) {
480 if (msflag) {
481 stbl();
482 } else {
483 tbl();
484 }
485 } else if ((c1 == 'T') && (c2 == 'E')) {
486 intable = NO;
487 } else if (!inmacro && (c1 == 'd') && (c2 == 'e')) {
488 macro();
489 } else if (!inmacro && (c1 == 'i') && (c2 == 'g')) {
490 macro();
491 } else if (!inmacro && (c1 == 'a') && (c2 == 'm')) {
492 macro();
493 } else if ((c1 == 's') && (c2 == 'o')) {
494 if (iflag) {
495 SKIP;
496 } else {
497 getfname();
498 if (fname[0]) {
499 infile = *++filesp = opn(fname);
500 }
501 }
502 } else if ((c1 == 'n') && (c2 == 'x')) {
503 if (iflag) {
504 SKIP;
505 } else {
506 getfname();
507 if (fname[0] == '\0') {
508 exit(0);
509 }
510 if (infile != stdin) {
511 (void) fclose(infile);
512 }
513 infile = *filesp = opn(fname);
514 }
515 } else if ((c1 == 'h') && (c2 == 'w')) {
516 SKIP;
517 } else if (msflag && (c1 == 'T') && (c2 == 'L')) {
518 SKIP_TO_COM;
519 goto comx;
520 } else if (msflag && (c1 == 'N') && (c2 == 'R')) {
521 SKIP;
522 } else if (msflag && (c1 == 'A') && ((c2 == 'U') || (c2 == 'I'))) {
523 if (mac == MM) {
524 SKIP;
525 } else {
526 SKIP_TO_COM;
527 goto comx;
528 }
529 } else if (msflag && (c1 == 'F') && (c2 == 'S')) {
530 SKIP_TO_COM;
531 goto comx;
532 } else if (msflag && (c1 == 'S') && (c2 == 'H')) {
533 SKIP_TO_COM;
534 goto comx;
535 } else if (msflag && (c1 == 'N') && (c2 == 'H')) {
536 SKIP_TO_COM;
537 goto comx;
538 } else if (msflag && (c1 == 'O') && (c2 == 'K')) {
539 SKIP_TO_COM;
540 goto comx;
541 } else if (msflag && (c1 == 'N') && (c2 == 'D')) {
542 SKIP;
543 } else if (msflag && (mac == MM) && (c1 == 'H') &&
544 ((c2 == ' ') || (c2 == 'U'))) {
545 SKIP;
546 } else if (msflag && (mac == MM) && (c2 == 'L')) {
547 if (disp || (c1 == 'R')) {
548 sdis('L', 'E');
549 } else {
550 SKIP;
551 (void) putchar('.');
552 }
553 } else if (msflag && ((c1 == 'D') || (c1 == 'N') ||
554 (c1 == 'K') || (c1 == 'P')) && (c2 == 'S')) {
555 sdis(c1, 'E'); /* removed RS-RE */
556 } else if (msflag && (c1 == 'K' && c2 == 'F')) {
557 sdis(c1, 'E');
558 } else if (msflag && (c1 == 'n') && (c2 == 'f')) {
559 sdis('f', 'i');
560 } else if (msflag && (c1 == 'c') && (c2 == 'e')) {
561 sce();
562 } else {
563 if ((c1 == '.') && (c2 == '.')) {
564 while (C == '.')
565 ;
566 }
567 ++inmacro;
568 if ((c1 <= 'Z') && msflag) {
569 regline(YES, ONE);
570 } else {
571 regline(YES, TWO);
572 }
573 --inmacro;
574 }
575 }
576
577
578
579 static void
macro(void)580 macro(void)
581 {
582 if (msflag) {
583 /* look for .. */
584 do {
585 SKIP;
586 } while ((C != '.') || (C != '.') || (C == '.'));
587 if (c != '\n') {
588 SKIP;
589 }
590 return;
591 }
592 SKIP;
593 inmacro = YES;
594 }
595
596
597
598
599 static void
sdis(char a1,char a2)600 sdis(char a1, char a2)
601 {
602 int c1, c2;
603 int eqnf;
604 int notdone = 1;
605 eqnf = 1;
606 SKIP;
607 while (notdone) {
608 while (C != '.')
609 SKIP;
610 if ((c1 = C) == '\n')
611 continue;
612 if ((c2 = C) == '\n')
613 continue;
614 if ((c1 == a1) && (c2 == a2)) {
615 SKIP;
616 if (eqnf)
617 (void) putchar('.');
618 (void) putchar('\n');
619 return;
620 } else if ((a1 == 'D') && (c1 == 'E') && (c2 == 'Q')) {
621 eqn();
622 eqnf = 0;
623 } else {
624 SKIP;
625 }
626 }
627 }
628
629 static void
tbl(void)630 tbl(void)
631 {
632 while (C != '.')
633 ;
634 SKIP;
635 intable = YES;
636 }
637
638 static void
stbl(void)639 stbl(void)
640 {
641 while (C != '.')
642 ;
643 SKIP_TO_COM;
644 if ((c != 'T') || (C != 'E')) {
645 SKIP;
646 pc = c;
647 while ((C != '.') || (pc != '\n') ||
648 (C != 'T') || (C != 'E')) {
649 pc = c;
650 }
651 }
652 }
653
654 static void
eqn(void)655 eqn(void)
656 {
657 int c1, c2;
658 int dflg;
659 int last;
660
661 last = 0;
662 dflg = 1;
663 SKIP;
664
665 for (;;) {
666 if ((C1 == '.') || (c == '\'')) {
667 while ((C1 == ' ') || (c == '\t'))
668 ;
669 if ((c == 'E') && (C1 == 'N')) {
670 SKIP;
671 if (msflag && dflg) {
672 (void) putchar('x');
673 (void) putchar(' ');
674 if (last) {
675 (void) putchar('.');
676 (void) putchar(' ');
677 }
678 }
679 return;
680 }
681 } else if (c == 'd') { /* look for delim */
682 if ((C1 == 'e') && (C1 == 'l')) {
683 if ((C1 == 'i') && (C1 == 'm')) {
684 while (C1 == ' ')
685 ;
686 if (((c1 = c) == '\n') ||
687 ((c2 = C1) == '\n') ||
688 ((c1 == 'o') && (c2 == 'f') &&
689 (C1 == 'f'))) {
690 ldelim = NOCHAR;
691 rdelim = NOCHAR;
692 } else {
693 ldelim = c1;
694 rdelim = c2;
695 }
696 }
697 dflg = 0;
698 }
699 }
700
701 if (c != '\n') {
702 while (C1 != '\n') {
703 if (c == '.') {
704 last = 1;
705 } else {
706 last = 0;
707 }
708 }
709 }
710 }
711 }
712
713
714
715 static void
backsl(void)716 backsl(void) /* skip over a complete backslash construction */
717 {
718 int bdelim;
719
720 sw: switch (C) {
721 case '"':
722 SKIP;
723 return;
724 case 's':
725 if (C == '\\') {
726 backsl();
727 } else {
728 while ((C >= '0') && (c <= '9'))
729 ;
730 (void) ungetc(c, infile);
731 c = '0';
732 }
733 lindx--;
734 return;
735
736 case 'f':
737 case 'n':
738 case '*':
739 if (C != '(')
740 return;
741 /* FALLTHROUGH */
742
743 case '(':
744 if (C != '\n') {
745 (void) C;
746 }
747 return;
748
749 case '$':
750 (void) C; /* discard argument number */
751 return;
752
753 case 'b':
754 case 'x':
755 case 'v':
756 case 'h':
757 case 'w':
758 case 'o':
759 case 'l':
760 case 'L':
761 if ((bdelim = C) == '\n')
762 return;
763 while ((C != '\n') && (c != bdelim))
764 if (c == '\\')
765 backsl();
766 return;
767
768 case '\\':
769 if (inmacro)
770 goto sw;
771 default:
772 return;
773 }
774 }
775
776
777
778
/*
 * Return a newly allocated copy of the string 's'.  The program is
 * terminated through fatal_msg() when memory cannot be allocated.  The
 * length is kept as a size_t throughout so that it is never truncated
 * before the allocation.
 */
static char *
copys(char *s)
{
	size_t len = strlen(s) + 1;	/* includes the terminating NUL */
	char *t0;

	if ((t0 = malloc(len)) == NULL)
		fatal_msg(gettext("Cannot allocate memory"));

	(void) memcpy(t0, s, len);
	return (t0);
}
791
792 static void
sce(void)793 sce(void)
794 {
795 char *ap;
796 int n, i;
797 char a[10];
798
799 for (ap = a; C != '\n'; ap++) {
800 *ap = c;
801 if (ap == &a[9]) {
802 SKIP;
803 ap = a;
804 break;
805 }
806 }
807 if (ap != a) {
808 n = atoi(a);
809 } else {
810 n = 1;
811 }
812 for (i = 0; i < n; ) {
813 if (C == '.') {
814 if (C == 'c') {
815 if (C == 'e') {
816 while (C == ' ')
817 ;
818 if (c == '0') {
819 break;
820 } else {
821 SKIP;
822 }
823 } else {
824 SKIP;
825 }
826 } else {
827 SKIP;
828 }
829 } else {
830 SKIP;
831 i++;
832 }
833 }
834 }
835