1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 #include <assert.h>
31 #include <errno.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <locale.h>
36 #include <sys/varargs.h>
37
/*
 * Deroff command -- strip troff, eqn, and tbl sequences from a file.
 * It accepts three flags:
 * -w causes the output to be one word per line rather than in the
 * original format.
 * -mm (or -ms) causes the corresponding macros to be interpreted
 * so that just sentences are output;
 * -ml also gets rid of lists.
 * -i causes deroff to ignore .so and .nx commands.
 * Unless -i is given, deroff follows .so and .nx commands.  It removes
 * the contents of macro definitions, equations (both .EQ ... .EN and
 * $...$), tbl command sequences, and troff backslash constructions.
 *
 * All input is through the C macro; the most recently read character
 * is in c.
 */
53
/*
 * C reads the next character from infile into c and yields it.  At end
 * of file it yields the result of eof() instead.  When the character
 * equals ldelim and no .so file is active (filesp == files) it yields
 * the result of skeqn() instead.
 */
#define	C	((c = getc(infile)) == EOF ? eof() : \
		    ((c == ldelim) && (filesp == files) ? skeqn() : c))
/* C1 is like C but never checks for the equation delimiter. */
#define	C1	((c = getc(infile)) == EOF ? eof() : c)
/*
 * SKIP reads with C until a newline has been read.  It expands to a
 * bare "while (...)", so the caller supplies the terminating ';'.
 */
#define	SKIP	while (C != '\n')
/*
 * SKIP_TO_COM runs SKIP twice and then keeps reading until it has read
 * a '.' while pc holds '\n', followed by a character that is not
 * greater than 'Z'.  pc is set to the last character read before each
 * new attempt.  This expands to several statements, so it must only be
 * used where a statement list is valid.
 */
#define	SKIP_TO_COM	SKIP; SKIP; pc = c; \
			while ((C != '.') || (pc != '\n') || \
			    (C > 'Z')) { \
				pc = c; \
			}
63
/* Boolean values used for the flag variables below. */
#define	YES 1
#define	NO 0
/* Values of 'mac': which macro package -m selected. */
#define	MS 0
#define	MM 1
/* Values passed as the 'cnst' argument of regline()/putmac(). */
#define	ONE 1
#define	TWO 2

/* Value of ldelim/rdelim when no in-line equation delimiter is set. */
#define	NOCHAR -2
/* Character classes stored in chars[]. */
#define	SPECIAL 0
#define	APOS 1
#define	DIGIT 2
#define	LETTER 3

/* Initial size, in bytes, of the line buffer. */
#define	MAXLINESZ 512

static int wordflag = NO;	/* set by -w */
static int msflag = NO;		/* set by -m */
static int iflag = NO;		/* set by -i */
static int mac = MM;		/* MM or MS */
static int disp = 0;		/* set by -ml */
static int inmacro = NO;
static int intable = NO;
static int lindx;		/* current index into line[] */
static size_t linesize = MAXLINESZ;	/* allocated size of line[] */

static char chars[128];	/* SPECIAL, APOS, DIGIT, or LETTER */

static char *line = NULL;

/*
 * Most recently read character.  This must be an int, not a char:
 * it receives the result of getc() and is compared against EOF and
 * against ldelim (which may hold NOCHAR).  Stored in a char, the byte
 * 0xFF would compare equal to EOF and 0xFE equal to NOCHAR where char
 * is signed, and EOF would never be detected where char is unsigned.
 */
static int c;
static int pc;
static int ldelim = NOCHAR;
static int rdelim = NOCHAR;

static int argc;
static char **argv;

extern int optind;
extern char *optarg;
static char fname[50];		/* file name read by getfname() */
static FILE *files[15];		/* stack of open input files */
static FILE **filesp;		/* top of the files[] stack */
static FILE *infile;		/* current input file */
107
/* Forward declarations of the file-local functions defined below. */
static void backsl(void);
static void comline(void);
static char *copys(char *);
static int eof(void);
static void eqn(void);
static void fatal(const char *, ...);
static void fatal_msg(char *);
static void getfname(void);
static void macro(void);
static FILE *opn(char *);
static void putmac(char *, int);
static void putwords(int);
static void regline(int, int);
static void sce(void);
static int skeqn(void);
static void sdis(char, char);
static void stbl(void);
static void tbl(void);
static void usage(void);
/* work() loops forever; the program leaves through exit(). */
static void work(void) __NORETURN;
128
129 int
main(int ac,char ** av)130 main(int ac, char **av)
131 {
132 int i;
133 int errflg = 0;
134 int optchar;
135
136 (void) setlocale(LC_ALL, "");
137 #if !defined(TEXT_DOMAIN)
138 #define TEXT_DOMAIN "SYS_TEST"
139 #endif
140 (void) textdomain(TEXT_DOMAIN);
141 argc = ac;
142 argv = av;
143 while ((optchar = getopt(argc, argv, "wim:")) != EOF) {
144 switch (optchar) {
145 case 'w':
146 wordflag = YES;
147 break;
148 case 'm':
149 msflag = YES;
150 if (*optarg == 'm')
151 mac = MM;
152 else if (*optarg == 's')
153 mac = MS;
154 else if (*optarg == 'l')
155 disp = 1;
156 else
157 errflg++;
158 break;
159 case 'i':
160 iflag = YES;
161 break;
162 case '?':
163 errflg++;
164 }
165 }
166 if (errflg) {
167 usage();
168 return (1);
169 }
170 if (optind == argc)
171 infile = stdin;
172 else
173 infile = opn(argv[optind++]);
174 files[0] = infile;
175 filesp = &files[0];
176
177 for (i = 'a'; i <= 'z'; ++i)
178 chars[i] = LETTER;
179 for (i = 'A'; i <= 'Z'; ++i)
180 chars[i] = LETTER;
181 for (i = '0'; i <= '9'; ++i)
182 chars[i] = DIGIT;
183 chars['\''] = APOS;
184 chars['&'] = APOS;
185 work();
186 /* NOTREACHED */
187 }
188
189
/*
 * Skip an in-line equation.  The C macro calls this when it reads the
 * left delimiter (ldelim).  Input is consumed up to and including the
 * right delimiter (rdelim).  Inside a double-quoted string the right
 * delimiter does not end the equation, and a backslash causes the
 * character after it to be consumed as well.
 *
 * Returns the character that stands in for the equation, which is also
 * stored in c: 'x' when msflag is set, a blank otherwise.
 */
static int
skeqn(void)
{
	while ((c = getc(infile)) != rdelim) {
		if (c == EOF) {
			/*
			 * Move on to the next input.  The value stored here
			 * is overwritten by the getc() in the loop test.
			 */
			c = eof();
		} else if (c == '"') {
			/* quoted string: read through the closing quote */
			while ((c = getc(infile)) != '"') {
				if (c == EOF) {
					c = eof();
				} else if (c == '\\') {
					/* consume the escaped character */
					if ((c = getc(infile)) == EOF) {
						c = eof();
					}
				}
			}
		}
	}
	if (msflag) {
		return (c = 'x');
	}
	return (c = ' ');
}
213
214
/*
 * Open the file named 'p' for reading and return its stream.  If the
 * file cannot be opened, a message is reported through fatal(), which
 * terminates the program.  Callers must pass a non-null 'p'.
 */
static FILE *
opn(char *p)
{
	FILE *fp;

	assert(p != NULL);

	fp = fopen(p, "r");
	if (fp != NULL)
		return (fp);

	fatal(gettext("Cannot open file %s: %s\n"), p, strerror(errno));
	return (fp);
}
227
228
229
230 static int
eof(void)231 eof(void)
232 {
233 if (infile != stdin)
234 (void) fclose(infile);
235 if (filesp > files) {
236 infile = *--filesp;
237 } else if (optind < argc) {
238 infile = opn(argv[optind++]);
239 } else {
240 exit(0);
241 }
242
243 return (C);
244 }
245
246
247
248 static void
getfname(void)249 getfname(void)
250 {
251 char *p;
252 struct chain {
253 struct chain *nextp;
254 char *datap;
255 };
256 struct chain *q;
257 static struct chain *namechain = NULL;
258
259 while (C == ' ')
260 ;
261
262 for (p = fname; ((*p = c) != '\n') && (c != ' ') && (c != '\t') &&
263 (c != '\\'); ++p) {
264 (void) C;
265 }
266 *p = '\0';
267 while (c != '\n') {
268 (void) C;
269 }
270
271 /* see if this name has already been used */
272 for (q = namechain; q; q = q->nextp)
273 if (strcmp(fname, q->datap) != 0) {
274 fname[0] = '\0';
275 return;
276 }
277
278 q = (struct chain *)calloc(1, sizeof (*namechain));
279 q->nextp = namechain;
280 q->datap = copys(fname);
281 namechain = q;
282 }
283
284
/*
 * Print "deroff: " followed by a printf-style formatted message on
 * stderr, then exit with status 1.  This function does not return.
 *
 * Functions calling fatal() should ensure 'format' and
 * arguments are non-null.
 */
static void
fatal(const char *format, ...)
{
	va_list alist;

	assert(format != NULL);
	(void) fputs(gettext("deroff: "), stderr);
	va_start(alist, format);
	(void) vfprintf(stderr, format, alist);
	/* Every va_start() must be paired with va_end() in the same function. */
	va_end(alist);
	exit(1);
}
300
/*
 * Print the fixed message 's', prefixed with "deroff: " and followed by
 * a newline, on stderr and exit with status 1.  Callers must pass a
 * non-null 's'.  This function does not return.
 */
static void
fatal_msg(char *s)
{
	FILE *errout = stderr;

	assert(s != NULL);
	(void) fprintf(errout, gettext("deroff: %s\n"), s);
	exit(1);
}
309
/* Write the one-line usage summary to stderr. */
static void
usage(void)
{
	FILE *errout = stderr;

	(void) fputs(gettext("usage: deroff [ -w ] [ -m (m s l) ] [ -i ] "
	    "[ file ] ... \n"), errout);
}
317
318 static void
work(void)319 work(void)
320 {
321
322 for (;;) {
323 if ((C == '.') || (c == '\''))
324 comline();
325 else
326 regline(NO, TWO);
327 }
328 }
329
330
/*
 * Collect the rest of the current input line into the global buffer
 * 'line' and output it.  On entry c holds the first character of the
 * text.  Backslash constructions are skipped with backsl() and replaced
 * by a blank.
 *
 * macline is non-zero when the text comes from a request line; it is
 * passed to putwords() and selects putmac() over puts().
 * cnst is passed through to putmac().
 *
 * NOTE(review): lindx is decremented both here and in backsl(); it
 * looks possible for it to go below zero for some escape sequences at
 * the start of a line -- confirm.
 */
static void
regline(int macline, int cnst)
{

	/* Allocate the line buffer on first use. */
	if (line == NULL) {
		if ((line = (char *)malloc(linesize * sizeof (char))) == NULL) {
			fatal_msg(gettext("Cannot allocate memory"));
		}
	}

	lindx = 0;
	line[lindx] = c;
	for (;;) {
		if (c == '\\') {
			/* the escape is replaced by a blank in the output */
			line[lindx] = ' ';
			backsl();
			if (c == '%') {	/* no blank for hyphenation char */
				lindx--;
			}
		}
		if (c == '\n') {
			break;
		}
		/*
		 * We're just about to add another character to the line
		 * buffer so ensure we don't overrun it.
		 */
		if (++lindx >= linesize - 1) {
			linesize = linesize * 2;
			if ((line = (char *)realloc(line,
			    linesize * sizeof (char))) == NULL) {
				fatal_msg(gettext("Cannot allocate memory"));
			}
		}
		if (intable && (c == 'T')) {
			/*
			 * Inside a table, a 'T' followed by '{' or '}' is
			 * blanked out and the character after the brace is
			 * stored in the brace's place.
			 */
			line[lindx] = C;
			if ((c == '{') || (c == '}')) {
				line[lindx - 1] = ' ';
				line[lindx] = C;
			}
		} else {
			line[lindx] = C;
		}
	}

	/* The newline that ended the loop is replaced by the terminator. */
	line[lindx] = '\0';

	/* Empty lines produce no output. */
	if (line[0] != '\0') {
		if (wordflag) {
			putwords(macline);
		} else if (macline) {
			putmac(line, cnst);
		} else {
			(void) puts(line);
		}
	}
}
388
389
390
391
392 static void
putmac(char * s,int cnst)393 putmac(char *s, int cnst)
394 {
395 char *t;
396
397 while (*s) {
398 while ((*s == ' ') || (*s == '\t')) {
399 (void) putchar(*s++);
400 }
401 for (t = s; (*t != ' ') && (*t != '\t') && (*t != '\0'); ++t)
402 ;
403 if (*s == '\"')
404 s++;
405 if ((t > s + cnst) && (chars[s[0]] == LETTER) &&
406 (chars[s[1]] == LETTER)) {
407 while (s < t) {
408 if (*s == '\"')
409 s++;
410 else
411 (void) putchar(*s++);
412 }
413 } else {
414 s = t;
415 }
416 }
417 (void) putchar('\n');
418 }
419
420
421
422 static void
putwords(int macline)423 putwords(int macline) /* break into words for -w option */
424 {
425 char *p, *p1;
426 int i, nlet;
427
428 for (p1 = line; ; ) {
429 /* skip initial specials ampersands and apostrophes */
430 while (chars[*p1] < DIGIT) {
431 if (*p1++ == '\0')
432 return;
433 }
434 nlet = 0;
435 for (p = p1; (i = chars[*p]) != SPECIAL; ++p) {
436 if (i == LETTER)
437 ++nlet;
438 }
439
440 if ((!macline && (nlet > 1)) /* MDM definition of word */ ||
441 (macline && (nlet > 2) && (chars[p1[0]] == LETTER) &&
442 (chars[p1[1]] == LETTER))) {
443 /* delete trailing ampersands and apostrophes */
444 while ((p[-1] == '\'') || (p[-1] == '&')) {
445 --p;
446 }
447 while (p1 < p) {
448 (void) putchar(*p1++);
449 }
450 (void) putchar('\n');
451 } else {
452 p1 = p;
453 }
454 }
455 }
456
457
458
459 static void
comline(void)460 comline(void)
461 {
462 int c1, c2;
463
464 while ((C == ' ') || (c == '\t'))
465 ;
466 comx:
467 if ((c1 = c) == '\n')
468 return;
469 c2 = C;
470 if ((c1 == '.') && (c2 != '.'))
471 inmacro = NO;
472 if (c2 == '\n')
473 return;
474
475 if ((c1 == 'E') && (c2 == 'Q') && (filesp == files)) {
476 eqn();
477 } else if ((c1 == 'T') && ((c2 == 'S') || (c2 == 'C') ||
478 (c2 == '&')) && (filesp == files)) {
479 if (msflag) {
480 stbl();
481 } else {
482 tbl();
483 }
484 } else if ((c1 == 'T') && (c2 == 'E')) {
485 intable = NO;
486 } else if (!inmacro && (c1 == 'd') && (c2 == 'e')) {
487 macro();
488 } else if (!inmacro && (c1 == 'i') && (c2 == 'g')) {
489 macro();
490 } else if (!inmacro && (c1 == 'a') && (c2 == 'm')) {
491 macro();
492 } else if ((c1 == 's') && (c2 == 'o')) {
493 if (iflag) {
494 SKIP;
495 } else {
496 getfname();
497 if (fname[0]) {
498 infile = *++filesp = opn(fname);
499 }
500 }
501 } else if ((c1 == 'n') && (c2 == 'x')) {
502 if (iflag) {
503 SKIP;
504 } else {
505 getfname();
506 if (fname[0] == '\0') {
507 exit(0);
508 }
509 if (infile != stdin) {
510 (void) fclose(infile);
511 }
512 infile = *filesp = opn(fname);
513 }
514 } else if ((c1 == 'h') && (c2 == 'w')) {
515 SKIP;
516 } else if (msflag && (c1 == 'T') && (c2 == 'L')) {
517 SKIP_TO_COM;
518 goto comx;
519 } else if (msflag && (c1 == 'N') && (c2 == 'R')) {
520 SKIP;
521 } else if (msflag && (c1 == 'A') && ((c2 == 'U') || (c2 == 'I'))) {
522 if (mac == MM) {
523 SKIP;
524 } else {
525 SKIP_TO_COM;
526 goto comx;
527 }
528 } else if (msflag && (c1 == 'F') && (c2 == 'S')) {
529 SKIP_TO_COM;
530 goto comx;
531 } else if (msflag && (c1 == 'S') && (c2 == 'H')) {
532 SKIP_TO_COM;
533 goto comx;
534 } else if (msflag && (c1 == 'N') && (c2 == 'H')) {
535 SKIP_TO_COM;
536 goto comx;
537 } else if (msflag && (c1 == 'O') && (c2 == 'K')) {
538 SKIP_TO_COM;
539 goto comx;
540 } else if (msflag && (c1 == 'N') && (c2 == 'D')) {
541 SKIP;
542 } else if (msflag && (mac == MM) && (c1 == 'H') &&
543 ((c2 == ' ') || (c2 == 'U'))) {
544 SKIP;
545 } else if (msflag && (mac == MM) && (c2 == 'L')) {
546 if (disp || (c1 == 'R')) {
547 sdis('L', 'E');
548 } else {
549 SKIP;
550 (void) putchar('.');
551 }
552 } else if (msflag && ((c1 == 'D') || (c1 == 'N') ||
553 (c1 == 'K') || (c1 == 'P')) && (c2 == 'S')) {
554 sdis(c1, 'E'); /* removed RS-RE */
555 } else if (msflag && (c1 == 'K' && c2 == 'F')) {
556 sdis(c1, 'E');
557 } else if (msflag && (c1 == 'n') && (c2 == 'f')) {
558 sdis('f', 'i');
559 } else if (msflag && (c1 == 'c') && (c2 == 'e')) {
560 sce();
561 } else {
562 if ((c1 == '.') && (c2 == '.')) {
563 while (C == '.')
564 ;
565 }
566 ++inmacro;
567 if ((c1 <= 'Z') && msflag) {
568 regline(YES, ONE);
569 } else {
570 regline(YES, TWO);
571 }
572 --inmacro;
573 }
574 }
575
576
577
578 static void
macro(void)579 macro(void)
580 {
581 if (msflag) {
582 /* look for .. */
583 do {
584 SKIP;
585 } while ((C != '.') || (C != '.') || (C == '.'));
586 if (c != '\n') {
587 SKIP;
588 }
589 return;
590 }
591 SKIP;
592 inmacro = YES;
593 }
594
595
596
597
598 static void
sdis(char a1,char a2)599 sdis(char a1, char a2)
600 {
601 int c1, c2;
602 int eqnf;
603 int notdone = 1;
604 eqnf = 1;
605 SKIP;
606 while (notdone) {
607 while (C != '.')
608 SKIP;
609 if ((c1 = C) == '\n')
610 continue;
611 if ((c2 = C) == '\n')
612 continue;
613 if ((c1 == a1) && (c2 == a2)) {
614 SKIP;
615 if (eqnf)
616 (void) putchar('.');
617 (void) putchar('\n');
618 return;
619 } else if ((a1 == 'D') && (c1 == 'E') && (c2 == 'Q')) {
620 eqn();
621 eqnf = 0;
622 } else {
623 SKIP;
624 }
625 }
626 }
627
628 static void
tbl(void)629 tbl(void)
630 {
631 while (C != '.')
632 ;
633 SKIP;
634 intable = YES;
635 }
636
/*
 * Skip a table when -m is in effect; comline() calls this for the
 * .TS, .TC and .T& requests.  Input is discarded through the first
 * '.', then SKIP_TO_COM advances to a following request line.  Unless
 * that request is "TE", input is discarded until a '.' that follows a
 * newline and is itself followed by "TE".
 */
static void
stbl(void)
{
	while (C != '.')
		;
	SKIP_TO_COM;
	/* here c is the first character of the request name just found */
	if ((c != 'T') || (C != 'E')) {
		SKIP;
		pc = c;
		/* pc tracks the previous character to detect a line start */
		while ((C != '.') || (pc != '\n') ||
		    (C != 'T') || (C != 'E')) {
			pc = c;
		}
	}
}
652
/*
 * Skip an equation block.  The rest of the opening line is discarded
 * and following lines are read with C1 (no delimiter processing) until
 * a request line naming "EN" is found.
 *
 * While skipping, a line that starts with "delim" sets the in-line
 * equation delimiters: ldelim and rdelim become the two characters
 * after the blanks that follow, or NOCHAR when the line ends early or
 * the argument is "off".
 *
 * With -m, "x " is written in place of the equation unless a line
 * starting with "del" was seen; ". " is added when 'last' is set.
 */
static void
eqn(void)
{
	int c1, c2;
	int dflg;	/* cleared once a line starting with "del" is seen */
	int last;	/* set when the last character skipped was '.' */

	last = 0;
	dflg = 1;
	SKIP;

	for (;;) {
		if ((C1 == '.') || (c == '\'')) {
			/* request line: skip blanks and tabs, look for EN */
			while ((C1 == ' ') || (c == '\t'))
				;
			if ((c == 'E') && (C1 == 'N')) {
				SKIP;
				if (msflag && dflg) {
					(void) putchar('x');
					(void) putchar(' ');
					if (last) {
						(void) putchar('.');
						(void) putchar(' ');
					}
				}
				return;
			}
		} else if (c == 'd') {	/* look for delim */
			if ((C1 == 'e') && (C1 == 'l')) {
				if ((C1 == 'i') && (C1 == 'm')) {
					while (C1 == ' ')
						;
					/*
					 * c1 and c2 are the two delimiter
					 * characters; c2 is only read when
					 * c1 is not a newline.
					 */
					if (((c1 = c) == '\n') ||
					    ((c2 = C1) == '\n') ||
					    ((c1 == 'o') && (c2 == 'f') &&
					    (C1 == 'f'))) {
						ldelim = NOCHAR;
						rdelim = NOCHAR;
					} else {
						ldelim = c1;
						rdelim = c2;
					}
				}
				/* note: cleared for any line starting "del" */
				dflg = 0;
			}
		}

		/* discard the rest of the line, noting a trailing '.' */
		if (c != '\n') {
			while (C1 != '\n') {
				if (c == '.') {
					last = 1;
				} else {
					last = 0;
				}
			}
		}
	}
}
711
712
713
/*
 * Skip over one backslash construction.  regline() calls this after a
 * '\\' has been read; the character that follows selects how much
 * more input belongs to the construction.  On return c holds the last
 * character consumed.  The 's' case also decrements lindx.
 */
static void
backsl(void)	/* skip over a complete backslash construction */
{
	int bdelim;

sw:	switch (C) {
	case '"':
		/* discard the remainder of the line */
		SKIP;
		return;
	case 's':
		if (C == '\\') {
			backsl();
		} else {
			/*
			 * Consume a run of digits, push back the character
			 * that ended it and leave '0' in c.
			 */
			while ((C >= '0') && (c <= '9'))
				;
			(void) ungetc(c, infile);
			c = '0';
		}
		lindx--;
		return;

	case 'f':
	case 'n':
	case '*':
		/* one character follows, or '(' and two more */
		if (C != '(')
			return;
		/* FALLTHROUGH */

	case '(':
		/* two characters follow, unless the line ends first */
		if (C != '\n') {
			(void) C;
		}
		return;

	case '$':
		(void) C;	/* discard argument number */
		return;

	case 'b':
	case 'x':
	case 'v':
	case 'h':
	case 'w':
	case 'o':
	case 'l':
	case 'L':
		/*
		 * The next character is a delimiter; consume input up to
		 * its next occurrence or the end of the line, handling
		 * nested backslash constructions on the way.
		 */
		if ((bdelim = C) == '\n')
			return;
		while ((C != '\n') && (c != bdelim))
			if (c == '\\')
				backsl();
		return;

	case '\\':
		/* inside a macro, a doubled backslash starts over */
		if (inmacro)
			goto sw;
		/* FALLTHROUGH */
	default:
		return;
	}
}
774
775
776
777
/*
 * Return a newly allocated copy of the NUL-terminated string 's'.
 * Allocation failure is reported through fatal_msg(), which exits.
 */
static char *
copys(char *s)
{
	char *dup;

	dup = calloc((unsigned)(strlen(s) + 1), sizeof (*dup));
	if (dup == NULL)
		fatal_msg(gettext("Cannot allocate memory"));

	(void) strcpy(dup, s);
	return (dup);
}
790
791 static void
sce(void)792 sce(void)
793 {
794 char *ap;
795 int n, i;
796 char a[10];
797
798 for (ap = a; C != '\n'; ap++) {
799 *ap = c;
800 if (ap == &a[9]) {
801 SKIP;
802 ap = a;
803 break;
804 }
805 }
806 if (ap != a) {
807 n = atoi(a);
808 } else {
809 n = 1;
810 }
811 for (i = 0; i < n; ) {
812 if (C == '.') {
813 if (C == 'c') {
814 if (C == 'e') {
815 while (C == ' ')
816 ;
817 if (c == '0') {
818 break;
819 } else {
820 SKIP;
821 }
822 } else {
823 SKIP;
824 }
825 } else {
826 SKIP;
827 }
828 } else {
829 SKIP;
830 i++;
831 }
832 }
833 }
834