1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 #pragma ident "%Z%%M% %I% %E% SMI"
31
32 #include <assert.h>
33 #include <errno.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <locale.h>
38 #include <sys/varargs.h>
39
/*
 * Deroff command -- strip troff, eqn, and tbl sequences from a file.
 * Options:
 *	-w	output one word per line rather than in the original format.
 *	-mm (or -ms) causes the corresponding macros to be interpreted
 *		so that just sentences are output.
 *	-ml	also gets rid of lists.
 *	-i	causes deroff to ignore .so and .nx commands.
 * Unless -i is given, deroff follows .so and .nx commands.  It removes
 * the contents of macro definitions, equations (both .EQ ... .EN and
 * $...$), tbl command sequences, and troff backslash constructions.
 *
 * All input is through the C macro; the most recently read character
 * is in c.
 */
55
/*
 * C reads the next character from infile into the global c.  At EOF it
 * yields eof(); if the character equals ldelim while the outermost file
 * is being read (filesp == files) it yields skeqn(); otherwise it yields c.
 */
#define	C ((c = getc(infile)) == EOF ? eof() : \
		    ((c == ldelim) && (filesp == files) ? skeqn() : c))
/* C1 is C without the ldelim check. */
#define	C1 ((c = getc(infile)) == EOF ? eof() : c)
/* SKIP reads (via C) up to and including the next newline. */
#define	SKIP while (C != '\n')
/*
 * SKIP_TO_COM reads past two newlines and then keeps reading until a '.'
 * whose preceding character (pc) was a newline is followed by a character
 * that is not greater than 'Z'.  It expands to several statements, so it
 * must only be used where a statement list is valid (inside braces).
 */
#define	SKIP_TO_COM SKIP; SKIP; pc = c; \
			while ((C != '.') || (pc != '\n') || \
			    (C > 'Z')) { \
				pc = c; \
			}
65
/* Boolean values used by the flag variables. */
#define	YES 1
#define	NO 0
/* Values of mac: the macro package selected by -ms or -mm. */
#define	MS 0
#define	MM 1
/* Values passed as the cnst argument of regline(). */
#define	ONE 1
#define	TWO 2

/* Value of ldelim/rdelim while no equation delimiter is set. */
#define	NOCHAR -2
/* Character classes stored in chars[]. */
#define	SPECIAL 0
#define	APOS 1
#define	DIGIT 2
#define	LETTER 3

/* Initial size of the line buffer; regline() doubles it when it fills. */
#define	MAXLINESZ 512

static int wordflag = NO;	/* -w given */
static int msflag = NO;		/* -m given */
static int iflag = NO;		/* -i given */
static int mac = MM;		/* MM or MS */
static int disp = 0;		/* set by -ml */
static int inmacro = NO;	/* set by macro(), counted by comline() */
static int intable = NO;	/* set by tbl(), cleared on .TE */
static int lindx;		/* current index into line */
static size_t linesize = MAXLINESZ;	/* allocated size of line */

static char chars[128];  /* SPECIAL, APOS, DIGIT, or LETTER */

static char *line = NULL;	/* line buffer filled by regline() */

/*
 * Most recently read character.  This must be an int, not a char: it
 * receives the result of getc() and is compared against EOF, and a plain
 * char cannot represent both every byte value and EOF.
 */
static int c;
static int pc;			/* previous character (SKIP_TO_COM, stbl) */
static int ldelim = NOCHAR;	/* eqn in-line delimiters set by "delim" */
static int rdelim = NOCHAR;

static int argc;		/* copies of main()'s arguments */
static char **argv;

extern int optind;
extern char *optarg;
static char fname[50];		/* file name read by getfname() */
static FILE *files[15];		/* stack of files opened through .so */
static FILE **filesp;		/* current entry in files[] */
static FILE *infile;		/* stream currently being read */
109
/* Forward declarations of the file-local functions defined below. */
static void backsl(void);
static void comline(void);
static char *copys(char *);
static int eof(void);
static void eqn(void);
static void fatal(const char *, ...);
static void fatal_msg(char *);
static void getfname(void);
static void macro(void);
static FILE *opn(char *);
static void putmac(char *, int);
static void putwords(int);
static void regline(int, int);
static void sce(void);
static int skeqn(void);
static void sdis(char, char);
static void stbl(void);
static void tbl(void);
static void usage(void);
/* work() loops forever; the program ends through exit(). */
static void work(void) __NORETURN;
130
131 int
main(int ac,char ** av)132 main(int ac, char **av)
133 {
134 int i;
135 int errflg = 0;
136 int optchar;
137
138 (void) setlocale(LC_ALL, "");
139 #if !defined(TEXT_DOMAIN)
140 #define TEXT_DOMAIN "SYS_TEST"
141 #endif
142 (void) textdomain(TEXT_DOMAIN);
143 argc = ac;
144 argv = av;
145 while ((optchar = getopt(argc, argv, "wim:")) != EOF) {
146 switch (optchar) {
147 case 'w':
148 wordflag = YES;
149 break;
150 case 'm':
151 msflag = YES;
152 if (*optarg == 'm')
153 mac = MM;
154 else if (*optarg == 's')
155 mac = MS;
156 else if (*optarg == 'l')
157 disp = 1;
158 else
159 errflg++;
160 break;
161 case 'i':
162 iflag = YES;
163 break;
164 case '?':
165 errflg++;
166 }
167 }
168 if (errflg) {
169 usage();
170 return (1);
171 }
172 if (optind == argc)
173 infile = stdin;
174 else
175 infile = opn(argv[optind++]);
176 files[0] = infile;
177 filesp = &files[0];
178
179 for (i = 'a'; i <= 'z'; ++i)
180 chars[i] = LETTER;
181 for (i = 'A'; i <= 'Z'; ++i)
182 chars[i] = LETTER;
183 for (i = '0'; i <= '9'; ++i)
184 chars[i] = DIGIT;
185 chars['\''] = APOS;
186 chars['&'] = APOS;
187 work();
188 /* NOTREACHED */
189 }
190
191
192 static int
skeqn(void)193 skeqn(void)
194 {
195 while ((c = getc(infile)) != rdelim) {
196 if (c == EOF) {
197 c = eof();
198 } else if (c == '"') {
199 while ((c = getc(infile)) != '"') {
200 if (c == EOF) {
201 c = eof();
202 } else if (c == '\\') {
203 if ((c = getc(infile)) == EOF) {
204 c = eof();
205 }
206 }
207 }
208 }
209 }
210 if (msflag) {
211 return (c = 'x');
212 }
213 return (c = ' ');
214 }
215
216
/*
 * Open the file named by p for reading and return its stream.
 * Callers must pass a non-null name.  If the file cannot be opened the
 * error is reported through fatal(), which terminates the program.
 */
static FILE *
opn(char *p)
{
	FILE *fp;

	assert(p != NULL);

	fp = fopen(p, "r");
	if (fp != NULL)
		return (fp);

	fatal(gettext("Cannot open file %s: %s\n"), p, strerror(errno));
	/* NOTREACHED */
	return (NULL);
}
229
230
231
232 static int
eof(void)233 eof(void)
234 {
235 if (infile != stdin)
236 (void) fclose(infile);
237 if (filesp > files) {
238 infile = *--filesp;
239 } else if (optind < argc) {
240 infile = opn(argv[optind++]);
241 } else {
242 exit(0);
243 }
244
245 return (C);
246 }
247
248
249
250 static void
getfname(void)251 getfname(void)
252 {
253 char *p;
254 struct chain {
255 struct chain *nextp;
256 char *datap;
257 };
258 struct chain *q;
259 static struct chain *namechain = NULL;
260
261 while (C == ' ')
262 ;
263
264 for (p = fname; ((*p = c) != '\n') && (c != ' ') && (c != '\t') &&
265 (c != '\\'); ++p) {
266 (void) C;
267 }
268 *p = '\0';
269 while (c != '\n') {
270 (void) C;
271 }
272
273 /* see if this name has already been used */
274 for (q = namechain; q; q = q->nextp)
275 if (strcmp(fname, q->datap) != 0) {
276 fname[0] = '\0';
277 return;
278 }
279
280 q = (struct chain *)calloc(1, sizeof (*namechain));
281 q->nextp = namechain;
282 q->datap = copys(fname);
283 namechain = q;
284 }
285
286
/*
 * Print "deroff: " followed by a printf-style message on stderr and
 * exit with status 1.  Callers must ensure 'format' and the arguments
 * it refers to are non-null.
 */
static void
fatal(const char *format, ...)
{
	va_list alist;

	assert(format != NULL);
	(void) fputs(gettext("deroff: "), stderr);
	va_start(alist, format);
	(void) vfprintf(stderr, format, alist);
	/* every va_start must be paired with va_end in the same function */
	va_end(alist);
	exit(1);
}
302
/*
 * Print the message s on stderr, prefixed with the program name, and
 * exit with status 1.  Callers must pass a non-null message.
 */
static void
fatal_msg(char *s)
{
	const char *fmt;

	assert(s != NULL);
	fmt = gettext("deroff: %s\n");
	(void) fprintf(stderr, fmt, s);
	exit(1);
}
311
/* Write the command synopsis to stderr. */
static void
usage(void)
{
	const char *msg;

	msg = gettext(
	    "usage: deroff [ -w ] [ -m (m s l) ] [ -i ] "
	    "[ file ] ... \n");
	(void) fputs(msg, stderr);
}
319
320 static void
work(void)321 work(void)
322 {
323
324 for (;;) {
325 if ((C == '.') || (c == '\''))
326 comline();
327 else
328 regline(NO, TWO);
329 }
330 }
331
332
/*
 * Collect the rest of the current input line, starting with the
 * character already in c, into the global line buffer and print it.
 * Backslash constructions are removed with backsl().
 *
 * macline is nonzero when the text comes from a request line.  The
 * result is printed by putwords(macline) when -w is in effect, by
 * putmac(line, cnst) for request lines, and by puts() otherwise.
 * Nothing is printed when the collected line is empty.
 */
static void
regline(int macline, int cnst)
{

	/* The buffer is allocated on first use and kept between calls. */
	if (line == NULL) {
		if ((line = (char *)malloc(linesize * sizeof (char))) == NULL) {
			fatal_msg(gettext("Cannot allocate memory"));
		}
	}

	lindx = 0;
	line[lindx] = c;
	for (;;) {
		if (c == '\\') {
			/* a blank takes the place of the escape sequence */
			line[lindx] = ' ';
			backsl();
			if (c == '%') {	/* no blank for hyphenation char */
				lindx--;
			}
		}
		if (c == '\n') {
			break;
		}
		/*
		 * We're just about to add another character to the line
		 * buffer so ensure we don't overrun it.
		 *
		 * NOTE(review): lindx is an int compared against a size_t,
		 * and both this function and backsl() decrement it; it
		 * looks possible for lindx to drop below zero at the start
		 * of a line -- confirm.
		 */
		if (++lindx >= linesize - 1) {
			linesize = linesize * 2;
			if ((line = (char *)realloc(line,
			    linesize * sizeof (char))) == NULL) {
				fatal_msg(gettext("Cannot allocate memory"));
			}
		}
		/*
		 * Inside a table a 'T' followed by '{' or '}' is removed:
		 * the stored 'T' becomes a blank and the brace is
		 * overwritten by the character that follows it.
		 */
		if (intable && (c == 'T')) {
			line[lindx] = C;
			if ((c == '{') || (c == '}')) {
				line[lindx - 1] = ' ';
				line[lindx] = C;
			}
		} else {
			line[lindx] = C;
		}
	}

	/* replace the newline that ended the loop */
	line[lindx] = '\0';

	if (line[0] != '\0') {
		if (wordflag) {
			putwords(macline);
		} else if (macline) {
			putmac(line, cnst);
		} else {
			(void) puts(line);
		}
	}
}
390
391
392
393
394 static void
putmac(char * s,int cnst)395 putmac(char *s, int cnst)
396 {
397 char *t;
398
399 while (*s) {
400 while ((*s == ' ') || (*s == '\t')) {
401 (void) putchar(*s++);
402 }
403 for (t = s; (*t != ' ') && (*t != '\t') && (*t != '\0'); ++t)
404 ;
405 if (*s == '\"')
406 s++;
407 if ((t > s + cnst) && (chars[s[0]] == LETTER) &&
408 (chars[s[1]] == LETTER)) {
409 while (s < t) {
410 if (*s == '\"')
411 s++;
412 else
413 (void) putchar(*s++);
414 }
415 } else {
416 s = t;
417 }
418 }
419 (void) putchar('\n');
420 }
421
422
423
424 static void
putwords(int macline)425 putwords(int macline) /* break into words for -w option */
426 {
427 char *p, *p1;
428 int i, nlet;
429
430 for (p1 = line; ; ) {
431 /* skip initial specials ampersands and apostrophes */
432 while (chars[*p1] < DIGIT) {
433 if (*p1++ == '\0')
434 return;
435 }
436 nlet = 0;
437 for (p = p1; (i = chars[*p]) != SPECIAL; ++p) {
438 if (i == LETTER)
439 ++nlet;
440 }
441
442 if ((!macline && (nlet > 1)) /* MDM definition of word */ ||
443 (macline && (nlet > 2) && (chars[p1[0]] == LETTER) &&
444 (chars[p1[1]] == LETTER))) {
445 /* delete trailing ampersands and apostrophes */
446 while ((p[-1] == '\'') || (p[-1] == '&')) {
447 --p;
448 }
449 while (p1 < p) {
450 (void) putchar(*p1++);
451 }
452 (void) putchar('\n');
453 } else {
454 p1 = p;
455 }
456 }
457 }
458
459
460
461 static void
comline(void)462 comline(void)
463 {
464 int c1, c2;
465
466 com:
467 while ((C == ' ') || (c == '\t'))
468 ;
469 comx:
470 if ((c1 = c) == '\n')
471 return;
472 c2 = C;
473 if ((c1 == '.') && (c2 != '.'))
474 inmacro = NO;
475 if (c2 == '\n')
476 return;
477
478 if ((c1 == 'E') && (c2 == 'Q') && (filesp == files)) {
479 eqn();
480 } else if ((c1 == 'T') && ((c2 == 'S') || (c2 == 'C') ||
481 (c2 == '&')) && (filesp == files)) {
482 if (msflag) {
483 stbl();
484 } else {
485 tbl();
486 }
487 } else if ((c1 == 'T') && (c2 == 'E')) {
488 intable = NO;
489 } else if (!inmacro && (c1 == 'd') && (c2 == 'e')) {
490 macro();
491 } else if (!inmacro && (c1 == 'i') && (c2 == 'g')) {
492 macro();
493 } else if (!inmacro && (c1 == 'a') && (c2 == 'm')) {
494 macro();
495 } else if ((c1 == 's') && (c2 == 'o')) {
496 if (iflag) {
497 SKIP;
498 } else {
499 getfname();
500 if (fname[0]) {
501 infile = *++filesp = opn(fname);
502 }
503 }
504 } else if ((c1 == 'n') && (c2 == 'x')) {
505 if (iflag) {
506 SKIP;
507 } else {
508 getfname();
509 if (fname[0] == '\0') {
510 exit(0);
511 }
512 if (infile != stdin) {
513 (void) fclose(infile);
514 }
515 infile = *filesp = opn(fname);
516 }
517 } else if ((c1 == 'h') && (c2 == 'w')) {
518 SKIP;
519 } else if (msflag && (c1 == 'T') && (c2 == 'L')) {
520 SKIP_TO_COM;
521 goto comx;
522 } else if (msflag && (c1 == 'N') && (c2 == 'R')) {
523 SKIP;
524 } else if (msflag && (c1 == 'A') && ((c2 == 'U') || (c2 == 'I'))) {
525 if (mac == MM) {
526 SKIP;
527 } else {
528 SKIP_TO_COM;
529 goto comx;
530 }
531 } else if (msflag && (c1 == 'F') && (c2 == 'S')) {
532 SKIP_TO_COM;
533 goto comx;
534 } else if (msflag && (c1 == 'S') && (c2 == 'H')) {
535 SKIP_TO_COM;
536 goto comx;
537 } else if (msflag && (c1 == 'N') && (c2 == 'H')) {
538 SKIP_TO_COM;
539 goto comx;
540 } else if (msflag && (c1 == 'O') && (c2 == 'K')) {
541 SKIP_TO_COM;
542 goto comx;
543 } else if (msflag && (c1 == 'N') && (c2 == 'D')) {
544 SKIP;
545 } else if (msflag && (mac == MM) && (c1 == 'H') &&
546 ((c2 == ' ') || (c2 == 'U'))) {
547 SKIP;
548 } else if (msflag && (mac == MM) && (c2 == 'L')) {
549 if (disp || (c1 == 'R')) {
550 sdis('L', 'E');
551 } else {
552 SKIP;
553 (void) putchar('.');
554 }
555 } else if (msflag && ((c1 == 'D') || (c1 == 'N') ||
556 (c1 == 'K') || (c1 == 'P')) && (c2 == 'S')) {
557 sdis(c1, 'E'); /* removed RS-RE */
558 } else if (msflag && (c1 == 'K' && c2 == 'F')) {
559 sdis(c1, 'E');
560 } else if (msflag && (c1 == 'n') && (c2 == 'f')) {
561 sdis('f', 'i');
562 } else if (msflag && (c1 == 'c') && (c2 == 'e')) {
563 sce();
564 } else {
565 if ((c1 == '.') && (c2 == '.')) {
566 while (C == '.')
567 ;
568 }
569 ++inmacro;
570 if ((c1 <= 'Z') && msflag) {
571 regline(YES, ONE);
572 } else {
573 regline(YES, TWO);
574 }
575 --inmacro;
576 }
577 }
578
579
580
581 static void
macro(void)582 macro(void)
583 {
584 if (msflag) {
585 /* look for .. */
586 do {
587 SKIP;
588 } while ((C != '.') || (C != '.') || (C == '.'));
589 if (c != '\n') {
590 SKIP;
591 }
592 return;
593 }
594 SKIP;
595 inmacro = YES;
596 }
597
598
599
600
601 static void
sdis(char a1,char a2)602 sdis(char a1, char a2)
603 {
604 int c1, c2;
605 int eqnf;
606 int notdone = 1;
607 eqnf = 1;
608 SKIP;
609 while (notdone) {
610 while (C != '.')
611 SKIP;
612 if ((c1 = C) == '\n')
613 continue;
614 if ((c2 = C) == '\n')
615 continue;
616 if ((c1 == a1) && (c2 == a2)) {
617 SKIP;
618 if (eqnf)
619 (void) putchar('.');
620 (void) putchar('\n');
621 return;
622 } else if ((a1 == 'D') && (c1 == 'E') && (c2 == 'Q')) {
623 eqn();
624 eqnf = 0;
625 } else {
626 SKIP;
627 }
628 }
629 }
630
631 static void
tbl(void)632 tbl(void)
633 {
634 while (C != '.')
635 ;
636 SKIP;
637 intable = YES;
638 }
639
/*
 * Table handling used by comline() when -m is in effect: the table is
 * discarded rather than passed through.
 */
static void
stbl(void)
{
	/* discard everything up to and including the first '.' */
	while (C != '.')
		;
	SKIP_TO_COM;
	/*
	 * c now holds the character that followed the '.' found by
	 * SKIP_TO_COM.  Unless that request is "TE", keep reading until a
	 * '.' that directly follows a newline is followed by "TE".  The
	 * later C reads in the condition only happen when the earlier
	 * tests have matched.
	 */
	if ((c != 'T') || (C != 'E')) {
		SKIP;
		pc = c;
		while ((C != '.') || (pc != '\n') ||
		    (C != 'T') || (C != 'E')) {
			pc = c;
		}
	}
}
655
/*
 * Skip an equation (.EQ ... .EN).  Input is read with C1, so in-line
 * delimiters are not interpreted while inside the equation.
 *
 * A line starting with 'd' is checked for a "delim" statement, which
 * sets or clears ldelim/rdelim.  With -m, an equation in which no
 * "del..." line was seen is replaced on output by "x ", followed by
 * ". " when the last character read before the newline of the
 * preceding line was a '.'.
 */
static void
eqn(void)
{
	int c1, c2;
	int dflg;	/* cleared once a line starting with "del" is seen */
	int last;	/* nonzero if the last line scanned ended in '.' */

	last = 0;
	dflg = 1;
	SKIP;

	for (;;) {
		if ((C1 == '.') || (c == '\'')) {
			/* request line: look for EN after optional blanks */
			while ((C1 == ' ') || (c == '\t'))
				;
			if ((c == 'E') && (C1 == 'N')) {
				SKIP;
				if (msflag && dflg) {
					(void) putchar('x');
					(void) putchar(' ');
					if (last) {
						(void) putchar('.');
						(void) putchar(' ');
					}
				}
				return;
			}
		} else if (c == 'd') {	/* look for delim */
			if ((C1 == 'e') && (C1 == 'l')) {
				if ((C1 == 'i') && (C1 == 'm')) {
					while (C1 == ' ')
						;
					/*
					 * A newline in place of either
					 * delimiter, or the word "off",
					 * turns the delimiters off.
					 */
					if (((c1 = c) == '\n') ||
					    ((c2 = C1) == '\n') ||
					    ((c1 == 'o') && (c2 == 'f') &&
					    (C1 == 'f'))) {
						ldelim = NOCHAR;
						rdelim = NOCHAR;
					} else {
						ldelim = c1;
						rdelim = c2;
					}
				}
				dflg = 0;
			}
		}

		/* skip the rest of the line, noting whether it ends in '.' */
		if (c != '\n') {
			while (C1 != '\n') {
				if (c == '.') {
					last = 1;
				} else {
					last = 0;
				}
			}
		}
	}
}
714
715
716
/*
 * Skip over a complete backslash construction.  The backslash itself
 * has already been read; the character after it selects the form.
 */
static void
backsl(void)
{
	int bdelim;

sw:	switch (C) {
	case '"':
		/* \" : the rest of the line is skipped */
		SKIP;
		return;
	case 's':
		/*
		 * \s : either another backslash construction follows, or
		 * one character and any digits after it are consumed.  The
		 * first non-digit is pushed back, c is set to '0' and the
		 * caller's line index is moved back by one.
		 */
		if (C == '\\') {
			backsl();
		} else {
			while ((C >= '0') && (c <= '9'))
				;
			(void) ungetc(c, infile);
			c = '0';
		}
		lindx--;
		return;

	case 'f':
	case 'n':
	case '*':
		/* one-character name unless it is introduced by '(' */
		if (C != '(')
			return;
		/* FALLTHROUGH */

	case '(':
		/* two more characters are read, unless the first is newline */
		if (C != '\n') {
			(void) C;
		}
		return;

	case '$':
		(void) C;	/* discard argument number */
		return;

	case 'b':
	case 'x':
	case 'v':
	case 'h':
	case 'w':
	case 'o':
	case 'l':
	case 'L':
		/*
		 * The next character is a delimiter; skip to its second
		 * occurrence or to the end of the line, handling nested
		 * backslash constructions on the way.
		 */
		if ((bdelim = C) == '\n')
			return;
		while ((C != '\n') && (c != bdelim))
			if (c == '\\')
				backsl();
		return;

	case '\\':
		/* doubled backslash: examined again only inside a macro */
		if (inmacro)
			goto sw;
		/* FALLTHROUGH */
	default:
		return;
	}
}
777
778
779
780
/*
 * Return a newly allocated copy of the string s.  Running out of
 * memory is a fatal error.
 */
static char *
copys(char *s)
{
	char *dup;

	dup = calloc((unsigned)(strlen(s) + 1), sizeof (*dup));
	if (dup == NULL)
		fatal_msg(gettext("Cannot allocate memory"));

	(void) strcpy(dup, s);
	return (dup);
}
793
794 static void
sce(void)795 sce(void)
796 {
797 char *ap;
798 int n, i;
799 char a[10];
800
801 for (ap = a; C != '\n'; ap++) {
802 *ap = c;
803 if (ap == &a[9]) {
804 SKIP;
805 ap = a;
806 break;
807 }
808 }
809 if (ap != a) {
810 n = atoi(a);
811 } else {
812 n = 1;
813 }
814 for (i = 0; i < n; ) {
815 if (C == '.') {
816 if (C == 'c') {
817 if (C == 'e') {
818 while (C == ' ')
819 ;
820 if (c == '0') {
821 break;
822 } else {
823 SKIP;
824 }
825 } else {
826 SKIP;
827 }
828 } else {
829 SKIP;
830 }
831 } else {
832 SKIP;
833 i++;
834 }
835 }
836 }
837