xref: /illumos-gate/usr/src/cmd/deroff/deroff.c (revision 49218d4f8e4d84d1c08aeb267bcf6e451f2056dc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <assert.h>
33 #include <errno.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <locale.h>
38 #include <sys/varargs.h>
39 
/*
 * Deroff command -- strip troff, eqn, and Tbl sequences from a file.
 * It accepts three flags:
 * -w causes the output to be one word per line rather than in the
 * original format.
 * -mm (or -ms) causes the corresponding macros to be interpreted
 * so that just sentences are output;
 * -ml also gets rid of lists.
 * -i causes deroff to ignore .so and .nx commands.
 * Unless -i is given, deroff follows .so and .nx commands.  It removes
 * the contents of macro definitions, equations (both .EQ ... .EN and
 * $...$), Tbl command sequences, and Troff backslash constructions.
 *
 * All input is through the C macro; the most recently read character
 * is in c.
 */
55 
/*
 * C reads the next character from infile into c.  At end of file it
 * evaluates to eof(); when the character equals ldelim and filesp still
 * points at files[0] it evaluates to skeqn(); otherwise it evaluates
 * to c.
 */
#define	C	((c = getc(infile)) == EOF ? eof() : \
		    ((c == ldelim) && (filesp == files) ? skeqn() : c))
/* C1 is C without the ldelim/skeqn() check. */
#define	C1	((c = getc(infile)) == EOF ? eof() : c)
/*
 * SKIP is a loop header that reads through the next newline; the user
 * supplies the (normally empty) loop body, e.g. "SKIP;".
 */
#define	SKIP	while (C != '\n')
/*
 * SKIP_TO_COM reads through two newlines, then keeps reading until it
 * has seen a '.' that directly follows a newline and is followed by a
 * character that is not greater than 'Z'.  It expands to several
 * statements, so it must only be used inside a braced block.
 */
#define	SKIP_TO_COM	SKIP; SKIP; pc = c; \
			while ((C != '.') || (pc != '\n') || \
			    (C > 'Z')) { \
				pc = c; \
			}
65 
/* Boolean values used for the flag variables. */
#define	YES 1
#define	NO 0
/* Values of mac: which macro package -m selected. */
#define	MS 0
#define	MM 1
/* Values passed as the cnst argument of regline()/putmac(). */
#define	ONE 1
#define	TWO 2

/* Value of ldelim/rdelim while no eqn delimiter is set. */
#define	NOCHAR -2
/* Character classes stored in chars[]. */
#define	SPECIAL 0
#define	APOS 1
#define	DIGIT 2
#define	LETTER 3

/* Initial size of the line buffer; regline() doubles it on demand. */
#define	MAXLINESZ	512
80 
81 static int wordflag = NO;
82 static int msflag = NO;
83 static int iflag = NO;
84 static int mac = MM;
85 static int disp = 0;
86 static int inmacro = NO;
87 static int intable = NO;
88 static int lindx;
89 static size_t linesize = MAXLINESZ;
90 
91 static char chars[128];  /* SPECIAL, APOS, DIGIT, or LETTER */
92 
93 static char *line = NULL;
94 
95 static char c;
96 static int pc;
97 static int ldelim	= NOCHAR;
98 static int rdelim	= NOCHAR;
99 
100 static int argc;
101 static char **argv;
102 
103 extern int optind;
104 extern char *optarg;
105 static char fname[50];
106 static FILE *files[15];
107 static FILE **filesp;
108 static FILE *infile;
109 
/* Forward declarations of the file-local functions defined below. */
static void backsl(void);
static void comline(void);
static char *copys(char *);
static int eof(void);
static void eqn(void);
static void fatal(const char *, ...);
static void fatal_msg(char *);
static void getfname(void);
static void macro(void);
static FILE *opn(char *);
static void putmac(char *, int);
static void putwords(int);
static void regline(int, int);
static void sce(void);
static int skeqn(void);
static void sdis(char, char);
static void stbl(void);
static void tbl(void);
static void usage(void);
/* work() loops forever; the program leaves through exit(). */
static void work(void)	__NORETURN;
130 
131 int
132 main(int ac, char **av)
133 {
134 	int i;
135 	int errflg = 0;
136 	int optchar;
137 
138 	(void) setlocale(LC_ALL, "");
139 #if !defined(TEXT_DOMAIN)
140 #define	TEXT_DOMAIN "SYS_TEST"
141 #endif
142 	(void) textdomain(TEXT_DOMAIN);
143 	argc = ac;
144 	argv = av;
145 	while ((optchar = getopt(argc, argv, "wim:")) != EOF) {
146 		switch (optchar) {
147 		case 'w':
148 			wordflag = YES;
149 			break;
150 		case 'm':
151 			msflag = YES;
152 			if (*optarg == 'm')
153 				mac = MM;
154 			else if (*optarg == 's')
155 				mac = MS;
156 			else if (*optarg == 'l')
157 				disp = 1;
158 			else
159 				errflg++;
160 			break;
161 		case 'i':
162 			iflag = YES;
163 			break;
164 		case '?':
165 			errflg++;
166 		}
167 	}
168 	if (errflg) {
169 		usage();
170 		return (1);
171 	}
172 	if (optind == argc)
173 		infile = stdin;
174 	else
175 		infile = opn(argv[optind++]);
176 	files[0] = infile;
177 	filesp = &files[0];
178 
179 	for (i = 'a'; i <= 'z'; ++i)
180 		chars[i] = LETTER;
181 	for (i = 'A'; i <= 'Z'; ++i)
182 		chars[i] = LETTER;
183 	for (i = '0'; i <= '9'; ++i)
184 		chars[i] = DIGIT;
185 	chars['\''] = APOS;
186 	chars['&'] = APOS;
187 	work();
188 	/* NOTREACHED */
189 }
190 
191 
192 static int
193 skeqn(void)
194 {
195 	while ((c = getc(infile)) != rdelim) {
196 		if (c == EOF) {
197 			c = eof();
198 		} else if (c == '"') {
199 			while ((c = getc(infile)) != '"') {
200 				if (c == EOF) {
201 					c = eof();
202 				} else if (c == '\\') {
203 					if ((c = getc(infile)) == EOF) {
204 						c = eof();
205 					}
206 				}
207 			}
208 		}
209 	}
210 	if (msflag) {
211 		return (c = 'x');
212 	}
213 	return (c = ' ');
214 }
215 
216 
/*
 * Open 'p' for reading and return the stream.  Failure to open is
 * fatal.  Callers must pass a non-null 'p'.
 */
static FILE *
opn(char *p)
{
	FILE *fp;

	assert(p != NULL);
	fp = fopen(p, "r");
	if (fp == NULL)
		fatal(gettext("Cannot open file %s: %s\n"), p, strerror(errno));

	return (fp);
}
229 
230 
231 
232 static int
233 eof(void)
234 {
235 	if (infile != stdin)
236 		(void) fclose(infile);
237 	if (filesp > files) {
238 		infile = *--filesp;
239 	} else if (optind < argc) {
240 		infile = opn(argv[optind++]);
241 	} else {
242 		exit(0);
243 	}
244 
245 	return (C);
246 }
247 
248 
249 
250 static void
251 getfname(void)
252 {
253 	char *p;
254 	struct chain {
255 		struct chain *nextp;
256 		char *datap;
257 	};
258 	struct chain *q;
259 	static struct chain *namechain = NULL;
260 
261 	while (C == ' ')
262 		;
263 
264 	for (p = fname; ((*p = c) != '\n') && (c != ' ') && (c != '\t') &&
265 	    (c != '\\'); ++p) {
266 		(void) C;
267 	}
268 	*p = '\0';
269 	while (c != '\n') {
270 		(void) C;
271 	}
272 
273 	/* see if this name has already been used */
274 	for (q = namechain; q; q = q->nextp)
275 		if (strcmp(fname, q->datap) != 0) {
276 			fname[0] = '\0';
277 			return;
278 		}
279 
280 	q = (struct chain *)calloc(1, sizeof (*namechain));
281 	q->nextp = namechain;
282 	q->datap = copys(fname);
283 	namechain = q;
284 }
285 
286 
/*
 * Print "deroff: " followed by the printf-style message built from
 * 'format' and its arguments on stderr, then exit with status 1.
 *
 * Functions calling fatal() should ensure 'format' and
 * arguments are non-null.
 */
static void
fatal(const char *format, ...)
{
	va_list	alist;

	assert(format != NULL);
	(void) fputs(gettext("deroff: "), stderr);
	va_start(alist, format);
	(void) vfprintf(stderr, format, alist);
	/* every va_start must be paired with va_end in the same function */
	va_end(alist);
	exit(1);
}
302 
/*
 * Report the fixed message 's' on stderr with the "deroff: " prefix
 * and exit with status 1.  Callers must pass a non-null 's'.
 */
static void
fatal_msg(char *s)
{
	const char *fmt;

	assert(s != NULL);
	fmt = gettext("deroff: %s\n");
	(void) fprintf(stderr, fmt, s);
	exit(1);
}
311 
/* Write the command synopsis to stderr. */
static void
usage(void)
{
	const char *msg;

	msg = gettext(
	    "usage: deroff [ -w ] [ -m (m s l) ] [ -i ] "
	    "[ file ] ... \n");
	(void) fputs(msg, stderr);
}
319 
320 static void
321 work(void)
322 {
323 
324 	for (;;) {
325 		if ((C == '.') || (c == '\''))
326 			comline();
327 		else
328 			regline(NO, TWO);
329 	}
330 }
331 
332 
333 static void
334 regline(int macline, int cnst)
335 {
336 
337 	if (line == NULL) {
338 		if ((line = (char *)malloc(linesize * sizeof (char))) == NULL) {
339 			fatal_msg(gettext("Cannot allocate memory"));
340 		}
341 	}
342 
343 	lindx = 0;
344 	line[lindx] = c;
345 	for (;;) {
346 		if (c == '\\') {
347 			line[lindx] = ' ';
348 			backsl();
349 			if (c == '%') {	/* no blank for hyphenation char */
350 				lindx--;
351 			}
352 		}
353 		if (c == '\n') {
354 			break;
355 		}
356 		/*
357 		 * We're just about to add another character to the line
358 		 * buffer so ensure we don't overrun it.
359 		 */
360 		if (++lindx >= linesize - 1) {
361 			linesize = linesize * 2;
362 			if ((line = (char *)realloc(line,
363 			    linesize * sizeof (char))) == NULL) {
364 				fatal_msg(gettext("Cannot allocate memory"));
365 			}
366 		}
367 		if (intable && (c == 'T')) {
368 			line[lindx] = C;
369 			if ((c == '{') || (c == '}')) {
370 				line[lindx - 1] = ' ';
371 				line[lindx] = C;
372 			}
373 		} else {
374 			line[lindx] = C;
375 		}
376 	}
377 
378 	line[lindx] = '\0';
379 
380 	if (line[0] != '\0') {
381 		if (wordflag) {
382 			putwords(macline);
383 		} else if (macline) {
384 			putmac(line, cnst);
385 		} else {
386 			(void) puts(line);
387 		}
388 	}
389 }
390 
391 
392 
393 
394 static void
395 putmac(char *s, int cnst)
396 {
397 	char *t;
398 
399 	while (*s) {
400 		while ((*s == ' ') || (*s == '\t')) {
401 			(void) putchar(*s++);
402 		}
403 		for (t = s; (*t != ' ') && (*t != '\t') && (*t != '\0'); ++t)
404 			;
405 		if (*s == '\"')
406 			s++;
407 		if ((t > s + cnst) && (chars[s[0]] == LETTER) &&
408 		    (chars[s[1]] == LETTER)) {
409 			while (s < t) {
410 				if (*s == '\"')
411 					s++;
412 				else
413 					(void) putchar(*s++);
414 			}
415 		} else {
416 			s = t;
417 		}
418 	}
419 	(void) putchar('\n');
420 }
421 
422 
423 
424 static void
425 putwords(int macline)	/* break into words for -w option */
426 {
427 	char *p, *p1;
428 	int i, nlet;
429 
430 	for (p1 = line; ; ) {
431 		/* skip initial specials ampersands and apostrophes */
432 		while (chars[*p1] < DIGIT) {
433 			if (*p1++ == '\0')
434 				return;
435 		}
436 		nlet = 0;
437 		for (p = p1; (i = chars[*p]) != SPECIAL; ++p) {
438 			if (i == LETTER)
439 				++nlet;
440 		}
441 
442 		if ((!macline && (nlet > 1)) /* MDM definition of word */ ||
443 		    (macline && (nlet > 2) && (chars[p1[0]] == LETTER) &&
444 		    (chars[p1[1]] == LETTER))) {
445 			/* delete trailing ampersands and apostrophes */
446 			while ((p[-1] == '\'') || (p[-1] == '&')) {
447 				--p;
448 			}
449 			while (p1 < p) {
450 				(void) putchar(*p1++);
451 			}
452 			(void) putchar('\n');
453 		} else {
454 			p1 = p;
455 		}
456 	}
457 }
458 
459 
460 
461 static void
462 comline(void)
463 {
464 	int c1, c2;
465 
466 com:
467 	while ((C == ' ') || (c == '\t'))
468 		;
469 comx:
470 	if ((c1 = c) == '\n')
471 		return;
472 	c2 = C;
473 	if ((c1 == '.') && (c2 != '.'))
474 		inmacro = NO;
475 	if (c2 == '\n')
476 		return;
477 
478 	if ((c1 == 'E') && (c2 == 'Q') && (filesp == files)) {
479 		eqn();
480 	} else if ((c1 == 'T') && ((c2 == 'S') || (c2 == 'C') ||
481 	    (c2 == '&')) && (filesp == files)) {
482 		if (msflag) {
483 			stbl();
484 		} else {
485 			tbl();
486 		}
487 	} else if ((c1 == 'T') && (c2 == 'E')) {
488 		intable = NO;
489 	} else if (!inmacro && (c1 == 'd') && (c2 == 'e')) {
490 		macro();
491 	} else if (!inmacro && (c1 == 'i') && (c2 == 'g')) {
492 		macro();
493 	} else if (!inmacro && (c1 == 'a') && (c2 == 'm')) {
494 		macro();
495 	} else if ((c1 == 's') && (c2 == 'o')) {
496 		if (iflag) {
497 			SKIP;
498 		} else {
499 			getfname();
500 			if (fname[0]) {
501 				infile = *++filesp = opn(fname);
502 			}
503 		}
504 	} else if ((c1 == 'n') && (c2 == 'x')) {
505 		if (iflag) {
506 			SKIP;
507 		} else {
508 			getfname();
509 			if (fname[0] == '\0') {
510 				exit(0);
511 			}
512 			if (infile != stdin) {
513 				(void) fclose(infile);
514 			}
515 			infile = *filesp = opn(fname);
516 		}
517 	} else if ((c1 == 'h') && (c2 == 'w')) {
518 		SKIP;
519 	} else if (msflag && (c1 == 'T') && (c2 == 'L')) {
520 		SKIP_TO_COM;
521 		goto comx;
522 	} else if (msflag && (c1 == 'N') && (c2 == 'R')) {
523 		SKIP;
524 	} else if (msflag && (c1 == 'A') && ((c2 == 'U') || (c2 == 'I'))) {
525 		if (mac == MM) {
526 			SKIP;
527 		} else {
528 			SKIP_TO_COM;
529 			goto comx;
530 		}
531 	} else if (msflag && (c1 == 'F') && (c2 == 'S')) {
532 		SKIP_TO_COM;
533 		goto comx;
534 	} else if (msflag && (c1 == 'S') && (c2 == 'H')) {
535 		SKIP_TO_COM;
536 		goto comx;
537 	} else if (msflag && (c1 == 'N') && (c2 == 'H')) {
538 		SKIP_TO_COM;
539 		goto comx;
540 	} else if (msflag && (c1 == 'O') && (c2 == 'K')) {
541 		SKIP_TO_COM;
542 		goto comx;
543 	} else if (msflag && (c1 == 'N') && (c2 == 'D')) {
544 		SKIP;
545 	} else if (msflag && (mac == MM) && (c1 == 'H') &&
546 	    ((c2 == ' ') || (c2 == 'U'))) {
547 		SKIP;
548 	} else if (msflag && (mac == MM) && (c2 == 'L')) {
549 		if (disp || (c1 == 'R')) {
550 			sdis('L', 'E');
551 		} else {
552 			SKIP;
553 			(void) putchar('.');
554 		}
555 	} else if (msflag && ((c1 == 'D') || (c1 == 'N') ||
556 	    (c1 == 'K') || (c1 == 'P')) && (c2 == 'S')) {
557 		sdis(c1, 'E');		/* removed RS-RE */
558 	} else if (msflag && (c1 == 'K' && c2 == 'F')) {
559 		sdis(c1, 'E');
560 	} else if (msflag && (c1 == 'n') && (c2 == 'f')) {
561 		sdis('f', 'i');
562 	} else if (msflag && (c1 == 'c') && (c2 == 'e')) {
563 		sce();
564 	} else {
565 		if ((c1 == '.') && (c2 == '.')) {
566 			while (C == '.')
567 				;
568 		}
569 		++inmacro;
570 		if ((c1 <= 'Z') && msflag) {
571 			regline(YES, ONE);
572 		} else {
573 			regline(YES, TWO);
574 		}
575 		--inmacro;
576 	}
577 }
578 
579 
580 
581 static void
582 macro(void)
583 {
584 	if (msflag) {
585 		/* look for  .. */
586 		do {
587 			SKIP;
588 		} while ((C != '.') || (C != '.') || (C == '.'));
589 		if (c != '\n') {
590 			SKIP;
591 		}
592 		return;
593 	}
594 	SKIP;
595 	inmacro = YES;
596 }
597 
598 
599 
600 
601 static void
602 sdis(char a1, char a2)
603 {
604 	int c1, c2;
605 	int eqnf;
606 	int notdone = 1;
607 	eqnf = 1;
608 	SKIP;
609 	while (notdone) {
610 		while (C != '.')
611 			SKIP;
612 		if ((c1 = C) == '\n')
613 			continue;
614 		if ((c2 = C) == '\n')
615 			continue;
616 		if ((c1 == a1) && (c2 == a2)) {
617 			SKIP;
618 			if (eqnf)
619 				(void) putchar('.');
620 			(void) putchar('\n');
621 			return;
622 		} else if ((a1 == 'D') && (c1 == 'E') && (c2 == 'Q')) {
623 			eqn();
624 			eqnf = 0;
625 		} else {
626 			SKIP;
627 		}
628 	}
629 }
630 
631 static void
632 tbl(void)
633 {
634 	while (C != '.')
635 		;
636 	SKIP;
637 	intable = YES;
638 }
639 
640 static void
641 stbl(void)
642 {
643 	while (C != '.')
644 		;
645 	SKIP_TO_COM;
646 	if ((c != 'T') || (C != 'E')) {
647 		SKIP;
648 		pc = c;
649 		while ((C != '.') || (pc != '\n') ||
650 		    (C != 'T') || (C != 'E')) {
651 			pc = c;
652 		}
653 	}
654 }
655 
/*
 * Skip an equation: discard input through the closing EN request.
 *
 * Input is read with C1, so in-line delimiter handling is off while
 * inside the equation.  A line that starts with "delim" sets ldelim
 * and rdelim from the next two non-blank characters, or resets them to
 * NOCHAR when the argument is missing or is "off".  Under -m, when no
 * line starting with "del" was seen, the equation is replaced in the
 * output by "x ", followed by ". " if the last scanned line ended in
 * a '.'.
 */
static void
eqn(void)
{
	int c1, c2;
	int dflg;	/* cleared once a line starting with "del" is seen */
	int last;	/* set when the character before a newline was '.' */

	last = 0;
	dflg = 1;
	SKIP;		/* rest of the request line that brought us here */

	for (;;) {
		/* c holds the first character of a new line after C1 */
		if ((C1 == '.') || (c == '\'')) {
			while ((C1 == ' ') || (c == '\t'))
				;
			if ((c == 'E') && (C1 == 'N')) {
				SKIP;
				if (msflag && dflg) {
					(void) putchar('x');
					(void) putchar(' ');
					if (last) {
						(void) putchar('.');
						(void) putchar(' ');
					}
				}
				return;
			}
		} else if (c == 'd') {	/* look for delim */
			if ((C1 == 'e') && (C1 == 'l')) {
				if ((C1 == 'i') && (C1 == 'm')) {
					while (C1 == ' ')
						;
					/*
					 * c1 and c2 are the two delimiter
					 * characters; a newline in either
					 * position or the word "off" turns
					 * delimiters off.
					 */
					if (((c1 = c) == '\n') ||
					    ((c2 = C1) == '\n') ||
					    ((c1 == 'o') && (c2 == 'f') &&
					    (C1 == 'f'))) {
						ldelim = NOCHAR;
						rdelim = NOCHAR;
					} else {
						ldelim = c1;
						rdelim = c2;
					}
				}
				dflg = 0;
			}
		}

		/* consume the rest of the line, tracking its last character */
		if (c != '\n') {
			while (C1 != '\n') {
				if (c == '.') {
					last = 1;
				} else {
					last = 0;
				}
			}
		}
	}
}
714 
715 
716 
/*
 * Skip over a complete backslash construction.  The backslash itself
 * has already been read; the character after it selects how much more
 * input belongs to the construction.  On return c holds the last
 * character consumed.
 */
static void
backsl(void)	/* skip over a complete backslash construction */
{
	int bdelim;

sw:	switch (C) {
	case '"':
		/* \" : the rest of the line is skipped */
		SKIP;
		return;
	case 's':
		/*
		 * \s : followed either by another backslash construction
		 * or by a run of digits.  The first non-digit is pushed
		 * back onto the input and c is set to '0'.  lindx is
		 * stepped back in both cases.
		 */
		if (C == '\\') {
			backsl();
		} else {
			while ((C >= '0') && (c <= '9'))
				;
			(void) ungetc(c, infile);
			c = '0';
		}
		lindx--;
		return;

	case 'f':
	case 'n':
	case '*':
		/* one more character, or '(' plus the '(' case below */
		if (C != '(')
			return;
		/* FALLTHROUGH */

	case '(':
		/* two more characters, unless the first is a newline */
		if (C != '\n') {
			(void) C;
		}
		return;

	case '$':
		(void) C;	/* discard argument number */
		return;

	case 'b':
	case 'x':
	case 'v':
	case 'h':
	case 'w':
	case 'o':
	case 'l':
	case 'L':
		/*
		 * Delimited argument: the next character is the delimiter;
		 * read until it shows up again or the line ends, skipping
		 * nested backslash constructions on the way.
		 */
		if ((bdelim = C) == '\n')
			return;
		while ((C != '\n') && (c != bdelim))
			if (c == '\\')
				backsl();
		return;

	case '\\':
		/* a doubled backslash is re-examined only inside a macro */
		if (inmacro)
			goto sw;
	default:
		return;
	}
}
777 
778 
779 
780 
/*
 * Return a newly allocated copy of the string 's'.  Running out of
 * memory is fatal.
 */
static char *
copys(char *s)
{
	char *copy;

	copy = calloc((unsigned)(strlen(s) + 1), sizeof (*copy));
	if (copy == NULL)
		fatal_msg(gettext("Cannot allocate memory"));

	(void) strcpy(copy, s);
	return (copy);
}
793 
794 static void
795 sce(void)
796 {
797 	char *ap;
798 	int n, i;
799 	char a[10];
800 
801 	for (ap = a; C != '\n'; ap++) {
802 		*ap = c;
803 		if (ap == &a[9]) {
804 			SKIP;
805 			ap = a;
806 			break;
807 		}
808 	}
809 	if (ap != a) {
810 		n = atoi(a);
811 	} else {
812 		n = 1;
813 	}
814 	for (i = 0; i < n; ) {
815 		if (C == '.') {
816 			if (C == 'c') {
817 				if (C == 'e') {
818 					while (C == ' ')
819 						;
820 					if (c == '0') {
821 						break;
822 					} else {
823 						SKIP;
824 					}
825 				} else {
826 					SKIP;
827 				}
828 			} else {
829 				SKIP;
830 			}
831 		} else {
832 			SKIP;
833 			i++;
834 		}
835 	}
836 }
837