xref: /illumos-gate/usr/src/cmd/deroff/deroff.c (revision 10a40e179c111088c21d8e895198ac95dcb83d14)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #include <assert.h>
31 #include <errno.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <locale.h>
36 #include <sys/varargs.h>
37 
/*
 * Deroff command -- strip troff, eqn, and Tbl sequences from a file.
 * It accepts three flag arguments:
 * -w causes the output to be one word per line rather than in the
 * original format.
 * -mm (or -ms) causes the corresponding macros to be interpreted
 * so that just sentences are output;
 * -ml also gets rid of lists.
 * -i causes deroff to ignore .so and .nx commands.
 * Otherwise deroff follows .so and .nx commands.  It removes the
 * contents of macro definitions, equations (both .EQ ... .EN and $...$),
 * Tbl command sequences, and Troff backslash constructions.
 *
 * All input is through the C macro; the most recently read character
 * is in c.
 */
53 
/*
 * C reads the next character of the current input file into the global c
 * and yields it.  At end of file it defers to eof().  When the character
 * equals the eqn left delimiter and the current file is the bottom of the
 * file stack, the rest of the in-line equation is consumed by skeqn().
 */
#define	C	((c = getc(infile)) == EOF ? eof() : \
		    ((c == ldelim) && (filesp == files) ? skeqn() : c))
/* C1 is C without the in-line equation delimiter check. */
#define	C1	((c = getc(infile)) == EOF ? eof() : c)
/*
 * SKIP reads characters until a newline has been read.  It always reads at
 * least one character, even when c already holds a newline.
 */
#define	SKIP	while (C != '\n')
/*
 * SKIP_TO_COM reads through two newlines, then keeps reading until it has
 * seen a '.' that directly follows a newline and whose next character is
 * not greater than 'Z'.  It expands to several statements, so it must only
 * be used inside braces.
 */
#define	SKIP_TO_COM	SKIP; SKIP; pc = c; \
			while ((C != '.') || (pc != '\n') || \
			    (C > 'Z')) { \
				pc = c; \
			}

/* Boolean values used by the flag variables below. */
#define	YES 1
#define	NO 0
/* Values of mac: which macro package -m selected. */
#define	MS 0
#define	MM 1
/* Values passed as the cnst argument of regline()/putmac(). */
#define	ONE 1
#define	TWO 2

/* Value of ldelim/rdelim when no eqn delimiter is in effect. */
#define	NOCHAR -2
/* Character classes stored in chars[]. */
#define	SPECIAL 0
#define	APOS 1
#define	DIGIT 2
#define	LETTER 3

/* Initial size of the line buffer; regline() doubles it as needed. */
#define	MAXLINESZ	512

static int wordflag = NO;	/* -w given */
static int msflag = NO;		/* any -m option given */
static int iflag = NO;		/* -i given */
static int mac = MM;		/* MM or MS */
static int disp = 0;		/* set by -ml */
static int inmacro = NO;	/* set by macro(), counted in comline() */
static int intable = NO;	/* set by tbl(), cleared on .TE */
static int lindx;		/* current index into line */
static size_t linesize = MAXLINESZ;	/* allocated size of line */

static char chars[128];  /* SPECIAL, APOS, DIGIT, or LETTER */

static char *line = NULL;	/* line buffer owned by regline() */

/*
 * Most recently read character.  This must be an int: it receives the
 * result of getc() and is compared against EOF, which a char cannot
 * represent distinctly from a valid byte.
 */
static int c;
static int pc;			/* previous character, see SKIP_TO_COM */
static int ldelim	= NOCHAR;	/* eqn left delimiter */
static int rdelim	= NOCHAR;	/* eqn right delimiter */

/* Copies of main()'s arguments, used by eof() to find further files. */
static int argc;
static char **argv;

extern int optind;
extern char *optarg;
static char fname[50];		/* file name parsed by getfname() */
static FILE *files[15];		/* stack of input files (.so nesting) */
static FILE **filesp;		/* current top of files[] */
static FILE *infile;		/* file currently being read */
107 
108 static void backsl(void);
109 static void comline(void);
110 static char *copys(char *);
111 static int eof(void);
112 static void eqn(void);
113 static void fatal(const char *, ...);
114 static void fatal_msg(char *);
115 static void getfname(void);
116 static void macro(void);
117 static FILE *opn(char *);
118 static void putmac(char *, int);
119 static void putwords(int);
120 static void regline(int, int);
121 static void sce(void);
122 static int skeqn(void);
123 static void sdis(char, char);
124 static void stbl(void);
125 static void tbl(void);
126 static void usage(void);
127 static void work(void)	__NORETURN;
128 
129 int
130 main(int ac, char **av)
131 {
132 	int i;
133 	int errflg = 0;
134 	int optchar;
135 
136 	(void) setlocale(LC_ALL, "");
137 #if !defined(TEXT_DOMAIN)
138 #define	TEXT_DOMAIN "SYS_TEST"
139 #endif
140 	(void) textdomain(TEXT_DOMAIN);
141 	argc = ac;
142 	argv = av;
143 	while ((optchar = getopt(argc, argv, "wim:")) != EOF) {
144 		switch (optchar) {
145 		case 'w':
146 			wordflag = YES;
147 			break;
148 		case 'm':
149 			msflag = YES;
150 			if (*optarg == 'm')
151 				mac = MM;
152 			else if (*optarg == 's')
153 				mac = MS;
154 			else if (*optarg == 'l')
155 				disp = 1;
156 			else
157 				errflg++;
158 			break;
159 		case 'i':
160 			iflag = YES;
161 			break;
162 		case '?':
163 			errflg++;
164 		}
165 	}
166 	if (errflg) {
167 		usage();
168 		return (1);
169 	}
170 	if (optind == argc)
171 		infile = stdin;
172 	else
173 		infile = opn(argv[optind++]);
174 	files[0] = infile;
175 	filesp = &files[0];
176 
177 	for (i = 'a'; i <= 'z'; ++i)
178 		chars[i] = LETTER;
179 	for (i = 'A'; i <= 'Z'; ++i)
180 		chars[i] = LETTER;
181 	for (i = '0'; i <= '9'; ++i)
182 		chars[i] = DIGIT;
183 	chars['\''] = APOS;
184 	chars['&'] = APOS;
185 	work();
186 	/* NOTREACHED */
187 }
188 
189 
190 static int
191 skeqn(void)
192 {
193 	while ((c = getc(infile)) != rdelim) {
194 		if (c == EOF) {
195 			c = eof();
196 		} else if (c == '"') {
197 			while ((c = getc(infile)) != '"') {
198 				if (c == EOF) {
199 					c = eof();
200 				} else if (c == '\\') {
201 					if ((c = getc(infile)) == EOF) {
202 						c = eof();
203 					}
204 				}
205 			}
206 		}
207 	}
208 	if (msflag) {
209 		return (c = 'x');
210 	}
211 	return (c = ' ');
212 }
213 
214 
/*
 * Open the file named 'p' for reading and return its stream.  If the file
 * cannot be opened, fatal() is called with the reason taken from errno.
 * Callers must pass a non-null 'p'.
 */
static FILE *
opn(char *p)
{
	FILE *stream;

	assert(p != NULL);

	stream = fopen(p, "r");
	if (stream != NULL)
		return (stream);

	fatal(gettext("Cannot open file %s: %s\n"), p, strerror(errno));
	return (stream);
}
227 
228 
229 
230 static int
231 eof(void)
232 {
233 	if (infile != stdin)
234 		(void) fclose(infile);
235 	if (filesp > files) {
236 		infile = *--filesp;
237 	} else if (optind < argc) {
238 		infile = opn(argv[optind++]);
239 	} else {
240 		exit(0);
241 	}
242 
243 	return (C);
244 }
245 
246 
247 
248 static void
249 getfname(void)
250 {
251 	char *p;
252 	struct chain {
253 		struct chain *nextp;
254 		char *datap;
255 	};
256 	struct chain *q;
257 	static struct chain *namechain = NULL;
258 
259 	while (C == ' ')
260 		;
261 
262 	for (p = fname; ((*p = c) != '\n') && (c != ' ') && (c != '\t') &&
263 	    (c != '\\'); ++p) {
264 		(void) C;
265 	}
266 	*p = '\0';
267 	while (c != '\n') {
268 		(void) C;
269 	}
270 
271 	/* see if this name has already been used */
272 	for (q = namechain; q; q = q->nextp)
273 		if (strcmp(fname, q->datap) != 0) {
274 			fname[0] = '\0';
275 			return;
276 		}
277 
278 	q = (struct chain *)calloc(1, sizeof (*namechain));
279 	q->nextp = namechain;
280 	q->datap = copys(fname);
281 	namechain = q;
282 }
283 
284 
/*
 * Print "deroff: " followed by a printf-style message on stderr and exit
 * with status 1.  Callers must pass a non-null 'format' and arguments that
 * match it; the format supplies its own trailing newline.
 */
static void
fatal(const char *format, ...)
{
	va_list	alist;

	assert(format != NULL);
	(void) fputs(gettext("deroff: "), stderr);
	va_start(alist, format);
	(void) vfprintf(stderr, format, alist);
	/* every va_start needs a matching va_end in the same function */
	va_end(alist);
	exit(1);
}
300 
/*
 * Print the message 's', prefixed with "deroff: " and followed by a
 * newline, on stderr and exit with status 1.  Callers must pass a
 * non-null 's'.
 */
static void
fatal_msg(char *s)
{
	const char *fmt;

	assert(s != NULL);

	fmt = gettext("deroff: %s\n");
	(void) fprintf(stderr, fmt, s);
	exit(1);
}
309 
/* Write the one-line usage summary to stderr. */
static void
usage(void)
{
	const char *msg;

	msg = gettext(
	    "usage: deroff [ -w ] [ -m (m s l) ] [ -i ] "
	    "[ file ] ... \n");
	(void) fputs(msg, stderr);
}
317 
318 static void
319 work(void)
320 {
321 
322 	for (;;) {
323 		if ((C == '.') || (c == '\''))
324 			comline();
325 		else
326 			regline(NO, TWO);
327 	}
328 }
329 
330 
331 static void
332 regline(int macline, int cnst)
333 {
334 
335 	if (line == NULL) {
336 		if ((line = (char *)malloc(linesize * sizeof (char))) == NULL) {
337 			fatal_msg(gettext("Cannot allocate memory"));
338 		}
339 	}
340 
341 	lindx = 0;
342 	line[lindx] = c;
343 	for (;;) {
344 		if (c == '\\') {
345 			line[lindx] = ' ';
346 			backsl();
347 			if (c == '%') {	/* no blank for hyphenation char */
348 				lindx--;
349 			}
350 		}
351 		if (c == '\n') {
352 			break;
353 		}
354 		/*
355 		 * We're just about to add another character to the line
356 		 * buffer so ensure we don't overrun it.
357 		 */
358 		if (++lindx >= linesize - 1) {
359 			linesize = linesize * 2;
360 			if ((line = (char *)realloc(line,
361 			    linesize * sizeof (char))) == NULL) {
362 				fatal_msg(gettext("Cannot allocate memory"));
363 			}
364 		}
365 		if (intable && (c == 'T')) {
366 			line[lindx] = C;
367 			if ((c == '{') || (c == '}')) {
368 				line[lindx - 1] = ' ';
369 				line[lindx] = C;
370 			}
371 		} else {
372 			line[lindx] = C;
373 		}
374 	}
375 
376 	line[lindx] = '\0';
377 
378 	if (line[0] != '\0') {
379 		if (wordflag) {
380 			putwords(macline);
381 		} else if (macline) {
382 			putmac(line, cnst);
383 		} else {
384 			(void) puts(line);
385 		}
386 	}
387 }
388 
389 
390 
391 
392 static void
393 putmac(char *s, int cnst)
394 {
395 	char *t;
396 
397 	while (*s) {
398 		while ((*s == ' ') || (*s == '\t')) {
399 			(void) putchar(*s++);
400 		}
401 		for (t = s; (*t != ' ') && (*t != '\t') && (*t != '\0'); ++t)
402 			;
403 		if (*s == '\"')
404 			s++;
405 		if ((t > s + cnst) && (chars[s[0]] == LETTER) &&
406 		    (chars[s[1]] == LETTER)) {
407 			while (s < t) {
408 				if (*s == '\"')
409 					s++;
410 				else
411 					(void) putchar(*s++);
412 			}
413 		} else {
414 			s = t;
415 		}
416 	}
417 	(void) putchar('\n');
418 }
419 
420 
421 
422 static void
423 putwords(int macline)	/* break into words for -w option */
424 {
425 	char *p, *p1;
426 	int i, nlet;
427 
428 	for (p1 = line; ; ) {
429 		/* skip initial specials ampersands and apostrophes */
430 		while (chars[*p1] < DIGIT) {
431 			if (*p1++ == '\0')
432 				return;
433 		}
434 		nlet = 0;
435 		for (p = p1; (i = chars[*p]) != SPECIAL; ++p) {
436 			if (i == LETTER)
437 				++nlet;
438 		}
439 
440 		if ((!macline && (nlet > 1)) /* MDM definition of word */ ||
441 		    (macline && (nlet > 2) && (chars[p1[0]] == LETTER) &&
442 		    (chars[p1[1]] == LETTER))) {
443 			/* delete trailing ampersands and apostrophes */
444 			while ((p[-1] == '\'') || (p[-1] == '&')) {
445 				--p;
446 			}
447 			while (p1 < p) {
448 				(void) putchar(*p1++);
449 			}
450 			(void) putchar('\n');
451 		} else {
452 			p1 = p;
453 		}
454 	}
455 }
456 
457 
458 
459 static void
460 comline(void)
461 {
462 	int c1, c2;
463 
464 com:
465 	while ((C == ' ') || (c == '\t'))
466 		;
467 comx:
468 	if ((c1 = c) == '\n')
469 		return;
470 	c2 = C;
471 	if ((c1 == '.') && (c2 != '.'))
472 		inmacro = NO;
473 	if (c2 == '\n')
474 		return;
475 
476 	if ((c1 == 'E') && (c2 == 'Q') && (filesp == files)) {
477 		eqn();
478 	} else if ((c1 == 'T') && ((c2 == 'S') || (c2 == 'C') ||
479 	    (c2 == '&')) && (filesp == files)) {
480 		if (msflag) {
481 			stbl();
482 		} else {
483 			tbl();
484 		}
485 	} else if ((c1 == 'T') && (c2 == 'E')) {
486 		intable = NO;
487 	} else if (!inmacro && (c1 == 'd') && (c2 == 'e')) {
488 		macro();
489 	} else if (!inmacro && (c1 == 'i') && (c2 == 'g')) {
490 		macro();
491 	} else if (!inmacro && (c1 == 'a') && (c2 == 'm')) {
492 		macro();
493 	} else if ((c1 == 's') && (c2 == 'o')) {
494 		if (iflag) {
495 			SKIP;
496 		} else {
497 			getfname();
498 			if (fname[0]) {
499 				infile = *++filesp = opn(fname);
500 			}
501 		}
502 	} else if ((c1 == 'n') && (c2 == 'x')) {
503 		if (iflag) {
504 			SKIP;
505 		} else {
506 			getfname();
507 			if (fname[0] == '\0') {
508 				exit(0);
509 			}
510 			if (infile != stdin) {
511 				(void) fclose(infile);
512 			}
513 			infile = *filesp = opn(fname);
514 		}
515 	} else if ((c1 == 'h') && (c2 == 'w')) {
516 		SKIP;
517 	} else if (msflag && (c1 == 'T') && (c2 == 'L')) {
518 		SKIP_TO_COM;
519 		goto comx;
520 	} else if (msflag && (c1 == 'N') && (c2 == 'R')) {
521 		SKIP;
522 	} else if (msflag && (c1 == 'A') && ((c2 == 'U') || (c2 == 'I'))) {
523 		if (mac == MM) {
524 			SKIP;
525 		} else {
526 			SKIP_TO_COM;
527 			goto comx;
528 		}
529 	} else if (msflag && (c1 == 'F') && (c2 == 'S')) {
530 		SKIP_TO_COM;
531 		goto comx;
532 	} else if (msflag && (c1 == 'S') && (c2 == 'H')) {
533 		SKIP_TO_COM;
534 		goto comx;
535 	} else if (msflag && (c1 == 'N') && (c2 == 'H')) {
536 		SKIP_TO_COM;
537 		goto comx;
538 	} else if (msflag && (c1 == 'O') && (c2 == 'K')) {
539 		SKIP_TO_COM;
540 		goto comx;
541 	} else if (msflag && (c1 == 'N') && (c2 == 'D')) {
542 		SKIP;
543 	} else if (msflag && (mac == MM) && (c1 == 'H') &&
544 	    ((c2 == ' ') || (c2 == 'U'))) {
545 		SKIP;
546 	} else if (msflag && (mac == MM) && (c2 == 'L')) {
547 		if (disp || (c1 == 'R')) {
548 			sdis('L', 'E');
549 		} else {
550 			SKIP;
551 			(void) putchar('.');
552 		}
553 	} else if (msflag && ((c1 == 'D') || (c1 == 'N') ||
554 	    (c1 == 'K') || (c1 == 'P')) && (c2 == 'S')) {
555 		sdis(c1, 'E');		/* removed RS-RE */
556 	} else if (msflag && (c1 == 'K' && c2 == 'F')) {
557 		sdis(c1, 'E');
558 	} else if (msflag && (c1 == 'n') && (c2 == 'f')) {
559 		sdis('f', 'i');
560 	} else if (msflag && (c1 == 'c') && (c2 == 'e')) {
561 		sce();
562 	} else {
563 		if ((c1 == '.') && (c2 == '.')) {
564 			while (C == '.')
565 				;
566 		}
567 		++inmacro;
568 		if ((c1 <= 'Z') && msflag) {
569 			regline(YES, ONE);
570 		} else {
571 			regline(YES, TWO);
572 		}
573 		--inmacro;
574 	}
575 }
576 
577 
578 
579 static void
580 macro(void)
581 {
582 	if (msflag) {
583 		/* look for  .. */
584 		do {
585 			SKIP;
586 		} while ((C != '.') || (C != '.') || (C == '.'));
587 		if (c != '\n') {
588 			SKIP;
589 		}
590 		return;
591 	}
592 	SKIP;
593 	inmacro = YES;
594 }
595 
596 
597 
598 
599 static void
600 sdis(char a1, char a2)
601 {
602 	int c1, c2;
603 	int eqnf;
604 	int notdone = 1;
605 	eqnf = 1;
606 	SKIP;
607 	while (notdone) {
608 		while (C != '.')
609 			SKIP;
610 		if ((c1 = C) == '\n')
611 			continue;
612 		if ((c2 = C) == '\n')
613 			continue;
614 		if ((c1 == a1) && (c2 == a2)) {
615 			SKIP;
616 			if (eqnf)
617 				(void) putchar('.');
618 			(void) putchar('\n');
619 			return;
620 		} else if ((a1 == 'D') && (c1 == 'E') && (c2 == 'Q')) {
621 			eqn();
622 			eqnf = 0;
623 		} else {
624 			SKIP;
625 		}
626 	}
627 }
628 
629 static void
630 tbl(void)
631 {
632 	while (C != '.')
633 		;
634 	SKIP;
635 	intable = YES;
636 }
637 
638 static void
639 stbl(void)
640 {
641 	while (C != '.')
642 		;
643 	SKIP_TO_COM;
644 	if ((c != 'T') || (C != 'E')) {
645 		SKIP;
646 		pc = c;
647 		while ((C != '.') || (pc != '\n') ||
648 		    (C != 'T') || (C != 'E')) {
649 			pc = c;
650 		}
651 	}
652 }
653 
654 static void
655 eqn(void)
656 {
657 	int c1, c2;
658 	int dflg;
659 	int last;
660 
661 	last = 0;
662 	dflg = 1;
663 	SKIP;
664 
665 	for (;;) {
666 		if ((C1 == '.') || (c == '\'')) {
667 			while ((C1 == ' ') || (c == '\t'))
668 				;
669 			if ((c == 'E') && (C1 == 'N')) {
670 				SKIP;
671 				if (msflag && dflg) {
672 					(void) putchar('x');
673 					(void) putchar(' ');
674 					if (last) {
675 						(void) putchar('.');
676 						(void) putchar(' ');
677 					}
678 				}
679 				return;
680 			}
681 		} else if (c == 'd') {	/* look for delim */
682 			if ((C1 == 'e') && (C1 == 'l')) {
683 				if ((C1 == 'i') && (C1 == 'm')) {
684 					while (C1 == ' ')
685 						;
686 					if (((c1 = c) == '\n') ||
687 					    ((c2 = C1) == '\n') ||
688 					    ((c1 == 'o') && (c2 == 'f') &&
689 					    (C1 == 'f'))) {
690 						ldelim = NOCHAR;
691 						rdelim = NOCHAR;
692 					} else {
693 						ldelim = c1;
694 						rdelim = c2;
695 					}
696 				}
697 				dflg = 0;
698 			}
699 		}
700 
701 		if (c != '\n') {
702 			while (C1 != '\n') {
703 				if (c == '.') {
704 					last = 1;
705 				} else {
706 					last = 0;
707 				}
708 			}
709 		}
710 	}
711 }
712 
713 
714 
715 static void
716 backsl(void)	/* skip over a complete backslash construction */
717 {
718 	int bdelim;
719 
720 sw:	switch (C) {
721 	case '"':
722 		SKIP;
723 		return;
724 	case 's':
725 		if (C == '\\') {
726 			backsl();
727 		} else {
728 			while ((C >= '0') && (c <= '9'))
729 				;
730 			(void) ungetc(c, infile);
731 			c = '0';
732 		}
733 		lindx--;
734 		return;
735 
736 	case 'f':
737 	case 'n':
738 	case '*':
739 		if (C != '(')
740 			return;
741 		/* FALLTHROUGH */
742 
743 	case '(':
744 		if (C != '\n') {
745 			(void) C;
746 		}
747 		return;
748 
749 	case '$':
750 		(void) C;	/* discard argument number */
751 		return;
752 
753 	case 'b':
754 	case 'x':
755 	case 'v':
756 	case 'h':
757 	case 'w':
758 	case 'o':
759 	case 'l':
760 	case 'L':
761 		if ((bdelim = C) == '\n')
762 			return;
763 		while ((C != '\n') && (c != bdelim))
764 			if (c == '\\')
765 				backsl();
766 		return;
767 
768 	case '\\':
769 		if (inmacro)
770 			goto sw;
771 	default:
772 		return;
773 	}
774 }
775 
776 
777 
778 
/*
 * Return a newly allocated copy of the NUL-terminated string 's'.  The
 * caller owns the copy.  Running out of memory is fatal.
 */
static char *
copys(char *s)
{
	/* keep the length in a size_t; a narrower type could truncate it */
	size_t len = strlen(s) + 1;
	char *t;

	if ((t = malloc(len)) == NULL)
		fatal_msg(gettext("Cannot allocate memory"));

	(void) memcpy(t, s, len);
	return (t);
}
791 
792 static void
793 sce(void)
794 {
795 	char *ap;
796 	int n, i;
797 	char a[10];
798 
799 	for (ap = a; C != '\n'; ap++) {
800 		*ap = c;
801 		if (ap == &a[9]) {
802 			SKIP;
803 			ap = a;
804 			break;
805 		}
806 	}
807 	if (ap != a) {
808 		n = atoi(a);
809 	} else {
810 		n = 1;
811 	}
812 	for (i = 0; i < n; ) {
813 		if (C == '.') {
814 			if (C == 'c') {
815 				if (C == 'e') {
816 					while (C == ' ')
817 						;
818 					if (c == '0') {
819 						break;
820 					} else {
821 						SKIP;
822 					}
823 				} else {
824 					SKIP;
825 				}
826 			} else {
827 				SKIP;
828 			}
829 		} else {
830 			SKIP;
831 			i++;
832 		}
833 	}
834 }
835