xref: /freebsd/usr.bin/m4/main.c (revision 9f23cbd6cae82fd77edfad7173432fa8dccd0a95)
1 /*	$OpenBSD: main.c,v 1.87 2017/06/15 13:48:42 bcallah Exp $	*/
2 /*	$NetBSD: main.c,v 1.12 1997/02/08 23:54:49 cgd Exp $	*/
3 
4 /*-
5  * SPDX-License-Identifier: BSD-3-Clause
6  *
7  * Copyright (c) 1989, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  *
10  * This code is derived from software contributed to Berkeley by
11  * Ozan Yigit at York University.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37 
38 /*
39  * main.c
40  * Facility: m4 macro processor
41  * by: oz
42  */
43 #include <sys/cdefs.h>
44 __FBSDID("$FreeBSD$");
45 
46 #include <assert.h>
47 #include <signal.h>
48 #include <err.h>
49 #include <errno.h>
50 #include <getopt.h>
51 #include <unistd.h>
52 #include <stdio.h>
53 #include <ctype.h>
54 #include <string.h>
55 #include <stddef.h>
56 #include <stdint.h>
57 #include <stdlib.h>
58 #include <ohash.h>
59 #include "mdef.h"
60 #include "stdd.h"
61 #include "extern.h"
62 #include "pathnames.h"
63 
/*
 * Command-line option tables handed to getopt_long() in main().
 * Every long option carries the matching short option letter in its
 * last field, so both spellings reach the same case of the switch in
 * main().
 * NOTE(review): the leading + in shortopts presumably asks getopt_long()
 * to stop at the first non-option argument -- confirm against
 * getopt_long(3) on the target platform.
 */
64 static const char *shortopts = "+D:d::EGgI:o:Pst:U:";
65 static const struct option longopts[] = {
66 	{ "define",		required_argument,	NULL,	'D' },
67 	{ "debug",		optional_argument,	NULL,	'd' },
68 	{ "fatal-warnings",	no_argument,		NULL,	'E' },
69 	{ "traditional",	no_argument,		NULL,	'G' },
70 	{ "gnu",		no_argument,		NULL,	'g' },
71 	{ "include",		required_argument,	NULL,	'I' },
72 	{ "error-output",	required_argument,	NULL,	'o' },
73 	{ "prefix-builtins",	no_argument,		NULL,	'P' },
74 	{ "synclines",		no_argument,		NULL,	's' },
75 	{ "trace",		required_argument,	NULL,	't' },
76 	{ "undefine",		required_argument,	NULL,	'U' },
77 	{ NULL, 0, NULL, 0 },
78 };
79 
/*
 * Global interpreter state.  Only STACKMAX is file-local; the other
 * objects have external linkage.
 * mstack and sstack are allocated together in main() with STACKMAX
 * entries each and grown together in enlarge_stack().
 */
80 stae *mstack;			/* stack of m4 machine         */
81 char *sstack;			/* shadow stack, for string space extension */
82 static size_t STACKMAX;		/* current maximum size of stack */
83 int sp;				/* current m4  stack pointer   */
84 int fp;				/* m4 call frame pointer       */
85 struct input_file infile[MAXINP];/* input file stack (0=stdin)  */
86 FILE **outfile;			/* diversion array(0=bitbucket)*/
/* exclusive upper bound on outfile indices walked by the undivert loop in main() */
87 int maxout;
88 FILE *active;			/* active output file pointer  */
89 int ilevel = 0;			/* input file stack pointer    */
90 int oindex = 0;			/* diversion index..	       */
91 const char *null = "";                /* as it says.. just a null..  */
92 char **m4wraps = NULL;		/* m4wraps array.	       */
93 int maxwraps = 0;		/* size of m4wraps array       */
94 int wrapindex = 0;		/* current offset in m4wraps   */
95 char lquote[MAXCCHARS+1] = {LQUOTE};	/* left quote character  (`)   */
96 char rquote[MAXCCHARS+1] = {RQUOTE};	/* right quote character (')   */
97 char scommt[MAXCCHARS+1] = {SCOMMT};	/* start character for comment */
98 char ecommt[MAXCCHARS+1] = {ECOMMT};	/* end character for comment   */
99 int  synch_lines = 0;		/* line synchronisation for C preprocessor */
100 int  prefix_builtins = 0;	/* -P option to prefix builtin keywords */
101 int  error_warns = 0;		/* -E option to make warnings exit_code = 1 */
102 int  fatal_warns = 0;		/* -E -E option to make warnings fatal */
103 
/*
 * One entry of the builtin table: the keyword text and its type code.
 * The type code may be OR-ed with NOARGS (see initkwds()).
 */
104 struct keyblk {
105         const char    *knam;          /* keyword name */
106         int     ktyp;           /* keyword type */
107 };
108 
/*
 * Builtins registered by initkwds().  An entry flagged NOARGS is
 * installed without NEEDARGS; every other entry gets NEEDARGS, and
 * macro() then emits such a name literally when it is not followed
 * by LPAREN.  Entries between the EXTENDED guards are only compiled
 * in when EXTENDED is defined.
 */
109 static struct keyblk keywrds[] = {	/* m4 keywords to be installed */
110 	{ "include",      INCLTYPE },
111 	{ "sinclude",     SINCTYPE },
112 	{ "define",       DEFITYPE },
113 	{ "defn",         DEFNTYPE },
114 	{ "divert",       DIVRTYPE | NOARGS },
115 	{ "expr",         EXPRTYPE },
116 	{ "eval",         EXPRTYPE },
117 	{ "substr",       SUBSTYPE },
118 	{ "ifelse",       IFELTYPE },
119 	{ "ifdef",        IFDFTYPE },
120 	{ "len",          LENGTYPE },
121 	{ "incr",         INCRTYPE },
122 	{ "decr",         DECRTYPE },
123 	{ "dnl",          DNLNTYPE | NOARGS },
124 	{ "changequote",  CHNQTYPE | NOARGS },
125 	{ "changecom",    CHNCTYPE | NOARGS },
126 	{ "index",        INDXTYPE },
127 #ifdef EXTENDED
128 	{ "paste",        PASTTYPE },
129 	{ "spaste",       SPASTYPE },
130 	/* Newer extensions, needed to handle gnu-m4 scripts */
131 	{ "indir",        INDIRTYPE},
132 	{ "builtin",      BUILTINTYPE},
133 	{ "patsubst",	  PATSTYPE},
134 	{ "regexp",	  REGEXPTYPE},
135 	{ "esyscmd",	  ESYSCMDTYPE},
136 	{ "__file__",	  FILENAMETYPE | NOARGS},
137 	{ "__line__",	  LINETYPE | NOARGS},
138 #endif
139 	{ "popdef",       POPDTYPE },
140 	{ "pushdef",      PUSDTYPE },
141 	{ "dumpdef",      DUMPTYPE | NOARGS },
142 	{ "shift",        SHIFTYPE | NOARGS },
143 	{ "translit",     TRNLTYPE },
144 	{ "undefine",     UNDFTYPE },
145 	{ "undivert",     UNDVTYPE | NOARGS },
146 	{ "divnum",       DIVNTYPE | NOARGS },
147 	{ "maketemp",     MKTMTYPE },
148 	{ "mkstemp",      MKTMTYPE },
149 	{ "errprint",     ERRPTYPE | NOARGS },
150 	{ "m4wrap",       M4WRTYPE | NOARGS },
151 	{ "m4exit",       EXITTYPE | NOARGS },
152 	{ "syscmd",       SYSCTYPE },
153 	{ "sysval",       SYSVTYPE | NOARGS },
154 	{ "traceon",	  TRACEONTYPE | NOARGS },
155 	{ "traceoff",	  TRACEOFFTYPE | NOARGS },
156 
157 	{ "unix",         SELFTYPE | NOARGS },
158 };
159 
/* number of entries in keywrds[] */
160 #define MAXKEYS	(sizeof(keywrds)/sizeof(struct keyblk))
161 
/* getopt state consumed by the option loop in main() */
162 extern int optind;
163 extern char *optarg;
164 
/*
 * Bookkeeping for diagnostics: record() stores the current input name
 * and line for each currently open quote (quotes[]) or parenthesis
 * (paren[]), and dump_stack() prints them when macro() hits EOF with
 * something still open.  Only the first MAXRECORD nesting levels are
 * remembered.
 */
165 #define MAXRECORD 50
166 static struct position {
167 	char *name;
168 	unsigned long line;
169 } quotes[MAXRECORD], paren[MAXRECORD];
170 
171 static void record(struct position *, int);
172 static void dump_stack(struct position *, int);
173 
/* file-local helpers defined further down in this file */
174 static void macro(void);
175 static void initkwds(void);
176 static ndptr inspect(int, char *);
177 static int do_look_ahead(int, const char *);
178 static void reallyoutputstr(const char *);
179 static void reallyputchar(int);
180 
181 static void enlarge_stack(void);
182 
183 int main(int, char *[]);
184 
/* process exit status returned from main(); starts out as success */
185 int exit_code = 0;
186 
187 int
188 main(int argc, char *argv[])
189 {
190 	int c;
191 	int n;
192 	char *p;
193 
194 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
195 		signal(SIGINT, onintr);
196 
197 	init_macros();
198 	initspaces();
199 	STACKMAX = INITSTACKMAX;
200 
201 	mstack = xreallocarray(NULL, STACKMAX, sizeof(stae), NULL);
202 	sstack = xalloc(STACKMAX, NULL);
203 
204 	maxout = 0;
205 	outfile = NULL;
206 	resizedivs(MAXOUT);
207 
208 	while ((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1)
209 		switch(c) {
210 
211 		case 'D':               /* define something..*/
212 			for (p = optarg; *p; p++)
213 				if (*p == '=')
214 					break;
215 			if (*p)
216 				*p++ = EOS;
217 			dodefine(optarg, p);
218 			break;
219 		case 'E':               /* like GNU m4 1.4.9+ */
220 			if (error_warns == 0)
221 				error_warns = 1;
222 			else
223 				fatal_warns = 1;
224 			break;
225 		case 'I':
226 			addtoincludepath(optarg);
227 			break;
228 		case 'P':
229 			prefix_builtins = 1;
230 			break;
231 		case 'U':               /* undefine...       */
232 			macro_popdef(optarg);
233 			break;
234 		case 'G':
235 			mimic_gnu = 0;
236 			break;
237 		case 'g':
238 			mimic_gnu = 1;
239 			break;
240 		case 'd':
241 			set_trace_flags(optarg ? optarg : "aeq");
242 			break;
243 		case 's':
244 			synch_lines = 1;
245 			break;
246 		case 't':
247 			mark_traced(optarg, 1);
248 			break;
249 		case 'o':
250 			trace_file(optarg);
251                         break;
252 		case '?':
253 			usage();
254 		}
255 
256         argc -= optind;
257         argv += optind;
258 
259 	initkwds();
260 	if (mimic_gnu)
261 		setup_builtin("format", FORMATTYPE);
262 
263 	active = stdout;		/* default active output     */
264 	bbase[0] = bufbase;
265         if (!argc) {
266 		sp = -1;		/* stack pointer initialized */
267 		fp = 0;			/* frame pointer initialized */
268 		set_input(infile+0, stdin, "stdin");
269 					/* default input (naturally) */
270 		macro();
271 	} else
272 		for (; argc--; ++argv) {
273 			p = *argv;
274 			if (p[0] == '-' && p[1] == EOS)
275 				set_input(infile, stdin, "stdin");
276 			else if (fopen_trypath(infile, p) == NULL)
277 				err(1, "%s", p);
278 			sp = -1;
279 			fp = 0;
280 			macro();
281 			release_input(infile);
282 		}
283 
284 	if (wrapindex) {
285 		int i;
286 
287 		ilevel = 0;		/* in case m4wrap includes.. */
288 		bufbase = bp = buf;	/* use the entire buffer   */
289 		if (mimic_gnu) {
290 			while (wrapindex != 0) {
291 				for (i = 0; i < wrapindex; i++)
292 					pbstr(m4wraps[i]);
293 				wrapindex =0;
294 				macro();
295 			}
296 		} else {
297 			for (i = 0; i < wrapindex; i++) {
298 				pbstr(m4wraps[i]);
299 				macro();
300 			}
301 		}
302 	}
303 
304 	if (active != stdout)
305 		active = stdout;	/* reset output just in case */
306 	for (n = 1; n < maxout; n++)	/* default wrap-up: undivert */
307 		if (outfile[n] != NULL)
308 			getdiv(n);
309 					/* remove bitbucket if used  */
310 	if (outfile[0] != NULL) {
311 		(void) fclose(outfile[0]);
312 	}
313 
314 	return exit_code;
315 }
316 
/*
 * do_look_ahead - check whether the rest of `token' follows on input.
 *
 * The caller has already consumed `t', which must equal token[0].
 * Used for the comment and quoting delimiters.
 *
 * Returns 1 when every remaining character of `token' was read from
 * the input (they stay consumed), or 0 on a mismatch or EOF, in which
 * case everything read by this call is pushed back; token[0] itself is
 * left to the caller.
 */
static int
do_look_ahead(int t, const char *token)
{
	int matched;	/* how many characters of token agree so far */

	assert((unsigned char)t == (unsigned char)token[0]);

	for (matched = 1; token[matched] != '\0'; matched++) {
		t = gpbc();
		if (t != EOF &&
		    (unsigned char)t == (unsigned char)token[matched])
			continue;

		/* undo: the offending character first, then token[matched-1..1] */
		pushback(t);
		while (--matched > 0)
			pushback(token[matched]);
		return 0;
	}
	return 1;
}
342 
/*
 * LOOK_AHEAD - true when the already-read character `t' starts the
 * delimiter `token' and the rest of the delimiter follows on input
 * (see do_look_ahead()).  Both arguments are fully parenthesized so
 * that expression arguments expand safely; note that `t' and `token'
 * are still evaluated more than once, so pass side-effect-free values.
 */
#define LOOK_AHEAD(t, token) ((t) != EOF &&			\
    (unsigned char)(t) == (unsigned char)(token)[0] &&		\
    do_look_ahead((t), (token)))
346 
347 /*
348  * macro - the work horse..
349  */
/*
 * Main scanning loop.  Reads one character at a time with gpbc() and
 * dispatches on it until EOF is seen with no enclosing input left
 * (ilevel <= 0).  While sp < 0 no macro call is being collected and
 * text is written straight to the active output; otherwise characters
 * are stored with chrsave()/CHRSAVE() as argument text of the pending
 * call.
 *
 * Cases, in the order tested: quoted strings, comments outside a call,
 * identifiers (possible macro names), EOF, plain characters outside a
 * call, and finally the characters that structure an argument list
 * (LPAREN, RPAREN, COMMA) plus everything else inside a call.
 * NOTE(review): cycle is presumably an endless-loop macro from a
 * project header -- confirm.
 */
350 static void
351 macro(void)
352 {
353 	char token[MAXTOK+1];
354 	int t, l;
355 	ndptr p;
356 	int  nlpar;
357 
358 	cycle {
359 		t = gpbc();
360 
361 		if (LOOK_AHEAD(t,lquote)) {	/* strip quotes */
362 			nlpar = 0;
363 			record(quotes, nlpar++);
364 			/*
365 			 * Opening quote: scan forward until matching
366 			 * closing quote has been found.
367 			 */
368 			do {
369 
370 				l = gpbc();
371 				if (LOOK_AHEAD(l,rquote)) {
					/* nested closing quotes are kept, the outermost one is dropped */
372 					if (--nlpar > 0)
373 						outputstr(rquote);
374 				} else if (LOOK_AHEAD(l,lquote)) {
					/* nested opening quote: remember where, keep it in the text */
375 					record(quotes, nlpar++);
376 					outputstr(lquote);
377 				} else if (l == EOF) {
					/* input ended inside a quoted string: report and give up */
378 					if (nlpar == 1)
379 						warnx("unclosed quote:");
380 					else
381 						warnx("%d unclosed quotes:", nlpar);
382 					dump_stack(quotes, nlpar);
383 					exit(1);
384 				} else {
					/* ordinary quoted character: output or save as argument text */
385 					if (nlpar > 0) {
386 						if (sp < 0)
387 							reallyputchar(l);
388 						else
389 							CHRSAVE(l);
390 					}
391 				}
392 			}
393 			while (nlpar != 0);
394 		} else if (sp < 0 && LOOK_AHEAD(t, scommt)) {
			/* comment outside any call: copy it through, delimiters included */
395 			reallyoutputstr(scommt);
396 
397 			for(;;) {
398 				t = gpbc();
399 				if (LOOK_AHEAD(t, ecommt)) {
400 					reallyoutputstr(ecommt);
401 					break;
402 				}
403 				if (t == EOF)
404 					break;
405 				reallyputchar(t);
406 			}
407 		} else if (t == '_' || isalpha(t)) {
			/* collect the identifier; p is non-NULL only for a defined macro */
408 			p = inspect(t, token);
			/* peek at the character after the name without consuming it */
409 			if (p != NULL)
410 				pushback(l = gpbc());
			/*
			 * Not a macro, or a macro that needs arguments but is
			 * not followed by LPAREN: emit the name literally.
			 */
411 			if (p == NULL || (l != LPAREN &&
412 			    (macro_getdef(p)->type & NEEDARGS) != 0))
413 				outputstr(token);
414 			else {
415 		/*
416 		 * real thing.. First build a call frame:
417 		 */
418 				pushf(fp);	/* previous call frm */
419 				pushf(macro_getdef(p)->type); /* type of the call  */
420 				pushf(is_traced(p));
421 				pushf(0);	/* parenthesis level */
422 				fp = sp;	/* new frame pointer */
423 		/*
424 		 * now push the string arguments:
425 		 */
426 				pushdef(p);			/* defn string */
427 				pushs1((char *)macro_name(p));	/* macro name  */
428 				pushs(ep);			/* start next..*/
429 
				/*
				 * No argument list follows: evaluate right away
				 * with an argument count of 2, then unwind the
				 * frame.
				 */
430 				if (l != LPAREN && PARLEV == 0) {
431 				    /* no bracks  */
432 					chrsave(EOS);
433 
434 					if (sp == (int)STACKMAX)
435 						errx(1, "internal stack overflow");
436 					eval((const char **) mstack+fp+1, 2,
437 					    CALTYP, TRACESTATUS);
438 
439 					ep = PREVEP;	/* flush strspace */
440 					sp = PREVSP;	/* previous sp..  */
441 					fp = PREVFP;	/* rewind stack...*/
442 				}
443 			}
444 		} else if (t == EOF) {
			/*
			 * EOF while a call is still being collected at the
			 * outermost input is fatal unless mimicking GNU m4.
			 */
445 			if (!mimic_gnu /* you can puke right there */
446 			    && sp > -1 && ilevel <= 0) {
447 				warnx( "unexpected end of input, unclosed parenthesis:");
448 				dump_stack(paren, PARLEV);
449 				exit(1);
450 			}
451 			if (ilevel <= 0)
452 				break;			/* all done thanks.. */
			/* nested input finished: drop it and resume the enclosing one */
453 			release_input(infile+ilevel--);
454 			emit_synchline();
455 			bufbase = bbase[ilevel];
456 			continue;
457 		} else if (sp < 0) {		/* not in a macro at all */
458 			reallyputchar(t);	/* output directly..	 */
459 		}
460 
		/* from here on a macro call is being collected (sp >= 0) */
461 		else switch(t) {
462 
463 		case LPAREN:
			/*
			 * The LPAREN that opens the argument list is not
			 * saved; nested ones are part of the argument text.
			 * Whitespace right after it is skipped, but kept when
			 * already inside the list.
			 */
464 			if (PARLEV > 0)
465 				chrsave(t);
466 			while (isspace(l = gpbc())) /* skip blank, tab, nl.. */
467 				if (PARLEV > 0)
468 					chrsave(l);
469 			pushback(l);
470 			record(paren, PARLEV++);
471 			break;
472 
473 		case RPAREN:
474 			if (--PARLEV > 0)
475 				chrsave(t);
476 			else {			/* end of argument list */
477 				chrsave(EOS);
478 
479 				if (sp == (int)STACKMAX)
480 					errx(1, "internal stack overflow");
481 
482 				eval((const char **) mstack+fp+1, sp-fp,
483 				    CALTYP, TRACESTATUS);
484 
485 				ep = PREVEP;	/* flush strspace */
486 				sp = PREVSP;	/* previous sp..  */
487 				fp = PREVFP;	/* rewind stack...*/
488 			}
489 			break;
490 
491 		case COMMA:
			/*
			 * A comma at parenthesis level 1 ends the current
			 * argument; whitespace before the next one is skipped.
			 * Deeper commas are ordinary argument text.
			 */
492 			if (PARLEV == 1) {
493 				chrsave(EOS);		/* new argument   */
494 				while (isspace(l = gpbc()))
495 					;
496 				pushback(l);
497 				pushs(ep);
498 			} else
499 				chrsave(t);
500 			break;
501 
502 		default:
			/* comments inside arguments are saved verbatim, delimiters included */
503 			if (LOOK_AHEAD(t, scommt)) {
504 				char *p;
505 				for (p = scommt; *p; p++)
506 					chrsave(*p);
507 				for(;;) {
508 					t = gpbc();
509 					if (LOOK_AHEAD(t, ecommt)) {
510 						for (p = ecommt; *p; p++)
511 							chrsave(*p);
512 						break;
513 					}
514 					if (t == EOF)
515 					    break;
516 					CHRSAVE(t);
517 				}
518 			} else
519 				CHRSAVE(t);		/* stack the char */
520 			break;
521 		}
522 	}
523 }
524 
525 /*
526  * output string directly, without pushing it for reparses.
527  */
528 void
529 outputstr(const char *s)
530 {
531 	if (sp < 0)
532 		reallyoutputstr(s);
533 	else
534 		while (*s)
535 			CHRSAVE(*s++);
536 }
537 
538 void
539 reallyoutputstr(const char *s)
540 {
541 	if (synch_lines) {
542 		while (*s) {
543 			fputc(*s, active);
544 			if (*s++ == '\n') {
545 				infile[ilevel].synch_lineno++;
546 				if (infile[ilevel].synch_lineno !=
547 				    infile[ilevel].lineno)
548 					do_emit_synchline();
549 			}
550 		}
551 	} else
552 		fputs(s, active);
553 }
554 
555 void
556 reallyputchar(int c)
557 {
558 	putc(c, active);
559 	if (synch_lines && c == '\n') {
560 		infile[ilevel].synch_lineno++;
561 		if (infile[ilevel].synch_lineno != infile[ilevel].lineno)
562 			do_emit_synchline();
563 	}
564 }
565 
566 /*
567  * build an input token..
568  * consider only those starting with _ or A-Za-z.
569  */
570 static ndptr
571 inspect(int c, char *tp)
572 {
573 	char *name = tp;
574 	char *etp = tp+MAXTOK;
575 	ndptr p;
576 
577 	*tp++ = c;
578 
579 	while ((isalnum(c = gpbc()) || c == '_') && tp < etp)
580 		*tp++ = c;
581 	if (c != EOF)
582 		PUSHBACK(c);
583 	*tp = EOS;
584 	/* token is too long, it won't match anything, but it can still
585 	 * be output. */
586 	if (tp == ep) {
587 		outputstr(name);
588 		while (isalnum(c = gpbc()) || c == '_') {
589 			if (sp < 0)
590 				reallyputchar(c);
591 			else
592 				CHRSAVE(c);
593 		}
594 		*name = EOS;
595 		return NULL;
596 	}
597 
598 	p = ohash_find(&macros, ohash_qlookupi(&macros, name, (const char **)&tp));
599 	if (p == NULL)
600 		return NULL;
601 	if (macro_getdef(p) == NULL)
602 		return NULL;
603 	return p;
604 }
605 
606 /*
607  * initkwds - initialise m4 keywords as fast as possible.
608  * This very similar to install, but without certain overheads,
609  * such as calling lookup. Malloc is not used for storing the
610  * keyword strings, since we simply use the static pointers
611  * within keywrds block.
612  */
613 static void
614 initkwds(void)
615 {
616 	unsigned int type;
617 	int i;
618 
619 	for (i = 0; i < (int)MAXKEYS; i++) {
620 		type = keywrds[i].ktyp & TYPEMASK;
621 		if ((keywrds[i].ktyp & NOARGS) == 0)
622 			type |= NEEDARGS;
623 		setup_builtin(keywrds[i].knam, type);
624 	}
625 }
626 
627 static void
628 record(struct position *t, int lev)
629 {
630 	if (lev < MAXRECORD) {
631 		t[lev].name = CURRENT_NAME;
632 		t[lev].line = CURRENT_LINE;
633 	}
634 }
635 
636 static void
637 dump_stack(struct position *t, int lev)
638 {
639 	int i;
640 
641 	for (i = 0; i < lev; i++) {
642 		if (i == MAXRECORD) {
643 			fprintf(stderr, "   ...\n");
644 			break;
645 		}
646 		fprintf(stderr, "   %s at line %lu\n",
647 			t[i].name, t[i].line);
648 	}
649 }
650 
651 
652 static void
653 enlarge_stack(void)
654 {
655 	STACKMAX += STACKMAX/2;
656 	mstack = xreallocarray(mstack, STACKMAX, sizeof(stae),
657 	    "Evaluation stack overflow (%lu)",
658 	    (unsigned long)STACKMAX);
659 	sstack = xrealloc(sstack, STACKMAX,
660 	    "Evaluation stack overflow (%lu)",
661 	    (unsigned long)STACKMAX);
662 }
663