1 /* $OpenBSD: main.c,v 1.53 2002/04/26 16:15:16 espie Exp $ */ 2 /* $NetBSD: main.c,v 1.12 1997/02/08 23:54:49 cgd Exp $ */ 3 4 /*- 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Ozan Yigit at York University. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #ifndef lint 37 #if 0 38 static char copyright[] = 39 "@(#) Copyright (c) 1989, 1993\n\ 40 The Regents of the University of California. All rights reserved.\n"; 41 #endif 42 #endif /* not lint */ 43 44 #ifndef lint 45 #if 0 46 static char sccsid[] = "@(#)main.c 8.1 (Berkeley) 6/6/93"; 47 #else 48 #if 0 49 static char rcsid[] = "$OpenBSD: main.c,v 1.53 2002/04/26 16:15:16 espie Exp $"; 50 #endif 51 #endif 52 #endif /* not lint */ 53 54 #include <sys/cdefs.h> 55 __FBSDID("$FreeBSD$"); 56 57 /* 58 * main.c 59 * Facility: m4 macro processor 60 * by: oz 61 */ 62 63 #include <sys/types.h> 64 #include <assert.h> 65 #include <signal.h> 66 #include <errno.h> 67 #include <unistd.h> 68 #include <stdio.h> 69 #include <ctype.h> 70 #include <string.h> 71 #include <stddef.h> 72 #include <stdlib.h> 73 #include <err.h> 74 #include <locale.h> 75 #include "mdef.h" 76 #include "stdd.h" 77 #include "extern.h" 78 #include "pathnames.h" 79 80 ndptr hashtab[HASHSIZE]; /* hash table for macros etc. */ 81 stae *mstack; /* stack of m4 machine */ 82 char *sstack; /* shadow stack, for string space extension */ 83 static size_t STACKMAX; /* current maximum size of stack */ 84 int sp; /* current m4 stack pointer */ 85 int fp; /* m4 call frame pointer */ 86 struct input_file infile[MAXINP];/* input file stack (0=stdin) */ 87 char *inname[MAXINP]; /* names of these input files */ 88 int inlineno[MAXINP]; /* current number in each input file */ 89 FILE **outfile; /* diversion array(0=bitbucket)*/ 90 int maxout; 91 FILE *active; /* active output file pointer */ 92 int ilevel = 0; /* input file stack pointer */ 93 int oindex = 0; /* diversion index.. */ 94 char null[] = ""; /* as it says.. just a null.. */ 95 const char *m4wraps = ""; /* m4wrap string default.. */ 96 char lquote[MAXCCHARS+1] = {LQUOTE}; /* left quote character (`) */ 97 char rquote[MAXCCHARS+1] = {RQUOTE}; /* right quote character (') */ 98 char scommt[MAXCCHARS+1] = {SCOMMT}; /* start character for comment */ 99 char ecommt[MAXCCHARS+1] = {ECOMMT}; /* end character for comment */ 100 int synccpp; /* Line synchronisation for C preprocessor */ 101 102 static const struct keyblk keywrds[] = { /* m4 keywords to be installed */ 103 { "include", INCLTYPE }, 104 { "sinclude", SINCTYPE }, 105 { "define", DEFITYPE }, 106 { "defn", DEFNTYPE }, 107 { "divert", DIVRTYPE | NOARGS }, 108 { "expr", EXPRTYPE }, 109 { "eval", EXPRTYPE }, 110 { "substr", SUBSTYPE }, 111 { "ifelse", IFELTYPE }, 112 { "ifdef", IFDFTYPE }, 113 { "len", LENGTYPE }, 114 { "incr", INCRTYPE }, 115 { "decr", DECRTYPE }, 116 { "dnl", DNLNTYPE | NOARGS }, 117 { "changequote", CHNQTYPE | NOARGS }, 118 { "changecom", CHNCTYPE | NOARGS }, 119 { "index", INDXTYPE }, 120 #ifdef EXTENDED 121 { "paste", PASTTYPE }, 122 { "spaste", SPASTYPE }, 123 /* Newer extensions, needed to handle gnu-m4 scripts */ 124 { "indir", INDIRTYPE}, 125 { "builtin", BUILTINTYPE}, 126 { "patsubst", PATSTYPE}, 127 { "regexp", REGEXPTYPE}, 128 { "esyscmd", ESYSCMDTYPE}, 129 { "__file__", FILENAMETYPE | NOARGS}, 130 { "__line__", LINETYPE | NOARGS}, 131 #endif 132 { "popdef", POPDTYPE }, 133 { "pushdef", PUSDTYPE }, 134 { "dumpdef", DUMPTYPE | NOARGS }, 135 { "shift", SHIFTYPE | NOARGS }, 136 { "translit", TRNLTYPE }, 137 { "undefine", UNDFTYPE }, 138 { "undivert", UNDVTYPE | NOARGS }, 139 { "divnum", DIVNTYPE | NOARGS }, 140 { "maketemp", MKTMTYPE }, 141 { "errprint", ERRPTYPE | NOARGS }, 142 { "m4wrap", M4WRTYPE | NOARGS }, 143 { "m4exit", EXITTYPE | NOARGS }, 144 { "syscmd", SYSCTYPE }, 145 { "sysval", SYSVTYPE | NOARGS }, 146 { "traceon", TRACEONTYPE | NOARGS }, 147 { "traceoff", TRACEOFFTYPE | NOARGS }, 148 149 #if defined(unix) || defined(__unix__) 150 { "unix", SELFTYPE | NOARGS }, 151 #else 152 #ifdef vms 153 { "vms", SELFTYPE | NOARGS }, 154 #endif 155 #endif 156 }; 157 158 #define MAXKEYS (sizeof(keywrds)/sizeof(struct keyblk)) 159 160 #define MAXRECORD 50 161 static struct position { 162 char *name; 163 unsigned long line; 164 } quotes[MAXRECORD], paren[MAXRECORD]; 165 166 static void record(struct position *, int); 167 static void dump_stack(struct position *, int); 168 169 static void macro(void); 170 static void initkwds(void); 171 static ndptr inspect(int, char *); 172 static int do_look_ahead(int, const char *); 173 174 static void enlarge_stack(void); 175 176 int 177 main(int argc, char *argv[]) 178 { 179 int c; 180 int n; 181 int rval; 182 char *p; 183 184 setlocale(LC_ALL, ""); 185 186 traceout = stderr; 187 188 if (signal(SIGINT, SIG_IGN) != SIG_IGN) 189 signal(SIGINT, onintr); 190 191 initkwds(); 192 initspaces(); 193 STACKMAX = INITSTACKMAX; 194 195 mstack = (stae *)xalloc(sizeof(stae) * STACKMAX); 196 sstack = (char *)xalloc(STACKMAX); 197 198 maxout = 0; 199 outfile = NULL; 200 resizedivs(MAXOUT); 201 202 while ((c = getopt(argc, argv, "gst:d:D:U:o:I:")) != -1) 203 switch(c) { 204 case 'D': /* define something..*/ 205 for (p = optarg; *p; p++) 206 if (*p == '=') 207 break; 208 if (p == optarg) 209 errx(1, "null variable cannot be defined"); 210 if (*p) 211 *p++ = EOS; 212 dodefine(optarg, p); 213 break; 214 case 'I': 215 addtoincludepath(optarg); 216 break; 217 case 'U': /* undefine... */ 218 remhash(optarg, TOP); 219 break; 220 case 'g': 221 mimic_gnu = 1; 222 break; 223 case 'd': 224 set_trace_flags(optarg); 225 break; 226 case 's': 227 synccpp = 1; 228 break; 229 case 't': 230 mark_traced(optarg, 1); 231 break; 232 case 'o': 233 trace_file(optarg); 234 break; 235 case '?': 236 default: 237 usage(); 238 } 239 240 argc -= optind; 241 argv += optind; 242 243 rval = 0; 244 active = stdout; /* default active output */ 245 bbase[0] = bufbase; 246 if (!argc) { 247 sp = -1; /* stack pointer initialized */ 248 fp = 0; /* frame pointer initialized */ 249 set_input(infile+0, stdin, "stdin"); 250 /* default input (naturally) */ 251 if ((inname[0] = strdup("-")) == NULL) 252 err(1, NULL); 253 inlineno[0] = 1; 254 emitline(); 255 macro(); 256 } else 257 for (; argc--; ++argv) { 258 p = *argv; 259 if (p[0] == '-' && p[1] == EOS) 260 set_input(infile, stdin, "stdin"); 261 else if (fopen_trypath(infile, p) == NULL) { 262 warn("%s", p); 263 rval = 1; 264 continue; 265 } 266 sp = -1; 267 fp = 0; 268 if ((inname[0] = strdup(p)) == NULL) 269 err(1, NULL); 270 inlineno[0] = 1; 271 emitline(); 272 macro(); 273 release_input(infile); 274 } 275 276 if (*m4wraps) { /* anything for rundown ?? */ 277 ilevel = 0; /* in case m4wrap includes.. */ 278 bufbase = bp = buf; /* use the entire buffer */ 279 pbstr(m4wraps); /* user-defined wrapup act */ 280 macro(); /* last will and testament */ 281 } 282 283 if (active != stdout) 284 active = stdout; /* reset output just in case */ 285 for (n = 1; n < maxout; n++) /* default wrap-up: undivert */ 286 if (outfile[n] != NULL) 287 getdiv(n); 288 /* remove bitbucket if used */ 289 if (outfile[0] != NULL) { 290 (void) fclose(outfile[0]); 291 } 292 293 exit(rval); 294 } 295 296 /* 297 * Look ahead for `token'. 298 * (on input `t == token[0]') 299 * Used for comment and quoting delimiters. 300 * Returns 1 if `token' present; copied to output. 301 * 0 if `token' not found; all characters pushed back 302 */ 303 static int 304 do_look_ahead(int t, const char *token) 305 { 306 int i; 307 308 assert((unsigned char)t == (unsigned char)token[0]); 309 310 for (i = 1; *++token; i++) { 311 t = gpbc(); 312 if (t == EOF || (unsigned char)t != (unsigned char)*token) { 313 putback(t); 314 while (--i) 315 putback(*--token); 316 return 0; 317 } 318 } 319 return 1; 320 } 321 322 #define LOOK_AHEAD(t, token) (t != EOF && \ 323 (unsigned char)(t)==(unsigned char)(token)[0] && \ 324 do_look_ahead(t,token)) 325 326 /* 327 * macro - the work horse.. 328 */ 329 static void 330 macro(void) 331 { 332 char token[MAXTOK+1]; 333 int t, l; 334 ndptr p; 335 int nlpar; 336 337 cycle { 338 t = gpbc(); 339 if (t == '_' || isalpha(t)) { 340 p = inspect(t, token); 341 if (p != nil) 342 putback(l = gpbc()); 343 if (p == nil || (l != LPAREN && 344 (p->type & NEEDARGS) != 0)) 345 outputstr(token); 346 else { 347 /* 348 * real thing.. First build a call frame: 349 */ 350 pushf(fp); /* previous call frm */ 351 pushf(p->type); /* type of the call */ 352 pushf(0); /* parenthesis level */ 353 fp = sp; /* new frame pointer */ 354 /* 355 * now push the string arguments: 356 */ 357 pushs1(p->defn); /* defn string */ 358 pushs1(p->name); /* macro name */ 359 pushs(ep); /* start next..*/ 360 361 if (l != LPAREN && PARLEV == 0) { 362 /* no bracks */ 363 chrsave(EOS); 364 365 if ((uintptr_t)sp == STACKMAX) 366 errx(1, "internal stack overflow"); 367 eval((const char **) mstack+fp+1, 2, 368 CALTYP); 369 370 ep = PREVEP; /* flush strspace */ 371 sp = PREVSP; /* previous sp.. */ 372 fp = PREVFP; /* rewind stack...*/ 373 } 374 } 375 } else if (t == EOF) { 376 if (sp > -1) { 377 warnx( "unexpected end of input, unclosed parenthesis:"); 378 dump_stack(paren, PARLEV); 379 exit(1); 380 } 381 if (ilevel <= 0) 382 break; /* all done thanks.. */ 383 release_input(infile+ilevel--); 384 free(inname[ilevel+1]); 385 bufbase = bbase[ilevel]; 386 emitline(); 387 continue; 388 } 389 /* 390 * non-alpha token possibly seen.. 391 * [the order of else if .. stmts is important.] 392 */ 393 else if (LOOK_AHEAD(t,lquote)) { /* strip quotes */ 394 nlpar = 0; 395 record(quotes, nlpar++); 396 /* 397 * Opening quote: scan forward until matching 398 * closing quote has been found. 399 */ 400 do { 401 402 l = gpbc(); 403 if (LOOK_AHEAD(l,rquote)) { 404 if (--nlpar > 0) 405 outputstr(rquote); 406 } else if (LOOK_AHEAD(l,lquote)) { 407 record(quotes, nlpar++); 408 outputstr(lquote); 409 } else if (l == EOF) { 410 if (nlpar == 1) 411 warnx("unclosed quote:"); 412 else 413 warnx("%d unclosed quotes:", nlpar); 414 dump_stack(quotes, nlpar); 415 exit(1); 416 } else { 417 if (nlpar > 0) { 418 if (sp < 0) 419 putc(l, active); 420 else 421 CHRSAVE(l); 422 } 423 } 424 } 425 while (nlpar != 0); 426 } 427 428 else if (sp < 0 && LOOK_AHEAD(t, scommt)) { 429 fputs(scommt, active); 430 431 for(;;) { 432 t = gpbc(); 433 if (LOOK_AHEAD(t, ecommt)) { 434 fputs(ecommt, active); 435 break; 436 } 437 if (t == EOF) 438 break; 439 putc(t, active); 440 } 441 } 442 443 else if (sp < 0) { /* not in a macro at all */ 444 putc(t, active); /* output directly.. */ 445 } 446 447 else switch(t) { 448 449 case LPAREN: 450 if (PARLEV > 0) 451 chrsave(t); 452 while (isspace(l = gpbc())) 453 ; /* skip blank, tab, nl.. */ 454 putback(l); 455 record(paren, PARLEV++); 456 break; 457 458 case RPAREN: 459 if (--PARLEV > 0) 460 chrsave(t); 461 else { /* end of argument list */ 462 chrsave(EOS); 463 464 if ((uintptr_t)sp == STACKMAX) 465 errx(1, "internal stack overflow"); 466 467 eval((const char **) mstack+fp+1, sp-fp, 468 CALTYP); 469 470 ep = PREVEP; /* flush strspace */ 471 sp = PREVSP; /* previous sp.. */ 472 fp = PREVFP; /* rewind stack...*/ 473 } 474 break; 475 476 case COMMA: 477 if (PARLEV == 1) { 478 chrsave(EOS); /* new argument */ 479 while (isspace(l = gpbc())) 480 ; 481 putback(l); 482 pushs(ep); 483 } else 484 chrsave(t); 485 break; 486 487 default: 488 if (LOOK_AHEAD(t, scommt)) { 489 char *pc; 490 for (pc = scommt; *pc; pc++) 491 chrsave(*pc); 492 for(;;) { 493 t = gpbc(); 494 if (LOOK_AHEAD(t, ecommt)) { 495 for (pc = ecommt; *pc; pc++) 496 chrsave(*pc); 497 break; 498 } 499 if (t == EOF) 500 break; 501 CHRSAVE(t); 502 } 503 } else 504 CHRSAVE(t); /* stack the char */ 505 break; 506 } 507 } 508 } 509 510 /* 511 * output string directly, without pushing it for reparses. 512 */ 513 void 514 outputstr(const char *s) 515 { 516 if (sp < 0) 517 while (*s) 518 putc(*s++, active); 519 else 520 while (*s) 521 CHRSAVE(*s++); 522 } 523 524 /* 525 * build an input token.. 526 * consider only those starting with _ or A-Za-z. This is a 527 * combo with lookup to speed things up. 528 */ 529 static ndptr 530 inspect(int c, char *tp) 531 { 532 char *name = tp; 533 char *etp = tp+MAXTOK; 534 ndptr p; 535 unsigned int h; 536 537 h = *tp++ = c; 538 539 while ((isalnum(c = gpbc()) || c == '_') && tp < etp) 540 h = (h << 5) + h + (*tp++ = c); 541 if (c != EOF) 542 PUTBACK(c); 543 *tp = EOS; 544 /* token is too long, it won't match anything, but it can still 545 * be output. */ 546 if (tp == ep) { 547 outputstr(name); 548 while (isalnum(c = gpbc()) || c == '_') { 549 if (sp < 0) 550 putc(c, active); 551 else 552 CHRSAVE(c); 553 } 554 *name = EOS; 555 return nil; 556 } 557 558 for (p = hashtab[h % HASHSIZE]; p != nil; p = p->nxtptr) 559 if (h == p->hv && STREQ(name, p->name)) 560 break; 561 return p; 562 } 563 564 /* 565 * initkwds - initialise m4 keywords as fast as possible. 566 * This very similar to install, but without certain overheads, 567 * such as calling lookup. Malloc is not used for storing the 568 * keyword strings, since we simply use the static pointers 569 * within keywrds block. 570 */ 571 static void 572 initkwds(void) 573 { 574 size_t i; 575 unsigned int h; 576 ndptr p; 577 578 for (i = 0; i < MAXKEYS; i++) { 579 h = hash(keywrds[i].knam); 580 p = (ndptr) xalloc(sizeof(struct ndblock)); 581 p->nxtptr = hashtab[h % HASHSIZE]; 582 hashtab[h % HASHSIZE] = p; 583 p->name = xstrdup(keywrds[i].knam); 584 p->defn = null; 585 p->hv = h; 586 p->type = keywrds[i].ktyp & TYPEMASK; 587 if ((keywrds[i].ktyp & NOARGS) == 0) 588 p->type |= NEEDARGS; 589 } 590 } 591 592 /* Look up a builtin type, even if overridden by the user */ 593 int 594 builtin_type(const char *key) 595 { 596 int i; 597 598 for (i = 0; i != MAXKEYS; i++) 599 if (STREQ(keywrds[i].knam, key)) 600 return keywrds[i].ktyp; 601 return -1; 602 } 603 604 const char * 605 builtin_realname(int n) 606 { 607 int i; 608 609 for (i = 0; i != MAXKEYS; i++) 610 if (((keywrds[i].ktyp ^ n) & TYPEMASK) == 0) 611 return keywrds[i].knam; 612 return NULL; 613 } 614 615 static void 616 record(struct position *t, int lev) 617 { 618 if (lev < MAXRECORD) { 619 t[lev].name = CURRENT_NAME; 620 t[lev].line = CURRENT_LINE; 621 } 622 } 623 624 static void 625 dump_stack(struct position *t, int lev) 626 { 627 int i; 628 629 for (i = 0; i < lev; i++) { 630 if (i == MAXRECORD) { 631 fprintf(stderr, " ...\n"); 632 break; 633 } 634 fprintf(stderr, " %s at line %lu\n", 635 t[i].name, t[i].line); 636 } 637 } 638 639 640 static void 641 enlarge_stack(void) 642 { 643 STACKMAX *= 2; 644 mstack = realloc(mstack, sizeof(stae) * STACKMAX); 645 sstack = realloc(sstack, STACKMAX); 646 if (mstack == NULL || sstack == NULL) 647 errx(1, "Evaluation stack overflow (%lu)", 648 (unsigned long)STACKMAX); 649 } 650 651 /* Emit preprocessor #line directive if -s option used. */ 652 void 653 emitline(void) 654 { 655 656 if (synccpp) 657 fprintf(active, "#line %d \"%s\"\n", inlineno[ilevel], 658 inname[ilevel]); 659 } 660