1 /*- 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Kenneth Almquist. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
 *
 *	$Id: parser.c,v 1.20 1997/04/28 03:22:09 steve Exp $
 */

#ifndef lint
static char const sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95";
#endif /* not lint */

#include <stdlib.h>

#include "shell.h"
#include "parser.h"
#include "nodes.h"
#include "expand.h"	/* defines rmescapes() */
#include "redir.h"	/* defines copyfd() */
#include "syntax.h"
#include "options.h"
#include "input.h"
#include "output.h"
#include "var.h"
#include "error.h"
#include "memalloc.h"
#include "mystring.h"
#include "alias.h"
#include "show.h"
#ifndef NO_HISTORY
#include "myhistedit.h"
#endif

/*
 * Shell command parser.
 */

/*
 * Longest here-document end marker accepted by parsefname(); also sizes
 * the line buffer readtoken1() uses when looking for that marker.
 */
#define EOFMARKLEN 79

/* values returned by readtoken */
#include "token.h"



/*
 * One pending here document.  Entries are allocated when a "<<" operator
 * is parsed and appended to heredoclist once the end marker is known.
 */
struct heredoc {
	struct heredoc *next;	/* next here document in list */
	union node *here;		/* redirection node */
	char *eofmark;		/* string indicating end of input */
	int striptabs;		/* if set, strip leading tabs */
};



struct heredoc *heredoclist;	/* list of here documents to read */
int parsebackquote;		/* nonzero if we are inside backquotes */
int doprompt;			/* if set, prompt the user */
int needprompt;			/* true if interactive and at start of line */
int lasttoken;			/* last token read */
MKINIT int tokpushback;		/* last token pushed back */
char *wordtext;			/* text of last word returned by readtoken */
MKINIT int checkkwd;		/* 1 == check for kwds, 2 == also eat newlines */
struct nodelist *backquotelist;	/* backquoted commands found in the last word */
union node *redirnode;		/* node built for the last TREDIR token */
struct heredoc *heredoc;	/* record created for the last "<<" operator */
int quoteflag;			/* set if (part of) last token was quoted */
int startlinno;			/* line # where last token started */

/* XXX When 'noaliases' is set to one, no alias expansion takes place. */
static int noaliases = 0;

#define	GDB_HACK 1 /* avoid local declarations which gdb can't handle */
#ifdef GDB_HACK
/* Word text used as the argument list of a "for" loop that has no "in". */
static const char argvars[5] = {CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'};
/* Operator characters of ${var<op>word}; the index selects the subtype. */
static const char types[] = "}-+?=";
#endif


STATIC union node *list __P((int));
STATIC union node *andor __P((void));
STATIC union node *pipeline __P((void));
STATIC union node *command __P((void));
STATIC union node *simplecmd __P((union node **, union node *));
STATIC union node *makename __P((void));
STATIC void parsefname __P((void));
STATIC void parseheredoc __P((void));
STATIC int peektoken __P((void));
STATIC int readtoken __P((void));
STATIC int xxreadtoken __P((void));
STATIC int readtoken1 __P((int, char const *, char *, int));
STATIC int noexpand __P((char *));
STATIC void synexpect __P((int));
STATIC void synerror __P((char *));
STATIC void setprompt __P((int));


/*
 * Read and parse a command.  Returns NEOF on end of file.  (NULL is a
 * valid parse tree indicating a blank line.)
129 */ 130 131 union node * 132 parsecmd(interact) 133 int interact; 134 { 135 int t; 136 137 doprompt = interact; 138 if (doprompt) 139 setprompt(1); 140 else 141 setprompt(0); 142 needprompt = 0; 143 t = readtoken(); 144 if (t == TEOF) 145 return NEOF; 146 if (t == TNL) 147 return NULL; 148 tokpushback++; 149 return list(1); 150 } 151 152 153 STATIC union node * 154 list(nlflag) 155 int nlflag; 156 { 157 union node *n1, *n2, *n3; 158 int tok; 159 160 checkkwd = 2; 161 if (nlflag == 0 && tokendlist[peektoken()]) 162 return NULL; 163 n1 = NULL; 164 for (;;) { 165 n2 = andor(); 166 tok = readtoken(); 167 if (tok == TBACKGND) { 168 if (n2->type == NCMD || n2->type == NPIPE) { 169 n2->ncmd.backgnd = 1; 170 } else if (n2->type == NREDIR) { 171 n2->type = NBACKGND; 172 } else { 173 n3 = (union node *)stalloc(sizeof (struct nredir)); 174 n3->type = NBACKGND; 175 n3->nredir.n = n2; 176 n3->nredir.redirect = NULL; 177 n2 = n3; 178 } 179 } 180 if (n1 == NULL) { 181 n1 = n2; 182 } 183 else { 184 n3 = (union node *)stalloc(sizeof (struct nbinary)); 185 n3->type = NSEMI; 186 n3->nbinary.ch1 = n1; 187 n3->nbinary.ch2 = n2; 188 n1 = n3; 189 } 190 switch (tok) { 191 case TBACKGND: 192 case TSEMI: 193 tok = readtoken(); 194 /* fall through */ 195 case TNL: 196 if (tok == TNL) { 197 parseheredoc(); 198 if (nlflag) 199 return n1; 200 } else { 201 tokpushback++; 202 } 203 checkkwd = 2; 204 if (tokendlist[peektoken()]) 205 return n1; 206 break; 207 case TEOF: 208 if (heredoclist) 209 parseheredoc(); 210 else 211 pungetc(); /* push back EOF on input */ 212 return n1; 213 default: 214 if (nlflag) 215 synexpect(-1); 216 tokpushback++; 217 return n1; 218 } 219 } 220 } 221 222 223 224 STATIC union node * 225 andor() { 226 union node *n1, *n2, *n3; 227 int t; 228 229 n1 = pipeline(); 230 for (;;) { 231 if ((t = readtoken()) == TAND) { 232 t = NAND; 233 } else if (t == TOR) { 234 t = NOR; 235 } else { 236 tokpushback++; 237 return n1; 238 } 239 n2 = pipeline(); 240 n3 = (union node 
*)stalloc(sizeof (struct nbinary)); 241 n3->type = t; 242 n3->nbinary.ch1 = n1; 243 n3->nbinary.ch2 = n2; 244 n1 = n3; 245 } 246 } 247 248 249 250 STATIC union node * 251 pipeline() { 252 union node *n1, *pipenode, *notnode; 253 struct nodelist *lp, *prev; 254 int negate = 0; 255 256 TRACE(("pipeline: entered\n")); 257 while (readtoken() == TNOT) { 258 TRACE(("pipeline: TNOT recognized\n")); 259 negate = !negate; 260 } 261 tokpushback++; 262 n1 = command(); 263 if (readtoken() == TPIPE) { 264 pipenode = (union node *)stalloc(sizeof (struct npipe)); 265 pipenode->type = NPIPE; 266 pipenode->npipe.backgnd = 0; 267 lp = (struct nodelist *)stalloc(sizeof (struct nodelist)); 268 pipenode->npipe.cmdlist = lp; 269 lp->n = n1; 270 do { 271 prev = lp; 272 lp = (struct nodelist *)stalloc(sizeof (struct nodelist)); 273 lp->n = command(); 274 prev->next = lp; 275 } while (readtoken() == TPIPE); 276 lp->next = NULL; 277 n1 = pipenode; 278 } 279 tokpushback++; 280 if (negate) { 281 notnode = (union node *)stalloc(sizeof(struct nnot)); 282 notnode->type = NNOT; 283 notnode->nnot.com = n1; 284 n1 = notnode; 285 } 286 return n1; 287 } 288 289 290 291 STATIC union node * 292 command() { 293 union node *n1, *n2; 294 union node *ap, **app; 295 union node *cp, **cpp; 296 union node *redir, **rpp; 297 int t; 298 299 checkkwd = 2; 300 redir = NULL; 301 n1 = NULL; 302 rpp = &redir; 303 304 /* Check for redirection which may precede command */ 305 while (readtoken() == TREDIR) { 306 *rpp = n2 = redirnode; 307 rpp = &n2->nfile.next; 308 parsefname(); 309 } 310 tokpushback++; 311 312 switch (readtoken()) { 313 case TIF: 314 n1 = (union node *)stalloc(sizeof (struct nif)); 315 n1->type = NIF; 316 n1->nif.test = list(0); 317 if (readtoken() != TTHEN) 318 synexpect(TTHEN); 319 n1->nif.ifpart = list(0); 320 n2 = n1; 321 while (readtoken() == TELIF) { 322 n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif)); 323 n2 = n2->nif.elsepart; 324 n2->type = NIF; 325 n2->nif.test = list(0); 326 
if (readtoken() != TTHEN) 327 synexpect(TTHEN); 328 n2->nif.ifpart = list(0); 329 } 330 if (lasttoken == TELSE) 331 n2->nif.elsepart = list(0); 332 else { 333 n2->nif.elsepart = NULL; 334 tokpushback++; 335 } 336 if (readtoken() != TFI) 337 synexpect(TFI); 338 checkkwd = 1; 339 break; 340 case TWHILE: 341 case TUNTIL: { 342 int got; 343 n1 = (union node *)stalloc(sizeof (struct nbinary)); 344 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL; 345 n1->nbinary.ch1 = list(0); 346 if ((got=readtoken()) != TDO) { 347 TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : "")); 348 synexpect(TDO); 349 } 350 n1->nbinary.ch2 = list(0); 351 if (readtoken() != TDONE) 352 synexpect(TDONE); 353 checkkwd = 1; 354 break; 355 } 356 case TFOR: 357 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext)) 358 synerror("Bad for loop variable"); 359 n1 = (union node *)stalloc(sizeof (struct nfor)); 360 n1->type = NFOR; 361 n1->nfor.var = wordtext; 362 if (readtoken() == TWORD && ! quoteflag && equal(wordtext, "in")) { 363 app = ≈ 364 while (readtoken() == TWORD) { 365 n2 = (union node *)stalloc(sizeof (struct narg)); 366 n2->type = NARG; 367 n2->narg.text = wordtext; 368 n2->narg.backquote = backquotelist; 369 *app = n2; 370 app = &n2->narg.next; 371 } 372 *app = NULL; 373 n1->nfor.args = ap; 374 if (lasttoken != TNL && lasttoken != TSEMI) 375 synexpect(-1); 376 } else { 377 #ifndef GDB_HACK 378 static const char argvars[5] = {CTLVAR, VSNORMAL|VSQUOTE, 379 '@', '=', '\0'}; 380 #endif 381 n2 = (union node *)stalloc(sizeof (struct narg)); 382 n2->type = NARG; 383 n2->narg.text = (char *)argvars; 384 n2->narg.backquote = NULL; 385 n2->narg.next = NULL; 386 n1->nfor.args = n2; 387 /* 388 * Newline or semicolon here is optional (but note 389 * that the original Bourne shell only allowed NL). 
390 */ 391 if (lasttoken != TNL && lasttoken != TSEMI) 392 tokpushback++; 393 } 394 checkkwd = 2; 395 if ((t = readtoken()) == TDO) 396 t = TDONE; 397 else if (t == TBEGIN) 398 t = TEND; 399 else 400 synexpect(-1); 401 n1->nfor.body = list(0); 402 if (readtoken() != t) 403 synexpect(t); 404 checkkwd = 1; 405 break; 406 case TCASE: 407 n1 = (union node *)stalloc(sizeof (struct ncase)); 408 n1->type = NCASE; 409 if (readtoken() != TWORD) 410 synexpect(TWORD); 411 n1->ncase.expr = n2 = (union node *)stalloc(sizeof (struct narg)); 412 n2->type = NARG; 413 n2->narg.text = wordtext; 414 n2->narg.backquote = backquotelist; 415 n2->narg.next = NULL; 416 while (readtoken() == TNL); 417 if (lasttoken != TWORD || ! equal(wordtext, "in")) 418 synerror("expecting \"in\""); 419 cpp = &n1->ncase.cases; 420 noaliases = 1; /* turn off alias expansion */ 421 checkkwd = 2, readtoken(); 422 do { 423 *cpp = cp = (union node *)stalloc(sizeof (struct nclist)); 424 cp->type = NCLIST; 425 app = &cp->nclist.pattern; 426 for (;;) { 427 *app = ap = (union node *)stalloc(sizeof (struct narg)); 428 ap->type = NARG; 429 ap->narg.text = wordtext; 430 ap->narg.backquote = backquotelist; 431 if (checkkwd = 2, readtoken() != TPIPE) 432 break; 433 app = &ap->narg.next; 434 readtoken(); 435 } 436 ap->narg.next = NULL; 437 if (lasttoken != TRP) 438 noaliases = 0, synexpect(TRP); 439 cp->nclist.body = list(0); 440 441 checkkwd = 2; 442 if ((t = readtoken()) != TESAC) { 443 if (t != TENDCASE) 444 noaliases = 0, synexpect(TENDCASE); 445 else 446 checkkwd = 2, readtoken(); 447 } 448 cpp = &cp->nclist.next; 449 } while(lasttoken != TESAC); 450 noaliases = 0; /* reset alias expansion */ 451 *cpp = NULL; 452 checkkwd = 1; 453 break; 454 case TLP: 455 n1 = (union node *)stalloc(sizeof (struct nredir)); 456 n1->type = NSUBSHELL; 457 n1->nredir.n = list(0); 458 n1->nredir.redirect = NULL; 459 if (readtoken() != TRP) 460 synexpect(TRP); 461 checkkwd = 1; 462 break; 463 case TBEGIN: 464 n1 = list(0); 465 if 
(readtoken() != TEND) 466 synexpect(TEND); 467 checkkwd = 1; 468 break; 469 /* Handle an empty command like other simple commands. */ 470 case TSEMI: 471 /* 472 * An empty command before a ; doesn't make much sense, and 473 * should certainly be disallowed in the case of `if ;'. 474 */ 475 if (!redir) 476 synexpect(-1); 477 case TAND: 478 case TOR: 479 case TNL: 480 case TEOF: 481 case TWORD: 482 case TRP: 483 tokpushback++; 484 return simplecmd(rpp, redir); 485 default: 486 synexpect(-1); 487 } 488 489 /* Now check for redirection which may follow command */ 490 while (readtoken() == TREDIR) { 491 *rpp = n2 = redirnode; 492 rpp = &n2->nfile.next; 493 parsefname(); 494 } 495 tokpushback++; 496 *rpp = NULL; 497 if (redir) { 498 if (n1->type != NSUBSHELL) { 499 n2 = (union node *)stalloc(sizeof (struct nredir)); 500 n2->type = NREDIR; 501 n2->nredir.n = n1; 502 n1 = n2; 503 } 504 n1->nredir.redirect = redir; 505 } 506 return n1; 507 } 508 509 510 STATIC union node * 511 simplecmd(rpp, redir) 512 union node **rpp, *redir; 513 { 514 union node *args, **app; 515 union node **orig_rpp = rpp; 516 union node *n = NULL; 517 518 /* If we don't have any redirections already, then we must reset */ 519 /* rpp to be the address of the local redir variable. */ 520 if (redir == 0) 521 rpp = &redir; 522 523 args = NULL; 524 app = &args; 525 /* 526 * We save the incoming value, because we need this for shell 527 * functions. There can not be a redirect or an argument between 528 * the function name and the open parenthesis. 
529 */ 530 orig_rpp = rpp; 531 532 for (;;) { 533 if (readtoken() == TWORD) { 534 n = (union node *)stalloc(sizeof (struct narg)); 535 n->type = NARG; 536 n->narg.text = wordtext; 537 n->narg.backquote = backquotelist; 538 *app = n; 539 app = &n->narg.next; 540 } else if (lasttoken == TREDIR) { 541 *rpp = n = redirnode; 542 rpp = &n->nfile.next; 543 parsefname(); /* read name of redirection file */ 544 } else if (lasttoken == TLP && app == &args->narg.next 545 && rpp == orig_rpp) { 546 /* We have a function */ 547 if (readtoken() != TRP) 548 synexpect(TRP); 549 #ifdef notdef 550 if (! goodname(n->narg.text)) 551 synerror("Bad function name"); 552 #endif 553 n->type = NDEFUN; 554 n->narg.next = command(); 555 return n; 556 } else { 557 tokpushback++; 558 break; 559 } 560 } 561 *app = NULL; 562 *rpp = NULL; 563 n = (union node *)stalloc(sizeof (struct ncmd)); 564 n->type = NCMD; 565 n->ncmd.backgnd = 0; 566 n->ncmd.args = args; 567 n->ncmd.redirect = redir; 568 return n; 569 } 570 571 STATIC union node * 572 makename() { 573 union node *n; 574 575 n = (union node *)stalloc(sizeof (struct narg)); 576 n->type = NARG; 577 n->narg.next = NULL; 578 n->narg.text = wordtext; 579 n->narg.backquote = backquotelist; 580 return n; 581 } 582 583 void fixredir(n, text, err) 584 union node *n; 585 const char *text; 586 int err; 587 { 588 TRACE(("Fix redir %s %d\n", text, err)); 589 if (!err) 590 n->ndup.vname = NULL; 591 592 if (is_digit(text[0]) && text[1] == '\0') 593 n->ndup.dupfd = digit_val(text[0]); 594 else if (text[0] == '-' && text[1] == '\0') 595 n->ndup.dupfd = -1; 596 else { 597 598 if (err) 599 synerror("Bad fd number"); 600 else 601 n->ndup.vname = makename(); 602 } 603 } 604 605 606 STATIC void 607 parsefname() { 608 union node *n = redirnode; 609 610 if (readtoken() != TWORD) 611 synexpect(-1); 612 if (n->type == NHERE) { 613 struct heredoc *here = heredoc; 614 struct heredoc *p; 615 int i; 616 617 if (quoteflag == 0) 618 n->type = NXHERE; 619 TRACE(("Here document 
%d\n", n->type)); 620 if (here->striptabs) { 621 while (*wordtext == '\t') 622 wordtext++; 623 } 624 if (! noexpand(wordtext) || (i = strlen(wordtext)) == 0 || i > EOFMARKLEN) 625 synerror("Illegal eof marker for << redirection"); 626 rmescapes(wordtext); 627 here->eofmark = wordtext; 628 here->next = NULL; 629 if (heredoclist == NULL) 630 heredoclist = here; 631 else { 632 for (p = heredoclist ; p->next ; p = p->next); 633 p->next = here; 634 } 635 } else if (n->type == NTOFD || n->type == NFROMFD) { 636 fixredir(n, wordtext, 0); 637 } else { 638 n->nfile.fname = makename(); 639 } 640 } 641 642 643 /* 644 * Input any here documents. 645 */ 646 647 STATIC void 648 parseheredoc() { 649 struct heredoc *here; 650 union node *n; 651 652 while (heredoclist) { 653 here = heredoclist; 654 heredoclist = here->next; 655 if (needprompt) { 656 setprompt(2); 657 needprompt = 0; 658 } 659 readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX, 660 here->eofmark, here->striptabs); 661 n = (union node *)stalloc(sizeof (struct narg)); 662 n->narg.type = NARG; 663 n->narg.next = NULL; 664 n->narg.text = wordtext; 665 n->narg.backquote = backquotelist; 666 here->here->nhere.doc = n; 667 } 668 } 669 670 STATIC int 671 peektoken() { 672 int t; 673 674 t = readtoken(); 675 tokpushback++; 676 return (t); 677 } 678 679 STATIC int xxreadtoken(); 680 681 STATIC int 682 readtoken() { 683 int t; 684 int savecheckkwd = checkkwd; 685 struct alias *ap; 686 #ifdef DEBUG 687 int alreadyseen = tokpushback; 688 #endif 689 690 top: 691 t = xxreadtoken(); 692 693 if (checkkwd) { 694 /* 695 * eat newlines 696 */ 697 if (checkkwd == 2) { 698 checkkwd = 0; 699 while (t == TNL) { 700 parseheredoc(); 701 t = xxreadtoken(); 702 } 703 } else 704 checkkwd = 0; 705 /* 706 * check for keywords and aliases 707 */ 708 if (t == TWORD && !quoteflag) 709 { 710 char * const *pp; 711 712 for (pp = (char **)parsekwd; *pp; pp++) { 713 if (**pp == *wordtext && equal(*pp, wordtext)) 714 { 715 lasttoken = t = 
pp - parsekwd + KWDOFFSET; 716 TRACE(("keyword %s recognized\n", tokname[t])); 717 goto out; 718 } 719 } 720 if (noaliases == 0 && 721 (ap = lookupalias(wordtext, 1)) != NULL) { 722 pushstring(ap->val, strlen(ap->val), ap); 723 checkkwd = savecheckkwd; 724 goto top; 725 } 726 } 727 out: 728 checkkwd = 0; 729 } 730 #ifdef DEBUG 731 if (!alreadyseen) 732 TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : "")); 733 else 734 TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : "")); 735 #endif 736 return (t); 737 } 738 739 740 /* 741 * Read the next input token. 742 * If the token is a word, we set backquotelist to the list of cmds in 743 * backquotes. We set quoteflag to true if any part of the word was 744 * quoted. 745 * If the token is TREDIR, then we set redirnode to a structure containing 746 * the redirection. 747 * In all cases, the variable startlinno is set to the number of the line 748 * on which the token starts. 749 * 750 * [Change comment: here documents and internal procedures] 751 * [Readtoken shouldn't have any arguments. Perhaps we should make the 752 * word parsing code into a separate routine. In this case, readtoken 753 * doesn't need to have any internal procedures, but parseword does. 754 * We could also make parseoperator in essence the main routine, and 755 * have parseword (readtoken1?) handle both words and redirection.] 
756 */ 757 758 #define RETURN(token) return lasttoken = token 759 760 STATIC int 761 xxreadtoken() { 762 int c; 763 764 if (tokpushback) { 765 tokpushback = 0; 766 return lasttoken; 767 } 768 if (needprompt) { 769 setprompt(2); 770 needprompt = 0; 771 } 772 startlinno = plinno; 773 for (;;) { /* until token or start of word found */ 774 c = pgetc_macro(); 775 if (c == ' ' || c == '\t') 776 continue; /* quick check for white space first */ 777 switch (c) { 778 case ' ': case '\t': 779 continue; 780 case '#': 781 while ((c = pgetc()) != '\n' && c != PEOF); 782 pungetc(); 783 continue; 784 case '\\': 785 if (pgetc() == '\n') { 786 startlinno = ++plinno; 787 if (doprompt) 788 setprompt(2); 789 else 790 setprompt(0); 791 continue; 792 } 793 pungetc(); 794 goto breakloop; 795 case '\n': 796 plinno++; 797 needprompt = doprompt; 798 RETURN(TNL); 799 case PEOF: 800 RETURN(TEOF); 801 case '&': 802 if (pgetc() == '&') 803 RETURN(TAND); 804 pungetc(); 805 RETURN(TBACKGND); 806 case '|': 807 if (pgetc() == '|') 808 RETURN(TOR); 809 pungetc(); 810 RETURN(TPIPE); 811 case ';': 812 if (pgetc() == ';') 813 RETURN(TENDCASE); 814 pungetc(); 815 RETURN(TSEMI); 816 case '(': 817 RETURN(TLP); 818 case ')': 819 RETURN(TRP); 820 default: 821 goto breakloop; 822 } 823 } 824 breakloop: 825 return readtoken1(c, BASESYNTAX, (char *)NULL, 0); 826 #undef RETURN 827 } 828 829 830 831 /* 832 * If eofmark is NULL, read a word or a redirection symbol. If eofmark 833 * is not NULL, read a here document. In the latter case, eofmark is the 834 * word which marks the end of the document and striptabs is true if 835 * leading tabs should be stripped from the document. The argument firstc 836 * is the first character of the input token or document. 837 * 838 * Because C does not have internal subroutines, I have simulated them 839 * using goto's to implement the subroutine linkage. The following macros 840 * will run code that appears at the end of readtoken1. 
841 */ 842 843 #define CHECKEND() {goto checkend; checkend_return:;} 844 #define PARSEREDIR() {goto parseredir; parseredir_return:;} 845 #define PARSESUB() {goto parsesub; parsesub_return:;} 846 #define PARSEBACKQOLD() {oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;} 847 #define PARSEBACKQNEW() {oldstyle = 0; goto parsebackq; parsebackq_newreturn:;} 848 #define PARSEARITH() {goto parsearith; parsearith_return:;} 849 850 STATIC int 851 readtoken1(firstc, syntax, eofmark, striptabs) 852 int firstc; 853 char const *syntax; 854 char *eofmark; 855 int striptabs; 856 { 857 int c = firstc; 858 char *out; 859 int len; 860 char line[EOFMARKLEN + 1]; 861 struct nodelist *bqlist; 862 int quotef; 863 int dblquote; 864 int varnest; /* levels of variables expansion */ 865 int arinest; /* levels of arithmetic expansion */ 866 int parenlevel; /* levels of parens in arithmetic */ 867 int oldstyle; 868 char const *prevsyntax; /* syntax before arithmetic */ 869 #if __GNUC__ 870 /* Avoid longjmp clobbering */ 871 (void) &out; 872 (void) "ef; 873 (void) &dblquote; 874 (void) &varnest; 875 (void) &arinest; 876 (void) &parenlevel; 877 (void) &oldstyle; 878 (void) &prevsyntax; 879 (void) &syntax; 880 #endif 881 882 startlinno = plinno; 883 dblquote = 0; 884 if (syntax == DQSYNTAX) 885 dblquote = 1; 886 quotef = 0; 887 bqlist = NULL; 888 varnest = 0; 889 arinest = 0; 890 parenlevel = 0; 891 892 STARTSTACKSTR(out); 893 loop: { /* for each line, until end of word */ 894 #if ATTY 895 if (c == '\034' && doprompt 896 && attyset() && ! 
equal(termval(), "emacs")) { 897 attyline(); 898 if (syntax == BASESYNTAX) 899 return readtoken(); 900 c = pgetc(); 901 goto loop; 902 } 903 #endif 904 CHECKEND(); /* set c to PEOF if at end of here document */ 905 for (;;) { /* until end of line or end of word */ 906 CHECKSTRSPACE(3, out); /* permit 3 calls to USTPUTC */ 907 switch(syntax[c]) { 908 case CNL: /* '\n' */ 909 if (syntax == BASESYNTAX) 910 goto endword; /* exit outer loop */ 911 USTPUTC(c, out); 912 plinno++; 913 if (doprompt) 914 setprompt(2); 915 else 916 setprompt(0); 917 c = pgetc(); 918 goto loop; /* continue outer loop */ 919 case CWORD: 920 USTPUTC(c, out); 921 break; 922 case CCTL: 923 if (eofmark == NULL || dblquote) 924 USTPUTC(CTLESC, out); 925 USTPUTC(c, out); 926 break; 927 case CBACK: /* backslash */ 928 c = pgetc(); 929 if (c == PEOF) { 930 USTPUTC('\\', out); 931 pungetc(); 932 } else if (c == '\n') { 933 if (doprompt) 934 setprompt(2); 935 else 936 setprompt(0); 937 } else { 938 if (dblquote && c != '\\' && c != '`' && c != '$' 939 && (c != '"' || eofmark != NULL)) 940 USTPUTC('\\', out); 941 if (SQSYNTAX[c] == CCTL) 942 USTPUTC(CTLESC, out); 943 USTPUTC(c, out); 944 quotef++; 945 } 946 break; 947 case CSQUOTE: 948 syntax = SQSYNTAX; 949 break; 950 case CDQUOTE: 951 syntax = DQSYNTAX; 952 dblquote = 1; 953 break; 954 case CENDQUOTE: 955 if (eofmark) { 956 USTPUTC(c, out); 957 } else { 958 if (arinest) 959 syntax = ARISYNTAX; 960 else 961 syntax = BASESYNTAX; 962 quotef++; 963 dblquote = 0; 964 } 965 break; 966 case CVAR: /* '$' */ 967 PARSESUB(); /* parse substitution */ 968 break; 969 case CENDVAR: /* '}' */ 970 if (varnest > 0) { 971 varnest--; 972 USTPUTC(CTLENDVAR, out); 973 } else { 974 USTPUTC(c, out); 975 } 976 break; 977 case CLP: /* '(' in arithmetic */ 978 parenlevel++; 979 USTPUTC(c, out); 980 break; 981 case CRP: /* ')' in arithmetic */ 982 if (parenlevel > 0) { 983 USTPUTC(c, out); 984 --parenlevel; 985 } else { 986 if (pgetc() == ')') { 987 if (--arinest == 0) { 988 
USTPUTC(CTLENDARI, out); 989 syntax = prevsyntax; 990 } else 991 USTPUTC(')', out); 992 } else { 993 /* 994 * unbalanced parens 995 * (don't 2nd guess - no error) 996 */ 997 pungetc(); 998 USTPUTC(')', out); 999 } 1000 } 1001 break; 1002 case CBQUOTE: /* '`' */ 1003 PARSEBACKQOLD(); 1004 break; 1005 case CEOF: 1006 goto endword; /* exit outer loop */ 1007 default: 1008 if (varnest == 0) 1009 goto endword; /* exit outer loop */ 1010 USTPUTC(c, out); 1011 } 1012 c = pgetc_macro(); 1013 } 1014 } 1015 endword: 1016 if (syntax == ARISYNTAX) 1017 synerror("Missing '))'"); 1018 if (syntax != BASESYNTAX && ! parsebackquote && eofmark == NULL) 1019 synerror("Unterminated quoted string"); 1020 if (varnest != 0) { 1021 startlinno = plinno; 1022 synerror("Missing '}'"); 1023 } 1024 USTPUTC('\0', out); 1025 len = out - stackblock(); 1026 out = stackblock(); 1027 if (eofmark == NULL) { 1028 if ((c == '>' || c == '<') 1029 && quotef == 0 1030 && len <= 2 1031 && (*out == '\0' || is_digit(*out))) { 1032 PARSEREDIR(); 1033 return lasttoken = TREDIR; 1034 } else { 1035 pungetc(); 1036 } 1037 } 1038 quoteflag = quotef; 1039 backquotelist = bqlist; 1040 grabstackblock(len); 1041 wordtext = out; 1042 return lasttoken = TWORD; 1043 /* end of readtoken routine */ 1044 1045 1046 1047 /* 1048 * Check to see whether we are at the end of the here document. When this 1049 * is called, c is set to the first character of the next input line. If 1050 * we are at the end of the here document, this routine sets the c to PEOF. 
1051 */ 1052 1053 checkend: { 1054 if (eofmark) { 1055 if (striptabs) { 1056 while (c == '\t') 1057 c = pgetc(); 1058 } 1059 if (c == *eofmark) { 1060 if (pfgets(line, sizeof line) != NULL) { 1061 char *p, *q; 1062 1063 p = line; 1064 for (q = eofmark + 1 ; *q && *p == *q ; p++, q++); 1065 if (*p == '\n' && *q == '\0') { 1066 c = PEOF; 1067 plinno++; 1068 needprompt = doprompt; 1069 } else { 1070 pushstring(line, strlen(line), NULL); 1071 } 1072 } 1073 } 1074 } 1075 goto checkend_return; 1076 } 1077 1078 1079 /* 1080 * Parse a redirection operator. The variable "out" points to a string 1081 * specifying the fd to be redirected. The variable "c" contains the 1082 * first character of the redirection operator. 1083 */ 1084 1085 parseredir: { 1086 char fd = *out; 1087 union node *np; 1088 1089 np = (union node *)stalloc(sizeof (struct nfile)); 1090 if (c == '>') { 1091 np->nfile.fd = 1; 1092 c = pgetc(); 1093 if (c == '>') 1094 np->type = NAPPEND; 1095 else if (c == '&') 1096 np->type = NTOFD; 1097 else { 1098 np->type = NTO; 1099 pungetc(); 1100 } 1101 } else { /* c == '<' */ 1102 np->nfile.fd = 0; 1103 c = pgetc(); 1104 if (c == '<') { 1105 if (sizeof (struct nfile) != sizeof (struct nhere)) { 1106 np = (union node *)stalloc(sizeof (struct nhere)); 1107 np->nfile.fd = 0; 1108 } 1109 np->type = NHERE; 1110 heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc)); 1111 heredoc->here = np; 1112 if ((c = pgetc()) == '-') { 1113 heredoc->striptabs = 1; 1114 } else { 1115 heredoc->striptabs = 0; 1116 pungetc(); 1117 } 1118 } else if (c == '&') 1119 np->type = NFROMFD; 1120 else { 1121 np->type = NFROM; 1122 pungetc(); 1123 } 1124 } 1125 if (fd != '\0') 1126 np->nfile.fd = digit_val(fd); 1127 redirnode = np; 1128 goto parseredir_return; 1129 } 1130 1131 1132 /* 1133 * Parse a substitution. At this point, we have read the dollar sign 1134 * and nothing else. 
1135 */ 1136 1137 parsesub: { 1138 int subtype; 1139 int typeloc; 1140 int flags; 1141 char *p; 1142 #ifndef GDB_HACK 1143 static const char types[] = "}-+?="; 1144 #endif 1145 int bracketed_name = 0; /* used to handle ${[0-9]*} variables */ 1146 1147 c = pgetc(); 1148 if (c != '(' && c != '{' && !is_name(c) && !is_special(c)) { 1149 USTPUTC('$', out); 1150 pungetc(); 1151 } else if (c == '(') { /* $(command) or $((arith)) */ 1152 if (pgetc() == '(') { 1153 PARSEARITH(); 1154 } else { 1155 pungetc(); 1156 PARSEBACKQNEW(); 1157 } 1158 } else { 1159 USTPUTC(CTLVAR, out); 1160 typeloc = out - stackblock(); 1161 USTPUTC(VSNORMAL, out); 1162 subtype = VSNORMAL; 1163 if (c == '{') { 1164 bracketed_name = 1; 1165 c = pgetc(); 1166 if (c == '#') { 1167 if ((c = pgetc()) == '}') 1168 c = '#'; 1169 else 1170 subtype = VSLENGTH; 1171 } 1172 else 1173 subtype = 0; 1174 } 1175 if (is_name(c)) { 1176 do { 1177 STPUTC(c, out); 1178 c = pgetc(); 1179 } while (is_in_name(c)); 1180 } else if (is_digit(c)) { 1181 if (bracketed_name) { 1182 do { 1183 STPUTC(c, out); 1184 c = pgetc(); 1185 } while (is_digit(c)); 1186 } else { 1187 STPUTC(c, out); 1188 c = pgetc(); 1189 } 1190 } else { 1191 if (! is_special(c)) 1192 badsub: synerror("Bad substitution"); 1193 USTPUTC(c, out); 1194 c = pgetc(); 1195 } 1196 STPUTC('=', out); 1197 flags = 0; 1198 if (subtype == 0) { 1199 switch (c) { 1200 case ':': 1201 flags = VSNUL; 1202 c = pgetc(); 1203 /*FALLTHROUGH*/ 1204 default: 1205 p = strchr(types, c); 1206 if (p == NULL) 1207 goto badsub; 1208 subtype = p - types + VSNORMAL; 1209 break; 1210 case '%': 1211 case '#': 1212 { 1213 int cc = c; 1214 subtype = c == '#' ? 
VSTRIMLEFT : 1215 VSTRIMRIGHT; 1216 c = pgetc(); 1217 if (c == cc) 1218 subtype++; 1219 else 1220 pungetc(); 1221 break; 1222 } 1223 } 1224 } else { 1225 pungetc(); 1226 } 1227 if (dblquote || arinest) 1228 flags |= VSQUOTE; 1229 *(stackblock() + typeloc) = subtype | flags; 1230 if (subtype != VSNORMAL) 1231 varnest++; 1232 } 1233 goto parsesub_return; 1234 } 1235 1236 1237 /* 1238 * Called to parse command substitutions. Newstyle is set if the command 1239 * is enclosed inside $(...); nlpp is a pointer to the head of the linked 1240 * list of commands (passed by reference), and savelen is the number of 1241 * characters on the top of the stack which must be preserved. 1242 */ 1243 1244 parsebackq: { 1245 struct nodelist **nlpp; 1246 int savepbq; 1247 union node *n; 1248 char *volatile str; 1249 struct jmploc jmploc; 1250 struct jmploc *volatile savehandler; 1251 int savelen; 1252 int saveprompt; 1253 #if __GNUC__ 1254 /* Avoid longjmp clobbering */ 1255 (void) &saveprompt; 1256 #endif 1257 1258 savepbq = parsebackquote; 1259 if (setjmp(jmploc.loc)) { 1260 if (str) 1261 ckfree(str); 1262 parsebackquote = 0; 1263 handler = savehandler; 1264 longjmp(handler->loc, 1); 1265 } 1266 INTOFF; 1267 str = NULL; 1268 savelen = out - stackblock(); 1269 if (savelen > 0) { 1270 str = ckmalloc(savelen); 1271 memcpy(str, stackblock(), savelen); 1272 } 1273 savehandler = handler; 1274 handler = &jmploc; 1275 INTON; 1276 if (oldstyle) { 1277 /* We must read until the closing backquote, giving special 1278 treatment to some slashes, and then push the string and 1279 reread it as input, interpreting it normally. 
*/ 1280 char *out; 1281 int c; 1282 int savelen; 1283 char *str; 1284 1285 1286 STARTSTACKSTR(out); 1287 for (;;) { 1288 if (needprompt) { 1289 setprompt(2); 1290 needprompt = 0; 1291 } 1292 switch (c = pgetc()) { 1293 case '`': 1294 goto done; 1295 1296 case '\\': 1297 if ((c = pgetc()) == '\n') { 1298 plinno++; 1299 if (doprompt) 1300 setprompt(2); 1301 else 1302 setprompt(0); 1303 /* 1304 * If eating a newline, avoid putting 1305 * the newline into the new character 1306 * stream (via the STPUTC after the 1307 * switch). 1308 */ 1309 continue; 1310 } 1311 if (c != '\\' && c != '`' && c != '$' 1312 && (!dblquote || c != '"')) 1313 STPUTC('\\', out); 1314 break; 1315 1316 case '\n': 1317 plinno++; 1318 needprompt = doprompt; 1319 break; 1320 1321 case PEOF: 1322 startlinno = plinno; 1323 synerror("EOF in backquote substitution"); 1324 break; 1325 1326 default: 1327 break; 1328 } 1329 STPUTC(c, out); 1330 } 1331 done: 1332 STPUTC('\0', out); 1333 savelen = out - stackblock(); 1334 if (savelen > 0) { 1335 str = ckmalloc(savelen); 1336 memcpy(str, stackblock(), savelen); 1337 setinputstring(str, 1); 1338 } 1339 } 1340 nlpp = &bqlist; 1341 while (*nlpp) 1342 nlpp = &(*nlpp)->next; 1343 *nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist)); 1344 (*nlpp)->next = NULL; 1345 parsebackquote = oldstyle; 1346 1347 if (oldstyle) { 1348 saveprompt = doprompt; 1349 doprompt = 0; 1350 } 1351 1352 n = list(0); 1353 1354 if (oldstyle) 1355 doprompt = saveprompt; 1356 else { 1357 if (readtoken() != TRP) 1358 synexpect(TRP); 1359 } 1360 1361 (*nlpp)->n = n; 1362 if (oldstyle) { 1363 /* 1364 * Start reading from old file again, ignoring any pushed back 1365 * tokens left from the backquote parsing 1366 */ 1367 popfile(); 1368 tokpushback = 0; 1369 } 1370 while (stackblocksize() <= savelen) 1371 growstackblock(); 1372 STARTSTACKSTR(out); 1373 if (str) { 1374 memcpy(out, str, savelen); 1375 STADJUST(savelen, out); 1376 INTOFF; 1377 ckfree(str); 1378 str = NULL; 1379 INTON; 1380 
} 1381 parsebackquote = savepbq; 1382 handler = savehandler; 1383 if (arinest || dblquote) 1384 USTPUTC(CTLBACKQ | CTLQUOTE, out); 1385 else 1386 USTPUTC(CTLBACKQ, out); 1387 if (oldstyle) 1388 goto parsebackq_oldreturn; 1389 else 1390 goto parsebackq_newreturn; 1391 } 1392 1393 /* 1394 * Parse an arithmetic expansion (indicate start of one and set state) 1395 */ 1396 parsearith: { 1397 1398 if (++arinest == 1) { 1399 prevsyntax = syntax; 1400 syntax = ARISYNTAX; 1401 USTPUTC(CTLARI, out); 1402 } else { 1403 /* 1404 * we collapse embedded arithmetic expansion to 1405 * parenthesis, which should be equivalent 1406 */ 1407 USTPUTC('(', out); 1408 } 1409 goto parsearith_return; 1410 } 1411 1412 } /* end of readtoken */ 1413 1414 1415 1416 #ifdef mkinit 1417 RESET { 1418 tokpushback = 0; 1419 checkkwd = 0; 1420 } 1421 #endif 1422 1423 /* 1424 * Returns true if the text contains nothing to expand (no dollar signs 1425 * or backquotes). 1426 */ 1427 1428 STATIC int 1429 noexpand(text) 1430 char *text; 1431 { 1432 char *p; 1433 char c; 1434 1435 p = text; 1436 while ((c = *p++) != '\0') { 1437 if (c == CTLESC) 1438 p++; 1439 else if (BASESYNTAX[c] == CCTL) 1440 return 0; 1441 } 1442 return 1; 1443 } 1444 1445 1446 /* 1447 * Return true if the argument is a legal variable name (a letter or 1448 * underscore followed by zero or more letters, underscores, and digits). 1449 */ 1450 1451 int 1452 goodname(name) 1453 char *name; 1454 { 1455 char *p; 1456 1457 p = name; 1458 if (! is_name(*p)) 1459 return 0; 1460 while (*++p) { 1461 if (! is_in_name(*p)) 1462 return 0; 1463 } 1464 return 1; 1465 } 1466 1467 1468 /* 1469 * Called when an unexpected token is read during the parse. The argument 1470 * is the token that is expected, or -1 if more than one type of token can 1471 * occur at this point. 
1472 */ 1473 1474 STATIC void 1475 synexpect(token) 1476 int token; 1477 { 1478 char msg[64]; 1479 1480 if (token >= 0) { 1481 fmtstr(msg, 64, "%s unexpected (expecting %s)", 1482 tokname[lasttoken], tokname[token]); 1483 } else { 1484 fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]); 1485 } 1486 synerror(msg); 1487 } 1488 1489 1490 STATIC void 1491 synerror(msg) 1492 char *msg; 1493 { 1494 if (commandname) 1495 outfmt(&errout, "%s: %d: ", commandname, startlinno); 1496 outfmt(&errout, "Syntax error: %s\n", msg); 1497 error((char *)NULL); 1498 } 1499 1500 STATIC void 1501 setprompt(which) 1502 int which; 1503 { 1504 whichprompt = which; 1505 1506 #ifndef NO_HISTORY 1507 if (!el) 1508 #endif 1509 out2str(getprompt(NULL)); 1510 } 1511 1512 /* 1513 * called by editline -- any expansions to the prompt 1514 * should be added here. 1515 */ 1516 char * 1517 getprompt(unused) 1518 void *unused __unused; 1519 { 1520 switch (whichprompt) { 1521 case 0: 1522 return ""; 1523 case 1: 1524 return ps1val(); 1525 case 2: 1526 return ps2val(); 1527 default: 1528 return "<internal prompt error>"; 1529 } 1530 } 1531