1 /*- 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Kenneth Almquist. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #ifndef lint 34 #if 0 35 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95"; 36 #endif 37 #endif /* not lint */ 38 #include <sys/cdefs.h> 39 __FBSDID("$FreeBSD$"); 40 41 #include <stdlib.h> 42 #include <unistd.h> 43 #include <stdio.h> 44 45 #include "shell.h" 46 #include "parser.h" 47 #include "nodes.h" 48 #include "expand.h" /* defines rmescapes() */ 49 #include "syntax.h" 50 #include "options.h" 51 #include "input.h" 52 #include "output.h" 53 #include "var.h" 54 #include "error.h" 55 #include "memalloc.h" 56 #include "mystring.h" 57 #include "alias.h" 58 #include "show.h" 59 #include "eval.h" 60 #include "exec.h" /* to check for special builtins */ 61 #ifndef NO_HISTORY 62 #include "myhistedit.h" 63 #endif 64 65 /* 66 * Shell command parser. 67 */ 68 69 #define PROMPTLEN 128 70 71 /* values of checkkwd variable */ 72 #define CHKALIAS 0x1 73 #define CHKKWD 0x2 74 #define CHKNL 0x4 75 76 /* values returned by readtoken */ 77 #include "token.h" 78 79 80 81 struct heredoc { 82 struct heredoc *next; /* next here document in list */ 83 union node *here; /* redirection node */ 84 char *eofmark; /* string indicating end of input */ 85 int striptabs; /* if set, strip leading tabs */ 86 }; 87 88 struct parser_temp { 89 struct parser_temp *next; 90 void *data; 91 }; 92 93 94 static struct heredoc *heredoclist; /* list of here documents to read */ 95 static int doprompt; /* if set, prompt the user */ 96 static int needprompt; /* true if interactive and at start of line */ 97 static int lasttoken; /* last token read */ 98 static int tokpushback; /* last token pushed back */ 99 static char *wordtext; /* text of last word returned by readtoken */ 100 static int checkkwd; 101 static struct nodelist *backquotelist; 102 static union node *redirnode; 103 static struct heredoc *heredoc; 104 static int quoteflag; /* set if (part of) last token was quoted */ 105 static int startlinno; /* line # where last token started */ 106 static int funclinno; /* line # where the current function started */ 107 static struct parser_temp *parser_temp; 108 109 #define NOEOFMARK ((const char *)&heredoclist) 110 111 112 static union node *list(int); 113 static union node *andor(void); 114 static union node *pipeline(void); 115 static union node *command(void); 116 static union node *simplecmd(union node **, union node *); 117 static union node *makename(void); 118 static union node *makebinary(int type, union node *n1, union node *n2); 119 static void parsefname(void); 120 static void parseheredoc(void); 121 static int peektoken(void); 122 static int readtoken(void); 123 static int xxreadtoken(void); 124 static int readtoken1(int, const char *, const char *, int); 125 static int noexpand(char *); 126 static void consumetoken(int); 127 static void synexpect(int) __dead2; 128 static void synerror(const char *) __dead2; 129 static void setprompt(int); 130 static int pgetc_linecont(void); 131 132 133 static void * 134 parser_temp_alloc(size_t len) 135 { 136 struct parser_temp *t; 137 138 INTOFF; 139 t = ckmalloc(sizeof(*t)); 140 t->data = NULL; 141 t->next = parser_temp; 142 parser_temp = t; 143 t->data = ckmalloc(len); 144 INTON; 145 return t->data; 146 } 147 148 149 static void * 150 parser_temp_realloc(void *ptr, size_t len) 151 { 152 struct parser_temp *t; 153 154 INTOFF; 155 t = parser_temp; 156 if (ptr != t->data) 157 error("bug: parser_temp_realloc misused"); 158 t->data = ckrealloc(t->data, len); 159 INTON; 160 return t->data; 161 } 162 163 164 static void 165 parser_temp_free_upto(void *ptr) 166 { 167 struct parser_temp *t; 168 int done = 0; 169 170 INTOFF; 171 while (parser_temp != NULL && !done) { 172 t = parser_temp; 173 parser_temp = t->next; 174 done = t->data == ptr; 175 ckfree(t->data); 176 ckfree(t); 177 } 178 INTON; 179 if (!done) 180 error("bug: parser_temp_free_upto misused"); 181 } 182 183 184 static void 185 parser_temp_free_all(void) 186 { 187 struct parser_temp *t; 188 189 INTOFF; 190 while (parser_temp != NULL) { 191 t = parser_temp; 192 parser_temp = t->next; 193 ckfree(t->data); 194 ckfree(t); 195 } 196 INTON; 197 } 198 199 200 /* 201 * Read and parse a command. Returns NEOF on end of file. (NULL is a 202 * valid parse tree indicating a blank line.) 203 */ 204 205 union node * 206 parsecmd(int interact) 207 { 208 int t; 209 210 /* This assumes the parser is not re-entered, 211 * which could happen if we add command substitution on PS1/PS2. 212 */ 213 parser_temp_free_all(); 214 heredoclist = NULL; 215 216 tokpushback = 0; 217 checkkwd = 0; 218 doprompt = interact; 219 if (doprompt) 220 setprompt(1); 221 else 222 setprompt(0); 223 needprompt = 0; 224 t = readtoken(); 225 if (t == TEOF) 226 return NEOF; 227 if (t == TNL) 228 return NULL; 229 tokpushback++; 230 return list(1); 231 } 232 233 234 /* 235 * Read and parse words for wordexp. 236 * Returns a list of NARG nodes; NULL if there are no words. 237 */ 238 union node * 239 parsewordexp(void) 240 { 241 union node *n, *first = NULL, **pnext; 242 int t; 243 244 /* This assumes the parser is not re-entered, 245 * which could happen if we add command substitution on PS1/PS2. 246 */ 247 parser_temp_free_all(); 248 heredoclist = NULL; 249 250 tokpushback = 0; 251 checkkwd = 0; 252 doprompt = 0; 253 setprompt(0); 254 needprompt = 0; 255 pnext = &first; 256 while ((t = readtoken()) != TEOF) { 257 if (t != TWORD) 258 synexpect(TWORD); 259 n = makename(); 260 *pnext = n; 261 pnext = &n->narg.next; 262 } 263 return first; 264 } 265 266 267 static union node * 268 list(int nlflag) 269 { 270 union node *ntop, *n1, *n2, *n3; 271 int tok; 272 273 checkkwd = CHKNL | CHKKWD | CHKALIAS; 274 if (!nlflag && tokendlist[peektoken()]) 275 return NULL; 276 ntop = n1 = NULL; 277 for (;;) { 278 n2 = andor(); 279 tok = readtoken(); 280 if (tok == TBACKGND) { 281 if (n2 != NULL && n2->type == NPIPE) { 282 n2->npipe.backgnd = 1; 283 } else if (n2 != NULL && n2->type == NREDIR) { 284 n2->type = NBACKGND; 285 } else { 286 n3 = (union node *)stalloc(sizeof (struct nredir)); 287 n3->type = NBACKGND; 288 n3->nredir.n = n2; 289 n3->nredir.redirect = NULL; 290 n2 = n3; 291 } 292 } 293 if (ntop == NULL) 294 ntop = n2; 295 else if (n1 == NULL) { 296 n1 = makebinary(NSEMI, ntop, n2); 297 ntop = n1; 298 } 299 else { 300 n3 = makebinary(NSEMI, n1->nbinary.ch2, n2); 301 n1->nbinary.ch2 = n3; 302 n1 = n3; 303 } 304 switch (tok) { 305 case TBACKGND: 306 case TSEMI: 307 tok = readtoken(); 308 /* FALLTHROUGH */ 309 case TNL: 310 if (tok == TNL) { 311 parseheredoc(); 312 if (nlflag) 313 return ntop; 314 } else if (tok == TEOF && nlflag) { 315 parseheredoc(); 316 return ntop; 317 } else { 318 tokpushback++; 319 } 320 checkkwd = CHKNL | CHKKWD | CHKALIAS; 321 if (!nlflag && tokendlist[peektoken()]) 322 return ntop; 323 break; 324 case TEOF: 325 if (heredoclist) 326 parseheredoc(); 327 else 328 pungetc(); /* push back EOF on input */ 329 return ntop; 330 default: 331 if (nlflag) 332 synexpect(-1); 333 tokpushback++; 334 return ntop; 335 } 336 } 337 } 338 339 340 341 static union node * 342 andor(void) 343 { 344 union node *n; 345 int t; 346 347 n = pipeline(); 348 for (;;) { 349 if ((t = readtoken()) == TAND) { 350 t = NAND; 351 } else if (t == TOR) { 352 t = NOR; 353 } else { 354 tokpushback++; 355 return n; 356 } 357 n = makebinary(t, n, pipeline()); 358 } 359 } 360 361 362 363 static union node * 364 pipeline(void) 365 { 366 union node *n1, *n2, *pipenode; 367 struct nodelist *lp, *prev; 368 int negate, t; 369 370 negate = 0; 371 checkkwd = CHKNL | CHKKWD | CHKALIAS; 372 TRACE(("pipeline: entered\n")); 373 while (readtoken() == TNOT) 374 negate = !negate; 375 tokpushback++; 376 n1 = command(); 377 if (readtoken() == TPIPE) { 378 pipenode = (union node *)stalloc(sizeof (struct npipe)); 379 pipenode->type = NPIPE; 380 pipenode->npipe.backgnd = 0; 381 lp = (struct nodelist *)stalloc(sizeof (struct nodelist)); 382 pipenode->npipe.cmdlist = lp; 383 lp->n = n1; 384 do { 385 prev = lp; 386 lp = (struct nodelist *)stalloc(sizeof (struct nodelist)); 387 checkkwd = CHKNL | CHKKWD | CHKALIAS; 388 t = readtoken(); 389 tokpushback++; 390 if (t == TNOT) 391 lp->n = pipeline(); 392 else 393 lp->n = command(); 394 prev->next = lp; 395 } while (readtoken() == TPIPE); 396 lp->next = NULL; 397 n1 = pipenode; 398 } 399 tokpushback++; 400 if (negate) { 401 n2 = (union node *)stalloc(sizeof (struct nnot)); 402 n2->type = NNOT; 403 n2->nnot.com = n1; 404 return n2; 405 } else 406 return n1; 407 } 408 409 410 411 static union node * 412 command(void) 413 { 414 union node *n1, *n2; 415 union node *ap, **app; 416 union node *cp, **cpp; 417 union node *redir, **rpp; 418 int t; 419 int is_subshell; 420 421 checkkwd = CHKNL | CHKKWD | CHKALIAS; 422 is_subshell = 0; 423 redir = NULL; 424 n1 = NULL; 425 rpp = &redir; 426 427 /* Check for redirection which may precede command */ 428 while (readtoken() == TREDIR) { 429 *rpp = n2 = redirnode; 430 rpp = &n2->nfile.next; 431 parsefname(); 432 } 433 tokpushback++; 434 435 switch (readtoken()) { 436 case TIF: 437 n1 = (union node *)stalloc(sizeof (struct nif)); 438 n1->type = NIF; 439 if ((n1->nif.test = list(0)) == NULL) 440 synexpect(-1); 441 consumetoken(TTHEN); 442 n1->nif.ifpart = list(0); 443 n2 = n1; 444 while (readtoken() == TELIF) { 445 n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif)); 446 n2 = n2->nif.elsepart; 447 n2->type = NIF; 448 if ((n2->nif.test = list(0)) == NULL) 449 synexpect(-1); 450 consumetoken(TTHEN); 451 n2->nif.ifpart = list(0); 452 } 453 if (lasttoken == TELSE) 454 n2->nif.elsepart = list(0); 455 else { 456 n2->nif.elsepart = NULL; 457 tokpushback++; 458 } 459 consumetoken(TFI); 460 checkkwd = CHKKWD | CHKALIAS; 461 break; 462 case TWHILE: 463 case TUNTIL: 464 t = lasttoken; 465 if ((n1 = list(0)) == NULL) 466 synexpect(-1); 467 consumetoken(TDO); 468 n1 = makebinary((t == TWHILE)? NWHILE : NUNTIL, n1, list(0)); 469 consumetoken(TDONE); 470 checkkwd = CHKKWD | CHKALIAS; 471 break; 472 case TFOR: 473 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext)) 474 synerror("Bad for loop variable"); 475 n1 = (union node *)stalloc(sizeof (struct nfor)); 476 n1->type = NFOR; 477 n1->nfor.var = wordtext; 478 while (readtoken() == TNL) 479 ; 480 if (lasttoken == TWORD && ! quoteflag && equal(wordtext, "in")) { 481 app = ≈ 482 while (readtoken() == TWORD) { 483 n2 = makename(); 484 *app = n2; 485 app = &n2->narg.next; 486 } 487 *app = NULL; 488 n1->nfor.args = ap; 489 if (lasttoken != TNL && lasttoken != TSEMI) 490 synexpect(-1); 491 } else { 492 static char argvars[5] = { 493 CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0' 494 }; 495 n2 = (union node *)stalloc(sizeof (struct narg)); 496 n2->type = NARG; 497 n2->narg.text = argvars; 498 n2->narg.backquote = NULL; 499 n2->narg.next = NULL; 500 n1->nfor.args = n2; 501 /* 502 * Newline or semicolon here is optional (but note 503 * that the original Bourne shell only allowed NL). 504 */ 505 if (lasttoken != TNL && lasttoken != TSEMI) 506 tokpushback++; 507 } 508 checkkwd = CHKNL | CHKKWD | CHKALIAS; 509 if ((t = readtoken()) == TDO) 510 t = TDONE; 511 else if (t == TBEGIN) 512 t = TEND; 513 else 514 synexpect(-1); 515 n1->nfor.body = list(0); 516 consumetoken(t); 517 checkkwd = CHKKWD | CHKALIAS; 518 break; 519 case TCASE: 520 n1 = (union node *)stalloc(sizeof (struct ncase)); 521 n1->type = NCASE; 522 consumetoken(TWORD); 523 n1->ncase.expr = makename(); 524 while (readtoken() == TNL); 525 if (lasttoken != TWORD || ! equal(wordtext, "in")) 526 synerror("expecting \"in\""); 527 cpp = &n1->ncase.cases; 528 checkkwd = CHKNL | CHKKWD, readtoken(); 529 while (lasttoken != TESAC) { 530 *cpp = cp = (union node *)stalloc(sizeof (struct nclist)); 531 cp->type = NCLIST; 532 app = &cp->nclist.pattern; 533 if (lasttoken == TLP) 534 readtoken(); 535 for (;;) { 536 *app = ap = makename(); 537 checkkwd = CHKNL | CHKKWD; 538 if (readtoken() != TPIPE) 539 break; 540 app = &ap->narg.next; 541 readtoken(); 542 } 543 ap->narg.next = NULL; 544 if (lasttoken != TRP) 545 synexpect(TRP); 546 cp->nclist.body = list(0); 547 548 checkkwd = CHKNL | CHKKWD | CHKALIAS; 549 if ((t = readtoken()) != TESAC) { 550 if (t == TENDCASE) 551 ; 552 else if (t == TFALLTHRU) 553 cp->type = NCLISTFALLTHRU; 554 else 555 synexpect(TENDCASE); 556 checkkwd = CHKNL | CHKKWD, readtoken(); 557 } 558 cpp = &cp->nclist.next; 559 } 560 *cpp = NULL; 561 checkkwd = CHKKWD | CHKALIAS; 562 break; 563 case TLP: 564 n1 = (union node *)stalloc(sizeof (struct nredir)); 565 n1->type = NSUBSHELL; 566 n1->nredir.n = list(0); 567 n1->nredir.redirect = NULL; 568 consumetoken(TRP); 569 checkkwd = CHKKWD | CHKALIAS; 570 is_subshell = 1; 571 break; 572 case TBEGIN: 573 n1 = list(0); 574 consumetoken(TEND); 575 checkkwd = CHKKWD | CHKALIAS; 576 break; 577 /* A simple command must have at least one redirection or word. */ 578 case TBACKGND: 579 case TSEMI: 580 case TAND: 581 case TOR: 582 case TPIPE: 583 case TENDCASE: 584 case TFALLTHRU: 585 case TEOF: 586 case TNL: 587 case TRP: 588 if (!redir) 589 synexpect(-1); 590 case TWORD: 591 tokpushback++; 592 n1 = simplecmd(rpp, redir); 593 return n1; 594 default: 595 synexpect(-1); 596 } 597 598 /* Now check for redirection which may follow command */ 599 while (readtoken() == TREDIR) { 600 *rpp = n2 = redirnode; 601 rpp = &n2->nfile.next; 602 parsefname(); 603 } 604 tokpushback++; 605 *rpp = NULL; 606 if (redir) { 607 if (!is_subshell) { 608 n2 = (union node *)stalloc(sizeof (struct nredir)); 609 n2->type = NREDIR; 610 n2->nredir.n = n1; 611 n1 = n2; 612 } 613 n1->nredir.redirect = redir; 614 } 615 616 return n1; 617 } 618 619 620 static union node * 621 simplecmd(union node **rpp, union node *redir) 622 { 623 union node *args, **app; 624 union node **orig_rpp = rpp; 625 union node *n = NULL; 626 int special; 627 int savecheckkwd; 628 629 /* If we don't have any redirections already, then we must reset */ 630 /* rpp to be the address of the local redir variable. */ 631 if (redir == 0) 632 rpp = &redir; 633 634 args = NULL; 635 app = &args; 636 /* 637 * We save the incoming value, because we need this for shell 638 * functions. There can not be a redirect or an argument between 639 * the function name and the open parenthesis. 640 */ 641 orig_rpp = rpp; 642 643 savecheckkwd = CHKALIAS; 644 645 for (;;) { 646 checkkwd = savecheckkwd; 647 if (readtoken() == TWORD) { 648 n = makename(); 649 *app = n; 650 app = &n->narg.next; 651 if (savecheckkwd != 0 && !isassignment(wordtext)) 652 savecheckkwd = 0; 653 } else if (lasttoken == TREDIR) { 654 *rpp = n = redirnode; 655 rpp = &n->nfile.next; 656 parsefname(); /* read name of redirection file */ 657 } else if (lasttoken == TLP && app == &args->narg.next 658 && rpp == orig_rpp) { 659 /* We have a function */ 660 consumetoken(TRP); 661 funclinno = plinno; 662 /* 663 * - Require plain text. 664 * - Functions with '/' cannot be called. 665 * - Reject name=(). 666 * - Reject ksh extended glob patterns. 667 */ 668 if (!noexpand(n->narg.text) || quoteflag || 669 strchr(n->narg.text, '/') || 670 strchr("!%*+-=?@}~", 671 n->narg.text[strlen(n->narg.text) - 1])) 672 synerror("Bad function name"); 673 rmescapes(n->narg.text); 674 if (find_builtin(n->narg.text, &special) >= 0 && 675 special) 676 synerror("Cannot override a special builtin with a function"); 677 n->type = NDEFUN; 678 n->narg.next = command(); 679 funclinno = 0; 680 return n; 681 } else { 682 tokpushback++; 683 break; 684 } 685 } 686 *app = NULL; 687 *rpp = NULL; 688 n = (union node *)stalloc(sizeof (struct ncmd)); 689 n->type = NCMD; 690 n->ncmd.args = args; 691 n->ncmd.redirect = redir; 692 return n; 693 } 694 695 static union node * 696 makename(void) 697 { 698 union node *n; 699 700 n = (union node *)stalloc(sizeof (struct narg)); 701 n->type = NARG; 702 n->narg.next = NULL; 703 n->narg.text = wordtext; 704 n->narg.backquote = backquotelist; 705 return n; 706 } 707 708 static union node * 709 makebinary(int type, union node *n1, union node *n2) 710 { 711 union node *n; 712 713 n = (union node *)stalloc(sizeof (struct nbinary)); 714 n->type = type; 715 n->nbinary.ch1 = n1; 716 n->nbinary.ch2 = n2; 717 return (n); 718 } 719 720 void 721 forcealias(void) 722 { 723 checkkwd |= CHKALIAS; 724 } 725 726 void 727 fixredir(union node *n, const char *text, int err) 728 { 729 TRACE(("Fix redir %s %d\n", text, err)); 730 if (!err) 731 n->ndup.vname = NULL; 732 733 if (is_digit(text[0]) && text[1] == '\0') 734 n->ndup.dupfd = digit_val(text[0]); 735 else if (text[0] == '-' && text[1] == '\0') 736 n->ndup.dupfd = -1; 737 else { 738 739 if (err) 740 synerror("Bad fd number"); 741 else 742 n->ndup.vname = makename(); 743 } 744 } 745 746 747 static void 748 parsefname(void) 749 { 750 union node *n = redirnode; 751 752 consumetoken(TWORD); 753 if (n->type == NHERE) { 754 struct heredoc *here = heredoc; 755 struct heredoc *p; 756 757 if (quoteflag == 0) 758 n->type = NXHERE; 759 TRACE(("Here document %d\n", n->type)); 760 if (here->striptabs) { 761 while (*wordtext == '\t') 762 wordtext++; 763 } 764 if (! noexpand(wordtext)) 765 synerror("Illegal eof marker for << redirection"); 766 rmescapes(wordtext); 767 here->eofmark = wordtext; 768 here->next = NULL; 769 if (heredoclist == NULL) 770 heredoclist = here; 771 else { 772 for (p = heredoclist ; p->next ; p = p->next); 773 p->next = here; 774 } 775 } else if (n->type == NTOFD || n->type == NFROMFD) { 776 fixredir(n, wordtext, 0); 777 } else { 778 n->nfile.fname = makename(); 779 } 780 } 781 782 783 /* 784 * Input any here documents. 785 */ 786 787 static void 788 parseheredoc(void) 789 { 790 struct heredoc *here; 791 union node *n; 792 793 while (heredoclist) { 794 here = heredoclist; 795 heredoclist = here->next; 796 if (needprompt) { 797 setprompt(2); 798 needprompt = 0; 799 } 800 readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX, 801 here->eofmark, here->striptabs); 802 n = makename(); 803 here->here->nhere.doc = n; 804 } 805 } 806 807 static int 808 peektoken(void) 809 { 810 int t; 811 812 t = readtoken(); 813 tokpushback++; 814 return (t); 815 } 816 817 static int 818 readtoken(void) 819 { 820 int t; 821 struct alias *ap; 822 #ifdef DEBUG 823 int alreadyseen = tokpushback; 824 #endif 825 826 top: 827 t = xxreadtoken(); 828 829 /* 830 * eat newlines 831 */ 832 if (checkkwd & CHKNL) { 833 while (t == TNL) { 834 parseheredoc(); 835 t = xxreadtoken(); 836 } 837 } 838 839 /* 840 * check for keywords and aliases 841 */ 842 if (t == TWORD && !quoteflag) 843 { 844 const char * const *pp; 845 846 if (checkkwd & CHKKWD) 847 for (pp = parsekwd; *pp; pp++) { 848 if (**pp == *wordtext && equal(*pp, wordtext)) 849 { 850 lasttoken = t = pp - parsekwd + KWDOFFSET; 851 TRACE(("keyword %s recognized\n", tokname[t])); 852 goto out; 853 } 854 } 855 if (checkkwd & CHKALIAS && 856 (ap = lookupalias(wordtext, 1)) != NULL) { 857 pushstring(ap->val, strlen(ap->val), ap); 858 goto top; 859 } 860 } 861 out: 862 if (t != TNOT) 863 checkkwd = 0; 864 865 #ifdef DEBUG 866 if (!alreadyseen) 867 TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : "")); 868 else 869 TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : "")); 870 #endif 871 return (t); 872 } 873 874 875 /* 876 * Read the next input token. 877 * If the token is a word, we set backquotelist to the list of cmds in 878 * backquotes. We set quoteflag to true if any part of the word was 879 * quoted. 880 * If the token is TREDIR, then we set redirnode to a structure containing 881 * the redirection. 882 * In all cases, the variable startlinno is set to the number of the line 883 * on which the token starts. 884 * 885 * [Change comment: here documents and internal procedures] 886 * [Readtoken shouldn't have any arguments. Perhaps we should make the 887 * word parsing code into a separate routine. In this case, readtoken 888 * doesn't need to have any internal procedures, but parseword does. 889 * We could also make parseoperator in essence the main routine, and 890 * have parseword (readtoken1?) handle both words and redirection.] 891 */ 892 893 #define RETURN(token) return lasttoken = token 894 895 static int 896 xxreadtoken(void) 897 { 898 int c; 899 900 if (tokpushback) { 901 tokpushback = 0; 902 return lasttoken; 903 } 904 if (needprompt) { 905 setprompt(2); 906 needprompt = 0; 907 } 908 startlinno = plinno; 909 for (;;) { /* until token or start of word found */ 910 c = pgetc_macro(); 911 switch (c) { 912 case ' ': case '\t': 913 continue; 914 case '#': 915 while ((c = pgetc()) != '\n' && c != PEOF); 916 pungetc(); 917 continue; 918 case '\\': 919 if (pgetc() == '\n') { 920 startlinno = ++plinno; 921 if (doprompt) 922 setprompt(2); 923 else 924 setprompt(0); 925 continue; 926 } 927 pungetc(); 928 /* FALLTHROUGH */ 929 default: 930 return readtoken1(c, BASESYNTAX, (char *)NULL, 0); 931 case '\n': 932 plinno++; 933 needprompt = doprompt; 934 RETURN(TNL); 935 case PEOF: 936 RETURN(TEOF); 937 case '&': 938 if (pgetc_linecont() == '&') 939 RETURN(TAND); 940 pungetc(); 941 RETURN(TBACKGND); 942 case '|': 943 if (pgetc_linecont() == '|') 944 RETURN(TOR); 945 pungetc(); 946 RETURN(TPIPE); 947 case ';': 948 c = pgetc_linecont(); 949 if (c == ';') 950 RETURN(TENDCASE); 951 else if (c == '&') 952 RETURN(TFALLTHRU); 953 pungetc(); 954 RETURN(TSEMI); 955 case '(': 956 RETURN(TLP); 957 case ')': 958 RETURN(TRP); 959 } 960 } 961 #undef RETURN 962 } 963 964 965 #define MAXNEST_static 8 966 struct tokenstate 967 { 968 const char *syntax; /* *SYNTAX */ 969 int parenlevel; /* levels of parentheses in arithmetic */ 970 enum tokenstate_category 971 { 972 TSTATE_TOP, 973 TSTATE_VAR_OLD, /* ${var+-=?}, inherits dquotes */ 974 TSTATE_VAR_NEW, /* other ${var...}, own dquote state */ 975 TSTATE_ARITH 976 } category; 977 }; 978 979 980 /* 981 * Check to see whether we are at the end of the here document. When this 982 * is called, c is set to the first character of the next input line. If 983 * we are at the end of the here document, this routine sets the c to PEOF. 984 * The new value of c is returned. 985 */ 986 987 static int 988 checkend(int c, const char *eofmark, int striptabs) 989 { 990 if (striptabs) { 991 while (c == '\t') 992 c = pgetc(); 993 } 994 if (c == *eofmark) { 995 int c2; 996 const char *q; 997 998 for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++) 999 ; 1000 if ((c2 == PEOF || c2 == '\n') && *q == '\0') { 1001 c = PEOF; 1002 if (c2 == '\n') { 1003 plinno++; 1004 needprompt = doprompt; 1005 } 1006 } else { 1007 pungetc(); 1008 pushstring(eofmark + 1, q - (eofmark + 1), NULL); 1009 } 1010 } else if (c == '\n' && *eofmark == '\0') { 1011 c = PEOF; 1012 plinno++; 1013 needprompt = doprompt; 1014 } 1015 return (c); 1016 } 1017 1018 1019 /* 1020 * Parse a redirection operator. The variable "out" points to a string 1021 * specifying the fd to be redirected. The variable "c" contains the 1022 * first character of the redirection operator. 1023 */ 1024 1025 static void 1026 parseredir(char *out, int c) 1027 { 1028 char fd = *out; 1029 union node *np; 1030 1031 np = (union node *)stalloc(sizeof (struct nfile)); 1032 if (c == '>') { 1033 np->nfile.fd = 1; 1034 c = pgetc_linecont(); 1035 if (c == '>') 1036 np->type = NAPPEND; 1037 else if (c == '&') 1038 np->type = NTOFD; 1039 else if (c == '|') 1040 np->type = NCLOBBER; 1041 else { 1042 np->type = NTO; 1043 pungetc(); 1044 } 1045 } else { /* c == '<' */ 1046 np->nfile.fd = 0; 1047 c = pgetc_linecont(); 1048 if (c == '<') { 1049 if (sizeof (struct nfile) != sizeof (struct nhere)) { 1050 np = (union node *)stalloc(sizeof (struct nhere)); 1051 np->nfile.fd = 0; 1052 } 1053 np->type = NHERE; 1054 heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc)); 1055 heredoc->here = np; 1056 if ((c = pgetc_linecont()) == '-') { 1057 heredoc->striptabs = 1; 1058 } else { 1059 heredoc->striptabs = 0; 1060 pungetc(); 1061 } 1062 } else if (c == '&') 1063 np->type = NFROMFD; 1064 else if (c == '>') 1065 np->type = NFROMTO; 1066 else { 1067 np->type = NFROM; 1068 pungetc(); 1069 } 1070 } 1071 if (fd != '\0') 1072 np->nfile.fd = digit_val(fd); 1073 redirnode = np; 1074 } 1075 1076 /* 1077 * Called to parse command substitutions. 1078 */ 1079 1080 static char * 1081 parsebackq(char *out, struct nodelist **pbqlist, 1082 int oldstyle, int dblquote, int quoted) 1083 { 1084 struct nodelist **nlpp; 1085 union node *n; 1086 char *volatile str; 1087 struct jmploc jmploc; 1088 struct jmploc *const savehandler = handler; 1089 size_t savelen; 1090 int saveprompt; 1091 const int bq_startlinno = plinno; 1092 char *volatile ostr = NULL; 1093 struct parsefile *const savetopfile = getcurrentfile(); 1094 struct heredoc *const saveheredoclist = heredoclist; 1095 struct heredoc *here; 1096 1097 str = NULL; 1098 if (setjmp(jmploc.loc)) { 1099 popfilesupto(savetopfile); 1100 if (str) 1101 ckfree(str); 1102 if (ostr) 1103 ckfree(ostr); 1104 heredoclist = saveheredoclist; 1105 handler = savehandler; 1106 if (exception == EXERROR) { 1107 startlinno = bq_startlinno; 1108 synerror("Error in command substitution"); 1109 } 1110 longjmp(handler->loc, 1); 1111 } 1112 INTOFF; 1113 savelen = out - stackblock(); 1114 if (savelen > 0) { 1115 str = ckmalloc(savelen); 1116 memcpy(str, stackblock(), savelen); 1117 } 1118 handler = &jmploc; 1119 heredoclist = NULL; 1120 INTON; 1121 if (oldstyle) { 1122 /* We must read until the closing backquote, giving special 1123 treatment to some slashes, and then push the string and 1124 reread it as input, interpreting it normally. */ 1125 char *oout; 1126 int c; 1127 int olen; 1128 1129 1130 STARTSTACKSTR(oout); 1131 for (;;) { 1132 if (needprompt) { 1133 setprompt(2); 1134 needprompt = 0; 1135 } 1136 CHECKSTRSPACE(2, oout); 1137 c = pgetc_linecont(); 1138 if (c == '`') 1139 break; 1140 switch (c) { 1141 case '\\': 1142 c = pgetc(); 1143 if (c != '\\' && c != '`' && c != '$' 1144 && (!dblquote || c != '"')) 1145 USTPUTC('\\', oout); 1146 break; 1147 1148 case '\n': 1149 plinno++; 1150 needprompt = doprompt; 1151 break; 1152 1153 case PEOF: 1154 startlinno = plinno; 1155 synerror("EOF in backquote substitution"); 1156 break; 1157 1158 default: 1159 break; 1160 } 1161 USTPUTC(c, oout); 1162 } 1163 USTPUTC('\0', oout); 1164 olen = oout - stackblock(); 1165 INTOFF; 1166 ostr = ckmalloc(olen); 1167 memcpy(ostr, stackblock(), olen); 1168 setinputstring(ostr, 1); 1169 INTON; 1170 } 1171 nlpp = pbqlist; 1172 while (*nlpp) 1173 nlpp = &(*nlpp)->next; 1174 *nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist)); 1175 (*nlpp)->next = NULL; 1176 1177 if (oldstyle) { 1178 saveprompt = doprompt; 1179 doprompt = 0; 1180 } 1181 1182 n = list(0); 1183 1184 if (oldstyle) { 1185 if (peektoken() != TEOF) 1186 synexpect(-1); 1187 doprompt = saveprompt; 1188 } else 1189 consumetoken(TRP); 1190 1191 (*nlpp)->n = n; 1192 if (oldstyle) { 1193 /* 1194 * Start reading from old file again, ignoring any pushed back 1195 * tokens left from the backquote parsing 1196 */ 1197 popfile(); 1198 tokpushback = 0; 1199 } 1200 STARTSTACKSTR(out); 1201 CHECKSTRSPACE(savelen + 1, out); 1202 INTOFF; 1203 if (str) { 1204 memcpy(out, str, savelen); 1205 STADJUST(savelen, out); 1206 ckfree(str); 1207 str = NULL; 1208 } 1209 if (ostr) { 1210 ckfree(ostr); 1211 ostr = NULL; 1212 } 1213 here = saveheredoclist; 1214 if (here != NULL) { 1215 while (here->next != NULL) 1216 here = here->next; 1217 here->next = heredoclist; 1218 heredoclist = saveheredoclist; 1219 } 1220 handler = savehandler; 1221 INTON; 1222 if (quoted) 1223 USTPUTC(CTLBACKQ | CTLQUOTE, out); 1224 else 1225 USTPUTC(CTLBACKQ, out); 1226 return out; 1227 } 1228 1229 1230 /* 1231 * Called to parse a backslash escape sequence inside $'...'. 1232 * The backslash has already been read. 1233 */ 1234 static char * 1235 readcstyleesc(char *out) 1236 { 1237 int c, vc, i, n; 1238 unsigned int v; 1239 1240 c = pgetc(); 1241 switch (c) { 1242 case '\0': 1243 synerror("Unterminated quoted string"); 1244 case '\n': 1245 plinno++; 1246 if (doprompt) 1247 setprompt(2); 1248 else 1249 setprompt(0); 1250 return out; 1251 case '\\': 1252 case '\'': 1253 case '"': 1254 v = c; 1255 break; 1256 case 'a': v = '\a'; break; 1257 case 'b': v = '\b'; break; 1258 case 'e': v = '\033'; break; 1259 case 'f': v = '\f'; break; 1260 case 'n': v = '\n'; break; 1261 case 'r': v = '\r'; break; 1262 case 't': v = '\t'; break; 1263 case 'v': v = '\v'; break; 1264 case 'x': 1265 v = 0; 1266 for (;;) { 1267 c = pgetc(); 1268 if (c >= '0' && c <= '9') 1269 v = (v << 4) + c - '0'; 1270 else if (c >= 'A' && c <= 'F') 1271 v = (v << 4) + c - 'A' + 10; 1272 else if (c >= 'a' && c <= 'f') 1273 v = (v << 4) + c - 'a' + 10; 1274 else 1275 break; 1276 } 1277 pungetc(); 1278 break; 1279 case '0': case '1': case '2': case '3': 1280 case '4': case '5': case '6': case '7': 1281 v = c - '0'; 1282 c = pgetc(); 1283 if (c >= '0' && c <= '7') { 1284 v <<= 3; 1285 v += c - '0'; 1286 c = pgetc(); 1287 if (c >= '0' && c <= '7') { 1288 v <<= 3; 1289 v += c - '0'; 1290 } else 1291 pungetc(); 1292 } else 1293 pungetc(); 1294 break; 1295 case 'c': 1296 c = pgetc(); 1297 if (c < 0x3f || c > 0x7a || c == 0x60) 1298 synerror("Bad escape sequence"); 1299 if (c == '\\' && pgetc() != '\\') 1300 synerror("Bad escape sequence"); 1301 if (c == '?') 1302 v = 127; 1303 else 1304 v = c & 0x1f; 1305 break; 1306 case 'u': 1307 case 'U': 1308 n = c == 'U' ? 8 : 4; 1309 v = 0; 1310 for (i = 0; i < n; i++) { 1311 c = pgetc(); 1312 if (c >= '0' && c <= '9') 1313 v = (v << 4) + c - '0'; 1314 else if (c >= 'A' && c <= 'F') 1315 v = (v << 4) + c - 'A' + 10; 1316 else if (c >= 'a' && c <= 'f') 1317 v = (v << 4) + c - 'a' + 10; 1318 else 1319 synerror("Bad escape sequence"); 1320 } 1321 if (v == 0 || (v >= 0xd800 && v <= 0xdfff)) 1322 synerror("Bad escape sequence"); 1323 /* We really need iconv here. */ 1324 if (initial_localeisutf8 && v > 127) { 1325 CHECKSTRSPACE(4, out); 1326 /* 1327 * We cannot use wctomb() as the locale may have 1328 * changed. 1329 */ 1330 if (v <= 0x7ff) { 1331 USTPUTC(0xc0 | v >> 6, out); 1332 USTPUTC(0x80 | (v & 0x3f), out); 1333 return out; 1334 } else if (v <= 0xffff) { 1335 USTPUTC(0xe0 | v >> 12, out); 1336 USTPUTC(0x80 | ((v >> 6) & 0x3f), out); 1337 USTPUTC(0x80 | (v & 0x3f), out); 1338 return out; 1339 } else if (v <= 0x10ffff) { 1340 USTPUTC(0xf0 | v >> 18, out); 1341 USTPUTC(0x80 | ((v >> 12) & 0x3f), out); 1342 USTPUTC(0x80 | ((v >> 6) & 0x3f), out); 1343 USTPUTC(0x80 | (v & 0x3f), out); 1344 return out; 1345 } 1346 } 1347 if (v > 127) 1348 v = '?'; 1349 break; 1350 default: 1351 synerror("Bad escape sequence"); 1352 } 1353 vc = (char)v; 1354 /* 1355 * We can't handle NUL bytes. 1356 * POSIX says we should skip till the closing quote. 1357 */ 1358 if (vc == '\0') { 1359 while ((c = pgetc()) != '\'') { 1360 if (c == '\\') 1361 c = pgetc(); 1362 if (c == PEOF) 1363 synerror("Unterminated quoted string"); 1364 if (c == '\n') { 1365 plinno++; 1366 if (doprompt) 1367 setprompt(2); 1368 else 1369 setprompt(0); 1370 } 1371 } 1372 pungetc(); 1373 return out; 1374 } 1375 if (SQSYNTAX[vc] == CCTL) 1376 USTPUTC(CTLESC, out); 1377 USTPUTC(vc, out); 1378 return out; 1379 } 1380 1381 1382 /* 1383 * If eofmark is NULL, read a word or a redirection symbol. If eofmark 1384 * is not NULL, read a here document. In the latter case, eofmark is the 1385 * word which marks the end of the document and striptabs is true if 1386 * leading tabs should be stripped from the document. The argument firstc 1387 * is the first character of the input token or document. 1388 * 1389 * Because C does not have internal subroutines, I have simulated them 1390 * using goto's to implement the subroutine linkage. The following macros 1391 * will run code that appears at the end of readtoken1. 1392 */ 1393 1394 #define PARSESUB() {goto parsesub; parsesub_return:;} 1395 #define PARSEARITH() {goto parsearith; parsearith_return:;} 1396 1397 static int 1398 readtoken1(int firstc, char const *initialsyntax, const char *eofmark, 1399 int striptabs) 1400 { 1401 int c = firstc; 1402 char *out; 1403 int len; 1404 struct nodelist *bqlist; 1405 int quotef; 1406 int newvarnest; 1407 int level; 1408 int synentry; 1409 struct tokenstate state_static[MAXNEST_static]; 1410 int maxnest = MAXNEST_static; 1411 struct tokenstate *state = state_static; 1412 int sqiscstyle = 0; 1413 1414 startlinno = plinno; 1415 quotef = 0; 1416 bqlist = NULL; 1417 newvarnest = 0; 1418 level = 0; 1419 state[level].syntax = initialsyntax; 1420 state[level].parenlevel = 0; 1421 state[level].category = TSTATE_TOP; 1422 1423 STARTSTACKSTR(out); 1424 loop: { /* for each line, until end of word */ 1425 if (eofmark && eofmark != NOEOFMARK) 1426 /* set c to PEOF if at end of here document */ 1427 c = checkend(c, eofmark, striptabs); 1428 for (;;) { /* until end of line or end of word */ 1429 CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */ 1430 1431 synentry = state[level].syntax[c]; 1432 1433 switch(synentry) { 1434 case CNL: /* '\n' */ 1435 if (state[level].syntax == BASESYNTAX) 1436 goto endword; /* exit outer loop */ 1437 USTPUTC(c, out); 1438 plinno++; 1439 if (doprompt) 1440 setprompt(2); 1441 else 1442 setprompt(0); 1443 c = pgetc(); 1444 goto loop; /* continue outer loop */ 1445 case CSBACK: 1446 if (sqiscstyle) { 1447 out = readcstyleesc(out); 1448 break; 1449 } 1450 /* FALLTHROUGH */ 1451 case CWORD: 1452 USTPUTC(c, out); 1453 break; 1454 case CCTL: 1455 if (eofmark == NULL || initialsyntax != SQSYNTAX) 1456 USTPUTC(CTLESC, out); 1457 USTPUTC(c, out); 1458 break; 1459 case CBACK: /* backslash */ 1460 c = pgetc(); 1461 if (c == PEOF) { 1462 USTPUTC('\\', out); 1463 pungetc(); 1464 } else if (c == '\n') { 1465 plinno++; 1466 if (doprompt) 1467 setprompt(2); 1468 else 1469 setprompt(0); 1470 } else { 1471 if (state[level].syntax == DQSYNTAX && 1472 c != '\\' && c != '`' && c != '$' && 1473 (c != '"' || (eofmark != NULL && 1474 newvarnest == 0)) && 1475 (c != '}' || state[level].category != TSTATE_VAR_OLD)) 1476 USTPUTC('\\', out); 1477 if ((eofmark == NULL || 1478 newvarnest > 0) && 1479 state[level].syntax == BASESYNTAX) 1480 USTPUTC(CTLQUOTEMARK, out); 1481 if (SQSYNTAX[c] == CCTL) 1482 USTPUTC(CTLESC, out); 1483 USTPUTC(c, out); 1484 if ((eofmark == NULL || 1485 newvarnest > 0) && 1486 state[level].syntax == BASESYNTAX && 1487 state[level].category == TSTATE_VAR_OLD) 1488 USTPUTC(CTLQUOTEEND, out); 1489 quotef++; 1490 } 1491 break; 1492 case CSQUOTE: 1493 USTPUTC(CTLQUOTEMARK, out); 1494 state[level].syntax = SQSYNTAX; 1495 sqiscstyle = 0; 1496 break; 1497 case CDQUOTE: 1498 USTPUTC(CTLQUOTEMARK, out); 1499 state[level].syntax = DQSYNTAX; 1500 break; 1501 case CENDQUOTE: 1502 if (eofmark != NULL && newvarnest == 0) 1503 USTPUTC(c, out); 1504 else { 1505 if (state[level].category == TSTATE_VAR_OLD) 1506 USTPUTC(CTLQUOTEEND, out); 1507 state[level].syntax = BASESYNTAX; 1508 quotef++; 1509 } 1510 break; 1511 case CVAR: /* '$' */ 1512 PARSESUB(); /* parse substitution */ 1513 break; 1514 case CENDVAR: /* '}' */ 1515 if (level > 0 && 1516 ((state[level].category == TSTATE_VAR_OLD && 1517 state[level].syntax == 1518 state[level - 1].syntax) || 1519 (state[level].category == TSTATE_VAR_NEW && 1520 state[level].syntax == BASESYNTAX))) { 1521 if (state[level].category == TSTATE_VAR_NEW) 1522 newvarnest--; 1523 level--; 1524 USTPUTC(CTLENDVAR, out); 1525 } else { 1526 USTPUTC(c, out); 1527 } 1528 break; 1529 case CLP: /* '(' in arithmetic */ 1530 state[level].parenlevel++; 1531 USTPUTC(c, out); 1532 break; 1533 case CRP: /* ')' in arithmetic */ 1534 if (state[level].parenlevel > 0) { 1535 USTPUTC(c, out); 1536 --state[level].parenlevel; 1537 } else { 1538 if (pgetc_linecont() == ')') { 1539 if (level > 0 && 1540 state[level].category == TSTATE_ARITH) { 1541 level--; 1542 USTPUTC(CTLENDARI, out); 1543 } else 1544 USTPUTC(')', out); 1545 } else { 1546 /* 1547 * unbalanced parens 1548 * (don't 2nd guess - no error) 1549 */ 1550 pungetc(); 1551 USTPUTC(')', out); 1552 } 1553 } 1554 break; 1555 case CBQUOTE: /* '`' */ 1556 out = parsebackq(out, &bqlist, 1, 1557 state[level].syntax == DQSYNTAX && 1558 (eofmark == NULL || newvarnest > 0), 1559 state[level].syntax == DQSYNTAX || state[level].syntax == ARISYNTAX); 1560 break; 1561 case CEOF: 1562 goto endword; /* exit outer loop */ 1563 case CIGN: 1564 break; 1565 default: 1566 if (level == 0) 1567 goto endword; /* exit outer loop */ 1568 USTPUTC(c, out); 1569 } 1570 c = pgetc_macro(); 1571 } 1572 } 1573 endword: 1574 if (state[level].syntax == ARISYNTAX) 1575 synerror("Missing '))'"); 1576 if (state[level].syntax != BASESYNTAX && eofmark == NULL) 1577 synerror("Unterminated quoted string"); 1578 if (state[level].category == TSTATE_VAR_OLD || 1579 state[level].category == TSTATE_VAR_NEW) { 1580 startlinno = plinno; 1581 synerror("Missing '}'"); 1582 } 1583 if (state != state_static) 1584 parser_temp_free_upto(state); 1585 USTPUTC('\0', out); 1586 len = out - stackblock(); 1587 out = stackblock(); 1588 if (eofmark == NULL) { 1589 if ((c == '>' || c == '<') 1590 && quotef == 0 1591 && len <= 2 1592 && (*out == '\0' || is_digit(*out))) { 1593 parseredir(out, c); 1594 return lasttoken = TREDIR; 1595 } else { 1596 pungetc(); 1597 } 1598 } 1599 quoteflag = quotef; 1600 backquotelist = bqlist; 1601 grabstackblock(len); 1602 wordtext = out; 1603 return lasttoken = TWORD; 1604 /* end of readtoken routine */ 1605 1606 1607 /* 1608 * Parse a substitution. At this point, we have read the dollar sign 1609 * and nothing else. 1610 */ 1611 1612 parsesub: { 1613 char buf[10]; 1614 int subtype; 1615 int typeloc; 1616 int flags; 1617 char *p; 1618 static const char types[] = "}-+?="; 1619 int bracketed_name = 0; /* used to handle ${[0-9]*} variables */ 1620 int linno; 1621 int length; 1622 int c1; 1623 1624 c = pgetc_linecont(); 1625 if (c == '(') { /* $(command) or $((arith)) */ 1626 if (pgetc_linecont() == '(') { 1627 PARSEARITH(); 1628 } else { 1629 pungetc(); 1630 out = parsebackq(out, &bqlist, 0, 1631 state[level].syntax == DQSYNTAX && 1632 (eofmark == NULL || newvarnest > 0), 1633 state[level].syntax == DQSYNTAX || 1634 state[level].syntax == ARISYNTAX); 1635 } 1636 } else if (c == '{' || is_name(c) || is_special(c)) { 1637 USTPUTC(CTLVAR, out); 1638 typeloc = out - stackblock(); 1639 USTPUTC(VSNORMAL, out); 1640 subtype = VSNORMAL; 1641 flags = 0; 1642 if (c == '{') { 1643 bracketed_name = 1; 1644 c = pgetc_linecont(); 1645 subtype = 0; 1646 } 1647 varname: 1648 if (!is_eof(c) && is_name(c)) { 1649 length = 0; 1650 do { 1651 STPUTC(c, out); 1652 c = pgetc_linecont(); 1653 length++; 1654 } while (!is_eof(c) && is_in_name(c)); 1655 if (length == 6 && 1656 strncmp(out - length, "LINENO", length) == 0) { 1657 /* Replace the variable name with the 1658 * current line number. */ 1659 linno = plinno; 1660 if (funclinno != 0) 1661 linno -= funclinno - 1; 1662 snprintf(buf, sizeof(buf), "%d", linno); 1663 STADJUST(-6, out); 1664 STPUTS(buf, out); 1665 flags |= VSLINENO; 1666 } 1667 } else if (is_digit(c)) { 1668 if (bracketed_name) { 1669 do { 1670 STPUTC(c, out); 1671 c = pgetc_linecont(); 1672 } while (is_digit(c)); 1673 } else { 1674 STPUTC(c, out); 1675 c = pgetc_linecont(); 1676 } 1677 } else if (is_special(c)) { 1678 c1 = c; 1679 c = pgetc_linecont(); 1680 if (subtype == 0 && c1 == '#') { 1681 subtype = VSLENGTH; 1682 if (strchr(types, c) == NULL && c != ':' && 1683 c != '#' && c != '%') 1684 goto varname; 1685 c1 = c; 1686 c = pgetc_linecont(); 1687 if (c1 != '}' && c == '}') { 1688 pungetc(); 1689 c = c1; 1690 goto varname; 1691 } 1692 pungetc(); 1693 c = c1; 1694 c1 = '#'; 1695 subtype = 0; 1696 } 1697 USTPUTC(c1, out); 1698 } else { 1699 subtype = VSERROR; 1700 if (c == '}') 1701 pungetc(); 1702 else if (c == '\n' || c == PEOF) 1703 synerror("Unexpected end of line in substitution"); 1704 else if (BASESYNTAX[c] != CCTL) 1705 USTPUTC(c, out); 1706 } 1707 if (subtype == 0) { 1708 switch (c) { 1709 case ':': 1710 flags |= VSNUL; 1711 c = pgetc_linecont(); 1712 /*FALLTHROUGH*/ 1713 default: 1714 p = strchr(types, c); 1715 if (p == NULL) { 1716 if (c == '\n' || c == PEOF) 1717 synerror("Unexpected end of line in substitution"); 1718 if (flags == VSNUL) 1719 STPUTC(':', out); 1720 if (BASESYNTAX[c] != CCTL) 1721 STPUTC(c, out); 1722 subtype = VSERROR; 1723 } else 1724 subtype = p - types + VSNORMAL; 1725 break; 1726 case '%': 1727 case '#': 1728 { 1729 int cc = c; 1730 subtype = c == '#' ? VSTRIMLEFT : 1731 VSTRIMRIGHT; 1732 c = pgetc_linecont(); 1733 if (c == cc) 1734 subtype++; 1735 else 1736 pungetc(); 1737 break; 1738 } 1739 } 1740 } else if (subtype != VSERROR) { 1741 if (subtype == VSLENGTH && c != '}') 1742 subtype = VSERROR; 1743 pungetc(); 1744 } 1745 STPUTC('=', out); 1746 if (state[level].syntax == DQSYNTAX || 1747 state[level].syntax == ARISYNTAX) 1748 flags |= VSQUOTE; 1749 *(stackblock() + typeloc) = subtype | flags; 1750 if (subtype != VSNORMAL) { 1751 if (level + 1 >= maxnest) { 1752 maxnest *= 2; 1753 if (state == state_static) { 1754 state = parser_temp_alloc( 1755 maxnest * sizeof(*state)); 1756 memcpy(state, state_static, 1757 MAXNEST_static * sizeof(*state)); 1758 } else 1759 state = parser_temp_realloc(state, 1760 maxnest * sizeof(*state)); 1761 } 1762 level++; 1763 state[level].parenlevel = 0; 1764 if (subtype == VSMINUS || subtype == VSPLUS || 1765 subtype == VSQUESTION || subtype == VSASSIGN) { 1766 /* 1767 * For operators that were in the Bourne shell, 1768 * inherit the double-quote state. 1769 */ 1770 state[level].syntax = state[level - 1].syntax; 1771 state[level].category = TSTATE_VAR_OLD; 1772 } else { 1773 /* 1774 * The other operators take a pattern, 1775 * so go to BASESYNTAX. 1776 * Also, ' and " are now special, even 1777 * in here documents. 1778 */ 1779 state[level].syntax = BASESYNTAX; 1780 state[level].category = TSTATE_VAR_NEW; 1781 newvarnest++; 1782 } 1783 } 1784 } else if (c == '\'' && state[level].syntax == BASESYNTAX) { 1785 /* $'cstylequotes' */ 1786 USTPUTC(CTLQUOTEMARK, out); 1787 state[level].syntax = SQSYNTAX; 1788 sqiscstyle = 1; 1789 } else { 1790 USTPUTC('$', out); 1791 pungetc(); 1792 } 1793 goto parsesub_return; 1794 } 1795 1796 1797 /* 1798 * Parse an arithmetic expansion (indicate start of one and set state) 1799 */ 1800 parsearith: { 1801 1802 if (level + 1 >= maxnest) { 1803 maxnest *= 2; 1804 if (state == state_static) { 1805 state = parser_temp_alloc( 1806 maxnest * sizeof(*state)); 1807 memcpy(state, state_static, 1808 MAXNEST_static * sizeof(*state)); 1809 } else 1810 state = parser_temp_realloc(state, 1811 maxnest * sizeof(*state)); 1812 } 1813 level++; 1814 state[level].syntax = ARISYNTAX; 1815 state[level].parenlevel = 0; 1816 state[level].category = TSTATE_ARITH; 1817 USTPUTC(CTLARI, out); 1818 if (state[level - 1].syntax == DQSYNTAX) 1819 USTPUTC('"',out); 1820 else 1821 USTPUTC(' ',out); 1822 goto parsearith_return; 1823 } 1824 1825 } /* end of readtoken */ 1826 1827 1828 /* 1829 * Returns true if the text contains nothing to expand (no dollar signs 1830 * or backquotes). 1831 */ 1832 1833 static int 1834 noexpand(char *text) 1835 { 1836 char *p; 1837 char c; 1838 1839 p = text; 1840 while ((c = *p++) != '\0') { 1841 if ( c == CTLQUOTEMARK) 1842 continue; 1843 if (c == CTLESC) 1844 p++; 1845 else if (BASESYNTAX[(int)c] == CCTL) 1846 return 0; 1847 } 1848 return 1; 1849 } 1850 1851 1852 /* 1853 * Return true if the argument is a legal variable name (a letter or 1854 * underscore followed by zero or more letters, underscores, and digits). 1855 */ 1856 1857 int 1858 goodname(const char *name) 1859 { 1860 const char *p; 1861 1862 p = name; 1863 if (! is_name(*p)) 1864 return 0; 1865 while (*++p) { 1866 if (! is_in_name(*p)) 1867 return 0; 1868 } 1869 return 1; 1870 } 1871 1872 1873 int 1874 isassignment(const char *p) 1875 { 1876 if (!is_name(*p)) 1877 return 0; 1878 p++; 1879 for (;;) { 1880 if (*p == '=') 1881 return 1; 1882 else if (!is_in_name(*p)) 1883 return 0; 1884 p++; 1885 } 1886 } 1887 1888 1889 static void 1890 consumetoken(int token) 1891 { 1892 if (readtoken() != token) 1893 synexpect(token); 1894 } 1895 1896 1897 /* 1898 * Called when an unexpected token is read during the parse. The argument 1899 * is the token that is expected, or -1 if more than one type of token can 1900 * occur at this point. 1901 */ 1902 1903 static void 1904 synexpect(int token) 1905 { 1906 char msg[64]; 1907 1908 if (token >= 0) { 1909 fmtstr(msg, 64, "%s unexpected (expecting %s)", 1910 tokname[lasttoken], tokname[token]); 1911 } else { 1912 fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]); 1913 } 1914 synerror(msg); 1915 } 1916 1917 1918 static void 1919 synerror(const char *msg) 1920 { 1921 if (commandname) 1922 outfmt(out2, "%s: %d: ", commandname, startlinno); 1923 else if (arg0) 1924 outfmt(out2, "%s: ", arg0); 1925 outfmt(out2, "Syntax error: %s\n", msg); 1926 error((char *)NULL); 1927 } 1928 1929 static void 1930 setprompt(int which) 1931 { 1932 whichprompt = which; 1933 if (which == 0) 1934 return; 1935 1936 #ifndef NO_HISTORY 1937 if (!el) 1938 #endif 1939 { 1940 out2str(getprompt(NULL)); 1941 flushout(out2); 1942 } 1943 } 1944 1945 static int 1946 pgetc_linecont(void) 1947 { 1948 int c; 1949 1950 while ((c = pgetc_macro()) == '\\') { 1951 c = pgetc(); 1952 if (c == '\n') { 1953 plinno++; 1954 if (doprompt) 1955 setprompt(2); 1956 else 1957 setprompt(0); 1958 } else { 1959 pungetc(); 1960 /* Allow the backslash to be pushed back. */ 1961 pushstring("\\", 1, NULL); 1962 return (pgetc()); 1963 } 1964 } 1965 return (c); 1966 } 1967 1968 /* 1969 * called by editline -- any expansions to the prompt 1970 * should be added here. 1971 */ 1972 char * 1973 getprompt(void *unused __unused) 1974 { 1975 static char ps[PROMPTLEN]; 1976 const char *fmt; 1977 const char *pwd; 1978 int i, trim; 1979 static char internal_error[] = "??"; 1980 1981 /* 1982 * Select prompt format. 1983 */ 1984 switch (whichprompt) { 1985 case 0: 1986 fmt = ""; 1987 break; 1988 case 1: 1989 fmt = ps1val(); 1990 break; 1991 case 2: 1992 fmt = ps2val(); 1993 break; 1994 default: 1995 return internal_error; 1996 } 1997 1998 /* 1999 * Format prompt string. 2000 */ 2001 for (i = 0; (i < 127) && (*fmt != '\0'); i++, fmt++) 2002 if (*fmt == '\\') 2003 switch (*++fmt) { 2004 2005 /* 2006 * Hostname. 2007 * 2008 * \h specifies just the local hostname, 2009 * \H specifies fully-qualified hostname. 2010 */ 2011 case 'h': 2012 case 'H': 2013 ps[i] = '\0'; 2014 gethostname(&ps[i], PROMPTLEN - i); 2015 /* Skip to end of hostname. */ 2016 trim = (*fmt == 'h') ? '.' : '\0'; 2017 while ((ps[i+1] != '\0') && (ps[i+1] != trim)) 2018 i++; 2019 break; 2020 2021 /* 2022 * Working directory. 2023 * 2024 * \W specifies just the final component, 2025 * \w specifies the entire path. 2026 */ 2027 case 'W': 2028 case 'w': 2029 pwd = lookupvar("PWD"); 2030 if (pwd == NULL) 2031 pwd = "?"; 2032 if (*fmt == 'W' && 2033 *pwd == '/' && pwd[1] != '\0') 2034 strlcpy(&ps[i], strrchr(pwd, '/') + 1, 2035 PROMPTLEN - i); 2036 else 2037 strlcpy(&ps[i], pwd, PROMPTLEN - i); 2038 /* Skip to end of path. */ 2039 while (ps[i + 1] != '\0') 2040 i++; 2041 break; 2042 2043 /* 2044 * Superuser status. 2045 * 2046 * '$' for normal users, '#' for root. 2047 */ 2048 case '$': 2049 ps[i] = (geteuid() != 0) ? '$' : '#'; 2050 break; 2051 2052 /* 2053 * A literal \. 2054 */ 2055 case '\\': 2056 ps[i] = '\\'; 2057 break; 2058 2059 /* 2060 * Emit unrecognized formats verbatim. 2061 */ 2062 default: 2063 ps[i++] = '\\'; 2064 ps[i] = *fmt; 2065 break; 2066 } 2067 else 2068 ps[i] = *fmt; 2069 ps[i] = '\0'; 2070 return (ps); 2071 } 2072 2073 2074 const char * 2075 expandstr(const char *ps) 2076 { 2077 union node n; 2078 struct jmploc jmploc; 2079 struct jmploc *const savehandler = handler; 2080 const int saveprompt = doprompt; 2081 struct parsefile *const savetopfile = getcurrentfile(); 2082 struct parser_temp *const saveparser_temp = parser_temp; 2083 const char *result = NULL; 2084 2085 if (!setjmp(jmploc.loc)) { 2086 handler = &jmploc; 2087 parser_temp = NULL; 2088 setinputstring(ps, 1); 2089 doprompt = 0; 2090 readtoken1(pgetc(), DQSYNTAX, NOEOFMARK, 0); 2091 if (backquotelist != NULL) 2092 error("Command substitution not allowed here"); 2093 2094 n.narg.type = NARG; 2095 n.narg.next = NULL; 2096 n.narg.text = wordtext; 2097 n.narg.backquote = backquotelist; 2098 2099 expandarg(&n, NULL, 0); 2100 result = stackblock(); 2101 INTOFF; 2102 } 2103 handler = savehandler; 2104 doprompt = saveprompt; 2105 popfilesupto(savetopfile); 2106 if (parser_temp != saveparser_temp) { 2107 parser_temp_free_all(); 2108 parser_temp = saveparser_temp; 2109 } 2110 if (result != NULL) { 2111 INTON; 2112 } else if (exception == EXINT) 2113 raise(SIGINT); 2114 return result; 2115 } 2116