1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Kenneth Almquist. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #ifndef lint 36 #if 0 37 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95"; 38 #endif 39 #endif /* not lint */ 40 #include <sys/cdefs.h> 41 __FBSDID("$FreeBSD$"); 42 43 #include <sys/param.h> 44 #include <pwd.h> 45 #include <stdlib.h> 46 #include <unistd.h> 47 #include <stdio.h> 48 49 #include "shell.h" 50 #include "parser.h" 51 #include "nodes.h" 52 #include "expand.h" /* defines rmescapes() */ 53 #include "syntax.h" 54 #include "options.h" 55 #include "input.h" 56 #include "output.h" 57 #include "var.h" 58 #include "error.h" 59 #include "memalloc.h" 60 #include "mystring.h" 61 #include "alias.h" 62 #include "show.h" 63 #include "eval.h" 64 #include "exec.h" /* to check for special builtins */ 65 #ifndef NO_HISTORY 66 #include "myhistedit.h" 67 #endif 68 69 /* 70 * Shell command parser. 71 */ 72 73 #define PROMPTLEN 192 74 75 /* values of checkkwd variable */ 76 #define CHKALIAS 0x1 77 #define CHKKWD 0x2 78 #define CHKNL 0x4 79 80 /* values returned by readtoken */ 81 #include "token.h" 82 83 84 85 struct heredoc { 86 struct heredoc *next; /* next here document in list */ 87 union node *here; /* redirection node */ 88 char *eofmark; /* string indicating end of input */ 89 int striptabs; /* if set, strip leading tabs */ 90 }; 91 92 struct parser_temp { 93 struct parser_temp *next; 94 void *data; 95 }; 96 97 98 static struct heredoc *heredoclist; /* list of here documents to read */ 99 static int doprompt; /* if set, prompt the user */ 100 static int needprompt; /* true if interactive and at start of line */ 101 static int lasttoken; /* last token read */ 102 static int tokpushback; /* last token pushed back */ 103 static char *wordtext; /* text of last word returned by readtoken */ 104 static int checkkwd; 105 static struct nodelist *backquotelist; 106 static union node *redirnode; 107 static struct heredoc *heredoc; 108 static int quoteflag; /* set if (part of) last token was quoted */ 109 static int startlinno; /* line # where last token started */ 110 static int funclinno; /* line # where the current function started */ 111 static struct parser_temp *parser_temp; 112 113 #define NOEOFMARK ((const char *)&heredoclist) 114 115 116 static union node *list(int); 117 static union node *andor(void); 118 static union node *pipeline(void); 119 static union node *command(void); 120 static union node *simplecmd(union node **, union node *); 121 static union node *makename(void); 122 static union node *makebinary(int type, union node *n1, union node *n2); 123 static void parsefname(void); 124 static void parseheredoc(void); 125 static int peektoken(void); 126 static int readtoken(void); 127 static int xxreadtoken(void); 128 static int readtoken1(int, const char *, const char *, int); 129 static int noexpand(char *); 130 static void consumetoken(int); 131 static void synexpect(int) __dead2; 132 static void synerror(const char *) __dead2; 133 static void setprompt(int); 134 static int pgetc_linecont(void); 135 static void getusername(char *, size_t); 136 137 138 static void * 139 parser_temp_alloc(size_t len) 140 { 141 struct parser_temp *t; 142 143 INTOFF; 144 t = ckmalloc(sizeof(*t)); 145 t->data = NULL; 146 t->next = parser_temp; 147 parser_temp = t; 148 t->data = ckmalloc(len); 149 INTON; 150 return t->data; 151 } 152 153 154 static void * 155 parser_temp_realloc(void *ptr, size_t len) 156 { 157 struct parser_temp *t; 158 159 INTOFF; 160 t = parser_temp; 161 if (ptr != t->data) 162 error("bug: parser_temp_realloc misused"); 163 t->data = ckrealloc(t->data, len); 164 INTON; 165 return t->data; 166 } 167 168 169 static void 170 parser_temp_free_upto(void *ptr) 171 { 172 struct parser_temp *t; 173 int done = 0; 174 175 INTOFF; 176 while (parser_temp != NULL && !done) { 177 t = parser_temp; 178 parser_temp = t->next; 179 done = t->data == ptr; 180 ckfree(t->data); 181 ckfree(t); 182 } 183 INTON; 184 if (!done) 185 error("bug: parser_temp_free_upto misused"); 186 } 187 188 189 static void 190 parser_temp_free_all(void) 191 { 192 struct parser_temp *t; 193 194 INTOFF; 195 while (parser_temp != NULL) { 196 t = parser_temp; 197 parser_temp = t->next; 198 ckfree(t->data); 199 ckfree(t); 200 } 201 INTON; 202 } 203 204 205 /* 206 * Read and parse a command. Returns NEOF on end of file. (NULL is a 207 * valid parse tree indicating a blank line.) 208 */ 209 210 union node * 211 parsecmd(int interact) 212 { 213 int t; 214 215 /* This assumes the parser is not re-entered, 216 * which could happen if we add command substitution on PS1/PS2. 217 */ 218 parser_temp_free_all(); 219 heredoclist = NULL; 220 221 tokpushback = 0; 222 checkkwd = 0; 223 doprompt = interact; 224 if (doprompt) 225 setprompt(1); 226 else 227 setprompt(0); 228 needprompt = 0; 229 t = readtoken(); 230 if (t == TEOF) 231 return NEOF; 232 if (t == TNL) 233 return NULL; 234 tokpushback++; 235 return list(1); 236 } 237 238 239 /* 240 * Read and parse words for wordexp. 241 * Returns a list of NARG nodes; NULL if there are no words. 242 */ 243 union node * 244 parsewordexp(void) 245 { 246 union node *n, *first = NULL, **pnext; 247 int t; 248 249 /* This assumes the parser is not re-entered, 250 * which could happen if we add command substitution on PS1/PS2. 251 */ 252 parser_temp_free_all(); 253 heredoclist = NULL; 254 255 tokpushback = 0; 256 checkkwd = 0; 257 doprompt = 0; 258 setprompt(0); 259 needprompt = 0; 260 pnext = &first; 261 while ((t = readtoken()) != TEOF) { 262 if (t != TWORD) 263 synexpect(TWORD); 264 n = makename(); 265 *pnext = n; 266 pnext = &n->narg.next; 267 } 268 return first; 269 } 270 271 272 static union node * 273 list(int nlflag) 274 { 275 union node *ntop, *n1, *n2, *n3; 276 int tok; 277 278 checkkwd = CHKNL | CHKKWD | CHKALIAS; 279 if (!nlflag && tokendlist[peektoken()]) 280 return NULL; 281 ntop = n1 = NULL; 282 for (;;) { 283 n2 = andor(); 284 tok = readtoken(); 285 if (tok == TBACKGND) { 286 if (n2 != NULL && n2->type == NPIPE) { 287 n2->npipe.backgnd = 1; 288 } else if (n2 != NULL && n2->type == NREDIR) { 289 n2->type = NBACKGND; 290 } else { 291 n3 = (union node *)stalloc(sizeof (struct nredir)); 292 n3->type = NBACKGND; 293 n3->nredir.n = n2; 294 n3->nredir.redirect = NULL; 295 n2 = n3; 296 } 297 } 298 if (ntop == NULL) 299 ntop = n2; 300 else if (n1 == NULL) { 301 n1 = makebinary(NSEMI, ntop, n2); 302 ntop = n1; 303 } 304 else { 305 n3 = makebinary(NSEMI, n1->nbinary.ch2, n2); 306 n1->nbinary.ch2 = n3; 307 n1 = n3; 308 } 309 switch (tok) { 310 case TBACKGND: 311 case TSEMI: 312 tok = readtoken(); 313 /* FALLTHROUGH */ 314 case TNL: 315 if (tok == TNL) { 316 parseheredoc(); 317 if (nlflag) 318 return ntop; 319 } else if (tok == TEOF && nlflag) { 320 parseheredoc(); 321 return ntop; 322 } else { 323 tokpushback++; 324 } 325 checkkwd = CHKNL | CHKKWD | CHKALIAS; 326 if (!nlflag && tokendlist[peektoken()]) 327 return ntop; 328 break; 329 case TEOF: 330 if (heredoclist) 331 parseheredoc(); 332 else 333 pungetc(); /* push back EOF on input */ 334 return ntop; 335 default: 336 if (nlflag) 337 synexpect(-1); 338 tokpushback++; 339 return ntop; 340 } 341 } 342 } 343 344 345 346 static union node * 347 andor(void) 348 { 349 union node *n; 350 int t; 351 352 n = pipeline(); 353 for (;;) { 354 if ((t = readtoken()) == TAND) { 355 t = NAND; 356 } else if (t == TOR) { 357 t = NOR; 358 } else { 359 tokpushback++; 360 return n; 361 } 362 n = makebinary(t, n, pipeline()); 363 } 364 } 365 366 367 368 static union node * 369 pipeline(void) 370 { 371 union node *n1, *n2, *pipenode; 372 struct nodelist *lp, *prev; 373 int negate, t; 374 375 negate = 0; 376 checkkwd = CHKNL | CHKKWD | CHKALIAS; 377 TRACE(("pipeline: entered\n")); 378 while (readtoken() == TNOT) 379 negate = !negate; 380 tokpushback++; 381 n1 = command(); 382 if (readtoken() == TPIPE) { 383 pipenode = (union node *)stalloc(sizeof (struct npipe)); 384 pipenode->type = NPIPE; 385 pipenode->npipe.backgnd = 0; 386 lp = (struct nodelist *)stalloc(sizeof (struct nodelist)); 387 pipenode->npipe.cmdlist = lp; 388 lp->n = n1; 389 do { 390 prev = lp; 391 lp = (struct nodelist *)stalloc(sizeof (struct nodelist)); 392 checkkwd = CHKNL | CHKKWD | CHKALIAS; 393 t = readtoken(); 394 tokpushback++; 395 if (t == TNOT) 396 lp->n = pipeline(); 397 else 398 lp->n = command(); 399 prev->next = lp; 400 } while (readtoken() == TPIPE); 401 lp->next = NULL; 402 n1 = pipenode; 403 } 404 tokpushback++; 405 if (negate) { 406 n2 = (union node *)stalloc(sizeof (struct nnot)); 407 n2->type = NNOT; 408 n2->nnot.com = n1; 409 return n2; 410 } else 411 return n1; 412 } 413 414 415 416 static union node * 417 command(void) 418 { 419 union node *n1, *n2; 420 union node *ap, **app; 421 union node *cp, **cpp; 422 union node *redir, **rpp; 423 int t; 424 int is_subshell; 425 426 checkkwd = CHKNL | CHKKWD | CHKALIAS; 427 is_subshell = 0; 428 redir = NULL; 429 n1 = NULL; 430 rpp = &redir; 431 432 /* Check for redirection which may precede command */ 433 while (readtoken() == TREDIR) { 434 *rpp = n2 = redirnode; 435 rpp = &n2->nfile.next; 436 parsefname(); 437 } 438 tokpushback++; 439 440 switch (readtoken()) { 441 case TIF: 442 n1 = (union node *)stalloc(sizeof (struct nif)); 443 n1->type = NIF; 444 if ((n1->nif.test = list(0)) == NULL) 445 synexpect(-1); 446 consumetoken(TTHEN); 447 n1->nif.ifpart = list(0); 448 n2 = n1; 449 while (readtoken() == TELIF) { 450 n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif)); 451 n2 = n2->nif.elsepart; 452 n2->type = NIF; 453 if ((n2->nif.test = list(0)) == NULL) 454 synexpect(-1); 455 consumetoken(TTHEN); 456 n2->nif.ifpart = list(0); 457 } 458 if (lasttoken == TELSE) 459 n2->nif.elsepart = list(0); 460 else { 461 n2->nif.elsepart = NULL; 462 tokpushback++; 463 } 464 consumetoken(TFI); 465 checkkwd = CHKKWD | CHKALIAS; 466 break; 467 case TWHILE: 468 case TUNTIL: 469 t = lasttoken; 470 if ((n1 = list(0)) == NULL) 471 synexpect(-1); 472 consumetoken(TDO); 473 n1 = makebinary((t == TWHILE)? NWHILE : NUNTIL, n1, list(0)); 474 consumetoken(TDONE); 475 checkkwd = CHKKWD | CHKALIAS; 476 break; 477 case TFOR: 478 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext)) 479 synerror("Bad for loop variable"); 480 n1 = (union node *)stalloc(sizeof (struct nfor)); 481 n1->type = NFOR; 482 n1->nfor.var = wordtext; 483 checkkwd = CHKNL; 484 if (readtoken() == TWORD && !quoteflag && 485 equal(wordtext, "in")) { 486 app = ≈ 487 while (readtoken() == TWORD) { 488 n2 = makename(); 489 *app = n2; 490 app = &n2->narg.next; 491 } 492 *app = NULL; 493 n1->nfor.args = ap; 494 if (lasttoken == TNL) 495 tokpushback++; 496 else if (lasttoken != TSEMI) 497 synexpect(-1); 498 } else { 499 static char argvars[5] = { 500 CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0' 501 }; 502 n2 = (union node *)stalloc(sizeof (struct narg)); 503 n2->type = NARG; 504 n2->narg.text = argvars; 505 n2->narg.backquote = NULL; 506 n2->narg.next = NULL; 507 n1->nfor.args = n2; 508 /* 509 * Newline or semicolon here is optional (but note 510 * that the original Bourne shell only allowed NL). 511 */ 512 if (lasttoken != TSEMI) 513 tokpushback++; 514 } 515 checkkwd = CHKNL | CHKKWD | CHKALIAS; 516 if ((t = readtoken()) == TDO) 517 t = TDONE; 518 else if (t == TBEGIN) 519 t = TEND; 520 else 521 synexpect(-1); 522 n1->nfor.body = list(0); 523 consumetoken(t); 524 checkkwd = CHKKWD | CHKALIAS; 525 break; 526 case TCASE: 527 n1 = (union node *)stalloc(sizeof (struct ncase)); 528 n1->type = NCASE; 529 consumetoken(TWORD); 530 n1->ncase.expr = makename(); 531 checkkwd = CHKNL; 532 if (readtoken() != TWORD || ! equal(wordtext, "in")) 533 synerror("expecting \"in\""); 534 cpp = &n1->ncase.cases; 535 checkkwd = CHKNL | CHKKWD, readtoken(); 536 while (lasttoken != TESAC) { 537 *cpp = cp = (union node *)stalloc(sizeof (struct nclist)); 538 cp->type = NCLIST; 539 app = &cp->nclist.pattern; 540 if (lasttoken == TLP) 541 readtoken(); 542 for (;;) { 543 *app = ap = makename(); 544 checkkwd = CHKNL | CHKKWD; 545 if (readtoken() != TPIPE) 546 break; 547 app = &ap->narg.next; 548 readtoken(); 549 } 550 ap->narg.next = NULL; 551 if (lasttoken != TRP) 552 synexpect(TRP); 553 cp->nclist.body = list(0); 554 555 checkkwd = CHKNL | CHKKWD | CHKALIAS; 556 if ((t = readtoken()) != TESAC) { 557 if (t == TENDCASE) 558 ; 559 else if (t == TFALLTHRU) 560 cp->type = NCLISTFALLTHRU; 561 else 562 synexpect(TENDCASE); 563 checkkwd = CHKNL | CHKKWD, readtoken(); 564 } 565 cpp = &cp->nclist.next; 566 } 567 *cpp = NULL; 568 checkkwd = CHKKWD | CHKALIAS; 569 break; 570 case TLP: 571 n1 = (union node *)stalloc(sizeof (struct nredir)); 572 n1->type = NSUBSHELL; 573 n1->nredir.n = list(0); 574 n1->nredir.redirect = NULL; 575 consumetoken(TRP); 576 checkkwd = CHKKWD | CHKALIAS; 577 is_subshell = 1; 578 break; 579 case TBEGIN: 580 n1 = list(0); 581 consumetoken(TEND); 582 checkkwd = CHKKWD | CHKALIAS; 583 break; 584 /* A simple command must have at least one redirection or word. */ 585 case TBACKGND: 586 case TSEMI: 587 case TAND: 588 case TOR: 589 case TPIPE: 590 case TENDCASE: 591 case TFALLTHRU: 592 case TEOF: 593 case TNL: 594 case TRP: 595 if (!redir) 596 synexpect(-1); 597 case TWORD: 598 tokpushback++; 599 n1 = simplecmd(rpp, redir); 600 return n1; 601 default: 602 synexpect(-1); 603 } 604 605 /* Now check for redirection which may follow command */ 606 while (readtoken() == TREDIR) { 607 *rpp = n2 = redirnode; 608 rpp = &n2->nfile.next; 609 parsefname(); 610 } 611 tokpushback++; 612 *rpp = NULL; 613 if (redir) { 614 if (!is_subshell) { 615 n2 = (union node *)stalloc(sizeof (struct nredir)); 616 n2->type = NREDIR; 617 n2->nredir.n = n1; 618 n1 = n2; 619 } 620 n1->nredir.redirect = redir; 621 } 622 623 return n1; 624 } 625 626 627 static union node * 628 simplecmd(union node **rpp, union node *redir) 629 { 630 union node *args, **app; 631 union node **orig_rpp = rpp; 632 union node *n = NULL; 633 int special; 634 int savecheckkwd; 635 636 /* If we don't have any redirections already, then we must reset */ 637 /* rpp to be the address of the local redir variable. */ 638 if (redir == NULL) 639 rpp = &redir; 640 641 args = NULL; 642 app = &args; 643 /* 644 * We save the incoming value, because we need this for shell 645 * functions. There can not be a redirect or an argument between 646 * the function name and the open parenthesis. 647 */ 648 orig_rpp = rpp; 649 650 savecheckkwd = CHKALIAS; 651 652 for (;;) { 653 checkkwd = savecheckkwd; 654 if (readtoken() == TWORD) { 655 n = makename(); 656 *app = n; 657 app = &n->narg.next; 658 if (savecheckkwd != 0 && !isassignment(wordtext)) 659 savecheckkwd = 0; 660 } else if (lasttoken == TREDIR) { 661 *rpp = n = redirnode; 662 rpp = &n->nfile.next; 663 parsefname(); /* read name of redirection file */ 664 } else if (lasttoken == TLP && app == &args->narg.next 665 && rpp == orig_rpp) { 666 /* We have a function */ 667 consumetoken(TRP); 668 funclinno = plinno; 669 /* 670 * - Require plain text. 671 * - Functions with '/' cannot be called. 672 * - Reject name=(). 673 * - Reject ksh extended glob patterns. 674 */ 675 if (!noexpand(n->narg.text) || quoteflag || 676 strchr(n->narg.text, '/') || 677 strchr("!%*+-=?@}~", 678 n->narg.text[strlen(n->narg.text) - 1])) 679 synerror("Bad function name"); 680 rmescapes(n->narg.text); 681 if (find_builtin(n->narg.text, &special) >= 0 && 682 special) 683 synerror("Cannot override a special builtin with a function"); 684 n->type = NDEFUN; 685 n->narg.next = command(); 686 funclinno = 0; 687 return n; 688 } else { 689 tokpushback++; 690 break; 691 } 692 } 693 *app = NULL; 694 *rpp = NULL; 695 n = (union node *)stalloc(sizeof (struct ncmd)); 696 n->type = NCMD; 697 n->ncmd.args = args; 698 n->ncmd.redirect = redir; 699 return n; 700 } 701 702 static union node * 703 makename(void) 704 { 705 union node *n; 706 707 n = (union node *)stalloc(sizeof (struct narg)); 708 n->type = NARG; 709 n->narg.next = NULL; 710 n->narg.text = wordtext; 711 n->narg.backquote = backquotelist; 712 return n; 713 } 714 715 static union node * 716 makebinary(int type, union node *n1, union node *n2) 717 { 718 union node *n; 719 720 n = (union node *)stalloc(sizeof (struct nbinary)); 721 n->type = type; 722 n->nbinary.ch1 = n1; 723 n->nbinary.ch2 = n2; 724 return (n); 725 } 726 727 void 728 forcealias(void) 729 { 730 checkkwd |= CHKALIAS; 731 } 732 733 void 734 fixredir(union node *n, const char *text, int err) 735 { 736 TRACE(("Fix redir %s %d\n", text, err)); 737 if (!err) 738 n->ndup.vname = NULL; 739 740 if (is_digit(text[0]) && text[1] == '\0') 741 n->ndup.dupfd = digit_val(text[0]); 742 else if (text[0] == '-' && text[1] == '\0') 743 n->ndup.dupfd = -1; 744 else { 745 746 if (err) 747 synerror("Bad fd number"); 748 else 749 n->ndup.vname = makename(); 750 } 751 } 752 753 754 static void 755 parsefname(void) 756 { 757 union node *n = redirnode; 758 759 consumetoken(TWORD); 760 if (n->type == NHERE) { 761 struct heredoc *here = heredoc; 762 struct heredoc *p; 763 764 if (quoteflag == 0) 765 n->type = NXHERE; 766 TRACE(("Here document %d\n", n->type)); 767 if (here->striptabs) { 768 while (*wordtext == '\t') 769 wordtext++; 770 } 771 if (! noexpand(wordtext)) 772 synerror("Illegal eof marker for << redirection"); 773 rmescapes(wordtext); 774 here->eofmark = wordtext; 775 here->next = NULL; 776 if (heredoclist == NULL) 777 heredoclist = here; 778 else { 779 for (p = heredoclist ; p->next ; p = p->next); 780 p->next = here; 781 } 782 } else if (n->type == NTOFD || n->type == NFROMFD) { 783 fixredir(n, wordtext, 0); 784 } else { 785 n->nfile.fname = makename(); 786 } 787 } 788 789 790 /* 791 * Input any here documents. 792 */ 793 794 static void 795 parseheredoc(void) 796 { 797 struct heredoc *here; 798 union node *n; 799 800 while (heredoclist) { 801 here = heredoclist; 802 heredoclist = here->next; 803 if (needprompt) { 804 setprompt(2); 805 needprompt = 0; 806 } 807 readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX, 808 here->eofmark, here->striptabs); 809 n = makename(); 810 here->here->nhere.doc = n; 811 } 812 } 813 814 static int 815 peektoken(void) 816 { 817 int t; 818 819 t = readtoken(); 820 tokpushback++; 821 return (t); 822 } 823 824 static int 825 readtoken(void) 826 { 827 int t; 828 struct alias *ap; 829 #ifdef DEBUG 830 int alreadyseen = tokpushback; 831 #endif 832 833 top: 834 t = xxreadtoken(); 835 836 /* 837 * eat newlines 838 */ 839 if (checkkwd & CHKNL) { 840 while (t == TNL) { 841 parseheredoc(); 842 t = xxreadtoken(); 843 } 844 } 845 846 /* 847 * check for keywords and aliases 848 */ 849 if (t == TWORD && !quoteflag) 850 { 851 const char * const *pp; 852 853 if (checkkwd & CHKKWD) 854 for (pp = parsekwd; *pp; pp++) { 855 if (**pp == *wordtext && equal(*pp, wordtext)) 856 { 857 lasttoken = t = pp - parsekwd + KWDOFFSET; 858 TRACE(("keyword %s recognized\n", tokname[t])); 859 goto out; 860 } 861 } 862 if (checkkwd & CHKALIAS && 863 (ap = lookupalias(wordtext, 1)) != NULL) { 864 pushstring(ap->val, strlen(ap->val), ap); 865 goto top; 866 } 867 } 868 out: 869 if (t != TNOT) 870 checkkwd = 0; 871 872 #ifdef DEBUG 873 if (!alreadyseen) 874 TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : "")); 875 else 876 TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : "")); 877 #endif 878 return (t); 879 } 880 881 882 /* 883 * Read the next input token. 884 * If the token is a word, we set backquotelist to the list of cmds in 885 * backquotes. We set quoteflag to true if any part of the word was 886 * quoted. 887 * If the token is TREDIR, then we set redirnode to a structure containing 888 * the redirection. 889 * In all cases, the variable startlinno is set to the number of the line 890 * on which the token starts. 891 * 892 * [Change comment: here documents and internal procedures] 893 * [Readtoken shouldn't have any arguments. Perhaps we should make the 894 * word parsing code into a separate routine. In this case, readtoken 895 * doesn't need to have any internal procedures, but parseword does. 896 * We could also make parseoperator in essence the main routine, and 897 * have parseword (readtoken1?) handle both words and redirection.] 898 */ 899 900 #define RETURN(token) return lasttoken = token 901 902 static int 903 xxreadtoken(void) 904 { 905 int c; 906 907 if (tokpushback) { 908 tokpushback = 0; 909 return lasttoken; 910 } 911 if (needprompt) { 912 setprompt(2); 913 needprompt = 0; 914 } 915 startlinno = plinno; 916 for (;;) { /* until token or start of word found */ 917 c = pgetc_macro(); 918 switch (c) { 919 case ' ': case '\t': 920 continue; 921 case '#': 922 while ((c = pgetc()) != '\n' && c != PEOF); 923 pungetc(); 924 continue; 925 case '\\': 926 if (pgetc() == '\n') { 927 startlinno = ++plinno; 928 if (doprompt) 929 setprompt(2); 930 else 931 setprompt(0); 932 continue; 933 } 934 pungetc(); 935 /* FALLTHROUGH */ 936 default: 937 return readtoken1(c, BASESYNTAX, (char *)NULL, 0); 938 case '\n': 939 plinno++; 940 needprompt = doprompt; 941 RETURN(TNL); 942 case PEOF: 943 RETURN(TEOF); 944 case '&': 945 if (pgetc_linecont() == '&') 946 RETURN(TAND); 947 pungetc(); 948 RETURN(TBACKGND); 949 case '|': 950 if (pgetc_linecont() == '|') 951 RETURN(TOR); 952 pungetc(); 953 RETURN(TPIPE); 954 case ';': 955 c = pgetc_linecont(); 956 if (c == ';') 957 RETURN(TENDCASE); 958 else if (c == '&') 959 RETURN(TFALLTHRU); 960 pungetc(); 961 RETURN(TSEMI); 962 case '(': 963 RETURN(TLP); 964 case ')': 965 RETURN(TRP); 966 } 967 } 968 #undef RETURN 969 } 970 971 972 #define MAXNEST_static 8 973 struct tokenstate 974 { 975 const char *syntax; /* *SYNTAX */ 976 int parenlevel; /* levels of parentheses in arithmetic */ 977 enum tokenstate_category 978 { 979 TSTATE_TOP, 980 TSTATE_VAR_OLD, /* ${var+-=?}, inherits dquotes */ 981 TSTATE_VAR_NEW, /* other ${var...}, own dquote state */ 982 TSTATE_ARITH 983 } category; 984 }; 985 986 987 /* 988 * Check to see whether we are at the end of the here document. When this 989 * is called, c is set to the first character of the next input line. If 990 * we are at the end of the here document, this routine sets the c to PEOF. 991 * The new value of c is returned. 992 */ 993 994 static int 995 checkend(int c, const char *eofmark, int striptabs) 996 { 997 if (striptabs) { 998 while (c == '\t') 999 c = pgetc(); 1000 } 1001 if (c == *eofmark) { 1002 int c2; 1003 const char *q; 1004 1005 for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++) 1006 ; 1007 if ((c2 == PEOF || c2 == '\n') && *q == '\0') { 1008 c = PEOF; 1009 if (c2 == '\n') { 1010 plinno++; 1011 needprompt = doprompt; 1012 } 1013 } else { 1014 pungetc(); 1015 pushstring(eofmark + 1, q - (eofmark + 1), NULL); 1016 } 1017 } else if (c == '\n' && *eofmark == '\0') { 1018 c = PEOF; 1019 plinno++; 1020 needprompt = doprompt; 1021 } 1022 return (c); 1023 } 1024 1025 1026 /* 1027 * Parse a redirection operator. The variable "out" points to a string 1028 * specifying the fd to be redirected. The variable "c" contains the 1029 * first character of the redirection operator. 1030 */ 1031 1032 static void 1033 parseredir(char *out, int c) 1034 { 1035 char fd = *out; 1036 union node *np; 1037 1038 np = (union node *)stalloc(sizeof (struct nfile)); 1039 if (c == '>') { 1040 np->nfile.fd = 1; 1041 c = pgetc_linecont(); 1042 if (c == '>') 1043 np->type = NAPPEND; 1044 else if (c == '&') 1045 np->type = NTOFD; 1046 else if (c == '|') 1047 np->type = NCLOBBER; 1048 else { 1049 np->type = NTO; 1050 pungetc(); 1051 } 1052 } else { /* c == '<' */ 1053 np->nfile.fd = 0; 1054 c = pgetc_linecont(); 1055 if (c == '<') { 1056 if (sizeof (struct nfile) != sizeof (struct nhere)) { 1057 np = (union node *)stalloc(sizeof (struct nhere)); 1058 np->nfile.fd = 0; 1059 } 1060 np->type = NHERE; 1061 heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc)); 1062 heredoc->here = np; 1063 if ((c = pgetc_linecont()) == '-') { 1064 heredoc->striptabs = 1; 1065 } else { 1066 heredoc->striptabs = 0; 1067 pungetc(); 1068 } 1069 } else if (c == '&') 1070 np->type = NFROMFD; 1071 else if (c == '>') 1072 np->type = NFROMTO; 1073 else { 1074 np->type = NFROM; 1075 pungetc(); 1076 } 1077 } 1078 if (fd != '\0') 1079 np->nfile.fd = digit_val(fd); 1080 redirnode = np; 1081 } 1082 1083 /* 1084 * Called to parse command substitutions. 1085 */ 1086 1087 static char * 1088 parsebackq(char *out, struct nodelist **pbqlist, 1089 int oldstyle, int dblquote, int quoted) 1090 { 1091 struct nodelist **nlpp; 1092 union node *n; 1093 char *volatile str; 1094 struct jmploc jmploc; 1095 struct jmploc *const savehandler = handler; 1096 size_t savelen; 1097 int saveprompt; 1098 const int bq_startlinno = plinno; 1099 char *volatile ostr = NULL; 1100 struct parsefile *const savetopfile = getcurrentfile(); 1101 struct heredoc *const saveheredoclist = heredoclist; 1102 struct heredoc *here; 1103 1104 str = NULL; 1105 if (setjmp(jmploc.loc)) { 1106 popfilesupto(savetopfile); 1107 if (str) 1108 ckfree(str); 1109 if (ostr) 1110 ckfree(ostr); 1111 heredoclist = saveheredoclist; 1112 handler = savehandler; 1113 if (exception == EXERROR) { 1114 startlinno = bq_startlinno; 1115 synerror("Error in command substitution"); 1116 } 1117 longjmp(handler->loc, 1); 1118 } 1119 INTOFF; 1120 savelen = out - stackblock(); 1121 if (savelen > 0) { 1122 str = ckmalloc(savelen); 1123 memcpy(str, stackblock(), savelen); 1124 } 1125 handler = &jmploc; 1126 heredoclist = NULL; 1127 INTON; 1128 if (oldstyle) { 1129 /* We must read until the closing backquote, giving special 1130 treatment to some slashes, and then push the string and 1131 reread it as input, interpreting it normally. */ 1132 char *oout; 1133 int c; 1134 int olen; 1135 1136 1137 STARTSTACKSTR(oout); 1138 for (;;) { 1139 if (needprompt) { 1140 setprompt(2); 1141 needprompt = 0; 1142 } 1143 CHECKSTRSPACE(2, oout); 1144 c = pgetc_linecont(); 1145 if (c == '`') 1146 break; 1147 switch (c) { 1148 case '\\': 1149 c = pgetc(); 1150 if (c != '\\' && c != '`' && c != '$' 1151 && (!dblquote || c != '"')) 1152 USTPUTC('\\', oout); 1153 break; 1154 1155 case '\n': 1156 plinno++; 1157 needprompt = doprompt; 1158 break; 1159 1160 case PEOF: 1161 startlinno = plinno; 1162 synerror("EOF in backquote substitution"); 1163 break; 1164 1165 default: 1166 break; 1167 } 1168 USTPUTC(c, oout); 1169 } 1170 USTPUTC('\0', oout); 1171 olen = oout - stackblock(); 1172 INTOFF; 1173 ostr = ckmalloc(olen); 1174 memcpy(ostr, stackblock(), olen); 1175 setinputstring(ostr, 1); 1176 INTON; 1177 } 1178 nlpp = pbqlist; 1179 while (*nlpp) 1180 nlpp = &(*nlpp)->next; 1181 *nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist)); 1182 (*nlpp)->next = NULL; 1183 1184 if (oldstyle) { 1185 saveprompt = doprompt; 1186 doprompt = 0; 1187 } 1188 1189 n = list(0); 1190 1191 if (oldstyle) { 1192 if (peektoken() != TEOF) 1193 synexpect(-1); 1194 doprompt = saveprompt; 1195 } else 1196 consumetoken(TRP); 1197 1198 (*nlpp)->n = n; 1199 if (oldstyle) { 1200 /* 1201 * Start reading from old file again, ignoring any pushed back 1202 * tokens left from the backquote parsing 1203 */ 1204 popfile(); 1205 tokpushback = 0; 1206 } 1207 STARTSTACKSTR(out); 1208 CHECKSTRSPACE(savelen + 1, out); 1209 INTOFF; 1210 if (str) { 1211 memcpy(out, str, savelen); 1212 STADJUST(savelen, out); 1213 ckfree(str); 1214 str = NULL; 1215 } 1216 if (ostr) { 1217 ckfree(ostr); 1218 ostr = NULL; 1219 } 1220 here = saveheredoclist; 1221 if (here != NULL) { 1222 while (here->next != NULL) 1223 here = here->next; 1224 here->next = heredoclist; 1225 heredoclist = saveheredoclist; 1226 } 1227 handler = savehandler; 1228 INTON; 1229 if (quoted) 1230 USTPUTC(CTLBACKQ | CTLQUOTE, out); 1231 else 1232 USTPUTC(CTLBACKQ, out); 1233 return out; 1234 } 1235 1236 1237 /* 1238 * Called to parse a backslash escape sequence inside $'...'. 1239 * The backslash has already been read. 1240 */ 1241 static char * 1242 readcstyleesc(char *out) 1243 { 1244 int c, vc, i, n; 1245 unsigned int v; 1246 1247 c = pgetc(); 1248 switch (c) { 1249 case '\0': 1250 synerror("Unterminated quoted string"); 1251 case '\n': 1252 plinno++; 1253 if (doprompt) 1254 setprompt(2); 1255 else 1256 setprompt(0); 1257 return out; 1258 case '\\': 1259 case '\'': 1260 case '"': 1261 v = c; 1262 break; 1263 case 'a': v = '\a'; break; 1264 case 'b': v = '\b'; break; 1265 case 'e': v = '\033'; break; 1266 case 'f': v = '\f'; break; 1267 case 'n': v = '\n'; break; 1268 case 'r': v = '\r'; break; 1269 case 't': v = '\t'; break; 1270 case 'v': v = '\v'; break; 1271 case 'x': 1272 v = 0; 1273 for (;;) { 1274 c = pgetc(); 1275 if (c >= '0' && c <= '9') 1276 v = (v << 4) + c - '0'; 1277 else if (c >= 'A' && c <= 'F') 1278 v = (v << 4) + c - 'A' + 10; 1279 else if (c >= 'a' && c <= 'f') 1280 v = (v << 4) + c - 'a' + 10; 1281 else 1282 break; 1283 } 1284 pungetc(); 1285 break; 1286 case '0': case '1': case '2': case '3': 1287 case '4': case '5': case '6': case '7': 1288 v = c - '0'; 1289 c = pgetc(); 1290 if (c >= '0' && c <= '7') { 1291 v <<= 3; 1292 v += c - '0'; 1293 c = pgetc(); 1294 if (c >= '0' && c <= '7') { 1295 v <<= 3; 1296 v += c - '0'; 1297 } else 1298 pungetc(); 1299 } else 1300 pungetc(); 1301 break; 1302 case 'c': 1303 c = pgetc(); 1304 if (c < 0x3f || c > 0x7a || c == 0x60) 1305 synerror("Bad escape sequence"); 1306 if (c == '\\' && pgetc() != '\\') 1307 synerror("Bad escape sequence"); 1308 if (c == '?') 1309 v = 127; 1310 else 1311 v = c & 0x1f; 1312 break; 1313 case 'u': 1314 case 'U': 1315 n = c == 'U' ? 8 : 4; 1316 v = 0; 1317 for (i = 0; i < n; i++) { 1318 c = pgetc(); 1319 if (c >= '0' && c <= '9') 1320 v = (v << 4) + c - '0'; 1321 else if (c >= 'A' && c <= 'F') 1322 v = (v << 4) + c - 'A' + 10; 1323 else if (c >= 'a' && c <= 'f') 1324 v = (v << 4) + c - 'a' + 10; 1325 else 1326 synerror("Bad escape sequence"); 1327 } 1328 if (v == 0 || (v >= 0xd800 && v <= 0xdfff)) 1329 synerror("Bad escape sequence"); 1330 /* We really need iconv here. */ 1331 if (initial_localeisutf8 && v > 127) { 1332 CHECKSTRSPACE(4, out); 1333 /* 1334 * We cannot use wctomb() as the locale may have 1335 * changed. 1336 */ 1337 if (v <= 0x7ff) { 1338 USTPUTC(0xc0 | v >> 6, out); 1339 USTPUTC(0x80 | (v & 0x3f), out); 1340 return out; 1341 } else if (v <= 0xffff) { 1342 USTPUTC(0xe0 | v >> 12, out); 1343 USTPUTC(0x80 | ((v >> 6) & 0x3f), out); 1344 USTPUTC(0x80 | (v & 0x3f), out); 1345 return out; 1346 } else if (v <= 0x10ffff) { 1347 USTPUTC(0xf0 | v >> 18, out); 1348 USTPUTC(0x80 | ((v >> 12) & 0x3f), out); 1349 USTPUTC(0x80 | ((v >> 6) & 0x3f), out); 1350 USTPUTC(0x80 | (v & 0x3f), out); 1351 return out; 1352 } 1353 } 1354 if (v > 127) 1355 v = '?'; 1356 break; 1357 default: 1358 synerror("Bad escape sequence"); 1359 } 1360 vc = (char)v; 1361 /* 1362 * We can't handle NUL bytes. 1363 * POSIX says we should skip till the closing quote. 1364 */ 1365 if (vc == '\0') { 1366 while ((c = pgetc()) != '\'') { 1367 if (c == '\\') 1368 c = pgetc(); 1369 if (c == PEOF) 1370 synerror("Unterminated quoted string"); 1371 if (c == '\n') { 1372 plinno++; 1373 if (doprompt) 1374 setprompt(2); 1375 else 1376 setprompt(0); 1377 } 1378 } 1379 pungetc(); 1380 return out; 1381 } 1382 if (SQSYNTAX[vc] == CCTL) 1383 USTPUTC(CTLESC, out); 1384 USTPUTC(vc, out); 1385 return out; 1386 } 1387 1388 1389 /* 1390 * If eofmark is NULL, read a word or a redirection symbol. If eofmark 1391 * is not NULL, read a here document. In the latter case, eofmark is the 1392 * word which marks the end of the document and striptabs is true if 1393 * leading tabs should be stripped from the document. The argument firstc 1394 * is the first character of the input token or document. 1395 * 1396 * Because C does not have internal subroutines, I have simulated them 1397 * using goto's to implement the subroutine linkage. The following macros 1398 * will run code that appears at the end of readtoken1. 1399 */ 1400 1401 #define PARSESUB() {goto parsesub; parsesub_return:;} 1402 #define PARSEARITH() {goto parsearith; parsearith_return:;} 1403 1404 static int 1405 readtoken1(int firstc, char const *initialsyntax, const char *eofmark, 1406 int striptabs) 1407 { 1408 int c = firstc; 1409 char *out; 1410 int len; 1411 struct nodelist *bqlist; 1412 int quotef; 1413 int newvarnest; 1414 int level; 1415 int synentry; 1416 struct tokenstate state_static[MAXNEST_static]; 1417 int maxnest = MAXNEST_static; 1418 struct tokenstate *state = state_static; 1419 int sqiscstyle = 0; 1420 1421 startlinno = plinno; 1422 quotef = 0; 1423 bqlist = NULL; 1424 newvarnest = 0; 1425 level = 0; 1426 state[level].syntax = initialsyntax; 1427 state[level].parenlevel = 0; 1428 state[level].category = TSTATE_TOP; 1429 1430 STARTSTACKSTR(out); 1431 loop: { /* for each line, until end of word */ 1432 if (eofmark && eofmark != NOEOFMARK) 1433 /* set c to PEOF if at end of here document */ 1434 c = checkend(c, eofmark, striptabs); 1435 for (;;) { /* until end of line or end of word */ 1436 CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */ 1437 1438 synentry = state[level].syntax[c]; 1439 1440 switch(synentry) { 1441 case CNL: /* '\n' */ 1442 if (level == 0) 1443 goto endword; /* exit outer loop */ 1444 /* FALLTHROUGH */ 1445 case CQNL: 1446 USTPUTC(c, out); 1447 plinno++; 1448 if (doprompt) 1449 setprompt(2); 1450 else 1451 setprompt(0); 1452 c = pgetc(); 1453 goto loop; /* continue outer loop */ 1454 case CSBACK: 1455 if (sqiscstyle) { 1456 out = readcstyleesc(out); 1457 break; 1458 } 1459 /* FALLTHROUGH */ 1460 case CWORD: 1461 USTPUTC(c, out); 1462 break; 1463 case CCTL: 1464 if (eofmark == NULL || initialsyntax != SQSYNTAX) 1465 USTPUTC(CTLESC, out); 1466 USTPUTC(c, out); 1467 break; 1468 case CBACK: /* backslash */ 1469 c = pgetc(); 1470 if (c == PEOF) { 1471 USTPUTC('\\', out); 1472 pungetc(); 1473 } else if (c == '\n') { 1474 plinno++; 1475 if (doprompt) 1476 setprompt(2); 1477 else 1478 setprompt(0); 1479 } else { 1480 if (state[level].syntax == DQSYNTAX && 1481 c != '\\' && c != '`' && c != '$' && 1482 (c != '"' || (eofmark != NULL && 1483 newvarnest == 0)) && 1484 (c != '}' || state[level].category != TSTATE_VAR_OLD)) 1485 USTPUTC('\\', out); 1486 if ((eofmark == NULL || 1487 newvarnest > 0) && 1488 state[level].syntax == BASESYNTAX) 1489 USTPUTC(CTLQUOTEMARK, out); 1490 if (SQSYNTAX[c] == CCTL) 1491 USTPUTC(CTLESC, out); 1492 USTPUTC(c, out); 1493 if ((eofmark == NULL || 1494 newvarnest > 0) && 1495 state[level].syntax == BASESYNTAX && 1496 state[level].category == TSTATE_VAR_OLD) 1497 USTPUTC(CTLQUOTEEND, out); 1498 quotef++; 1499 } 1500 break; 1501 case CSQUOTE: 1502 USTPUTC(CTLQUOTEMARK, out); 1503 state[level].syntax = SQSYNTAX; 1504 sqiscstyle = 0; 1505 break; 1506 case CDQUOTE: 1507 USTPUTC(CTLQUOTEMARK, out); 1508 state[level].syntax = DQSYNTAX; 1509 break; 1510 case CENDQUOTE: 1511 if (eofmark != NULL && newvarnest == 0) 1512 USTPUTC(c, out); 1513 else { 1514 if (state[level].category == TSTATE_VAR_OLD) 1515 USTPUTC(CTLQUOTEEND, out); 1516 state[level].syntax = BASESYNTAX; 1517 quotef++; 1518 } 1519 break; 1520 case CVAR: /* '$' */ 1521 PARSESUB(); /* parse substitution */ 1522 break; 1523 case CENDVAR: /* '}' */ 1524 if (level > 0 && 1525 ((state[level].category == TSTATE_VAR_OLD && 1526 state[level].syntax == 1527 state[level - 1].syntax) || 1528 (state[level].category == TSTATE_VAR_NEW && 1529 state[level].syntax == BASESYNTAX))) { 1530 if (state[level].category == TSTATE_VAR_NEW) 1531 newvarnest--; 1532 level--; 1533 USTPUTC(CTLENDVAR, out); 1534 } else { 1535 USTPUTC(c, out); 1536 } 1537 break; 1538 case CLP: /* '(' in arithmetic */ 1539 state[level].parenlevel++; 1540 USTPUTC(c, out); 1541 break; 1542 case CRP: /* ')' in arithmetic */ 1543 if (state[level].parenlevel > 0) { 1544 USTPUTC(c, out); 1545 --state[level].parenlevel; 1546 } else { 1547 if (pgetc_linecont() == ')') { 1548 if (level > 0 && 1549 state[level].category == TSTATE_ARITH) { 1550 level--; 1551 USTPUTC(CTLENDARI, out); 1552 } else 1553 USTPUTC(')', out); 1554 } else { 1555 /* 1556 * unbalanced parens 1557 * (don't 2nd guess - no error) 1558 */ 1559 pungetc(); 1560 USTPUTC(')', out); 1561 } 1562 } 1563 break; 1564 case CBQUOTE: /* '`' */ 1565 out = parsebackq(out, &bqlist, 1, 1566 state[level].syntax == DQSYNTAX && 1567 (eofmark == NULL || newvarnest > 0), 1568 state[level].syntax == DQSYNTAX || state[level].syntax == ARISYNTAX); 1569 break; 1570 case CEOF: 1571 goto endword; /* exit outer loop */ 1572 case CIGN: 1573 break; 1574 default: 1575 if (level == 0) 1576 goto endword; /* exit outer loop */ 1577 USTPUTC(c, out); 1578 } 1579 c = pgetc_macro(); 1580 } 1581 } 1582 endword: 1583 if (state[level].syntax == ARISYNTAX) 1584 synerror("Missing '))'"); 1585 if (state[level].syntax != BASESYNTAX && eofmark == NULL) 1586 synerror("Unterminated quoted string"); 1587 if (state[level].category == TSTATE_VAR_OLD || 1588 state[level].category == TSTATE_VAR_NEW) { 1589 startlinno = plinno; 1590 synerror("Missing '}'"); 1591 } 1592 if (state != state_static) 1593 parser_temp_free_upto(state); 1594 USTPUTC('\0', out); 1595 len = out - stackblock(); 1596 out = stackblock(); 1597 if (eofmark == NULL) { 1598 if ((c == '>' || c == '<') 1599 && quotef == 0 1600 && len <= 2 1601 && (*out == '\0' || is_digit(*out))) { 1602 parseredir(out, c); 1603 return lasttoken = TREDIR; 1604 } else { 1605 pungetc(); 1606 } 1607 } 1608 quoteflag = quotef; 1609 backquotelist = bqlist; 1610 grabstackblock(len); 1611 wordtext = out; 1612 return lasttoken = TWORD; 1613 /* end of readtoken routine */ 1614 1615 1616 /* 1617 * Parse a substitution. At this point, we have read the dollar sign 1618 * and nothing else. 1619 */ 1620 1621 parsesub: { 1622 int subtype; 1623 int typeloc; 1624 int flags; 1625 char *p; 1626 static const char types[] = "}-+?="; 1627 int linno; 1628 int length; 1629 int c1; 1630 1631 c = pgetc_linecont(); 1632 if (c == '(') { /* $(command) or $((arith)) */ 1633 if (pgetc_linecont() == '(') { 1634 PARSEARITH(); 1635 } else { 1636 pungetc(); 1637 out = parsebackq(out, &bqlist, 0, 1638 state[level].syntax == DQSYNTAX && 1639 (eofmark == NULL || newvarnest > 0), 1640 state[level].syntax == DQSYNTAX || 1641 state[level].syntax == ARISYNTAX); 1642 } 1643 } else if (c == '{' || is_name(c) || is_special(c)) { 1644 USTPUTC(CTLVAR, out); 1645 typeloc = out - stackblock(); 1646 USTPUTC(VSNORMAL, out); 1647 subtype = VSNORMAL; 1648 flags = 0; 1649 if (c == '{') { 1650 c = pgetc_linecont(); 1651 subtype = 0; 1652 } 1653 varname: 1654 if (!is_eof(c) && is_name(c)) { 1655 length = 0; 1656 do { 1657 STPUTC(c, out); 1658 c = pgetc_linecont(); 1659 length++; 1660 } while (!is_eof(c) && is_in_name(c)); 1661 if (length == 6 && 1662 strncmp(out - length, "LINENO", length) == 0) { 1663 /* Replace the variable name with the 1664 * current line number. */ 1665 STADJUST(-6, out); 1666 CHECKSTRSPACE(11, out); 1667 linno = plinno; 1668 if (funclinno != 0) 1669 linno -= funclinno - 1; 1670 length = snprintf(out, 11, "%d", linno); 1671 if (length > 10) 1672 length = 10; 1673 out += length; 1674 flags |= VSLINENO; 1675 } 1676 } else if (is_digit(c)) { 1677 if (subtype != VSNORMAL) { 1678 do { 1679 STPUTC(c, out); 1680 c = pgetc_linecont(); 1681 } while (is_digit(c)); 1682 } else { 1683 USTPUTC(c, out); 1684 c = pgetc_linecont(); 1685 } 1686 } else if (is_special(c)) { 1687 c1 = c; 1688 c = pgetc_linecont(); 1689 if (subtype == 0 && c1 == '#') { 1690 subtype = VSLENGTH; 1691 if (strchr(types, c) == NULL && c != ':' && 1692 c != '#' && c != '%') 1693 goto varname; 1694 c1 = c; 1695 c = pgetc_linecont(); 1696 if (c1 != '}' && c == '}') { 1697 pungetc(); 1698 c = c1; 1699 goto varname; 1700 } 1701 pungetc(); 1702 c = c1; 1703 c1 = '#'; 1704 subtype = 0; 1705 } 1706 USTPUTC(c1, out); 1707 } else { 1708 subtype = VSERROR; 1709 if (c == '}') 1710 pungetc(); 1711 else if (c == '\n' || c == PEOF) 1712 synerror("Unexpected end of line in substitution"); 1713 else if (BASESYNTAX[c] != CCTL) 1714 USTPUTC(c, out); 1715 } 1716 if (subtype == 0) { 1717 switch (c) { 1718 case ':': 1719 flags |= VSNUL; 1720 c = pgetc_linecont(); 1721 /*FALLTHROUGH*/ 1722 default: 1723 p = strchr(types, c); 1724 if (p == NULL) { 1725 if (c == '\n' || c == PEOF) 1726 synerror("Unexpected end of line in substitution"); 1727 if (flags == VSNUL) 1728 STPUTC(':', out); 1729 if (BASESYNTAX[c] != CCTL) 1730 STPUTC(c, out); 1731 subtype = VSERROR; 1732 } else 1733 subtype = p - types + VSNORMAL; 1734 break; 1735 case '%': 1736 case '#': 1737 { 1738 int cc = c; 1739 subtype = c == '#' ? VSTRIMLEFT : 1740 VSTRIMRIGHT; 1741 c = pgetc_linecont(); 1742 if (c == cc) 1743 subtype++; 1744 else 1745 pungetc(); 1746 break; 1747 } 1748 } 1749 } else if (subtype != VSERROR) { 1750 if (subtype == VSLENGTH && c != '}') 1751 subtype = VSERROR; 1752 pungetc(); 1753 } 1754 STPUTC('=', out); 1755 if (state[level].syntax == DQSYNTAX || 1756 state[level].syntax == ARISYNTAX) 1757 flags |= VSQUOTE; 1758 *(stackblock() + typeloc) = subtype | flags; 1759 if (subtype != VSNORMAL) { 1760 if (level + 1 >= maxnest) { 1761 maxnest *= 2; 1762 if (state == state_static) { 1763 state = parser_temp_alloc( 1764 maxnest * sizeof(*state)); 1765 memcpy(state, state_static, 1766 MAXNEST_static * sizeof(*state)); 1767 } else 1768 state = parser_temp_realloc(state, 1769 maxnest * sizeof(*state)); 1770 } 1771 level++; 1772 state[level].parenlevel = 0; 1773 if (subtype == VSMINUS || subtype == VSPLUS || 1774 subtype == VSQUESTION || subtype == VSASSIGN) { 1775 /* 1776 * For operators that were in the Bourne shell, 1777 * inherit the double-quote state. 1778 */ 1779 state[level].syntax = state[level - 1].syntax; 1780 state[level].category = TSTATE_VAR_OLD; 1781 } else { 1782 /* 1783 * The other operators take a pattern, 1784 * so go to BASESYNTAX. 1785 * Also, ' and " are now special, even 1786 * in here documents. 1787 */ 1788 state[level].syntax = BASESYNTAX; 1789 state[level].category = TSTATE_VAR_NEW; 1790 newvarnest++; 1791 } 1792 } 1793 } else if (c == '\'' && state[level].syntax == BASESYNTAX) { 1794 /* $'cstylequotes' */ 1795 USTPUTC(CTLQUOTEMARK, out); 1796 state[level].syntax = SQSYNTAX; 1797 sqiscstyle = 1; 1798 } else { 1799 USTPUTC('$', out); 1800 pungetc(); 1801 } 1802 goto parsesub_return; 1803 } 1804 1805 1806 /* 1807 * Parse an arithmetic expansion (indicate start of one and set state) 1808 */ 1809 parsearith: { 1810 1811 if (level + 1 >= maxnest) { 1812 maxnest *= 2; 1813 if (state == state_static) { 1814 state = parser_temp_alloc( 1815 maxnest * sizeof(*state)); 1816 memcpy(state, state_static, 1817 MAXNEST_static * sizeof(*state)); 1818 } else 1819 state = parser_temp_realloc(state, 1820 maxnest * sizeof(*state)); 1821 } 1822 level++; 1823 state[level].syntax = ARISYNTAX; 1824 state[level].parenlevel = 0; 1825 state[level].category = TSTATE_ARITH; 1826 USTPUTC(CTLARI, out); 1827 if (state[level - 1].syntax == DQSYNTAX) 1828 USTPUTC('"',out); 1829 else 1830 USTPUTC(' ',out); 1831 goto parsearith_return; 1832 } 1833 1834 } /* end of readtoken */ 1835 1836 1837 /* 1838 * Returns true if the text contains nothing to expand (no dollar signs 1839 * or backquotes). 1840 */ 1841 1842 static int 1843 noexpand(char *text) 1844 { 1845 char *p; 1846 char c; 1847 1848 p = text; 1849 while ((c = *p++) != '\0') { 1850 if ( c == CTLQUOTEMARK) 1851 continue; 1852 if (c == CTLESC) 1853 p++; 1854 else if (BASESYNTAX[(int)c] == CCTL) 1855 return 0; 1856 } 1857 return 1; 1858 } 1859 1860 1861 /* 1862 * Return true if the argument is a legal variable name (a letter or 1863 * underscore followed by zero or more letters, underscores, and digits). 1864 */ 1865 1866 int 1867 goodname(const char *name) 1868 { 1869 const char *p; 1870 1871 p = name; 1872 if (! is_name(*p)) 1873 return 0; 1874 while (*++p) { 1875 if (! is_in_name(*p)) 1876 return 0; 1877 } 1878 return 1; 1879 } 1880 1881 1882 int 1883 isassignment(const char *p) 1884 { 1885 if (!is_name(*p)) 1886 return 0; 1887 p++; 1888 for (;;) { 1889 if (*p == '=') 1890 return 1; 1891 else if (!is_in_name(*p)) 1892 return 0; 1893 p++; 1894 } 1895 } 1896 1897 1898 static void 1899 consumetoken(int token) 1900 { 1901 if (readtoken() != token) 1902 synexpect(token); 1903 } 1904 1905 1906 /* 1907 * Called when an unexpected token is read during the parse. The argument 1908 * is the token that is expected, or -1 if more than one type of token can 1909 * occur at this point. 1910 */ 1911 1912 static void 1913 synexpect(int token) 1914 { 1915 char msg[64]; 1916 1917 if (token >= 0) { 1918 fmtstr(msg, 64, "%s unexpected (expecting %s)", 1919 tokname[lasttoken], tokname[token]); 1920 } else { 1921 fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]); 1922 } 1923 synerror(msg); 1924 } 1925 1926 1927 static void 1928 synerror(const char *msg) 1929 { 1930 if (commandname) 1931 outfmt(out2, "%s: %d: ", commandname, startlinno); 1932 else if (arg0) 1933 outfmt(out2, "%s: ", arg0); 1934 outfmt(out2, "Syntax error: %s\n", msg); 1935 error((char *)NULL); 1936 } 1937 1938 static void 1939 setprompt(int which) 1940 { 1941 whichprompt = which; 1942 if (which == 0) 1943 return; 1944 1945 #ifndef NO_HISTORY 1946 if (!el) 1947 #endif 1948 { 1949 out2str(getprompt(NULL)); 1950 flushout(out2); 1951 } 1952 } 1953 1954 static int 1955 pgetc_linecont(void) 1956 { 1957 int c; 1958 1959 while ((c = pgetc_macro()) == '\\') { 1960 c = pgetc(); 1961 if (c == '\n') { 1962 plinno++; 1963 if (doprompt) 1964 setprompt(2); 1965 else 1966 setprompt(0); 1967 } else { 1968 pungetc(); 1969 /* Allow the backslash to be pushed back. */ 1970 pushstring("\\", 1, NULL); 1971 return (pgetc()); 1972 } 1973 } 1974 return (c); 1975 } 1976 1977 1978 static struct passwd * 1979 getpwlogin(void) 1980 { 1981 const char *login; 1982 1983 login = getlogin(); 1984 if (login == NULL) 1985 return (NULL); 1986 1987 return (getpwnam(login)); 1988 } 1989 1990 1991 static void 1992 getusername(char *name, size_t namelen) 1993 { 1994 static char cached_name[MAXLOGNAME]; 1995 struct passwd *pw; 1996 uid_t euid; 1997 1998 if (cached_name[0] == '\0') { 1999 euid = geteuid(); 2000 2001 /* 2002 * Handle the case when there is more than one 2003 * login with the same UID, or when the login 2004 * returned by getlogin(2) does no longer match 2005 * the current UID. 2006 */ 2007 pw = getpwlogin(); 2008 if (pw == NULL || pw->pw_uid != euid) 2009 pw = getpwuid(euid); 2010 2011 if (pw != NULL) { 2012 strlcpy(cached_name, pw->pw_name, 2013 sizeof(cached_name)); 2014 } else { 2015 snprintf(cached_name, sizeof(cached_name), 2016 "%u", euid); 2017 } 2018 } 2019 2020 strlcpy(name, cached_name, namelen); 2021 } 2022 2023 2024 /* 2025 * called by editline -- any expansions to the prompt 2026 * should be added here. 2027 */ 2028 char * 2029 getprompt(void *unused __unused) 2030 { 2031 static char ps[PROMPTLEN]; 2032 const char *fmt; 2033 const char *home; 2034 const char *pwd; 2035 size_t homelen; 2036 int i, trim; 2037 static char internal_error[] = "??"; 2038 2039 /* 2040 * Select prompt format. 2041 */ 2042 switch (whichprompt) { 2043 case 0: 2044 fmt = ""; 2045 break; 2046 case 1: 2047 fmt = ps1val(); 2048 break; 2049 case 2: 2050 fmt = ps2val(); 2051 break; 2052 default: 2053 return internal_error; 2054 } 2055 2056 /* 2057 * Format prompt string. 2058 */ 2059 for (i = 0; (i < PROMPTLEN - 1) && (*fmt != '\0'); i++, fmt++) 2060 if (*fmt == '\\') 2061 switch (*++fmt) { 2062 2063 /* 2064 * Non-printing sequence begin and end. 2065 */ 2066 case '[': 2067 case ']': 2068 ps[i] = '\001'; 2069 break; 2070 2071 /* 2072 * Literal \ and some ASCII characters: 2073 * \a BEL 2074 * \e ESC 2075 * \r CR 2076 */ 2077 case '\\': 2078 case 'a': 2079 case 'e': 2080 case 'r': 2081 if (*fmt == 'a') 2082 ps[i] = '\007'; 2083 else if (*fmt == 'e') 2084 ps[i] = '\033'; 2085 else if (*fmt == 'r') 2086 ps[i] = '\r'; 2087 else 2088 ps[i] = '\\'; 2089 break; 2090 2091 /* 2092 * CRLF sequence 2093 */ 2094 case 'n': 2095 if (i < PROMPTLEN - 3) { 2096 ps[i++] = '\r'; 2097 ps[i] = '\n'; 2098 } 2099 break; 2100 2101 /* 2102 * Hostname. 2103 * 2104 * \h specifies just the local hostname, 2105 * \H specifies fully-qualified hostname. 2106 */ 2107 case 'h': 2108 case 'H': 2109 ps[i] = '\0'; 2110 gethostname(&ps[i], PROMPTLEN - i - 1); 2111 ps[PROMPTLEN - 1] = '\0'; 2112 /* Skip to end of hostname. */ 2113 trim = (*fmt == 'h') ? '.' : '\0'; 2114 while ((ps[i] != '\0') && (ps[i] != trim)) 2115 i++; 2116 --i; 2117 break; 2118 2119 /* 2120 * User name. 2121 */ 2122 case 'u': 2123 ps[i] = '\0'; 2124 getusername(&ps[i], PROMPTLEN - i); 2125 /* Skip to end of username. */ 2126 while (ps[i + 1] != '\0') 2127 i++; 2128 break; 2129 2130 /* 2131 * Working directory. 2132 * 2133 * \W specifies just the final component, 2134 * \w specifies the entire path. 2135 */ 2136 case 'W': 2137 case 'w': 2138 pwd = lookupvar("PWD"); 2139 if (pwd == NULL || *pwd == '\0') 2140 pwd = "?"; 2141 if (*fmt == 'W' && 2142 *pwd == '/' && pwd[1] != '\0') 2143 strlcpy(&ps[i], strrchr(pwd, '/') + 1, 2144 PROMPTLEN - i); 2145 else { 2146 home = lookupvar("HOME"); 2147 if (home != NULL) 2148 homelen = strlen(home); 2149 if (home != NULL && 2150 strcmp(home, "/") != 0 && 2151 strncmp(pwd, home, homelen) == 0 && 2152 (pwd[homelen] == '/' || 2153 pwd[homelen] == '\0')) { 2154 strlcpy(&ps[i], "~", 2155 PROMPTLEN - i); 2156 strlcpy(&ps[i + 1], 2157 pwd + homelen, 2158 PROMPTLEN - i - 1); 2159 } else { 2160 strlcpy(&ps[i], pwd, PROMPTLEN - i); 2161 } 2162 } 2163 /* Skip to end of path. */ 2164 while (ps[i + 1] != '\0') 2165 i++; 2166 break; 2167 2168 /* 2169 * Superuser status. 2170 * 2171 * '$' for normal users, '#' for root. 2172 */ 2173 case '$': 2174 ps[i] = (geteuid() != 0) ? '$' : '#'; 2175 break; 2176 2177 /* 2178 * Emit unrecognized formats verbatim. 2179 */ 2180 default: 2181 ps[i] = '\\'; 2182 if (i < PROMPTLEN - 2) 2183 ps[++i] = *fmt; 2184 break; 2185 } 2186 else 2187 ps[i] = *fmt; 2188 ps[i] = '\0'; 2189 return (ps); 2190 } 2191 2192 2193 const char * 2194 expandstr(const char *ps) 2195 { 2196 union node n; 2197 struct jmploc jmploc; 2198 struct jmploc *const savehandler = handler; 2199 const int saveprompt = doprompt; 2200 struct parsefile *const savetopfile = getcurrentfile(); 2201 struct parser_temp *const saveparser_temp = parser_temp; 2202 const char *result = NULL; 2203 2204 if (!setjmp(jmploc.loc)) { 2205 handler = &jmploc; 2206 parser_temp = NULL; 2207 setinputstring(ps, 1); 2208 doprompt = 0; 2209 readtoken1(pgetc(), DQSYNTAX, NOEOFMARK, 0); 2210 if (backquotelist != NULL) 2211 error("Command substitution not allowed here"); 2212 2213 n.narg.type = NARG; 2214 n.narg.next = NULL; 2215 n.narg.text = wordtext; 2216 n.narg.backquote = backquotelist; 2217 2218 expandarg(&n, NULL, 0); 2219 result = stackblock(); 2220 INTOFF; 2221 } 2222 handler = savehandler; 2223 doprompt = saveprompt; 2224 popfilesupto(savetopfile); 2225 if (parser_temp != saveparser_temp) { 2226 parser_temp_free_all(); 2227 parser_temp = saveparser_temp; 2228 } 2229 if (result != NULL) { 2230 INTON; 2231 } else if (exception == EXINT) 2232 raise(SIGINT); 2233 return result; 2234 } 2235