xref: /freebsd/bin/sh/parser.c (revision bdcbfde31e8e9b343f113a1956384bdf30d1ed62)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Kenneth Almquist.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #ifndef lint
36 #endif /* not lint */
37 #include <sys/cdefs.h>
38 #include <sys/param.h>
39 #include <pwd.h>
40 #include <stdlib.h>
41 #include <unistd.h>
42 #include <stdio.h>
43 #include <time.h>
44 
45 #include "shell.h"
46 #include "parser.h"
47 #include "nodes.h"
48 #include "expand.h"	/* defines rmescapes() */
49 #include "syntax.h"
50 #include "options.h"
51 #include "input.h"
52 #include "output.h"
53 #include "var.h"
54 #include "error.h"
55 #include "memalloc.h"
56 #include "mystring.h"
57 #include "alias.h"
58 #include "show.h"
59 #include "eval.h"
60 #include "exec.h"	/* to check for special builtins */
61 #ifndef NO_HISTORY
62 #include "myhistedit.h"
63 #endif
64 
65 /*
66  * Shell command parser.
67  */
68 
69 #define	PROMPTLEN	192
70 
71 /* values of checkkwd variable */
72 #define CHKALIAS	0x1
73 #define CHKKWD		0x2
74 #define CHKNL		0x4
75 
76 /* values returned by readtoken */
77 #include "token.h"
78 
79 
80 
/*
 * Bookkeeping for one here-document redirection.  A record is created by
 * parseredir() when "<<" is seen, completed and queued on heredoclist by
 * parsefname(), and consumed by parseheredoc(), which reads the body.
 */
struct heredoc {
	struct heredoc *next;	/* next here document in list */
	union node *here;		/* redirection node */
	char *eofmark;		/* string indicating end of input */
	int striptabs;		/* if set, strip leading tabs */
};
87 
/*
 * One entry of the stack of temporary parser allocations.  The stack head
 * is the file-scope variable that is also named parser_temp; entries are
 * pushed by parser_temp_alloc() and popped by parser_temp_free_upto() and
 * parser_temp_free_all().
 */
struct parser_temp {
	struct parser_temp *next;	/* entry pushed before this one */
	void *data;			/* buffer owned by this entry */
};
92 
93 
/*
 * File-scope parser state.  parsecmd() and parsewordexp() reset
 * heredoclist, tokpushback, checkkwd, doprompt and needprompt before they
 * start reading tokens.
 */
static struct heredoc *heredoclist;	/* list of here documents to read */
static int doprompt;		/* if set, prompt the user */
static int needprompt;		/* true if interactive and at start of line */
static int lasttoken;		/* last token read */
static int tokpushback;		/* last token pushed back */
static char *wordtext;		/* text of last word returned by readtoken */
static int checkkwd;		/* CHK* flags applied by the next readtoken() */
static struct nodelist *backquotelist;	/* command substitutions of the last word */
static union node *redirnode;	/* redirection node built by parseredir() */
static struct heredoc *heredoc;	/* record created by parseredir() for "<<" */
static int quoteflag;		/* set if (part of) last token was quoted */
static int startlinno;		/* line # where last token started */
static int funclinno;		/* line # where the current function started */
static struct parser_temp *parser_temp;	/* top of the temporary allocation stack */

/*
 * Distinguished pointer value (the address of heredoclist reinterpreted as
 * a string pointer).  NOTE(review): its users are not in this part of the
 * file; presumably it marks "no end-of-file marker" -- confirm at call sites.
 */
#define NOEOFMARK ((const char *)&heredoclist)


/* Forward declarations of the file-local parsing routines. */
static union node *list(int);
static union node *andor(void);
static union node *pipeline(void);
static union node *command(void);
static union node *simplecmd(union node **, union node *);
static union node *makename(void);
static union node *makebinary(int type, union node *n1, union node *n2);
static void parsefname(void);
static void parseheredoc(void);
static int peektoken(void);
static int readtoken(void);
static int xxreadtoken(void);
static int readtoken1(int, const char *, const char *, int);
static int noexpand(char *);
static void consumetoken(int);
static void synexpect(int) __dead2;
static void synerror(const char *) __dead2;
static void setprompt(int);
static int pgetc_linecont(void);
static void getusername(char *, size_t);
132 
133 
134 static void *
135 parser_temp_alloc(size_t len)
136 {
137 	struct parser_temp *t;
138 
139 	INTOFF;
140 	t = ckmalloc(sizeof(*t));
141 	t->data = NULL;
142 	t->next = parser_temp;
143 	parser_temp = t;
144 	t->data = ckmalloc(len);
145 	INTON;
146 	return t->data;
147 }
148 
149 
150 static void *
151 parser_temp_realloc(void *ptr, size_t len)
152 {
153 	struct parser_temp *t;
154 
155 	INTOFF;
156 	t = parser_temp;
157 	if (ptr != t->data)
158 		error("bug: parser_temp_realloc misused");
159 	t->data = ckrealloc(t->data, len);
160 	INTON;
161 	return t->data;
162 }
163 
164 
165 static void
166 parser_temp_free_upto(void *ptr)
167 {
168 	struct parser_temp *t;
169 	int done = 0;
170 
171 	INTOFF;
172 	while (parser_temp != NULL && !done) {
173 		t = parser_temp;
174 		parser_temp = t->next;
175 		done = t->data == ptr;
176 		ckfree(t->data);
177 		ckfree(t);
178 	}
179 	INTON;
180 	if (!done)
181 		error("bug: parser_temp_free_upto misused");
182 }
183 
184 
185 static void
186 parser_temp_free_all(void)
187 {
188 	struct parser_temp *t;
189 
190 	INTOFF;
191 	while (parser_temp != NULL) {
192 		t = parser_temp;
193 		parser_temp = t->next;
194 		ckfree(t->data);
195 		ckfree(t);
196 	}
197 	INTON;
198 }
199 
200 
201 /*
202  * Read and parse a command.  Returns NEOF on end of file.  (NULL is a
203  * valid parse tree indicating a blank line.)
204  */
205 
206 union node *
207 parsecmd(int interact)
208 {
209 	int t;
210 
211 	/* This assumes the parser is not re-entered,
212 	 * which could happen if we add command substitution on PS1/PS2.
213 	 */
214 	parser_temp_free_all();
215 	heredoclist = NULL;
216 
217 	tokpushback = 0;
218 	checkkwd = 0;
219 	doprompt = interact;
220 	if (doprompt)
221 		setprompt(1);
222 	else
223 		setprompt(0);
224 	needprompt = 0;
225 	t = readtoken();
226 	if (t == TEOF)
227 		return NEOF;
228 	if (t == TNL)
229 		return NULL;
230 	tokpushback++;
231 	return list(1);
232 }
233 
234 
235 /*
236  * Read and parse words for wordexp.
237  * Returns a list of NARG nodes; NULL if there are no words.
238  */
239 union node *
240 parsewordexp(void)
241 {
242 	union node *n, *first = NULL, **pnext;
243 	int t;
244 
245 	/* This assumes the parser is not re-entered,
246 	 * which could happen if we add command substitution on PS1/PS2.
247 	 */
248 	parser_temp_free_all();
249 	heredoclist = NULL;
250 
251 	tokpushback = 0;
252 	checkkwd = 0;
253 	doprompt = 0;
254 	setprompt(0);
255 	needprompt = 0;
256 	pnext = &first;
257 	while ((t = readtoken()) != TEOF) {
258 		if (t != TWORD)
259 			synexpect(TWORD);
260 		n = makename();
261 		*pnext = n;
262 		pnext = &n->narg.next;
263 	}
264 	return first;
265 }
266 
267 
/*
 * Parse a sequence of and-or lists separated by ";", "&" or newlines.
 *
 * nlflag: when non-zero, parsing stops after the first newline (or at end
 *	of file following a separator) and any other unexpected token is a
 *	syntax error.  When zero, parsing continues across newlines and stops
 *	at a token flagged in tokendlist, which is left pushed back; NULL is
 *	returned if such a token is the very first one.
 *
 * Two or more commands are joined with NSEMI nodes nested to the right.
 * A command followed by "&" is marked as a background command.
 */
static union node *
list(int nlflag)
{
	union node *ntop, *n1, *n2, *n3;
	int tok;

	checkkwd = CHKNL | CHKKWD | CHKALIAS;
	if (!nlflag && tokendlist[peektoken()])
		return NULL;
	ntop = n1 = NULL;
	for (;;) {
		n2 = andor();
		tok = readtoken();
		if (tok == TBACKGND) {
			/*
			 * Reuse the node when it can carry the background
			 * marker itself, otherwise wrap it in NBACKGND.
			 */
			if (n2 != NULL && n2->type == NPIPE) {
				n2->npipe.backgnd = 1;
			} else if (n2 != NULL && n2->type == NREDIR) {
				n2->type = NBACKGND;
			} else {
				n3 = (union node *)stalloc(sizeof (struct nredir));
				n3->type = NBACKGND;
				n3->nredir.n = n2;
				n3->nredir.redirect = NULL;
				n2 = n3;
			}
		}
		/*
		 * ntop is the root of the result; n1 is the most recently
		 * created NSEMI node, whose right child is replaced when a
		 * further command is appended.
		 */
		if (ntop == NULL)
			ntop = n2;
		else if (n1 == NULL) {
			n1 = makebinary(NSEMI, ntop, n2);
			ntop = n1;
		}
		else {
			n3 = makebinary(NSEMI, n1->nbinary.ch2, n2);
			n1->nbinary.ch2 = n3;
			n1 = n3;
		}
		switch (tok) {
		case TBACKGND:
		case TSEMI:
			/* Look at what follows the separator. */
			tok = readtoken();
			/* FALLTHROUGH */
		case TNL:
			if (tok == TNL) {
				/* Here-document bodies start after the newline. */
				parseheredoc();
				if (nlflag)
					return ntop;
			} else if (tok == TEOF && nlflag) {
				parseheredoc();
				return ntop;
			} else {
				tokpushback++;
			}
			checkkwd = CHKNL | CHKKWD | CHKALIAS;
			if (!nlflag && tokendlist[peektoken()])
				return ntop;
			break;
		case TEOF:
			if (heredoclist)
				parseheredoc();
			else
				pungetc();		/* push back EOF on input */
			return ntop;
		default:
			if (nlflag)
				synexpect(-1);
			tokpushback++;
			return ntop;
		}
	}
}
339 
340 
341 
342 static union node *
343 andor(void)
344 {
345 	union node *n;
346 	int t;
347 
348 	n = pipeline();
349 	for (;;) {
350 		if ((t = readtoken()) == TAND) {
351 			t = NAND;
352 		} else if (t == TOR) {
353 			t = NOR;
354 		} else {
355 			tokpushback++;
356 			return n;
357 		}
358 		n = makebinary(t, n, pipeline());
359 	}
360 }
361 
362 
363 
364 static union node *
365 pipeline(void)
366 {
367 	union node *n1, *n2, *pipenode;
368 	struct nodelist *lp, *prev;
369 	int negate, t;
370 
371 	negate = 0;
372 	checkkwd = CHKNL | CHKKWD | CHKALIAS;
373 	TRACE(("pipeline: entered\n"));
374 	while (readtoken() == TNOT)
375 		negate = !negate;
376 	tokpushback++;
377 	n1 = command();
378 	if (readtoken() == TPIPE) {
379 		pipenode = (union node *)stalloc(sizeof (struct npipe));
380 		pipenode->type = NPIPE;
381 		pipenode->npipe.backgnd = 0;
382 		lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
383 		pipenode->npipe.cmdlist = lp;
384 		lp->n = n1;
385 		do {
386 			prev = lp;
387 			lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
388 			checkkwd = CHKNL | CHKKWD | CHKALIAS;
389 			t = readtoken();
390 			tokpushback++;
391 			if (t == TNOT)
392 				lp->n = pipeline();
393 			else
394 				lp->n = command();
395 			prev->next = lp;
396 		} while (readtoken() == TPIPE);
397 		lp->next = NULL;
398 		n1 = pipenode;
399 	}
400 	tokpushback++;
401 	if (negate) {
402 		n2 = (union node *)stalloc(sizeof (struct nnot));
403 		n2->type = NNOT;
404 		n2->nnot.com = n1;
405 		return n2;
406 	} else
407 		return n1;
408 }
409 
410 
411 
/*
 * Parse one command: a compound command (if, while, until, for, case,
 * subshell or brace group) or, via simplecmd(), a simple command or
 * function definition.
 *
 * Redirections may precede the command and, for compound commands, may
 * follow it.  When a compound command has redirections it is wrapped in
 * an NREDIR node, except for a subshell, whose NSUBSHELL node carries the
 * redirection list itself.
 *
 * Returns the parse tree of the command.  Syntax errors are reported
 * through synexpect()/synerror(), which are declared as not returning.
 */
static union node *
command(void)
{
	union node *n1, *n2;
	union node *ap, **app;
	union node *cp, **cpp;
	union node *redir, **rpp;
	int t;
	int is_subshell;

	checkkwd = CHKNL | CHKKWD | CHKALIAS;
	is_subshell = 0;
	redir = NULL;
	n1 = NULL;
	rpp = &redir;	/* where the next redirection node gets linked */

	/* Check for redirection which may precede command */
	while (readtoken() == TREDIR) {
		*rpp = n2 = redirnode;
		rpp = &n2->nfile.next;
		parsefname();
	}
	tokpushback++;

	switch (readtoken()) {
	case TIF:
		n1 = (union node *)stalloc(sizeof (struct nif));
		n1->type = NIF;
		if ((n1->nif.test = list(0)) == NULL)
			synexpect(-1);
		consumetoken(TTHEN);
		n1->nif.ifpart = list(0);
		n2 = n1;
		/* Each elif becomes a nested NIF in the previous elsepart. */
		while (readtoken() == TELIF) {
			n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif));
			n2 = n2->nif.elsepart;
			n2->type = NIF;
			if ((n2->nif.test = list(0)) == NULL)
				synexpect(-1);
			consumetoken(TTHEN);
			n2->nif.ifpart = list(0);
		}
		if (lasttoken == TELSE)
			n2->nif.elsepart = list(0);
		else {
			n2->nif.elsepart = NULL;
			tokpushback++;
		}
		consumetoken(TFI);
		checkkwd = CHKKWD | CHKALIAS;
		break;
	case TWHILE:
	case TUNTIL:
		/* Remember which keyword it was; list() reads more tokens. */
		t = lasttoken;
		if ((n1 = list(0)) == NULL)
			synexpect(-1);
		consumetoken(TDO);
		n1 = makebinary((t == TWHILE)? NWHILE : NUNTIL, n1, list(0));
		consumetoken(TDONE);
		checkkwd = CHKKWD | CHKALIAS;
		break;
	case TFOR:
		if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
			synerror("Bad for loop variable");
		n1 = (union node *)stalloc(sizeof (struct nfor));
		n1->type = NFOR;
		n1->nfor.var = wordtext;
		checkkwd = CHKNL;
		if (readtoken() == TWORD && !quoteflag &&
		    equal(wordtext, "in")) {
			/* Explicit word list, ended by a newline or ";". */
			app = &ap;
			while (readtoken() == TWORD) {
				n2 = makename();
				*app = n2;
				app = &n2->narg.next;
			}
			*app = NULL;
			n1->nfor.args = ap;
			if (lasttoken == TNL)
				tokpushback++;
			else if (lasttoken != TSEMI)
				synexpect(-1);
		} else {
			/*
			 * No "in": use a prebuilt word consisting of a
			 * quoted variable reference to '@'.
			 */
			static char argvars[5] = {
				CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'
			};
			n2 = (union node *)stalloc(sizeof (struct narg));
			n2->type = NARG;
			n2->narg.text = argvars;
			n2->narg.backquote = NULL;
			n2->narg.next = NULL;
			n1->nfor.args = n2;
			/*
			 * Newline or semicolon here is optional (but note
			 * that the original Bourne shell only allowed NL).
			 */
			if (lasttoken != TSEMI)
				tokpushback++;
		}
		checkkwd = CHKNL | CHKKWD | CHKALIAS;
		/* The body is "do ... done" or "{ ... }"; t is the closer. */
		if ((t = readtoken()) == TDO)
			t = TDONE;
		else if (t == TBEGIN)
			t = TEND;
		else
			synexpect(-1);
		n1->nfor.body = list(0);
		consumetoken(t);
		checkkwd = CHKKWD | CHKALIAS;
		break;
	case TCASE:
		n1 = (union node *)stalloc(sizeof (struct ncase));
		n1->type = NCASE;
		consumetoken(TWORD);
		n1->ncase.expr = makename();
		checkkwd = CHKNL;
		if (readtoken() != TWORD || ! equal(wordtext, "in"))
			synerror("expecting \"in\"");
		cpp = &n1->ncase.cases;
		checkkwd = CHKNL | CHKKWD, readtoken();
		/* One NCLIST node per clause until "esac". */
		while (lasttoken != TESAC) {
			*cpp = cp = (union node *)stalloc(sizeof (struct nclist));
			cp->type = NCLIST;
			app = &cp->nclist.pattern;
			/* An opening parenthesis before the patterns is optional. */
			if (lasttoken == TLP)
				readtoken();
			/* Patterns separated by "|". */
			for (;;) {
				*app = ap = makename();
				checkkwd = CHKNL | CHKKWD;
				if (readtoken() != TPIPE)
					break;
				app = &ap->narg.next;
				readtoken();
			}
			ap->narg.next = NULL;
			if (lasttoken != TRP)
				synexpect(TRP);
			cp->nclist.body = list(0);

			checkkwd = CHKNL | CHKKWD | CHKALIAS;
			/*
			 * A clause ends with TENDCASE or TFALLTHRU; the
			 * terminator may be omitted right before "esac".
			 */
			if ((t = readtoken()) != TESAC) {
				if (t == TENDCASE)
					;
				else if (t == TFALLTHRU)
					cp->type = NCLISTFALLTHRU;
				else
					synexpect(TENDCASE);
				checkkwd = CHKNL | CHKKWD, readtoken();
			}
			cpp = &cp->nclist.next;
		}
		*cpp = NULL;
		checkkwd = CHKKWD | CHKALIAS;
		break;
	case TLP:
		n1 = (union node *)stalloc(sizeof (struct nredir));
		n1->type = NSUBSHELL;
		n1->nredir.n = list(0);
		n1->nredir.redirect = NULL;
		consumetoken(TRP);
		checkkwd = CHKKWD | CHKALIAS;
		is_subshell = 1;
		break;
	case TBEGIN:
		n1 = list(0);
		consumetoken(TEND);
		checkkwd = CHKKWD | CHKALIAS;
		break;
	/* A simple command must have at least one redirection or word. */
	case TBACKGND:
	case TSEMI:
	case TAND:
	case TOR:
	case TPIPE:
	case TENDCASE:
	case TFALLTHRU:
	case TEOF:
	case TNL:
	case TRP:
		if (!redir)
			synexpect(-1);
		/* FALLTHROUGH */
	case TWORD:
		/* simplecmd() rereads the token and ends the redirect list. */
		tokpushback++;
		n1 = simplecmd(rpp, redir);
		return n1;
	default:
		synexpect(-1);
	}

	/* Now check for redirection which may follow command */
	while (readtoken() == TREDIR) {
		*rpp = n2 = redirnode;
		rpp = &n2->nfile.next;
		parsefname();
	}
	tokpushback++;
	*rpp = NULL;
	if (redir) {
		if (!is_subshell) {
			n2 = (union node *)stalloc(sizeof (struct nredir));
			n2->type = NREDIR;
			n2->nredir.n = n1;
			n1 = n2;
		}
		n1->nredir.redirect = redir;
	}

	return n1;
}
621 
622 
623 static union node *
624 simplecmd(union node **rpp, union node *redir)
625 {
626 	union node *args, **app;
627 	union node **orig_rpp = rpp;
628 	union node *n = NULL;
629 	int special;
630 	int savecheckkwd;
631 
632 	/* If we don't have any redirections already, then we must reset */
633 	/* rpp to be the address of the local redir variable.  */
634 	if (redir == NULL)
635 		rpp = &redir;
636 
637 	args = NULL;
638 	app = &args;
639 	/*
640 	 * We save the incoming value, because we need this for shell
641 	 * functions.  There can not be a redirect or an argument between
642 	 * the function name and the open parenthesis.
643 	 */
644 	orig_rpp = rpp;
645 
646 	savecheckkwd = CHKALIAS;
647 
648 	for (;;) {
649 		checkkwd = savecheckkwd;
650 		if (readtoken() == TWORD) {
651 			n = makename();
652 			*app = n;
653 			app = &n->narg.next;
654 			if (savecheckkwd != 0 && !isassignment(wordtext))
655 				savecheckkwd = 0;
656 		} else if (lasttoken == TREDIR) {
657 			*rpp = n = redirnode;
658 			rpp = &n->nfile.next;
659 			parsefname();	/* read name of redirection file */
660 		} else if (lasttoken == TLP && app == &args->narg.next
661 					    && rpp == orig_rpp) {
662 			/* We have a function */
663 			consumetoken(TRP);
664 			funclinno = plinno;
665 			/*
666 			 * - Require plain text.
667 			 * - Functions with '/' cannot be called.
668 			 * - Reject name=().
669 			 * - Reject ksh extended glob patterns.
670 			 */
671 			if (!noexpand(n->narg.text) || quoteflag ||
672 			    strchr(n->narg.text, '/') ||
673 			    strchr("!%*+-=?@}~",
674 				n->narg.text[strlen(n->narg.text) - 1]))
675 				synerror("Bad function name");
676 			rmescapes(n->narg.text);
677 			if (find_builtin(n->narg.text, &special) >= 0 &&
678 			    special)
679 				synerror("Cannot override a special builtin with a function");
680 			n->type = NDEFUN;
681 			n->narg.next = command();
682 			funclinno = 0;
683 			return n;
684 		} else {
685 			tokpushback++;
686 			break;
687 		}
688 	}
689 	*app = NULL;
690 	*rpp = NULL;
691 	n = (union node *)stalloc(sizeof (struct ncmd));
692 	n->type = NCMD;
693 	n->ncmd.args = args;
694 	n->ncmd.redirect = redir;
695 	return n;
696 }
697 
698 static union node *
699 makename(void)
700 {
701 	union node *n;
702 
703 	n = (union node *)stalloc(sizeof (struct narg));
704 	n->type = NARG;
705 	n->narg.next = NULL;
706 	n->narg.text = wordtext;
707 	n->narg.backquote = backquotelist;
708 	return n;
709 }
710 
711 static union node *
712 makebinary(int type, union node *n1, union node *n2)
713 {
714 	union node *n;
715 
716 	n = (union node *)stalloc(sizeof (struct nbinary));
717 	n->type = type;
718 	n->nbinary.ch1 = n1;
719 	n->nbinary.ch2 = n2;
720 	return (n);
721 }
722 
723 void
724 forcealias(void)
725 {
726 	checkkwd |= CHKALIAS;
727 }
728 
729 void
730 fixredir(union node *n, const char *text, int err)
731 {
732 	TRACE(("Fix redir %s %d\n", text, err));
733 	if (!err)
734 		n->ndup.vname = NULL;
735 
736 	if (is_digit(text[0]) && text[1] == '\0')
737 		n->ndup.dupfd = digit_val(text[0]);
738 	else if (text[0] == '-' && text[1] == '\0')
739 		n->ndup.dupfd = -1;
740 	else {
741 
742 		if (err)
743 			synerror("Bad fd number");
744 		else
745 			n->ndup.vname = makename();
746 	}
747 }
748 
749 
750 static void
751 parsefname(void)
752 {
753 	union node *n = redirnode;
754 
755 	consumetoken(TWORD);
756 	if (n->type == NHERE) {
757 		struct heredoc *here = heredoc;
758 		struct heredoc *p;
759 
760 		if (quoteflag == 0)
761 			n->type = NXHERE;
762 		TRACE(("Here document %d\n", n->type));
763 		if (here->striptabs) {
764 			while (*wordtext == '\t')
765 				wordtext++;
766 		}
767 		if (! noexpand(wordtext))
768 			synerror("Illegal eof marker for << redirection");
769 		rmescapes(wordtext);
770 		here->eofmark = wordtext;
771 		here->next = NULL;
772 		if (heredoclist == NULL)
773 			heredoclist = here;
774 		else {
775 			for (p = heredoclist ; p->next ; p = p->next);
776 			p->next = here;
777 		}
778 	} else if (n->type == NTOFD || n->type == NFROMFD) {
779 		fixredir(n, wordtext, 0);
780 	} else {
781 		n->nfile.fname = makename();
782 	}
783 }
784 
785 
786 /*
787  * Input any here documents.
788  */
789 
790 static void
791 parseheredoc(void)
792 {
793 	struct heredoc *here;
794 	union node *n;
795 
796 	while (heredoclist) {
797 		here = heredoclist;
798 		heredoclist = here->next;
799 		if (needprompt) {
800 			setprompt(2);
801 			needprompt = 0;
802 		}
803 		readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
804 				here->eofmark, here->striptabs);
805 		n = makename();
806 		here->here->nhere.doc = n;
807 	}
808 }
809 
810 static int
811 peektoken(void)
812 {
813 	int t;
814 
815 	t = readtoken();
816 	tokpushback++;
817 	return (t);
818 }
819 
820 static int
821 readtoken(void)
822 {
823 	int t;
824 	struct alias *ap;
825 #ifdef DEBUG
826 	int alreadyseen = tokpushback;
827 #endif
828 
829 	top:
830 	t = xxreadtoken();
831 
832 	/*
833 	 * eat newlines
834 	 */
835 	if (checkkwd & CHKNL) {
836 		while (t == TNL) {
837 			parseheredoc();
838 			t = xxreadtoken();
839 		}
840 	}
841 
842 	/*
843 	 * check for keywords and aliases
844 	 */
845 	if (t == TWORD && !quoteflag)
846 	{
847 		const char * const *pp;
848 
849 		if (checkkwd & CHKKWD)
850 			for (pp = parsekwd; *pp; pp++) {
851 				if (**pp == *wordtext && equal(*pp, wordtext))
852 				{
853 					lasttoken = t = pp - parsekwd + KWDOFFSET;
854 					TRACE(("keyword %s recognized\n", tokname[t]));
855 					goto out;
856 				}
857 			}
858 		if (checkkwd & CHKALIAS &&
859 		    (ap = lookupalias(wordtext, 1)) != NULL) {
860 			pushstring(ap->val, strlen(ap->val), ap);
861 			goto top;
862 		}
863 	}
864 out:
865 	if (t != TNOT)
866 		checkkwd = 0;
867 
868 #ifdef DEBUG
869 	if (!alreadyseen)
870 	    TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
871 	else
872 	    TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
873 #endif
874 	return (t);
875 }
876 
877 
878 /*
879  * Read the next input token.
880  * If the token is a word, we set backquotelist to the list of cmds in
881  *	backquotes.  We set quoteflag to true if any part of the word was
882  *	quoted.
883  * If the token is TREDIR, then we set redirnode to a structure containing
884  *	the redirection.
885  * In all cases, the variable startlinno is set to the number of the line
886  *	on which the token starts.
887  *
888  * [Change comment:  here documents and internal procedures]
889  * [Readtoken shouldn't have any arguments.  Perhaps we should make the
890  *  word parsing code into a separate routine.  In this case, readtoken
891  *  doesn't need to have any internal procedures, but parseword does.
892  *  We could also make parseoperator in essence the main routine, and
893  *  have parseword (readtoken1?) handle both words and redirection.]
894  */
895 
896 #define RETURN(token)	return lasttoken = token
897 
898 static int
899 xxreadtoken(void)
900 {
901 	int c;
902 
903 	if (tokpushback) {
904 		tokpushback = 0;
905 		return lasttoken;
906 	}
907 	if (needprompt) {
908 		setprompt(2);
909 		needprompt = 0;
910 	}
911 	startlinno = plinno;
912 	for (;;) {	/* until token or start of word found */
913 		c = pgetc_macro();
914 		switch (c) {
915 		case ' ': case '\t':
916 			continue;
917 		case '#':
918 			while ((c = pgetc()) != '\n' && c != PEOF);
919 			pungetc();
920 			continue;
921 		case '\\':
922 			if (pgetc() == '\n') {
923 				startlinno = ++plinno;
924 				if (doprompt)
925 					setprompt(2);
926 				else
927 					setprompt(0);
928 				continue;
929 			}
930 			pungetc();
931 			/* FALLTHROUGH */
932 		default:
933 			return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
934 		case '\n':
935 			plinno++;
936 			needprompt = doprompt;
937 			RETURN(TNL);
938 		case PEOF:
939 			RETURN(TEOF);
940 		case '&':
941 			if (pgetc_linecont() == '&')
942 				RETURN(TAND);
943 			pungetc();
944 			RETURN(TBACKGND);
945 		case '|':
946 			if (pgetc_linecont() == '|')
947 				RETURN(TOR);
948 			pungetc();
949 			RETURN(TPIPE);
950 		case ';':
951 			c = pgetc_linecont();
952 			if (c == ';')
953 				RETURN(TENDCASE);
954 			else if (c == '&')
955 				RETURN(TFALLTHRU);
956 			pungetc();
957 			RETURN(TSEMI);
958 		case '(':
959 			RETURN(TLP);
960 		case ')':
961 			RETURN(TRP);
962 		}
963 	}
964 #undef RETURN
965 }
966 
967 
/*
 * NOTE(review): presumably the number of tokenstate entries kept in a
 * fixed array before further nesting needs dynamic storage; the user of
 * this constant is not in this part of the file -- confirm there.
 */
#define MAXNEST_static 8
/*
 * One level of nested lexical context.  NOTE(review): the code that uses
 * this structure is not in this part of the file; the member comments
 * describe what each field is for.
 */
struct tokenstate
{
	const char *syntax; /* *SYNTAX */
	int parenlevel; /* levels of parentheses in arithmetic */
	enum tokenstate_category
	{
		TSTATE_TOP,
		TSTATE_VAR_OLD, /* ${var+-=?}, inherits dquotes */
		TSTATE_VAR_NEW, /* other ${var...}, own dquote state */
		TSTATE_ARITH
	} category;
};
981 
982 
983 /*
984  * Check to see whether we are at the end of the here document.  When this
985  * is called, c is set to the first character of the next input line.  If
986  * we are at the end of the here document, this routine sets the c to PEOF.
987  * The new value of c is returned.
988  */
989 
990 static int
991 checkend(int c, const char *eofmark, int striptabs)
992 {
993 	if (striptabs) {
994 		while (c == '\t')
995 			c = pgetc();
996 	}
997 	if (c == *eofmark) {
998 		int c2;
999 		const char *q;
1000 
1001 		for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++)
1002 			;
1003 		if ((c2 == PEOF || c2 == '\n') && *q == '\0') {
1004 			c = PEOF;
1005 			if (c2 == '\n') {
1006 				plinno++;
1007 				needprompt = doprompt;
1008 			}
1009 		} else {
1010 			pungetc();
1011 			pushstring(eofmark + 1, q - (eofmark + 1), NULL);
1012 		}
1013 	} else if (c == '\n' && *eofmark == '\0') {
1014 		c = PEOF;
1015 		plinno++;
1016 		needprompt = doprompt;
1017 	}
1018 	return (c);
1019 }
1020 
1021 
1022 /*
1023  * Parse a redirection operator.  The variable "out" points to a string
1024  * specifying the fd to be redirected.  The variable "c" contains the
1025  * first character of the redirection operator.
1026  */
1027 
1028 static void
1029 parseredir(char *out, int c)
1030 {
1031 	char fd = *out;
1032 	union node *np;
1033 
1034 	np = (union node *)stalloc(sizeof (struct nfile));
1035 	if (c == '>') {
1036 		np->nfile.fd = 1;
1037 		c = pgetc_linecont();
1038 		if (c == '>')
1039 			np->type = NAPPEND;
1040 		else if (c == '&')
1041 			np->type = NTOFD;
1042 		else if (c == '|')
1043 			np->type = NCLOBBER;
1044 		else {
1045 			np->type = NTO;
1046 			pungetc();
1047 		}
1048 	} else {	/* c == '<' */
1049 		np->nfile.fd = 0;
1050 		c = pgetc_linecont();
1051 		if (c == '<') {
1052 			if (sizeof (struct nfile) != sizeof (struct nhere)) {
1053 				np = (union node *)stalloc(sizeof (struct nhere));
1054 				np->nfile.fd = 0;
1055 			}
1056 			np->type = NHERE;
1057 			heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
1058 			heredoc->here = np;
1059 			if ((c = pgetc_linecont()) == '-') {
1060 				heredoc->striptabs = 1;
1061 			} else {
1062 				heredoc->striptabs = 0;
1063 				pungetc();
1064 			}
1065 		} else if (c == '&')
1066 			np->type = NFROMFD;
1067 		else if (c == '>')
1068 			np->type = NFROMTO;
1069 		else {
1070 			np->type = NFROM;
1071 			pungetc();
1072 		}
1073 	}
1074 	if (fd != '\0')
1075 		np->nfile.fd = digit_val(fd);
1076 	redirnode = np;
1077 }
1078 
/*
 * Called to parse command substitutions.
 *
 * out:      current end of the word being built in the stack string; the
 *           text from stackblock() up to out is saved before the nested
 *           parse and restored afterwards.
 * pbqlist:  list to which the parsed command is appended as a new entry.
 * oldstyle: non-zero for the backquote form.  The text up to the closing
 *           backquote is read first, with backslash processing, and is
 *           then re-read as input through setinputstring().  Otherwise
 *           the command is parsed directly and must end with TRP.
 * dblquote: in the backquote form, non-zero makes a backslash before a
 *           double quote disappear as well.
 * quoted:   selects CTLBACKQ | CTLQUOTE rather than CTLBACKQ as the
 *           marker appended to the word.
 *
 * Returns the new end of the word, just past the appended marker.
 *
 * A local exception handler is installed around the nested parse so that
 * the saved copies can be freed and heredoclist and handler restored
 * before the exception is passed on.
 */

static char *
parsebackq(char *out, struct nodelist **pbqlist,
		int oldstyle, int dblquote, int quoted)
{
	struct nodelist **nlpp;
	union node *n;
	char *volatile str;
	struct jmploc jmploc;
	struct jmploc *const savehandler = handler;
	size_t savelen;
	int saveprompt;
	const int bq_startlinno = plinno;
	char *volatile ostr = NULL;
	struct parsefile *const savetopfile = getcurrentfile();
	struct heredoc *const saveheredoclist = heredoclist;
	struct heredoc *here;

	str = NULL;
	if (setjmp(jmploc.loc)) {
		/* Reached through longjmp when the nested parse fails. */
		popfilesupto(savetopfile);
		if (str)
			ckfree(str);
		if (ostr)
			ckfree(ostr);
		heredoclist = saveheredoclist;
		handler = savehandler;
		if (exception == EXERROR) {
			startlinno = bq_startlinno;
			synerror("Error in command substitution");
		}
		longjmp(handler->loc, 1);
	}
	INTOFF;
	/* Save the partial word; the nested parse reuses the stack string. */
	savelen = out - stackblock();
	if (savelen > 0) {
		str = ckmalloc(savelen);
		memcpy(str, stackblock(), savelen);
	}
	handler = &jmploc;
	/* The substitution gets a here-document list of its own. */
	heredoclist = NULL;
	INTON;
        if (oldstyle) {
                /* We must read until the closing backquote, giving special
                   treatment to some slashes, and then push the string and
                   reread it as input, interpreting it normally.  */
                char *oout;
                int c;
                int olen;


                STARTSTACKSTR(oout);
		for (;;) {
			if (needprompt) {
				setprompt(2);
				needprompt = 0;
			}
			CHECKSTRSPACE(2, oout);
			c = pgetc_linecont();
			if (c == '`')
				break;
			switch (c) {
			case '\\':
				/*
				 * The backslash is dropped before \, ` and $
				 * (and before " when dblquote is set) and
				 * kept before anything else.
				 */
				c = pgetc();
                                if (c != '\\' && c != '`' && c != '$'
                                    && (!dblquote || c != '"'))
                                        USTPUTC('\\', oout);
				break;

			case '\n':
				plinno++;
				needprompt = doprompt;
				break;

			case PEOF:
			        startlinno = plinno;
				synerror("EOF in backquote substitution");
 				break;

			default:
				break;
			}
			USTPUTC(c, oout);
                }
                USTPUTC('\0', oout);
                olen = oout - stackblock();
		INTOFF;
		/* Copy the text off the stack and make it the input. */
		ostr = ckmalloc(olen);
		memcpy(ostr, stackblock(), olen);
		setinputstring(ostr, 1);
		INTON;
        }
	/* Append a new entry at the end of the caller's list. */
	nlpp = pbqlist;
	while (*nlpp)
		nlpp = &(*nlpp)->next;
	*nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist));
	(*nlpp)->next = NULL;

	if (oldstyle) {
		/* No prompting while re-reading the saved text. */
		saveprompt = doprompt;
		doprompt = 0;
	}

	n = list(0);

	if (oldstyle) {
		if (peektoken() != TEOF)
			synexpect(-1);
		doprompt = saveprompt;
	} else
		consumetoken(TRP);

	(*nlpp)->n = n;
        if (oldstyle) {
		/*
		 * Start reading from old file again, ignoring any pushed back
		 * tokens left from the backquote parsing
		 */
                popfile();
		tokpushback = 0;
	}
	/* Rebuild the partial word at the start of a fresh stack string. */
	STARTSTACKSTR(out);
	CHECKSTRSPACE(savelen + 1, out);
	INTOFF;
	if (str) {
		memcpy(out, str, savelen);
		STADJUST(savelen, out);
		ckfree(str);
		str = NULL;
	}
	if (ostr) {
		ckfree(ostr);
		ostr = NULL;
	}
	/*
	 * Put the caller's here documents back in front of any that are
	 * still pending from the substitution.
	 */
	here = saveheredoclist;
	if (here != NULL) {
		while (here->next != NULL)
			here = here->next;
		here->next = heredoclist;
		heredoclist = saveheredoclist;
	}
	handler = savehandler;
	INTON;
	if (quoted)
		USTPUTC(CTLBACKQ | CTLQUOTE, out);
	else
		USTPUTC(CTLBACKQ, out);
	return out;
}
1231 
1232 
1233 /*
1234  * Called to parse a backslash escape sequence inside $'...'.
1235  * The backslash has already been read.
1236  */
1237 static char *
1238 readcstyleesc(char *out)
1239 {
1240 	int c, vc, i, n;
1241 	unsigned int v;
1242 
1243 	c = pgetc();
1244 	switch (c) {
1245 	case '\0':
1246 		synerror("Unterminated quoted string");
1247 	case '\n':
1248 		plinno++;
1249 		if (doprompt)
1250 			setprompt(2);
1251 		else
1252 			setprompt(0);
1253 		return out;
1254 	case '\\':
1255 	case '\'':
1256 	case '"':
1257 		v = c;
1258 		break;
1259 	case 'a': v = '\a'; break;
1260 	case 'b': v = '\b'; break;
1261 	case 'e': v = '\033'; break;
1262 	case 'f': v = '\f'; break;
1263 	case 'n': v = '\n'; break;
1264 	case 'r': v = '\r'; break;
1265 	case 't': v = '\t'; break;
1266 	case 'v': v = '\v'; break;
1267 	case 'x':
1268 		  v = 0;
1269 		  for (;;) {
1270 			  c = pgetc();
1271 			  if (c >= '0' && c <= '9')
1272 				  v = (v << 4) + c - '0';
1273 			  else if (c >= 'A' && c <= 'F')
1274 				  v = (v << 4) + c - 'A' + 10;
1275 			  else if (c >= 'a' && c <= 'f')
1276 				  v = (v << 4) + c - 'a' + 10;
1277 			  else
1278 				  break;
1279 		  }
1280 		  pungetc();
1281 		  break;
1282 	case '0': case '1': case '2': case '3':
1283 	case '4': case '5': case '6': case '7':
1284 		  v = c - '0';
1285 		  c = pgetc();
1286 		  if (c >= '0' && c <= '7') {
1287 			  v <<= 3;
1288 			  v += c - '0';
1289 			  c = pgetc();
1290 			  if (c >= '0' && c <= '7') {
1291 				  v <<= 3;
1292 				  v += c - '0';
1293 			  } else
1294 				  pungetc();
1295 		  } else
1296 			  pungetc();
1297 		  break;
1298 	case 'c':
1299 		  c = pgetc();
1300 		  if (c < 0x3f || c > 0x7a || c == 0x60)
1301 			  synerror("Bad escape sequence");
1302 		  if (c == '\\' && pgetc() != '\\')
1303 			  synerror("Bad escape sequence");
1304 		  if (c == '?')
1305 			  v = 127;
1306 		  else
1307 			  v = c & 0x1f;
1308 		  break;
1309 	case 'u':
1310 	case 'U':
1311 		  n = c == 'U' ? 8 : 4;
1312 		  v = 0;
1313 		  for (i = 0; i < n; i++) {
1314 			  c = pgetc();
1315 			  if (c >= '0' && c <= '9')
1316 				  v = (v << 4) + c - '0';
1317 			  else if (c >= 'A' && c <= 'F')
1318 				  v = (v << 4) + c - 'A' + 10;
1319 			  else if (c >= 'a' && c <= 'f')
1320 				  v = (v << 4) + c - 'a' + 10;
1321 			  else
1322 				  synerror("Bad escape sequence");
1323 		  }
1324 		  if (v == 0 || (v >= 0xd800 && v <= 0xdfff))
1325 			  synerror("Bad escape sequence");
1326 		  /* We really need iconv here. */
1327 		  if (initial_localeisutf8 && v > 127) {
1328 			  CHECKSTRSPACE(4, out);
1329 			  /*
1330 			   * We cannot use wctomb() as the locale may have
1331 			   * changed.
1332 			   */
1333 			  if (v <= 0x7ff) {
1334 				  USTPUTC(0xc0 | v >> 6, out);
1335 				  USTPUTC(0x80 | (v & 0x3f), out);
1336 				  return out;
1337 			  } else if (v <= 0xffff) {
1338 				  USTPUTC(0xe0 | v >> 12, out);
1339 				  USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
1340 				  USTPUTC(0x80 | (v & 0x3f), out);
1341 				  return out;
1342 			  } else if (v <= 0x10ffff) {
1343 				  USTPUTC(0xf0 | v >> 18, out);
1344 				  USTPUTC(0x80 | ((v >> 12) & 0x3f), out);
1345 				  USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
1346 				  USTPUTC(0x80 | (v & 0x3f), out);
1347 				  return out;
1348 			  }
1349 		  }
1350 		  if (v > 127)
1351 			  v = '?';
1352 		  break;
1353 	default:
1354 		  synerror("Bad escape sequence");
1355 	}
1356 	vc = (char)v;
1357 	/*
1358 	 * We can't handle NUL bytes.
1359 	 * POSIX says we should skip till the closing quote.
1360 	 */
1361 	if (vc == '\0') {
1362 		while ((c = pgetc()) != '\'') {
1363 			if (c == '\\')
1364 				c = pgetc();
1365 			if (c == PEOF)
1366 				synerror("Unterminated quoted string");
1367 			if (c == '\n') {
1368 				plinno++;
1369 				if (doprompt)
1370 					setprompt(2);
1371 				else
1372 					setprompt(0);
1373 			}
1374 		}
1375 		pungetc();
1376 		return out;
1377 	}
1378 	if (SQSYNTAX[vc] == CCTL)
1379 		USTPUTC(CTLESC, out);
1380 	USTPUTC(vc, out);
1381 	return out;
1382 }
1383 
1384 
1385 /*
1386  * If eofmark is NULL, read a word or a redirection symbol.  If eofmark
1387  * is not NULL, read a here document.  In the latter case, eofmark is the
1388  * word which marks the end of the document and striptabs is true if
1389  * leading tabs should be stripped from the document.  The argument firstc
1390  * is the first character of the input token or document.
1391  *
1392  * Because C does not have internal subroutines, I have simulated them
1393  * using goto's to implement the subroutine linkage.  The following macros
1394  * will run code that appears at the end of readtoken1.
1395  */
1396 
1397 #define PARSESUB()	{goto parsesub; parsesub_return:;}
1398 #define	PARSEARITH()	{goto parsearith; parsearith_return:;}
1399 
/*
 * readtoken1: scan one word, redirection operator or here-document body.
 *
 *	firstc		first input character, already read by the caller
 *	initialsyntax	syntax table used at nesting level 0
 *	eofmark		NULL for an ordinary word; otherwise the
 *			here-document end marker, or NOEOFMARK, in which
 *			case the checkend() test is skipped
 *	striptabs	forwarded to checkend()
 *
 * Returns TREDIR (after calling parseredir()) or TWORD; the value is also
 * stored in lasttoken.  For TWORD the result is published through wordtext,
 * quoteflag and backquotelist.
 *
 * Nested ${...} and $((...)) constructs are tracked in state[], which starts
 * out as the on-stack array state_static and is moved to parser_temp storage
 * when more than MAXNEST_static levels are needed.
 */
1400 static int
1401 readtoken1(int firstc, char const *initialsyntax, const char *eofmark,
1402     int striptabs)
1403 {
1404 	int c = firstc;
1405 	char *out;
1406 	int len;
1407 	struct nodelist *bqlist;
1408 	int quotef;
1409 	int newvarnest;
1410 	int level;
1411 	int synentry;
1412 	struct tokenstate state_static[MAXNEST_static];
1413 	int maxnest = MAXNEST_static;
1414 	struct tokenstate *state = state_static;
1415 	int sqiscstyle = 0;
1416 
1417 	startlinno = plinno;
1418 	quotef = 0;
1419 	bqlist = NULL;
1420 	newvarnest = 0;
1421 	level = 0;
	/* Level 0 of the nesting stack describes the word itself. */
1422 	state[level].syntax = initialsyntax;
1423 	state[level].parenlevel = 0;
1424 	state[level].category = TSTATE_TOP;
1425 
1426 	STARTSTACKSTR(out);
1427 	loop: {	/* for each line, until end of word */
1428 		if (eofmark && eofmark != NOEOFMARK)
1429 			/* set c to PEOF if at end of here document */
1430 			c = checkend(c, eofmark, striptabs);
1431 		for (;;) {	/* until end of line or end of word */
1432 			CHECKSTRSPACE(4, out);	/* permit 4 calls to USTPUTC */
1433 
			/* Classify c using the syntax table of the current level. */
1434 			synentry = state[level].syntax[c];
1435 
1436 			switch(synentry) {
1437 			case CNL:	/* '\n' */
1438 				if (level == 0)
1439 					goto endword;	/* exit outer loop */
1440 				/* FALLTHROUGH */
1441 			case CQNL:
1442 				USTPUTC(c, out);
1443 				plinno++;
1444 				if (doprompt)
1445 					setprompt(2);
1446 				else
1447 					setprompt(0);
1448 				c = pgetc();
1449 				goto loop;		/* continue outer loop */
1450 			case CSBACK:
				/* Backslash escapes are only decoded inside $'...'. */
1451 				if (sqiscstyle) {
1452 					out = readcstyleesc(out);
1453 					break;
1454 				}
1455 				/* FALLTHROUGH */
1456 			case CWORD:
1457 				USTPUTC(c, out);
1458 				break;
1459 			case CCTL:
1460 				if (eofmark == NULL || initialsyntax != SQSYNTAX)
1461 					USTPUTC(CTLESC, out);
1462 				USTPUTC(c, out);
1463 				break;
1464 			case CBACK:	/* backslash */
1465 				c = pgetc();
1466 				if (c == PEOF) {
1467 					USTPUTC('\\', out);
1468 					pungetc();
1469 				} else if (c == '\n') {
					/* Backslash-newline: drop both, count the line. */
1470 					plinno++;
1471 					if (doprompt)
1472 						setprompt(2);
1473 					else
1474 						setprompt(0);
1475 				} else {
					/*
					 * In double-quote syntax the backslash is kept
					 * unless it precedes one of the characters tested
					 * below.
					 */
1476 					if (state[level].syntax == DQSYNTAX &&
1477 					    c != '\\' && c != '`' && c != '$' &&
1478 					    (c != '"' || (eofmark != NULL &&
1479 						newvarnest == 0)) &&
1480 					    (c != '}' || state[level].category != TSTATE_VAR_OLD))
1481 						USTPUTC('\\', out);
1482 					if ((eofmark == NULL ||
1483 					    newvarnest > 0) &&
1484 					    state[level].syntax == BASESYNTAX)
1485 						USTPUTC(CTLQUOTEMARK, out);
1486 					if (SQSYNTAX[c] == CCTL)
1487 						USTPUTC(CTLESC, out);
1488 					USTPUTC(c, out);
1489 					if ((eofmark == NULL ||
1490 					    newvarnest > 0) &&
1491 					    state[level].syntax == BASESYNTAX &&
1492 					    state[level].category == TSTATE_VAR_OLD)
1493 						USTPUTC(CTLQUOTEEND, out);
1494 					quotef++;
1495 				}
1496 				break;
1497 			case CSQUOTE:
1498 				USTPUTC(CTLQUOTEMARK, out);
1499 				state[level].syntax = SQSYNTAX;
1500 				sqiscstyle = 0;
1501 				break;
1502 			case CDQUOTE:
1503 				USTPUTC(CTLQUOTEMARK, out);
1504 				state[level].syntax = DQSYNTAX;
1505 				break;
1506 			case CENDQUOTE:
				/*
				 * In a here-document outside any "new style" ${...}
				 * the quote character is copied literally.
				 */
1507 				if (eofmark != NULL && newvarnest == 0)
1508 					USTPUTC(c, out);
1509 				else {
1510 					if (state[level].category == TSTATE_VAR_OLD)
1511 						USTPUTC(CTLQUOTEEND, out);
1512 					state[level].syntax = BASESYNTAX;
1513 					quotef++;
1514 				}
1515 				break;
1516 			case CVAR:	/* '$' */
1517 				PARSESUB();		/* parse substitution */
1518 				break;
1519 			case CENDVAR:	/* '}' */
				/*
				 * The brace closes a ${...} only when the level's
				 * syntax is back where it was on entry; otherwise it
				 * is an ordinary character.
				 */
1520 				if (level > 0 &&
1521 				    ((state[level].category == TSTATE_VAR_OLD &&
1522 				      state[level].syntax ==
1523 				      state[level - 1].syntax) ||
1524 				    (state[level].category == TSTATE_VAR_NEW &&
1525 				     state[level].syntax == BASESYNTAX))) {
1526 					if (state[level].category == TSTATE_VAR_NEW)
1527 						newvarnest--;
1528 					level--;
1529 					USTPUTC(CTLENDVAR, out);
1530 				} else {
1531 					USTPUTC(c, out);
1532 				}
1533 				break;
1534 			case CLP:	/* '(' in arithmetic */
1535 				state[level].parenlevel++;
1536 				USTPUTC(c, out);
1537 				break;
1538 			case CRP:	/* ')' in arithmetic */
1539 				if (state[level].parenlevel > 0) {
1540 					USTPUTC(c, out);
1541 					--state[level].parenlevel;
1542 				} else {
					/* "))" ends the arithmetic expansion. */
1543 					if (pgetc_linecont() == ')') {
1544 						if (level > 0 &&
1545 						    state[level].category == TSTATE_ARITH) {
1546 							level--;
1547 							USTPUTC(CTLENDARI, out);
1548 						} else
1549 							USTPUTC(')', out);
1550 					} else {
1551 						/*
1552 						 * unbalanced parens
1553 						 *  (don't 2nd guess - no error)
1554 						 */
1555 						pungetc();
1556 						USTPUTC(')', out);
1557 					}
1558 				}
1559 				break;
1560 			case CBQUOTE:	/* '`' */
1561 				out = parsebackq(out, &bqlist, 1,
1562 				    state[level].syntax == DQSYNTAX &&
1563 				    (eofmark == NULL || newvarnest > 0),
1564 				    state[level].syntax == DQSYNTAX || state[level].syntax == ARISYNTAX);
1565 				break;
1566 			case CEOF:
1567 				goto endword;		/* exit outer loop */
1568 			case CIGN:
1569 				break;
1570 			default:
1571 				if (level == 0)
1572 					goto endword;	/* exit outer loop */
1573 				USTPUTC(c, out);
1574 			}
1575 			c = pgetc_macro();
1576 		}
1577 	}
	/* Word complete: diagnose constructs that are still open. */
1578 endword:
1579 	if (state[level].syntax == ARISYNTAX)
1580 		synerror("Missing '))'");
1581 	if (state[level].syntax != BASESYNTAX && eofmark == NULL)
1582 		synerror("Unterminated quoted string");
1583 	if (state[level].category == TSTATE_VAR_OLD ||
1584 	    state[level].category == TSTATE_VAR_NEW) {
1585 		startlinno = plinno;
1586 		synerror("Missing '}'");
1587 	}
1588 	if (state != state_static)
1589 		parser_temp_free_upto(state);
1590 	USTPUTC('\0', out);
1591 	len = out - stackblock();
1592 	out = stackblock();
	/*
	 * Outside a here-document, an unquoted word that is empty or a single
	 * digit and is directly followed by '<' or '>' is a redirection
	 * operator (len counts the terminating NUL).  Otherwise the character
	 * that ended the word is pushed back.
	 */
1593 	if (eofmark == NULL) {
1594 		if ((c == '>' || c == '<')
1595 		 && quotef == 0
1596 		 && len <= 2
1597 		 && (*out == '\0' || is_digit(*out))) {
1598 			parseredir(out, c);
1599 			return lasttoken = TREDIR;
1600 		} else {
1601 			pungetc();
1602 		}
1603 	}
1604 	quoteflag = quotef;
1605 	backquotelist = bqlist;
1606 	grabstackblock(len);
1607 	wordtext = out;
1608 	return lasttoken = TWORD;
1609 /* end of readtoken routine */
1610 
1611 
1612 /*
1613  * Parse a substitution.  At this point, we have read the dollar sign
1614  * and nothing else.
1615  */
1616 
1617 parsesub: {
1618 	int subtype;
1619 	int typeloc;
1620 	int flags;
1621 	char *p;
1622 	static const char types[] = "}-+?=";
1623 	int linno;
1624 	int length;
1625 	int c1;
1626 
1627 	c = pgetc_linecont();
1628 	if (c == '(') {	/* $(command) or $((arith)) */
1629 		if (pgetc_linecont() == '(') {
1630 			PARSEARITH();
1631 		} else {
1632 			pungetc();
1633 			out = parsebackq(out, &bqlist, 0,
1634 			    state[level].syntax == DQSYNTAX &&
1635 			    (eofmark == NULL || newvarnest > 0),
1636 			    state[level].syntax == DQSYNTAX ||
1637 			    state[level].syntax == ARISYNTAX);
1638 		}
1639 	} else if (c == '{' || is_name(c) || is_special(c)) {
1640 		USTPUTC(CTLVAR, out);
		/*
		 * Remember where the type byte is stored; it is overwritten
		 * below once the operator and flags are known.
		 */
1641 		typeloc = out - stackblock();
1642 		USTPUTC(VSNORMAL, out);
1643 		subtype = VSNORMAL;
1644 		flags = 0;
		/* subtype 0 means "inside braces, operator not yet seen". */
1645 		if (c == '{') {
1646 			c = pgetc_linecont();
1647 			subtype = 0;
1648 		}
1649 varname:
1650 		if (!is_eof(c) && is_name(c)) {
1651 			length = 0;
1652 			do {
1653 				STPUTC(c, out);
1654 				c = pgetc_linecont();
1655 				length++;
1656 			} while (!is_eof(c) && is_in_name(c));
1657 			if (length == 6 &&
1658 			    strncmp(out - length, "LINENO", length) == 0) {
1659 				/* Replace the variable name with the
1660 				 * current line number. */
1661 				STADJUST(-6, out);
1662 				CHECKSTRSPACE(11, out);
1663 				linno = plinno;
1664 				if (funclinno != 0)
1665 					linno -= funclinno - 1;
1666 				length = snprintf(out, 11, "%d", linno);
				/* snprintf() reports the untruncated length. */
1667 				if (length > 10)
1668 					length = 10;
1669 				out += length;
1670 				flags |= VSLINENO;
1671 			}
1672 		} else if (is_digit(c)) {
			/* Multi-digit parameters are only read inside braces. */
1673 			if (subtype != VSNORMAL) {
1674 				do {
1675 					STPUTC(c, out);
1676 					c = pgetc_linecont();
1677 				} while (is_digit(c));
1678 			} else {
1679 				USTPUTC(c, out);
1680 				c = pgetc_linecont();
1681 			}
1682 		} else if (is_special(c)) {
1683 			c1 = c;
1684 			c = pgetc_linecont();
			/*
			 * "${#" is ambiguous: either the length operator applied
			 * to what follows, or the special parameter '#' followed
			 * by an operator.  Look ahead to decide.
			 */
1685 			if (subtype == 0 && c1 == '#') {
1686 				subtype = VSLENGTH;
1687 				if (strchr(types, c) == NULL && c != ':' &&
1688 				    c != '#' && c != '%')
1689 					goto varname;
1690 				c1 = c;
1691 				c = pgetc_linecont();
1692 				if (c1 != '}' && c == '}') {
1693 					pungetc();
1694 					c = c1;
1695 					goto varname;
1696 				}
1697 				pungetc();
1698 				c = c1;
1699 				c1 = '#';
1700 				subtype = 0;
1701 			}
1702 			USTPUTC(c1, out);
1703 		} else {
1704 			subtype = VSERROR;
1705 			if (c == '}')
1706 				pungetc();
1707 			else if (c == '\n' || c == PEOF)
1708 				synerror("Unexpected end of line in substitution");
1709 			else if (BASESYNTAX[c] != CCTL)
1710 				USTPUTC(c, out);
1711 		}
		/* Inside braces: decode the operator that follows the name. */
1712 		if (subtype == 0) {
1713 			switch (c) {
1714 			case ':':
1715 				flags |= VSNUL;
1716 				c = pgetc_linecont();
1717 				/*FALLTHROUGH*/
1718 			default:
1719 				p = strchr(types, c);
1720 				if (p == NULL) {
1721 					if (c == '\n' || c == PEOF)
1722 						synerror("Unexpected end of line in substitution");
1723 					if (flags == VSNUL)
1724 						STPUTC(':', out);
1725 					if (BASESYNTAX[c] != CCTL)
1726 						STPUTC(c, out);
1727 					subtype = VSERROR;
1728 				} else
1729 					subtype = p - types + VSNORMAL;
1730 				break;
1731 			case '%':
1732 			case '#':
1733 				{
1734 					int cc = c;
1735 					subtype = c == '#' ? VSTRIMLEFT :
1736 							     VSTRIMRIGHT;
1737 					c = pgetc_linecont();
					/* A doubled operator selects the next subtype. */
1738 					if (c == cc)
1739 						subtype++;
1740 					else
1741 						pungetc();
1742 					break;
1743 				}
1744 			}
1745 		} else if (subtype != VSERROR) {
1746 			if (subtype == VSLENGTH && c != '}')
1747 				subtype = VSERROR;
1748 			pungetc();
1749 		}
1750 		STPUTC('=', out);
1751 		if (state[level].syntax == DQSYNTAX ||
1752 		    state[level].syntax == ARISYNTAX)
1753 			flags |= VSQUOTE;
1754 		*(stackblock() + typeloc) = subtype | flags;
1755 		if (subtype != VSNORMAL) {
			/*
			 * Grow the nesting stack when full.  The first growth
			 * copies the static array into parser_temp storage.
			 */
1756 			if (level + 1 >= maxnest) {
1757 				maxnest *= 2;
1758 				if (state == state_static) {
1759 					state = parser_temp_alloc(
1760 					    maxnest * sizeof(*state));
1761 					memcpy(state, state_static,
1762 					    MAXNEST_static * sizeof(*state));
1763 				} else
1764 					state = parser_temp_realloc(state,
1765 					    maxnest * sizeof(*state));
1766 			}
1767 			level++;
1768 			state[level].parenlevel = 0;
1769 			if (subtype == VSMINUS || subtype == VSPLUS ||
1770 			    subtype == VSQUESTION || subtype == VSASSIGN) {
1771 				/*
1772 				 * For operators that were in the Bourne shell,
1773 				 * inherit the double-quote state.
1774 				 */
1775 				state[level].syntax = state[level - 1].syntax;
1776 				state[level].category = TSTATE_VAR_OLD;
1777 			} else {
1778 				/*
1779 				 * The other operators take a pattern,
1780 				 * so go to BASESYNTAX.
1781 				 * Also, ' and " are now special, even
1782 				 * in here documents.
1783 				 */
1784 				state[level].syntax = BASESYNTAX;
1785 				state[level].category = TSTATE_VAR_NEW;
1786 				newvarnest++;
1787 			}
1788 		}
1789 	} else if (c == '\'' && state[level].syntax == BASESYNTAX) {
1790 		/* $'cstylequotes' */
1791 		USTPUTC(CTLQUOTEMARK, out);
1792 		state[level].syntax = SQSYNTAX;
1793 		sqiscstyle = 1;
1794 	} else {
		/* Not a substitution: keep the '$' as a literal character. */
1795 		USTPUTC('$', out);
1796 		pungetc();
1797 	}
1798 	goto parsesub_return;
1799 }
1800 
1801 
1802 /*
1803  * Parse an arithmetic expansion (indicate start of one and set state)
1804  */
1805 parsearith: {
1806 
	/* Same nesting-stack growth as in parsesub above. */
1807 	if (level + 1 >= maxnest) {
1808 		maxnest *= 2;
1809 		if (state == state_static) {
1810 			state = parser_temp_alloc(
1811 			    maxnest * sizeof(*state));
1812 			memcpy(state, state_static,
1813 			    MAXNEST_static * sizeof(*state));
1814 		} else
1815 			state = parser_temp_realloc(state,
1816 			    maxnest * sizeof(*state));
1817 	}
1818 	level++;
1819 	state[level].syntax = ARISYNTAX;
1820 	state[level].parenlevel = 0;
1821 	state[level].category = TSTATE_ARITH;
1822 	USTPUTC(CTLARI, out);
	/* The byte after CTLARI records whether we were inside double quotes. */
1823 	if (state[level - 1].syntax == DQSYNTAX)
1824 		USTPUTC('"',out);
1825 	else
1826 		USTPUTC(' ',out);
1827 	goto parsearith_return;
1828 }
1829 
1830 } /* end of readtoken */
1831 
1832 
1833 /*
1834  * Returns true if the text contains nothing to expand (no dollar signs
1835  * or backquotes).
1836  */
1837 
1838 static int
1839 noexpand(char *text)
1840 {
1841 	char *p;
1842 	char c;
1843 
1844 	p = text;
1845 	while ((c = *p++) != '\0') {
1846 		if ( c == CTLQUOTEMARK)
1847 			continue;
1848 		if (c == CTLESC)
1849 			p++;
1850 		else if (BASESYNTAX[(int)c] == CCTL)
1851 			return 0;
1852 	}
1853 	return 1;
1854 }
1855 
1856 
/*
 * Check that "name" is a legal variable name: its first character must
 * satisfy is_name() and every following character is_in_name().
 * Returns 1 for a valid name, 0 otherwise.
 */

int
goodname(const char *name)
{
	const char *s = name;

	if (!is_name(*s))
		return 0;
	for (s++; *s != '\0'; s++) {
		if (!is_in_name(*s))
			return 0;
	}
	return 1;
}
1876 
1877 
/*
 * Return 1 if "p" starts with a variable name immediately followed by
 * '=', i.e. looks like an assignment word; return 0 otherwise.
 */
int
isassignment(const char *p)
{
	if (!is_name(*p))
		return 0;
	/* Walk over the rest of the candidate name. */
	do {
		p++;
	} while (*p != '=' && is_in_name(*p));
	return *p == '=';
}
1892 
1893 
/*
 * Read the next token and report a syntax error via synexpect() unless
 * it is the one given in "token".
 */
static void
consumetoken(int token)
{
	int got;

	got = readtoken();
	if (got == token)
		return;
	synexpect(token);
}
1900 
1901 
1902 /*
1903  * Called when an unexpected token is read during the parse.  The argument
1904  * is the token that is expected, or -1 if more than one type of token can
1905  * occur at this point.
1906  */
1907 
1908 static void
1909 synexpect(int token)
1910 {
1911 	char msg[64];
1912 
1913 	if (token >= 0) {
1914 		fmtstr(msg, 64, "%s unexpected (expecting %s)",
1915 			tokname[lasttoken], tokname[token]);
1916 	} else {
1917 		fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]);
1918 	}
1919 	synerror(msg);
1920 }
1921 
1922 
1923 static void
1924 synerror(const char *msg)
1925 {
1926 	if (commandname)
1927 		outfmt(out2, "%s: %d: ", commandname, startlinno);
1928 	else if (arg0)
1929 		outfmt(out2, "%s: ", arg0);
1930 	outfmt(out2, "Syntax error: %s\n", msg);
1931 	error((char *)NULL);
1932 }
1933 
1934 static void
1935 setprompt(int which)
1936 {
1937 	whichprompt = which;
1938 	if (which == 0)
1939 		return;
1940 
1941 #ifndef NO_HISTORY
1942 	if (!el)
1943 #endif
1944 	{
1945 		out2str(getprompt(NULL));
1946 		flushout(out2);
1947 	}
1948 }
1949 
1950 static int
1951 pgetc_linecont(void)
1952 {
1953 	int c;
1954 
1955 	while ((c = pgetc_macro()) == '\\') {
1956 		c = pgetc();
1957 		if (c == '\n') {
1958 			plinno++;
1959 			if (doprompt)
1960 				setprompt(2);
1961 			else
1962 				setprompt(0);
1963 		} else {
1964 			pungetc();
1965 			/* Allow the backslash to be pushed back. */
1966 			pushstring("\\", 1, NULL);
1967 			return (pgetc());
1968 		}
1969 	}
1970 	return (c);
1971 }
1972 
1973 
/*
 * Look up the password database entry for the name reported by
 * getlogin().  Returns NULL when no login name is available or when
 * getpwnam() finds no entry for it.
 */
static struct passwd *
getpwlogin(void)
{
	const char *logname = getlogin();

	return (logname != NULL ? getpwnam(logname) : NULL);
}
1985 
1986 
1987 static void
1988 getusername(char *name, size_t namelen)
1989 {
1990 	static char cached_name[MAXLOGNAME];
1991 	struct passwd *pw;
1992 	uid_t euid;
1993 
1994 	if (cached_name[0] == '\0') {
1995 		euid = geteuid();
1996 
1997 		/*
1998 		 * Handle the case when there is more than one
1999 		 * login with the same UID, or when the login
2000 		 * returned by getlogin(2) does no longer match
2001 		 * the current UID.
2002 		 */
2003 		pw = getpwlogin();
2004 		if (pw == NULL || pw->pw_uid != euid)
2005 			pw = getpwuid(euid);
2006 
2007 		if (pw != NULL) {
2008 			strlcpy(cached_name, pw->pw_name,
2009 			    sizeof(cached_name));
2010 		} else {
2011 			snprintf(cached_name, sizeof(cached_name),
2012 			    "%u", euid);
2013 		}
2014 	}
2015 
2016 	strlcpy(name, cached_name, namelen);
2017 }
2018 
2019 
2020 /*
2021  * called by editline -- any expansions to the prompt
2022  *    should be added here.
2023  */
2024 char *
2025 getprompt(void *unused __unused)
2026 {
2027 	static char ps[PROMPTLEN];
2028 	const char *fmt;
2029 	const char *home;
2030 	const char *pwd;
2031 	size_t homelen;
2032 	int i, trim;
2033 	static char internal_error[] = "??";
2034 
2035 	/*
2036 	 * Select prompt format.
2037 	 */
2038 	switch (whichprompt) {
2039 	case 0:
2040 		fmt = "";
2041 		break;
2042 	case 1:
2043 		fmt = ps1val();
2044 		break;
2045 	case 2:
2046 		fmt = ps2val();
2047 		break;
2048 	default:
2049 		return internal_error;
2050 	}
2051 
2052 	/*
2053 	 * Format prompt string.
2054 	 */
2055 	for (i = 0; (i < PROMPTLEN - 1) && (*fmt != '\0'); i++, fmt++) {
2056 		if (*fmt != '\\') {
2057 			ps[i] = *fmt;
2058 			continue;
2059 		}
2060 
2061 		switch (*++fmt) {
2062 
2063 		/*
2064 		 * Non-printing sequence begin and end.
2065 		 */
2066 		case '[':
2067 		case ']':
2068 			ps[i] = '\001';
2069 			break;
2070 
2071 		/*
2072 		 * Literal \ and some ASCII characters:
2073 		 * \a	BEL
2074 		 * \e	ESC
2075 		 * \r	CR
2076 		 */
2077 		case '\\':
2078 		case 'a':
2079 		case 'e':
2080 		case 'r':
2081 			if (*fmt == 'a')
2082 				ps[i] = '\007';
2083 			else if (*fmt == 'e')
2084 				ps[i] = '\033';
2085 			else if (*fmt == 'r')
2086 				ps[i] = '\r';
2087 			else
2088 				ps[i] = '\\';
2089 			break;
2090 
2091 		/*
2092 		 * CRLF sequence
2093 		 */
2094 		case 'n':
2095 			if (i < PROMPTLEN - 3) {
2096 				ps[i++] = '\r';
2097 				ps[i] = '\n';
2098 			}
2099 			break;
2100 
2101 		/*
2102 		 * Print the current time as per provided strftime format.
2103 		 */
2104 		case 'D': {
2105 			char tfmt[128] = "%X"; /* \D{} means %X. */
2106 			struct tm *now;
2107 
2108 			if (fmt[1] != '{') {
2109 				/*
2110 				 * "\D" but not "\D{", so treat the '\'
2111 				 * literally and rewind fmt to treat 'D'
2112 				 * literally next iteration.
2113 				 */
2114 				ps[i] = '\\';
2115 				fmt--;
2116 				break;
2117 			}
2118 			fmt += 2; /* Consume "D{". */
2119 			if (fmt[0] != '}') {
2120 				char *end;
2121 
2122 				end = memccpy(tfmt, fmt, '}', sizeof(tfmt));
2123 				if (end == NULL) {
2124 					/*
2125 					 * Format too long or no '}', so
2126 					 * ignore "\D{" altogether.
2127 					 * The loop will do i++, but nothing
2128 					 * was written to ps, so do i-- here.
2129 					 * Rewind fmt for similar reason.
2130 					 */
2131 					i--;
2132 					fmt--;
2133 					break;
2134 				}
2135 				*--end = '\0'; /* Ignore the copy of '}'. */
2136 				fmt += end - tfmt;
2137 			}
2138 			now = localtime(&(time_t){time(NULL)});
2139 			i += strftime(&ps[i], PROMPTLEN - i - 1, tfmt, now);
2140 			i--; /* The loop will do i++. */
2141 			break;
2142 		}
2143 
2144 		/*
2145 		 * Hostname.
2146 		 *
2147 		 * \h specifies just the local hostname,
2148 		 * \H specifies fully-qualified hostname.
2149 		 */
2150 		case 'h':
2151 		case 'H':
2152 			ps[i] = '\0';
2153 			gethostname(&ps[i], PROMPTLEN - i - 1);
2154 			ps[PROMPTLEN - 1] = '\0';
2155 			/* Skip to end of hostname. */
2156 			trim = (*fmt == 'h') ? '.' : '\0';
2157 			while ((ps[i] != '\0') && (ps[i] != trim))
2158 				i++;
2159 			--i;
2160 			break;
2161 
2162 		/*
2163 		 * User name.
2164 		 */
2165 		case 'u':
2166 			ps[i] = '\0';
2167 			getusername(&ps[i], PROMPTLEN - i);
2168 			/* Skip to end of username. */
2169 			while (ps[i + 1] != '\0')
2170 				i++;
2171 			break;
2172 
2173 		/*
2174 		 * Working directory.
2175 		 *
2176 		 * \W specifies just the final component,
2177 		 * \w specifies the entire path.
2178 		 */
2179 		case 'W':
2180 		case 'w':
2181 			pwd = lookupvar("PWD");
2182 			if (pwd == NULL || *pwd == '\0')
2183 				pwd = "?";
2184 			if (*fmt == 'W' &&
2185 			    *pwd == '/' && pwd[1] != '\0')
2186 				strlcpy(&ps[i], strrchr(pwd, '/') + 1,
2187 				    PROMPTLEN - i);
2188 			else {
2189 				home = lookupvar("HOME");
2190 				if (home != NULL)
2191 					homelen = strlen(home);
2192 				if (home != NULL &&
2193 				    strcmp(home, "/") != 0 &&
2194 				    strncmp(pwd, home, homelen) == 0 &&
2195 				    (pwd[homelen] == '/' ||
2196 				    pwd[homelen] == '\0')) {
2197 					strlcpy(&ps[i], "~",
2198 					    PROMPTLEN - i);
2199 					strlcpy(&ps[i + 1],
2200 					    pwd + homelen,
2201 					    PROMPTLEN - i - 1);
2202 				} else {
2203 					strlcpy(&ps[i], pwd, PROMPTLEN - i);
2204 				}
2205 			}
2206 			/* Skip to end of path. */
2207 			while (ps[i + 1] != '\0')
2208 				i++;
2209 			break;
2210 
2211 		/*
2212 		 * Superuser status.
2213 		 *
2214 		 * '$' for normal users, '#' for root.
2215 		 */
2216 		case '$':
2217 			ps[i] = (geteuid() != 0) ? '$' : '#';
2218 			break;
2219 
2220 		/*
2221 		 * Emit unrecognized formats verbatim.
2222 		 */
2223 		default:
2224 			ps[i] = '\\';
2225 			if (i < PROMPTLEN - 2)
2226 				ps[++i] = *fmt;
2227 			break;
2228 		}
2229 
2230 	}
2231 	ps[i] = '\0';
2232 	return (ps);
2233 }
2234 
2235 
/*
 * Expand the string "ps" the way a double-quoted word would be expanded:
 * it is tokenized with readtoken1() using DQSYNTAX and the resulting word
 * is passed to expandarg().  Command substitution is rejected with an
 * error.
 *
 * Returns stackblock() holding the expansion on success, or NULL when an
 * error unwound back to the setjmp() below.  The saved handler, doprompt,
 * input file and parser_temp state are restored on both paths.
 */
2236 const char *
2237 expandstr(const char *ps)
2238 {
2239 	union node n;
2240 	struct jmploc jmploc;
2241 	struct jmploc *const savehandler = handler;
2242 	const int saveprompt = doprompt;
2243 	struct parsefile *const savetopfile = getcurrentfile();
2244 	struct parser_temp *const saveparser_temp = parser_temp;
2245 	const char *result = NULL;
2246 
	/*
	 * Install a local handler; presumably errors raised while parsing
	 * or expanding longjmp back here, making setjmp() return non-zero
	 * so that the block is skipped and result stays NULL.
	 */
2247 	if (!setjmp(jmploc.loc)) {
2248 		handler = &jmploc;
2249 		parser_temp = NULL;
2250 		setinputstring(ps, 1);
2251 		doprompt = 0;
2252 		readtoken1(pgetc(), DQSYNTAX, NOEOFMARK, 0);
2253 		if (backquotelist != NULL)
2254 			error("Command substitution not allowed here");
2255 
		/* Wrap the scanned word in a temporary NARG node. */
2256 		n.narg.type = NARG;
2257 		n.narg.next = NULL;
2258 		n.narg.text = wordtext;
2259 		n.narg.backquote = backquotelist;
2260 
2261 		expandarg(&n, NULL, 0);
2262 		result = stackblock();
		/* Paired with the INTON after the cleanup below. */
2263 		INTOFF;
2264 	}
	/* Cleanup common to the success and the error path. */
2265 	handler = savehandler;
2266 	doprompt = saveprompt;
2267 	popfilesupto(savetopfile);
2268 	if (parser_temp != saveparser_temp) {
2269 		parser_temp_free_all();
2270 		parser_temp = saveparser_temp;
2271 	}
2272 	if (result != NULL) {
2273 		INTON;
	/* On failure caused by EXINT, re-deliver the signal. */
2274 	} else if (exception == EXINT)
2275 		raise(SIGINT);
2276 	return result;
2277 }
2278