xref: /freebsd/bin/sh/parser.c (revision 11afcc8f9f96d657b8e6f7547c02c1957331fc96)
1 /*-
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Kenneth Almquist.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #ifndef lint
38 #if 0
39 static char sccsid[] = "@(#)parser.c	8.7 (Berkeley) 5/16/95";
40 #endif
41 static const char rcsid[] =
42 	"$Id$";
43 #endif /* not lint */
44 
45 #include <stdlib.h>
46 
47 #include "shell.h"
48 #include "parser.h"
49 #include "nodes.h"
50 #include "expand.h"	/* defines rmescapes() */
51 #include "redir.h"	/* defines copyfd() */
52 #include "syntax.h"
53 #include "options.h"
54 #include "input.h"
55 #include "output.h"
56 #include "var.h"
57 #include "error.h"
58 #include "memalloc.h"
59 #include "mystring.h"
60 #include "alias.h"
61 #include "show.h"
62 #ifndef NO_HISTORY
63 #include "myhistedit.h"
64 #endif
65 
66 /*
67  * Shell command parser.
68  */
69 
70 #define EOFMARKLEN 79
71 
72 /* values returned by readtoken */
73 #include "token.h"
74 
75 
76 
/*
 * Record for one here document ("<<" redirection) whose text has not
 * been read yet.  Records are linked through `next'.
 */
struct heredoc {
	struct heredoc *next;	/* following record in the list */
	union node *here;	/* redirection node the document belongs to */
	char *eofmark;		/* word that marks the end of the document */
	int striptabs;		/* nonzero: leading tabs are stripped */
};
83 
84 
85 
86 struct heredoc *heredoclist;	/* list of here documents to read */
87 int parsebackquote;		/* nonzero if we are inside backquotes */
88 int doprompt;			/* if set, prompt the user */
89 int needprompt;			/* true if interactive and at start of line */
90 int lasttoken;			/* last token read */
91 MKINIT int tokpushback;		/* last token pushed back */
92 char *wordtext;			/* text of last word returned by readtoken */
93 MKINIT int checkkwd;            /* 1 == check for kwds, 2 == also eat newlines */
94 struct nodelist *backquotelist;
95 union node *redirnode;
96 struct heredoc *heredoc;
97 int quoteflag;			/* set if (part of) last token was quoted */
98 int startlinno;			/* line # where last token started */
99 
100 /* XXX When 'noaliases' is set to one, no alias expansion takes place. */
101 static int noaliases = 0;
102 
103 #define GDB_HACK 1 /* avoid local declarations which gdb can't handle */
104 #ifdef GDB_HACK
105 static const char argvars[5] = {CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'};
106 static const char types[] = "}-+?=";
107 #endif
108 
109 
110 STATIC union node *list __P((int));
111 STATIC union node *andor __P((void));
112 STATIC union node *pipeline __P((void));
113 STATIC union node *command __P((void));
114 STATIC union node *simplecmd __P((union node **, union node *));
115 STATIC union node *makename __P((void));
116 STATIC void parsefname __P((void));
117 STATIC void parseheredoc __P((void));
118 STATIC int peektoken __P((void));
119 STATIC int readtoken __P((void));
120 STATIC int xxreadtoken __P((void));
121 STATIC int readtoken1 __P((int, char const *, char *, int));
122 STATIC int noexpand __P((char *));
123 STATIC void synexpect __P((int));
124 STATIC void synerror __P((char *));
125 STATIC void setprompt __P((int));
126 
127 
128 /*
129  * Read and parse a command.  Returns NEOF on end of file.  (NULL is a
130  * valid parse tree indicating a blank line.)
131  */
132 
133 union node *
134 parsecmd(interact)
135 	int interact;
136 {
137 	int t;
138 
139 	doprompt = interact;
140 	if (doprompt)
141 		setprompt(1);
142 	else
143 		setprompt(0);
144 	needprompt = 0;
145 	t = readtoken();
146 	if (t == TEOF)
147 		return NEOF;
148 	if (t == TNL)
149 		return NULL;
150 	tokpushback++;
151 	return list(1);
152 }
153 
154 
155 STATIC union node *
156 list(nlflag)
157 	int nlflag;
158 {
159 	union node *n1, *n2, *n3;
160 	int tok;
161 
162 	checkkwd = 2;
163 	if (nlflag == 0 && tokendlist[peektoken()])
164 		return NULL;
165 	n1 = NULL;
166 	for (;;) {
167 		n2 = andor();
168 		tok = readtoken();
169 		if (tok == TBACKGND) {
170 			if (n2->type == NCMD || n2->type == NPIPE) {
171 				n2->ncmd.backgnd = 1;
172 			} else if (n2->type == NREDIR) {
173 				n2->type = NBACKGND;
174 			} else {
175 				n3 = (union node *)stalloc(sizeof (struct nredir));
176 				n3->type = NBACKGND;
177 				n3->nredir.n = n2;
178 				n3->nredir.redirect = NULL;
179 				n2 = n3;
180 			}
181 		}
182 		if (n1 == NULL) {
183 			n1 = n2;
184 		}
185 		else {
186 			n3 = (union node *)stalloc(sizeof (struct nbinary));
187 			n3->type = NSEMI;
188 			n3->nbinary.ch1 = n1;
189 			n3->nbinary.ch2 = n2;
190 			n1 = n3;
191 		}
192 		switch (tok) {
193 		case TBACKGND:
194 		case TSEMI:
195 			tok = readtoken();
196 			/* fall through */
197 		case TNL:
198 			if (tok == TNL) {
199 				parseheredoc();
200 				if (nlflag)
201 					return n1;
202 			} else {
203 				tokpushback++;
204 			}
205 			checkkwd = 2;
206 			if (tokendlist[peektoken()])
207 				return n1;
208 			break;
209 		case TEOF:
210 			if (heredoclist)
211 				parseheredoc();
212 			else
213 				pungetc();		/* push back EOF on input */
214 			return n1;
215 		default:
216 			if (nlflag)
217 				synexpect(-1);
218 			tokpushback++;
219 			return n1;
220 		}
221 	}
222 }
223 
224 
225 
226 STATIC union node *
227 andor() {
228 	union node *n1, *n2, *n3;
229 	int t;
230 
231 	n1 = pipeline();
232 	for (;;) {
233 		if ((t = readtoken()) == TAND) {
234 			t = NAND;
235 		} else if (t == TOR) {
236 			t = NOR;
237 		} else {
238 			tokpushback++;
239 			return n1;
240 		}
241 		n2 = pipeline();
242 		n3 = (union node *)stalloc(sizeof (struct nbinary));
243 		n3->type = t;
244 		n3->nbinary.ch1 = n1;
245 		n3->nbinary.ch2 = n2;
246 		n1 = n3;
247 	}
248 }
249 
250 
251 
252 STATIC union node *
253 pipeline() {
254 	union node *n1, *pipenode, *notnode;
255 	struct nodelist *lp, *prev;
256 	int negate = 0;
257 
258 	TRACE(("pipeline: entered\n"));
259 	while (readtoken() == TNOT) {
260 		TRACE(("pipeline: TNOT recognized\n"));
261 		negate = !negate;
262 	}
263 	tokpushback++;
264 	n1 = command();
265 	if (readtoken() == TPIPE) {
266 		pipenode = (union node *)stalloc(sizeof (struct npipe));
267 		pipenode->type = NPIPE;
268 		pipenode->npipe.backgnd = 0;
269 		lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
270 		pipenode->npipe.cmdlist = lp;
271 		lp->n = n1;
272 		do {
273 			prev = lp;
274 			lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
275 			lp->n = command();
276 			prev->next = lp;
277 		} while (readtoken() == TPIPE);
278 		lp->next = NULL;
279 		n1 = pipenode;
280 	}
281 	tokpushback++;
282 	if (negate) {
283 		notnode = (union node *)stalloc(sizeof(struct nnot));
284 		notnode->type = NNOT;
285 		notnode->nnot.com = n1;
286 		n1 = notnode;
287 	}
288 	return n1;
289 }
290 
291 
292 
293 STATIC union node *
294 command() {
295 	union node *n1, *n2;
296 	union node *ap, **app;
297 	union node *cp, **cpp;
298 	union node *redir, **rpp;
299 	int t;
300 
301 	checkkwd = 2;
302 	redir = NULL;
303 	n1 = NULL;
304 	rpp = &redir;
305 
306 	/* Check for redirection which may precede command */
307 	while (readtoken() == TREDIR) {
308 		*rpp = n2 = redirnode;
309 		rpp = &n2->nfile.next;
310 		parsefname();
311 	}
312 	tokpushback++;
313 
314 	switch (readtoken()) {
315 	case TIF:
316 		n1 = (union node *)stalloc(sizeof (struct nif));
317 		n1->type = NIF;
318 		n1->nif.test = list(0);
319 		if (readtoken() != TTHEN)
320 			synexpect(TTHEN);
321 		n1->nif.ifpart = list(0);
322 		n2 = n1;
323 		while (readtoken() == TELIF) {
324 			n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif));
325 			n2 = n2->nif.elsepart;
326 			n2->type = NIF;
327 			n2->nif.test = list(0);
328 			if (readtoken() != TTHEN)
329 				synexpect(TTHEN);
330 			n2->nif.ifpart = list(0);
331 		}
332 		if (lasttoken == TELSE)
333 			n2->nif.elsepart = list(0);
334 		else {
335 			n2->nif.elsepart = NULL;
336 			tokpushback++;
337 		}
338 		if (readtoken() != TFI)
339 			synexpect(TFI);
340 		checkkwd = 1;
341 		break;
342 	case TWHILE:
343 	case TUNTIL: {
344 		int got;
345 		n1 = (union node *)stalloc(sizeof (struct nbinary));
346 		n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
347 		n1->nbinary.ch1 = list(0);
348 		if ((got=readtoken()) != TDO) {
349 TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
350 			synexpect(TDO);
351 		}
352 		n1->nbinary.ch2 = list(0);
353 		if (readtoken() != TDONE)
354 			synexpect(TDONE);
355 		checkkwd = 1;
356 		break;
357 	}
358 	case TFOR:
359 		if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
360 			synerror("Bad for loop variable");
361 		n1 = (union node *)stalloc(sizeof (struct nfor));
362 		n1->type = NFOR;
363 		n1->nfor.var = wordtext;
364 		if (readtoken() == TWORD && ! quoteflag && equal(wordtext, "in")) {
365 			app = &ap;
366 			while (readtoken() == TWORD) {
367 				n2 = (union node *)stalloc(sizeof (struct narg));
368 				n2->type = NARG;
369 				n2->narg.text = wordtext;
370 				n2->narg.backquote = backquotelist;
371 				*app = n2;
372 				app = &n2->narg.next;
373 			}
374 			*app = NULL;
375 			n1->nfor.args = ap;
376 			if (lasttoken != TNL && lasttoken != TSEMI)
377 				synexpect(-1);
378 		} else {
379 #ifndef GDB_HACK
380 			static const char argvars[5] = {CTLVAR, VSNORMAL|VSQUOTE,
381 								   '@', '=', '\0'};
382 #endif
383 			n2 = (union node *)stalloc(sizeof (struct narg));
384 			n2->type = NARG;
385 			n2->narg.text = (char *)argvars;
386 			n2->narg.backquote = NULL;
387 			n2->narg.next = NULL;
388 			n1->nfor.args = n2;
389 			/*
390 			 * Newline or semicolon here is optional (but note
391 			 * that the original Bourne shell only allowed NL).
392 			 */
393 			if (lasttoken != TNL && lasttoken != TSEMI)
394 				tokpushback++;
395 		}
396 		checkkwd = 2;
397 		if ((t = readtoken()) == TDO)
398 			t = TDONE;
399 		else if (t == TBEGIN)
400 			t = TEND;
401 		else
402 			synexpect(-1);
403 		n1->nfor.body = list(0);
404 		if (readtoken() != t)
405 			synexpect(t);
406 		checkkwd = 1;
407 		break;
408 	case TCASE:
409 		n1 = (union node *)stalloc(sizeof (struct ncase));
410 		n1->type = NCASE;
411 		if (readtoken() != TWORD)
412 			synexpect(TWORD);
413 		n1->ncase.expr = n2 = (union node *)stalloc(sizeof (struct narg));
414 		n2->type = NARG;
415 		n2->narg.text = wordtext;
416 		n2->narg.backquote = backquotelist;
417 		n2->narg.next = NULL;
418 		while (readtoken() == TNL);
419 		if (lasttoken != TWORD || ! equal(wordtext, "in"))
420 			synerror("expecting \"in\"");
421 		cpp = &n1->ncase.cases;
422 		noaliases = 1;	/* turn off alias expansion */
423 		checkkwd = 2, readtoken();
424 		do {
425 			*cpp = cp = (union node *)stalloc(sizeof (struct nclist));
426 			cp->type = NCLIST;
427 			app = &cp->nclist.pattern;
428 			for (;;) {
429 				*app = ap = (union node *)stalloc(sizeof (struct narg));
430 				ap->type = NARG;
431 				ap->narg.text = wordtext;
432 				ap->narg.backquote = backquotelist;
433 				if (checkkwd = 2, readtoken() != TPIPE)
434 					break;
435 				app = &ap->narg.next;
436 				readtoken();
437 			}
438 			ap->narg.next = NULL;
439 			if (lasttoken != TRP)
440 				noaliases = 0, synexpect(TRP);
441 			cp->nclist.body = list(0);
442 
443 			checkkwd = 2;
444 			if ((t = readtoken()) != TESAC) {
445 				if (t != TENDCASE)
446 					noaliases = 0, synexpect(TENDCASE);
447 				else
448 					checkkwd = 2, readtoken();
449 			}
450 			cpp = &cp->nclist.next;
451 		} while(lasttoken != TESAC);
452 		noaliases = 0;	/* reset alias expansion */
453 		*cpp = NULL;
454 		checkkwd = 1;
455 		break;
456 	case TLP:
457 		n1 = (union node *)stalloc(sizeof (struct nredir));
458 		n1->type = NSUBSHELL;
459 		n1->nredir.n = list(0);
460 		n1->nredir.redirect = NULL;
461 		if (readtoken() != TRP)
462 			synexpect(TRP);
463 		checkkwd = 1;
464 		break;
465 	case TBEGIN:
466 		n1 = list(0);
467 		if (readtoken() != TEND)
468 			synexpect(TEND);
469 		checkkwd = 1;
470 		break;
471 	/* Handle an empty command like other simple commands.  */
472 	case TSEMI:
473 		/*
474 		 * An empty command before a ; doesn't make much sense, and
475 		 * should certainly be disallowed in the case of `if ;'.
476 		 */
477 		if (!redir)
478 			synexpect(-1);
479 	case TAND:
480 	case TOR:
481 	case TNL:
482 	case TEOF:
483 	case TWORD:
484 	case TRP:
485 		tokpushback++;
486 		return simplecmd(rpp, redir);
487 	default:
488 		synexpect(-1);
489 	}
490 
491 	/* Now check for redirection which may follow command */
492 	while (readtoken() == TREDIR) {
493 		*rpp = n2 = redirnode;
494 		rpp = &n2->nfile.next;
495 		parsefname();
496 	}
497 	tokpushback++;
498 	*rpp = NULL;
499 	if (redir) {
500 		if (n1->type != NSUBSHELL) {
501 			n2 = (union node *)stalloc(sizeof (struct nredir));
502 			n2->type = NREDIR;
503 			n2->nredir.n = n1;
504 			n1 = n2;
505 		}
506 		n1->nredir.redirect = redir;
507 	}
508 	return n1;
509 }
510 
511 
512 STATIC union node *
513 simplecmd(rpp, redir)
514 	union node **rpp, *redir;
515 	{
516 	union node *args, **app;
517 	union node **orig_rpp = rpp;
518 	union node *n = NULL;
519 
520 	/* If we don't have any redirections already, then we must reset */
521 	/* rpp to be the address of the local redir variable.  */
522 	if (redir == 0)
523 		rpp = &redir;
524 
525 	args = NULL;
526 	app = &args;
527 	/*
528 	 * We save the incoming value, because we need this for shell
529 	 * functions.  There can not be a redirect or an argument between
530 	 * the function name and the open parenthesis.
531 	 */
532 	orig_rpp = rpp;
533 
534 	for (;;) {
535 		if (readtoken() == TWORD) {
536 			n = (union node *)stalloc(sizeof (struct narg));
537 			n->type = NARG;
538 			n->narg.text = wordtext;
539 			n->narg.backquote = backquotelist;
540 			*app = n;
541 			app = &n->narg.next;
542 		} else if (lasttoken == TREDIR) {
543 			*rpp = n = redirnode;
544 			rpp = &n->nfile.next;
545 			parsefname();	/* read name of redirection file */
546 		} else if (lasttoken == TLP && app == &args->narg.next
547 					    && rpp == orig_rpp) {
548 			/* We have a function */
549 			if (readtoken() != TRP)
550 				synexpect(TRP);
551 #ifdef notdef
552 			if (! goodname(n->narg.text))
553 				synerror("Bad function name");
554 #endif
555 			n->type = NDEFUN;
556 			n->narg.next = command();
557 			return n;
558 		} else {
559 			tokpushback++;
560 			break;
561 		}
562 	}
563 	*app = NULL;
564 	*rpp = NULL;
565 	n = (union node *)stalloc(sizeof (struct ncmd));
566 	n->type = NCMD;
567 	n->ncmd.backgnd = 0;
568 	n->ncmd.args = args;
569 	n->ncmd.redirect = redir;
570 	return n;
571 }
572 
573 STATIC union node *
574 makename() {
575 	union node *n;
576 
577 	n = (union node *)stalloc(sizeof (struct narg));
578 	n->type = NARG;
579 	n->narg.next = NULL;
580 	n->narg.text = wordtext;
581 	n->narg.backquote = backquotelist;
582 	return n;
583 }
584 
585 void fixredir(n, text, err)
586 	union node *n;
587 	const char *text;
588 	int err;
589 	{
590 	TRACE(("Fix redir %s %d\n", text, err));
591 	if (!err)
592 		n->ndup.vname = NULL;
593 
594 	if (is_digit(text[0]) && text[1] == '\0')
595 		n->ndup.dupfd = digit_val(text[0]);
596 	else if (text[0] == '-' && text[1] == '\0')
597 		n->ndup.dupfd = -1;
598 	else {
599 
600 		if (err)
601 			synerror("Bad fd number");
602 		else
603 			n->ndup.vname = makename();
604 	}
605 }
606 
607 
608 STATIC void
609 parsefname() {
610 	union node *n = redirnode;
611 
612 	if (readtoken() != TWORD)
613 		synexpect(-1);
614 	if (n->type == NHERE) {
615 		struct heredoc *here = heredoc;
616 		struct heredoc *p;
617 		int i;
618 
619 		if (quoteflag == 0)
620 			n->type = NXHERE;
621 		TRACE(("Here document %d\n", n->type));
622 		if (here->striptabs) {
623 			while (*wordtext == '\t')
624 				wordtext++;
625 		}
626 		if (! noexpand(wordtext) || (i = strlen(wordtext)) == 0 || i > EOFMARKLEN)
627 			synerror("Illegal eof marker for << redirection");
628 		rmescapes(wordtext);
629 		here->eofmark = wordtext;
630 		here->next = NULL;
631 		if (heredoclist == NULL)
632 			heredoclist = here;
633 		else {
634 			for (p = heredoclist ; p->next ; p = p->next);
635 			p->next = here;
636 		}
637 	} else if (n->type == NTOFD || n->type == NFROMFD) {
638 		fixredir(n, wordtext, 0);
639 	} else {
640 		n->nfile.fname = makename();
641 	}
642 }
643 
644 
645 /*
646  * Input any here documents.
647  */
648 
649 STATIC void
650 parseheredoc() {
651 	struct heredoc *here;
652 	union node *n;
653 
654 	while (heredoclist) {
655 		here = heredoclist;
656 		heredoclist = here->next;
657 		if (needprompt) {
658 			setprompt(2);
659 			needprompt = 0;
660 		}
661 		readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
662 				here->eofmark, here->striptabs);
663 		n = (union node *)stalloc(sizeof (struct narg));
664 		n->narg.type = NARG;
665 		n->narg.next = NULL;
666 		n->narg.text = wordtext;
667 		n->narg.backquote = backquotelist;
668 		here->here->nhere.doc = n;
669 	}
670 }
671 
672 STATIC int
673 peektoken() {
674 	int t;
675 
676 	t = readtoken();
677 	tokpushback++;
678 	return (t);
679 }
680 
681 STATIC int xxreadtoken();
682 
683 STATIC int
684 readtoken() {
685 	int t;
686 	int savecheckkwd = checkkwd;
687 	struct alias *ap;
688 #ifdef DEBUG
689 	int alreadyseen = tokpushback;
690 #endif
691 
692 	top:
693 	t = xxreadtoken();
694 
695 	if (checkkwd) {
696 		/*
697 		 * eat newlines
698 		 */
699 		if (checkkwd == 2) {
700 			checkkwd = 0;
701 			while (t == TNL) {
702 				parseheredoc();
703 				t = xxreadtoken();
704 			}
705 		} else
706 			checkkwd = 0;
707 		/*
708 		 * check for keywords and aliases
709 		 */
710 		if (t == TWORD && !quoteflag)
711 		{
712 			char * const *pp;
713 
714 			for (pp = (char **)parsekwd; *pp; pp++) {
715 				if (**pp == *wordtext && equal(*pp, wordtext))
716 				{
717 					lasttoken = t = pp - parsekwd + KWDOFFSET;
718 					TRACE(("keyword %s recognized\n", tokname[t]));
719 					goto out;
720 				}
721 			}
722 			if (noaliases == 0 &&
723 			    (ap = lookupalias(wordtext, 1)) != NULL) {
724 				pushstring(ap->val, strlen(ap->val), ap);
725 				checkkwd = savecheckkwd;
726 				goto top;
727 			}
728 		}
729 out:
730 		checkkwd = 0;
731 	}
732 #ifdef DEBUG
733 	if (!alreadyseen)
734 	    TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
735 	else
736 	    TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
737 #endif
738 	return (t);
739 }
740 
741 
742 /*
743  * Read the next input token.
744  * If the token is a word, we set backquotelist to the list of cmds in
745  *	backquotes.  We set quoteflag to true if any part of the word was
746  *	quoted.
747  * If the token is TREDIR, then we set redirnode to a structure containing
748  *	the redirection.
749  * In all cases, the variable startlinno is set to the number of the line
750  *	on which the token starts.
751  *
752  * [Change comment:  here documents and internal procedures]
753  * [Readtoken shouldn't have any arguments.  Perhaps we should make the
754  *  word parsing code into a separate routine.  In this case, readtoken
755  *  doesn't need to have any internal procedures, but parseword does.
756  *  We could also make parseoperator in essence the main routine, and
757  *  have parseword (readtoken1?) handle both words and redirection.]
758  */
759 
760 #define RETURN(token)	return lasttoken = token
761 
762 STATIC int
763 xxreadtoken() {
764 	int c;
765 
766 	if (tokpushback) {
767 		tokpushback = 0;
768 		return lasttoken;
769 	}
770 	if (needprompt) {
771 		setprompt(2);
772 		needprompt = 0;
773 	}
774 	startlinno = plinno;
775 	for (;;) {	/* until token or start of word found */
776 		c = pgetc_macro();
777 		if (c == ' ' || c == '\t')
778 			continue;		/* quick check for white space first */
779 		switch (c) {
780 		case ' ': case '\t':
781 			continue;
782 		case '#':
783 			while ((c = pgetc()) != '\n' && c != PEOF);
784 			pungetc();
785 			continue;
786 		case '\\':
787 			if (pgetc() == '\n') {
788 				startlinno = ++plinno;
789 				if (doprompt)
790 					setprompt(2);
791 				else
792 					setprompt(0);
793 				continue;
794 			}
795 			pungetc();
796 			goto breakloop;
797 		case '\n':
798 			plinno++;
799 			needprompt = doprompt;
800 			RETURN(TNL);
801 		case PEOF:
802 			RETURN(TEOF);
803 		case '&':
804 			if (pgetc() == '&')
805 				RETURN(TAND);
806 			pungetc();
807 			RETURN(TBACKGND);
808 		case '|':
809 			if (pgetc() == '|')
810 				RETURN(TOR);
811 			pungetc();
812 			RETURN(TPIPE);
813 		case ';':
814 			if (pgetc() == ';')
815 				RETURN(TENDCASE);
816 			pungetc();
817 			RETURN(TSEMI);
818 		case '(':
819 			RETURN(TLP);
820 		case ')':
821 			RETURN(TRP);
822 		default:
823 			goto breakloop;
824 		}
825 	}
826 breakloop:
827 	return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
828 #undef RETURN
829 }
830 
831 
832 
833 /*
834  * If eofmark is NULL, read a word or a redirection symbol.  If eofmark
835  * is not NULL, read a here document.  In the latter case, eofmark is the
836  * word which marks the end of the document and striptabs is true if
837  * leading tabs should be stripped from the document.  The argument firstc
838  * is the first character of the input token or document.
839  *
840  * Because C does not have internal subroutines, I have simulated them
841  * using goto's to implement the subroutine linkage.  The following macros
842  * will run code that appears at the end of readtoken1.
843  */
844 
/*
 * Subroutine linkage for readtoken1: each macro jumps to the named label
 * and defines the label that the section jumps back to.  The two
 * backquote variants share the parsebackq label and differ only in the
 * value stored in oldstyle and in their return label.
 */
#define CHECKEND()	{goto checkend; checkend_return:;}
#define PARSEREDIR()	{goto parseredir; parseredir_return:;}
#define PARSESUB()	{goto parsesub; parsesub_return:;}
#define PARSEBACKQOLD()	{oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;}
#define PARSEBACKQNEW()	{oldstyle = 0; goto parsebackq; parsebackq_newreturn:;}
#define	PARSEARITH()	{goto parsearith; parsearith_return:;}
851 
852 STATIC int
853 readtoken1(firstc, syntax, eofmark, striptabs)
854 	int firstc;
855 	char const *syntax;
856 	char *eofmark;
857 	int striptabs;
858 	{
859 	int c = firstc;
860 	char *out;
861 	int len;
862 	char line[EOFMARKLEN + 1];
863 	struct nodelist *bqlist;
864 	int quotef;
865 	int dblquote;
866 	int varnest;	/* levels of variables expansion */
867 	int arinest;	/* levels of arithmetic expansion */
868 	int parenlevel;	/* levels of parens in arithmetic */
869 	int oldstyle;
870 	char const *prevsyntax;	/* syntax before arithmetic */
871 #if __GNUC__
872 	/* Avoid longjmp clobbering */
873 	(void) &out;
874 	(void) &quotef;
875 	(void) &dblquote;
876 	(void) &varnest;
877 	(void) &arinest;
878 	(void) &parenlevel;
879 	(void) &oldstyle;
880 	(void) &prevsyntax;
881 	(void) &syntax;
882 #endif
883 
884 	startlinno = plinno;
885 	dblquote = 0;
886 	if (syntax == DQSYNTAX)
887 		dblquote = 1;
888 	quotef = 0;
889 	bqlist = NULL;
890 	varnest = 0;
891 	arinest = 0;
892 	parenlevel = 0;
893 
894 	STARTSTACKSTR(out);
895 	loop: {	/* for each line, until end of word */
896 #if ATTY
897 		if (c == '\034' && doprompt
898 		 && attyset() && ! equal(termval(), "emacs")) {
899 			attyline();
900 			if (syntax == BASESYNTAX)
901 				return readtoken();
902 			c = pgetc();
903 			goto loop;
904 		}
905 #endif
906 		CHECKEND();	/* set c to PEOF if at end of here document */
907 		for (;;) {	/* until end of line or end of word */
908 			CHECKSTRSPACE(3, out);	/* permit 3 calls to USTPUTC */
909 			switch(syntax[c]) {
910 			case CNL:	/* '\n' */
911 				if (syntax == BASESYNTAX)
912 					goto endword;	/* exit outer loop */
913 				USTPUTC(c, out);
914 				plinno++;
915 				if (doprompt)
916 					setprompt(2);
917 				else
918 					setprompt(0);
919 				c = pgetc();
920 				goto loop;		/* continue outer loop */
921 			case CWORD:
922 				USTPUTC(c, out);
923 				break;
924 			case CCTL:
925 				if (eofmark == NULL || dblquote)
926 					USTPUTC(CTLESC, out);
927 				USTPUTC(c, out);
928 				break;
929 			case CBACK:	/* backslash */
930 				c = pgetc();
931 				if (c == PEOF) {
932 					USTPUTC('\\', out);
933 					pungetc();
934 				} else if (c == '\n') {
935 					if (doprompt)
936 						setprompt(2);
937 					else
938 						setprompt(0);
939 				} else {
940 					if (dblquote && c != '\\' && c != '`' && c != '$'
941 							 && (c != '"' || eofmark != NULL))
942 						USTPUTC('\\', out);
943 					if (SQSYNTAX[c] == CCTL)
944 						USTPUTC(CTLESC, out);
945 					USTPUTC(c, out);
946 					quotef++;
947 				}
948 				break;
949 			case CSQUOTE:
950 				syntax = SQSYNTAX;
951 				break;
952 			case CDQUOTE:
953 				syntax = DQSYNTAX;
954 				dblquote = 1;
955 				break;
956 			case CENDQUOTE:
957 				if (eofmark) {
958 					USTPUTC(c, out);
959 				} else {
960 					if (arinest)
961 						syntax = ARISYNTAX;
962 					else
963 						syntax = BASESYNTAX;
964 					quotef++;
965 					dblquote = 0;
966 				}
967 				break;
968 			case CVAR:	/* '$' */
969 				PARSESUB();		/* parse substitution */
970 				break;
971 			case CENDVAR:	/* '}' */
972 				if (varnest > 0) {
973 					varnest--;
974 					USTPUTC(CTLENDVAR, out);
975 				} else {
976 					USTPUTC(c, out);
977 				}
978 				break;
979 			case CLP:	/* '(' in arithmetic */
980 				parenlevel++;
981 				USTPUTC(c, out);
982 				break;
983 			case CRP:	/* ')' in arithmetic */
984 				if (parenlevel > 0) {
985 					USTPUTC(c, out);
986 					--parenlevel;
987 				} else {
988 					if (pgetc() == ')') {
989 						if (--arinest == 0) {
990 							USTPUTC(CTLENDARI, out);
991 							syntax = prevsyntax;
992 						} else
993 							USTPUTC(')', out);
994 					} else {
995 						/*
996 						 * unbalanced parens
997 						 *  (don't 2nd guess - no error)
998 						 */
999 						pungetc();
1000 						USTPUTC(')', out);
1001 					}
1002 				}
1003 				break;
1004 			case CBQUOTE:	/* '`' */
1005 				PARSEBACKQOLD();
1006 				break;
1007 			case CEOF:
1008 				goto endword;		/* exit outer loop */
1009 			default:
1010 				if (varnest == 0)
1011 					goto endword;	/* exit outer loop */
1012 				USTPUTC(c, out);
1013 			}
1014 			c = pgetc_macro();
1015 		}
1016 	}
1017 endword:
1018 	if (syntax == ARISYNTAX)
1019 		synerror("Missing '))'");
1020 	if (syntax != BASESYNTAX && ! parsebackquote && eofmark == NULL)
1021 		synerror("Unterminated quoted string");
1022 	if (varnest != 0) {
1023 		startlinno = plinno;
1024 		synerror("Missing '}'");
1025 	}
1026 	USTPUTC('\0', out);
1027 	len = out - stackblock();
1028 	out = stackblock();
1029 	if (eofmark == NULL) {
1030 		if ((c == '>' || c == '<')
1031 		 && quotef == 0
1032 		 && len <= 2
1033 		 && (*out == '\0' || is_digit(*out))) {
1034 			PARSEREDIR();
1035 			return lasttoken = TREDIR;
1036 		} else {
1037 			pungetc();
1038 		}
1039 	}
1040 	quoteflag = quotef;
1041 	backquotelist = bqlist;
1042 	grabstackblock(len);
1043 	wordtext = out;
1044 	return lasttoken = TWORD;
1045 /* end of readtoken routine */
1046 
1047 
1048 
1049 /*
1050  * Check to see whether we are at the end of the here document.  When this
1051  * is called, c is set to the first character of the next input line.  If
1052  * we are at the end of the here document, this routine sets the c to PEOF.
1053  */
1054 
1055 checkend: {
1056 	if (eofmark) {
1057 		if (striptabs) {
1058 			while (c == '\t')
1059 				c = pgetc();
1060 		}
1061 		if (c == *eofmark) {
1062 			if (pfgets(line, sizeof line) != NULL) {
1063 				char *p, *q;
1064 
1065 				p = line;
1066 				for (q = eofmark + 1 ; *q && *p == *q ; p++, q++);
1067 				if (*p == '\n' && *q == '\0') {
1068 					c = PEOF;
1069 					plinno++;
1070 					needprompt = doprompt;
1071 				} else {
1072 					pushstring(line, strlen(line), NULL);
1073 				}
1074 			}
1075 		}
1076 	}
1077 	goto checkend_return;
1078 }
1079 
1080 
1081 /*
1082  * Parse a redirection operator.  The variable "out" points to a string
1083  * specifying the fd to be redirected.  The variable "c" contains the
1084  * first character of the redirection operator.
1085  */
1086 
1087 parseredir: {
1088 	char fd = *out;
1089 	union node *np;
1090 
1091 	np = (union node *)stalloc(sizeof (struct nfile));
1092 	if (c == '>') {
1093 		np->nfile.fd = 1;
1094 		c = pgetc();
1095 		if (c == '>')
1096 			np->type = NAPPEND;
1097 		else if (c == '&')
1098 			np->type = NTOFD;
1099 		else {
1100 			np->type = NTO;
1101 			pungetc();
1102 		}
1103 	} else {	/* c == '<' */
1104 		np->nfile.fd = 0;
1105 		c = pgetc();
1106 		if (c == '<') {
1107 			if (sizeof (struct nfile) != sizeof (struct nhere)) {
1108 				np = (union node *)stalloc(sizeof (struct nhere));
1109 				np->nfile.fd = 0;
1110 			}
1111 			np->type = NHERE;
1112 			heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
1113 			heredoc->here = np;
1114 			if ((c = pgetc()) == '-') {
1115 				heredoc->striptabs = 1;
1116 			} else {
1117 				heredoc->striptabs = 0;
1118 				pungetc();
1119 			}
1120 		} else if (c == '&')
1121 			np->type = NFROMFD;
1122 		else {
1123 			np->type = NFROM;
1124 			pungetc();
1125 		}
1126 	}
1127 	if (fd != '\0')
1128 		np->nfile.fd = digit_val(fd);
1129 	redirnode = np;
1130 	goto parseredir_return;
1131 }
1132 
1133 
1134 /*
1135  * Parse a substitution.  At this point, we have read the dollar sign
1136  * and nothing else.
1137  */
1138 
1139 parsesub: {
1140 	int subtype;
1141 	int typeloc;
1142 	int flags;
1143 	char *p;
1144 #ifndef GDB_HACK
1145 	static const char types[] = "}-+?=";
1146 #endif
1147        int bracketed_name = 0; /* used to handle ${[0-9]*} variables */
1148 
1149 	c = pgetc();
1150 	if (c != '(' && c != '{' && !is_name(c) && !is_special(c)) {
1151 		USTPUTC('$', out);
1152 		pungetc();
1153 	} else if (c == '(') {	/* $(command) or $((arith)) */
1154 		if (pgetc() == '(') {
1155 			PARSEARITH();
1156 		} else {
1157 			pungetc();
1158 			PARSEBACKQNEW();
1159 		}
1160 	} else {
1161 		USTPUTC(CTLVAR, out);
1162 		typeloc = out - stackblock();
1163 		USTPUTC(VSNORMAL, out);
1164 		subtype = VSNORMAL;
1165 		if (c == '{') {
1166 			bracketed_name = 1;
1167 			c = pgetc();
1168 			if (c == '#') {
1169 				if ((c = pgetc()) == '}')
1170 					c = '#';
1171 				else
1172 					subtype = VSLENGTH;
1173 			}
1174 			else
1175 				subtype = 0;
1176 		}
1177 		if (is_name(c)) {
1178 			do {
1179 				STPUTC(c, out);
1180 				c = pgetc();
1181 			} while (is_in_name(c));
1182 		} else if (is_digit(c)) {
1183 			if (bracketed_name) {
1184 				do {
1185 					STPUTC(c, out);
1186 					c = pgetc();
1187 				} while (is_digit(c));
1188 			} else {
1189 				STPUTC(c, out);
1190 				c = pgetc();
1191 			}
1192 		} else {
1193 			if (! is_special(c))
1194 badsub:				synerror("Bad substitution");
1195 			USTPUTC(c, out);
1196 			c = pgetc();
1197 		}
1198 		STPUTC('=', out);
1199 		flags = 0;
1200 		if (subtype == 0) {
1201 			switch (c) {
1202 			case ':':
1203 				flags = VSNUL;
1204 				c = pgetc();
1205 				/*FALLTHROUGH*/
1206 			default:
1207 				p = strchr(types, c);
1208 				if (p == NULL)
1209 					goto badsub;
1210 				subtype = p - types + VSNORMAL;
1211 				break;
1212 			case '%':
1213 			case '#':
1214 				{
1215 					int cc = c;
1216 					subtype = c == '#' ? VSTRIMLEFT :
1217 							     VSTRIMRIGHT;
1218 					c = pgetc();
1219 					if (c == cc)
1220 						subtype++;
1221 					else
1222 						pungetc();
1223 					break;
1224 				}
1225 			}
1226 		} else {
1227 			pungetc();
1228 		}
1229 		if (dblquote || arinest)
1230 			flags |= VSQUOTE;
1231 		*(stackblock() + typeloc) = subtype | flags;
1232 		if (subtype != VSNORMAL)
1233 			varnest++;
1234 	}
1235 	goto parsesub_return;
1236 }
1237 
1238 
/*
 * Parse a command substitution.  "oldstyle" is nonzero when the command
 * is enclosed in backquotes and zero when it is enclosed in $(...).  The
 * parsed command is appended to the end of the bqlist node list.  The
 * characters already accumulated in the output string (savelen of them)
 * are copied aside before the command is parsed and put back afterwards.
 */
1245 
1246 parsebackq: {
1247 	struct nodelist **nlpp;
1248 	int savepbq;
1249 	union node *n;
1250 	char *volatile str;
1251 	struct jmploc jmploc;
1252 	struct jmploc *volatile savehandler;
1253 	int savelen;
1254 	int saveprompt;
1255 #if __GNUC__
1256 	/* Avoid longjmp clobbering */
1257 	(void) &saveprompt;
1258 #endif
1259 
1260 	savepbq = parsebackquote;
1261 	if (setjmp(jmploc.loc)) {
1262 		if (str)
1263 			ckfree(str);
1264 		parsebackquote = 0;
1265 		handler = savehandler;
1266 		longjmp(handler->loc, 1);
1267 	}
1268 	INTOFF;
1269 	str = NULL;
1270 	savelen = out - stackblock();
1271 	if (savelen > 0) {
1272 		str = ckmalloc(savelen);
1273 		memcpy(str, stackblock(), savelen);
1274 	}
1275 	savehandler = handler;
1276 	handler = &jmploc;
1277 	INTON;
1278         if (oldstyle) {
1279                 /* We must read until the closing backquote, giving special
1280                    treatment to some slashes, and then push the string and
1281                    reread it as input, interpreting it normally.  */
1282                 char *out;
1283                 int c;
1284                 int savelen;
1285                 char *str;
1286 
1287 
1288                 STARTSTACKSTR(out);
1289 		for (;;) {
1290 			if (needprompt) {
1291 				setprompt(2);
1292 				needprompt = 0;
1293 			}
1294 			switch (c = pgetc()) {
1295 			case '`':
1296 				goto done;
1297 
1298 			case '\\':
1299                                 if ((c = pgetc()) == '\n') {
1300 					plinno++;
1301 					if (doprompt)
1302 						setprompt(2);
1303 					else
1304 						setprompt(0);
1305 					/*
1306 					 * If eating a newline, avoid putting
1307 					 * the newline into the new character
1308 					 * stream (via the STPUTC after the
1309 					 * switch).
1310 					 */
1311 					continue;
1312 				}
1313                                 if (c != '\\' && c != '`' && c != '$'
1314                                     && (!dblquote || c != '"'))
1315                                         STPUTC('\\', out);
1316 				break;
1317 
1318 			case '\n':
1319 				plinno++;
1320 				needprompt = doprompt;
1321 				break;
1322 
1323 			case PEOF:
1324 			        startlinno = plinno;
1325 				synerror("EOF in backquote substitution");
1326  				break;
1327 
1328 			default:
1329 				break;
1330 			}
1331 			STPUTC(c, out);
1332                 }
1333 done:
1334                 STPUTC('\0', out);
1335                 savelen = out - stackblock();
1336                 if (savelen > 0) {
1337                         str = ckmalloc(savelen);
1338                         memcpy(str, stackblock(), savelen);
1339 			setinputstring(str, 1);
1340                 }
1341         }
1342 	nlpp = &bqlist;
1343 	while (*nlpp)
1344 		nlpp = &(*nlpp)->next;
1345 	*nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist));
1346 	(*nlpp)->next = NULL;
1347 	parsebackquote = oldstyle;
1348 
1349 	if (oldstyle) {
1350 		saveprompt = doprompt;
1351 		doprompt = 0;
1352 	}
1353 
1354 	n = list(0);
1355 
1356 	if (oldstyle)
1357 		doprompt = saveprompt;
1358 	else {
1359 		if (readtoken() != TRP)
1360 			synexpect(TRP);
1361 	}
1362 
1363 	(*nlpp)->n = n;
1364         if (oldstyle) {
1365 		/*
1366 		 * Start reading from old file again, ignoring any pushed back
1367 		 * tokens left from the backquote parsing
1368 		 */
1369                 popfile();
1370 		tokpushback = 0;
1371 	}
1372 	while (stackblocksize() <= savelen)
1373 		growstackblock();
1374 	STARTSTACKSTR(out);
1375 	if (str) {
1376 		memcpy(out, str, savelen);
1377 		STADJUST(savelen, out);
1378 		INTOFF;
1379 		ckfree(str);
1380 		str = NULL;
1381 		INTON;
1382 	}
1383 	parsebackquote = savepbq;
1384 	handler = savehandler;
1385 	if (arinest || dblquote)
1386 		USTPUTC(CTLBACKQ | CTLQUOTE, out);
1387 	else
1388 		USTPUTC(CTLBACKQ, out);
1389 	if (oldstyle)
1390 		goto parsebackq_oldreturn;
1391 	else
1392 		goto parsebackq_newreturn;
1393 }
1394 
1395 /*
1396  * Parse an arithmetic expansion (indicate start of one and set state)
1397  */
1398 parsearith: {
1399 
1400 	if (++arinest == 1) {
1401 		prevsyntax = syntax;
1402 		syntax = ARISYNTAX;
1403 		USTPUTC(CTLARI, out);
1404 	} else {
1405 		/*
1406 		 * we collapse embedded arithmetic expansion to
1407 		 * parenthesis, which should be equivalent
1408 		 */
1409 		USTPUTC('(', out);
1410 	}
1411 	goto parsearith_return;
1412 }
1413 
1414 } /* end of readtoken */
1415 
1416 
1417 
#ifdef mkinit
/*
 * Only visible when "mkinit" is defined.  Clears the keyword-check flag
 * and any pushed-back token.
 * NOTE(review): presumably collected by the mkinit tool into the shell's
 * reset routine -- confirm against mkinit.
 */
RESET {
	checkkwd = 0;
	tokpushback = 0;
}
#endif
1424 
1425 /*
1426  * Returns true if the text contains nothing to expand (no dollar signs
1427  * or backquotes).
1428  */
1429 
1430 STATIC int
1431 noexpand(text)
1432 	char *text;
1433 	{
1434 	char *p;
1435 	char c;
1436 
1437 	p = text;
1438 	while ((c = *p++) != '\0') {
1439 		if (c == CTLESC)
1440 			p++;
1441 		else if (BASESYNTAX[c] == CCTL)
1442 			return 0;
1443 	}
1444 	return 1;
1445 }
1446 
1447 
/*
 * Check whether "name" is a legal variable name: a letter or underscore
 * followed by zero or more letters, underscores, and digits.
 *
 * Returns 1 if the whole string qualifies, 0 otherwise (the first
 * character is tested with is_name(), so an empty string is accepted
 * only if is_name('\0') is true).
 */

int
goodname(name)
	char *name;
{
	char *cur = name;

	/* The first character has the stricter rule. */
	if (!is_name(*cur))
		return 0;

	/* Every remaining character up to the terminator must be allowed
	 * inside a name. */
	for (cur++; *cur != '\0'; cur++) {
		if (!is_in_name(*cur))
			return 0;
	}
	return 1;
}
1468 
1469 
1470 /*
1471  * Called when an unexpected token is read during the parse.  The argument
1472  * is the token that is expected, or -1 if more than one type of token can
1473  * occur at this point.
1474  */
1475 
1476 STATIC void
1477 synexpect(token)
1478 	int token;
1479 {
1480 	char msg[64];
1481 
1482 	if (token >= 0) {
1483 		fmtstr(msg, 64, "%s unexpected (expecting %s)",
1484 			tokname[lasttoken], tokname[token]);
1485 	} else {
1486 		fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]);
1487 	}
1488 	synerror(msg);
1489 }
1490 
1491 
1492 STATIC void
1493 synerror(msg)
1494 	char *msg;
1495 	{
1496 	if (commandname)
1497 		outfmt(&errout, "%s: %d: ", commandname, startlinno);
1498 	outfmt(&errout, "Syntax error: %s\n", msg);
1499 	error((char *)NULL);
1500 }
1501 
1502 STATIC void
1503 setprompt(which)
1504 	int which;
1505 	{
1506 	whichprompt = which;
1507 
1508 #ifndef NO_HISTORY
1509 	if (!el)
1510 #endif
1511 		out2str(getprompt(NULL));
1512 }
1513 
1514 /*
1515  * called by editline -- any expansions to the prompt
1516  *    should be added here.
1517  */
1518 char *
1519 getprompt(unused)
1520 	void *unused __unused;
1521 {
1522 	switch (whichprompt) {
1523 	case 0:
1524 		return "";
1525 	case 1:
1526 		return ps1val();
1527 	case 2:
1528 		return ps2val();
1529 	default:
1530 		return "<internal prompt error>";
1531 	}
1532 }
1533