xref: /illumos-gate/usr/src/cmd/csh/sh.dol.c (revision ffb6483089015eb90be1f5e7fc2a96c9929546a6)
1 /*
2  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
7 /*	  All Rights Reserved  	*/
8 
9 /*
10  * Copyright (c) 1980 Regents of the University of California.
11  * All rights reserved.  The Berkeley Software License Agreement
12  * specifies the terms and conditions for redistribution.
13  */
14 
15 #include <unistd.h>		/* for lseek prototype */
16 #include "sh.h"
17 #include "sh.tconst.h"
18 
19 /*
20  * C shell
21  */
22 
23 bool	noexec;
24 long	gargc;
25 short	OLDSTD;
26 short	gflag;
27 tchar	*bname;
28 tchar	*file;
29 tchar	**gargv;
30 tchar	*doldol;
31 tchar	*lap;
32 tchar	**pargv;
33 
34 /*
35  * These routines perform variable substitution and quoting via ' and ".
36  * To this point these constructs have been preserved in the divided
37  * input words.  Here we expand variables and turn quoting via ' and " into
38  * QUOTE bits on characters (which prevent further interpretation).
39  * If the `:q' modifier was applied during history expansion, then
40  * some QUOTEing may have occurred already, so we dont "trim()" here.
41  */
42 
43 int	Dpeekc, Dpeekrd;		/* Peeks for DgetC and Dreadc */
44 tchar	*Dcp, **Dvp;			/* Input vector for Dreadc */
45 
/* Value returned by DgetC/Dredc when the input vector is exhausted. */
#define	DEOF	(-1)

/*
 * Push one character back so that the next DgetC returns it.
 * Argument and expansion are parenthesized so that the macro is safe
 * with any expression argument and inside larger expressions.
 */
#define	unDgetC(c)	(Dpeekc = (c))

#define	QUOTES		(_Q|_Q1|_ESC)	/* \ ' " ` */
51 
52 /*
53  * The following variables give the information about the current
54  * $ expansion, recording the current word position, the remaining
55  * words within this expansion, the count of remaining words, and the
56  * information about any : modifier which is being applied.
57  */
58 tchar	*dolp;			/* Remaining chars from this word */
59 tchar	**dolnxt;		/* Further words */
60 int	dolcnt;			/* Count of further words */
61 tchar	dolmod;			/* : modifier character */
62 int	dolmcnt;		/* :gx -> 10000, else 1 */
63 
64 void	Dfix2(tchar **);
65 void	Dgetdol(void);
66 void	setDolp(tchar *);
67 void	unDredc(int);
68 
69 /*
70  * Fix up the $ expansions and quotations in the
71  * argument list to command t.
72  */
73 void
74 Dfix(struct command *t)
75 {
76 	tchar **pp;
77 	tchar *p;
78 
79 #ifdef TRACE
80 	tprintf("TRACE- Dfix()\n");
81 #endif
82 	if (noexec)
83 		return;
84 	/* Note that t_dcom isn't trimmed thus !...:q's aren't lost */
85 	for (pp = t->t_dcom; p = *pp++; )
86 		while (*p)
87 			if (cmap(*p++, _DOL|QUOTES)) {	/* $, \, ', ", ` */
88 				Dfix2(t->t_dcom);	/* found one */
89 				blkfree(t->t_dcom);
90 				t->t_dcom = gargv;
91 				gargv = 0;
92 				return;
93 			}
94 }
95 
96 /*
97  * $ substitute one word, for i/o redirection
98  */
99 tchar *
100 Dfix1(tchar *cp)
101 {
102 	tchar *Dv[2];
103 
104 #ifdef TRACE
105 	tprintf("TRACE- Dfix1()\n");
106 #endif
107 	if (noexec)
108 		return (0);
109 	Dv[0] = cp; Dv[1] = NOSTR;
110 	Dfix2(Dv);
111 	if (gargc != 1) {
112 		setname(cp);
113 		bferr("Ambiguous");
114 	}
115 	cp = savestr(gargv[0]);
116 	blkfree(gargv), gargv = 0;
117 	return (cp);
118 }
119 
120 /*
121  * Subroutine to do actual fixing after state initialization.
122  */
123 void
124 Dfix2(tchar **v)
125 {
126 	tchar *agargv[GAVSIZ];
127 
128 #ifdef TRACE
129 	tprintf("TRACE- Dfix2()\n");
130 #endif
131 	ginit(agargv);			/* Initialize glob's area pointers */
132 	Dvp = v; Dcp = S_ /* "" */;	/* Setup input vector for Dreadc */
133 	unDgetC(0); unDredc(0);		/* Clear out any old peeks (at error) */
134 	dolp = 0; dolcnt = 0;		/* Clear out residual $ expands (...) */
135 	while (Dword())
136 		continue;
137 	gargv = copyblk(gargv);
138 }
139 
140 /*
141  * Get a word.  This routine is analogous to the routine
142  * word() in sh.lex.c for the main lexical input.  One difference
143  * here is that we don't get a newline to terminate our expansion.
144  * Rather, DgetC will return a DEOF when we hit the end-of-input.
145  */
146 int
147 Dword(void)
148 {
149 	int c, c1;
150 	static tchar *wbuf = NULL;
151 	static int wbufsiz = BUFSIZ;
152 	int wp = 0;
153 	bool dolflg;
154 	bool sofar = 0;
155 #define	DYNAMICBUFFER() \
156 	do { \
157 		if (wp >= wbufsiz) { \
158 			wbufsiz += BUFSIZ; \
159 			wbuf = xrealloc(wbuf, (wbufsiz+1) * sizeof (tchar)); \
160 		} \
161 	} while (0)
162 
163 #ifdef TRACE
164 	tprintf("TRACE- Dword()\n");
165 #endif
166 	if (wbuf == NULL)
167 		wbuf = xalloc((wbufsiz+1) * sizeof (tchar));
168 loop:
169 	c = DgetC(DODOL);
170 	switch (c) {
171 
172 	case DEOF:
173 deof:
174 		if (sofar == 0)
175 			return (0);
176 		/* finish this word and catch the code above the next time */
177 		unDredc(c);
178 		/* fall into ... */
179 
180 	case '\n':
181 		wbuf[wp] = 0;
182 		goto ret;
183 
184 	case ' ':
185 	case '\t':
186 		goto loop;
187 
188 	case '`':
189 		/* We preserve ` quotations which are done yet later */
190 		wbuf[wp++] = c;
191 	case '\'':
192 	case '"':
193 		/*
194 		 * Note that DgetC never returns a QUOTES character
195 		 * from an expansion, so only true input quotes will
196 		 * get us here or out.
197 		 */
198 		c1 = c;
199 		dolflg = c1 == '"' ? DODOL : 0;
200 		for (;;) {
201 			c = DgetC(dolflg);
202 			if (c == c1)
203 				break;
204 			if (c == '\n' || c == DEOF)
205 				error("Unmatched %c", (tchar) c1);
206 			if ((c & (QUOTE|TRIM)) == ('\n' | QUOTE))
207 				--wp;
208 			DYNAMICBUFFER();
209 			switch (c1) {
210 
211 			case '"':
212 				/*
213 				 * Leave any `s alone for later.
214 				 * Other chars are all quoted, thus `...`
215 				 * can tell it was within "...".
216 				 */
217 				wbuf[wp++] = c == '`' ? '`' : c | QUOTE;
218 				break;
219 
220 			case '\'':
221 				/* Prevent all further interpretation */
222 				wbuf[wp++] = c | QUOTE;
223 				break;
224 
225 			case '`':
226 				/* Leave all text alone for later */
227 				wbuf[wp++] = c;
228 				break;
229 			}
230 		}
231 		if (c1 == '`') {
232 			DYNAMICBUFFER();
233 			wbuf[wp++] = '`';
234 		}
235 		goto pack;		/* continue the word */
236 
237 	case '\\':
238 		c = DgetC(0);		/* No $ subst! */
239 		if (c == '\n' || c == DEOF)
240 			goto loop;
241 		c |= QUOTE;
242 		break;
243 #ifdef MBCHAR /* Could be a space char from aux. codeset. */
244 	default:
245 		if (isauxsp(c)) goto loop;
246 #endif /* MBCHAR */
247 	}
248 	unDgetC(c);
249 pack:
250 	sofar = 1;
251 	/* pack up more characters in this word */
252 	for (;;) {
253 		c = DgetC(DODOL);
254 		if (c == '\\') {
255 			c = DgetC(0);
256 			if (c == DEOF)
257 				goto deof;
258 			if (c == '\n')
259 				c = ' ';
260 			else
261 				c |= QUOTE;
262 		}
263 		if (c == DEOF)
264 			goto deof;
265 		if (cmap(c, _SP|_NL|_Q|_Q1) ||
266 		    isauxsp(c)) {		/* sp \t\n'"` or aux. sp */
267 			unDgetC(c);
268 			if (cmap(c, QUOTES))
269 				goto loop;
270 			DYNAMICBUFFER();
271 			wbuf[wp++] = 0;
272 			goto ret;
273 		}
274 		DYNAMICBUFFER();
275 		wbuf[wp++] = c;
276 	}
277 ret:
278 	Gcat(S_ /* "" */, wbuf);
279 	return (1);
280 }
281 
282 /*
283  * Get a character, performing $ substitution unless flag is 0.
284  * Any QUOTES character which is returned from a $ expansion is
285  * QUOTEd so that it will not be recognized above.
286  */
287 int
288 DgetC(int flag)
289 {
290 	int c;
291 
292 top:
293 	if (c = Dpeekc) {
294 		Dpeekc = 0;
295 		return (c);
296 	}
297 	if (lap) {
298 		c = *lap++ & (QUOTE|TRIM);
299 		if (c == 0) {
300 			lap = 0;
301 			goto top;
302 		}
303 quotspec:
304 		/*
305 		 *	don't quote things if there was an error (err!=0)
306 		 * 	the input is original, not from a substitution and
307 		 *	therefore should not be quoted
308 		 */
309 		if (!err_msg && cmap(c, QUOTES))
310 			return (c | QUOTE);
311 		return (c);
312 	}
313 	if (dolp) {
314 		if (c = *dolp++ & (QUOTE|TRIM))
315 			goto quotspec;
316 		if (dolcnt > 0) {
317 			setDolp(*dolnxt++);
318 			--dolcnt;
319 			return (' ');
320 		}
321 		dolp = 0;
322 	}
323 	if (dolcnt > 0) {
324 		setDolp(*dolnxt++);
325 		--dolcnt;
326 		goto top;
327 	}
328 	c = Dredc();
329 	if (c == '$' && flag) {
330 		Dgetdol();
331 		goto top;
332 	}
333 	return (c);
334 }
335 
336 tchar *nulvec[] = { 0 };
337 struct	varent nulargv = { nulvec, S_argv, 0 };
338 
339 /*
340  * Handle the multitudinous $ expansion forms.
341  * Ugh.
342  */
343 void
344 Dgetdol(void)
345 {
346 	tchar *np;
347 	struct varent *vp;
348 	tchar name[MAX_VREF_LEN];
349 	int c, sc;
350 	int subscr = 0, lwb = 1, upb = 0;
351 	bool dimen = 0, bitset = 0;
352 	tchar wbuf[BUFSIZ + MB_LEN_MAX]; /* read_ may return extra bytes */
353 
354 #ifdef TRACE
355 	tprintf("TRACE- Dgetdol()\n");
356 #endif
357 	dolmod = dolmcnt = 0;
358 	c = sc = DgetC(0);
359 	if (c == '{')
360 		c = DgetC(0);		/* sc is { to take } later */
361 	if ((c & TRIM) == '#')
362 		dimen++, c = DgetC(0);		/* $# takes dimension */
363 	else if (c == '?')
364 		bitset++, c = DgetC(0);		/* $? tests existence */
365 	switch (c) {
366 
367 	case '$':
368 		if (dimen || bitset)
369 syntax:
370 		error("Variable syntax");  /* No $?$, $#$ */
371 		setDolp(doldol);
372 		goto eatbrac;
373 
374 	case '<'|QUOTE:
375 		if (dimen || bitset)
376 			goto syntax;		/* No $?<, $#< */
377 		for (np = wbuf; read_(OLDSTD, np, 1) == 1; np++) {
378 			if (np >= &wbuf[BUFSIZ-1])
379 				error("$< line too long");
380 			if (*np <= 0 || *np == '\n')
381 				break;
382 		}
383 		*np = 0;
384 		/*
385 		 * KLUDGE: dolmod is set here because it will
386 		 * cause setDolp to call domod and thus to copy wbuf.
387 		 * Otherwise setDolp would use it directly. If we saved
388 		 * it ourselves, no one would know when to free it.
389 		 * The actual function of the 'q' causes filename
390 		 * expansion not to be done on the interpolated value.
391 		 */
392 		dolmod = 'q';
393 		dolmcnt = 10000;
394 		setDolp(wbuf);
395 		goto eatbrac;
396 
397 	case DEOF:
398 	case '\n':
399 		goto syntax;
400 
401 	case '*':
402 		(void) strcpy_(name, S_argv);
403 		vp = adrof(S_argv);
404 		subscr = -1;			/* Prevent eating [...] */
405 		break;
406 
407 	default:
408 		np = name;
409 		if (digit(c)) {
410 			if (dimen)
411 				goto syntax;	/* No $#1, e.g. */
412 			subscr = 0;
413 			do {
414 				subscr = subscr * 10 + c - '0';
415 				c = DgetC(0);
416 			} while (digit(c));
417 			unDredc(c);
418 			if (subscr < 0)
419 				error("Subscript out of range");
420 			if (subscr == 0) {
421 				if (bitset) {
422 					dolp = file ? S_1 /* "1" */ : S_0 /* "0" */;
423 					goto eatbrac;
424 				}
425 				if (file == 0)
426 					error("No file for $0");
427 				setDolp(file);
428 				goto eatbrac;
429 			}
430 			if (bitset)
431 				goto syntax;
432 			vp = adrof(S_argv);
433 			if (vp == 0) {
434 				vp = &nulargv;
435 				goto eatmod;
436 			}
437 			break;
438 		}
439 		if (!alnum(c))
440 			goto syntax;
441 		for (;;) {
442 			*np++ = c;
443 			c = DgetC(0);
444 			if (!alnum(c))
445 				break;
446 			/* if variable name is > 20, complain */
447 			if (np >= &name[MAX_VAR_LEN])
448 				error("Variable name too long");
449 
450 		}
451 		*np++ = 0;
452 		unDredc(c);
453 		vp = adrof(name);
454 	}
455 	if (bitset) {
456 		/*
457 		 * getenv() to getenv_(), because 'name''s type is now tchar *
458 		 * no need to xalloc
459 		 */
460 		dolp = (vp || getenv_(name)) ? S_1 /* "1" */ : S_0 /* "0" */;
461 		goto eatbrac;
462 	}
463 	if (vp == 0) {
464 		/*
465 		 * getenv() to getenv_(), because 'name''s type is now tchar *
466 		 * no need to xalloc
467 		 */
468 		np = getenv_(name);
469 		if (np) {
470 			addla(np);
471 			goto eatbrac;
472 		}
473 		udvar(name);
474 		/*NOTREACHED*/
475 	}
476 	c = DgetC(0);
477 	upb = blklen(vp->vec);
478 	if (dimen == 0 && subscr == 0 && c == '[') {
479 		np = name;
480 		for (;;) {
481 			c = DgetC(DODOL);	/* Allow $ expand within [ ] */
482 			if (c == ']')
483 				break;
484 			if (c == '\n' || c == DEOF)
485 				goto syntax;
486 			if (np >= &name[MAX_VREF_LEN])
487 				error("Variable reference too long");
488 			*np++ = c;
489 		}
490 		*np = 0, np = name;
491 		if (dolp || dolcnt)		/* $ exp must end before ] */
492 			goto syntax;
493 		if (!*np)
494 			goto syntax;
495 		if (digit(*np)) {
496 			int i = 0;
497 
498 			while (digit(*np))
499 				i = i * 10 + *np++ - '0';
500 /*			if ((i < 0 || i > upb) && !any(*np, "-*")) { */
501 			if ((i < 0 || i > upb) && (*np != '-') && (*np != '*')) {
502 oob:
503 				setname(vp->v_name);
504 				error("Subscript out of range");
505 			}
506 			lwb = i;
507 			if (!*np)
508 				upb = lwb, np = S_AST /* "*" */;
509 		}
510 		if (*np == '*')
511 			np++;
512 		else if (*np != '-')
513 			goto syntax;
514 		else {
515 			int i = upb;
516 
517 			np++;
518 			if (digit(*np)) {
519 				i = 0;
520 				while (digit(*np))
521 					i = i * 10 + *np++ - '0';
522 				if (i < 0 || i > upb)
523 					goto oob;
524 			}
525 			if (i < lwb)
526 				upb = lwb - 1;
527 			else
528 				upb = i;
529 		}
530 		if (lwb == 0) {
531 			if (upb != 0)
532 				goto oob;
533 			upb = -1;
534 		}
535 		if (*np)
536 			goto syntax;
537 	} else {
538 		if (subscr > 0)
539 			if (subscr > upb)
540 				lwb = 1, upb = 0;
541 			else
542 				lwb = upb = subscr;
543 		unDredc(c);
544 	}
545 	if (dimen) {
546 		tchar *cp = putn(upb - lwb + 1);
547 
548 		addla(cp);
549 		xfree(cp);
550 	} else {
551 eatmod:
552 		c = DgetC(0);
553 		if (c == ':') {
554 			c = DgetC(0), dolmcnt = 1;
555 			if (c == 'g')
556 				c = DgetC(0), dolmcnt = 10000;
557 			if (!any(c, S_htrqxe))
558 				error("Bad : mod in $");
559 			dolmod = c;
560 			if (c == 'q')
561 				dolmcnt = 10000;
562 		} else
563 			unDredc(c);
564 		dolnxt = &vp->vec[lwb - 1];
565 		dolcnt = upb - lwb + 1;
566 	}
567 eatbrac:
568 	if (sc == '{') {
569 		c = Dredc();
570 		if (c != '}')
571 			goto syntax;
572 	}
573 }
574 
575 void
576 setDolp(tchar *cp)
577 {
578 	tchar *dp;
579 
580 #ifdef TRACE
581 	tprintf("TRACE- setDolp()\n");
582 #endif
583 	if (dolmod == 0 || dolmcnt == 0) {
584 		dolp = cp;
585 		return;
586 	}
587 	dp = domod(cp, dolmod);
588 	if (dp) {
589 		dolmcnt--;
590 		addla(dp);
591 		xfree(dp);
592 	} else
593 		addla(cp);
594 	dolp = S_ /* "" */;
595 }
596 
597 void
598 unDredc(int c)
599 {
600 
601 	Dpeekrd = c;
602 }
603 
604 int
605 Dredc()
606 {
607 	int c;
608 
609 	if (c = Dpeekrd) {
610 		Dpeekrd = 0;
611 		return (c);
612 	}
613 	if (Dcp && (c = *Dcp++))
614 		return (c&(QUOTE|TRIM));
615 	if (*Dvp == 0) {
616 		Dcp = 0;
617 		return (DEOF);
618 	}
619 	Dcp = *Dvp++;
620 	return (' ');
621 }
622 
623 void
624 Dtestq(int c)
625 {
626 
627 	if (cmap(c, QUOTES))
628 		gflag = 1;
629 }
630 
631 /*
632  * Form a shell temporary file (in unit 0) from the words
633  * of the shell input up to a line the same as "term".
634  * Unit 0 should have been closed before this call.
635  */
636 void
637 heredoc(tchar *term)
638 {
639 	int c;
640 	tchar *Dv[2];
641 	tchar obuf[BUFSIZ], lbuf[BUFSIZ], mbuf[BUFSIZ];
642 	int ocnt, lcnt, mcnt;
643 	tchar *lbp, *obp, *mbp;
644 	tchar **vp;
645 	bool quoted;
646 	tchar shtemp[] = {'/', 't', 'm', 'p', '/', 's', 'h', 'X', 'X', 'X',
647 'X', 'X', 'X', 0};
648 	int fd1;
649 
650 #ifdef TRACE
651 	tprintf("TRACE- heredoc()\n");
652 #endif
653 	if ((fd1 = mkstemp_(shtemp)) < 0)
654 		Perror(shtemp);
655 	(void) unlink_(shtemp);			/* 0 0 inode! */
656 	unsetfd(fd1);
657 	Dv[0] = term; Dv[1] = NOSTR; gflag = 0;
658 	trim(Dv); rscan(Dv, Dtestq); quoted = gflag;
659 	ocnt = BUFSIZ; obp = obuf;
660 	for (;;) {
661 		/*
662 		 * Read up a line
663 		 */
664 		lbp = lbuf; lcnt = BUFSIZ - 4;
665 		for (;;) {
666 			c = readc(1);		/* 1 -> Want EOF returns */
667 			if (c < 0) {
668 				setname(term);
669 				bferr("<< terminator not found");
670 			}
671 			if (c == '\n')
672 				break;
673 			if (c &= TRIM) {
674 				*lbp++ = c;
675 				if (--lcnt < 0) {
676 					setname(S_LESLES /* "<<" */);
677 					error("Line overflow");
678 				}
679 			}
680 		}
681 		*lbp = 0;
682 
683 		/*
684 		 * Compare to terminator -- before expansion
685 		 */
686 		if (eq(lbuf, term)) {
687 			(void) write_(0, obuf, BUFSIZ - ocnt);
688 			(void) lseek(0, (off_t)0, 0);
689 			return;
690 		}
691 
692 		/*
693 		 * If term was quoted or -n just pass it on
694 		 */
695 		if (quoted || noexec) {
696 			*lbp++ = '\n'; *lbp = 0;
697 			for (lbp = lbuf; c = *lbp++; ) {
698 				*obp++ = c;
699 				if (--ocnt == 0) {
700 					(void) write_(0, obuf, BUFSIZ);
701 					obp = obuf; ocnt = BUFSIZ;
702 				}
703 			}
704 			continue;
705 		}
706 
707 		/*
708 		 * Term wasn't quoted so variable and then command
709 		 * expand the input line
710 		 */
711 		Dcp = lbuf; Dvp = Dv + 1; mbp = mbuf; mcnt = BUFSIZ - 4;
712 		for (;;) {
713 			c = DgetC(DODOL);
714 			if (c == DEOF)
715 				break;
716 			if ((c &= TRIM) == 0)
717 				continue;
718 			/* \ quotes \ $ ` here */
719 			if (c == '\\') {
720 				c = DgetC(0);
721 /*				if (!any(c, "$\\`")) */
722 				if ((c != '$') && (c != '\\') && (c != '`'))
723 					unDgetC(c | QUOTE), c = '\\';
724 				else
725 					c |= QUOTE;
726 			}
727 			*mbp++ = c;
728 			if (--mcnt == 0) {
729 				setname(S_LESLES /* "<<" */);
730 				bferr("Line overflow");
731 			}
732 		}
733 		*mbp++ = 0;
734 
735 		/*
736 		 * If any ` in line do command substitution
737 		 */
738 		mbp = mbuf;
739 		if (any('`', mbp)) {
740 			/*
741 			 * 1 arg to dobackp causes substitution to be literal.
742 			 * Words are broken only at newlines so that all blanks
743 			 * and tabs are preserved.  Blank lines (null words)
744 			 * are not discarded.
745 			 */
746 			vp = dobackp(mbuf, 1);
747 		} else
748 			/* Setup trivial vector similar to return of dobackp */
749 			Dv[0] = mbp, Dv[1] = NOSTR, vp = Dv;
750 
751 		/*
752 		 * Resurrect the words from the command substitution
753 		 * each separated by a newline.  Note that the last
754 		 * newline of a command substitution will have been
755 		 * discarded, but we put a newline after the last word
756 		 * because this represents the newline after the last
757 		 * input line!
758 		 */
759 		for (; *vp; vp++) {
760 			for (mbp = *vp; *mbp; mbp++) {
761 				*obp++ = *mbp & TRIM;
762 				if (--ocnt == 0) {
763 					(void) write_(0, obuf, BUFSIZ);
764 					obp = obuf; ocnt = BUFSIZ;
765 				}
766 			}
767 			*obp++ = '\n';
768 			if (--ocnt == 0) {
769 				(void) write_(0, obuf, BUFSIZ);
770 				obp = obuf; ocnt = BUFSIZ;
771 			}
772 		}
773 		if (pargv)
774 			blkfree(pargv), pargv = 0;
775 	}
776 }
777