xref: /illumos-gate/usr/src/cmd/csh/sh.dol.c (revision 598f4ceed9327d2d6c2325dd67cae3aa06f7fea6)
1 /*
2  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
7 /*	  All Rights Reserved  	*/
8 
9 /*
10  * Copyright (c) 1980 Regents of the University of California.
11  * All rights reserved.  The Berkeley Software License Agreement
12  * specifies the terms and conditions for redistribution.
13  */
14 
15 #pragma ident	"%Z%%M%	%I%	%E% SMI"
16 
17 #include <unistd.h>		/* for lseek prototype */
18 #include "sh.h"
19 #include "sh.tconst.h"
20 
21 /*
22  * C shell
23  */
24 
25 /*
26  * These routines perform variable substitution and quoting via ' and ".
27  * To this point these constructs have been preserved in the divided
28  * input words.  Here we expand variables and turn quoting via ' and " into
29  * QUOTE bits on characters (which prevent further interpretation).
30  * If the `:q' modifier was applied during history expansion, then
 * some QUOTEing may have occurred already, so we don't "trim()" here.
32  */
33 
34 int	Dpeekc, Dpeekrd;		/* Peeks for DgetC and Dreadc */
35 tchar	*Dcp, **Dvp;			/* Input vector for Dreadc */
36 
37 #define	DEOF	-1
38 
39 #define	unDgetC(c)	Dpeekc = c
40 
41 #define	QUOTES		(_Q|_Q1|_ESC)	/* \ ' " ` */
42 
43 /*
44  * The following variables give the information about the current
45  * $ expansion, recording the current word position, the remaining
46  * words within this expansion, the count of remaining words, and the
47  * information about any : modifier which is being applied.
48  */
49 tchar	*dolp;			/* Remaining chars from this word */
50 tchar	**dolnxt;		/* Further words */
51 int	dolcnt;			/* Count of further words */
52 tchar	dolmod;			/* : modifier character */
53 int	dolmcnt;		/* :gx -> 10000, else 1 */
54 
55 void	Dfix2(tchar **);
56 void	Dgetdol(void);
57 void	setDolp(tchar *);
58 void	unDredc(int);
59 
60 /*
61  * Fix up the $ expansions and quotations in the
62  * argument list to command t.
63  */
64 void
65 Dfix(struct command *t)
66 {
67 	tchar **pp;
68 	tchar *p;
69 
70 #ifdef TRACE
71 	tprintf("TRACE- Dfix()\n");
72 #endif
73 	if (noexec)
74 		return;
75 	/* Note that t_dcom isn't trimmed thus !...:q's aren't lost */
76 	for (pp = t->t_dcom; p = *pp++; )
77 		while (*p)
78 			if (cmap(*p++, _DOL|QUOTES)) {	/* $, \, ', ", ` */
79 				Dfix2(t->t_dcom);	/* found one */
80 				blkfree(t->t_dcom);
81 				t->t_dcom = gargv;
82 				gargv = 0;
83 				return;
84 			}
85 }
86 
87 /*
88  * $ substitute one word, for i/o redirection
89  */
90 tchar *
91 Dfix1(tchar *cp)
92 {
93 	tchar *Dv[2];
94 
95 #ifdef TRACE
96 	tprintf("TRACE- Dfix1()\n");
97 #endif
98 	if (noexec)
99 		return (0);
100 	Dv[0] = cp; Dv[1] = NOSTR;
101 	Dfix2(Dv);
102 	if (gargc != 1) {
103 		setname(cp);
104 		bferr("Ambiguous");
105 	}
106 	cp = savestr(gargv[0]);
107 	blkfree(gargv), gargv = 0;
108 	return (cp);
109 }
110 
111 /*
112  * Subroutine to do actual fixing after state initialization.
113  */
114 void
115 Dfix2(tchar **v)
116 {
117 	tchar *agargv[GAVSIZ];
118 
119 #ifdef TRACE
120 	tprintf("TRACE- Dfix2()\n");
121 #endif
122 	ginit(agargv);			/* Initialize glob's area pointers */
123 	Dvp = v; Dcp = S_ /* "" */;	/* Setup input vector for Dreadc */
124 	unDgetC(0); unDredc(0);		/* Clear out any old peeks (at error) */
125 	dolp = 0; dolcnt = 0;		/* Clear out residual $ expands (...) */
126 	while (Dword())
127 		continue;
128 	gargv = copyblk(gargv);
129 }
130 
131 /*
132  * Get a word.  This routine is analogous to the routine
133  * word() in sh.lex.c for the main lexical input.  One difference
134  * here is that we don't get a newline to terminate our expansion.
135  * Rather, DgetC will return a DEOF when we hit the end-of-input.
136  */
137 int
138 Dword(void)
139 {
140 	int c, c1;
141 	static tchar *wbuf = NULL;
142 	static int wbufsiz = BUFSIZ;
143 	int wp = 0;
144 	bool dolflg;
145 	bool sofar = 0;
146 #define	DYNAMICBUFFER() \
147 	do { \
148 		if (wp >= wbufsiz) { \
149 			wbufsiz += BUFSIZ; \
150 			wbuf = xrealloc(wbuf, (wbufsiz+1) * sizeof (tchar)); \
151 		} \
152 	} while (0)
153 
154 #ifdef TRACE
155 	tprintf("TRACE- Dword()\n");
156 #endif
157 	if (wbuf == NULL)
158 		wbuf = xalloc((wbufsiz+1) * sizeof (tchar));
159 loop:
160 	c = DgetC(DODOL);
161 	switch (c) {
162 
163 	case DEOF:
164 deof:
165 		if (sofar == 0)
166 			return (0);
167 		/* finish this word and catch the code above the next time */
168 		unDredc(c);
169 		/* fall into ... */
170 
171 	case '\n':
172 		wbuf[wp] = 0;
173 		goto ret;
174 
175 	case ' ':
176 	case '\t':
177 		goto loop;
178 
179 	case '`':
180 		/* We preserve ` quotations which are done yet later */
181 		wbuf[wp++] = c;
182 	case '\'':
183 	case '"':
184 		/*
185 		 * Note that DgetC never returns a QUOTES character
186 		 * from an expansion, so only true input quotes will
187 		 * get us here or out.
188 		 */
189 		c1 = c;
190 		dolflg = c1 == '"' ? DODOL : 0;
191 		for (;;) {
192 			c = DgetC(dolflg);
193 			if (c == c1)
194 				break;
195 			if (c == '\n' || c == DEOF)
196 				error("Unmatched %c", (tchar) c1);
197 			if ((c & (QUOTE|TRIM)) == ('\n' | QUOTE))
198 				--wp;
199 			DYNAMICBUFFER();
200 			switch (c1) {
201 
202 			case '"':
203 				/*
204 				 * Leave any `s alone for later.
205 				 * Other chars are all quoted, thus `...`
206 				 * can tell it was within "...".
207 				 */
208 				wbuf[wp++] = c == '`' ? '`' : c | QUOTE;
209 				break;
210 
211 			case '\'':
212 				/* Prevent all further interpretation */
213 				wbuf[wp++] = c | QUOTE;
214 				break;
215 
216 			case '`':
217 				/* Leave all text alone for later */
218 				wbuf[wp++] = c;
219 				break;
220 			}
221 		}
222 		if (c1 == '`') {
223 			DYNAMICBUFFER();
224 			wbuf[wp++] = '`';
225 		}
226 		goto pack;		/* continue the word */
227 
228 	case '\\':
229 		c = DgetC(0);		/* No $ subst! */
230 		if (c == '\n' || c == DEOF)
231 			goto loop;
232 		c |= QUOTE;
233 		break;
234 #ifdef MBCHAR /* Could be a space char from aux. codeset. */
235 	default:
236 		if (isauxsp(c)) goto loop;
237 #endif /* MBCHAR */
238 	}
239 	unDgetC(c);
240 pack:
241 	sofar = 1;
242 	/* pack up more characters in this word */
243 	for (;;) {
244 		c = DgetC(DODOL);
245 		if (c == '\\') {
246 			c = DgetC(0);
247 			if (c == DEOF)
248 				goto deof;
249 			if (c == '\n')
250 				c = ' ';
251 			else
252 				c |= QUOTE;
253 		}
254 		if (c == DEOF)
255 			goto deof;
256 		if (cmap(c, _SP|_NL|_Q|_Q1) ||
257 		    isauxsp(c)) {		/* sp \t\n'"` or aux. sp */
258 			unDgetC(c);
259 			if (cmap(c, QUOTES))
260 				goto loop;
261 			DYNAMICBUFFER();
262 			wbuf[wp++] = 0;
263 			goto ret;
264 		}
265 		DYNAMICBUFFER();
266 		wbuf[wp++] = c;
267 	}
268 ret:
269 	Gcat(S_ /* "" */, wbuf);
270 	return (1);
271 }
272 
273 /*
274  * Get a character, performing $ substitution unless flag is 0.
275  * Any QUOTES character which is returned from a $ expansion is
276  * QUOTEd so that it will not be recognized above.
277  */
278 int
279 DgetC(int flag)
280 {
281 	int c;
282 
283 top:
284 	if (c = Dpeekc) {
285 		Dpeekc = 0;
286 		return (c);
287 	}
288 	if (lap) {
289 		c = *lap++ & (QUOTE|TRIM);
290 		if (c == 0) {
291 			lap = 0;
292 			goto top;
293 		}
294 quotspec:
295 		/*
296 		 *	don't quote things if there was an error (err!=0)
297 		 * 	the input is original, not from a substitution and
298 		 *	therefore should not be quoted
299 		 */
300 		if (!err && cmap(c, QUOTES))
301 			return (c | QUOTE);
302 		return (c);
303 	}
304 	if (dolp) {
305 		if (c = *dolp++ & (QUOTE|TRIM))
306 			goto quotspec;
307 		if (dolcnt > 0) {
308 			setDolp(*dolnxt++);
309 			--dolcnt;
310 			return (' ');
311 		}
312 		dolp = 0;
313 	}
314 	if (dolcnt > 0) {
315 		setDolp(*dolnxt++);
316 		--dolcnt;
317 		goto top;
318 	}
319 	c = Dredc();
320 	if (c == '$' && flag) {
321 		Dgetdol();
322 		goto top;
323 	}
324 	return (c);
325 }
326 
327 tchar *nulvec[] = { 0 };
328 struct	varent nulargv = { nulvec, S_argv, 0 };
329 
330 /*
331  * Handle the multitudinous $ expansion forms.
332  * Ugh.
333  */
334 void
335 Dgetdol(void)
336 {
337 	tchar *np;
338 	struct varent *vp;
339 	tchar name[MAX_VREF_LEN];
340 	int c, sc;
341 	int subscr = 0, lwb = 1, upb = 0;
342 	bool dimen = 0, bitset = 0;
343 	tchar wbuf[BUFSIZ + MB_LEN_MAX]; /* read_ may return extra bytes */
344 
345 #ifdef TRACE
346 	tprintf("TRACE- Dgetdol()\n");
347 #endif
348 	dolmod = dolmcnt = 0;
349 	c = sc = DgetC(0);
350 	if (c == '{')
351 		c = DgetC(0);		/* sc is { to take } later */
352 	if ((c & TRIM) == '#')
353 		dimen++, c = DgetC(0);		/* $# takes dimension */
354 	else if (c == '?')
355 		bitset++, c = DgetC(0);		/* $? tests existence */
356 	switch (c) {
357 
358 	case '$':
359 		if (dimen || bitset)
360 syntax:
361 		error("Variable syntax");  /* No $?$, $#$ */
362 		setDolp(doldol);
363 		goto eatbrac;
364 
365 	case '<'|QUOTE:
366 		if (dimen || bitset)
367 			goto syntax;		/* No $?<, $#< */
368 		for (np = wbuf; read_(OLDSTD, np, 1) == 1; np++) {
369 			if (np >= &wbuf[BUFSIZ-1])
370 				error("$< line too long");
371 			if (*np <= 0 || *np == '\n')
372 				break;
373 		}
374 		*np = 0;
375 		/*
376 		 * KLUDGE: dolmod is set here because it will
377 		 * cause setDolp to call domod and thus to copy wbuf.
378 		 * Otherwise setDolp would use it directly. If we saved
379 		 * it ourselves, no one would know when to free it.
380 		 * The actual function of the 'q' causes filename
381 		 * expansion not to be done on the interpolated value.
382 		 */
383 		dolmod = 'q';
384 		dolmcnt = 10000;
385 		setDolp(wbuf);
386 		goto eatbrac;
387 
388 	case DEOF:
389 	case '\n':
390 		goto syntax;
391 
392 	case '*':
393 		(void) strcpy_(name, S_argv);
394 		vp = adrof(S_argv);
395 		subscr = -1;			/* Prevent eating [...] */
396 		break;
397 
398 	default:
399 		np = name;
400 		if (digit(c)) {
401 			if (dimen)
402 				goto syntax;	/* No $#1, e.g. */
403 			subscr = 0;
404 			do {
405 				subscr = subscr * 10 + c - '0';
406 				c = DgetC(0);
407 			} while (digit(c));
408 			unDredc(c);
409 			if (subscr < 0)
410 				error("Subscript out of range");
411 			if (subscr == 0) {
412 				if (bitset) {
413 					dolp = file ? S_1 /* "1" */ : S_0 /* "0" */;
414 					goto eatbrac;
415 				}
416 				if (file == 0)
417 					error("No file for $0");
418 				setDolp(file);
419 				goto eatbrac;
420 			}
421 			if (bitset)
422 				goto syntax;
423 			vp = adrof(S_argv);
424 			if (vp == 0) {
425 				vp = &nulargv;
426 				goto eatmod;
427 			}
428 			break;
429 		}
430 		if (!alnum(c))
431 			goto syntax;
432 		for (;;) {
433 			*np++ = c;
434 			c = DgetC(0);
435 			if (!alnum(c))
436 				break;
437 			/* if variable name is > 20, complain */
438 			if (np >= &name[MAX_VAR_LEN])
439 				error("Variable name too long");
440 
441 		}
442 		*np++ = 0;
443 		unDredc(c);
444 		vp = adrof(name);
445 	}
446 	if (bitset) {
447 		/*
448 		 * getenv() to getenv_(), because 'name''s type is now tchar *
449 		 * no need to xalloc
450 		 */
451 		dolp = (vp || getenv_(name)) ? S_1 /* "1" */ : S_0 /* "0" */;
452 		goto eatbrac;
453 	}
454 	if (vp == 0) {
455 		/*
456 		 * getenv() to getenv_(), because 'name''s type is now tchar *
457 		 * no need to xalloc
458 		 */
459 		np = getenv_(name);
460 		if (np) {
461 			addla(np);
462 			goto eatbrac;
463 		}
464 		udvar(name);
465 		/*NOTREACHED*/
466 	}
467 	c = DgetC(0);
468 	upb = blklen(vp->vec);
469 	if (dimen == 0 && subscr == 0 && c == '[') {
470 		np = name;
471 		for (;;) {
472 			c = DgetC(DODOL);	/* Allow $ expand within [ ] */
473 			if (c == ']')
474 				break;
475 			if (c == '\n' || c == DEOF)
476 				goto syntax;
477 			if (np >= &name[MAX_VREF_LEN])
478 				error("Variable reference too long");
479 			*np++ = c;
480 		}
481 		*np = 0, np = name;
482 		if (dolp || dolcnt)		/* $ exp must end before ] */
483 			goto syntax;
484 		if (!*np)
485 			goto syntax;
486 		if (digit(*np)) {
487 			int i = 0;
488 
489 			while (digit(*np))
490 				i = i * 10 + *np++ - '0';
491 /*			if ((i < 0 || i > upb) && !any(*np, "-*")) { */
492 			if ((i < 0 || i > upb) && (*np != '-') && (*np != '*')) {
493 oob:
494 				setname(vp->v_name);
495 				error("Subscript out of range");
496 			}
497 			lwb = i;
498 			if (!*np)
499 				upb = lwb, np = S_AST /* "*" */;
500 		}
501 		if (*np == '*')
502 			np++;
503 		else if (*np != '-')
504 			goto syntax;
505 		else {
506 			int i = upb;
507 
508 			np++;
509 			if (digit(*np)) {
510 				i = 0;
511 				while (digit(*np))
512 					i = i * 10 + *np++ - '0';
513 				if (i < 0 || i > upb)
514 					goto oob;
515 			}
516 			if (i < lwb)
517 				upb = lwb - 1;
518 			else
519 				upb = i;
520 		}
521 		if (lwb == 0) {
522 			if (upb != 0)
523 				goto oob;
524 			upb = -1;
525 		}
526 		if (*np)
527 			goto syntax;
528 	} else {
529 		if (subscr > 0)
530 			if (subscr > upb)
531 				lwb = 1, upb = 0;
532 			else
533 				lwb = upb = subscr;
534 		unDredc(c);
535 	}
536 	if (dimen) {
537 		tchar *cp = putn(upb - lwb + 1);
538 
539 		addla(cp);
540 		xfree(cp);
541 	} else {
542 eatmod:
543 		c = DgetC(0);
544 		if (c == ':') {
545 			c = DgetC(0), dolmcnt = 1;
546 			if (c == 'g')
547 				c = DgetC(0), dolmcnt = 10000;
548 			if (!any(c, S_htrqxe))
549 				error("Bad : mod in $");
550 			dolmod = c;
551 			if (c == 'q')
552 				dolmcnt = 10000;
553 		} else
554 			unDredc(c);
555 		dolnxt = &vp->vec[lwb - 1];
556 		dolcnt = upb - lwb + 1;
557 	}
558 eatbrac:
559 	if (sc == '{') {
560 		c = Dredc();
561 		if (c != '}')
562 			goto syntax;
563 	}
564 }
565 
566 void
567 setDolp(tchar *cp)
568 {
569 	tchar *dp;
570 
571 #ifdef TRACE
572 	tprintf("TRACE- setDolp()\n");
573 #endif
574 	if (dolmod == 0 || dolmcnt == 0) {
575 		dolp = cp;
576 		return;
577 	}
578 	dp = domod(cp, dolmod);
579 	if (dp) {
580 		dolmcnt--;
581 		addla(dp);
582 		xfree(dp);
583 	} else
584 		addla(cp);
585 	dolp = S_ /* "" */;
586 }
587 
588 void
589 unDredc(int c)
590 {
591 
592 	Dpeekrd = c;
593 }
594 
595 int
596 Dredc()
597 {
598 	int c;
599 
600 	if (c = Dpeekrd) {
601 		Dpeekrd = 0;
602 		return (c);
603 	}
604 	if (Dcp && (c = *Dcp++))
605 		return (c&(QUOTE|TRIM));
606 	if (*Dvp == 0) {
607 		Dcp = 0;
608 		return (DEOF);
609 	}
610 	Dcp = *Dvp++;
611 	return (' ');
612 }
613 
614 void
615 Dtestq(int c)
616 {
617 
618 	if (cmap(c, QUOTES))
619 		gflag = 1;
620 }
621 
622 /*
623  * Form a shell temporary file (in unit 0) from the words
624  * of the shell input up to a line the same as "term".
625  * Unit 0 should have been closed before this call.
626  */
627 void
628 heredoc(tchar *term)
629 {
630 	int c;
631 	tchar *Dv[2];
632 	tchar obuf[BUFSIZ], lbuf[BUFSIZ], mbuf[BUFSIZ];
633 	int ocnt, lcnt, mcnt;
634 	tchar *lbp, *obp, *mbp;
635 	tchar **vp;
636 	bool quoted;
637 	tchar shtemp[] = {'/', 't', 'm', 'p', '/', 's', 'h', 'X', 'X', 'X',
638 'X', 'X', 'X', 0};
639 	int fd1;
640 
641 #ifdef TRACE
642 	tprintf("TRACE- heredoc()\n");
643 #endif
644 	if ((fd1 = mkstemp_(shtemp)) < 0)
645 		Perror(shtemp);
646 	(void) unlink_(shtemp);			/* 0 0 inode! */
647 	unsetfd(fd1);
648 	Dv[0] = term; Dv[1] = NOSTR; gflag = 0;
649 	trim(Dv); rscan(Dv, Dtestq); quoted = gflag;
650 	ocnt = BUFSIZ; obp = obuf;
651 	for (;;) {
652 		/*
653 		 * Read up a line
654 		 */
655 		lbp = lbuf; lcnt = BUFSIZ - 4;
656 		for (;;) {
657 			c = readc(1);		/* 1 -> Want EOF returns */
658 			if (c < 0) {
659 				setname(term);
660 				bferr("<< terminator not found");
661 			}
662 			if (c == '\n')
663 				break;
664 			if (c &= TRIM) {
665 				*lbp++ = c;
666 				if (--lcnt < 0) {
667 					setname(S_LESLES /* "<<" */);
668 					error("Line overflow");
669 				}
670 			}
671 		}
672 		*lbp = 0;
673 
674 		/*
675 		 * Compare to terminator -- before expansion
676 		 */
677 		if (eq(lbuf, term)) {
678 			(void) write_(0, obuf, BUFSIZ - ocnt);
679 			(void) lseek(0, (off_t)0, 0);
680 			return;
681 		}
682 
683 		/*
684 		 * If term was quoted or -n just pass it on
685 		 */
686 		if (quoted || noexec) {
687 			*lbp++ = '\n'; *lbp = 0;
688 			for (lbp = lbuf; c = *lbp++; ) {
689 				*obp++ = c;
690 				if (--ocnt == 0) {
691 					(void) write_(0, obuf, BUFSIZ);
692 					obp = obuf; ocnt = BUFSIZ;
693 				}
694 			}
695 			continue;
696 		}
697 
698 		/*
699 		 * Term wasn't quoted so variable and then command
700 		 * expand the input line
701 		 */
702 		Dcp = lbuf; Dvp = Dv + 1; mbp = mbuf; mcnt = BUFSIZ - 4;
703 		for (;;) {
704 			c = DgetC(DODOL);
705 			if (c == DEOF)
706 				break;
707 			if ((c &= TRIM) == 0)
708 				continue;
709 			/* \ quotes \ $ ` here */
710 			if (c == '\\') {
711 				c = DgetC(0);
712 /*				if (!any(c, "$\\`")) */
713 				if ((c != '$') && (c != '\\') && (c != '`'))
714 					unDgetC(c | QUOTE), c = '\\';
715 				else
716 					c |= QUOTE;
717 			}
718 			*mbp++ = c;
719 			if (--mcnt == 0) {
720 				setname(S_LESLES /* "<<" */);
721 				bferr("Line overflow");
722 			}
723 		}
724 		*mbp++ = 0;
725 
726 		/*
727 		 * If any ` in line do command substitution
728 		 */
729 		mbp = mbuf;
730 		if (any('`', mbp)) {
731 			/*
732 			 * 1 arg to dobackp causes substitution to be literal.
733 			 * Words are broken only at newlines so that all blanks
734 			 * and tabs are preserved.  Blank lines (null words)
735 			 * are not discarded.
736 			 */
737 			vp = dobackp(mbuf, 1);
738 		} else
739 			/* Setup trivial vector similar to return of dobackp */
740 			Dv[0] = mbp, Dv[1] = NOSTR, vp = Dv;
741 
742 		/*
743 		 * Resurrect the words from the command substitution
744 		 * each separated by a newline.  Note that the last
745 		 * newline of a command substitution will have been
746 		 * discarded, but we put a newline after the last word
747 		 * because this represents the newline after the last
748 		 * input line!
749 		 */
750 		for (; *vp; vp++) {
751 			for (mbp = *vp; *mbp; mbp++) {
752 				*obp++ = *mbp & TRIM;
753 				if (--ocnt == 0) {
754 					(void) write_(0, obuf, BUFSIZ);
755 					obp = obuf; ocnt = BUFSIZ;
756 				}
757 			}
758 			*obp++ = '\n';
759 			if (--ocnt == 0) {
760 				(void) write_(0, obuf, BUFSIZ);
761 				obp = obuf; ocnt = BUFSIZ;
762 			}
763 		}
764 		if (pargv)
765 			blkfree(pargv), pargv = 0;
766 	}
767 }
768