1 /*
2 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
3 * Use is subject to license terms.
4 */
5
6 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
7 /* All Rights Reserved */
8
9 /*
10 * Copyright (c) 1980 Regents of the University of California.
11 * All rights reserved. The Berkeley Software License Agreement
12 * specifies the terms and conditions for redistribution.
13 */
14
15 #pragma ident "%Z%%M% %I% %E% SMI"
16
17 #include <unistd.h> /* for lseek prototype */
18 #include "sh.h"
19 #include "sh.tconst.h"
20
21 /*
22 * C shell
23 */
24
/*
 * These routines perform variable substitution and quoting via ' and ".
 * Up to this point these constructs have been preserved in the divided
 * input words.  Here we expand variables and turn quoting via ' and " into
 * QUOTE bits on characters (which prevent further interpretation).
 * If the `:q' modifier was applied during history expansion, then
 * some QUOTEing may have occurred already, so we don't "trim()" here.
 */
33
34 int Dpeekc, Dpeekrd; /* Peeks for DgetC and Dreadc */
35 tchar *Dcp, **Dvp; /* Input vector for Dreadc */
36
/* End-of-input marker returned by Dredc() and passed on by DgetC(). */
#define	DEOF	(-1)

/*
 * Push one character back so that the next DgetC() returns it.
 * The expansion is fully parenthesized so the macro behaves as a
 * single expression wherever it is used.  Pushing back 0 simply
 * clears the slot, since DgetC() treats 0 as "nothing pending".
 */
#define	unDgetC(c)	(Dpeekc = (c))

#define	QUOTES	(_Q|_Q1|_ESC)	/* \ ' " ` */
42
43 /*
44 * The following variables give the information about the current
45 * $ expansion, recording the current word position, the remaining
46 * words within this expansion, the count of remaining words, and the
47 * information about any : modifier which is being applied.
48 */
49 tchar *dolp; /* Remaining chars from this word */
50 tchar **dolnxt; /* Further words */
51 int dolcnt; /* Count of further words */
52 tchar dolmod; /* : modifier character */
53 int dolmcnt; /* :gx -> 10000, else 1 */
54
55 void Dfix2(tchar **);
56 void Dgetdol(void);
57 void setDolp(tchar *);
58 void unDredc(int);
59
60 /*
61 * Fix up the $ expansions and quotations in the
62 * argument list to command t.
63 */
64 void
Dfix(struct command * t)65 Dfix(struct command *t)
66 {
67 tchar **pp;
68 tchar *p;
69
70 #ifdef TRACE
71 tprintf("TRACE- Dfix()\n");
72 #endif
73 if (noexec)
74 return;
75 /* Note that t_dcom isn't trimmed thus !...:q's aren't lost */
76 for (pp = t->t_dcom; p = *pp++; )
77 while (*p)
78 if (cmap(*p++, _DOL|QUOTES)) { /* $, \, ', ", ` */
79 Dfix2(t->t_dcom); /* found one */
80 blkfree(t->t_dcom);
81 t->t_dcom = gargv;
82 gargv = 0;
83 return;
84 }
85 }
86
87 /*
88 * $ substitute one word, for i/o redirection
89 */
90 tchar *
Dfix1(tchar * cp)91 Dfix1(tchar *cp)
92 {
93 tchar *Dv[2];
94
95 #ifdef TRACE
96 tprintf("TRACE- Dfix1()\n");
97 #endif
98 if (noexec)
99 return (0);
100 Dv[0] = cp; Dv[1] = NOSTR;
101 Dfix2(Dv);
102 if (gargc != 1) {
103 setname(cp);
104 bferr("Ambiguous");
105 }
106 cp = savestr(gargv[0]);
107 blkfree(gargv), gargv = 0;
108 return (cp);
109 }
110
111 /*
112 * Subroutine to do actual fixing after state initialization.
113 */
114 void
Dfix2(tchar ** v)115 Dfix2(tchar **v)
116 {
117 tchar *agargv[GAVSIZ];
118
119 #ifdef TRACE
120 tprintf("TRACE- Dfix2()\n");
121 #endif
122 ginit(agargv); /* Initialize glob's area pointers */
123 Dvp = v; Dcp = S_ /* "" */; /* Setup input vector for Dreadc */
124 unDgetC(0); unDredc(0); /* Clear out any old peeks (at error) */
125 dolp = 0; dolcnt = 0; /* Clear out residual $ expands (...) */
126 while (Dword())
127 continue;
128 gargv = copyblk(gargv);
129 }
130
131 /*
132 * Get a word. This routine is analogous to the routine
133 * word() in sh.lex.c for the main lexical input. One difference
134 * here is that we don't get a newline to terminate our expansion.
135 * Rather, DgetC will return a DEOF when we hit the end-of-input.
136 */
137 int
Dword(void)138 Dword(void)
139 {
140 int c, c1;
141 static tchar *wbuf = NULL;
142 static int wbufsiz = BUFSIZ;
143 int wp = 0;
144 bool dolflg;
145 bool sofar = 0;
146 #define DYNAMICBUFFER() \
147 do { \
148 if (wp >= wbufsiz) { \
149 wbufsiz += BUFSIZ; \
150 wbuf = xrealloc(wbuf, (wbufsiz+1) * sizeof (tchar)); \
151 } \
152 } while (0)
153
154 #ifdef TRACE
155 tprintf("TRACE- Dword()\n");
156 #endif
157 if (wbuf == NULL)
158 wbuf = xalloc((wbufsiz+1) * sizeof (tchar));
159 loop:
160 c = DgetC(DODOL);
161 switch (c) {
162
163 case DEOF:
164 deof:
165 if (sofar == 0)
166 return (0);
167 /* finish this word and catch the code above the next time */
168 unDredc(c);
169 /* fall into ... */
170
171 case '\n':
172 wbuf[wp] = 0;
173 goto ret;
174
175 case ' ':
176 case '\t':
177 goto loop;
178
179 case '`':
180 /* We preserve ` quotations which are done yet later */
181 wbuf[wp++] = c;
182 case '\'':
183 case '"':
184 /*
185 * Note that DgetC never returns a QUOTES character
186 * from an expansion, so only true input quotes will
187 * get us here or out.
188 */
189 c1 = c;
190 dolflg = c1 == '"' ? DODOL : 0;
191 for (;;) {
192 c = DgetC(dolflg);
193 if (c == c1)
194 break;
195 if (c == '\n' || c == DEOF)
196 error("Unmatched %c", (tchar) c1);
197 if ((c & (QUOTE|TRIM)) == ('\n' | QUOTE))
198 --wp;
199 DYNAMICBUFFER();
200 switch (c1) {
201
202 case '"':
203 /*
204 * Leave any `s alone for later.
205 * Other chars are all quoted, thus `...`
206 * can tell it was within "...".
207 */
208 wbuf[wp++] = c == '`' ? '`' : c | QUOTE;
209 break;
210
211 case '\'':
212 /* Prevent all further interpretation */
213 wbuf[wp++] = c | QUOTE;
214 break;
215
216 case '`':
217 /* Leave all text alone for later */
218 wbuf[wp++] = c;
219 break;
220 }
221 }
222 if (c1 == '`') {
223 DYNAMICBUFFER();
224 wbuf[wp++] = '`';
225 }
226 goto pack; /* continue the word */
227
228 case '\\':
229 c = DgetC(0); /* No $ subst! */
230 if (c == '\n' || c == DEOF)
231 goto loop;
232 c |= QUOTE;
233 break;
234 #ifdef MBCHAR /* Could be a space char from aux. codeset. */
235 default:
236 if (isauxsp(c)) goto loop;
237 #endif /* MBCHAR */
238 }
239 unDgetC(c);
240 pack:
241 sofar = 1;
242 /* pack up more characters in this word */
243 for (;;) {
244 c = DgetC(DODOL);
245 if (c == '\\') {
246 c = DgetC(0);
247 if (c == DEOF)
248 goto deof;
249 if (c == '\n')
250 c = ' ';
251 else
252 c |= QUOTE;
253 }
254 if (c == DEOF)
255 goto deof;
256 if (cmap(c, _SP|_NL|_Q|_Q1) ||
257 isauxsp(c)) { /* sp \t\n'"` or aux. sp */
258 unDgetC(c);
259 if (cmap(c, QUOTES))
260 goto loop;
261 DYNAMICBUFFER();
262 wbuf[wp++] = 0;
263 goto ret;
264 }
265 DYNAMICBUFFER();
266 wbuf[wp++] = c;
267 }
268 ret:
269 Gcat(S_ /* "" */, wbuf);
270 return (1);
271 }
272
273 /*
274 * Get a character, performing $ substitution unless flag is 0.
275 * Any QUOTES character which is returned from a $ expansion is
276 * QUOTEd so that it will not be recognized above.
277 */
278 int
DgetC(int flag)279 DgetC(int flag)
280 {
281 int c;
282
283 top:
284 if (c = Dpeekc) {
285 Dpeekc = 0;
286 return (c);
287 }
288 if (lap) {
289 c = *lap++ & (QUOTE|TRIM);
290 if (c == 0) {
291 lap = 0;
292 goto top;
293 }
294 quotspec:
295 /*
296 * don't quote things if there was an error (err!=0)
297 * the input is original, not from a substitution and
298 * therefore should not be quoted
299 */
300 if (!err && cmap(c, QUOTES))
301 return (c | QUOTE);
302 return (c);
303 }
304 if (dolp) {
305 if (c = *dolp++ & (QUOTE|TRIM))
306 goto quotspec;
307 if (dolcnt > 0) {
308 setDolp(*dolnxt++);
309 --dolcnt;
310 return (' ');
311 }
312 dolp = 0;
313 }
314 if (dolcnt > 0) {
315 setDolp(*dolnxt++);
316 --dolcnt;
317 goto top;
318 }
319 c = Dredc();
320 if (c == '$' && flag) {
321 Dgetdol();
322 goto top;
323 }
324 return (c);
325 }
326
327 tchar *nulvec[] = { 0 };
328 struct varent nulargv = { nulvec, S_argv, 0 };
329
330 /*
331 * Handle the multitudinous $ expansion forms.
332 * Ugh.
333 */
334 void
Dgetdol(void)335 Dgetdol(void)
336 {
337 tchar *np;
338 struct varent *vp;
339 tchar name[MAX_VREF_LEN];
340 int c, sc;
341 int subscr = 0, lwb = 1, upb = 0;
342 bool dimen = 0, bitset = 0;
343 tchar wbuf[BUFSIZ + MB_LEN_MAX]; /* read_ may return extra bytes */
344
345 #ifdef TRACE
346 tprintf("TRACE- Dgetdol()\n");
347 #endif
348 dolmod = dolmcnt = 0;
349 c = sc = DgetC(0);
350 if (c == '{')
351 c = DgetC(0); /* sc is { to take } later */
352 if ((c & TRIM) == '#')
353 dimen++, c = DgetC(0); /* $# takes dimension */
354 else if (c == '?')
355 bitset++, c = DgetC(0); /* $? tests existence */
356 switch (c) {
357
358 case '$':
359 if (dimen || bitset)
360 syntax:
361 error("Variable syntax"); /* No $?$, $#$ */
362 setDolp(doldol);
363 goto eatbrac;
364
365 case '<'|QUOTE:
366 if (dimen || bitset)
367 goto syntax; /* No $?<, $#< */
368 for (np = wbuf; read_(OLDSTD, np, 1) == 1; np++) {
369 if (np >= &wbuf[BUFSIZ-1])
370 error("$< line too long");
371 if (*np <= 0 || *np == '\n')
372 break;
373 }
374 *np = 0;
375 /*
376 * KLUDGE: dolmod is set here because it will
377 * cause setDolp to call domod and thus to copy wbuf.
378 * Otherwise setDolp would use it directly. If we saved
379 * it ourselves, no one would know when to free it.
380 * The actual function of the 'q' causes filename
381 * expansion not to be done on the interpolated value.
382 */
383 dolmod = 'q';
384 dolmcnt = 10000;
385 setDolp(wbuf);
386 goto eatbrac;
387
388 case DEOF:
389 case '\n':
390 goto syntax;
391
392 case '*':
393 (void) strcpy_(name, S_argv);
394 vp = adrof(S_argv);
395 subscr = -1; /* Prevent eating [...] */
396 break;
397
398 default:
399 np = name;
400 if (digit(c)) {
401 if (dimen)
402 goto syntax; /* No $#1, e.g. */
403 subscr = 0;
404 do {
405 subscr = subscr * 10 + c - '0';
406 c = DgetC(0);
407 } while (digit(c));
408 unDredc(c);
409 if (subscr < 0)
410 error("Subscript out of range");
411 if (subscr == 0) {
412 if (bitset) {
413 dolp = file ? S_1 /* "1" */ : S_0 /* "0" */;
414 goto eatbrac;
415 }
416 if (file == 0)
417 error("No file for $0");
418 setDolp(file);
419 goto eatbrac;
420 }
421 if (bitset)
422 goto syntax;
423 vp = adrof(S_argv);
424 if (vp == 0) {
425 vp = &nulargv;
426 goto eatmod;
427 }
428 break;
429 }
430 if (!alnum(c))
431 goto syntax;
432 for (;;) {
433 *np++ = c;
434 c = DgetC(0);
435 if (!alnum(c))
436 break;
437 /* if variable name is > 20, complain */
438 if (np >= &name[MAX_VAR_LEN])
439 error("Variable name too long");
440
441 }
442 *np++ = 0;
443 unDredc(c);
444 vp = adrof(name);
445 }
446 if (bitset) {
447 /*
448 * getenv() to getenv_(), because 'name''s type is now tchar *
449 * no need to xalloc
450 */
451 dolp = (vp || getenv_(name)) ? S_1 /* "1" */ : S_0 /* "0" */;
452 goto eatbrac;
453 }
454 if (vp == 0) {
455 /*
456 * getenv() to getenv_(), because 'name''s type is now tchar *
457 * no need to xalloc
458 */
459 np = getenv_(name);
460 if (np) {
461 addla(np);
462 goto eatbrac;
463 }
464 udvar(name);
465 /*NOTREACHED*/
466 }
467 c = DgetC(0);
468 upb = blklen(vp->vec);
469 if (dimen == 0 && subscr == 0 && c == '[') {
470 np = name;
471 for (;;) {
472 c = DgetC(DODOL); /* Allow $ expand within [ ] */
473 if (c == ']')
474 break;
475 if (c == '\n' || c == DEOF)
476 goto syntax;
477 if (np >= &name[MAX_VREF_LEN])
478 error("Variable reference too long");
479 *np++ = c;
480 }
481 *np = 0, np = name;
482 if (dolp || dolcnt) /* $ exp must end before ] */
483 goto syntax;
484 if (!*np)
485 goto syntax;
486 if (digit(*np)) {
487 int i = 0;
488
489 while (digit(*np))
490 i = i * 10 + *np++ - '0';
491 /* if ((i < 0 || i > upb) && !any(*np, "-*")) { */
492 if ((i < 0 || i > upb) && (*np != '-') && (*np != '*')) {
493 oob:
494 setname(vp->v_name);
495 error("Subscript out of range");
496 }
497 lwb = i;
498 if (!*np)
499 upb = lwb, np = S_AST /* "*" */;
500 }
501 if (*np == '*')
502 np++;
503 else if (*np != '-')
504 goto syntax;
505 else {
506 int i = upb;
507
508 np++;
509 if (digit(*np)) {
510 i = 0;
511 while (digit(*np))
512 i = i * 10 + *np++ - '0';
513 if (i < 0 || i > upb)
514 goto oob;
515 }
516 if (i < lwb)
517 upb = lwb - 1;
518 else
519 upb = i;
520 }
521 if (lwb == 0) {
522 if (upb != 0)
523 goto oob;
524 upb = -1;
525 }
526 if (*np)
527 goto syntax;
528 } else {
529 if (subscr > 0)
530 if (subscr > upb)
531 lwb = 1, upb = 0;
532 else
533 lwb = upb = subscr;
534 unDredc(c);
535 }
536 if (dimen) {
537 tchar *cp = putn(upb - lwb + 1);
538
539 addla(cp);
540 xfree(cp);
541 } else {
542 eatmod:
543 c = DgetC(0);
544 if (c == ':') {
545 c = DgetC(0), dolmcnt = 1;
546 if (c == 'g')
547 c = DgetC(0), dolmcnt = 10000;
548 if (!any(c, S_htrqxe))
549 error("Bad : mod in $");
550 dolmod = c;
551 if (c == 'q')
552 dolmcnt = 10000;
553 } else
554 unDredc(c);
555 dolnxt = &vp->vec[lwb - 1];
556 dolcnt = upb - lwb + 1;
557 }
558 eatbrac:
559 if (sc == '{') {
560 c = Dredc();
561 if (c != '}')
562 goto syntax;
563 }
564 }
565
566 void
setDolp(tchar * cp)567 setDolp(tchar *cp)
568 {
569 tchar *dp;
570
571 #ifdef TRACE
572 tprintf("TRACE- setDolp()\n");
573 #endif
574 if (dolmod == 0 || dolmcnt == 0) {
575 dolp = cp;
576 return;
577 }
578 dp = domod(cp, dolmod);
579 if (dp) {
580 dolmcnt--;
581 addla(dp);
582 xfree(dp);
583 } else
584 addla(cp);
585 dolp = S_ /* "" */;
586 }
587
588 void
unDredc(int c)589 unDredc(int c)
590 {
591
592 Dpeekrd = c;
593 }
594
595 int
Dredc()596 Dredc()
597 {
598 int c;
599
600 if (c = Dpeekrd) {
601 Dpeekrd = 0;
602 return (c);
603 }
604 if (Dcp && (c = *Dcp++))
605 return (c&(QUOTE|TRIM));
606 if (*Dvp == 0) {
607 Dcp = 0;
608 return (DEOF);
609 }
610 Dcp = *Dvp++;
611 return (' ');
612 }
613
614 void
Dtestq(int c)615 Dtestq(int c)
616 {
617
618 if (cmap(c, QUOTES))
619 gflag = 1;
620 }
621
622 /*
623 * Form a shell temporary file (in unit 0) from the words
624 * of the shell input up to a line the same as "term".
625 * Unit 0 should have been closed before this call.
626 */
627 void
heredoc(tchar * term)628 heredoc(tchar *term)
629 {
630 int c;
631 tchar *Dv[2];
632 tchar obuf[BUFSIZ], lbuf[BUFSIZ], mbuf[BUFSIZ];
633 int ocnt, lcnt, mcnt;
634 tchar *lbp, *obp, *mbp;
635 tchar **vp;
636 bool quoted;
637 tchar shtemp[] = {'/', 't', 'm', 'p', '/', 's', 'h', 'X', 'X', 'X',
638 'X', 'X', 'X', 0};
639 int fd1;
640
641 #ifdef TRACE
642 tprintf("TRACE- heredoc()\n");
643 #endif
644 if ((fd1 = mkstemp_(shtemp)) < 0)
645 Perror(shtemp);
646 (void) unlink_(shtemp); /* 0 0 inode! */
647 unsetfd(fd1);
648 Dv[0] = term; Dv[1] = NOSTR; gflag = 0;
649 trim(Dv); rscan(Dv, Dtestq); quoted = gflag;
650 ocnt = BUFSIZ; obp = obuf;
651 for (;;) {
652 /*
653 * Read up a line
654 */
655 lbp = lbuf; lcnt = BUFSIZ - 4;
656 for (;;) {
657 c = readc(1); /* 1 -> Want EOF returns */
658 if (c < 0) {
659 setname(term);
660 bferr("<< terminator not found");
661 }
662 if (c == '\n')
663 break;
664 if (c &= TRIM) {
665 *lbp++ = c;
666 if (--lcnt < 0) {
667 setname(S_LESLES /* "<<" */);
668 error("Line overflow");
669 }
670 }
671 }
672 *lbp = 0;
673
674 /*
675 * Compare to terminator -- before expansion
676 */
677 if (eq(lbuf, term)) {
678 (void) write_(0, obuf, BUFSIZ - ocnt);
679 (void) lseek(0, (off_t)0, 0);
680 return;
681 }
682
683 /*
684 * If term was quoted or -n just pass it on
685 */
686 if (quoted || noexec) {
687 *lbp++ = '\n'; *lbp = 0;
688 for (lbp = lbuf; c = *lbp++; ) {
689 *obp++ = c;
690 if (--ocnt == 0) {
691 (void) write_(0, obuf, BUFSIZ);
692 obp = obuf; ocnt = BUFSIZ;
693 }
694 }
695 continue;
696 }
697
698 /*
699 * Term wasn't quoted so variable and then command
700 * expand the input line
701 */
702 Dcp = lbuf; Dvp = Dv + 1; mbp = mbuf; mcnt = BUFSIZ - 4;
703 for (;;) {
704 c = DgetC(DODOL);
705 if (c == DEOF)
706 break;
707 if ((c &= TRIM) == 0)
708 continue;
709 /* \ quotes \ $ ` here */
710 if (c == '\\') {
711 c = DgetC(0);
712 /* if (!any(c, "$\\`")) */
713 if ((c != '$') && (c != '\\') && (c != '`'))
714 unDgetC(c | QUOTE), c = '\\';
715 else
716 c |= QUOTE;
717 }
718 *mbp++ = c;
719 if (--mcnt == 0) {
720 setname(S_LESLES /* "<<" */);
721 bferr("Line overflow");
722 }
723 }
724 *mbp++ = 0;
725
726 /*
727 * If any ` in line do command substitution
728 */
729 mbp = mbuf;
730 if (any('`', mbp)) {
731 /*
732 * 1 arg to dobackp causes substitution to be literal.
733 * Words are broken only at newlines so that all blanks
734 * and tabs are preserved. Blank lines (null words)
735 * are not discarded.
736 */
737 vp = dobackp(mbuf, 1);
738 } else
739 /* Setup trivial vector similar to return of dobackp */
740 Dv[0] = mbp, Dv[1] = NOSTR, vp = Dv;
741
742 /*
743 * Resurrect the words from the command substitution
744 * each separated by a newline. Note that the last
745 * newline of a command substitution will have been
746 * discarded, but we put a newline after the last word
747 * because this represents the newline after the last
748 * input line!
749 */
750 for (; *vp; vp++) {
751 for (mbp = *vp; *mbp; mbp++) {
752 *obp++ = *mbp & TRIM;
753 if (--ocnt == 0) {
754 (void) write_(0, obuf, BUFSIZ);
755 obp = obuf; ocnt = BUFSIZ;
756 }
757 }
758 *obp++ = '\n';
759 if (--ocnt == 0) {
760 (void) write_(0, obuf, BUFSIZ);
761 obp = obuf; ocnt = BUFSIZ;
762 }
763 }
764 if (pargv)
765 blkfree(pargv), pargv = 0;
766 }
767 }
768