1 /*- 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * Copyright (c) 1997-2005 5 * Herbert Xu <herbert@gondor.apana.org.au>. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Kenneth Almquist. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #ifndef lint 36 #if 0 37 static char sccsid[] = "@(#)expand.c 8.5 (Berkeley) 5/15/95"; 38 #endif 39 #endif /* not lint */ 40 #include <sys/cdefs.h> 41 __FBSDID("$FreeBSD$"); 42 43 #include <sys/types.h> 44 #include <sys/time.h> 45 #include <sys/stat.h> 46 #include <dirent.h> 47 #include <errno.h> 48 #include <inttypes.h> 49 #include <limits.h> 50 #include <pwd.h> 51 #include <stdio.h> 52 #include <stdlib.h> 53 #include <string.h> 54 #include <unistd.h> 55 #include <wchar.h> 56 #include <wctype.h> 57 58 /* 59 * Routines to expand arguments to commands. We have to deal with 60 * backquotes, shell variables, and file metacharacters. 61 */ 62 63 #include "shell.h" 64 #include "main.h" 65 #include "nodes.h" 66 #include "eval.h" 67 #include "expand.h" 68 #include "syntax.h" 69 #include "parser.h" 70 #include "jobs.h" 71 #include "options.h" 72 #include "var.h" 73 #include "input.h" 74 #include "output.h" 75 #include "memalloc.h" 76 #include "error.h" 77 #include "mystring.h" 78 #include "arith.h" 79 #include "show.h" 80 #include "builtins.h" 81 82 /* 83 * Structure specifying which parts of the string should be searched 84 * for IFS characters. 85 */ 86 87 struct ifsregion { 88 struct ifsregion *next; /* next region in list */ 89 int begoff; /* offset of start of region */ 90 int endoff; /* offset of end of region */ 91 int inquotes; /* search for nul bytes only */ 92 }; 93 94 95 static char *expdest; /* output of current string */ 96 static struct nodelist *argbackq; /* list of back quote expressions */ 97 static struct ifsregion ifsfirst; /* first struct in list of ifs regions */ 98 static struct ifsregion *ifslastp; /* last struct in list */ 99 static struct arglist exparg; /* holds expanded arg list */ 100 101 static char *argstr(char *, int); 102 static char *exptilde(char *, int); 103 static char *expari(char *); 104 static void expbackq(union node *, int, int); 105 static int subevalvar(char *, char *, int, int, int, int, int); 106 static char *evalvar(char *, int); 107 static int varisset(const char *, int); 108 static void strtodest(const char *, int, int, int); 109 static void varvalue(const char *, int, int, int); 110 static void recordregion(int, int, int); 111 static void removerecordregions(int); 112 static void ifsbreakup(char *, struct arglist *); 113 static void expandmeta(struct strlist *); 114 static void expmeta(char *, char *); 115 static void addfname(char *); 116 static struct strlist *expsort(struct strlist *); 117 static struct strlist *msort(struct strlist *, int); 118 static int patmatch(const char *, const char *, int); 119 static char *cvtnum(int, char *); 120 static int collate_range_cmp(wchar_t, wchar_t); 121 122 static int 123 collate_range_cmp(wchar_t c1, wchar_t c2) 124 { 125 static wchar_t s1[2], s2[2]; 126 127 s1[0] = c1; 128 s2[0] = c2; 129 return (wcscoll(s1, s2)); 130 } 131 132 static char * 133 stputs_quotes(const char *data, const char *syntax, char *p) 134 { 135 while (*data) { 136 CHECKSTRSPACE(2, p); 137 if (syntax[(int)*data] == CCTL) 138 USTPUTC(CTLESC, p); 139 USTPUTC(*data++, p); 140 } 141 return (p); 142 } 143 #define STPUTS_QUOTES(data, syntax, p) p = stputs_quotes((data), syntax, p) 144 145 /* 146 * Perform expansions on an argument, placing the resulting list of arguments 147 * in arglist. Parameter expansion, command substitution and arithmetic 148 * expansion are always performed; additional expansions can be requested 149 * via flag (EXP_*). 150 * The result is left in the stack string. 151 * When arglist is NULL, perform here document expansion. 152 * 153 * Caution: this function uses global state and is not reentrant. 154 * However, a new invocation after an interrupted invocation is safe 155 * and will reset the global state for the new call. 156 */ 157 void 158 expandarg(union node *arg, struct arglist *arglist, int flag) 159 { 160 struct strlist *sp; 161 char *p; 162 163 argbackq = arg->narg.backquote; 164 STARTSTACKSTR(expdest); 165 ifsfirst.next = NULL; 166 ifslastp = NULL; 167 argstr(arg->narg.text, flag); 168 if (arglist == NULL) { 169 STACKSTRNUL(expdest); 170 return; /* here document expanded */ 171 } 172 STPUTC('\0', expdest); 173 p = grabstackstr(expdest); 174 exparg.lastp = &exparg.list; 175 if (flag & EXP_FULL) { 176 ifsbreakup(p, &exparg); 177 *exparg.lastp = NULL; 178 exparg.lastp = &exparg.list; 179 expandmeta(exparg.list); 180 } else { 181 sp = (struct strlist *)stalloc(sizeof (struct strlist)); 182 sp->text = p; 183 *exparg.lastp = sp; 184 exparg.lastp = &sp->next; 185 } 186 while (ifsfirst.next != NULL) { 187 struct ifsregion *ifsp; 188 INTOFF; 189 ifsp = ifsfirst.next->next; 190 ckfree(ifsfirst.next); 191 ifsfirst.next = ifsp; 192 INTON; 193 } 194 *exparg.lastp = NULL; 195 if (exparg.list) { 196 *arglist->lastp = exparg.list; 197 arglist->lastp = exparg.lastp; 198 } 199 } 200 201 202 203 /* 204 * Perform parameter expansion, command substitution and arithmetic 205 * expansion, and tilde expansion if requested via EXP_TILDE/EXP_VARTILDE. 206 * Processing ends at a CTLENDVAR or CTLENDARI character as well as '\0'. 207 * This is used to expand word in ${var+word} etc. 208 * If EXP_FULL or EXP_CASE are set, keep and/or generate CTLESC 209 * characters to allow for further processing. 210 * If EXP_FULL is set, also preserve CTLQUOTEMARK characters. 211 */ 212 static char * 213 argstr(char *p, int flag) 214 { 215 char c; 216 int quotes = flag & (EXP_FULL | EXP_CASE); /* do CTLESC */ 217 int firsteq = 1; 218 int split_lit; 219 int lit_quoted; 220 221 split_lit = flag & EXP_SPLIT_LIT; 222 lit_quoted = flag & EXP_LIT_QUOTED; 223 flag &= ~(EXP_SPLIT_LIT | EXP_LIT_QUOTED); 224 if (*p == '~' && (flag & (EXP_TILDE | EXP_VARTILDE))) 225 p = exptilde(p, flag); 226 for (;;) { 227 CHECKSTRSPACE(2, expdest); 228 switch (c = *p++) { 229 case '\0': 230 return (p - 1); 231 case CTLENDVAR: 232 case CTLENDARI: 233 return (p); 234 case CTLQUOTEMARK: 235 lit_quoted = 1; 236 /* "$@" syntax adherence hack */ 237 if (p[0] == CTLVAR && p[2] == '@' && p[3] == '=') 238 break; 239 if ((flag & EXP_FULL) != 0) 240 USTPUTC(c, expdest); 241 break; 242 case CTLQUOTEEND: 243 lit_quoted = 0; 244 break; 245 case CTLESC: 246 if (quotes) 247 USTPUTC(c, expdest); 248 c = *p++; 249 USTPUTC(c, expdest); 250 if (split_lit && !lit_quoted) 251 recordregion(expdest - stackblock() - 252 (quotes ? 2 : 1), 253 expdest - stackblock(), 0); 254 break; 255 case CTLVAR: 256 p = evalvar(p, flag); 257 break; 258 case CTLBACKQ: 259 case CTLBACKQ|CTLQUOTE: 260 expbackq(argbackq->n, c & CTLQUOTE, flag); 261 argbackq = argbackq->next; 262 break; 263 case CTLARI: 264 p = expari(p); 265 break; 266 case ':': 267 case '=': 268 /* 269 * sort of a hack - expand tildes in variable 270 * assignments (after the first '=' and after ':'s). 271 */ 272 USTPUTC(c, expdest); 273 if (split_lit && !lit_quoted) 274 recordregion(expdest - stackblock() - 1, 275 expdest - stackblock(), 0); 276 if (flag & EXP_VARTILDE && *p == '~' && 277 (c != '=' || firsteq)) { 278 if (c == '=') 279 firsteq = 0; 280 p = exptilde(p, flag); 281 } 282 break; 283 default: 284 USTPUTC(c, expdest); 285 if (split_lit && !lit_quoted) 286 recordregion(expdest - stackblock() - 1, 287 expdest - stackblock(), 0); 288 } 289 } 290 } 291 292 /* 293 * Perform tilde expansion, placing the result in the stack string and 294 * returning the next position in the input string to process. 295 */ 296 static char * 297 exptilde(char *p, int flag) 298 { 299 char c, *startp = p; 300 struct passwd *pw; 301 char *home; 302 303 for (;;) { 304 c = *p; 305 switch(c) { 306 case CTLESC: /* This means CTL* are always considered quoted. */ 307 case CTLVAR: 308 case CTLBACKQ: 309 case CTLBACKQ | CTLQUOTE: 310 case CTLARI: 311 case CTLENDARI: 312 case CTLQUOTEMARK: 313 return (startp); 314 case ':': 315 if ((flag & EXP_VARTILDE) == 0) 316 break; 317 /* FALLTHROUGH */ 318 case '\0': 319 case '/': 320 case CTLENDVAR: 321 *p = '\0'; 322 if (*(startp+1) == '\0') { 323 home = lookupvar("HOME"); 324 } else { 325 pw = getpwnam(startp+1); 326 home = pw != NULL ? pw->pw_dir : NULL; 327 } 328 *p = c; 329 if (home == NULL || *home == '\0') 330 return (startp); 331 strtodest(home, flag, VSNORMAL, 1); 332 return (p); 333 } 334 p++; 335 } 336 } 337 338 339 static void 340 removerecordregions(int endoff) 341 { 342 if (ifslastp == NULL) 343 return; 344 345 if (ifsfirst.endoff > endoff) { 346 while (ifsfirst.next != NULL) { 347 struct ifsregion *ifsp; 348 INTOFF; 349 ifsp = ifsfirst.next->next; 350 ckfree(ifsfirst.next); 351 ifsfirst.next = ifsp; 352 INTON; 353 } 354 if (ifsfirst.begoff > endoff) 355 ifslastp = NULL; 356 else { 357 ifslastp = &ifsfirst; 358 ifsfirst.endoff = endoff; 359 } 360 return; 361 } 362 363 ifslastp = &ifsfirst; 364 while (ifslastp->next && ifslastp->next->begoff < endoff) 365 ifslastp=ifslastp->next; 366 while (ifslastp->next != NULL) { 367 struct ifsregion *ifsp; 368 INTOFF; 369 ifsp = ifslastp->next->next; 370 ckfree(ifslastp->next); 371 ifslastp->next = ifsp; 372 INTON; 373 } 374 if (ifslastp->endoff > endoff) 375 ifslastp->endoff = endoff; 376 } 377 378 /* 379 * Expand arithmetic expression. 380 * Note that flag is not required as digits never require CTLESC characters. 381 */ 382 static char * 383 expari(char *p) 384 { 385 char *q, *start; 386 arith_t result; 387 int begoff; 388 int quoted; 389 int adj; 390 391 quoted = *p++ == '"'; 392 begoff = expdest - stackblock(); 393 p = argstr(p, 0); 394 removerecordregions(begoff); 395 STPUTC('\0', expdest); 396 start = stackblock() + begoff; 397 398 q = grabstackstr(expdest); 399 result = arith(start); 400 ungrabstackstr(q, expdest); 401 402 start = stackblock() + begoff; 403 adj = start - expdest; 404 STADJUST(adj, expdest); 405 406 CHECKSTRSPACE((int)(DIGITS(result) + 1), expdest); 407 fmtstr(expdest, DIGITS(result), ARITH_FORMAT_STR, result); 408 adj = strlen(expdest); 409 STADJUST(adj, expdest); 410 if (!quoted) 411 recordregion(begoff, expdest - stackblock(), 0); 412 return p; 413 } 414 415 416 /* 417 * Perform command substitution. 418 */ 419 static void 420 expbackq(union node *cmd, int quoted, int flag) 421 { 422 struct backcmd in; 423 int i; 424 char buf[128]; 425 char *p; 426 char *dest = expdest; 427 struct ifsregion saveifs, *savelastp; 428 struct nodelist *saveargbackq; 429 char lastc; 430 int startloc = dest - stackblock(); 431 char const *syntax = quoted? DQSYNTAX : BASESYNTAX; 432 int quotes = flag & (EXP_FULL | EXP_CASE); 433 size_t nnl; 434 435 INTOFF; 436 saveifs = ifsfirst; 437 savelastp = ifslastp; 438 saveargbackq = argbackq; 439 p = grabstackstr(dest); 440 evalbackcmd(cmd, &in); 441 ungrabstackstr(p, dest); 442 ifsfirst = saveifs; 443 ifslastp = savelastp; 444 argbackq = saveargbackq; 445 446 p = in.buf; 447 lastc = '\0'; 448 nnl = 0; 449 /* Don't copy trailing newlines */ 450 for (;;) { 451 if (--in.nleft < 0) { 452 if (in.fd < 0) 453 break; 454 while ((i = read(in.fd, buf, sizeof buf)) < 0 && errno == EINTR); 455 TRACE(("expbackq: read returns %d\n", i)); 456 if (i <= 0) 457 break; 458 p = buf; 459 in.nleft = i - 1; 460 } 461 lastc = *p++; 462 if (lastc != '\0') { 463 if (lastc == '\n') { 464 nnl++; 465 } else { 466 CHECKSTRSPACE(nnl + 2, dest); 467 while (nnl > 0) { 468 nnl--; 469 USTPUTC('\n', dest); 470 } 471 if (quotes && syntax[(int)lastc] == CCTL) 472 USTPUTC(CTLESC, dest); 473 USTPUTC(lastc, dest); 474 } 475 } 476 } 477 478 if (in.fd >= 0) 479 close(in.fd); 480 if (in.buf) 481 ckfree(in.buf); 482 if (in.jp) 483 exitstatus = waitforjob(in.jp, (int *)NULL); 484 if (quoted == 0) 485 recordregion(startloc, dest - stackblock(), 0); 486 TRACE(("expbackq: size=%td: \"%.*s\"\n", 487 ((dest - stackblock()) - startloc), 488 (int)((dest - stackblock()) - startloc), 489 stackblock() + startloc)); 490 expdest = dest; 491 INTON; 492 } 493 494 495 496 static void 497 recordleft(const char *str, const char *loc, char *startp) 498 { 499 int amount; 500 501 amount = ((str - 1) - (loc - startp)) - expdest; 502 STADJUST(amount, expdest); 503 while (loc != str - 1) 504 *startp++ = *loc++; 505 } 506 507 static int 508 subevalvar(char *p, char *str, int strloc, int subtype, int startloc, 509 int varflags, int quotes) 510 { 511 char *startp; 512 char *loc = NULL; 513 char *q; 514 int c = 0; 515 struct nodelist *saveargbackq = argbackq; 516 int amount; 517 518 argstr(p, (subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX || 519 subtype == VSTRIMRIGHT || subtype == VSTRIMRIGHTMAX ? 520 EXP_CASE : 0) | EXP_TILDE); 521 STACKSTRNUL(expdest); 522 argbackq = saveargbackq; 523 startp = stackblock() + startloc; 524 if (str == NULL) 525 str = stackblock() + strloc; 526 527 switch (subtype) { 528 case VSASSIGN: 529 setvar(str, startp, 0); 530 amount = startp - expdest; 531 STADJUST(amount, expdest); 532 varflags &= ~VSNUL; 533 return 1; 534 535 case VSQUESTION: 536 if (*p != CTLENDVAR) { 537 outfmt(out2, "%s\n", startp); 538 error((char *)NULL); 539 } 540 error("%.*s: parameter %snot set", (int)(p - str - 1), 541 str, (varflags & VSNUL) ? "null or " : ""); 542 return 0; 543 544 case VSTRIMLEFT: 545 for (loc = startp; loc < str; loc++) { 546 c = *loc; 547 *loc = '\0'; 548 if (patmatch(str, startp, quotes)) { 549 *loc = c; 550 recordleft(str, loc, startp); 551 return 1; 552 } 553 *loc = c; 554 if (quotes && *loc == CTLESC) 555 loc++; 556 } 557 return 0; 558 559 case VSTRIMLEFTMAX: 560 for (loc = str - 1; loc >= startp;) { 561 c = *loc; 562 *loc = '\0'; 563 if (patmatch(str, startp, quotes)) { 564 *loc = c; 565 recordleft(str, loc, startp); 566 return 1; 567 } 568 *loc = c; 569 loc--; 570 if (quotes && loc > startp && *(loc - 1) == CTLESC) { 571 for (q = startp; q < loc; q++) 572 if (*q == CTLESC) 573 q++; 574 if (q > loc) 575 loc--; 576 } 577 } 578 return 0; 579 580 case VSTRIMRIGHT: 581 for (loc = str - 1; loc >= startp;) { 582 if (patmatch(str, loc, quotes)) { 583 amount = loc - expdest; 584 STADJUST(amount, expdest); 585 return 1; 586 } 587 loc--; 588 if (quotes && loc > startp && *(loc - 1) == CTLESC) { 589 for (q = startp; q < loc; q++) 590 if (*q == CTLESC) 591 q++; 592 if (q > loc) 593 loc--; 594 } 595 } 596 return 0; 597 598 case VSTRIMRIGHTMAX: 599 for (loc = startp; loc < str - 1; loc++) { 600 if (patmatch(str, loc, quotes)) { 601 amount = loc - expdest; 602 STADJUST(amount, expdest); 603 return 1; 604 } 605 if (quotes && *loc == CTLESC) 606 loc++; 607 } 608 return 0; 609 610 611 default: 612 abort(); 613 } 614 } 615 616 617 /* 618 * Expand a variable, and return a pointer to the next character in the 619 * input string. 620 */ 621 622 static char * 623 evalvar(char *p, int flag) 624 { 625 int subtype; 626 int varflags; 627 char *var; 628 const char *val; 629 int patloc; 630 int c; 631 int set; 632 int special; 633 int startloc; 634 int varlen; 635 int varlenb; 636 int easy; 637 int quotes = flag & (EXP_FULL | EXP_CASE); 638 int record = 0; 639 640 varflags = (unsigned char)*p++; 641 subtype = varflags & VSTYPE; 642 var = p; 643 special = 0; 644 if (! is_name(*p)) 645 special = 1; 646 p = strchr(p, '=') + 1; 647 again: /* jump here after setting a variable with ${var=text} */ 648 if (varflags & VSLINENO) { 649 set = 1; 650 special = 1; 651 val = NULL; 652 } else if (special) { 653 set = varisset(var, varflags & VSNUL); 654 val = NULL; 655 } else { 656 val = bltinlookup(var, 1); 657 if (val == NULL || ((varflags & VSNUL) && val[0] == '\0')) { 658 val = NULL; 659 set = 0; 660 } else 661 set = 1; 662 } 663 varlen = 0; 664 startloc = expdest - stackblock(); 665 if (!set && uflag && *var != '@' && *var != '*') { 666 switch (subtype) { 667 case VSNORMAL: 668 case VSTRIMLEFT: 669 case VSTRIMLEFTMAX: 670 case VSTRIMRIGHT: 671 case VSTRIMRIGHTMAX: 672 case VSLENGTH: 673 error("%.*s: parameter not set", (int)(p - var - 1), 674 var); 675 } 676 } 677 if (set && subtype != VSPLUS) { 678 /* insert the value of the variable */ 679 if (special) { 680 if (varflags & VSLINENO) 681 STPUTBIN(var, p - var - 1, expdest); 682 else 683 varvalue(var, varflags & VSQUOTE, subtype, flag); 684 if (subtype == VSLENGTH) { 685 varlenb = expdest - stackblock() - startloc; 686 varlen = varlenb; 687 if (localeisutf8) { 688 val = stackblock() + startloc; 689 for (;val != expdest; val++) 690 if ((*val & 0xC0) == 0x80) 691 varlen--; 692 } 693 STADJUST(-varlenb, expdest); 694 } 695 } else { 696 if (subtype == VSLENGTH) { 697 for (;*val; val++) 698 if (!localeisutf8 || 699 (*val & 0xC0) != 0x80) 700 varlen++; 701 } 702 else 703 strtodest(val, flag, subtype, 704 varflags & VSQUOTE); 705 } 706 } 707 708 if (subtype == VSPLUS) 709 set = ! set; 710 711 easy = ((varflags & VSQUOTE) == 0 || 712 (*var == '@' && shellparam.nparam != 1)); 713 714 715 switch (subtype) { 716 case VSLENGTH: 717 expdest = cvtnum(varlen, expdest); 718 record = 1; 719 break; 720 721 case VSNORMAL: 722 record = easy; 723 break; 724 725 case VSPLUS: 726 case VSMINUS: 727 if (!set) { 728 argstr(p, flag | (flag & EXP_FULL ? EXP_SPLIT_LIT : 0) | 729 (varflags & VSQUOTE ? EXP_LIT_QUOTED : 0)); 730 break; 731 } 732 record = easy; 733 break; 734 735 case VSTRIMLEFT: 736 case VSTRIMLEFTMAX: 737 case VSTRIMRIGHT: 738 case VSTRIMRIGHTMAX: 739 if (!set) 740 break; 741 /* 742 * Terminate the string and start recording the pattern 743 * right after it 744 */ 745 STPUTC('\0', expdest); 746 patloc = expdest - stackblock(); 747 if (subevalvar(p, NULL, patloc, subtype, 748 startloc, varflags, quotes) == 0) { 749 int amount = (expdest - stackblock() - patloc) + 1; 750 STADJUST(-amount, expdest); 751 } 752 /* Remove any recorded regions beyond start of variable */ 753 removerecordregions(startloc); 754 record = 1; 755 break; 756 757 case VSASSIGN: 758 case VSQUESTION: 759 if (!set) { 760 if (subevalvar(p, var, 0, subtype, startloc, varflags, 761 quotes)) { 762 varflags &= ~VSNUL; 763 /* 764 * Remove any recorded regions beyond 765 * start of variable 766 */ 767 removerecordregions(startloc); 768 goto again; 769 } 770 break; 771 } 772 record = easy; 773 break; 774 775 case VSERROR: 776 c = p - var - 1; 777 error("${%.*s%s}: Bad substitution", c, var, 778 (c > 0 && *p != CTLENDVAR) ? "..." : ""); 779 780 default: 781 abort(); 782 } 783 784 if (record) 785 recordregion(startloc, expdest - stackblock(), 786 varflags & VSQUOTE || (ifsset() && ifsval()[0] == '\0' && 787 (*var == '@' || *var == '*'))); 788 789 if (subtype != VSNORMAL) { /* skip to end of alternative */ 790 int nesting = 1; 791 for (;;) { 792 if ((c = *p++) == CTLESC) 793 p++; 794 else if (c == CTLBACKQ || c == (CTLBACKQ|CTLQUOTE)) { 795 if (set) 796 argbackq = argbackq->next; 797 } else if (c == CTLVAR) { 798 if ((*p++ & VSTYPE) != VSNORMAL) 799 nesting++; 800 } else if (c == CTLENDVAR) { 801 if (--nesting == 0) 802 break; 803 } 804 } 805 } 806 return p; 807 } 808 809 810 811 /* 812 * Test whether a specialized variable is set. 813 */ 814 815 static int 816 varisset(const char *name, int nulok) 817 { 818 819 if (*name == '!') 820 return backgndpidset(); 821 else if (*name == '@' || *name == '*') { 822 if (*shellparam.p == NULL) 823 return 0; 824 825 if (nulok) { 826 char **av; 827 828 for (av = shellparam.p; *av; av++) 829 if (**av != '\0') 830 return 1; 831 return 0; 832 } 833 } else if (is_digit(*name)) { 834 char *ap; 835 long num; 836 837 errno = 0; 838 num = strtol(name, NULL, 10); 839 if (errno != 0 || num > shellparam.nparam) 840 return 0; 841 842 if (num == 0) 843 ap = arg0; 844 else 845 ap = shellparam.p[num - 1]; 846 847 if (nulok && (ap == NULL || *ap == '\0')) 848 return 0; 849 } 850 return 1; 851 } 852 853 static void 854 strtodest(const char *p, int flag, int subtype, int quoted) 855 { 856 if (flag & (EXP_FULL | EXP_CASE) && subtype != VSLENGTH) 857 STPUTS_QUOTES(p, quoted ? DQSYNTAX : BASESYNTAX, expdest); 858 else 859 STPUTS(p, expdest); 860 } 861 862 /* 863 * Add the value of a specialized variable to the stack string. 864 */ 865 866 static void 867 varvalue(const char *name, int quoted, int subtype, int flag) 868 { 869 int num; 870 char *p; 871 int i; 872 char sep[2]; 873 char **ap; 874 875 switch (*name) { 876 case '$': 877 num = rootpid; 878 break; 879 case '?': 880 num = oexitstatus; 881 break; 882 case '#': 883 num = shellparam.nparam; 884 break; 885 case '!': 886 num = backgndpidval(); 887 break; 888 case '-': 889 for (i = 0 ; i < NSHORTOPTS ; i++) { 890 if (optlist[i].val) 891 STPUTC(optlist[i].letter, expdest); 892 } 893 return; 894 case '@': 895 if (flag & EXP_FULL && quoted) { 896 for (ap = shellparam.p ; (p = *ap++) != NULL ; ) { 897 strtodest(p, flag, subtype, quoted); 898 if (*ap) 899 STPUTC('\0', expdest); 900 } 901 return; 902 } 903 /* FALLTHROUGH */ 904 case '*': 905 if (ifsset()) 906 sep[0] = ifsval()[0]; 907 else 908 sep[0] = ' '; 909 sep[1] = '\0'; 910 for (ap = shellparam.p ; (p = *ap++) != NULL ; ) { 911 strtodest(p, flag, subtype, quoted); 912 if (!*ap) 913 break; 914 if (sep[0]) 915 strtodest(sep, flag, subtype, quoted); 916 else if (flag & EXP_FULL && !quoted && **ap != '\0') 917 STPUTC('\0', expdest); 918 } 919 return; 920 default: 921 if (is_digit(*name)) { 922 num = atoi(name); 923 if (num == 0) 924 p = arg0; 925 else if (num > 0 && num <= shellparam.nparam) 926 p = shellparam.p[num - 1]; 927 else 928 return; 929 strtodest(p, flag, subtype, quoted); 930 } 931 return; 932 } 933 expdest = cvtnum(num, expdest); 934 } 935 936 937 938 /* 939 * Record the fact that we have to scan this region of the 940 * string for IFS characters. 941 */ 942 943 static void 944 recordregion(int start, int end, int inquotes) 945 { 946 struct ifsregion *ifsp; 947 948 INTOFF; 949 if (ifslastp == NULL) { 950 ifsp = &ifsfirst; 951 } else { 952 if (ifslastp->endoff == start 953 && ifslastp->inquotes == inquotes) { 954 /* extend previous area */ 955 ifslastp->endoff = end; 956 INTON; 957 return; 958 } 959 ifsp = (struct ifsregion *)ckmalloc(sizeof (struct ifsregion)); 960 ifslastp->next = ifsp; 961 } 962 ifslastp = ifsp; 963 ifslastp->next = NULL; 964 ifslastp->begoff = start; 965 ifslastp->endoff = end; 966 ifslastp->inquotes = inquotes; 967 INTON; 968 } 969 970 971 972 /* 973 * Break the argument string into pieces based upon IFS and add the 974 * strings to the argument list. The regions of the string to be 975 * searched for IFS characters have been stored by recordregion. 976 * CTLESC characters are preserved but have little effect in this pass 977 * other than escaping CTL* characters. In particular, they do not escape 978 * IFS characters: that should be done with the ifsregion mechanism. 979 * CTLQUOTEMARK characters are used to preserve empty quoted strings. 980 * This pass treats them as a regular character, making the string non-empty. 981 * Later, they are removed along with the other CTL* characters. 982 */ 983 static void 984 ifsbreakup(char *string, struct arglist *arglist) 985 { 986 struct ifsregion *ifsp; 987 struct strlist *sp; 988 char *start; 989 char *p; 990 char *q; 991 const char *ifs; 992 const char *ifsspc; 993 int had_param_ch = 0; 994 995 start = string; 996 997 if (ifslastp == NULL) { 998 /* Return entire argument, IFS doesn't apply to any of it */ 999 sp = (struct strlist *)stalloc(sizeof *sp); 1000 sp->text = start; 1001 *arglist->lastp = sp; 1002 arglist->lastp = &sp->next; 1003 return; 1004 } 1005 1006 ifs = ifsset() ? ifsval() : " \t\n"; 1007 1008 for (ifsp = &ifsfirst; ifsp != NULL; ifsp = ifsp->next) { 1009 p = string + ifsp->begoff; 1010 while (p < string + ifsp->endoff) { 1011 q = p; 1012 if (*p == CTLESC) 1013 p++; 1014 if (ifsp->inquotes) { 1015 /* Only NULs (should be from "$@") end args */ 1016 had_param_ch = 1; 1017 if (*p != 0) { 1018 p++; 1019 continue; 1020 } 1021 ifsspc = NULL; 1022 } else { 1023 if (!strchr(ifs, *p)) { 1024 had_param_ch = 1; 1025 p++; 1026 continue; 1027 } 1028 ifsspc = strchr(" \t\n", *p); 1029 1030 /* Ignore IFS whitespace at start */ 1031 if (q == start && ifsspc != NULL) { 1032 p++; 1033 start = p; 1034 continue; 1035 } 1036 had_param_ch = 0; 1037 } 1038 1039 /* Save this argument... */ 1040 *q = '\0'; 1041 sp = (struct strlist *)stalloc(sizeof *sp); 1042 sp->text = start; 1043 *arglist->lastp = sp; 1044 arglist->lastp = &sp->next; 1045 p++; 1046 1047 if (ifsspc != NULL) { 1048 /* Ignore further trailing IFS whitespace */ 1049 for (; p < string + ifsp->endoff; p++) { 1050 q = p; 1051 if (*p == CTLESC) 1052 p++; 1053 if (strchr(ifs, *p) == NULL) { 1054 p = q; 1055 break; 1056 } 1057 if (strchr(" \t\n", *p) == NULL) { 1058 p++; 1059 break; 1060 } 1061 } 1062 } 1063 start = p; 1064 } 1065 } 1066 1067 /* 1068 * Save anything left as an argument. 1069 * Traditionally we have treated 'IFS=':'; set -- x$IFS' as 1070 * generating 2 arguments, the second of which is empty. 1071 * Some recent clarification of the Posix spec say that it 1072 * should only generate one.... 1073 */ 1074 if (had_param_ch || *start != 0) { 1075 sp = (struct strlist *)stalloc(sizeof *sp); 1076 sp->text = start; 1077 *arglist->lastp = sp; 1078 arglist->lastp = &sp->next; 1079 } 1080 } 1081 1082 1083 static char expdir[PATH_MAX]; 1084 #define expdir_end (expdir + sizeof(expdir)) 1085 1086 /* 1087 * Perform pathname generation and remove control characters. 1088 * At this point, the only control characters should be CTLESC and CTLQUOTEMARK. 1089 * The results are stored in the list exparg. 1090 */ 1091 static void 1092 expandmeta(struct strlist *str) 1093 { 1094 char *p; 1095 struct strlist **savelastp; 1096 struct strlist *sp; 1097 char c; 1098 1099 while (str) { 1100 savelastp = exparg.lastp; 1101 if (!fflag) { 1102 p = str->text; 1103 for (; (c = *p) != '\0'; p++) { 1104 /* fast check for meta chars */ 1105 if (c == '*' || c == '?' || c == '[') { 1106 INTOFF; 1107 expmeta(expdir, str->text); 1108 INTON; 1109 break; 1110 } 1111 } 1112 } 1113 if (exparg.lastp == savelastp) { 1114 /* 1115 * no matches 1116 */ 1117 *exparg.lastp = str; 1118 rmescapes(str->text); 1119 exparg.lastp = &str->next; 1120 } else { 1121 *exparg.lastp = NULL; 1122 *savelastp = sp = expsort(*savelastp); 1123 while (sp->next != NULL) 1124 sp = sp->next; 1125 exparg.lastp = &sp->next; 1126 } 1127 str = str->next; 1128 } 1129 } 1130 1131 1132 /* 1133 * Do metacharacter (i.e. *, ?, [...]) expansion. 1134 */ 1135 1136 static void 1137 expmeta(char *enddir, char *name) 1138 { 1139 const char *p; 1140 const char *q; 1141 const char *start; 1142 char *endname; 1143 int metaflag; 1144 struct stat statb; 1145 DIR *dirp; 1146 struct dirent *dp; 1147 int atend; 1148 int matchdot; 1149 int esc; 1150 int namlen; 1151 1152 metaflag = 0; 1153 start = name; 1154 for (p = name; esc = 0, *p; p += esc + 1) { 1155 if (*p == '*' || *p == '?') 1156 metaflag = 1; 1157 else if (*p == '[') { 1158 q = p + 1; 1159 if (*q == '!' || *q == '^') 1160 q++; 1161 for (;;) { 1162 while (*q == CTLQUOTEMARK) 1163 q++; 1164 if (*q == CTLESC) 1165 q++; 1166 if (*q == '/' || *q == '\0') 1167 break; 1168 if (*++q == ']') { 1169 metaflag = 1; 1170 break; 1171 } 1172 } 1173 } else if (*p == '\0') 1174 break; 1175 else if (*p == CTLQUOTEMARK) 1176 continue; 1177 else { 1178 if (*p == CTLESC) 1179 esc++; 1180 if (p[esc] == '/') { 1181 if (metaflag) 1182 break; 1183 start = p + esc + 1; 1184 } 1185 } 1186 } 1187 if (metaflag == 0) { /* we've reached the end of the file name */ 1188 if (enddir != expdir) 1189 metaflag++; 1190 for (p = name ; ; p++) { 1191 if (*p == CTLQUOTEMARK) 1192 continue; 1193 if (*p == CTLESC) 1194 p++; 1195 *enddir++ = *p; 1196 if (*p == '\0') 1197 break; 1198 if (enddir == expdir_end) 1199 return; 1200 } 1201 if (metaflag == 0 || lstat(expdir, &statb) >= 0) 1202 addfname(expdir); 1203 return; 1204 } 1205 endname = name + (p - name); 1206 if (start != name) { 1207 p = name; 1208 while (p < start) { 1209 while (*p == CTLQUOTEMARK) 1210 p++; 1211 if (*p == CTLESC) 1212 p++; 1213 *enddir++ = *p++; 1214 if (enddir == expdir_end) 1215 return; 1216 } 1217 } 1218 if (enddir == expdir) { 1219 p = "."; 1220 } else if (enddir == expdir + 1 && *expdir == '/') { 1221 p = "/"; 1222 } else { 1223 p = expdir; 1224 enddir[-1] = '\0'; 1225 } 1226 if ((dirp = opendir(p)) == NULL) 1227 return; 1228 if (enddir != expdir) 1229 enddir[-1] = '/'; 1230 if (*endname == 0) { 1231 atend = 1; 1232 } else { 1233 atend = 0; 1234 *endname = '\0'; 1235 endname += esc + 1; 1236 } 1237 matchdot = 0; 1238 p = start; 1239 while (*p == CTLQUOTEMARK) 1240 p++; 1241 if (*p == CTLESC) 1242 p++; 1243 if (*p == '.') 1244 matchdot++; 1245 while (! int_pending() && (dp = readdir(dirp)) != NULL) { 1246 if (dp->d_name[0] == '.' && ! matchdot) 1247 continue; 1248 if (patmatch(start, dp->d_name, 0)) { 1249 namlen = dp->d_namlen; 1250 if (enddir + namlen + 1 > expdir_end) 1251 continue; 1252 memcpy(enddir, dp->d_name, namlen + 1); 1253 if (atend) 1254 addfname(expdir); 1255 else { 1256 if (dp->d_type != DT_UNKNOWN && 1257 dp->d_type != DT_DIR && 1258 dp->d_type != DT_LNK) 1259 continue; 1260 if (enddir + namlen + 2 > expdir_end) 1261 continue; 1262 enddir[namlen] = '/'; 1263 enddir[namlen + 1] = '\0'; 1264 expmeta(enddir + namlen + 1, endname); 1265 } 1266 } 1267 } 1268 closedir(dirp); 1269 if (! atend) 1270 endname[-esc - 1] = esc ? CTLESC : '/'; 1271 } 1272 1273 1274 /* 1275 * Add a file name to the list. 1276 */ 1277 1278 static void 1279 addfname(char *name) 1280 { 1281 char *p; 1282 struct strlist *sp; 1283 1284 p = stsavestr(name); 1285 sp = (struct strlist *)stalloc(sizeof *sp); 1286 sp->text = p; 1287 *exparg.lastp = sp; 1288 exparg.lastp = &sp->next; 1289 } 1290 1291 1292 /* 1293 * Sort the results of file name expansion. It calculates the number of 1294 * strings to sort and then calls msort (short for merge sort) to do the 1295 * work. 1296 */ 1297 1298 static struct strlist * 1299 expsort(struct strlist *str) 1300 { 1301 int len; 1302 struct strlist *sp; 1303 1304 len = 0; 1305 for (sp = str ; sp ; sp = sp->next) 1306 len++; 1307 return msort(str, len); 1308 } 1309 1310 1311 static struct strlist * 1312 msort(struct strlist *list, int len) 1313 { 1314 struct strlist *p, *q = NULL; 1315 struct strlist **lpp; 1316 int half; 1317 int n; 1318 1319 if (len <= 1) 1320 return list; 1321 half = len >> 1; 1322 p = list; 1323 for (n = half ; --n >= 0 ; ) { 1324 q = p; 1325 p = p->next; 1326 } 1327 q->next = NULL; /* terminate first half of list */ 1328 q = msort(list, half); /* sort first half of list */ 1329 p = msort(p, len - half); /* sort second half */ 1330 lpp = &list; 1331 for (;;) { 1332 if (strcmp(p->text, q->text) < 0) { 1333 *lpp = p; 1334 lpp = &p->next; 1335 if ((p = *lpp) == NULL) { 1336 *lpp = q; 1337 break; 1338 } 1339 } else { 1340 *lpp = q; 1341 lpp = &q->next; 1342 if ((q = *lpp) == NULL) { 1343 *lpp = p; 1344 break; 1345 } 1346 } 1347 } 1348 return list; 1349 } 1350 1351 1352 1353 static wchar_t 1354 get_wc(const char **p) 1355 { 1356 wchar_t c; 1357 int chrlen; 1358 1359 chrlen = mbtowc(&c, *p, 4); 1360 if (chrlen == 0) 1361 return 0; 1362 else if (chrlen == -1) 1363 c = 0; 1364 else 1365 *p += chrlen; 1366 return c; 1367 } 1368 1369 1370 /* 1371 * See if a character matches a character class, starting at the first colon 1372 * of "[:class:]". 1373 * If a valid character class is recognized, a pointer to the next character 1374 * after the final closing bracket is stored into *end, otherwise a null 1375 * pointer is stored into *end. 1376 */ 1377 static int 1378 match_charclass(const char *p, wchar_t chr, const char **end) 1379 { 1380 char name[20]; 1381 const char *nameend; 1382 wctype_t cclass; 1383 1384 *end = NULL; 1385 p++; 1386 nameend = strstr(p, ":]"); 1387 if (nameend == NULL || (size_t)(nameend - p) >= sizeof(name) || 1388 nameend == p) 1389 return 0; 1390 memcpy(name, p, nameend - p); 1391 name[nameend - p] = '\0'; 1392 *end = nameend + 2; 1393 cclass = wctype(name); 1394 /* An unknown class matches nothing but is valid nevertheless. */ 1395 if (cclass == 0) 1396 return 0; 1397 return iswctype(chr, cclass); 1398 } 1399 1400 1401 /* 1402 * Returns true if the pattern matches the string. 1403 */ 1404 1405 static int 1406 patmatch(const char *pattern, const char *string, int squoted) 1407 { 1408 const char *p, *q, *end; 1409 const char *bt_p, *bt_q; 1410 char c; 1411 wchar_t wc, wc2; 1412 1413 p = pattern; 1414 q = string; 1415 bt_p = NULL; 1416 bt_q = NULL; 1417 for (;;) { 1418 switch (c = *p++) { 1419 case '\0': 1420 if (*q != '\0') 1421 goto backtrack; 1422 return 1; 1423 case CTLESC: 1424 if (squoted && *q == CTLESC) 1425 q++; 1426 if (*q++ != *p++) 1427 goto backtrack; 1428 break; 1429 case CTLQUOTEMARK: 1430 continue; 1431 case '?': 1432 if (squoted && *q == CTLESC) 1433 q++; 1434 if (*q == '\0') 1435 return 0; 1436 if (localeisutf8) { 1437 wc = get_wc(&q); 1438 /* 1439 * A '?' does not match invalid UTF-8 but a 1440 * '*' does, so backtrack. 1441 */ 1442 if (wc == 0) 1443 goto backtrack; 1444 } else 1445 wc = (unsigned char)*q++; 1446 break; 1447 case '*': 1448 c = *p; 1449 while (c == CTLQUOTEMARK || c == '*') 1450 c = *++p; 1451 /* 1452 * If the pattern ends here, we know the string 1453 * matches without needing to look at the rest of it. 1454 */ 1455 if (c == '\0') 1456 return 1; 1457 /* 1458 * First try the shortest match for the '*' that 1459 * could work. We can forget any earlier '*' since 1460 * there is no way having it match more characters 1461 * can help us, given that we are already here. 1462 */ 1463 bt_p = p; 1464 bt_q = q; 1465 break; 1466 case '[': { 1467 const char *savep, *saveq; 1468 int invert, found; 1469 wchar_t chr; 1470 1471 savep = p, saveq = q; 1472 invert = 0; 1473 if (*p == '!' || *p == '^') { 1474 invert++; 1475 p++; 1476 } 1477 found = 0; 1478 if (squoted && *q == CTLESC) 1479 q++; 1480 if (*q == '\0') 1481 return 0; 1482 if (localeisutf8) { 1483 chr = get_wc(&q); 1484 if (chr == 0) 1485 goto backtrack; 1486 } else 1487 chr = (unsigned char)*q++; 1488 c = *p++; 1489 do { 1490 if (c == '\0') { 1491 p = savep, q = saveq; 1492 c = '['; 1493 goto dft; 1494 } 1495 if (c == CTLQUOTEMARK) 1496 continue; 1497 if (c == '[' && *p == ':') { 1498 found |= match_charclass(p, chr, &end); 1499 if (end != NULL) 1500 p = end; 1501 } 1502 if (c == CTLESC) 1503 c = *p++; 1504 if (localeisutf8 && c & 0x80) { 1505 p--; 1506 wc = get_wc(&p); 1507 if (wc == 0) /* bad utf-8 */ 1508 return 0; 1509 } else 1510 wc = (unsigned char)c; 1511 if (*p == '-' && p[1] != ']') { 1512 p++; 1513 while (*p == CTLQUOTEMARK) 1514 p++; 1515 if (*p == CTLESC) 1516 p++; 1517 if (localeisutf8) { 1518 wc2 = get_wc(&p); 1519 if (wc2 == 0) /* bad utf-8 */ 1520 return 0; 1521 } else 1522 wc2 = (unsigned char)*p++; 1523 if ( collate_range_cmp(chr, wc) >= 0 1524 && collate_range_cmp(chr, wc2) <= 0 1525 ) 1526 found = 1; 1527 } else { 1528 if (chr == wc) 1529 found = 1; 1530 } 1531 } while ((c = *p++) != ']'); 1532 if (found == invert) 1533 goto backtrack; 1534 break; 1535 } 1536 dft: default: 1537 if (squoted && *q == CTLESC) 1538 q++; 1539 if (*q == '\0') 1540 return 0; 1541 if (*q++ == c) 1542 break; 1543 backtrack: 1544 /* 1545 * If we have a mismatch (other than hitting the end 1546 * of the string), go back to the last '*' seen and 1547 * have it match one additional character. 1548 */ 1549 if (bt_p == NULL) 1550 return 0; 1551 if (squoted && *bt_q == CTLESC) 1552 bt_q++; 1553 if (*bt_q == '\0') 1554 return 0; 1555 bt_q++; 1556 p = bt_p; 1557 q = bt_q; 1558 break; 1559 } 1560 } 1561 } 1562 1563 1564 1565 /* 1566 * Remove any CTLESC and CTLQUOTEMARK characters from a string. 1567 */ 1568 1569 void 1570 rmescapes(char *str) 1571 { 1572 char *p, *q; 1573 1574 p = str; 1575 while (*p != CTLESC && *p != CTLQUOTEMARK && *p != CTLQUOTEEND) { 1576 if (*p++ == '\0') 1577 return; 1578 } 1579 q = p; 1580 while (*p) { 1581 if (*p == CTLQUOTEMARK || *p == CTLQUOTEEND) { 1582 p++; 1583 continue; 1584 } 1585 if (*p == CTLESC) 1586 p++; 1587 *q++ = *p++; 1588 } 1589 *q = '\0'; 1590 } 1591 1592 1593 1594 /* 1595 * See if a pattern matches in a case statement. 1596 */ 1597 1598 int 1599 casematch(union node *pattern, const char *val) 1600 { 1601 struct stackmark smark; 1602 int result; 1603 char *p; 1604 1605 setstackmark(&smark); 1606 argbackq = pattern->narg.backquote; 1607 STARTSTACKSTR(expdest); 1608 ifslastp = NULL; 1609 argstr(pattern->narg.text, EXP_TILDE | EXP_CASE); 1610 STPUTC('\0', expdest); 1611 p = grabstackstr(expdest); 1612 result = patmatch(p, val, 0); 1613 popstackmark(&smark); 1614 return result; 1615 } 1616 1617 /* 1618 * Our own itoa(). 1619 */ 1620 1621 static char * 1622 cvtnum(int num, char *buf) 1623 { 1624 char temp[32]; 1625 int neg = num < 0; 1626 char *p = temp + 31; 1627 1628 temp[31] = '\0'; 1629 1630 do { 1631 *--p = num % 10 + '0'; 1632 } while ((num /= 10) != 0); 1633 1634 if (neg) 1635 *--p = '-'; 1636 1637 STPUTS(p, buf); 1638 return buf; 1639 } 1640 1641 /* 1642 * Do most of the work for wordexp(3). 1643 */ 1644 1645 int 1646 wordexpcmd(int argc, char **argv) 1647 { 1648 size_t len; 1649 int i; 1650 1651 out1fmt("%08x", argc - 1); 1652 for (i = 1, len = 0; i < argc; i++) 1653 len += strlen(argv[i]); 1654 out1fmt("%08x", (int)len); 1655 for (i = 1; i < argc; i++) 1656 outbin(argv[i], strlen(argv[i]) + 1, out1); 1657 return (0); 1658 } 1659