1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1982-2010 AT&T Intellectual Property * 5 * and is licensed under the * 6 * Common Public License, Version 1.0 * 7 * by AT&T Intellectual Property * 8 * * 9 * A copy of the License is available at * 10 * http://www.opensource.org/licenses/cpl1.0.txt * 11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * David Korn <dgk@research.att.com> * 18 * * 19 ***********************************************************************/ 20 #pragma prototyped 21 /* 22 * string processing routines for Korn shell 23 * 24 */ 25 26 #include <ast.h> 27 #include <ast_wchar.h> 28 #include "defs.h" 29 #include <stak.h> 30 #include <ccode.h> 31 #include "shtable.h" 32 #include "lexstates.h" 33 #include "national.h" 34 35 #if !SHOPT_MULTIBYTE 36 #define mbchar(p) (*(unsigned char*)p++) 37 #endif 38 39 #if _hdr_wctype 40 # include <wctype.h> 41 #endif 42 43 #if !_lib_iswprint && !defined(iswprint) 44 # define iswprint(c) (((c)&~0377) || isprint(c)) 45 #endif 46 47 48 /* 49 * Table lookup routine 50 * <table> is searched for string <sp> and corresponding value is returned 51 * This is only used for small tables and is used to save non-sharable memory 52 */ 53 54 const Shtable_t *sh_locate(register const char *sp,const Shtable_t *table,int size) 55 { 56 register int first; 57 register const Shtable_t *tp; 58 register int c; 59 static const Shtable_t empty = {0,0}; 60 if(sp==0 || (first= *sp)==0) 61 return(&empty); 62 tp=table; 63 while((c= *tp->sh_name) && (CC_NATIVE!=CC_ASCII || c <= first)) 64 { 65 if(first == c && strcmp(sp,tp->sh_name)==0) 66 return(tp); 67 tp = (Shtable_t*)((char*)tp+size); 68 } 69 return(&empty); 70 } 71 72 /* 73 * shtab_options lookup routine 74 */ 75 76 #define sep(c) ((c)=='-'||(c)=='_') 77 78 int sh_lookopt(register const char *sp, int *invert) 79 { 80 register int first; 81 register const Shtable_t *tp; 82 register int c; 83 register const char *s, *t, *sw, *tw; 84 int amb; 85 int hit; 86 int inv; 87 int no; 88 if(sp==0) 89 return(0); 90 if(*sp=='n' && *(sp+1)=='o' && (*(sp+2)!='t' || *(sp+3)!='i')) 91 { 92 sp+=2; 93 if(sep(*sp)) 94 sp++; 95 *invert = !*invert; 96 } 97 if((first= *sp)==0) 98 return(0); 99 tp=shtab_options; 100 amb=hit=0; 101 for(;;) 102 { 103 t=tp->sh_name; 104 if(no = *t=='n' && *(t+1)=='o' && *(t+2)!='t') 105 t+=2; 106 if(!(c= *t)) 107 break; 108 if(first == c) 109 { 110 if(strcmp(sp,t)==0) 111 { 112 *invert ^= no; 113 return(tp->sh_number); 114 } 115 s=sw=sp; 116 tw=t; 117 for(;;) 118 { 119 if(!*s || *s=='=') 120 { 121 if (*s == '=' && !strtol(s+1, NiL, 0)) 122 no = !no; 123 if (!*t) 124 { 125 *invert ^= no; 126 return(tp->sh_number); 127 } 128 if (hit || amb) 129 { 130 hit = 0; 131 amb = 1; 132 } 133 else 134 { 135 hit = tp->sh_number; 136 inv = no; 137 } 138 break; 139 } 140 else if(!*t) 141 break; 142 else if(sep(*s)) 143 sw = ++s; 144 else if(sep(*t)) 145 tw = ++t; 146 else if(*s==*t) 147 { 148 s++; 149 t++; 150 } 151 else if(s==sw && t==tw) 152 break; 153 else 154 { 155 if(t!=tw) 156 { 157 while(*t && !sep(*t)) 158 t++; 159 if(!*t) 160 break; 161 tw = ++t; 162 } 163 while (s>sw && *s!=*t) 164 s--; 165 } 166 } 167 } 168 tp = (Shtable_t*)((char*)tp+sizeof(*shtab_options)); 169 } 170 if(hit) 171 *invert ^= inv; 172 return(hit); 173 } 174 175 /* 176 * look for the substring <oldsp> in <string> and replace with <newsp> 177 * The new string is put on top of the stack 178 */ 179 char *sh_substitute(const char *string,const char *oldsp,char *newsp) 180 /*@ 181 assume string!=NULL && oldsp!=NULL && newsp!=NULL; 182 return x satisfying x==NULL || 183 strlen(x)==(strlen(in string)+strlen(in newsp)-strlen(in oldsp)); 184 @*/ 185 { 186 register const char *sp = string; 187 register const char *cp; 188 const char *savesp = 0; 189 stakseek(0); 190 if(*sp==0) 191 return((char*)0); 192 if(*(cp=oldsp) == 0) 193 goto found; 194 #if SHOPT_MULTIBYTE 195 mbinit(); 196 #endif /* SHOPT_MULTIBYTE */ 197 do 198 { 199 /* skip to first character which matches start of oldsp */ 200 while(*sp && (savesp==sp || *sp != *cp)) 201 { 202 #if SHOPT_MULTIBYTE 203 /* skip a whole character at a time */ 204 int c = mbsize(sp); 205 if(c < 0) 206 sp++; 207 while(c-- > 0) 208 #endif /* SHOPT_MULTIBYTE */ 209 stakputc(*sp++); 210 } 211 if(*sp == 0) 212 return((char*)0); 213 savesp = sp; 214 for(;*cp;cp++) 215 { 216 if(*cp != *sp++) 217 break; 218 } 219 if(*cp==0) 220 /* match found */ 221 goto found; 222 sp = savesp; 223 cp = oldsp; 224 } 225 while(*sp); 226 return((char*)0); 227 228 found: 229 /* copy new */ 230 stakputs(newsp); 231 /* copy rest of string */ 232 stakputs(sp); 233 return(stakfreeze(1)); 234 } 235 236 /* 237 * TRIM(sp) 238 * Remove escape characters from characters in <sp> and eliminate quoted nulls. 239 */ 240 241 void sh_trim(register char *sp) 242 /*@ 243 assume sp!=NULL; 244 promise strlen(in sp) <= in strlen(sp); 245 @*/ 246 { 247 register char *dp; 248 register int c; 249 if(sp) 250 { 251 dp = sp; 252 while(c= *sp) 253 { 254 #if SHOPT_MULTIBYTE 255 int len; 256 if(mbwide() && (len=mbsize(sp))>1) 257 { 258 memmove(dp, sp, len); 259 dp += len; 260 sp += len; 261 continue; 262 } 263 #endif /* SHOPT_MULTIBYTE */ 264 sp++; 265 if(c == '\\') 266 c = *sp++; 267 if(c) 268 *dp++ = c; 269 } 270 *dp = 0; 271 } 272 } 273 274 /* 275 * copy <str1> to <str2> changing upper case to lower case 276 * <str2> must be big enough to hold <str1> 277 * <str1> and <str2> may point to the same place. 278 */ 279 280 void sh_utol(register char const *str1,register char *str2) 281 /*@ 282 assume str1!=0 && str2!=0 283 return x satisfying strlen(in str1)==strlen(in str2); 284 @*/ 285 { 286 register int c; 287 for(; c= *((unsigned char*)str1); str1++,str2++) 288 { 289 if(isupper(c)) 290 *str2 = tolower(c); 291 else 292 *str2 = c; 293 } 294 *str2 = 0; 295 } 296 297 /* 298 * print <str> quoting chars so that it can be read by the shell 299 * puts null terminated result on stack, but doesn't freeze it 300 */ 301 char *sh_fmtq(const char *string) 302 { 303 register const char *cp = string, *op; 304 register int c, state; 305 int offset; 306 if(!cp) 307 return((char*)0); 308 offset = staktell(); 309 #if SHOPT_MULTIBYTE 310 state = ((c= mbchar(cp))==0); 311 #else 312 state = ((c= *(unsigned char*)cp++)==0); 313 #endif 314 if(isaletter(c)) 315 { 316 #if SHOPT_MULTIBYTE 317 while((c=mbchar(cp)),isaname(c)); 318 #else 319 while((c = *(unsigned char*)cp++),isaname(c)); 320 #endif 321 if(c==0) 322 return((char*)string); 323 if(c=='=') 324 { 325 if(*cp==0) 326 return((char*)string); 327 c = cp - string; 328 stakwrite(string,c); 329 string = cp; 330 #if SHOPT_MULTIBYTE 331 c = mbchar(cp); 332 #else 333 c = *(unsigned char*)cp++; 334 #endif 335 } 336 } 337 if(c==0 || c=='#' || c=='~') 338 state = 1; 339 #if SHOPT_MULTIBYTE 340 for(;c;c= mbchar(cp)) 341 #else 342 for(;c; c= *(unsigned char*)cp++) 343 #endif 344 { 345 #if SHOPT_MULTIBYTE 346 if(c=='\'' || !iswprint(c)) 347 #else 348 if(c=='\'' || !isprint(c)) 349 #endif /* SHOPT_MULTIBYTE */ 350 state = 2; 351 else if(c==']' || (c!=':' && c<=0xff && (c=sh_lexstates[ST_NORM][c]) && c!=S_EPAT)) 352 state |=1; 353 } 354 if(state<2) 355 { 356 if(state==1) 357 stakputc('\''); 358 if(c = --cp - string) 359 stakwrite(string,c); 360 if(state==1) 361 stakputc('\''); 362 } 363 else 364 { 365 stakwrite("$'",2); 366 cp = string; 367 #if SHOPT_MULTIBYTE 368 while(op = cp, c= mbchar(cp)) 369 #else 370 while(op = cp, c= *(unsigned char*)cp++) 371 #endif 372 { 373 state=1; 374 switch(c) 375 { 376 case ('a'==97?'\033':39): 377 c = 'E'; 378 break; 379 case '\n': 380 c = 'n'; 381 break; 382 case '\r': 383 c = 'r'; 384 break; 385 case '\t': 386 c = 't'; 387 break; 388 case '\f': 389 c = 'f'; 390 break; 391 case '\b': 392 c = 'b'; 393 break; 394 case '\a': 395 c = 'a'; 396 break; 397 case '\\': case '\'': 398 break; 399 default: 400 #if SHOPT_MULTIBYTE 401 if(!iswprint(c)) 402 { 403 while(op<cp) 404 sfprintf(staksp,"\\%.3o",*(unsigned char*)op++); 405 continue; 406 } 407 #else 408 if(!isprint(c)) 409 { 410 sfprintf(staksp,"\\%.3o",c); 411 continue; 412 } 413 #endif 414 state=0; 415 break; 416 } 417 if(state) 418 { 419 stakputc('\\'); 420 stakputc(c); 421 } 422 else 423 stakwrite(op, cp-op); 424 } 425 stakputc('\''); 426 } 427 stakputc(0); 428 return(stakptr(offset)); 429 } 430 431 /* 432 * print <str> quoting chars so that it can be read by the shell 433 * puts null terminated result on stack, but doesn't freeze it 434 * single!=0 limits quoting to '...' 435 * fold>0 prints raw newlines and inserts appropriately 436 * escaped newlines every (fold-x) chars 437 */ 438 char *sh_fmtqf(const char *string, int single, int fold) 439 { 440 register const char *cp = string; 441 register const char *bp; 442 register const char *vp; 443 register int c; 444 register int n; 445 register int q; 446 register int a; 447 int offset; 448 449 if (--fold < 8) 450 fold = 0; 451 if (!cp || !*cp || !single && !fold || fold && strlen(string) < fold) 452 return sh_fmtq(cp); 453 offset = staktell(); 454 single = single ? 1 : 3; 455 c = mbchar(string); 456 a = isaletter(c) ? '=' : 0; 457 vp = cp + 1; 458 do 459 { 460 q = 0; 461 n = fold; 462 bp = cp; 463 while ((!n || n-- > 0) && (c = mbchar(cp))) 464 { 465 if (a && !isaname(c)) 466 a = 0; 467 #if SHOPT_MULTIBYTE 468 if (c >= 0x200) 469 continue; 470 if (c == '\'' || !iswprint(c)) 471 #else 472 if (c == '\'' || !isprint(c)) 473 #endif /* SHOPT_MULTIBYTE */ 474 { 475 q = single; 476 break; 477 } 478 if (c == '\n') 479 q = 1; 480 else if (c == a) 481 { 482 stakwrite(bp, cp - bp); 483 bp = cp; 484 vp = cp + 1; 485 a = 0; 486 } 487 else if ((c == '#' || c == '~') && cp == vp || c == ']' || c != ':' && (c = sh_lexstates[ST_NORM][c]) && c != S_EPAT) 488 q = 1; 489 } 490 if (q & 2) 491 { 492 stakputc('$'); 493 stakputc('\''); 494 cp = bp; 495 n = fold - 3; 496 q = 1; 497 while (c = mbchar(cp)) 498 { 499 switch (c) 500 { 501 case ('a'==97?'\033':39): 502 c = 'E'; 503 break; 504 case '\n': 505 q = 0; 506 n = fold - 1; 507 break; 508 case '\r': 509 c = 'r'; 510 break; 511 case '\t': 512 c = 't'; 513 break; 514 case '\f': 515 c = 'f'; 516 break; 517 case '\b': 518 c = 'b'; 519 break; 520 case '\a': 521 c = 'a'; 522 break; 523 case '\\': 524 if (*cp == 'n') 525 { 526 c = '\n'; 527 q = 0; 528 n = fold - 1; 529 break; 530 } 531 case '\'': 532 break; 533 default: 534 #if SHOPT_MULTIBYTE 535 if(!iswprint(c)) 536 #else 537 if(!isprint(c)) 538 #endif 539 { 540 if ((n -= 4) <= 0) 541 { 542 stakwrite("'\\\n$'", 5); 543 n = fold - 7; 544 } 545 sfprintf(staksp, "\\%03o", c); 546 continue; 547 } 548 q = 0; 549 break; 550 } 551 if ((n -= q + 1) <= 0) 552 { 553 if (!q) 554 { 555 stakputc('\''); 556 cp = bp; 557 break; 558 } 559 stakwrite("'\\\n$'", 5); 560 n = fold - 5; 561 } 562 if (q) 563 stakputc('\\'); 564 else 565 q = 1; 566 stakputc(c); 567 bp = cp; 568 } 569 if (!c) 570 stakputc('\''); 571 } 572 else if (q & 1) 573 { 574 stakputc('\''); 575 cp = bp; 576 n = fold ? (fold - 2) : 0; 577 while (c = mbchar(cp)) 578 { 579 if (c == '\n') 580 n = fold - 1; 581 else if (n && --n <= 0) 582 { 583 n = fold - 2; 584 stakwrite(bp, --cp - bp); 585 bp = cp; 586 stakwrite("'\\\n'", 4); 587 } 588 else if (n == 1 && *cp == '\'') 589 { 590 n = fold - 5; 591 stakwrite(bp, --cp - bp); 592 bp = cp; 593 stakwrite("'\\\n\\''", 6); 594 } 595 else if (c == '\'') 596 { 597 stakwrite(bp, cp - bp - 1); 598 bp = cp; 599 if (n && (n -= 4) <= 0) 600 { 601 n = fold - 5; 602 stakwrite("'\\\n\\''", 6); 603 } 604 else 605 stakwrite("'\\''", 4); 606 } 607 } 608 stakwrite(bp, cp - bp - 1); 609 stakputc('\''); 610 } 611 else if (n = fold) 612 { 613 cp = bp; 614 while (c = mbchar(cp)) 615 { 616 if (--n <= 0) 617 { 618 n = fold; 619 stakwrite(bp, --cp - bp); 620 bp = cp; 621 stakwrite("\\\n", 2); 622 } 623 } 624 stakwrite(bp, cp - bp - 1); 625 } 626 else 627 stakwrite(bp, cp - bp); 628 if (c) 629 { 630 stakputc('\\'); 631 stakputc('\n'); 632 } 633 } while (c); 634 stakputc(0); 635 return(stakptr(offset)); 636 } 637 638 #if SHOPT_MULTIBYTE 639 int sh_strchr(const char *string, register const char *dp) 640 { 641 wchar_t c, d; 642 register const char *cp=string; 643 mbinit(); 644 d = mbchar(dp); 645 mbinit(); 646 while(c = mbchar(cp)) 647 { 648 if(c==d) 649 return(cp-string); 650 } 651 if(d==0) 652 return(cp-string); 653 return(-1); 654 } 655 #endif /* SHOPT_MULTIBYTE */ 656 657 const char *_sh_translate(const char *message) 658 { 659 #if ERROR_VERSION >= 20000317L 660 return(ERROR_translate(0,0,e_dict,message)); 661 #else 662 #if ERROR_VERSION >= 20000101L 663 return(ERROR_translate(e_dict,message)); 664 #else 665 return(ERROR_translate(message,1)); 666 #endif 667 #endif 668 } 669 670 /* 671 * change '['identifier']' to identifier 672 * character before <str> must be a '[' 673 * returns pointer to last character 674 */ 675 char *sh_checkid(char *str, char *last) 676 { 677 register unsigned char *cp = (unsigned char*)str; 678 register unsigned char *v = cp; 679 register int c; 680 if(c= *cp++,isaletter(c)) 681 while(c= *cp++,isaname(c)); 682 if(c==']' && (!last || ((char*)cp==last))) 683 { 684 /* eliminate [ and ] */ 685 while(v < cp) 686 { 687 v[-1] = *v; 688 v++; 689 } 690 if(last) 691 last -=2; 692 else 693 { 694 while(*v) 695 { 696 v[-2] = *v; 697 v++; 698 } 699 v[-2] = 0; 700 last = (char*)v; 701 } 702 } 703 return(last); 704 } 705 706 #if _AST_VERSION <= 20000317L 707 char *fmtident(const char *string) 708 { 709 return((char*)string); 710 } 711 #endif 712