1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1982-2012 AT&T Intellectual Property * 5 * and is licensed under the * 6 * Eclipse Public License, Version 1.0 * 7 * by AT&T Intellectual Property * 8 * * 9 * A copy of the License is available at * 10 * http://www.eclipse.org/org/documents/epl-v10.html * 11 * (with md5 checksum b35adb5213ca9657e911e9befb180842) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * David Korn <dgk@research.att.com> * 18 * * 19 ***********************************************************************/ 20 #pragma prototyped 21 /* 22 * string processing routines for Korn shell 23 * 24 */ 25 26 #include <ast.h> 27 #include <ast_wchar.h> 28 #include "defs.h" 29 #include <stak.h> 30 #include <ccode.h> 31 #include "shtable.h" 32 #include "lexstates.h" 33 #include "national.h" 34 35 #if _hdr_wctype 36 # include <wctype.h> 37 #endif 38 39 #if !_lib_iswprint && !defined(iswprint) 40 # define iswprint(c) (((c)&~0377) || isprint(c)) 41 #endif 42 43 44 /* 45 * Table lookup routine 46 * <table> is searched for string <sp> and corresponding value is returned 47 * This is only used for small tables and is used to save non-sharable memory 48 */ 49 50 const Shtable_t *sh_locate(register const char *sp,const Shtable_t *table,int size) 51 { 52 register int first; 53 register const Shtable_t *tp; 54 register int c; 55 static const Shtable_t empty = {0,0}; 56 if(sp==0 || (first= *sp)==0) 57 return(&empty); 58 tp=table; 59 while((c= *tp->sh_name) && (CC_NATIVE!=CC_ASCII || c <= first)) 60 { 61 if(first == c && strcmp(sp,tp->sh_name)==0) 62 return(tp); 63 tp = (Shtable_t*)((char*)tp+size); 64 } 65 return(&empty); 66 } 67 68 /* 69 * shtab_options lookup routine 70 */ 71 72 #define sep(c) ((c)=='-'||(c)=='_') 73 74 int sh_lookopt(register const char *sp, int *invert) 75 { 76 register int first; 77 register const Shtable_t *tp; 78 register int c; 79 register const char *s, *t, *sw, *tw; 80 int amb; 81 int hit; 82 int inv; 83 int no; 84 if(sp==0) 85 return(0); 86 if(*sp=='n' && *(sp+1)=='o' && (*(sp+2)!='t' || *(sp+3)!='i')) 87 { 88 sp+=2; 89 if(sep(*sp)) 90 sp++; 91 *invert = !*invert; 92 } 93 if((first= *sp)==0) 94 return(0); 95 tp=shtab_options; 96 amb=hit=0; 97 for(;;) 98 { 99 t=tp->sh_name; 100 if(no = *t=='n' && *(t+1)=='o' && *(t+2)!='t') 101 t+=2; 102 if(!(c= *t)) 103 break; 104 if(first == c) 105 { 106 if(strcmp(sp,t)==0) 107 { 108 *invert ^= no; 109 return(tp->sh_number); 110 } 111 s=sw=sp; 112 tw=t; 113 for(;;) 114 { 115 if(!*s || *s=='=') 116 { 117 if (*s == '=' && !strtol(s+1, NiL, 0)) 118 no = !no; 119 if (!*t) 120 { 121 *invert ^= no; 122 return(tp->sh_number); 123 } 124 if (hit || amb) 125 { 126 hit = 0; 127 amb = 1; 128 } 129 else 130 { 131 hit = tp->sh_number; 132 inv = no; 133 } 134 break; 135 } 136 else if(!*t) 137 break; 138 else if(sep(*s)) 139 sw = ++s; 140 else if(sep(*t)) 141 tw = ++t; 142 else if(*s==*t) 143 { 144 s++; 145 t++; 146 } 147 else if(s==sw && t==tw) 148 break; 149 else 150 { 151 if(t!=tw) 152 { 153 while(*t && !sep(*t)) 154 t++; 155 if(!*t) 156 break; 157 tw = ++t; 158 } 159 while (s>sw && *s!=*t) 160 s--; 161 } 162 } 163 } 164 tp = (Shtable_t*)((char*)tp+sizeof(*shtab_options)); 165 } 166 if(hit) 167 *invert ^= inv; 168 return(hit); 169 } 170 171 /* 172 * look for the substring <oldsp> in <string> and replace with <newsp> 173 * The new string is put on top of the stack 174 */ 175 char *sh_substitute(const char *string,const char *oldsp,char *newsp) 176 /*@ 177 assume string!=NULL && oldsp!=NULL && newsp!=NULL; 178 return x satisfying x==NULL || 179 strlen(x)==(strlen(in string)+strlen(in newsp)-strlen(in oldsp)); 180 @*/ 181 { 182 register const char *sp = string; 183 register const char *cp; 184 const char *savesp = 0; 185 stakseek(0); 186 if(*sp==0) 187 return((char*)0); 188 if(*(cp=oldsp) == 0) 189 goto found; 190 #if SHOPT_MULTIBYTE 191 mbinit(); 192 #endif /* SHOPT_MULTIBYTE */ 193 do 194 { 195 /* skip to first character which matches start of oldsp */ 196 while(*sp && (savesp==sp || *sp != *cp)) 197 { 198 #if SHOPT_MULTIBYTE 199 /* skip a whole character at a time */ 200 int c = mbsize(sp); 201 if(c < 0) 202 sp++; 203 while(c-- > 0) 204 #endif /* SHOPT_MULTIBYTE */ 205 stakputc(*sp++); 206 } 207 if(*sp == 0) 208 return((char*)0); 209 savesp = sp; 210 for(;*cp;cp++) 211 { 212 if(*cp != *sp++) 213 break; 214 } 215 if(*cp==0) 216 /* match found */ 217 goto found; 218 sp = savesp; 219 cp = oldsp; 220 } 221 while(*sp); 222 return((char*)0); 223 224 found: 225 /* copy new */ 226 stakputs(newsp); 227 /* copy rest of string */ 228 stakputs(sp); 229 return(stakfreeze(1)); 230 } 231 232 /* 233 * TRIM(sp) 234 * Remove escape characters from characters in <sp> and eliminate quoted nulls. 235 */ 236 237 void sh_trim(register char *sp) 238 /*@ 239 assume sp!=NULL; 240 promise strlen(in sp) <= in strlen(sp); 241 @*/ 242 { 243 register char *dp; 244 register int c; 245 if(sp) 246 { 247 dp = sp; 248 while(c= *sp) 249 { 250 #if SHOPT_MULTIBYTE 251 int len; 252 if(mbwide() && (len=mbsize(sp))>1) 253 { 254 memmove(dp, sp, len); 255 dp += len; 256 sp += len; 257 continue; 258 } 259 #endif /* SHOPT_MULTIBYTE */ 260 sp++; 261 if(c == '\\') 262 c = *sp++; 263 if(c) 264 *dp++ = c; 265 } 266 *dp = 0; 267 } 268 } 269 270 /* 271 * copy <str1> to <str2> changing upper case to lower case 272 * <str2> must be big enough to hold <str1> 273 * <str1> and <str2> may point to the same place. 274 */ 275 276 void sh_utol(register char const *str1,register char *str2) 277 /*@ 278 assume str1!=0 && str2!=0 279 return x satisfying strlen(in str1)==strlen(in str2); 280 @*/ 281 { 282 register int c; 283 for(; c= *((unsigned char*)str1); str1++,str2++) 284 { 285 if(isupper(c)) 286 *str2 = tolower(c); 287 else 288 *str2 = c; 289 } 290 *str2 = 0; 291 } 292 293 /* 294 * format string as a csv field 295 */ 296 static char *sh_fmtcsv(const char *string) 297 { 298 register const char *cp = string; 299 register int c; 300 int offset; 301 if(!cp) 302 return((char*)0); 303 offset = staktell(); 304 while((c=mbchar(cp)),isaname(c)); 305 if(c==0) 306 return((char*)string); 307 stakputc('"'); 308 stakwrite(string,cp-string); 309 if(c=='"') 310 stakputc('"'); 311 string = cp; 312 while(c=mbchar(cp)) 313 { 314 if(c=='"') 315 { 316 stakwrite(string,cp-string); 317 string = cp; 318 stakputc('"'); 319 } 320 } 321 if(--cp>string) 322 stakwrite(string,cp-string); 323 stakputc('"'); 324 stakputc(0); 325 return(stakptr(offset)); 326 } 327 328 /* 329 * print <str> quoting chars so that it can be read by the shell 330 * puts null terminated result on stack, but doesn't freeze it 331 */ 332 char *sh_fmtq(const char *string) 333 { 334 register const char *cp = string, *op; 335 register int c, state; 336 int offset; 337 if(!cp) 338 return((char*)0); 339 #if SHOPT_MULTIBYTE 340 mbinit(); 341 #endif 342 offset = staktell(); 343 state = ((c= mbchar(cp))==0); 344 if(isaletter(c)) 345 { 346 while((c=mbchar(cp)),isaname(c)); 347 if(c==0) 348 return((char*)string); 349 if(c=='=') 350 { 351 if(*cp==0) 352 return((char*)string); 353 if(*cp=='=') 354 cp++; 355 c = cp - string; 356 stakwrite(string,c); 357 string = cp; 358 c = mbchar(cp); 359 } 360 } 361 if(c==0 || c=='#' || c=='~') 362 state = 1; 363 for(;c;c= mbchar(cp)) 364 { 365 #if SHOPT_MULTIBYTE 366 if(c=='\'' || c>=128 || c<0 || !iswprint(c)) 367 #else 368 if(c=='\'' || !isprint(c)) 369 #endif /* SHOPT_MULTIBYTE */ 370 state = 2; 371 else if(c==']' || c=='=' || (c!=':' && c<=0x7f && (c=sh_lexstates[ST_NORM][c]) && c!=S_EPAT)) 372 state |=1; 373 } 374 if(state<2) 375 { 376 if(state==1) 377 stakputc('\''); 378 if(c = --cp - string) 379 stakwrite(string,c); 380 if(state==1) 381 stakputc('\''); 382 } 383 else 384 { 385 int isbyte=0; 386 stakwrite("$'",2); 387 cp = string; 388 #if SHOPT_MULTIBYTE 389 while(op = cp, c= mbchar(cp)) 390 #else 391 while(op = cp, c= *(unsigned char*)cp++) 392 #endif 393 { 394 state=1; 395 switch(c) 396 { 397 case ('a'==97?'\033':39): 398 c = 'E'; 399 break; 400 case '\n': 401 c = 'n'; 402 break; 403 case '\r': 404 c = 'r'; 405 break; 406 case '\t': 407 c = 't'; 408 break; 409 case '\f': 410 c = 'f'; 411 break; 412 case '\b': 413 c = 'b'; 414 break; 415 case '\a': 416 c = 'a'; 417 break; 418 case '\\': case '\'': 419 break; 420 default: 421 #if SHOPT_MULTIBYTE 422 isbyte = 0; 423 if(c<0) 424 { 425 c = *((unsigned char *)op); 426 cp = op+1; 427 isbyte = 1; 428 } 429 if(mbwide() && ((cp-op)>1)) 430 { 431 sfprintf(staksp,"\\u[%x]",c); 432 continue; 433 } 434 else if(!iswprint(c) || isbyte) 435 #else 436 if(!isprint(c)) 437 #endif 438 { 439 sfprintf(staksp,"\\x%.2x",c); 440 continue; 441 } 442 state=0; 443 break; 444 } 445 if(state) 446 { 447 stakputc('\\'); 448 stakputc(c); 449 } 450 else 451 stakwrite(op, cp-op); 452 } 453 stakputc('\''); 454 } 455 stakputc(0); 456 return(stakptr(offset)); 457 } 458 459 /* 460 * print <str> quoting chars so that it can be read by the shell 461 * puts null terminated result on stack, but doesn't freeze it 462 * single!=0 limits quoting to '...' 463 * fold>0 prints raw newlines and inserts appropriately 464 * escaped newlines every (fold-x) chars 465 */ 466 char *sh_fmtqf(const char *string, int single, int fold) 467 { 468 register const char *cp = string; 469 register const char *bp; 470 register const char *vp; 471 register int c; 472 register int n; 473 register int q; 474 register int a; 475 int offset; 476 477 if (--fold < 8) 478 fold = 0; 479 if(single) 480 return sh_fmtcsv(cp); 481 if (!cp || !*cp || !fold || fold && strlen(string) < fold) 482 return sh_fmtq(cp); 483 offset = staktell(); 484 single = single ? 1 : 3; 485 c = mbchar(string); 486 a = isaletter(c) ? '=' : 0; 487 vp = cp + 1; 488 do 489 { 490 q = 0; 491 n = fold; 492 bp = cp; 493 while ((!n || n-- > 0) && (c = mbchar(cp))) 494 { 495 if (a && !isaname(c)) 496 a = 0; 497 #if SHOPT_MULTIBYTE 498 if (c >= 0x200) 499 continue; 500 if (c == '\'' || !iswprint(c)) 501 #else 502 if (c == '\'' || !isprint(c)) 503 #endif /* SHOPT_MULTIBYTE */ 504 { 505 q = single; 506 break; 507 } 508 if (c == '\n') 509 q = 1; 510 else if (c == a) 511 { 512 stakwrite(bp, cp - bp); 513 bp = cp; 514 vp = cp + 1; 515 a = 0; 516 } 517 else if ((c == '#' || c == '~') && cp == vp || c == ']' || c != ':' && (c = sh_lexstates[ST_NORM][c]) && c != S_EPAT) 518 q = 1; 519 } 520 if (q & 2) 521 { 522 stakputc('$'); 523 stakputc('\''); 524 cp = bp; 525 n = fold - 3; 526 q = 1; 527 while (c = mbchar(cp)) 528 { 529 switch (c) 530 { 531 case ('a'==97?'\033':39): 532 c = 'E'; 533 break; 534 case '\n': 535 q = 0; 536 n = fold - 1; 537 break; 538 case '\r': 539 c = 'r'; 540 break; 541 case '\t': 542 c = 't'; 543 break; 544 case '\f': 545 c = 'f'; 546 break; 547 case '\b': 548 c = 'b'; 549 break; 550 case '\a': 551 c = 'a'; 552 break; 553 case '\\': 554 if (*cp == 'n') 555 { 556 c = '\n'; 557 q = 0; 558 n = fold - 1; 559 break; 560 } 561 case '\'': 562 break; 563 default: 564 #if SHOPT_MULTIBYTE 565 if(!iswprint(c)) 566 #else 567 if(!isprint(c)) 568 #endif 569 { 570 if ((n -= 4) <= 0) 571 { 572 stakwrite("'\\\n$'", 5); 573 n = fold - 7; 574 } 575 sfprintf(staksp, "\\%03o", c); 576 continue; 577 } 578 q = 0; 579 break; 580 } 581 if ((n -= q + 1) <= 0) 582 { 583 if (!q) 584 { 585 stakputc('\''); 586 cp = bp; 587 break; 588 } 589 stakwrite("'\\\n$'", 5); 590 n = fold - 5; 591 } 592 if (q) 593 stakputc('\\'); 594 else 595 q = 1; 596 stakputc(c); 597 bp = cp; 598 } 599 if (!c) 600 stakputc('\''); 601 } 602 else if (q & 1) 603 { 604 stakputc('\''); 605 cp = bp; 606 n = fold ? (fold - 2) : 0; 607 while (c = mbchar(cp)) 608 { 609 if (c == '\n') 610 n = fold - 1; 611 else if (n && --n <= 0) 612 { 613 n = fold - 2; 614 stakwrite(bp, --cp - bp); 615 bp = cp; 616 stakwrite("'\\\n'", 4); 617 } 618 else if (n == 1 && *cp == '\'') 619 { 620 n = fold - 5; 621 stakwrite(bp, --cp - bp); 622 bp = cp; 623 stakwrite("'\\\n\\''", 6); 624 } 625 else if (c == '\'') 626 { 627 stakwrite(bp, cp - bp - 1); 628 bp = cp; 629 if (n && (n -= 4) <= 0) 630 { 631 n = fold - 5; 632 stakwrite("'\\\n\\''", 6); 633 } 634 else 635 stakwrite("'\\''", 4); 636 } 637 } 638 stakwrite(bp, cp - bp - 1); 639 stakputc('\''); 640 } 641 else if (n = fold) 642 { 643 cp = bp; 644 while (c = mbchar(cp)) 645 { 646 if (--n <= 0) 647 { 648 n = fold; 649 stakwrite(bp, --cp - bp); 650 bp = cp; 651 stakwrite("\\\n", 2); 652 } 653 } 654 stakwrite(bp, cp - bp - 1); 655 } 656 else 657 stakwrite(bp, cp - bp); 658 if (c) 659 { 660 stakputc('\\'); 661 stakputc('\n'); 662 } 663 } while (c); 664 stakputc(0); 665 return(stakptr(offset)); 666 } 667 668 #if SHOPT_MULTIBYTE 669 int sh_strchr(const char *string, register const char *dp) 670 { 671 wchar_t c, d; 672 register const char *cp=string; 673 mbinit(); 674 d = mbchar(dp); 675 mbinit(); 676 while(c = mbchar(cp)) 677 { 678 if(c==d) 679 return(cp-string); 680 } 681 if(d==0) 682 return(cp-string); 683 return(-1); 684 } 685 #endif /* SHOPT_MULTIBYTE */ 686 687 const char *_sh_translate(const char *message) 688 { 689 #if ERROR_VERSION >= 20000317L 690 return(ERROR_translate(0,0,e_dict,message)); 691 #else 692 #if ERROR_VERSION >= 20000101L 693 return(ERROR_translate(e_dict,message)); 694 #else 695 return(ERROR_translate(message,1)); 696 #endif 697 #endif 698 } 699 700 /* 701 * change '['identifier']' to identifier 702 * character before <str> must be a '[' 703 * returns pointer to last character 704 */ 705 char *sh_checkid(char *str, char *last) 706 { 707 register unsigned char *cp = (unsigned char*)str; 708 register unsigned char *v = cp; 709 register int c; 710 if(c=mbchar(cp),isaletter(c)) 711 while(c=mbchar(cp),isaname(c)); 712 if(c==']' && (!last || ((char*)cp==last))) 713 { 714 /* eliminate [ and ] */ 715 while(v < cp) 716 { 717 v[-1] = *v; 718 v++; 719 } 720 if(last) 721 last -=2; 722 else 723 { 724 while(*v) 725 { 726 v[-2] = *v; 727 v++; 728 } 729 v[-2] = 0; 730 last = (char*)v; 731 } 732 } 733 return(last); 734 } 735 736 #if _AST_VERSION <= 20000317L 737 char *fmtident(const char *string) 738 { 739 return((char*)string); 740 } 741 #endif 742