1 /* $Header: /p/tcsh/cvsroot/tcsh/tc.str.c,v 3.42 2012/01/10 21:34:31 christos Exp $ */ 2 /* 3 * tc.str.c: Short string package 4 * This has been a lesson of how to write buggy code! 5 */ 6 /*- 7 * Copyright (c) 1980, 1991 The Regents of the University of California. 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 #include "sh.h" 35 36 #include <assert.h> 37 #include <limits.h> 38 39 RCSID("$tcsh: tc.str.c,v 3.42 2012/01/10 21:34:31 christos Exp $") 40 41 #define MALLOC_INCR 128 42 #ifdef WIDE_STRINGS 43 #define MALLOC_SURPLUS MB_LEN_MAX /* Space for one multibyte character */ 44 #else 45 #define MALLOC_SURPLUS 0 46 #endif 47 48 #ifdef WIDE_STRINGS 49 size_t 50 one_mbtowc(Char *pwc, const char *s, size_t n) 51 { 52 int len; 53 54 len = rt_mbtowc(pwc, s, n); 55 if (len == -1) { 56 reset_mbtowc(); 57 *pwc = (unsigned char)*s | INVALID_BYTE; 58 } 59 if (len <= 0) 60 len = 1; 61 return len; 62 } 63 64 size_t 65 one_wctomb(char *s, Char wchar) 66 { 67 int len; 68 69 if (wchar & INVALID_BYTE) { 70 s[0] = wchar & 0xFF; 71 len = 1; 72 } else { 73 #ifdef UTF16_STRINGS 74 if (wchar >= 0x10000) { 75 /* UTF-16 systems can't handle these values directly in calls to 76 wctomb. Convert value to UTF-16 surrogate and call wcstombs to 77 convert the "string" to the correct multibyte representation, 78 if any. */ 79 wchar_t ws[3]; 80 wchar -= 0x10000; 81 ws[0] = 0xd800 | (wchar >> 10); 82 ws[1] = 0xdc00 | (wchar & 0x3ff); 83 ws[2] = 0; 84 /* The return value of wcstombs excludes the trailing 0, so len is 85 the correct number of multibytes for the Unicode char. */ 86 len = wcstombs (s, ws, MB_CUR_MAX + 1); 87 } else 88 #endif 89 len = wctomb(s, (wchar_t) wchar); 90 if (len == -1) 91 s[0] = wchar; 92 if (len <= 0) 93 len = 1; 94 } 95 return len; 96 } 97 98 int 99 rt_mbtowc(Char *pwc, const char *s, size_t n) 100 { 101 int ret; 102 char back[MB_LEN_MAX]; 103 wchar_t tmp; 104 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC) 105 # if defined(AUTOSET_KANJI) 106 static mbstate_t mb_zero, mb; 107 /* 108 * Workaround the Shift-JIS endcoding that translates unshifted 7 bit ASCII! 109 */ 110 if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') && 111 !memcmp(&mb, &mb_zero, sizeof(mb))) 112 { 113 *pwc = *s; 114 return 1; 115 } 116 # else 117 mbstate_t mb; 118 # endif 119 120 memset (&mb, 0, sizeof mb); 121 ret = mbrtowc(&tmp, s, n, &mb); 122 #else 123 ret = mbtowc(&tmp, s, n); 124 #endif 125 if (ret > 0) { 126 *pwc = tmp; 127 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC) 128 if (tmp >= 0xd800 && tmp <= 0xdbff) { 129 /* UTF-16 surrogate pair. Fetch second half and compute 130 UTF-32 value. Dispense with the inverse test in this case. */ 131 size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb); 132 if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2) 133 ret = -1; 134 else { 135 *pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000; 136 ret += n2; 137 } 138 } else 139 #endif 140 if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0) 141 ret = -1; 142 143 } else if (ret == -2) 144 ret = -1; 145 else if (ret == 0) 146 *pwc = '\0'; 147 148 return ret; 149 } 150 #endif 151 152 #ifdef SHORT_STRINGS 153 Char ** 154 blk2short(char **src) 155 { 156 size_t n; 157 Char **sdst, **dst; 158 159 /* 160 * Count 161 */ 162 for (n = 0; src[n] != NULL; n++) 163 continue; 164 sdst = dst = xmalloc((n + 1) * sizeof(Char *)); 165 166 for (; *src != NULL; src++) 167 *dst++ = SAVE(*src); 168 *dst = NULL; 169 return (sdst); 170 } 171 172 char ** 173 short2blk(Char **src) 174 { 175 size_t n; 176 char **sdst, **dst; 177 178 /* 179 * Count 180 */ 181 for (n = 0; src[n] != NULL; n++) 182 continue; 183 sdst = dst = xmalloc((n + 1) * sizeof(char *)); 184 185 for (; *src != NULL; src++) 186 *dst++ = strsave(short2str(*src)); 187 *dst = NULL; 188 return (sdst); 189 } 190 191 Char * 192 str2short(const char *src) 193 { 194 static struct Strbuf buf; /* = Strbuf_INIT; */ 195 196 if (src == NULL) 197 return (NULL); 198 199 buf.len = 0; 200 while (*src) { 201 Char wc; 202 203 src += one_mbtowc(&wc, src, MB_LEN_MAX); 204 Strbuf_append1(&buf, wc); 205 } 206 Strbuf_terminate(&buf); 207 return buf.s; 208 } 209 210 char * 211 short2str(const Char *src) 212 { 213 static char *sdst = NULL; 214 static size_t dstsize = 0; 215 char *dst, *edst; 216 217 if (src == NULL) 218 return (NULL); 219 220 if (sdst == NULL) { 221 dstsize = MALLOC_INCR; 222 sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char)); 223 } 224 dst = sdst; 225 edst = &dst[dstsize]; 226 while (*src) { 227 dst += one_wctomb(dst, *src & CHAR); 228 src++; 229 if (dst >= edst) { 230 char *wdst = dst; 231 char *wedst = edst; 232 233 dstsize += MALLOC_INCR; 234 sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char)); 235 edst = &sdst[dstsize]; 236 dst = &edst[-MALLOC_INCR]; 237 while (wdst > wedst) { 238 dst++; 239 wdst--; 240 } 241 } 242 } 243 *dst = 0; 244 return (sdst); 245 } 246 247 #if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS) 248 Char * 249 s_strcpy(Char *dst, const Char *src) 250 { 251 Char *sdst; 252 253 sdst = dst; 254 while ((*dst++ = *src++) != '\0') 255 continue; 256 return (sdst); 257 } 258 259 Char * 260 s_strncpy(Char *dst, const Char *src, size_t n) 261 { 262 Char *sdst; 263 264 if (n == 0) 265 return(dst); 266 267 sdst = dst; 268 do 269 if ((*dst++ = *src++) == '\0') { 270 while (--n != 0) 271 *dst++ = '\0'; 272 return(sdst); 273 } 274 while (--n != 0); 275 return (sdst); 276 } 277 278 Char * 279 s_strcat(Char *dst, const Char *src) 280 { 281 Strcpy(Strend(dst), src); 282 return dst; 283 } 284 285 #ifdef NOTUSED 286 Char * 287 s_strncat(Char *dst, const Char *src, size_t n) 288 { 289 Char *sdst; 290 291 if (n == 0) 292 return (dst); 293 294 sdst = dst; 295 296 while (*dst) 297 dst++; 298 299 do 300 if ((*dst++ = *src++) == '\0') 301 return(sdst); 302 while (--n != 0) 303 continue; 304 305 *dst = '\0'; 306 return (sdst); 307 } 308 309 #endif 310 311 Char * 312 s_strchr(const Char *str, int ch) 313 { 314 do 315 if (*str == ch) 316 return ((Char *)(intptr_t)str); 317 while (*str++); 318 return (NULL); 319 } 320 321 Char * 322 s_strrchr(const Char *str, int ch) 323 { 324 const Char *rstr; 325 326 rstr = NULL; 327 do 328 if (*str == ch) 329 rstr = str; 330 while (*str++); 331 return ((Char *)(intptr_t)rstr); 332 } 333 334 size_t 335 s_strlen(const Char *str) 336 { 337 size_t n; 338 339 for (n = 0; *str++; n++) 340 continue; 341 return (n); 342 } 343 344 int 345 s_strcmp(const Char *str1, const Char *str2) 346 { 347 for (; *str1 && *str1 == *str2; str1++, str2++) 348 continue; 349 /* 350 * The following case analysis is necessary so that characters which look 351 * negative collate low against normal characters but high against the 352 * end-of-string NUL. 353 */ 354 if (*str1 == '\0' && *str2 == '\0') 355 return (0); 356 else if (*str1 == '\0') 357 return (-1); 358 else if (*str2 == '\0') 359 return (1); 360 else 361 return (*str1 - *str2); 362 } 363 364 int 365 s_strncmp(const Char *str1, const Char *str2, size_t n) 366 { 367 if (n == 0) 368 return (0); 369 do { 370 if (*str1 != *str2) { 371 /* 372 * The following case analysis is necessary so that characters 373 * which look negative collate low against normal characters 374 * but high against the end-of-string NUL. 375 */ 376 if (*str1 == '\0') 377 return (-1); 378 else if (*str2 == '\0') 379 return (1); 380 else 381 return (*str1 - *str2); 382 } 383 if (*str1 == '\0') 384 return(0); 385 str1++, str2++; 386 } while (--n != 0); 387 return(0); 388 } 389 #endif /* not WIDE_STRINGS */ 390 391 int 392 s_strcasecmp(const Char *str1, const Char *str2) 393 { 394 #ifdef WIDE_STRINGS 395 wint_t l1 = 0, l2 = 0; 396 for (; *str1; str1++, str2++) 397 if (*str1 == *str2) 398 l1 = l2 = 0; 399 else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2))) 400 break; 401 #else 402 unsigned char l1 = 0, l2 = 0; 403 for (; *str1; str1++, str2++) 404 if (*str1 == *str2) 405 l1 = l2 = 0; 406 else if ((l1 = tolower((unsigned char)*str1)) != 407 (l2 = tolower((unsigned char)*str2))) 408 break; 409 #endif 410 /* 411 * The following case analysis is necessary so that characters which look 412 * negative collate low against normal characters but high against the 413 * end-of-string NUL. 414 */ 415 if (*str1 == '\0' && *str2 == '\0') 416 return (0); 417 else if (*str1 == '\0') 418 return (-1); 419 else if (*str2 == '\0') 420 return (1); 421 else if (l1 == l2) /* They are zero when they are equal */ 422 return (*str1 - *str2); 423 else 424 return (l1 - l2); 425 } 426 427 Char * 428 s_strnsave(const Char *s, size_t len) 429 { 430 Char *n; 431 432 n = xmalloc((len + 1) * sizeof (*n)); 433 memcpy(n, s, len * sizeof (*n)); 434 n[len] = '\0'; 435 return n; 436 } 437 438 Char * 439 s_strsave(const Char *s) 440 { 441 Char *n; 442 size_t size; 443 444 if (s == NULL) 445 s = STRNULL; 446 size = (Strlen(s) + 1) * sizeof(*n); 447 n = xmalloc(size); 448 memcpy(n, s, size); 449 return (n); 450 } 451 452 Char * 453 s_strspl(const Char *cp, const Char *dp) 454 { 455 Char *res, *ep; 456 const Char *p, *q; 457 458 if (!cp) 459 cp = STRNULL; 460 if (!dp) 461 dp = STRNULL; 462 for (p = cp; *p++;) 463 continue; 464 for (q = dp; *q++;) 465 continue; 466 res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char)); 467 for (ep = res, q = cp; (*ep++ = *q++) != '\0';) 468 continue; 469 for (ep--, q = dp; (*ep++ = *q++) != '\0';) 470 continue; 471 return (res); 472 } 473 474 Char * 475 s_strend(const Char *cp) 476 { 477 if (!cp) 478 return ((Char *)(intptr_t) cp); 479 while (*cp) 480 cp++; 481 return ((Char *)(intptr_t) cp); 482 } 483 484 Char * 485 s_strstr(const Char *s, const Char *t) 486 { 487 do { 488 const Char *ss = s; 489 const Char *tt = t; 490 491 do 492 if (*tt == '\0') 493 return ((Char *)(intptr_t) s); 494 while (*ss++ == *tt++); 495 } while (*s++ != '\0'); 496 return (NULL); 497 } 498 499 #else /* !SHORT_STRINGS */ 500 char * 501 caching_strip(const char *s) 502 { 503 static char *buf = NULL; 504 static size_t buf_size = 0; 505 size_t size; 506 507 if (s == NULL) 508 return NULL; 509 size = strlen(s) + 1; 510 if (buf_size < size) { 511 buf = xrealloc(buf, size); 512 buf_size = size; 513 } 514 memcpy(buf, s, size); 515 strip(buf); 516 return buf; 517 } 518 #endif 519 520 char * 521 short2qstr(const Char *src) 522 { 523 static char *sdst = NULL; 524 static size_t dstsize = 0; 525 char *dst, *edst; 526 527 if (src == NULL) 528 return (NULL); 529 530 if (sdst == NULL) { 531 dstsize = MALLOC_INCR; 532 sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char)); 533 } 534 dst = sdst; 535 edst = &dst[dstsize]; 536 while (*src) { 537 if (*src & QUOTE) { 538 *dst++ = '\\'; 539 if (dst == edst) { 540 dstsize += MALLOC_INCR; 541 sdst = xrealloc(sdst, 542 (dstsize + MALLOC_SURPLUS) * sizeof(char)); 543 edst = &sdst[dstsize]; 544 dst = &edst[-MALLOC_INCR]; 545 } 546 } 547 dst += one_wctomb(dst, *src & CHAR); 548 src++; 549 if (dst >= edst) { 550 ptrdiff_t i = dst - edst; 551 dstsize += MALLOC_INCR; 552 sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char)); 553 edst = &sdst[dstsize]; 554 dst = &edst[-MALLOC_INCR + i]; 555 } 556 } 557 *dst = 0; 558 return (sdst); 559 } 560 561 struct blk_buf * 562 bb_alloc() 563 { 564 return xcalloc(1, sizeof(struct blk_buf)); 565 } 566 567 static void 568 bb_store(struct blk_buf *bb, Char *str) 569 { 570 if (bb->len == bb->size) { /* Keep space for terminating NULL */ 571 if (bb->size == 0) 572 bb->size = 16; /* Arbitrary */ 573 else 574 bb->size *= 2; 575 bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec)); 576 } 577 bb->vec[bb->len] = str; 578 } 579 580 void 581 bb_append(struct blk_buf *bb, Char *str) 582 { 583 bb_store(bb, str); 584 bb->len++; 585 } 586 587 void 588 bb_cleanup(void *xbb) 589 { 590 struct blk_buf *bb; 591 size_t i; 592 593 bb = xbb; 594 for (i = 0; i < bb->len; i++) 595 xfree(bb->vec[i]); 596 xfree(bb->vec); 597 } 598 599 void 600 bb_free(void *bb) 601 { 602 bb_cleanup(bb); 603 xfree(bb); 604 } 605 606 Char ** 607 bb_finish(struct blk_buf *bb) 608 { 609 bb_store(bb, NULL); 610 return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec)); 611 } 612 613 #define DO_STRBUF(STRBUF, CHAR, STRLEN) \ 614 \ 615 struct STRBUF * \ 616 STRBUF##_alloc(void) \ 617 { \ 618 return xcalloc(1, sizeof(struct STRBUF)); \ 619 } \ 620 \ 621 static void \ 622 STRBUF##_store1(struct STRBUF *buf, CHAR c) \ 623 { \ 624 if (buf->size == buf->len) { \ 625 if (buf->size == 0) \ 626 buf->size = 64; /* Arbitrary */ \ 627 else \ 628 buf->size *= 2; \ 629 buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \ 630 } \ 631 assert(buf->s); \ 632 buf->s[buf->len] = c; \ 633 } \ 634 \ 635 /* Like strbuf_append1(buf, '\0'), but don't advance len */ \ 636 void \ 637 STRBUF##_terminate(struct STRBUF *buf) \ 638 { \ 639 STRBUF##_store1(buf, '\0'); \ 640 } \ 641 \ 642 void \ 643 STRBUF##_append1(struct STRBUF *buf, CHAR c) \ 644 { \ 645 STRBUF##_store1(buf, c); \ 646 buf->len++; \ 647 } \ 648 \ 649 void \ 650 STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len) \ 651 { \ 652 if (buf->size < buf->len + len) { \ 653 if (buf->size == 0) \ 654 buf->size = 64; /* Arbitrary */ \ 655 while (buf->size < buf->len + len) \ 656 buf->size *= 2; \ 657 buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \ 658 } \ 659 memcpy(buf->s + buf->len, s, len * sizeof(*buf->s)); \ 660 buf->len += len; \ 661 } \ 662 \ 663 void \ 664 STRBUF##_append(struct STRBUF *buf, const CHAR *s) \ 665 { \ 666 STRBUF##_appendn(buf, s, STRLEN(s)); \ 667 } \ 668 \ 669 CHAR * \ 670 STRBUF##_finish(struct STRBUF *buf) \ 671 { \ 672 STRBUF##_append1(buf, 0); \ 673 return xrealloc(buf->s, buf->len * sizeof(*buf->s)); \ 674 } \ 675 \ 676 void \ 677 STRBUF##_cleanup(void *xbuf) \ 678 { \ 679 struct STRBUF *buf; \ 680 \ 681 buf = xbuf; \ 682 xfree(buf->s); \ 683 } \ 684 \ 685 void \ 686 STRBUF##_free(void *xbuf) \ 687 { \ 688 STRBUF##_cleanup(xbuf); \ 689 xfree(xbuf); \ 690 } \ 691 \ 692 const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */ 693 694 DO_STRBUF(strbuf, char, strlen); 695 DO_STRBUF(Strbuf, Char, Strlen); 696