1 /* 2 * tc.str.c: Short string package 3 * This has been a lesson of how to write buggy code! 4 */ 5 /*- 6 * Copyright (c) 1980, 1991 The Regents of the University of California. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 #include "sh.h" 34 35 #include <assert.h> 36 #include <limits.h> 37 38 #define MALLOC_INCR 128 39 #ifdef WIDE_STRINGS 40 #define MALLOC_SURPLUS MB_LEN_MAX /* Space for one multibyte character */ 41 #else 42 #define MALLOC_SURPLUS 0 43 #endif 44 45 #ifdef WIDE_STRINGS 46 size_t 47 one_mbtowc(Char *pwc, const char *s, size_t n) 48 { 49 int len; 50 51 len = rt_mbtowc(pwc, s, n); 52 if (len == -1) { 53 reset_mbtowc(); 54 *pwc = (unsigned char)*s | INVALID_BYTE; 55 } 56 if (len <= 0) 57 len = 1; 58 return len; 59 } 60 61 size_t 62 one_wctomb(char *s, Char wchar) 63 { 64 int len; 65 66 #if INVALID_BYTE != 0 67 if ((wchar & INVALID_BYTE) == INVALID_BYTE) { /* wchar >= INVALID_BYTE */ 68 /* invalid char 69 * exmaple) 70 * if wchar = f0000090(=90|INVALID_BYTE), then *s = ffffff90 */ 71 *s = (char)wchar; 72 len = 1; 73 #else 74 if (wchar & (CHAR & INVALID_BYTE)) { 75 s[0] = wchar & (CHAR & 0xFF); 76 len = 1; 77 #endif 78 } else { 79 #if INVALID_BYTE != 0 80 wchar &= MAX_UTF32; 81 #else 82 wchar &= CHAR; 83 #endif 84 #ifdef UTF16_STRINGS 85 if (wchar >= 0x10000) { 86 /* UTF-16 systems can't handle these values directly in calls to 87 wctomb. Convert value to UTF-16 surrogate and call wcstombs to 88 convert the "string" to the correct multibyte representation, 89 if any. */ 90 wchar_t ws[3]; 91 wchar -= 0x10000; 92 ws[0] = 0xd800 | (wchar >> 10); 93 ws[1] = 0xdc00 | (wchar & 0x3ff); 94 ws[2] = 0; 95 /* The return value of wcstombs excludes the trailing 0, so len is 96 the correct number of multibytes for the Unicode char. */ 97 len = wcstombs (s, ws, MB_CUR_MAX + 1); 98 } else 99 #endif 100 len = wctomb(s, (wchar_t) wchar); 101 if (len == -1) 102 s[0] = wchar; 103 if (len <= 0) 104 len = 1; 105 } 106 return len; 107 } 108 109 int 110 rt_mbtowc(Char *pwc, const char *s, size_t n) 111 { 112 int ret; 113 char back[MB_LEN_MAX]; 114 wchar_t tmp; 115 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC) 116 # if defined(AUTOSET_KANJI) 117 static mbstate_t mb_zero, mb; 118 /* 119 * Workaround the Shift-JIS endcoding that translates unshifted 7 bit ASCII! 120 */ 121 if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') && 122 !memcmp(&mb, &mb_zero, sizeof(mb))) 123 { 124 *pwc = *s; 125 return 1; 126 } 127 # else 128 mbstate_t mb; 129 # endif 130 131 memset (&mb, 0, sizeof mb); 132 ret = mbrtowc(&tmp, s, n, &mb); 133 #else 134 ret = mbtowc(&tmp, s, n); 135 #endif 136 if (ret > 0) { 137 *pwc = tmp; 138 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC) 139 if (tmp >= 0xd800 && tmp <= 0xdbff) { 140 /* UTF-16 surrogate pair. Fetch second half and compute 141 UTF-32 value. Dispense with the inverse test in this case. */ 142 size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb); 143 if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2) 144 ret = -1; 145 else { 146 *pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000; 147 ret += n2; 148 } 149 } else 150 #endif 151 if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0) 152 ret = -1; 153 154 } else if (ret == -2) 155 ret = -1; 156 else if (ret == 0) 157 *pwc = '\0'; 158 159 return ret; 160 } 161 #endif 162 163 #ifdef SHORT_STRINGS 164 Char ** 165 blk2short(char **src) 166 { 167 size_t n; 168 Char **sdst, **dst; 169 170 /* 171 * Count 172 */ 173 for (n = 0; src[n] != NULL; n++) 174 continue; 175 sdst = dst = xmalloc((n + 1) * sizeof(Char *)); 176 177 for (; *src != NULL; src++) 178 *dst++ = SAVE(*src); 179 *dst = NULL; 180 return (sdst); 181 } 182 183 char ** 184 short2blk(Char **src) 185 { 186 size_t n; 187 char **sdst, **dst; 188 189 /* 190 * Count 191 */ 192 for (n = 0; src[n] != NULL; n++) 193 continue; 194 sdst = dst = xmalloc((n + 1) * sizeof(char *)); 195 196 for (; *src != NULL; src++) 197 *dst++ = strsave(short2str(*src)); 198 *dst = NULL; 199 return (sdst); 200 } 201 202 Char * 203 str2short(const char *src) 204 { 205 static struct Strbuf buf; /* = Strbuf_INIT; */ 206 207 if (src == NULL) 208 return (NULL); 209 210 buf.len = 0; 211 while (*src) { 212 Char wc; 213 214 src += one_mbtowc(&wc, src, MB_LEN_MAX); 215 Strbuf_append1(&buf, wc); 216 } 217 Strbuf_terminate(&buf); 218 return buf.s; 219 } 220 221 char * 222 short2str(const Char *src) 223 { 224 static char *sdst = NULL; 225 static size_t dstsize = 0; 226 char *dst, *edst; 227 228 if (src == NULL) 229 return (NULL); 230 231 if (sdst == NULL) { 232 dstsize = MALLOC_INCR; 233 sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char)); 234 } 235 dst = sdst; 236 edst = &dst[dstsize]; 237 while (*src) { 238 dst += one_wctomb(dst, *src); 239 src++; 240 if (dst >= edst) { 241 char *wdst = dst; 242 char *wedst = edst; 243 244 dstsize += MALLOC_INCR; 245 sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char)); 246 edst = &sdst[dstsize]; 247 dst = &edst[-MALLOC_INCR]; 248 while (wdst > wedst) { 249 dst++; 250 wdst--; 251 } 252 } 253 } 254 *dst = 0; 255 return (sdst); 256 } 257 258 #if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS) 259 Char * 260 s_strcpy(Char *dst, const Char *src) 261 { 262 Char *sdst; 263 264 sdst = dst; 265 while ((*dst++ = *src++) != '\0') 266 continue; 267 return (sdst); 268 } 269 270 Char * 271 s_strncpy(Char *dst, const Char *src, size_t n) 272 { 273 Char *sdst; 274 275 if (n == 0) 276 return(dst); 277 278 sdst = dst; 279 do 280 if ((*dst++ = *src++) == '\0') { 281 while (--n != 0) 282 *dst++ = '\0'; 283 return(sdst); 284 } 285 while (--n != 0); 286 return (sdst); 287 } 288 289 Char * 290 s_strcat(Char *dst, const Char *src) 291 { 292 Strcpy(Strend(dst), src); 293 return dst; 294 } 295 296 #ifdef NOTUSED 297 Char * 298 s_strncat(Char *dst, const Char *src, size_t n) 299 { 300 Char *sdst; 301 302 if (n == 0) 303 return (dst); 304 305 sdst = dst; 306 307 while (*dst) 308 dst++; 309 310 do 311 if ((*dst++ = *src++) == '\0') 312 return(sdst); 313 while (--n != 0) 314 continue; 315 316 *dst = '\0'; 317 return (sdst); 318 } 319 320 #endif 321 322 Char * 323 s_strchr(const Char *str, int ch) 324 { 325 do 326 if (*str == ch) 327 return ((Char *)(intptr_t)str); 328 while (*str++); 329 return (NULL); 330 } 331 332 Char * 333 s_strrchr(const Char *str, int ch) 334 { 335 const Char *rstr; 336 337 rstr = NULL; 338 do 339 if (*str == ch) 340 rstr = str; 341 while (*str++); 342 return ((Char *)(intptr_t)rstr); 343 } 344 345 size_t 346 s_strlen(const Char *str) 347 { 348 size_t n; 349 350 for (n = 0; *str++; n++) 351 continue; 352 return (n); 353 } 354 355 int 356 s_strcmp(const Char *str1, const Char *str2) 357 { 358 for (; *str1 && *str1 == *str2; str1++, str2++) 359 continue; 360 /* 361 * The following case analysis is necessary so that characters which look 362 * negative collate low against normal characters but high against the 363 * end-of-string NUL. 364 */ 365 if (*str1 == '\0' && *str2 == '\0') 366 return (0); 367 else if (*str1 == '\0') 368 return (-1); 369 else if (*str2 == '\0') 370 return (1); 371 else 372 return (*str1 - *str2); 373 } 374 375 int 376 s_strncmp(const Char *str1, const Char *str2, size_t n) 377 { 378 if (n == 0) 379 return (0); 380 do { 381 if (*str1 != *str2) { 382 /* 383 * The following case analysis is necessary so that characters 384 * which look negative collate low against normal characters 385 * but high against the end-of-string NUL. 386 */ 387 if (*str1 == '\0') 388 return (-1); 389 else if (*str2 == '\0') 390 return (1); 391 else 392 return (*str1 - *str2); 393 } 394 if (*str1 == '\0') 395 return(0); 396 str1++, str2++; 397 } while (--n != 0); 398 return(0); 399 } 400 #endif /* not WIDE_STRINGS */ 401 402 int 403 s_strcasecmp(const Char *str1, const Char *str2) 404 { 405 #ifdef WIDE_STRINGS 406 wint_t l1 = 0, l2 = 0; 407 for (; *str1; str1++, str2++) 408 if (*str1 == *str2) 409 l1 = l2 = 0; 410 else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2))) 411 break; 412 #else 413 unsigned char l1 = 0, l2 = 0; 414 for (; *str1; str1++, str2++) 415 if (*str1 == *str2) 416 l1 = l2 = 0; 417 else if ((l1 = tolower((unsigned char)*str1)) != 418 (l2 = tolower((unsigned char)*str2))) 419 break; 420 #endif 421 /* 422 * The following case analysis is necessary so that characters which look 423 * negative collate low against normal characters but high against the 424 * end-of-string NUL. 425 */ 426 if (*str1 == '\0' && *str2 == '\0') 427 return (0); 428 else if (*str1 == '\0') 429 return (-1); 430 else if (*str2 == '\0') 431 return (1); 432 else if (l1 == l2) /* They are zero when they are equal */ 433 return (*str1 - *str2); 434 else 435 return (l1 - l2); 436 } 437 438 Char * 439 s_strnsave(const Char *s, size_t len) 440 { 441 Char *n; 442 443 n = xmalloc((len + 1) * sizeof (*n)); 444 memcpy(n, s, len * sizeof (*n)); 445 n[len] = '\0'; 446 return n; 447 } 448 449 Char * 450 s_strsave(const Char *s) 451 { 452 Char *n; 453 size_t size; 454 455 if (s == NULL) 456 s = STRNULL; 457 size = (Strlen(s) + 1) * sizeof(*n); 458 n = xmalloc(size); 459 memcpy(n, s, size); 460 return (n); 461 } 462 463 Char * 464 s_strspl(const Char *cp, const Char *dp) 465 { 466 Char *res, *ep; 467 const Char *p, *q; 468 469 if (!cp) 470 cp = STRNULL; 471 if (!dp) 472 dp = STRNULL; 473 for (p = cp; *p++;) 474 continue; 475 for (q = dp; *q++;) 476 continue; 477 res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char)); 478 for (ep = res, q = cp; (*ep++ = *q++) != '\0';) 479 continue; 480 for (ep--, q = dp; (*ep++ = *q++) != '\0';) 481 continue; 482 return (res); 483 } 484 485 Char * 486 s_strend(const Char *cp) 487 { 488 if (!cp) 489 return ((Char *)(intptr_t) cp); 490 while (*cp) 491 cp++; 492 return ((Char *)(intptr_t) cp); 493 } 494 495 Char * 496 s_strstr(const Char *s, const Char *t) 497 { 498 do { 499 const Char *ss = s; 500 const Char *tt = t; 501 502 do 503 if (*tt == '\0') 504 return ((Char *)(intptr_t) s); 505 while (*ss++ == *tt++); 506 } while (*s++ != '\0'); 507 return (NULL); 508 } 509 510 #else /* !SHORT_STRINGS */ 511 char * 512 caching_strip(const char *s) 513 { 514 static char *buf = NULL; 515 static size_t buf_size = 0; 516 size_t size; 517 518 if (s == NULL) 519 return NULL; 520 size = strlen(s) + 1; 521 if (buf_size < size) { 522 buf = xrealloc(buf, size); 523 buf_size = size; 524 } 525 memcpy(buf, s, size); 526 strip(buf); 527 return buf; 528 } 529 #endif 530 531 char * 532 short2qstr(const Char *src) 533 { 534 static char *sdst = NULL; 535 static size_t dstsize = 0; 536 char *dst, *edst; 537 538 if (src == NULL) 539 return (NULL); 540 541 if (sdst == NULL) { 542 dstsize = MALLOC_INCR; 543 sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char)); 544 } 545 dst = sdst; 546 edst = &dst[dstsize]; 547 while (*src) { 548 if (*src & QUOTE) { 549 *dst++ = '\\'; 550 if (dst == edst) { 551 dstsize += MALLOC_INCR; 552 sdst = xrealloc(sdst, 553 (dstsize + MALLOC_SURPLUS) * sizeof(char)); 554 edst = &sdst[dstsize]; 555 dst = &edst[-MALLOC_INCR]; 556 } 557 } 558 dst += one_wctomb(dst, *src); 559 src++; 560 if (dst >= edst) { 561 ptrdiff_t i = dst - edst; 562 dstsize += MALLOC_INCR; 563 sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char)); 564 edst = &sdst[dstsize]; 565 dst = &edst[-MALLOC_INCR + i]; 566 } 567 } 568 *dst = 0; 569 return (sdst); 570 } 571 572 struct blk_buf * 573 bb_alloc(void) 574 { 575 return xcalloc(1, sizeof(struct blk_buf)); 576 } 577 578 static void 579 bb_store(struct blk_buf *bb, Char *str) 580 { 581 if (bb->len == bb->size) { /* Keep space for terminating NULL */ 582 if (bb->size == 0) 583 bb->size = 16; /* Arbitrary */ 584 else 585 bb->size *= 2; 586 bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec)); 587 } 588 bb->vec[bb->len] = str; 589 } 590 591 void 592 bb_append(struct blk_buf *bb, Char *str) 593 { 594 bb_store(bb, str); 595 bb->len++; 596 } 597 598 void 599 bb_cleanup(void *xbb) 600 { 601 struct blk_buf *bb; 602 size_t i; 603 604 bb = (struct blk_buf *)xbb; 605 if (bb->vec) { 606 for (i = 0; i < bb->len; i++) 607 xfree(bb->vec[i]); 608 xfree(bb->vec); 609 } 610 bb->vec = NULL; 611 bb->len = 0; 612 } 613 614 void 615 bb_free(void *bb) 616 { 617 bb_cleanup(bb); 618 xfree(bb); 619 } 620 621 Char ** 622 bb_finish(struct blk_buf *bb) 623 { 624 bb_store(bb, NULL); 625 return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec)); 626 } 627 628 #define DO_STRBUF(STRBUF, CHAR, STRLEN) \ 629 \ 630 struct STRBUF * \ 631 STRBUF##_alloc(void) \ 632 { \ 633 return xcalloc(1, sizeof(struct STRBUF)); \ 634 } \ 635 \ 636 static void \ 637 STRBUF##_store1(struct STRBUF *buf, CHAR c) \ 638 { \ 639 if (buf->size == buf->len) { \ 640 if (buf->size == 0) \ 641 buf->size = 64; /* Arbitrary */ \ 642 else \ 643 buf->size *= 2; \ 644 buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \ 645 } \ 646 assert(buf->s); \ 647 buf->s[buf->len] = c; \ 648 } \ 649 \ 650 /* Like strbuf_append1(buf, '\0'), but don't advance len */ \ 651 void \ 652 STRBUF##_terminate(struct STRBUF *buf) \ 653 { \ 654 STRBUF##_store1(buf, '\0'); \ 655 } \ 656 \ 657 void \ 658 STRBUF##_append1(struct STRBUF *buf, CHAR c) \ 659 { \ 660 STRBUF##_store1(buf, c); \ 661 buf->len++; \ 662 } \ 663 \ 664 void \ 665 STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len) \ 666 { \ 667 if (buf->size < buf->len + len) { \ 668 if (buf->size == 0) \ 669 buf->size = 64; /* Arbitrary */ \ 670 while (buf->size < buf->len + len) \ 671 buf->size *= 2; \ 672 buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \ 673 } \ 674 memcpy(buf->s + buf->len, s, len * sizeof(*buf->s)); \ 675 buf->len += len; \ 676 } \ 677 \ 678 void \ 679 STRBUF##_append(struct STRBUF *buf, const CHAR *s) \ 680 { \ 681 STRBUF##_appendn(buf, s, STRLEN(s)); \ 682 } \ 683 \ 684 CHAR * \ 685 STRBUF##_finish(struct STRBUF *buf) \ 686 { \ 687 STRBUF##_append1(buf, 0); \ 688 return xrealloc(buf->s, buf->len * sizeof(*buf->s)); \ 689 } \ 690 \ 691 void \ 692 STRBUF##_cleanup(void *xbuf) \ 693 { \ 694 struct STRBUF *buf; \ 695 \ 696 buf = xbuf; \ 697 xfree(buf->s); \ 698 } \ 699 \ 700 void \ 701 STRBUF##_free(void *xbuf) \ 702 { \ 703 STRBUF##_cleanup(xbuf); \ 704 xfree(xbuf); \ 705 } \ 706 \ 707 const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */ 708 709 DO_STRBUF(strbuf, char, strlen); 710 DO_STRBUF(Strbuf, Char, Strlen); 711