1 /* $Header: /p/tcsh/cvsroot/tcsh/tc.str.c,v 3.47 2015/06/06 21:19:08 christos Exp $ */ 2 /* 3 * tc.str.c: Short string package 4 * This has been a lesson of how to write buggy code! 5 */ 6 /*- 7 * Copyright (c) 1980, 1991 The Regents of the University of California. 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 #include "sh.h" 35 36 #include <assert.h> 37 #include <limits.h> 38 39 RCSID("$tcsh: tc.str.c,v 3.47 2015/06/06 21:19:08 christos Exp $") 40 41 #define MALLOC_INCR 128 42 #ifdef WIDE_STRINGS 43 #define MALLOC_SURPLUS MB_LEN_MAX /* Space for one multibyte character */ 44 #else 45 #define MALLOC_SURPLUS 0 46 #endif 47 48 #ifdef WIDE_STRINGS 49 size_t 50 one_mbtowc(Char *pwc, const char *s, size_t n) 51 { 52 int len; 53 54 len = rt_mbtowc(pwc, s, n); 55 if (len == -1) { 56 reset_mbtowc(); 57 *pwc = (unsigned char)*s | INVALID_BYTE; 58 } 59 if (len <= 0) 60 len = 1; 61 return len; 62 } 63 64 size_t 65 one_wctomb(char *s, Char wchar) 66 { 67 int len; 68 69 #if INVALID_BYTE != 0 70 if ((wchar & INVALID_BYTE) == INVALID_BYTE) { /* wchar >= INVALID_BYTE */ 71 /* invalid char 72 * exmaple) 73 * if wchar = f0000090(=90|INVALID_BYTE), then *s = ffffff90 */ 74 *s = (char)wchar; 75 len = 1; 76 #else 77 if (wchar & (CHAR & INVALID_BYTE)) { 78 s[0] = wchar & (CHAR & 0xFF); 79 len = 1; 80 #endif 81 } else { 82 #if INVALID_BYTE != 0 83 wchar &= MAX_UTF32; 84 #else 85 wchar &= CHAR; 86 #endif 87 #ifdef UTF16_STRINGS 88 if (wchar >= 0x10000) { 89 /* UTF-16 systems can't handle these values directly in calls to 90 wctomb. Convert value to UTF-16 surrogate and call wcstombs to 91 convert the "string" to the correct multibyte representation, 92 if any. */ 93 wchar_t ws[3]; 94 wchar -= 0x10000; 95 ws[0] = 0xd800 | (wchar >> 10); 96 ws[1] = 0xdc00 | (wchar & 0x3ff); 97 ws[2] = 0; 98 /* The return value of wcstombs excludes the trailing 0, so len is 99 the correct number of multibytes for the Unicode char. */ 100 len = wcstombs (s, ws, MB_CUR_MAX + 1); 101 } else 102 #endif 103 len = wctomb(s, (wchar_t) wchar); 104 if (len == -1) 105 s[0] = wchar; 106 if (len <= 0) 107 len = 1; 108 } 109 return len; 110 } 111 112 int 113 rt_mbtowc(Char *pwc, const char *s, size_t n) 114 { 115 int ret; 116 char back[MB_LEN_MAX]; 117 wchar_t tmp; 118 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC) 119 # if defined(AUTOSET_KANJI) 120 static mbstate_t mb_zero, mb; 121 /* 122 * Workaround the Shift-JIS endcoding that translates unshifted 7 bit ASCII! 123 */ 124 if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') && 125 !memcmp(&mb, &mb_zero, sizeof(mb))) 126 { 127 *pwc = *s; 128 return 1; 129 } 130 # else 131 mbstate_t mb; 132 # endif 133 134 memset (&mb, 0, sizeof mb); 135 ret = mbrtowc(&tmp, s, n, &mb); 136 #else 137 ret = mbtowc(&tmp, s, n); 138 #endif 139 if (ret > 0) { 140 *pwc = tmp; 141 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC) 142 if (tmp >= 0xd800 && tmp <= 0xdbff) { 143 /* UTF-16 surrogate pair. Fetch second half and compute 144 UTF-32 value. Dispense with the inverse test in this case. */ 145 size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb); 146 if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2) 147 ret = -1; 148 else { 149 *pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000; 150 ret += n2; 151 } 152 } else 153 #endif 154 if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0) 155 ret = -1; 156 157 } else if (ret == -2) 158 ret = -1; 159 else if (ret == 0) 160 *pwc = '\0'; 161 162 return ret; 163 } 164 #endif 165 166 #ifdef SHORT_STRINGS 167 Char ** 168 blk2short(char **src) 169 { 170 size_t n; 171 Char **sdst, **dst; 172 173 /* 174 * Count 175 */ 176 for (n = 0; src[n] != NULL; n++) 177 continue; 178 sdst = dst = xmalloc((n + 1) * sizeof(Char *)); 179 180 for (; *src != NULL; src++) 181 *dst++ = SAVE(*src); 182 *dst = NULL; 183 return (sdst); 184 } 185 186 char ** 187 short2blk(Char **src) 188 { 189 size_t n; 190 char **sdst, **dst; 191 192 /* 193 * Count 194 */ 195 for (n = 0; src[n] != NULL; n++) 196 continue; 197 sdst = dst = xmalloc((n + 1) * sizeof(char *)); 198 199 for (; *src != NULL; src++) 200 *dst++ = strsave(short2str(*src)); 201 *dst = NULL; 202 return (sdst); 203 } 204 205 Char * 206 str2short(const char *src) 207 { 208 static struct Strbuf buf; /* = Strbuf_INIT; */ 209 210 if (src == NULL) 211 return (NULL); 212 213 buf.len = 0; 214 while (*src) { 215 Char wc; 216 217 src += one_mbtowc(&wc, src, MB_LEN_MAX); 218 Strbuf_append1(&buf, wc); 219 } 220 Strbuf_terminate(&buf); 221 return buf.s; 222 } 223 224 char * 225 short2str(const Char *src) 226 { 227 static char *sdst = NULL; 228 static size_t dstsize = 0; 229 char *dst, *edst; 230 231 if (src == NULL) 232 return (NULL); 233 234 if (sdst == NULL) { 235 dstsize = MALLOC_INCR; 236 sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char)); 237 } 238 dst = sdst; 239 edst = &dst[dstsize]; 240 while (*src) { 241 dst += one_wctomb(dst, *src); 242 src++; 243 if (dst >= edst) { 244 char *wdst = dst; 245 char *wedst = edst; 246 247 dstsize += MALLOC_INCR; 248 sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char)); 249 edst = &sdst[dstsize]; 250 dst = &edst[-MALLOC_INCR]; 251 while (wdst > wedst) { 252 dst++; 253 wdst--; 254 } 255 } 256 } 257 *dst = 0; 258 return (sdst); 259 } 260 261 #if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS) 262 Char * 263 s_strcpy(Char *dst, const Char *src) 264 { 265 Char *sdst; 266 267 sdst = dst; 268 while ((*dst++ = *src++) != '\0') 269 continue; 270 return (sdst); 271 } 272 273 Char * 274 s_strncpy(Char *dst, const Char *src, size_t n) 275 { 276 Char *sdst; 277 278 if (n == 0) 279 return(dst); 280 281 sdst = dst; 282 do 283 if ((*dst++ = *src++) == '\0') { 284 while (--n != 0) 285 *dst++ = '\0'; 286 return(sdst); 287 } 288 while (--n != 0); 289 return (sdst); 290 } 291 292 Char * 293 s_strcat(Char *dst, const Char *src) 294 { 295 Strcpy(Strend(dst), src); 296 return dst; 297 } 298 299 #ifdef NOTUSED 300 Char * 301 s_strncat(Char *dst, const Char *src, size_t n) 302 { 303 Char *sdst; 304 305 if (n == 0) 306 return (dst); 307 308 sdst = dst; 309 310 while (*dst) 311 dst++; 312 313 do 314 if ((*dst++ = *src++) == '\0') 315 return(sdst); 316 while (--n != 0) 317 continue; 318 319 *dst = '\0'; 320 return (sdst); 321 } 322 323 #endif 324 325 Char * 326 s_strchr(const Char *str, int ch) 327 { 328 do 329 if (*str == ch) 330 return ((Char *)(intptr_t)str); 331 while (*str++); 332 return (NULL); 333 } 334 335 Char * 336 s_strrchr(const Char *str, int ch) 337 { 338 const Char *rstr; 339 340 rstr = NULL; 341 do 342 if (*str == ch) 343 rstr = str; 344 while (*str++); 345 return ((Char *)(intptr_t)rstr); 346 } 347 348 size_t 349 s_strlen(const Char *str) 350 { 351 size_t n; 352 353 for (n = 0; *str++; n++) 354 continue; 355 return (n); 356 } 357 358 int 359 s_strcmp(const Char *str1, const Char *str2) 360 { 361 for (; *str1 && *str1 == *str2; str1++, str2++) 362 continue; 363 /* 364 * The following case analysis is necessary so that characters which look 365 * negative collate low against normal characters but high against the 366 * end-of-string NUL. 367 */ 368 if (*str1 == '\0' && *str2 == '\0') 369 return (0); 370 else if (*str1 == '\0') 371 return (-1); 372 else if (*str2 == '\0') 373 return (1); 374 else 375 return (*str1 - *str2); 376 } 377 378 int 379 s_strncmp(const Char *str1, const Char *str2, size_t n) 380 { 381 if (n == 0) 382 return (0); 383 do { 384 if (*str1 != *str2) { 385 /* 386 * The following case analysis is necessary so that characters 387 * which look negative collate low against normal characters 388 * but high against the end-of-string NUL. 389 */ 390 if (*str1 == '\0') 391 return (-1); 392 else if (*str2 == '\0') 393 return (1); 394 else 395 return (*str1 - *str2); 396 } 397 if (*str1 == '\0') 398 return(0); 399 str1++, str2++; 400 } while (--n != 0); 401 return(0); 402 } 403 #endif /* not WIDE_STRINGS */ 404 405 int 406 s_strcasecmp(const Char *str1, const Char *str2) 407 { 408 #ifdef WIDE_STRINGS 409 wint_t l1 = 0, l2 = 0; 410 for (; *str1; str1++, str2++) 411 if (*str1 == *str2) 412 l1 = l2 = 0; 413 else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2))) 414 break; 415 #else 416 unsigned char l1 = 0, l2 = 0; 417 for (; *str1; str1++, str2++) 418 if (*str1 == *str2) 419 l1 = l2 = 0; 420 else if ((l1 = tolower((unsigned char)*str1)) != 421 (l2 = tolower((unsigned char)*str2))) 422 break; 423 #endif 424 /* 425 * The following case analysis is necessary so that characters which look 426 * negative collate low against normal characters but high against the 427 * end-of-string NUL. 428 */ 429 if (*str1 == '\0' && *str2 == '\0') 430 return (0); 431 else if (*str1 == '\0') 432 return (-1); 433 else if (*str2 == '\0') 434 return (1); 435 else if (l1 == l2) /* They are zero when they are equal */ 436 return (*str1 - *str2); 437 else 438 return (l1 - l2); 439 } 440 441 Char * 442 s_strnsave(const Char *s, size_t len) 443 { 444 Char *n; 445 446 n = xmalloc((len + 1) * sizeof (*n)); 447 memcpy(n, s, len * sizeof (*n)); 448 n[len] = '\0'; 449 return n; 450 } 451 452 Char * 453 s_strsave(const Char *s) 454 { 455 Char *n; 456 size_t size; 457 458 if (s == NULL) 459 s = STRNULL; 460 size = (Strlen(s) + 1) * sizeof(*n); 461 n = xmalloc(size); 462 memcpy(n, s, size); 463 return (n); 464 } 465 466 Char * 467 s_strspl(const Char *cp, const Char *dp) 468 { 469 Char *res, *ep; 470 const Char *p, *q; 471 472 if (!cp) 473 cp = STRNULL; 474 if (!dp) 475 dp = STRNULL; 476 for (p = cp; *p++;) 477 continue; 478 for (q = dp; *q++;) 479 continue; 480 res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char)); 481 for (ep = res, q = cp; (*ep++ = *q++) != '\0';) 482 continue; 483 for (ep--, q = dp; (*ep++ = *q++) != '\0';) 484 continue; 485 return (res); 486 } 487 488 Char * 489 s_strend(const Char *cp) 490 { 491 if (!cp) 492 return ((Char *)(intptr_t) cp); 493 while (*cp) 494 cp++; 495 return ((Char *)(intptr_t) cp); 496 } 497 498 Char * 499 s_strstr(const Char *s, const Char *t) 500 { 501 do { 502 const Char *ss = s; 503 const Char *tt = t; 504 505 do 506 if (*tt == '\0') 507 return ((Char *)(intptr_t) s); 508 while (*ss++ == *tt++); 509 } while (*s++ != '\0'); 510 return (NULL); 511 } 512 513 #else /* !SHORT_STRINGS */ 514 char * 515 caching_strip(const char *s) 516 { 517 static char *buf = NULL; 518 static size_t buf_size = 0; 519 size_t size; 520 521 if (s == NULL) 522 return NULL; 523 size = strlen(s) + 1; 524 if (buf_size < size) { 525 buf = xrealloc(buf, size); 526 buf_size = size; 527 } 528 memcpy(buf, s, size); 529 strip(buf); 530 return buf; 531 } 532 #endif 533 534 char * 535 short2qstr(const Char *src) 536 { 537 static char *sdst = NULL; 538 static size_t dstsize = 0; 539 char *dst, *edst; 540 541 if (src == NULL) 542 return (NULL); 543 544 if (sdst == NULL) { 545 dstsize = MALLOC_INCR; 546 sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char)); 547 } 548 dst = sdst; 549 edst = &dst[dstsize]; 550 while (*src) { 551 if (*src & QUOTE) { 552 *dst++ = '\\'; 553 if (dst == edst) { 554 dstsize += MALLOC_INCR; 555 sdst = xrealloc(sdst, 556 (dstsize + MALLOC_SURPLUS) * sizeof(char)); 557 edst = &sdst[dstsize]; 558 dst = &edst[-MALLOC_INCR]; 559 } 560 } 561 dst += one_wctomb(dst, *src); 562 src++; 563 if (dst >= edst) { 564 ptrdiff_t i = dst - edst; 565 dstsize += MALLOC_INCR; 566 sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char)); 567 edst = &sdst[dstsize]; 568 dst = &edst[-MALLOC_INCR + i]; 569 } 570 } 571 *dst = 0; 572 return (sdst); 573 } 574 575 struct blk_buf * 576 bb_alloc(void) 577 { 578 return xcalloc(1, sizeof(struct blk_buf)); 579 } 580 581 static void 582 bb_store(struct blk_buf *bb, Char *str) 583 { 584 if (bb->len == bb->size) { /* Keep space for terminating NULL */ 585 if (bb->size == 0) 586 bb->size = 16; /* Arbitrary */ 587 else 588 bb->size *= 2; 589 bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec)); 590 } 591 bb->vec[bb->len] = str; 592 } 593 594 void 595 bb_append(struct blk_buf *bb, Char *str) 596 { 597 bb_store(bb, str); 598 bb->len++; 599 } 600 601 void 602 bb_cleanup(void *xbb) 603 { 604 struct blk_buf *bb; 605 size_t i; 606 607 bb = (struct blk_buf *)xbb; 608 if (bb->vec) { 609 for (i = 0; i < bb->len; i++) 610 xfree(bb->vec[i]); 611 xfree(bb->vec); 612 } 613 bb->vec = NULL; 614 bb->len = 0; 615 } 616 617 void 618 bb_free(void *bb) 619 { 620 bb_cleanup(bb); 621 xfree(bb); 622 } 623 624 Char ** 625 bb_finish(struct blk_buf *bb) 626 { 627 bb_store(bb, NULL); 628 return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec)); 629 } 630 631 #define DO_STRBUF(STRBUF, CHAR, STRLEN) \ 632 \ 633 struct STRBUF * \ 634 STRBUF##_alloc(void) \ 635 { \ 636 return xcalloc(1, sizeof(struct STRBUF)); \ 637 } \ 638 \ 639 static void \ 640 STRBUF##_store1(struct STRBUF *buf, CHAR c) \ 641 { \ 642 if (buf->size == buf->len) { \ 643 if (buf->size == 0) \ 644 buf->size = 64; /* Arbitrary */ \ 645 else \ 646 buf->size *= 2; \ 647 buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \ 648 } \ 649 assert(buf->s); \ 650 buf->s[buf->len] = c; \ 651 } \ 652 \ 653 /* Like strbuf_append1(buf, '\0'), but don't advance len */ \ 654 void \ 655 STRBUF##_terminate(struct STRBUF *buf) \ 656 { \ 657 STRBUF##_store1(buf, '\0'); \ 658 } \ 659 \ 660 void \ 661 STRBUF##_append1(struct STRBUF *buf, CHAR c) \ 662 { \ 663 STRBUF##_store1(buf, c); \ 664 buf->len++; \ 665 } \ 666 \ 667 void \ 668 STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len) \ 669 { \ 670 if (buf->size < buf->len + len) { \ 671 if (buf->size == 0) \ 672 buf->size = 64; /* Arbitrary */ \ 673 while (buf->size < buf->len + len) \ 674 buf->size *= 2; \ 675 buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \ 676 } \ 677 memcpy(buf->s + buf->len, s, len * sizeof(*buf->s)); \ 678 buf->len += len; \ 679 } \ 680 \ 681 void \ 682 STRBUF##_append(struct STRBUF *buf, const CHAR *s) \ 683 { \ 684 STRBUF##_appendn(buf, s, STRLEN(s)); \ 685 } \ 686 \ 687 CHAR * \ 688 STRBUF##_finish(struct STRBUF *buf) \ 689 { \ 690 STRBUF##_append1(buf, 0); \ 691 return xrealloc(buf->s, buf->len * sizeof(*buf->s)); \ 692 } \ 693 \ 694 void \ 695 STRBUF##_cleanup(void *xbuf) \ 696 { \ 697 struct STRBUF *buf; \ 698 \ 699 buf = xbuf; \ 700 xfree(buf->s); \ 701 } \ 702 \ 703 void \ 704 STRBUF##_free(void *xbuf) \ 705 { \ 706 STRBUF##_cleanup(xbuf); \ 707 xfree(xbuf); \ 708 } \ 709 \ 710 const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */ 711 712 DO_STRBUF(strbuf, char, strlen); 713 DO_STRBUF(Strbuf, Char, Strlen); 714