1 /*
2 * tc.str.c: Short string package
3 * This has been a lesson of how to write buggy code!
4 */
5 /*-
6 * Copyright (c) 1980, 1991 The Regents of the University of California.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33 #include "sh.h"
34
35 #include <assert.h>
36 #include <limits.h>
37
38 #define MALLOC_INCR 128
39 #ifdef WIDE_STRINGS
40 #define MALLOC_SURPLUS MB_LEN_MAX /* Space for one multibyte character */
41 #else
42 #define MALLOC_SURPLUS 0
43 #endif
44
45 #ifdef WIDE_STRINGS
46 size_t
one_mbtowc(Char * pwc,const char * s,size_t n)47 one_mbtowc(Char *pwc, const char *s, size_t n)
48 {
49 int len;
50
51 len = rt_mbtowc(pwc, s, n);
52 if (len == -1) {
53 reset_mbtowc();
54 *pwc = (unsigned char)*s | INVALID_BYTE;
55 }
56 if (len <= 0)
57 len = 1;
58 return len;
59 }
60
61 size_t
one_wctomb(char * s,Char wchar)62 one_wctomb(char *s, Char wchar)
63 {
64 int len;
65
66 #if INVALID_BYTE != 0
67 if ((wchar & INVALID_BYTE) == INVALID_BYTE) { /* wchar >= INVALID_BYTE */
68 /* invalid char
69 * exmaple)
70 * if wchar = f0000090(=90|INVALID_BYTE), then *s = ffffff90 */
71 *s = (char)wchar;
72 len = 1;
73 #else
74 if (wchar & (CHAR & INVALID_BYTE)) {
75 s[0] = wchar & (CHAR & 0xFF);
76 len = 1;
77 #endif
78 } else {
79 #if INVALID_BYTE != 0
80 wchar &= MAX_UTF32;
81 #else
82 wchar &= CHAR;
83 #endif
84 #ifdef UTF16_STRINGS
85 if (wchar >= 0x10000) {
86 /* UTF-16 systems can't handle these values directly in calls to
87 wctomb. Convert value to UTF-16 surrogate and call wcstombs to
88 convert the "string" to the correct multibyte representation,
89 if any. */
90 wchar_t ws[3];
91 wchar -= 0x10000;
92 ws[0] = 0xd800 | (wchar >> 10);
93 ws[1] = 0xdc00 | (wchar & 0x3ff);
94 ws[2] = 0;
95 /* The return value of wcstombs excludes the trailing 0, so len is
96 the correct number of multibytes for the Unicode char. */
97 len = wcstombs (s, ws, MB_CUR_MAX + 1);
98 } else
99 #endif
100 len = wctomb(s, (wchar_t) wchar);
101 if (len == -1)
102 s[0] = wchar;
103 if (len <= 0)
104 len = 1;
105 }
106 return len;
107 }
108
109 int
110 rt_mbtowc(Char *pwc, const char *s, size_t n)
111 {
112 int ret;
113 char back[MB_LEN_MAX];
114 wchar_t tmp;
115 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
116 # if defined(AUTOSET_KANJI)
117 static mbstate_t mb_zero, mb;
118 /*
119 * Workaround the Shift-JIS endcoding that translates unshifted 7 bit ASCII!
120 */
121 if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') &&
122 !memcmp(&mb, &mb_zero, sizeof(mb)))
123 {
124 *pwc = *s;
125 return 1;
126 }
127 # else
128 mbstate_t mb;
129 # endif
130
131 memset (&mb, 0, sizeof mb);
132 ret = mbrtowc(&tmp, s, n, &mb);
133 #else
134 ret = mbtowc(&tmp, s, n);
135 #endif
136 if (ret > 0) {
137 *pwc = tmp;
138 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
139 if (tmp >= 0xd800 && tmp <= 0xdbff) {
140 /* UTF-16 surrogate pair. Fetch second half and compute
141 UTF-32 value. Dispense with the inverse test in this case. */
142 size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb);
143 if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2)
144 ret = -1;
145 else {
146 *pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000;
147 ret += n2;
148 }
149 } else
150 #endif
151 if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0)
152 ret = -1;
153
154 } else if (ret == -2)
155 ret = -1;
156 else if (ret == 0)
157 *pwc = '\0';
158
159 return ret;
160 }
161 #endif
162
163 #ifdef SHORT_STRINGS
164 Char **
165 blk2short(char **src)
166 {
167 size_t n;
168 Char **sdst, **dst;
169
170 /*
171 * Count
172 */
173 for (n = 0; src[n] != NULL; n++)
174 continue;
175 sdst = dst = xmalloc((n + 1) * sizeof(Char *));
176
177 for (; *src != NULL; src++)
178 *dst++ = SAVE(*src);
179 *dst = NULL;
180 return (sdst);
181 }
182
183 char **
184 short2blk(Char **src)
185 {
186 size_t n;
187 char **sdst, **dst;
188
189 /*
190 * Count
191 */
192 for (n = 0; src[n] != NULL; n++)
193 continue;
194 sdst = dst = xmalloc((n + 1) * sizeof(char *));
195
196 for (; *src != NULL; src++)
197 *dst++ = strsave(short2str(*src));
198 *dst = NULL;
199 return (sdst);
200 }
201
202 Char *
203 str2short(const char *src)
204 {
205 static struct Strbuf buf; /* = Strbuf_INIT; */
206
207 if (src == NULL)
208 return (NULL);
209
210 buf.len = 0;
211 while (*src) {
212 Char wc;
213
214 src += one_mbtowc(&wc, src, MB_LEN_MAX);
215 Strbuf_append1(&buf, wc);
216 }
217 Strbuf_terminate(&buf);
218 return buf.s;
219 }
220
221 char *
222 short2str(const Char *src)
223 {
224 static char *sdst = NULL;
225 static size_t dstsize = 0;
226 char *dst, *edst;
227
228 if (src == NULL)
229 return (NULL);
230
231 if (sdst == NULL) {
232 dstsize = MALLOC_INCR;
233 sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
234 }
235 dst = sdst;
236 edst = &dst[dstsize];
237 while (*src) {
238 dst += one_wctomb(dst, *src);
239 src++;
240 if (dst >= edst) {
241 char *wdst = dst;
242 char *wedst = edst;
243
244 dstsize += MALLOC_INCR;
245 sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
246 edst = &sdst[dstsize];
247 dst = &edst[-MALLOC_INCR];
248 while (wdst > wedst) {
249 dst++;
250 wdst--;
251 }
252 }
253 }
254 *dst = 0;
255 return (sdst);
256 }
257
258 #if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS)
259 Char *
260 s_strcpy(Char *dst, const Char *src)
261 {
262 Char *sdst;
263
264 sdst = dst;
265 while ((*dst++ = *src++) != '\0')
266 continue;
267 return (sdst);
268 }
269
270 Char *
271 s_strncpy(Char *dst, const Char *src, size_t n)
272 {
273 Char *sdst;
274
275 if (n == 0)
276 return(dst);
277
278 sdst = dst;
279 do
280 if ((*dst++ = *src++) == '\0') {
281 while (--n != 0)
282 *dst++ = '\0';
283 return(sdst);
284 }
285 while (--n != 0);
286 return (sdst);
287 }
288
289 Char *
290 s_strcat(Char *dst, const Char *src)
291 {
292 Strcpy(Strend(dst), src);
293 return dst;
294 }
295
296 #ifdef NOTUSED
297 Char *
298 s_strncat(Char *dst, const Char *src, size_t n)
299 {
300 Char *sdst;
301
302 if (n == 0)
303 return (dst);
304
305 sdst = dst;
306
307 while (*dst)
308 dst++;
309
310 do
311 if ((*dst++ = *src++) == '\0')
312 return(sdst);
313 while (--n != 0)
314 continue;
315
316 *dst = '\0';
317 return (sdst);
318 }
319
320 #endif
321
322 Char *
323 s_strchr(const Char *str, int ch)
324 {
325 do
326 if (*str == ch)
327 return ((Char *)(intptr_t)str);
328 while (*str++);
329 return (NULL);
330 }
331
332 Char *
333 s_strrchr(const Char *str, int ch)
334 {
335 const Char *rstr;
336
337 rstr = NULL;
338 do
339 if (*str == ch)
340 rstr = str;
341 while (*str++);
342 return ((Char *)(intptr_t)rstr);
343 }
344
345 size_t
346 s_strlen(const Char *str)
347 {
348 size_t n;
349
350 for (n = 0; *str++; n++)
351 continue;
352 return (n);
353 }
354
355 int
356 s_strcmp(const Char *str1, const Char *str2)
357 {
358 for (; *str1 && *str1 == *str2; str1++, str2++)
359 continue;
360 /*
361 * The following case analysis is necessary so that characters which look
362 * negative collate low against normal characters but high against the
363 * end-of-string NUL.
364 */
365 if (*str1 == '\0' && *str2 == '\0')
366 return (0);
367 else if (*str1 == '\0')
368 return (-1);
369 else if (*str2 == '\0')
370 return (1);
371 else
372 return (*str1 - *str2);
373 }
374
375 int
376 s_strncmp(const Char *str1, const Char *str2, size_t n)
377 {
378 if (n == 0)
379 return (0);
380 do {
381 if (*str1 != *str2) {
382 /*
383 * The following case analysis is necessary so that characters
384 * which look negative collate low against normal characters
385 * but high against the end-of-string NUL.
386 */
387 if (*str1 == '\0')
388 return (-1);
389 else if (*str2 == '\0')
390 return (1);
391 else
392 return (*str1 - *str2);
393 }
394 if (*str1 == '\0')
395 return(0);
396 str1++, str2++;
397 } while (--n != 0);
398 return(0);
399 }
#endif /* !WIDE_STRINGS || UTF16_STRINGS */
401
402 int
403 s_strcasecmp(const Char *str1, const Char *str2)
404 {
405 #ifdef WIDE_STRINGS
406 wint_t l1 = 0, l2 = 0;
407 for (; *str1; str1++, str2++)
408 if (*str1 == *str2)
409 l1 = l2 = 0;
410 else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2)))
411 break;
412 #else
413 unsigned char l1 = 0, l2 = 0;
414 for (; *str1; str1++, str2++)
415 if (*str1 == *str2)
416 l1 = l2 = 0;
417 else if ((l1 = tolower((unsigned char)*str1)) !=
418 (l2 = tolower((unsigned char)*str2)))
419 break;
420 #endif
421 /*
422 * The following case analysis is necessary so that characters which look
423 * negative collate low against normal characters but high against the
424 * end-of-string NUL.
425 */
426 if (*str1 == '\0' && *str2 == '\0')
427 return (0);
428 else if (*str1 == '\0')
429 return (-1);
430 else if (*str2 == '\0')
431 return (1);
432 else if (l1 == l2) /* They are zero when they are equal */
433 return (*str1 - *str2);
434 else
435 return (l1 - l2);
436 }
437
438 Char *
439 s_strnsave(const Char *s, size_t len)
440 {
441 Char *n;
442
443 n = xmalloc((len + 1) * sizeof (*n));
444 memcpy(n, s, len * sizeof (*n));
445 n[len] = '\0';
446 return n;
447 }
448
449 Char *
450 s_strsave(const Char *s)
451 {
452 Char *n;
453 size_t size;
454
455 if (s == NULL)
456 s = STRNULL;
457 size = (Strlen(s) + 1) * sizeof(*n);
458 n = xmalloc(size);
459 memcpy(n, s, size);
460 return (n);
461 }
462
463 Char *
464 s_strspl(const Char *cp, const Char *dp)
465 {
466 Char *res, *ep;
467 const Char *p, *q;
468
469 if (!cp)
470 cp = STRNULL;
471 if (!dp)
472 dp = STRNULL;
473 for (p = cp; *p++;)
474 continue;
475 for (q = dp; *q++;)
476 continue;
477 res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char));
478 for (ep = res, q = cp; (*ep++ = *q++) != '\0';)
479 continue;
480 for (ep--, q = dp; (*ep++ = *q++) != '\0';)
481 continue;
482 return (res);
483 }
484
485 Char *
486 s_strend(const Char *cp)
487 {
488 if (!cp)
489 return ((Char *)(intptr_t) cp);
490 while (*cp)
491 cp++;
492 return ((Char *)(intptr_t) cp);
493 }
494
495 Char *
496 s_strstr(const Char *s, const Char *t)
497 {
498 do {
499 const Char *ss = s;
500 const Char *tt = t;
501
502 do
503 if (*tt == '\0')
504 return ((Char *)(intptr_t) s);
505 while (*ss++ == *tt++);
506 } while (*s++ != '\0');
507 return (NULL);
508 }
509
510 #else /* !SHORT_STRINGS */
/*
 * caching_strip: copy s into a function-static buffer, run strip() on the
 * copy and return it.
 *
 * The buffer only ever grows and is reused by every call, so the returned
 * pointer is only good until the next caching_strip().  Returns NULL when
 * s is NULL.
 */
char *
caching_strip(const char *s)
{
    static char *buf = NULL;
    static size_t buf_size = 0;
    size_t need;

    if (s == NULL)
        return NULL;

    need = strlen(s) + 1;               /* including the terminator */
    if (need > buf_size) {
        buf = xrealloc(buf, need);
        buf_size = need;
    }
    memcpy(buf, s, need);
    strip(buf);
    return buf;
}
529 #endif
530
531 char *
532 short2qstr(const Char *src)
533 {
534 static char *sdst = NULL;
535 static size_t dstsize = 0;
536 char *dst, *edst;
537
538 if (src == NULL)
539 return (NULL);
540
541 if (sdst == NULL) {
542 dstsize = MALLOC_INCR;
543 sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
544 }
545 dst = sdst;
546 edst = &dst[dstsize];
547 while (*src) {
548 if (*src & QUOTE) {
549 *dst++ = '\\';
550 if (dst == edst) {
551 dstsize += MALLOC_INCR;
552 sdst = xrealloc(sdst,
553 (dstsize + MALLOC_SURPLUS) * sizeof(char));
554 edst = &sdst[dstsize];
555 dst = &edst[-MALLOC_INCR];
556 }
557 }
558 dst += one_wctomb(dst, *src);
559 src++;
560 if (dst >= edst) {
561 ptrdiff_t i = dst - edst;
562 dstsize += MALLOC_INCR;
563 sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
564 edst = &sdst[dstsize];
565 dst = &edst[-MALLOC_INCR + i];
566 }
567 }
568 *dst = 0;
569 return (sdst);
570 }
571
572 struct blk_buf *
573 bb_alloc(void)
574 {
575 return xcalloc(1, sizeof(struct blk_buf));
576 }
577
578 static void
579 bb_store(struct blk_buf *bb, Char *str)
580 {
581 if (bb->len == bb->size) { /* Keep space for terminating NULL */
582 if (bb->size == 0)
583 bb->size = 16; /* Arbitrary */
584 else
585 bb->size *= 2;
586 bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec));
587 }
588 bb->vec[bb->len] = str;
589 }
590
591 void
592 bb_append(struct blk_buf *bb, Char *str)
593 {
594 bb_store(bb, str);
595 bb->len++;
596 }
597
598 void
599 bb_cleanup(void *xbb)
600 {
601 struct blk_buf *bb;
602 size_t i;
603
604 bb = (struct blk_buf *)xbb;
605 if (bb->vec) {
606 for (i = 0; i < bb->len; i++)
607 xfree(bb->vec[i]);
608 xfree(bb->vec);
609 }
610 bb->vec = NULL;
611 bb->len = 0;
612 }
613
/*
 * bb_free: release the contents of the blk_buf bb with bb_cleanup() and
 * then xfree() the structure itself.
 */
void
bb_free(void *bb)
{
    void *owned = bb;

    bb_cleanup(owned);
    xfree(owned);
}
620
621 Char **
622 bb_finish(struct blk_buf *bb)
623 {
624 bb_store(bb, NULL);
625 return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec));
626 }
627
628 #define DO_STRBUF(STRBUF, CHAR, STRLEN) \
629 \
630 struct STRBUF * \
631 STRBUF##_alloc(void) \
632 { \
633 return xcalloc(1, sizeof(struct STRBUF)); \
634 } \
635 \
636 static void \
637 STRBUF##_store1(struct STRBUF *buf, CHAR c) \
638 { \
639 if (buf->size == buf->len) { \
640 if (buf->size == 0) \
641 buf->size = 64; /* Arbitrary */ \
642 else \
643 buf->size *= 2; \
644 buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \
645 } \
646 assert(buf->s); \
647 buf->s[buf->len] = c; \
648 } \
649 \
650 /* Like strbuf_append1(buf, '\0'), but don't advance len */ \
651 void \
652 STRBUF##_terminate(struct STRBUF *buf) \
653 { \
654 STRBUF##_store1(buf, '\0'); \
655 } \
656 \
657 void \
658 STRBUF##_append1(struct STRBUF *buf, CHAR c) \
659 { \
660 STRBUF##_store1(buf, c); \
661 buf->len++; \
662 } \
663 \
664 void \
665 STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len) \
666 { \
667 if (buf->size < buf->len + len) { \
668 if (buf->size == 0) \
669 buf->size = 64; /* Arbitrary */ \
670 while (buf->size < buf->len + len) \
671 buf->size *= 2; \
672 buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \
673 } \
674 memcpy(buf->s + buf->len, s, len * sizeof(*buf->s)); \
675 buf->len += len; \
676 } \
677 \
678 void \
679 STRBUF##_append(struct STRBUF *buf, const CHAR *s) \
680 { \
681 STRBUF##_appendn(buf, s, STRLEN(s)); \
682 } \
683 \
684 CHAR * \
685 STRBUF##_finish(struct STRBUF *buf) \
686 { \
687 STRBUF##_append1(buf, 0); \
688 return xrealloc(buf->s, buf->len * sizeof(*buf->s)); \
689 } \
690 \
691 void \
692 STRBUF##_cleanup(void *xbuf) \
693 { \
694 struct STRBUF *buf; \
695 \
696 buf = xbuf; \
697 xfree(buf->s); \
698 } \
699 \
700 void \
701 STRBUF##_free(void *xbuf) \
702 { \
703 STRBUF##_cleanup(xbuf); \
704 xfree(xbuf); \
705 } \
706 \
707 const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */
708
709 DO_STRBUF(strbuf, char, strlen);
710 DO_STRBUF(Strbuf, Char, Strlen);
711