xref: /freebsd/contrib/tcsh/tc.str.c (revision 6be3386466ab79a84b48429ae66244f21526d3df)
1 /*
2  * tc.str.c: Short string package
3  * 	     This has been a lesson of how to write buggy code!
4  */
5 /*-
6  * Copyright (c) 1980, 1991 The Regents of the University of California.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 #include "sh.h"
34 
35 #include <assert.h>
36 #include <limits.h>
37 
38 #define MALLOC_INCR	128
39 #ifdef WIDE_STRINGS
40 #define MALLOC_SURPLUS	MB_LEN_MAX /* Space for one multibyte character */
41 #else
42 #define MALLOC_SURPLUS	0
43 #endif
44 
45 #ifdef WIDE_STRINGS
46 size_t
47 one_mbtowc(Char *pwc, const char *s, size_t n)
48 {
49     int len;
50 
51     len = rt_mbtowc(pwc, s, n);
52     if (len == -1) {
53         reset_mbtowc();
54 	*pwc = (unsigned char)*s | INVALID_BYTE;
55     }
56     if (len <= 0)
57 	len = 1;
58     return len;
59 }
60 
61 size_t
62 one_wctomb(char *s, Char wchar)
63 {
64     int len;
65 
66 #if INVALID_BYTE != 0
67     if ((wchar & INVALID_BYTE) == INVALID_BYTE) {    /* wchar >= INVALID_BYTE */
68 	/* invalid char
69 	 * exmaple)
70 	 * if wchar = f0000090(=90|INVALID_BYTE), then *s = ffffff90 */
71 	*s = (char)wchar;
72 	len = 1;
73 #else
74     if (wchar & (CHAR & INVALID_BYTE)) {
75 	s[0] = wchar & (CHAR & 0xFF);
76 	len = 1;
77 #endif
78     } else {
79 #if INVALID_BYTE != 0
80 	wchar &= MAX_UTF32;
81 #else
82 	wchar &= CHAR;
83 #endif
84 #ifdef UTF16_STRINGS
85 	if (wchar >= 0x10000) {
86 	    /* UTF-16 systems can't handle these values directly in calls to
87 	       wctomb.  Convert value to UTF-16 surrogate and call wcstombs to
88 	       convert the "string" to the correct multibyte representation,
89 	       if any. */
90 	    wchar_t ws[3];
91 	    wchar -= 0x10000;
92 	    ws[0] = 0xd800 | (wchar >> 10);
93 	    ws[1] = 0xdc00 | (wchar & 0x3ff);
94 	    ws[2] = 0;
95 	    /* The return value of wcstombs excludes the trailing 0, so len is
96 	       the correct number of multibytes for the Unicode char. */
97 	    len = wcstombs (s, ws, MB_CUR_MAX + 1);
98 	} else
99 #endif
100 	len = wctomb(s, (wchar_t) wchar);
101 	if (len == -1)
102 	    s[0] = wchar;
103 	if (len <= 0)
104 	    len = 1;
105     }
106     return len;
107 }
108 
109 int
110 rt_mbtowc(Char *pwc, const char *s, size_t n)
111 {
112     int ret;
113     char back[MB_LEN_MAX];
114     wchar_t tmp;
115 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
116 # if defined(AUTOSET_KANJI)
117     static mbstate_t mb_zero, mb;
118     /*
119      * Workaround the Shift-JIS endcoding that translates unshifted 7 bit ASCII!
120      */
121     if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') &&
122 	!memcmp(&mb, &mb_zero, sizeof(mb)))
123     {
124 	*pwc = *s;
125 	return 1;
126     }
127 # else
128     mbstate_t mb;
129 # endif
130 
131     memset (&mb, 0, sizeof mb);
132     ret = mbrtowc(&tmp, s, n, &mb);
133 #else
134     ret = mbtowc(&tmp, s, n);
135 #endif
136     if (ret > 0) {
137 	*pwc = tmp;
138 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
139 	if (tmp >= 0xd800 && tmp <= 0xdbff) {
140 	    /* UTF-16 surrogate pair.  Fetch second half and compute
141 	       UTF-32 value.  Dispense with the inverse test in this case. */
142 	    size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb);
143 	    if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2)
144 		ret = -1;
145 	    else {
146 		*pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000;
147 		ret += n2;
148 	    }
149 	} else
150 #endif
151       	if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0)
152 	    ret = -1;
153 
154     } else if (ret == -2)
155 	ret = -1;
156     else if (ret == 0)
157 	*pwc = '\0';
158 
159     return ret;
160 }
161 #endif
162 
163 #ifdef SHORT_STRINGS
164 Char  **
165 blk2short(char **src)
166 {
167     size_t     n;
168     Char **sdst, **dst;
169 
170     /*
171      * Count
172      */
173     for (n = 0; src[n] != NULL; n++)
174 	continue;
175     sdst = dst = xmalloc((n + 1) * sizeof(Char *));
176 
177     for (; *src != NULL; src++)
178 	*dst++ = SAVE(*src);
179     *dst = NULL;
180     return (sdst);
181 }
182 
183 char  **
184 short2blk(Char **src)
185 {
186     size_t     n;
187     char **sdst, **dst;
188 
189     /*
190      * Count
191      */
192     for (n = 0; src[n] != NULL; n++)
193 	continue;
194     sdst = dst = xmalloc((n + 1) * sizeof(char *));
195 
196     for (; *src != NULL; src++)
197 	*dst++ = strsave(short2str(*src));
198     *dst = NULL;
199     return (sdst);
200 }
201 
202 Char   *
203 str2short(const char *src)
204 {
205     static struct Strbuf buf; /* = Strbuf_INIT; */
206 
207     if (src == NULL)
208 	return (NULL);
209 
210     buf.len = 0;
211     while (*src) {
212 	Char wc;
213 
214 	src += one_mbtowc(&wc, src, MB_LEN_MAX);
215 	Strbuf_append1(&buf, wc);
216     }
217     Strbuf_terminate(&buf);
218     return buf.s;
219 }
220 
221 char   *
222 short2str(const Char *src)
223 {
224     static char *sdst = NULL;
225     static size_t dstsize = 0;
226     char *dst, *edst;
227 
228     if (src == NULL)
229 	return (NULL);
230 
231     if (sdst == NULL) {
232 	dstsize = MALLOC_INCR;
233 	sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
234     }
235     dst = sdst;
236     edst = &dst[dstsize];
237     while (*src) {
238 	dst += one_wctomb(dst, *src);
239 	src++;
240 	if (dst >= edst) {
241 	    char *wdst = dst;
242 	    char *wedst = edst;
243 
244 	    dstsize += MALLOC_INCR;
245 	    sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
246 	    edst = &sdst[dstsize];
247 	    dst = &edst[-MALLOC_INCR];
248 	    while (wdst > wedst) {
249 		dst++;
250 		wdst--;
251 	    }
252 	}
253     }
254     *dst = 0;
255     return (sdst);
256 }
257 
258 #if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS)
259 Char   *
260 s_strcpy(Char *dst, const Char *src)
261 {
262     Char *sdst;
263 
264     sdst = dst;
265     while ((*dst++ = *src++) != '\0')
266 	continue;
267     return (sdst);
268 }
269 
270 Char   *
271 s_strncpy(Char *dst, const Char *src, size_t n)
272 {
273     Char *sdst;
274 
275     if (n == 0)
276 	return(dst);
277 
278     sdst = dst;
279     do
280 	if ((*dst++ = *src++) == '\0') {
281 	    while (--n != 0)
282 		*dst++ = '\0';
283 	    return(sdst);
284 	}
285     while (--n != 0);
286     return (sdst);
287 }
288 
289 Char   *
290 s_strcat(Char *dst, const Char *src)
291 {
292     Strcpy(Strend(dst), src);
293     return dst;
294 }
295 
296 #ifdef NOTUSED
297 Char   *
298 s_strncat(Char *dst, const Char *src, size_t n)
299 {
300     Char *sdst;
301 
302     if (n == 0)
303 	return (dst);
304 
305     sdst = dst;
306 
307     while (*dst)
308 	dst++;
309 
310     do
311 	if ((*dst++ = *src++) == '\0')
312 	    return(sdst);
313     while (--n != 0)
314 	continue;
315 
316     *dst = '\0';
317     return (sdst);
318 }
319 
320 #endif
321 
322 Char   *
323 s_strchr(const Char *str, int ch)
324 {
325     do
326 	if (*str == ch)
327 	    return ((Char *)(intptr_t)str);
328     while (*str++);
329     return (NULL);
330 }
331 
332 Char   *
333 s_strrchr(const Char *str, int ch)
334 {
335     const Char *rstr;
336 
337     rstr = NULL;
338     do
339 	if (*str == ch)
340 	    rstr = str;
341     while (*str++);
342     return ((Char *)(intptr_t)rstr);
343 }
344 
345 size_t
346 s_strlen(const Char *str)
347 {
348     size_t n;
349 
350     for (n = 0; *str++; n++)
351 	continue;
352     return (n);
353 }
354 
355 int
356 s_strcmp(const Char *str1, const Char *str2)
357 {
358     for (; *str1 && *str1 == *str2; str1++, str2++)
359 	continue;
360     /*
361      * The following case analysis is necessary so that characters which look
362      * negative collate low against normal characters but high against the
363      * end-of-string NUL.
364      */
365     if (*str1 == '\0' && *str2 == '\0')
366 	return (0);
367     else if (*str1 == '\0')
368 	return (-1);
369     else if (*str2 == '\0')
370 	return (1);
371     else
372 	return (*str1 - *str2);
373 }
374 
375 int
376 s_strncmp(const Char *str1, const Char *str2, size_t n)
377 {
378     if (n == 0)
379 	return (0);
380     do {
381 	if (*str1 != *str2) {
382 	    /*
383 	     * The following case analysis is necessary so that characters
384 	     * which look negative collate low against normal characters
385 	     * but high against the end-of-string NUL.
386 	     */
387 	    if (*str1 == '\0')
388 		return (-1);
389 	    else if (*str2 == '\0')
390 		return (1);
391 	    else
392 		return (*str1 - *str2);
393 	}
394         if (*str1 == '\0')
395 	    return(0);
396 	str1++, str2++;
397     } while (--n != 0);
398     return(0);
399 }
400 #endif /* not WIDE_STRINGS */
401 
402 int
403 s_strcasecmp(const Char *str1, const Char *str2)
404 {
405 #ifdef WIDE_STRINGS
406     wint_t l1 = 0, l2 = 0;
407     for (; *str1; str1++, str2++)
408 	if (*str1 == *str2)
409 	    l1 = l2 = 0;
410 	else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2)))
411 	    break;
412 #else
413     unsigned char l1 = 0, l2 = 0;
414     for (; *str1; str1++, str2++)
415 	if (*str1 == *str2)
416 		l1 = l2 = 0;
417 	else if ((l1 = tolower((unsigned char)*str1)) !=
418 	    (l2 = tolower((unsigned char)*str2)))
419 	    break;
420 #endif
421     /*
422      * The following case analysis is necessary so that characters which look
423      * negative collate low against normal characters but high against the
424      * end-of-string NUL.
425      */
426     if (*str1 == '\0' && *str2 == '\0')
427 	return (0);
428     else if (*str1 == '\0')
429 	return (-1);
430     else if (*str2 == '\0')
431 	return (1);
432     else if (l1 == l2)	/* They are zero when they are equal */
433 	return (*str1 - *str2);
434     else
435 	return (l1 - l2);
436 }
437 
438 Char   *
439 s_strnsave(const Char *s, size_t len)
440 {
441     Char *n;
442 
443     n = xmalloc((len + 1) * sizeof (*n));
444     memcpy(n, s, len * sizeof (*n));
445     n[len] = '\0';
446     return n;
447 }
448 
449 Char   *
450 s_strsave(const Char *s)
451 {
452     Char   *n;
453     size_t size;
454 
455     if (s == NULL)
456 	s = STRNULL;
457     size = (Strlen(s) + 1) * sizeof(*n);
458     n = xmalloc(size);
459     memcpy(n, s, size);
460     return (n);
461 }
462 
463 Char   *
464 s_strspl(const Char *cp, const Char *dp)
465 {
466     Char *res, *ep;
467     const Char *p, *q;
468 
469     if (!cp)
470 	cp = STRNULL;
471     if (!dp)
472 	dp = STRNULL;
473     for (p = cp; *p++;)
474 	continue;
475     for (q = dp; *q++;)
476 	continue;
477     res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char));
478     for (ep = res, q = cp; (*ep++ = *q++) != '\0';)
479 	continue;
480     for (ep--, q = dp; (*ep++ = *q++) != '\0';)
481 	continue;
482     return (res);
483 }
484 
485 Char   *
486 s_strend(const Char *cp)
487 {
488     if (!cp)
489 	return ((Char *)(intptr_t) cp);
490     while (*cp)
491 	cp++;
492     return ((Char *)(intptr_t) cp);
493 }
494 
495 Char   *
496 s_strstr(const Char *s, const Char *t)
497 {
498     do {
499 	const Char *ss = s;
500 	const Char *tt = t;
501 
502 	do
503 	    if (*tt == '\0')
504 		return ((Char *)(intptr_t) s);
505 	while (*ss++ == *tt++);
506     } while (*s++ != '\0');
507     return (NULL);
508 }
509 
510 #else /* !SHORT_STRINGS */
/*
 * Copy s into a function-static buffer, run strip() on the copy and return
 * it.  Returns NULL when s is NULL.
 *
 * The buffer is grown with xrealloc() when needed and reused by every call,
 * so a returned pointer is overwritten by the next call.
 */
char *
caching_strip(const char *s)
{
    static char *cache = NULL;
    static size_t cache_size = 0;
    size_t need;

    if (s == NULL)
      return NULL;

    need = strlen(s) + 1;	/* include the terminating NUL */
    if (need > cache_size) {
	cache = xrealloc(cache, need);
	cache_size = need;
    }
    memcpy(cache, s, need);
    strip(cache);
    return cache;
}
529 #endif
530 
531 char   *
532 short2qstr(const Char *src)
533 {
534     static char *sdst = NULL;
535     static size_t dstsize = 0;
536     char *dst, *edst;
537 
538     if (src == NULL)
539 	return (NULL);
540 
541     if (sdst == NULL) {
542 	dstsize = MALLOC_INCR;
543 	sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
544     }
545     dst = sdst;
546     edst = &dst[dstsize];
547     while (*src) {
548 	if (*src & QUOTE) {
549 	    *dst++ = '\\';
550 	    if (dst == edst) {
551 		dstsize += MALLOC_INCR;
552 		sdst = xrealloc(sdst,
553 				(dstsize + MALLOC_SURPLUS) * sizeof(char));
554 		edst = &sdst[dstsize];
555 		dst = &edst[-MALLOC_INCR];
556 	    }
557 	}
558 	dst += one_wctomb(dst, *src);
559 	src++;
560 	if (dst >= edst) {
561 	    ptrdiff_t i = dst - edst;
562 	    dstsize += MALLOC_INCR;
563 	    sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
564 	    edst = &sdst[dstsize];
565 	    dst = &edst[-MALLOC_INCR + i];
566 	}
567     }
568     *dst = 0;
569     return (sdst);
570 }
571 
572 struct blk_buf *
573 bb_alloc(void)
574 {
575     return xcalloc(1, sizeof(struct blk_buf));
576 }
577 
578 static void
579 bb_store(struct blk_buf *bb, Char *str)
580 {
581     if (bb->len == bb->size) { /* Keep space for terminating NULL */
582 	if (bb->size == 0)
583 	    bb->size = 16; /* Arbitrary */
584 	else
585 	    bb->size *= 2;
586 	bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec));
587     }
588     bb->vec[bb->len] = str;
589 }
590 
591 void
592 bb_append(struct blk_buf *bb, Char *str)
593 {
594     bb_store(bb, str);
595     bb->len++;
596 }
597 
598 void
599 bb_cleanup(void *xbb)
600 {
601     struct blk_buf *bb;
602     size_t i;
603 
604     bb = (struct blk_buf *)xbb;
605     if (bb->vec) {
606 	for (i = 0; i < bb->len; i++)
607 	    xfree(bb->vec[i]);
608 	xfree(bb->vec);
609     }
610     bb->vec = NULL;
611     bb->len = 0;
612 }
613 
/*
 * Destroy a blk_buf: release its contents with bb_cleanup(), then xfree()
 * the structure itself.
 */
void
bb_free(void *bb)
{
    void *victim = bb;

    bb_cleanup(victim);
    xfree(victim);
}
620 
621 Char **
622 bb_finish(struct blk_buf *bb)
623 {
624     bb_store(bb, NULL);
625     return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec));
626 }
627 
628 #define DO_STRBUF(STRBUF, CHAR, STRLEN)				\
629 								\
630 struct STRBUF *							\
631 STRBUF##_alloc(void)						\
632 {								\
633     return xcalloc(1, sizeof(struct STRBUF));			\
634 }								\
635 								\
636 static void							\
637 STRBUF##_store1(struct STRBUF *buf, CHAR c)			\
638 {								\
639     if (buf->size == buf->len) {				\
640 	if (buf->size == 0)					\
641 	    buf->size = 64; /* Arbitrary */			\
642 	else							\
643 	    buf->size *= 2;					\
644 	buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s));	\
645     }								\
646     assert(buf->s);						\
647     buf->s[buf->len] = c;					\
648 }								\
649 								\
650 /* Like strbuf_append1(buf, '\0'), but don't advance len */	\
651 void								\
652 STRBUF##_terminate(struct STRBUF *buf)				\
653 {								\
654     STRBUF##_store1(buf, '\0');					\
655 }								\
656 								\
657 void								\
658 STRBUF##_append1(struct STRBUF *buf, CHAR c)			\
659 {								\
660     STRBUF##_store1(buf, c);					\
661     buf->len++;							\
662 }								\
663 								\
664 void								\
665 STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len)	\
666 {								\
667     if (buf->size < buf->len + len) {				\
668 	if (buf->size == 0)					\
669 	    buf->size = 64; /* Arbitrary */			\
670 	while (buf->size < buf->len + len)			\
671 	    buf->size *= 2;					\
672 	buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s));	\
673     }								\
674     memcpy(buf->s + buf->len, s, len * sizeof(*buf->s));	\
675     buf->len += len;						\
676 }								\
677 								\
678 void								\
679 STRBUF##_append(struct STRBUF *buf, const CHAR *s)		\
680 {								\
681     STRBUF##_appendn(buf, s, STRLEN(s));			\
682 }								\
683 								\
684 CHAR *								\
685 STRBUF##_finish(struct STRBUF *buf)				\
686 {								\
687     STRBUF##_append1(buf, 0);					\
688     return xrealloc(buf->s, buf->len * sizeof(*buf->s));	\
689 }								\
690 								\
691 void								\
692 STRBUF##_cleanup(void *xbuf)					\
693 {								\
694     struct STRBUF *buf;						\
695 								\
696     buf = xbuf;							\
697     xfree(buf->s);						\
698 }								\
699 								\
700 void								\
701 STRBUF##_free(void *xbuf)					\
702 {								\
703     STRBUF##_cleanup(xbuf);					\
704     xfree(xbuf);						\
705 }								\
706 								\
707 const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */
708 
709 DO_STRBUF(strbuf, char, strlen);
710 DO_STRBUF(Strbuf, Char, Strlen);
711