xref: /freebsd/contrib/tcsh/tc.str.c (revision 0572ccaa4543b0abef8ef81e384c1d04de9f3da1)
1 /* $Header: /p/tcsh/cvsroot/tcsh/tc.str.c,v 3.42 2012/01/10 21:34:31 christos Exp $ */
2 /*
3  * tc.str.c: Short string package
4  * 	     This has been a lesson of how to write buggy code!
5  */
6 /*-
7  * Copyright (c) 1980, 1991 The Regents of the University of California.
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 #include "sh.h"
35 
36 #include <assert.h>
37 #include <limits.h>
38 
39 RCSID("$tcsh: tc.str.c,v 3.42 2012/01/10 21:34:31 christos Exp $")
40 
/*
 * MALLOC_INCR is both the initial size and the growth step, in bytes, of the
 * static output buffers used by short2str() and short2qstr().
 * MALLOC_SURPLUS is the number of extra bytes allocated past that size.
 */
#define MALLOC_INCR	128
#ifdef WIDE_STRINGS
#define MALLOC_SURPLUS	MB_LEN_MAX /* Space for one multibyte character */
#else
#define MALLOC_SURPLUS	0
#endif
47 
48 #ifdef WIDE_STRINGS
49 size_t
50 one_mbtowc(Char *pwc, const char *s, size_t n)
51 {
52     int len;
53 
54     len = rt_mbtowc(pwc, s, n);
55     if (len == -1) {
56         reset_mbtowc();
57 	*pwc = (unsigned char)*s | INVALID_BYTE;
58     }
59     if (len <= 0)
60 	len = 1;
61     return len;
62 }
63 
64 size_t
65 one_wctomb(char *s, Char wchar)
66 {
67     int len;
68 
69     if (wchar & INVALID_BYTE) {
70 	s[0] = wchar & 0xFF;
71 	len = 1;
72     } else {
73 #ifdef UTF16_STRINGS
74 	if (wchar >= 0x10000) {
75 	    /* UTF-16 systems can't handle these values directly in calls to
76 	       wctomb.  Convert value to UTF-16 surrogate and call wcstombs to
77 	       convert the "string" to the correct multibyte representation,
78 	       if any. */
79 	    wchar_t ws[3];
80 	    wchar -= 0x10000;
81 	    ws[0] = 0xd800 | (wchar >> 10);
82 	    ws[1] = 0xdc00 | (wchar & 0x3ff);
83 	    ws[2] = 0;
84 	    /* The return value of wcstombs excludes the trailing 0, so len is
85 	       the correct number of multibytes for the Unicode char. */
86 	    len = wcstombs (s, ws, MB_CUR_MAX + 1);
87 	} else
88 #endif
89 	len = wctomb(s, (wchar_t) wchar);
90 	if (len == -1)
91 	    s[0] = wchar;
92 	if (len <= 0)
93 	    len = 1;
94     }
95     return len;
96 }
97 
98 int
99 rt_mbtowc(Char *pwc, const char *s, size_t n)
100 {
101     int ret;
102     char back[MB_LEN_MAX];
103     wchar_t tmp;
104 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
105 # if defined(AUTOSET_KANJI)
106     static mbstate_t mb_zero, mb;
107     /*
108      * Workaround the Shift-JIS endcoding that translates unshifted 7 bit ASCII!
109      */
110     if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') &&
111 	!memcmp(&mb, &mb_zero, sizeof(mb)))
112     {
113 	*pwc = *s;
114 	return 1;
115     }
116 # else
117     mbstate_t mb;
118 # endif
119 
120     memset (&mb, 0, sizeof mb);
121     ret = mbrtowc(&tmp, s, n, &mb);
122 #else
123     ret = mbtowc(&tmp, s, n);
124 #endif
125     if (ret > 0) {
126 	*pwc = tmp;
127 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
128 	if (tmp >= 0xd800 && tmp <= 0xdbff) {
129 	    /* UTF-16 surrogate pair.  Fetch second half and compute
130 	       UTF-32 value.  Dispense with the inverse test in this case. */
131 	    size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb);
132 	    if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2)
133 		ret = -1;
134 	    else {
135 		*pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000;
136 		ret += n2;
137 	    }
138 	} else
139 #endif
140       	if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0)
141 	    ret = -1;
142 
143     } else if (ret == -2)
144 	ret = -1;
145     else if (ret == 0)
146 	*pwc = '\0';
147 
148     return ret;
149 }
150 #endif
151 
152 #ifdef SHORT_STRINGS
153 Char  **
154 blk2short(char **src)
155 {
156     size_t     n;
157     Char **sdst, **dst;
158 
159     /*
160      * Count
161      */
162     for (n = 0; src[n] != NULL; n++)
163 	continue;
164     sdst = dst = xmalloc((n + 1) * sizeof(Char *));
165 
166     for (; *src != NULL; src++)
167 	*dst++ = SAVE(*src);
168     *dst = NULL;
169     return (sdst);
170 }
171 
172 char  **
173 short2blk(Char **src)
174 {
175     size_t     n;
176     char **sdst, **dst;
177 
178     /*
179      * Count
180      */
181     for (n = 0; src[n] != NULL; n++)
182 	continue;
183     sdst = dst = xmalloc((n + 1) * sizeof(char *));
184 
185     for (; *src != NULL; src++)
186 	*dst++ = strsave(short2str(*src));
187     *dst = NULL;
188     return (sdst);
189 }
190 
191 Char   *
192 str2short(const char *src)
193 {
194     static struct Strbuf buf; /* = Strbuf_INIT; */
195 
196     if (src == NULL)
197 	return (NULL);
198 
199     buf.len = 0;
200     while (*src) {
201 	Char wc;
202 
203 	src += one_mbtowc(&wc, src, MB_LEN_MAX);
204 	Strbuf_append1(&buf, wc);
205     }
206     Strbuf_terminate(&buf);
207     return buf.s;
208 }
209 
210 char   *
211 short2str(const Char *src)
212 {
213     static char *sdst = NULL;
214     static size_t dstsize = 0;
215     char *dst, *edst;
216 
217     if (src == NULL)
218 	return (NULL);
219 
220     if (sdst == NULL) {
221 	dstsize = MALLOC_INCR;
222 	sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
223     }
224     dst = sdst;
225     edst = &dst[dstsize];
226     while (*src) {
227 	dst += one_wctomb(dst, *src & CHAR);
228 	src++;
229 	if (dst >= edst) {
230 	    char *wdst = dst;
231 	    char *wedst = edst;
232 
233 	    dstsize += MALLOC_INCR;
234 	    sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
235 	    edst = &sdst[dstsize];
236 	    dst = &edst[-MALLOC_INCR];
237 	    while (wdst > wedst) {
238 		dst++;
239 		wdst--;
240 	    }
241 	}
242     }
243     *dst = 0;
244     return (sdst);
245 }
246 
247 #if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS)
248 Char   *
249 s_strcpy(Char *dst, const Char *src)
250 {
251     Char *sdst;
252 
253     sdst = dst;
254     while ((*dst++ = *src++) != '\0')
255 	continue;
256     return (sdst);
257 }
258 
259 Char   *
260 s_strncpy(Char *dst, const Char *src, size_t n)
261 {
262     Char *sdst;
263 
264     if (n == 0)
265 	return(dst);
266 
267     sdst = dst;
268     do
269 	if ((*dst++ = *src++) == '\0') {
270 	    while (--n != 0)
271 		*dst++ = '\0';
272 	    return(sdst);
273 	}
274     while (--n != 0);
275     return (sdst);
276 }
277 
278 Char   *
279 s_strcat(Char *dst, const Char *src)
280 {
281     Strcpy(Strend(dst), src);
282     return dst;
283 }
284 
285 #ifdef NOTUSED
286 Char   *
287 s_strncat(Char *dst, const Char *src, size_t n)
288 {
289     Char *sdst;
290 
291     if (n == 0)
292 	return (dst);
293 
294     sdst = dst;
295 
296     while (*dst)
297 	dst++;
298 
299     do
300 	if ((*dst++ = *src++) == '\0')
301 	    return(sdst);
302     while (--n != 0)
303 	continue;
304 
305     *dst = '\0';
306     return (sdst);
307 }
308 
309 #endif
310 
311 Char   *
312 s_strchr(const Char *str, int ch)
313 {
314     do
315 	if (*str == ch)
316 	    return ((Char *)(intptr_t)str);
317     while (*str++);
318     return (NULL);
319 }
320 
321 Char   *
322 s_strrchr(const Char *str, int ch)
323 {
324     const Char *rstr;
325 
326     rstr = NULL;
327     do
328 	if (*str == ch)
329 	    rstr = str;
330     while (*str++);
331     return ((Char *)(intptr_t)rstr);
332 }
333 
334 size_t
335 s_strlen(const Char *str)
336 {
337     size_t n;
338 
339     for (n = 0; *str++; n++)
340 	continue;
341     return (n);
342 }
343 
344 int
345 s_strcmp(const Char *str1, const Char *str2)
346 {
347     for (; *str1 && *str1 == *str2; str1++, str2++)
348 	continue;
349     /*
350      * The following case analysis is necessary so that characters which look
351      * negative collate low against normal characters but high against the
352      * end-of-string NUL.
353      */
354     if (*str1 == '\0' && *str2 == '\0')
355 	return (0);
356     else if (*str1 == '\0')
357 	return (-1);
358     else if (*str2 == '\0')
359 	return (1);
360     else
361 	return (*str1 - *str2);
362 }
363 
364 int
365 s_strncmp(const Char *str1, const Char *str2, size_t n)
366 {
367     if (n == 0)
368 	return (0);
369     do {
370 	if (*str1 != *str2) {
371 	    /*
372 	     * The following case analysis is necessary so that characters
373 	     * which look negative collate low against normal characters
374 	     * but high against the end-of-string NUL.
375 	     */
376 	    if (*str1 == '\0')
377 		return (-1);
378 	    else if (*str2 == '\0')
379 		return (1);
380 	    else
381 		return (*str1 - *str2);
382 	}
383         if (*str1 == '\0')
384 	    return(0);
385 	str1++, str2++;
386     } while (--n != 0);
387     return(0);
388 }
389 #endif /* not WIDE_STRINGS */
390 
391 int
392 s_strcasecmp(const Char *str1, const Char *str2)
393 {
394 #ifdef WIDE_STRINGS
395     wint_t l1 = 0, l2 = 0;
396     for (; *str1; str1++, str2++)
397 	if (*str1 == *str2)
398 	    l1 = l2 = 0;
399 	else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2)))
400 	    break;
401 #else
402     unsigned char l1 = 0, l2 = 0;
403     for (; *str1; str1++, str2++)
404 	if (*str1 == *str2)
405 		l1 = l2 = 0;
406 	else if ((l1 = tolower((unsigned char)*str1)) !=
407 	    (l2 = tolower((unsigned char)*str2)))
408 	    break;
409 #endif
410     /*
411      * The following case analysis is necessary so that characters which look
412      * negative collate low against normal characters but high against the
413      * end-of-string NUL.
414      */
415     if (*str1 == '\0' && *str2 == '\0')
416 	return (0);
417     else if (*str1 == '\0')
418 	return (-1);
419     else if (*str2 == '\0')
420 	return (1);
421     else if (l1 == l2)	/* They are zero when they are equal */
422 	return (*str1 - *str2);
423     else
424 	return (l1 - l2);
425 }
426 
427 Char   *
428 s_strnsave(const Char *s, size_t len)
429 {
430     Char *n;
431 
432     n = xmalloc((len + 1) * sizeof (*n));
433     memcpy(n, s, len * sizeof (*n));
434     n[len] = '\0';
435     return n;
436 }
437 
438 Char   *
439 s_strsave(const Char *s)
440 {
441     Char   *n;
442     size_t size;
443 
444     if (s == NULL)
445 	s = STRNULL;
446     size = (Strlen(s) + 1) * sizeof(*n);
447     n = xmalloc(size);
448     memcpy(n, s, size);
449     return (n);
450 }
451 
452 Char   *
453 s_strspl(const Char *cp, const Char *dp)
454 {
455     Char *res, *ep;
456     const Char *p, *q;
457 
458     if (!cp)
459 	cp = STRNULL;
460     if (!dp)
461 	dp = STRNULL;
462     for (p = cp; *p++;)
463 	continue;
464     for (q = dp; *q++;)
465 	continue;
466     res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char));
467     for (ep = res, q = cp; (*ep++ = *q++) != '\0';)
468 	continue;
469     for (ep--, q = dp; (*ep++ = *q++) != '\0';)
470 	continue;
471     return (res);
472 }
473 
474 Char   *
475 s_strend(const Char *cp)
476 {
477     if (!cp)
478 	return ((Char *)(intptr_t) cp);
479     while (*cp)
480 	cp++;
481     return ((Char *)(intptr_t) cp);
482 }
483 
484 Char   *
485 s_strstr(const Char *s, const Char *t)
486 {
487     do {
488 	const Char *ss = s;
489 	const Char *tt = t;
490 
491 	do
492 	    if (*tt == '\0')
493 		return ((Char *)(intptr_t) s);
494 	while (*ss++ == *tt++);
495     } while (*s++ != '\0');
496     return (NULL);
497 }
498 
499 #else /* !SHORT_STRINGS */
/*
 * Copy s into a function-static buffer, run strip() on the copy and return
 * it.  NULL is passed through as NULL.
 *
 * The buffer is grown with xrealloc() when s does not fit and is reused by
 * every call, so the returned pointer is only good until the next call.
 */
char *
caching_strip(const char *s)
{
    static char *buf = NULL;
    static size_t buf_size = 0;
    size_t need;

    if (s == NULL)
      return NULL;

    need = strlen(s) + 1;	/* include the terminator */
    if (need > buf_size) {
	buf = xrealloc(buf, need);
	buf_size = need;
    }
    memcpy(buf, s, need);
    strip(buf);
    return buf;
}
518 #endif
519 
520 char   *
521 short2qstr(const Char *src)
522 {
523     static char *sdst = NULL;
524     static size_t dstsize = 0;
525     char *dst, *edst;
526 
527     if (src == NULL)
528 	return (NULL);
529 
530     if (sdst == NULL) {
531 	dstsize = MALLOC_INCR;
532 	sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
533     }
534     dst = sdst;
535     edst = &dst[dstsize];
536     while (*src) {
537 	if (*src & QUOTE) {
538 	    *dst++ = '\\';
539 	    if (dst == edst) {
540 		dstsize += MALLOC_INCR;
541 		sdst = xrealloc(sdst,
542 				(dstsize + MALLOC_SURPLUS) * sizeof(char));
543 		edst = &sdst[dstsize];
544 		dst = &edst[-MALLOC_INCR];
545 	    }
546 	}
547 	dst += one_wctomb(dst, *src & CHAR);
548 	src++;
549 	if (dst >= edst) {
550 	    ptrdiff_t i = dst - edst;
551 	    dstsize += MALLOC_INCR;
552 	    sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
553 	    edst = &sdst[dstsize];
554 	    dst = &edst[-MALLOC_INCR + i];
555 	}
556     }
557     *dst = 0;
558     return (sdst);
559 }
560 
561 struct blk_buf *
562 bb_alloc()
563 {
564     return xcalloc(1, sizeof(struct blk_buf));
565 }
566 
567 static void
568 bb_store(struct blk_buf *bb, Char *str)
569 {
570     if (bb->len == bb->size) { /* Keep space for terminating NULL */
571 	if (bb->size == 0)
572 	    bb->size = 16; /* Arbitrary */
573 	else
574 	    bb->size *= 2;
575 	bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec));
576     }
577     bb->vec[bb->len] = str;
578 }
579 
580 void
581 bb_append(struct blk_buf *bb, Char *str)
582 {
583     bb_store(bb, str);
584     bb->len++;
585 }
586 
587 void
588 bb_cleanup(void *xbb)
589 {
590     struct blk_buf *bb;
591     size_t i;
592 
593     bb = xbb;
594     for (i = 0; i < bb->len; i++)
595 	xfree(bb->vec[i]);
596     xfree(bb->vec);
597 }
598 
/*
 * Release a struct blk_buf completely: its contents through bb_cleanup(),
 * then the struct itself.
 */
void
bb_free(void *bb)
{
    bb_cleanup(bb);	/* strings and vector */
    xfree(bb);		/* the struct */
}
605 
606 Char **
607 bb_finish(struct blk_buf *bb)
608 {
609     bb_store(bb, NULL);
610     return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec));
611 }
612 
613 #define DO_STRBUF(STRBUF, CHAR, STRLEN)				\
614 								\
615 struct STRBUF *							\
616 STRBUF##_alloc(void)						\
617 {								\
618     return xcalloc(1, sizeof(struct STRBUF));			\
619 }								\
620 								\
621 static void							\
622 STRBUF##_store1(struct STRBUF *buf, CHAR c)			\
623 {								\
624     if (buf->size == buf->len) {				\
625 	if (buf->size == 0)					\
626 	    buf->size = 64; /* Arbitrary */			\
627 	else							\
628 	    buf->size *= 2;					\
629 	buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s));	\
630     }								\
631     assert(buf->s);						\
632     buf->s[buf->len] = c;					\
633 }								\
634 								\
635 /* Like strbuf_append1(buf, '\0'), but don't advance len */	\
636 void								\
637 STRBUF##_terminate(struct STRBUF *buf)				\
638 {								\
639     STRBUF##_store1(buf, '\0');					\
640 }								\
641 								\
642 void								\
643 STRBUF##_append1(struct STRBUF *buf, CHAR c)			\
644 {								\
645     STRBUF##_store1(buf, c);					\
646     buf->len++;							\
647 }								\
648 								\
649 void								\
650 STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len)	\
651 {								\
652     if (buf->size < buf->len + len) {				\
653 	if (buf->size == 0)					\
654 	    buf->size = 64; /* Arbitrary */			\
655 	while (buf->size < buf->len + len)			\
656 	    buf->size *= 2;					\
657 	buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s));	\
658     }								\
659     memcpy(buf->s + buf->len, s, len * sizeof(*buf->s));	\
660     buf->len += len;						\
661 }								\
662 								\
663 void								\
664 STRBUF##_append(struct STRBUF *buf, const CHAR *s)		\
665 {								\
666     STRBUF##_appendn(buf, s, STRLEN(s));			\
667 }								\
668 								\
669 CHAR *								\
670 STRBUF##_finish(struct STRBUF *buf)				\
671 {								\
672     STRBUF##_append1(buf, 0);					\
673     return xrealloc(buf->s, buf->len * sizeof(*buf->s));	\
674 }								\
675 								\
676 void								\
677 STRBUF##_cleanup(void *xbuf)					\
678 {								\
679     struct STRBUF *buf;						\
680 								\
681     buf = xbuf;							\
682     xfree(buf->s);						\
683 }								\
684 								\
685 void								\
686 STRBUF##_free(void *xbuf)					\
687 {								\
688     STRBUF##_cleanup(xbuf);					\
689     xfree(xbuf);						\
690 }								\
691 								\
692 const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */
693 
694 DO_STRBUF(strbuf, char, strlen);
695 DO_STRBUF(Strbuf, Char, Strlen);
696