/* $Header: /p/tcsh/cvsroot/tcsh/tc.str.c,v 3.47 2015/06/06 21:19:08 christos Exp $ */
/*
 * tc.str.c: Short string package
 *           This has been a lesson of how to write buggy code!
 */
/*-
 * Copyright (c) 1980, 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "sh.h"

#include <assert.h>
#include <limits.h>

RCSID("$tcsh: tc.str.c,v 3.47 2015/06/06 21:19:08 christos Exp $")

/* Growth step, in bytes, of the static conversion buffers below. */
#define MALLOC_INCR 128
#ifdef WIDE_STRINGS
#define MALLOC_SURPLUS MB_LEN_MAX /* Space for one multibyte character */
#else
#define MALLOC_SURPLUS 0
#endif

#ifdef WIDE_STRINGS
/*
 * Convert the multibyte character at s (looking at no more than n bytes)
 * into *pwc using rt_mbtowc().
 *
 * If rt_mbtowc() reports -1, reset_mbtowc() is called and *pwc receives the
 * first byte of s or'ed with INVALID_BYTE.  The return value is the result
 * of rt_mbtowc(), forced to 1 when that result is <= 0, so a caller can
 * always advance s by the returned amount.
 */
size_t
one_mbtowc(Char *pwc, const char *s, size_t n)
{
    int len;

    len = rt_mbtowc(pwc, s, n);
    if (len == -1) {
        reset_mbtowc();
        *pwc = (unsigned char)*s | INVALID_BYTE;
    }
    if (len <= 0)
        len = 1;
    return len;
}

/*
 * Store the multibyte representation of wchar at s.
 *
 * A character carrying the INVALID_BYTE marker is written back as a single
 * raw byte.  Otherwise the value is masked and handed to wctomb() (or, for
 * UTF16_STRINGS and values >= 0x10000, converted to a surrogate pair and
 * handed to wcstombs()).  If the conversion fails, the low byte of wchar is
 * stored instead.  Returns the number of bytes produced, never less than 1.
 */
size_t
one_wctomb(char *s, Char wchar)
{
    int len;

#if INVALID_BYTE != 0
    if ((wchar & INVALID_BYTE) == INVALID_BYTE) {    /* wchar >= INVALID_BYTE */
        /* invalid char
         * example)
         * if wchar = f0000090(=90|INVALID_BYTE), then *s = ffffff90 */
        *s = (char)wchar;
        len = 1;
#else
    if (wchar & (CHAR & INVALID_BYTE)) {
        s[0] = wchar & (CHAR & 0xFF);
        len = 1;
#endif
    } else {
#if INVALID_BYTE != 0
        wchar &= MAX_UTF32;
#else
        wchar &= CHAR;
#endif
#ifdef UTF16_STRINGS
        if (wchar >= 0x10000) {
            /* UTF-16 systems can't handle these values directly in calls to
               wctomb.  Convert value to UTF-16 surrogate and call wcstombs to
               convert the "string" to the correct multibyte representation,
               if any. */
            wchar_t ws[3];
            wchar -= 0x10000;
            ws[0] = 0xd800 | (wchar >> 10);
            ws[1] = 0xdc00 | (wchar & 0x3ff);
            ws[2] = 0;
            /* The return value of wcstombs excludes the trailing 0, so len is
               the correct number of multibytes for the Unicode char. */
            len = wcstombs (s, ws, MB_CUR_MAX + 1);
        } else
#endif
        len = wctomb(s, (wchar_t) wchar);
        if (len == -1)
            s[0] = wchar;
        if (len <= 0)
            len = 1;
    }
    return len;
}

/*
 * mbtowc() with a round-trip check.
 *
 * Converts the multibyte character at s (at most n bytes) into *pwc.  Outside
 * the UTF-16 surrogate case, the wide character is converted back with
 * wctomb() and compared with the input bytes; a mismatch is reported as -1.
 * An incomplete sequence (-2) is also reported as -1, and a result of 0
 * stores '\0' in *pwc.  Otherwise the number of bytes consumed is returned.
 */
int
rt_mbtowc(Char *pwc, const char *s, size_t n)
{
    int ret;
    char back[MB_LEN_MAX];
    wchar_t tmp;
#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
# if defined(AUTOSET_KANJI)
    static mbstate_t mb_zero, mb;
    /*
     * Workaround the Shift-JIS encoding that translates unshifted 7 bit ASCII!
     */
    if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') &&
        !memcmp(&mb, &mb_zero, sizeof(mb)))
    {
        *pwc = *s;
        return 1;
    }
# else
    mbstate_t mb;
# endif

    memset (&mb, 0, sizeof mb);
    ret = mbrtowc(&tmp, s, n, &mb);
#else
    ret = mbtowc(&tmp, s, n);
#endif
    if (ret > 0) {
        *pwc = tmp;
#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
        if (tmp >= 0xd800 && tmp <= 0xdbff) {
            /* UTF-16 surrogate pair.  Fetch second half and compute
               UTF-32 value.  Dispense with the inverse test in this case. */
            size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb);
            if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2)
                ret = -1;
            else {
                *pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000;
                ret += n2;
            }
        } else
#endif
        if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0)
            ret = -1;
    } else if (ret == -2)
        ret = -1;
    else if (ret == 0)
        *pwc = '\0';

    return ret;
}
#endif

#ifdef SHORT_STRINGS
/*
 * Build a newly allocated, NULL-terminated vector of Char strings from the
 * NULL-terminated vector src; each element is produced with SAVE().
 */
Char  **
blk2short(char **src)
{
    size_t     n;
    Char **sdst, **dst;

    /*
     * Count
     */
    for (n = 0; src[n] != NULL; n++)
        continue;
    sdst = dst = xmalloc((n + 1) * sizeof(Char *));

    for (; *src != NULL; src++)
        *dst++ = SAVE(*src);
    *dst = NULL;
    return (sdst);
}

/*
 * Build a newly allocated, NULL-terminated vector of char strings from the
 * NULL-terminated vector src; each element is strsave(short2str(element)).
 */
char  **
short2blk(Char **src)
{
    size_t     n;
    char **sdst, **dst;

    /*
     * Count
     */
    for (n = 0; src[n] != NULL; n++)
        continue;
    sdst = dst = xmalloc((n + 1) * sizeof(char *));

    for (; *src != NULL; src++)
        *dst++ = strsave(short2str(*src));
    *dst = NULL;
    return (sdst);
}

/*
 * Convert the multibyte string src to a Char string.
 *
 * Returns NULL for a NULL src.  The result is stored in a function-static
 * Strbuf that is rewound on every call, so each call overwrites the result
 * of the previous one.
 */
Char   *
str2short(const char *src)
{
    static struct Strbuf buf; /* = Strbuf_INIT; */

    if (src == NULL)
        return (NULL);

    buf.len = 0;
    while (*src) {
        Char wc;

        src += one_mbtowc(&wc, src, MB_LEN_MAX);
        Strbuf_append1(&buf, wc);
    }
    Strbuf_terminate(&buf);
    return buf.s;
}

/*
 * Convert the Char string src to a multibyte string.
 *
 * Returns NULL for a NULL src.  The result is stored in a function-static
 * buffer that grows in MALLOC_INCR steps (plus MALLOC_SURPLUS bytes of slack
 * so one character may run past the nominal end before the buffer is
 * enlarged); each call overwrites the result of the previous one.
 */
char   *
short2str(const Char *src)
{
    static char *sdst = NULL;
    static size_t dstsize = 0;
    char *dst, *edst;

    if (src == NULL)
        return (NULL);

    if (sdst == NULL) {
        dstsize = MALLOC_INCR;
        sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
    }
    dst = sdst;
    edst = &dst[dstsize];
    while (*src) {
        dst += one_wctomb(dst, *src);
        src++;
        if (dst >= edst) {
            /*
             * Remember how far past the old end we are *before* the
             * reallocation: pointers into the old block must not be
             * used once it has been handed to xrealloc().
             */
            ptrdiff_t i = dst - edst;

            dstsize += MALLOC_INCR;
            sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
            edst = &sdst[dstsize];
            dst = &edst[-MALLOC_INCR + i];
        }
    }
    *dst = 0;
    return (sdst);
}

#if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS)
/*
 * Copy the Char string src, including its terminator, to dst.
 * Returns dst.
 */
Char   *
s_strcpy(Char *dst, const Char *src)
{
    Char *sdst;

    sdst = dst;
    while ((*dst++ = *src++) != '\0')
        continue;
    return (sdst);
}

/*
 * Copy at most n Chars from src to dst.  If src ends before n Chars have
 * been copied, the remainder of dst is filled with '\0'; otherwise dst is
 * left unterminated.  Returns dst.
 */
Char   *
s_strncpy(Char *dst, const Char *src, size_t n)
{
    Char *sdst;

    if (n == 0)
        return(dst);

    sdst = dst;
    do
        if ((*dst++ = *src++) == '\0') {
            while (--n != 0)
                *dst++ = '\0';
            return(sdst);
        }
    while (--n != 0);
    return (sdst);
}

/*
 * Append the Char string src to the end of dst.  Returns dst.
 */
Char   *
s_strcat(Char *dst, const Char *src)
{
    Strcpy(Strend(dst), src);
    return dst;
}

#ifdef NOTUSED
/*
 * Append at most n Chars of src to the end of dst and terminate the result.
 * Returns dst.
 */
Char   *
s_strncat(Char *dst, const Char *src, size_t n)
{
    Char *sdst;

    if (n == 0)
        return (dst);

    sdst = dst;

    while (*dst)
        dst++;

    do
        if ((*dst++ = *src++) == '\0')
            return(sdst);
    while (--n != 0);

    *dst = '\0';
    return (sdst);
}

#endif

/*
 * Return a pointer to the first Char in str equal to ch (the terminator
 * itself is matched when ch is 0), or NULL if there is none.
 */
Char   *
s_strchr(const Char *str, int ch)
{
    do
        if (*str == ch)
            return ((Char *)(intptr_t)str);
    while (*str++);
    return (NULL);
}

/*
 * Return a pointer to the last Char in str equal to ch (the terminator
 * itself is matched when ch is 0), or NULL if there is none.
 */
Char   *
s_strrchr(const Char *str, int ch)
{
    const Char *rstr;

    rstr = NULL;
    do
        if (*str == ch)
            rstr = str;
    while (*str++);
    return ((Char *)(intptr_t)rstr);
}

/*
 * Return the number of Chars in str before the terminator.
 */
size_t
s_strlen(const Char *str)
{
    size_t n;

    for (n = 0; *str++; n++)
        continue;
    return (n);
}

/*
 * Compare two Char strings.  Returns 0 if equal, a negative value if str1
 * sorts before str2 and a positive value otherwise.
 */
int
s_strcmp(const Char *str1, const Char *str2)
{
    for (; *str1 && *str1 == *str2; str1++, str2++)
        continue;
    /*
     * The following case analysis is necessary so that characters which look
     * negative collate low against normal characters but high against the
     * end-of-string NUL.
     */
    if (*str1 == '\0' && *str2 == '\0')
        return (0);
    else if (*str1 == '\0')
        return (-1);
    else if (*str2 == '\0')
        return (1);
    else
        return (*str1 - *str2);
}

/*
 * Compare at most n Chars of two Char strings.  Same return convention as
 * s_strcmp(); returns 0 when n is 0.
 */
int
s_strncmp(const Char *str1, const Char *str2, size_t n)
{
    if (n == 0)
        return (0);
    do {
        if (*str1 != *str2) {
            /*
             * The following case analysis is necessary so that characters
             * which look negative collate low against normal characters
             * but high against the end-of-string NUL.
             */
            if (*str1 == '\0')
                return (-1);
            else if (*str2 == '\0')
                return (1);
            else
                return (*str1 - *str2);
        }
        if (*str1 == '\0')
            return(0);
        str1++, str2++;
    } while (--n != 0);
    return(0);
}
#endif /* !WIDE_STRINGS || UTF16_STRINGS */

/*
 * Compare two Char strings ignoring case (towlower() with WIDE_STRINGS,
 * tolower() otherwise).  Same return convention as s_strcmp().
 */
int
s_strcasecmp(const Char *str1, const Char *str2)
{
#ifdef WIDE_STRINGS
    wint_t l1 = 0, l2 = 0;
    for (; *str1; str1++, str2++)
        if (*str1 == *str2)
            l1 = l2 = 0;
        else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2)))
            break;
#else
    unsigned char l1 = 0, l2 = 0;
    for (; *str1; str1++, str2++)
        if (*str1 == *str2)
            l1 = l2 = 0;
        else if ((l1 = tolower((unsigned char)*str1)) !=
            (l2 = tolower((unsigned char)*str2)))
            break;
#endif
    /*
     * The following case analysis is necessary so that characters which look
     * negative collate low against normal characters but high against the
     * end-of-string NUL.
     */
    if (*str1 == '\0' && *str2 == '\0')
        return (0);
    else if (*str1 == '\0')
        return (-1);
    else if (*str2 == '\0')
        return (1);
    else if (l1 == l2)  /* They are zero when they are equal */
        return (*str1 - *str2);
    else
        return (l1 - l2);
}

/*
 * Return a newly allocated, terminated copy of the first len Chars of s.
 */
Char   *
s_strnsave(const Char *s, size_t len)
{
    Char *n;

    n = xmalloc((len + 1) * sizeof (*n));
    memcpy(n, s, len * sizeof (*n));
    n[len] = '\0';
    return n;
}

/*
 * Return a newly allocated copy of s; a NULL s is treated as STRNULL.
 */
Char   *
s_strsave(const Char *s)
{
    Char   *n;
    size_t size;

    if (s == NULL)
        s = STRNULL;
    size = (Strlen(s) + 1) * sizeof(*n);
    n = xmalloc(size);
    memcpy(n, s, size);
    return (n);
}

/*
 * Return a newly allocated string holding cp followed by dp; a NULL
 * argument is treated as STRNULL.
 */
Char   *
s_strspl(const Char *cp, const Char *dp)
{
    Char *res, *ep;
    const Char *p, *q;

    if (!cp)
        cp = STRNULL;
    if (!dp)
        dp = STRNULL;
    /* After each loop the cursor is one past the terminator. */
    for (p = cp; *p++;)
        continue;
    for (q = dp; *q++;)
        continue;
    /* Both spans count their terminator; only one is needed. */
    res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char));
    for (ep = res, q = cp; (*ep++ = *q++) != '\0';)
        continue;
    /* Step back over the first terminator before appending dp. */
    for (ep--, q = dp; (*ep++ = *q++) != '\0';)
        continue;
    return (res);
}

/*
 * Return a pointer to the terminator of cp, or cp itself when it is NULL.
 */
Char   *
s_strend(const Char *cp)
{
    if (!cp)
        return ((Char *)(intptr_t) cp);
    while (*cp)
        cp++;
    return ((Char *)(intptr_t) cp);
}

/*
 * Return a pointer to the first occurrence of t in s, or NULL if t does not
 * occur.  An empty t matches at the start of s.
 */
Char   *
s_strstr(const Char *s, const Char *t)
{
    do {
        const Char *ss = s;
        const Char *tt = t;

        do
            if (*tt == '\0')
                return ((Char *)(intptr_t) s);
        while (*ss++ == *tt++);
    } while (*s++ != '\0');
    return (NULL);
}

#else /* !SHORT_STRINGS */
/*
 * Copy s into a function-static buffer (enlarged on demand), run strip()
 * on the copy and return it.  Returns NULL for a NULL s.  Each call
 * overwrites the result of the previous one.
 */
char *
caching_strip(const char *s)
{
    static char *buf = NULL;
    static size_t buf_size = 0;
    size_t size;

    if (s == NULL)
        return NULL;
    size = strlen(s) + 1;
    if (buf_size < size) {
        buf = xrealloc(buf, size);
        buf_size = size;
    }
    memcpy(buf, s, size);
    strip(buf);
    return buf;
}
#endif

/*
 * Convert the Char string src to a multibyte string, writing a backslash in
 * front of every character that has the QUOTE bit set.
 *
 * Returns NULL for a NULL src.  The result is stored in a function-static
 * buffer that grows in MALLOC_INCR steps; each call overwrites the result
 * of the previous one.
 */
char   *
short2qstr(const Char *src)
{
    static char *sdst = NULL;
    static size_t dstsize = 0;
    char *dst, *edst;

    if (src == NULL)
        return (NULL);

    if (sdst == NULL) {
        dstsize = MALLOC_INCR;
        sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
    }
    dst = sdst;
    edst = &dst[dstsize];
    while (*src) {
        if (*src & QUOTE) {
            *dst++ = '\\';
            if (dst == edst) {
                dstsize += MALLOC_INCR;
                sdst = xrealloc(sdst,
                                (dstsize + MALLOC_SURPLUS) * sizeof(char));
                edst = &sdst[dstsize];
                dst = &edst[-MALLOC_INCR];
            }
        }
        dst += one_wctomb(dst, *src);
        src++;
        if (dst >= edst) {
            /* Overshoot past the old end, taken before reallocating. */
            ptrdiff_t i = dst - edst;
            dstsize += MALLOC_INCR;
            sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
            edst = &sdst[dstsize];
            dst = &edst[-MALLOC_INCR + i];
        }
    }
    *dst = 0;
    return (sdst);
}

/*
 * Allocate a zero-filled blk_buf.
 */
struct blk_buf *
bb_alloc(void)
{
    return xcalloc(1, sizeof(struct blk_buf));
}

/*
 * Store str at index bb->len without advancing bb->len, growing the vector
 * first (16 slots initially, doubling afterwards) when it is full.
 */
static void
bb_store(struct blk_buf *bb, Char *str)
{
    if (bb->len == bb->size) { /* Keep space for terminating NULL */
        if (bb->size == 0)
            bb->size = 16; /* Arbitrary */
        else
            bb->size *= 2;
        bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec));
    }
    bb->vec[bb->len] = str;
}

/*
 * Append str to the vector held by bb.
 */
void
bb_append(struct blk_buf *bb, Char *str)
{
    bb_store(bb, str);
    bb->len++;
}

/*
 * Free every string stored in the blk_buf xbb and the vector itself, and
 * return the structure to its empty state.  The structure is not freed.
 */
void
bb_cleanup(void *xbb)
{
    struct blk_buf *bb;
    size_t i;

    bb = (struct blk_buf *)xbb;
    if (bb->vec) {
        for (i = 0; i < bb->len; i++)
            xfree(bb->vec[i]);
        xfree(bb->vec);
    }
    bb->vec = NULL;
    bb->len = 0;
    /*
     * Also forget the old capacity: with vec == NULL a stale size would make
     * the next bb_store() skip its allocation and write through NULL.
     */
    bb->size = 0;
}

/*
 * Release the contents of the blk_buf bb and then the structure itself.
 */
void
bb_free(void *bb)
{
    bb_cleanup(bb);
    xfree(bb);
}

/*
 * Terminate the vector with NULL and return it, resized to exactly
 * bb->len + 1 entries.  bb->vec is not updated with the returned pointer.
 */
Char **
bb_finish(struct blk_buf *bb)
{
    bb_store(bb, NULL);
    return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec));
}

/*
 * DO_STRBUF(STRBUF, CHAR, STRLEN) defines a growable string buffer API for
 * "struct STRBUF" holding elements of type CHAR, with STRLEN as the matching
 * length function:
 *
 *   STRBUF_alloc      allocate a zero-filled buffer structure
 *   STRBUF_store1     (static) store c at index len, growing if full
 *                     (64 elements initially, doubling afterwards)
 *   STRBUF_terminate  store a terminator without advancing len
 *   STRBUF_append1    append one element
 *   STRBUF_appendn    append len elements from s
 *   STRBUF_append     append a terminated string
 *   STRBUF_finish     append a terminator and return the storage resized
 *                     to exactly len elements
 *   STRBUF_cleanup    free the storage
 *   STRBUF_free       free the storage and the structure
 *   STRBUF_init       a zero-initialized constant structure
 *
 * The macro ends in an unterminated declaration, so each use must be
 * followed by a semicolon.
 */
#define DO_STRBUF(STRBUF, CHAR, STRLEN)                         \
                                                                \
struct STRBUF *                                                 \
STRBUF##_alloc(void)                                            \
{                                                               \
    return xcalloc(1, sizeof(struct STRBUF));                   \
}                                                               \
                                                                \
static void                                                     \
STRBUF##_store1(struct STRBUF *buf, CHAR c)                     \
{                                                               \
    if (buf->size == buf->len) {                                \
        if (buf->size == 0)                                     \
            buf->size = 64; /* Arbitrary */                     \
        else                                                    \
            buf->size *= 2;                                     \
        buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \
    }                                                           \
    assert(buf->s);                                             \
    buf->s[buf->len] = c;                                       \
}                                                               \
                                                                \
/* Like strbuf_append1(buf, '\0'), but don't advance len */     \
void                                                            \
STRBUF##_terminate(struct STRBUF *buf)                          \
{                                                               \
    STRBUF##_store1(buf, '\0');                                 \
}                                                               \
                                                                \
void                                                            \
STRBUF##_append1(struct STRBUF *buf, CHAR c)                    \
{                                                               \
    STRBUF##_store1(buf, c);                                    \
    buf->len++;                                                 \
}                                                               \
                                                                \
void                                                            \
STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len) \
{                                                               \
    /* Nothing to copy; also keeps memcpy away from a NULL buf->s */ \
    if (len == 0)                                               \
        return;                                                 \
    if (buf->size < buf->len + len) {                           \
        if (buf->size == 0)                                     \
            buf->size = 64; /* Arbitrary */                     \
        while (buf->size < buf->len + len)                      \
            buf->size *= 2;                                     \
        buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \
    }                                                           \
    memcpy(buf->s + buf->len, s, len * sizeof(*buf->s));        \
    buf->len += len;                                            \
}                                                               \
                                                                \
void                                                            \
STRBUF##_append(struct STRBUF *buf, const CHAR *s)              \
{                                                               \
    STRBUF##_appendn(buf, s, STRLEN(s));                        \
}                                                               \
                                                                \
CHAR *                                                          \
STRBUF##_finish(struct STRBUF *buf)                             \
{                                                               \
    STRBUF##_append1(buf, 0);                                   \
    return xrealloc(buf->s, buf->len * sizeof(*buf->s));        \
}                                                               \
                                                                \
void                                                            \
STRBUF##_cleanup(void *xbuf)                                    \
{                                                               \
    struct STRBUF *buf;                                         \
                                                                \
    buf = xbuf;                                                 \
    xfree(buf->s);                                              \
}                                                               \
                                                                \
void                                                            \
STRBUF##_free(void *xbuf)                                       \
{                                                               \
    STRBUF##_cleanup(xbuf);                                     \
    xfree(xbuf);                                                \
}                                                               \
                                                                \
const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */

DO_STRBUF(strbuf, char, strlen);
DO_STRBUF(Strbuf, Char, Strlen);