1*9f45a3c8SSimon J. Gerraty /* $NetBSD: str.h,v 1.15 2021/12/15 10:57:01 rillig Exp $ */ 2b0c40a00SSimon J. Gerraty 3b0c40a00SSimon J. Gerraty /* 4b0c40a00SSimon J. Gerraty Copyright (c) 2021 Roland Illig <rillig@NetBSD.org> 5b0c40a00SSimon J. Gerraty All rights reserved. 6b0c40a00SSimon J. Gerraty 7b0c40a00SSimon J. Gerraty Redistribution and use in source and binary forms, with or without 8b0c40a00SSimon J. Gerraty modification, are permitted provided that the following conditions 9b0c40a00SSimon J. Gerraty are met: 10b0c40a00SSimon J. Gerraty 11b0c40a00SSimon J. Gerraty 1. Redistributions of source code must retain the above copyright 12b0c40a00SSimon J. Gerraty notice, this list of conditions and the following disclaimer. 13b0c40a00SSimon J. Gerraty 2. Redistributions in binary form must reproduce the above copyright 14b0c40a00SSimon J. Gerraty notice, this list of conditions and the following disclaimer in the 15b0c40a00SSimon J. Gerraty documentation and/or other materials provided with the distribution. 16b0c40a00SSimon J. Gerraty 17b0c40a00SSimon J. Gerraty THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18b0c40a00SSimon J. Gerraty "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 19b0c40a00SSimon J. Gerraty TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 20b0c40a00SSimon J. Gerraty PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS 21b0c40a00SSimon J. Gerraty BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22b0c40a00SSimon J. Gerraty CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23b0c40a00SSimon J. Gerraty SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24b0c40a00SSimon J. Gerraty INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25b0c40a00SSimon J. Gerraty CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26b0c40a00SSimon J. Gerraty ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27b0c40a00SSimon J. Gerraty POSSIBILITY OF SUCH DAMAGE. 28b0c40a00SSimon J. Gerraty */ 29b0c40a00SSimon J. Gerraty 30b0c40a00SSimon J. Gerraty 31b0c40a00SSimon J. Gerraty /* 32b0c40a00SSimon J. Gerraty * Memory-efficient string handling. 33b0c40a00SSimon J. Gerraty */ 34b0c40a00SSimon J. Gerraty 35b0c40a00SSimon J. Gerraty 36b0c40a00SSimon J. Gerraty /* A read-only string that may need to be freed after use. */ 37b0c40a00SSimon J. Gerraty typedef struct FStr { 38b0c40a00SSimon J. Gerraty const char *str; 39b0c40a00SSimon J. Gerraty void *freeIt; 40b0c40a00SSimon J. Gerraty } FStr; 41b0c40a00SSimon J. Gerraty 42b0c40a00SSimon J. Gerraty /* A read-only range of a character array, NOT null-terminated. */ 43b0c40a00SSimon J. Gerraty typedef struct Substring { 44b0c40a00SSimon J. Gerraty const char *start; 45b0c40a00SSimon J. Gerraty const char *end; 46b0c40a00SSimon J. Gerraty } Substring; 47b0c40a00SSimon J. Gerraty 48b0c40a00SSimon J. Gerraty /* 49b0c40a00SSimon J. Gerraty * Builds a string, only allocating memory if the string is different from the 50b0c40a00SSimon J. Gerraty * expected string. 51b0c40a00SSimon J. Gerraty */ 52b0c40a00SSimon J. Gerraty typedef struct LazyBuf { 53b0c40a00SSimon J. Gerraty char *data; 54b0c40a00SSimon J. Gerraty size_t len; 55b0c40a00SSimon J. Gerraty size_t cap; 56b0c40a00SSimon J. Gerraty const char *expected; 57b0c40a00SSimon J. Gerraty } LazyBuf; 58b0c40a00SSimon J. Gerraty 59b0c40a00SSimon J. Gerraty /* The result of splitting a string into words. */ 60b0c40a00SSimon J. Gerraty typedef struct Words { 61b0c40a00SSimon J. Gerraty char **words; 62b0c40a00SSimon J. Gerraty size_t len; 63b0c40a00SSimon J. Gerraty void *freeIt; 64b0c40a00SSimon J. Gerraty } Words; 65b0c40a00SSimon J. Gerraty 66b0c40a00SSimon J. Gerraty /* The result of splitting a string into words. */ 67b0c40a00SSimon J. Gerraty typedef struct SubstringWords { 68b0c40a00SSimon J. Gerraty Substring *words; 69b0c40a00SSimon J. Gerraty size_t len; 70b0c40a00SSimon J. Gerraty void *freeIt; 71b0c40a00SSimon J. Gerraty } SubstringWords; 72b0c40a00SSimon J. Gerraty 73b0c40a00SSimon J. Gerraty 74b0c40a00SSimon J. Gerraty MAKE_INLINE FStr 75b0c40a00SSimon J. Gerraty FStr_Init(const char *str, void *freeIt) 76b0c40a00SSimon J. Gerraty { 77b0c40a00SSimon J. Gerraty FStr fstr; 78b0c40a00SSimon J. Gerraty fstr.str = str; 79b0c40a00SSimon J. Gerraty fstr.freeIt = freeIt; 80b0c40a00SSimon J. Gerraty return fstr; 81b0c40a00SSimon J. Gerraty } 82b0c40a00SSimon J. Gerraty 83b0c40a00SSimon J. Gerraty /* Return a string that is the sole owner of str. */ 84b0c40a00SSimon J. Gerraty MAKE_INLINE FStr 85b0c40a00SSimon J. Gerraty FStr_InitOwn(char *str) 86b0c40a00SSimon J. Gerraty { 87b0c40a00SSimon J. Gerraty return FStr_Init(str, str); 88b0c40a00SSimon J. Gerraty } 89b0c40a00SSimon J. Gerraty 90b0c40a00SSimon J. Gerraty /* Return a string that refers to the shared str. */ 91b0c40a00SSimon J. Gerraty MAKE_INLINE FStr 92b0c40a00SSimon J. Gerraty FStr_InitRefer(const char *str) 93b0c40a00SSimon J. Gerraty { 94b0c40a00SSimon J. Gerraty return FStr_Init(str, NULL); 95b0c40a00SSimon J. Gerraty } 96b0c40a00SSimon J. Gerraty 97b0c40a00SSimon J. Gerraty MAKE_INLINE void 98b0c40a00SSimon J. Gerraty FStr_Done(FStr *fstr) 99b0c40a00SSimon J. Gerraty { 100b0c40a00SSimon J. Gerraty free(fstr->freeIt); 101b0c40a00SSimon J. Gerraty #ifdef CLEANUP 102b0c40a00SSimon J. Gerraty fstr->str = NULL; 103b0c40a00SSimon J. Gerraty fstr->freeIt = NULL; 104b0c40a00SSimon J. Gerraty #endif 105b0c40a00SSimon J. Gerraty } 106b0c40a00SSimon J. Gerraty 107b0c40a00SSimon J. Gerraty 108b0c40a00SSimon J. Gerraty MAKE_STATIC Substring 109b0c40a00SSimon J. Gerraty Substring_Init(const char *start, const char *end) 110b0c40a00SSimon J. Gerraty { 111b0c40a00SSimon J. Gerraty Substring sub; 112b0c40a00SSimon J. Gerraty 113b0c40a00SSimon J. Gerraty sub.start = start; 114b0c40a00SSimon J. Gerraty sub.end = end; 115b0c40a00SSimon J. Gerraty return sub; 116b0c40a00SSimon J. Gerraty } 117b0c40a00SSimon J. Gerraty 118b0c40a00SSimon J. Gerraty MAKE_INLINE Substring 119b0c40a00SSimon J. Gerraty Substring_InitStr(const char *str) 120b0c40a00SSimon J. Gerraty { 121b0c40a00SSimon J. Gerraty return Substring_Init(str, str + strlen(str)); 122b0c40a00SSimon J. Gerraty } 123b0c40a00SSimon J. Gerraty 124b0c40a00SSimon J. Gerraty MAKE_STATIC size_t 125b0c40a00SSimon J. Gerraty Substring_Length(Substring sub) 126b0c40a00SSimon J. Gerraty { 127b0c40a00SSimon J. Gerraty return (size_t)(sub.end - sub.start); 128b0c40a00SSimon J. Gerraty } 129b0c40a00SSimon J. Gerraty 130b0c40a00SSimon J. Gerraty MAKE_STATIC bool 131b0c40a00SSimon J. Gerraty Substring_IsEmpty(Substring sub) 132b0c40a00SSimon J. Gerraty { 133b0c40a00SSimon J. Gerraty return sub.start == sub.end; 134b0c40a00SSimon J. Gerraty } 135b0c40a00SSimon J. Gerraty 136b0c40a00SSimon J. Gerraty MAKE_INLINE bool 137b0c40a00SSimon J. Gerraty Substring_Equals(Substring sub, const char *str) 138b0c40a00SSimon J. Gerraty { 139b0c40a00SSimon J. Gerraty size_t len = strlen(str); 140b0c40a00SSimon J. Gerraty return Substring_Length(sub) == len && 141b0c40a00SSimon J. Gerraty memcmp(sub.start, str, len) == 0; 142b0c40a00SSimon J. Gerraty } 143b0c40a00SSimon J. Gerraty 14412904384SSimon J. Gerraty MAKE_INLINE bool 14512904384SSimon J. Gerraty Substring_Eq(Substring sub, Substring str) 14612904384SSimon J. Gerraty { 14712904384SSimon J. Gerraty size_t len = Substring_Length(sub); 14812904384SSimon J. Gerraty return len == Substring_Length(str) && 14912904384SSimon J. Gerraty memcmp(sub.start, str.start, len) == 0; 15012904384SSimon J. Gerraty } 15112904384SSimon J. Gerraty 152b0c40a00SSimon J. Gerraty MAKE_STATIC Substring 153b0c40a00SSimon J. Gerraty Substring_Sub(Substring sub, size_t start, size_t end) 154b0c40a00SSimon J. Gerraty { 155b0c40a00SSimon J. Gerraty assert(start <= Substring_Length(sub)); 156b0c40a00SSimon J. Gerraty assert(end <= Substring_Length(sub)); 157b0c40a00SSimon J. Gerraty return Substring_Init(sub.start + start, sub.start + end); 158b0c40a00SSimon J. Gerraty } 159b0c40a00SSimon J. Gerraty 160b0c40a00SSimon J. Gerraty MAKE_STATIC bool 161b0c40a00SSimon J. Gerraty Substring_HasPrefix(Substring sub, Substring prefix) 162b0c40a00SSimon J. Gerraty { 163b0c40a00SSimon J. Gerraty return Substring_Length(sub) >= Substring_Length(prefix) && 164b0c40a00SSimon J. Gerraty memcmp(sub.start, prefix.start, Substring_Length(prefix)) == 0; 165b0c40a00SSimon J. Gerraty } 166b0c40a00SSimon J. Gerraty 167b0c40a00SSimon J. Gerraty MAKE_STATIC bool 168b0c40a00SSimon J. Gerraty Substring_HasSuffix(Substring sub, Substring suffix) 169b0c40a00SSimon J. Gerraty { 170b0c40a00SSimon J. Gerraty size_t suffixLen = Substring_Length(suffix); 171b0c40a00SSimon J. Gerraty return Substring_Length(sub) >= suffixLen && 172b0c40a00SSimon J. Gerraty memcmp(sub.end - suffixLen, suffix.start, suffixLen) == 0; 173b0c40a00SSimon J. Gerraty } 174b0c40a00SSimon J. Gerraty 175b0c40a00SSimon J. Gerraty /* Returns an independent, null-terminated copy of the substring. */ 176b0c40a00SSimon J. Gerraty MAKE_STATIC FStr 177b0c40a00SSimon J. Gerraty Substring_Str(Substring sub) 178b0c40a00SSimon J. Gerraty { 179b0c40a00SSimon J. Gerraty if (Substring_IsEmpty(sub)) 180b0c40a00SSimon J. Gerraty return FStr_InitRefer(""); 181b0c40a00SSimon J. Gerraty return FStr_InitOwn(bmake_strsedup(sub.start, sub.end)); 182b0c40a00SSimon J. Gerraty } 183b0c40a00SSimon J. Gerraty 184b0c40a00SSimon J. Gerraty MAKE_STATIC const char * 185b0c40a00SSimon J. Gerraty Substring_SkipFirst(Substring sub, char ch) 186b0c40a00SSimon J. Gerraty { 187b0c40a00SSimon J. Gerraty const char *p; 188b0c40a00SSimon J. Gerraty 189b0c40a00SSimon J. Gerraty for (p = sub.start; p != sub.end; p++) 190b0c40a00SSimon J. Gerraty if (*p == ch) 191b0c40a00SSimon J. Gerraty return p + 1; 192b0c40a00SSimon J. Gerraty return sub.start; 193b0c40a00SSimon J. Gerraty } 194b0c40a00SSimon J. Gerraty 195b0c40a00SSimon J. Gerraty MAKE_STATIC const char * 196b0c40a00SSimon J. Gerraty Substring_LastIndex(Substring sub, char ch) 197b0c40a00SSimon J. Gerraty { 198b0c40a00SSimon J. Gerraty const char *p; 199b0c40a00SSimon J. Gerraty 200b0c40a00SSimon J. Gerraty for (p = sub.end; p != sub.start; p--) 201b0c40a00SSimon J. Gerraty if (p[-1] == ch) 202b0c40a00SSimon J. Gerraty return p - 1; 203b0c40a00SSimon J. Gerraty return NULL; 204b0c40a00SSimon J. Gerraty } 205b0c40a00SSimon J. Gerraty 206b0c40a00SSimon J. Gerraty MAKE_STATIC Substring 207b0c40a00SSimon J. Gerraty Substring_Dirname(Substring pathname) 208b0c40a00SSimon J. Gerraty { 209b0c40a00SSimon J. Gerraty const char *p; 210b0c40a00SSimon J. Gerraty 211b0c40a00SSimon J. Gerraty for (p = pathname.end; p != pathname.start; p--) 212b0c40a00SSimon J. Gerraty if (p[-1] == '/') 213b0c40a00SSimon J. Gerraty return Substring_Init(pathname.start, p - 1); 214b0c40a00SSimon J. Gerraty return Substring_InitStr("."); 215b0c40a00SSimon J. Gerraty } 216b0c40a00SSimon J. Gerraty 217b0c40a00SSimon J. Gerraty MAKE_STATIC Substring 218b0c40a00SSimon J. Gerraty Substring_Basename(Substring pathname) 219b0c40a00SSimon J. Gerraty { 220b0c40a00SSimon J. Gerraty const char *p; 221b0c40a00SSimon J. Gerraty 222b0c40a00SSimon J. Gerraty for (p = pathname.end; p != pathname.start; p--) 223b0c40a00SSimon J. Gerraty if (p[-1] == '/') 224b0c40a00SSimon J. Gerraty return Substring_Init(p, pathname.end); 225b0c40a00SSimon J. Gerraty return pathname; 226b0c40a00SSimon J. Gerraty } 227b0c40a00SSimon J. Gerraty 228b0c40a00SSimon J. Gerraty 229b0c40a00SSimon J. Gerraty MAKE_STATIC void 230b0c40a00SSimon J. Gerraty LazyBuf_Init(LazyBuf *buf, const char *expected) 231b0c40a00SSimon J. Gerraty { 232b0c40a00SSimon J. Gerraty buf->data = NULL; 233b0c40a00SSimon J. Gerraty buf->len = 0; 234b0c40a00SSimon J. Gerraty buf->cap = 0; 235b0c40a00SSimon J. Gerraty buf->expected = expected; 236b0c40a00SSimon J. Gerraty } 237b0c40a00SSimon J. Gerraty 238b0c40a00SSimon J. Gerraty MAKE_INLINE void 239b0c40a00SSimon J. Gerraty LazyBuf_Done(LazyBuf *buf) 240b0c40a00SSimon J. Gerraty { 24112904384SSimon J. Gerraty free(buf->data); 242b0c40a00SSimon J. Gerraty } 243b0c40a00SSimon J. Gerraty 244b0c40a00SSimon J. Gerraty MAKE_STATIC void 245b0c40a00SSimon J. Gerraty LazyBuf_Add(LazyBuf *buf, char ch) 246b0c40a00SSimon J. Gerraty { 247b0c40a00SSimon J. Gerraty 248b0c40a00SSimon J. Gerraty if (buf->data != NULL) { 249b0c40a00SSimon J. Gerraty if (buf->len == buf->cap) { 250b0c40a00SSimon J. Gerraty buf->cap *= 2; 251b0c40a00SSimon J. Gerraty buf->data = bmake_realloc(buf->data, buf->cap); 252b0c40a00SSimon J. Gerraty } 253b0c40a00SSimon J. Gerraty buf->data[buf->len++] = ch; 254b0c40a00SSimon J. Gerraty 255b0c40a00SSimon J. Gerraty } else if (ch == buf->expected[buf->len]) { 256b0c40a00SSimon J. Gerraty buf->len++; 257b0c40a00SSimon J. Gerraty return; 258b0c40a00SSimon J. Gerraty 259b0c40a00SSimon J. Gerraty } else { 260b0c40a00SSimon J. Gerraty buf->cap = buf->len + 16; 261b0c40a00SSimon J. Gerraty buf->data = bmake_malloc(buf->cap); 262b0c40a00SSimon J. Gerraty memcpy(buf->data, buf->expected, buf->len); 263b0c40a00SSimon J. Gerraty buf->data[buf->len++] = ch; 264b0c40a00SSimon J. Gerraty } 265b0c40a00SSimon J. Gerraty } 266b0c40a00SSimon J. Gerraty 267b0c40a00SSimon J. Gerraty MAKE_STATIC void 268b0c40a00SSimon J. Gerraty LazyBuf_AddStr(LazyBuf *buf, const char *str) 269b0c40a00SSimon J. Gerraty { 270b0c40a00SSimon J. Gerraty const char *p; 271b0c40a00SSimon J. Gerraty 272b0c40a00SSimon J. Gerraty for (p = str; *p != '\0'; p++) 273b0c40a00SSimon J. Gerraty LazyBuf_Add(buf, *p); 274b0c40a00SSimon J. Gerraty } 275b0c40a00SSimon J. Gerraty 276b0c40a00SSimon J. Gerraty MAKE_STATIC void 277b0c40a00SSimon J. Gerraty LazyBuf_AddBytesBetween(LazyBuf *buf, const char *start, const char *end) 278b0c40a00SSimon J. Gerraty { 279b0c40a00SSimon J. Gerraty const char *p; 280b0c40a00SSimon J. Gerraty 281b0c40a00SSimon J. Gerraty for (p = start; p != end; p++) 282b0c40a00SSimon J. Gerraty LazyBuf_Add(buf, *p); 283b0c40a00SSimon J. Gerraty } 284b0c40a00SSimon J. Gerraty 285b0c40a00SSimon J. Gerraty MAKE_INLINE void 286b0c40a00SSimon J. Gerraty LazyBuf_AddSubstring(LazyBuf *buf, Substring sub) 287b0c40a00SSimon J. Gerraty { 288b0c40a00SSimon J. Gerraty LazyBuf_AddBytesBetween(buf, sub.start, sub.end); 289b0c40a00SSimon J. Gerraty } 290b0c40a00SSimon J. Gerraty 291b0c40a00SSimon J. Gerraty MAKE_STATIC Substring 292b0c40a00SSimon J. Gerraty LazyBuf_Get(const LazyBuf *buf) 293b0c40a00SSimon J. Gerraty { 294b0c40a00SSimon J. Gerraty const char *start = buf->data != NULL ? buf->data : buf->expected; 295b0c40a00SSimon J. Gerraty return Substring_Init(start, start + buf->len); 296b0c40a00SSimon J. Gerraty } 297b0c40a00SSimon J. Gerraty 29812904384SSimon J. Gerraty /* 29912904384SSimon J. Gerraty * Returns the content of the buffer as a newly allocated string. 30012904384SSimon J. Gerraty * 30112904384SSimon J. Gerraty * See LazyBuf_Get to avoid unnecessary memory allocations. 30212904384SSimon J. Gerraty */ 303b0c40a00SSimon J. Gerraty MAKE_STATIC FStr 304b0c40a00SSimon J. Gerraty LazyBuf_DoneGet(LazyBuf *buf) 305b0c40a00SSimon J. Gerraty { 306b0c40a00SSimon J. Gerraty if (buf->data != NULL) { 307b0c40a00SSimon J. Gerraty LazyBuf_Add(buf, '\0'); 308b0c40a00SSimon J. Gerraty return FStr_InitOwn(buf->data); 309b0c40a00SSimon J. Gerraty } 310b0c40a00SSimon J. Gerraty return Substring_Str(LazyBuf_Get(buf)); 311b0c40a00SSimon J. Gerraty } 312b0c40a00SSimon J. Gerraty 313b0c40a00SSimon J. Gerraty 314b0c40a00SSimon J. Gerraty Words Str_Words(const char *, bool); 315b0c40a00SSimon J. Gerraty 316b0c40a00SSimon J. Gerraty MAKE_INLINE void 317b0c40a00SSimon J. Gerraty Words_Free(Words w) 318b0c40a00SSimon J. Gerraty { 319b0c40a00SSimon J. Gerraty free(w.words); 320b0c40a00SSimon J. Gerraty free(w.freeIt); 321b0c40a00SSimon J. Gerraty } 322b0c40a00SSimon J. Gerraty 323b0c40a00SSimon J. Gerraty 324b0c40a00SSimon J. Gerraty SubstringWords Substring_Words(const char *, bool); 325b0c40a00SSimon J. Gerraty 326b0c40a00SSimon J. Gerraty MAKE_INLINE void 32712904384SSimon J. Gerraty SubstringWords_Init(SubstringWords *w) 32812904384SSimon J. Gerraty { 32912904384SSimon J. Gerraty w->words = NULL; 33012904384SSimon J. Gerraty w->len = 0; 33112904384SSimon J. Gerraty w->freeIt = NULL; 33212904384SSimon J. Gerraty } 33312904384SSimon J. Gerraty 33412904384SSimon J. Gerraty MAKE_INLINE void 335b0c40a00SSimon J. Gerraty SubstringWords_Free(SubstringWords w) 336b0c40a00SSimon J. Gerraty { 337b0c40a00SSimon J. Gerraty free(w.words); 338b0c40a00SSimon J. Gerraty free(w.freeIt); 339b0c40a00SSimon J. Gerraty } 340b0c40a00SSimon J. Gerraty 341b0c40a00SSimon J. Gerraty 342b0c40a00SSimon J. Gerraty char *str_concat2(const char *, const char *); 343b0c40a00SSimon J. Gerraty char *str_concat3(const char *, const char *, const char *); 344b0c40a00SSimon J. Gerraty 345b0c40a00SSimon J. Gerraty bool Str_Match(const char *, const char *); 346*9f45a3c8SSimon J. Gerraty 347*9f45a3c8SSimon J. Gerraty void Str_Intern_Init(void); 348*9f45a3c8SSimon J. Gerraty void Str_Intern_End(void); 349*9f45a3c8SSimon J. Gerraty const char *Str_Intern(const char *); 350