/* $NetBSD: str.c,v 1.64 2020/08/30 19:56:02 rillig Exp $ */

/*-
 * Copyright (c) 1988, 1989, 1990, 1993
 *    The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Adam de Boor.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1989 by Berkeley Softworks
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Adam de Boor.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    This product includes software developed by the University of
 *    California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
69 */ 70 71 #ifndef MAKE_NATIVE 72 static char rcsid[] = "$NetBSD: str.c,v 1.64 2020/08/30 19:56:02 rillig Exp $"; 73 #else 74 #include <sys/cdefs.h> 75 #ifndef lint 76 #if 0 77 static char sccsid[] = "@(#)str.c 5.8 (Berkeley) 6/1/90"; 78 #else 79 __RCSID("$NetBSD: str.c,v 1.64 2020/08/30 19:56:02 rillig Exp $"); 80 #endif 81 #endif /* not lint */ 82 #endif 83 84 #include "make.h" 85 86 /* Return the concatenation of s1 and s2, freshly allocated. */ 87 char * 88 str_concat2(const char *s1, const char *s2) 89 { 90 size_t len1 = strlen(s1); 91 size_t len2 = strlen(s2); 92 char *result = bmake_malloc(len1 + len2 + 1); 93 memcpy(result, s1, len1); 94 memcpy(result + len1, s2, len2 + 1); 95 return result; 96 } 97 98 /* Return the concatenation of s1, s2 and s3, freshly allocated. */ 99 char * 100 str_concat3(const char *s1, const char *s2, const char *s3) 101 { 102 size_t len1 = strlen(s1); 103 size_t len2 = strlen(s2); 104 size_t len3 = strlen(s3); 105 char *result = bmake_malloc(len1 + len2 + len3 + 1); 106 memcpy(result, s1, len1); 107 memcpy(result + len1, s2, len2); 108 memcpy(result + len1 + len2, s3, len3 + 1); 109 return result; 110 } 111 112 /* Return the concatenation of s1, s2, s3 and s4, freshly allocated. */ 113 char * 114 str_concat4(const char *s1, const char *s2, const char *s3, const char *s4) 115 { 116 size_t len1 = strlen(s1); 117 size_t len2 = strlen(s2); 118 size_t len3 = strlen(s3); 119 size_t len4 = strlen(s4); 120 char *result = bmake_malloc(len1 + len2 + len3 + len4 + 1); 121 memcpy(result, s1, len1); 122 memcpy(result + len1, s2, len2); 123 memcpy(result + len1 + len2, s3, len3); 124 memcpy(result + len1 + len2 + len3, s4, len4 + 1); 125 return result; 126 } 127 128 /* Fracture a string into an array of words (as delineated by tabs or spaces) 129 * taking quotation marks into account. Leading tabs/spaces are ignored. 130 * 131 * If expand is TRUE, quotes are removed and escape sequences such as \r, \t, 132 * etc... are expanded. 
In this case, the return value is NULL on parse 133 * errors. 134 * 135 * Returns the fractured words, which must be freed later using Words_Free. 136 * If expand was TRUE and there was a parse error, words is NULL, and in that 137 * case, nothing needs to be freed. 138 */ 139 Words 140 Str_Words(const char *str, Boolean expand) 141 { 142 size_t str_len; 143 char *words_buf; 144 size_t words_cap; 145 char **words; 146 size_t words_len; 147 char inquote; 148 char *word_start; 149 char *word_end; 150 const char *str_p; 151 152 /* skip leading space chars. */ 153 for (; *str == ' ' || *str == '\t'; ++str) 154 continue; 155 156 /* words_buf holds the words, separated by '\0'. */ 157 str_len = strlen(str); 158 words_buf = bmake_malloc(strlen(str) + 1); 159 160 words_cap = MAX((str_len / 5), 50); 161 words = bmake_malloc((words_cap + 1) * sizeof(char *)); 162 163 /* 164 * copy the string; at the same time, parse backslashes, 165 * quotes and build the word list. 166 */ 167 words_len = 0; 168 inquote = '\0'; 169 word_start = words_buf; 170 word_end = words_buf; 171 for (str_p = str;; ++str_p) { 172 char ch = *str_p; 173 switch (ch) { 174 case '"': 175 case '\'': 176 if (inquote) { 177 if (inquote == ch) 178 inquote = '\0'; 179 else 180 break; 181 } else { 182 inquote = (char)ch; 183 /* Don't miss "" or '' */ 184 if (word_start == NULL && str_p[1] == inquote) { 185 if (!expand) { 186 word_start = word_end; 187 *word_end++ = ch; 188 } else 189 word_start = word_end + 1; 190 str_p++; 191 inquote = '\0'; 192 break; 193 } 194 } 195 if (!expand) { 196 if (word_start == NULL) 197 word_start = word_end; 198 *word_end++ = ch; 199 } 200 continue; 201 case ' ': 202 case '\t': 203 case '\n': 204 if (inquote) 205 break; 206 if (word_start == NULL) 207 continue; 208 /* FALLTHROUGH */ 209 case '\0': 210 /* 211 * end of a token -- make sure there's enough words 212 * space and save off a pointer. 
213 */ 214 if (word_start == NULL) 215 goto done; 216 217 *word_end++ = '\0'; 218 if (words_len == words_cap) { 219 size_t new_size; 220 words_cap *= 2; /* ramp up fast */ 221 new_size = (words_cap + 1) * sizeof(char *); 222 words = bmake_realloc(words, new_size); 223 } 224 words[words_len++] = word_start; 225 word_start = NULL; 226 if (ch == '\n' || ch == '\0') { 227 if (expand && inquote) { 228 free(words); 229 free(words_buf); 230 return (Words){ NULL, 0, NULL }; 231 } 232 goto done; 233 } 234 continue; 235 case '\\': 236 if (!expand) { 237 if (word_start == NULL) 238 word_start = word_end; 239 *word_end++ = '\\'; 240 /* catch '\' at end of line */ 241 if (str_p[1] == '\0') 242 continue; 243 ch = *++str_p; 244 break; 245 } 246 247 switch (ch = *++str_p) { 248 case '\0': 249 case '\n': 250 /* hmmm; fix it up as best we can */ 251 ch = '\\'; 252 --str_p; 253 break; 254 case 'b': 255 ch = '\b'; 256 break; 257 case 'f': 258 ch = '\f'; 259 break; 260 case 'n': 261 ch = '\n'; 262 break; 263 case 'r': 264 ch = '\r'; 265 break; 266 case 't': 267 ch = '\t'; 268 break; 269 } 270 break; 271 } 272 if (word_start == NULL) 273 word_start = word_end; 274 *word_end++ = ch; 275 } 276 done: 277 words[words_len] = NULL; 278 return (Words){ words, words_len, words_buf }; 279 } 280 281 /* 282 * Str_FindSubstring -- See if a string contains a particular substring. 283 * 284 * Input: 285 * string String to search. 286 * substring Substring to find in string. 287 * 288 * Results: If string contains substring, the return value is the location of 289 * the first matching instance of substring in string. If string doesn't 290 * contain substring, the return value is NULL. Matching is done on an exact 291 * character-for-character basis with no wildcards or special characters. 292 * 293 * Side effects: None. 
294 */ 295 char * 296 Str_FindSubstring(const char *string, const char *substring) 297 { 298 const char *a, *b; 299 300 /* 301 * First scan quickly through the two strings looking for a single- 302 * character match. When it's found, then compare the rest of the 303 * substring. 304 */ 305 306 for (b = substring; *string != 0; string++) { 307 if (*string != *b) 308 continue; 309 a = string; 310 for (;;) { 311 if (*b == 0) 312 return UNCONST(string); 313 if (*a++ != *b++) 314 break; 315 } 316 b = substring; 317 } 318 return NULL; 319 } 320 321 /* 322 * Str_Match -- Test if a string matches a pattern like "*.[ch]". 323 * 324 * XXX this function does not detect or report malformed patterns. 325 * 326 * Results: 327 * Non-zero is returned if string matches the pattern, 0 otherwise. The 328 * matching operation permits the following special characters in the 329 * pattern: *?\[] (as in fnmatch(3)). 330 * 331 * Side effects: None. 332 */ 333 Boolean 334 Str_Match(const char *str, const char *pat) 335 { 336 for (;;) { 337 /* 338 * See if we're at the end of both the pattern and the 339 * string. If, we succeeded. If we're at the end of the 340 * pattern but not at the end of the string, we failed. 341 */ 342 if (*pat == 0) 343 return *str == 0; 344 if (*str == 0 && *pat != '*') 345 return FALSE; 346 347 /* 348 * A '*' in the pattern matches any substring. We handle this 349 * by calling ourselves for each suffix of the string. 350 */ 351 if (*pat == '*') { 352 pat++; 353 while (*pat == '*') 354 pat++; 355 if (*pat == 0) 356 return TRUE; 357 while (*str != 0) { 358 if (Str_Match(str, pat)) 359 return TRUE; 360 str++; 361 } 362 return FALSE; 363 } 364 365 /* A '?' in the pattern matches any single character. */ 366 if (*pat == '?') 367 goto thisCharOK; 368 369 /* 370 * A '[' in the pattern matches a character from a list. 371 * The '[' is followed by the list of acceptable characters, 372 * or by ranges (two characters separated by '-'). 
In these 373 * character lists, the backslash is an ordinary character. 374 */ 375 if (*pat == '[') { 376 Boolean neg = pat[1] == '^'; 377 pat += neg ? 2 : 1; 378 379 for (;;) { 380 if (*pat == ']' || *pat == 0) { 381 if (neg) 382 break; 383 return FALSE; 384 } 385 if (*pat == *str) 386 break; 387 if (pat[1] == '-') { 388 if (pat[2] == 0) 389 return neg; 390 if (*pat <= *str && pat[2] >= *str) 391 break; 392 if (*pat >= *str && pat[2] <= *str) 393 break; 394 pat += 2; 395 } 396 pat++; 397 } 398 if (neg && *pat != ']' && *pat != 0) 399 return FALSE; 400 while (*pat != ']' && *pat != 0) 401 pat++; 402 if (*pat == 0) 403 pat--; 404 goto thisCharOK; 405 } 406 407 /* 408 * A backslash in the pattern matches the character following 409 * it exactly. 410 */ 411 if (*pat == '\\') { 412 pat++; 413 if (*pat == 0) 414 return FALSE; 415 } 416 417 if (*pat != *str) 418 return FALSE; 419 420 thisCharOK: 421 pat++; 422 str++; 423 } 424 } 425