1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Guido van Rossum. 9 * 10 * Copyright (c) 2011 The FreeBSD Foundation 11 * 12 * Portions of this software were developed by David Chisnall 13 * under sponsorship from the FreeBSD Foundation. 14 * 15 * Redistribution and use in source and binary forms, with or without 16 * modification, are permitted provided that the following conditions 17 * are met: 18 * 1. Redistributions of source code must retain the above copyright 19 * notice, this list of conditions and the following disclaimer. 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in the 22 * documentation and/or other materials provided with the distribution. 23 * 3. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 */ 39 40 /* 41 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6. 42 * Compares a filename or pathname to a pattern. 43 */ 44 45 /* 46 * Some notes on multibyte character support: 47 * 1. Patterns with illegal byte sequences match nothing. 48 * 2. Illegal byte sequences in the "string" argument are handled by treating 49 * them as single-byte characters with a value of the first byte of the 50 * sequence cast to wchar_t. 51 * 3. Multibyte conversion state objects (mbstate_t) are passed around and 52 * used for most, but not all, conversions. Further work will be required 53 * to support state-dependent encodings. 54 */ 55 56 #include <fnmatch.h> 57 #include <limits.h> 58 #include <string.h> 59 #include <wchar.h> 60 #include <wctype.h> 61 62 #include "collate.h" 63 64 #define EOS '\0' 65 66 #define RANGE_MATCH 1 67 #define RANGE_NOMATCH 0 68 #define RANGE_ERROR (-1) 69 70 static int rangematch(const char *, wchar_t, const char *, int, char **, 71 char **, mbstate_t *, mbstate_t *); 72 static int fnmatch1(const char *, const char *, const char *, int, mbstate_t, 73 mbstate_t); 74 75 int 76 fnmatch(const char *pattern, const char *string, int flags) 77 { 78 static const mbstate_t initial; 79 80 return (fnmatch1(pattern, string, string, flags, initial, initial)); 81 } 82 83 static int 84 fnmatch1(const char *pattern, const char *string, const char *stringstart, 85 int flags, mbstate_t patmbs, mbstate_t strmbs) 86 { 87 const char *bt_pattern, *bt_string; 88 mbstate_t bt_patmbs, bt_strmbs; 89 char *newp, *news; 90 char c; 91 wchar_t pc, sc; 92 size_t pclen, sclen; 93 94 bt_pattern = bt_string = NULL; 95 for (;;) { 96 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs); 97 if (pclen == (size_t)-1 || pclen == (size_t)-2) 98 return (FNM_NOMATCH); 99 pattern += pclen; 100 sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs); 101 if (sclen == (size_t)-1 || sclen == (size_t)-2) { 102 sc = (unsigned char)*string; 103 sclen = 1; 104 memset(&strmbs, 0, sizeof(strmbs)); 105 } 106 switch (pc) { 107 case EOS: 108 if ((flags & FNM_LEADING_DIR) && sc == '/') 109 return (0); 110 if (sc == EOS) 111 return (0); 112 goto backtrack; 113 case '?': 114 if (sc == EOS) 115 return (FNM_NOMATCH); 116 if (sc == '/' && (flags & FNM_PATHNAME)) 117 goto backtrack; 118 if (sc == '.' && (flags & FNM_PERIOD) && 119 (string == stringstart || 120 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 121 goto backtrack; 122 string += sclen; 123 break; 124 case '*': 125 c = *pattern; 126 /* Collapse multiple stars. */ 127 while (c == '*') 128 c = *++pattern; 129 130 if (sc == '.' && (flags & FNM_PERIOD) && 131 (string == stringstart || 132 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 133 goto backtrack; 134 135 /* Optimize for pattern with * at end or before /. */ 136 if (c == EOS) 137 if (flags & FNM_PATHNAME) 138 return ((flags & FNM_LEADING_DIR) || 139 strchr(string, '/') == NULL ? 140 0 : FNM_NOMATCH); 141 else 142 return (0); 143 else if (c == '/' && flags & FNM_PATHNAME) { 144 if ((string = strchr(string, '/')) == NULL) 145 return (FNM_NOMATCH); 146 break; 147 } 148 149 /* 150 * First try the shortest match for the '*' that 151 * could work. We can forget any earlier '*' since 152 * there is no way having it match more characters 153 * can help us, given that we are already here. 154 */ 155 bt_pattern = pattern; 156 bt_patmbs = patmbs; 157 bt_string = string; 158 bt_strmbs = strmbs; 159 break; 160 case '[': 161 if (sc == EOS) 162 return (FNM_NOMATCH); 163 if (sc == '/' && (flags & FNM_PATHNAME)) 164 goto backtrack; 165 if (sc == '.' && (flags & FNM_PERIOD) && 166 (string == stringstart || 167 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 168 goto backtrack; 169 170 switch (rangematch(pattern, sc, string + sclen, flags, 171 &newp, &news, &patmbs, &strmbs)) { 172 case RANGE_ERROR: 173 goto norm; 174 case RANGE_MATCH: 175 pattern = newp; 176 string = news; 177 break; 178 case RANGE_NOMATCH: 179 goto backtrack; 180 } 181 break; 182 case '\\': 183 if (!(flags & FNM_NOESCAPE)) { 184 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, 185 &patmbs); 186 if (pclen == 0 || pclen == (size_t)-1 || 187 pclen == (size_t)-2) 188 return (FNM_NOMATCH); 189 pattern += pclen; 190 } 191 /* FALLTHROUGH */ 192 default: 193 norm: 194 string += sclen; 195 if (pc == sc) 196 ; 197 else if ((flags & FNM_CASEFOLD) && 198 (towlower(pc) == towlower(sc))) 199 ; 200 else { 201 backtrack: 202 /* 203 * If we have a mismatch (other than hitting 204 * the end of the string), go back to the last 205 * '*' seen and have it match one additional 206 * character. 207 */ 208 if (bt_pattern == NULL) 209 return (FNM_NOMATCH); 210 sclen = mbrtowc(&sc, bt_string, MB_LEN_MAX, 211 &bt_strmbs); 212 if (sclen == (size_t)-1 || 213 sclen == (size_t)-2) { 214 sc = (unsigned char)*bt_string; 215 sclen = 1; 216 memset(&bt_strmbs, 0, 217 sizeof(bt_strmbs)); 218 } 219 if (sc == EOS) 220 return (FNM_NOMATCH); 221 if (sc == '/' && flags & FNM_PATHNAME) 222 return (FNM_NOMATCH); 223 bt_string += sclen; 224 pattern = bt_pattern; 225 patmbs = bt_patmbs; 226 string = bt_string; 227 strmbs = bt_strmbs; 228 } 229 break; 230 } 231 } 232 /* NOTREACHED */ 233 } 234 235 static int 236 rangematch(const char *pattern, wchar_t test, const char *string, int flags, 237 char **newp, char **news, mbstate_t *patmbs, mbstate_t *strmbs) 238 { 239 int negate, ok; 240 wchar_t c, c2; 241 size_t pclen; 242 const char *origpat; 243 struct xlocale_collate *table = 244 (struct xlocale_collate *)__get_locale()->components[XLC_COLLATE]; 245 wchar_t buf[COLLATE_STR_LEN]; /* STR_LEN defined in collate.h */ 246 const char *cp, *savestring; 247 int special; 248 mbstate_t save; 249 size_t sclen, len; 250 251 /* 252 * A bracket expression starting with an unquoted circumflex 253 * character produces unspecified results (IEEE 1003.2-1992, 254 * 3.13.2). This implementation treats it like '!', for 255 * consistency with the regular expression syntax. 256 * J.T. Conklin (conklin@ngai.kaleida.com) 257 */ 258 if ((negate = (*pattern == '!' || *pattern == '^'))) 259 ++pattern; 260 261 if (flags & FNM_CASEFOLD) 262 test = towlower(test); 263 264 /* 265 * A right bracket shall lose its special meaning and represent 266 * itself in a bracket expression if it occurs first in the list. 267 * -- POSIX.2 2.8.3.2 268 */ 269 ok = 0; 270 origpat = pattern; 271 for (;;) { 272 c = 0; 273 if (*pattern == ']' && pattern > origpat) { 274 break; 275 } else if (*pattern == '\0') { 276 return (RANGE_ERROR); 277 } else if (*pattern == '/' && (flags & FNM_PATHNAME)) { 278 return (RANGE_NOMATCH); 279 } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) { 280 pattern++; 281 } else if (*pattern == '[' && 282 ((special = *(pattern + 1)) == '.' || 283 special == '=' || special == ':')) { 284 cp = (pattern += 2); 285 while ((cp = strchr(cp, special))) { 286 if (*(cp + 1) == ']') 287 break; 288 cp++; 289 } 290 if (!cp) 291 return (RANGE_ERROR); 292 if (special == '.') { 293 treat_like_collating_symbol: 294 len = __collate_collating_symbol(buf, 295 COLLATE_STR_LEN, pattern, 296 cp - pattern, patmbs); 297 if (len == (size_t)-1 || len == 0) 298 return (RANGE_ERROR); 299 pattern = cp + 2; 300 if (len > 1) { 301 wchar_t *wp, sc; 302 303 /* 304 * No multi-character collation 305 * symbols as start of range. 306 */ 307 if (*(cp + 2) == '-' && 308 *(cp + 3) != EOS && 309 *(cp + 3) != ']') 310 return (RANGE_ERROR); 311 wp = buf; 312 if (test != *wp++) 313 continue; 314 if (len == 1) { 315 ok = 1; 316 break; 317 } 318 memcpy(&save, strmbs, sizeof(save)); 319 savestring = string; 320 while (--len > 0) { 321 sclen = mbrtowc(&sc, string, 322 MB_LEN_MAX, strmbs); 323 if (sclen == (size_t)-1 || 324 sclen == (size_t)-2) { 325 sc = (unsigned char)*string; 326 sclen = 1; 327 memset(&strmbs, 0, 328 sizeof(strmbs)); 329 } 330 if (sc != *wp++) { 331 memcpy(strmbs, &save, 332 sizeof(save)); 333 string = savestring; 334 break; 335 } 336 string += sclen; 337 } 338 if (len == 0) { 339 ok = 1; 340 break; 341 } 342 continue; /* no match */ 343 } 344 c = *buf; 345 } else if (special == '=') { 346 int ec; 347 memcpy(&save, patmbs, sizeof(save)); 348 ec = __collate_equiv_class(pattern, 349 cp - pattern, patmbs); 350 if (ec < 0) 351 return (RANGE_ERROR); 352 if (ec == 0) { 353 memcpy(patmbs, &save, sizeof(save)); 354 goto treat_like_collating_symbol; 355 } 356 pattern = cp + 2; 357 /* no equivalence classes as start of range */ 358 if (*(cp + 2) == '-' && *(cp + 3) != EOS && 359 *(cp + 3) != ']') 360 return (RANGE_ERROR); 361 len = __collate_equiv_match(ec, NULL, 0, test, 362 string, strlen(string), strmbs, &sclen); 363 if (len < 0) 364 return (RANGE_ERROR); 365 if (len > 0) { 366 ok = 1; 367 string += sclen; 368 break; 369 } 370 continue; 371 } else { /* special == ':' */ 372 wctype_t charclass; 373 char name[CHARCLASS_NAME_MAX + 1]; 374 /* no character classes as start of range */ 375 if (*(cp + 2) == '-' && *(cp + 3) != EOS && 376 *(cp + 3) != ']') 377 return (RANGE_ERROR); 378 /* assume character class names are ascii */ 379 if (cp - pattern > CHARCLASS_NAME_MAX) 380 return (RANGE_ERROR); 381 strlcpy(name, pattern, cp - pattern + 1); 382 pattern = cp + 2; 383 if ((charclass = wctype(name)) == 0) 384 return (RANGE_ERROR); 385 if (iswctype(test, charclass)) { 386 ok = 1; 387 break; 388 } 389 continue; 390 } 391 } 392 if (!c) { 393 pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs); 394 if (pclen == (size_t)-1 || pclen == (size_t)-2) 395 return (RANGE_NOMATCH); 396 pattern += pclen; 397 } 398 if (flags & FNM_CASEFOLD) 399 c = towlower(c); 400 401 if (*pattern == '-' && *(pattern + 1) != EOS && 402 *(pattern + 1) != ']') { 403 if (*++pattern == '\\' && !(flags & FNM_NOESCAPE)) 404 if (*pattern != EOS) 405 pattern++; 406 pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs); 407 if (pclen == (size_t)-1 || pclen == (size_t)-2) 408 return (RANGE_NOMATCH); 409 pattern += pclen; 410 if (c2 == EOS) 411 return (RANGE_ERROR); 412 413 if ((c2 == '[' && (special = *pattern) == '.') || 414 special == '=' || special == ':') { 415 416 /* 417 * No equivalence classes or character 418 * classes as end of range. 419 */ 420 if (special == '=' || special == ':') 421 return (RANGE_ERROR); 422 cp = ++pattern; 423 while ((cp = strchr(cp, special))) { 424 if (*(cp + 1) == ']') 425 break; 426 cp++; 427 } 428 if (!cp) 429 return (RANGE_ERROR); 430 len = __collate_collating_symbol(buf, 431 COLLATE_STR_LEN, pattern, 432 cp - pattern, patmbs); 433 434 /* 435 * No multi-character collation symbols 436 * as end of range. 437 */ 438 if (len != 1) 439 return (RANGE_ERROR); 440 pattern = cp + 2; 441 c2 = *buf; 442 } 443 444 if (flags & FNM_CASEFOLD) 445 c2 = towlower(c2); 446 447 if (table->__collate_load_error ? 448 c <= test && test <= c2 : 449 __wcollate_range_cmp(c, test) <= 0 450 && __wcollate_range_cmp(test, c2) <= 0 451 ) { 452 ok = 1; 453 break; 454 } 455 } else if (c == test) { 456 ok = 1; 457 break; 458 } 459 } 460 461 /* go to end of bracket expression */ 462 special = 0; 463 while (*pattern != ']') { 464 if (*pattern == 0) 465 return (RANGE_ERROR); 466 if (*pattern == special) { 467 if (*++pattern == ']') { 468 special = 0; 469 pattern++; 470 } 471 continue; 472 } 473 if (!special && *pattern == '[') { 474 special = *++pattern; 475 if (special != '.' && special != '=' && special != ':') 476 special = 0; 477 else 478 pattern++; 479 continue; 480 } 481 pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs); 482 if (pclen == (size_t)-1 || pclen == (size_t)-2) 483 return (RANGE_NOMATCH); 484 pattern += pclen; 485 } 486 487 *newp = (char *)++pattern; 488 *news = (char *)string; 489 490 return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH); 491 } 492