1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Guido van Rossum. 9 * 10 * Copyright (c) 2011 The FreeBSD Foundation 11 * 12 * Portions of this software were developed by David Chisnall 13 * under sponsorship from the FreeBSD Foundation. 14 * 15 * Redistribution and use in source and binary forms, with or without 16 * modification, are permitted provided that the following conditions 17 * are met: 18 * 1. Redistributions of source code must retain the above copyright 19 * notice, this list of conditions and the following disclaimer. 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in the 22 * documentation and/or other materials provided with the distribution. 23 * 3. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 */ 39 40 /* 41 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6. 42 * Compares a filename or pathname to a pattern. 43 */ 44 45 /* 46 * Some notes on multibyte character support: 47 * 1. Patterns with illegal byte sequences match nothing. 48 * 2. Illegal byte sequences in the "string" argument are handled by treating 49 * them as single-byte characters with a value of the first byte of the 50 * sequence cast to wchar_t. 51 * 3. Multibyte conversion state objects (mbstate_t) are passed around and 52 * used for most, but not all, conversions. Further work will be required 53 * to support state-dependent encodings. 54 */ 55 56 #include <fnmatch.h> 57 #include <limits.h> 58 #include <string.h> 59 #include <wchar.h> 60 #include <wctype.h> 61 62 #include "collate.h" 63 64 #define EOS '\0' 65 66 #define RANGE_MATCH 1 67 #define RANGE_NOMATCH 0 68 #define RANGE_ERROR (-1) 69 70 static int rangematch(const char *, wchar_t, int, char **, mbstate_t *); 71 static int fnmatch1(const char *, const char *, const char *, int, mbstate_t, 72 mbstate_t); 73 74 int 75 fnmatch(const char *pattern, const char *string, int flags) 76 { 77 static const mbstate_t initial; 78 79 return (fnmatch1(pattern, string, string, flags, initial, initial)); 80 } 81 82 static int 83 fnmatch1(const char *pattern, const char *string, const char *stringstart, 84 int flags, mbstate_t patmbs, mbstate_t strmbs) 85 { 86 const char *bt_pattern, *bt_string; 87 mbstate_t bt_patmbs, bt_strmbs; 88 char *newp; 89 char c; 90 wchar_t pc, sc; 91 size_t pclen, sclen; 92 93 bt_pattern = bt_string = NULL; 94 for (;;) { 95 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs); 96 if (pclen == (size_t)-1 || pclen == (size_t)-2) 97 return (FNM_NOMATCH); 98 pattern += pclen; 99 sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs); 100 if (sclen == (size_t)-1 || sclen == (size_t)-2) { 101 sc = (unsigned char)*string; 102 sclen = 1; 103 memset(&strmbs, 0, sizeof(strmbs)); 104 } 105 switch (pc) { 106 case EOS: 107 if ((flags & FNM_LEADING_DIR) && sc == '/') 108 return (0); 109 if (sc == EOS) 110 return (0); 111 goto backtrack; 112 case '?': 113 if (sc == EOS) 114 return (FNM_NOMATCH); 115 if (sc == '/' && (flags & FNM_PATHNAME)) 116 goto backtrack; 117 if (sc == '.' && (flags & FNM_PERIOD) && 118 (string == stringstart || 119 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 120 goto backtrack; 121 string += sclen; 122 break; 123 case '*': 124 c = *pattern; 125 /* Collapse multiple stars. */ 126 while (c == '*') 127 c = *++pattern; 128 129 if (sc == '.' && (flags & FNM_PERIOD) && 130 (string == stringstart || 131 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 132 goto backtrack; 133 134 /* Optimize for pattern with * at end or before /. */ 135 if (c == EOS) 136 if (flags & FNM_PATHNAME) 137 return ((flags & FNM_LEADING_DIR) || 138 strchr(string, '/') == NULL ? 139 0 : FNM_NOMATCH); 140 else 141 return (0); 142 else if (c == '/' && flags & FNM_PATHNAME) { 143 if ((string = strchr(string, '/')) == NULL) 144 return (FNM_NOMATCH); 145 break; 146 } 147 148 /* 149 * First try the shortest match for the '*' that 150 * could work. We can forget any earlier '*' since 151 * there is no way having it match more characters 152 * can help us, given that we are already here. 153 */ 154 bt_pattern = pattern, bt_patmbs = patmbs; 155 bt_string = string, bt_strmbs = strmbs; 156 break; 157 case '[': 158 if (sc == EOS) 159 return (FNM_NOMATCH); 160 if (sc == '/' && (flags & FNM_PATHNAME)) 161 goto backtrack; 162 if (sc == '.' && (flags & FNM_PERIOD) && 163 (string == stringstart || 164 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 165 goto backtrack; 166 167 switch (rangematch(pattern, sc, flags, &newp, 168 &patmbs)) { 169 case RANGE_ERROR: 170 goto norm; 171 case RANGE_MATCH: 172 pattern = newp; 173 break; 174 case RANGE_NOMATCH: 175 goto backtrack; 176 } 177 string += sclen; 178 break; 179 case '\\': 180 if (!(flags & FNM_NOESCAPE)) { 181 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, 182 &patmbs); 183 if (pclen == 0 || pclen == (size_t)-1 || 184 pclen == (size_t)-2) 185 return (FNM_NOMATCH); 186 pattern += pclen; 187 } 188 /* FALLTHROUGH */ 189 default: 190 norm: 191 string += sclen; 192 if (pc == sc) 193 ; 194 else if ((flags & FNM_CASEFOLD) && 195 (towlower(pc) == towlower(sc))) 196 ; 197 else { 198 backtrack: 199 /* 200 * If we have a mismatch (other than hitting 201 * the end of the string), go back to the last 202 * '*' seen and have it match one additional 203 * character. 204 */ 205 if (bt_pattern == NULL) 206 return (FNM_NOMATCH); 207 sclen = mbrtowc(&sc, bt_string, MB_LEN_MAX, 208 &bt_strmbs); 209 if (sclen == (size_t)-1 || 210 sclen == (size_t)-2) { 211 sc = (unsigned char)*bt_string; 212 sclen = 1; 213 memset(&bt_strmbs, 0, 214 sizeof(bt_strmbs)); 215 } 216 if (sc == EOS) 217 return (FNM_NOMATCH); 218 if (sc == '/' && flags & FNM_PATHNAME) 219 return (FNM_NOMATCH); 220 bt_string += sclen; 221 pattern = bt_pattern, patmbs = bt_patmbs; 222 string = bt_string, strmbs = bt_strmbs; 223 } 224 break; 225 } 226 } 227 /* NOTREACHED */ 228 } 229 230 static int 231 rangematch(const char *pattern, wchar_t test, int flags, char **newp, 232 mbstate_t *patmbs) 233 { 234 int negate, ok; 235 wchar_t c, c2; 236 size_t pclen; 237 const char *origpat; 238 struct xlocale_collate *table = 239 (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; 240 241 /* 242 * A bracket expression starting with an unquoted circumflex 243 * character produces unspecified results (IEEE 1003.2-1992, 244 * 3.13.2). This implementation treats it like '!', for 245 * consistency with the regular expression syntax. 246 * J.T. Conklin (conklin@ngai.kaleida.com) 247 */ 248 if ((negate = (*pattern == '!' || *pattern == '^'))) 249 ++pattern; 250 251 if (flags & FNM_CASEFOLD) 252 test = towlower(test); 253 254 /* 255 * A right bracket shall lose its special meaning and represent 256 * itself in a bracket expression if it occurs first in the list. 257 * -- POSIX.2 2.8.3.2 258 */ 259 ok = 0; 260 origpat = pattern; 261 for (;;) { 262 if (*pattern == ']' && pattern > origpat) { 263 pattern++; 264 break; 265 } else if (*pattern == '\0') { 266 return (RANGE_ERROR); 267 } else if (*pattern == '/' && (flags & FNM_PATHNAME)) { 268 return (RANGE_NOMATCH); 269 } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) 270 pattern++; 271 pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs); 272 if (pclen == (size_t)-1 || pclen == (size_t)-2) 273 return (RANGE_NOMATCH); 274 pattern += pclen; 275 276 if (flags & FNM_CASEFOLD) 277 c = towlower(c); 278 279 if (*pattern == '-' && *(pattern + 1) != EOS && 280 *(pattern + 1) != ']') { 281 if (*++pattern == '\\' && !(flags & FNM_NOESCAPE)) 282 if (*pattern != EOS) 283 pattern++; 284 pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs); 285 if (pclen == (size_t)-1 || pclen == (size_t)-2) 286 return (RANGE_NOMATCH); 287 pattern += pclen; 288 if (c2 == EOS) 289 return (RANGE_ERROR); 290 291 if (flags & FNM_CASEFOLD) 292 c2 = towlower(c2); 293 294 if (table->__collate_load_error ? 295 c <= test && test <= c2 : 296 __wcollate_range_cmp(c, test) <= 0 297 && __wcollate_range_cmp(test, c2) <= 0 298 ) 299 ok = 1; 300 } else if (c == test) 301 ok = 1; 302 } 303 304 *newp = (char *)pattern; 305 return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH); 306 } 307