1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1989, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Guido van Rossum.
9 *
10 * Copyright (c) 2011 The FreeBSD Foundation
11 *
12 * Portions of this software were developed by David Chisnall
13 * under sponsorship from the FreeBSD Foundation.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 3. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 /*
41 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
42 * Compares a filename or pathname to a pattern.
43 */
44
45 /*
46 * Some notes on multibyte character support:
47 * 1. Patterns with illegal byte sequences match nothing.
48 * 2. Illegal byte sequences in the "string" argument are handled by treating
49 * them as single-byte characters with a value of the first byte of the
50 * sequence cast to wchar_t.
51 * 3. Multibyte conversion state objects (mbstate_t) are passed around and
52 * used for most, but not all, conversions. Further work will be required
53 * to support state-dependent encodings.
54 */
55
56 #include <fnmatch.h>
57 #include <limits.h>
58 #include <string.h>
59 #include <wchar.h>
60 #include <wctype.h>
61
62 #include "collate.h"
63
64 #define EOS '\0'
65
66 #define RANGE_MATCH 1
67 #define RANGE_NOMATCH 0
68 #define RANGE_ERROR (-1)
69
70 static int rangematch(const char *, wchar_t, const char *, int, char **,
71 char **, mbstate_t *, mbstate_t *);
72 static int fnmatch1(const char *, const char *, const char *, int, mbstate_t,
73 mbstate_t);
74
75 int
fnmatch(const char * pattern,const char * string,int flags)76 fnmatch(const char *pattern, const char *string, int flags)
77 {
78 static const mbstate_t initial;
79
80 return (fnmatch1(pattern, string, string, flags, initial, initial));
81 }
82
83 static int
fnmatch1(const char * pattern,const char * string,const char * stringstart,int flags,mbstate_t patmbs,mbstate_t strmbs)84 fnmatch1(const char *pattern, const char *string, const char *stringstart,
85 int flags, mbstate_t patmbs, mbstate_t strmbs)
86 {
87 const char *bt_pattern, *bt_string;
88 mbstate_t bt_patmbs, bt_strmbs;
89 char *newp, *news;
90 char c;
91 wchar_t pc, sc;
92 size_t pclen, sclen;
93
94 bt_pattern = bt_string = NULL;
95 for (;;) {
96 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs);
97 if (pclen == (size_t)-1 || pclen == (size_t)-2)
98 return (FNM_NOMATCH);
99 pattern += pclen;
100 sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs);
101 if (sclen == (size_t)-1 || sclen == (size_t)-2) {
102 sc = (unsigned char)*string;
103 sclen = 1;
104 memset(&strmbs, 0, sizeof(strmbs));
105 }
106 switch (pc) {
107 case EOS:
108 if ((flags & FNM_LEADING_DIR) && sc == '/')
109 return (0);
110 if (sc == EOS)
111 return (0);
112 goto backtrack;
113 case '?':
114 if (sc == EOS)
115 return (FNM_NOMATCH);
116 if (sc == '/' && (flags & FNM_PATHNAME))
117 goto backtrack;
118 if (sc == '.' && (flags & FNM_PERIOD) &&
119 (string == stringstart ||
120 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
121 goto backtrack;
122 string += sclen;
123 break;
124 case '*':
125 c = *pattern;
126 /* Collapse multiple stars. */
127 while (c == '*')
128 c = *++pattern;
129
130 if (sc == '.' && (flags & FNM_PERIOD) &&
131 (string == stringstart ||
132 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
133 goto backtrack;
134
135 /* Optimize for pattern with * at end or before /. */
136 if (c == EOS)
137 if (flags & FNM_PATHNAME)
138 return ((flags & FNM_LEADING_DIR) ||
139 strchr(string, '/') == NULL ?
140 0 : FNM_NOMATCH);
141 else
142 return (0);
143 else if (c == '/' && flags & FNM_PATHNAME) {
144 if ((string = strchr(string, '/')) == NULL)
145 return (FNM_NOMATCH);
146 break;
147 }
148
149 /*
150 * First try the shortest match for the '*' that
151 * could work. We can forget any earlier '*' since
152 * there is no way having it match more characters
153 * can help us, given that we are already here.
154 */
155 bt_pattern = pattern;
156 bt_patmbs = patmbs;
157 bt_string = string;
158 bt_strmbs = strmbs;
159 break;
160 case '[':
161 if (sc == EOS)
162 return (FNM_NOMATCH);
163 if (sc == '/' && (flags & FNM_PATHNAME))
164 goto backtrack;
165 if (sc == '.' && (flags & FNM_PERIOD) &&
166 (string == stringstart ||
167 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
168 goto backtrack;
169
170 switch (rangematch(pattern, sc, string + sclen, flags,
171 &newp, &news, &patmbs, &strmbs)) {
172 case RANGE_ERROR:
173 goto norm;
174 case RANGE_MATCH:
175 pattern = newp;
176 string = news;
177 break;
178 case RANGE_NOMATCH:
179 goto backtrack;
180 }
181 break;
182 case '\\':
183 if (!(flags & FNM_NOESCAPE)) {
184 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX,
185 &patmbs);
186 if (pclen == 0 || pclen == (size_t)-1 ||
187 pclen == (size_t)-2)
188 return (FNM_NOMATCH);
189 pattern += pclen;
190 }
191 /* FALLTHROUGH */
192 default:
193 norm:
194 string += sclen;
195 if (pc == sc)
196 ;
197 else if ((flags & FNM_CASEFOLD) &&
198 (towlower(pc) == towlower(sc)))
199 ;
200 else {
201 backtrack:
202 /*
203 * If we have a mismatch (other than hitting
204 * the end of the string), go back to the last
205 * '*' seen and have it match one additional
206 * character.
207 */
208 if (bt_pattern == NULL)
209 return (FNM_NOMATCH);
210 sclen = mbrtowc(&sc, bt_string, MB_LEN_MAX,
211 &bt_strmbs);
212 if (sclen == (size_t)-1 ||
213 sclen == (size_t)-2) {
214 sc = (unsigned char)*bt_string;
215 sclen = 1;
216 memset(&bt_strmbs, 0,
217 sizeof(bt_strmbs));
218 }
219 if (sc == EOS)
220 return (FNM_NOMATCH);
221 if (sc == '/' && flags & FNM_PATHNAME)
222 return (FNM_NOMATCH);
223 bt_string += sclen;
224 pattern = bt_pattern;
225 patmbs = bt_patmbs;
226 string = bt_string;
227 strmbs = bt_strmbs;
228 }
229 break;
230 }
231 }
232 /* NOTREACHED */
233 }
234
235 static int
rangematch(const char * pattern,wchar_t test,const char * string,int flags,char ** newp,char ** news,mbstate_t * patmbs,mbstate_t * strmbs)236 rangematch(const char *pattern, wchar_t test, const char *string, int flags,
237 char **newp, char **news, mbstate_t *patmbs, mbstate_t *strmbs)
238 {
239 int negate, ok;
240 wchar_t c, c2;
241 size_t pclen;
242 const char *origpat;
243 struct xlocale_collate *table =
244 (struct xlocale_collate *)__get_locale()->components[XLC_COLLATE];
245 wchar_t buf[COLLATE_STR_LEN]; /* STR_LEN defined in collate.h */
246 const char *cp, *savestring;
247 int special;
248 mbstate_t save;
249 size_t sclen, len;
250
251 /*
252 * A bracket expression starting with an unquoted circumflex
253 * character produces unspecified results (IEEE 1003.2-1992,
254 * 3.13.2). This implementation treats it like '!', for
255 * consistency with the regular expression syntax.
256 * J.T. Conklin (conklin@ngai.kaleida.com)
257 */
258 if ((negate = (*pattern == '!' || *pattern == '^')))
259 ++pattern;
260
261 if (flags & FNM_CASEFOLD)
262 test = towlower(test);
263
264 /*
265 * A right bracket shall lose its special meaning and represent
266 * itself in a bracket expression if it occurs first in the list.
267 * -- POSIX.2 2.8.3.2
268 */
269 ok = 0;
270 origpat = pattern;
271 for (;;) {
272 c = 0;
273 if (*pattern == ']' && pattern > origpat) {
274 break;
275 } else if (*pattern == '\0') {
276 return (RANGE_ERROR);
277 } else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
278 return (RANGE_NOMATCH);
279 } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) {
280 pattern++;
281 } else if (*pattern == '[' &&
282 ((special = *(pattern + 1)) == '.' ||
283 special == '=' || special == ':')) {
284 cp = (pattern += 2);
285 while ((cp = strchr(cp, special))) {
286 if (*(cp + 1) == ']')
287 break;
288 cp++;
289 }
290 if (!cp)
291 return (RANGE_ERROR);
292 if (special == '.') {
293 treat_like_collating_symbol:
294 len = __collate_collating_symbol(buf,
295 COLLATE_STR_LEN, pattern,
296 cp - pattern, patmbs);
297 if (len == (size_t)-1 || len == 0)
298 return (RANGE_ERROR);
299 pattern = cp + 2;
300 if (len > 1) {
301 wchar_t *wp, sc;
302
303 /*
304 * No multi-character collation
305 * symbols as start of range.
306 */
307 if (*(cp + 2) == '-' &&
308 *(cp + 3) != EOS &&
309 *(cp + 3) != ']')
310 return (RANGE_ERROR);
311 wp = buf;
312 if (test != *wp++)
313 continue;
314 if (len == 1) {
315 ok = 1;
316 break;
317 }
318 memcpy(&save, strmbs, sizeof(save));
319 savestring = string;
320 while (--len > 0) {
321 sclen = mbrtowc(&sc, string,
322 MB_LEN_MAX, strmbs);
323 if (sclen == (size_t)-1 ||
324 sclen == (size_t)-2) {
325 sc = (unsigned char)*string;
326 sclen = 1;
327 memset(&strmbs, 0,
328 sizeof(strmbs));
329 }
330 if (sc != *wp++) {
331 memcpy(strmbs, &save,
332 sizeof(save));
333 string = savestring;
334 break;
335 }
336 string += sclen;
337 }
338 if (len == 0) {
339 ok = 1;
340 break;
341 }
342 continue; /* no match */
343 }
344 c = *buf;
345 } else if (special == '=') {
346 int ec;
347 memcpy(&save, patmbs, sizeof(save));
348 ec = __collate_equiv_class(pattern,
349 cp - pattern, patmbs);
350 if (ec < 0)
351 return (RANGE_ERROR);
352 if (ec == 0) {
353 memcpy(patmbs, &save, sizeof(save));
354 goto treat_like_collating_symbol;
355 }
356 pattern = cp + 2;
357 /* no equivalence classes as start of range */
358 if (*(cp + 2) == '-' && *(cp + 3) != EOS &&
359 *(cp + 3) != ']')
360 return (RANGE_ERROR);
361 len = __collate_equiv_match(ec, NULL, 0, test,
362 string, strlen(string), strmbs, &sclen);
363 if (len < 0)
364 return (RANGE_ERROR);
365 if (len > 0) {
366 ok = 1;
367 string += sclen;
368 break;
369 }
370 continue;
371 } else { /* special == ':' */
372 wctype_t charclass;
373 char name[CHARCLASS_NAME_MAX + 1];
374 /* no character classes as start of range */
375 if (*(cp + 2) == '-' && *(cp + 3) != EOS &&
376 *(cp + 3) != ']')
377 return (RANGE_ERROR);
378 /* assume character class names are ascii */
379 if (cp - pattern > CHARCLASS_NAME_MAX)
380 return (RANGE_ERROR);
381 strlcpy(name, pattern, cp - pattern + 1);
382 pattern = cp + 2;
383 if ((charclass = wctype(name)) == 0)
384 return (RANGE_ERROR);
385 if (iswctype(test, charclass)) {
386 ok = 1;
387 break;
388 }
389 continue;
390 }
391 }
392 if (!c) {
393 pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
394 if (pclen == (size_t)-1 || pclen == (size_t)-2)
395 return (RANGE_NOMATCH);
396 pattern += pclen;
397 }
398 if (flags & FNM_CASEFOLD)
399 c = towlower(c);
400
401 if (*pattern == '-' && *(pattern + 1) != EOS &&
402 *(pattern + 1) != ']') {
403 if (*++pattern == '\\' && !(flags & FNM_NOESCAPE))
404 if (*pattern != EOS)
405 pattern++;
406 pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs);
407 if (pclen == (size_t)-1 || pclen == (size_t)-2)
408 return (RANGE_NOMATCH);
409 pattern += pclen;
410 if (c2 == EOS)
411 return (RANGE_ERROR);
412
413 if ((c2 == '[' && (special = *pattern) == '.') ||
414 special == '=' || special == ':') {
415
416 /*
417 * No equivalence classes or character
418 * classes as end of range.
419 */
420 if (special == '=' || special == ':')
421 return (RANGE_ERROR);
422 cp = ++pattern;
423 while ((cp = strchr(cp, special))) {
424 if (*(cp + 1) == ']')
425 break;
426 cp++;
427 }
428 if (!cp)
429 return (RANGE_ERROR);
430 len = __collate_collating_symbol(buf,
431 COLLATE_STR_LEN, pattern,
432 cp - pattern, patmbs);
433
434 /*
435 * No multi-character collation symbols
436 * as end of range.
437 */
438 if (len != 1)
439 return (RANGE_ERROR);
440 pattern = cp + 2;
441 c2 = *buf;
442 }
443
444 if (flags & FNM_CASEFOLD)
445 c2 = towlower(c2);
446
447 if (table->__collate_load_error ?
448 c <= test && test <= c2 :
449 __wcollate_range_cmp(c, test) <= 0
450 && __wcollate_range_cmp(test, c2) <= 0
451 ) {
452 ok = 1;
453 break;
454 }
455 } else if (c == test) {
456 ok = 1;
457 break;
458 }
459 }
460
461 /* go to end of bracket expression */
462 special = 0;
463 while (*pattern != ']') {
464 if (*pattern == 0)
465 return (RANGE_ERROR);
466 if (*pattern == special) {
467 if (*++pattern == ']') {
468 special = 0;
469 pattern++;
470 }
471 continue;
472 }
473 if (!special && *pattern == '[') {
474 special = *++pattern;
475 if (special != '.' && special != '=' && special != ':')
476 special = 0;
477 else
478 pattern++;
479 continue;
480 }
481 pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
482 if (pclen == (size_t)-1 || pclen == (size_t)-2)
483 return (RANGE_NOMATCH);
484 pattern += pclen;
485 }
486
487 *newp = (char *)++pattern;
488 *news = (char *)string;
489
490 return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
491 }
492