1 /*
2 * Copyright (c) 1989, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Guido van Rossum.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
35 * Copyright 2017 Nexenta Systems, Inc.
36 */
37
38 /*
39 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
40 * Compares a filename or pathname to a pattern.
41 */
42
43 /*
44 * Some notes on multibyte character support:
45 * 1. Patterns with illegal byte sequences match nothing.
46 * 2. Illegal byte sequences in the "string" argument are handled by treating
47 * them as single-byte characters with a value of the first byte of the
48 * sequence cast to wchar_t.
49 * 3. Multibyte conversion state objects (mbstate_t) are passed around and
50 * used for most, but not all, conversions. Further work will be required
51 * to support state-dependent encodings.
52 */
53
54 #include "lint.h"
55 #include <fnmatch.h>
56 #include <limits.h>
57 #include <string.h>
58 #include <wchar.h>
59 #include <xlocale.h>
60 #include <wctype.h>
61 #include "localeimpl.h"
62 #include "collate.h"
63
64 #define EOS '\0'
65
66 #define RANGE_MATCH 1
67 #define RANGE_NOMATCH 0
68 #define RANGE_ERROR (-1)
69
70 static int rangematch(const char *, wchar_t, int, char **, mbstate_t *,
71 locale_t);
72 static int fnmatch1(const char *, const char *, const char *, int, mbstate_t,
73 mbstate_t, locale_t);
74
75 int
fnmatch(const char * pattern,const char * string,int flags)76 fnmatch(const char *pattern, const char *string, int flags)
77 {
78 locale_t loc = uselocale(NULL);
79 static const mbstate_t initial = { 0 };
80
81 return (fnmatch1(pattern, string, string, flags, initial, initial,
82 loc));
83 }
84
85 static int
fnmatch1(const char * pattern,const char * string,const char * stringstart,int flags,mbstate_t patmbs,mbstate_t strmbs,locale_t loc)86 fnmatch1(const char *pattern, const char *string, const char *stringstart,
87 int flags, mbstate_t patmbs, mbstate_t strmbs, locale_t loc)
88 {
89 const char *bt_pattern, *bt_string;
90 mbstate_t bt_patmbs, bt_strmbs;
91 char *newp;
92 char c;
93 wchar_t pc, sc;
94 size_t pclen, sclen;
95
96 bt_pattern = bt_string = NULL;
97 for (;;) {
98 pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX, &patmbs, loc);
99 if (pclen == (size_t)-1 || pclen == (size_t)-2)
100 return (FNM_NOMATCH);
101 pattern += pclen;
102 sclen = mbrtowc_l(&sc, string, MB_LEN_MAX, &strmbs, loc);
103 if (sclen == (size_t)-1 || sclen == (size_t)-2) {
104 sc = (unsigned char)*string;
105 sclen = 1;
106 (void) memset(&strmbs, 0, sizeof (strmbs));
107 }
108 switch (pc) {
109 case EOS:
110 if ((flags & FNM_LEADING_DIR) && sc == '/')
111 return (0);
112 if (sc == EOS)
113 return (0);
114 goto backtrack;
115 case '?':
116 if (sc == EOS)
117 return (FNM_NOMATCH);
118 if (sc == '/' && (flags & FNM_PATHNAME))
119 goto backtrack;
120 if (sc == '.' && (flags & FNM_PERIOD) &&
121 (string == stringstart ||
122 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
123 goto backtrack;
124 string += sclen;
125 break;
126 case '*':
127 c = *pattern;
128 /* Collapse multiple stars. */
129 while (c == '*')
130 c = *++pattern;
131
132 if (sc == '.' && (flags & FNM_PERIOD) &&
133 (string == stringstart ||
134 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
135 goto backtrack;
136
137 /* Optimize for pattern with * at end or before /. */
138 if (c == EOS)
139 if (flags & FNM_PATHNAME)
140 return ((flags & FNM_LEADING_DIR) ||
141 strchr(string, '/') == NULL ?
142 0 : FNM_NOMATCH);
143 else
144 return (0);
145 else if (c == '/' && flags & FNM_PATHNAME) {
146 if ((string = strchr(string, '/')) == NULL)
147 return (FNM_NOMATCH);
148 break;
149 }
150
151 /*
152 * First try the shortest match for the '*' that
153 * could work. We can forget any earlier '*' since
154 * there is no way having it match more characters
155 * can help us, given that we are already here.
156 */
157 bt_pattern = pattern, bt_patmbs = patmbs;
158 bt_string = string, bt_strmbs = strmbs;
159 break;
160 case '[':
161 if (sc == EOS)
162 return (FNM_NOMATCH);
163 if (sc == '/' && (flags & FNM_PATHNAME))
164 goto backtrack;
165 if (sc == '.' && (flags & FNM_PERIOD) &&
166 (string == stringstart ||
167 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
168 goto backtrack;
169
170 switch (rangematch(pattern, sc, flags, &newp,
171 &patmbs, loc)) {
172 case RANGE_ERROR:
173 goto norm;
174 case RANGE_MATCH:
175 pattern = newp;
176 break;
177 case RANGE_NOMATCH:
178 goto backtrack;
179 }
180 string += sclen;
181 break;
182 case '\\':
183 if (!(flags & FNM_NOESCAPE)) {
184 pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX,
185 &patmbs, loc);
186 if (pclen == 0 || pclen == (size_t)-1 ||
187 pclen == (size_t)-2)
188 return (FNM_NOMATCH);
189 pattern += pclen;
190 }
191 /* FALLTHROUGH */
192 default:
193 norm:
194 string += sclen;
195 if (pc == sc)
196 break;
197 else if ((flags & FNM_CASEFOLD) &&
198 (towlower_l(pc, loc) == towlower_l(sc, loc)))
199 break;
200 else {
201 backtrack:
202 /*
203 * If we have a mismatch (other than hitting
204 * the end of the string), go back to the last
205 * '*' seen and have it match one additional
206 * character.
207 */
208 if (bt_pattern == NULL)
209 return (FNM_NOMATCH);
210 sclen = mbrtowc_l(&sc, bt_string, MB_LEN_MAX,
211 &bt_strmbs, loc);
212 if (sclen == (size_t)-1 ||
213 sclen == (size_t)-2) {
214 sc = (unsigned char)*bt_string;
215 sclen = 1;
216 (void) memset(&bt_strmbs, 0,
217 sizeof (bt_strmbs));
218 }
219 if (sc == EOS)
220 return (FNM_NOMATCH);
221 if (sc == '/' && flags & FNM_PATHNAME)
222 return (FNM_NOMATCH);
223 bt_string += sclen;
224 pattern = bt_pattern, patmbs = bt_patmbs;
225 string = bt_string, strmbs = bt_strmbs;
226 }
227 break;
228 }
229 }
230 /* NOTREACHED */
231 }
232
233 static int
rangematch(const char * pattern,wchar_t test,int flags,char ** newp,mbstate_t * patmbs,locale_t loc)234 rangematch(const char *pattern, wchar_t test, int flags, char **newp,
235 mbstate_t *patmbs, locale_t loc)
236 {
237 int negate, ok;
238 wchar_t c, c2;
239 size_t pclen;
240 const char *origpat;
241
242 /*
243 * A bracket expression starting with an unquoted circumflex
244 * character produces unspecified results (IEEE 1003.2-1992,
245 * 3.13.2). This implementation treats it like '!', for
246 * consistency with the regular expression syntax.
247 * J.T. Conklin (conklin@ngai.kaleida.com)
248 */
249 if ((negate = (*pattern == '!' || *pattern == '^')) != 0)
250 ++pattern;
251
252 if (flags & FNM_CASEFOLD)
253 test = towlower_l(test, loc);
254
255 /*
256 * A right bracket shall lose its special meaning and represent
257 * itself in a bracket expression if it occurs first in the list.
258 * -- POSIX.2 2.8.3.2
259 */
260 ok = 0;
261 origpat = pattern;
262 for (;;) {
263 if (*pattern == ']' && pattern > origpat) {
264 pattern++;
265 break;
266 } else if (*pattern == '\0') {
267 return (RANGE_ERROR);
268 } else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
269 return (RANGE_NOMATCH);
270 } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE))
271 pattern++;
272 pclen = mbrtowc_l(&c, pattern, MB_LEN_MAX, patmbs, loc);
273 if (pclen == (size_t)-1 || pclen == (size_t)-2)
274 return (RANGE_NOMATCH);
275 pattern += pclen;
276
277 if (flags & FNM_CASEFOLD)
278 c = towlower_l(c, loc);
279
280 if (*pattern == '-' && *(pattern + 1) != EOS &&
281 *(pattern + 1) != ']') {
282 if (*++pattern == '\\' && !(flags & FNM_NOESCAPE))
283 if (*pattern != EOS)
284 pattern++;
285 pclen = mbrtowc_l(&c2, pattern, MB_LEN_MAX, patmbs,
286 loc);
287 if (pclen == (size_t)-1 || pclen == (size_t)-2)
288 return (RANGE_NOMATCH);
289 pattern += pclen;
290 if (c2 == EOS)
291 return (RANGE_ERROR);
292
293 if (flags & FNM_CASEFOLD)
294 c2 = towlower_l(c2, loc);
295
296 if (loc->collate->lc_is_posix ?
297 c <= test && test <= c2 :
298 _collate_range_cmp(c, test, loc) <= 0 &&
299 _collate_range_cmp(test, c2, loc) <= 0)
300 ok = 1;
301 } else if (c == test)
302 ok = 1;
303 }
304
305 *newp = (char *)pattern;
306 return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
307 }
308