1 /*
2 * Copyright (c) 1989, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Guido van Rossum.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
35 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
36 * Use is subject to license terms.
37 */
38
39 /*
40 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
41 * Compares a filename or pathname to a pattern.
42 */
43
44 /*
45 * Some notes on multibyte character support:
46 * 1. Patterns with illegal byte sequences match nothing.
47 * 2. Illegal byte sequences in the "string" argument are handled by treating
48 * them as single-byte characters with a value of the first byte of the
49 * sequence cast to wchar_t.
50 * 3. Multibyte conversion state objects (mbstate_t) are passed around and
51 * used for most, but not all, conversions. Further work will be required
52 * to support state-dependent encodings.
53 */
54
55 #include "lint.h"
56 #include <fnmatch.h>
57 #include <limits.h>
58 #include <string.h>
59 #include <wchar.h>
60 #include <xlocale.h>
61 #include <wctype.h>
62 #include "localeimpl.h"
63 #include "collate.h"
64
65 #define EOS '\0'
66
67 #define RANGE_MATCH 1
68 #define RANGE_NOMATCH 0
69 #define RANGE_ERROR (-1)
70
71 static int rangematch(const char *, wchar_t, int, char **, mbstate_t *,
72 locale_t);
73 static int fnmatch1(const char *, const char *, const char *, int, mbstate_t,
74 mbstate_t, locale_t);
75
76 int
fnmatch(pattern,string,flags)77 fnmatch(pattern, string, flags)
78 const char *pattern, *string;
79 int flags;
80 {
81 locale_t loc = uselocale(NULL);
82 static const mbstate_t initial = { 0 };
83
84 return (fnmatch1(pattern, string, string, flags, initial, initial,
85 loc));
86 }
87
88 static int
fnmatch1(const char * pattern,const char * string,const char * stringstart,int flags,mbstate_t patmbs,mbstate_t strmbs,locale_t loc)89 fnmatch1(const char *pattern, const char *string, const char *stringstart,
90 int flags, mbstate_t patmbs, mbstate_t strmbs, locale_t loc)
91 {
92 char *newp;
93 char c;
94 wchar_t pc, sc;
95 size_t pclen, sclen;
96
97 for (;;) {
98 pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX, &patmbs, loc);
99 if (pclen == (size_t)-1 || pclen == (size_t)-2)
100 return (FNM_NOMATCH);
101 pattern += pclen;
102 sclen = mbrtowc_l(&sc, string, MB_LEN_MAX, &strmbs, loc);
103 if (sclen == (size_t)-1 || sclen == (size_t)-2) {
104 sc = (unsigned char)*string;
105 sclen = 1;
106 (void) memset(&strmbs, 0, sizeof (strmbs));
107 }
108 switch (pc) {
109 case EOS:
110 /*
111 * Removed FNM_LEADING_DIR, as it is not present
112 * on Solaris.
113 */
114 return (sc == EOS ? 0 : FNM_NOMATCH);
115 case '?':
116 if (sc == EOS)
117 return (FNM_NOMATCH);
118 if (sc == '/' && (flags & FNM_PATHNAME))
119 return (FNM_NOMATCH);
120 if (sc == '.' && (flags & FNM_PERIOD) &&
121 (string == stringstart ||
122 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
123 return (FNM_NOMATCH);
124 string += sclen;
125 break;
126 case '*':
127 c = *pattern;
128 /* Collapse multiple stars. */
129 while (c == '*')
130 c = *++pattern;
131
132 if (sc == '.' && (flags & FNM_PERIOD) &&
133 (string == stringstart ||
134 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
135 return (FNM_NOMATCH);
136
137 /* Optimize for pattern with * at end or before /. */
138 if (c == EOS)
139 if (flags & FNM_PATHNAME)
140 return (strchr(string, '/') == NULL ?
141 0 : FNM_NOMATCH);
142 else
143 return (0);
144 else if (c == '/' && flags & FNM_PATHNAME) {
145 if ((string = strchr(string, '/')) == NULL)
146 return (FNM_NOMATCH);
147 break;
148 }
149
150 /* General case, use recursion. */
151 while (sc != EOS) {
152 if (!fnmatch1(pattern, string, stringstart,
153 flags, patmbs, strmbs, loc))
154 return (0);
155 sclen = mbrtowc_l(&sc, string, MB_LEN_MAX,
156 &strmbs, loc);
157 if (sclen == (size_t)-1 ||
158 sclen == (size_t)-2) {
159 sc = (unsigned char)*string;
160 sclen = 1;
161 (void) memset(&strmbs, 0,
162 sizeof (strmbs));
163 }
164 if (sc == '/' && flags & FNM_PATHNAME)
165 break;
166 string += sclen;
167 }
168 return (FNM_NOMATCH);
169 case '[':
170 if (sc == EOS)
171 return (FNM_NOMATCH);
172 if (sc == '/' && (flags & FNM_PATHNAME))
173 return (FNM_NOMATCH);
174 if (sc == '.' && (flags & FNM_PERIOD) &&
175 (string == stringstart ||
176 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
177 return (FNM_NOMATCH);
178
179 switch (rangematch(pattern, sc, flags, &newp,
180 &patmbs, loc)) {
181 case RANGE_ERROR:
182 goto norm;
183 case RANGE_MATCH:
184 pattern = newp;
185 break;
186 case RANGE_NOMATCH:
187 return (FNM_NOMATCH);
188 }
189 string += sclen;
190 break;
191 case '\\':
192 if (!(flags & FNM_NOESCAPE)) {
193 pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX,
194 &patmbs, loc);
195 if (pclen == (size_t)-1 || pclen == (size_t)-2)
196 return (FNM_NOMATCH);
197 if (pclen == 0)
198 pc = '\\';
199 pattern += pclen;
200 }
201 /* FALLTHROUGH */
202 default:
203 norm:
204 if (pc == sc)
205 string += sclen;
206
207 else if ((flags & FNM_IGNORECASE) &&
208 (towlower_l(pc, loc) == towlower_l(sc, loc)))
209 string += sclen;
210 else
211 return (FNM_NOMATCH);
212
213 break;
214 }
215 }
216 /* NOTREACHED */
217 }
218
219 static int
rangematch(const char * pattern,wchar_t test,int flags,char ** newp,mbstate_t * patmbs,locale_t loc)220 rangematch(const char *pattern, wchar_t test, int flags, char **newp,
221 mbstate_t *patmbs, locale_t loc)
222 {
223 int negate, ok;
224 wchar_t c, c2;
225 size_t pclen;
226 const char *origpat;
227
228 /*
229 * A bracket expression starting with an unquoted circumflex
230 * character produces unspecified results (IEEE 1003.2-1992,
231 * 3.13.2). This implementation treats it like '!', for
232 * consistency with the regular expression syntax.
233 * J.T. Conklin (conklin@ngai.kaleida.com)
234 */
235 if ((negate = (*pattern == '!' || *pattern == '^')) != 0)
236 ++pattern;
237
238 if (flags & FNM_IGNORECASE)
239 test = towlower_l(test, loc);
240
241 /*
242 * A right bracket shall lose its special meaning and represent
243 * itself in a bracket expression if it occurs first in the list.
244 * -- POSIX.2 2.8.3.2
245 */
246 ok = 0;
247 origpat = pattern;
248 for (;;) {
249 if (*pattern == ']' && pattern > origpat) {
250 pattern++;
251 break;
252 } else if (*pattern == '\0') {
253 return (RANGE_ERROR);
254 } else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
255 return (RANGE_NOMATCH);
256 } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE))
257 pattern++;
258 pclen = mbrtowc_l(&c, pattern, MB_LEN_MAX, patmbs, loc);
259 if (pclen == (size_t)-1 || pclen == (size_t)-2)
260 return (RANGE_NOMATCH);
261 pattern += pclen;
262
263 if (flags & FNM_IGNORECASE)
264 c = towlower_l(c, loc);
265
266 if (*pattern == '-' && *(pattern + 1) != EOS &&
267 *(pattern + 1) != ']') {
268 if (*++pattern == '\\' && !(flags & FNM_NOESCAPE))
269 if (*pattern != EOS)
270 pattern++;
271 pclen = mbrtowc_l(&c2, pattern, MB_LEN_MAX, patmbs,
272 loc);
273 if (pclen == (size_t)-1 || pclen == (size_t)-2)
274 return (RANGE_NOMATCH);
275 pattern += pclen;
276 if (c2 == EOS)
277 return (RANGE_ERROR);
278
279 if (flags & FNM_IGNORECASE)
280 c2 = towlower_l(c2, loc);
281
282 if (loc->collate->lc_is_posix ?
283 c <= test && test <= c2 :
284 _collate_range_cmp(c, test, loc) <= 0 &&
285 _collate_range_cmp(test, c2, loc) <= 0)
286 ok = 1;
287 } else if (c == test)
288 ok = 1;
289 }
290
291 *newp = (char *)pattern;
292 return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
293 }
294