xref: /freebsd/lib/libc/gen/fnmatch.c (revision 5bf5ca772c6de2d53344a78cf461447cc322ccea)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Guido van Rossum.
9  *
10  * Copyright (c) 2011 The FreeBSD Foundation
11  * All rights reserved.
12  * Portions of this software were developed by David Chisnall
13  * under sponsorship from the FreeBSD Foundation.
14  *
15  * Redistribution and use in source and binary forms, with or without
16  * modification, are permitted provided that the following conditions
17  * are met:
18  * 1. Redistributions of source code must retain the above copyright
19  *    notice, this list of conditions and the following disclaimer.
20  * 2. Redistributions in binary form must reproduce the above copyright
21  *    notice, this list of conditions and the following disclaimer in the
22  *    documentation and/or other materials provided with the distribution.
23  * 3. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  */
39 
40 #if defined(LIBC_SCCS) && !defined(lint)
41 static char sccsid[] = "@(#)fnmatch.c	8.2 (Berkeley) 4/16/94";
42 #endif /* LIBC_SCCS and not lint */
43 #include <sys/cdefs.h>
44 __FBSDID("$FreeBSD$");
45 
46 /*
47  * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
48  * Compares a filename or pathname to a pattern.
49  */
50 
51 /*
52  * Some notes on multibyte character support:
53  * 1. Patterns with illegal byte sequences match nothing.
54  * 2. Illegal byte sequences in the "string" argument are handled by treating
55  *    them as single-byte characters with a value of the first byte of the
56  *    sequence cast to wchar_t.
57  * 3. Multibyte conversion state objects (mbstate_t) are passed around and
58  *    used for most, but not all, conversions. Further work will be required
59  *    to support state-dependent encodings.
60  */
61 
62 #include <fnmatch.h>
63 #include <limits.h>
64 #include <string.h>
65 #include <wchar.h>
66 #include <wctype.h>
67 
68 #include "collate.h"
69 
70 #define	EOS	'\0'
71 
72 #define RANGE_MATCH     1
73 #define RANGE_NOMATCH   0
74 #define RANGE_ERROR     (-1)
75 
76 static int rangematch(const char *, wchar_t, int, char **, mbstate_t *);
77 static int fnmatch1(const char *, const char *, const char *, int, mbstate_t,
78 		mbstate_t);
79 
80 int
81 fnmatch(const char *pattern, const char *string, int flags)
82 {
83 	static const mbstate_t initial;
84 
85 	return (fnmatch1(pattern, string, string, flags, initial, initial));
86 }
87 
88 static int
89 fnmatch1(const char *pattern, const char *string, const char *stringstart,
90     int flags, mbstate_t patmbs, mbstate_t strmbs)
91 {
92 	const char *bt_pattern, *bt_string;
93 	mbstate_t bt_patmbs, bt_strmbs;
94 	char *newp;
95 	char c;
96 	wchar_t pc, sc;
97 	size_t pclen, sclen;
98 
99 	bt_pattern = bt_string = NULL;
100 	for (;;) {
101 		pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs);
102 		if (pclen == (size_t)-1 || pclen == (size_t)-2)
103 			return (FNM_NOMATCH);
104 		pattern += pclen;
105 		sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs);
106 		if (sclen == (size_t)-1 || sclen == (size_t)-2) {
107 			sc = (unsigned char)*string;
108 			sclen = 1;
109 			memset(&strmbs, 0, sizeof(strmbs));
110 		}
111 		switch (pc) {
112 		case EOS:
113 			if ((flags & FNM_LEADING_DIR) && sc == '/')
114 				return (0);
115 			if (sc == EOS)
116 				return (0);
117 			goto backtrack;
118 		case '?':
119 			if (sc == EOS)
120 				return (FNM_NOMATCH);
121 			if (sc == '/' && (flags & FNM_PATHNAME))
122 				goto backtrack;
123 			if (sc == '.' && (flags & FNM_PERIOD) &&
124 			    (string == stringstart ||
125 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
126 				goto backtrack;
127 			string += sclen;
128 			break;
129 		case '*':
130 			c = *pattern;
131 			/* Collapse multiple stars. */
132 			while (c == '*')
133 				c = *++pattern;
134 
135 			if (sc == '.' && (flags & FNM_PERIOD) &&
136 			    (string == stringstart ||
137 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
138 				goto backtrack;
139 
140 			/* Optimize for pattern with * at end or before /. */
141 			if (c == EOS)
142 				if (flags & FNM_PATHNAME)
143 					return ((flags & FNM_LEADING_DIR) ||
144 					    strchr(string, '/') == NULL ?
145 					    0 : FNM_NOMATCH);
146 				else
147 					return (0);
148 			else if (c == '/' && flags & FNM_PATHNAME) {
149 				if ((string = strchr(string, '/')) == NULL)
150 					return (FNM_NOMATCH);
151 				break;
152 			}
153 
154 			/*
155 			 * First try the shortest match for the '*' that
156 			 * could work. We can forget any earlier '*' since
157 			 * there is no way having it match more characters
158 			 * can help us, given that we are already here.
159 			 */
160 			bt_pattern = pattern, bt_patmbs = patmbs;
161 			bt_string = string, bt_strmbs = strmbs;
162 			break;
163 		case '[':
164 			if (sc == EOS)
165 				return (FNM_NOMATCH);
166 			if (sc == '/' && (flags & FNM_PATHNAME))
167 				goto backtrack;
168 			if (sc == '.' && (flags & FNM_PERIOD) &&
169 			    (string == stringstart ||
170 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
171 				goto backtrack;
172 
173 			switch (rangematch(pattern, sc, flags, &newp,
174 			    &patmbs)) {
175 			case RANGE_ERROR:
176 				goto norm;
177 			case RANGE_MATCH:
178 				pattern = newp;
179 				break;
180 			case RANGE_NOMATCH:
181 				goto backtrack;
182 			}
183 			string += sclen;
184 			break;
185 		case '\\':
186 			if (!(flags & FNM_NOESCAPE)) {
187 				pclen = mbrtowc(&pc, pattern, MB_LEN_MAX,
188 				    &patmbs);
189 				if (pclen == 0 || pclen == (size_t)-1 ||
190 				    pclen == (size_t)-2)
191 					return (FNM_NOMATCH);
192 				pattern += pclen;
193 			}
194 			/* FALLTHROUGH */
195 		default:
196 		norm:
197 			string += sclen;
198 			if (pc == sc)
199 				;
200 			else if ((flags & FNM_CASEFOLD) &&
201 				 (towlower(pc) == towlower(sc)))
202 				;
203 			else {
204 		backtrack:
205 				/*
206 				 * If we have a mismatch (other than hitting
207 				 * the end of the string), go back to the last
208 				 * '*' seen and have it match one additional
209 				 * character.
210 				 */
211 				if (bt_pattern == NULL)
212 					return (FNM_NOMATCH);
213 				sclen = mbrtowc(&sc, bt_string, MB_LEN_MAX,
214 				    &bt_strmbs);
215 				if (sclen == (size_t)-1 ||
216 				    sclen == (size_t)-2) {
217 					sc = (unsigned char)*bt_string;
218 					sclen = 1;
219 					memset(&bt_strmbs, 0,
220 					    sizeof(bt_strmbs));
221 				}
222 				if (sc == EOS)
223 					return (FNM_NOMATCH);
224 				if (sc == '/' && flags & FNM_PATHNAME)
225 					return (FNM_NOMATCH);
226 				bt_string += sclen;
227 				pattern = bt_pattern, patmbs = bt_patmbs;
228 				string = bt_string, strmbs = bt_strmbs;
229 			}
230 			break;
231 		}
232 	}
233 	/* NOTREACHED */
234 }
235 
236 static int
237 rangematch(const char *pattern, wchar_t test, int flags, char **newp,
238     mbstate_t *patmbs)
239 {
240 	int negate, ok;
241 	wchar_t c, c2;
242 	size_t pclen;
243 	const char *origpat;
244 	struct xlocale_collate *table =
245 		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
246 
247 	/*
248 	 * A bracket expression starting with an unquoted circumflex
249 	 * character produces unspecified results (IEEE 1003.2-1992,
250 	 * 3.13.2).  This implementation treats it like '!', for
251 	 * consistency with the regular expression syntax.
252 	 * J.T. Conklin (conklin@ngai.kaleida.com)
253 	 */
254 	if ((negate = (*pattern == '!' || *pattern == '^')))
255 		++pattern;
256 
257 	if (flags & FNM_CASEFOLD)
258 		test = towlower(test);
259 
260 	/*
261 	 * A right bracket shall lose its special meaning and represent
262 	 * itself in a bracket expression if it occurs first in the list.
263 	 * -- POSIX.2 2.8.3.2
264 	 */
265 	ok = 0;
266 	origpat = pattern;
267 	for (;;) {
268 		if (*pattern == ']' && pattern > origpat) {
269 			pattern++;
270 			break;
271 		} else if (*pattern == '\0') {
272 			return (RANGE_ERROR);
273 		} else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
274 			return (RANGE_NOMATCH);
275 		} else if (*pattern == '\\' && !(flags & FNM_NOESCAPE))
276 			pattern++;
277 		pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
278 		if (pclen == (size_t)-1 || pclen == (size_t)-2)
279 			return (RANGE_NOMATCH);
280 		pattern += pclen;
281 
282 		if (flags & FNM_CASEFOLD)
283 			c = towlower(c);
284 
285 		if (*pattern == '-' && *(pattern + 1) != EOS &&
286 		    *(pattern + 1) != ']') {
287 			if (*++pattern == '\\' && !(flags & FNM_NOESCAPE))
288 				if (*pattern != EOS)
289 					pattern++;
290 			pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs);
291 			if (pclen == (size_t)-1 || pclen == (size_t)-2)
292 				return (RANGE_NOMATCH);
293 			pattern += pclen;
294 			if (c2 == EOS)
295 				return (RANGE_ERROR);
296 
297 			if (flags & FNM_CASEFOLD)
298 				c2 = towlower(c2);
299 
300 			if (table->__collate_load_error ?
301 			    c <= test && test <= c2 :
302 			       __wcollate_range_cmp(c, test) <= 0
303 			    && __wcollate_range_cmp(test, c2) <= 0
304 			   )
305 				ok = 1;
306 		} else if (c == test)
307 			ok = 1;
308 	}
309 
310 	*newp = (char *)pattern;
311 	return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
312 }
313