xref: /freebsd/crypto/openssh/openbsd-compat/fnmatch.c (revision 8ddb146abcdf061be9f2c0db7e391697dafad85c)
1 /*	$OpenBSD: fnmatch.c,v 1.22 2020/03/13 03:25:45 djm Exp $	*/
2 
3 /* Copyright (c) 2011, VMware, Inc.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *     * Redistributions of source code must retain the above copyright
9  *       notice, this list of conditions and the following disclaimer.
10  *     * Redistributions in binary form must reproduce the above copyright
11  *       notice, this list of conditions and the following disclaimer in the
12  *       documentation and/or other materials provided with the distribution.
13  *     * Neither the name of the VMware, Inc. nor the names of its contributors
14  *       may be used to endorse or promote products derived from this software
15  *       without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE FOR
21  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * Copyright (c) 2008, 2016 Todd C. Miller <millert@openbsd.org>
31  *
32  * Permission to use, copy, modify, and distribute this software for any
33  * purpose with or without fee is hereby granted, provided that the above
34  * copyright notice and this permission notice appear in all copies.
35  *
36  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
37  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
38  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
39  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
40  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
41  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
42  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43  */
44 
45 /* Authored by William A. Rowe Jr. <wrowe; apache.org, vmware.com>, April 2011
46  *
47  * Derived from The Open Group Base Specifications Issue 7, IEEE Std 1003.1-2008
48  * as described in;
49  *   http://pubs.opengroup.org/onlinepubs/9699919799/functions/fnmatch.html
50  *
51  * Filename pattern matches defined in section 2.13, "Pattern Matching Notation"
52  * from chapter 2. "Shell Command Language"
53  *   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13
54  * where; 1. A bracket expression starting with an unquoted <circumflex> '^'
55  * character CONTINUES to specify a non-matching list; 2. an explicit <period> '.'
56  * in a bracket expression matching list, e.g. "[.abc]" does NOT match a leading
57  * <period> in a filename; 3. a <left-square-bracket> '[' which does not introduce
58  * a valid bracket expression is treated as an ordinary character; 4. a differing
59  * number of consecutive slashes within pattern and string will NOT match;
60  * 5. a trailing '\' in FNM_ESCAPE mode is treated as an ordinary '\' character.
61  *
62  * Bracket expansion defined in section 9.3.5, "RE Bracket Expression",
63  * from chapter 9, "Regular Expressions"
64  *   http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05
65  * with no support for collating symbols, equivalence class expressions or
66  * character class expressions.  A partial range expression with a leading
67  * hyphen following a valid range expression will match only the ordinary
68  * <hyphen> and the ending character (e.g. "[a-m-z]" will match characters
69  * 'a' through 'm', a <hyphen> '-', or a 'z').
70  *
71  * Supports BSD extensions FNM_LEADING_DIR to match pattern to the end of one
72  * path segment of string, and FNM_CASEFOLD to ignore alpha case.
73  *
74  * NOTE: Only POSIX/C single byte locales are correctly supported at this time.
75  * Notably, non-POSIX locales with FNM_CASEFOLD produce undefined results,
76  * particularly in ranges of mixed case (e.g. "[A-z]") or spanning alpha and
77  * nonalpha characters within a range.
78  *
79  * XXX comments below indicate porting required for multi-byte character sets
80  * and non-POSIX locale collation orders; requires mbr* APIs to track shift
81  * state of pattern and string (rewinding pattern and string repeatedly).
82  *
83  * Certain parts of the code assume 0x00-0x3F are unique with any MBCS (e.g.
84  * UTF-8, SHIFT-JIS, etc).  Any implementation allowing '\' as an alternate
85  * path delimiter must be aware that 0x5C is NOT unique within SHIFT-JIS.
86  */
87 
88 /* OPENBSD ORIGINAL: lib/libc/gen/fnmatch.c */
89 
90 #include "includes.h"
91 #ifndef HAVE_FNMATCH
92 
93 #include <fnmatch.h>
94 #include <string.h>
95 #include <ctype.h>
96 
97 #include "charclass.h"
98 
99 #define	RANGE_MATCH	1
100 #define	RANGE_NOMATCH	0
101 #define	RANGE_ERROR	(-1)
102 
103 static int
104 classmatch(const char *pattern, char test, int foldcase, const char **ep)
105 {
106 	const char * const mismatch = pattern;
107 	const char *colon;
108 	struct cclass *cc;
109 	int rval = RANGE_NOMATCH;
110 	size_t len;
111 
112 	if (pattern[0] != '[' || pattern[1] != ':') {
113 		*ep = mismatch;
114 		return RANGE_ERROR;
115 	}
116 	pattern += 2;
117 
118 	if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') {
119 		*ep = mismatch;
120 		return RANGE_ERROR;
121 	}
122 	*ep = colon + 2;
123 	len = (size_t)(colon - pattern);
124 
125 	if (foldcase && strncmp(pattern, "upper:]", 7) == 0)
126 		pattern = "lower:]";
127 	for (cc = cclasses; cc->name != NULL; cc++) {
128 		if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') {
129 			if (cc->isctype((unsigned char)test))
130 				rval = RANGE_MATCH;
131 			break;
132 		}
133 	}
134 	if (cc->name == NULL) {
135 		/* invalid character class, treat as normal text */
136 		*ep = mismatch;
137 		rval = RANGE_ERROR;
138 	}
139 	return rval;
140 }
141 
142 /* Most MBCS/collation/case issues handled here.  Wildcard '*' is not handled.
143  * EOS '\0' and the FNM_PATHNAME '/' delimiters are not advanced over,
144  * however the "\/" sequence is advanced to '/'.
145  *
146  * Both pattern and string are **char to support pointer increment of arbitrary
147  * multibyte characters for the given locale, in a later iteration of this code
148  */
149 static int fnmatch_ch(const char **pattern, const char **string, int flags)
150 {
151 	const char * const mismatch = *pattern;
152 	const int nocase = !!(flags & FNM_CASEFOLD);
153 	const int escape = !(flags & FNM_NOESCAPE);
154 	const int slash = !!(flags & FNM_PATHNAME);
155 	int result = FNM_NOMATCH;
156 	const char *startch;
157 	int negate;
158 
159 	if (**pattern == '[') {
160 		++*pattern;
161 
162 		/* Handle negation, either leading ! or ^ operators */
163 		negate = (**pattern == '!') || (**pattern == '^');
164 		if (negate)
165 			++*pattern;
166 
167 		/* ']' is an ordinary char at the start of the range pattern */
168 		if (**pattern == ']')
169 			goto leadingclosebrace;
170 
171 		while (**pattern) {
172 			if (**pattern == ']') {
173 				++*pattern;
174 				/* XXX: Fix for MBCS character width */
175 				++*string;
176 				return (result ^ negate);
177 			}
178 
179 			if (escape && (**pattern == '\\')) {
180 				++*pattern;
181 
182 				/* Patterns must terminate with ']', not EOS */
183 				if (!**pattern)
184 					break;
185 			}
186 
187 			/* Patterns must terminate with ']' not '/' */
188 			if (slash && (**pattern == '/'))
189 				break;
190 
191 			/* Match character classes. */
192 			switch (classmatch(*pattern, **string, nocase, pattern)) {
193 			case RANGE_MATCH:
194 				result = 0;
195 				continue;
196 			case RANGE_NOMATCH:
197 				/* Valid character class but no match. */
198 				continue;
199 			default:
200 				/* Not a valid character class. */
201 				break;
202 			}
203 			if (!**pattern)
204 				break;
205 
206 leadingclosebrace:
207 			/* Look at only well-formed range patterns;
208 			 * "x-]" is not allowed unless escaped ("x-\]")
209 			 * XXX: Fix for locale/MBCS character width
210 			 */
211 			if (((*pattern)[1] == '-') && ((*pattern)[2] != ']')) {
212 				startch = *pattern;
213 				*pattern += (escape && ((*pattern)[2] == '\\')) ? 3 : 2;
214 
215 				/*
216 				 * NOT a properly balanced [expr] pattern, EOS
217 				 * terminated or ranges containing a slash in
218 				 * FNM_PATHNAME mode pattern fall out to to the
219 				 * rewind and test '[' literal code path.
220 				 */
221 				if (!**pattern || (slash && (**pattern == '/')))
222 					break;
223 
224 				/* XXX: handle locale/MBCS comparison, advance by MBCS char width */
225 				if ((**string >= *startch) && (**string <= **pattern))
226 					result = 0;
227 				else if (nocase &&
228 				    (isupper((unsigned char)**string) ||
229 				     isupper((unsigned char)*startch) ||
230 				     isupper((unsigned char)**pattern)) &&
231 				    (tolower((unsigned char)**string) >=
232 				     tolower((unsigned char)*startch)) &&
233 				    (tolower((unsigned char)**string) <=
234 				     tolower((unsigned char)**pattern)))
235 					result = 0;
236 
237 				++*pattern;
238 				continue;
239 			}
240 
241 			/* XXX: handle locale/MBCS comparison, advance by MBCS char width */
242 			if ((**string == **pattern))
243 				result = 0;
244 			else if (nocase && (isupper((unsigned char)**string) ||
245 			    isupper((unsigned char)**pattern)) &&
246 			    (tolower((unsigned char)**string) ==
247 			    tolower((unsigned char)**pattern)))
248 				result = 0;
249 
250 			++*pattern;
251 		}
252 		/*
253 		 * NOT a properly balanced [expr] pattern;
254 		 * Rewind and reset result to test '[' literal
255 		 */
256 		*pattern = mismatch;
257 		result = FNM_NOMATCH;
258 	} else if (**pattern == '?') {
259 		/* Optimize '?' match before unescaping **pattern */
260 		if (!**string || (slash && (**string == '/')))
261 			return FNM_NOMATCH;
262 		result = 0;
263 		goto fnmatch_ch_success;
264 	} else if (escape && (**pattern == '\\') && (*pattern)[1]) {
265 		++*pattern;
266 	}
267 
268 	/* XXX: handle locale/MBCS comparison, advance by the MBCS char width */
269 	if (**string == **pattern)
270 		result = 0;
271 	else if (nocase && (isupper((unsigned char)**string) ||
272 	    isupper((unsigned char)**pattern)) &&
273 	    (tolower((unsigned char)**string) ==
274 	    tolower((unsigned char)**pattern)))
275 		result = 0;
276 
277 	/* Refuse to advance over trailing slash or NULs */
278 	if (**string == '\0' || **pattern == '\0' ||
279 	    (slash && ((**string == '/') || (**pattern == '/'))))
280 		return result;
281 
282 fnmatch_ch_success:
283 	++*pattern;
284 	++*string;
285 	return result;
286 }
287 
288 
/*
 * Match string against the shell-style pattern.
 *
 * flags is a bitwise OR of:
 *   FNM_NOESCAPE     treat '\' in pattern as an ordinary character;
 *   FNM_PATHNAME     '/' in string is matched only by '/' in pattern and
 *                    splits both into segments matched independently;
 *   FNM_PERIOD       a leading '.' in string must be matched by an
 *                    explicit '.' (or "\.") in pattern;
 *   FNM_LEADING_DIR  succeed once pattern is exhausted and the rest of
 *                    string begins with '/';
 *   FNM_CASEFOLD     passed through to fnmatch_ch() for per-character
 *                    comparison.
 *
 * Returns 0 if string matches pattern, FNM_NOMATCH otherwise.
 */
int fnmatch(const char *pattern, const char *string, int flags)
{
	static const char dummystring[2] = {' ', 0};
	const int escape = !(flags & FNM_NOESCAPE);
	const int slash = !!(flags & FNM_PATHNAME);
	const int leading_dir = !!(flags & FNM_LEADING_DIR);
	const char *dummyptr, *matchptr, *strendseg;
	int wild;
	/* For '*' wild processing only; suppress 'used before initialization'
	 * warnings with dummy initialization values;
	 */
	const char *strstartseg = NULL;
	const char *mismatch = NULL;
	int matchlen = 0;

	/* A leading '*' must be processed even when string is empty. */
	if (*pattern == '*')
		goto firstsegment;

	while (*pattern && *string) {
		/*
		 * Pre-decode "\/" which has no special significance, and
		 * match balanced slashes, starting a new segment pattern.
		 */
		if (slash && escape && (*pattern == '\\') && (pattern[1] == '/'))
			++pattern;
		if (slash && (*pattern == '/') && (*string == '/')) {
			++pattern;
			++string;
		}

firstsegment:
		/*
		 * At the beginning of each segment, validate leading period
		 * behavior.
		 */
		if ((flags & FNM_PERIOD) && (*string == '.')) {
			if (*pattern == '.')
				++pattern;
			else if (escape && (*pattern == '\\') && (pattern[1] == '.'))
				pattern += 2;
			else
				return FNM_NOMATCH;
			++string;
		}

		/*
		 * Determine the end of string segment.  Presumes '/'
		 * character is unique, not composite in any MBCS encoding
		 */
		if (slash) {
			strendseg = strchr(string, '/');
			if (!strendseg)
				strendseg = strchr(string, '\0');
		} else {
			strendseg = strchr(string, '\0');
		}

		/*
		 * Allow pattern '*' to be consumed even with no remaining
		 * string to match.
		 */
		while (*pattern) {
			if ((string > strendseg) ||
			    ((string == strendseg) && (*pattern != '*')))
				break;

			if (slash && ((*pattern == '/') ||
			    (escape && (*pattern == '\\') && (pattern[1] == '/'))))
				break;

			/*
			 * Reduce groups of '*' and '?' to n '?' matches
			 * followed by one '*' test for simplicity.
			 */
			for (wild = 0; (*pattern == '*') || (*pattern == '?'); ++pattern) {
				if (*pattern == '*') {
					wild = 1;
				} else if (string < strendseg) {  /* && (*pattern == '?') */
					/* XXX: Advance 1 char for MBCS locale */
					++string;
				}
				else {  /* (string >= strendseg) && (*pattern == '?') */
					return FNM_NOMATCH;
				}
			}

			if (wild) {
				/* Remember where to restart after a failed attempt. */
				strstartseg = string;
				mismatch = pattern;

				/*
				 * Count fixed (non '*') char matches remaining
				 * in pattern * excluding '/' (or "\/") and '*'.
				 */
				for (matchptr = pattern, matchlen = 0; 1; ++matchlen) {
					if ((*matchptr == '\0') ||
					    (slash && ((*matchptr == '/') ||
					    (escape && (*matchptr == '\\') &&
					    (matchptr[1] == '/'))))) {
						/* Compare precisely this many
						 * trailing string chars, the
						 * resulting match needs no
						 * wildcard loop.
						 */
						/*
						 * Compare lengths rather than
						 * forming string + matchlen,
						 * which could point beyond the
						 * end of string.
						 */
						/* XXX: Adjust for MBCS */
						if (matchlen > strendseg - string)
							return FNM_NOMATCH;

						string = strendseg - matchlen;
						wild = 0;
						break;
					}

					if (*matchptr == '*') {
						/*
						 * Ensure at least this many
						 * trailing string chars remain
						 * for the first comparison.
						 */
						/* XXX: Adjust for MBCS */
						if (matchlen > strendseg - string)
							return FNM_NOMATCH;

						/*
						 * Begin first wild comparison
						 * at the current position.
						 */
						break;
					}

					/*
					 * Skip forward in pattern by a single
					 * character match.  Use a dummy
					 * fnmatch_ch() test to count one
					 * "[range]" escape.
					 */
					/* XXX: Adjust for MBCS */
					if (escape && (*matchptr == '\\') &&
					    matchptr[1]) {
						matchptr += 2;
					} else if (*matchptr == '[') {
						dummyptr = dummystring;
						fnmatch_ch(&matchptr, &dummyptr,
						    flags);
					} else {
						++matchptr;
					}
				}
			}

			/* Incrementally match string against the pattern. */
			while (*pattern && (string < strendseg)) {
				/* Success; begin a new wild pattern search. */
				if (*pattern == '*')
					break;

				if (slash && ((*string == '/') ||
				    (*pattern == '/') || (escape &&
				    (*pattern == '\\') && (pattern[1] == '/'))))
					break;

				/*
				 * Compare ch's (the pattern is advanced over
				 * "\/" to the '/', but slashes will mismatch,
				 * and are not consumed).
				 */
				if (!fnmatch_ch(&pattern, &string, flags))
					continue;

				/*
				 * Failed to match, loop against next char
				 * offset of string segment until not enough
				 * string chars remain to match the fixed
				 * pattern.
				 */
				if (wild) {
					/* XXX: Advance 1 char for MBCS locale */
					string = ++strstartseg;
					if (matchlen > strendseg - string)
						return FNM_NOMATCH;

					pattern = mismatch;
					continue;
				} else
					return FNM_NOMATCH;
			}
		}

		/* Leftover string is acceptable only at a segment boundary. */
		if (*string && !((slash || leading_dir) && (*string == '/')))
			return FNM_NOMATCH;

		/* Leftover pattern is acceptable only at a '/' (or "\/"). */
		if (*pattern && !(slash && ((*pattern == '/') ||
		    (escape && (*pattern == '\\') && (pattern[1] == '/')))))
			return FNM_NOMATCH;

		if (leading_dir && !*pattern && *string == '/')
			return 0;
	}

	/* Where both pattern and string are at EOS, declare success.  */
	if (!*string && !*pattern)
		return 0;

	/* Pattern didn't match to the end of string. */
	return FNM_NOMATCH;
}
495 #endif /* HAVE_FNMATCH */
496