/* xref: /freebsd/lib/libc/gen/fnmatch.c (revision fbbd9655e5107c68e4e0146ff22b73d7350475bc) */
/*
 * Copyright (c) 1989, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Guido van Rossum.
 *
 * Copyright (c) 2011 The FreeBSD Foundation
 * All rights reserved.
 * Portions of this software were developed by David Chisnall
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

3858f0484fSRodney W. Grimes #if defined(LIBC_SCCS) && !defined(lint)
3958f0484fSRodney W. Grimes static char sccsid[] = "@(#)fnmatch.c	8.2 (Berkeley) 4/16/94";
4058f0484fSRodney W. Grimes #endif /* LIBC_SCCS and not lint */
41b231cb39SDavid E. O'Brien #include <sys/cdefs.h>
42b231cb39SDavid E. O'Brien __FBSDID("$FreeBSD$");
4358f0484fSRodney W. Grimes 
/*
 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
 * Compares a filename or pathname to a pattern.
 */

/*
 * Some notes on multibyte character support:
 * 1. Patterns with illegal byte sequences match nothing.
 * 2. Illegal byte sequences in the "string" argument are handled by treating
 *    them as single-byte characters with a value of the first byte of the
 *    sequence cast to wchar_t.
 * 3. Multibyte conversion state objects (mbstate_t) are passed around and
 *    used for most, but not all, conversions. Further work will be required
 *    to support state-dependent encodings.
 */

#include <fnmatch.h>
#include <limits.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>

#include "collate.h"

/* End-of-string marker used for both the pattern and the string. */
#define	EOS	'\0'

/*
 * Result codes of rangematch():
 *   RANGE_MATCH   - the character is accepted by the bracket expression.
 *   RANGE_NOMATCH - the character is rejected by the bracket expression.
 *   RANGE_ERROR   - the pattern ended before the bracket expression was
 *                   closed; the caller then matches '[' as an ordinary
 *                   character.
 */
#define	RANGE_MATCH	1
#define	RANGE_NOMATCH	0
#define	RANGE_ERROR	(-1)

static int rangematch(const char *pattern, wchar_t test, int flags,
    char **newp, mbstate_t *patmbs);
static int fnmatch1(const char *pattern, const char *string,
    const char *stringstart, int flags, mbstate_t patmbs, mbstate_t strmbs);

/*
 * Public entry point: test whether `string' matches the shell pattern
 * `pattern' under the FNM_* options in `flags'.
 *
 * The work is done by fnmatch1(); this wrapper only supplies the start of
 * the string (needed there for the FNM_PERIOD leading-period test) and a
 * zero-initialized mbstate_t, i.e. the initial conversion state, for both
 * the pattern and the string.
 *
 * Returns whatever fnmatch1() returns: 0 on a match, FNM_NOMATCH otherwise.
 */
int
fnmatch(const char *pattern, const char *string, int flags)
{
	/* Static storage duration guarantees an all-zero (initial) state. */
	static const mbstate_t initial;

	return (fnmatch1(pattern, string, string, flags, initial, initial));
}

869d88e270STim J. Robbins static int
8755b6b759SCraig Rodrigues fnmatch1(const char *pattern, const char *string, const char *stringstart,
8855b6b759SCraig Rodrigues     int flags, mbstate_t patmbs, mbstate_t strmbs)
899d88e270STim J. Robbins {
903caeab9dSJilles Tjoelker 	const char *bt_pattern, *bt_string;
913caeab9dSJilles Tjoelker 	mbstate_t bt_patmbs, bt_strmbs;
92e2dbbd9eSAndrey A. Chernov 	char *newp;
939d88e270STim J. Robbins 	char c;
949d88e270STim J. Robbins 	wchar_t pc, sc;
959d88e270STim J. Robbins 	size_t pclen, sclen;
9658f0484fSRodney W. Grimes 
973caeab9dSJilles Tjoelker 	bt_pattern = bt_string = NULL;
98139ac6b2SJilles Tjoelker 	for (;;) {
999d88e270STim J. Robbins 		pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs);
1009d88e270STim J. Robbins 		if (pclen == (size_t)-1 || pclen == (size_t)-2)
1019d88e270STim J. Robbins 			return (FNM_NOMATCH);
1029d88e270STim J. Robbins 		pattern += pclen;
1039d88e270STim J. Robbins 		sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs);
1049d88e270STim J. Robbins 		if (sclen == (size_t)-1 || sclen == (size_t)-2) {
1059d88e270STim J. Robbins 			sc = (unsigned char)*string;
1069d88e270STim J. Robbins 			sclen = 1;
1079d88e270STim J. Robbins 			memset(&strmbs, 0, sizeof(strmbs));
1089d88e270STim J. Robbins 		}
1099d88e270STim J. Robbins 		switch (pc) {
11058f0484fSRodney W. Grimes 		case EOS:
1119d88e270STim J. Robbins 			if ((flags & FNM_LEADING_DIR) && sc == '/')
1123deeb59dSAndrey A. Chernov 				return (0);
1133caeab9dSJilles Tjoelker 			if (sc == EOS)
1143caeab9dSJilles Tjoelker 				return (0);
1153caeab9dSJilles Tjoelker 			goto backtrack;
11658f0484fSRodney W. Grimes 		case '?':
1179d88e270STim J. Robbins 			if (sc == EOS)
11858f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
1199d88e270STim J. Robbins 			if (sc == '/' && (flags & FNM_PATHNAME))
1203caeab9dSJilles Tjoelker 				goto backtrack;
1219d88e270STim J. Robbins 			if (sc == '.' && (flags & FNM_PERIOD) &&
12258f0484fSRodney W. Grimes 			    (string == stringstart ||
12358f0484fSRodney W. Grimes 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
1243caeab9dSJilles Tjoelker 				goto backtrack;
1259d88e270STim J. Robbins 			string += sclen;
12658f0484fSRodney W. Grimes 			break;
12758f0484fSRodney W. Grimes 		case '*':
12858f0484fSRodney W. Grimes 			c = *pattern;
12958f0484fSRodney W. Grimes 			/* Collapse multiple stars. */
13058f0484fSRodney W. Grimes 			while (c == '*')
13158f0484fSRodney W. Grimes 				c = *++pattern;
13258f0484fSRodney W. Grimes 
1339d88e270STim J. Robbins 			if (sc == '.' && (flags & FNM_PERIOD) &&
13458f0484fSRodney W. Grimes 			    (string == stringstart ||
13558f0484fSRodney W. Grimes 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
1363caeab9dSJilles Tjoelker 				goto backtrack;
13758f0484fSRodney W. Grimes 
13858f0484fSRodney W. Grimes 			/* Optimize for pattern with * at end or before /. */
13958f0484fSRodney W. Grimes 			if (c == EOS)
14058f0484fSRodney W. Grimes 				if (flags & FNM_PATHNAME)
141298c8e3dSJohn Polstra 					return ((flags & FNM_LEADING_DIR) ||
142298c8e3dSJohn Polstra 					    strchr(string, '/') == NULL ?
14358f0484fSRodney W. Grimes 					    0 : FNM_NOMATCH);
14458f0484fSRodney W. Grimes 				else
14558f0484fSRodney W. Grimes 					return (0);
14658f0484fSRodney W. Grimes 			else if (c == '/' && flags & FNM_PATHNAME) {
14758f0484fSRodney W. Grimes 				if ((string = strchr(string, '/')) == NULL)
14858f0484fSRodney W. Grimes 					return (FNM_NOMATCH);
14958f0484fSRodney W. Grimes 				break;
15058f0484fSRodney W. Grimes 			}
15158f0484fSRodney W. Grimes 
1523caeab9dSJilles Tjoelker 			/*
1533caeab9dSJilles Tjoelker 			 * First try the shortest match for the '*' that
1543caeab9dSJilles Tjoelker 			 * could work. We can forget any earlier '*' since
1553caeab9dSJilles Tjoelker 			 * there is no way having it match more characters
1563caeab9dSJilles Tjoelker 			 * can help us, given that we are already here.
1573caeab9dSJilles Tjoelker 			 */
1583caeab9dSJilles Tjoelker 			bt_pattern = pattern, bt_patmbs = patmbs;
1593caeab9dSJilles Tjoelker 			bt_string = string, bt_strmbs = strmbs;
16058f0484fSRodney W. Grimes 			break;
16158f0484fSRodney W. Grimes 		case '[':
1629d88e270STim J. Robbins 			if (sc == EOS)
16358f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
1649d88e270STim J. Robbins 			if (sc == '/' && (flags & FNM_PATHNAME))
1653caeab9dSJilles Tjoelker 				goto backtrack;
1669d88e270STim J. Robbins 			if (sc == '.' && (flags & FNM_PERIOD) &&
16705a068e6SAndrey A. Chernov 			    (string == stringstart ||
16805a068e6SAndrey A. Chernov 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
1693caeab9dSJilles Tjoelker 				goto backtrack;
17005a068e6SAndrey A. Chernov 
1719d88e270STim J. Robbins 			switch (rangematch(pattern, sc, flags, &newp,
1729d88e270STim J. Robbins 			    &patmbs)) {
173e2dbbd9eSAndrey A. Chernov 			case RANGE_ERROR:
174e2dbbd9eSAndrey A. Chernov 				goto norm;
175e2dbbd9eSAndrey A. Chernov 			case RANGE_MATCH:
176e2dbbd9eSAndrey A. Chernov 				pattern = newp;
177e2dbbd9eSAndrey A. Chernov 				break;
178e2dbbd9eSAndrey A. Chernov 			case RANGE_NOMATCH:
1793caeab9dSJilles Tjoelker 				goto backtrack;
180e2dbbd9eSAndrey A. Chernov 			}
1819d88e270STim J. Robbins 			string += sclen;
18258f0484fSRodney W. Grimes 			break;
18358f0484fSRodney W. Grimes 		case '\\':
18458f0484fSRodney W. Grimes 			if (!(flags & FNM_NOESCAPE)) {
1859d88e270STim J. Robbins 				pclen = mbrtowc(&pc, pattern, MB_LEN_MAX,
1869d88e270STim J. Robbins 				    &patmbs);
1879d88e270STim J. Robbins 				if (pclen == (size_t)-1 || pclen == (size_t)-2)
1889d88e270STim J. Robbins 					return (FNM_NOMATCH);
1899d88e270STim J. Robbins 				pattern += pclen;
19058f0484fSRodney W. Grimes 			}
19158f0484fSRodney W. Grimes 			/* FALLTHROUGH */
19258f0484fSRodney W. Grimes 		default:
193e2dbbd9eSAndrey A. Chernov 		norm:
1943caeab9dSJilles Tjoelker 			string += sclen;
1959d88e270STim J. Robbins 			if (pc == sc)
19695e4966cSWolfram Schneider 				;
1973deeb59dSAndrey A. Chernov 			else if ((flags & FNM_CASEFOLD) &&
1989d88e270STim J. Robbins 				 (towlower(pc) == towlower(sc)))
19995e4966cSWolfram Schneider 				;
2003caeab9dSJilles Tjoelker 			else {
2013caeab9dSJilles Tjoelker 		backtrack:
2023caeab9dSJilles Tjoelker 				/*
2033caeab9dSJilles Tjoelker 				 * If we have a mismatch (other than hitting
2043caeab9dSJilles Tjoelker 				 * the end of the string), go back to the last
2053caeab9dSJilles Tjoelker 				 * '*' seen and have it match one additional
2063caeab9dSJilles Tjoelker 				 * character.
2073caeab9dSJilles Tjoelker 				 */
2083caeab9dSJilles Tjoelker 				if (bt_pattern == NULL)
20958f0484fSRodney W. Grimes 					return (FNM_NOMATCH);
2103caeab9dSJilles Tjoelker 				sclen = mbrtowc(&sc, bt_string, MB_LEN_MAX,
2113caeab9dSJilles Tjoelker 				    &bt_strmbs);
2123caeab9dSJilles Tjoelker 				if (sclen == (size_t)-1 ||
2133caeab9dSJilles Tjoelker 				    sclen == (size_t)-2) {
2143caeab9dSJilles Tjoelker 					sc = (unsigned char)*bt_string;
2153caeab9dSJilles Tjoelker 					sclen = 1;
2163caeab9dSJilles Tjoelker 					memset(&bt_strmbs, 0,
2173caeab9dSJilles Tjoelker 					    sizeof(bt_strmbs));
2183caeab9dSJilles Tjoelker 				}
2193caeab9dSJilles Tjoelker 				if (sc == EOS)
2203caeab9dSJilles Tjoelker 					return (FNM_NOMATCH);
2213caeab9dSJilles Tjoelker 				if (sc == '/' && flags & FNM_PATHNAME)
2223caeab9dSJilles Tjoelker 					return (FNM_NOMATCH);
2233caeab9dSJilles Tjoelker 				bt_string += sclen;
2243caeab9dSJilles Tjoelker 				pattern = bt_pattern, patmbs = bt_patmbs;
2253caeab9dSJilles Tjoelker 				string = bt_string, strmbs = bt_strmbs;
2263caeab9dSJilles Tjoelker 			}
22758f0484fSRodney W. Grimes 			break;
22858f0484fSRodney W. Grimes 		}
2299d88e270STim J. Robbins 	}
23058f0484fSRodney W. Grimes 	/* NOTREACHED */
23158f0484fSRodney W. Grimes }
23258f0484fSRodney W. Grimes 
233e2dbbd9eSAndrey A. Chernov static int
23455b6b759SCraig Rodrigues rangematch(const char *pattern, wchar_t test, int flags, char **newp,
23555b6b759SCraig Rodrigues     mbstate_t *patmbs)
23658f0484fSRodney W. Grimes {
237e728d480SAndrey A. Chernov 	int negate, ok;
2389d88e270STim J. Robbins 	wchar_t c, c2;
2399d88e270STim J. Robbins 	size_t pclen;
2409d88e270STim J. Robbins 	const char *origpat;
2411daad8f5SAndrey A. Chernov 	struct xlocale_collate *table =
2421daad8f5SAndrey A. Chernov 		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
24358f0484fSRodney W. Grimes 
24458f0484fSRodney W. Grimes 	/*
24558f0484fSRodney W. Grimes 	 * A bracket expression starting with an unquoted circumflex
24658f0484fSRodney W. Grimes 	 * character produces unspecified results (IEEE 1003.2-1992,
24758f0484fSRodney W. Grimes 	 * 3.13.2).  This implementation treats it like '!', for
24858f0484fSRodney W. Grimes 	 * consistency with the regular expression syntax.
24958f0484fSRodney W. Grimes 	 * J.T. Conklin (conklin@ngai.kaleida.com)
25058f0484fSRodney W. Grimes 	 */
25105a068e6SAndrey A. Chernov 	if ( (negate = (*pattern == '!' || *pattern == '^')) )
25258f0484fSRodney W. Grimes 		++pattern;
25358f0484fSRodney W. Grimes 
2543deeb59dSAndrey A. Chernov 	if (flags & FNM_CASEFOLD)
2559d88e270STim J. Robbins 		test = towlower(test);
25695e4966cSWolfram Schneider 
257e2dbbd9eSAndrey A. Chernov 	/*
258e2dbbd9eSAndrey A. Chernov 	 * A right bracket shall lose its special meaning and represent
259e2dbbd9eSAndrey A. Chernov 	 * itself in a bracket expression if it occurs first in the list.
260e2dbbd9eSAndrey A. Chernov 	 * -- POSIX.2 2.8.3.2
261e2dbbd9eSAndrey A. Chernov 	 */
262e728d480SAndrey A. Chernov 	ok = 0;
2639d88e270STim J. Robbins 	origpat = pattern;
2649d88e270STim J. Robbins 	for (;;) {
2659d88e270STim J. Robbins 		if (*pattern == ']' && pattern > origpat) {
2669d88e270STim J. Robbins 			pattern++;
2679d88e270STim J. Robbins 			break;
2689d88e270STim J. Robbins 		} else if (*pattern == '\0') {
269e2dbbd9eSAndrey A. Chernov 			return (RANGE_ERROR);
2709d88e270STim J. Robbins 		} else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
27105a068e6SAndrey A. Chernov 			return (RANGE_NOMATCH);
2729d88e270STim J. Robbins 		} else if (*pattern == '\\' && !(flags & FNM_NOESCAPE))
2739d88e270STim J. Robbins 			pattern++;
2749d88e270STim J. Robbins 		pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
2759d88e270STim J. Robbins 		if (pclen == (size_t)-1 || pclen == (size_t)-2)
2769d88e270STim J. Robbins 			return (RANGE_NOMATCH);
2779d88e270STim J. Robbins 		pattern += pclen;
27805a068e6SAndrey A. Chernov 
2793deeb59dSAndrey A. Chernov 		if (flags & FNM_CASEFOLD)
2809d88e270STim J. Robbins 			c = towlower(c);
28195e4966cSWolfram Schneider 
2829d88e270STim J. Robbins 		if (*pattern == '-' && *(pattern + 1) != EOS &&
2839d88e270STim J. Robbins 		    *(pattern + 1) != ']') {
2849d88e270STim J. Robbins 			if (*++pattern == '\\' && !(flags & FNM_NOESCAPE))
2859d88e270STim J. Robbins 				if (*pattern != EOS)
2869d88e270STim J. Robbins 					pattern++;
2879d88e270STim J. Robbins 			pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs);
2889d88e270STim J. Robbins 			if (pclen == (size_t)-1 || pclen == (size_t)-2)
2899d88e270STim J. Robbins 				return (RANGE_NOMATCH);
2909d88e270STim J. Robbins 			pattern += pclen;
29158f0484fSRodney W. Grimes 			if (c2 == EOS)
292e2dbbd9eSAndrey A. Chernov 				return (RANGE_ERROR);
29395e4966cSWolfram Schneider 
2943deeb59dSAndrey A. Chernov 			if (flags & FNM_CASEFOLD)
2959d88e270STim J. Robbins 				c2 = towlower(c2);
29695e4966cSWolfram Schneider 
2971daad8f5SAndrey A. Chernov 			if (table->__collate_load_error ?
2981daad8f5SAndrey A. Chernov 			    c <= test && test <= c2 :
29912eae8c8SAndrey A. Chernov 			       __wcollate_range_cmp(c, test) <= 0
30012eae8c8SAndrey A. Chernov 			    && __wcollate_range_cmp(test, c2) <= 0
3011daad8f5SAndrey A. Chernov 			   )
30258f0484fSRodney W. Grimes 				ok = 1;
30358f0484fSRodney W. Grimes 		} else if (c == test)
30458f0484fSRodney W. Grimes 			ok = 1;
3059d88e270STim J. Robbins 	}
306e728d480SAndrey A. Chernov 
307e2dbbd9eSAndrey A. Chernov 	*newp = (char *)pattern;
308e2dbbd9eSAndrey A. Chernov 	return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
30958f0484fSRodney W. Grimes }
310