xref: /freebsd/lib/libc/gen/fnmatch.c (revision 8a16b7a18f5d0b031f09832fd7752fba717e2a97)
1*8a16b7a1SPedro F. Giffuni /*-
2*8a16b7a1SPedro F. Giffuni  * SPDX-License-Identifier: BSD-3-Clause
3*8a16b7a1SPedro F. Giffuni  *
458f0484fSRodney W. Grimes  * Copyright (c) 1989, 1993, 1994
558f0484fSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
658f0484fSRodney W. Grimes  *
758f0484fSRodney W. Grimes  * This code is derived from software contributed to Berkeley by
858f0484fSRodney W. Grimes  * Guido van Rossum.
958f0484fSRodney W. Grimes  *
103c87aa1dSDavid Chisnall  * Copyright (c) 2011 The FreeBSD Foundation
113c87aa1dSDavid Chisnall  * All rights reserved.
123c87aa1dSDavid Chisnall  * Portions of this software were developed by David Chisnall
133c87aa1dSDavid Chisnall  * under sponsorship from the FreeBSD Foundation.
143c87aa1dSDavid Chisnall  *
1558f0484fSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
1658f0484fSRodney W. Grimes  * modification, are permitted provided that the following conditions
1758f0484fSRodney W. Grimes  * are met:
1858f0484fSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
1958f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
2058f0484fSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
2158f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
2258f0484fSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
23fbbd9655SWarner Losh  * 3. Neither the name of the University nor the names of its contributors
2458f0484fSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
2558f0484fSRodney W. Grimes  *    without specific prior written permission.
2658f0484fSRodney W. Grimes  *
2758f0484fSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2858f0484fSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2958f0484fSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
3058f0484fSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
3158f0484fSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
3258f0484fSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
3358f0484fSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3458f0484fSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3558f0484fSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3658f0484fSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3758f0484fSRodney W. Grimes  * SUCH DAMAGE.
3858f0484fSRodney W. Grimes  */
3958f0484fSRodney W. Grimes 
4058f0484fSRodney W. Grimes #if defined(LIBC_SCCS) && !defined(lint)
4158f0484fSRodney W. Grimes static char sccsid[] = "@(#)fnmatch.c	8.2 (Berkeley) 4/16/94";
4258f0484fSRodney W. Grimes #endif /* LIBC_SCCS and not lint */
43b231cb39SDavid E. O'Brien #include <sys/cdefs.h>
44b231cb39SDavid E. O'Brien __FBSDID("$FreeBSD$");
4558f0484fSRodney W. Grimes 
/*
 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
 * Compares a filename or pathname to a pattern.
 */
5058f0484fSRodney W. Grimes 
/*
 * Some notes on multibyte character support:
 * 1. Patterns with illegal byte sequences match nothing.
 * 2. Illegal byte sequences in the "string" argument are handled by treating
 *    them as single-byte characters with a value of the first byte of the
 *    sequence cast to wchar_t.
 * 3. Multibyte conversion state objects (mbstate_t) are passed around and
 *    used for most, but not all, conversions. Further work will be required
 *    to support state-dependent encodings.
 */
619d88e270STim J. Robbins 
6258f0484fSRodney W. Grimes #include <fnmatch.h>
639d88e270STim J. Robbins #include <limits.h>
6458f0484fSRodney W. Grimes #include <string.h>
659d88e270STim J. Robbins #include <wchar.h>
669d88e270STim J. Robbins #include <wctype.h>
6758f0484fSRodney W. Grimes 
681daad8f5SAndrey A. Chernov #include "collate.h"
691daad8f5SAndrey A. Chernov 
/* Terminator of both the pattern and the string. */
#define	EOS	'\0'

/*
 * Result codes of rangematch():
 *   RANGE_MATCH    the character is accepted by the bracket expression
 *   RANGE_NOMATCH  the character is rejected
 *   RANGE_ERROR    the pattern ended before the expression was closed
 */
#define RANGE_MATCH     1
#define RANGE_NOMATCH   0
#define RANGE_ERROR     (-1)

static int rangematch(const char *pattern, wchar_t test, int flags,
		char **newp, mbstate_t *patmbs);
static int fnmatch1(const char *pattern, const char *string,
		const char *stringstart, int flags, mbstate_t patmbs,
		mbstate_t strmbs);
7958f0484fSRodney W. Grimes 
/*
 * Match "string" against the shell-style "pattern" under the FNM_* option
 * bits given in "flags".
 *
 * Both arguments are handed to fnmatch1() with a zeroed multibyte
 * conversion state, and the string itself serves as the start-of-string
 * marker.
 *
 * Returns whatever fnmatch1() returns: 0 on a match, FNM_NOMATCH
 * otherwise.
 */
int
fnmatch(const char *pattern, const char *string, int flags)
{
	/* Static storage duration, hence all zeroes. */
	static const mbstate_t initial;

	return (fnmatch1(pattern, string, string, flags, initial, initial));
}
879d88e270STim J. Robbins 
889d88e270STim J. Robbins static int
8955b6b759SCraig Rodrigues fnmatch1(const char *pattern, const char *string, const char *stringstart,
9055b6b759SCraig Rodrigues     int flags, mbstate_t patmbs, mbstate_t strmbs)
919d88e270STim J. Robbins {
923caeab9dSJilles Tjoelker 	const char *bt_pattern, *bt_string;
933caeab9dSJilles Tjoelker 	mbstate_t bt_patmbs, bt_strmbs;
94e2dbbd9eSAndrey A. Chernov 	char *newp;
959d88e270STim J. Robbins 	char c;
969d88e270STim J. Robbins 	wchar_t pc, sc;
979d88e270STim J. Robbins 	size_t pclen, sclen;
9858f0484fSRodney W. Grimes 
993caeab9dSJilles Tjoelker 	bt_pattern = bt_string = NULL;
100139ac6b2SJilles Tjoelker 	for (;;) {
1019d88e270STim J. Robbins 		pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs);
1029d88e270STim J. Robbins 		if (pclen == (size_t)-1 || pclen == (size_t)-2)
1039d88e270STim J. Robbins 			return (FNM_NOMATCH);
1049d88e270STim J. Robbins 		pattern += pclen;
1059d88e270STim J. Robbins 		sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs);
1069d88e270STim J. Robbins 		if (sclen == (size_t)-1 || sclen == (size_t)-2) {
1079d88e270STim J. Robbins 			sc = (unsigned char)*string;
1089d88e270STim J. Robbins 			sclen = 1;
1099d88e270STim J. Robbins 			memset(&strmbs, 0, sizeof(strmbs));
1109d88e270STim J. Robbins 		}
1119d88e270STim J. Robbins 		switch (pc) {
11258f0484fSRodney W. Grimes 		case EOS:
1139d88e270STim J. Robbins 			if ((flags & FNM_LEADING_DIR) && sc == '/')
1143deeb59dSAndrey A. Chernov 				return (0);
1153caeab9dSJilles Tjoelker 			if (sc == EOS)
1163caeab9dSJilles Tjoelker 				return (0);
1173caeab9dSJilles Tjoelker 			goto backtrack;
11858f0484fSRodney W. Grimes 		case '?':
1199d88e270STim J. Robbins 			if (sc == EOS)
12058f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
1219d88e270STim J. Robbins 			if (sc == '/' && (flags & FNM_PATHNAME))
1223caeab9dSJilles Tjoelker 				goto backtrack;
1239d88e270STim J. Robbins 			if (sc == '.' && (flags & FNM_PERIOD) &&
12458f0484fSRodney W. Grimes 			    (string == stringstart ||
12558f0484fSRodney W. Grimes 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
1263caeab9dSJilles Tjoelker 				goto backtrack;
1279d88e270STim J. Robbins 			string += sclen;
12858f0484fSRodney W. Grimes 			break;
12958f0484fSRodney W. Grimes 		case '*':
13058f0484fSRodney W. Grimes 			c = *pattern;
13158f0484fSRodney W. Grimes 			/* Collapse multiple stars. */
13258f0484fSRodney W. Grimes 			while (c == '*')
13358f0484fSRodney W. Grimes 				c = *++pattern;
13458f0484fSRodney W. Grimes 
1359d88e270STim J. Robbins 			if (sc == '.' && (flags & FNM_PERIOD) &&
13658f0484fSRodney W. Grimes 			    (string == stringstart ||
13758f0484fSRodney W. Grimes 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
1383caeab9dSJilles Tjoelker 				goto backtrack;
13958f0484fSRodney W. Grimes 
14058f0484fSRodney W. Grimes 			/* Optimize for pattern with * at end or before /. */
14158f0484fSRodney W. Grimes 			if (c == EOS)
14258f0484fSRodney W. Grimes 				if (flags & FNM_PATHNAME)
143298c8e3dSJohn Polstra 					return ((flags & FNM_LEADING_DIR) ||
144298c8e3dSJohn Polstra 					    strchr(string, '/') == NULL ?
14558f0484fSRodney W. Grimes 					    0 : FNM_NOMATCH);
14658f0484fSRodney W. Grimes 				else
14758f0484fSRodney W. Grimes 					return (0);
14858f0484fSRodney W. Grimes 			else if (c == '/' && flags & FNM_PATHNAME) {
14958f0484fSRodney W. Grimes 				if ((string = strchr(string, '/')) == NULL)
15058f0484fSRodney W. Grimes 					return (FNM_NOMATCH);
15158f0484fSRodney W. Grimes 				break;
15258f0484fSRodney W. Grimes 			}
15358f0484fSRodney W. Grimes 
1543caeab9dSJilles Tjoelker 			/*
1553caeab9dSJilles Tjoelker 			 * First try the shortest match for the '*' that
1563caeab9dSJilles Tjoelker 			 * could work. We can forget any earlier '*' since
1573caeab9dSJilles Tjoelker 			 * there is no way having it match more characters
1583caeab9dSJilles Tjoelker 			 * can help us, given that we are already here.
1593caeab9dSJilles Tjoelker 			 */
1603caeab9dSJilles Tjoelker 			bt_pattern = pattern, bt_patmbs = patmbs;
1613caeab9dSJilles Tjoelker 			bt_string = string, bt_strmbs = strmbs;
16258f0484fSRodney W. Grimes 			break;
16358f0484fSRodney W. Grimes 		case '[':
1649d88e270STim J. Robbins 			if (sc == EOS)
16558f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
1669d88e270STim J. Robbins 			if (sc == '/' && (flags & FNM_PATHNAME))
1673caeab9dSJilles Tjoelker 				goto backtrack;
1689d88e270STim J. Robbins 			if (sc == '.' && (flags & FNM_PERIOD) &&
16905a068e6SAndrey A. Chernov 			    (string == stringstart ||
17005a068e6SAndrey A. Chernov 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
1713caeab9dSJilles Tjoelker 				goto backtrack;
17205a068e6SAndrey A. Chernov 
1739d88e270STim J. Robbins 			switch (rangematch(pattern, sc, flags, &newp,
1749d88e270STim J. Robbins 			    &patmbs)) {
175e2dbbd9eSAndrey A. Chernov 			case RANGE_ERROR:
176e2dbbd9eSAndrey A. Chernov 				goto norm;
177e2dbbd9eSAndrey A. Chernov 			case RANGE_MATCH:
178e2dbbd9eSAndrey A. Chernov 				pattern = newp;
179e2dbbd9eSAndrey A. Chernov 				break;
180e2dbbd9eSAndrey A. Chernov 			case RANGE_NOMATCH:
1813caeab9dSJilles Tjoelker 				goto backtrack;
182e2dbbd9eSAndrey A. Chernov 			}
1839d88e270STim J. Robbins 			string += sclen;
18458f0484fSRodney W. Grimes 			break;
18558f0484fSRodney W. Grimes 		case '\\':
18658f0484fSRodney W. Grimes 			if (!(flags & FNM_NOESCAPE)) {
1879d88e270STim J. Robbins 				pclen = mbrtowc(&pc, pattern, MB_LEN_MAX,
1889d88e270STim J. Robbins 				    &patmbs);
189303cbb93SPedro F. Giffuni 				if (pclen == 0 || pclen == (size_t)-1 ||
190303cbb93SPedro F. Giffuni 				    pclen == (size_t)-2)
1919d88e270STim J. Robbins 					return (FNM_NOMATCH);
1929d88e270STim J. Robbins 				pattern += pclen;
19358f0484fSRodney W. Grimes 			}
19458f0484fSRodney W. Grimes 			/* FALLTHROUGH */
19558f0484fSRodney W. Grimes 		default:
196e2dbbd9eSAndrey A. Chernov 		norm:
1973caeab9dSJilles Tjoelker 			string += sclen;
1989d88e270STim J. Robbins 			if (pc == sc)
19995e4966cSWolfram Schneider 				;
2003deeb59dSAndrey A. Chernov 			else if ((flags & FNM_CASEFOLD) &&
2019d88e270STim J. Robbins 				 (towlower(pc) == towlower(sc)))
20295e4966cSWolfram Schneider 				;
2033caeab9dSJilles Tjoelker 			else {
2043caeab9dSJilles Tjoelker 		backtrack:
2053caeab9dSJilles Tjoelker 				/*
2063caeab9dSJilles Tjoelker 				 * If we have a mismatch (other than hitting
2073caeab9dSJilles Tjoelker 				 * the end of the string), go back to the last
2083caeab9dSJilles Tjoelker 				 * '*' seen and have it match one additional
2093caeab9dSJilles Tjoelker 				 * character.
2103caeab9dSJilles Tjoelker 				 */
2113caeab9dSJilles Tjoelker 				if (bt_pattern == NULL)
21258f0484fSRodney W. Grimes 					return (FNM_NOMATCH);
2133caeab9dSJilles Tjoelker 				sclen = mbrtowc(&sc, bt_string, MB_LEN_MAX,
2143caeab9dSJilles Tjoelker 				    &bt_strmbs);
2153caeab9dSJilles Tjoelker 				if (sclen == (size_t)-1 ||
2163caeab9dSJilles Tjoelker 				    sclen == (size_t)-2) {
2173caeab9dSJilles Tjoelker 					sc = (unsigned char)*bt_string;
2183caeab9dSJilles Tjoelker 					sclen = 1;
2193caeab9dSJilles Tjoelker 					memset(&bt_strmbs, 0,
2203caeab9dSJilles Tjoelker 					    sizeof(bt_strmbs));
2213caeab9dSJilles Tjoelker 				}
2223caeab9dSJilles Tjoelker 				if (sc == EOS)
2233caeab9dSJilles Tjoelker 					return (FNM_NOMATCH);
2243caeab9dSJilles Tjoelker 				if (sc == '/' && flags & FNM_PATHNAME)
2253caeab9dSJilles Tjoelker 					return (FNM_NOMATCH);
2263caeab9dSJilles Tjoelker 				bt_string += sclen;
2273caeab9dSJilles Tjoelker 				pattern = bt_pattern, patmbs = bt_patmbs;
2283caeab9dSJilles Tjoelker 				string = bt_string, strmbs = bt_strmbs;
2293caeab9dSJilles Tjoelker 			}
23058f0484fSRodney W. Grimes 			break;
23158f0484fSRodney W. Grimes 		}
2329d88e270STim J. Robbins 	}
23358f0484fSRodney W. Grimes 	/* NOTREACHED */
23458f0484fSRodney W. Grimes }
23558f0484fSRodney W. Grimes 
236e2dbbd9eSAndrey A. Chernov static int
23755b6b759SCraig Rodrigues rangematch(const char *pattern, wchar_t test, int flags, char **newp,
23855b6b759SCraig Rodrigues     mbstate_t *patmbs)
23958f0484fSRodney W. Grimes {
240e728d480SAndrey A. Chernov 	int negate, ok;
2419d88e270STim J. Robbins 	wchar_t c, c2;
2429d88e270STim J. Robbins 	size_t pclen;
2439d88e270STim J. Robbins 	const char *origpat;
2441daad8f5SAndrey A. Chernov 	struct xlocale_collate *table =
2451daad8f5SAndrey A. Chernov 		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
24658f0484fSRodney W. Grimes 
24758f0484fSRodney W. Grimes 	/*
24858f0484fSRodney W. Grimes 	 * A bracket expression starting with an unquoted circumflex
24958f0484fSRodney W. Grimes 	 * character produces unspecified results (IEEE 1003.2-1992,
25058f0484fSRodney W. Grimes 	 * 3.13.2).  This implementation treats it like '!', for
25158f0484fSRodney W. Grimes 	 * consistency with the regular expression syntax.
25258f0484fSRodney W. Grimes 	 * J.T. Conklin (conklin@ngai.kaleida.com)
25358f0484fSRodney W. Grimes 	 */
25405a068e6SAndrey A. Chernov 	if ((negate = (*pattern == '!' || *pattern == '^')))
25558f0484fSRodney W. Grimes 		++pattern;
25658f0484fSRodney W. Grimes 
2573deeb59dSAndrey A. Chernov 	if (flags & FNM_CASEFOLD)
2589d88e270STim J. Robbins 		test = towlower(test);
25995e4966cSWolfram Schneider 
260e2dbbd9eSAndrey A. Chernov 	/*
261e2dbbd9eSAndrey A. Chernov 	 * A right bracket shall lose its special meaning and represent
262e2dbbd9eSAndrey A. Chernov 	 * itself in a bracket expression if it occurs first in the list.
263e2dbbd9eSAndrey A. Chernov 	 * -- POSIX.2 2.8.3.2
264e2dbbd9eSAndrey A. Chernov 	 */
265e728d480SAndrey A. Chernov 	ok = 0;
2669d88e270STim J. Robbins 	origpat = pattern;
2679d88e270STim J. Robbins 	for (;;) {
2689d88e270STim J. Robbins 		if (*pattern == ']' && pattern > origpat) {
2699d88e270STim J. Robbins 			pattern++;
2709d88e270STim J. Robbins 			break;
2719d88e270STim J. Robbins 		} else if (*pattern == '\0') {
272e2dbbd9eSAndrey A. Chernov 			return (RANGE_ERROR);
2739d88e270STim J. Robbins 		} else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
27405a068e6SAndrey A. Chernov 			return (RANGE_NOMATCH);
2759d88e270STim J. Robbins 		} else if (*pattern == '\\' && !(flags & FNM_NOESCAPE))
2769d88e270STim J. Robbins 			pattern++;
2779d88e270STim J. Robbins 		pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
2789d88e270STim J. Robbins 		if (pclen == (size_t)-1 || pclen == (size_t)-2)
2799d88e270STim J. Robbins 			return (RANGE_NOMATCH);
2809d88e270STim J. Robbins 		pattern += pclen;
28105a068e6SAndrey A. Chernov 
2823deeb59dSAndrey A. Chernov 		if (flags & FNM_CASEFOLD)
2839d88e270STim J. Robbins 			c = towlower(c);
28495e4966cSWolfram Schneider 
2859d88e270STim J. Robbins 		if (*pattern == '-' && *(pattern + 1) != EOS &&
2869d88e270STim J. Robbins 		    *(pattern + 1) != ']') {
2879d88e270STim J. Robbins 			if (*++pattern == '\\' && !(flags & FNM_NOESCAPE))
2889d88e270STim J. Robbins 				if (*pattern != EOS)
2899d88e270STim J. Robbins 					pattern++;
2909d88e270STim J. Robbins 			pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs);
2919d88e270STim J. Robbins 			if (pclen == (size_t)-1 || pclen == (size_t)-2)
2929d88e270STim J. Robbins 				return (RANGE_NOMATCH);
2939d88e270STim J. Robbins 			pattern += pclen;
29458f0484fSRodney W. Grimes 			if (c2 == EOS)
295e2dbbd9eSAndrey A. Chernov 				return (RANGE_ERROR);
29695e4966cSWolfram Schneider 
2973deeb59dSAndrey A. Chernov 			if (flags & FNM_CASEFOLD)
2989d88e270STim J. Robbins 				c2 = towlower(c2);
29995e4966cSWolfram Schneider 
3001daad8f5SAndrey A. Chernov 			if (table->__collate_load_error ?
3011daad8f5SAndrey A. Chernov 			    c <= test && test <= c2 :
30212eae8c8SAndrey A. Chernov 			       __wcollate_range_cmp(c, test) <= 0
30312eae8c8SAndrey A. Chernov 			    && __wcollate_range_cmp(test, c2) <= 0
3041daad8f5SAndrey A. Chernov 			   )
30558f0484fSRodney W. Grimes 				ok = 1;
30658f0484fSRodney W. Grimes 		} else if (c == test)
30758f0484fSRodney W. Grimes 			ok = 1;
3089d88e270STim J. Robbins 	}
309e728d480SAndrey A. Chernov 
310e2dbbd9eSAndrey A. Chernov 	*newp = (char *)pattern;
311e2dbbd9eSAndrey A. Chernov 	return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
31258f0484fSRodney W. Grimes }
313