xref: /freebsd/lib/libc/gen/fnmatch.c (revision 55b6b759c89e6a44468ff442f11bc2a4a1775699)
158f0484fSRodney W. Grimes /*
258f0484fSRodney W. Grimes  * Copyright (c) 1989, 1993, 1994
358f0484fSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
458f0484fSRodney W. Grimes  *
558f0484fSRodney W. Grimes  * This code is derived from software contributed to Berkeley by
658f0484fSRodney W. Grimes  * Guido van Rossum.
758f0484fSRodney W. Grimes  *
83c87aa1dSDavid Chisnall  * Copyright (c) 2011 The FreeBSD Foundation
93c87aa1dSDavid Chisnall  * All rights reserved.
103c87aa1dSDavid Chisnall  * Portions of this software were developed by David Chisnall
113c87aa1dSDavid Chisnall  * under sponsorship from the FreeBSD Foundation.
123c87aa1dSDavid Chisnall  *
1358f0484fSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
1458f0484fSRodney W. Grimes  * modification, are permitted provided that the following conditions
1558f0484fSRodney W. Grimes  * are met:
1658f0484fSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
1758f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
1858f0484fSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
1958f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
2058f0484fSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
2158f0484fSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
2258f0484fSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
2358f0484fSRodney W. Grimes  *    without specific prior written permission.
2458f0484fSRodney W. Grimes  *
2558f0484fSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2658f0484fSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2758f0484fSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2858f0484fSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2958f0484fSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
3058f0484fSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
3158f0484fSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3258f0484fSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3358f0484fSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3458f0484fSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3558f0484fSRodney W. Grimes  * SUCH DAMAGE.
3658f0484fSRodney W. Grimes  */
3758f0484fSRodney W. Grimes 
3858f0484fSRodney W. Grimes #if defined(LIBC_SCCS) && !defined(lint)
3958f0484fSRodney W. Grimes static char sccsid[] = "@(#)fnmatch.c	8.2 (Berkeley) 4/16/94";
4058f0484fSRodney W. Grimes #endif /* LIBC_SCCS and not lint */
41b231cb39SDavid E. O'Brien #include <sys/cdefs.h>
42b231cb39SDavid E. O'Brien __FBSDID("$FreeBSD$");
4358f0484fSRodney W. Grimes 
4458f0484fSRodney W. Grimes /*
4558f0484fSRodney W. Grimes  * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
4658f0484fSRodney W. Grimes  * Compares a filename or pathname to a pattern.
4758f0484fSRodney W. Grimes  */
4858f0484fSRodney W. Grimes 
499d88e270STim J. Robbins /*
509d88e270STim J. Robbins  * Some notes on multibyte character support:
519d88e270STim J. Robbins  * 1. Patterns with illegal byte sequences match nothing.
529d88e270STim J. Robbins  * 2. Illegal byte sequences in the "string" argument are handled by treating
539d88e270STim J. Robbins  *    them as single-byte characters with a value of the first byte of the
549d88e270STim J. Robbins  *    sequence cast to wchar_t.
559d88e270STim J. Robbins  * 3. Multibyte conversion state objects (mbstate_t) are passed around and
569d88e270STim J. Robbins  *    used for most, but not all, conversions. Further work will be required
579d88e270STim J. Robbins  *    to support state-dependent encodings.
589d88e270STim J. Robbins  */
599d88e270STim J. Robbins 
6058f0484fSRodney W. Grimes #include <fnmatch.h>
619d88e270STim J. Robbins #include <limits.h>
6258f0484fSRodney W. Grimes #include <string.h>
639d88e270STim J. Robbins #include <wchar.h>
649d88e270STim J. Robbins #include <wctype.h>
6558f0484fSRodney W. Grimes 
66edcfa072SAndrey A. Chernov #include "collate.h"
67edcfa072SAndrey A. Chernov 
/* End-of-string marker used when scanning patterns and strings. */
#define	EOS	'\0'

/* Result codes returned by rangematch(). */
enum {
	RANGE_ERROR = -1,	/* not a well-formed bracket expression */
	RANGE_NOMATCH = 0,	/* character is not selected by the bracket */
	RANGE_MATCH = 1		/* character is selected by the bracket */
};

static int rangematch(const char *pattern, wchar_t test, int flags,
    char **newp, mbstate_t *patmbs);
static int fnmatch1(const char *pattern, const char *string,
    const char *stringstart, int flags, mbstate_t patmbs, mbstate_t strmbs);
7758f0484fSRodney W. Grimes 
/*
 * Public entry point: match "string" against the shell pattern "pattern"
 * under the given FNM_* flags.
 *
 * Both the pattern and the string are decoded starting from a zeroed
 * (initial) multibyte conversion state, and the string itself is passed
 * as the "start of string" reference.  The result of fnmatch1() is
 * returned unchanged.
 */
int
fnmatch(const char *pattern, const char *string, int flags)
{
	mbstate_t start_state;

	/* A zero-filled mbstate_t describes the initial conversion state. */
	memset(&start_state, 0, sizeof(start_state));

	return (fnmatch1(pattern, string, string, flags, start_state,
	    start_state));
}
859d88e270STim J. Robbins 
869d88e270STim J. Robbins static int
87*55b6b759SCraig Rodrigues fnmatch1(const char *pattern, const char *string, const char *stringstart,
88*55b6b759SCraig Rodrigues     int flags, mbstate_t patmbs, mbstate_t strmbs)
899d88e270STim J. Robbins {
90e2dbbd9eSAndrey A. Chernov 	char *newp;
919d88e270STim J. Robbins 	char c;
929d88e270STim J. Robbins 	wchar_t pc, sc;
939d88e270STim J. Robbins 	size_t pclen, sclen;
9458f0484fSRodney W. Grimes 
95139ac6b2SJilles Tjoelker 	for (;;) {
969d88e270STim J. Robbins 		pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs);
979d88e270STim J. Robbins 		if (pclen == (size_t)-1 || pclen == (size_t)-2)
989d88e270STim J. Robbins 			return (FNM_NOMATCH);
999d88e270STim J. Robbins 		pattern += pclen;
1009d88e270STim J. Robbins 		sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs);
1019d88e270STim J. Robbins 		if (sclen == (size_t)-1 || sclen == (size_t)-2) {
1029d88e270STim J. Robbins 			sc = (unsigned char)*string;
1039d88e270STim J. Robbins 			sclen = 1;
1049d88e270STim J. Robbins 			memset(&strmbs, 0, sizeof(strmbs));
1059d88e270STim J. Robbins 		}
1069d88e270STim J. Robbins 		switch (pc) {
10758f0484fSRodney W. Grimes 		case EOS:
1089d88e270STim J. Robbins 			if ((flags & FNM_LEADING_DIR) && sc == '/')
1093deeb59dSAndrey A. Chernov 				return (0);
1109d88e270STim J. Robbins 			return (sc == EOS ? 0 : FNM_NOMATCH);
11158f0484fSRodney W. Grimes 		case '?':
1129d88e270STim J. Robbins 			if (sc == EOS)
11358f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
1149d88e270STim J. Robbins 			if (sc == '/' && (flags & FNM_PATHNAME))
11558f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
1169d88e270STim J. Robbins 			if (sc == '.' && (flags & FNM_PERIOD) &&
11758f0484fSRodney W. Grimes 			    (string == stringstart ||
11858f0484fSRodney W. Grimes 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
11958f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
1209d88e270STim J. Robbins 			string += sclen;
12158f0484fSRodney W. Grimes 			break;
12258f0484fSRodney W. Grimes 		case '*':
12358f0484fSRodney W. Grimes 			c = *pattern;
12458f0484fSRodney W. Grimes 			/* Collapse multiple stars. */
12558f0484fSRodney W. Grimes 			while (c == '*')
12658f0484fSRodney W. Grimes 				c = *++pattern;
12758f0484fSRodney W. Grimes 
1289d88e270STim J. Robbins 			if (sc == '.' && (flags & FNM_PERIOD) &&
12958f0484fSRodney W. Grimes 			    (string == stringstart ||
13058f0484fSRodney W. Grimes 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
13158f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
13258f0484fSRodney W. Grimes 
13358f0484fSRodney W. Grimes 			/* Optimize for pattern with * at end or before /. */
13458f0484fSRodney W. Grimes 			if (c == EOS)
13558f0484fSRodney W. Grimes 				if (flags & FNM_PATHNAME)
136298c8e3dSJohn Polstra 					return ((flags & FNM_LEADING_DIR) ||
137298c8e3dSJohn Polstra 					    strchr(string, '/') == NULL ?
13858f0484fSRodney W. Grimes 					    0 : FNM_NOMATCH);
13958f0484fSRodney W. Grimes 				else
14058f0484fSRodney W. Grimes 					return (0);
14158f0484fSRodney W. Grimes 			else if (c == '/' && flags & FNM_PATHNAME) {
14258f0484fSRodney W. Grimes 				if ((string = strchr(string, '/')) == NULL)
14358f0484fSRodney W. Grimes 					return (FNM_NOMATCH);
14458f0484fSRodney W. Grimes 				break;
14558f0484fSRodney W. Grimes 			}
14658f0484fSRodney W. Grimes 
14758f0484fSRodney W. Grimes 			/* General case, use recursion. */
1489d88e270STim J. Robbins 			while (sc != EOS) {
149139ac6b2SJilles Tjoelker 				if (!fnmatch1(pattern, string, stringstart,
150139ac6b2SJilles Tjoelker 				    flags, patmbs, strmbs))
15158f0484fSRodney W. Grimes 					return (0);
1529d88e270STim J. Robbins 				sclen = mbrtowc(&sc, string, MB_LEN_MAX,
1539d88e270STim J. Robbins 				    &strmbs);
1549d88e270STim J. Robbins 				if (sclen == (size_t)-1 ||
1559d88e270STim J. Robbins 				    sclen == (size_t)-2) {
1569d88e270STim J. Robbins 					sc = (unsigned char)*string;
1579d88e270STim J. Robbins 					sclen = 1;
1589d88e270STim J. Robbins 					memset(&strmbs, 0, sizeof(strmbs));
1599d88e270STim J. Robbins 				}
1609d88e270STim J. Robbins 				if (sc == '/' && flags & FNM_PATHNAME)
16158f0484fSRodney W. Grimes 					break;
1629d88e270STim J. Robbins 				string += sclen;
16358f0484fSRodney W. Grimes 			}
16458f0484fSRodney W. Grimes 			return (FNM_NOMATCH);
16558f0484fSRodney W. Grimes 		case '[':
1669d88e270STim J. Robbins 			if (sc == EOS)
16758f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
1689d88e270STim J. Robbins 			if (sc == '/' && (flags & FNM_PATHNAME))
16958f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
1709d88e270STim J. Robbins 			if (sc == '.' && (flags & FNM_PERIOD) &&
17105a068e6SAndrey A. Chernov 			    (string == stringstart ||
17205a068e6SAndrey A. Chernov 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
17305a068e6SAndrey A. Chernov 				return (FNM_NOMATCH);
17405a068e6SAndrey A. Chernov 
1759d88e270STim J. Robbins 			switch (rangematch(pattern, sc, flags, &newp,
1769d88e270STim J. Robbins 			    &patmbs)) {
177e2dbbd9eSAndrey A. Chernov 			case RANGE_ERROR:
178e2dbbd9eSAndrey A. Chernov 				goto norm;
179e2dbbd9eSAndrey A. Chernov 			case RANGE_MATCH:
180e2dbbd9eSAndrey A. Chernov 				pattern = newp;
181e2dbbd9eSAndrey A. Chernov 				break;
182e2dbbd9eSAndrey A. Chernov 			case RANGE_NOMATCH:
18358f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
184e2dbbd9eSAndrey A. Chernov 			}
1859d88e270STim J. Robbins 			string += sclen;
18658f0484fSRodney W. Grimes 			break;
18758f0484fSRodney W. Grimes 		case '\\':
18858f0484fSRodney W. Grimes 			if (!(flags & FNM_NOESCAPE)) {
1899d88e270STim J. Robbins 				pclen = mbrtowc(&pc, pattern, MB_LEN_MAX,
1909d88e270STim J. Robbins 				    &patmbs);
1919d88e270STim J. Robbins 				if (pclen == (size_t)-1 || pclen == (size_t)-2)
1929d88e270STim J. Robbins 					return (FNM_NOMATCH);
1939d88e270STim J. Robbins 				pattern += pclen;
19458f0484fSRodney W. Grimes 			}
19558f0484fSRodney W. Grimes 			/* FALLTHROUGH */
19658f0484fSRodney W. Grimes 		default:
197e2dbbd9eSAndrey A. Chernov 		norm:
1989d88e270STim J. Robbins 			if (pc == sc)
19995e4966cSWolfram Schneider 				;
2003deeb59dSAndrey A. Chernov 			else if ((flags & FNM_CASEFOLD) &&
2019d88e270STim J. Robbins 				 (towlower(pc) == towlower(sc)))
20295e4966cSWolfram Schneider 				;
20395e4966cSWolfram Schneider 			else
20458f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
2059d88e270STim J. Robbins 			string += sclen;
20658f0484fSRodney W. Grimes 			break;
20758f0484fSRodney W. Grimes 		}
2089d88e270STim J. Robbins 	}
20958f0484fSRodney W. Grimes 	/* NOTREACHED */
21058f0484fSRodney W. Grimes }
21158f0484fSRodney W. Grimes 
212e2dbbd9eSAndrey A. Chernov static int
213*55b6b759SCraig Rodrigues rangematch(const char *pattern, wchar_t test, int flags, char **newp,
214*55b6b759SCraig Rodrigues     mbstate_t *patmbs)
21558f0484fSRodney W. Grimes {
216e728d480SAndrey A. Chernov 	int negate, ok;
2179d88e270STim J. Robbins 	wchar_t c, c2;
2189d88e270STim J. Robbins 	size_t pclen;
2199d88e270STim J. Robbins 	const char *origpat;
2203c87aa1dSDavid Chisnall 	struct xlocale_collate *table =
2213c87aa1dSDavid Chisnall 		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
22258f0484fSRodney W. Grimes 
22358f0484fSRodney W. Grimes 	/*
22458f0484fSRodney W. Grimes 	 * A bracket expression starting with an unquoted circumflex
22558f0484fSRodney W. Grimes 	 * character produces unspecified results (IEEE 1003.2-1992,
22658f0484fSRodney W. Grimes 	 * 3.13.2).  This implementation treats it like '!', for
22758f0484fSRodney W. Grimes 	 * consistency with the regular expression syntax.
22858f0484fSRodney W. Grimes 	 * J.T. Conklin (conklin@ngai.kaleida.com)
22958f0484fSRodney W. Grimes 	 */
23005a068e6SAndrey A. Chernov 	if ( (negate = (*pattern == '!' || *pattern == '^')) )
23158f0484fSRodney W. Grimes 		++pattern;
23258f0484fSRodney W. Grimes 
2333deeb59dSAndrey A. Chernov 	if (flags & FNM_CASEFOLD)
2349d88e270STim J. Robbins 		test = towlower(test);
23595e4966cSWolfram Schneider 
236e2dbbd9eSAndrey A. Chernov 	/*
237e2dbbd9eSAndrey A. Chernov 	 * A right bracket shall lose its special meaning and represent
238e2dbbd9eSAndrey A. Chernov 	 * itself in a bracket expression if it occurs first in the list.
239e2dbbd9eSAndrey A. Chernov 	 * -- POSIX.2 2.8.3.2
240e2dbbd9eSAndrey A. Chernov 	 */
241e728d480SAndrey A. Chernov 	ok = 0;
2429d88e270STim J. Robbins 	origpat = pattern;
2439d88e270STim J. Robbins 	for (;;) {
2449d88e270STim J. Robbins 		if (*pattern == ']' && pattern > origpat) {
2459d88e270STim J. Robbins 			pattern++;
2469d88e270STim J. Robbins 			break;
2479d88e270STim J. Robbins 		} else if (*pattern == '\0') {
248e2dbbd9eSAndrey A. Chernov 			return (RANGE_ERROR);
2499d88e270STim J. Robbins 		} else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
25005a068e6SAndrey A. Chernov 			return (RANGE_NOMATCH);
2519d88e270STim J. Robbins 		} else if (*pattern == '\\' && !(flags & FNM_NOESCAPE))
2529d88e270STim J. Robbins 			pattern++;
2539d88e270STim J. Robbins 		pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
2549d88e270STim J. Robbins 		if (pclen == (size_t)-1 || pclen == (size_t)-2)
2559d88e270STim J. Robbins 			return (RANGE_NOMATCH);
2569d88e270STim J. Robbins 		pattern += pclen;
25705a068e6SAndrey A. Chernov 
2583deeb59dSAndrey A. Chernov 		if (flags & FNM_CASEFOLD)
2599d88e270STim J. Robbins 			c = towlower(c);
26095e4966cSWolfram Schneider 
2619d88e270STim J. Robbins 		if (*pattern == '-' && *(pattern + 1) != EOS &&
2629d88e270STim J. Robbins 		    *(pattern + 1) != ']') {
2639d88e270STim J. Robbins 			if (*++pattern == '\\' && !(flags & FNM_NOESCAPE))
2649d88e270STim J. Robbins 				if (*pattern != EOS)
2659d88e270STim J. Robbins 					pattern++;
2669d88e270STim J. Robbins 			pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs);
2679d88e270STim J. Robbins 			if (pclen == (size_t)-1 || pclen == (size_t)-2)
2689d88e270STim J. Robbins 				return (RANGE_NOMATCH);
2699d88e270STim J. Robbins 			pattern += pclen;
27058f0484fSRodney W. Grimes 			if (c2 == EOS)
271e2dbbd9eSAndrey A. Chernov 				return (RANGE_ERROR);
27295e4966cSWolfram Schneider 
2733deeb59dSAndrey A. Chernov 			if (flags & FNM_CASEFOLD)
2749d88e270STim J. Robbins 				c2 = towlower(c2);
27595e4966cSWolfram Schneider 
2763c87aa1dSDavid Chisnall 			if (table->__collate_load_error ?
2776a575f6eSAndrey A. Chernov 			    c <= test && test <= c2 :
2783c87aa1dSDavid Chisnall 			       __collate_range_cmp(table, c, test) <= 0
2793c87aa1dSDavid Chisnall 			    && __collate_range_cmp(table, test, c2) <= 0
28036a00a4bSAndrey A. Chernov 			   )
28158f0484fSRodney W. Grimes 				ok = 1;
28258f0484fSRodney W. Grimes 		} else if (c == test)
28358f0484fSRodney W. Grimes 			ok = 1;
2849d88e270STim J. Robbins 	}
285e728d480SAndrey A. Chernov 
286e2dbbd9eSAndrey A. Chernov 	*newp = (char *)pattern;
287e2dbbd9eSAndrey A. Chernov 	return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
28858f0484fSRodney W. Grimes }
289