xref: /freebsd/lib/libc/gen/fnmatch.c (revision 139ac6b23976252cbbc61df7ac4a49afa3431d1c)
158f0484fSRodney W. Grimes /*
258f0484fSRodney W. Grimes  * Copyright (c) 1989, 1993, 1994
358f0484fSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
458f0484fSRodney W. Grimes  *
558f0484fSRodney W. Grimes  * This code is derived from software contributed to Berkeley by
658f0484fSRodney W. Grimes  * Guido van Rossum.
758f0484fSRodney W. Grimes  *
858f0484fSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
958f0484fSRodney W. Grimes  * modification, are permitted provided that the following conditions
1058f0484fSRodney W. Grimes  * are met:
1158f0484fSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
1258f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
1358f0484fSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
1458f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
1558f0484fSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
1658f0484fSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
1758f0484fSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
1858f0484fSRodney W. Grimes  *    without specific prior written permission.
1958f0484fSRodney W. Grimes  *
2058f0484fSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2158f0484fSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2258f0484fSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2358f0484fSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2458f0484fSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2558f0484fSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2658f0484fSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2758f0484fSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2858f0484fSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2958f0484fSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3058f0484fSRodney W. Grimes  * SUCH DAMAGE.
3158f0484fSRodney W. Grimes  */
3258f0484fSRodney W. Grimes 
3358f0484fSRodney W. Grimes #if defined(LIBC_SCCS) && !defined(lint)
3458f0484fSRodney W. Grimes static char sccsid[] = "@(#)fnmatch.c	8.2 (Berkeley) 4/16/94";
3558f0484fSRodney W. Grimes #endif /* LIBC_SCCS and not lint */
36b231cb39SDavid E. O'Brien #include <sys/cdefs.h>
37b231cb39SDavid E. O'Brien __FBSDID("$FreeBSD$");
3858f0484fSRodney W. Grimes 
/*
 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
 * Compares a filename or pathname to a pattern.
 */
4358f0484fSRodney W. Grimes 
/*
 * Some notes on multibyte character support:
 * 1. Patterns with illegal byte sequences match nothing.
 * 2. Illegal byte sequences in the "string" argument are handled by treating
 *    them as single-byte characters with a value of the first byte of the
 *    sequence cast to wchar_t.
 * 3. Multibyte conversion state objects (mbstate_t) are passed around and
 *    used for most, but not all, conversions. Further work will be required
 *    to support state-dependent encodings.
 */
549d88e270STim J. Robbins 
5558f0484fSRodney W. Grimes #include <fnmatch.h>
569d88e270STim J. Robbins #include <limits.h>
5758f0484fSRodney W. Grimes #include <string.h>
589d88e270STim J. Robbins #include <wchar.h>
599d88e270STim J. Robbins #include <wctype.h>
6058f0484fSRodney W. Grimes 
61edcfa072SAndrey A. Chernov #include "collate.h"
62edcfa072SAndrey A. Chernov 
/* Terminator of both the pattern and the string being matched. */
#define	EOS	'\0'

/*
 * Result codes of rangematch().  fnmatch1() treats RANGE_ERROR as "the '['
 * was not the start of a bracket expression" and compares it literally.
 */
#define RANGE_MATCH     1
#define RANGE_NOMATCH   0
#define RANGE_ERROR     (-1)

static int rangematch(const char *pattern, wchar_t test, int flags,
		char **newp, mbstate_t *patmbs);
static int fnmatch1(const char *pattern, const char *string,
		const char *stringstart, int flags, mbstate_t patmbs,
		mbstate_t strmbs);
7258f0484fSRodney W. Grimes 
/*
 * Match "string" against the shell pattern "pattern" under "flags"
 * (the FNM_* bits from <fnmatch.h>).
 *
 * This is a thin entry point: it hands both arguments to fnmatch1() with
 * zero-initialized multibyte conversion states, and passes "string" a
 * second time as the start-of-string marker used by the leading-period
 * checks.  The result of fnmatch1() (0 on a match, FNM_NOMATCH otherwise)
 * is returned unchanged.
 */
int
fnmatch(const char *pattern, const char *string, int flags)
{
	/* Static storage guarantees an all-zero (initial) conversion state. */
	static const mbstate_t initial;

	return (fnmatch1(pattern, string, string, flags, initial, initial));
}
829d88e270STim J. Robbins 
839d88e270STim J. Robbins static int
84*139ac6b2SJilles Tjoelker fnmatch1(pattern, string, stringstart, flags, patmbs, strmbs)
85*139ac6b2SJilles Tjoelker 	const char *pattern, *string, *stringstart;
869d88e270STim J. Robbins 	int flags;
879d88e270STim J. Robbins 	mbstate_t patmbs, strmbs;
889d88e270STim J. Robbins {
89e2dbbd9eSAndrey A. Chernov 	char *newp;
909d88e270STim J. Robbins 	char c;
919d88e270STim J. Robbins 	wchar_t pc, sc;
929d88e270STim J. Robbins 	size_t pclen, sclen;
9358f0484fSRodney W. Grimes 
94*139ac6b2SJilles Tjoelker 	for (;;) {
959d88e270STim J. Robbins 		pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs);
969d88e270STim J. Robbins 		if (pclen == (size_t)-1 || pclen == (size_t)-2)
979d88e270STim J. Robbins 			return (FNM_NOMATCH);
989d88e270STim J. Robbins 		pattern += pclen;
999d88e270STim J. Robbins 		sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs);
1009d88e270STim J. Robbins 		if (sclen == (size_t)-1 || sclen == (size_t)-2) {
1019d88e270STim J. Robbins 			sc = (unsigned char)*string;
1029d88e270STim J. Robbins 			sclen = 1;
1039d88e270STim J. Robbins 			memset(&strmbs, 0, sizeof(strmbs));
1049d88e270STim J. Robbins 		}
1059d88e270STim J. Robbins 		switch (pc) {
10658f0484fSRodney W. Grimes 		case EOS:
1079d88e270STim J. Robbins 			if ((flags & FNM_LEADING_DIR) && sc == '/')
1083deeb59dSAndrey A. Chernov 				return (0);
1099d88e270STim J. Robbins 			return (sc == EOS ? 0 : FNM_NOMATCH);
11058f0484fSRodney W. Grimes 		case '?':
1119d88e270STim J. Robbins 			if (sc == EOS)
11258f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
1139d88e270STim J. Robbins 			if (sc == '/' && (flags & FNM_PATHNAME))
11458f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
1159d88e270STim J. Robbins 			if (sc == '.' && (flags & FNM_PERIOD) &&
11658f0484fSRodney W. Grimes 			    (string == stringstart ||
11758f0484fSRodney W. Grimes 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
11858f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
1199d88e270STim J. Robbins 			string += sclen;
12058f0484fSRodney W. Grimes 			break;
12158f0484fSRodney W. Grimes 		case '*':
12258f0484fSRodney W. Grimes 			c = *pattern;
12358f0484fSRodney W. Grimes 			/* Collapse multiple stars. */
12458f0484fSRodney W. Grimes 			while (c == '*')
12558f0484fSRodney W. Grimes 				c = *++pattern;
12658f0484fSRodney W. Grimes 
1279d88e270STim J. Robbins 			if (sc == '.' && (flags & FNM_PERIOD) &&
12858f0484fSRodney W. Grimes 			    (string == stringstart ||
12958f0484fSRodney W. Grimes 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
13058f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
13158f0484fSRodney W. Grimes 
13258f0484fSRodney W. Grimes 			/* Optimize for pattern with * at end or before /. */
13358f0484fSRodney W. Grimes 			if (c == EOS)
13458f0484fSRodney W. Grimes 				if (flags & FNM_PATHNAME)
135298c8e3dSJohn Polstra 					return ((flags & FNM_LEADING_DIR) ||
136298c8e3dSJohn Polstra 					    strchr(string, '/') == NULL ?
13758f0484fSRodney W. Grimes 					    0 : FNM_NOMATCH);
13858f0484fSRodney W. Grimes 				else
13958f0484fSRodney W. Grimes 					return (0);
14058f0484fSRodney W. Grimes 			else if (c == '/' && flags & FNM_PATHNAME) {
14158f0484fSRodney W. Grimes 				if ((string = strchr(string, '/')) == NULL)
14258f0484fSRodney W. Grimes 					return (FNM_NOMATCH);
14358f0484fSRodney W. Grimes 				break;
14458f0484fSRodney W. Grimes 			}
14558f0484fSRodney W. Grimes 
14658f0484fSRodney W. Grimes 			/* General case, use recursion. */
1479d88e270STim J. Robbins 			while (sc != EOS) {
148*139ac6b2SJilles Tjoelker 				if (!fnmatch1(pattern, string, stringstart,
149*139ac6b2SJilles Tjoelker 				    flags, patmbs, strmbs))
15058f0484fSRodney W. Grimes 					return (0);
1519d88e270STim J. Robbins 				sclen = mbrtowc(&sc, string, MB_LEN_MAX,
1529d88e270STim J. Robbins 				    &strmbs);
1539d88e270STim J. Robbins 				if (sclen == (size_t)-1 ||
1549d88e270STim J. Robbins 				    sclen == (size_t)-2) {
1559d88e270STim J. Robbins 					sc = (unsigned char)*string;
1569d88e270STim J. Robbins 					sclen = 1;
1579d88e270STim J. Robbins 					memset(&strmbs, 0, sizeof(strmbs));
1589d88e270STim J. Robbins 				}
1599d88e270STim J. Robbins 				if (sc == '/' && flags & FNM_PATHNAME)
16058f0484fSRodney W. Grimes 					break;
1619d88e270STim J. Robbins 				string += sclen;
16258f0484fSRodney W. Grimes 			}
16358f0484fSRodney W. Grimes 			return (FNM_NOMATCH);
16458f0484fSRodney W. Grimes 		case '[':
1659d88e270STim J. Robbins 			if (sc == EOS)
16658f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
1679d88e270STim J. Robbins 			if (sc == '/' && (flags & FNM_PATHNAME))
16858f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
1699d88e270STim J. Robbins 			if (sc == '.' && (flags & FNM_PERIOD) &&
17005a068e6SAndrey A. Chernov 			    (string == stringstart ||
17105a068e6SAndrey A. Chernov 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
17205a068e6SAndrey A. Chernov 				return (FNM_NOMATCH);
17305a068e6SAndrey A. Chernov 
1749d88e270STim J. Robbins 			switch (rangematch(pattern, sc, flags, &newp,
1759d88e270STim J. Robbins 			    &patmbs)) {
176e2dbbd9eSAndrey A. Chernov 			case RANGE_ERROR:
177e2dbbd9eSAndrey A. Chernov 				goto norm;
178e2dbbd9eSAndrey A. Chernov 			case RANGE_MATCH:
179e2dbbd9eSAndrey A. Chernov 				pattern = newp;
180e2dbbd9eSAndrey A. Chernov 				break;
181e2dbbd9eSAndrey A. Chernov 			case RANGE_NOMATCH:
18258f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
183e2dbbd9eSAndrey A. Chernov 			}
1849d88e270STim J. Robbins 			string += sclen;
18558f0484fSRodney W. Grimes 			break;
18658f0484fSRodney W. Grimes 		case '\\':
18758f0484fSRodney W. Grimes 			if (!(flags & FNM_NOESCAPE)) {
1889d88e270STim J. Robbins 				pclen = mbrtowc(&pc, pattern, MB_LEN_MAX,
1899d88e270STim J. Robbins 				    &patmbs);
1909d88e270STim J. Robbins 				if (pclen == (size_t)-1 || pclen == (size_t)-2)
1919d88e270STim J. Robbins 					return (FNM_NOMATCH);
1929d88e270STim J. Robbins 				if (pclen == 0)
1939d88e270STim J. Robbins 					pc = '\\';
1949d88e270STim J. Robbins 				pattern += pclen;
19558f0484fSRodney W. Grimes 			}
19658f0484fSRodney W. Grimes 			/* FALLTHROUGH */
19758f0484fSRodney W. Grimes 		default:
198e2dbbd9eSAndrey A. Chernov 		norm:
1999d88e270STim J. Robbins 			if (pc == sc)
20095e4966cSWolfram Schneider 				;
2013deeb59dSAndrey A. Chernov 			else if ((flags & FNM_CASEFOLD) &&
2029d88e270STim J. Robbins 				 (towlower(pc) == towlower(sc)))
20395e4966cSWolfram Schneider 				;
20495e4966cSWolfram Schneider 			else
20558f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
2069d88e270STim J. Robbins 			string += sclen;
20758f0484fSRodney W. Grimes 			break;
20858f0484fSRodney W. Grimes 		}
2099d88e270STim J. Robbins 	}
21058f0484fSRodney W. Grimes 	/* NOTREACHED */
21158f0484fSRodney W. Grimes }
21258f0484fSRodney W. Grimes 
213e2dbbd9eSAndrey A. Chernov static int
2149d88e270STim J. Robbins rangematch(pattern, test, flags, newp, patmbs)
21558f0484fSRodney W. Grimes 	const char *pattern;
2169d88e270STim J. Robbins 	wchar_t test;
2173deeb59dSAndrey A. Chernov 	int flags;
218e2dbbd9eSAndrey A. Chernov 	char **newp;
2199d88e270STim J. Robbins 	mbstate_t *patmbs;
22058f0484fSRodney W. Grimes {
221e728d480SAndrey A. Chernov 	int negate, ok;
2229d88e270STim J. Robbins 	wchar_t c, c2;
2239d88e270STim J. Robbins 	size_t pclen;
2249d88e270STim J. Robbins 	const char *origpat;
22558f0484fSRodney W. Grimes 
22658f0484fSRodney W. Grimes 	/*
22758f0484fSRodney W. Grimes 	 * A bracket expression starting with an unquoted circumflex
22858f0484fSRodney W. Grimes 	 * character produces unspecified results (IEEE 1003.2-1992,
22958f0484fSRodney W. Grimes 	 * 3.13.2).  This implementation treats it like '!', for
23058f0484fSRodney W. Grimes 	 * consistency with the regular expression syntax.
23158f0484fSRodney W. Grimes 	 * J.T. Conklin (conklin@ngai.kaleida.com)
23258f0484fSRodney W. Grimes 	 */
23305a068e6SAndrey A. Chernov 	if ( (negate = (*pattern == '!' || *pattern == '^')) )
23458f0484fSRodney W. Grimes 		++pattern;
23558f0484fSRodney W. Grimes 
2363deeb59dSAndrey A. Chernov 	if (flags & FNM_CASEFOLD)
2379d88e270STim J. Robbins 		test = towlower(test);
23895e4966cSWolfram Schneider 
239e2dbbd9eSAndrey A. Chernov 	/*
240e2dbbd9eSAndrey A. Chernov 	 * A right bracket shall lose its special meaning and represent
241e2dbbd9eSAndrey A. Chernov 	 * itself in a bracket expression if it occurs first in the list.
242e2dbbd9eSAndrey A. Chernov 	 * -- POSIX.2 2.8.3.2
243e2dbbd9eSAndrey A. Chernov 	 */
244e728d480SAndrey A. Chernov 	ok = 0;
2459d88e270STim J. Robbins 	origpat = pattern;
2469d88e270STim J. Robbins 	for (;;) {
2479d88e270STim J. Robbins 		if (*pattern == ']' && pattern > origpat) {
2489d88e270STim J. Robbins 			pattern++;
2499d88e270STim J. Robbins 			break;
2509d88e270STim J. Robbins 		} else if (*pattern == '\0') {
251e2dbbd9eSAndrey A. Chernov 			return (RANGE_ERROR);
2529d88e270STim J. Robbins 		} else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
25305a068e6SAndrey A. Chernov 			return (RANGE_NOMATCH);
2549d88e270STim J. Robbins 		} else if (*pattern == '\\' && !(flags & FNM_NOESCAPE))
2559d88e270STim J. Robbins 			pattern++;
2569d88e270STim J. Robbins 		pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
2579d88e270STim J. Robbins 		if (pclen == (size_t)-1 || pclen == (size_t)-2)
2589d88e270STim J. Robbins 			return (RANGE_NOMATCH);
2599d88e270STim J. Robbins 		pattern += pclen;
26005a068e6SAndrey A. Chernov 
2613deeb59dSAndrey A. Chernov 		if (flags & FNM_CASEFOLD)
2629d88e270STim J. Robbins 			c = towlower(c);
26395e4966cSWolfram Schneider 
2649d88e270STim J. Robbins 		if (*pattern == '-' && *(pattern + 1) != EOS &&
2659d88e270STim J. Robbins 		    *(pattern + 1) != ']') {
2669d88e270STim J. Robbins 			if (*++pattern == '\\' && !(flags & FNM_NOESCAPE))
2679d88e270STim J. Robbins 				if (*pattern != EOS)
2689d88e270STim J. Robbins 					pattern++;
2699d88e270STim J. Robbins 			pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs);
2709d88e270STim J. Robbins 			if (pclen == (size_t)-1 || pclen == (size_t)-2)
2719d88e270STim J. Robbins 				return (RANGE_NOMATCH);
2729d88e270STim J. Robbins 			pattern += pclen;
27358f0484fSRodney W. Grimes 			if (c2 == EOS)
274e2dbbd9eSAndrey A. Chernov 				return (RANGE_ERROR);
27595e4966cSWolfram Schneider 
2763deeb59dSAndrey A. Chernov 			if (flags & FNM_CASEFOLD)
2779d88e270STim J. Robbins 				c2 = towlower(c2);
27895e4966cSWolfram Schneider 
2796a575f6eSAndrey A. Chernov 			if (__collate_load_error ?
2806a575f6eSAndrey A. Chernov 			    c <= test && test <= c2 :
2816a575f6eSAndrey A. Chernov 			       __collate_range_cmp(c, test) <= 0
282edcfa072SAndrey A. Chernov 			    && __collate_range_cmp(test, c2) <= 0
28336a00a4bSAndrey A. Chernov 			   )
28458f0484fSRodney W. Grimes 				ok = 1;
28558f0484fSRodney W. Grimes 		} else if (c == test)
28658f0484fSRodney W. Grimes 			ok = 1;
2879d88e270STim J. Robbins 	}
288e728d480SAndrey A. Chernov 
289e2dbbd9eSAndrey A. Chernov 	*newp = (char *)pattern;
290e2dbbd9eSAndrey A. Chernov 	return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
29158f0484fSRodney W. Grimes }
292