/* xref: /freebsd/lib/libc/gen/fnmatch.c (revision 5b5fa75acff11d871d0c90045f8c1a58fed85365) */
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1989, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Guido van Rossum.
 *
 * Copyright (c) 2011 The FreeBSD Foundation
 *
 * Portions of this software were developed by David Chisnall
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
3958f0484fSRodney W. Grimes 
40b231cb39SDavid E. O'Brien #include <sys/cdefs.h>
41c1920558SJohn Baldwin __SCCSID("@(#)fnmatch.c	8.2 (Berkeley) 4/16/94");
42b231cb39SDavid E. O'Brien __FBSDID("$FreeBSD$");
4358f0484fSRodney W. Grimes 
/*
 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
 * Compares a filename or pathname to a pattern.
 */
4858f0484fSRodney W. Grimes 
/*
 * Some notes on multibyte character support:
 * 1. Patterns with illegal byte sequences match nothing.
 * 2. Illegal byte sequences in the "string" argument are handled by treating
 *    them as single-byte characters with a value of the first byte of the
 *    sequence cast to wchar_t.
 * 3. Multibyte conversion state objects (mbstate_t) are passed around and
 *    used for most, but not all, conversions. Further work will be required
 *    to support state-dependent encodings.
 */
599d88e270STim J. Robbins 
6058f0484fSRodney W. Grimes #include <fnmatch.h>
619d88e270STim J. Robbins #include <limits.h>
6258f0484fSRodney W. Grimes #include <string.h>
639d88e270STim J. Robbins #include <wchar.h>
649d88e270STim J. Robbins #include <wctype.h>
6558f0484fSRodney W. Grimes 
661daad8f5SAndrey A. Chernov #include "collate.h"
671daad8f5SAndrey A. Chernov 
/* Terminator of both the pattern and the string being matched. */
#define	EOS	'\0'

/* Result codes returned by rangematch(). */
#define RANGE_MATCH     1	/* test character satisfies the bracket expression */
#define RANGE_NOMATCH   0	/* test character does not satisfy it */
#define RANGE_ERROR     (-1)	/* bracket expression is not terminated */

static int rangematch(const char *pattern, wchar_t test, int flags,
		char **newp, mbstate_t *patmbs);
static int fnmatch1(const char *pattern, const char *string,
		const char *stringstart, int flags, mbstate_t patmbs,
		mbstate_t strmbs);
7758f0484fSRodney W. Grimes 
/*
 * Match `string` against the shell pattern `pattern`.
 *
 * `flags` is a bitwise OR of FNM_* values; this file examines
 * FNM_NOESCAPE, FNM_PATHNAME, FNM_PERIOD, FNM_LEADING_DIR and FNM_CASEFOLD.
 * Returns 0 when the string matches and FNM_NOMATCH otherwise.
 *
 * The work is done by fnmatch1(); this wrapper only supplies the start of
 * the string and a zeroed multibyte conversion state for each argument.
 */
int
fnmatch(const char *pattern, const char *string, int flags)
{
	/* Static storage is zero-initialized, i.e. the initial state. */
	static const mbstate_t initial;

	return (fnmatch1(pattern, string, string, flags, initial, initial));
}
859d88e270STim J. Robbins 
869d88e270STim J. Robbins static int
8755b6b759SCraig Rodrigues fnmatch1(const char *pattern, const char *string, const char *stringstart,
8855b6b759SCraig Rodrigues     int flags, mbstate_t patmbs, mbstate_t strmbs)
899d88e270STim J. Robbins {
903caeab9dSJilles Tjoelker 	const char *bt_pattern, *bt_string;
913caeab9dSJilles Tjoelker 	mbstate_t bt_patmbs, bt_strmbs;
92e2dbbd9eSAndrey A. Chernov 	char *newp;
939d88e270STim J. Robbins 	char c;
949d88e270STim J. Robbins 	wchar_t pc, sc;
959d88e270STim J. Robbins 	size_t pclen, sclen;
9658f0484fSRodney W. Grimes 
973caeab9dSJilles Tjoelker 	bt_pattern = bt_string = NULL;
98139ac6b2SJilles Tjoelker 	for (;;) {
999d88e270STim J. Robbins 		pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs);
1009d88e270STim J. Robbins 		if (pclen == (size_t)-1 || pclen == (size_t)-2)
1019d88e270STim J. Robbins 			return (FNM_NOMATCH);
1029d88e270STim J. Robbins 		pattern += pclen;
1039d88e270STim J. Robbins 		sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs);
1049d88e270STim J. Robbins 		if (sclen == (size_t)-1 || sclen == (size_t)-2) {
1059d88e270STim J. Robbins 			sc = (unsigned char)*string;
1069d88e270STim J. Robbins 			sclen = 1;
1079d88e270STim J. Robbins 			memset(&strmbs, 0, sizeof(strmbs));
1089d88e270STim J. Robbins 		}
1099d88e270STim J. Robbins 		switch (pc) {
11058f0484fSRodney W. Grimes 		case EOS:
1119d88e270STim J. Robbins 			if ((flags & FNM_LEADING_DIR) && sc == '/')
1123deeb59dSAndrey A. Chernov 				return (0);
1133caeab9dSJilles Tjoelker 			if (sc == EOS)
1143caeab9dSJilles Tjoelker 				return (0);
1153caeab9dSJilles Tjoelker 			goto backtrack;
11658f0484fSRodney W. Grimes 		case '?':
1179d88e270STim J. Robbins 			if (sc == EOS)
11858f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
1199d88e270STim J. Robbins 			if (sc == '/' && (flags & FNM_PATHNAME))
1203caeab9dSJilles Tjoelker 				goto backtrack;
1219d88e270STim J. Robbins 			if (sc == '.' && (flags & FNM_PERIOD) &&
12258f0484fSRodney W. Grimes 			    (string == stringstart ||
12358f0484fSRodney W. Grimes 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
1243caeab9dSJilles Tjoelker 				goto backtrack;
1259d88e270STim J. Robbins 			string += sclen;
12658f0484fSRodney W. Grimes 			break;
12758f0484fSRodney W. Grimes 		case '*':
12858f0484fSRodney W. Grimes 			c = *pattern;
12958f0484fSRodney W. Grimes 			/* Collapse multiple stars. */
13058f0484fSRodney W. Grimes 			while (c == '*')
13158f0484fSRodney W. Grimes 				c = *++pattern;
13258f0484fSRodney W. Grimes 
1339d88e270STim J. Robbins 			if (sc == '.' && (flags & FNM_PERIOD) &&
13458f0484fSRodney W. Grimes 			    (string == stringstart ||
13558f0484fSRodney W. Grimes 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
1363caeab9dSJilles Tjoelker 				goto backtrack;
13758f0484fSRodney W. Grimes 
13858f0484fSRodney W. Grimes 			/* Optimize for pattern with * at end or before /. */
13958f0484fSRodney W. Grimes 			if (c == EOS)
14058f0484fSRodney W. Grimes 				if (flags & FNM_PATHNAME)
141298c8e3dSJohn Polstra 					return ((flags & FNM_LEADING_DIR) ||
142298c8e3dSJohn Polstra 					    strchr(string, '/') == NULL ?
14358f0484fSRodney W. Grimes 					    0 : FNM_NOMATCH);
14458f0484fSRodney W. Grimes 				else
14558f0484fSRodney W. Grimes 					return (0);
14658f0484fSRodney W. Grimes 			else if (c == '/' && flags & FNM_PATHNAME) {
14758f0484fSRodney W. Grimes 				if ((string = strchr(string, '/')) == NULL)
14858f0484fSRodney W. Grimes 					return (FNM_NOMATCH);
14958f0484fSRodney W. Grimes 				break;
15058f0484fSRodney W. Grimes 			}
15158f0484fSRodney W. Grimes 
1523caeab9dSJilles Tjoelker 			/*
1533caeab9dSJilles Tjoelker 			 * First try the shortest match for the '*' that
1543caeab9dSJilles Tjoelker 			 * could work. We can forget any earlier '*' since
1553caeab9dSJilles Tjoelker 			 * there is no way having it match more characters
1563caeab9dSJilles Tjoelker 			 * can help us, given that we are already here.
1573caeab9dSJilles Tjoelker 			 */
1583caeab9dSJilles Tjoelker 			bt_pattern = pattern, bt_patmbs = patmbs;
1593caeab9dSJilles Tjoelker 			bt_string = string, bt_strmbs = strmbs;
16058f0484fSRodney W. Grimes 			break;
16158f0484fSRodney W. Grimes 		case '[':
1629d88e270STim J. Robbins 			if (sc == EOS)
16358f0484fSRodney W. Grimes 				return (FNM_NOMATCH);
1649d88e270STim J. Robbins 			if (sc == '/' && (flags & FNM_PATHNAME))
1653caeab9dSJilles Tjoelker 				goto backtrack;
1669d88e270STim J. Robbins 			if (sc == '.' && (flags & FNM_PERIOD) &&
16705a068e6SAndrey A. Chernov 			    (string == stringstart ||
16805a068e6SAndrey A. Chernov 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
1693caeab9dSJilles Tjoelker 				goto backtrack;
17005a068e6SAndrey A. Chernov 
1719d88e270STim J. Robbins 			switch (rangematch(pattern, sc, flags, &newp,
1729d88e270STim J. Robbins 			    &patmbs)) {
173e2dbbd9eSAndrey A. Chernov 			case RANGE_ERROR:
174e2dbbd9eSAndrey A. Chernov 				goto norm;
175e2dbbd9eSAndrey A. Chernov 			case RANGE_MATCH:
176e2dbbd9eSAndrey A. Chernov 				pattern = newp;
177e2dbbd9eSAndrey A. Chernov 				break;
178e2dbbd9eSAndrey A. Chernov 			case RANGE_NOMATCH:
1793caeab9dSJilles Tjoelker 				goto backtrack;
180e2dbbd9eSAndrey A. Chernov 			}
1819d88e270STim J. Robbins 			string += sclen;
18258f0484fSRodney W. Grimes 			break;
18358f0484fSRodney W. Grimes 		case '\\':
18458f0484fSRodney W. Grimes 			if (!(flags & FNM_NOESCAPE)) {
1859d88e270STim J. Robbins 				pclen = mbrtowc(&pc, pattern, MB_LEN_MAX,
1869d88e270STim J. Robbins 				    &patmbs);
187303cbb93SPedro F. Giffuni 				if (pclen == 0 || pclen == (size_t)-1 ||
188303cbb93SPedro F. Giffuni 				    pclen == (size_t)-2)
1899d88e270STim J. Robbins 					return (FNM_NOMATCH);
1909d88e270STim J. Robbins 				pattern += pclen;
19158f0484fSRodney W. Grimes 			}
19258f0484fSRodney W. Grimes 			/* FALLTHROUGH */
19358f0484fSRodney W. Grimes 		default:
194e2dbbd9eSAndrey A. Chernov 		norm:
1953caeab9dSJilles Tjoelker 			string += sclen;
1969d88e270STim J. Robbins 			if (pc == sc)
19795e4966cSWolfram Schneider 				;
1983deeb59dSAndrey A. Chernov 			else if ((flags & FNM_CASEFOLD) &&
1999d88e270STim J. Robbins 				 (towlower(pc) == towlower(sc)))
20095e4966cSWolfram Schneider 				;
2013caeab9dSJilles Tjoelker 			else {
2023caeab9dSJilles Tjoelker 		backtrack:
2033caeab9dSJilles Tjoelker 				/*
2043caeab9dSJilles Tjoelker 				 * If we have a mismatch (other than hitting
2053caeab9dSJilles Tjoelker 				 * the end of the string), go back to the last
2063caeab9dSJilles Tjoelker 				 * '*' seen and have it match one additional
2073caeab9dSJilles Tjoelker 				 * character.
2083caeab9dSJilles Tjoelker 				 */
2093caeab9dSJilles Tjoelker 				if (bt_pattern == NULL)
21058f0484fSRodney W. Grimes 					return (FNM_NOMATCH);
2113caeab9dSJilles Tjoelker 				sclen = mbrtowc(&sc, bt_string, MB_LEN_MAX,
2123caeab9dSJilles Tjoelker 				    &bt_strmbs);
2133caeab9dSJilles Tjoelker 				if (sclen == (size_t)-1 ||
2143caeab9dSJilles Tjoelker 				    sclen == (size_t)-2) {
2153caeab9dSJilles Tjoelker 					sc = (unsigned char)*bt_string;
2163caeab9dSJilles Tjoelker 					sclen = 1;
2173caeab9dSJilles Tjoelker 					memset(&bt_strmbs, 0,
2183caeab9dSJilles Tjoelker 					    sizeof(bt_strmbs));
2193caeab9dSJilles Tjoelker 				}
2203caeab9dSJilles Tjoelker 				if (sc == EOS)
2213caeab9dSJilles Tjoelker 					return (FNM_NOMATCH);
2223caeab9dSJilles Tjoelker 				if (sc == '/' && flags & FNM_PATHNAME)
2233caeab9dSJilles Tjoelker 					return (FNM_NOMATCH);
2243caeab9dSJilles Tjoelker 				bt_string += sclen;
2253caeab9dSJilles Tjoelker 				pattern = bt_pattern, patmbs = bt_patmbs;
2263caeab9dSJilles Tjoelker 				string = bt_string, strmbs = bt_strmbs;
2273caeab9dSJilles Tjoelker 			}
22858f0484fSRodney W. Grimes 			break;
22958f0484fSRodney W. Grimes 		}
2309d88e270STim J. Robbins 	}
23158f0484fSRodney W. Grimes 	/* NOTREACHED */
23258f0484fSRodney W. Grimes }
23358f0484fSRodney W. Grimes 
234e2dbbd9eSAndrey A. Chernov static int
23555b6b759SCraig Rodrigues rangematch(const char *pattern, wchar_t test, int flags, char **newp,
23655b6b759SCraig Rodrigues     mbstate_t *patmbs)
23758f0484fSRodney W. Grimes {
238e728d480SAndrey A. Chernov 	int negate, ok;
2399d88e270STim J. Robbins 	wchar_t c, c2;
2409d88e270STim J. Robbins 	size_t pclen;
2419d88e270STim J. Robbins 	const char *origpat;
2421daad8f5SAndrey A. Chernov 	struct xlocale_collate *table =
2431daad8f5SAndrey A. Chernov 		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
24458f0484fSRodney W. Grimes 
24558f0484fSRodney W. Grimes 	/*
24658f0484fSRodney W. Grimes 	 * A bracket expression starting with an unquoted circumflex
24758f0484fSRodney W. Grimes 	 * character produces unspecified results (IEEE 1003.2-1992,
24858f0484fSRodney W. Grimes 	 * 3.13.2).  This implementation treats it like '!', for
24958f0484fSRodney W. Grimes 	 * consistency with the regular expression syntax.
25058f0484fSRodney W. Grimes 	 * J.T. Conklin (conklin@ngai.kaleida.com)
25158f0484fSRodney W. Grimes 	 */
25205a068e6SAndrey A. Chernov 	if ((negate = (*pattern == '!' || *pattern == '^')))
25358f0484fSRodney W. Grimes 		++pattern;
25458f0484fSRodney W. Grimes 
2553deeb59dSAndrey A. Chernov 	if (flags & FNM_CASEFOLD)
2569d88e270STim J. Robbins 		test = towlower(test);
25795e4966cSWolfram Schneider 
258e2dbbd9eSAndrey A. Chernov 	/*
259e2dbbd9eSAndrey A. Chernov 	 * A right bracket shall lose its special meaning and represent
260e2dbbd9eSAndrey A. Chernov 	 * itself in a bracket expression if it occurs first in the list.
261e2dbbd9eSAndrey A. Chernov 	 * -- POSIX.2 2.8.3.2
262e2dbbd9eSAndrey A. Chernov 	 */
263e728d480SAndrey A. Chernov 	ok = 0;
2649d88e270STim J. Robbins 	origpat = pattern;
2659d88e270STim J. Robbins 	for (;;) {
2669d88e270STim J. Robbins 		if (*pattern == ']' && pattern > origpat) {
2679d88e270STim J. Robbins 			pattern++;
2689d88e270STim J. Robbins 			break;
2699d88e270STim J. Robbins 		} else if (*pattern == '\0') {
270e2dbbd9eSAndrey A. Chernov 			return (RANGE_ERROR);
2719d88e270STim J. Robbins 		} else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
27205a068e6SAndrey A. Chernov 			return (RANGE_NOMATCH);
2739d88e270STim J. Robbins 		} else if (*pattern == '\\' && !(flags & FNM_NOESCAPE))
2749d88e270STim J. Robbins 			pattern++;
2759d88e270STim J. Robbins 		pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
2769d88e270STim J. Robbins 		if (pclen == (size_t)-1 || pclen == (size_t)-2)
2779d88e270STim J. Robbins 			return (RANGE_NOMATCH);
2789d88e270STim J. Robbins 		pattern += pclen;
27905a068e6SAndrey A. Chernov 
2803deeb59dSAndrey A. Chernov 		if (flags & FNM_CASEFOLD)
2819d88e270STim J. Robbins 			c = towlower(c);
28295e4966cSWolfram Schneider 
2839d88e270STim J. Robbins 		if (*pattern == '-' && *(pattern + 1) != EOS &&
2849d88e270STim J. Robbins 		    *(pattern + 1) != ']') {
2859d88e270STim J. Robbins 			if (*++pattern == '\\' && !(flags & FNM_NOESCAPE))
2869d88e270STim J. Robbins 				if (*pattern != EOS)
2879d88e270STim J. Robbins 					pattern++;
2889d88e270STim J. Robbins 			pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs);
2899d88e270STim J. Robbins 			if (pclen == (size_t)-1 || pclen == (size_t)-2)
2909d88e270STim J. Robbins 				return (RANGE_NOMATCH);
2919d88e270STim J. Robbins 			pattern += pclen;
29258f0484fSRodney W. Grimes 			if (c2 == EOS)
293e2dbbd9eSAndrey A. Chernov 				return (RANGE_ERROR);
29495e4966cSWolfram Schneider 
2953deeb59dSAndrey A. Chernov 			if (flags & FNM_CASEFOLD)
2969d88e270STim J. Robbins 				c2 = towlower(c2);
29795e4966cSWolfram Schneider 
2981daad8f5SAndrey A. Chernov 			if (table->__collate_load_error ?
2991daad8f5SAndrey A. Chernov 			    c <= test && test <= c2 :
30012eae8c8SAndrey A. Chernov 			       __wcollate_range_cmp(c, test) <= 0
30112eae8c8SAndrey A. Chernov 			    && __wcollate_range_cmp(test, c2) <= 0
3021daad8f5SAndrey A. Chernov 			   )
30358f0484fSRodney W. Grimes 				ok = 1;
30458f0484fSRodney W. Grimes 		} else if (c == test)
30558f0484fSRodney W. Grimes 			ok = 1;
3069d88e270STim J. Robbins 	}
307e728d480SAndrey A. Chernov 
308e2dbbd9eSAndrey A. Chernov 	*newp = (char *)pattern;
309e2dbbd9eSAndrey A. Chernov 	return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
31058f0484fSRodney W. Grimes }
311