14297a3b0SGarrett D'Amore /* 24297a3b0SGarrett D'Amore * Copyright (c) 1989, 1993, 1994 34297a3b0SGarrett D'Amore * The Regents of the University of California. All rights reserved. 44297a3b0SGarrett D'Amore * 54297a3b0SGarrett D'Amore * This code is derived from software contributed to Berkeley by 64297a3b0SGarrett D'Amore * Guido van Rossum. 74297a3b0SGarrett D'Amore * 84297a3b0SGarrett D'Amore * Redistribution and use in source and binary forms, with or without 94297a3b0SGarrett D'Amore * modification, are permitted provided that the following conditions 104297a3b0SGarrett D'Amore * are met: 114297a3b0SGarrett D'Amore * 1. Redistributions of source code must retain the above copyright 124297a3b0SGarrett D'Amore * notice, this list of conditions and the following disclaimer. 134297a3b0SGarrett D'Amore * 2. Redistributions in binary form must reproduce the above copyright 144297a3b0SGarrett D'Amore * notice, this list of conditions and the following disclaimer in the 154297a3b0SGarrett D'Amore * documentation and/or other materials provided with the distribution. 164297a3b0SGarrett D'Amore * 4. Neither the name of the University nor the names of its contributors 174297a3b0SGarrett D'Amore * may be used to endorse or promote products derived from this software 184297a3b0SGarrett D'Amore * without specific prior written permission. 194297a3b0SGarrett D'Amore * 204297a3b0SGarrett D'Amore * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 214297a3b0SGarrett D'Amore * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 224297a3b0SGarrett D'Amore * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 234297a3b0SGarrett D'Amore * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 244297a3b0SGarrett D'Amore * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 254297a3b0SGarrett D'Amore * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 264297a3b0SGarrett D'Amore * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 274297a3b0SGarrett D'Amore * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 284297a3b0SGarrett D'Amore * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 294297a3b0SGarrett D'Amore * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 304297a3b0SGarrett D'Amore * SUCH DAMAGE. 314297a3b0SGarrett D'Amore */ 324297a3b0SGarrett D'Amore 334297a3b0SGarrett D'Amore /* 34*2d08521bSGarrett D'Amore * Copyright 2013 Garrett D'Amore <garrett@damore.org> 354297a3b0SGarrett D'Amore * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 364297a3b0SGarrett D'Amore * Use is subject to license terms. 374297a3b0SGarrett D'Amore */ 384297a3b0SGarrett D'Amore 394297a3b0SGarrett D'Amore /* 404297a3b0SGarrett D'Amore * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6. 414297a3b0SGarrett D'Amore * Compares a filename or pathname to a pattern. 424297a3b0SGarrett D'Amore */ 434297a3b0SGarrett D'Amore 444297a3b0SGarrett D'Amore /* 454297a3b0SGarrett D'Amore * Some notes on multibyte character support: 464297a3b0SGarrett D'Amore * 1. Patterns with illegal byte sequences match nothing. 474297a3b0SGarrett D'Amore * 2. Illegal byte sequences in the "string" argument are handled by treating 484297a3b0SGarrett D'Amore * them as single-byte characters with a value of the first byte of the 494297a3b0SGarrett D'Amore * sequence cast to wchar_t. 504297a3b0SGarrett D'Amore * 3. Multibyte conversion state objects (mbstate_t) are passed around and 514297a3b0SGarrett D'Amore * used for most, but not all, conversions. Further work will be required 524297a3b0SGarrett D'Amore * to support state-dependent encodings. 534297a3b0SGarrett D'Amore */ 544297a3b0SGarrett D'Amore 554297a3b0SGarrett D'Amore #include "lint.h" 564297a3b0SGarrett D'Amore #include <fnmatch.h> 574297a3b0SGarrett D'Amore #include <limits.h> 584297a3b0SGarrett D'Amore #include <string.h> 594297a3b0SGarrett D'Amore #include <wchar.h> 60*2d08521bSGarrett D'Amore #include <xlocale.h> 614297a3b0SGarrett D'Amore #include <wctype.h> 62*2d08521bSGarrett D'Amore #include "localeimpl.h" 634297a3b0SGarrett D'Amore #include "collate.h" 644297a3b0SGarrett D'Amore 654297a3b0SGarrett D'Amore #define EOS '\0' 664297a3b0SGarrett D'Amore 674297a3b0SGarrett D'Amore #define RANGE_MATCH 1 684297a3b0SGarrett D'Amore #define RANGE_NOMATCH 0 694297a3b0SGarrett D'Amore #define RANGE_ERROR (-1) 704297a3b0SGarrett D'Amore 71*2d08521bSGarrett D'Amore static int rangematch(const char *, wchar_t, int, char **, mbstate_t *, 72*2d08521bSGarrett D'Amore locale_t); 734297a3b0SGarrett D'Amore static int fnmatch1(const char *, const char *, const char *, int, mbstate_t, 74*2d08521bSGarrett D'Amore mbstate_t, locale_t); 754297a3b0SGarrett D'Amore 764297a3b0SGarrett D'Amore int 774297a3b0SGarrett D'Amore fnmatch(pattern, string, flags) 784297a3b0SGarrett D'Amore const char *pattern, *string; 794297a3b0SGarrett D'Amore int flags; 804297a3b0SGarrett D'Amore { 81*2d08521bSGarrett D'Amore locale_t loc = uselocale(NULL); 824297a3b0SGarrett D'Amore static const mbstate_t initial = { 0 }; 834297a3b0SGarrett D'Amore 84*2d08521bSGarrett D'Amore return (fnmatch1(pattern, string, string, flags, initial, initial, 85*2d08521bSGarrett D'Amore loc)); 864297a3b0SGarrett D'Amore } 874297a3b0SGarrett D'Amore 884297a3b0SGarrett D'Amore static int 894297a3b0SGarrett D'Amore fnmatch1(const char *pattern, const char *string, const char *stringstart, 90*2d08521bSGarrett D'Amore int flags, mbstate_t patmbs, mbstate_t strmbs, locale_t loc) 914297a3b0SGarrett D'Amore { 924297a3b0SGarrett D'Amore char *newp; 934297a3b0SGarrett D'Amore char c; 944297a3b0SGarrett D'Amore wchar_t pc, sc; 954297a3b0SGarrett D'Amore size_t pclen, sclen; 964297a3b0SGarrett D'Amore 974297a3b0SGarrett D'Amore for (;;) { 98*2d08521bSGarrett D'Amore pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX, &patmbs, loc); 994297a3b0SGarrett D'Amore if (pclen == (size_t)-1 || pclen == (size_t)-2) 1004297a3b0SGarrett D'Amore return (FNM_NOMATCH); 1014297a3b0SGarrett D'Amore pattern += pclen; 102*2d08521bSGarrett D'Amore sclen = mbrtowc_l(&sc, string, MB_LEN_MAX, &strmbs, loc); 1034297a3b0SGarrett D'Amore if (sclen == (size_t)-1 || sclen == (size_t)-2) { 1044297a3b0SGarrett D'Amore sc = (unsigned char)*string; 1054297a3b0SGarrett D'Amore sclen = 1; 1064297a3b0SGarrett D'Amore (void) memset(&strmbs, 0, sizeof (strmbs)); 1074297a3b0SGarrett D'Amore } 1084297a3b0SGarrett D'Amore switch (pc) { 1094297a3b0SGarrett D'Amore case EOS: 1104297a3b0SGarrett D'Amore /* 1114297a3b0SGarrett D'Amore * Removed FNM_LEADING_DIR, as it is not present 1124297a3b0SGarrett D'Amore * on Solaris. 1134297a3b0SGarrett D'Amore */ 1144297a3b0SGarrett D'Amore return (sc == EOS ? 0 : FNM_NOMATCH); 1154297a3b0SGarrett D'Amore case '?': 1164297a3b0SGarrett D'Amore if (sc == EOS) 1174297a3b0SGarrett D'Amore return (FNM_NOMATCH); 1184297a3b0SGarrett D'Amore if (sc == '/' && (flags & FNM_PATHNAME)) 1194297a3b0SGarrett D'Amore return (FNM_NOMATCH); 1204297a3b0SGarrett D'Amore if (sc == '.' && (flags & FNM_PERIOD) && 1214297a3b0SGarrett D'Amore (string == stringstart || 1224297a3b0SGarrett D'Amore ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 1234297a3b0SGarrett D'Amore return (FNM_NOMATCH); 1244297a3b0SGarrett D'Amore string += sclen; 1254297a3b0SGarrett D'Amore break; 1264297a3b0SGarrett D'Amore case '*': 1274297a3b0SGarrett D'Amore c = *pattern; 1284297a3b0SGarrett D'Amore /* Collapse multiple stars. */ 1294297a3b0SGarrett D'Amore while (c == '*') 1304297a3b0SGarrett D'Amore c = *++pattern; 1314297a3b0SGarrett D'Amore 1324297a3b0SGarrett D'Amore if (sc == '.' && (flags & FNM_PERIOD) && 1334297a3b0SGarrett D'Amore (string == stringstart || 1344297a3b0SGarrett D'Amore ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 1354297a3b0SGarrett D'Amore return (FNM_NOMATCH); 1364297a3b0SGarrett D'Amore 1374297a3b0SGarrett D'Amore /* Optimize for pattern with * at end or before /. */ 1384297a3b0SGarrett D'Amore if (c == EOS) 1394297a3b0SGarrett D'Amore if (flags & FNM_PATHNAME) 1404297a3b0SGarrett D'Amore return (strchr(string, '/') == NULL ? 1414297a3b0SGarrett D'Amore 0 : FNM_NOMATCH); 1424297a3b0SGarrett D'Amore else 1434297a3b0SGarrett D'Amore return (0); 1444297a3b0SGarrett D'Amore else if (c == '/' && flags & FNM_PATHNAME) { 1454297a3b0SGarrett D'Amore if ((string = strchr(string, '/')) == NULL) 1464297a3b0SGarrett D'Amore return (FNM_NOMATCH); 1474297a3b0SGarrett D'Amore break; 1484297a3b0SGarrett D'Amore } 1494297a3b0SGarrett D'Amore 1504297a3b0SGarrett D'Amore /* General case, use recursion. */ 1514297a3b0SGarrett D'Amore while (sc != EOS) { 1524297a3b0SGarrett D'Amore if (!fnmatch1(pattern, string, stringstart, 153*2d08521bSGarrett D'Amore flags, patmbs, strmbs, loc)) 1544297a3b0SGarrett D'Amore return (0); 155*2d08521bSGarrett D'Amore sclen = mbrtowc_l(&sc, string, MB_LEN_MAX, 156*2d08521bSGarrett D'Amore &strmbs, loc); 1574297a3b0SGarrett D'Amore if (sclen == (size_t)-1 || 1584297a3b0SGarrett D'Amore sclen == (size_t)-2) { 1594297a3b0SGarrett D'Amore sc = (unsigned char)*string; 1604297a3b0SGarrett D'Amore sclen = 1; 1614297a3b0SGarrett D'Amore (void) memset(&strmbs, 0, 1624297a3b0SGarrett D'Amore sizeof (strmbs)); 1634297a3b0SGarrett D'Amore } 1644297a3b0SGarrett D'Amore if (sc == '/' && flags & FNM_PATHNAME) 1654297a3b0SGarrett D'Amore break; 1664297a3b0SGarrett D'Amore string += sclen; 1674297a3b0SGarrett D'Amore } 1684297a3b0SGarrett D'Amore return (FNM_NOMATCH); 1694297a3b0SGarrett D'Amore case '[': 1704297a3b0SGarrett D'Amore if (sc == EOS) 1714297a3b0SGarrett D'Amore return (FNM_NOMATCH); 1724297a3b0SGarrett D'Amore if (sc == '/' && (flags & FNM_PATHNAME)) 1734297a3b0SGarrett D'Amore return (FNM_NOMATCH); 1744297a3b0SGarrett D'Amore if (sc == '.' && (flags & FNM_PERIOD) && 1754297a3b0SGarrett D'Amore (string == stringstart || 1764297a3b0SGarrett D'Amore ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 1774297a3b0SGarrett D'Amore return (FNM_NOMATCH); 1784297a3b0SGarrett D'Amore 1794297a3b0SGarrett D'Amore switch (rangematch(pattern, sc, flags, &newp, 180*2d08521bSGarrett D'Amore &patmbs, loc)) { 1814297a3b0SGarrett D'Amore case RANGE_ERROR: 1824297a3b0SGarrett D'Amore goto norm; 1834297a3b0SGarrett D'Amore case RANGE_MATCH: 1844297a3b0SGarrett D'Amore pattern = newp; 1854297a3b0SGarrett D'Amore break; 1864297a3b0SGarrett D'Amore case RANGE_NOMATCH: 1874297a3b0SGarrett D'Amore return (FNM_NOMATCH); 1884297a3b0SGarrett D'Amore } 1894297a3b0SGarrett D'Amore string += sclen; 1904297a3b0SGarrett D'Amore break; 1914297a3b0SGarrett D'Amore case '\\': 1924297a3b0SGarrett D'Amore if (!(flags & FNM_NOESCAPE)) { 193*2d08521bSGarrett D'Amore pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX, 194*2d08521bSGarrett D'Amore &patmbs, loc); 1954297a3b0SGarrett D'Amore if (pclen == (size_t)-1 || pclen == (size_t)-2) 1964297a3b0SGarrett D'Amore return (FNM_NOMATCH); 1974297a3b0SGarrett D'Amore if (pclen == 0) 1984297a3b0SGarrett D'Amore pc = '\\'; 1994297a3b0SGarrett D'Amore pattern += pclen; 2004297a3b0SGarrett D'Amore } 2014297a3b0SGarrett D'Amore /* FALLTHROUGH */ 2024297a3b0SGarrett D'Amore default: 2034297a3b0SGarrett D'Amore norm: 2044297a3b0SGarrett D'Amore if (pc == sc) 2054297a3b0SGarrett D'Amore string += sclen; 2064297a3b0SGarrett D'Amore 2074297a3b0SGarrett D'Amore else if ((flags & FNM_IGNORECASE) && 208*2d08521bSGarrett D'Amore (towlower_l(pc, loc) == towlower_l(sc, loc))) 2094297a3b0SGarrett D'Amore string += sclen; 2104297a3b0SGarrett D'Amore else 2114297a3b0SGarrett D'Amore return (FNM_NOMATCH); 2124297a3b0SGarrett D'Amore 2134297a3b0SGarrett D'Amore break; 2144297a3b0SGarrett D'Amore } 2154297a3b0SGarrett D'Amore } 2164297a3b0SGarrett D'Amore /* NOTREACHED */ 2174297a3b0SGarrett D'Amore } 2184297a3b0SGarrett D'Amore 2194297a3b0SGarrett D'Amore static int 220*2d08521bSGarrett D'Amore rangematch(const char *pattern, wchar_t test, int flags, char **newp, 221*2d08521bSGarrett D'Amore mbstate_t *patmbs, locale_t loc) 2224297a3b0SGarrett D'Amore { 2234297a3b0SGarrett D'Amore int negate, ok; 2244297a3b0SGarrett D'Amore wchar_t c, c2; 2254297a3b0SGarrett D'Amore size_t pclen; 2264297a3b0SGarrett D'Amore const char *origpat; 2274297a3b0SGarrett D'Amore 2284297a3b0SGarrett D'Amore /* 2294297a3b0SGarrett D'Amore * A bracket expression starting with an unquoted circumflex 2304297a3b0SGarrett D'Amore * character produces unspecified results (IEEE 1003.2-1992, 2314297a3b0SGarrett D'Amore * 3.13.2). This implementation treats it like '!', for 2324297a3b0SGarrett D'Amore * consistency with the regular expression syntax. 2334297a3b0SGarrett D'Amore * J.T. Conklin (conklin@ngai.kaleida.com) 2344297a3b0SGarrett D'Amore */ 2354297a3b0SGarrett D'Amore if ((negate = (*pattern == '!' || *pattern == '^')) != 0) 2364297a3b0SGarrett D'Amore ++pattern; 2374297a3b0SGarrett D'Amore 2384297a3b0SGarrett D'Amore if (flags & FNM_IGNORECASE) 239*2d08521bSGarrett D'Amore test = towlower_l(test, loc); 2404297a3b0SGarrett D'Amore 2414297a3b0SGarrett D'Amore /* 2424297a3b0SGarrett D'Amore * A right bracket shall lose its special meaning and represent 2434297a3b0SGarrett D'Amore * itself in a bracket expression if it occurs first in the list. 2444297a3b0SGarrett D'Amore * -- POSIX.2 2.8.3.2 2454297a3b0SGarrett D'Amore */ 2464297a3b0SGarrett D'Amore ok = 0; 2474297a3b0SGarrett D'Amore origpat = pattern; 2484297a3b0SGarrett D'Amore for (;;) { 2494297a3b0SGarrett D'Amore if (*pattern == ']' && pattern > origpat) { 2504297a3b0SGarrett D'Amore pattern++; 2514297a3b0SGarrett D'Amore break; 2524297a3b0SGarrett D'Amore } else if (*pattern == '\0') { 2534297a3b0SGarrett D'Amore return (RANGE_ERROR); 2544297a3b0SGarrett D'Amore } else if (*pattern == '/' && (flags & FNM_PATHNAME)) { 2554297a3b0SGarrett D'Amore return (RANGE_NOMATCH); 2564297a3b0SGarrett D'Amore } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) 2574297a3b0SGarrett D'Amore pattern++; 258*2d08521bSGarrett D'Amore pclen = mbrtowc_l(&c, pattern, MB_LEN_MAX, patmbs, loc); 2594297a3b0SGarrett D'Amore if (pclen == (size_t)-1 || pclen == (size_t)-2) 2604297a3b0SGarrett D'Amore return (RANGE_NOMATCH); 2614297a3b0SGarrett D'Amore pattern += pclen; 2624297a3b0SGarrett D'Amore 2634297a3b0SGarrett D'Amore if (flags & FNM_IGNORECASE) 264*2d08521bSGarrett D'Amore c = towlower_l(c, loc); 2654297a3b0SGarrett D'Amore 2664297a3b0SGarrett D'Amore if (*pattern == '-' && *(pattern + 1) != EOS && 2674297a3b0SGarrett D'Amore *(pattern + 1) != ']') { 2684297a3b0SGarrett D'Amore if (*++pattern == '\\' && !(flags & FNM_NOESCAPE)) 2694297a3b0SGarrett D'Amore if (*pattern != EOS) 2704297a3b0SGarrett D'Amore pattern++; 271*2d08521bSGarrett D'Amore pclen = mbrtowc_l(&c2, pattern, MB_LEN_MAX, patmbs, 272*2d08521bSGarrett D'Amore loc); 2734297a3b0SGarrett D'Amore if (pclen == (size_t)-1 || pclen == (size_t)-2) 2744297a3b0SGarrett D'Amore return (RANGE_NOMATCH); 2754297a3b0SGarrett D'Amore pattern += pclen; 2764297a3b0SGarrett D'Amore if (c2 == EOS) 2774297a3b0SGarrett D'Amore return (RANGE_ERROR); 2784297a3b0SGarrett D'Amore 2794297a3b0SGarrett D'Amore if (flags & FNM_IGNORECASE) 280*2d08521bSGarrett D'Amore c2 = towlower_l(c2, loc); 2814297a3b0SGarrett D'Amore 282*2d08521bSGarrett D'Amore if (loc->collate->lc_is_posix ? 2834297a3b0SGarrett D'Amore c <= test && test <= c2 : 284*2d08521bSGarrett D'Amore _collate_range_cmp(c, test, loc) <= 0 && 285*2d08521bSGarrett D'Amore _collate_range_cmp(test, c2, loc) <= 0) 2864297a3b0SGarrett D'Amore ok = 1; 2874297a3b0SGarrett D'Amore } else if (c == test) 2884297a3b0SGarrett D'Amore ok = 1; 2894297a3b0SGarrett D'Amore } 2904297a3b0SGarrett D'Amore 2914297a3b0SGarrett D'Amore *newp = (char *)pattern; 2924297a3b0SGarrett D'Amore return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH); 2934297a3b0SGarrett D'Amore } 294