1 /*- 2 * Copyright (c) 2003-2007 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include "archive_platform.h" 28 29 #ifdef HAVE_STRING_H 30 #include <string.h> 31 #endif 32 #ifdef HAVE_WCHAR_H 33 #include <wchar.h> 34 #endif 35 36 #include "archive_pathmatch.h" 37 38 /* 39 * Check whether a character 'c' is matched by a list specification [...]: 40 * * Leading '!' or '^' negates the class. 41 * * <char>-<char> is a range of characters 42 * * \<char> removes any special meaning for <char> 43 * 44 * Some interesting boundary cases: 45 * a-d-e is one range (a-d) followed by two single characters - and e. 46 * \a-\d is same as a-d 47 * a\-d is three single characters: a, d, - 48 * Trailing - is not special (so [a-] is two characters a and -). 49 * Initial - is not special ([a-] is same as [-a] is same as [\\-a]) 50 * This function never sees a trailing \. 51 * [] always fails 52 * [!] always succeeds 53 */ 54 static int 55 pm_list(const char *start, const char *end, const char c, int flags) 56 { 57 const char *p = start; 58 char rangeStart = '\0', nextRangeStart; 59 int match = 1, nomatch = 0; 60 61 /* This will be used soon... */ 62 (void)flags; /* UNUSED */ 63 64 /* If this is a negated class, return success for nomatch. */ 65 if ((*p == '!' || *p == '^') && p < end) { 66 match = 0; 67 nomatch = 1; 68 ++p; 69 } 70 71 while (p < end) { 72 nextRangeStart = '\0'; 73 switch (*p) { 74 case '-': 75 /* Trailing or initial '-' is not special. */ 76 if ((rangeStart == '\0') || (p == end - 1)) { 77 if (*p == c) 78 return (match); 79 } else { 80 char rangeEnd = *++p; 81 if (rangeEnd == '\\') 82 rangeEnd = *++p; 83 if ((rangeStart <= c) && (c <= rangeEnd)) 84 return (match); 85 } 86 break; 87 case '\\': 88 ++p; 89 /* Fall through */ 90 default: 91 if (*p == c) 92 return (match); 93 nextRangeStart = *p; /* Possible start of range. */ 94 } 95 rangeStart = nextRangeStart; 96 ++p; 97 } 98 return (nomatch); 99 } 100 101 static int 102 pm_list_w(const wchar_t *start, const wchar_t *end, const wchar_t c, int flags) 103 { 104 const wchar_t *p = start; 105 wchar_t rangeStart = L'\0', nextRangeStart; 106 int match = 1, nomatch = 0; 107 108 /* This will be used soon... */ 109 (void)flags; /* UNUSED */ 110 111 /* If this is a negated class, return success for nomatch. */ 112 if ((*p == L'!' || *p == L'^') && p < end) { 113 match = 0; 114 nomatch = 1; 115 ++p; 116 } 117 118 while (p < end) { 119 nextRangeStart = L'\0'; 120 switch (*p) { 121 case L'-': 122 /* Trailing or initial '-' is not special. */ 123 if ((rangeStart == L'\0') || (p == end - 1)) { 124 if (*p == c) 125 return (match); 126 } else { 127 wchar_t rangeEnd = *++p; 128 if (rangeEnd == L'\\') 129 rangeEnd = *++p; 130 if ((rangeStart <= c) && (c <= rangeEnd)) 131 return (match); 132 } 133 break; 134 case L'\\': 135 ++p; 136 /* Fall through */ 137 default: 138 if (*p == c) 139 return (match); 140 nextRangeStart = *p; /* Possible start of range. */ 141 } 142 rangeStart = nextRangeStart; 143 ++p; 144 } 145 return (nomatch); 146 } 147 148 /* 149 * If s is pointing to "./", ".//", "./././" or the like, skip it. 150 */ 151 static const char * 152 pm_slashskip(const char *s) { 153 while ((*s == '/') 154 || (s[0] == '.' && s[1] == '/') 155 || (s[0] == '.' && s[1] == '\0')) 156 ++s; 157 return (s); 158 } 159 160 static const wchar_t * 161 pm_slashskip_w(const wchar_t *s) { 162 while ((*s == L'/') 163 || (s[0] == L'.' && s[1] == L'/') 164 || (s[0] == L'.' && s[1] == L'\0')) 165 ++s; 166 return (s); 167 } 168 169 static int 170 pm(const char *p, const char *s, int flags) 171 { 172 const char *end; 173 174 /* 175 * Ignore leading './', './/', '././', etc. 176 */ 177 if (s[0] == '.' && s[1] == '/') 178 s = pm_slashskip(s + 1); 179 if (p[0] == '.' && p[1] == '/') 180 p = pm_slashskip(p + 1); 181 182 for (;;) { 183 switch (*p) { 184 case '\0': 185 if (s[0] == '/') { 186 if (flags & PATHMATCH_NO_ANCHOR_END) 187 return (1); 188 /* "dir" == "dir/" == "dir/." */ 189 s = pm_slashskip(s); 190 } 191 return (*s == '\0'); 192 case '?': 193 /* ? always succeeds, unless we hit end of 's' */ 194 if (*s == '\0') 195 return (0); 196 break; 197 case '*': 198 /* "*" == "**" == "***" ... */ 199 while (*p == '*') 200 ++p; 201 /* Trailing '*' always succeeds. */ 202 if (*p == '\0') 203 return (1); 204 while (*s) { 205 if (archive_pathmatch(p, s, flags)) 206 return (1); 207 ++s; 208 } 209 return (0); 210 case '[': 211 /* 212 * Find the end of the [...] character class, 213 * ignoring \] that might occur within the class. 214 */ 215 end = p + 1; 216 while (*end != '\0' && *end != ']') { 217 if (*end == '\\' && end[1] != '\0') 218 ++end; 219 ++end; 220 } 221 if (*end == ']') { 222 /* We found [...], try to match it. */ 223 if (!pm_list(p + 1, end, *s, flags)) 224 return (0); 225 p = end; /* Jump to trailing ']' char. */ 226 break; 227 } else 228 /* No final ']', so just match '['. */ 229 if (*p != *s) 230 return (0); 231 break; 232 case '\\': 233 /* Trailing '\\' matches itself. */ 234 if (p[1] == '\0') { 235 if (*s != '\\') 236 return (0); 237 } else { 238 ++p; 239 if (*p != *s) 240 return (0); 241 } 242 break; 243 case '/': 244 if (*s != '/' && *s != '\0') 245 return (0); 246 /* Note: pattern "/\./" won't match "/"; 247 * pm_slashskip() correctly stops at backslash. */ 248 p = pm_slashskip(p); 249 s = pm_slashskip(s); 250 if (*p == '\0' && (flags & PATHMATCH_NO_ANCHOR_END)) 251 return (1); 252 --p; /* Counteract the increment below. */ 253 --s; 254 break; 255 case '$': 256 /* '$' is special only at end of pattern and only 257 * if PATHMATCH_NO_ANCHOR_END is specified. */ 258 if (p[1] == '\0' && (flags & PATHMATCH_NO_ANCHOR_END)){ 259 /* "dir" == "dir/" == "dir/." */ 260 return (*pm_slashskip(s) == '\0'); 261 } 262 /* Otherwise, '$' is not special. */ 263 /* FALL THROUGH */ 264 default: 265 if (*p != *s) 266 return (0); 267 break; 268 } 269 ++p; 270 ++s; 271 } 272 } 273 274 static int 275 pm_w(const wchar_t *p, const wchar_t *s, int flags) 276 { 277 const wchar_t *end; 278 279 /* 280 * Ignore leading './', './/', '././', etc. 281 */ 282 if (s[0] == L'.' && s[1] == L'/') 283 s = pm_slashskip_w(s + 1); 284 if (p[0] == L'.' && p[1] == L'/') 285 p = pm_slashskip_w(p + 1); 286 287 for (;;) { 288 switch (*p) { 289 case L'\0': 290 if (s[0] == L'/') { 291 if (flags & PATHMATCH_NO_ANCHOR_END) 292 return (1); 293 /* "dir" == "dir/" == "dir/." */ 294 s = pm_slashskip_w(s); 295 } 296 return (*s == L'\0'); 297 case L'?': 298 /* ? always succeeds, unless we hit end of 's' */ 299 if (*s == L'\0') 300 return (0); 301 break; 302 case L'*': 303 /* "*" == "**" == "***" ... */ 304 while (*p == L'*') 305 ++p; 306 /* Trailing '*' always succeeds. */ 307 if (*p == L'\0') 308 return (1); 309 while (*s) { 310 if (archive_pathmatch_w(p, s, flags)) 311 return (1); 312 ++s; 313 } 314 return (0); 315 case L'[': 316 /* 317 * Find the end of the [...] character class, 318 * ignoring \] that might occur within the class. 319 */ 320 end = p + 1; 321 while (*end != L'\0' && *end != L']') { 322 if (*end == L'\\' && end[1] != L'\0') 323 ++end; 324 ++end; 325 } 326 if (*end == L']') { 327 /* We found [...], try to match it. */ 328 if (!pm_list_w(p + 1, end, *s, flags)) 329 return (0); 330 p = end; /* Jump to trailing ']' char. */ 331 break; 332 } else 333 /* No final ']', so just match '['. */ 334 if (*p != *s) 335 return (0); 336 break; 337 case L'\\': 338 /* Trailing '\\' matches itself. */ 339 if (p[1] == L'\0') { 340 if (*s != L'\\') 341 return (0); 342 } else { 343 ++p; 344 if (*p != *s) 345 return (0); 346 } 347 break; 348 case L'/': 349 if (*s != L'/' && *s != L'\0') 350 return (0); 351 /* Note: pattern "/\./" won't match "/"; 352 * pm_slashskip() correctly stops at backslash. */ 353 p = pm_slashskip_w(p); 354 s = pm_slashskip_w(s); 355 if (*p == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END)) 356 return (1); 357 --p; /* Counteract the increment below. */ 358 --s; 359 break; 360 case L'$': 361 /* '$' is special only at end of pattern and only 362 * if PATHMATCH_NO_ANCHOR_END is specified. */ 363 if (p[1] == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END)){ 364 /* "dir" == "dir/" == "dir/." */ 365 return (*pm_slashskip_w(s) == L'\0'); 366 } 367 /* Otherwise, '$' is not special. */ 368 /* FALL THROUGH */ 369 default: 370 if (*p != *s) 371 return (0); 372 break; 373 } 374 ++p; 375 ++s; 376 } 377 } 378 379 /* Main entry point. */ 380 int 381 __archive_pathmatch(const char *p, const char *s, int flags) 382 { 383 /* Empty pattern only matches the empty string. */ 384 if (p == NULL || *p == '\0') 385 return (s == NULL || *s == '\0'); 386 else if (s == NULL) 387 return (0); 388 389 /* Leading '^' anchors the start of the pattern. */ 390 if (*p == '^') { 391 ++p; 392 flags &= ~PATHMATCH_NO_ANCHOR_START; 393 } 394 395 if (*p == '/' && *s != '/') 396 return (0); 397 398 /* Certain patterns anchor implicitly. */ 399 if (*p == '*' || *p == '/') { 400 while (*p == '/') 401 ++p; 402 while (*s == '/') 403 ++s; 404 return (pm(p, s, flags)); 405 } 406 407 /* If start is unanchored, try to match start of each path element. */ 408 if (flags & PATHMATCH_NO_ANCHOR_START) { 409 for ( ; s != NULL; s = strchr(s, '/')) { 410 if (*s == '/') 411 s++; 412 if (pm(p, s, flags)) 413 return (1); 414 } 415 return (0); 416 } 417 418 /* Default: Match from beginning. */ 419 return (pm(p, s, flags)); 420 } 421 422 int 423 __archive_pathmatch_w(const wchar_t *p, const wchar_t *s, int flags) 424 { 425 /* Empty pattern only matches the empty string. */ 426 if (p == NULL || *p == L'\0') 427 return (s == NULL || *s == L'\0'); 428 else if (s == NULL) 429 return (0); 430 431 /* Leading '^' anchors the start of the pattern. */ 432 if (*p == L'^') { 433 ++p; 434 flags &= ~PATHMATCH_NO_ANCHOR_START; 435 } 436 437 if (*p == L'/' && *s != L'/') 438 return (0); 439 440 /* Certain patterns anchor implicitly. */ 441 if (*p == L'*' || *p == L'/') { 442 while (*p == L'/') 443 ++p; 444 while (*s == L'/') 445 ++s; 446 return (pm_w(p, s, flags)); 447 } 448 449 /* If start is unanchored, try to match start of each path element. */ 450 if (flags & PATHMATCH_NO_ANCHOR_START) { 451 for ( ; s != NULL; s = wcschr(s, L'/')) { 452 if (*s == L'/') 453 s++; 454 if (pm_w(p, s, flags)) 455 return (1); 456 } 457 return (0); 458 } 459 460 /* Default: Match from beginning. */ 461 return (pm_w(p, s, flags)); 462 } 463