xref: /freebsd/lib/libc/gen/glob.c (revision b37f6c9805edb4b89f0a8c2b78f78a3dcfc0647b)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Guido van Rossum.
9  *
10  * Copyright (c) 2011 The FreeBSD Foundation
11  * All rights reserved.
12  * Portions of this software were developed by David Chisnall
13  * under sponsorship from the FreeBSD Foundation.
14  *
15  * Redistribution and use in source and binary forms, with or without
16  * modification, are permitted provided that the following conditions
17  * are met:
18  * 1. Redistributions of source code must retain the above copyright
19  *    notice, this list of conditions and the following disclaimer.
20  * 2. Redistributions in binary form must reproduce the above copyright
21  *    notice, this list of conditions and the following disclaimer in the
22  *    documentation and/or other materials provided with the distribution.
23  * 3. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  */
39 
40 #if defined(LIBC_SCCS) && !defined(lint)
41 static char sccsid[] = "@(#)glob.c	8.3 (Berkeley) 10/13/93";
42 #endif /* LIBC_SCCS and not lint */
43 #include <sys/cdefs.h>
44 __FBSDID("$FreeBSD$");
45 
46 /*
47  * glob(3) -- a superset of the one defined in POSIX 1003.2.
48  *
49  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
50  *
51  * Optional extra services, controlled by flags not defined by POSIX:
52  *
53  * GLOB_QUOTE:
54  *	Escaping convention: \ inhibits any special meaning the following
55  *	character might have (except \ at end of string is retained).
56  * GLOB_MAGCHAR:
57  *	Set in gl_flags if pattern contained a globbing character.
58  * GLOB_NOMAGIC:
59  *	Same as GLOB_NOCHECK, but it will only append pattern if it did
60  *	not contain any magic characters.  [Used in csh style globbing]
61  * GLOB_ALTDIRFUNC:
62  *	Use alternately specified directory access functions.
63  * GLOB_TILDE:
64  *	expand ~user/foo to the /home/dir/of/user/foo
65  * GLOB_BRACE:
66  *	expand {1,2}{a,b} to 1a 1b 2a 2b
67  * gl_matchc:
68  *	Number of matches in the current invocation of glob.
69  */
70 
71 /*
72  * Some notes on multibyte character support:
73  * 1. Patterns with illegal byte sequences match nothing - even if
74  *    GLOB_NOCHECK is specified.
75  * 2. Illegal byte sequences in filenames are handled by treating them as
76  *    single-byte characters with a values of such bytes of the sequence
77  *    cast to wchar_t.
78  * 3. State-dependent encodings are not currently supported.
79  */
80 
81 #include <sys/param.h>
82 #include <sys/stat.h>
83 
84 #include <ctype.h>
85 #include <dirent.h>
86 #include <errno.h>
87 #include <glob.h>
88 #include <limits.h>
89 #include <pwd.h>
90 #include <stdint.h>
91 #include <stdio.h>
92 #include <stdlib.h>
93 #include <string.h>
94 #include <unistd.h>
95 #include <wchar.h>
96 
97 #include "collate.h"
98 
99 /*
100  * glob(3) expansion limits. Stop the expansion if any of these limits
101  * is reached. This caps the runtime in the face of DoS attacks. See
102  * also CVE-2010-2632
103  */
104 #define	GLOB_LIMIT_BRACE	128	/* number of brace calls */
105 #define	GLOB_LIMIT_PATH		65536	/* number of path elements */
106 #define	GLOB_LIMIT_READDIR	16384	/* number of readdirs */
107 #define	GLOB_LIMIT_STAT		1024	/* number of stat system calls */
108 #define	GLOB_LIMIT_STRING	ARG_MAX	/* maximum total size for paths */
109 
110 struct glob_limit {
111 	size_t	l_brace_cnt;
112 	size_t	l_path_lim;
113 	size_t	l_readdir_cnt;
114 	size_t	l_stat_cnt;
115 	size_t	l_string_cnt;
116 };
117 
118 #define	DOT		L'.'
119 #define	EOS		L'\0'
120 #define	LBRACKET	L'['
121 #define	NOT		L'!'
122 #define	QUESTION	L'?'
123 #define	QUOTE		L'\\'
124 #define	RANGE		L'-'
125 #define	RBRACKET	L']'
126 #define	SEP		L'/'
127 #define	STAR		L'*'
128 #define	TILDE		L'~'
129 #define	LBRACE		L'{'
130 #define	RBRACE		L'}'
131 #define	COMMA		L','
132 
133 #define	M_QUOTE		0x8000000000ULL
134 #define	M_PROTECT	0x4000000000ULL
135 #define	M_MASK		0xffffffffffULL
136 #define	M_CHAR		0x00ffffffffULL
137 
138 typedef uint_fast64_t Char;
139 
140 #define	CHAR(c)		((Char)((c)&M_CHAR))
141 #define	META(c)		((Char)((c)|M_QUOTE))
142 #define	UNPROT(c)	((c) & ~M_PROTECT)
143 #define	M_ALL		META(L'*')
144 #define	M_END		META(L']')
145 #define	M_NOT		META(L'!')
146 #define	M_ONE		META(L'?')
147 #define	M_RNG		META(L'-')
148 #define	M_SET		META(L'[')
149 #define	ismeta(c)	(((c)&M_QUOTE) != 0)
150 #ifdef DEBUG
151 #define	isprot(c)	(((c)&M_PROTECT) != 0)
152 #endif
153 
154 static int	 compare(const void *, const void *);
155 static int	 g_Ctoc(const Char *, char *, size_t);
156 static int	 g_lstat(Char *, struct stat *, glob_t *);
157 static DIR	*g_opendir(Char *, glob_t *);
158 static const Char *g_strchr(const Char *, wchar_t);
159 #ifdef notdef
160 static Char	*g_strcat(Char *, const Char *);
161 #endif
162 static int	 g_stat(Char *, struct stat *, glob_t *);
163 static int	 glob0(const Char *, glob_t *, struct glob_limit *,
164     const char *);
165 static int	 glob1(Char *, glob_t *, struct glob_limit *);
166 static int	 glob2(Char *, Char *, Char *, Char *, glob_t *,
167     struct glob_limit *);
168 static int	 glob3(Char *, Char *, Char *, Char *, Char *, glob_t *,
169     struct glob_limit *);
170 static int	 globextend(const Char *, glob_t *, struct glob_limit *,
171     const char *);
172 static const Char *
173 		 globtilde(const Char *, Char *, size_t, glob_t *);
174 static int	 globexp0(const Char *, glob_t *, struct glob_limit *,
175     const char *);
176 static int	 globexp1(const Char *, glob_t *, struct glob_limit *);
177 static int	 globexp2(const Char *, const Char *, glob_t *,
178     struct glob_limit *);
179 static int	 globfinal(glob_t *, struct glob_limit *, size_t,
180     const char *);
181 static int	 match(Char *, Char *, Char *);
182 static int	 err_nomatch(glob_t *, struct glob_limit *, const char *);
183 static int	 err_aborted(glob_t *, int, char *);
184 #ifdef DEBUG
185 static void	 qprintf(const char *, Char *);
186 #endif
187 
188 int
189 glob(const char * __restrict pattern, int flags,
190 	 int (*errfunc)(const char *, int), glob_t * __restrict pglob)
191 {
192 	struct glob_limit limit = { 0, 0, 0, 0, 0 };
193 	const char *patnext;
194 	Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot;
195 	mbstate_t mbs;
196 	wchar_t wc;
197 	size_t clen;
198 	int too_long;
199 
200 	patnext = pattern;
201 	if (!(flags & GLOB_APPEND)) {
202 		pglob->gl_pathc = 0;
203 		pglob->gl_pathv = NULL;
204 		if (!(flags & GLOB_DOOFFS))
205 			pglob->gl_offs = 0;
206 	}
207 	if (flags & GLOB_LIMIT) {
208 		limit.l_path_lim = pglob->gl_matchc;
209 		if (limit.l_path_lim == 0)
210 			limit.l_path_lim = GLOB_LIMIT_PATH;
211 	}
212 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
213 	pglob->gl_errfunc = errfunc;
214 	pglob->gl_matchc = 0;
215 
216 	bufnext = patbuf;
217 	bufend = bufnext + MAXPATHLEN - 1;
218 	too_long = 1;
219 	if (flags & GLOB_NOESCAPE) {
220 		memset(&mbs, 0, sizeof(mbs));
221 		while (bufnext <= bufend) {
222 			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
223 			if (clen == (size_t)-1 || clen == (size_t)-2)
224 				return (err_nomatch(pglob, &limit, pattern));
225 			else if (clen == 0) {
226 				too_long = 0;
227 				break;
228 			}
229 			*bufnext++ = wc;
230 			patnext += clen;
231 		}
232 	} else {
233 		/* Protect the quoted characters. */
234 		memset(&mbs, 0, sizeof(mbs));
235 		while (bufnext <= bufend) {
236 			if (*patnext == '\\') {
237 				if (*++patnext == '\0') {
238 					*bufnext++ = QUOTE;
239 					continue;
240 				}
241 				prot = M_PROTECT;
242 			} else
243 				prot = 0;
244 			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
245 			if (clen == (size_t)-1 || clen == (size_t)-2)
246 				return (err_nomatch(pglob, &limit, pattern));
247 			else if (clen == 0) {
248 				too_long = 0;
249 				break;
250 			}
251 			*bufnext++ = wc | prot;
252 			patnext += clen;
253 		}
254 	}
255 	if (too_long)
256 		return (err_nomatch(pglob, &limit, pattern));
257 	*bufnext = EOS;
258 
259 	if (flags & GLOB_BRACE)
260 	    return (globexp0(patbuf, pglob, &limit, pattern));
261 	else
262 	    return (glob0(patbuf, pglob, &limit, pattern));
263 }
264 
265 static int
266 globexp0(const Char *pattern, glob_t *pglob, struct glob_limit *limit,
267     const char *origpat) {
268 	int rv;
269 	size_t oldpathc;
270 
271 	/* Protect a single {}, for find(1), like csh */
272 	if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS) {
273 		if ((pglob->gl_flags & GLOB_LIMIT) &&
274 		    limit->l_brace_cnt++ >= GLOB_LIMIT_BRACE) {
275 			errno = E2BIG;
276 			return (GLOB_NOSPACE);
277 		}
278 		return (glob0(pattern, pglob, limit, origpat));
279 	}
280 
281 	oldpathc = pglob->gl_pathc;
282 
283 	if ((rv = globexp1(pattern, pglob, limit)) != 0)
284 		return rv;
285 
286 	return (globfinal(pglob, limit, oldpathc, origpat));
287 }
288 
289 /*
290  * Expand recursively a glob {} pattern. When there is no more expansion
291  * invoke the standard globbing routine to glob the rest of the magic
292  * characters
293  */
294 static int
295 globexp1(const Char *pattern, glob_t *pglob, struct glob_limit *limit)
296 {
297 	const Char* ptr;
298 
299 	if ((ptr = g_strchr(pattern, LBRACE)) != NULL) {
300 		if ((pglob->gl_flags & GLOB_LIMIT) &&
301 		    limit->l_brace_cnt++ >= GLOB_LIMIT_BRACE) {
302 			errno = E2BIG;
303 			return (GLOB_NOSPACE);
304 		}
305 		return (globexp2(ptr, pattern, pglob, limit));
306 	}
307 
308 	return (glob0(pattern, pglob, limit, NULL));
309 }
310 
311 
312 /*
313  * Recursive brace globbing helper. Tries to expand a single brace.
314  * If it succeeds then it invokes globexp1 with the new pattern.
315  * If it fails then it tries to glob the rest of the pattern and returns.
316  */
317 static int
318 globexp2(const Char *ptr, const Char *pattern, glob_t *pglob,
319     struct glob_limit *limit)
320 {
321 	int     i, rv;
322 	Char   *lm, *ls;
323 	const Char *pe, *pm, *pm1, *pl;
324 	Char    patbuf[MAXPATHLEN];
325 
326 	/* copy part up to the brace */
327 	for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
328 		continue;
329 	*lm = EOS;
330 	ls = lm;
331 
332 	/* Find the balanced brace */
333 	for (i = 0, pe = ++ptr; *pe != EOS; pe++)
334 		if (*pe == LBRACKET) {
335 			/* Ignore everything between [] */
336 			for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++)
337 				continue;
338 			if (*pe == EOS) {
339 				/*
340 				 * We could not find a matching RBRACKET.
341 				 * Ignore and just look for RBRACE
342 				 */
343 				pe = pm;
344 			}
345 		}
346 		else if (*pe == LBRACE)
347 			i++;
348 		else if (*pe == RBRACE) {
349 			if (i == 0)
350 				break;
351 			i--;
352 		}
353 
354 	/* Non matching braces; just glob the pattern */
355 	if (i != 0 || *pe == EOS)
356 		return (glob0(pattern, pglob, limit, NULL));
357 
358 	for (i = 0, pl = pm = ptr; pm <= pe; pm++)
359 		switch (*pm) {
360 		case LBRACKET:
361 			/* Ignore everything between [] */
362 			for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++)
363 				continue;
364 			if (*pm == EOS) {
365 				/*
366 				 * We could not find a matching RBRACKET.
367 				 * Ignore and just look for RBRACE
368 				 */
369 				pm = pm1;
370 			}
371 			break;
372 
373 		case LBRACE:
374 			i++;
375 			break;
376 
377 		case RBRACE:
378 			if (i) {
379 			    i--;
380 			    break;
381 			}
382 			/* FALLTHROUGH */
383 		case COMMA:
384 			if (i && *pm == COMMA)
385 				break;
386 			else {
387 				/* Append the current string */
388 				for (lm = ls; (pl < pm); *lm++ = *pl++)
389 					continue;
390 				/*
391 				 * Append the rest of the pattern after the
392 				 * closing brace
393 				 */
394 				for (pl = pe + 1; (*lm++ = *pl++) != EOS;)
395 					continue;
396 
397 				/* Expand the current pattern */
398 #ifdef DEBUG
399 				qprintf("globexp2:", patbuf);
400 #endif
401 				rv = globexp1(patbuf, pglob, limit);
402 				if (rv)
403 					return (rv);
404 
405 				/* move after the comma, to the next string */
406 				pl = pm + 1;
407 			}
408 			break;
409 
410 		default:
411 			break;
412 		}
413 	return (0);
414 }
415 
416 
417 
418 /*
419  * expand tilde from the passwd file.
420  */
421 static const Char *
422 globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob)
423 {
424 	struct passwd *pwd;
425 	char *h, *sc;
426 	const Char *p;
427 	Char *b, *eb;
428 	wchar_t wc;
429 	wchar_t wbuf[MAXPATHLEN];
430 	wchar_t *wbufend, *dc;
431 	size_t clen;
432 	mbstate_t mbs;
433 	int too_long;
434 
435 	if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE))
436 		return (pattern);
437 
438 	/*
439 	 * Copy up to the end of the string or /
440 	 */
441 	eb = &patbuf[patbuf_len - 1];
442 	for (p = pattern + 1, b = patbuf;
443 	    b < eb && *p != EOS && UNPROT(*p) != SEP; *b++ = *p++)
444 		continue;
445 
446 	if (*p != EOS && UNPROT(*p) != SEP)
447 		return (NULL);
448 
449 	*b = EOS;
450 	h = NULL;
451 
452 	if (patbuf[0] == EOS) {
453 		/*
454 		 * handle a plain ~ or ~/ by expanding $HOME first (iff
455 		 * we're not running setuid or setgid) and then trying
456 		 * the password file
457 		 */
458 		if (issetugid() != 0 ||
459 		    (h = getenv("HOME")) == NULL) {
460 			if (((h = getlogin()) != NULL &&
461 			     (pwd = getpwnam(h)) != NULL) ||
462 			    (pwd = getpwuid(getuid())) != NULL)
463 				h = pwd->pw_dir;
464 			else
465 				return (pattern);
466 		}
467 	}
468 	else {
469 		/*
470 		 * Expand a ~user
471 		 */
472 		if (g_Ctoc(patbuf, (char *)wbuf, sizeof(wbuf)))
473 			return (NULL);
474 		if ((pwd = getpwnam((char *)wbuf)) == NULL)
475 			return (pattern);
476 		else
477 			h = pwd->pw_dir;
478 	}
479 
480 	/* Copy the home directory */
481 	dc = wbuf;
482 	sc = h;
483 	wbufend = wbuf + MAXPATHLEN - 1;
484 	too_long = 1;
485 	memset(&mbs, 0, sizeof(mbs));
486 	while (dc <= wbufend) {
487 		clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
488 		if (clen == (size_t)-1 || clen == (size_t)-2) {
489 			/* XXX See initial comment #2. */
490 			wc = (unsigned char)*sc;
491 			clen = 1;
492 			memset(&mbs, 0, sizeof(mbs));
493 		}
494 		if ((*dc++ = wc) == EOS) {
495 			too_long = 0;
496 			break;
497 		}
498 		sc += clen;
499 	}
500 	if (too_long)
501 		return (NULL);
502 
503 	dc = wbuf;
504 	for (b = patbuf; b < eb && *dc != EOS; *b++ = *dc++ | M_PROTECT)
505 		continue;
506 	if (*dc != EOS)
507 		return (NULL);
508 
509 	/* Append the rest of the pattern */
510 	if (*p != EOS) {
511 		too_long = 1;
512 		while (b <= eb) {
513 			if ((*b++ = *p++) == EOS) {
514 				too_long = 0;
515 				break;
516 			}
517 		}
518 		if (too_long)
519 			return (NULL);
520 	} else
521 		*b = EOS;
522 
523 	return (patbuf);
524 }
525 
526 
527 /*
528  * The main glob() routine: compiles the pattern (optionally processing
529  * quotes), calls glob1() to do the real pattern matching, and finally
530  * sorts the list (unless unsorted operation is requested).  Returns 0
531  * if things went well, nonzero if errors occurred.
532  */
533 static int
534 glob0(const Char *pattern, glob_t *pglob, struct glob_limit *limit,
535     const char *origpat) {
536 	const Char *qpatnext;
537 	int err;
538 	size_t oldpathc;
539 	Char *bufnext, c, patbuf[MAXPATHLEN];
540 
541 	qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
542 	if (qpatnext == NULL) {
543 		errno = E2BIG;
544 		return (GLOB_NOSPACE);
545 	}
546 	oldpathc = pglob->gl_pathc;
547 	bufnext = patbuf;
548 
549 	/* We don't need to check for buffer overflow any more. */
550 	while ((c = *qpatnext++) != EOS) {
551 		switch (c) {
552 		case LBRACKET:
553 			c = *qpatnext;
554 			if (c == NOT)
555 				++qpatnext;
556 			if (*qpatnext == EOS ||
557 			    g_strchr(qpatnext+1, RBRACKET) == NULL) {
558 				*bufnext++ = LBRACKET;
559 				if (c == NOT)
560 					--qpatnext;
561 				break;
562 			}
563 			*bufnext++ = M_SET;
564 			if (c == NOT)
565 				*bufnext++ = M_NOT;
566 			c = *qpatnext++;
567 			do {
568 				*bufnext++ = CHAR(c);
569 				if (*qpatnext == RANGE &&
570 				    (c = qpatnext[1]) != RBRACKET) {
571 					*bufnext++ = M_RNG;
572 					*bufnext++ = CHAR(c);
573 					qpatnext += 2;
574 				}
575 			} while ((c = *qpatnext++) != RBRACKET);
576 			pglob->gl_flags |= GLOB_MAGCHAR;
577 			*bufnext++ = M_END;
578 			break;
579 		case QUESTION:
580 			pglob->gl_flags |= GLOB_MAGCHAR;
581 			*bufnext++ = M_ONE;
582 			break;
583 		case STAR:
584 			pglob->gl_flags |= GLOB_MAGCHAR;
585 			/* collapse adjacent stars to one,
586 			 * to ensure "**" at the end continues to match the
587 			 * empty string
588 			 */
589 			if (bufnext == patbuf || bufnext[-1] != M_ALL)
590 			    *bufnext++ = M_ALL;
591 			break;
592 		default:
593 			*bufnext++ = CHAR(c);
594 			break;
595 		}
596 	}
597 	*bufnext = EOS;
598 #ifdef DEBUG
599 	qprintf("glob0:", patbuf);
600 #endif
601 
602 	if ((err = glob1(patbuf, pglob, limit)) != 0)
603 		return(err);
604 
605 	if (origpat != NULL)
606 		return (globfinal(pglob, limit, oldpathc, origpat));
607 
608 	return (0);
609 }
610 
611 static int
612 globfinal(glob_t *pglob, struct glob_limit *limit, size_t oldpathc,
613     const char *origpat) {
614 	if (pglob->gl_pathc == oldpathc)
615 		return (err_nomatch(pglob, limit, origpat));
616 
617 	if (!(pglob->gl_flags & GLOB_NOSORT))
618 		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
619 		    pglob->gl_pathc - oldpathc, sizeof(char *), compare);
620 
621 	return (0);
622 }
623 
624 static int
625 compare(const void *p, const void *q)
626 {
627 	return (strcoll(*(char **)p, *(char **)q));
628 }
629 
630 static int
631 glob1(Char *pattern, glob_t *pglob, struct glob_limit *limit)
632 {
633 	Char pathbuf[MAXPATHLEN];
634 
635 	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
636 	if (*pattern == EOS)
637 		return (0);
638 	return (glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1,
639 	    pattern, pglob, limit));
640 }
641 
642 /*
643  * The functions glob2 and glob3 are mutually recursive; there is one level
644  * of recursion for each segment in the pattern that contains one or more
645  * meta characters.
646  */
647 static int
648 glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern,
649       glob_t *pglob, struct glob_limit *limit)
650 {
651 	struct stat sb;
652 	Char *p, *q;
653 	int anymeta;
654 
655 	/*
656 	 * Loop over pattern segments until end of pattern or until
657 	 * segment with meta character found.
658 	 */
659 	for (anymeta = 0;;) {
660 		if (*pattern == EOS) {		/* End of pattern? */
661 			*pathend = EOS;
662 			if (g_lstat(pathbuf, &sb, pglob))
663 				return (0);
664 
665 			if ((pglob->gl_flags & GLOB_LIMIT) &&
666 			    limit->l_stat_cnt++ >= GLOB_LIMIT_STAT) {
667 				errno = E2BIG;
668 				return (GLOB_NOSPACE);
669 			}
670 			if ((pglob->gl_flags & GLOB_MARK) &&
671 			    UNPROT(pathend[-1]) != SEP &&
672 			    (S_ISDIR(sb.st_mode) ||
673 			    (S_ISLNK(sb.st_mode) &&
674 			    g_stat(pathbuf, &sb, pglob) == 0 &&
675 			    S_ISDIR(sb.st_mode)))) {
676 				if (pathend + 1 > pathend_last) {
677 					errno = E2BIG;
678 					return (GLOB_NOSPACE);
679 				}
680 				*pathend++ = SEP;
681 				*pathend = EOS;
682 			}
683 			++pglob->gl_matchc;
684 			return (globextend(pathbuf, pglob, limit, NULL));
685 		}
686 
687 		/* Find end of next segment, copy tentatively to pathend. */
688 		q = pathend;
689 		p = pattern;
690 		while (*p != EOS && UNPROT(*p) != SEP) {
691 			if (ismeta(*p))
692 				anymeta = 1;
693 			if (q + 1 > pathend_last) {
694 				errno = E2BIG;
695 				return (GLOB_NOSPACE);
696 			}
697 			*q++ = *p++;
698 		}
699 
700 		if (!anymeta) {		/* No expansion, do next segment. */
701 			pathend = q;
702 			pattern = p;
703 			while (UNPROT(*pattern) == SEP) {
704 				if (pathend + 1 > pathend_last) {
705 					errno = E2BIG;
706 					return (GLOB_NOSPACE);
707 				}
708 				*pathend++ = *pattern++;
709 			}
710 		} else			/* Need expansion, recurse. */
711 			return (glob3(pathbuf, pathend, pathend_last, pattern,
712 			    p, pglob, limit));
713 	}
714 	/* NOTREACHED */
715 }
716 
717 static int
718 glob3(Char *pathbuf, Char *pathend, Char *pathend_last,
719       Char *pattern, Char *restpattern,
720       glob_t *pglob, struct glob_limit *limit)
721 {
722 	struct dirent *dp;
723 	DIR *dirp;
724 	int err, too_long, saverrno, saverrno2;
725 	char buf[MAXPATHLEN + MB_LEN_MAX - 1];
726 
727 	struct dirent *(*readdirfunc)(DIR *);
728 
729 	if (pathend > pathend_last) {
730 		errno = E2BIG;
731 		return (GLOB_NOSPACE);
732 	}
733 	*pathend = EOS;
734 	if (pglob->gl_errfunc != NULL &&
735 	    g_Ctoc(pathbuf, buf, sizeof(buf))) {
736 		errno = E2BIG;
737 		return (GLOB_NOSPACE);
738 	}
739 
740 	saverrno = errno;
741 	errno = 0;
742 	if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
743 		if (errno == ENOENT || errno == ENOTDIR)
744 			return (0);
745 		err = err_aborted(pglob, errno, buf);
746 		if (errno == 0)
747 			errno = saverrno;
748 		return (err);
749 	}
750 
751 	err = 0;
752 
753 	/* pglob->gl_readdir takes a void *, fix this manually */
754 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
755 		readdirfunc = (struct dirent *(*)(DIR *))pglob->gl_readdir;
756 	else
757 		readdirfunc = readdir;
758 
759 	errno = 0;
760 	/* Search directory for matching names. */
761 	while ((dp = (*readdirfunc)(dirp)) != NULL) {
762 		char *sc;
763 		Char *dc;
764 		wchar_t wc;
765 		size_t clen;
766 		mbstate_t mbs;
767 
768 		if ((pglob->gl_flags & GLOB_LIMIT) &&
769 		    limit->l_readdir_cnt++ >= GLOB_LIMIT_READDIR) {
770 			errno = E2BIG;
771 			err = GLOB_NOSPACE;
772 			break;
773 		}
774 
775 		/* Initial DOT must be matched literally. */
776 		if (dp->d_name[0] == '.' && UNPROT(*pattern) != DOT) {
777 			errno = 0;
778 			continue;
779 		}
780 		memset(&mbs, 0, sizeof(mbs));
781 		dc = pathend;
782 		sc = dp->d_name;
783 		too_long = 1;
784 		while (dc <= pathend_last) {
785 			clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
786 			if (clen == (size_t)-1 || clen == (size_t)-2) {
787 				/* XXX See initial comment #2. */
788 				wc = (unsigned char)*sc;
789 				clen = 1;
790 				memset(&mbs, 0, sizeof(mbs));
791 			}
792 			if ((*dc++ = wc) == EOS) {
793 				too_long = 0;
794 				break;
795 			}
796 			sc += clen;
797 		}
798 		if (too_long && (err = err_aborted(pglob, ENAMETOOLONG,
799 		    buf))) {
800 			errno = ENAMETOOLONG;
801 			break;
802 		}
803 		if (too_long || !match(pathend, pattern, restpattern)) {
804 			*pathend = EOS;
805 			errno = 0;
806 			continue;
807 		}
808 		if (errno == 0)
809 			errno = saverrno;
810 		err = glob2(pathbuf, --dc, pathend_last, restpattern,
811 		    pglob, limit);
812 		if (err)
813 			break;
814 		errno = 0;
815 	}
816 
817 	saverrno2 = errno;
818 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
819 		(*pglob->gl_closedir)(dirp);
820 	else
821 		closedir(dirp);
822 	errno = saverrno2;
823 
824 	if (err)
825 		return (err);
826 
827 	if (dp == NULL && errno != 0 &&
828 	    (err = err_aborted(pglob, errno, buf)))
829 		return (err);
830 
831 	if (errno == 0)
832 		errno = saverrno;
833 	return (0);
834 }
835 
836 
837 /*
838  * Extend the gl_pathv member of a glob_t structure to accommodate a new item,
839  * add the new item, and update gl_pathc.
840  *
841  * This assumes the BSD realloc, which only copies the block when its size
842  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
843  * behavior.
844  *
845  * Return 0 if new item added, error code if memory couldn't be allocated.
846  *
847  * Invariant of the glob_t structure:
848  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
849  *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
850  */
851 static int
852 globextend(const Char *path, glob_t *pglob, struct glob_limit *limit,
853     const char *origpat)
854 {
855 	char **pathv;
856 	size_t i, newn, len;
857 	char *copy;
858 	const Char *p;
859 
860 	if ((pglob->gl_flags & GLOB_LIMIT) &&
861 	    pglob->gl_matchc > limit->l_path_lim) {
862 		errno = E2BIG;
863 		return (GLOB_NOSPACE);
864 	}
865 
866 	newn = 2 + pglob->gl_pathc + pglob->gl_offs;
867 	/* reallocarray(NULL, newn, size) is equivalent to malloc(newn*size). */
868 	pathv = reallocarray(pglob->gl_pathv, newn, sizeof(*pathv));
869 	if (pathv == NULL)
870 		return (GLOB_NOSPACE);
871 
872 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
873 		/* first time around -- clear initial gl_offs items */
874 		pathv += pglob->gl_offs;
875 		for (i = pglob->gl_offs + 1; --i > 0; )
876 			*--pathv = NULL;
877 	}
878 	pglob->gl_pathv = pathv;
879 
880 	if (origpat != NULL)
881 		copy = strdup(origpat);
882 	else {
883 		for (p = path; *p++ != EOS;)
884 			continue;
885 		len = MB_CUR_MAX * (size_t)(p - path); /* XXX overallocation */
886 		if ((copy = malloc(len)) != NULL) {
887 			if (g_Ctoc(path, copy, len)) {
888 				free(copy);
889 				errno = E2BIG;
890 				return (GLOB_NOSPACE);
891 			}
892 		}
893 	}
894 	if (copy != NULL) {
895 		limit->l_string_cnt += strlen(copy) + 1;
896 		if ((pglob->gl_flags & GLOB_LIMIT) &&
897 		    limit->l_string_cnt >= GLOB_LIMIT_STRING) {
898 			free(copy);
899 			errno = E2BIG;
900 			return (GLOB_NOSPACE);
901 		}
902 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
903 	}
904 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
905 	return (copy == NULL ? GLOB_NOSPACE : 0);
906 }
907 
908 /*
909  * pattern matching function for filenames.
910  */
911 static int
912 match(Char *name, Char *pat, Char *patend)
913 {
914 	int ok, negate_range;
915 	Char c, k, *nextp, *nextn;
916 	struct xlocale_collate *table =
917 		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
918 
919 	nextn = NULL;
920 	nextp = NULL;
921 
922 	while (1) {
923 		while (pat < patend) {
924 			c = *pat++;
925 			switch (c & M_MASK) {
926 			case M_ALL:
927 				if (pat == patend)
928 					return (1);
929 				if (*name == EOS)
930 					return (0);
931 				nextn = name + 1;
932 				nextp = pat - 1;
933 				break;
934 			case M_ONE:
935 				if (*name++ == EOS)
936 					goto fail;
937 				break;
938 			case M_SET:
939 				ok = 0;
940 				if ((k = *name++) == EOS)
941 					goto fail;
942 				negate_range = ((*pat & M_MASK) == M_NOT);
943 				if (negate_range != 0)
944 					++pat;
945 				while (((c = *pat++) & M_MASK) != M_END)
946 					if ((*pat & M_MASK) == M_RNG) {
947 						if (table->__collate_load_error ?
948 						    CHAR(c) <= CHAR(k) &&
949 						    CHAR(k) <= CHAR(pat[1]) :
950 						    __wcollate_range_cmp(CHAR(c),
951 						    CHAR(k)) <= 0 &&
952 						    __wcollate_range_cmp(CHAR(k),
953 						    CHAR(pat[1])) <= 0)
954 							ok = 1;
955 						pat += 2;
956 					} else if (c == k)
957 						ok = 1;
958 				if (ok == negate_range)
959 					goto fail;
960 				break;
961 			default:
962 				if (*name++ != c)
963 					goto fail;
964 				break;
965 			}
966 		}
967 		if (*name == EOS)
968 			return (1);
969 
970 	fail:
971 		if (nextn == NULL)
972 			break;
973 		pat = nextp;
974 		name = nextn;
975 	}
976 	return (0);
977 }
978 
979 /* Free allocated data belonging to a glob_t structure. */
980 void
981 globfree(glob_t *pglob)
982 {
983 	size_t i;
984 	char **pp;
985 
986 	if (pglob->gl_pathv != NULL) {
987 		pp = pglob->gl_pathv + pglob->gl_offs;
988 		for (i = pglob->gl_pathc; i--; ++pp)
989 			if (*pp)
990 				free(*pp);
991 		free(pglob->gl_pathv);
992 		pglob->gl_pathv = NULL;
993 	}
994 }
995 
996 static DIR *
997 g_opendir(Char *str, glob_t *pglob)
998 {
999 	char buf[MAXPATHLEN + MB_LEN_MAX - 1];
1000 
1001 	if (*str == EOS)
1002 		strcpy(buf, ".");
1003 	else {
1004 		if (g_Ctoc(str, buf, sizeof(buf))) {
1005 			errno = ENAMETOOLONG;
1006 			return (NULL);
1007 		}
1008 	}
1009 
1010 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1011 		return ((*pglob->gl_opendir)(buf));
1012 
1013 	return (opendir(buf));
1014 }
1015 
1016 static int
1017 g_lstat(Char *fn, struct stat *sb, glob_t *pglob)
1018 {
1019 	char buf[MAXPATHLEN + MB_LEN_MAX - 1];
1020 
1021 	if (g_Ctoc(fn, buf, sizeof(buf))) {
1022 		errno = ENAMETOOLONG;
1023 		return (-1);
1024 	}
1025 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1026 		return((*pglob->gl_lstat)(buf, sb));
1027 	return (lstat(buf, sb));
1028 }
1029 
1030 static int
1031 g_stat(Char *fn, struct stat *sb, glob_t *pglob)
1032 {
1033 	char buf[MAXPATHLEN + MB_LEN_MAX - 1];
1034 
1035 	if (g_Ctoc(fn, buf, sizeof(buf))) {
1036 		errno = ENAMETOOLONG;
1037 		return (-1);
1038 	}
1039 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1040 		return ((*pglob->gl_stat)(buf, sb));
1041 	return (stat(buf, sb));
1042 }
1043 
1044 static const Char *
1045 g_strchr(const Char *str, wchar_t ch)
1046 {
1047 
1048 	do {
1049 		if (*str == ch)
1050 			return (str);
1051 	} while (*str++);
1052 	return (NULL);
1053 }
1054 
1055 static int
1056 g_Ctoc(const Char *str, char *buf, size_t len)
1057 {
1058 	mbstate_t mbs;
1059 	size_t clen;
1060 
1061 	memset(&mbs, 0, sizeof(mbs));
1062 	while (len >= MB_CUR_MAX) {
1063 		clen = wcrtomb(buf, CHAR(*str), &mbs);
1064 		if (clen == (size_t)-1) {
1065 			/* XXX See initial comment #2. */
1066 			*buf = (char)CHAR(*str);
1067 			clen = 1;
1068 			memset(&mbs, 0, sizeof(mbs));
1069 		}
1070 		if (CHAR(*str) == EOS)
1071 			return (0);
1072 		str++;
1073 		buf += clen;
1074 		len -= clen;
1075 	}
1076 	return (1);
1077 }
1078 
1079 static int
1080 err_nomatch(glob_t *pglob, struct glob_limit *limit, const char *origpat) {
1081 	/*
1082 	 * If there was no match we are going to append the origpat
1083 	 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
1084 	 * and the origpat did not contain any magic characters
1085 	 * GLOB_NOMAGIC is there just for compatibility with csh.
1086 	 */
1087 	if ((pglob->gl_flags & GLOB_NOCHECK) ||
1088 	    ((pglob->gl_flags & GLOB_NOMAGIC) &&
1089 	    !(pglob->gl_flags & GLOB_MAGCHAR)))
1090 		return (globextend(NULL, pglob, limit, origpat));
1091 	return (GLOB_NOMATCH);
1092 }
1093 
1094 static int
1095 err_aborted(glob_t *pglob, int err, char *buf) {
1096 	if ((pglob->gl_errfunc != NULL && pglob->gl_errfunc(buf, err)) ||
1097 	    (pglob->gl_flags & GLOB_ERR))
1098 		return (GLOB_ABORTED);
1099 	return (0);
1100 }
1101 
1102 #ifdef DEBUG
1103 static void
1104 qprintf(const char *str, Char *s)
1105 {
1106 	Char *p;
1107 
1108 	(void)printf("%s\n", str);
1109 	if (s != NULL) {
1110 		for (p = s; *p != EOS; p++)
1111 			(void)printf("%c", (char)CHAR(*p));
1112 		(void)printf("\n");
1113 		for (p = s; *p != EOS; p++)
1114 			(void)printf("%c", (isprot(*p) ? '\\' : ' '));
1115 		(void)printf("\n");
1116 		for (p = s; *p != EOS; p++)
1117 			(void)printf("%c", (ismeta(*p) ? '_' : ' '));
1118 		(void)printf("\n");
1119 	}
1120 }
1121 #endif
1122