xref: /freebsd/lib/libc/gen/glob.c (revision 884d26c84cba3ffc3d4e626306098fcdfe6a0c2b)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Guido van Rossum.
7  *
8  * Copyright (c) 2011 The FreeBSD Foundation
9  * All rights reserved.
10  * Portions of this software were developed by David Chisnall
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37 
38 #if defined(LIBC_SCCS) && !defined(lint)
39 static char sccsid[] = "@(#)glob.c	8.3 (Berkeley) 10/13/93";
40 #endif /* LIBC_SCCS and not lint */
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43 
44 /*
45  * glob(3) -- a superset of the one defined in POSIX 1003.2.
46  *
47  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
48  *
49  * Optional extra services, controlled by flags not defined by POSIX:
50  *
51  * GLOB_QUOTE:
52  *	Escaping convention: \ inhibits any special meaning the following
53  *	character might have (except \ at end of string is retained).
54  * GLOB_MAGCHAR:
55  *	Set in gl_flags if pattern contained a globbing character.
56  * GLOB_NOMAGIC:
57  *	Same as GLOB_NOCHECK, but it will only append pattern if it did
58  *	not contain any magic characters.  [Used in csh style globbing]
59  * GLOB_ALTDIRFUNC:
60  *	Use alternately specified directory access functions.
61  * GLOB_TILDE:
62  *	expand ~user/foo to the /home/dir/of/user/foo
63  * GLOB_BRACE:
64  *	expand {1,2}{a,b} to 1a 1b 2a 2b
65  * gl_matchc:
66  *	Number of matches in the current invocation of glob.
67  */
68 
69 /*
70  * Some notes on multibyte character support:
71  * 1. Patterns with illegal byte sequences match nothing - even if
72  *    GLOB_NOCHECK is specified.
73  * 2. Illegal byte sequences in filenames are handled by treating them as
74  *    single-byte characters with a value of the first byte of the sequence
75  *    cast to wchar_t.
76  * 3. State-dependent encodings are not currently supported.
77  */
78 
79 #include <sys/param.h>
80 #include <sys/stat.h>
81 
82 #include <ctype.h>
83 #include <dirent.h>
84 #include <errno.h>
85 #include <glob.h>
86 #include <limits.h>
87 #include <pwd.h>
88 #include <stdint.h>
89 #include <stdio.h>
90 #include <stdlib.h>
91 #include <string.h>
92 #include <unistd.h>
93 #include <wchar.h>
94 
95 /*
96  * glob(3) expansion limits. Stop the expansion if any of these limits
97  * is reached. This caps the runtime in the face of DoS attacks. See
98  * also CVE-2010-2632
99  */
100 #define	GLOB_LIMIT_BRACE	128	/* number of brace calls */
101 #define	GLOB_LIMIT_PATH		65536	/* number of path elements */
102 #define	GLOB_LIMIT_READDIR	16384	/* number of readdirs */
103 #define	GLOB_LIMIT_STAT		1024	/* number of stat system calls */
104 #define	GLOB_LIMIT_STRING	ARG_MAX	/* maximum total size for paths */
105 
106 struct glob_limit {
107 	size_t	l_brace_cnt;
108 	size_t	l_path_lim;
109 	size_t	l_readdir_cnt;
110 	size_t	l_stat_cnt;
111 	size_t	l_string_cnt;
112 };
113 
114 #define	DOLLAR		'$'
115 #define	DOT		'.'
116 #define	EOS		'\0'
117 #define	LBRACKET	'['
118 #define	NOT		'!'
119 #define	QUESTION	'?'
120 #define	QUOTE		'\\'
121 #define	RANGE		'-'
122 #define	RBRACKET	']'
123 #define	SEP		'/'
124 #define	STAR		'*'
125 #define	TILDE		'~'
126 #define	UNDERSCORE	'_'
127 #define	LBRACE		'{'
128 #define	RBRACE		'}'
129 #define	SLASH		'/'
130 #define	COMMA		','
131 
132 #ifndef DEBUG
133 
134 #define	M_QUOTE		0x8000000000ULL
135 #define	M_PROTECT	0x4000000000ULL
136 #define	M_MASK		0xffffffffffULL
137 #define	M_CHAR		0x00ffffffffULL
138 
139 typedef uint_fast64_t Char;
140 
141 #else
142 
143 #define	M_QUOTE		0x80
144 #define	M_PROTECT	0x40
145 #define	M_MASK		0xff
146 #define	M_CHAR		0x7f
147 
148 typedef char Char;
149 
150 #endif
151 
152 
153 #define	CHAR(c)		((Char)((c)&M_CHAR))
154 #define	META(c)		((Char)((c)|M_QUOTE))
155 #define	M_ALL		META('*')
156 #define	M_END		META(']')
157 #define	M_NOT		META('!')
158 #define	M_ONE		META('?')
159 #define	M_RNG		META('-')
160 #define	M_SET		META('[')
161 #define	ismeta(c)	(((c)&M_QUOTE) != 0)
162 
163 
164 static int	 compare(const void *, const void *);
165 static int	 g_Ctoc(const Char *, char *, size_t);
166 static int	 g_lstat(Char *, struct stat *, glob_t *);
167 static DIR	*g_opendir(Char *, glob_t *);
168 static const Char *g_strchr(const Char *, wchar_t);
169 #ifdef notdef
170 static Char	*g_strcat(Char *, const Char *);
171 #endif
172 static int	 g_stat(Char *, struct stat *, glob_t *);
173 static int	 glob0(const Char *, glob_t *, struct glob_limit *);
174 static int	 glob1(Char *, glob_t *, struct glob_limit *);
175 static int	 glob2(Char *, Char *, Char *, Char *, glob_t *,
176     struct glob_limit *);
177 static int	 glob3(Char *, Char *, Char *, Char *, Char *, glob_t *,
178     struct glob_limit *);
179 static int	 globextend(const Char *, glob_t *, struct glob_limit *);
180 static const Char *
181 		 globtilde(const Char *, Char *, size_t, glob_t *);
182 static int	 globexp1(const Char *, glob_t *, struct glob_limit *);
183 static int	 globexp2(const Char *, const Char *, glob_t *, int *,
184     struct glob_limit *);
185 static int	 match(Char *, Char *, Char *);
186 #ifdef DEBUG
187 static void	 qprintf(const char *, Char *);
188 #endif
189 
190 int
191 glob(const char * __restrict pattern, int flags,
192 	 int (*errfunc)(const char *, int), glob_t * __restrict pglob)
193 {
194 	struct glob_limit limit = { 0, 0, 0, 0, 0 };
195 	const char *patnext;
196 	Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot;
197 	mbstate_t mbs;
198 	wchar_t wc;
199 	size_t clen;
200 
201 	patnext = pattern;
202 	if (!(flags & GLOB_APPEND)) {
203 		pglob->gl_pathc = 0;
204 		pglob->gl_pathv = NULL;
205 		if (!(flags & GLOB_DOOFFS))
206 			pglob->gl_offs = 0;
207 	}
208 	if (flags & GLOB_LIMIT) {
209 		limit.l_path_lim = pglob->gl_matchc;
210 		if (limit.l_path_lim == 0)
211 			limit.l_path_lim = GLOB_LIMIT_PATH;
212 	}
213 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
214 	pglob->gl_errfunc = errfunc;
215 	pglob->gl_matchc = 0;
216 
217 	bufnext = patbuf;
218 	bufend = bufnext + MAXPATHLEN - 1;
219 	if (flags & GLOB_NOESCAPE) {
220 		memset(&mbs, 0, sizeof(mbs));
221 		while (bufend - bufnext >= MB_CUR_MAX) {
222 			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
223 			if (clen == (size_t)-1 || clen == (size_t)-2)
224 				return (GLOB_NOMATCH);
225 			else if (clen == 0)
226 				break;
227 			*bufnext++ = wc;
228 			patnext += clen;
229 		}
230 	} else {
231 		/* Protect the quoted characters. */
232 		memset(&mbs, 0, sizeof(mbs));
233 		while (bufend - bufnext >= MB_CUR_MAX) {
234 			if (*patnext == QUOTE) {
235 				if (*++patnext == EOS) {
236 					*bufnext++ = QUOTE | M_PROTECT;
237 					continue;
238 				}
239 				prot = M_PROTECT;
240 			} else
241 				prot = 0;
242 			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
243 			if (clen == (size_t)-1 || clen == (size_t)-2)
244 				return (GLOB_NOMATCH);
245 			else if (clen == 0)
246 				break;
247 			*bufnext++ = wc | prot;
248 			patnext += clen;
249 		}
250 	}
251 	*bufnext = EOS;
252 
253 	if (flags & GLOB_BRACE)
254 	    return (globexp1(patbuf, pglob, &limit));
255 	else
256 	    return (glob0(patbuf, pglob, &limit));
257 }
258 
259 /*
260  * Expand recursively a glob {} pattern. When there is no more expansion
261  * invoke the standard globbing routine to glob the rest of the magic
262  * characters
263  */
264 static int
265 globexp1(const Char *pattern, glob_t *pglob, struct glob_limit *limit)
266 {
267 	const Char* ptr = pattern;
268 	int rv;
269 
270 	if ((pglob->gl_flags & GLOB_LIMIT) &&
271 	    limit->l_brace_cnt++ >= GLOB_LIMIT_BRACE) {
272 		errno = 0;
273 		return (GLOB_NOSPACE);
274 	}
275 
276 	/* Protect a single {}, for find(1), like csh */
277 	if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS)
278 		return glob0(pattern, pglob, limit);
279 
280 	while ((ptr = g_strchr(ptr, LBRACE)) != NULL)
281 		if (!globexp2(ptr, pattern, pglob, &rv, limit))
282 			return rv;
283 
284 	return glob0(pattern, pglob, limit);
285 }
286 
287 
288 /*
289  * Recursive brace globbing helper. Tries to expand a single brace.
290  * If it succeeds then it invokes globexp1 with the new pattern.
291  * If it fails then it tries to glob the rest of the pattern and returns.
292  */
293 static int
294 globexp2(const Char *ptr, const Char *pattern, glob_t *pglob, int *rv,
295     struct glob_limit *limit)
296 {
297 	int     i;
298 	Char   *lm, *ls;
299 	const Char *pe, *pm, *pm1, *pl;
300 	Char    patbuf[MAXPATHLEN];
301 
302 	/* copy part up to the brace */
303 	for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
304 		continue;
305 	*lm = EOS;
306 	ls = lm;
307 
308 	/* Find the balanced brace */
309 	for (i = 0, pe = ++ptr; *pe; pe++)
310 		if (*pe == LBRACKET) {
311 			/* Ignore everything between [] */
312 			for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++)
313 				continue;
314 			if (*pe == EOS) {
315 				/*
316 				 * We could not find a matching RBRACKET.
317 				 * Ignore and just look for RBRACE
318 				 */
319 				pe = pm;
320 			}
321 		}
322 		else if (*pe == LBRACE)
323 			i++;
324 		else if (*pe == RBRACE) {
325 			if (i == 0)
326 				break;
327 			i--;
328 		}
329 
330 	/* Non matching braces; just glob the pattern */
331 	if (i != 0 || *pe == EOS) {
332 		*rv = glob0(patbuf, pglob, limit);
333 		return (0);
334 	}
335 
336 	for (i = 0, pl = pm = ptr; pm <= pe; pm++)
337 		switch (*pm) {
338 		case LBRACKET:
339 			/* Ignore everything between [] */
340 			for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++)
341 				continue;
342 			if (*pm == EOS) {
343 				/*
344 				 * We could not find a matching RBRACKET.
345 				 * Ignore and just look for RBRACE
346 				 */
347 				pm = pm1;
348 			}
349 			break;
350 
351 		case LBRACE:
352 			i++;
353 			break;
354 
355 		case RBRACE:
356 			if (i) {
357 			    i--;
358 			    break;
359 			}
360 			/* FALLTHROUGH */
361 		case COMMA:
362 			if (i && *pm == COMMA)
363 				break;
364 			else {
365 				/* Append the current string */
366 				for (lm = ls; (pl < pm); *lm++ = *pl++)
367 					continue;
368 				/*
369 				 * Append the rest of the pattern after the
370 				 * closing brace
371 				 */
372 				for (pl = pe + 1; (*lm++ = *pl++) != EOS;)
373 					continue;
374 
375 				/* Expand the current pattern */
376 #ifdef DEBUG
377 				qprintf("globexp2:", patbuf);
378 #endif
379 				*rv = globexp1(patbuf, pglob, limit);
380 
381 				/* move after the comma, to the next string */
382 				pl = pm + 1;
383 			}
384 			break;
385 
386 		default:
387 			break;
388 		}
389 	*rv = 0;
390 	return (0);
391 }
392 
393 
394 
395 /*
396  * expand tilde from the passwd file.
397  */
398 static const Char *
399 globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob)
400 {
401 	struct passwd *pwd;
402 	char *h;
403 	const Char *p;
404 	Char *b, *eb;
405 
406 	if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE))
407 		return (pattern);
408 
409 	/*
410 	 * Copy up to the end of the string or /
411 	 */
412 	eb = &patbuf[patbuf_len - 1];
413 	for (p = pattern + 1, h = (char *) patbuf;
414 	    h < (char *)eb && *p && *p != SLASH; *h++ = *p++)
415 		continue;
416 
417 	*h = EOS;
418 
419 	if (((char *) patbuf)[0] == EOS) {
420 		/*
421 		 * handle a plain ~ or ~/ by expanding $HOME first (iff
422 		 * we're not running setuid or setgid) and then trying
423 		 * the password file
424 		 */
425 		if (issetugid() != 0 ||
426 		    (h = getenv("HOME")) == NULL) {
427 			if (((h = getlogin()) != NULL &&
428 			     (pwd = getpwnam(h)) != NULL) ||
429 			    (pwd = getpwuid(getuid())) != NULL)
430 				h = pwd->pw_dir;
431 			else
432 				return (pattern);
433 		}
434 	}
435 	else {
436 		/*
437 		 * Expand a ~user
438 		 */
439 		if ((pwd = getpwnam((char*) patbuf)) == NULL)
440 			return (pattern);
441 		else
442 			h = pwd->pw_dir;
443 	}
444 
445 	/* Copy the home directory */
446 	for (b = patbuf; b < eb && *h; *b++ = *h++)
447 		continue;
448 
449 	/* Append the rest of the pattern */
450 	while (b < eb && (*b++ = *p++) != EOS)
451 		continue;
452 	*b = EOS;
453 
454 	return (patbuf);
455 }
456 
457 
458 /*
459  * The main glob() routine: compiles the pattern (optionally processing
460  * quotes), calls glob1() to do the real pattern matching, and finally
461  * sorts the list (unless unsorted operation is requested).  Returns 0
462  * if things went well, nonzero if errors occurred.
463  */
464 static int
465 glob0(const Char *pattern, glob_t *pglob, struct glob_limit *limit)
466 {
467 	const Char *qpatnext;
468 	int err;
469 	size_t oldpathc;
470 	Char *bufnext, c, patbuf[MAXPATHLEN];
471 
472 	qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
473 	oldpathc = pglob->gl_pathc;
474 	bufnext = patbuf;
475 
476 	/* We don't need to check for buffer overflow any more. */
477 	while ((c = *qpatnext++) != EOS) {
478 		switch (c) {
479 		case LBRACKET:
480 			c = *qpatnext;
481 			if (c == NOT)
482 				++qpatnext;
483 			if (*qpatnext == EOS ||
484 			    g_strchr(qpatnext+1, RBRACKET) == NULL) {
485 				*bufnext++ = LBRACKET;
486 				if (c == NOT)
487 					--qpatnext;
488 				break;
489 			}
490 			*bufnext++ = M_SET;
491 			if (c == NOT)
492 				*bufnext++ = M_NOT;
493 			c = *qpatnext++;
494 			do {
495 				*bufnext++ = CHAR(c);
496 				if (*qpatnext == RANGE &&
497 				    (c = qpatnext[1]) != RBRACKET) {
498 					*bufnext++ = M_RNG;
499 					*bufnext++ = CHAR(c);
500 					qpatnext += 2;
501 				}
502 			} while ((c = *qpatnext++) != RBRACKET);
503 			pglob->gl_flags |= GLOB_MAGCHAR;
504 			*bufnext++ = M_END;
505 			break;
506 		case QUESTION:
507 			pglob->gl_flags |= GLOB_MAGCHAR;
508 			*bufnext++ = M_ONE;
509 			break;
510 		case STAR:
511 			pglob->gl_flags |= GLOB_MAGCHAR;
512 			/* collapse adjacent stars to one,
513 			 * to avoid exponential behavior
514 			 */
515 			if (bufnext == patbuf || bufnext[-1] != M_ALL)
516 			    *bufnext++ = M_ALL;
517 			break;
518 		default:
519 			*bufnext++ = CHAR(c);
520 			break;
521 		}
522 	}
523 	*bufnext = EOS;
524 #ifdef DEBUG
525 	qprintf("glob0:", patbuf);
526 #endif
527 
528 	if ((err = glob1(patbuf, pglob, limit)) != 0)
529 		return(err);
530 
531 	/*
532 	 * If there was no match we are going to append the pattern
533 	 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
534 	 * and the pattern did not contain any magic characters
535 	 * GLOB_NOMAGIC is there just for compatibility with csh.
536 	 */
537 	if (pglob->gl_pathc == oldpathc) {
538 		if (((pglob->gl_flags & GLOB_NOCHECK) ||
539 		    ((pglob->gl_flags & GLOB_NOMAGIC) &&
540 			!(pglob->gl_flags & GLOB_MAGCHAR))))
541 			return (globextend(pattern, pglob, limit));
542 		else
543 			return (GLOB_NOMATCH);
544 	}
545 	if (!(pglob->gl_flags & GLOB_NOSORT))
546 		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
547 		    pglob->gl_pathc - oldpathc, sizeof(char *), compare);
548 	return (0);
549 }
550 
551 static int
552 compare(const void *p, const void *q)
553 {
554 	return (strcmp(*(char **)p, *(char **)q));
555 }
556 
557 static int
558 glob1(Char *pattern, glob_t *pglob, struct glob_limit *limit)
559 {
560 	Char pathbuf[MAXPATHLEN];
561 
562 	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
563 	if (*pattern == EOS)
564 		return (0);
565 	return (glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1,
566 	    pattern, pglob, limit));
567 }
568 
569 /*
570  * The functions glob2 and glob3 are mutually recursive; there is one level
571  * of recursion for each segment in the pattern that contains one or more
572  * meta characters.
573  */
574 static int
575 glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern,
576       glob_t *pglob, struct glob_limit *limit)
577 {
578 	struct stat sb;
579 	Char *p, *q;
580 	int anymeta;
581 
582 	/*
583 	 * Loop over pattern segments until end of pattern or until
584 	 * segment with meta character found.
585 	 */
586 	for (anymeta = 0;;) {
587 		if (*pattern == EOS) {		/* End of pattern? */
588 			*pathend = EOS;
589 			if (g_lstat(pathbuf, &sb, pglob))
590 				return (0);
591 
592 			if ((pglob->gl_flags & GLOB_LIMIT) &&
593 			    limit->l_stat_cnt++ >= GLOB_LIMIT_STAT) {
594 				errno = 0;
595 				if (pathend + 1 > pathend_last)
596 					return (GLOB_ABORTED);
597 				*pathend++ = SEP;
598 				*pathend = EOS;
599 				return (GLOB_NOSPACE);
600 			}
601 			if (((pglob->gl_flags & GLOB_MARK) &&
602 			    pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
603 			    || (S_ISLNK(sb.st_mode) &&
604 			    (g_stat(pathbuf, &sb, pglob) == 0) &&
605 			    S_ISDIR(sb.st_mode)))) {
606 				if (pathend + 1 > pathend_last)
607 					return (GLOB_ABORTED);
608 				*pathend++ = SEP;
609 				*pathend = EOS;
610 			}
611 			++pglob->gl_matchc;
612 			return (globextend(pathbuf, pglob, limit));
613 		}
614 
615 		/* Find end of next segment, copy tentatively to pathend. */
616 		q = pathend;
617 		p = pattern;
618 		while (*p != EOS && *p != SEP) {
619 			if (ismeta(*p))
620 				anymeta = 1;
621 			if (q + 1 > pathend_last)
622 				return (GLOB_ABORTED);
623 			*q++ = *p++;
624 		}
625 
626 		if (!anymeta) {		/* No expansion, do next segment. */
627 			pathend = q;
628 			pattern = p;
629 			while (*pattern == SEP) {
630 				if (pathend + 1 > pathend_last)
631 					return (GLOB_ABORTED);
632 				*pathend++ = *pattern++;
633 			}
634 		} else			/* Need expansion, recurse. */
635 			return (glob3(pathbuf, pathend, pathend_last, pattern,
636 			    p, pglob, limit));
637 	}
638 	/* NOTREACHED */
639 }
640 
641 static int
642 glob3(Char *pathbuf, Char *pathend, Char *pathend_last,
643       Char *pattern, Char *restpattern,
644       glob_t *pglob, struct glob_limit *limit)
645 {
646 	struct dirent *dp;
647 	DIR *dirp;
648 	int err;
649 	char buf[MAXPATHLEN];
650 
651 	struct dirent *(*readdirfunc)(DIR *);
652 
653 	if (pathend > pathend_last)
654 		return (GLOB_ABORTED);
655 	*pathend = EOS;
656 	errno = 0;
657 
658 	if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
659 		/* TODO: don't call for ENOENT or ENOTDIR? */
660 		if (pglob->gl_errfunc) {
661 			if (g_Ctoc(pathbuf, buf, sizeof(buf)))
662 				return (GLOB_ABORTED);
663 			if (pglob->gl_errfunc(buf, errno) ||
664 			    pglob->gl_flags & GLOB_ERR)
665 				return (GLOB_ABORTED);
666 		}
667 		return (0);
668 	}
669 
670 	err = 0;
671 
672 	/* pglob->gl_readdir takes a void *, fix this manually */
673 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
674 		readdirfunc = (struct dirent *(*)(DIR *))pglob->gl_readdir;
675 	else
676 		readdirfunc = readdir;
677 
678 	/* Search directory for matching names. */
679 	while ((dp = (*readdirfunc)(dirp)) != NULL) {
680 		char *sc;
681 		Char *dc;
682 		wchar_t wc;
683 		size_t clen;
684 		mbstate_t mbs;
685 
686 		if ((pglob->gl_flags & GLOB_LIMIT) &&
687 		    limit->l_readdir_cnt++ >= GLOB_LIMIT_READDIR) {
688 			errno = 0;
689 			if (pathend + 1 > pathend_last)
690 				err = GLOB_ABORTED;
691 			else {
692 				*pathend++ = SEP;
693 				*pathend = EOS;
694 				err = GLOB_NOSPACE;
695 			}
696 			break;
697 		}
698 
699 		/* Initial DOT must be matched literally. */
700 		if (dp->d_name[0] == DOT && *pattern != DOT)
701 			continue;
702 		memset(&mbs, 0, sizeof(mbs));
703 		dc = pathend;
704 		sc = dp->d_name;
705 		while (dc < pathend_last) {
706 			clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
707 			if (clen == (size_t)-1 || clen == (size_t)-2) {
708 				wc = *sc;
709 				clen = 1;
710 				memset(&mbs, 0, sizeof(mbs));
711 			}
712 			if ((*dc++ = wc) == EOS)
713 				break;
714 			sc += clen;
715 		}
716 		if (!match(pathend, pattern, restpattern)) {
717 			*pathend = EOS;
718 			continue;
719 		}
720 		err = glob2(pathbuf, --dc, pathend_last, restpattern,
721 		    pglob, limit);
722 		if (err)
723 			break;
724 	}
725 
726 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
727 		(*pglob->gl_closedir)(dirp);
728 	else
729 		closedir(dirp);
730 	return (err);
731 }
732 
733 
734 /*
735  * Extend the gl_pathv member of a glob_t structure to accommodate a new item,
736  * add the new item, and update gl_pathc.
737  *
738  * This assumes the BSD realloc, which only copies the block when its size
739  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
740  * behavior.
741  *
742  * Return 0 if new item added, error code if memory couldn't be allocated.
743  *
744  * Invariant of the glob_t structure:
745  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
746  *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
747  */
748 static int
749 globextend(const Char *path, glob_t *pglob, struct glob_limit *limit)
750 {
751 	char **pathv;
752 	size_t i, newsize, len;
753 	char *copy;
754 	const Char *p;
755 
756 	if ((pglob->gl_flags & GLOB_LIMIT) &&
757 	    pglob->gl_matchc > limit->l_path_lim) {
758 		errno = 0;
759 		return (GLOB_NOSPACE);
760 	}
761 
762 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
763 	/* realloc(NULL, newsize) is equivalent to malloc(newsize). */
764 	pathv = realloc((void *)pglob->gl_pathv, newsize);
765 	if (pathv == NULL)
766 		return (GLOB_NOSPACE);
767 
768 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
769 		/* first time around -- clear initial gl_offs items */
770 		pathv += pglob->gl_offs;
771 		for (i = pglob->gl_offs + 1; --i > 0; )
772 			*--pathv = NULL;
773 	}
774 	pglob->gl_pathv = pathv;
775 
776 	for (p = path; *p++;)
777 		continue;
778 	len = MB_CUR_MAX * (size_t)(p - path);	/* XXX overallocation */
779 	limit->l_string_cnt += len;
780 	if ((pglob->gl_flags & GLOB_LIMIT) &&
781 	    limit->l_string_cnt >= GLOB_LIMIT_STRING) {
782 		errno = 0;
783 		return (GLOB_NOSPACE);
784 	}
785 	if ((copy = malloc(len)) != NULL) {
786 		if (g_Ctoc(path, copy, len)) {
787 			free(copy);
788 			return (GLOB_NOSPACE);
789 		}
790 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
791 	}
792 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
793 	return (copy == NULL ? GLOB_NOSPACE : 0);
794 }
795 
796 /*
797  * pattern matching function for filenames.  Each occurrence of the *
798  * pattern causes a recursion level.
799  */
800 static int
801 match(Char *name, Char *pat, Char *patend)
802 {
803 	int ok, negate_range;
804 	Char c, k;
805 
806 	while (pat < patend) {
807 		c = *pat++;
808 		switch (c & M_MASK) {
809 		case M_ALL:
810 			if (pat == patend)
811 				return (1);
812 			do
813 			    if (match(name, pat, patend))
814 				    return (1);
815 			while (*name++ != EOS);
816 			return (0);
817 		case M_ONE:
818 			if (*name++ == EOS)
819 				return (0);
820 			break;
821 		case M_SET:
822 			ok = 0;
823 			if ((k = *name++) == EOS)
824 				return (0);
825 			if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS)
826 				++pat;
827 			while (((c = *pat++) & M_MASK) != M_END)
828 				if ((*pat & M_MASK) == M_RNG) {
829 					if (CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1]))
830 						ok = 1;
831 					pat += 2;
832 				} else if (c == k)
833 					ok = 1;
834 			if (ok == negate_range)
835 				return (0);
836 			break;
837 		default:
838 			if (*name++ != c)
839 				return (0);
840 			break;
841 		}
842 	}
843 	return (*name == EOS);
844 }
845 
846 /* Free allocated data belonging to a glob_t structure. */
847 void
848 globfree(glob_t *pglob)
849 {
850 	size_t i;
851 	char **pp;
852 
853 	if (pglob->gl_pathv != NULL) {
854 		pp = pglob->gl_pathv + pglob->gl_offs;
855 		for (i = pglob->gl_pathc; i--; ++pp)
856 			if (*pp)
857 				free(*pp);
858 		free(pglob->gl_pathv);
859 		pglob->gl_pathv = NULL;
860 	}
861 }
862 
863 static DIR *
864 g_opendir(Char *str, glob_t *pglob)
865 {
866 	char buf[MAXPATHLEN];
867 
868 	if (!*str)
869 		strcpy(buf, ".");
870 	else {
871 		if (g_Ctoc(str, buf, sizeof(buf)))
872 			return (NULL);
873 	}
874 
875 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
876 		return ((*pglob->gl_opendir)(buf));
877 
878 	return (opendir(buf));
879 }
880 
881 static int
882 g_lstat(Char *fn, struct stat *sb, glob_t *pglob)
883 {
884 	char buf[MAXPATHLEN];
885 
886 	if (g_Ctoc(fn, buf, sizeof(buf))) {
887 		errno = ENAMETOOLONG;
888 		return (-1);
889 	}
890 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
891 		return((*pglob->gl_lstat)(buf, sb));
892 	return (lstat(buf, sb));
893 }
894 
895 static int
896 g_stat(Char *fn, struct stat *sb, glob_t *pglob)
897 {
898 	char buf[MAXPATHLEN];
899 
900 	if (g_Ctoc(fn, buf, sizeof(buf))) {
901 		errno = ENAMETOOLONG;
902 		return (-1);
903 	}
904 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
905 		return ((*pglob->gl_stat)(buf, sb));
906 	return (stat(buf, sb));
907 }
908 
909 static const Char *
910 g_strchr(const Char *str, wchar_t ch)
911 {
912 
913 	do {
914 		if (*str == ch)
915 			return (str);
916 	} while (*str++);
917 	return (NULL);
918 }
919 
920 static int
921 g_Ctoc(const Char *str, char *buf, size_t len)
922 {
923 	mbstate_t mbs;
924 	size_t clen;
925 
926 	memset(&mbs, 0, sizeof(mbs));
927 	while (len >= MB_CUR_MAX) {
928 		clen = wcrtomb(buf, *str, &mbs);
929 		if (clen == (size_t)-1)
930 			return (1);
931 		if (*str == L'\0')
932 			return (0);
933 		str++;
934 		buf += clen;
935 		len -= clen;
936 	}
937 	return (1);
938 }
939 
940 #ifdef DEBUG
941 static void
942 qprintf(const char *str, Char *s)
943 {
944 	Char *p;
945 
946 	(void)printf("%s:\n", str);
947 	for (p = s; *p; p++)
948 		(void)printf("%c", CHAR(*p));
949 	(void)printf("\n");
950 	for (p = s; *p; p++)
951 		(void)printf("%c", *p & M_PROTECT ? '"' : ' ');
952 	(void)printf("\n");
953 	for (p = s; *p; p++)
954 		(void)printf("%c", ismeta(*p) ? '_' : ' ');
955 	(void)printf("\n");
956 }
957 #endif
958