xref: /freebsd/contrib/bmake/dir.c (revision 02e9120893770924227138ba49df1edb3896112a)
1 /*	$NetBSD: dir.c,v 1.282 2023/06/23 04:56:54 rillig Exp $	*/
2 
3 /*
4  * Copyright (c) 1988, 1989, 1990 The Regents of the University of California.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Adam de Boor.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * Copyright (c) 1988, 1989 by Adam de Boor
37  * Copyright (c) 1989 by Berkeley Softworks
38  * All rights reserved.
39  *
40  * This code is derived from software contributed to Berkeley by
41  * Adam de Boor.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice, this list of conditions and the following disclaimer.
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  * 3. All advertising materials mentioning features or use of this software
52  *    must display the following acknowledgement:
53  *	This product includes software developed by the University of
54  *	California, Berkeley and its contributors.
55  * 4. Neither the name of the University nor the names of its contributors
56  *    may be used to endorse or promote products derived from this software
57  *    without specific prior written permission.
58  *
59  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69  * SUCH DAMAGE.
70  */
71 
72 /*
73  * Directory searching using wildcards and/or normal names.
74  * Used both for source wildcarding in the makefile and for finding
75  * implicit sources.
76  *
77  * The interface for this module is:
78  *	Dir_Init	Initialize the module.
79  *
80  *	Dir_InitCur	Set the cur CachedDir.
81  *
82  *	Dir_InitDot	Set the dot CachedDir.
83  *
84  *	Dir_End		Clean up the module.
85  *
86  *	Dir_SetPATH	Set ${.PATH} to reflect state of dirSearchPath.
87  *
88  *	Dir_HasWildcards
89  *			Returns true if the name given it needs to
90  *			be wildcard-expanded.
91  *
92  *	SearchPath_Expand
93  *			Expand a filename pattern to find all matching files
94  *			from the search path.
95  *
96  *	Dir_FindFile	Searches for a file on a given search path.
97  *			If it exists, the entire path is returned.
98  *			Otherwise NULL is returned.
99  *
100  *	Dir_FindHereOrAbove
101  *			Search for a path in the current directory and
102  *			then all the directories above it in turn until
103  *			the path is found or we reach the root ("/").
104  *
105  *	Dir_UpdateMTime
106  *			Update the modification time and path of a node with
107  *			data from the file corresponding to the node.
108  *
109  *	SearchPath_Add	Add a directory to a search path.
110  *
111  *	SearchPath_ToFlags
112  *			Given a search path and a command flag, create
113  *			a string with each of the directories in the path
114  *			preceded by the command flag and all of them
115  *			separated by a space.
116  *
117  *	Dir_Destroy	Destroy an element of a search path. Frees up all
118  *			things that can be freed for the element as long
119  *			as the element is no longer referenced by any other
120  *			search path.
121  *
122  *	SearchPath_Clear
123  *			Resets a search path to the empty list.
124  *
125  * For debugging:
126  *	Dir_PrintDirectories
127  *			Print stats about the directory cache.
128  */
129 
130 #include <sys/types.h>
131 #include <sys/stat.h>
132 
133 #include <dirent.h>
134 #include <errno.h>
135 
136 #include "make.h"
137 #include "dir.h"
138 #include "job.h"
139 
140 /*	"@(#)dir.c	8.2 (Berkeley) 1/2/94"	*/
141 MAKE_RCSID("$NetBSD: dir.c,v 1.282 2023/06/23 04:56:54 rillig Exp $");
142 
143 /*
144  * A search path is a list of CachedDir structures. A CachedDir has in it the
145  * name of the directory and the names of all the files in the directory.
146  * This is used to cut down on the number of system calls necessary to find
147  * implicit dependents and their like. Since these searches are made before
148  * any actions are taken, we need not worry about the directory changing due
149  * to creation commands. If this hampers the style of some makefiles, they
150  * must be changed.
151  *
152  * All previously-read directories are kept in openDirs, which is checked
153  * first before a directory is opened.
154  *
155  * The need for the caching of whole directories is brought about by the
156  * multi-level transformation code in suff.c, which tends to search for far
157  * more files than regular make does. In the initial implementation, the
158  * amount of time spent performing "stat" calls was truly astronomical.
159  * The problem with caching at the start is, of course, that pmake doesn't
160  * then detect changes to these directories during the course of the make.
161  * Three possibilities suggest themselves:
162  *
163  * 1)	just use stat to test for a file's existence. As mentioned above,
164  *	this is very inefficient due to the number of checks engendered by
165  *	the multi-level transformation code.
166  *
167  * 2)	use readdir() and company to search the directories, keeping them
168  *	open between checks. I have tried this and while it didn't slow down
169  *	the process too much, it could severely affect the amount of
170  *	parallelism available as each directory open would take another file
171  *	descriptor out of play for handling I/O for another job. Given that
172  *	it is only recently (as of 1993 or earlier) that UNIX OS's have taken
173  *	to allowing more than 20 or 32 file descriptors for a process, this
174  *	doesn't seem acceptable to me.
175  *
176  * 3)	record the mtime of the directory in the CachedDir structure and
177  *	verify the directory hasn't changed since the contents were cached.
178  *	This will catch the creation or deletion of files, but not the
179  *	updating of files. However, since it is the creation and deletion
180  *	that is the problem, this could be a good thing to do. Unfortunately,
181  *	if the directory (say ".") were fairly large and changed fairly
182  *	frequently, the constant reloading could seriously degrade
183  *	performance. It might be good in such cases to keep track of the
184  *	number of reloadings and if the number goes over a (small) limit,
185  *	resort to using stat in its place.
186  *
187  * An additional thing to consider is that pmake is used primarily to create
188  * C programs and until recently (as of 1993 or earlier) pcc-based compilers
189  * refused to allow you to specify where the resulting object file should be
190  * placed. This forced all objects to be created in the current directory.
191  * This isn't meant as a full excuse, just an explanation of some of the
192  * reasons for the caching used here.
193  *
194  * One more note: the location of a target's file is only performed on the
195  * downward traversal of the graph and then only for terminal nodes in the
196  * graph. This could be construed as wrong in some cases, but prevents
197  * inadvertent modification of files when the "installed" directory for a
198  * file is provided in the search path.
199  *
200  * Another data structure maintained by this module is an mtime cache used
201  * when the searching of cached directories fails to find a file. In the past,
202  * Dir_FindFile would simply perform an access() call in such a case to
203  * determine if the file could be found using just the name given. When this
204  * hit, however, all that was gained was the knowledge that the file existed.
205  * Given that an access() is essentially a stat() without the copyout() call,
206  * and that the same filesystem overhead would have to be incurred in
 * Dir_UpdateMTime, it made sense to replace the access() with a stat() and
 * record the mtime in a cache for when Dir_UpdateMTime was actually called.
209  */
210 
211 
212 /* A cache for the filenames in a directory. */
213 struct CachedDir {
214 	/*
215 	 * Name of directory, either absolute or relative to the current
216 	 * directory. The name is not normalized in any way, that is, "."
217 	 * and "./." are different.
218 	 *
219 	 * Not sure what happens when .CURDIR is assigned a new value; see
220 	 * Parse_Var.
221 	 */
222 	char *name;
223 
224 	/*
225 	 * The number of SearchPaths that refer to this directory.
226 	 * Plus the number of global variables that refer to this directory.
227 	 * References from openDirs do not count though.
228 	 */
229 	int refCount;
230 
231 	/* The number of times a file in this directory has been found. */
232 	int hits;
233 
234 	/* The names of the directory entries. */
235 	HashSet files;
236 };
237 
238 typedef List CachedDirList;
239 typedef ListNode CachedDirListNode;
240 
241 typedef ListNode SearchPathNode;
242 
243 /* A list of cached directories, with fast lookup by directory name. */
244 typedef struct OpenDirs {
245 	CachedDirList list;
246 	HashTable /* of CachedDirListNode */ table;
247 } OpenDirs;
248 
249 
250 SearchPath dirSearchPath = { LST_INIT }; /* main search path */
251 
252 static OpenDirs openDirs;	/* all cached directories */
253 
254 /*
255  * Variables for gathering statistics on the efficiency of the caching
256  * mechanism.
257  */
258 static int hits;		/* Found in directory cache */
259 static int misses;		/* Sad, but not evil misses */
260 static int nearmisses;		/* Found under search path */
261 static int bigmisses;		/* Sought by itself */
262 
263 /* The cached contents of ".", the relative current directory. */
264 static CachedDir *dot = NULL;
265 /* The cached contents of the absolute current directory. */
266 static CachedDir *cur = NULL;
267 /* A fake path entry indicating we need to look for '.' last. */
268 static CachedDir *dotLast = NULL;
269 
270 /*
271  * Results of doing a last-resort stat in Dir_FindFile -- if we have to go to
272  * the system to find the file, we might as well have its mtime on record.
273  *
274  * XXX: If this is done way early, there's a chance other rules will have
275  * already updated the file, in which case we'll update it again. Generally,
276  * there won't be two rules to update a single file, so this should be ok,
277  * but...
278  */
279 static HashTable mtimes;
280 
281 static HashTable lmtimes;	/* same as mtimes but for lstat */
282 
283 
284 static void OpenDirs_Remove(OpenDirs *, const char *);
285 
286 
287 static CachedDir *
288 CachedDir_New(const char *name)
289 {
290 	CachedDir *dir = bmake_malloc(sizeof *dir);
291 
292 	dir->name = bmake_strdup(name);
293 	dir->refCount = 0;
294 	dir->hits = 0;
295 	HashSet_Init(&dir->files);
296 
297 #ifdef DEBUG_REFCNT
298 	DEBUG2(DIR, "CachedDir %p new  for \"%s\"\n", dir, dir->name);
299 #endif
300 
301 	return dir;
302 }
303 
304 static CachedDir *
305 CachedDir_Ref(CachedDir *dir)
306 {
307 	dir->refCount++;
308 
309 #ifdef DEBUG_REFCNT
310 	DEBUG3(DIR, "CachedDir %p ++ %d for \"%s\"\n",
311 	    dir, dir->refCount, dir->name);
312 #endif
313 
314 	return dir;
315 }
316 
317 static void
318 CachedDir_Unref(CachedDir *dir)
319 {
320 	dir->refCount--;
321 
322 #ifdef DEBUG_REFCNT
323 	DEBUG3(DIR, "CachedDir %p -- %d for \"%s\"\n",
324 	    dir, dir->refCount, dir->name);
325 #endif
326 
327 	if (dir->refCount > 0)
328 		return;
329 
330 #ifdef DEBUG_REFCNT
331 	DEBUG2(DIR, "CachedDir %p free for \"%s\"\n", dir, dir->name);
332 #endif
333 
334 	OpenDirs_Remove(&openDirs, dir->name);
335 
336 	free(dir->name);
337 	HashSet_Done(&dir->files);
338 	free(dir);
339 }
340 
341 /* Update the value of the CachedDir variable, updating the reference counts. */
342 static void
343 CachedDir_Assign(CachedDir **var, CachedDir *dir)
344 {
345 	CachedDir *prev;
346 
347 	prev = *var;
348 	*var = dir;
349 	if (dir != NULL)
350 		CachedDir_Ref(dir);
351 	if (prev != NULL)
352 		CachedDir_Unref(prev);
353 }
354 
355 static void
356 OpenDirs_Init(OpenDirs *odirs)
357 {
358 	Lst_Init(&odirs->list);
359 	HashTable_Init(&odirs->table);
360 }
361 
#ifdef CLEANUP
/*
 * Give up one reference to each directory on the list, then tear down the
 * list and the lookup table.
 */
static void
OpenDirs_Done(OpenDirs *odirs)
{
	CachedDirListNode *node, *following;

	node = odirs->list.first;
	DEBUG1(DIR, "OpenDirs_Done: %u entries to remove\n",
	    odirs->table.numEntries);

	for (; node != NULL; node = following) {
		CachedDir *cdir = node->datum;

		/*
		 * Fetch the successor up front, as CachedDir_Unref may
		 * remove the directory from the list.
		 */
		following = node->next;

		DEBUG2(DIR, "OpenDirs_Done: refCount %d for \"%s\"\n",
		    cdir->refCount, cdir->name);
		CachedDir_Unref(cdir);
	}

	Lst_Done(&odirs->list);
	HashTable_Done(&odirs->table);
}
#endif
381 
382 static CachedDir *
383 OpenDirs_Find(OpenDirs *odirs, const char *name)
384 {
385 	CachedDirListNode *ln = HashTable_FindValue(&odirs->table, name);
386 	return ln != NULL ? ln->datum : NULL;
387 }
388 
389 static void
390 OpenDirs_Add(OpenDirs *odirs, CachedDir *cdir)
391 {
392 	if (HashTable_FindEntry(&odirs->table, cdir->name) != NULL)
393 		return;
394 	Lst_Append(&odirs->list, cdir);
395 	HashTable_Set(&odirs->table, cdir->name, odirs->list.last);
396 }
397 
398 static void
399 OpenDirs_Remove(OpenDirs *odirs, const char *name)
400 {
401 	HashEntry *he = HashTable_FindEntry(&odirs->table, name);
402 	CachedDirListNode *ln;
403 	if (he == NULL)
404 		return;
405 	ln = HashEntry_Get(he);
406 	HashTable_DeleteEntry(&odirs->table, he);
407 	Lst_Remove(&odirs->list, ln);
408 }
409 
410 /*
411  * Returns 0 and the result of stat(2) or lstat(2) in *out_cst,
412  * or -1 on error.
413  */
414 static int
415 cached_stats(const char *pathname, struct cached_stat *out_cst,
416 	     bool useLstat, bool forceRefresh)
417 {
418 	HashTable *tbl = useLstat ? &lmtimes : &mtimes;
419 	struct stat sys_st;
420 	struct cached_stat *cst;
421 	int rc;
422 
423 	if (pathname == NULL || pathname[0] == '\0')
424 		return -1;	/* This can happen in meta mode. */
425 
426 	cst = HashTable_FindValue(tbl, pathname);
427 	if (cst != NULL && !forceRefresh) {
428 		*out_cst = *cst;
429 		DEBUG2(DIR, "Using cached time %s for %s\n",
430 		    Targ_FmtTime(cst->cst_mtime), pathname);
431 		return 0;
432 	}
433 
434 	rc = (useLstat ? lstat : stat)(pathname, &sys_st);
435 	if (rc == -1)
436 		return -1;	/* don't cache negative lookups */
437 
438 	if (sys_st.st_mtime == 0)
439 		sys_st.st_mtime = 1; /* avoid confusion with missing file */
440 
441 	if (cst == NULL) {
442 		cst = bmake_malloc(sizeof *cst);
443 		HashTable_Set(tbl, pathname, cst);
444 	}
445 
446 	cst->cst_mtime = sys_st.st_mtime;
447 	cst->cst_mode = sys_st.st_mode;
448 
449 	*out_cst = *cst;
450 	DEBUG2(DIR, "   Caching %s for %s\n",
451 	    Targ_FmtTime(sys_st.st_mtime), pathname);
452 
453 	return 0;
454 }
455 
/*
 * The stat(2) flavor of cached_stats: an existing cache entry is used as is.
 * Returns 0 and fills *cst, or -1 on error.
 */
int
cached_stat(const char *pathname, struct cached_stat *cst)
{
	const bool useLstat = false;
	const bool forceRefresh = false;

	return cached_stats(pathname, cst, useLstat, forceRefresh);
}
461 
/*
 * The lstat(2) flavor of cached_stats: an existing cache entry is used as is.
 * Returns 0 and fills *cst, or -1 on error.
 */
int
cached_lstat(const char *pathname, struct cached_stat *cst)
{
	const bool useLstat = true;
	const bool forceRefresh = false;

	return cached_stats(pathname, cst, useLstat, forceRefresh);
}
467 
468 /* Initialize the directories module. */
469 void
470 Dir_Init(void)
471 {
472 	OpenDirs_Init(&openDirs);
473 	HashTable_Init(&mtimes);
474 	HashTable_Init(&lmtimes);
475 	CachedDir_Assign(&dotLast, CachedDir_New(".DOTLAST"));
476 }
477 
478 /*
479  * Called by Dir_InitDir and whenever .CURDIR is assigned to.
480  */
481 void
482 Dir_InitCur(const char *newCurdir)
483 {
484 	CachedDir *dir;
485 
486 	if (newCurdir == NULL)
487 		return;
488 
489 	/*
490 	 * Our build directory is not the same as our source directory.
491 	 * Keep this one around too.
492 	 */
493 	dir = SearchPath_Add(NULL, newCurdir);
494 	if (dir == NULL)
495 		return;
496 
497 	CachedDir_Assign(&cur, dir);
498 }
499 
500 /*
501  * (Re)initialize "dot" (current/object directory) path hash.
502  * Some directories may be cached.
503  */
504 void
505 Dir_InitDot(void)
506 {
507 	CachedDir *dir;
508 
509 	dir = SearchPath_Add(NULL, ".");
510 	if (dir == NULL) {
511 		Error("Cannot open `.' (%s)", strerror(errno));
512 		exit(2);	/* Not 1 so -q can distinguish error */
513 	}
514 
515 	CachedDir_Assign(&dot, dir);
516 
517 	Dir_SetPATH();		/* initialize */
518 }
519 
/*
 * Clean up the directories module.  This only does something if CLEANUP is
 * defined.
 */
void
Dir_End(void)
{
#ifdef CLEANUP
	/* First the references held by the global variables ... */
	CachedDir_Assign(&cur, NULL);
	CachedDir_Assign(&dot, NULL);
	CachedDir_Assign(&dotLast, NULL);

	/* ... then the main search path and the directory cache ... */
	SearchPath_Clear(&dirSearchPath);
	OpenDirs_Done(&openDirs);

	/* ... and finally the two mtime caches. */
	HashTable_Done(&mtimes);
	HashTable_Done(&lmtimes);
#endif
}
534 
535 /*
536  * We want ${.PATH} to indicate the order in which we will actually
537  * search, so we rebuild it after any .PATH: target.
538  * This is the simplest way to deal with the effect of .DOTLAST.
539  */
540 void
541 Dir_SetPATH(void)
542 {
543 	CachedDirListNode *ln;
544 	bool seenDotLast = false;	/* true if we should search '.' last */
545 
546 	Global_Delete(".PATH");
547 
548 	if ((ln = dirSearchPath.dirs.first) != NULL) {
549 		CachedDir *dir = ln->datum;
550 		if (dir == dotLast) {
551 			seenDotLast = true;
552 			Global_Append(".PATH", dotLast->name);
553 		}
554 	}
555 
556 	if (!seenDotLast) {
557 		if (dot != NULL)
558 			Global_Append(".PATH", dot->name);
559 		if (cur != NULL)
560 			Global_Append(".PATH", cur->name);
561 	}
562 
563 	for (ln = dirSearchPath.dirs.first; ln != NULL; ln = ln->next) {
564 		CachedDir *dir = ln->datum;
565 		if (dir == dotLast)
566 			continue;
567 		if (dir == dot && seenDotLast)
568 			continue;
569 		Global_Append(".PATH", dir->name);
570 	}
571 
572 	if (seenDotLast) {
573 		if (dot != NULL)
574 			Global_Append(".PATH", dot->name);
575 		if (cur != NULL)
576 			Global_Append(".PATH", cur->name);
577 	}
578 }
579 
580 
581 void
582 Dir_SetSYSPATH(void)
583 {
584 	CachedDirListNode *ln;
585 
586 	Var_ReadOnly(".SYSPATH", false);
587 	Global_Delete(".SYSPATH");
588 	for (ln = sysIncPath->dirs.first; ln != NULL; ln = ln->next) {
589 		CachedDir *dir = ln->datum;
590 		Global_Append(".SYSPATH", dir->name);
591 	}
592 	Var_ReadOnly(".SYSPATH", true);
593 }
594 
/*
 * Tell whether the name contains a wildcard character ('{', '[', '?' or
 * '*') and has as many opening as closing braces, and as many opening as
 * closing brackets.
 *
 * Only the counts are compared; the order of the braces and brackets is
 * not checked.
 *
 * XXX: This code is not 100% correct ([^]] fails etc.). I really don't think
 * that make(1) should be expanding patterns, because then you have to set a
 * mechanism for escaping the expansion!
 *
 * Return true if the word should be expanded, false otherwise.
 */
bool
Dir_HasWildcards(const char *name)
{
	int openBraces = 0, openBrackets = 0;
	bool sawWildcard = false;
	const char *p = name;
	char ch;

	while ((ch = *p++) != '\0') {
		if (ch == '{') {
			openBraces++;
			sawWildcard = true;
		} else if (ch == '}') {
			openBraces--;
		} else if (ch == '[') {
			openBrackets++;
			sawWildcard = true;
		} else if (ch == ']') {
			openBrackets--;
		} else if (ch == '?' || ch == '*') {
			sawWildcard = true;
		}
	}

	if (!sawWildcard)
		return false;
	return openBraces == 0 && openBrackets == 0;
}
638 
639 /*
640  * See if any files match the pattern and add their names to the 'expansions'
641  * list if they do.
642  *
643  * This is incomplete -- wildcards are only expanded in the final path
644  * component, but not in directories like src/lib*c/file*.c, but it
645  * will do for now (now being 1993 until at least 2020). To expand these,
646  * delegate the work to the shell, using the '!=' variable assignment
647  * operator, the ':sh' variable modifier or the ':!...!' variable modifier,
648  * such as in ${:!echo src/lib*c/file*.c!}.
649  *
650  * Input:
651  *	pattern		Pattern to look for
652  *	dir		Directory to search
653  *	expansion	Place to store the results
654  */
655 static void
656 DirMatchFiles(const char *pattern, CachedDir *dir, StringList *expansions)
657 {
658 	const char *dirName = dir->name;
659 	bool isDot = dirName[0] == '.' && dirName[1] == '\0';
660 	HashIter hi;
661 
662 	/*
663 	 * XXX: Iterating over all hash entries is inefficient.  If the
664 	 * pattern is a plain string without any wildcards, a direct lookup
665 	 * is faster.
666 	 */
667 
668 	HashIter_InitSet(&hi, &dir->files);
669 	while (HashIter_Next(&hi) != NULL) {
670 		const char *base = hi.entry->key;
671 		StrMatchResult res = Str_Match(base, pattern);
672 		/* TODO: handle errors from res.error */
673 
674 		if (!res.matched)
675 			continue;
676 
677 		/*
678 		 * Follow the UNIX convention that dot files are only found
679 		 * if the pattern begins with a dot. The pattern '.*' does
680 		 * not match '.' or '..' since these are not included in the
681 		 * directory cache.
682 		 *
683 		 * This means that the pattern '[a-z.]*' does not find
684 		 * '.file', which is consistent with NetBSD sh, NetBSD ksh,
685 		 * bash, dash, csh and probably many other shells as well.
686 		 */
687 		if (base[0] == '.' && pattern[0] != '.')
688 			continue;
689 
690 		{
691 			char *fullName = isDot
692 			    ? bmake_strdup(base)
693 			    : str_concat3(dirName, "/", base);
694 			Lst_Append(expansions, fullName);
695 		}
696 	}
697 }
698 
/*
 * Return a pointer to the first '}' that is not inside a nested pair of
 * braces, or to the terminating '\0' if there is no such brace.
 */
static const char *
closing_brace(const char *p)
{
	int depth = 0;

	for (; *p != '\0'; p++) {
		if (*p == '{') {
			depth++;
		} else if (*p == '}') {
			if (depth == 0)
				break;
			depth--;
		}
	}
	return p;
}
718 
/*
 * Return a pointer to the first ',' or '}' that is not inside a nested
 * pair of braces, or to the terminating '\0' if there is none.
 */
static const char *
separator_comma(const char *p)
{
	int depth = 0;

	for (; *p != '\0'; p++) {
		switch (*p) {
		case '{':
			depth++;
			break;
		case ',':
			if (depth == 0)
				return p;
			break;
		case '}':
			if (depth == 0)
				return p;
			depth--;
			break;
		default:
			break;
		}
	}
	return p;
}
738 
/* Tell whether the string contains any of the characters '*', '?', '{', '['. */
static bool
contains_wildcard(const char *p)
{
	return strpbrk(p, "*?{[") != NULL;
}
753 
/*
 * Join the first a_len bytes of a, the first b_len bytes of b and the first
 * c_len bytes of c into a freshly allocated, null-terminated string.
 */
static char *
concat3(const char *a, size_t a_len, const char *b, size_t b_len,
	const char *c, size_t c_len)
{
	size_t total = a_len + b_len + c_len;
	char *result = bmake_malloc(total + 1);
	char *out = result;

	memcpy(out, a, a_len);
	out += a_len;
	memcpy(out, b, b_len);
	out += b_len;
	memcpy(out, c, c_len);
	out[c_len] = '\0';

	return result;
}
766 
767 /*
768  * Expand curly braces like the C shell. Brace expansion by itself is purely
769  * textual, the expansions are not looked up in the file system. But if an
770  * expanded word contains wildcard characters, it is expanded further,
771  * matching only the actually existing files.
772  *
773  * Example: "{a{b,c}}" expands to "ab" and "ac".
774  * Example: "{a}" expands to "a".
775  * Example: "{a,*.c}" expands to "a" and all "*.c" files that exist.
776  *
777  * Input:
778  *	word		Entire word to expand
779  *	brace		First curly brace in it
780  *	path		Search path to use
781  *	expansions	Place to store the expansions
782  */
783 static void
784 DirExpandCurly(const char *word, const char *brace, SearchPath *path,
785 	       StringList *expansions)
786 {
787 	const char *prefix, *middle, *piece, *middle_end, *suffix;
788 	size_t prefix_len, suffix_len;
789 
790 	/* Split the word into prefix '{' middle '}' suffix. */
791 
792 	middle = brace + 1;
793 	middle_end = closing_brace(middle);
794 	if (*middle_end == '\0') {
795 		Error("Unterminated {} clause \"%s\"", middle);
796 		return;
797 	}
798 
799 	prefix = word;
800 	prefix_len = (size_t)(brace - prefix);
801 	suffix = middle_end + 1;
802 	suffix_len = strlen(suffix);
803 
804 	/* Split the middle into pieces, separated by commas. */
805 
806 	piece = middle;
807 	while (piece < middle_end + 1) {
808 		const char *piece_end = separator_comma(piece);
809 		size_t piece_len = (size_t)(piece_end - piece);
810 
811 		char *file = concat3(prefix, prefix_len, piece, piece_len,
812 		    suffix, suffix_len);
813 
814 		if (contains_wildcard(file)) {
815 			SearchPath_Expand(path, file, expansions);
816 			free(file);
817 		} else {
818 			Lst_Append(expansions, file);
819 		}
820 
821 		/* skip over the comma or closing brace */
822 		piece = piece_end + 1;
823 	}
824 }
825 
826 
827 /* Expand the pattern in each of the directories from the path. */
828 static void
829 DirExpandPath(const char *pattern, SearchPath *path, StringList *expansions)
830 {
831 	SearchPathNode *ln;
832 	for (ln = path->dirs.first; ln != NULL; ln = ln->next) {
833 		CachedDir *dir = ln->datum;
834 		DirMatchFiles(pattern, dir, expansions);
835 	}
836 }
837 
838 static void
839 PrintExpansions(StringList *expansions)
840 {
841 	const char *sep = "";
842 	StringListNode *ln;
843 	for (ln = expansions->first; ln != NULL; ln = ln->next) {
844 		const char *word = ln->datum;
845 		debug_printf("%s%s", sep, word);
846 		sep = " ";
847 	}
848 	debug_printf("\n");
849 }
850 
851 /*
852  * The wildcard isn't in the first component.
853  * Find all the components up to the one with the wildcard.
854  */
855 static void
856 SearchPath_ExpandMiddle(SearchPath *path, const char *pattern,
857 			const char *wildcardComponent, StringList *expansions)
858 {
859 	char *prefix, *dirpath, *end;
860 	SearchPath *partPath;
861 
862 	prefix = bmake_strsedup(pattern, wildcardComponent + 1);
863 	/*
864 	 * XXX: Check the "the directory is added to the path" part.
865 	 * It is probably surprising that the directory before a
866 	 * wildcard gets added to the path.
867 	 */
868 	/*
869 	 * XXX: Only the first match of the prefix in the path is
870 	 * taken, any others are ignored.  The expectation may be
871 	 * that the pattern is expanded in the whole path.
872 	 */
873 	dirpath = Dir_FindFile(prefix, path);
874 	free(prefix);
875 
876 	/*
877 	 * dirpath is null if can't find the leading component
878 	 *
879 	 * XXX: Dir_FindFile won't find internal components.  i.e. if the
880 	 * path contains ../Etc/Object and we're looking for Etc, it won't
881 	 * be found.  Ah well.  Probably not important.
882 	 *
883 	 * XXX: Check whether the above comment is still true.
884 	 */
885 	if (dirpath == NULL)
886 		return;
887 
888 	end = &dirpath[strlen(dirpath) - 1];
889 	/* XXX: What about multiple trailing slashes? */
890 	if (*end == '/')
891 		*end = '\0';
892 
893 	partPath = SearchPath_New();
894 	(void)SearchPath_Add(partPath, dirpath);
895 	DirExpandPath(wildcardComponent + 1, partPath, expansions);
896 	SearchPath_Free(partPath);
897 }
898 
899 /*
900  * Expand the given pattern into a list of existing filenames by globbing it,
901  * looking in each directory from the search path.
902  *
903  * Input:
904  *	path		the directories in which to find the files
905  *	pattern		the pattern to expand
906  *	expansions	the list on which to place the results
907  */
908 void
909 SearchPath_Expand(SearchPath *path, const char *pattern, StringList *expansions)
910 {
911 	const char *brace, *slash, *wildcard, *wildcardComponent;
912 
913 	assert(path != NULL);
914 	assert(expansions != NULL);
915 
916 	DEBUG1(DIR, "Expanding \"%s\"... ", pattern);
917 
918 	brace = strchr(pattern, '{');
919 	if (brace != NULL) {
920 		DirExpandCurly(pattern, brace, path, expansions);
921 		goto done;
922 	}
923 
924 	/* At this point, the pattern does not contain '{'. */
925 
926 	slash = strchr(pattern, '/');
927 	if (slash == NULL) {
928 		/* The pattern has no directory component. */
929 
930 		/* First the files in dot. */
931 		DirMatchFiles(pattern, dot, expansions);
932 		/* Then the files in every other directory on the path. */
933 		DirExpandPath(pattern, path, expansions);
934 		goto done;
935 	}
936 
937 	/* At this point, the pattern has a directory component. */
938 
939 	/* Find the first wildcard in the pattern. */
940 	for (wildcard = pattern; *wildcard != '\0'; wildcard++)
941 		if (*wildcard == '?' || *wildcard == '[' || *wildcard == '*')
942 			break;
943 
944 	if (*wildcard == '\0') {
945 		/*
946 		 * No directory component and no wildcard at all -- this
947 		 * should never happen as in such a simple case there is no
948 		 * need to expand anything.
949 		 */
950 		DirExpandPath(pattern, path, expansions);
951 		goto done;
952 	}
953 
954 	/* Back up to the start of the component containing the wildcard. */
955 	/* XXX: This handles '///' and '/' differently. */
956 	wildcardComponent = wildcard;
957 	while (wildcardComponent > pattern && *wildcardComponent != '/')
958 		wildcardComponent--;
959 
960 	if (wildcardComponent == pattern) {
961 		/* The first component contains the wildcard. */
962 		/* Start the search from the local directory */
963 		DirExpandPath(pattern, path, expansions);
964 	} else {
965 		SearchPath_ExpandMiddle(path, pattern, wildcardComponent,
966 		    expansions);
967 	}
968 
969 done:
970 	if (DEBUG(DIR))
971 		PrintExpansions(expansions);
972 }
973 
974 /*
975  * Find if the file with the given name exists in the given path.
976  * Return the freshly allocated path to the file, or NULL.
977  */
978 static char *
979 DirLookup(CachedDir *dir, const char *base)
980 {
981 	char *file;		/* the current filename to check */
982 
983 	DEBUG1(DIR, "   %s ...\n", dir->name);
984 
985 	if (!HashSet_Contains(&dir->files, base))
986 		return NULL;
987 
988 	file = str_concat3(dir->name, "/", base);
989 	DEBUG1(DIR, "   returning %s\n", file);
990 	dir->hits++;
991 	hits++;
992 	return file;
993 }
994 
995 
996 /*
997  * Find if the file with the given name exists in the given directory.
998  * Return the freshly allocated path to the file, or NULL.
999  */
1000 static char *
1001 DirLookupSubdir(CachedDir *dir, const char *name)
1002 {
1003 	struct cached_stat cst;
1004 	char *file = dir == dot
1005 	    ? bmake_strdup(name)
1006 	    : str_concat3(dir->name, "/", name);
1007 
1008 	DEBUG1(DIR, "checking %s ...\n", file);
1009 
1010 	if (cached_stat(file, &cst) == 0) {
1011 		nearmisses++;
1012 		return file;
1013 	}
1014 	free(file);
1015 	return NULL;
1016 }
1017 
1018 /*
1019  * Find if the file with the given name exists in the given path.
1020  * Return the freshly allocated path to the file, the empty string, or NULL.
1021  * Returning the empty string means that the search should be terminated.
1022  */
1023 static char *
1024 DirLookupAbs(CachedDir *dir, const char *name, const char *cp)
1025 {
1026 	const char *dnp;	/* pointer into dir->name */
1027 	const char *np;		/* pointer into name */
1028 
1029 	DEBUG1(DIR, "   %s ...\n", dir->name);
1030 
1031 	/*
1032 	 * If the file has a leading path component and that component
1033 	 * exactly matches the entire name of the current search
1034 	 * directory, we can attempt another cache lookup. And if we don't
1035 	 * have a hit, we can safely assume the file does not exist at all.
1036 	 */
1037 	for (dnp = dir->name, np = name;
1038 	     *dnp != '\0' && *dnp == *np; dnp++, np++)
1039 		continue;
1040 	if (*dnp != '\0' || np != cp - 1)
1041 		return NULL;
1042 
1043 	if (!HashSet_Contains(&dir->files, cp)) {
1044 		DEBUG0(DIR, "   must be here but isn't -- returning\n");
1045 		return bmake_strdup("");	/* to terminate the search */
1046 	}
1047 
1048 	dir->hits++;
1049 	hits++;
1050 	DEBUG1(DIR, "   returning %s\n", name);
1051 	return bmake_strdup(name);
1052 }
1053 
1054 /*
1055  * Find the given file in "." or curdir.
1056  * Return the freshly allocated path to the file, or NULL.
1057  */
1058 static char *
1059 DirFindDot(const char *name, const char *base)
1060 {
1061 
1062 	if (HashSet_Contains(&dot->files, base)) {
1063 		DEBUG0(DIR, "   in '.'\n");
1064 		hits++;
1065 		dot->hits++;
1066 		return bmake_strdup(name);
1067 	}
1068 
1069 	if (cur != NULL && HashSet_Contains(&cur->files, base)) {
1070 		DEBUG1(DIR, "   in ${.CURDIR} = %s\n", cur->name);
1071 		hits++;
1072 		cur->hits++;
1073 		return str_concat3(cur->name, "/", base);
1074 	}
1075 
1076 	return NULL;
1077 }
1078 
1079 static bool
1080 FindFileRelative(SearchPath *path, bool seenDotLast,
1081 		 const char *name, char **out_file)
1082 {
1083 	SearchPathNode *ln;
1084 	bool checkedDot = false;
1085 	char *file;
1086 
1087 	DEBUG0(DIR, "   Trying subdirectories...\n");
1088 
1089 	if (!seenDotLast) {
1090 		if (dot != NULL) {
1091 			checkedDot = true;
1092 			if ((file = DirLookupSubdir(dot, name)) != NULL)
1093 				goto found;
1094 		}
1095 		if (cur != NULL &&
1096 		    (file = DirLookupSubdir(cur, name)) != NULL)
1097 			goto found;
1098 	}
1099 
1100 	for (ln = path->dirs.first; ln != NULL; ln = ln->next) {
1101 		CachedDir *dir = ln->datum;
1102 		if (dir == dotLast)
1103 			continue;
1104 		if (dir == dot) {
1105 			if (checkedDot)
1106 				continue;
1107 			checkedDot = true;
1108 		}
1109 		if ((file = DirLookupSubdir(dir, name)) != NULL)
1110 			goto found;
1111 	}
1112 
1113 	if (seenDotLast) {
1114 		if (dot != NULL && !checkedDot) {
1115 			checkedDot = true;
1116 			if ((file = DirLookupSubdir(dot, name)) != NULL)
1117 				goto found;
1118 		}
1119 		if (cur != NULL &&
1120 		    (file = DirLookupSubdir(cur, name)) != NULL)
1121 			goto found;
1122 	}
1123 
1124 	if (checkedDot) {
1125 		/*
1126 		 * Already checked by the given name, since . was in
1127 		 * the path, so no point in proceeding.
1128 		 */
1129 		DEBUG0(DIR, "   Checked . already, returning NULL\n");
1130 		file = NULL;
1131 		goto found;
1132 	}
1133 
1134 	return false;
1135 
1136 found:
1137 	*out_file = file;
1138 	return true;
1139 }
1140 
1141 static bool
1142 FindFileAbsolute(SearchPath *path, bool seenDotLast,
1143 		 const char *name, const char *base, char **out_file)
1144 {
1145 	char *file;
1146 	SearchPathNode *ln;
1147 
1148 	/*
1149 	 * For absolute names, compare directory path prefix against
1150 	 * the the directory path of each member on the search path
1151 	 * for an exact match. If we have an exact match on any member
1152 	 * of the search path, use the cached contents of that member
1153 	 * to lookup the final file component. If that lookup fails we
1154 	 * can safely assume that the file does not exist at all.
1155 	 * This is signified by DirLookupAbs() returning an empty
1156 	 * string.
1157 	 */
1158 	DEBUG0(DIR, "   Trying exact path matches...\n");
1159 
1160 	if (!seenDotLast && cur != NULL &&
1161 	    ((file = DirLookupAbs(cur, name, base)) != NULL))
1162 		goto found;
1163 
1164 	for (ln = path->dirs.first; ln != NULL; ln = ln->next) {
1165 		CachedDir *dir = ln->datum;
1166 		if (dir == dotLast)
1167 			continue;
1168 		if ((file = DirLookupAbs(dir, name, base)) != NULL)
1169 			goto found;
1170 	}
1171 
1172 	if (seenDotLast && cur != NULL &&
1173 	    ((file = DirLookupAbs(cur, name, base)) != NULL))
1174 		goto found;
1175 
1176 	return false;
1177 
1178 found:
1179 	if (file[0] == '\0') {
1180 		free(file);
1181 		file = NULL;
1182 	}
1183 	*out_file = file;
1184 	return true;
1185 }
1186 
1187 /*
1188  * Find the file with the given name along the given search path.
1189  *
1190  * If the file is found in a directory that is not on the path
1191  * already (either 'name' is absolute or it is a relative path
1192  * [ dir1/.../dirn/file ] which exists below one of the directories
1193  * already on the search path), its directory is added to the end
1194  * of the path, on the assumption that there will be more files in
1195  * that directory later on. Sometimes this is true. Sometimes not.
1196  *
1197  * Input:
1198  *	name		the file to find
1199  *	path		the directories to search, or NULL
1200  *
1201  * Results:
1202  *	The freshly allocated path to the file, or NULL.
1203  */
1204 char *
1205 Dir_FindFile(const char *name, SearchPath *path)
1206 {
1207 	char *file;		/* the current filename to check */
1208 	bool seenDotLast = false; /* true if we should search dot last */
1209 	struct cached_stat cst;	/* Buffer for stat, if necessary */
1210 	const char *trailing_dot = ".";
1211 	const char *base = str_basename(name);
1212 
1213 	DEBUG1(DIR, "Searching for %s ...", name);
1214 
1215 	if (path == NULL) {
1216 		DEBUG0(DIR, "couldn't open path, file not found\n");
1217 		misses++;
1218 		return NULL;
1219 	}
1220 
1221 	if (path->dirs.first != NULL) {
1222 		CachedDir *dir = path->dirs.first->datum;
1223 		if (dir == dotLast) {
1224 			seenDotLast = true;
1225 			DEBUG0(DIR, "[dot last]...");
1226 		}
1227 	}
1228 	DEBUG0(DIR, "\n");
1229 
1230 	/*
1231 	 * If there's no leading directory components or if the leading
1232 	 * directory component is exactly `./', consult the cached contents
1233 	 * of each of the directories on the search path.
1234 	 */
1235 	if (base == name || (base - name == 2 && *name == '.')) {
1236 		SearchPathNode *ln;
1237 
1238 		/*
1239 		 * We look through all the directories on the path seeking one
1240 		 * which contains the final component of the given name.  If
1241 		 * such a file is found, we concatenate the directory name
1242 		 * and the final component and return the resulting string.
1243 		 * If we don't find any such thing, we go on to phase two.
1244 		 *
1245 		 * No matter what, we always look for the file in the current
1246 		 * directory before anywhere else (unless we found the magic
1247 		 * DOTLAST path, in which case we search it last) and we *do
1248 		 * not* add the ./ to it if it exists.
1249 		 * This is so there are no conflicts between what the user
1250 		 * specifies (fish.c) and what pmake finds (./fish.c).
1251 		 */
1252 		if (!seenDotLast && (file = DirFindDot(name, base)) != NULL)
1253 			return file;
1254 
1255 		for (ln = path->dirs.first; ln != NULL; ln = ln->next) {
1256 			CachedDir *dir = ln->datum;
1257 			if (dir == dotLast)
1258 				continue;
1259 			if ((file = DirLookup(dir, base)) != NULL)
1260 				return file;
1261 		}
1262 
1263 		if (seenDotLast && (file = DirFindDot(name, base)) != NULL)
1264 			return file;
1265 	}
1266 
1267 	/*
1268 	 * We didn't find the file on any directory in the search path.
1269 	 * If the name doesn't contain a slash, that means it doesn't exist.
1270 	 * If it *does* contain a slash, however, there is still hope: it
1271 	 * could be in a subdirectory of one of the members of the search
1272 	 * path. (eg. /usr/include and sys/types.h. The above search would
1273 	 * fail to turn up types.h in /usr/include, but it *is* in
1274 	 * /usr/include/sys/types.h).
1275 	 * [ This no longer applies: If we find such a file, we assume there
1276 	 * will be more (what else can we assume?) and add all but the last
1277 	 * component of the resulting name onto the search path (at the
1278 	 * end).]
1279 	 * This phase is only performed if the file is *not* absolute.
1280 	 */
1281 	if (base == name) {
1282 		DEBUG0(DIR, "   failed.\n");
1283 		misses++;
1284 		return NULL;
1285 	}
1286 
1287 	if (*base == '\0') {
1288 		/* we were given a trailing "/" */
1289 		base = trailing_dot;
1290 	}
1291 
1292 	if (name[0] != '/') {
1293 		if (FindFileRelative(path, seenDotLast, name, &file))
1294 			return file;
1295 	} else {
1296 		if (FindFileAbsolute(path, seenDotLast, name, base, &file))
1297 			return file;
1298 	}
1299 
1300 	/*
1301 	 * Didn't find it that way, either. Sigh. Phase 3. Add its directory
1302 	 * onto the search path in any case, just in case, then look for the
1303 	 * thing in the hash table. If we find it, grand. We return a new
1304 	 * copy of the name. Otherwise we sadly return a NULL pointer. Sigh.
1305 	 * Note that if the directory holding the file doesn't exist, this
1306 	 * will do an extra search of the final directory on the path. Unless
1307 	 * something weird happens, this search won't succeed and life will
1308 	 * be groovy.
1309 	 *
1310 	 * Sigh. We cannot add the directory onto the search path because
1311 	 * of this amusing case:
1312 	 * $(INSTALLDIR)/$(FILE): $(FILE)
1313 	 *
1314 	 * $(FILE) exists in $(INSTALLDIR) but not in the current one.
1315 	 * When searching for $(FILE), we will find it in $(INSTALLDIR)
1316 	 * b/c we added it here. This is not good...
1317 	 */
1318 #if 0
1319 	{
1320 		CachedDir *dir;
1321 		char *prefix;
1322 
1323 		if (base == trailing_dot) {
1324 			base = strrchr(name, '/');
1325 			base++;
1326 		}
1327 		prefix = bmake_strsedup(name, base - 1);
1328 		(void)SearchPath_Add(path, prefix);
1329 		free(prefix);
1330 
1331 		bigmisses++;
1332 		if (path->last == NULL)
1333 			return NULL;
1334 
1335 		dir = path->last->datum;
1336 		if (HashSet_Contains(&dir->files, base))
1337 			return bmake_strdup(name);
1338 		return NULL;
1339 	}
1340 #else
1341 	DEBUG1(DIR, "   Looking for \"%s\" ...\n", name);
1342 
1343 	bigmisses++;
1344 	if (cached_stat(name, &cst) == 0) {
1345 		return bmake_strdup(name);
1346 	}
1347 
1348 	DEBUG0(DIR, "   failed. Returning NULL\n");
1349 	return NULL;
1350 #endif
1351 }
1352 
1353 
1354 /*
1355  * Search for a path starting at a given directory and then working our way
1356  * up towards the root.
1357  *
1358  * Input:
1359  *	here		starting directory
1360  *	search_path	the relative path we are looking for
1361  *
1362  * Results:
1363  *	The found path, or NULL.
1364  */
1365 char *
1366 Dir_FindHereOrAbove(const char *here, const char *search_path)
1367 {
1368 	struct cached_stat cst;
1369 	char *dirbase, *dirbase_end;
1370 	char *try, *try_end;
1371 
1372 	/* copy out our starting point */
1373 	dirbase = bmake_strdup(here);
1374 	dirbase_end = dirbase + strlen(dirbase);
1375 
1376 	/* loop until we determine a result */
1377 	for (;;) {
1378 
1379 		/* try and stat(2) it ... */
1380 		try = str_concat3(dirbase, "/", search_path);
1381 		if (cached_stat(try, &cst) != -1) {
1382 			/*
1383 			 * success!  if we found a file, chop off
1384 			 * the filename so we return a directory.
1385 			 */
1386 			if ((cst.cst_mode & S_IFMT) != S_IFDIR) {
1387 				try_end = try + strlen(try);
1388 				while (try_end > try && *try_end != '/')
1389 					try_end--;
1390 				if (try_end > try)
1391 					*try_end = '\0';	/* chop! */
1392 			}
1393 
1394 			free(dirbase);
1395 			return try;
1396 		}
1397 		free(try);
1398 
1399 		/*
1400 		 * nope, we didn't find it.  if we used up dirbase we've
1401 		 * reached the root and failed.
1402 		 */
1403 		if (dirbase_end == dirbase)
1404 			break;	/* failed! */
1405 
1406 		/*
1407 		 * truncate dirbase from the end to move up a dir
1408 		 */
1409 		while (dirbase_end > dirbase && *dirbase_end != '/')
1410 			dirbase_end--;
1411 		*dirbase_end = '\0';	/* chop! */
1412 	}
1413 
1414 	free(dirbase);
1415 	return NULL;
1416 }
1417 
1418 /*
1419  * This is an implied source, and it may have moved,
1420  * see if we can find it via the current .PATH
1421  */
1422 static char *
1423 ResolveMovedDepends(GNode *gn)
1424 {
1425 	char *fullName;
1426 
1427 	const char *base = str_basename(gn->name);
1428 	if (base == gn->name)
1429 		return NULL;
1430 
1431 	fullName = Dir_FindFile(base, Suff_FindPath(gn));
1432 	if (fullName == NULL)
1433 		return NULL;
1434 
1435 	/*
1436 	 * Put the found file in gn->path so that we give that to the compiler.
1437 	 */
1438 	/*
1439 	 * XXX: Better just reset gn->path to NULL; updating it is already done
1440 	 * by Dir_UpdateMTime.
1441 	 */
1442 	gn->path = bmake_strdup(fullName);
1443 	if (!Job_RunTarget(".STALE", gn->fname))
1444 		fprintf(stdout,	/* XXX: Why stdout? */
1445 		    "%s: %s, %u: ignoring stale %s for %s, found %s\n",
1446 		    progname, gn->fname, gn->lineno,
1447 		    makeDependfile, gn->name, fullName);
1448 
1449 	return fullName;
1450 }
1451 
1452 static char *
1453 ResolveFullName(GNode *gn)
1454 {
1455 	char *fullName;
1456 
1457 	fullName = gn->path;
1458 	if (fullName == NULL && !(gn->type & OP_NOPATH)) {
1459 
1460 		fullName = Dir_FindFile(gn->name, Suff_FindPath(gn));
1461 
1462 		if (fullName == NULL && gn->flags.fromDepend &&
1463 		    !Lst_IsEmpty(&gn->implicitParents))
1464 			fullName = ResolveMovedDepends(gn);
1465 
1466 		DEBUG2(DIR, "Found '%s' as '%s'\n",
1467 		    gn->name, fullName != NULL ? fullName : "(not found)");
1468 	}
1469 
1470 	if (fullName == NULL)
1471 		fullName = bmake_strdup(gn->name);
1472 
1473 	/* XXX: Is every piece of memory freed as it should? */
1474 
1475 	return fullName;
1476 }
1477 
1478 /*
1479  * Search gn along dirSearchPath and store its modification time in gn->mtime.
1480  * If no file is found, store 0 instead.
1481  *
1482  * The found file is stored in gn->path, unless the node already had a path.
1483  */
1484 void
1485 Dir_UpdateMTime(GNode *gn, bool forceRefresh)
1486 {
1487 	char *fullName;
1488 	struct cached_stat cst;
1489 
1490 	if (gn->type & OP_ARCHV) {
1491 		Arch_UpdateMTime(gn);
1492 		return;
1493 	}
1494 
1495 	if (gn->type & OP_PHONY) {
1496 		gn->mtime = 0;
1497 		return;
1498 	}
1499 
1500 	fullName = ResolveFullName(gn);
1501 
1502 	if (cached_stats(fullName, &cst, false, forceRefresh) < 0) {
1503 		if (gn->type & OP_MEMBER) {
1504 			if (fullName != gn->path)
1505 				free(fullName);
1506 			Arch_UpdateMemberMTime(gn);
1507 			return;
1508 		}
1509 
1510 		cst.cst_mtime = 0;
1511 	}
1512 
1513 	if (fullName != NULL && gn->path == NULL)
1514 		gn->path = fullName;
1515 	/* XXX: else free(fullName)? */
1516 
1517 	gn->mtime = cst.cst_mtime;
1518 }
1519 
1520 /*
1521  * Read the directory and add it to the cache in openDirs.
1522  * If a path is given, add the directory to that path as well.
1523  */
1524 static CachedDir *
1525 CacheNewDir(const char *name, SearchPath *path)
1526 {
1527 	CachedDir *dir = NULL;
1528 	DIR *d;
1529 	struct dirent *dp;
1530 
1531 	if ((d = opendir(name)) == NULL) {
1532 		DEBUG1(DIR, "Caching %s ... not found\n", name);
1533 		return dir;
1534 	}
1535 
1536 	DEBUG1(DIR, "Caching %s ...\n", name);
1537 
1538 	dir = CachedDir_New(name);
1539 
1540 	while ((dp = readdir(d)) != NULL) {
1541 
1542 #if defined(sun) && defined(d_ino) /* d_ino is a sunos4 #define for d_fileno */
1543 		/*
1544 		 * The sun directory library doesn't check for a 0 inode
1545 		 * (0-inode slots just take up space), so we have to do
1546 		 * it ourselves.
1547 		 */
1548 		if (dp->d_fileno == 0)
1549 			continue;
1550 #endif /* sun && d_ino */
1551 
1552 		(void)HashSet_Add(&dir->files, dp->d_name);
1553 	}
1554 	(void)closedir(d);
1555 
1556 	OpenDirs_Add(&openDirs, dir);
1557 	if (path != NULL)
1558 		Lst_Append(&path->dirs, CachedDir_Ref(dir));
1559 
1560 	DEBUG1(DIR, "Caching %s done\n", name);
1561 	return dir;
1562 }
1563 
1564 /*
1565  * Read the list of filenames in the directory and store the result
1566  * in openDirs.
1567  *
1568  * If a path is given, append the directory to that path.
1569  *
1570  * Input:
1571  *	path		The path to which the directory should be
1572  *			added, or NULL to only add the directory to openDirs
1573  *	name		The name of the directory to add.
1574  *			The name is not normalized in any way.
1575  * Output:
1576  *	result		If no path is given and the directory exists, the
1577  *			returned CachedDir has a reference count of 0.  It
1578  *			must either be assigned to a variable using
1579  *			CachedDir_Assign or be appended to a SearchPath using
1580  *			Lst_Append and CachedDir_Ref.
1581  */
1582 CachedDir *
1583 SearchPath_Add(SearchPath *path, const char *name)
1584 {
1585 
1586 	if (path != NULL && strcmp(name, ".DOTLAST") == 0) {
1587 		SearchPathNode *ln;
1588 
1589 		/* XXX: Linear search gets slow with thousands of entries. */
1590 		for (ln = path->dirs.first; ln != NULL; ln = ln->next) {
1591 			CachedDir *pathDir = ln->datum;
1592 			if (strcmp(pathDir->name, name) == 0)
1593 				return pathDir;
1594 		}
1595 
1596 		Lst_Prepend(&path->dirs, CachedDir_Ref(dotLast));
1597 	}
1598 
1599 	if (path != NULL) {
1600 		/* XXX: Why is OpenDirs only checked if path != NULL? */
1601 		CachedDir *dir = OpenDirs_Find(&openDirs, name);
1602 		if (dir != NULL) {
1603 			if (Lst_FindDatum(&path->dirs, dir) == NULL)
1604 				Lst_Append(&path->dirs, CachedDir_Ref(dir));
1605 			return dir;
1606 		}
1607 	}
1608 
1609 	return CacheNewDir(name, path);
1610 }
1611 
1612 /*
1613  * Return a copy of dirSearchPath, incrementing the reference counts for
1614  * the contained directories.
1615  */
1616 SearchPath *
1617 Dir_CopyDirSearchPath(void)
1618 {
1619 	SearchPath *path = SearchPath_New();
1620 	SearchPathNode *ln;
1621 	for (ln = dirSearchPath.dirs.first; ln != NULL; ln = ln->next) {
1622 		CachedDir *dir = ln->datum;
1623 		Lst_Append(&path->dirs, CachedDir_Ref(dir));
1624 	}
1625 	return path;
1626 }
1627 
1628 /*
1629  * Make a string by taking all the directories in the given search path and
1630  * preceding them by the given flag. Used by the suffix module to create
1631  * variables for compilers based on suffix search paths.
1632  *
1633  * Input:
1634  *	flag		flag which should precede each directory
1635  *	path		list of directories
1636  *
1637  * Results:
1638  *	The string mentioned above. Note that there is no space between the
1639  *	given flag and each directory. The empty string is returned if things
1640  *	don't go well.
1641  */
1642 char *
1643 SearchPath_ToFlags(SearchPath *path, const char *flag)
1644 {
1645 	Buffer buf;
1646 	SearchPathNode *ln;
1647 
1648 	Buf_Init(&buf);
1649 
1650 	if (path != NULL) {
1651 		for (ln = path->dirs.first; ln != NULL; ln = ln->next) {
1652 			CachedDir *dir = ln->datum;
1653 			Buf_AddStr(&buf, " ");
1654 			Buf_AddStr(&buf, flag);
1655 			Buf_AddStr(&buf, dir->name);
1656 		}
1657 	}
1658 
1659 	return Buf_DoneData(&buf);
1660 }
1661 
1662 /* Free the search path and all directories mentioned in it. */
1663 void
1664 SearchPath_Free(SearchPath *path)
1665 {
1666 	SearchPathNode *ln;
1667 
1668 	for (ln = path->dirs.first; ln != NULL; ln = ln->next) {
1669 		CachedDir *dir = ln->datum;
1670 		CachedDir_Unref(dir);
1671 	}
1672 	Lst_Done(&path->dirs);
1673 	free(path);
1674 }
1675 
1676 /*
1677  * Clear out all elements from the given search path.
1678  * The path is set to the empty list but is not destroyed.
1679  */
1680 void
1681 SearchPath_Clear(SearchPath *path)
1682 {
1683 	while (!Lst_IsEmpty(&path->dirs)) {
1684 		CachedDir *dir = Lst_Dequeue(&path->dirs);
1685 		CachedDir_Unref(dir);
1686 	}
1687 }
1688 
1689 
1690 /*
1691  * Concatenate two paths, adding the second to the end of the first,
1692  * skipping duplicates.
1693  */
1694 void
1695 SearchPath_AddAll(SearchPath *dst, SearchPath *src)
1696 {
1697 	SearchPathNode *ln;
1698 
1699 	for (ln = src->dirs.first; ln != NULL; ln = ln->next) {
1700 		CachedDir *dir = ln->datum;
1701 		if (Lst_FindDatum(&dst->dirs, dir) == NULL)
1702 			Lst_Append(&dst->dirs, CachedDir_Ref(dir));
1703 	}
1704 }
1705 
/*
 * Return num / den as a percentage, rounded towards zero, or 0 if den
 * is 0.
 *
 * The product is computed in long long, since num * 100 overflows an int
 * as soon as num exceeds INT_MAX / 100.
 */
static int
percentage(int num, int den)
{
	if (den == 0)
		return 0;
	return (int)((long long)num * 100 / den);
}
1711 
1712 /********** DEBUG INFO **********/
1713 void
1714 Dir_PrintDirectories(void)
1715 {
1716 	CachedDirListNode *ln;
1717 
1718 	debug_printf("#*** Directory Cache:\n");
1719 	debug_printf(
1720 	    "# Stats: %d hits %d misses %d near misses %d losers (%d%%)\n",
1721 	    hits, misses, nearmisses, bigmisses,
1722 	    percentage(hits, hits + bigmisses + nearmisses));
1723 	debug_printf("#  refs  hits  directory\n");
1724 
1725 	for (ln = openDirs.list.first; ln != NULL; ln = ln->next) {
1726 		CachedDir *dir = ln->datum;
1727 		debug_printf("#  %4d  %4d  %s\n",
1728 		    dir->refCount, dir->hits, dir->name);
1729 	}
1730 }
1731 
1732 void
1733 SearchPath_Print(const SearchPath *path)
1734 {
1735 	SearchPathNode *ln;
1736 
1737 	for (ln = path->dirs.first; ln != NULL; ln = ln->next) {
1738 		const CachedDir *dir = ln->datum;
1739 		debug_printf("%s ", dir->name);
1740 	}
1741 }
1742