xref: /titanic_52/usr/src/uts/common/fs/dnlc.c (revision 554ff184129088135ad2643c1c9832174a17be88)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*
31  * University Copyright- Copyright (c) 1982, 1986, 1988
32  * The Regents of the University of California
33  * All Rights Reserved
34  *
35  * University Acknowledgment- Portions of this document are derived from
36  * software developed by the University of California, Berkeley, and its
37  * contributors.
38  */
39 
40 #pragma ident	"%Z%%M%	%I%	%E% SMI"
41 
42 #include <sys/types.h>
43 #include <sys/systm.h>
44 #include <sys/param.h>
45 #include <sys/t_lock.h>
46 #include <sys/systm.h>
47 #include <sys/vfs.h>
48 #include <sys/vnode.h>
49 #include <sys/dnlc.h>
50 #include <sys/kmem.h>
51 #include <sys/cmn_err.h>
52 #include <sys/vtrace.h>
53 #include <sys/bitmap.h>
54 #include <sys/var.h>
55 #include <sys/sysmacros.h>
56 #include <sys/kstat.h>
57 #include <sys/atomic.h>
58 #include <sys/taskq.h>
59 
60 /*
61  * Directory name lookup cache.
62  * Based on code originally done by Robert Elz at Melbourne.
63  *
64  * Names found by directory scans are retained in a cache
65  * for future reference.  Each hash chain is ordered by LRU
66  * Cache is indexed by hash value obtained from (vp, name)
67  * where the vp refers to the directory containing the name.
68  */
69 
70 /*
71  * Tunable nc_hashavelen is the average length desired for this chain, from
72  * which the size of the nc_hash table is derived at create time.
73  */
74 #define	NC_HASHAVELEN_DEFAULT	4
75 int nc_hashavelen = NC_HASHAVELEN_DEFAULT;
76 
77 /*
78  * NC_MOVETOFRONT is the move-to-front threshold: if the hash lookup
79  * depth exceeds this value, we move the looked-up entry to the front of
80  * its hash chain.  The idea is to make sure that the most frequently
81  * accessed entries are found most quickly (by keeping them near the
82  * front of their hash chains).
83  */
84 #define	NC_MOVETOFRONT	2
85 
86 /*
87  *
88  * DNLC_MAX_RELE is used to size an array on the stack when releasing
89  * vnodes. This array is used rather than calling VN_RELE() inline because
90  * all dnlc locks must be dropped by that time in order to avoid a
91  * possible deadlock. This deadlock occurs when the dnlc holds the last
92  * reference to the vnode and so the VOP_INACTIVE vector is called which
93  * can in turn call back into the dnlc. A global array was used but had
94  * many problems:
95  *	1) Actually doesn't have an upper bound on the array size as
96  *	   entries can be added after starting the purge.
97  *	2) The locking scheme causes a hang.
98  *	3) Caused serialisation on the global lock.
99  *	4) The array was often unnecessarily huge.
100  *
101  * Note the current value 8 allows up to 4 cache entries (to be purged
102  * from each hash chain), before having to cycle around and retry.
103  * This ought to be ample given that nc_hashavelen is typically very small.
104  */
105 #define	DNLC_MAX_RELE	8 /* must be even */
106 
107 /*
108  * Hash table of name cache entries for fast lookup, dynamically
109  * allocated at startup.
110  */
111 nc_hash_t *nc_hash;
112 
113 /*
114  * Rotors. Used to select entries on a round-robin basis.
115  */
116 static nc_hash_t *dnlc_purge_fs1_rotor;
117 static nc_hash_t *dnlc_free_rotor;
118 
119 /*
120  * # of dnlc entries (uninitialized)
121  *
122  * the initial value was chosen as being
123  * a random string of bits, probably not
124  * normally chosen by a systems administrator
125  */
126 int ncsize = -1;
127 uint32_t dnlc_nentries = 0;	/* current number of name cache entries */
128 static int nc_hashsz;		/* size of hash table */
129 static int nc_hashmask;		/* size of hash table minus 1 */
130 
131 /*
132  * The dnlc_reduce_cache() taskq queue is activated when there are
133  * ncsize name cache entries and it reduces the size down to
134  * dnlc_nentries_low_water, which is by default one hundreth
135  * less (or 99%) of ncsize.
136  */
137 #define	DNLC_LOW_WATER_DIVISOR_DEFAULT 100
138 uint_t dnlc_low_water_divisor = DNLC_LOW_WATER_DIVISOR_DEFAULT;
139 uint_t dnlc_nentries_low_water;
140 int dnlc_reduce_idle = 1; /* no locking needed */
141 
142 /*
143  * If dnlc_nentries hits dnlc_max_nentries (twice ncsize)
144  * then this means the dnlc_reduce_cache() taskq is failing to
145  * keep up. In this case we refuse to add new entries to the dnlc
146  * until the taskq catches up.
147  */
148 uint_t dnlc_max_nentries; /* twice ncsize */
149 uint64_t dnlc_max_nentries_cnt = 0; /* statistic on times we failed */
150 
151 /*
152  * Tunable to define when we should just remove items from
153  * the end of the chain.
154  */
155 #define	DNLC_LONG_CHAIN 8
156 uint_t dnlc_long_chain = DNLC_LONG_CHAIN;
157 
158 /*
159  * ncstats has been deprecated, due to the integer size of the counters
160  * which can easily overflow in the dnlc.
161  * It is maintained (at some expense) for compatability.
162  * The preferred interface is the kstat accessible nc_stats below.
163  */
164 struct ncstats ncstats;
165 
166 struct nc_stats ncs = {
167 	{ "hits",			KSTAT_DATA_UINT64 },
168 	{ "misses",			KSTAT_DATA_UINT64 },
169 	{ "negative_cache_hits",	KSTAT_DATA_UINT64 },
170 	{ "enters",			KSTAT_DATA_UINT64 },
171 	{ "double_enters",		KSTAT_DATA_UINT64 },
172 	{ "purge_total_entries",	KSTAT_DATA_UINT64 },
173 	{ "purge_all",			KSTAT_DATA_UINT64 },
174 	{ "purge_vp",			KSTAT_DATA_UINT64 },
175 	{ "purge_vfs",			KSTAT_DATA_UINT64 },
176 	{ "purge_fs1",			KSTAT_DATA_UINT64 },
177 	{ "pick_free",			KSTAT_DATA_UINT64 },
178 	{ "pick_heuristic",		KSTAT_DATA_UINT64 },
179 	{ "pick_last",			KSTAT_DATA_UINT64 },
180 
181 	/* directory caching stats */
182 
183 	{ "dir_hits",			KSTAT_DATA_UINT64 },
184 	{ "dir_misses",			KSTAT_DATA_UINT64 },
185 	{ "dir_cached_current",		KSTAT_DATA_UINT64 },
186 	{ "dir_entries_cached_current",	KSTAT_DATA_UINT64 },
187 	{ "dir_cached_total",		KSTAT_DATA_UINT64 },
188 	{ "dir_start_no_memory",	KSTAT_DATA_UINT64 },
189 	{ "dir_add_no_memory",		KSTAT_DATA_UINT64 },
190 	{ "dir_add_abort",		KSTAT_DATA_UINT64 },
191 	{ "dir_add_max",		KSTAT_DATA_UINT64 },
192 	{ "dir_remove_entry_fail",	KSTAT_DATA_UINT64 },
193 	{ "dir_remove_space_fail",	KSTAT_DATA_UINT64 },
194 	{ "dir_update_fail",		KSTAT_DATA_UINT64 },
195 	{ "dir_fini_purge",		KSTAT_DATA_UINT64 },
196 	{ "dir_reclaim_last",		KSTAT_DATA_UINT64 },
197 	{ "dir_reclaim_any",		KSTAT_DATA_UINT64 },
198 };
199 
200 static int doingcache = 1;
201 
202 vnode_t negative_cache_vnode;
203 
204 /*
205  * Insert entry at the front of the queue
206  */
207 #define	nc_inshash(ncp, hp) \
208 { \
209 	(ncp)->hash_next = (hp)->hash_next; \
210 	(ncp)->hash_prev = (ncache_t *)(hp); \
211 	(hp)->hash_next->hash_prev = (ncp); \
212 	(hp)->hash_next = (ncp); \
213 }
214 
215 /*
216  * Remove entry from hash queue
217  */
218 #define	nc_rmhash(ncp) \
219 { \
220 	(ncp)->hash_prev->hash_next = (ncp)->hash_next; \
221 	(ncp)->hash_next->hash_prev = (ncp)->hash_prev; \
222 	(ncp)->hash_prev = NULL; \
223 	(ncp)->hash_next = NULL; \
224 }
225 
226 /*
227  * Free an entry.
228  */
229 #define	dnlc_free(ncp) \
230 { \
231 	kmem_free((ncp), sizeof (ncache_t) + (ncp)->namlen); \
232 	atomic_add_32(&dnlc_nentries, -1); \
233 }
234 
235 
236 /*
237  * Cached directory info.
238  * ======================
239  */
240 
241 /*
242  * Cached directory free space hash function.
243  * Needs the free space handle and the dcp to get the hash table size
244  * Returns the hash index.
245  */
246 #define	DDFHASH(handle, dcp) ((handle >> 2) & (dcp)->dc_fhash_mask)
247 
248 /*
249  * Cached directory name entry hash function.
250  * Uses the name and returns in the input arguments the hash and the name
251  * length.
252  */
253 #define	DNLC_DIR_HASH(name, hash, namelen)			\
254 	{							\
255 		char Xc, *Xcp;					\
256 		hash = *name;					\
257 		for (Xcp = (name + 1); (Xc = *Xcp) != 0; Xcp++)	\
258 			hash = (hash << 4) + hash + Xc;		\
259 		ASSERT((Xcp - (name)) <= ((1 << NBBY) - 1));	\
260 		namelen = Xcp - (name);				\
261 	}
262 
263 /* special dircache_t pointer to indicate error should be returned */
264 /*
265  * The anchor directory cache pointer can contain 3 types of values,
266  * 1) NULL: No directory cache
267  * 2) DC_RET_LOW_MEM (-1): There was a directory cache that found to be
268  *    too big or a memory shortage occurred. This value remains in the
269  *    pointer until a dnlc_dir_start() which returns the a DNOMEM error.
270  *    This is kludgy but efficient and only visible in this source file.
271  * 3) A valid cache pointer.
272  */
273 #define	DC_RET_LOW_MEM (dircache_t *)1
274 #define	VALID_DIR_CACHE(dcp) ((dircache_t *)(dcp) > DC_RET_LOW_MEM)
275 
276 /* Tunables */
277 uint_t dnlc_dir_enable = 1; /* disable caching directories by setting to 0 */
278 uint_t dnlc_dir_min_size = 40; /* min no of directory entries before caching */
279 uint_t dnlc_dir_max_size = UINT_MAX; /* ditto maximum */
280 uint_t dnlc_dir_hash_size_shift = 3; /* 8 entries per hash bucket */
281 uint_t dnlc_dir_min_reclaim =  350000; /* approx 1MB of dcentrys */
282 /*
283  * dnlc_dir_hash_resize_shift determines when the hash tables
284  * get re-adjusted due to growth or shrinkage
285  * - currently 2 indicating that there can be at most 4
286  * times or at least one quarter the number of entries
287  * before hash table readjustment. Note that with
288  * dnlc_dir_hash_size_shift above set at 3 this would
289  * mean readjustment would occur if the average number
290  * of entries went above 32 or below 2
291  */
292 uint_t dnlc_dir_hash_resize_shift = 2; /* readjust rate */
293 
294 static kmem_cache_t *dnlc_dir_space_cache; /* free space entry cache */
295 static dchead_t dc_head; /* anchor of cached directories */
296 
297 /* Prototypes */
298 static ncache_t *dnlc_get(uchar_t namlen);
299 static ncache_t *dnlc_search(vnode_t *dp, char *name, uchar_t namlen, int hash);
300 static void dnlc_reduce_cache(void *unused);
301 static void dnlc_dir_reclaim(void *unused);
302 static void dnlc_dir_abort(dircache_t *dcp);
303 static void dnlc_dir_adjust_fhash(dircache_t *dcp);
304 static void dnlc_dir_adjust_nhash(dircache_t *dcp);
305 
306 
307 /*
308  * Initialize the directory cache.
309  */
310 void
311 dnlc_init()
312 {
313 	nc_hash_t *hp;
314 	kstat_t *ksp;
315 	int i;
316 
317 	/*
318 	 * Set up the size of the dnlc (ncsize) and its low water mark.
319 	 */
320 	if (ncsize == -1) {
321 		/* calculate a reasonable size for the low water */
322 		dnlc_nentries_low_water = 4 * (v.v_proc + maxusers) + 320;
323 		ncsize = dnlc_nentries_low_water +
324 		    (dnlc_nentries_low_water / dnlc_low_water_divisor);
325 	} else {
326 		/* don't change the user specified ncsize */
327 		dnlc_nentries_low_water =
328 		    ncsize - (ncsize / dnlc_low_water_divisor);
329 	}
330 	if (ncsize <= 0) {
331 		doingcache = 0;
332 		dnlc_dir_enable = 0; /* also disable directory caching */
333 		ncsize = 0;
334 		cmn_err(CE_NOTE, "name cache (dnlc) disabled");
335 		return;
336 	}
337 	dnlc_max_nentries = ncsize * 2;
338 
339 	/*
340 	 * Initialise the hash table.
341 	 * Compute hash size rounding to the next power of two.
342 	 */
343 	nc_hashsz = ncsize / nc_hashavelen;
344 	nc_hashsz = 1 << highbit(nc_hashsz);
345 	nc_hashmask = nc_hashsz - 1;
346 	nc_hash = kmem_zalloc(nc_hashsz * sizeof (*nc_hash), KM_SLEEP);
347 	for (i = 0; i < nc_hashsz; i++) {
348 		hp = (nc_hash_t *)&nc_hash[i];
349 		mutex_init(&hp->hash_lock, NULL, MUTEX_DEFAULT, NULL);
350 		hp->hash_next = (ncache_t *)hp;
351 		hp->hash_prev = (ncache_t *)hp;
352 	}
353 
354 	/*
355 	 * Initialize rotors
356 	 */
357 	dnlc_free_rotor = dnlc_purge_fs1_rotor = &nc_hash[0];
358 
359 	/*
360 	 * Set up the directory caching to use kmem_cache_alloc
361 	 * for its free space entries so that we can get a callback
362 	 * when the system is short on memory, to allow us to free
363 	 * up some memory. we don't use the constructor/deconstructor
364 	 * functions.
365 	 */
366 	dnlc_dir_space_cache = kmem_cache_create("dnlc_space_cache",
367 	    sizeof (dcfree_t), 0, NULL, NULL, dnlc_dir_reclaim, NULL,
368 	    NULL, 0);
369 
370 	/*
371 	 * Initialise the head of the cached directory structures
372 	 */
373 	mutex_init(&dc_head.dch_lock, NULL, MUTEX_DEFAULT, NULL);
374 	dc_head.dch_next = (dircache_t *)&dc_head;
375 	dc_head.dch_prev = (dircache_t *)&dc_head;
376 
377 	/*
378 	 * Initialise the reference count of the negative cache vnode to 1
379 	 * so that it never goes away (VOP_INACTIVE isn't called on it).
380 	 */
381 	negative_cache_vnode.v_count = 1;
382 
383 	/*
384 	 * Initialise kstats - both the old compatability raw kind and
385 	 * the more extensive named stats.
386 	 */
387 	ksp = kstat_create("unix", 0, "ncstats", "misc", KSTAT_TYPE_RAW,
388 		sizeof (struct ncstats), KSTAT_FLAG_VIRTUAL);
389 	if (ksp) {
390 		ksp->ks_data = (void *) &ncstats;
391 		kstat_install(ksp);
392 	}
393 	ksp = kstat_create("unix", 0, "dnlcstats", "misc", KSTAT_TYPE_NAMED,
394 	    sizeof (ncs) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
395 	if (ksp) {
396 		ksp->ks_data = (void *) &ncs;
397 		kstat_install(ksp);
398 	}
399 }
400 
401 /*
402  * Add a name to the directory cache.
403  */
404 void
405 dnlc_enter(vnode_t *dp, char *name, vnode_t *vp)
406 {
407 	ncache_t *ncp;
408 	nc_hash_t *hp;
409 	uchar_t namlen;
410 	int hash;
411 
412 	TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_enter_start:");
413 
414 	if (!doingcache) {
415 		TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
416 		    "dnlc_enter_end:(%S) %d", "not caching", 0);
417 		return;
418 	}
419 
420 	/*
421 	 * Get a new dnlc entry. Assume the entry won't be in the cache
422 	 * and initialize it now
423 	 */
424 	DNLCHASH(name, dp, hash, namlen);
425 	if ((ncp = dnlc_get(namlen)) == NULL)
426 		return;
427 	ncp->dp = dp;
428 	VN_HOLD(dp);
429 	ncp->vp = vp;
430 	VN_HOLD(vp);
431 	bcopy(name, ncp->name, namlen + 1); /* name and null */
432 	ncp->hash = hash;
433 	hp = &nc_hash[hash & nc_hashmask];
434 
435 	mutex_enter(&hp->hash_lock);
436 	if (dnlc_search(dp, name, namlen, hash) != NULL) {
437 		mutex_exit(&hp->hash_lock);
438 		ncstats.dbl_enters++;
439 		ncs.ncs_dbl_enters.value.ui64++;
440 		VN_RELE(dp);
441 		VN_RELE(vp);
442 		dnlc_free(ncp);		/* crfree done here */
443 		TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
444 			"dnlc_enter_end:(%S) %d",
445 			"dbl enter", ncstats.dbl_enters);
446 		return;
447 	}
448 	/*
449 	 * Insert back into the hash chain.
450 	 */
451 	nc_inshash(ncp, hp);
452 	mutex_exit(&hp->hash_lock);
453 	ncstats.enters++;
454 	ncs.ncs_enters.value.ui64++;
455 	TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
456 	    "dnlc_enter_end:(%S) %d", "done", ncstats.enters);
457 }
458 
459 /*
460  * Add a name to the directory cache.
461  *
462  * This function is basically identical with
463  * dnlc_enter().  The difference is that when the
464  * desired dnlc entry is found, the vnode in the
465  * ncache is compared with the vnode passed in.
466  *
467  * If they are not equal then the ncache is
468  * updated with the passed in vnode.  Otherwise
469  * it just frees up the newly allocated dnlc entry.
470  */
471 void
472 dnlc_update(vnode_t *dp, char *name, vnode_t *vp)
473 {
474 	ncache_t *ncp;
475 	ncache_t *tcp;
476 	vnode_t *tvp;
477 	nc_hash_t *hp;
478 	int hash;
479 	uchar_t namlen;
480 
481 	TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_update_start:");
482 
483 	if (!doingcache) {
484 		TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
485 		    "dnlc_update_end:(%S) %d", "not caching", 0);
486 		return;
487 	}
488 
489 	/*
490 	 * Get a new dnlc entry and initialize it now.
491 	 * If we fail to get a new entry, call dnlc_remove() to purge
492 	 * any existing dnlc entry including negative cache (DNLC_NO_VNODE)
493 	 * entry.
494 	 * Failure to clear an existing entry could result in false dnlc
495 	 * lookup (negative/stale entry).
496 	 */
497 	DNLCHASH(name, dp, hash, namlen);
498 	if ((ncp = dnlc_get(namlen)) == NULL) {
499 		dnlc_remove(dp, name);
500 		return;
501 	}
502 	ncp->dp = dp;
503 	VN_HOLD(dp);
504 	ncp->vp = vp;
505 	VN_HOLD(vp);
506 	bcopy(name, ncp->name, namlen + 1); /* name and null */
507 	ncp->hash = hash;
508 	hp = &nc_hash[hash & nc_hashmask];
509 
510 	mutex_enter(&hp->hash_lock);
511 	if ((tcp = dnlc_search(dp, name, namlen, hash)) != NULL) {
512 		if (tcp->vp != vp) {
513 			tvp = tcp->vp;
514 			tcp->vp = vp;
515 			mutex_exit(&hp->hash_lock);
516 			VN_RELE(tvp);
517 			ncstats.enters++;
518 			ncs.ncs_enters.value.ui64++;
519 			TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
520 			    "dnlc_update_end:(%S) %d", "done", ncstats.enters);
521 		} else {
522 			mutex_exit(&hp->hash_lock);
523 			VN_RELE(vp);
524 			ncstats.dbl_enters++;
525 			ncs.ncs_dbl_enters.value.ui64++;
526 			TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
527 			    "dnlc_update_end:(%S) %d",
528 			    "dbl enter", ncstats.dbl_enters);
529 		}
530 		VN_RELE(dp);
531 		dnlc_free(ncp);		/* crfree done here */
532 		return;
533 	}
534 	/*
535 	 * insert the new entry, since it is not in dnlc yet
536 	 */
537 	nc_inshash(ncp, hp);
538 	mutex_exit(&hp->hash_lock);
539 	ncstats.enters++;
540 	ncs.ncs_enters.value.ui64++;
541 	TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
542 	    "dnlc_update_end:(%S) %d", "done", ncstats.enters);
543 }
544 
545 /*
546  * Look up a name in the directory name cache.
547  *
548  * Return a doubly-held vnode if found: one hold so that it may
549  * remain in the cache for other users, the other hold so that
550  * the cache is not re-cycled and the identity of the vnode is
551  * lost before the caller can use the vnode.
552  */
553 vnode_t *
554 dnlc_lookup(vnode_t *dp, char *name)
555 {
556 	ncache_t *ncp;
557 	nc_hash_t *hp;
558 	vnode_t *vp;
559 	int hash, depth;
560 	uchar_t namlen;
561 
562 	TRACE_2(TR_FAC_NFS, TR_DNLC_LOOKUP_START,
563 	    "dnlc_lookup_start:dp %x name %s", dp, name);
564 
565 	if (!doingcache) {
566 		TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
567 		    "dnlc_lookup_end:%S %d vp %x name %s",
568 		    "not_caching", 0, NULL, name);
569 		return (NULL);
570 	}
571 
572 	DNLCHASH(name, dp, hash, namlen);
573 	depth = 1;
574 	hp = &nc_hash[hash & nc_hashmask];
575 	mutex_enter(&hp->hash_lock);
576 
577 	for (ncp = hp->hash_next; ncp != (ncache_t *)hp;
578 	    ncp = ncp->hash_next) {
579 		if (ncp->hash == hash &&	/* fast signature check */
580 		    ncp->dp == dp &&
581 		    ncp->namlen == namlen &&
582 		    bcmp(ncp->name, name, namlen) == 0) {
583 			/*
584 			 * Move this entry to the head of its hash chain
585 			 * if it's not already close.
586 			 */
587 			if (depth > NC_MOVETOFRONT) {
588 				ncache_t *next = ncp->hash_next;
589 				ncache_t *prev = ncp->hash_prev;
590 
591 				prev->hash_next = next;
592 				next->hash_prev = prev;
593 				ncp->hash_next = next = hp->hash_next;
594 				ncp->hash_prev = (ncache_t *)hp;
595 				next->hash_prev = ncp;
596 				hp->hash_next = ncp;
597 
598 				ncstats.move_to_front++;
599 			}
600 
601 			/*
602 			 * Put a hold on the vnode now so its identity
603 			 * can't change before the caller has a chance to
604 			 * put a hold on it.
605 			 */
606 			vp = ncp->vp;
607 			VN_HOLD(vp);
608 			mutex_exit(&hp->hash_lock);
609 			ncstats.hits++;
610 			ncs.ncs_hits.value.ui64++;
611 			if (vp == DNLC_NO_VNODE) {
612 				ncs.ncs_neg_hits.value.ui64++;
613 			}
614 			TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
615 				"dnlc_lookup_end:%S %d vp %x name %s",
616 				"hit", ncstats.hits, vp, name);
617 			return (vp);
618 		}
619 		depth++;
620 	}
621 
622 	mutex_exit(&hp->hash_lock);
623 	ncstats.misses++;
624 	ncs.ncs_misses.value.ui64++;
625 	TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
626 		"dnlc_lookup_end:%S %d vp %x name %s", "miss", ncstats.misses,
627 	    NULL, name);
628 	return (NULL);
629 }
630 
631 /*
632  * Remove an entry in the directory name cache.
633  */
634 void
635 dnlc_remove(vnode_t *dp, char *name)
636 {
637 	ncache_t *ncp;
638 	nc_hash_t *hp;
639 	uchar_t namlen;
640 	int hash;
641 
642 	if (!doingcache)
643 		return;
644 	DNLCHASH(name, dp, hash, namlen);
645 	hp = &nc_hash[hash & nc_hashmask];
646 
647 	mutex_enter(&hp->hash_lock);
648 	if (ncp = dnlc_search(dp, name, namlen, hash)) {
649 		/*
650 		 * Free up the entry
651 		 */
652 		nc_rmhash(ncp);
653 		mutex_exit(&hp->hash_lock);
654 		VN_RELE(ncp->vp);
655 		VN_RELE(ncp->dp);
656 		dnlc_free(ncp);
657 		return;
658 	}
659 	mutex_exit(&hp->hash_lock);
660 }
661 
662 /*
663  * Purge the entire cache.
664  */
665 void
666 dnlc_purge()
667 {
668 	nc_hash_t *nch;
669 	ncache_t *ncp;
670 	int index;
671 	int i;
672 	vnode_t *nc_rele[DNLC_MAX_RELE];
673 
674 	if (!doingcache)
675 		return;
676 
677 	ncstats.purges++;
678 	ncs.ncs_purge_all.value.ui64++;
679 
680 	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
681 		index = 0;
682 		mutex_enter(&nch->hash_lock);
683 		ncp = nch->hash_next;
684 		while (ncp != (ncache_t *)nch) {
685 			ncache_t *np;
686 
687 			np = ncp->hash_next;
688 			nc_rele[index++] = ncp->vp;
689 			nc_rele[index++] = ncp->dp;
690 
691 			nc_rmhash(ncp);
692 			dnlc_free(ncp);
693 			ncp = np;
694 			ncs.ncs_purge_total.value.ui64++;
695 			if (index == DNLC_MAX_RELE)
696 				break;
697 		}
698 		mutex_exit(&nch->hash_lock);
699 
700 		/* Release holds on all the vnodes now that we have no locks */
701 		for (i = 0; i < index; i++) {
702 			VN_RELE(nc_rele[i]);
703 		}
704 		if (ncp != (ncache_t *)nch) {
705 			nch--; /* Do current hash chain again */
706 		}
707 	}
708 }
709 
710 /*
711  * Purge any cache entries referencing a vnode.
712  * Exit as soon as the vnode reference count goes to 1, as the caller
713  * must hold a reference, and the dnlc can therefore have no more.
714  */
715 void
716 dnlc_purge_vp(vnode_t *vp)
717 {
718 	nc_hash_t *nch;
719 	ncache_t *ncp;
720 	int index;
721 	int i;
722 	vnode_t *nc_rele[DNLC_MAX_RELE];
723 
724 	ASSERT(vp->v_count > 0);
725 	if (vp->v_count == 1) {
726 		return;
727 	}
728 
729 	if (!doingcache)
730 		return;
731 
732 	ncstats.purges++;
733 	ncs.ncs_purge_vp.value.ui64++;
734 
735 	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
736 		index = 0;
737 		mutex_enter(&nch->hash_lock);
738 		ncp = nch->hash_next;
739 		while (ncp != (ncache_t *)nch) {
740 			ncache_t *np;
741 
742 			np = ncp->hash_next;
743 			if (ncp->dp == vp || ncp->vp == vp) {
744 				nc_rele[index++] = ncp->vp;
745 				nc_rele[index++] = ncp->dp;
746 				nc_rmhash(ncp);
747 				dnlc_free(ncp);
748 				ncs.ncs_purge_total.value.ui64++;
749 				if (index == DNLC_MAX_RELE) {
750 					ncp = np;
751 					break;
752 				}
753 			}
754 			ncp = np;
755 		}
756 		mutex_exit(&nch->hash_lock);
757 
758 		/* Release holds on all the vnodes now that we have no locks */
759 		if (index) {
760 			for (i = 0; i < index; i++) {
761 				VN_RELE(nc_rele[i]);
762 			}
763 
764 			if (vp->v_count == 1) {
765 				return; /* no more dnlc references */
766 			}
767 		}
768 
769 		if (ncp != (ncache_t *)nch) {
770 			nch--; /* Do current hash chain again */
771 		}
772 	}
773 }
774 
775 /*
776  * Purge cache entries referencing a vfsp.  Caller supplies a count
777  * of entries to purge; up to that many will be freed.  A count of
778  * zero indicates that all such entries should be purged.  Returns
779  * the number of entries that were purged.
780  */
781 int
782 dnlc_purge_vfsp(vfs_t *vfsp, int count)
783 {
784 	nc_hash_t *nch;
785 	ncache_t *ncp;
786 	int n = 0;
787 	int index;
788 	int i;
789 	vnode_t *nc_rele[DNLC_MAX_RELE];
790 
791 	if (!doingcache)
792 		return (0);
793 
794 	ncstats.purges++;
795 	ncs.ncs_purge_vfs.value.ui64++;
796 
797 	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
798 		index = 0;
799 		mutex_enter(&nch->hash_lock);
800 		ncp = nch->hash_next;
801 		while (ncp != (ncache_t *)nch) {
802 			ncache_t *np;
803 
804 			np = ncp->hash_next;
805 			ASSERT(ncp->dp != NULL);
806 			ASSERT(ncp->vp != NULL);
807 			if ((ncp->dp->v_vfsp == vfsp) ||
808 			    (ncp->vp->v_vfsp == vfsp)) {
809 				n++;
810 				nc_rele[index++] = ncp->vp;
811 				nc_rele[index++] = ncp->dp;
812 				nc_rmhash(ncp);
813 				dnlc_free(ncp);
814 				ncs.ncs_purge_total.value.ui64++;
815 				if (index == DNLC_MAX_RELE) {
816 					ncp = np;
817 					break;
818 				}
819 				if (count != 0 && n >= count) {
820 					break;
821 				}
822 			}
823 			ncp = np;
824 		}
825 		mutex_exit(&nch->hash_lock);
826 		/* Release holds on all the vnodes now that we have no locks */
827 		for (i = 0; i < index; i++) {
828 			VN_RELE(nc_rele[i]);
829 		}
830 		if (count != 0 && n >= count) {
831 			return (n);
832 		}
833 		if (ncp != (ncache_t *)nch) {
834 			nch--; /* Do current hash chain again */
835 		}
836 	}
837 	return (n);
838 }
839 
840 /*
841  * Purge 1 entry from the dnlc that is part of the filesystem(s)
842  * represented by 'vop'. The purpose of this routine is to allow
843  * users of the dnlc to free a vnode that is being held by the dnlc.
844  *
845  * If we find a vnode that we release which will result in
846  * freeing the underlying vnode (count was 1), return 1, 0
847  * if no appropriate vnodes found.
848  *
849  * Note, vop is not the 'right' identifier for a filesystem.
850  */
851 int
852 dnlc_fs_purge1(vnodeops_t *vop)
853 {
854 	nc_hash_t *end;
855 	nc_hash_t *hp;
856 	ncache_t *ncp;
857 	vnode_t *vp;
858 
859 	if (!doingcache)
860 		return (0);
861 
862 	ncs.ncs_purge_fs1.value.ui64++;
863 
864 	/*
865 	 * Scan the dnlc entries looking for a likely candidate.
866 	 */
867 	hp = end = dnlc_purge_fs1_rotor;
868 
869 	do {
870 		if (++hp == &nc_hash[nc_hashsz])
871 			hp = nc_hash;
872 		dnlc_purge_fs1_rotor = hp;
873 		if (hp->hash_next == (ncache_t *)hp)
874 			continue;
875 		mutex_enter(&hp->hash_lock);
876 		for (ncp = hp->hash_prev;
877 		    ncp != (ncache_t *)hp;
878 		    ncp = ncp->hash_prev) {
879 			vp = ncp->vp;
880 			if (!vn_has_cached_data(vp) && (vp->v_count == 1) &&
881 			    vn_matchops(vp, vop))
882 				break;
883 		}
884 		if (ncp != (ncache_t *)hp) {
885 			nc_rmhash(ncp);
886 			mutex_exit(&hp->hash_lock);
887 			VN_RELE(ncp->dp);
888 			VN_RELE(vp)
889 			dnlc_free(ncp);
890 			ncs.ncs_purge_total.value.ui64++;
891 			return (1);
892 		}
893 		mutex_exit(&hp->hash_lock);
894 	} while (hp != end);
895 	return (0);
896 }
897 
898 /*
899  * Perform a reverse lookup in the DNLC.  This will find the first occurrence of
900  * the vnode.  If successful, it will return the vnode of the parent, and the
901  * name of the entry in the given buffer.  If it cannot be found, or the buffer
902  * is too small, then it will return NULL.  Note that this is a highly
903  * inefficient function, since the DNLC is constructed solely for forward
904  * lookups.
905  */
906 vnode_t *
907 dnlc_reverse_lookup(vnode_t *vp, char *buf, size_t buflen)
908 {
909 	nc_hash_t *nch;
910 	ncache_t *ncp;
911 	vnode_t *pvp;
912 
913 	if (!doingcache)
914 		return (NULL);
915 
916 	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
917 		mutex_enter(&nch->hash_lock);
918 		ncp = nch->hash_next;
919 		while (ncp != (ncache_t *)nch) {
920 			/*
921 			 * We ignore '..' entries since it can create
922 			 * confusion and infinite loops.
923 			 */
924 			if (ncp->vp == vp && !(ncp->namlen == 2 &&
925 			    0 == bcmp(ncp->name, "..", 2)) &&
926 			    ncp->namlen < buflen) {
927 				bcopy(ncp->name, buf, ncp->namlen);
928 				buf[ncp->namlen] = '\0';
929 				pvp = ncp->dp;
930 				VN_HOLD(pvp);
931 				mutex_exit(&nch->hash_lock);
932 				return (pvp);
933 			}
934 			ncp = ncp->hash_next;
935 		}
936 		mutex_exit(&nch->hash_lock);
937 	}
938 
939 	return (NULL);
940 }
941 /*
942  * Utility routine to search for a cache entry. Return the
943  * ncache entry if found, NULL otherwise.
944  */
945 static ncache_t *
946 dnlc_search(vnode_t *dp, char *name, uchar_t namlen, int hash)
947 {
948 	nc_hash_t *hp;
949 	ncache_t *ncp;
950 
951 	hp = &nc_hash[hash & nc_hashmask];
952 
953 	for (ncp = hp->hash_next; ncp != (ncache_t *)hp; ncp = ncp->hash_next) {
954 		if (ncp->hash == hash &&
955 		    ncp->dp == dp &&
956 		    ncp->namlen == namlen &&
957 		    bcmp(ncp->name, name, namlen) == 0)
958 			return (ncp);
959 	}
960 	return (NULL);
961 }
962 
963 #if ((1 << NBBY) - 1) < (MAXNAMELEN - 1)
964 #error ncache_t name length representation is too small
965 #endif
966 
967 /*
968  * Get a new name cache entry.
969  * If the dnlc_reduce_cache() taskq isn't keeping up with demand, or memory
970  * is short then just return NULL. If we're over ncsize then kick off a
971  * thread to free some in use entries down to dnlc_nentries_low_water.
972  * Caller must initialise all fields except namlen.
973  * Component names are defined to be less than MAXNAMELEN
974  * which includes a null.
975  */
976 static ncache_t *
977 dnlc_get(uchar_t namlen)
978 {
979 	ncache_t *ncp;
980 
981 	if (dnlc_nentries > dnlc_max_nentries) {
982 		dnlc_max_nentries_cnt++; /* keep a statistic */
983 		return (NULL);
984 	}
985 	ncp = kmem_alloc(sizeof (ncache_t) + namlen, KM_NOSLEEP);
986 	if (ncp == NULL) {
987 		return (NULL);
988 	}
989 	ncp->namlen = namlen;
990 	atomic_add_32(&dnlc_nentries, 1);
991 	if (dnlc_reduce_idle && (dnlc_nentries >= ncsize)) {
992 		dnlc_reduce_idle = 0;
993 		(void) taskq_dispatch(system_taskq, dnlc_reduce_cache,
994 		    NULL, TQ_SLEEP);
995 	}
996 	return (ncp);
997 }
998 
999 /*
1000  * Taskq routine to free up name cache entries to reduce the
1001  * cache size to the low water mark.
1002  */
1003 /*ARGSUSED*/
1004 static void
1005 dnlc_reduce_cache(void *unused)
1006 {
1007 	nc_hash_t *hp = dnlc_free_rotor;
1008 	vnode_t *vp;
1009 	ncache_t *ncp;
1010 	int cnt;
1011 
1012 	do {
1013 		/*
1014 		 * Find the first non empty hash queue without locking
1015 		 * Recheck we really have entries to avoid
1016 		 * an infinite loop if all the entries get purged.
1017 		 */
1018 		do {
1019 			if (++hp == &nc_hash[nc_hashsz]) {
1020 				hp = nc_hash;
1021 				if (dnlc_nentries < dnlc_nentries_low_water) {
1022 					dnlc_reduce_idle = 1;
1023 					return;
1024 				}
1025 			}
1026 		} while (hp->hash_next == (ncache_t *)hp);
1027 
1028 		mutex_enter(&hp->hash_lock);
1029 		for (cnt = 0, ncp = hp->hash_prev; ncp != (ncache_t *)hp;
1030 		    ncp = ncp->hash_prev, cnt++) {
1031 			vp = ncp->vp;
1032 			/*
1033 			 * A name cache entry with a reference count
1034 			 * of one is only referenced by the dnlc.
1035 			 * Also negative cache entries are purged first.
1036 			 */
1037 			if (!vn_has_cached_data(vp) &&
1038 			    ((vp->v_count == 1) || (vp == DNLC_NO_VNODE))) {
1039 				ncs.ncs_pick_heur.value.ui64++;
1040 				goto found;
1041 			}
1042 			/*
1043 			 * Remove from the end of the chain if the
1044 			 * chain is too long
1045 			 */
1046 			if (cnt > dnlc_long_chain) {
1047 				ncp = hp->hash_prev;
1048 				ncs.ncs_pick_last.value.ui64++;
1049 				vp = ncp->vp;
1050 				goto found;
1051 			}
1052 		}
1053 		/* check for race and continue */
1054 		if (hp->hash_next == (ncache_t *)hp) {
1055 			mutex_exit(&hp->hash_lock);
1056 			continue;
1057 		}
1058 
1059 		ncp = hp->hash_prev; /* pick the last one in the hash queue */
1060 		ncs.ncs_pick_last.value.ui64++;
1061 		vp = ncp->vp;
1062 found:
1063 		/*
1064 		 * Remove from hash chain.
1065 		 */
1066 		nc_rmhash(ncp);
1067 		mutex_exit(&hp->hash_lock);
1068 		VN_RELE(vp);
1069 		VN_RELE(ncp->dp);
1070 		dnlc_free(ncp);
1071 	} while (dnlc_nentries > dnlc_nentries_low_water);
1072 
1073 	dnlc_free_rotor = hp;
1074 	dnlc_reduce_idle = 1;
1075 }
1076 
1077 /*
1078  * Directory caching routines
1079  * ==========================
1080  *
1081  * See dnlc.h for details of the interfaces below.
1082  */
1083 
1084 /*
1085  * Lookup up an entry in a complete or partial directory cache.
1086  */
1087 dcret_t
1088 dnlc_dir_lookup(dcanchor_t *dcap, char *name, uint64_t *handle)
1089 {
1090 	dircache_t *dcp;
1091 	dcentry_t *dep;
1092 	int hash;
1093 	int ret;
1094 	uchar_t namlen;
1095 
1096 	/*
1097 	 * can test without lock as we are only a cache
1098 	 */
1099 	if (!VALID_DIR_CACHE(dcap->dca_dircache)) {
1100 		ncs.ncs_dir_misses.value.ui64++;
1101 		return (DNOCACHE);
1102 	}
1103 
1104 	if (!dnlc_dir_enable) {
1105 		return (DNOCACHE);
1106 	}
1107 
1108 	mutex_enter(&dcap->dca_lock);
1109 	dcp = (dircache_t *)dcap->dca_dircache;
1110 	if (VALID_DIR_CACHE(dcp)) {
1111 		dcp->dc_actime = lbolt64;
1112 		DNLC_DIR_HASH(name, hash, namlen);
1113 		dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1114 		while (dep != NULL) {
1115 			if ((dep->de_hash == hash) &&
1116 			    (namlen == dep->de_namelen) &&
1117 			    bcmp(dep->de_name, name, namlen) == 0) {
1118 				*handle = dep->de_handle;
1119 				mutex_exit(&dcap->dca_lock);
1120 				ncs.ncs_dir_hits.value.ui64++;
1121 				return (DFOUND);
1122 			}
1123 			dep = dep->de_next;
1124 		}
1125 		if (dcp->dc_complete) {
1126 			ret = DNOENT;
1127 		} else {
1128 			ret = DNOCACHE;
1129 		}
1130 		mutex_exit(&dcap->dca_lock);
1131 		return (ret);
1132 	} else {
1133 		mutex_exit(&dcap->dca_lock);
1134 		ncs.ncs_dir_misses.value.ui64++;
1135 		return (DNOCACHE);
1136 	}
1137 }
1138 
1139 /*
1140  * Start a new directory cache. An estimate of the number of
1141  * entries is provided to as a quick check to ensure the directory
1142  * is cacheable.
1143  */
1144 dcret_t
1145 dnlc_dir_start(dcanchor_t *dcap, uint_t num_entries)
1146 {
1147 	dircache_t *dcp;
1148 
1149 	if (!dnlc_dir_enable ||
1150 	    (num_entries < dnlc_dir_min_size)) {
1151 		return (DNOCACHE);
1152 	}
1153 
1154 	if (num_entries > dnlc_dir_max_size) {
1155 		return (DTOOBIG);
1156 	}
1157 
1158 	mutex_enter(&dc_head.dch_lock);
1159 	mutex_enter(&dcap->dca_lock);
1160 
1161 	if (dcap->dca_dircache == DC_RET_LOW_MEM) {
1162 		dcap->dca_dircache = NULL;
1163 		mutex_exit(&dcap->dca_lock);
1164 		mutex_exit(&dc_head.dch_lock);
1165 		return (DNOMEM);
1166 	}
1167 
1168 	/*
1169 	 * Check if there's currently a cache.
1170 	 * This probably only occurs on a race.
1171 	 */
1172 	if (dcap->dca_dircache != NULL) {
1173 		mutex_exit(&dcap->dca_lock);
1174 		mutex_exit(&dc_head.dch_lock);
1175 		return (DNOCACHE);
1176 	}
1177 
1178 	/*
1179 	 * Allocate the dircache struct, entry and free space hash tables.
1180 	 * These tables are initially just one entry but dynamically resize
1181 	 * when entries and free space are added or removed.
1182 	 */
1183 	if ((dcp = kmem_zalloc(sizeof (dircache_t), KM_NOSLEEP)) == NULL) {
1184 		goto error;
1185 	}
1186 	if ((dcp->dc_namehash = kmem_zalloc(sizeof (dcentry_t *),
1187 	    KM_NOSLEEP)) == NULL) {
1188 		goto error;
1189 	}
1190 	if ((dcp->dc_freehash = kmem_zalloc(sizeof (dcfree_t *),
1191 	    KM_NOSLEEP)) == NULL) {
1192 		goto error;
1193 	}
1194 
1195 	dcp->dc_anchor = dcap; /* set back pointer to anchor */
1196 	dcap->dca_dircache = dcp;
1197 
1198 	/* add into head of global chain */
1199 	dcp->dc_next = dc_head.dch_next;
1200 	dcp->dc_prev = (dircache_t *)&dc_head;
1201 	dcp->dc_next->dc_prev = dcp;
1202 	dc_head.dch_next = dcp;
1203 
1204 	mutex_exit(&dcap->dca_lock);
1205 	mutex_exit(&dc_head.dch_lock);
1206 	ncs.ncs_cur_dirs.value.ui64++;
1207 	ncs.ncs_dirs_cached.value.ui64++;
1208 	return (DOK);
1209 error:
1210 	if (dcp != NULL) {
1211 		if (dcp->dc_namehash) {
1212 			kmem_free(dcp->dc_namehash, sizeof (dcentry_t *));
1213 		}
1214 		kmem_free(dcp, sizeof (dircache_t));
1215 	}
1216 	/*
1217 	 * Must also kmem_free dcp->dc_freehash if more error cases are added
1218 	 */
1219 	mutex_exit(&dcap->dca_lock);
1220 	mutex_exit(&dc_head.dch_lock);
1221 	ncs.ncs_dir_start_nm.value.ui64++;
1222 	return (DNOCACHE);
1223 }
1224 
1225 /*
1226  * Add a directopry entry to a partial or complete directory cache.
1227  */
1228 dcret_t
1229 dnlc_dir_add_entry(dcanchor_t *dcap, char *name, uint64_t handle)
1230 {
1231 	dircache_t *dcp;
1232 	dcentry_t **hp, *dep;
1233 	int hash;
1234 	uint_t capacity;
1235 	uchar_t namlen;
1236 
1237 	/*
1238 	 * Allocate the dcentry struct, including the variable
1239 	 * size name. Note, the null terminator is not copied.
1240 	 *
1241 	 * We do this outside the lock to avoid possible deadlock if
1242 	 * dnlc_dir_reclaim() is called as a result of memory shortage.
1243 	 */
1244 	DNLC_DIR_HASH(name, hash, namlen);
1245 	dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
1246 	if (dep == NULL) {
1247 #ifdef DEBUG
1248 		/*
1249 		 * The kmem allocator generates random failures for
1250 		 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
1251 		 * So try again before we blow away a perfectly good cache.
1252 		 * This is done not to cover an error but purely for
1253 		 * performance running a debug kernel.
1254 		 * This random error only occurs in debug mode.
1255 		 */
1256 		dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
1257 		if (dep != NULL)
1258 			goto ok;
1259 #endif
1260 		ncs.ncs_dir_add_nm.value.ui64++;
1261 		/*
1262 		 * Free a directory cache. This may be the one we are
1263 		 * called with.
1264 		 */
1265 		dnlc_dir_reclaim(NULL);
1266 		dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
1267 		if (dep == NULL) {
1268 			/*
1269 			 * still no memory, better delete this cache
1270 			 */
1271 			mutex_enter(&dcap->dca_lock);
1272 			dcp = (dircache_t *)dcap->dca_dircache;
1273 			if (VALID_DIR_CACHE(dcp)) {
1274 				dnlc_dir_abort(dcp);
1275 				dcap->dca_dircache = DC_RET_LOW_MEM;
1276 			}
1277 			mutex_exit(&dcap->dca_lock);
1278 			ncs.ncs_dir_addabort.value.ui64++;
1279 			return (DNOCACHE);
1280 		}
1281 		/*
1282 		 * fall through as if the 1st kmem_alloc had worked
1283 		 */
1284 	}
1285 #ifdef DEBUG
1286 ok:
1287 #endif
1288 	mutex_enter(&dcap->dca_lock);
1289 	dcp = (dircache_t *)dcap->dca_dircache;
1290 	if (VALID_DIR_CACHE(dcp)) {
1291 		/*
1292 		 * If the total number of entries goes above the max
1293 		 * then free this cache
1294 		 */
1295 		if ((dcp->dc_num_entries + dcp->dc_num_free) >
1296 			dnlc_dir_max_size) {
1297 			mutex_exit(&dcap->dca_lock);
1298 			dnlc_dir_purge(dcap);
1299 			kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
1300 			ncs.ncs_dir_add_max.value.ui64++;
1301 			return (DTOOBIG);
1302 		}
1303 		dcp->dc_num_entries++;
1304 		capacity = (dcp->dc_nhash_mask + 1) << dnlc_dir_hash_size_shift;
1305 		if (dcp->dc_num_entries >=
1306 		    (capacity << dnlc_dir_hash_resize_shift)) {
1307 			dnlc_dir_adjust_nhash(dcp);
1308 		}
1309 		hp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1310 
1311 		/*
1312 		 * Initialise and chain in new entry
1313 		 */
1314 		dep->de_handle = handle;
1315 		dep->de_hash = hash;
1316 		/*
1317 		 * Note de_namelen is a uchar_t to conserve space
1318 		 * and alignment padding. The max length of any
1319 		 * pathname component is defined as MAXNAMELEN
1320 		 * which is 256 (including the terminating null).
1321 		 * So provided this doesn't change, we don't include the null,
1322 		 * we always use bcmp to compare strings, and we don't
1323 		 * start storing full names, then we are ok.
1324 		 * The space savings is worth it.
1325 		 */
1326 		dep->de_namelen = namlen;
1327 		bcopy(name, dep->de_name, namlen);
1328 		dep->de_next = *hp;
1329 		*hp = dep;
1330 		dcp->dc_actime = lbolt64;
1331 		mutex_exit(&dcap->dca_lock);
1332 		ncs.ncs_dir_num_ents.value.ui64++;
1333 		return (DOK);
1334 	} else {
1335 		mutex_exit(&dcap->dca_lock);
1336 		kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
1337 		return (DNOCACHE);
1338 	}
1339 }
1340 
1341 /*
1342  * Add free space to a partial or complete directory cache.
1343  */
1344 dcret_t
1345 dnlc_dir_add_space(dcanchor_t *dcap, uint_t len, uint64_t handle)
1346 {
1347 	dircache_t *dcp;
1348 	dcfree_t *dfp, **hp;
1349 	uint_t capacity;
1350 
1351 	/*
1352 	 * We kmem_alloc outside the lock to avoid possible deadlock if
1353 	 * dnlc_dir_reclaim() is called as a result of memory shortage.
1354 	 */
1355 	dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1356 	if (dfp == NULL) {
1357 #ifdef DEBUG
1358 		/*
1359 		 * The kmem allocator generates random failures for
1360 		 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
1361 		 * So try again before we blow away a perfectly good cache.
1362 		 * This random error only occurs in debug mode
1363 		 */
1364 		dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1365 		if (dfp != NULL)
1366 			goto ok;
1367 #endif
1368 		ncs.ncs_dir_add_nm.value.ui64++;
1369 		/*
1370 		 * Free a directory cache. This may be the one we are
1371 		 * called with.
1372 		 */
1373 		dnlc_dir_reclaim(NULL);
1374 		dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1375 		if (dfp == NULL) {
1376 			/*
1377 			 * still no memory, better delete this cache
1378 			 */
1379 			mutex_enter(&dcap->dca_lock);
1380 			dcp = (dircache_t *)dcap->dca_dircache;
1381 			if (VALID_DIR_CACHE(dcp)) {
1382 				dnlc_dir_abort(dcp);
1383 				dcap->dca_dircache = DC_RET_LOW_MEM;
1384 			}
1385 			mutex_exit(&dcap->dca_lock);
1386 			ncs.ncs_dir_addabort.value.ui64++;
1387 			return (DNOCACHE);
1388 		}
1389 		/*
1390 		 * fall through as if the 1st kmem_alloc had worked
1391 		 */
1392 	}
1393 
1394 #ifdef DEBUG
1395 ok:
1396 #endif
1397 	mutex_enter(&dcap->dca_lock);
1398 	dcp = (dircache_t *)dcap->dca_dircache;
1399 	if (VALID_DIR_CACHE(dcp)) {
1400 		if ((dcp->dc_num_entries + dcp->dc_num_free) >
1401 			dnlc_dir_max_size) {
1402 			mutex_exit(&dcap->dca_lock);
1403 			dnlc_dir_purge(dcap);
1404 			kmem_cache_free(dnlc_dir_space_cache, dfp);
1405 			ncs.ncs_dir_add_max.value.ui64++;
1406 			return (DTOOBIG);
1407 		}
1408 		dcp->dc_num_free++;
1409 		capacity = (dcp->dc_fhash_mask + 1) << dnlc_dir_hash_size_shift;
1410 		if (dcp->dc_num_free >=
1411 		    (capacity << dnlc_dir_hash_resize_shift)) {
1412 			dnlc_dir_adjust_fhash(dcp);
1413 		}
1414 		/*
1415 		 * Initialise and chain a new entry
1416 		 */
1417 		dfp->df_handle = handle;
1418 		dfp->df_len = len;
1419 		dcp->dc_actime = lbolt64;
1420 		hp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
1421 		dfp->df_next = *hp;
1422 		*hp = dfp;
1423 		mutex_exit(&dcap->dca_lock);
1424 		ncs.ncs_dir_num_ents.value.ui64++;
1425 		return (DOK);
1426 	} else {
1427 		mutex_exit(&dcap->dca_lock);
1428 		kmem_cache_free(dnlc_dir_space_cache, dfp);
1429 		return (DNOCACHE);
1430 	}
1431 }
1432 
1433 /*
1434  * Mark a directory cache as complete.
1435  */
1436 void
1437 dnlc_dir_complete(dcanchor_t *dcap)
1438 {
1439 	dircache_t *dcp;
1440 
1441 	mutex_enter(&dcap->dca_lock);
1442 	dcp = (dircache_t *)dcap->dca_dircache;
1443 	if (VALID_DIR_CACHE(dcp)) {
1444 		dcp->dc_complete = B_TRUE;
1445 	}
1446 	mutex_exit(&dcap->dca_lock);
1447 }
1448 
1449 /*
1450  * Internal routine to delete a partial or full directory cache.
1451  * No additional locking needed.
1452  */
1453 static void
1454 dnlc_dir_abort(dircache_t *dcp)
1455 {
1456 	dcentry_t *dep, *nhp;
1457 	dcfree_t *fep, *fhp;
1458 	uint_t nhtsize = dcp->dc_nhash_mask + 1; /* name hash table size */
1459 	uint_t fhtsize = dcp->dc_fhash_mask + 1; /* free hash table size */
1460 	uint_t i;
1461 
1462 	/*
1463 	 * Free up the cached name entries and hash table
1464 	 */
1465 	for (i = 0; i < nhtsize; i++) { /* for each hash bucket */
1466 		nhp = dcp->dc_namehash[i];
1467 		while (nhp != NULL) { /* for each chained entry */
1468 			dep = nhp->de_next;
1469 			kmem_free(nhp, sizeof (dcentry_t) - 1 +
1470 			    nhp->de_namelen);
1471 			nhp = dep;
1472 		}
1473 	}
1474 	kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * nhtsize);
1475 
1476 	/*
1477 	 * Free up the free space entries and hash table
1478 	 */
1479 	for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
1480 		fhp = dcp->dc_freehash[i];
1481 		while (fhp != NULL) { /* for each chained entry */
1482 			fep = fhp->df_next;
1483 			kmem_cache_free(dnlc_dir_space_cache, fhp);
1484 			fhp = fep;
1485 		}
1486 	}
1487 	kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * fhtsize);
1488 
1489 	/*
1490 	 * Finally free the directory cache structure itself
1491 	 */
1492 	ncs.ncs_dir_num_ents.value.ui64 -= (dcp->dc_num_entries +
1493 	    dcp->dc_num_free);
1494 	kmem_free(dcp, sizeof (dircache_t));
1495 	ncs.ncs_cur_dirs.value.ui64--;
1496 }
1497 
1498 /*
1499  * Remove a partial or complete directory cache
1500  */
1501 void
1502 dnlc_dir_purge(dcanchor_t *dcap)
1503 {
1504 	dircache_t *dcp;
1505 
1506 	mutex_enter(&dc_head.dch_lock);
1507 	mutex_enter(&dcap->dca_lock);
1508 	dcp = (dircache_t *)dcap->dca_dircache;
1509 	if (!VALID_DIR_CACHE(dcp)) {
1510 		mutex_exit(&dcap->dca_lock);
1511 		mutex_exit(&dc_head.dch_lock);
1512 		return;
1513 	}
1514 	dcap->dca_dircache = NULL;
1515 	/*
1516 	 * Unchain from global list
1517 	 */
1518 	dcp->dc_prev->dc_next = dcp->dc_next;
1519 	dcp->dc_next->dc_prev = dcp->dc_prev;
1520 	mutex_exit(&dcap->dca_lock);
1521 	mutex_exit(&dc_head.dch_lock);
1522 	dnlc_dir_abort(dcp);
1523 }
1524 
1525 /*
1526  * Remove an entry from a complete or partial directory cache.
1527  * Return the handle if it's non null.
1528  */
1529 dcret_t
1530 dnlc_dir_rem_entry(dcanchor_t *dcap, char *name, uint64_t *handlep)
1531 {
1532 	dircache_t *dcp;
1533 	dcentry_t **prevpp, *te;
1534 	uint_t capacity;
1535 	int hash;
1536 	int ret;
1537 	uchar_t namlen;
1538 
1539 	if (!dnlc_dir_enable) {
1540 		return (DNOCACHE);
1541 	}
1542 
1543 	mutex_enter(&dcap->dca_lock);
1544 	dcp = (dircache_t *)dcap->dca_dircache;
1545 	if (VALID_DIR_CACHE(dcp)) {
1546 		dcp->dc_actime = lbolt64;
1547 		if (dcp->dc_nhash_mask > 0) { /* ie not minimum */
1548 			capacity = (dcp->dc_nhash_mask + 1) <<
1549 			    dnlc_dir_hash_size_shift;
1550 			if (dcp->dc_num_entries <=
1551 			    (capacity >> dnlc_dir_hash_resize_shift)) {
1552 				dnlc_dir_adjust_nhash(dcp);
1553 			}
1554 		}
1555 		DNLC_DIR_HASH(name, hash, namlen);
1556 		prevpp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1557 		while (*prevpp != NULL) {
1558 			if (((*prevpp)->de_hash == hash) &&
1559 			    (namlen == (*prevpp)->de_namelen) &&
1560 			    bcmp((*prevpp)->de_name, name, namlen) == 0) {
1561 				if (handlep != NULL) {
1562 					*handlep = (*prevpp)->de_handle;
1563 				}
1564 				te = *prevpp;
1565 				*prevpp = (*prevpp)->de_next;
1566 				kmem_free(te, sizeof (dcentry_t) - 1 +
1567 				    te->de_namelen);
1568 
1569 				/*
1570 				 * If the total number of entries
1571 				 * falls below half the minimum number
1572 				 * of entries then free this cache.
1573 				 */
1574 				if (--dcp->dc_num_entries <
1575 				    (dnlc_dir_min_size >> 1)) {
1576 					mutex_exit(&dcap->dca_lock);
1577 					dnlc_dir_purge(dcap);
1578 				} else {
1579 					mutex_exit(&dcap->dca_lock);
1580 				}
1581 				ncs.ncs_dir_num_ents.value.ui64--;
1582 				return (DFOUND);
1583 			}
1584 			prevpp = &((*prevpp)->de_next);
1585 		}
1586 		if (dcp->dc_complete) {
1587 			ncs.ncs_dir_reme_fai.value.ui64++;
1588 			ret = DNOENT;
1589 		} else {
1590 			ret = DNOCACHE;
1591 		}
1592 		mutex_exit(&dcap->dca_lock);
1593 		return (ret);
1594 	} else {
1595 		mutex_exit(&dcap->dca_lock);
1596 		return (DNOCACHE);
1597 	}
1598 }
1599 
1600 
1601 /*
1602  * Remove free space of at least the given length from a complete
1603  * or partial directory cache.
1604  */
1605 dcret_t
1606 dnlc_dir_rem_space_by_len(dcanchor_t *dcap, uint_t len, uint64_t *handlep)
1607 {
1608 	dircache_t *dcp;
1609 	dcfree_t **prevpp, *tfp;
1610 	uint_t fhtsize; /* free hash table size */
1611 	uint_t i;
1612 	uint_t capacity;
1613 	int ret;
1614 
1615 	if (!dnlc_dir_enable) {
1616 		return (DNOCACHE);
1617 	}
1618 
1619 	mutex_enter(&dcap->dca_lock);
1620 	dcp = (dircache_t *)dcap->dca_dircache;
1621 	if (VALID_DIR_CACHE(dcp)) {
1622 		dcp->dc_actime = lbolt64;
1623 		if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
1624 			capacity = (dcp->dc_fhash_mask + 1) <<
1625 			    dnlc_dir_hash_size_shift;
1626 			if (dcp->dc_num_free <=
1627 			    (capacity >> dnlc_dir_hash_resize_shift)) {
1628 				dnlc_dir_adjust_fhash(dcp);
1629 			}
1630 		}
1631 		/*
1632 		 * Search for an entry of the appropriate size
1633 		 * on a first fit basis.
1634 		 */
1635 		fhtsize = dcp->dc_fhash_mask + 1;
1636 		for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
1637 			prevpp = &(dcp->dc_freehash[i]);
1638 			while (*prevpp != NULL) {
1639 				if ((*prevpp)->df_len >= len) {
1640 					*handlep = (*prevpp)->df_handle;
1641 					tfp = *prevpp;
1642 					*prevpp = (*prevpp)->df_next;
1643 					dcp->dc_num_free--;
1644 					mutex_exit(&dcap->dca_lock);
1645 					kmem_cache_free(dnlc_dir_space_cache,
1646 					    tfp);
1647 					ncs.ncs_dir_num_ents.value.ui64--;
1648 					return (DFOUND);
1649 				}
1650 				prevpp = &((*prevpp)->df_next);
1651 			}
1652 		}
1653 		if (dcp->dc_complete) {
1654 			ret = DNOENT;
1655 		} else {
1656 			ret = DNOCACHE;
1657 		}
1658 		mutex_exit(&dcap->dca_lock);
1659 		return (ret);
1660 	} else {
1661 		mutex_exit(&dcap->dca_lock);
1662 		return (DNOCACHE);
1663 	}
1664 }
1665 
1666 /*
1667  * Remove free space with the given handle from a complete or partial
1668  * directory cache.
1669  */
1670 dcret_t
1671 dnlc_dir_rem_space_by_handle(dcanchor_t *dcap, uint64_t handle)
1672 {
1673 	dircache_t *dcp;
1674 	dcfree_t **prevpp, *tfp;
1675 	uint_t capacity;
1676 	int ret;
1677 
1678 	if (!dnlc_dir_enable) {
1679 		return (DNOCACHE);
1680 	}
1681 
1682 	mutex_enter(&dcap->dca_lock);
1683 	dcp = (dircache_t *)dcap->dca_dircache;
1684 	if (VALID_DIR_CACHE(dcp)) {
1685 		dcp->dc_actime = lbolt64;
1686 		if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
1687 			capacity = (dcp->dc_fhash_mask + 1) <<
1688 			    dnlc_dir_hash_size_shift;
1689 			if (dcp->dc_num_free <=
1690 			    (capacity >> dnlc_dir_hash_resize_shift)) {
1691 				dnlc_dir_adjust_fhash(dcp);
1692 			}
1693 		}
1694 
1695 		/*
1696 		 * search for the exact entry
1697 		 */
1698 		prevpp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
1699 		while (*prevpp != NULL) {
1700 			if ((*prevpp)->df_handle == handle) {
1701 				tfp = *prevpp;
1702 				*prevpp = (*prevpp)->df_next;
1703 				dcp->dc_num_free--;
1704 				mutex_exit(&dcap->dca_lock);
1705 				kmem_cache_free(dnlc_dir_space_cache, tfp);
1706 				ncs.ncs_dir_num_ents.value.ui64--;
1707 				return (DFOUND);
1708 			}
1709 			prevpp = &((*prevpp)->df_next);
1710 		}
1711 		if (dcp->dc_complete) {
1712 			ncs.ncs_dir_rems_fai.value.ui64++;
1713 			ret = DNOENT;
1714 		} else {
1715 			ret = DNOCACHE;
1716 		}
1717 		mutex_exit(&dcap->dca_lock);
1718 		return (ret);
1719 	} else {
1720 		mutex_exit(&dcap->dca_lock);
1721 		return (DNOCACHE);
1722 	}
1723 }
1724 
1725 /*
1726  * Update the handle of an directory cache entry.
1727  */
1728 dcret_t
1729 dnlc_dir_update(dcanchor_t *dcap, char *name, uint64_t handle)
1730 {
1731 	dircache_t *dcp;
1732 	dcentry_t *dep;
1733 	int hash;
1734 	int ret;
1735 	uchar_t namlen;
1736 
1737 	if (!dnlc_dir_enable) {
1738 		return (DNOCACHE);
1739 	}
1740 
1741 	mutex_enter(&dcap->dca_lock);
1742 	dcp = (dircache_t *)dcap->dca_dircache;
1743 	if (VALID_DIR_CACHE(dcp)) {
1744 		dcp->dc_actime = lbolt64;
1745 		DNLC_DIR_HASH(name, hash, namlen);
1746 		dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1747 		while (dep != NULL) {
1748 			if ((dep->de_hash == hash) &&
1749 			    (namlen == dep->de_namelen) &&
1750 			    bcmp(dep->de_name, name, namlen) == 0) {
1751 				dep->de_handle = handle;
1752 				mutex_exit(&dcap->dca_lock);
1753 				return (DFOUND);
1754 			}
1755 			dep = dep->de_next;
1756 		}
1757 		if (dcp->dc_complete) {
1758 			ncs.ncs_dir_upd_fail.value.ui64++;
1759 			ret = DNOENT;
1760 		} else {
1761 			ret = DNOCACHE;
1762 		}
1763 		mutex_exit(&dcap->dca_lock);
1764 		return (ret);
1765 	} else {
1766 		mutex_exit(&dcap->dca_lock);
1767 		return (DNOCACHE);
1768 	}
1769 }
1770 
1771 void
1772 dnlc_dir_fini(dcanchor_t *dcap)
1773 {
1774 	dircache_t *dcp;
1775 
1776 	mutex_enter(&dc_head.dch_lock);
1777 	mutex_enter(&dcap->dca_lock);
1778 	dcp = (dircache_t *)dcap->dca_dircache;
1779 	if (VALID_DIR_CACHE(dcp)) {
1780 		/*
1781 		 * Unchain from global list
1782 		 */
1783 		ncs.ncs_dir_finipurg.value.ui64++;
1784 		dcp->dc_prev->dc_next = dcp->dc_next;
1785 		dcp->dc_next->dc_prev = dcp->dc_prev;
1786 	} else {
1787 		dcp = NULL;
1788 	}
1789 	dcap->dca_dircache = NULL;
1790 	mutex_exit(&dcap->dca_lock);
1791 	mutex_exit(&dc_head.dch_lock);
1792 	mutex_destroy(&dcap->dca_lock);
1793 	if (dcp) {
1794 		dnlc_dir_abort(dcp);
1795 	}
1796 }
1797 
1798 /*
1799  * Reclaim callback for dnlc directory caching.
1800  * Invoked by the kernel memory allocator when memory gets tight.
1801  * This is a pretty serious condition and can lead easily lead to system
1802  * hangs if not enough space is returned.
1803  *
1804  * Deciding which directory (or directories) to purge is tricky.
1805  * Purging everything is an overkill, but purging just the oldest used
1806  * was found to lead to hangs. The largest cached directories use the
1807  * most memory, but take the most effort to rebuild, whereas the smaller
1808  * ones have little value and give back little space. So what to do?
1809  *
1810  * The current policy is to continue purging the oldest used directories
1811  * until at least dnlc_dir_min_reclaim directory entries have been purged.
1812  */
1813 /*ARGSUSED*/
1814 static void
1815 dnlc_dir_reclaim(void *unused)
1816 {
1817 	dircache_t *dcp, *oldest;
1818 	uint_t dirent_cnt = 0;
1819 
1820 	mutex_enter(&dc_head.dch_lock);
1821 	while (dirent_cnt < dnlc_dir_min_reclaim) {
1822 		dcp = dc_head.dch_next;
1823 		oldest = NULL;
1824 		while (dcp != (dircache_t *)&dc_head) {
1825 			if (oldest == NULL) {
1826 				oldest = dcp;
1827 			} else {
1828 				if (dcp->dc_actime < oldest->dc_actime) {
1829 					oldest = dcp;
1830 				}
1831 			}
1832 			dcp = dcp->dc_next;
1833 		}
1834 		if (oldest == NULL) {
1835 			/* nothing to delete */
1836 			mutex_exit(&dc_head.dch_lock);
1837 			return;
1838 		}
1839 		/*
1840 		 * remove from directory chain and purge
1841 		 */
1842 		oldest->dc_prev->dc_next = oldest->dc_next;
1843 		oldest->dc_next->dc_prev = oldest->dc_prev;
1844 		mutex_enter(&oldest->dc_anchor->dca_lock);
1845 		/*
1846 		 * If this was the last entry then it must be too large.
1847 		 * Mark it as such by saving a special dircache_t
1848 		 * pointer (DC_RET_LOW_MEM) in the anchor. The error DNOMEM
1849 		 * will be presented to the caller of dnlc_dir_start()
1850 		 */
1851 		if (oldest->dc_next == oldest->dc_prev) {
1852 			oldest->dc_anchor->dca_dircache = DC_RET_LOW_MEM;
1853 			ncs.ncs_dir_rec_last.value.ui64++;
1854 		} else {
1855 			oldest->dc_anchor->dca_dircache = NULL;
1856 			ncs.ncs_dir_recl_any.value.ui64++;
1857 		}
1858 		mutex_exit(&oldest->dc_anchor->dca_lock);
1859 		dirent_cnt += oldest->dc_num_entries;
1860 		dnlc_dir_abort(oldest);
1861 	}
1862 	mutex_exit(&dc_head.dch_lock);
1863 }
1864 
1865 /*
1866  * Dynamically grow or shrink the size of the name hash table
1867  */
1868 static void
1869 dnlc_dir_adjust_nhash(dircache_t *dcp)
1870 {
1871 	dcentry_t **newhash, *dep, **nhp, *tep;
1872 	uint_t newsize;
1873 	uint_t oldsize;
1874 	uint_t newsizemask;
1875 	int i;
1876 
1877 	/*
1878 	 * Allocate new hash table
1879 	 */
1880 	newsize = dcp->dc_num_entries >> dnlc_dir_hash_size_shift;
1881 	newhash = kmem_zalloc(sizeof (dcentry_t *) * newsize, KM_NOSLEEP);
1882 	if (newhash == NULL) {
1883 		/*
1884 		 * System is short on memory just return
1885 		 * Note, the old hash table is still usable.
1886 		 * This return is unlikely to repeatedy occur, because
1887 		 * either some other directory caches will be reclaimed
1888 		 * due to memory shortage, thus freeing memory, or this
1889 		 * directory cahe will be reclaimed.
1890 		 */
1891 		return;
1892 	}
1893 	oldsize = dcp->dc_nhash_mask + 1;
1894 	dcp->dc_nhash_mask = newsizemask = newsize - 1;
1895 
1896 	/*
1897 	 * Move entries from the old table to the new
1898 	 */
1899 	for (i = 0; i < oldsize; i++) { /* for each hash bucket */
1900 		dep = dcp->dc_namehash[i];
1901 		while (dep != NULL) { /* for each chained entry */
1902 			tep = dep;
1903 			dep = dep->de_next;
1904 			nhp = &newhash[tep->de_hash & newsizemask];
1905 			tep->de_next = *nhp;
1906 			*nhp = tep;
1907 		}
1908 	}
1909 
1910 	/*
1911 	 * delete old hash table and set new one in place
1912 	 */
1913 	kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * oldsize);
1914 	dcp->dc_namehash = newhash;
1915 }
1916 
1917 /*
1918  * Dynamically grow or shrink the size of the free space hash table
1919  */
1920 static void
1921 dnlc_dir_adjust_fhash(dircache_t *dcp)
1922 {
1923 	dcfree_t **newhash, *dfp, **nhp, *tfp;
1924 	uint_t newsize;
1925 	uint_t oldsize;
1926 	int i;
1927 
1928 	/*
1929 	 * Allocate new hash table
1930 	 */
1931 	newsize = dcp->dc_num_free >> dnlc_dir_hash_size_shift;
1932 	newhash = kmem_zalloc(sizeof (dcfree_t *) * newsize, KM_NOSLEEP);
1933 	if (newhash == NULL) {
1934 		/*
1935 		 * System is short on memory just return
1936 		 * Note, the old hash table is still usable.
1937 		 * This return is unlikely to repeatedy occur, because
1938 		 * either some other directory caches will be reclaimed
1939 		 * due to memory shortage, thus freeing memory, or this
1940 		 * directory cahe will be reclaimed.
1941 		 */
1942 		return;
1943 	}
1944 	oldsize = dcp->dc_fhash_mask + 1;
1945 	dcp->dc_fhash_mask = newsize - 1;
1946 
1947 	/*
1948 	 * Move entries from the old table to the new
1949 	 */
1950 	for (i = 0; i < oldsize; i++) { /* for each hash bucket */
1951 		dfp = dcp->dc_freehash[i];
1952 		while (dfp != NULL) { /* for each chained entry */
1953 			tfp = dfp;
1954 			dfp = dfp->df_next;
1955 			nhp = &newhash[DDFHASH(tfp->df_handle, dcp)];
1956 			tfp->df_next = *nhp;
1957 			*nhp = tfp;
1958 		}
1959 	}
1960 
1961 	/*
1962 	 * delete old hash table and set new one in place
1963 	 */
1964 	kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * oldsize);
1965 	dcp->dc_freehash = newhash;
1966 }
1967