xref: /titanic_41/usr/src/uts/common/fs/dnlc.c (revision 445f2479fe3d7435daab18bf2cdc310b86cd6738)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 /*
30  * University Copyright- Copyright (c) 1982, 1986, 1988
31  * The Regents of the University of California
32  * All Rights Reserved
33  *
34  * University Acknowledgment- Portions of this document are derived from
35  * software developed by the University of California, Berkeley, and its
36  * contributors.
37  */
38 
39 #pragma ident	"%Z%%M%	%I%	%E% SMI"
40 
41 #include <sys/types.h>
42 #include <sys/systm.h>
43 #include <sys/param.h>
44 #include <sys/t_lock.h>
45 #include <sys/systm.h>
46 #include <sys/vfs.h>
47 #include <sys/vnode.h>
48 #include <sys/dnlc.h>
49 #include <sys/kmem.h>
50 #include <sys/cmn_err.h>
51 #include <sys/vtrace.h>
52 #include <sys/bitmap.h>
53 #include <sys/var.h>
54 #include <sys/sysmacros.h>
55 #include <sys/kstat.h>
56 #include <sys/atomic.h>
57 #include <sys/taskq.h>
58 
59 /*
60  * Directory name lookup cache.
61  * Based on code originally done by Robert Elz at Melbourne.
62  *
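 * Names found by directory scans are retained in a cache
 * for future reference.  Each hash chain is kept in approximate
 * LRU order: new entries are inserted at the front of a chain and
 * entries found deep in a chain are moved to the front on lookup.
 * The cache is indexed by a hash value obtained from (vp, name),
 * where the vp refers to the directory containing the name.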
67  */
68 
69 /*
 * Tunable nc_hashavelen is the average length desired for each hash chain,
 * from which the size of the nc_hash table is derived at create time.
72  */
73 #define	NC_HASHAVELEN_DEFAULT	4
74 int nc_hashavelen = NC_HASHAVELEN_DEFAULT;
75 
76 /*
77  * NC_MOVETOFRONT is the move-to-front threshold: if the hash lookup
78  * depth exceeds this value, we move the looked-up entry to the front of
79  * its hash chain.  The idea is to make sure that the most frequently
80  * accessed entries are found most quickly (by keeping them near the
81  * front of their hash chains).
82  */
83 #define	NC_MOVETOFRONT	2
84 
85 /*
86  *
87  * DNLC_MAX_RELE is used to size an array on the stack when releasing
88  * vnodes. This array is used rather than calling VN_RELE() inline because
89  * all dnlc locks must be dropped by that time in order to avoid a
90  * possible deadlock. This deadlock occurs when the dnlc holds the last
91  * reference to the vnode and so the VOP_INACTIVE vector is called which
92  * can in turn call back into the dnlc. A global array was used but had
93  * many problems:
94  *	1) Actually doesn't have an upper bound on the array size as
95  *	   entries can be added after starting the purge.
96  *	2) The locking scheme causes a hang.
97  *	3) Caused serialisation on the global lock.
98  *	4) The array was often unnecessarily huge.
99  *
100  * Note the current value 8 allows up to 4 cache entries (to be purged
101  * from each hash chain), before having to cycle around and retry.
102  * This ought to be ample given that nc_hashavelen is typically very small.
103  */
104 #define	DNLC_MAX_RELE	8 /* must be even */
105 
106 /*
107  * Hash table of name cache entries for fast lookup, dynamically
108  * allocated at startup.
109  */
110 nc_hash_t *nc_hash;
111 
112 /*
113  * Rotors. Used to select entries on a round-robin basis.
114  */
115 static nc_hash_t *dnlc_purge_fs1_rotor;
116 static nc_hash_t *dnlc_free_rotor;
117 
118 /*
119  * # of dnlc entries (uninitialized)
120  *
 * The initial value (-1) was chosen as a value that a systems
 * administrator would probably not normally choose; dnlc_init()
 * treats it as "not set" and computes a default size.
124  */
125 int ncsize = -1;
126 uint32_t dnlc_nentries = 0;	/* current number of name cache entries */
127 static int nc_hashsz;		/* size of hash table */
128 static int nc_hashmask;		/* size of hash table minus 1 */
129 
130 /*
131  * The dnlc_reduce_cache() taskq queue is activated when there are
132  * ncsize name cache entries and if no parameter is provided, it reduces
133  * the size down to dnlc_nentries_low_water, which is by default one
 * hundredth less (or 99%) of ncsize.
135  *
136  * If a parameter is provided to dnlc_reduce_cache(), then we reduce
137  * the size down based on ncsize_onepercent - where ncsize_onepercent
138  * is 1% of ncsize.
139  */
140 #define	DNLC_LOW_WATER_DIVISOR_DEFAULT 100
141 uint_t dnlc_low_water_divisor = DNLC_LOW_WATER_DIVISOR_DEFAULT;
142 uint_t dnlc_nentries_low_water;
143 int dnlc_reduce_idle = 1; /* no locking needed */
144 uint_t ncsize_onepercent;
145 
146 /*
147  * If dnlc_nentries hits dnlc_max_nentries (twice ncsize)
148  * then this means the dnlc_reduce_cache() taskq is failing to
149  * keep up. In this case we refuse to add new entries to the dnlc
150  * until the taskq catches up.
151  */
152 uint_t dnlc_max_nentries; /* twice ncsize */
153 uint64_t dnlc_max_nentries_cnt = 0; /* statistic on times we failed */
154 
155 /*
156  * Tunable to define when we should just remove items from
157  * the end of the chain.
158  */
159 #define	DNLC_LONG_CHAIN 8
160 uint_t dnlc_long_chain = DNLC_LONG_CHAIN;
161 
162 /*
163  * ncstats has been deprecated, due to the integer size of the counters
164  * which can easily overflow in the dnlc.
 * It is maintained (at some expense) for compatibility.
166  * The preferred interface is the kstat accessible nc_stats below.
167  */
168 struct ncstats ncstats;
169 
170 struct nc_stats ncs = {
171 	{ "hits",			KSTAT_DATA_UINT64 },
172 	{ "misses",			KSTAT_DATA_UINT64 },
173 	{ "negative_cache_hits",	KSTAT_DATA_UINT64 },
174 	{ "enters",			KSTAT_DATA_UINT64 },
175 	{ "double_enters",		KSTAT_DATA_UINT64 },
176 	{ "purge_total_entries",	KSTAT_DATA_UINT64 },
177 	{ "purge_all",			KSTAT_DATA_UINT64 },
178 	{ "purge_vp",			KSTAT_DATA_UINT64 },
179 	{ "purge_vfs",			KSTAT_DATA_UINT64 },
180 	{ "purge_fs1",			KSTAT_DATA_UINT64 },
181 	{ "pick_free",			KSTAT_DATA_UINT64 },
182 	{ "pick_heuristic",		KSTAT_DATA_UINT64 },
183 	{ "pick_last",			KSTAT_DATA_UINT64 },
184 
185 	/* directory caching stats */
186 
187 	{ "dir_hits",			KSTAT_DATA_UINT64 },
188 	{ "dir_misses",			KSTAT_DATA_UINT64 },
189 	{ "dir_cached_current",		KSTAT_DATA_UINT64 },
190 	{ "dir_entries_cached_current",	KSTAT_DATA_UINT64 },
191 	{ "dir_cached_total",		KSTAT_DATA_UINT64 },
192 	{ "dir_start_no_memory",	KSTAT_DATA_UINT64 },
193 	{ "dir_add_no_memory",		KSTAT_DATA_UINT64 },
194 	{ "dir_add_abort",		KSTAT_DATA_UINT64 },
195 	{ "dir_add_max",		KSTAT_DATA_UINT64 },
196 	{ "dir_remove_entry_fail",	KSTAT_DATA_UINT64 },
197 	{ "dir_remove_space_fail",	KSTAT_DATA_UINT64 },
198 	{ "dir_update_fail",		KSTAT_DATA_UINT64 },
199 	{ "dir_fini_purge",		KSTAT_DATA_UINT64 },
200 	{ "dir_reclaim_last",		KSTAT_DATA_UINT64 },
201 	{ "dir_reclaim_any",		KSTAT_DATA_UINT64 },
202 };
203 
204 static int doingcache = 1;
205 
206 vnode_t negative_cache_vnode;
207 
/*
 * Insert entry ncp at the front of the hash chain headed by hp, i.e.
 * directly after the chain header.  The header is cast to an ncache_t *
 * so that it can act as the sentinel of the circular doubly-linked list.
 *
 * The macro takes no locks itself; the callers in this file use it with
 * hp->hash_lock held.  Both arguments are evaluated more than once, so
 * they must be free of side effects.
 */
#define	nc_inshash(ncp, hp) \
{ \
	(ncp)->hash_next = (hp)->hash_next; \
	(ncp)->hash_prev = (ncache_t *)(hp); \
	(hp)->hash_next->hash_prev = (ncp); \
	(hp)->hash_next = (ncp); \
}
218 
/*
 * Remove entry ncp from its hash chain: splice its neighbours together
 * and then clear the entry's own link pointers.
 *
 * The macro takes no locks itself; the callers in this file use it with
 * the chain's hash_lock held.  The argument is evaluated more than once,
 * so it must be free of side effects.
 */
#define	nc_rmhash(ncp) \
{ \
	(ncp)->hash_prev->hash_next = (ncp)->hash_next; \
	(ncp)->hash_next->hash_prev = (ncp)->hash_prev; \
	(ncp)->hash_prev = NULL; \
	(ncp)->hash_next = NULL; \
}
229 
/*
 * Free an entry.
 *
 * Returns the entry's memory (the ncache_t plus namlen bytes, the same
 * size dnlc_get() allocates) and atomically decrements dnlc_nentries.
 * Vnode holds are not dropped here; callers issue VN_RELE() on the
 * entry's dp and vp separately.  The argument is evaluated twice.
 */
#define	dnlc_free(ncp) \
{ \
	kmem_free((ncp), sizeof (ncache_t) + (ncp)->namlen); \
	atomic_add_32(&dnlc_nentries, -1); \
}
238 
239 
240 /*
241  * Cached directory info.
242  * ======================
243  */
244 
/*
 * Cached directory free space hash function.
 * Needs the free space handle and the dcp to get the hash table mask.
 * Evaluates to the hash index: the handle shifted right by two bits and
 * masked with (dcp)->dc_fhash_mask.  Both arguments are parenthesised so
 * that arbitrary expressions can be passed safely.
 */
#define	DDFHASH(handle, dcp) (((handle) >> 2) & (dcp)->dc_fhash_mask)
251 
/*
 * Cached directory name entry hash function.
 * Hashes the null-terminated string 'name' and stores the result in
 * 'hash' and the string length (excluding the null) in 'namelen'; both
 * must be assignable lvalues.
 *
 * The scan starts at the second character, so 'name' is expected to be
 * a non-empty string.  'name' is evaluated several times and must not
 * have side effects; it is parenthesised so that pointer expressions
 * such as (p + 1) hash the intended string.
 */
#define	DNLC_DIR_HASH(name, hash, namelen)			\
	{							\
		char Xc, *Xcp;					\
		hash = *(name);					\
		for (Xcp = ((name) + 1); (Xc = *Xcp) != 0; Xcp++)	\
			hash = (hash << 4) + hash + Xc;		\
		ASSERT((Xcp - (name)) <= ((1 << NBBY) - 1));	\
		namelen = Xcp - (name);				\
	}
266 
/* special dircache_t pointer to indicate error should be returned */
/*
 * The anchor directory cache pointer can contain 3 types of values,
 * 1) NULL: No directory cache
 * 2) DC_RET_LOW_MEM (the pointer value 1): There was a directory cache
 *    that was found to be too big or a memory shortage occurred. This
 *    value remains in the pointer until a dnlc_dir_start(), which
 *    returns a DNOMEM error.
 *    This is kludgy but efficient and only visible in this source file.
 * 3) A valid cache pointer.
 *
 * VALID_DIR_CACHE() is true only for case 3, i.e. for pointers that
 * compare greater than DC_RET_LOW_MEM.
 */
#define	DC_RET_LOW_MEM ((dircache_t *)1)
#define	VALID_DIR_CACHE(dcp) ((dircache_t *)(dcp) > DC_RET_LOW_MEM)
279 
280 /* Tunables */
281 uint_t dnlc_dir_enable = 1; /* disable caching directories by setting to 0 */
282 uint_t dnlc_dir_min_size = 40; /* min no of directory entries before caching */
283 uint_t dnlc_dir_max_size = UINT_MAX; /* ditto maximum */
284 uint_t dnlc_dir_hash_size_shift = 3; /* 8 entries per hash bucket */
285 uint_t dnlc_dir_min_reclaim =  350000; /* approx 1MB of dcentrys */
286 /*
287  * dnlc_dir_hash_resize_shift determines when the hash tables
288  * get re-adjusted due to growth or shrinkage
289  * - currently 2 indicating that there can be at most 4
290  * times or at least one quarter the number of entries
291  * before hash table readjustment. Note that with
292  * dnlc_dir_hash_size_shift above set at 3 this would
293  * mean readjustment would occur if the average number
294  * of entries went above 32 or below 2
295  */
296 uint_t dnlc_dir_hash_resize_shift = 2; /* readjust rate */
297 
298 static kmem_cache_t *dnlc_dir_space_cache; /* free space entry cache */
299 static dchead_t dc_head; /* anchor of cached directories */
300 
301 /* Prototypes */
302 static ncache_t *dnlc_get(uchar_t namlen);
303 static ncache_t *dnlc_search(vnode_t *dp, char *name, uchar_t namlen, int hash);
304 static void dnlc_dir_reclaim(void *unused);
305 static void dnlc_dir_abort(dircache_t *dcp);
306 static void dnlc_dir_adjust_fhash(dircache_t *dcp);
307 static void dnlc_dir_adjust_nhash(dircache_t *dcp);
308 
309 
310 /*
311  * Initialize the directory cache.
312  */
313 void
314 dnlc_init()
315 {
316 	nc_hash_t *hp;
317 	kstat_t *ksp;
318 	int i;
319 
320 	/*
321 	 * Set up the size of the dnlc (ncsize) and its low water mark.
322 	 */
323 	if (ncsize == -1) {
324 		/* calculate a reasonable size for the low water */
325 		dnlc_nentries_low_water = 4 * (v.v_proc + maxusers) + 320;
326 		ncsize = dnlc_nentries_low_water +
327 		    (dnlc_nentries_low_water / dnlc_low_water_divisor);
328 	} else {
329 		/* don't change the user specified ncsize */
330 		dnlc_nentries_low_water =
331 		    ncsize - (ncsize / dnlc_low_water_divisor);
332 	}
333 	if (ncsize <= 0) {
334 		doingcache = 0;
335 		dnlc_dir_enable = 0; /* also disable directory caching */
336 		ncsize = 0;
337 		cmn_err(CE_NOTE, "name cache (dnlc) disabled");
338 		return;
339 	}
340 	dnlc_max_nentries = ncsize * 2;
341 	ncsize_onepercent = ncsize / 100;
342 
343 	/*
344 	 * Initialise the hash table.
345 	 * Compute hash size rounding to the next power of two.
346 	 */
347 	nc_hashsz = ncsize / nc_hashavelen;
348 	nc_hashsz = 1 << highbit(nc_hashsz);
349 	nc_hashmask = nc_hashsz - 1;
350 	nc_hash = kmem_zalloc(nc_hashsz * sizeof (*nc_hash), KM_SLEEP);
351 	for (i = 0; i < nc_hashsz; i++) {
352 		hp = (nc_hash_t *)&nc_hash[i];
353 		mutex_init(&hp->hash_lock, NULL, MUTEX_DEFAULT, NULL);
354 		hp->hash_next = (ncache_t *)hp;
355 		hp->hash_prev = (ncache_t *)hp;
356 	}
357 
358 	/*
359 	 * Initialize rotors
360 	 */
361 	dnlc_free_rotor = dnlc_purge_fs1_rotor = &nc_hash[0];
362 
363 	/*
364 	 * Set up the directory caching to use kmem_cache_alloc
365 	 * for its free space entries so that we can get a callback
366 	 * when the system is short on memory, to allow us to free
367 	 * up some memory. we don't use the constructor/deconstructor
368 	 * functions.
369 	 */
370 	dnlc_dir_space_cache = kmem_cache_create("dnlc_space_cache",
371 	    sizeof (dcfree_t), 0, NULL, NULL, dnlc_dir_reclaim, NULL,
372 	    NULL, 0);
373 
374 	/*
375 	 * Initialise the head of the cached directory structures
376 	 */
377 	mutex_init(&dc_head.dch_lock, NULL, MUTEX_DEFAULT, NULL);
378 	dc_head.dch_next = (dircache_t *)&dc_head;
379 	dc_head.dch_prev = (dircache_t *)&dc_head;
380 
381 	/*
382 	 * Initialise the reference count of the negative cache vnode to 1
383 	 * so that it never goes away (VOP_INACTIVE isn't called on it).
384 	 */
385 	negative_cache_vnode.v_count = 1;
386 
387 	/*
388 	 * Initialise kstats - both the old compatability raw kind and
389 	 * the more extensive named stats.
390 	 */
391 	ksp = kstat_create("unix", 0, "ncstats", "misc", KSTAT_TYPE_RAW,
392 		sizeof (struct ncstats), KSTAT_FLAG_VIRTUAL);
393 	if (ksp) {
394 		ksp->ks_data = (void *) &ncstats;
395 		kstat_install(ksp);
396 	}
397 	ksp = kstat_create("unix", 0, "dnlcstats", "misc", KSTAT_TYPE_NAMED,
398 	    sizeof (ncs) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
399 	if (ksp) {
400 		ksp->ks_data = (void *) &ncs;
401 		kstat_install(ksp);
402 	}
403 }
404 
405 /*
406  * Add a name to the directory cache.
407  */
408 void
409 dnlc_enter(vnode_t *dp, char *name, vnode_t *vp)
410 {
411 	ncache_t *ncp;
412 	nc_hash_t *hp;
413 	uchar_t namlen;
414 	int hash;
415 
416 	TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_enter_start:");
417 
418 	if (!doingcache) {
419 		TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
420 		    "dnlc_enter_end:(%S) %d", "not caching", 0);
421 		return;
422 	}
423 
424 	/*
425 	 * Get a new dnlc entry. Assume the entry won't be in the cache
426 	 * and initialize it now
427 	 */
428 	DNLCHASH(name, dp, hash, namlen);
429 	if ((ncp = dnlc_get(namlen)) == NULL)
430 		return;
431 	ncp->dp = dp;
432 	VN_HOLD(dp);
433 	ncp->vp = vp;
434 	VN_HOLD(vp);
435 	bcopy(name, ncp->name, namlen + 1); /* name and null */
436 	ncp->hash = hash;
437 	hp = &nc_hash[hash & nc_hashmask];
438 
439 	mutex_enter(&hp->hash_lock);
440 	if (dnlc_search(dp, name, namlen, hash) != NULL) {
441 		mutex_exit(&hp->hash_lock);
442 		ncstats.dbl_enters++;
443 		ncs.ncs_dbl_enters.value.ui64++;
444 		VN_RELE(dp);
445 		VN_RELE(vp);
446 		dnlc_free(ncp);		/* crfree done here */
447 		TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
448 			"dnlc_enter_end:(%S) %d",
449 			"dbl enter", ncstats.dbl_enters);
450 		return;
451 	}
452 	/*
453 	 * Insert back into the hash chain.
454 	 */
455 	nc_inshash(ncp, hp);
456 	mutex_exit(&hp->hash_lock);
457 	ncstats.enters++;
458 	ncs.ncs_enters.value.ui64++;
459 	TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
460 	    "dnlc_enter_end:(%S) %d", "done", ncstats.enters);
461 }
462 
463 /*
464  * Add a name to the directory cache.
465  *
466  * This function is basically identical with
467  * dnlc_enter().  The difference is that when the
468  * desired dnlc entry is found, the vnode in the
469  * ncache is compared with the vnode passed in.
470  *
471  * If they are not equal then the ncache is
472  * updated with the passed in vnode.  Otherwise
473  * it just frees up the newly allocated dnlc entry.
474  */
475 void
476 dnlc_update(vnode_t *dp, char *name, vnode_t *vp)
477 {
478 	ncache_t *ncp;
479 	ncache_t *tcp;
480 	vnode_t *tvp;
481 	nc_hash_t *hp;
482 	int hash;
483 	uchar_t namlen;
484 
485 	TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_update_start:");
486 
487 	if (!doingcache) {
488 		TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
489 		    "dnlc_update_end:(%S) %d", "not caching", 0);
490 		return;
491 	}
492 
493 	/*
494 	 * Get a new dnlc entry and initialize it now.
495 	 * If we fail to get a new entry, call dnlc_remove() to purge
496 	 * any existing dnlc entry including negative cache (DNLC_NO_VNODE)
497 	 * entry.
498 	 * Failure to clear an existing entry could result in false dnlc
499 	 * lookup (negative/stale entry).
500 	 */
501 	DNLCHASH(name, dp, hash, namlen);
502 	if ((ncp = dnlc_get(namlen)) == NULL) {
503 		dnlc_remove(dp, name);
504 		return;
505 	}
506 	ncp->dp = dp;
507 	VN_HOLD(dp);
508 	ncp->vp = vp;
509 	VN_HOLD(vp);
510 	bcopy(name, ncp->name, namlen + 1); /* name and null */
511 	ncp->hash = hash;
512 	hp = &nc_hash[hash & nc_hashmask];
513 
514 	mutex_enter(&hp->hash_lock);
515 	if ((tcp = dnlc_search(dp, name, namlen, hash)) != NULL) {
516 		if (tcp->vp != vp) {
517 			tvp = tcp->vp;
518 			tcp->vp = vp;
519 			mutex_exit(&hp->hash_lock);
520 			VN_RELE(tvp);
521 			ncstats.enters++;
522 			ncs.ncs_enters.value.ui64++;
523 			TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
524 			    "dnlc_update_end:(%S) %d", "done", ncstats.enters);
525 		} else {
526 			mutex_exit(&hp->hash_lock);
527 			VN_RELE(vp);
528 			ncstats.dbl_enters++;
529 			ncs.ncs_dbl_enters.value.ui64++;
530 			TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
531 			    "dnlc_update_end:(%S) %d",
532 			    "dbl enter", ncstats.dbl_enters);
533 		}
534 		VN_RELE(dp);
535 		dnlc_free(ncp);		/* crfree done here */
536 		return;
537 	}
538 	/*
539 	 * insert the new entry, since it is not in dnlc yet
540 	 */
541 	nc_inshash(ncp, hp);
542 	mutex_exit(&hp->hash_lock);
543 	ncstats.enters++;
544 	ncs.ncs_enters.value.ui64++;
545 	TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
546 	    "dnlc_update_end:(%S) %d", "done", ncstats.enters);
547 }
548 
549 /*
550  * Look up a name in the directory name cache.
551  *
552  * Return a doubly-held vnode if found: one hold so that it may
553  * remain in the cache for other users, the other hold so that
554  * the cache is not re-cycled and the identity of the vnode is
555  * lost before the caller can use the vnode.
556  */
557 vnode_t *
558 dnlc_lookup(vnode_t *dp, char *name)
559 {
560 	ncache_t *ncp;
561 	nc_hash_t *hp;
562 	vnode_t *vp;
563 	int hash, depth;
564 	uchar_t namlen;
565 
566 	TRACE_2(TR_FAC_NFS, TR_DNLC_LOOKUP_START,
567 	    "dnlc_lookup_start:dp %x name %s", dp, name);
568 
569 	if (!doingcache) {
570 		TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
571 		    "dnlc_lookup_end:%S %d vp %x name %s",
572 		    "not_caching", 0, NULL, name);
573 		return (NULL);
574 	}
575 
576 	DNLCHASH(name, dp, hash, namlen);
577 	depth = 1;
578 	hp = &nc_hash[hash & nc_hashmask];
579 	mutex_enter(&hp->hash_lock);
580 
581 	for (ncp = hp->hash_next; ncp != (ncache_t *)hp;
582 	    ncp = ncp->hash_next) {
583 		if (ncp->hash == hash &&	/* fast signature check */
584 		    ncp->dp == dp &&
585 		    ncp->namlen == namlen &&
586 		    bcmp(ncp->name, name, namlen) == 0) {
587 			/*
588 			 * Move this entry to the head of its hash chain
589 			 * if it's not already close.
590 			 */
591 			if (depth > NC_MOVETOFRONT) {
592 				ncache_t *next = ncp->hash_next;
593 				ncache_t *prev = ncp->hash_prev;
594 
595 				prev->hash_next = next;
596 				next->hash_prev = prev;
597 				ncp->hash_next = next = hp->hash_next;
598 				ncp->hash_prev = (ncache_t *)hp;
599 				next->hash_prev = ncp;
600 				hp->hash_next = ncp;
601 
602 				ncstats.move_to_front++;
603 			}
604 
605 			/*
606 			 * Put a hold on the vnode now so its identity
607 			 * can't change before the caller has a chance to
608 			 * put a hold on it.
609 			 */
610 			vp = ncp->vp;
611 			VN_HOLD(vp);
612 			mutex_exit(&hp->hash_lock);
613 			ncstats.hits++;
614 			ncs.ncs_hits.value.ui64++;
615 			if (vp == DNLC_NO_VNODE) {
616 				ncs.ncs_neg_hits.value.ui64++;
617 			}
618 			TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
619 				"dnlc_lookup_end:%S %d vp %x name %s",
620 				"hit", ncstats.hits, vp, name);
621 			return (vp);
622 		}
623 		depth++;
624 	}
625 
626 	mutex_exit(&hp->hash_lock);
627 	ncstats.misses++;
628 	ncs.ncs_misses.value.ui64++;
629 	TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
630 		"dnlc_lookup_end:%S %d vp %x name %s", "miss", ncstats.misses,
631 	    NULL, name);
632 	return (NULL);
633 }
634 
635 /*
636  * Remove an entry in the directory name cache.
637  */
638 void
639 dnlc_remove(vnode_t *dp, char *name)
640 {
641 	ncache_t *ncp;
642 	nc_hash_t *hp;
643 	uchar_t namlen;
644 	int hash;
645 
646 	if (!doingcache)
647 		return;
648 	DNLCHASH(name, dp, hash, namlen);
649 	hp = &nc_hash[hash & nc_hashmask];
650 
651 	mutex_enter(&hp->hash_lock);
652 	if (ncp = dnlc_search(dp, name, namlen, hash)) {
653 		/*
654 		 * Free up the entry
655 		 */
656 		nc_rmhash(ncp);
657 		mutex_exit(&hp->hash_lock);
658 		VN_RELE(ncp->vp);
659 		VN_RELE(ncp->dp);
660 		dnlc_free(ncp);
661 		return;
662 	}
663 	mutex_exit(&hp->hash_lock);
664 }
665 
666 /*
667  * Purge the entire cache.
668  */
669 void
670 dnlc_purge()
671 {
672 	nc_hash_t *nch;
673 	ncache_t *ncp;
674 	int index;
675 	int i;
676 	vnode_t *nc_rele[DNLC_MAX_RELE];
677 
678 	if (!doingcache)
679 		return;
680 
681 	ncstats.purges++;
682 	ncs.ncs_purge_all.value.ui64++;
683 
684 	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
685 		index = 0;
686 		mutex_enter(&nch->hash_lock);
687 		ncp = nch->hash_next;
688 		while (ncp != (ncache_t *)nch) {
689 			ncache_t *np;
690 
691 			np = ncp->hash_next;
692 			nc_rele[index++] = ncp->vp;
693 			nc_rele[index++] = ncp->dp;
694 
695 			nc_rmhash(ncp);
696 			dnlc_free(ncp);
697 			ncp = np;
698 			ncs.ncs_purge_total.value.ui64++;
699 			if (index == DNLC_MAX_RELE)
700 				break;
701 		}
702 		mutex_exit(&nch->hash_lock);
703 
704 		/* Release holds on all the vnodes now that we have no locks */
705 		for (i = 0; i < index; i++) {
706 			VN_RELE(nc_rele[i]);
707 		}
708 		if (ncp != (ncache_t *)nch) {
709 			nch--; /* Do current hash chain again */
710 		}
711 	}
712 }
713 
714 /*
715  * Purge any cache entries referencing a vnode.
716  * Exit as soon as the vnode reference count goes to 1, as the caller
717  * must hold a reference, and the dnlc can therefore have no more.
718  */
719 void
720 dnlc_purge_vp(vnode_t *vp)
721 {
722 	nc_hash_t *nch;
723 	ncache_t *ncp;
724 	int index;
725 	vnode_t *nc_rele[DNLC_MAX_RELE];
726 
727 	ASSERT(vp->v_count > 0);
728 	if (vp->v_count == 1) {
729 		return;
730 	}
731 
732 	if (!doingcache)
733 		return;
734 
735 	ncstats.purges++;
736 	ncs.ncs_purge_vp.value.ui64++;
737 
738 	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
739 		index = 0;
740 		mutex_enter(&nch->hash_lock);
741 		ncp = nch->hash_next;
742 		while (ncp != (ncache_t *)nch) {
743 			ncache_t *np;
744 
745 			np = ncp->hash_next;
746 			if (ncp->dp == vp || ncp->vp == vp) {
747 				nc_rele[index++] = ncp->vp;
748 				nc_rele[index++] = ncp->dp;
749 				nc_rmhash(ncp);
750 				dnlc_free(ncp);
751 				ncs.ncs_purge_total.value.ui64++;
752 				if (index == DNLC_MAX_RELE) {
753 					ncp = np;
754 					break;
755 				}
756 			}
757 			ncp = np;
758 		}
759 		mutex_exit(&nch->hash_lock);
760 
761 		/* Release holds on all the vnodes now that we have no locks */
762 		while (index) {
763 			VN_RELE(nc_rele[--index]);
764 		}
765 
766 		if (vp->v_count == 1) {
767 			return; /* no more dnlc references */
768 		}
769 
770 		if (ncp != (ncache_t *)nch) {
771 			nch--; /* Do current hash chain again */
772 		}
773 	}
774 }
775 
776 /*
777  * Purge cache entries referencing a vfsp.  Caller supplies a count
778  * of entries to purge; up to that many will be freed.  A count of
779  * zero indicates that all such entries should be purged.  Returns
780  * the number of entries that were purged.
781  */
782 int
783 dnlc_purge_vfsp(vfs_t *vfsp, int count)
784 {
785 	nc_hash_t *nch;
786 	ncache_t *ncp;
787 	int n = 0;
788 	int index;
789 	int i;
790 	vnode_t *nc_rele[DNLC_MAX_RELE];
791 
792 	if (!doingcache)
793 		return (0);
794 
795 	ncstats.purges++;
796 	ncs.ncs_purge_vfs.value.ui64++;
797 
798 	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
799 		index = 0;
800 		mutex_enter(&nch->hash_lock);
801 		ncp = nch->hash_next;
802 		while (ncp != (ncache_t *)nch) {
803 			ncache_t *np;
804 
805 			np = ncp->hash_next;
806 			ASSERT(ncp->dp != NULL);
807 			ASSERT(ncp->vp != NULL);
808 			if ((ncp->dp->v_vfsp == vfsp) ||
809 			    (ncp->vp->v_vfsp == vfsp)) {
810 				n++;
811 				nc_rele[index++] = ncp->vp;
812 				nc_rele[index++] = ncp->dp;
813 				nc_rmhash(ncp);
814 				dnlc_free(ncp);
815 				ncs.ncs_purge_total.value.ui64++;
816 				if (index == DNLC_MAX_RELE) {
817 					ncp = np;
818 					break;
819 				}
820 				if (count != 0 && n >= count) {
821 					break;
822 				}
823 			}
824 			ncp = np;
825 		}
826 		mutex_exit(&nch->hash_lock);
827 		/* Release holds on all the vnodes now that we have no locks */
828 		for (i = 0; i < index; i++) {
829 			VN_RELE(nc_rele[i]);
830 		}
831 		if (count != 0 && n >= count) {
832 			return (n);
833 		}
834 		if (ncp != (ncache_t *)nch) {
835 			nch--; /* Do current hash chain again */
836 		}
837 	}
838 	return (n);
839 }
840 
841 /*
842  * Purge 1 entry from the dnlc that is part of the filesystem(s)
843  * represented by 'vop'. The purpose of this routine is to allow
844  * users of the dnlc to free a vnode that is being held by the dnlc.
845  *
846  * If we find a vnode that we release which will result in
847  * freeing the underlying vnode (count was 1), return 1, 0
848  * if no appropriate vnodes found.
849  *
850  * Note, vop is not the 'right' identifier for a filesystem.
851  */
852 int
853 dnlc_fs_purge1(vnodeops_t *vop)
854 {
855 	nc_hash_t *end;
856 	nc_hash_t *hp;
857 	ncache_t *ncp;
858 	vnode_t *vp;
859 
860 	if (!doingcache)
861 		return (0);
862 
863 	ncs.ncs_purge_fs1.value.ui64++;
864 
865 	/*
866 	 * Scan the dnlc entries looking for a likely candidate.
867 	 */
868 	hp = end = dnlc_purge_fs1_rotor;
869 
870 	do {
871 		if (++hp == &nc_hash[nc_hashsz])
872 			hp = nc_hash;
873 		dnlc_purge_fs1_rotor = hp;
874 		if (hp->hash_next == (ncache_t *)hp)
875 			continue;
876 		mutex_enter(&hp->hash_lock);
877 		for (ncp = hp->hash_prev;
878 		    ncp != (ncache_t *)hp;
879 		    ncp = ncp->hash_prev) {
880 			vp = ncp->vp;
881 			if (!vn_has_cached_data(vp) && (vp->v_count == 1) &&
882 			    vn_matchops(vp, vop))
883 				break;
884 		}
885 		if (ncp != (ncache_t *)hp) {
886 			nc_rmhash(ncp);
887 			mutex_exit(&hp->hash_lock);
888 			VN_RELE(ncp->dp);
889 			VN_RELE(vp)
890 			dnlc_free(ncp);
891 			ncs.ncs_purge_total.value.ui64++;
892 			return (1);
893 		}
894 		mutex_exit(&hp->hash_lock);
895 	} while (hp != end);
896 	return (0);
897 }
898 
899 /*
900  * Perform a reverse lookup in the DNLC.  This will find the first occurrence of
901  * the vnode.  If successful, it will return the vnode of the parent, and the
902  * name of the entry in the given buffer.  If it cannot be found, or the buffer
903  * is too small, then it will return NULL.  Note that this is a highly
904  * inefficient function, since the DNLC is constructed solely for forward
905  * lookups.
906  */
907 vnode_t *
908 dnlc_reverse_lookup(vnode_t *vp, char *buf, size_t buflen)
909 {
910 	nc_hash_t *nch;
911 	ncache_t *ncp;
912 	vnode_t *pvp;
913 
914 	if (!doingcache)
915 		return (NULL);
916 
917 	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
918 		mutex_enter(&nch->hash_lock);
919 		ncp = nch->hash_next;
920 		while (ncp != (ncache_t *)nch) {
921 			/*
922 			 * We ignore '..' entries since it can create
923 			 * confusion and infinite loops.
924 			 */
925 			if (ncp->vp == vp && !(ncp->namlen == 2 &&
926 			    0 == bcmp(ncp->name, "..", 2)) &&
927 			    ncp->namlen < buflen) {
928 				bcopy(ncp->name, buf, ncp->namlen);
929 				buf[ncp->namlen] = '\0';
930 				pvp = ncp->dp;
931 				VN_HOLD(pvp);
932 				mutex_exit(&nch->hash_lock);
933 				return (pvp);
934 			}
935 			ncp = ncp->hash_next;
936 		}
937 		mutex_exit(&nch->hash_lock);
938 	}
939 
940 	return (NULL);
941 }
942 /*
943  * Utility routine to search for a cache entry. Return the
944  * ncache entry if found, NULL otherwise.
945  */
946 static ncache_t *
947 dnlc_search(vnode_t *dp, char *name, uchar_t namlen, int hash)
948 {
949 	nc_hash_t *hp;
950 	ncache_t *ncp;
951 
952 	hp = &nc_hash[hash & nc_hashmask];
953 
954 	for (ncp = hp->hash_next; ncp != (ncache_t *)hp; ncp = ncp->hash_next) {
955 		if (ncp->hash == hash &&
956 		    ncp->dp == dp &&
957 		    ncp->namlen == namlen &&
958 		    bcmp(ncp->name, name, namlen) == 0)
959 			return (ncp);
960 	}
961 	return (NULL);
962 }
963 
964 #if ((1 << NBBY) - 1) < (MAXNAMELEN - 1)
965 #error ncache_t name length representation is too small
966 #endif
967 
968 /*
969  * Get a new name cache entry.
970  * If the dnlc_reduce_cache() taskq isn't keeping up with demand, or memory
971  * is short then just return NULL. If we're over ncsize then kick off a
972  * thread to free some in use entries down to dnlc_nentries_low_water.
973  * Caller must initialise all fields except namlen.
974  * Component names are defined to be less than MAXNAMELEN
975  * which includes a null.
976  */
977 static ncache_t *
978 dnlc_get(uchar_t namlen)
979 {
980 	ncache_t *ncp;
981 
982 	if (dnlc_nentries > dnlc_max_nentries) {
983 		dnlc_max_nentries_cnt++; /* keep a statistic */
984 		return (NULL);
985 	}
986 	ncp = kmem_alloc(sizeof (ncache_t) + namlen, KM_NOSLEEP);
987 	if (ncp == NULL) {
988 		return (NULL);
989 	}
990 	ncp->namlen = namlen;
991 	atomic_add_32(&dnlc_nentries, 1);
992 	if (dnlc_reduce_idle && (dnlc_nentries >= ncsize)) {
993 		dnlc_reduce_idle = 0;
994 		(void) taskq_dispatch(system_taskq, dnlc_reduce_cache,
995 		    NULL, TQ_SLEEP);
996 	}
997 	return (ncp);
998 }
999 
1000 /*
1001  * Taskq routine to free up name cache entries to reduce the
1002  * cache size to the low water mark if "reduce_percent" is not provided.
1003  * If "reduce_percent" is provided, reduce cache size by
1004  * (ncsize_onepercent * reduce_percent).
1005  *
1006  * This routine can also be called directly by ZFS's ARC when memory is low.
1007  */
1008 /*ARGSUSED*/
1009 void
1010 dnlc_reduce_cache(void *reduce_percent)
1011 {
1012 	nc_hash_t *hp = dnlc_free_rotor;
1013 	vnode_t *vp;
1014 	ncache_t *ncp;
1015 	int cnt;
1016 	uint_t low_water = dnlc_nentries_low_water;
1017 
1018 	if (reduce_percent) {
1019 		uint_t reduce_cnt;
1020 
1021 		reduce_cnt = ncsize_onepercent *
1022 		    (uint_t)(uintptr_t)reduce_percent;
1023 		if (reduce_cnt > dnlc_nentries)
1024 			low_water = 0;
1025 		else
1026 			low_water = dnlc_nentries - reduce_cnt;
1027 	}
1028 
1029 	do {
1030 		/*
1031 		 * Find the first non empty hash queue without locking
1032 		 * Recheck we really have entries to avoid
1033 		 * an infinite loop if all the entries get purged.
1034 		 */
1035 		do {
1036 			if (++hp == &nc_hash[nc_hashsz]) {
1037 				hp = nc_hash;
1038 				if (dnlc_nentries <= low_water) {
1039 					dnlc_reduce_idle = 1;
1040 					return;
1041 				}
1042 			}
1043 		} while (hp->hash_next == (ncache_t *)hp);
1044 
1045 		mutex_enter(&hp->hash_lock);
1046 		for (cnt = 0, ncp = hp->hash_prev; ncp != (ncache_t *)hp;
1047 		    ncp = ncp->hash_prev, cnt++) {
1048 			vp = ncp->vp;
1049 			/*
1050 			 * A name cache entry with a reference count
1051 			 * of one is only referenced by the dnlc.
1052 			 * Also negative cache entries are purged first.
1053 			 */
1054 			if (!vn_has_cached_data(vp) &&
1055 			    ((vp->v_count == 1) || (vp == DNLC_NO_VNODE))) {
1056 				ncs.ncs_pick_heur.value.ui64++;
1057 				goto found;
1058 			}
1059 			/*
1060 			 * Remove from the end of the chain if the
1061 			 * chain is too long
1062 			 */
1063 			if (cnt > dnlc_long_chain) {
1064 				ncp = hp->hash_prev;
1065 				ncs.ncs_pick_last.value.ui64++;
1066 				vp = ncp->vp;
1067 				goto found;
1068 			}
1069 		}
1070 		/* check for race and continue */
1071 		if (hp->hash_next == (ncache_t *)hp) {
1072 			mutex_exit(&hp->hash_lock);
1073 			continue;
1074 		}
1075 
1076 		ncp = hp->hash_prev; /* pick the last one in the hash queue */
1077 		ncs.ncs_pick_last.value.ui64++;
1078 		vp = ncp->vp;
1079 found:
1080 		/*
1081 		 * Remove from hash chain.
1082 		 */
1083 		nc_rmhash(ncp);
1084 		mutex_exit(&hp->hash_lock);
1085 		VN_RELE(vp);
1086 		VN_RELE(ncp->dp);
1087 		dnlc_free(ncp);
1088 	} while (dnlc_nentries > low_water);
1089 
1090 	dnlc_free_rotor = hp;
1091 	dnlc_reduce_idle = 1;
1092 }
1093 
1094 /*
1095  * Directory caching routines
1096  * ==========================
1097  *
1098  * See dnlc.h for details of the interfaces below.
1099  */
1100 
1101 /*
1102  * Lookup up an entry in a complete or partial directory cache.
1103  */
1104 dcret_t
1105 dnlc_dir_lookup(dcanchor_t *dcap, char *name, uint64_t *handle)
1106 {
1107 	dircache_t *dcp;
1108 	dcentry_t *dep;
1109 	int hash;
1110 	int ret;
1111 	uchar_t namlen;
1112 
1113 	/*
1114 	 * can test without lock as we are only a cache
1115 	 */
1116 	if (!VALID_DIR_CACHE(dcap->dca_dircache)) {
1117 		ncs.ncs_dir_misses.value.ui64++;
1118 		return (DNOCACHE);
1119 	}
1120 
1121 	if (!dnlc_dir_enable) {
1122 		return (DNOCACHE);
1123 	}
1124 
1125 	mutex_enter(&dcap->dca_lock);
1126 	dcp = (dircache_t *)dcap->dca_dircache;
1127 	if (VALID_DIR_CACHE(dcp)) {
1128 		dcp->dc_actime = lbolt64;
1129 		DNLC_DIR_HASH(name, hash, namlen);
1130 		dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1131 		while (dep != NULL) {
1132 			if ((dep->de_hash == hash) &&
1133 			    (namlen == dep->de_namelen) &&
1134 			    bcmp(dep->de_name, name, namlen) == 0) {
1135 				*handle = dep->de_handle;
1136 				mutex_exit(&dcap->dca_lock);
1137 				ncs.ncs_dir_hits.value.ui64++;
1138 				return (DFOUND);
1139 			}
1140 			dep = dep->de_next;
1141 		}
1142 		if (dcp->dc_complete) {
1143 			ret = DNOENT;
1144 		} else {
1145 			ret = DNOCACHE;
1146 		}
1147 		mutex_exit(&dcap->dca_lock);
1148 		return (ret);
1149 	} else {
1150 		mutex_exit(&dcap->dca_lock);
1151 		ncs.ncs_dir_misses.value.ui64++;
1152 		return (DNOCACHE);
1153 	}
1154 }
1155 
1156 /*
1157  * Start a new directory cache. An estimate of the number of
1158  * entries is provided to as a quick check to ensure the directory
1159  * is cacheable.
1160  */
1161 dcret_t
1162 dnlc_dir_start(dcanchor_t *dcap, uint_t num_entries)
1163 {
1164 	dircache_t *dcp;
1165 
1166 	if (!dnlc_dir_enable ||
1167 	    (num_entries < dnlc_dir_min_size)) {
1168 		return (DNOCACHE);
1169 	}
1170 
1171 	if (num_entries > dnlc_dir_max_size) {
1172 		return (DTOOBIG);
1173 	}
1174 
1175 	mutex_enter(&dc_head.dch_lock);
1176 	mutex_enter(&dcap->dca_lock);
1177 
1178 	if (dcap->dca_dircache == DC_RET_LOW_MEM) {
1179 		dcap->dca_dircache = NULL;
1180 		mutex_exit(&dcap->dca_lock);
1181 		mutex_exit(&dc_head.dch_lock);
1182 		return (DNOMEM);
1183 	}
1184 
1185 	/*
1186 	 * Check if there's currently a cache.
1187 	 * This probably only occurs on a race.
1188 	 */
1189 	if (dcap->dca_dircache != NULL) {
1190 		mutex_exit(&dcap->dca_lock);
1191 		mutex_exit(&dc_head.dch_lock);
1192 		return (DNOCACHE);
1193 	}
1194 
1195 	/*
1196 	 * Allocate the dircache struct, entry and free space hash tables.
1197 	 * These tables are initially just one entry but dynamically resize
1198 	 * when entries and free space are added or removed.
1199 	 */
1200 	if ((dcp = kmem_zalloc(sizeof (dircache_t), KM_NOSLEEP)) == NULL) {
1201 		goto error;
1202 	}
1203 	if ((dcp->dc_namehash = kmem_zalloc(sizeof (dcentry_t *),
1204 	    KM_NOSLEEP)) == NULL) {
1205 		goto error;
1206 	}
1207 	if ((dcp->dc_freehash = kmem_zalloc(sizeof (dcfree_t *),
1208 	    KM_NOSLEEP)) == NULL) {
1209 		goto error;
1210 	}
1211 
1212 	dcp->dc_anchor = dcap; /* set back pointer to anchor */
1213 	dcap->dca_dircache = dcp;
1214 
1215 	/* add into head of global chain */
1216 	dcp->dc_next = dc_head.dch_next;
1217 	dcp->dc_prev = (dircache_t *)&dc_head;
1218 	dcp->dc_next->dc_prev = dcp;
1219 	dc_head.dch_next = dcp;
1220 
1221 	mutex_exit(&dcap->dca_lock);
1222 	mutex_exit(&dc_head.dch_lock);
1223 	ncs.ncs_cur_dirs.value.ui64++;
1224 	ncs.ncs_dirs_cached.value.ui64++;
1225 	return (DOK);
1226 error:
1227 	if (dcp != NULL) {
1228 		if (dcp->dc_namehash) {
1229 			kmem_free(dcp->dc_namehash, sizeof (dcentry_t *));
1230 		}
1231 		kmem_free(dcp, sizeof (dircache_t));
1232 	}
1233 	/*
1234 	 * Must also kmem_free dcp->dc_freehash if more error cases are added
1235 	 */
1236 	mutex_exit(&dcap->dca_lock);
1237 	mutex_exit(&dc_head.dch_lock);
1238 	ncs.ncs_dir_start_nm.value.ui64++;
1239 	return (DNOCACHE);
1240 }
1241 
1242 /*
1243  * Add a directopry entry to a partial or complete directory cache.
1244  */
1245 dcret_t
1246 dnlc_dir_add_entry(dcanchor_t *dcap, char *name, uint64_t handle)
1247 {
1248 	dircache_t *dcp;
1249 	dcentry_t **hp, *dep;
1250 	int hash;
1251 	uint_t capacity;
1252 	uchar_t namlen;
1253 
1254 	/*
1255 	 * Allocate the dcentry struct, including the variable
1256 	 * size name. Note, the null terminator is not copied.
1257 	 *
1258 	 * We do this outside the lock to avoid possible deadlock if
1259 	 * dnlc_dir_reclaim() is called as a result of memory shortage.
1260 	 */
1261 	DNLC_DIR_HASH(name, hash, namlen);
1262 	dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
1263 	if (dep == NULL) {
1264 #ifdef DEBUG
1265 		/*
1266 		 * The kmem allocator generates random failures for
1267 		 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
1268 		 * So try again before we blow away a perfectly good cache.
1269 		 * This is done not to cover an error but purely for
1270 		 * performance running a debug kernel.
1271 		 * This random error only occurs in debug mode.
1272 		 */
1273 		dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
1274 		if (dep != NULL)
1275 			goto ok;
1276 #endif
1277 		ncs.ncs_dir_add_nm.value.ui64++;
1278 		/*
1279 		 * Free a directory cache. This may be the one we are
1280 		 * called with.
1281 		 */
1282 		dnlc_dir_reclaim(NULL);
1283 		dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
1284 		if (dep == NULL) {
1285 			/*
1286 			 * still no memory, better delete this cache
1287 			 */
1288 			mutex_enter(&dcap->dca_lock);
1289 			dcp = (dircache_t *)dcap->dca_dircache;
1290 			if (VALID_DIR_CACHE(dcp)) {
1291 				dnlc_dir_abort(dcp);
1292 				dcap->dca_dircache = DC_RET_LOW_MEM;
1293 			}
1294 			mutex_exit(&dcap->dca_lock);
1295 			ncs.ncs_dir_addabort.value.ui64++;
1296 			return (DNOCACHE);
1297 		}
1298 		/*
1299 		 * fall through as if the 1st kmem_alloc had worked
1300 		 */
1301 	}
1302 #ifdef DEBUG
1303 ok:
1304 #endif
1305 	mutex_enter(&dcap->dca_lock);
1306 	dcp = (dircache_t *)dcap->dca_dircache;
1307 	if (VALID_DIR_CACHE(dcp)) {
1308 		/*
1309 		 * If the total number of entries goes above the max
1310 		 * then free this cache
1311 		 */
1312 		if ((dcp->dc_num_entries + dcp->dc_num_free) >
1313 			dnlc_dir_max_size) {
1314 			mutex_exit(&dcap->dca_lock);
1315 			dnlc_dir_purge(dcap);
1316 			kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
1317 			ncs.ncs_dir_add_max.value.ui64++;
1318 			return (DTOOBIG);
1319 		}
1320 		dcp->dc_num_entries++;
1321 		capacity = (dcp->dc_nhash_mask + 1) << dnlc_dir_hash_size_shift;
1322 		if (dcp->dc_num_entries >=
1323 		    (capacity << dnlc_dir_hash_resize_shift)) {
1324 			dnlc_dir_adjust_nhash(dcp);
1325 		}
1326 		hp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1327 
1328 		/*
1329 		 * Initialise and chain in new entry
1330 		 */
1331 		dep->de_handle = handle;
1332 		dep->de_hash = hash;
1333 		/*
1334 		 * Note de_namelen is a uchar_t to conserve space
1335 		 * and alignment padding. The max length of any
1336 		 * pathname component is defined as MAXNAMELEN
1337 		 * which is 256 (including the terminating null).
1338 		 * So provided this doesn't change, we don't include the null,
1339 		 * we always use bcmp to compare strings, and we don't
1340 		 * start storing full names, then we are ok.
1341 		 * The space savings is worth it.
1342 		 */
1343 		dep->de_namelen = namlen;
1344 		bcopy(name, dep->de_name, namlen);
1345 		dep->de_next = *hp;
1346 		*hp = dep;
1347 		dcp->dc_actime = lbolt64;
1348 		mutex_exit(&dcap->dca_lock);
1349 		ncs.ncs_dir_num_ents.value.ui64++;
1350 		return (DOK);
1351 	} else {
1352 		mutex_exit(&dcap->dca_lock);
1353 		kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
1354 		return (DNOCACHE);
1355 	}
1356 }
1357 
1358 /*
1359  * Add free space to a partial or complete directory cache.
1360  */
1361 dcret_t
1362 dnlc_dir_add_space(dcanchor_t *dcap, uint_t len, uint64_t handle)
1363 {
1364 	dircache_t *dcp;
1365 	dcfree_t *dfp, **hp;
1366 	uint_t capacity;
1367 
1368 	/*
1369 	 * We kmem_alloc outside the lock to avoid possible deadlock if
1370 	 * dnlc_dir_reclaim() is called as a result of memory shortage.
1371 	 */
1372 	dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1373 	if (dfp == NULL) {
1374 #ifdef DEBUG
1375 		/*
1376 		 * The kmem allocator generates random failures for
1377 		 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
1378 		 * So try again before we blow away a perfectly good cache.
1379 		 * This random error only occurs in debug mode
1380 		 */
1381 		dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1382 		if (dfp != NULL)
1383 			goto ok;
1384 #endif
1385 		ncs.ncs_dir_add_nm.value.ui64++;
1386 		/*
1387 		 * Free a directory cache. This may be the one we are
1388 		 * called with.
1389 		 */
1390 		dnlc_dir_reclaim(NULL);
1391 		dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1392 		if (dfp == NULL) {
1393 			/*
1394 			 * still no memory, better delete this cache
1395 			 */
1396 			mutex_enter(&dcap->dca_lock);
1397 			dcp = (dircache_t *)dcap->dca_dircache;
1398 			if (VALID_DIR_CACHE(dcp)) {
1399 				dnlc_dir_abort(dcp);
1400 				dcap->dca_dircache = DC_RET_LOW_MEM;
1401 			}
1402 			mutex_exit(&dcap->dca_lock);
1403 			ncs.ncs_dir_addabort.value.ui64++;
1404 			return (DNOCACHE);
1405 		}
1406 		/*
1407 		 * fall through as if the 1st kmem_alloc had worked
1408 		 */
1409 	}
1410 
1411 #ifdef DEBUG
1412 ok:
1413 #endif
1414 	mutex_enter(&dcap->dca_lock);
1415 	dcp = (dircache_t *)dcap->dca_dircache;
1416 	if (VALID_DIR_CACHE(dcp)) {
1417 		if ((dcp->dc_num_entries + dcp->dc_num_free) >
1418 			dnlc_dir_max_size) {
1419 			mutex_exit(&dcap->dca_lock);
1420 			dnlc_dir_purge(dcap);
1421 			kmem_cache_free(dnlc_dir_space_cache, dfp);
1422 			ncs.ncs_dir_add_max.value.ui64++;
1423 			return (DTOOBIG);
1424 		}
1425 		dcp->dc_num_free++;
1426 		capacity = (dcp->dc_fhash_mask + 1) << dnlc_dir_hash_size_shift;
1427 		if (dcp->dc_num_free >=
1428 		    (capacity << dnlc_dir_hash_resize_shift)) {
1429 			dnlc_dir_adjust_fhash(dcp);
1430 		}
1431 		/*
1432 		 * Initialise and chain a new entry
1433 		 */
1434 		dfp->df_handle = handle;
1435 		dfp->df_len = len;
1436 		dcp->dc_actime = lbolt64;
1437 		hp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
1438 		dfp->df_next = *hp;
1439 		*hp = dfp;
1440 		mutex_exit(&dcap->dca_lock);
1441 		ncs.ncs_dir_num_ents.value.ui64++;
1442 		return (DOK);
1443 	} else {
1444 		mutex_exit(&dcap->dca_lock);
1445 		kmem_cache_free(dnlc_dir_space_cache, dfp);
1446 		return (DNOCACHE);
1447 	}
1448 }
1449 
1450 /*
1451  * Mark a directory cache as complete.
1452  */
1453 void
1454 dnlc_dir_complete(dcanchor_t *dcap)
1455 {
1456 	dircache_t *dcp;
1457 
1458 	mutex_enter(&dcap->dca_lock);
1459 	dcp = (dircache_t *)dcap->dca_dircache;
1460 	if (VALID_DIR_CACHE(dcp)) {
1461 		dcp->dc_complete = B_TRUE;
1462 	}
1463 	mutex_exit(&dcap->dca_lock);
1464 }
1465 
1466 /*
1467  * Internal routine to delete a partial or full directory cache.
1468  * No additional locking needed.
1469  */
1470 static void
1471 dnlc_dir_abort(dircache_t *dcp)
1472 {
1473 	dcentry_t *dep, *nhp;
1474 	dcfree_t *fep, *fhp;
1475 	uint_t nhtsize = dcp->dc_nhash_mask + 1; /* name hash table size */
1476 	uint_t fhtsize = dcp->dc_fhash_mask + 1; /* free hash table size */
1477 	uint_t i;
1478 
1479 	/*
1480 	 * Free up the cached name entries and hash table
1481 	 */
1482 	for (i = 0; i < nhtsize; i++) { /* for each hash bucket */
1483 		nhp = dcp->dc_namehash[i];
1484 		while (nhp != NULL) { /* for each chained entry */
1485 			dep = nhp->de_next;
1486 			kmem_free(nhp, sizeof (dcentry_t) - 1 +
1487 			    nhp->de_namelen);
1488 			nhp = dep;
1489 		}
1490 	}
1491 	kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * nhtsize);
1492 
1493 	/*
1494 	 * Free up the free space entries and hash table
1495 	 */
1496 	for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
1497 		fhp = dcp->dc_freehash[i];
1498 		while (fhp != NULL) { /* for each chained entry */
1499 			fep = fhp->df_next;
1500 			kmem_cache_free(dnlc_dir_space_cache, fhp);
1501 			fhp = fep;
1502 		}
1503 	}
1504 	kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * fhtsize);
1505 
1506 	/*
1507 	 * Finally free the directory cache structure itself
1508 	 */
1509 	ncs.ncs_dir_num_ents.value.ui64 -= (dcp->dc_num_entries +
1510 	    dcp->dc_num_free);
1511 	kmem_free(dcp, sizeof (dircache_t));
1512 	ncs.ncs_cur_dirs.value.ui64--;
1513 }
1514 
1515 /*
1516  * Remove a partial or complete directory cache
1517  */
1518 void
1519 dnlc_dir_purge(dcanchor_t *dcap)
1520 {
1521 	dircache_t *dcp;
1522 
1523 	mutex_enter(&dc_head.dch_lock);
1524 	mutex_enter(&dcap->dca_lock);
1525 	dcp = (dircache_t *)dcap->dca_dircache;
1526 	if (!VALID_DIR_CACHE(dcp)) {
1527 		mutex_exit(&dcap->dca_lock);
1528 		mutex_exit(&dc_head.dch_lock);
1529 		return;
1530 	}
1531 	dcap->dca_dircache = NULL;
1532 	/*
1533 	 * Unchain from global list
1534 	 */
1535 	dcp->dc_prev->dc_next = dcp->dc_next;
1536 	dcp->dc_next->dc_prev = dcp->dc_prev;
1537 	mutex_exit(&dcap->dca_lock);
1538 	mutex_exit(&dc_head.dch_lock);
1539 	dnlc_dir_abort(dcp);
1540 }
1541 
1542 /*
1543  * Remove an entry from a complete or partial directory cache.
1544  * Return the handle if it's non null.
1545  */
1546 dcret_t
1547 dnlc_dir_rem_entry(dcanchor_t *dcap, char *name, uint64_t *handlep)
1548 {
1549 	dircache_t *dcp;
1550 	dcentry_t **prevpp, *te;
1551 	uint_t capacity;
1552 	int hash;
1553 	int ret;
1554 	uchar_t namlen;
1555 
1556 	if (!dnlc_dir_enable) {
1557 		return (DNOCACHE);
1558 	}
1559 
1560 	mutex_enter(&dcap->dca_lock);
1561 	dcp = (dircache_t *)dcap->dca_dircache;
1562 	if (VALID_DIR_CACHE(dcp)) {
1563 		dcp->dc_actime = lbolt64;
1564 		if (dcp->dc_nhash_mask > 0) { /* ie not minimum */
1565 			capacity = (dcp->dc_nhash_mask + 1) <<
1566 			    dnlc_dir_hash_size_shift;
1567 			if (dcp->dc_num_entries <=
1568 			    (capacity >> dnlc_dir_hash_resize_shift)) {
1569 				dnlc_dir_adjust_nhash(dcp);
1570 			}
1571 		}
1572 		DNLC_DIR_HASH(name, hash, namlen);
1573 		prevpp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1574 		while (*prevpp != NULL) {
1575 			if (((*prevpp)->de_hash == hash) &&
1576 			    (namlen == (*prevpp)->de_namelen) &&
1577 			    bcmp((*prevpp)->de_name, name, namlen) == 0) {
1578 				if (handlep != NULL) {
1579 					*handlep = (*prevpp)->de_handle;
1580 				}
1581 				te = *prevpp;
1582 				*prevpp = (*prevpp)->de_next;
1583 				kmem_free(te, sizeof (dcentry_t) - 1 +
1584 				    te->de_namelen);
1585 
1586 				/*
1587 				 * If the total number of entries
1588 				 * falls below half the minimum number
1589 				 * of entries then free this cache.
1590 				 */
1591 				if (--dcp->dc_num_entries <
1592 				    (dnlc_dir_min_size >> 1)) {
1593 					mutex_exit(&dcap->dca_lock);
1594 					dnlc_dir_purge(dcap);
1595 				} else {
1596 					mutex_exit(&dcap->dca_lock);
1597 				}
1598 				ncs.ncs_dir_num_ents.value.ui64--;
1599 				return (DFOUND);
1600 			}
1601 			prevpp = &((*prevpp)->de_next);
1602 		}
1603 		if (dcp->dc_complete) {
1604 			ncs.ncs_dir_reme_fai.value.ui64++;
1605 			ret = DNOENT;
1606 		} else {
1607 			ret = DNOCACHE;
1608 		}
1609 		mutex_exit(&dcap->dca_lock);
1610 		return (ret);
1611 	} else {
1612 		mutex_exit(&dcap->dca_lock);
1613 		return (DNOCACHE);
1614 	}
1615 }
1616 
1617 
1618 /*
1619  * Remove free space of at least the given length from a complete
1620  * or partial directory cache.
1621  */
1622 dcret_t
1623 dnlc_dir_rem_space_by_len(dcanchor_t *dcap, uint_t len, uint64_t *handlep)
1624 {
1625 	dircache_t *dcp;
1626 	dcfree_t **prevpp, *tfp;
1627 	uint_t fhtsize; /* free hash table size */
1628 	uint_t i;
1629 	uint_t capacity;
1630 	int ret;
1631 
1632 	if (!dnlc_dir_enable) {
1633 		return (DNOCACHE);
1634 	}
1635 
1636 	mutex_enter(&dcap->dca_lock);
1637 	dcp = (dircache_t *)dcap->dca_dircache;
1638 	if (VALID_DIR_CACHE(dcp)) {
1639 		dcp->dc_actime = lbolt64;
1640 		if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
1641 			capacity = (dcp->dc_fhash_mask + 1) <<
1642 			    dnlc_dir_hash_size_shift;
1643 			if (dcp->dc_num_free <=
1644 			    (capacity >> dnlc_dir_hash_resize_shift)) {
1645 				dnlc_dir_adjust_fhash(dcp);
1646 			}
1647 		}
1648 		/*
1649 		 * Search for an entry of the appropriate size
1650 		 * on a first fit basis.
1651 		 */
1652 		fhtsize = dcp->dc_fhash_mask + 1;
1653 		for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
1654 			prevpp = &(dcp->dc_freehash[i]);
1655 			while (*prevpp != NULL) {
1656 				if ((*prevpp)->df_len >= len) {
1657 					*handlep = (*prevpp)->df_handle;
1658 					tfp = *prevpp;
1659 					*prevpp = (*prevpp)->df_next;
1660 					dcp->dc_num_free--;
1661 					mutex_exit(&dcap->dca_lock);
1662 					kmem_cache_free(dnlc_dir_space_cache,
1663 					    tfp);
1664 					ncs.ncs_dir_num_ents.value.ui64--;
1665 					return (DFOUND);
1666 				}
1667 				prevpp = &((*prevpp)->df_next);
1668 			}
1669 		}
1670 		if (dcp->dc_complete) {
1671 			ret = DNOENT;
1672 		} else {
1673 			ret = DNOCACHE;
1674 		}
1675 		mutex_exit(&dcap->dca_lock);
1676 		return (ret);
1677 	} else {
1678 		mutex_exit(&dcap->dca_lock);
1679 		return (DNOCACHE);
1680 	}
1681 }
1682 
1683 /*
1684  * Remove free space with the given handle from a complete or partial
1685  * directory cache.
1686  */
1687 dcret_t
1688 dnlc_dir_rem_space_by_handle(dcanchor_t *dcap, uint64_t handle)
1689 {
1690 	dircache_t *dcp;
1691 	dcfree_t **prevpp, *tfp;
1692 	uint_t capacity;
1693 	int ret;
1694 
1695 	if (!dnlc_dir_enable) {
1696 		return (DNOCACHE);
1697 	}
1698 
1699 	mutex_enter(&dcap->dca_lock);
1700 	dcp = (dircache_t *)dcap->dca_dircache;
1701 	if (VALID_DIR_CACHE(dcp)) {
1702 		dcp->dc_actime = lbolt64;
1703 		if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
1704 			capacity = (dcp->dc_fhash_mask + 1) <<
1705 			    dnlc_dir_hash_size_shift;
1706 			if (dcp->dc_num_free <=
1707 			    (capacity >> dnlc_dir_hash_resize_shift)) {
1708 				dnlc_dir_adjust_fhash(dcp);
1709 			}
1710 		}
1711 
1712 		/*
1713 		 * search for the exact entry
1714 		 */
1715 		prevpp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
1716 		while (*prevpp != NULL) {
1717 			if ((*prevpp)->df_handle == handle) {
1718 				tfp = *prevpp;
1719 				*prevpp = (*prevpp)->df_next;
1720 				dcp->dc_num_free--;
1721 				mutex_exit(&dcap->dca_lock);
1722 				kmem_cache_free(dnlc_dir_space_cache, tfp);
1723 				ncs.ncs_dir_num_ents.value.ui64--;
1724 				return (DFOUND);
1725 			}
1726 			prevpp = &((*prevpp)->df_next);
1727 		}
1728 		if (dcp->dc_complete) {
1729 			ncs.ncs_dir_rems_fai.value.ui64++;
1730 			ret = DNOENT;
1731 		} else {
1732 			ret = DNOCACHE;
1733 		}
1734 		mutex_exit(&dcap->dca_lock);
1735 		return (ret);
1736 	} else {
1737 		mutex_exit(&dcap->dca_lock);
1738 		return (DNOCACHE);
1739 	}
1740 }
1741 
1742 /*
1743  * Update the handle of an directory cache entry.
1744  */
1745 dcret_t
1746 dnlc_dir_update(dcanchor_t *dcap, char *name, uint64_t handle)
1747 {
1748 	dircache_t *dcp;
1749 	dcentry_t *dep;
1750 	int hash;
1751 	int ret;
1752 	uchar_t namlen;
1753 
1754 	if (!dnlc_dir_enable) {
1755 		return (DNOCACHE);
1756 	}
1757 
1758 	mutex_enter(&dcap->dca_lock);
1759 	dcp = (dircache_t *)dcap->dca_dircache;
1760 	if (VALID_DIR_CACHE(dcp)) {
1761 		dcp->dc_actime = lbolt64;
1762 		DNLC_DIR_HASH(name, hash, namlen);
1763 		dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1764 		while (dep != NULL) {
1765 			if ((dep->de_hash == hash) &&
1766 			    (namlen == dep->de_namelen) &&
1767 			    bcmp(dep->de_name, name, namlen) == 0) {
1768 				dep->de_handle = handle;
1769 				mutex_exit(&dcap->dca_lock);
1770 				return (DFOUND);
1771 			}
1772 			dep = dep->de_next;
1773 		}
1774 		if (dcp->dc_complete) {
1775 			ncs.ncs_dir_upd_fail.value.ui64++;
1776 			ret = DNOENT;
1777 		} else {
1778 			ret = DNOCACHE;
1779 		}
1780 		mutex_exit(&dcap->dca_lock);
1781 		return (ret);
1782 	} else {
1783 		mutex_exit(&dcap->dca_lock);
1784 		return (DNOCACHE);
1785 	}
1786 }
1787 
1788 void
1789 dnlc_dir_fini(dcanchor_t *dcap)
1790 {
1791 	dircache_t *dcp;
1792 
1793 	mutex_enter(&dc_head.dch_lock);
1794 	mutex_enter(&dcap->dca_lock);
1795 	dcp = (dircache_t *)dcap->dca_dircache;
1796 	if (VALID_DIR_CACHE(dcp)) {
1797 		/*
1798 		 * Unchain from global list
1799 		 */
1800 		ncs.ncs_dir_finipurg.value.ui64++;
1801 		dcp->dc_prev->dc_next = dcp->dc_next;
1802 		dcp->dc_next->dc_prev = dcp->dc_prev;
1803 	} else {
1804 		dcp = NULL;
1805 	}
1806 	dcap->dca_dircache = NULL;
1807 	mutex_exit(&dcap->dca_lock);
1808 	mutex_exit(&dc_head.dch_lock);
1809 	mutex_destroy(&dcap->dca_lock);
1810 	if (dcp) {
1811 		dnlc_dir_abort(dcp);
1812 	}
1813 }
1814 
1815 /*
1816  * Reclaim callback for dnlc directory caching.
1817  * Invoked by the kernel memory allocator when memory gets tight.
1818  * This is a pretty serious condition and can lead easily lead to system
1819  * hangs if not enough space is returned.
1820  *
1821  * Deciding which directory (or directories) to purge is tricky.
1822  * Purging everything is an overkill, but purging just the oldest used
1823  * was found to lead to hangs. The largest cached directories use the
1824  * most memory, but take the most effort to rebuild, whereas the smaller
1825  * ones have little value and give back little space. So what to do?
1826  *
1827  * The current policy is to continue purging the oldest used directories
1828  * until at least dnlc_dir_min_reclaim directory entries have been purged.
1829  */
1830 /*ARGSUSED*/
1831 static void
1832 dnlc_dir_reclaim(void *unused)
1833 {
1834 	dircache_t *dcp, *oldest;
1835 	uint_t dirent_cnt = 0;
1836 
1837 	mutex_enter(&dc_head.dch_lock);
1838 	while (dirent_cnt < dnlc_dir_min_reclaim) {
1839 		dcp = dc_head.dch_next;
1840 		oldest = NULL;
1841 		while (dcp != (dircache_t *)&dc_head) {
1842 			if (oldest == NULL) {
1843 				oldest = dcp;
1844 			} else {
1845 				if (dcp->dc_actime < oldest->dc_actime) {
1846 					oldest = dcp;
1847 				}
1848 			}
1849 			dcp = dcp->dc_next;
1850 		}
1851 		if (oldest == NULL) {
1852 			/* nothing to delete */
1853 			mutex_exit(&dc_head.dch_lock);
1854 			return;
1855 		}
1856 		/*
1857 		 * remove from directory chain and purge
1858 		 */
1859 		oldest->dc_prev->dc_next = oldest->dc_next;
1860 		oldest->dc_next->dc_prev = oldest->dc_prev;
1861 		mutex_enter(&oldest->dc_anchor->dca_lock);
1862 		/*
1863 		 * If this was the last entry then it must be too large.
1864 		 * Mark it as such by saving a special dircache_t
1865 		 * pointer (DC_RET_LOW_MEM) in the anchor. The error DNOMEM
1866 		 * will be presented to the caller of dnlc_dir_start()
1867 		 */
1868 		if (oldest->dc_next == oldest->dc_prev) {
1869 			oldest->dc_anchor->dca_dircache = DC_RET_LOW_MEM;
1870 			ncs.ncs_dir_rec_last.value.ui64++;
1871 		} else {
1872 			oldest->dc_anchor->dca_dircache = NULL;
1873 			ncs.ncs_dir_recl_any.value.ui64++;
1874 		}
1875 		mutex_exit(&oldest->dc_anchor->dca_lock);
1876 		dirent_cnt += oldest->dc_num_entries;
1877 		dnlc_dir_abort(oldest);
1878 	}
1879 	mutex_exit(&dc_head.dch_lock);
1880 }
1881 
1882 /*
1883  * Dynamically grow or shrink the size of the name hash table
1884  */
1885 static void
1886 dnlc_dir_adjust_nhash(dircache_t *dcp)
1887 {
1888 	dcentry_t **newhash, *dep, **nhp, *tep;
1889 	uint_t newsize;
1890 	uint_t oldsize;
1891 	uint_t newsizemask;
1892 	int i;
1893 
1894 	/*
1895 	 * Allocate new hash table
1896 	 */
1897 	newsize = dcp->dc_num_entries >> dnlc_dir_hash_size_shift;
1898 	newhash = kmem_zalloc(sizeof (dcentry_t *) * newsize, KM_NOSLEEP);
1899 	if (newhash == NULL) {
1900 		/*
1901 		 * System is short on memory just return
1902 		 * Note, the old hash table is still usable.
1903 		 * This return is unlikely to repeatedy occur, because
1904 		 * either some other directory caches will be reclaimed
1905 		 * due to memory shortage, thus freeing memory, or this
1906 		 * directory cahe will be reclaimed.
1907 		 */
1908 		return;
1909 	}
1910 	oldsize = dcp->dc_nhash_mask + 1;
1911 	dcp->dc_nhash_mask = newsizemask = newsize - 1;
1912 
1913 	/*
1914 	 * Move entries from the old table to the new
1915 	 */
1916 	for (i = 0; i < oldsize; i++) { /* for each hash bucket */
1917 		dep = dcp->dc_namehash[i];
1918 		while (dep != NULL) { /* for each chained entry */
1919 			tep = dep;
1920 			dep = dep->de_next;
1921 			nhp = &newhash[tep->de_hash & newsizemask];
1922 			tep->de_next = *nhp;
1923 			*nhp = tep;
1924 		}
1925 	}
1926 
1927 	/*
1928 	 * delete old hash table and set new one in place
1929 	 */
1930 	kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * oldsize);
1931 	dcp->dc_namehash = newhash;
1932 }
1933 
1934 /*
1935  * Dynamically grow or shrink the size of the free space hash table
1936  */
1937 static void
1938 dnlc_dir_adjust_fhash(dircache_t *dcp)
1939 {
1940 	dcfree_t **newhash, *dfp, **nhp, *tfp;
1941 	uint_t newsize;
1942 	uint_t oldsize;
1943 	int i;
1944 
1945 	/*
1946 	 * Allocate new hash table
1947 	 */
1948 	newsize = dcp->dc_num_free >> dnlc_dir_hash_size_shift;
1949 	newhash = kmem_zalloc(sizeof (dcfree_t *) * newsize, KM_NOSLEEP);
1950 	if (newhash == NULL) {
1951 		/*
1952 		 * System is short on memory just return
1953 		 * Note, the old hash table is still usable.
1954 		 * This return is unlikely to repeatedy occur, because
1955 		 * either some other directory caches will be reclaimed
1956 		 * due to memory shortage, thus freeing memory, or this
1957 		 * directory cahe will be reclaimed.
1958 		 */
1959 		return;
1960 	}
1961 	oldsize = dcp->dc_fhash_mask + 1;
1962 	dcp->dc_fhash_mask = newsize - 1;
1963 
1964 	/*
1965 	 * Move entries from the old table to the new
1966 	 */
1967 	for (i = 0; i < oldsize; i++) { /* for each hash bucket */
1968 		dfp = dcp->dc_freehash[i];
1969 		while (dfp != NULL) { /* for each chained entry */
1970 			tfp = dfp;
1971 			dfp = dfp->df_next;
1972 			nhp = &newhash[DDFHASH(tfp->df_handle, dcp)];
1973 			tfp->df_next = *nhp;
1974 			*nhp = tfp;
1975 		}
1976 	}
1977 
1978 	/*
1979 	 * delete old hash table and set new one in place
1980 	 */
1981 	kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * oldsize);
1982 	dcp->dc_freehash = newhash;
1983 }
1984