xref: /titanic_51/usr/src/uts/common/fs/dnlc.c (revision 441d80aa4f613b6298fc8bd3151f4be02dbf84fc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 /*
30  * University Copyright- Copyright (c) 1982, 1986, 1988
31  * The Regents of the University of California
32  * All Rights Reserved
33  *
34  * University Acknowledgment- Portions of this document are derived from
35  * software developed by the University of California, Berkeley, and its
36  * contributors.
37  */
38 
39 #pragma ident	"%Z%%M%	%I%	%E% SMI"
40 
41 #include <sys/types.h>
42 #include <sys/systm.h>
43 #include <sys/param.h>
44 #include <sys/t_lock.h>
45 #include <sys/systm.h>
46 #include <sys/vfs.h>
47 #include <sys/vnode.h>
48 #include <sys/dnlc.h>
49 #include <sys/kmem.h>
50 #include <sys/cmn_err.h>
51 #include <sys/vtrace.h>
52 #include <sys/bitmap.h>
53 #include <sys/var.h>
54 #include <sys/sysmacros.h>
55 #include <sys/kstat.h>
56 #include <sys/atomic.h>
57 #include <sys/taskq.h>
58 
59 /*
60  * Directory name lookup cache.
61  * Based on code originally done by Robert Elz at Melbourne.
62  *
63  * Names found by directory scans are retained in a cache
64  * for future reference.  Each hash chain is ordered by LRU
65  * Cache is indexed by hash value obtained from (vp, name)
66  * where the vp refers to the directory containing the name.
67  */
68 
69 /*
70  * Tunable nc_hashavelen is the average length desired for this chain, from
71  * which the size of the nc_hash table is derived at create time.
72  */
73 #define	NC_HASHAVELEN_DEFAULT	4
74 int nc_hashavelen = NC_HASHAVELEN_DEFAULT;	/* divisor in dnlc_init() */
75 
76 /*
77  * NC_MOVETOFRONT is the move-to-front threshold: if the hash lookup
78  * depth exceeds this value, we move the looked-up entry to the front of
79  * its hash chain.  The idea is to make sure that the most frequently
80  * accessed entries are found most quickly (by keeping them near the
81  * front of their hash chains).
82  */
83 #define	NC_MOVETOFRONT	2	/* vs. 1-based depth in dnlc_lookup() */
84 
85 /*
86  *
87  * DNLC_MAX_RELE is used to size an array on the stack when releasing
88  * vnodes. This array is used rather than calling VN_RELE() inline because
89  * all dnlc locks must be dropped by that time in order to avoid a
90  * possible deadlock. This deadlock occurs when the dnlc holds the last
91  * reference to the vnode and so the VOP_INACTIVE vector is called which
92  * can in turn call back into the dnlc. A global array was used but had
93  * many problems:
94  *	1) Actually doesn't have an upper bound on the array size as
95  *	   entries can be added after starting the purge.
96  *	2) The locking scheme causes a hang.
97  *	3) Caused serialisation on the global lock.
98  *	4) The array was often unnecessarily huge.
99  *
100  * Note the current value 8 allows up to 4 cache entries (to be purged
101  * from each hash chain), before having to cycle around and retry.
102  * This ought to be ample given that nc_hashavelen is typically very small.
103  */
104 #define	DNLC_MAX_RELE	8 /* must be even: two slots (vp, dp) per entry */
105 
106 /*
107  * Hash table of name cache entries for fast lookup, dynamically
108  * allocated at startup.
109  */
110 nc_hash_t *nc_hash;	/* nc_hashsz buckets, allocated by dnlc_init() */
111 
112 /*
113  * Rotors. Used to select entries on a round-robin basis.
114  */
115 static nc_hash_t *dnlc_purge_fs1_rotor;	/* last bucket dnlc_fs_purge1() saw */
116 static nc_hash_t *dnlc_free_rotor;	/* where dnlc_reduce_cache() resumes */
117 
118 /*
119  * # of dnlc entries (uninitialized)
120  *
121  * the initial value was chosen as being
122  * a random string of bits, probably not
123  * normally chosen by a systems administrator
124  */
125 int ncsize = -1;		/* -1: dnlc_init() computes a default */
126 uint32_t dnlc_nentries = 0;	/* current number of name cache entries */
127 static int nc_hashsz;		/* size of hash table (a power of two) */
128 static int nc_hashmask;		/* size of hash table minus 1 */
129 
130 /*
131  * The dnlc_reduce_cache() taskq queue is activated when there are
132  * ncsize name cache entries and if no parameter is provided, it reduces
133  * the size down to dnlc_nentries_low_water, which is by default one
134  * hundredth less (or 99%) of ncsize.
135  *
136  * If a parameter is provided to dnlc_reduce_cache(), then we reduce
137  * the size down based on ncsize_onepercent - where ncsize_onepercent
138  * is 1% of ncsize.
139  */
140 #define	DNLC_LOW_WATER_DIVISOR_DEFAULT 100
141 uint_t dnlc_low_water_divisor = DNLC_LOW_WATER_DIVISOR_DEFAULT;
142 uint_t dnlc_nentries_low_water;	/* computed by dnlc_init() */
/*
 * dnlc_reduce_idle is cleared by dnlc_get() just before it dispatches
 * dnlc_reduce_cache(), and set back to 1 when dnlc_reduce_cache() returns.
 */
143 int dnlc_reduce_idle = 1; /* no locking needed */
144 uint_t ncsize_onepercent;	/* ncsize / 100, computed by dnlc_init() */
145 
146 /*
147  * If dnlc_nentries hits dnlc_max_nentries (twice ncsize)
148  * then this means the dnlc_reduce_cache() taskq is failing to
149  * keep up. In this case we refuse to add new entries to the dnlc
150  * until the taskq catches up.
151  */
152 uint_t dnlc_max_nentries; /* twice ncsize; set by dnlc_init() */
153 uint64_t dnlc_max_nentries_cnt = 0; /* times dnlc_get() refused an entry */
154 
155 /*
156  * Tunable to define when we should just remove items from
157  * the end of the chain.
158  */
159 #define	DNLC_LONG_CHAIN 8
160 uint_t dnlc_long_chain = DNLC_LONG_CHAIN;	/* see dnlc_reduce_cache() */
161 
162 /*
163  * ncstats has been deprecated, due to the integer size of the counters
164  * which can easily overflow in the dnlc.
165  * It is maintained (at some expense) for compatibility.
166  * The preferred interface is the kstat accessible nc_stats below.
167  */
168 struct ncstats ncstats;	/* exported as the raw "ncstats" kstat */
169 
/*
 * Named 64-bit counters for the name cache and the directory cache.
 * dnlc_init() publishes this structure as the "dnlcstats" named kstat.
 * NOTE(review): the initializers are positional, so their order presumably
 * has to match the member order of struct nc_stats in dnlc.h -- confirm
 * before adding or reordering entries.
 */
170 struct nc_stats ncs = {
171 	{ "hits",			KSTAT_DATA_UINT64 },
172 	{ "misses",			KSTAT_DATA_UINT64 },
173 	{ "negative_cache_hits",	KSTAT_DATA_UINT64 },
174 	{ "enters",			KSTAT_DATA_UINT64 },
175 	{ "double_enters",		KSTAT_DATA_UINT64 },
176 	{ "purge_total_entries",	KSTAT_DATA_UINT64 },
177 	{ "purge_all",			KSTAT_DATA_UINT64 },
178 	{ "purge_vp",			KSTAT_DATA_UINT64 },
179 	{ "purge_vfs",			KSTAT_DATA_UINT64 },
180 	{ "purge_fs1",			KSTAT_DATA_UINT64 },
181 	{ "pick_free",			KSTAT_DATA_UINT64 },
182 	{ "pick_heuristic",		KSTAT_DATA_UINT64 },
183 	{ "pick_last",			KSTAT_DATA_UINT64 },
184 
185 	/* directory caching stats */
186 
187 	{ "dir_hits",			KSTAT_DATA_UINT64 },
188 	{ "dir_misses",			KSTAT_DATA_UINT64 },
189 	{ "dir_cached_current",		KSTAT_DATA_UINT64 },
190 	{ "dir_entries_cached_current",	KSTAT_DATA_UINT64 },
191 	{ "dir_cached_total",		KSTAT_DATA_UINT64 },
192 	{ "dir_start_no_memory",	KSTAT_DATA_UINT64 },
193 	{ "dir_add_no_memory",		KSTAT_DATA_UINT64 },
194 	{ "dir_add_abort",		KSTAT_DATA_UINT64 },
195 	{ "dir_add_max",		KSTAT_DATA_UINT64 },
196 	{ "dir_remove_entry_fail",	KSTAT_DATA_UINT64 },
197 	{ "dir_remove_space_fail",	KSTAT_DATA_UINT64 },
198 	{ "dir_update_fail",		KSTAT_DATA_UINT64 },
199 	{ "dir_fini_purge",		KSTAT_DATA_UINT64 },
200 	{ "dir_reclaim_last",		KSTAT_DATA_UINT64 },
201 	{ "dir_reclaim_any",		KSTAT_DATA_UINT64 },
202 };
203 
204 static int doingcache = 1;	/* cleared by dnlc_init() if ncsize <= 0 */
205 
206 vnode_t negative_cache_vnode;	/* v_count pinned at 1 by dnlc_init() */
207 
/*
 * Insert entry ncp at the front of the hash chain headed by hp.
 *
 * The chain is a circular doubly-linked list in which the bucket header,
 * cast to ncache_t *, serves as the sentinel.  The macro takes no lock
 * itself.  It is wrapped in do { } while (0) so that the expansion is
 * a single statement and remains correct inside an unbraced if/else.
 */
#define	nc_inshash(ncp, hp) \
do { \
	(ncp)->hash_next = (hp)->hash_next; \
	(ncp)->hash_prev = (ncache_t *)(hp); \
	(hp)->hash_next->hash_prev = (ncp); \
	(hp)->hash_next = (ncp); \
} while (0)
218 
/*
 * Unlink entry ncp from the hash chain it is on and clear its link
 * pointers.  The macro takes no lock itself.  It is wrapped in
 * do { } while (0) so that the expansion is a single statement and
 * remains correct inside an unbraced if/else.
 */
#define	nc_rmhash(ncp) \
do { \
	(ncp)->hash_prev->hash_next = (ncp)->hash_next; \
	(ncp)->hash_next->hash_prev = (ncp)->hash_prev; \
	(ncp)->hash_prev = NULL; \
	(ncp)->hash_next = NULL; \
} while (0)
229 
/*
 * Free a name cache entry and decrement the global entry count.
 *
 * The size passed to kmem_free() is sizeof (ncache_t) + namlen, the same
 * size dnlc_get() allocates.  The entry must already be off its hash
 * chain, and the caller releases the vnode holds separately.  Wrapped in
 * do { } while (0) so that the expansion is a single statement and
 * remains correct inside an unbraced if/else.
 */
#define	dnlc_free(ncp) \
do { \
	kmem_free((ncp), sizeof (ncache_t) + (ncp)->namlen); \
	atomic_add_32(&dnlc_nentries, -1); \
} while (0)
238 
239 
240 /*
241  * Cached directory info.
242  * ======================
243  */
244 
/*
 * Cached directory free space hash function.
 * Takes the free space handle and the dcp (for the hash table mask) and
 * evaluates to the hash index: the handle shifted right by two bits,
 * masked with dcp->dc_fhash_mask.  Both arguments are parenthesized so
 * that an expression may be passed as the handle.
 */
#define	DDFHASH(handle, dcp) (((handle) >> 2) & (dcp)->dc_fhash_mask)
251 
/*
 * Cached directory name entry hash function.
 *
 * Hashes the NUL-terminated string "name" and stores the hash in "hash"
 * and the string length in "namelen" (both are assigned to, so they must
 * be lvalues).  The hash is seeded with the first character and the scan
 * starts at the second one, so "name" must not be the empty string.
 * "name" is evaluated more than once; it is parenthesized so that a
 * pointer expression such as (p + 1) hashes the intended string.
 */
#define	DNLC_DIR_HASH(name, hash, namelen)				\
	{								\
		char Xc, *Xcp;						\
		(hash) = *(name);					\
		for (Xcp = ((name) + 1); (Xc = *Xcp) != 0; Xcp++)	\
			(hash) = ((hash) << 4) + (hash) + Xc;		\
		ASSERT((Xcp - (name)) <= ((1 << NBBY) - 1));		\
		(namelen) = Xcp - (name);				\
	}
266 
267 /* special dircache_t pointer to indicate error should be returned */
268 /*
269  * The anchor directory cache pointer can contain 3 types of values,
270  * 1) NULL: No directory cache
271  * 2) DC_RET_LOW_MEM (1): There was a directory cache that was found to be
272  *    too big or a memory shortage occurred. This value remains in the
273  *    pointer until a dnlc_dir_start() which returns a DNOMEM error.
274  *    This is kludgy but efficient and only visible in this source file.
275  * 3) A valid cache pointer.
276  */
/*
 * DC_RET_LOW_MEM is the sentinel pointer value 1.  VALID_DIR_CACHE() is
 * true only for pointers that compare greater than that sentinel, so it
 * rejects both NULL and DC_RET_LOW_MEM.  The sentinel's expansion is fully
 * parenthesized so it behaves as a single operand wherever it is used.
 */
#define	DC_RET_LOW_MEM ((dircache_t *)1)
#define	VALID_DIR_CACHE(dcp) ((dircache_t *)(dcp) > DC_RET_LOW_MEM)
279 
280 /* Tunables */
/* dnlc_dir_enable is also cleared by dnlc_init() when the dnlc is disabled. */
281 uint_t dnlc_dir_enable = 1; /* disable caching directories by setting to 0 */
282 uint_t dnlc_dir_min_size = 40; /* min no of directory entries before caching */
283 uint_t dnlc_dir_max_size = UINT_MAX; /* ditto maximum */
284 uint_t dnlc_dir_hash_size_shift = 3; /* 8 entries per hash bucket */
285 uint_t dnlc_dir_min_reclaim =  350000; /* approx 1MB of dcentrys */
286 /*
287  * dnlc_dir_hash_resize_shift determines when the hash tables
288  * get re-adjusted due to growth or shrinkage
289  * - currently 2 indicating that there can be at most 4
290  * times or at least one quarter the number of entries
291  * before hash table readjustment. Note that with
292  * dnlc_dir_hash_size_shift above set at 3 this would
293  * mean readjustment would occur if the average number
294  * of entries went above 32 or below 2
295  */
296 uint_t dnlc_dir_hash_resize_shift = 2; /* readjust rate */
297 
/*
 * Both are set up by dnlc_init(): the kmem cache is created there with
 * dnlc_dir_reclaim as its reclaim callback, and dc_head is initialised
 * as an empty circular list that points at itself.
 */
298 static kmem_cache_t *dnlc_dir_space_cache; /* free space entry cache */
299 static dchead_t dc_head; /* anchor of cached directories */
300 
301 /* Prototypes */
/* name cache helpers: allocate an entry / find an entry on its hash chain */
302 static ncache_t *dnlc_get(uchar_t namlen);
303 static ncache_t *dnlc_search(vnode_t *dp, char *name, uchar_t namlen, int hash);
/* directory cache helpers; dnlc_dir_reclaim is the kmem reclaim callback */
304 static void dnlc_dir_reclaim(void *unused);
305 static void dnlc_dir_abort(dircache_t *dcp);
306 static void dnlc_dir_adjust_fhash(dircache_t *dcp);
307 static void dnlc_dir_adjust_nhash(dircache_t *dcp);
308 
309 
310 /*
311  * Initialize the directory cache.
312  */
313 void
314 dnlc_init()
315 {
316 	nc_hash_t *hp;
317 	kstat_t *ksp;
318 	int i;
319 
320 	/*
321 	 * Set up the size of the dnlc (ncsize) and its low water mark.
322 	 */
323 	if (ncsize == -1) {
324 		/* calculate a reasonable size for the low water */
325 		dnlc_nentries_low_water = 4 * (v.v_proc + maxusers) + 320;
326 		ncsize = dnlc_nentries_low_water +
327 		    (dnlc_nentries_low_water / dnlc_low_water_divisor);
328 	} else {
329 		/* don't change the user specified ncsize */
330 		dnlc_nentries_low_water =
331 		    ncsize - (ncsize / dnlc_low_water_divisor);
332 	}
333 	if (ncsize <= 0) {
334 		doingcache = 0;
335 		dnlc_dir_enable = 0; /* also disable directory caching */
336 		ncsize = 0;
337 		cmn_err(CE_NOTE, "name cache (dnlc) disabled");
338 		return;
339 	}
340 	dnlc_max_nentries = ncsize * 2;
341 	ncsize_onepercent = ncsize / 100;
342 
343 	/*
344 	 * Initialise the hash table.
345 	 * Compute hash size rounding to the next power of two.
346 	 */
347 	nc_hashsz = ncsize / nc_hashavelen;
348 	nc_hashsz = 1 << highbit(nc_hashsz);
349 	nc_hashmask = nc_hashsz - 1;
350 	nc_hash = kmem_zalloc(nc_hashsz * sizeof (*nc_hash), KM_SLEEP);
351 	for (i = 0; i < nc_hashsz; i++) {
352 		hp = (nc_hash_t *)&nc_hash[i];
353 		mutex_init(&hp->hash_lock, NULL, MUTEX_DEFAULT, NULL);
354 		hp->hash_next = (ncache_t *)hp;
355 		hp->hash_prev = (ncache_t *)hp;
356 	}
357 
358 	/*
359 	 * Initialize rotors
360 	 */
361 	dnlc_free_rotor = dnlc_purge_fs1_rotor = &nc_hash[0];
362 
363 	/*
364 	 * Set up the directory caching to use kmem_cache_alloc
365 	 * for its free space entries so that we can get a callback
366 	 * when the system is short on memory, to allow us to free
367 	 * up some memory. we don't use the constructor/deconstructor
368 	 * functions.
369 	 */
370 	dnlc_dir_space_cache = kmem_cache_create("dnlc_space_cache",
371 	    sizeof (dcfree_t), 0, NULL, NULL, dnlc_dir_reclaim, NULL,
372 	    NULL, 0);
373 
374 	/*
375 	 * Initialise the head of the cached directory structures
376 	 */
377 	mutex_init(&dc_head.dch_lock, NULL, MUTEX_DEFAULT, NULL);
378 	dc_head.dch_next = (dircache_t *)&dc_head;
379 	dc_head.dch_prev = (dircache_t *)&dc_head;
380 
381 	/*
382 	 * Initialise the reference count of the negative cache vnode to 1
383 	 * so that it never goes away (VOP_INACTIVE isn't called on it).
384 	 */
385 	negative_cache_vnode.v_count = 1;
386 
387 	/*
388 	 * Initialise kstats - both the old compatability raw kind and
389 	 * the more extensive named stats.
390 	 */
391 	ksp = kstat_create("unix", 0, "ncstats", "misc", KSTAT_TYPE_RAW,
392 		sizeof (struct ncstats), KSTAT_FLAG_VIRTUAL);
393 	if (ksp) {
394 		ksp->ks_data = (void *) &ncstats;
395 		kstat_install(ksp);
396 	}
397 	ksp = kstat_create("unix", 0, "dnlcstats", "misc", KSTAT_TYPE_NAMED,
398 	    sizeof (ncs) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
399 	if (ksp) {
400 		ksp->ks_data = (void *) &ncs;
401 		kstat_install(ksp);
402 	}
403 }
404 
405 /*
406  * Add a name to the directory cache.
407  */
408 void
409 dnlc_enter(vnode_t *dp, char *name, vnode_t *vp)
410 {
411 	ncache_t *ncp;
412 	nc_hash_t *hp;
413 	uchar_t namlen;
414 	int hash;
415 
416 	TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_enter_start:");
417 
418 	if (!doingcache) {
419 		TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
420 		    "dnlc_enter_end:(%S) %d", "not caching", 0);
421 		return;
422 	}
423 
424 	/*
425 	 * Get a new dnlc entry. Assume the entry won't be in the cache
426 	 * and initialize it now
427 	 */
428 	DNLCHASH(name, dp, hash, namlen);
429 	if ((ncp = dnlc_get(namlen)) == NULL)
430 		return;
431 	ncp->dp = dp;
432 	VN_HOLD(dp);
433 	ncp->vp = vp;
434 	VN_HOLD(vp);
435 	bcopy(name, ncp->name, namlen + 1); /* name and null */
436 	ncp->hash = hash;
437 	hp = &nc_hash[hash & nc_hashmask];
438 
439 	mutex_enter(&hp->hash_lock);
440 	if (dnlc_search(dp, name, namlen, hash) != NULL) {
441 		mutex_exit(&hp->hash_lock);
442 		ncstats.dbl_enters++;
443 		ncs.ncs_dbl_enters.value.ui64++;
444 		VN_RELE(dp);
445 		VN_RELE(vp);
446 		dnlc_free(ncp);		/* crfree done here */
447 		TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
448 			"dnlc_enter_end:(%S) %d",
449 			"dbl enter", ncstats.dbl_enters);
450 		return;
451 	}
452 	/*
453 	 * Insert back into the hash chain.
454 	 */
455 	nc_inshash(ncp, hp);
456 	mutex_exit(&hp->hash_lock);
457 	ncstats.enters++;
458 	ncs.ncs_enters.value.ui64++;
459 	TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
460 	    "dnlc_enter_end:(%S) %d", "done", ncstats.enters);
461 }
462 
463 /*
464  * Add a name to the directory cache.
465  *
466  * This function is basically identical with
467  * dnlc_enter().  The difference is that when the
468  * desired dnlc entry is found, the vnode in the
469  * ncache is compared with the vnode passed in.
470  *
471  * If they are not equal then the ncache is
472  * updated with the passed in vnode.  Otherwise
473  * it just frees up the newly allocated dnlc entry.
474  */
475 void
476 dnlc_update(vnode_t *dp, char *name, vnode_t *vp)
477 {
478 	ncache_t *ncp;
479 	ncache_t *tcp;
480 	vnode_t *tvp;
481 	nc_hash_t *hp;
482 	int hash;
483 	uchar_t namlen;
484 
485 	TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_update_start:");
486 
487 	if (!doingcache) {
488 		TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
489 		    "dnlc_update_end:(%S) %d", "not caching", 0);
490 		return;
491 	}
492 
493 	/*
494 	 * Get a new dnlc entry and initialize it now.
495 	 * If we fail to get a new entry, call dnlc_remove() to purge
496 	 * any existing dnlc entry including negative cache (DNLC_NO_VNODE)
497 	 * entry.
498 	 * Failure to clear an existing entry could result in false dnlc
499 	 * lookup (negative/stale entry).
500 	 */
501 	DNLCHASH(name, dp, hash, namlen);
502 	if ((ncp = dnlc_get(namlen)) == NULL) {
503 		dnlc_remove(dp, name);
504 		return;
505 	}
506 	ncp->dp = dp;
507 	VN_HOLD(dp);
508 	ncp->vp = vp;
509 	VN_HOLD(vp);
510 	bcopy(name, ncp->name, namlen + 1); /* name and null */
511 	ncp->hash = hash;
512 	hp = &nc_hash[hash & nc_hashmask];
513 
514 	mutex_enter(&hp->hash_lock);
515 	if ((tcp = dnlc_search(dp, name, namlen, hash)) != NULL) {
516 		if (tcp->vp != vp) {
517 			tvp = tcp->vp;
518 			tcp->vp = vp;
519 			mutex_exit(&hp->hash_lock);
520 			VN_RELE(tvp);
521 			ncstats.enters++;
522 			ncs.ncs_enters.value.ui64++;
523 			TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
524 			    "dnlc_update_end:(%S) %d", "done", ncstats.enters);
525 		} else {
526 			mutex_exit(&hp->hash_lock);
527 			VN_RELE(vp);
528 			ncstats.dbl_enters++;
529 			ncs.ncs_dbl_enters.value.ui64++;
530 			TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
531 			    "dnlc_update_end:(%S) %d",
532 			    "dbl enter", ncstats.dbl_enters);
533 		}
534 		VN_RELE(dp);
535 		dnlc_free(ncp);		/* crfree done here */
536 		return;
537 	}
538 	/*
539 	 * insert the new entry, since it is not in dnlc yet
540 	 */
541 	nc_inshash(ncp, hp);
542 	mutex_exit(&hp->hash_lock);
543 	ncstats.enters++;
544 	ncs.ncs_enters.value.ui64++;
545 	TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
546 	    "dnlc_update_end:(%S) %d", "done", ncstats.enters);
547 }
548 
549 /*
550  * Look up a name in the directory name cache.
551  *
552  * Return a doubly-held vnode if found: one hold so that it may
553  * remain in the cache for other users, the other hold so that
554  * the cache is not re-cycled and the identity of the vnode is
555  * lost before the caller can use the vnode.
556  */
557 vnode_t *
558 dnlc_lookup(vnode_t *dp, char *name)
559 {
560 	ncache_t *ncp;
561 	nc_hash_t *hp;
562 	vnode_t *vp;
563 	int hash, depth;
564 	uchar_t namlen;
565 
566 	TRACE_2(TR_FAC_NFS, TR_DNLC_LOOKUP_START,
567 	    "dnlc_lookup_start:dp %x name %s", dp, name);
568 
569 	if (!doingcache) {
570 		TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
571 		    "dnlc_lookup_end:%S %d vp %x name %s",
572 		    "not_caching", 0, NULL, name);
573 		return (NULL);
574 	}
575 
576 	DNLCHASH(name, dp, hash, namlen);
577 	depth = 1;
578 	hp = &nc_hash[hash & nc_hashmask];
579 	mutex_enter(&hp->hash_lock);
580 
581 	for (ncp = hp->hash_next; ncp != (ncache_t *)hp;
582 	    ncp = ncp->hash_next) {
583 		if (ncp->hash == hash &&	/* fast signature check */
584 		    ncp->dp == dp &&
585 		    ncp->namlen == namlen &&
586 		    bcmp(ncp->name, name, namlen) == 0) {
587 			/*
588 			 * Move this entry to the head of its hash chain
589 			 * if it's not already close.
590 			 */
591 			if (depth > NC_MOVETOFRONT) {
592 				ncache_t *next = ncp->hash_next;
593 				ncache_t *prev = ncp->hash_prev;
594 
595 				prev->hash_next = next;
596 				next->hash_prev = prev;
597 				ncp->hash_next = next = hp->hash_next;
598 				ncp->hash_prev = (ncache_t *)hp;
599 				next->hash_prev = ncp;
600 				hp->hash_next = ncp;
601 
602 				ncstats.move_to_front++;
603 			}
604 
605 			/*
606 			 * Put a hold on the vnode now so its identity
607 			 * can't change before the caller has a chance to
608 			 * put a hold on it.
609 			 */
610 			vp = ncp->vp;
611 			VN_HOLD(vp);
612 			mutex_exit(&hp->hash_lock);
613 			ncstats.hits++;
614 			ncs.ncs_hits.value.ui64++;
615 			if (vp == DNLC_NO_VNODE) {
616 				ncs.ncs_neg_hits.value.ui64++;
617 			}
618 			TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
619 				"dnlc_lookup_end:%S %d vp %x name %s",
620 				"hit", ncstats.hits, vp, name);
621 			return (vp);
622 		}
623 		depth++;
624 	}
625 
626 	mutex_exit(&hp->hash_lock);
627 	ncstats.misses++;
628 	ncs.ncs_misses.value.ui64++;
629 	TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
630 		"dnlc_lookup_end:%S %d vp %x name %s", "miss", ncstats.misses,
631 	    NULL, name);
632 	return (NULL);
633 }
634 
635 /*
636  * Remove an entry in the directory name cache.
637  */
638 void
639 dnlc_remove(vnode_t *dp, char *name)
640 {
641 	ncache_t *ncp;
642 	nc_hash_t *hp;
643 	uchar_t namlen;
644 	int hash;
645 
646 	if (!doingcache)
647 		return;
648 	DNLCHASH(name, dp, hash, namlen);
649 	hp = &nc_hash[hash & nc_hashmask];
650 
651 	mutex_enter(&hp->hash_lock);
652 	if (ncp = dnlc_search(dp, name, namlen, hash)) {
653 		/*
654 		 * Free up the entry
655 		 */
656 		nc_rmhash(ncp);
657 		mutex_exit(&hp->hash_lock);
658 		VN_RELE(ncp->vp);
659 		VN_RELE(ncp->dp);
660 		dnlc_free(ncp);
661 		return;
662 	}
663 	mutex_exit(&hp->hash_lock);
664 }
665 
666 /*
667  * Purge the entire cache.
668  */
669 void
670 dnlc_purge()
671 {
672 	nc_hash_t *nch;
673 	ncache_t *ncp;
674 	int index;
675 	int i;
676 	vnode_t *nc_rele[DNLC_MAX_RELE];
677 
678 	if (!doingcache)
679 		return;
680 
681 	ncstats.purges++;
682 	ncs.ncs_purge_all.value.ui64++;
683 
684 	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
685 		index = 0;
686 		mutex_enter(&nch->hash_lock);
687 		ncp = nch->hash_next;
688 		while (ncp != (ncache_t *)nch) {
689 			ncache_t *np;
690 
691 			np = ncp->hash_next;
692 			nc_rele[index++] = ncp->vp;
693 			nc_rele[index++] = ncp->dp;
694 
695 			nc_rmhash(ncp);
696 			dnlc_free(ncp);
697 			ncp = np;
698 			ncs.ncs_purge_total.value.ui64++;
699 			if (index == DNLC_MAX_RELE)
700 				break;
701 		}
702 		mutex_exit(&nch->hash_lock);
703 
704 		/* Release holds on all the vnodes now that we have no locks */
705 		for (i = 0; i < index; i++) {
706 			VN_RELE(nc_rele[i]);
707 		}
708 		if (ncp != (ncache_t *)nch) {
709 			nch--; /* Do current hash chain again */
710 		}
711 	}
712 }
713 
714 /*
715  * Purge any cache entries referencing a vnode.
716  * Exit as soon as the vnode reference count goes to 1, as the caller
717  * must hold a reference, and the dnlc can therefore have no more.
718  */
719 void
720 dnlc_purge_vp(vnode_t *vp)
721 {
722 	nc_hash_t *nch;
723 	ncache_t *ncp;
724 	int index;
725 	vnode_t *nc_rele[DNLC_MAX_RELE];
726 
727 	ASSERT(vp->v_count > 0);
728 	if (vp->v_count == 1) {
729 		return;
730 	}
731 
732 	if (!doingcache)
733 		return;
734 
735 	ncstats.purges++;
736 	ncs.ncs_purge_vp.value.ui64++;
737 
738 	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
739 		index = 0;
740 		mutex_enter(&nch->hash_lock);
741 		ncp = nch->hash_next;
742 		while (ncp != (ncache_t *)nch) {
743 			ncache_t *np;
744 
745 			np = ncp->hash_next;
746 			if (ncp->dp == vp || ncp->vp == vp) {
747 				nc_rele[index++] = ncp->vp;
748 				nc_rele[index++] = ncp->dp;
749 				nc_rmhash(ncp);
750 				dnlc_free(ncp);
751 				ncs.ncs_purge_total.value.ui64++;
752 				if (index == DNLC_MAX_RELE) {
753 					ncp = np;
754 					break;
755 				}
756 			}
757 			ncp = np;
758 		}
759 		mutex_exit(&nch->hash_lock);
760 
761 		/* Release holds on all the vnodes now that we have no locks */
762 		while (index) {
763 			VN_RELE(nc_rele[--index]);
764 		}
765 
766 		if (vp->v_count == 1) {
767 			return; /* no more dnlc references */
768 		}
769 
770 		if (ncp != (ncache_t *)nch) {
771 			nch--; /* Do current hash chain again */
772 		}
773 	}
774 }
775 
776 /*
777  * Purge cache entries referencing a vfsp.  Caller supplies a count
778  * of entries to purge; up to that many will be freed.  A count of
779  * zero indicates that all such entries should be purged.  Returns
780  * the number of entries that were purged.
781  */
782 int
783 dnlc_purge_vfsp(vfs_t *vfsp, int count)
784 {
785 	nc_hash_t *nch;
786 	ncache_t *ncp;
787 	int n = 0;
788 	int index;
789 	int i;
790 	vnode_t *nc_rele[DNLC_MAX_RELE];
791 
792 	if (!doingcache)
793 		return (0);
794 
795 	ncstats.purges++;
796 	ncs.ncs_purge_vfs.value.ui64++;
797 
798 	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
799 		index = 0;
800 		mutex_enter(&nch->hash_lock);
801 		ncp = nch->hash_next;
802 		while (ncp != (ncache_t *)nch) {
803 			ncache_t *np;
804 
805 			np = ncp->hash_next;
806 			ASSERT(ncp->dp != NULL);
807 			ASSERT(ncp->vp != NULL);
808 			if ((ncp->dp->v_vfsp == vfsp) ||
809 			    (ncp->vp->v_vfsp == vfsp)) {
810 				n++;
811 				nc_rele[index++] = ncp->vp;
812 				nc_rele[index++] = ncp->dp;
813 				nc_rmhash(ncp);
814 				dnlc_free(ncp);
815 				ncs.ncs_purge_total.value.ui64++;
816 				if (index == DNLC_MAX_RELE) {
817 					ncp = np;
818 					break;
819 				}
820 				if (count != 0 && n >= count) {
821 					break;
822 				}
823 			}
824 			ncp = np;
825 		}
826 		mutex_exit(&nch->hash_lock);
827 		/* Release holds on all the vnodes now that we have no locks */
828 		for (i = 0; i < index; i++) {
829 			VN_RELE(nc_rele[i]);
830 		}
831 		if (count != 0 && n >= count) {
832 			return (n);
833 		}
834 		if (ncp != (ncache_t *)nch) {
835 			nch--; /* Do current hash chain again */
836 		}
837 	}
838 	return (n);
839 }
840 
841 /*
842  * Purge 1 entry from the dnlc that is part of the filesystem(s)
843  * represented by 'vop'. The purpose of this routine is to allow
844  * users of the dnlc to free a vnode that is being held by the dnlc.
845  *
846  * If we find a vnode that we release which will result in
847  * freeing the underlying vnode (count was 1), return 1, 0
848  * if no appropriate vnodes found.
849  *
850  * Note, vop is not the 'right' identifier for a filesystem.
851  */
852 int
853 dnlc_fs_purge1(vnodeops_t *vop)
854 {
855 	nc_hash_t *end;
856 	nc_hash_t *hp;
857 	ncache_t *ncp;
858 	vnode_t *vp;
859 
860 	if (!doingcache)
861 		return (0);
862 
863 	ncs.ncs_purge_fs1.value.ui64++;
864 
865 	/*
866 	 * Scan the dnlc entries looking for a likely candidate.
867 	 */
868 	hp = end = dnlc_purge_fs1_rotor;
869 
870 	do {
871 		if (++hp == &nc_hash[nc_hashsz])
872 			hp = nc_hash;
873 		dnlc_purge_fs1_rotor = hp;
874 		if (hp->hash_next == (ncache_t *)hp)
875 			continue;
876 		mutex_enter(&hp->hash_lock);
877 		for (ncp = hp->hash_prev;
878 		    ncp != (ncache_t *)hp;
879 		    ncp = ncp->hash_prev) {
880 			vp = ncp->vp;
881 			if (!vn_has_cached_data(vp) && (vp->v_count == 1) &&
882 			    vn_matchops(vp, vop))
883 				break;
884 		}
885 		if (ncp != (ncache_t *)hp) {
886 			nc_rmhash(ncp);
887 			mutex_exit(&hp->hash_lock);
888 			VN_RELE(ncp->dp);
889 			VN_RELE(vp)
890 			dnlc_free(ncp);
891 			ncs.ncs_purge_total.value.ui64++;
892 			return (1);
893 		}
894 		mutex_exit(&hp->hash_lock);
895 	} while (hp != end);
896 	return (0);
897 }
898 
899 /*
900  * Perform a reverse lookup in the DNLC.  This will find the first occurrence of
901  * the vnode.  If successful, it will return the vnode of the parent, and the
902  * name of the entry in the given buffer.  If it cannot be found, or the buffer
903  * is too small, then it will return NULL.  Note that this is a highly
904  * inefficient function, since the DNLC is constructed solely for forward
905  * lookups.
906  */
907 vnode_t *
908 dnlc_reverse_lookup(vnode_t *vp, char *buf, size_t buflen)
909 {
910 	nc_hash_t *nch;
911 	ncache_t *ncp;
912 	vnode_t *pvp;
913 
914 	if (!doingcache)
915 		return (NULL);
916 
917 	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
918 		mutex_enter(&nch->hash_lock);
919 		ncp = nch->hash_next;
920 		while (ncp != (ncache_t *)nch) {
921 			/*
922 			 * We ignore '..' entries since it can create
923 			 * confusion and infinite loops.
924 			 */
925 			if (ncp->vp == vp && !(ncp->namlen == 2 &&
926 			    0 == bcmp(ncp->name, "..", 2)) &&
927 			    ncp->namlen < buflen) {
928 				bcopy(ncp->name, buf, ncp->namlen);
929 				buf[ncp->namlen] = '\0';
930 				pvp = ncp->dp;
931 				VN_HOLD(pvp);
932 				mutex_exit(&nch->hash_lock);
933 				return (pvp);
934 			}
935 			ncp = ncp->hash_next;
936 		}
937 		mutex_exit(&nch->hash_lock);
938 	}
939 
940 	return (NULL);
941 }
942 /*
943  * Utility routine to search for a cache entry. Return the
944  * ncache entry if found, NULL otherwise.
945  */
946 static ncache_t *
947 dnlc_search(vnode_t *dp, char *name, uchar_t namlen, int hash)
948 {
949 	nc_hash_t *hp;
950 	ncache_t *ncp;
951 
952 	hp = &nc_hash[hash & nc_hashmask];
953 
954 	for (ncp = hp->hash_next; ncp != (ncache_t *)hp; ncp = ncp->hash_next) {
955 		if (ncp->hash == hash &&
956 		    ncp->dp == dp &&
957 		    ncp->namlen == namlen &&
958 		    bcmp(ncp->name, name, namlen) == 0)
959 			return (ncp);
960 	}
961 	return (NULL);
962 }
963 
/*
 * Compile-time check that the largest component name length,
 * MAXNAMELEN - 1, does not exceed (1 << NBBY) - 1, the limit assumed
 * for the uchar_t name lengths used throughout this file.
 */
964 #if ((1 << NBBY) - 1) < (MAXNAMELEN - 1)
965 #error ncache_t name length representation is too small
966 #endif
967 
968 /*
969  * Get a new name cache entry.
970  * If the dnlc_reduce_cache() taskq isn't keeping up with demand, or memory
971  * is short then just return NULL. If we're over ncsize then kick off a
972  * thread to free some in use entries down to dnlc_nentries_low_water.
973  * Caller must initialise all fields except namlen.
974  * Component names are defined to be less than MAXNAMELEN
975  * which includes a null.
976  */
977 static ncache_t *
978 dnlc_get(uchar_t namlen)
979 {
980 	ncache_t *ncp;
981 
982 	if (dnlc_nentries > dnlc_max_nentries) {
983 		dnlc_max_nentries_cnt++; /* keep a statistic */
984 		return (NULL);
985 	}
986 	ncp = kmem_alloc(sizeof (ncache_t) + namlen, KM_NOSLEEP);
987 	if (ncp == NULL) {
988 		return (NULL);
989 	}
990 	ncp->namlen = namlen;
991 	atomic_add_32(&dnlc_nentries, 1);
992 	if (dnlc_reduce_idle && (dnlc_nentries >= ncsize)) {
993 		dnlc_reduce_idle = 0;
994 		(void) taskq_dispatch(system_taskq, dnlc_reduce_cache,
995 		    NULL, TQ_SLEEP);
996 	}
997 	return (ncp);
998 }
999 
1000 /*
1001  * Taskq routine to free up name cache entries to reduce the
1002  * cache size to the low water mark if "reduce_percent" is not provided.
1003  * If "reduce_percent" is provided, reduce cache size by
1004  * (ncsize_onepercent * reduce_percent).
1005  *
1006  * This routine can also be called directly by ZFS's ARC when memory is low.
1007  */
1008 /*ARGSUSED*/
1009 void
1010 dnlc_reduce_cache(void *reduce_percent)
1011 {
1012 	nc_hash_t *hp = dnlc_free_rotor;
1013 	vnode_t *vp;
1014 	ncache_t *ncp;
1015 	int cnt;
1016 	uint_t low_water = dnlc_nentries_low_water;
1017 
1018 	if (reduce_percent) {
1019 		uint_t reduce_cnt;
1020 
1021 		reduce_cnt = ncsize_onepercent * (uint_t)reduce_percent;
1022 		if (reduce_cnt > dnlc_nentries)
1023 			low_water = 0;
1024 		else
1025 			low_water = dnlc_nentries - reduce_cnt;
1026 	}
1027 
1028 	do {
1029 		/*
1030 		 * Find the first non empty hash queue without locking
1031 		 * Recheck we really have entries to avoid
1032 		 * an infinite loop if all the entries get purged.
1033 		 */
1034 		do {
1035 			if (++hp == &nc_hash[nc_hashsz]) {
1036 				hp = nc_hash;
1037 				if (dnlc_nentries <= low_water) {
1038 					dnlc_reduce_idle = 1;
1039 					return;
1040 				}
1041 			}
1042 		} while (hp->hash_next == (ncache_t *)hp);
1043 
1044 		mutex_enter(&hp->hash_lock);
1045 		for (cnt = 0, ncp = hp->hash_prev; ncp != (ncache_t *)hp;
1046 		    ncp = ncp->hash_prev, cnt++) {
1047 			vp = ncp->vp;
1048 			/*
1049 			 * A name cache entry with a reference count
1050 			 * of one is only referenced by the dnlc.
1051 			 * Also negative cache entries are purged first.
1052 			 */
1053 			if (!vn_has_cached_data(vp) &&
1054 			    ((vp->v_count == 1) || (vp == DNLC_NO_VNODE))) {
1055 				ncs.ncs_pick_heur.value.ui64++;
1056 				goto found;
1057 			}
1058 			/*
1059 			 * Remove from the end of the chain if the
1060 			 * chain is too long
1061 			 */
1062 			if (cnt > dnlc_long_chain) {
1063 				ncp = hp->hash_prev;
1064 				ncs.ncs_pick_last.value.ui64++;
1065 				vp = ncp->vp;
1066 				goto found;
1067 			}
1068 		}
1069 		/* check for race and continue */
1070 		if (hp->hash_next == (ncache_t *)hp) {
1071 			mutex_exit(&hp->hash_lock);
1072 			continue;
1073 		}
1074 
1075 		ncp = hp->hash_prev; /* pick the last one in the hash queue */
1076 		ncs.ncs_pick_last.value.ui64++;
1077 		vp = ncp->vp;
1078 found:
1079 		/*
1080 		 * Remove from hash chain.
1081 		 */
1082 		nc_rmhash(ncp);
1083 		mutex_exit(&hp->hash_lock);
1084 		VN_RELE(vp);
1085 		VN_RELE(ncp->dp);
1086 		dnlc_free(ncp);
1087 	} while (dnlc_nentries > low_water);
1088 
1089 	dnlc_free_rotor = hp;
1090 	dnlc_reduce_idle = 1;
1091 }
1092 
1093 /*
1094  * Directory caching routines
1095  * ==========================
1096  *
1097  * See dnlc.h for details of the interfaces below.
1098  */
1099 
1100 /*
1101  * Lookup up an entry in a complete or partial directory cache.
1102  */
1103 dcret_t
1104 dnlc_dir_lookup(dcanchor_t *dcap, char *name, uint64_t *handle)
1105 {
1106 	dircache_t *dcp;
1107 	dcentry_t *dep;
1108 	int hash;
1109 	int ret;
1110 	uchar_t namlen;
1111 
1112 	/*
1113 	 * can test without lock as we are only a cache
1114 	 */
1115 	if (!VALID_DIR_CACHE(dcap->dca_dircache)) {
1116 		ncs.ncs_dir_misses.value.ui64++;
1117 		return (DNOCACHE);
1118 	}
1119 
1120 	if (!dnlc_dir_enable) {
1121 		return (DNOCACHE);
1122 	}
1123 
1124 	mutex_enter(&dcap->dca_lock);
1125 	dcp = (dircache_t *)dcap->dca_dircache;
1126 	if (VALID_DIR_CACHE(dcp)) {
1127 		dcp->dc_actime = lbolt64;
1128 		DNLC_DIR_HASH(name, hash, namlen);
1129 		dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1130 		while (dep != NULL) {
1131 			if ((dep->de_hash == hash) &&
1132 			    (namlen == dep->de_namelen) &&
1133 			    bcmp(dep->de_name, name, namlen) == 0) {
1134 				*handle = dep->de_handle;
1135 				mutex_exit(&dcap->dca_lock);
1136 				ncs.ncs_dir_hits.value.ui64++;
1137 				return (DFOUND);
1138 			}
1139 			dep = dep->de_next;
1140 		}
1141 		if (dcp->dc_complete) {
1142 			ret = DNOENT;
1143 		} else {
1144 			ret = DNOCACHE;
1145 		}
1146 		mutex_exit(&dcap->dca_lock);
1147 		return (ret);
1148 	} else {
1149 		mutex_exit(&dcap->dca_lock);
1150 		ncs.ncs_dir_misses.value.ui64++;
1151 		return (DNOCACHE);
1152 	}
1153 }
1154 
1155 /*
1156  * Start a new directory cache. An estimate of the number of
1157  * entries is provided to as a quick check to ensure the directory
1158  * is cacheable.
1159  */
1160 dcret_t
1161 dnlc_dir_start(dcanchor_t *dcap, uint_t num_entries)
1162 {
1163 	dircache_t *dcp;
1164 
1165 	if (!dnlc_dir_enable ||
1166 	    (num_entries < dnlc_dir_min_size)) {
1167 		return (DNOCACHE);
1168 	}
1169 
1170 	if (num_entries > dnlc_dir_max_size) {
1171 		return (DTOOBIG);
1172 	}
1173 
1174 	mutex_enter(&dc_head.dch_lock);
1175 	mutex_enter(&dcap->dca_lock);
1176 
1177 	if (dcap->dca_dircache == DC_RET_LOW_MEM) {
1178 		dcap->dca_dircache = NULL;
1179 		mutex_exit(&dcap->dca_lock);
1180 		mutex_exit(&dc_head.dch_lock);
1181 		return (DNOMEM);
1182 	}
1183 
1184 	/*
1185 	 * Check if there's currently a cache.
1186 	 * This probably only occurs on a race.
1187 	 */
1188 	if (dcap->dca_dircache != NULL) {
1189 		mutex_exit(&dcap->dca_lock);
1190 		mutex_exit(&dc_head.dch_lock);
1191 		return (DNOCACHE);
1192 	}
1193 
1194 	/*
1195 	 * Allocate the dircache struct, entry and free space hash tables.
1196 	 * These tables are initially just one entry but dynamically resize
1197 	 * when entries and free space are added or removed.
1198 	 */
1199 	if ((dcp = kmem_zalloc(sizeof (dircache_t), KM_NOSLEEP)) == NULL) {
1200 		goto error;
1201 	}
1202 	if ((dcp->dc_namehash = kmem_zalloc(sizeof (dcentry_t *),
1203 	    KM_NOSLEEP)) == NULL) {
1204 		goto error;
1205 	}
1206 	if ((dcp->dc_freehash = kmem_zalloc(sizeof (dcfree_t *),
1207 	    KM_NOSLEEP)) == NULL) {
1208 		goto error;
1209 	}
1210 
1211 	dcp->dc_anchor = dcap; /* set back pointer to anchor */
1212 	dcap->dca_dircache = dcp;
1213 
1214 	/* add into head of global chain */
1215 	dcp->dc_next = dc_head.dch_next;
1216 	dcp->dc_prev = (dircache_t *)&dc_head;
1217 	dcp->dc_next->dc_prev = dcp;
1218 	dc_head.dch_next = dcp;
1219 
1220 	mutex_exit(&dcap->dca_lock);
1221 	mutex_exit(&dc_head.dch_lock);
1222 	ncs.ncs_cur_dirs.value.ui64++;
1223 	ncs.ncs_dirs_cached.value.ui64++;
1224 	return (DOK);
1225 error:
1226 	if (dcp != NULL) {
1227 		if (dcp->dc_namehash) {
1228 			kmem_free(dcp->dc_namehash, sizeof (dcentry_t *));
1229 		}
1230 		kmem_free(dcp, sizeof (dircache_t));
1231 	}
1232 	/*
1233 	 * Must also kmem_free dcp->dc_freehash if more error cases are added
1234 	 */
1235 	mutex_exit(&dcap->dca_lock);
1236 	mutex_exit(&dc_head.dch_lock);
1237 	ncs.ncs_dir_start_nm.value.ui64++;
1238 	return (DNOCACHE);
1239 }
1240 
1241 /*
1242  * Add a directopry entry to a partial or complete directory cache.
1243  */
1244 dcret_t
1245 dnlc_dir_add_entry(dcanchor_t *dcap, char *name, uint64_t handle)
1246 {
1247 	dircache_t *dcp;
1248 	dcentry_t **hp, *dep;
1249 	int hash;
1250 	uint_t capacity;
1251 	uchar_t namlen;
1252 
1253 	/*
1254 	 * Allocate the dcentry struct, including the variable
1255 	 * size name. Note, the null terminator is not copied.
1256 	 *
1257 	 * We do this outside the lock to avoid possible deadlock if
1258 	 * dnlc_dir_reclaim() is called as a result of memory shortage.
1259 	 */
1260 	DNLC_DIR_HASH(name, hash, namlen);
1261 	dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
1262 	if (dep == NULL) {
1263 #ifdef DEBUG
1264 		/*
1265 		 * The kmem allocator generates random failures for
1266 		 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
1267 		 * So try again before we blow away a perfectly good cache.
1268 		 * This is done not to cover an error but purely for
1269 		 * performance running a debug kernel.
1270 		 * This random error only occurs in debug mode.
1271 		 */
1272 		dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
1273 		if (dep != NULL)
1274 			goto ok;
1275 #endif
1276 		ncs.ncs_dir_add_nm.value.ui64++;
1277 		/*
1278 		 * Free a directory cache. This may be the one we are
1279 		 * called with.
1280 		 */
1281 		dnlc_dir_reclaim(NULL);
1282 		dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
1283 		if (dep == NULL) {
1284 			/*
1285 			 * still no memory, better delete this cache
1286 			 */
1287 			mutex_enter(&dcap->dca_lock);
1288 			dcp = (dircache_t *)dcap->dca_dircache;
1289 			if (VALID_DIR_CACHE(dcp)) {
1290 				dnlc_dir_abort(dcp);
1291 				dcap->dca_dircache = DC_RET_LOW_MEM;
1292 			}
1293 			mutex_exit(&dcap->dca_lock);
1294 			ncs.ncs_dir_addabort.value.ui64++;
1295 			return (DNOCACHE);
1296 		}
1297 		/*
1298 		 * fall through as if the 1st kmem_alloc had worked
1299 		 */
1300 	}
1301 #ifdef DEBUG
1302 ok:
1303 #endif
1304 	mutex_enter(&dcap->dca_lock);
1305 	dcp = (dircache_t *)dcap->dca_dircache;
1306 	if (VALID_DIR_CACHE(dcp)) {
1307 		/*
1308 		 * If the total number of entries goes above the max
1309 		 * then free this cache
1310 		 */
1311 		if ((dcp->dc_num_entries + dcp->dc_num_free) >
1312 			dnlc_dir_max_size) {
1313 			mutex_exit(&dcap->dca_lock);
1314 			dnlc_dir_purge(dcap);
1315 			kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
1316 			ncs.ncs_dir_add_max.value.ui64++;
1317 			return (DTOOBIG);
1318 		}
1319 		dcp->dc_num_entries++;
1320 		capacity = (dcp->dc_nhash_mask + 1) << dnlc_dir_hash_size_shift;
1321 		if (dcp->dc_num_entries >=
1322 		    (capacity << dnlc_dir_hash_resize_shift)) {
1323 			dnlc_dir_adjust_nhash(dcp);
1324 		}
1325 		hp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1326 
1327 		/*
1328 		 * Initialise and chain in new entry
1329 		 */
1330 		dep->de_handle = handle;
1331 		dep->de_hash = hash;
1332 		/*
1333 		 * Note de_namelen is a uchar_t to conserve space
1334 		 * and alignment padding. The max length of any
1335 		 * pathname component is defined as MAXNAMELEN
1336 		 * which is 256 (including the terminating null).
1337 		 * So provided this doesn't change, we don't include the null,
1338 		 * we always use bcmp to compare strings, and we don't
1339 		 * start storing full names, then we are ok.
1340 		 * The space savings is worth it.
1341 		 */
1342 		dep->de_namelen = namlen;
1343 		bcopy(name, dep->de_name, namlen);
1344 		dep->de_next = *hp;
1345 		*hp = dep;
1346 		dcp->dc_actime = lbolt64;
1347 		mutex_exit(&dcap->dca_lock);
1348 		ncs.ncs_dir_num_ents.value.ui64++;
1349 		return (DOK);
1350 	} else {
1351 		mutex_exit(&dcap->dca_lock);
1352 		kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
1353 		return (DNOCACHE);
1354 	}
1355 }
1356 
1357 /*
1358  * Add free space to a partial or complete directory cache.
1359  */
1360 dcret_t
1361 dnlc_dir_add_space(dcanchor_t *dcap, uint_t len, uint64_t handle)
1362 {
1363 	dircache_t *dcp;
1364 	dcfree_t *dfp, **hp;
1365 	uint_t capacity;
1366 
1367 	/*
1368 	 * We kmem_alloc outside the lock to avoid possible deadlock if
1369 	 * dnlc_dir_reclaim() is called as a result of memory shortage.
1370 	 */
1371 	dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1372 	if (dfp == NULL) {
1373 #ifdef DEBUG
1374 		/*
1375 		 * The kmem allocator generates random failures for
1376 		 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
1377 		 * So try again before we blow away a perfectly good cache.
1378 		 * This random error only occurs in debug mode
1379 		 */
1380 		dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1381 		if (dfp != NULL)
1382 			goto ok;
1383 #endif
1384 		ncs.ncs_dir_add_nm.value.ui64++;
1385 		/*
1386 		 * Free a directory cache. This may be the one we are
1387 		 * called with.
1388 		 */
1389 		dnlc_dir_reclaim(NULL);
1390 		dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1391 		if (dfp == NULL) {
1392 			/*
1393 			 * still no memory, better delete this cache
1394 			 */
1395 			mutex_enter(&dcap->dca_lock);
1396 			dcp = (dircache_t *)dcap->dca_dircache;
1397 			if (VALID_DIR_CACHE(dcp)) {
1398 				dnlc_dir_abort(dcp);
1399 				dcap->dca_dircache = DC_RET_LOW_MEM;
1400 			}
1401 			mutex_exit(&dcap->dca_lock);
1402 			ncs.ncs_dir_addabort.value.ui64++;
1403 			return (DNOCACHE);
1404 		}
1405 		/*
1406 		 * fall through as if the 1st kmem_alloc had worked
1407 		 */
1408 	}
1409 
1410 #ifdef DEBUG
1411 ok:
1412 #endif
1413 	mutex_enter(&dcap->dca_lock);
1414 	dcp = (dircache_t *)dcap->dca_dircache;
1415 	if (VALID_DIR_CACHE(dcp)) {
1416 		if ((dcp->dc_num_entries + dcp->dc_num_free) >
1417 			dnlc_dir_max_size) {
1418 			mutex_exit(&dcap->dca_lock);
1419 			dnlc_dir_purge(dcap);
1420 			kmem_cache_free(dnlc_dir_space_cache, dfp);
1421 			ncs.ncs_dir_add_max.value.ui64++;
1422 			return (DTOOBIG);
1423 		}
1424 		dcp->dc_num_free++;
1425 		capacity = (dcp->dc_fhash_mask + 1) << dnlc_dir_hash_size_shift;
1426 		if (dcp->dc_num_free >=
1427 		    (capacity << dnlc_dir_hash_resize_shift)) {
1428 			dnlc_dir_adjust_fhash(dcp);
1429 		}
1430 		/*
1431 		 * Initialise and chain a new entry
1432 		 */
1433 		dfp->df_handle = handle;
1434 		dfp->df_len = len;
1435 		dcp->dc_actime = lbolt64;
1436 		hp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
1437 		dfp->df_next = *hp;
1438 		*hp = dfp;
1439 		mutex_exit(&dcap->dca_lock);
1440 		ncs.ncs_dir_num_ents.value.ui64++;
1441 		return (DOK);
1442 	} else {
1443 		mutex_exit(&dcap->dca_lock);
1444 		kmem_cache_free(dnlc_dir_space_cache, dfp);
1445 		return (DNOCACHE);
1446 	}
1447 }
1448 
1449 /*
1450  * Mark a directory cache as complete.
1451  */
1452 void
1453 dnlc_dir_complete(dcanchor_t *dcap)
1454 {
1455 	dircache_t *dcp;
1456 
1457 	mutex_enter(&dcap->dca_lock);
1458 	dcp = (dircache_t *)dcap->dca_dircache;
1459 	if (VALID_DIR_CACHE(dcp)) {
1460 		dcp->dc_complete = B_TRUE;
1461 	}
1462 	mutex_exit(&dcap->dca_lock);
1463 }
1464 
1465 /*
1466  * Internal routine to delete a partial or full directory cache.
1467  * No additional locking needed.
1468  */
1469 static void
1470 dnlc_dir_abort(dircache_t *dcp)
1471 {
1472 	dcentry_t *dep, *nhp;
1473 	dcfree_t *fep, *fhp;
1474 	uint_t nhtsize = dcp->dc_nhash_mask + 1; /* name hash table size */
1475 	uint_t fhtsize = dcp->dc_fhash_mask + 1; /* free hash table size */
1476 	uint_t i;
1477 
1478 	/*
1479 	 * Free up the cached name entries and hash table
1480 	 */
1481 	for (i = 0; i < nhtsize; i++) { /* for each hash bucket */
1482 		nhp = dcp->dc_namehash[i];
1483 		while (nhp != NULL) { /* for each chained entry */
1484 			dep = nhp->de_next;
1485 			kmem_free(nhp, sizeof (dcentry_t) - 1 +
1486 			    nhp->de_namelen);
1487 			nhp = dep;
1488 		}
1489 	}
1490 	kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * nhtsize);
1491 
1492 	/*
1493 	 * Free up the free space entries and hash table
1494 	 */
1495 	for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
1496 		fhp = dcp->dc_freehash[i];
1497 		while (fhp != NULL) { /* for each chained entry */
1498 			fep = fhp->df_next;
1499 			kmem_cache_free(dnlc_dir_space_cache, fhp);
1500 			fhp = fep;
1501 		}
1502 	}
1503 	kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * fhtsize);
1504 
1505 	/*
1506 	 * Finally free the directory cache structure itself
1507 	 */
1508 	ncs.ncs_dir_num_ents.value.ui64 -= (dcp->dc_num_entries +
1509 	    dcp->dc_num_free);
1510 	kmem_free(dcp, sizeof (dircache_t));
1511 	ncs.ncs_cur_dirs.value.ui64--;
1512 }
1513 
1514 /*
1515  * Remove a partial or complete directory cache
1516  */
1517 void
1518 dnlc_dir_purge(dcanchor_t *dcap)
1519 {
1520 	dircache_t *dcp;
1521 
1522 	mutex_enter(&dc_head.dch_lock);
1523 	mutex_enter(&dcap->dca_lock);
1524 	dcp = (dircache_t *)dcap->dca_dircache;
1525 	if (!VALID_DIR_CACHE(dcp)) {
1526 		mutex_exit(&dcap->dca_lock);
1527 		mutex_exit(&dc_head.dch_lock);
1528 		return;
1529 	}
1530 	dcap->dca_dircache = NULL;
1531 	/*
1532 	 * Unchain from global list
1533 	 */
1534 	dcp->dc_prev->dc_next = dcp->dc_next;
1535 	dcp->dc_next->dc_prev = dcp->dc_prev;
1536 	mutex_exit(&dcap->dca_lock);
1537 	mutex_exit(&dc_head.dch_lock);
1538 	dnlc_dir_abort(dcp);
1539 }
1540 
1541 /*
1542  * Remove an entry from a complete or partial directory cache.
1543  * Return the handle if it's non null.
1544  */
1545 dcret_t
1546 dnlc_dir_rem_entry(dcanchor_t *dcap, char *name, uint64_t *handlep)
1547 {
1548 	dircache_t *dcp;
1549 	dcentry_t **prevpp, *te;
1550 	uint_t capacity;
1551 	int hash;
1552 	int ret;
1553 	uchar_t namlen;
1554 
1555 	if (!dnlc_dir_enable) {
1556 		return (DNOCACHE);
1557 	}
1558 
1559 	mutex_enter(&dcap->dca_lock);
1560 	dcp = (dircache_t *)dcap->dca_dircache;
1561 	if (VALID_DIR_CACHE(dcp)) {
1562 		dcp->dc_actime = lbolt64;
1563 		if (dcp->dc_nhash_mask > 0) { /* ie not minimum */
1564 			capacity = (dcp->dc_nhash_mask + 1) <<
1565 			    dnlc_dir_hash_size_shift;
1566 			if (dcp->dc_num_entries <=
1567 			    (capacity >> dnlc_dir_hash_resize_shift)) {
1568 				dnlc_dir_adjust_nhash(dcp);
1569 			}
1570 		}
1571 		DNLC_DIR_HASH(name, hash, namlen);
1572 		prevpp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1573 		while (*prevpp != NULL) {
1574 			if (((*prevpp)->de_hash == hash) &&
1575 			    (namlen == (*prevpp)->de_namelen) &&
1576 			    bcmp((*prevpp)->de_name, name, namlen) == 0) {
1577 				if (handlep != NULL) {
1578 					*handlep = (*prevpp)->de_handle;
1579 				}
1580 				te = *prevpp;
1581 				*prevpp = (*prevpp)->de_next;
1582 				kmem_free(te, sizeof (dcentry_t) - 1 +
1583 				    te->de_namelen);
1584 
1585 				/*
1586 				 * If the total number of entries
1587 				 * falls below half the minimum number
1588 				 * of entries then free this cache.
1589 				 */
1590 				if (--dcp->dc_num_entries <
1591 				    (dnlc_dir_min_size >> 1)) {
1592 					mutex_exit(&dcap->dca_lock);
1593 					dnlc_dir_purge(dcap);
1594 				} else {
1595 					mutex_exit(&dcap->dca_lock);
1596 				}
1597 				ncs.ncs_dir_num_ents.value.ui64--;
1598 				return (DFOUND);
1599 			}
1600 			prevpp = &((*prevpp)->de_next);
1601 		}
1602 		if (dcp->dc_complete) {
1603 			ncs.ncs_dir_reme_fai.value.ui64++;
1604 			ret = DNOENT;
1605 		} else {
1606 			ret = DNOCACHE;
1607 		}
1608 		mutex_exit(&dcap->dca_lock);
1609 		return (ret);
1610 	} else {
1611 		mutex_exit(&dcap->dca_lock);
1612 		return (DNOCACHE);
1613 	}
1614 }
1615 
1616 
1617 /*
1618  * Remove free space of at least the given length from a complete
1619  * or partial directory cache.
1620  */
1621 dcret_t
1622 dnlc_dir_rem_space_by_len(dcanchor_t *dcap, uint_t len, uint64_t *handlep)
1623 {
1624 	dircache_t *dcp;
1625 	dcfree_t **prevpp, *tfp;
1626 	uint_t fhtsize; /* free hash table size */
1627 	uint_t i;
1628 	uint_t capacity;
1629 	int ret;
1630 
1631 	if (!dnlc_dir_enable) {
1632 		return (DNOCACHE);
1633 	}
1634 
1635 	mutex_enter(&dcap->dca_lock);
1636 	dcp = (dircache_t *)dcap->dca_dircache;
1637 	if (VALID_DIR_CACHE(dcp)) {
1638 		dcp->dc_actime = lbolt64;
1639 		if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
1640 			capacity = (dcp->dc_fhash_mask + 1) <<
1641 			    dnlc_dir_hash_size_shift;
1642 			if (dcp->dc_num_free <=
1643 			    (capacity >> dnlc_dir_hash_resize_shift)) {
1644 				dnlc_dir_adjust_fhash(dcp);
1645 			}
1646 		}
1647 		/*
1648 		 * Search for an entry of the appropriate size
1649 		 * on a first fit basis.
1650 		 */
1651 		fhtsize = dcp->dc_fhash_mask + 1;
1652 		for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
1653 			prevpp = &(dcp->dc_freehash[i]);
1654 			while (*prevpp != NULL) {
1655 				if ((*prevpp)->df_len >= len) {
1656 					*handlep = (*prevpp)->df_handle;
1657 					tfp = *prevpp;
1658 					*prevpp = (*prevpp)->df_next;
1659 					dcp->dc_num_free--;
1660 					mutex_exit(&dcap->dca_lock);
1661 					kmem_cache_free(dnlc_dir_space_cache,
1662 					    tfp);
1663 					ncs.ncs_dir_num_ents.value.ui64--;
1664 					return (DFOUND);
1665 				}
1666 				prevpp = &((*prevpp)->df_next);
1667 			}
1668 		}
1669 		if (dcp->dc_complete) {
1670 			ret = DNOENT;
1671 		} else {
1672 			ret = DNOCACHE;
1673 		}
1674 		mutex_exit(&dcap->dca_lock);
1675 		return (ret);
1676 	} else {
1677 		mutex_exit(&dcap->dca_lock);
1678 		return (DNOCACHE);
1679 	}
1680 }
1681 
1682 /*
1683  * Remove free space with the given handle from a complete or partial
1684  * directory cache.
1685  */
1686 dcret_t
1687 dnlc_dir_rem_space_by_handle(dcanchor_t *dcap, uint64_t handle)
1688 {
1689 	dircache_t *dcp;
1690 	dcfree_t **prevpp, *tfp;
1691 	uint_t capacity;
1692 	int ret;
1693 
1694 	if (!dnlc_dir_enable) {
1695 		return (DNOCACHE);
1696 	}
1697 
1698 	mutex_enter(&dcap->dca_lock);
1699 	dcp = (dircache_t *)dcap->dca_dircache;
1700 	if (VALID_DIR_CACHE(dcp)) {
1701 		dcp->dc_actime = lbolt64;
1702 		if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
1703 			capacity = (dcp->dc_fhash_mask + 1) <<
1704 			    dnlc_dir_hash_size_shift;
1705 			if (dcp->dc_num_free <=
1706 			    (capacity >> dnlc_dir_hash_resize_shift)) {
1707 				dnlc_dir_adjust_fhash(dcp);
1708 			}
1709 		}
1710 
1711 		/*
1712 		 * search for the exact entry
1713 		 */
1714 		prevpp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
1715 		while (*prevpp != NULL) {
1716 			if ((*prevpp)->df_handle == handle) {
1717 				tfp = *prevpp;
1718 				*prevpp = (*prevpp)->df_next;
1719 				dcp->dc_num_free--;
1720 				mutex_exit(&dcap->dca_lock);
1721 				kmem_cache_free(dnlc_dir_space_cache, tfp);
1722 				ncs.ncs_dir_num_ents.value.ui64--;
1723 				return (DFOUND);
1724 			}
1725 			prevpp = &((*prevpp)->df_next);
1726 		}
1727 		if (dcp->dc_complete) {
1728 			ncs.ncs_dir_rems_fai.value.ui64++;
1729 			ret = DNOENT;
1730 		} else {
1731 			ret = DNOCACHE;
1732 		}
1733 		mutex_exit(&dcap->dca_lock);
1734 		return (ret);
1735 	} else {
1736 		mutex_exit(&dcap->dca_lock);
1737 		return (DNOCACHE);
1738 	}
1739 }
1740 
1741 /*
1742  * Update the handle of an directory cache entry.
1743  */
1744 dcret_t
1745 dnlc_dir_update(dcanchor_t *dcap, char *name, uint64_t handle)
1746 {
1747 	dircache_t *dcp;
1748 	dcentry_t *dep;
1749 	int hash;
1750 	int ret;
1751 	uchar_t namlen;
1752 
1753 	if (!dnlc_dir_enable) {
1754 		return (DNOCACHE);
1755 	}
1756 
1757 	mutex_enter(&dcap->dca_lock);
1758 	dcp = (dircache_t *)dcap->dca_dircache;
1759 	if (VALID_DIR_CACHE(dcp)) {
1760 		dcp->dc_actime = lbolt64;
1761 		DNLC_DIR_HASH(name, hash, namlen);
1762 		dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1763 		while (dep != NULL) {
1764 			if ((dep->de_hash == hash) &&
1765 			    (namlen == dep->de_namelen) &&
1766 			    bcmp(dep->de_name, name, namlen) == 0) {
1767 				dep->de_handle = handle;
1768 				mutex_exit(&dcap->dca_lock);
1769 				return (DFOUND);
1770 			}
1771 			dep = dep->de_next;
1772 		}
1773 		if (dcp->dc_complete) {
1774 			ncs.ncs_dir_upd_fail.value.ui64++;
1775 			ret = DNOENT;
1776 		} else {
1777 			ret = DNOCACHE;
1778 		}
1779 		mutex_exit(&dcap->dca_lock);
1780 		return (ret);
1781 	} else {
1782 		mutex_exit(&dcap->dca_lock);
1783 		return (DNOCACHE);
1784 	}
1785 }
1786 
1787 void
1788 dnlc_dir_fini(dcanchor_t *dcap)
1789 {
1790 	dircache_t *dcp;
1791 
1792 	mutex_enter(&dc_head.dch_lock);
1793 	mutex_enter(&dcap->dca_lock);
1794 	dcp = (dircache_t *)dcap->dca_dircache;
1795 	if (VALID_DIR_CACHE(dcp)) {
1796 		/*
1797 		 * Unchain from global list
1798 		 */
1799 		ncs.ncs_dir_finipurg.value.ui64++;
1800 		dcp->dc_prev->dc_next = dcp->dc_next;
1801 		dcp->dc_next->dc_prev = dcp->dc_prev;
1802 	} else {
1803 		dcp = NULL;
1804 	}
1805 	dcap->dca_dircache = NULL;
1806 	mutex_exit(&dcap->dca_lock);
1807 	mutex_exit(&dc_head.dch_lock);
1808 	mutex_destroy(&dcap->dca_lock);
1809 	if (dcp) {
1810 		dnlc_dir_abort(dcp);
1811 	}
1812 }
1813 
1814 /*
1815  * Reclaim callback for dnlc directory caching.
1816  * Invoked by the kernel memory allocator when memory gets tight.
1817  * This is a pretty serious condition and can lead easily lead to system
1818  * hangs if not enough space is returned.
1819  *
1820  * Deciding which directory (or directories) to purge is tricky.
1821  * Purging everything is an overkill, but purging just the oldest used
1822  * was found to lead to hangs. The largest cached directories use the
1823  * most memory, but take the most effort to rebuild, whereas the smaller
1824  * ones have little value and give back little space. So what to do?
1825  *
1826  * The current policy is to continue purging the oldest used directories
1827  * until at least dnlc_dir_min_reclaim directory entries have been purged.
1828  */
1829 /*ARGSUSED*/
1830 static void
1831 dnlc_dir_reclaim(void *unused)
1832 {
1833 	dircache_t *dcp, *oldest;
1834 	uint_t dirent_cnt = 0;
1835 
1836 	mutex_enter(&dc_head.dch_lock);
1837 	while (dirent_cnt < dnlc_dir_min_reclaim) {
1838 		dcp = dc_head.dch_next;
1839 		oldest = NULL;
1840 		while (dcp != (dircache_t *)&dc_head) {
1841 			if (oldest == NULL) {
1842 				oldest = dcp;
1843 			} else {
1844 				if (dcp->dc_actime < oldest->dc_actime) {
1845 					oldest = dcp;
1846 				}
1847 			}
1848 			dcp = dcp->dc_next;
1849 		}
1850 		if (oldest == NULL) {
1851 			/* nothing to delete */
1852 			mutex_exit(&dc_head.dch_lock);
1853 			return;
1854 		}
1855 		/*
1856 		 * remove from directory chain and purge
1857 		 */
1858 		oldest->dc_prev->dc_next = oldest->dc_next;
1859 		oldest->dc_next->dc_prev = oldest->dc_prev;
1860 		mutex_enter(&oldest->dc_anchor->dca_lock);
1861 		/*
1862 		 * If this was the last entry then it must be too large.
1863 		 * Mark it as such by saving a special dircache_t
1864 		 * pointer (DC_RET_LOW_MEM) in the anchor. The error DNOMEM
1865 		 * will be presented to the caller of dnlc_dir_start()
1866 		 */
1867 		if (oldest->dc_next == oldest->dc_prev) {
1868 			oldest->dc_anchor->dca_dircache = DC_RET_LOW_MEM;
1869 			ncs.ncs_dir_rec_last.value.ui64++;
1870 		} else {
1871 			oldest->dc_anchor->dca_dircache = NULL;
1872 			ncs.ncs_dir_recl_any.value.ui64++;
1873 		}
1874 		mutex_exit(&oldest->dc_anchor->dca_lock);
1875 		dirent_cnt += oldest->dc_num_entries;
1876 		dnlc_dir_abort(oldest);
1877 	}
1878 	mutex_exit(&dc_head.dch_lock);
1879 }
1880 
1881 /*
1882  * Dynamically grow or shrink the size of the name hash table
1883  */
1884 static void
1885 dnlc_dir_adjust_nhash(dircache_t *dcp)
1886 {
1887 	dcentry_t **newhash, *dep, **nhp, *tep;
1888 	uint_t newsize;
1889 	uint_t oldsize;
1890 	uint_t newsizemask;
1891 	int i;
1892 
1893 	/*
1894 	 * Allocate new hash table
1895 	 */
1896 	newsize = dcp->dc_num_entries >> dnlc_dir_hash_size_shift;
1897 	newhash = kmem_zalloc(sizeof (dcentry_t *) * newsize, KM_NOSLEEP);
1898 	if (newhash == NULL) {
1899 		/*
1900 		 * System is short on memory just return
1901 		 * Note, the old hash table is still usable.
1902 		 * This return is unlikely to repeatedy occur, because
1903 		 * either some other directory caches will be reclaimed
1904 		 * due to memory shortage, thus freeing memory, or this
1905 		 * directory cahe will be reclaimed.
1906 		 */
1907 		return;
1908 	}
1909 	oldsize = dcp->dc_nhash_mask + 1;
1910 	dcp->dc_nhash_mask = newsizemask = newsize - 1;
1911 
1912 	/*
1913 	 * Move entries from the old table to the new
1914 	 */
1915 	for (i = 0; i < oldsize; i++) { /* for each hash bucket */
1916 		dep = dcp->dc_namehash[i];
1917 		while (dep != NULL) { /* for each chained entry */
1918 			tep = dep;
1919 			dep = dep->de_next;
1920 			nhp = &newhash[tep->de_hash & newsizemask];
1921 			tep->de_next = *nhp;
1922 			*nhp = tep;
1923 		}
1924 	}
1925 
1926 	/*
1927 	 * delete old hash table and set new one in place
1928 	 */
1929 	kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * oldsize);
1930 	dcp->dc_namehash = newhash;
1931 }
1932 
1933 /*
1934  * Dynamically grow or shrink the size of the free space hash table
1935  */
1936 static void
1937 dnlc_dir_adjust_fhash(dircache_t *dcp)
1938 {
1939 	dcfree_t **newhash, *dfp, **nhp, *tfp;
1940 	uint_t newsize;
1941 	uint_t oldsize;
1942 	int i;
1943 
1944 	/*
1945 	 * Allocate new hash table
1946 	 */
1947 	newsize = dcp->dc_num_free >> dnlc_dir_hash_size_shift;
1948 	newhash = kmem_zalloc(sizeof (dcfree_t *) * newsize, KM_NOSLEEP);
1949 	if (newhash == NULL) {
1950 		/*
1951 		 * System is short on memory just return
1952 		 * Note, the old hash table is still usable.
1953 		 * This return is unlikely to repeatedy occur, because
1954 		 * either some other directory caches will be reclaimed
1955 		 * due to memory shortage, thus freeing memory, or this
1956 		 * directory cahe will be reclaimed.
1957 		 */
1958 		return;
1959 	}
1960 	oldsize = dcp->dc_fhash_mask + 1;
1961 	dcp->dc_fhash_mask = newsize - 1;
1962 
1963 	/*
1964 	 * Move entries from the old table to the new
1965 	 */
1966 	for (i = 0; i < oldsize; i++) { /* for each hash bucket */
1967 		dfp = dcp->dc_freehash[i];
1968 		while (dfp != NULL) { /* for each chained entry */
1969 			tfp = dfp;
1970 			dfp = dfp->df_next;
1971 			nhp = &newhash[DDFHASH(tfp->df_handle, dcp)];
1972 			tfp->df_next = *nhp;
1973 			*nhp = tfp;
1974 		}
1975 	}
1976 
1977 	/*
1978 	 * delete old hash table and set new one in place
1979 	 */
1980 	kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * oldsize);
1981 	dcp->dc_freehash = newhash;
1982 }
1983