xref: /titanic_50/usr/src/uts/common/fs/dnlc.c (revision 0167b58cea98965c58fab4be4e690b6e456f7440)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 /*
30  * University Copyright- Copyright (c) 1982, 1986, 1988
31  * The Regents of the University of California
32  * All Rights Reserved
33  *
34  * University Acknowledgment- Portions of this document are derived from
35  * software developed by the University of California, Berkeley, and its
36  * contributors.
37  */
38 
39 #pragma ident	"%Z%%M%	%I%	%E% SMI"
40 
41 #include <sys/types.h>
42 #include <sys/systm.h>
43 #include <sys/param.h>
44 #include <sys/t_lock.h>
45 #include <sys/systm.h>
46 #include <sys/vfs.h>
47 #include <sys/vnode.h>
48 #include <sys/dnlc.h>
49 #include <sys/kmem.h>
50 #include <sys/cmn_err.h>
51 #include <sys/vtrace.h>
52 #include <sys/bitmap.h>
53 #include <sys/var.h>
54 #include <sys/sysmacros.h>
55 #include <sys/kstat.h>
56 #include <sys/atomic.h>
57 #include <sys/taskq.h>
58 
59 /*
60  * Directory name lookup cache.
61  * Based on code originally done by Robert Elz at Melbourne.
62  *
63  * Names found by directory scans are retained in a cache
64  * for future reference.  Each hash chain is ordered by LRU
65  * Cache is indexed by hash value obtained from (vp, name)
66  * where the vp refers to the directory containing the name.
67  */
68 
69 /*
70  * Tunable nc_hashavelen is the average length desired for this chain, from
71  * which the size of the nc_hash table is derived at create time.
72  */
73 #define	NC_HASHAVELEN_DEFAULT	4
74 int nc_hashavelen = NC_HASHAVELEN_DEFAULT;
75 
76 /*
77  * NC_MOVETOFRONT is the move-to-front threshold: if the hash lookup
78  * depth exceeds this value, we move the looked-up entry to the front of
79  * its hash chain.  The idea is to make sure that the most frequently
80  * accessed entries are found most quickly (by keeping them near the
81  * front of their hash chains).
82  */
83 #define	NC_MOVETOFRONT	2
84 
85 /*
86  *
87  * DNLC_MAX_RELE is used to size an array on the stack when releasing
88  * vnodes. This array is used rather than calling VN_RELE() inline because
89  * all dnlc locks must be dropped by that time in order to avoid a
90  * possible deadlock. This deadlock occurs when the dnlc holds the last
91  * reference to the vnode and so the VOP_INACTIVE vector is called which
92  * can in turn call back into the dnlc. A global array was used but had
93  * many problems:
94  *	1) Actually doesn't have an upper bound on the array size as
95  *	   entries can be added after starting the purge.
96  *	2) The locking scheme causes a hang.
97  *	3) Caused serialisation on the global lock.
98  *	4) The array was often unnecessarily huge.
99  *
100  * Note the current value 8 allows up to 4 cache entries (to be purged
101  * from each hash chain), before having to cycle around and retry.
102  * This ought to be ample given that nc_hashavelen is typically very small.
103  */
104 #define	DNLC_MAX_RELE	8 /* must be even */
105 
106 /*
107  * Hash table of name cache entries for fast lookup, dynamically
108  * allocated at startup.
109  */
110 nc_hash_t *nc_hash;
111 
112 /*
113  * Rotors. Used to select entries on a round-robin basis.
114  */
115 static nc_hash_t *dnlc_purge_fs1_rotor;
116 static nc_hash_t *dnlc_free_rotor;
117 
118 /*
119  * # of dnlc entries (uninitialized)
120  *
121  * the initial value was chosen as being
122  * a random string of bits, probably not
123  * normally chosen by a systems administrator
124  */
125 int ncsize = -1;
126 uint32_t dnlc_nentries = 0;	/* current number of name cache entries */
127 static int nc_hashsz;		/* size of hash table */
128 static int nc_hashmask;		/* size of hash table minus 1 */
129 
130 /*
131  * The dnlc_reduce_cache() taskq queue is activated when there are
132  * ncsize name cache entries and it reduces the size down to
133  * dnlc_nentries_low_water, which is by default one hundreth
134  * less (or 99%) of ncsize.
135  */
136 #define	DNLC_LOW_WATER_DIVISOR_DEFAULT 100
137 uint_t dnlc_low_water_divisor = DNLC_LOW_WATER_DIVISOR_DEFAULT;
138 uint_t dnlc_nentries_low_water;
139 int dnlc_reduce_idle = 1; /* no locking needed */
140 
141 /*
142  * If dnlc_nentries hits dnlc_max_nentries (twice ncsize)
143  * then this means the dnlc_reduce_cache() taskq is failing to
144  * keep up. In this case we refuse to add new entries to the dnlc
145  * until the taskq catches up.
146  */
147 uint_t dnlc_max_nentries; /* twice ncsize */
148 uint64_t dnlc_max_nentries_cnt = 0; /* statistic on times we failed */
149 
150 /*
151  * Tunable to define when we should just remove items from
152  * the end of the chain.
153  */
154 #define	DNLC_LONG_CHAIN 8
155 uint_t dnlc_long_chain = DNLC_LONG_CHAIN;
156 
157 /*
158  * ncstats has been deprecated, due to the integer size of the counters
159  * which can easily overflow in the dnlc.
160  * It is maintained (at some expense) for compatability.
161  * The preferred interface is the kstat accessible nc_stats below.
162  */
163 struct ncstats ncstats;
164 
165 struct nc_stats ncs = {
166 	{ "hits",			KSTAT_DATA_UINT64 },
167 	{ "misses",			KSTAT_DATA_UINT64 },
168 	{ "negative_cache_hits",	KSTAT_DATA_UINT64 },
169 	{ "enters",			KSTAT_DATA_UINT64 },
170 	{ "double_enters",		KSTAT_DATA_UINT64 },
171 	{ "purge_total_entries",	KSTAT_DATA_UINT64 },
172 	{ "purge_all",			KSTAT_DATA_UINT64 },
173 	{ "purge_vp",			KSTAT_DATA_UINT64 },
174 	{ "purge_vfs",			KSTAT_DATA_UINT64 },
175 	{ "purge_fs1",			KSTAT_DATA_UINT64 },
176 	{ "pick_free",			KSTAT_DATA_UINT64 },
177 	{ "pick_heuristic",		KSTAT_DATA_UINT64 },
178 	{ "pick_last",			KSTAT_DATA_UINT64 },
179 
180 	/* directory caching stats */
181 
182 	{ "dir_hits",			KSTAT_DATA_UINT64 },
183 	{ "dir_misses",			KSTAT_DATA_UINT64 },
184 	{ "dir_cached_current",		KSTAT_DATA_UINT64 },
185 	{ "dir_entries_cached_current",	KSTAT_DATA_UINT64 },
186 	{ "dir_cached_total",		KSTAT_DATA_UINT64 },
187 	{ "dir_start_no_memory",	KSTAT_DATA_UINT64 },
188 	{ "dir_add_no_memory",		KSTAT_DATA_UINT64 },
189 	{ "dir_add_abort",		KSTAT_DATA_UINT64 },
190 	{ "dir_add_max",		KSTAT_DATA_UINT64 },
191 	{ "dir_remove_entry_fail",	KSTAT_DATA_UINT64 },
192 	{ "dir_remove_space_fail",	KSTAT_DATA_UINT64 },
193 	{ "dir_update_fail",		KSTAT_DATA_UINT64 },
194 	{ "dir_fini_purge",		KSTAT_DATA_UINT64 },
195 	{ "dir_reclaim_last",		KSTAT_DATA_UINT64 },
196 	{ "dir_reclaim_any",		KSTAT_DATA_UINT64 },
197 };
198 
199 static int doingcache = 1;
200 
201 vnode_t negative_cache_vnode;
202 
203 /*
204  * Insert entry at the front of the queue
205  */
206 #define	nc_inshash(ncp, hp) \
207 { \
208 	(ncp)->hash_next = (hp)->hash_next; \
209 	(ncp)->hash_prev = (ncache_t *)(hp); \
210 	(hp)->hash_next->hash_prev = (ncp); \
211 	(hp)->hash_next = (ncp); \
212 }
213 
214 /*
215  * Remove entry from hash queue
216  */
217 #define	nc_rmhash(ncp) \
218 { \
219 	(ncp)->hash_prev->hash_next = (ncp)->hash_next; \
220 	(ncp)->hash_next->hash_prev = (ncp)->hash_prev; \
221 	(ncp)->hash_prev = NULL; \
222 	(ncp)->hash_next = NULL; \
223 }
224 
225 /*
226  * Free an entry.
227  */
228 #define	dnlc_free(ncp) \
229 { \
230 	kmem_free((ncp), sizeof (ncache_t) + (ncp)->namlen); \
231 	atomic_add_32(&dnlc_nentries, -1); \
232 }
233 
234 
235 /*
236  * Cached directory info.
237  * ======================
238  */
239 
240 /*
241  * Cached directory free space hash function.
242  * Needs the free space handle and the dcp to get the hash table size
243  * Returns the hash index.
244  */
245 #define	DDFHASH(handle, dcp) ((handle >> 2) & (dcp)->dc_fhash_mask)
246 
247 /*
248  * Cached directory name entry hash function.
249  * Uses the name and returns in the input arguments the hash and the name
250  * length.
251  */
252 #define	DNLC_DIR_HASH(name, hash, namelen)			\
253 	{							\
254 		char Xc, *Xcp;					\
255 		hash = *name;					\
256 		for (Xcp = (name + 1); (Xc = *Xcp) != 0; Xcp++)	\
257 			hash = (hash << 4) + hash + Xc;		\
258 		ASSERT((Xcp - (name)) <= ((1 << NBBY) - 1));	\
259 		namelen = Xcp - (name);				\
260 	}
261 
262 /* special dircache_t pointer to indicate error should be returned */
263 /*
264  * The anchor directory cache pointer can contain 3 types of values,
265  * 1) NULL: No directory cache
266  * 2) DC_RET_LOW_MEM (-1): There was a directory cache that found to be
267  *    too big or a memory shortage occurred. This value remains in the
268  *    pointer until a dnlc_dir_start() which returns the a DNOMEM error.
269  *    This is kludgy but efficient and only visible in this source file.
270  * 3) A valid cache pointer.
271  */
272 #define	DC_RET_LOW_MEM (dircache_t *)1
273 #define	VALID_DIR_CACHE(dcp) ((dircache_t *)(dcp) > DC_RET_LOW_MEM)
274 
275 /* Tunables */
276 uint_t dnlc_dir_enable = 1; /* disable caching directories by setting to 0 */
277 uint_t dnlc_dir_min_size = 40; /* min no of directory entries before caching */
278 uint_t dnlc_dir_max_size = UINT_MAX; /* ditto maximum */
279 uint_t dnlc_dir_hash_size_shift = 3; /* 8 entries per hash bucket */
280 uint_t dnlc_dir_min_reclaim =  350000; /* approx 1MB of dcentrys */
281 /*
282  * dnlc_dir_hash_resize_shift determines when the hash tables
283  * get re-adjusted due to growth or shrinkage
284  * - currently 2 indicating that there can be at most 4
285  * times or at least one quarter the number of entries
286  * before hash table readjustment. Note that with
287  * dnlc_dir_hash_size_shift above set at 3 this would
288  * mean readjustment would occur if the average number
289  * of entries went above 32 or below 2
290  */
291 uint_t dnlc_dir_hash_resize_shift = 2; /* readjust rate */
292 
293 static kmem_cache_t *dnlc_dir_space_cache; /* free space entry cache */
294 static dchead_t dc_head; /* anchor of cached directories */
295 
296 /* Prototypes */
297 static ncache_t *dnlc_get(uchar_t namlen);
298 static ncache_t *dnlc_search(vnode_t *dp, char *name, uchar_t namlen, int hash);
299 static void dnlc_reduce_cache(void *unused);
300 static void dnlc_dir_reclaim(void *unused);
301 static void dnlc_dir_abort(dircache_t *dcp);
302 static void dnlc_dir_adjust_fhash(dircache_t *dcp);
303 static void dnlc_dir_adjust_nhash(dircache_t *dcp);
304 
305 
306 /*
307  * Initialize the directory cache.
308  */
309 void
310 dnlc_init()
311 {
312 	nc_hash_t *hp;
313 	kstat_t *ksp;
314 	int i;
315 
316 	/*
317 	 * Set up the size of the dnlc (ncsize) and its low water mark.
318 	 */
319 	if (ncsize == -1) {
320 		/* calculate a reasonable size for the low water */
321 		dnlc_nentries_low_water = 4 * (v.v_proc + maxusers) + 320;
322 		ncsize = dnlc_nentries_low_water +
323 		    (dnlc_nentries_low_water / dnlc_low_water_divisor);
324 	} else {
325 		/* don't change the user specified ncsize */
326 		dnlc_nentries_low_water =
327 		    ncsize - (ncsize / dnlc_low_water_divisor);
328 	}
329 	if (ncsize <= 0) {
330 		doingcache = 0;
331 		dnlc_dir_enable = 0; /* also disable directory caching */
332 		ncsize = 0;
333 		cmn_err(CE_NOTE, "name cache (dnlc) disabled");
334 		return;
335 	}
336 	dnlc_max_nentries = ncsize * 2;
337 
338 	/*
339 	 * Initialise the hash table.
340 	 * Compute hash size rounding to the next power of two.
341 	 */
342 	nc_hashsz = ncsize / nc_hashavelen;
343 	nc_hashsz = 1 << highbit(nc_hashsz);
344 	nc_hashmask = nc_hashsz - 1;
345 	nc_hash = kmem_zalloc(nc_hashsz * sizeof (*nc_hash), KM_SLEEP);
346 	for (i = 0; i < nc_hashsz; i++) {
347 		hp = (nc_hash_t *)&nc_hash[i];
348 		mutex_init(&hp->hash_lock, NULL, MUTEX_DEFAULT, NULL);
349 		hp->hash_next = (ncache_t *)hp;
350 		hp->hash_prev = (ncache_t *)hp;
351 	}
352 
353 	/*
354 	 * Initialize rotors
355 	 */
356 	dnlc_free_rotor = dnlc_purge_fs1_rotor = &nc_hash[0];
357 
358 	/*
359 	 * Set up the directory caching to use kmem_cache_alloc
360 	 * for its free space entries so that we can get a callback
361 	 * when the system is short on memory, to allow us to free
362 	 * up some memory. we don't use the constructor/deconstructor
363 	 * functions.
364 	 */
365 	dnlc_dir_space_cache = kmem_cache_create("dnlc_space_cache",
366 	    sizeof (dcfree_t), 0, NULL, NULL, dnlc_dir_reclaim, NULL,
367 	    NULL, 0);
368 
369 	/*
370 	 * Initialise the head of the cached directory structures
371 	 */
372 	mutex_init(&dc_head.dch_lock, NULL, MUTEX_DEFAULT, NULL);
373 	dc_head.dch_next = (dircache_t *)&dc_head;
374 	dc_head.dch_prev = (dircache_t *)&dc_head;
375 
376 	/*
377 	 * Initialise the reference count of the negative cache vnode to 1
378 	 * so that it never goes away (VOP_INACTIVE isn't called on it).
379 	 */
380 	negative_cache_vnode.v_count = 1;
381 
382 	/*
383 	 * Initialise kstats - both the old compatability raw kind and
384 	 * the more extensive named stats.
385 	 */
386 	ksp = kstat_create("unix", 0, "ncstats", "misc", KSTAT_TYPE_RAW,
387 		sizeof (struct ncstats), KSTAT_FLAG_VIRTUAL);
388 	if (ksp) {
389 		ksp->ks_data = (void *) &ncstats;
390 		kstat_install(ksp);
391 	}
392 	ksp = kstat_create("unix", 0, "dnlcstats", "misc", KSTAT_TYPE_NAMED,
393 	    sizeof (ncs) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
394 	if (ksp) {
395 		ksp->ks_data = (void *) &ncs;
396 		kstat_install(ksp);
397 	}
398 }
399 
400 /*
401  * Add a name to the directory cache.
402  */
403 void
404 dnlc_enter(vnode_t *dp, char *name, vnode_t *vp)
405 {
406 	ncache_t *ncp;
407 	nc_hash_t *hp;
408 	uchar_t namlen;
409 	int hash;
410 
411 	TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_enter_start:");
412 
413 	if (!doingcache) {
414 		TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
415 		    "dnlc_enter_end:(%S) %d", "not caching", 0);
416 		return;
417 	}
418 
419 	/*
420 	 * Get a new dnlc entry. Assume the entry won't be in the cache
421 	 * and initialize it now
422 	 */
423 	DNLCHASH(name, dp, hash, namlen);
424 	if ((ncp = dnlc_get(namlen)) == NULL)
425 		return;
426 	ncp->dp = dp;
427 	VN_HOLD(dp);
428 	ncp->vp = vp;
429 	VN_HOLD(vp);
430 	bcopy(name, ncp->name, namlen + 1); /* name and null */
431 	ncp->hash = hash;
432 	hp = &nc_hash[hash & nc_hashmask];
433 
434 	mutex_enter(&hp->hash_lock);
435 	if (dnlc_search(dp, name, namlen, hash) != NULL) {
436 		mutex_exit(&hp->hash_lock);
437 		ncstats.dbl_enters++;
438 		ncs.ncs_dbl_enters.value.ui64++;
439 		VN_RELE(dp);
440 		VN_RELE(vp);
441 		dnlc_free(ncp);		/* crfree done here */
442 		TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
443 			"dnlc_enter_end:(%S) %d",
444 			"dbl enter", ncstats.dbl_enters);
445 		return;
446 	}
447 	/*
448 	 * Insert back into the hash chain.
449 	 */
450 	nc_inshash(ncp, hp);
451 	mutex_exit(&hp->hash_lock);
452 	ncstats.enters++;
453 	ncs.ncs_enters.value.ui64++;
454 	TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
455 	    "dnlc_enter_end:(%S) %d", "done", ncstats.enters);
456 }
457 
458 /*
459  * Add a name to the directory cache.
460  *
461  * This function is basically identical with
462  * dnlc_enter().  The difference is that when the
463  * desired dnlc entry is found, the vnode in the
464  * ncache is compared with the vnode passed in.
465  *
466  * If they are not equal then the ncache is
467  * updated with the passed in vnode.  Otherwise
468  * it just frees up the newly allocated dnlc entry.
469  */
470 void
471 dnlc_update(vnode_t *dp, char *name, vnode_t *vp)
472 {
473 	ncache_t *ncp;
474 	ncache_t *tcp;
475 	vnode_t *tvp;
476 	nc_hash_t *hp;
477 	int hash;
478 	uchar_t namlen;
479 
480 	TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_update_start:");
481 
482 	if (!doingcache) {
483 		TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
484 		    "dnlc_update_end:(%S) %d", "not caching", 0);
485 		return;
486 	}
487 
488 	/*
489 	 * Get a new dnlc entry and initialize it now.
490 	 * If we fail to get a new entry, call dnlc_remove() to purge
491 	 * any existing dnlc entry including negative cache (DNLC_NO_VNODE)
492 	 * entry.
493 	 * Failure to clear an existing entry could result in false dnlc
494 	 * lookup (negative/stale entry).
495 	 */
496 	DNLCHASH(name, dp, hash, namlen);
497 	if ((ncp = dnlc_get(namlen)) == NULL) {
498 		dnlc_remove(dp, name);
499 		return;
500 	}
501 	ncp->dp = dp;
502 	VN_HOLD(dp);
503 	ncp->vp = vp;
504 	VN_HOLD(vp);
505 	bcopy(name, ncp->name, namlen + 1); /* name and null */
506 	ncp->hash = hash;
507 	hp = &nc_hash[hash & nc_hashmask];
508 
509 	mutex_enter(&hp->hash_lock);
510 	if ((tcp = dnlc_search(dp, name, namlen, hash)) != NULL) {
511 		if (tcp->vp != vp) {
512 			tvp = tcp->vp;
513 			tcp->vp = vp;
514 			mutex_exit(&hp->hash_lock);
515 			VN_RELE(tvp);
516 			ncstats.enters++;
517 			ncs.ncs_enters.value.ui64++;
518 			TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
519 			    "dnlc_update_end:(%S) %d", "done", ncstats.enters);
520 		} else {
521 			mutex_exit(&hp->hash_lock);
522 			VN_RELE(vp);
523 			ncstats.dbl_enters++;
524 			ncs.ncs_dbl_enters.value.ui64++;
525 			TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
526 			    "dnlc_update_end:(%S) %d",
527 			    "dbl enter", ncstats.dbl_enters);
528 		}
529 		VN_RELE(dp);
530 		dnlc_free(ncp);		/* crfree done here */
531 		return;
532 	}
533 	/*
534 	 * insert the new entry, since it is not in dnlc yet
535 	 */
536 	nc_inshash(ncp, hp);
537 	mutex_exit(&hp->hash_lock);
538 	ncstats.enters++;
539 	ncs.ncs_enters.value.ui64++;
540 	TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
541 	    "dnlc_update_end:(%S) %d", "done", ncstats.enters);
542 }
543 
544 /*
545  * Look up a name in the directory name cache.
546  *
547  * Return a doubly-held vnode if found: one hold so that it may
548  * remain in the cache for other users, the other hold so that
549  * the cache is not re-cycled and the identity of the vnode is
550  * lost before the caller can use the vnode.
551  */
552 vnode_t *
553 dnlc_lookup(vnode_t *dp, char *name)
554 {
555 	ncache_t *ncp;
556 	nc_hash_t *hp;
557 	vnode_t *vp;
558 	int hash, depth;
559 	uchar_t namlen;
560 
561 	TRACE_2(TR_FAC_NFS, TR_DNLC_LOOKUP_START,
562 	    "dnlc_lookup_start:dp %x name %s", dp, name);
563 
564 	if (!doingcache) {
565 		TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
566 		    "dnlc_lookup_end:%S %d vp %x name %s",
567 		    "not_caching", 0, NULL, name);
568 		return (NULL);
569 	}
570 
571 	DNLCHASH(name, dp, hash, namlen);
572 	depth = 1;
573 	hp = &nc_hash[hash & nc_hashmask];
574 	mutex_enter(&hp->hash_lock);
575 
576 	for (ncp = hp->hash_next; ncp != (ncache_t *)hp;
577 	    ncp = ncp->hash_next) {
578 		if (ncp->hash == hash &&	/* fast signature check */
579 		    ncp->dp == dp &&
580 		    ncp->namlen == namlen &&
581 		    bcmp(ncp->name, name, namlen) == 0) {
582 			/*
583 			 * Move this entry to the head of its hash chain
584 			 * if it's not already close.
585 			 */
586 			if (depth > NC_MOVETOFRONT) {
587 				ncache_t *next = ncp->hash_next;
588 				ncache_t *prev = ncp->hash_prev;
589 
590 				prev->hash_next = next;
591 				next->hash_prev = prev;
592 				ncp->hash_next = next = hp->hash_next;
593 				ncp->hash_prev = (ncache_t *)hp;
594 				next->hash_prev = ncp;
595 				hp->hash_next = ncp;
596 
597 				ncstats.move_to_front++;
598 			}
599 
600 			/*
601 			 * Put a hold on the vnode now so its identity
602 			 * can't change before the caller has a chance to
603 			 * put a hold on it.
604 			 */
605 			vp = ncp->vp;
606 			VN_HOLD(vp);
607 			mutex_exit(&hp->hash_lock);
608 			ncstats.hits++;
609 			ncs.ncs_hits.value.ui64++;
610 			if (vp == DNLC_NO_VNODE) {
611 				ncs.ncs_neg_hits.value.ui64++;
612 			}
613 			TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
614 				"dnlc_lookup_end:%S %d vp %x name %s",
615 				"hit", ncstats.hits, vp, name);
616 			return (vp);
617 		}
618 		depth++;
619 	}
620 
621 	mutex_exit(&hp->hash_lock);
622 	ncstats.misses++;
623 	ncs.ncs_misses.value.ui64++;
624 	TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
625 		"dnlc_lookup_end:%S %d vp %x name %s", "miss", ncstats.misses,
626 	    NULL, name);
627 	return (NULL);
628 }
629 
630 /*
631  * Remove an entry in the directory name cache.
632  */
633 void
634 dnlc_remove(vnode_t *dp, char *name)
635 {
636 	ncache_t *ncp;
637 	nc_hash_t *hp;
638 	uchar_t namlen;
639 	int hash;
640 
641 	if (!doingcache)
642 		return;
643 	DNLCHASH(name, dp, hash, namlen);
644 	hp = &nc_hash[hash & nc_hashmask];
645 
646 	mutex_enter(&hp->hash_lock);
647 	if (ncp = dnlc_search(dp, name, namlen, hash)) {
648 		/*
649 		 * Free up the entry
650 		 */
651 		nc_rmhash(ncp);
652 		mutex_exit(&hp->hash_lock);
653 		VN_RELE(ncp->vp);
654 		VN_RELE(ncp->dp);
655 		dnlc_free(ncp);
656 		return;
657 	}
658 	mutex_exit(&hp->hash_lock);
659 }
660 
661 /*
662  * Purge the entire cache.
663  */
664 void
665 dnlc_purge()
666 {
667 	nc_hash_t *nch;
668 	ncache_t *ncp;
669 	int index;
670 	int i;
671 	vnode_t *nc_rele[DNLC_MAX_RELE];
672 
673 	if (!doingcache)
674 		return;
675 
676 	ncstats.purges++;
677 	ncs.ncs_purge_all.value.ui64++;
678 
679 	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
680 		index = 0;
681 		mutex_enter(&nch->hash_lock);
682 		ncp = nch->hash_next;
683 		while (ncp != (ncache_t *)nch) {
684 			ncache_t *np;
685 
686 			np = ncp->hash_next;
687 			nc_rele[index++] = ncp->vp;
688 			nc_rele[index++] = ncp->dp;
689 
690 			nc_rmhash(ncp);
691 			dnlc_free(ncp);
692 			ncp = np;
693 			ncs.ncs_purge_total.value.ui64++;
694 			if (index == DNLC_MAX_RELE)
695 				break;
696 		}
697 		mutex_exit(&nch->hash_lock);
698 
699 		/* Release holds on all the vnodes now that we have no locks */
700 		for (i = 0; i < index; i++) {
701 			VN_RELE(nc_rele[i]);
702 		}
703 		if (ncp != (ncache_t *)nch) {
704 			nch--; /* Do current hash chain again */
705 		}
706 	}
707 }
708 
709 /*
710  * Purge any cache entries referencing a vnode.
711  * Exit as soon as the vnode reference count goes to 1, as the caller
712  * must hold a reference, and the dnlc can therefore have no more.
713  */
714 void
715 dnlc_purge_vp(vnode_t *vp)
716 {
717 	nc_hash_t *nch;
718 	ncache_t *ncp;
719 	int index;
720 	vnode_t *nc_rele[DNLC_MAX_RELE];
721 
722 	ASSERT(vp->v_count > 0);
723 	if (vp->v_count == 1) {
724 		return;
725 	}
726 
727 	if (!doingcache)
728 		return;
729 
730 	ncstats.purges++;
731 	ncs.ncs_purge_vp.value.ui64++;
732 
733 	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
734 		index = 0;
735 		mutex_enter(&nch->hash_lock);
736 		ncp = nch->hash_next;
737 		while (ncp != (ncache_t *)nch) {
738 			ncache_t *np;
739 
740 			np = ncp->hash_next;
741 			if (ncp->dp == vp || ncp->vp == vp) {
742 				nc_rele[index++] = ncp->vp;
743 				nc_rele[index++] = ncp->dp;
744 				nc_rmhash(ncp);
745 				dnlc_free(ncp);
746 				ncs.ncs_purge_total.value.ui64++;
747 				if (index == DNLC_MAX_RELE) {
748 					ncp = np;
749 					break;
750 				}
751 			}
752 			ncp = np;
753 		}
754 		mutex_exit(&nch->hash_lock);
755 
756 		/* Release holds on all the vnodes now that we have no locks */
757 		while (index) {
758 			VN_RELE(nc_rele[--index]);
759 		}
760 
761 		if (vp->v_count == 1) {
762 			return; /* no more dnlc references */
763 		}
764 
765 		if (ncp != (ncache_t *)nch) {
766 			nch--; /* Do current hash chain again */
767 		}
768 	}
769 }
770 
771 /*
772  * Purge cache entries referencing a vfsp.  Caller supplies a count
773  * of entries to purge; up to that many will be freed.  A count of
774  * zero indicates that all such entries should be purged.  Returns
775  * the number of entries that were purged.
776  */
777 int
778 dnlc_purge_vfsp(vfs_t *vfsp, int count)
779 {
780 	nc_hash_t *nch;
781 	ncache_t *ncp;
782 	int n = 0;
783 	int index;
784 	int i;
785 	vnode_t *nc_rele[DNLC_MAX_RELE];
786 
787 	if (!doingcache)
788 		return (0);
789 
790 	ncstats.purges++;
791 	ncs.ncs_purge_vfs.value.ui64++;
792 
793 	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
794 		index = 0;
795 		mutex_enter(&nch->hash_lock);
796 		ncp = nch->hash_next;
797 		while (ncp != (ncache_t *)nch) {
798 			ncache_t *np;
799 
800 			np = ncp->hash_next;
801 			ASSERT(ncp->dp != NULL);
802 			ASSERT(ncp->vp != NULL);
803 			if ((ncp->dp->v_vfsp == vfsp) ||
804 			    (ncp->vp->v_vfsp == vfsp)) {
805 				n++;
806 				nc_rele[index++] = ncp->vp;
807 				nc_rele[index++] = ncp->dp;
808 				nc_rmhash(ncp);
809 				dnlc_free(ncp);
810 				ncs.ncs_purge_total.value.ui64++;
811 				if (index == DNLC_MAX_RELE) {
812 					ncp = np;
813 					break;
814 				}
815 				if (count != 0 && n >= count) {
816 					break;
817 				}
818 			}
819 			ncp = np;
820 		}
821 		mutex_exit(&nch->hash_lock);
822 		/* Release holds on all the vnodes now that we have no locks */
823 		for (i = 0; i < index; i++) {
824 			VN_RELE(nc_rele[i]);
825 		}
826 		if (count != 0 && n >= count) {
827 			return (n);
828 		}
829 		if (ncp != (ncache_t *)nch) {
830 			nch--; /* Do current hash chain again */
831 		}
832 	}
833 	return (n);
834 }
835 
836 /*
837  * Purge 1 entry from the dnlc that is part of the filesystem(s)
838  * represented by 'vop'. The purpose of this routine is to allow
839  * users of the dnlc to free a vnode that is being held by the dnlc.
840  *
841  * If we find a vnode that we release which will result in
842  * freeing the underlying vnode (count was 1), return 1, 0
843  * if no appropriate vnodes found.
844  *
845  * Note, vop is not the 'right' identifier for a filesystem.
846  */
847 int
848 dnlc_fs_purge1(vnodeops_t *vop)
849 {
850 	nc_hash_t *end;
851 	nc_hash_t *hp;
852 	ncache_t *ncp;
853 	vnode_t *vp;
854 
855 	if (!doingcache)
856 		return (0);
857 
858 	ncs.ncs_purge_fs1.value.ui64++;
859 
860 	/*
861 	 * Scan the dnlc entries looking for a likely candidate.
862 	 */
863 	hp = end = dnlc_purge_fs1_rotor;
864 
865 	do {
866 		if (++hp == &nc_hash[nc_hashsz])
867 			hp = nc_hash;
868 		dnlc_purge_fs1_rotor = hp;
869 		if (hp->hash_next == (ncache_t *)hp)
870 			continue;
871 		mutex_enter(&hp->hash_lock);
872 		for (ncp = hp->hash_prev;
873 		    ncp != (ncache_t *)hp;
874 		    ncp = ncp->hash_prev) {
875 			vp = ncp->vp;
876 			if (!vn_has_cached_data(vp) && (vp->v_count == 1) &&
877 			    vn_matchops(vp, vop))
878 				break;
879 		}
880 		if (ncp != (ncache_t *)hp) {
881 			nc_rmhash(ncp);
882 			mutex_exit(&hp->hash_lock);
883 			VN_RELE(ncp->dp);
884 			VN_RELE(vp)
885 			dnlc_free(ncp);
886 			ncs.ncs_purge_total.value.ui64++;
887 			return (1);
888 		}
889 		mutex_exit(&hp->hash_lock);
890 	} while (hp != end);
891 	return (0);
892 }
893 
894 /*
895  * Perform a reverse lookup in the DNLC.  This will find the first occurrence of
896  * the vnode.  If successful, it will return the vnode of the parent, and the
897  * name of the entry in the given buffer.  If it cannot be found, or the buffer
898  * is too small, then it will return NULL.  Note that this is a highly
899  * inefficient function, since the DNLC is constructed solely for forward
900  * lookups.
901  */
902 vnode_t *
903 dnlc_reverse_lookup(vnode_t *vp, char *buf, size_t buflen)
904 {
905 	nc_hash_t *nch;
906 	ncache_t *ncp;
907 	vnode_t *pvp;
908 
909 	if (!doingcache)
910 		return (NULL);
911 
912 	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
913 		mutex_enter(&nch->hash_lock);
914 		ncp = nch->hash_next;
915 		while (ncp != (ncache_t *)nch) {
916 			/*
917 			 * We ignore '..' entries since it can create
918 			 * confusion and infinite loops.
919 			 */
920 			if (ncp->vp == vp && !(ncp->namlen == 2 &&
921 			    0 == bcmp(ncp->name, "..", 2)) &&
922 			    ncp->namlen < buflen) {
923 				bcopy(ncp->name, buf, ncp->namlen);
924 				buf[ncp->namlen] = '\0';
925 				pvp = ncp->dp;
926 				VN_HOLD(pvp);
927 				mutex_exit(&nch->hash_lock);
928 				return (pvp);
929 			}
930 			ncp = ncp->hash_next;
931 		}
932 		mutex_exit(&nch->hash_lock);
933 	}
934 
935 	return (NULL);
936 }
937 /*
938  * Utility routine to search for a cache entry. Return the
939  * ncache entry if found, NULL otherwise.
940  */
941 static ncache_t *
942 dnlc_search(vnode_t *dp, char *name, uchar_t namlen, int hash)
943 {
944 	nc_hash_t *hp;
945 	ncache_t *ncp;
946 
947 	hp = &nc_hash[hash & nc_hashmask];
948 
949 	for (ncp = hp->hash_next; ncp != (ncache_t *)hp; ncp = ncp->hash_next) {
950 		if (ncp->hash == hash &&
951 		    ncp->dp == dp &&
952 		    ncp->namlen == namlen &&
953 		    bcmp(ncp->name, name, namlen) == 0)
954 			return (ncp);
955 	}
956 	return (NULL);
957 }
958 
959 #if ((1 << NBBY) - 1) < (MAXNAMELEN - 1)
960 #error ncache_t name length representation is too small
961 #endif
962 
963 /*
964  * Get a new name cache entry.
965  * If the dnlc_reduce_cache() taskq isn't keeping up with demand, or memory
966  * is short then just return NULL. If we're over ncsize then kick off a
967  * thread to free some in use entries down to dnlc_nentries_low_water.
968  * Caller must initialise all fields except namlen.
969  * Component names are defined to be less than MAXNAMELEN
970  * which includes a null.
971  */
972 static ncache_t *
973 dnlc_get(uchar_t namlen)
974 {
975 	ncache_t *ncp;
976 
977 	if (dnlc_nentries > dnlc_max_nentries) {
978 		dnlc_max_nentries_cnt++; /* keep a statistic */
979 		return (NULL);
980 	}
981 	ncp = kmem_alloc(sizeof (ncache_t) + namlen, KM_NOSLEEP);
982 	if (ncp == NULL) {
983 		return (NULL);
984 	}
985 	ncp->namlen = namlen;
986 	atomic_add_32(&dnlc_nentries, 1);
987 	if (dnlc_reduce_idle && (dnlc_nentries >= ncsize)) {
988 		dnlc_reduce_idle = 0;
989 		(void) taskq_dispatch(system_taskq, dnlc_reduce_cache,
990 		    NULL, TQ_SLEEP);
991 	}
992 	return (ncp);
993 }
994 
995 /*
996  * Taskq routine to free up name cache entries to reduce the
997  * cache size to the low water mark.
998  */
999 /*ARGSUSED*/
1000 static void
1001 dnlc_reduce_cache(void *unused)
1002 {
1003 	nc_hash_t *hp = dnlc_free_rotor;
1004 	vnode_t *vp;
1005 	ncache_t *ncp;
1006 	int cnt;
1007 
1008 	do {
1009 		/*
1010 		 * Find the first non empty hash queue without locking
1011 		 * Recheck we really have entries to avoid
1012 		 * an infinite loop if all the entries get purged.
1013 		 */
1014 		do {
1015 			if (++hp == &nc_hash[nc_hashsz]) {
1016 				hp = nc_hash;
1017 				if (dnlc_nentries < dnlc_nentries_low_water) {
1018 					dnlc_reduce_idle = 1;
1019 					return;
1020 				}
1021 			}
1022 		} while (hp->hash_next == (ncache_t *)hp);
1023 
1024 		mutex_enter(&hp->hash_lock);
1025 		for (cnt = 0, ncp = hp->hash_prev; ncp != (ncache_t *)hp;
1026 		    ncp = ncp->hash_prev, cnt++) {
1027 			vp = ncp->vp;
1028 			/*
1029 			 * A name cache entry with a reference count
1030 			 * of one is only referenced by the dnlc.
1031 			 * Also negative cache entries are purged first.
1032 			 */
1033 			if (!vn_has_cached_data(vp) &&
1034 			    ((vp->v_count == 1) || (vp == DNLC_NO_VNODE))) {
1035 				ncs.ncs_pick_heur.value.ui64++;
1036 				goto found;
1037 			}
1038 			/*
1039 			 * Remove from the end of the chain if the
1040 			 * chain is too long
1041 			 */
1042 			if (cnt > dnlc_long_chain) {
1043 				ncp = hp->hash_prev;
1044 				ncs.ncs_pick_last.value.ui64++;
1045 				vp = ncp->vp;
1046 				goto found;
1047 			}
1048 		}
1049 		/* check for race and continue */
1050 		if (hp->hash_next == (ncache_t *)hp) {
1051 			mutex_exit(&hp->hash_lock);
1052 			continue;
1053 		}
1054 
1055 		ncp = hp->hash_prev; /* pick the last one in the hash queue */
1056 		ncs.ncs_pick_last.value.ui64++;
1057 		vp = ncp->vp;
1058 found:
1059 		/*
1060 		 * Remove from hash chain.
1061 		 */
1062 		nc_rmhash(ncp);
1063 		mutex_exit(&hp->hash_lock);
1064 		VN_RELE(vp);
1065 		VN_RELE(ncp->dp);
1066 		dnlc_free(ncp);
1067 	} while (dnlc_nentries > dnlc_nentries_low_water);
1068 
1069 	dnlc_free_rotor = hp;
1070 	dnlc_reduce_idle = 1;
1071 }
1072 
1073 /*
1074  * Directory caching routines
1075  * ==========================
1076  *
1077  * See dnlc.h for details of the interfaces below.
1078  */
1079 
1080 /*
1081  * Lookup up an entry in a complete or partial directory cache.
1082  */
1083 dcret_t
1084 dnlc_dir_lookup(dcanchor_t *dcap, char *name, uint64_t *handle)
1085 {
1086 	dircache_t *dcp;
1087 	dcentry_t *dep;
1088 	int hash;
1089 	int ret;
1090 	uchar_t namlen;
1091 
1092 	/*
1093 	 * can test without lock as we are only a cache
1094 	 */
1095 	if (!VALID_DIR_CACHE(dcap->dca_dircache)) {
1096 		ncs.ncs_dir_misses.value.ui64++;
1097 		return (DNOCACHE);
1098 	}
1099 
1100 	if (!dnlc_dir_enable) {
1101 		return (DNOCACHE);
1102 	}
1103 
1104 	mutex_enter(&dcap->dca_lock);
1105 	dcp = (dircache_t *)dcap->dca_dircache;
1106 	if (VALID_DIR_CACHE(dcp)) {
1107 		dcp->dc_actime = lbolt64;
1108 		DNLC_DIR_HASH(name, hash, namlen);
1109 		dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1110 		while (dep != NULL) {
1111 			if ((dep->de_hash == hash) &&
1112 			    (namlen == dep->de_namelen) &&
1113 			    bcmp(dep->de_name, name, namlen) == 0) {
1114 				*handle = dep->de_handle;
1115 				mutex_exit(&dcap->dca_lock);
1116 				ncs.ncs_dir_hits.value.ui64++;
1117 				return (DFOUND);
1118 			}
1119 			dep = dep->de_next;
1120 		}
1121 		if (dcp->dc_complete) {
1122 			ret = DNOENT;
1123 		} else {
1124 			ret = DNOCACHE;
1125 		}
1126 		mutex_exit(&dcap->dca_lock);
1127 		return (ret);
1128 	} else {
1129 		mutex_exit(&dcap->dca_lock);
1130 		ncs.ncs_dir_misses.value.ui64++;
1131 		return (DNOCACHE);
1132 	}
1133 }
1134 
1135 /*
1136  * Start a new directory cache. An estimate of the number of
1137  * entries is provided to as a quick check to ensure the directory
1138  * is cacheable.
1139  */
1140 dcret_t
1141 dnlc_dir_start(dcanchor_t *dcap, uint_t num_entries)
1142 {
1143 	dircache_t *dcp;
1144 
1145 	if (!dnlc_dir_enable ||
1146 	    (num_entries < dnlc_dir_min_size)) {
1147 		return (DNOCACHE);
1148 	}
1149 
1150 	if (num_entries > dnlc_dir_max_size) {
1151 		return (DTOOBIG);
1152 	}
1153 
1154 	mutex_enter(&dc_head.dch_lock);
1155 	mutex_enter(&dcap->dca_lock);
1156 
1157 	if (dcap->dca_dircache == DC_RET_LOW_MEM) {
1158 		dcap->dca_dircache = NULL;
1159 		mutex_exit(&dcap->dca_lock);
1160 		mutex_exit(&dc_head.dch_lock);
1161 		return (DNOMEM);
1162 	}
1163 
1164 	/*
1165 	 * Check if there's currently a cache.
1166 	 * This probably only occurs on a race.
1167 	 */
1168 	if (dcap->dca_dircache != NULL) {
1169 		mutex_exit(&dcap->dca_lock);
1170 		mutex_exit(&dc_head.dch_lock);
1171 		return (DNOCACHE);
1172 	}
1173 
1174 	/*
1175 	 * Allocate the dircache struct, entry and free space hash tables.
1176 	 * These tables are initially just one entry but dynamically resize
1177 	 * when entries and free space are added or removed.
1178 	 */
1179 	if ((dcp = kmem_zalloc(sizeof (dircache_t), KM_NOSLEEP)) == NULL) {
1180 		goto error;
1181 	}
1182 	if ((dcp->dc_namehash = kmem_zalloc(sizeof (dcentry_t *),
1183 	    KM_NOSLEEP)) == NULL) {
1184 		goto error;
1185 	}
1186 	if ((dcp->dc_freehash = kmem_zalloc(sizeof (dcfree_t *),
1187 	    KM_NOSLEEP)) == NULL) {
1188 		goto error;
1189 	}
1190 
1191 	dcp->dc_anchor = dcap; /* set back pointer to anchor */
1192 	dcap->dca_dircache = dcp;
1193 
1194 	/* add into head of global chain */
1195 	dcp->dc_next = dc_head.dch_next;
1196 	dcp->dc_prev = (dircache_t *)&dc_head;
1197 	dcp->dc_next->dc_prev = dcp;
1198 	dc_head.dch_next = dcp;
1199 
1200 	mutex_exit(&dcap->dca_lock);
1201 	mutex_exit(&dc_head.dch_lock);
1202 	ncs.ncs_cur_dirs.value.ui64++;
1203 	ncs.ncs_dirs_cached.value.ui64++;
1204 	return (DOK);
1205 error:
1206 	if (dcp != NULL) {
1207 		if (dcp->dc_namehash) {
1208 			kmem_free(dcp->dc_namehash, sizeof (dcentry_t *));
1209 		}
1210 		kmem_free(dcp, sizeof (dircache_t));
1211 	}
1212 	/*
1213 	 * Must also kmem_free dcp->dc_freehash if more error cases are added
1214 	 */
1215 	mutex_exit(&dcap->dca_lock);
1216 	mutex_exit(&dc_head.dch_lock);
1217 	ncs.ncs_dir_start_nm.value.ui64++;
1218 	return (DNOCACHE);
1219 }
1220 
1221 /*
1222  * Add a directopry entry to a partial or complete directory cache.
1223  */
1224 dcret_t
1225 dnlc_dir_add_entry(dcanchor_t *dcap, char *name, uint64_t handle)
1226 {
1227 	dircache_t *dcp;
1228 	dcentry_t **hp, *dep;
1229 	int hash;
1230 	uint_t capacity;
1231 	uchar_t namlen;
1232 
1233 	/*
1234 	 * Allocate the dcentry struct, including the variable
1235 	 * size name. Note, the null terminator is not copied.
1236 	 *
1237 	 * We do this outside the lock to avoid possible deadlock if
1238 	 * dnlc_dir_reclaim() is called as a result of memory shortage.
1239 	 */
1240 	DNLC_DIR_HASH(name, hash, namlen);
1241 	dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
1242 	if (dep == NULL) {
1243 #ifdef DEBUG
1244 		/*
1245 		 * The kmem allocator generates random failures for
1246 		 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
1247 		 * So try again before we blow away a perfectly good cache.
1248 		 * This is done not to cover an error but purely for
1249 		 * performance running a debug kernel.
1250 		 * This random error only occurs in debug mode.
1251 		 */
1252 		dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
1253 		if (dep != NULL)
1254 			goto ok;
1255 #endif
1256 		ncs.ncs_dir_add_nm.value.ui64++;
1257 		/*
1258 		 * Free a directory cache. This may be the one we are
1259 		 * called with.
1260 		 */
1261 		dnlc_dir_reclaim(NULL);
1262 		dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
1263 		if (dep == NULL) {
1264 			/*
1265 			 * still no memory, better delete this cache
1266 			 */
1267 			mutex_enter(&dcap->dca_lock);
1268 			dcp = (dircache_t *)dcap->dca_dircache;
1269 			if (VALID_DIR_CACHE(dcp)) {
1270 				dnlc_dir_abort(dcp);
1271 				dcap->dca_dircache = DC_RET_LOW_MEM;
1272 			}
1273 			mutex_exit(&dcap->dca_lock);
1274 			ncs.ncs_dir_addabort.value.ui64++;
1275 			return (DNOCACHE);
1276 		}
1277 		/*
1278 		 * fall through as if the 1st kmem_alloc had worked
1279 		 */
1280 	}
1281 #ifdef DEBUG
1282 ok:
1283 #endif
1284 	mutex_enter(&dcap->dca_lock);
1285 	dcp = (dircache_t *)dcap->dca_dircache;
1286 	if (VALID_DIR_CACHE(dcp)) {
1287 		/*
1288 		 * If the total number of entries goes above the max
1289 		 * then free this cache
1290 		 */
1291 		if ((dcp->dc_num_entries + dcp->dc_num_free) >
1292 			dnlc_dir_max_size) {
1293 			mutex_exit(&dcap->dca_lock);
1294 			dnlc_dir_purge(dcap);
1295 			kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
1296 			ncs.ncs_dir_add_max.value.ui64++;
1297 			return (DTOOBIG);
1298 		}
1299 		dcp->dc_num_entries++;
1300 		capacity = (dcp->dc_nhash_mask + 1) << dnlc_dir_hash_size_shift;
1301 		if (dcp->dc_num_entries >=
1302 		    (capacity << dnlc_dir_hash_resize_shift)) {
1303 			dnlc_dir_adjust_nhash(dcp);
1304 		}
1305 		hp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1306 
1307 		/*
1308 		 * Initialise and chain in new entry
1309 		 */
1310 		dep->de_handle = handle;
1311 		dep->de_hash = hash;
1312 		/*
1313 		 * Note de_namelen is a uchar_t to conserve space
1314 		 * and alignment padding. The max length of any
1315 		 * pathname component is defined as MAXNAMELEN
1316 		 * which is 256 (including the terminating null).
1317 		 * So provided this doesn't change, we don't include the null,
1318 		 * we always use bcmp to compare strings, and we don't
1319 		 * start storing full names, then we are ok.
1320 		 * The space savings is worth it.
1321 		 */
1322 		dep->de_namelen = namlen;
1323 		bcopy(name, dep->de_name, namlen);
1324 		dep->de_next = *hp;
1325 		*hp = dep;
1326 		dcp->dc_actime = lbolt64;
1327 		mutex_exit(&dcap->dca_lock);
1328 		ncs.ncs_dir_num_ents.value.ui64++;
1329 		return (DOK);
1330 	} else {
1331 		mutex_exit(&dcap->dca_lock);
1332 		kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
1333 		return (DNOCACHE);
1334 	}
1335 }
1336 
1337 /*
1338  * Add free space to a partial or complete directory cache.
1339  */
1340 dcret_t
1341 dnlc_dir_add_space(dcanchor_t *dcap, uint_t len, uint64_t handle)
1342 {
1343 	dircache_t *dcp;
1344 	dcfree_t *dfp, **hp;
1345 	uint_t capacity;
1346 
1347 	/*
1348 	 * We kmem_alloc outside the lock to avoid possible deadlock if
1349 	 * dnlc_dir_reclaim() is called as a result of memory shortage.
1350 	 */
1351 	dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1352 	if (dfp == NULL) {
1353 #ifdef DEBUG
1354 		/*
1355 		 * The kmem allocator generates random failures for
1356 		 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
1357 		 * So try again before we blow away a perfectly good cache.
1358 		 * This random error only occurs in debug mode
1359 		 */
1360 		dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1361 		if (dfp != NULL)
1362 			goto ok;
1363 #endif
1364 		ncs.ncs_dir_add_nm.value.ui64++;
1365 		/*
1366 		 * Free a directory cache. This may be the one we are
1367 		 * called with.
1368 		 */
1369 		dnlc_dir_reclaim(NULL);
1370 		dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1371 		if (dfp == NULL) {
1372 			/*
1373 			 * still no memory, better delete this cache
1374 			 */
1375 			mutex_enter(&dcap->dca_lock);
1376 			dcp = (dircache_t *)dcap->dca_dircache;
1377 			if (VALID_DIR_CACHE(dcp)) {
1378 				dnlc_dir_abort(dcp);
1379 				dcap->dca_dircache = DC_RET_LOW_MEM;
1380 			}
1381 			mutex_exit(&dcap->dca_lock);
1382 			ncs.ncs_dir_addabort.value.ui64++;
1383 			return (DNOCACHE);
1384 		}
1385 		/*
1386 		 * fall through as if the 1st kmem_alloc had worked
1387 		 */
1388 	}
1389 
1390 #ifdef DEBUG
1391 ok:
1392 #endif
1393 	mutex_enter(&dcap->dca_lock);
1394 	dcp = (dircache_t *)dcap->dca_dircache;
1395 	if (VALID_DIR_CACHE(dcp)) {
1396 		if ((dcp->dc_num_entries + dcp->dc_num_free) >
1397 			dnlc_dir_max_size) {
1398 			mutex_exit(&dcap->dca_lock);
1399 			dnlc_dir_purge(dcap);
1400 			kmem_cache_free(dnlc_dir_space_cache, dfp);
1401 			ncs.ncs_dir_add_max.value.ui64++;
1402 			return (DTOOBIG);
1403 		}
1404 		dcp->dc_num_free++;
1405 		capacity = (dcp->dc_fhash_mask + 1) << dnlc_dir_hash_size_shift;
1406 		if (dcp->dc_num_free >=
1407 		    (capacity << dnlc_dir_hash_resize_shift)) {
1408 			dnlc_dir_adjust_fhash(dcp);
1409 		}
1410 		/*
1411 		 * Initialise and chain a new entry
1412 		 */
1413 		dfp->df_handle = handle;
1414 		dfp->df_len = len;
1415 		dcp->dc_actime = lbolt64;
1416 		hp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
1417 		dfp->df_next = *hp;
1418 		*hp = dfp;
1419 		mutex_exit(&dcap->dca_lock);
1420 		ncs.ncs_dir_num_ents.value.ui64++;
1421 		return (DOK);
1422 	} else {
1423 		mutex_exit(&dcap->dca_lock);
1424 		kmem_cache_free(dnlc_dir_space_cache, dfp);
1425 		return (DNOCACHE);
1426 	}
1427 }
1428 
1429 /*
1430  * Mark a directory cache as complete.
1431  */
1432 void
1433 dnlc_dir_complete(dcanchor_t *dcap)
1434 {
1435 	dircache_t *dcp;
1436 
1437 	mutex_enter(&dcap->dca_lock);
1438 	dcp = (dircache_t *)dcap->dca_dircache;
1439 	if (VALID_DIR_CACHE(dcp)) {
1440 		dcp->dc_complete = B_TRUE;
1441 	}
1442 	mutex_exit(&dcap->dca_lock);
1443 }
1444 
1445 /*
1446  * Internal routine to delete a partial or full directory cache.
1447  * No additional locking needed.
1448  */
1449 static void
1450 dnlc_dir_abort(dircache_t *dcp)
1451 {
1452 	dcentry_t *dep, *nhp;
1453 	dcfree_t *fep, *fhp;
1454 	uint_t nhtsize = dcp->dc_nhash_mask + 1; /* name hash table size */
1455 	uint_t fhtsize = dcp->dc_fhash_mask + 1; /* free hash table size */
1456 	uint_t i;
1457 
1458 	/*
1459 	 * Free up the cached name entries and hash table
1460 	 */
1461 	for (i = 0; i < nhtsize; i++) { /* for each hash bucket */
1462 		nhp = dcp->dc_namehash[i];
1463 		while (nhp != NULL) { /* for each chained entry */
1464 			dep = nhp->de_next;
1465 			kmem_free(nhp, sizeof (dcentry_t) - 1 +
1466 			    nhp->de_namelen);
1467 			nhp = dep;
1468 		}
1469 	}
1470 	kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * nhtsize);
1471 
1472 	/*
1473 	 * Free up the free space entries and hash table
1474 	 */
1475 	for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
1476 		fhp = dcp->dc_freehash[i];
1477 		while (fhp != NULL) { /* for each chained entry */
1478 			fep = fhp->df_next;
1479 			kmem_cache_free(dnlc_dir_space_cache, fhp);
1480 			fhp = fep;
1481 		}
1482 	}
1483 	kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * fhtsize);
1484 
1485 	/*
1486 	 * Finally free the directory cache structure itself
1487 	 */
1488 	ncs.ncs_dir_num_ents.value.ui64 -= (dcp->dc_num_entries +
1489 	    dcp->dc_num_free);
1490 	kmem_free(dcp, sizeof (dircache_t));
1491 	ncs.ncs_cur_dirs.value.ui64--;
1492 }
1493 
1494 /*
1495  * Remove a partial or complete directory cache
1496  */
1497 void
1498 dnlc_dir_purge(dcanchor_t *dcap)
1499 {
1500 	dircache_t *dcp;
1501 
1502 	mutex_enter(&dc_head.dch_lock);
1503 	mutex_enter(&dcap->dca_lock);
1504 	dcp = (dircache_t *)dcap->dca_dircache;
1505 	if (!VALID_DIR_CACHE(dcp)) {
1506 		mutex_exit(&dcap->dca_lock);
1507 		mutex_exit(&dc_head.dch_lock);
1508 		return;
1509 	}
1510 	dcap->dca_dircache = NULL;
1511 	/*
1512 	 * Unchain from global list
1513 	 */
1514 	dcp->dc_prev->dc_next = dcp->dc_next;
1515 	dcp->dc_next->dc_prev = dcp->dc_prev;
1516 	mutex_exit(&dcap->dca_lock);
1517 	mutex_exit(&dc_head.dch_lock);
1518 	dnlc_dir_abort(dcp);
1519 }
1520 
1521 /*
1522  * Remove an entry from a complete or partial directory cache.
1523  * Return the handle if it's non null.
1524  */
1525 dcret_t
1526 dnlc_dir_rem_entry(dcanchor_t *dcap, char *name, uint64_t *handlep)
1527 {
1528 	dircache_t *dcp;
1529 	dcentry_t **prevpp, *te;
1530 	uint_t capacity;
1531 	int hash;
1532 	int ret;
1533 	uchar_t namlen;
1534 
1535 	if (!dnlc_dir_enable) {
1536 		return (DNOCACHE);
1537 	}
1538 
1539 	mutex_enter(&dcap->dca_lock);
1540 	dcp = (dircache_t *)dcap->dca_dircache;
1541 	if (VALID_DIR_CACHE(dcp)) {
1542 		dcp->dc_actime = lbolt64;
1543 		if (dcp->dc_nhash_mask > 0) { /* ie not minimum */
1544 			capacity = (dcp->dc_nhash_mask + 1) <<
1545 			    dnlc_dir_hash_size_shift;
1546 			if (dcp->dc_num_entries <=
1547 			    (capacity >> dnlc_dir_hash_resize_shift)) {
1548 				dnlc_dir_adjust_nhash(dcp);
1549 			}
1550 		}
1551 		DNLC_DIR_HASH(name, hash, namlen);
1552 		prevpp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1553 		while (*prevpp != NULL) {
1554 			if (((*prevpp)->de_hash == hash) &&
1555 			    (namlen == (*prevpp)->de_namelen) &&
1556 			    bcmp((*prevpp)->de_name, name, namlen) == 0) {
1557 				if (handlep != NULL) {
1558 					*handlep = (*prevpp)->de_handle;
1559 				}
1560 				te = *prevpp;
1561 				*prevpp = (*prevpp)->de_next;
1562 				kmem_free(te, sizeof (dcentry_t) - 1 +
1563 				    te->de_namelen);
1564 
1565 				/*
1566 				 * If the total number of entries
1567 				 * falls below half the minimum number
1568 				 * of entries then free this cache.
1569 				 */
1570 				if (--dcp->dc_num_entries <
1571 				    (dnlc_dir_min_size >> 1)) {
1572 					mutex_exit(&dcap->dca_lock);
1573 					dnlc_dir_purge(dcap);
1574 				} else {
1575 					mutex_exit(&dcap->dca_lock);
1576 				}
1577 				ncs.ncs_dir_num_ents.value.ui64--;
1578 				return (DFOUND);
1579 			}
1580 			prevpp = &((*prevpp)->de_next);
1581 		}
1582 		if (dcp->dc_complete) {
1583 			ncs.ncs_dir_reme_fai.value.ui64++;
1584 			ret = DNOENT;
1585 		} else {
1586 			ret = DNOCACHE;
1587 		}
1588 		mutex_exit(&dcap->dca_lock);
1589 		return (ret);
1590 	} else {
1591 		mutex_exit(&dcap->dca_lock);
1592 		return (DNOCACHE);
1593 	}
1594 }
1595 
1596 
1597 /*
1598  * Remove free space of at least the given length from a complete
1599  * or partial directory cache.
1600  */
1601 dcret_t
1602 dnlc_dir_rem_space_by_len(dcanchor_t *dcap, uint_t len, uint64_t *handlep)
1603 {
1604 	dircache_t *dcp;
1605 	dcfree_t **prevpp, *tfp;
1606 	uint_t fhtsize; /* free hash table size */
1607 	uint_t i;
1608 	uint_t capacity;
1609 	int ret;
1610 
1611 	if (!dnlc_dir_enable) {
1612 		return (DNOCACHE);
1613 	}
1614 
1615 	mutex_enter(&dcap->dca_lock);
1616 	dcp = (dircache_t *)dcap->dca_dircache;
1617 	if (VALID_DIR_CACHE(dcp)) {
1618 		dcp->dc_actime = lbolt64;
1619 		if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
1620 			capacity = (dcp->dc_fhash_mask + 1) <<
1621 			    dnlc_dir_hash_size_shift;
1622 			if (dcp->dc_num_free <=
1623 			    (capacity >> dnlc_dir_hash_resize_shift)) {
1624 				dnlc_dir_adjust_fhash(dcp);
1625 			}
1626 		}
1627 		/*
1628 		 * Search for an entry of the appropriate size
1629 		 * on a first fit basis.
1630 		 */
1631 		fhtsize = dcp->dc_fhash_mask + 1;
1632 		for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
1633 			prevpp = &(dcp->dc_freehash[i]);
1634 			while (*prevpp != NULL) {
1635 				if ((*prevpp)->df_len >= len) {
1636 					*handlep = (*prevpp)->df_handle;
1637 					tfp = *prevpp;
1638 					*prevpp = (*prevpp)->df_next;
1639 					dcp->dc_num_free--;
1640 					mutex_exit(&dcap->dca_lock);
1641 					kmem_cache_free(dnlc_dir_space_cache,
1642 					    tfp);
1643 					ncs.ncs_dir_num_ents.value.ui64--;
1644 					return (DFOUND);
1645 				}
1646 				prevpp = &((*prevpp)->df_next);
1647 			}
1648 		}
1649 		if (dcp->dc_complete) {
1650 			ret = DNOENT;
1651 		} else {
1652 			ret = DNOCACHE;
1653 		}
1654 		mutex_exit(&dcap->dca_lock);
1655 		return (ret);
1656 	} else {
1657 		mutex_exit(&dcap->dca_lock);
1658 		return (DNOCACHE);
1659 	}
1660 }
1661 
1662 /*
1663  * Remove free space with the given handle from a complete or partial
1664  * directory cache.
1665  */
1666 dcret_t
1667 dnlc_dir_rem_space_by_handle(dcanchor_t *dcap, uint64_t handle)
1668 {
1669 	dircache_t *dcp;
1670 	dcfree_t **prevpp, *tfp;
1671 	uint_t capacity;
1672 	int ret;
1673 
1674 	if (!dnlc_dir_enable) {
1675 		return (DNOCACHE);
1676 	}
1677 
1678 	mutex_enter(&dcap->dca_lock);
1679 	dcp = (dircache_t *)dcap->dca_dircache;
1680 	if (VALID_DIR_CACHE(dcp)) {
1681 		dcp->dc_actime = lbolt64;
1682 		if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
1683 			capacity = (dcp->dc_fhash_mask + 1) <<
1684 			    dnlc_dir_hash_size_shift;
1685 			if (dcp->dc_num_free <=
1686 			    (capacity >> dnlc_dir_hash_resize_shift)) {
1687 				dnlc_dir_adjust_fhash(dcp);
1688 			}
1689 		}
1690 
1691 		/*
1692 		 * search for the exact entry
1693 		 */
1694 		prevpp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
1695 		while (*prevpp != NULL) {
1696 			if ((*prevpp)->df_handle == handle) {
1697 				tfp = *prevpp;
1698 				*prevpp = (*prevpp)->df_next;
1699 				dcp->dc_num_free--;
1700 				mutex_exit(&dcap->dca_lock);
1701 				kmem_cache_free(dnlc_dir_space_cache, tfp);
1702 				ncs.ncs_dir_num_ents.value.ui64--;
1703 				return (DFOUND);
1704 			}
1705 			prevpp = &((*prevpp)->df_next);
1706 		}
1707 		if (dcp->dc_complete) {
1708 			ncs.ncs_dir_rems_fai.value.ui64++;
1709 			ret = DNOENT;
1710 		} else {
1711 			ret = DNOCACHE;
1712 		}
1713 		mutex_exit(&dcap->dca_lock);
1714 		return (ret);
1715 	} else {
1716 		mutex_exit(&dcap->dca_lock);
1717 		return (DNOCACHE);
1718 	}
1719 }
1720 
1721 /*
1722  * Update the handle of an directory cache entry.
1723  */
1724 dcret_t
1725 dnlc_dir_update(dcanchor_t *dcap, char *name, uint64_t handle)
1726 {
1727 	dircache_t *dcp;
1728 	dcentry_t *dep;
1729 	int hash;
1730 	int ret;
1731 	uchar_t namlen;
1732 
1733 	if (!dnlc_dir_enable) {
1734 		return (DNOCACHE);
1735 	}
1736 
1737 	mutex_enter(&dcap->dca_lock);
1738 	dcp = (dircache_t *)dcap->dca_dircache;
1739 	if (VALID_DIR_CACHE(dcp)) {
1740 		dcp->dc_actime = lbolt64;
1741 		DNLC_DIR_HASH(name, hash, namlen);
1742 		dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1743 		while (dep != NULL) {
1744 			if ((dep->de_hash == hash) &&
1745 			    (namlen == dep->de_namelen) &&
1746 			    bcmp(dep->de_name, name, namlen) == 0) {
1747 				dep->de_handle = handle;
1748 				mutex_exit(&dcap->dca_lock);
1749 				return (DFOUND);
1750 			}
1751 			dep = dep->de_next;
1752 		}
1753 		if (dcp->dc_complete) {
1754 			ncs.ncs_dir_upd_fail.value.ui64++;
1755 			ret = DNOENT;
1756 		} else {
1757 			ret = DNOCACHE;
1758 		}
1759 		mutex_exit(&dcap->dca_lock);
1760 		return (ret);
1761 	} else {
1762 		mutex_exit(&dcap->dca_lock);
1763 		return (DNOCACHE);
1764 	}
1765 }
1766 
1767 void
1768 dnlc_dir_fini(dcanchor_t *dcap)
1769 {
1770 	dircache_t *dcp;
1771 
1772 	mutex_enter(&dc_head.dch_lock);
1773 	mutex_enter(&dcap->dca_lock);
1774 	dcp = (dircache_t *)dcap->dca_dircache;
1775 	if (VALID_DIR_CACHE(dcp)) {
1776 		/*
1777 		 * Unchain from global list
1778 		 */
1779 		ncs.ncs_dir_finipurg.value.ui64++;
1780 		dcp->dc_prev->dc_next = dcp->dc_next;
1781 		dcp->dc_next->dc_prev = dcp->dc_prev;
1782 	} else {
1783 		dcp = NULL;
1784 	}
1785 	dcap->dca_dircache = NULL;
1786 	mutex_exit(&dcap->dca_lock);
1787 	mutex_exit(&dc_head.dch_lock);
1788 	mutex_destroy(&dcap->dca_lock);
1789 	if (dcp) {
1790 		dnlc_dir_abort(dcp);
1791 	}
1792 }
1793 
1794 /*
1795  * Reclaim callback for dnlc directory caching.
1796  * Invoked by the kernel memory allocator when memory gets tight.
1797  * This is a pretty serious condition and can lead easily lead to system
1798  * hangs if not enough space is returned.
1799  *
1800  * Deciding which directory (or directories) to purge is tricky.
1801  * Purging everything is an overkill, but purging just the oldest used
1802  * was found to lead to hangs. The largest cached directories use the
1803  * most memory, but take the most effort to rebuild, whereas the smaller
1804  * ones have little value and give back little space. So what to do?
1805  *
1806  * The current policy is to continue purging the oldest used directories
1807  * until at least dnlc_dir_min_reclaim directory entries have been purged.
1808  */
1809 /*ARGSUSED*/
1810 static void
1811 dnlc_dir_reclaim(void *unused)
1812 {
1813 	dircache_t *dcp, *oldest;
1814 	uint_t dirent_cnt = 0;
1815 
1816 	mutex_enter(&dc_head.dch_lock);
1817 	while (dirent_cnt < dnlc_dir_min_reclaim) {
1818 		dcp = dc_head.dch_next;
1819 		oldest = NULL;
1820 		while (dcp != (dircache_t *)&dc_head) {
1821 			if (oldest == NULL) {
1822 				oldest = dcp;
1823 			} else {
1824 				if (dcp->dc_actime < oldest->dc_actime) {
1825 					oldest = dcp;
1826 				}
1827 			}
1828 			dcp = dcp->dc_next;
1829 		}
1830 		if (oldest == NULL) {
1831 			/* nothing to delete */
1832 			mutex_exit(&dc_head.dch_lock);
1833 			return;
1834 		}
1835 		/*
1836 		 * remove from directory chain and purge
1837 		 */
1838 		oldest->dc_prev->dc_next = oldest->dc_next;
1839 		oldest->dc_next->dc_prev = oldest->dc_prev;
1840 		mutex_enter(&oldest->dc_anchor->dca_lock);
1841 		/*
1842 		 * If this was the last entry then it must be too large.
1843 		 * Mark it as such by saving a special dircache_t
1844 		 * pointer (DC_RET_LOW_MEM) in the anchor. The error DNOMEM
1845 		 * will be presented to the caller of dnlc_dir_start()
1846 		 */
1847 		if (oldest->dc_next == oldest->dc_prev) {
1848 			oldest->dc_anchor->dca_dircache = DC_RET_LOW_MEM;
1849 			ncs.ncs_dir_rec_last.value.ui64++;
1850 		} else {
1851 			oldest->dc_anchor->dca_dircache = NULL;
1852 			ncs.ncs_dir_recl_any.value.ui64++;
1853 		}
1854 		mutex_exit(&oldest->dc_anchor->dca_lock);
1855 		dirent_cnt += oldest->dc_num_entries;
1856 		dnlc_dir_abort(oldest);
1857 	}
1858 	mutex_exit(&dc_head.dch_lock);
1859 }
1860 
1861 /*
1862  * Dynamically grow or shrink the size of the name hash table
1863  */
1864 static void
1865 dnlc_dir_adjust_nhash(dircache_t *dcp)
1866 {
1867 	dcentry_t **newhash, *dep, **nhp, *tep;
1868 	uint_t newsize;
1869 	uint_t oldsize;
1870 	uint_t newsizemask;
1871 	int i;
1872 
1873 	/*
1874 	 * Allocate new hash table
1875 	 */
1876 	newsize = dcp->dc_num_entries >> dnlc_dir_hash_size_shift;
1877 	newhash = kmem_zalloc(sizeof (dcentry_t *) * newsize, KM_NOSLEEP);
1878 	if (newhash == NULL) {
1879 		/*
1880 		 * System is short on memory just return
1881 		 * Note, the old hash table is still usable.
1882 		 * This return is unlikely to repeatedy occur, because
1883 		 * either some other directory caches will be reclaimed
1884 		 * due to memory shortage, thus freeing memory, or this
1885 		 * directory cahe will be reclaimed.
1886 		 */
1887 		return;
1888 	}
1889 	oldsize = dcp->dc_nhash_mask + 1;
1890 	dcp->dc_nhash_mask = newsizemask = newsize - 1;
1891 
1892 	/*
1893 	 * Move entries from the old table to the new
1894 	 */
1895 	for (i = 0; i < oldsize; i++) { /* for each hash bucket */
1896 		dep = dcp->dc_namehash[i];
1897 		while (dep != NULL) { /* for each chained entry */
1898 			tep = dep;
1899 			dep = dep->de_next;
1900 			nhp = &newhash[tep->de_hash & newsizemask];
1901 			tep->de_next = *nhp;
1902 			*nhp = tep;
1903 		}
1904 	}
1905 
1906 	/*
1907 	 * delete old hash table and set new one in place
1908 	 */
1909 	kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * oldsize);
1910 	dcp->dc_namehash = newhash;
1911 }
1912 
1913 /*
1914  * Dynamically grow or shrink the size of the free space hash table
1915  */
1916 static void
1917 dnlc_dir_adjust_fhash(dircache_t *dcp)
1918 {
1919 	dcfree_t **newhash, *dfp, **nhp, *tfp;
1920 	uint_t newsize;
1921 	uint_t oldsize;
1922 	int i;
1923 
1924 	/*
1925 	 * Allocate new hash table
1926 	 */
1927 	newsize = dcp->dc_num_free >> dnlc_dir_hash_size_shift;
1928 	newhash = kmem_zalloc(sizeof (dcfree_t *) * newsize, KM_NOSLEEP);
1929 	if (newhash == NULL) {
1930 		/*
1931 		 * System is short on memory just return
1932 		 * Note, the old hash table is still usable.
1933 		 * This return is unlikely to repeatedy occur, because
1934 		 * either some other directory caches will be reclaimed
1935 		 * due to memory shortage, thus freeing memory, or this
1936 		 * directory cahe will be reclaimed.
1937 		 */
1938 		return;
1939 	}
1940 	oldsize = dcp->dc_fhash_mask + 1;
1941 	dcp->dc_fhash_mask = newsize - 1;
1942 
1943 	/*
1944 	 * Move entries from the old table to the new
1945 	 */
1946 	for (i = 0; i < oldsize; i++) { /* for each hash bucket */
1947 		dfp = dcp->dc_freehash[i];
1948 		while (dfp != NULL) { /* for each chained entry */
1949 			tfp = dfp;
1950 			dfp = dfp->df_next;
1951 			nhp = &newhash[DDFHASH(tfp->df_handle, dcp)];
1952 			tfp->df_next = *nhp;
1953 			*nhp = tfp;
1954 		}
1955 	}
1956 
1957 	/*
1958 	 * delete old hash table and set new one in place
1959 	 */
1960 	kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * oldsize);
1961 	dcp->dc_freehash = newhash;
1962 }
1963