1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2015, Joyent, Inc.
24 * Copyright (c) 2017 by Delphix. All rights reserved.
25 */
26
27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 /*
31 * University Copyright- Copyright (c) 1982, 1986, 1988
32 * The Regents of the University of California
33 * All Rights Reserved
34 *
35 * University Acknowledgment- Portions of this document are derived from
36 * software developed by the University of California, Berkeley, and its
37 * contributors.
38 */
39
40 #include <sys/types.h>
41 #include <sys/systm.h>
42 #include <sys/param.h>
43 #include <sys/t_lock.h>
44 #include <sys/systm.h>
45 #include <sys/vfs.h>
46 #include <sys/vnode.h>
47 #include <sys/dnlc.h>
48 #include <sys/kmem.h>
49 #include <sys/cmn_err.h>
50 #include <sys/vtrace.h>
51 #include <sys/bitmap.h>
52 #include <sys/var.h>
53 #include <sys/sysmacros.h>
54 #include <sys/kstat.h>
55 #include <sys/atomic.h>
56 #include <sys/taskq.h>
57
58 /*
59 * Directory name lookup cache.
60 * Based on code originally done by Robert Elz at Melbourne.
61 *
62  * Names found by directory scans are retained in a cache
63  * for future reference. Each hash chain is ordered by LRU.
64  * The cache is indexed by a hash value obtained from (vp, name),
65  * where the vp refers to the directory containing the name.
66 */
67
68 /*
69 * We want to be able to identify files that are referenced only by the DNLC.
70 * When adding a reference from the DNLC, call VN_HOLD_DNLC instead of VN_HOLD,
71 * since multiple DNLC references should only be counted once in v_count. The
72 * VN_HOLD macro itself is aliased to VN_HOLD_CALLER in this file to help
73 * differentiate the behaviors. (Unfortunately it is not possible to #undef
74 * VN_HOLD and retain VN_HOLD_CALLER. Ideally a Makefile rule would grep
75 * uncommented C tokens to check that VN_HOLD is referenced only once in this
76 * file, to define VN_HOLD_CALLER.)
77 */
78 #define VN_HOLD_CALLER VN_HOLD
79 #define VN_HOLD_DNLC(vp) { \
80 mutex_enter(&(vp)->v_lock); \
81 if ((vp)->v_count_dnlc == 0) { \
82 VN_HOLD_LOCKED(vp); \
83 } \
84 (vp)->v_count_dnlc++; \
85 mutex_exit(&(vp)->v_lock); \
86 }
87 #define VN_RELE_DNLC(vp) { \
88 vn_rele_dnlc(vp); \
89 }
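
/*
 * A sketch of the intended accounting (illustrative only, not compiled):
 * the first DNLC hold on a vnode bumps both counts, later DNLC holds
 * touch only v_count_dnlc, and vn_rele_dnlc() drops the v_count hold
 * when the last DNLC reference goes away:
 *
 *	initial state:		v_count == 1, v_count_dnlc == 0
 *	VN_HOLD_DNLC(vp);	v_count == 2, v_count_dnlc == 1
 *	VN_HOLD_DNLC(vp);	v_count == 2, v_count_dnlc == 2
 *	VN_RELE_DNLC(vp);	v_count == 2, v_count_dnlc == 1
 *	VN_RELE_DNLC(vp);	v_count == 1, v_count_dnlc == 0
 */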
90
91 /*
92 * Tunable nc_hashavelen is the average length desired for this chain, from
93 * which the size of the nc_hash table is derived at create time.
94 */
95 #define NC_HASHAVELEN_DEFAULT 4
96 int nc_hashavelen = NC_HASHAVELEN_DEFAULT;
97
98 /*
99 * NC_MOVETOFRONT is the move-to-front threshold: if the hash lookup
100 * depth exceeds this value, we move the looked-up entry to the front of
101 * its hash chain. The idea is to make sure that the most frequently
102 * accessed entries are found most quickly (by keeping them near the
103 * front of their hash chains).
104 */
105 #define NC_MOVETOFRONT 2
106
107 /*
109 * DNLC_MAX_RELE is used to size an array on the stack when releasing
110 * vnodes. This array is used rather than calling VN_RELE() inline because
111 * all dnlc locks must be dropped by that time in order to avoid a
112 * possible deadlock. This deadlock occurs when the dnlc holds the last
113 * reference to the vnode and so the VOP_INACTIVE vector is called which
114 * can in turn call back into the dnlc. A global array was used but had
115 * many problems:
116  *	1) It had no real upper bound on the array size, as
117  *	   entries can be added after starting the purge.
118  *	2) The locking scheme caused a hang.
119  *	3) It caused serialisation on the global lock.
120 * 4) The array was often unnecessarily huge.
121 *
122 * Note the current value 8 allows up to 4 cache entries (to be purged
123 * from each hash chain), before having to cycle around and retry.
124 * This ought to be ample given that nc_hashavelen is typically very small.
125 */
126 #define DNLC_MAX_RELE 8 /* must be even */
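
/*
 * A sketch of the resulting purge pattern (see dnlc_purge() below for
 * the real code): entry vnodes are collected under the hash lock and
 * only released after the lock is dropped, so a VOP_INACTIVE triggered
 * by the release cannot re-enter the dnlc while we hold dnlc locks:
 *
 *	vnode_t *nc_rele[DNLC_MAX_RELE];
 *	int i, index = 0;
 *
 *	mutex_enter(&nch->hash_lock);
 *	(collect up to DNLC_MAX_RELE / 2 entries, saving each
 *	entry's vp and dp in nc_rele[index++])
 *	mutex_exit(&nch->hash_lock);
 *	for (i = 0; i < index; i++)
 *		VN_RELE_DNLC(nc_rele[i]);
 */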
127
128 /*
129 * Hash table of name cache entries for fast lookup, dynamically
130 * allocated at startup.
131 */
132 nc_hash_t *nc_hash;
133
134 /*
135 * Rotors. Used to select entries on a round-robin basis.
136 */
137 static nc_hash_t *dnlc_purge_fs1_rotor;
138 static nc_hash_t *dnlc_free_rotor;
139
140 /*
141 * # of dnlc entries (uninitialized)
142 *
143  * the initial value (-1) was chosen as a bit pattern that a
144  * systems administrator is unlikely to set deliberately, so it
145  * serves as an "unset" sentinel
146 */
147 int ncsize = -1;
148 volatile uint32_t dnlc_nentries = 0; /* current num of name cache entries */
149 static int nc_hashsz; /* size of hash table */
150 static int nc_hashmask; /* size of hash table minus 1 */
151
152 /*
153  * The dnlc_reduce_cache() taskq is activated when the dnlc reaches
154  * ncsize name cache entries. If no parameter is provided, it reduces
155  * the size down to dnlc_nentries_low_water, which by default is one
156  * hundredth less than ncsize (i.e. 99% of ncsize).
157 *
158 * If a parameter is provided to dnlc_reduce_cache(), then we reduce
159 * the size down based on ncsize_onepercent - where ncsize_onepercent
160 * is 1% of ncsize; however, we never let dnlc_reduce_cache() reduce
161 * the size below 3% of ncsize (ncsize_min_percent).
162 */
163 #define DNLC_LOW_WATER_DIVISOR_DEFAULT 100
164 uint_t dnlc_low_water_divisor = DNLC_LOW_WATER_DIVISOR_DEFAULT;
165 uint_t dnlc_nentries_low_water;
166 int dnlc_reduce_idle = 1; /* no locking needed */
167 uint_t ncsize_onepercent;
168 uint_t ncsize_min_percent;
169
170 /*
171 * If dnlc_nentries hits dnlc_max_nentries (twice ncsize)
172 * then this means the dnlc_reduce_cache() taskq is failing to
173 * keep up. In this case we refuse to add new entries to the dnlc
174 * until the taskq catches up.
175 */
176 uint_t dnlc_max_nentries; /* twice ncsize */
177 uint64_t dnlc_max_nentries_cnt = 0; /* statistic on times we failed */
178
179 /*
180 * Tunable to define when we should just remove items from
181 * the end of the chain.
182 */
183 #define DNLC_LONG_CHAIN 8
184 uint_t dnlc_long_chain = DNLC_LONG_CHAIN;
185
186 /*
187  * ncstats has been deprecated because its integer-sized counters
188  * can easily overflow in the dnlc.
189  * It is maintained (at some expense) for compatibility.
190  * The preferred interface is the kstat-accessible nc_stats below.
191 */
192 struct ncstats ncstats;
193
194 struct nc_stats ncs = {
195 { "hits", KSTAT_DATA_UINT64 },
196 { "misses", KSTAT_DATA_UINT64 },
197 { "negative_cache_hits", KSTAT_DATA_UINT64 },
198 { "enters", KSTAT_DATA_UINT64 },
199 { "double_enters", KSTAT_DATA_UINT64 },
200 { "purge_total_entries", KSTAT_DATA_UINT64 },
201 { "purge_all", KSTAT_DATA_UINT64 },
202 { "purge_vp", KSTAT_DATA_UINT64 },
203 { "purge_vfs", KSTAT_DATA_UINT64 },
204 { "purge_fs1", KSTAT_DATA_UINT64 },
205 { "pick_free", KSTAT_DATA_UINT64 },
206 { "pick_heuristic", KSTAT_DATA_UINT64 },
207 { "pick_last", KSTAT_DATA_UINT64 },
208
209 /* directory caching stats */
210
211 { "dir_hits", KSTAT_DATA_UINT64 },
212 { "dir_misses", KSTAT_DATA_UINT64 },
213 { "dir_cached_current", KSTAT_DATA_UINT64 },
214 { "dir_entries_cached_current", KSTAT_DATA_UINT64 },
215 { "dir_cached_total", KSTAT_DATA_UINT64 },
216 { "dir_start_no_memory", KSTAT_DATA_UINT64 },
217 { "dir_add_no_memory", KSTAT_DATA_UINT64 },
218 { "dir_add_abort", KSTAT_DATA_UINT64 },
219 { "dir_add_max", KSTAT_DATA_UINT64 },
220 { "dir_remove_entry_fail", KSTAT_DATA_UINT64 },
221 { "dir_remove_space_fail", KSTAT_DATA_UINT64 },
222 { "dir_update_fail", KSTAT_DATA_UINT64 },
223 { "dir_fini_purge", KSTAT_DATA_UINT64 },
224 { "dir_reclaim_last", KSTAT_DATA_UINT64 },
225 { "dir_reclaim_any", KSTAT_DATA_UINT64 },
226 };
227
228 static int doingcache = 1;
229
230 vnode_t negative_cache_vnode;
231
232 /*
233 * Insert entry at the front of the queue
234 */
235 #define nc_inshash(ncp, hp) \
236 { \
237 (ncp)->hash_next = (hp)->hash_next; \
238 (ncp)->hash_prev = (ncache_t *)(hp); \
239 (hp)->hash_next->hash_prev = (ncp); \
240 (hp)->hash_next = (ncp); \
241 }
242
243 /*
244 * Remove entry from hash queue
245 */
246 #define nc_rmhash(ncp) \
247 { \
248 (ncp)->hash_prev->hash_next = (ncp)->hash_next; \
249 (ncp)->hash_next->hash_prev = (ncp)->hash_prev; \
250 (ncp)->hash_prev = NULL; \
251 (ncp)->hash_next = NULL; \
252 }
253
254 /*
255 * Free an entry.
256 */
257 #define dnlc_free(ncp) \
258 { \
259 kmem_free((ncp), NCACHE_SIZE((ncp)->namlen)); \
260 atomic_dec_32(&dnlc_nentries); \
261 }
262
263
264 /*
265 * Cached directory info.
266 * ======================
267 */
268
269 /*
270 * Cached directory free space hash function.
271  * Needs the free space handle and the dcp to get the hash table size.
272 * Returns the hash index.
273 */
274 #define DDFHASH(handle, dcp) ((handle >> 2) & (dcp)->dc_fhash_mask)
275
276 /*
277 * Cached directory name entry hash function.
278 * Uses the name and returns in the input arguments the hash and the name
279 * length.
280 */
281 #define DNLC_DIR_HASH(name, hash, namelen) \
282 { \
283 char Xc; \
284 const char *Xcp; \
285 hash = *name; \
286 for (Xcp = (name + 1); (Xc = *Xcp) != 0; Xcp++) \
287 hash = (hash << 4) + hash + Xc; \
288 ASSERT((Xcp - (name)) <= ((1 << NBBY) - 1)); \
289 namelen = Xcp - (name); \
290 }
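
/*
 * For reference, DNLC_DIR_HASH computes hash = hash * 17 + c over the
 * bytes of the name ((hash << 4) + hash == hash * 17). An equivalent
 * stand-alone sketch (illustrative only, not used by this file):
 *
 *	static int
 *	dir_hash(const char *name, uchar_t *namlenp)
 *	{
 *		const char *cp;
 *		int hash = *name;
 *		char c;
 *
 *		for (cp = name + 1; (c = *cp) != 0; cp++)
 *			hash = (hash << 4) + hash + c;
 *		*namlenp = (uchar_t)(cp - name);
 *		return (hash);
 *	}
 */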
291
292 /* special dircache_t pointer to indicate error should be returned */
293 /*
294  * The anchor directory cache pointer can contain 3 types of values:
295  * 1) NULL: No directory cache
296  * 2) DC_RET_LOW_MEM (1): There was a directory cache that was found to
297  *    be too big, or a memory shortage occurred. This value remains in
298  *    the pointer until a dnlc_dir_start(), which returns a DNOMEM error.
299 * This is kludgy but efficient and only visible in this source file.
300 * 3) A valid cache pointer.
301 */
302 #define DC_RET_LOW_MEM (dircache_t *)1
303 #define VALID_DIR_CACHE(dcp) ((dircache_t *)(dcp) > DC_RET_LOW_MEM)
304
305 /* Tunables */
306 uint_t dnlc_dir_enable = 1; /* disable caching directories by setting to 0 */
307 uint_t dnlc_dir_min_size = 40; /* min no of directory entries before caching */
308 uint_t dnlc_dir_max_size = UINT_MAX; /* ditto maximum */
309 uint_t dnlc_dir_hash_size_shift = 3; /* 8 entries per hash bucket */
310 uint_t dnlc_dir_min_reclaim = 350000; /* approx 1MB of dcentrys */
311 /*
312 * dnlc_dir_hash_resize_shift determines when the hash tables
313 * get re-adjusted due to growth or shrinkage
314  * - currently 2, indicating that the average chain length can
315  *   grow to at most 4 times, or shrink to at least one quarter
316  *   of, the target before hash table readjustment. Note that
317  *   with dnlc_dir_hash_size_shift above set at 3 this means
318  *   readjustment occurs if the average number of entries per
319  *   bucket goes above 32 or below 2.
320 */
321 uint_t dnlc_dir_hash_resize_shift = 2; /* readjust rate */
322
323 static kmem_cache_t *dnlc_dir_space_cache; /* free space entry cache */
324 static dchead_t dc_head; /* anchor of cached directories */
325
326 /* Prototypes */
327 static ncache_t *dnlc_get(uchar_t namlen);
328 static ncache_t *dnlc_search(vnode_t *dp, const char *name, uchar_t namlen,
329 int hash);
330 static void dnlc_dir_reclaim(void *unused);
331 static void dnlc_dir_abort(dircache_t *dcp);
332 static void dnlc_dir_adjust_fhash(dircache_t *dcp);
333 static void dnlc_dir_adjust_nhash(dircache_t *dcp);
334 static void do_dnlc_reduce_cache(void *);
335
336
337 /*
338 * Initialize the directory cache.
339 */
340 void
341 dnlc_init()
342 {
343 nc_hash_t *hp;
344 kstat_t *ksp;
345 int i;
346
347 /*
348 * Set up the size of the dnlc (ncsize) and its low water mark.
349 */
350 if (ncsize == -1) {
351 /* calculate a reasonable size for the low water */
352 dnlc_nentries_low_water = 4 * (v.v_proc + maxusers) + 320;
353 ncsize = dnlc_nentries_low_water +
354 (dnlc_nentries_low_water / dnlc_low_water_divisor);
355 } else {
356 /* don't change the user specified ncsize */
357 dnlc_nentries_low_water =
358 ncsize - (ncsize / dnlc_low_water_divisor);
359 }
360 if (ncsize <= 0) {
361 doingcache = 0;
362 dnlc_dir_enable = 0; /* also disable directory caching */
363 ncsize = 0;
364 cmn_err(CE_NOTE, "name cache (dnlc) disabled");
365 return;
366 }
367 dnlc_max_nentries = ncsize * 2;
368 ncsize_onepercent = ncsize / 100;
369 ncsize_min_percent = ncsize_onepercent * 3;
370
371 /*
372 * Initialise the hash table.
373 * Compute hash size rounding to the next power of two.
374 */
375 nc_hashsz = ncsize / nc_hashavelen;
376 nc_hashsz = 1 << highbit(nc_hashsz);
377 nc_hashmask = nc_hashsz - 1;
378 nc_hash = kmem_zalloc(nc_hashsz * sizeof (*nc_hash), KM_SLEEP);
379 for (i = 0; i < nc_hashsz; i++) {
380 hp = (nc_hash_t *)&nc_hash[i];
381 mutex_init(&hp->hash_lock, NULL, MUTEX_DEFAULT, NULL);
382 hp->hash_next = (ncache_t *)hp;
383 hp->hash_prev = (ncache_t *)hp;
384 }
385
386 /*
387 * Initialize rotors
388 */
389 dnlc_free_rotor = dnlc_purge_fs1_rotor = &nc_hash[0];
390
391 /*
392 * Set up the directory caching to use kmem_cache_alloc
393 * for its free space entries so that we can get a callback
394 * when the system is short on memory, to allow us to free
395  * up some memory. We don't use the constructor/destructor
396  * functions.
397 */
398 dnlc_dir_space_cache = kmem_cache_create("dnlc_space_cache",
399 sizeof (dcfree_t), 0, NULL, NULL, dnlc_dir_reclaim, NULL,
400 NULL, 0);
401
402 /*
403 * Initialise the head of the cached directory structures
404 */
405 mutex_init(&dc_head.dch_lock, NULL, MUTEX_DEFAULT, NULL);
406 dc_head.dch_next = (dircache_t *)&dc_head;
407 dc_head.dch_prev = (dircache_t *)&dc_head;
408
409 /*
410  * Initialise the negative cache vnode. vn_reinit() sets v_count to 1,
411  * which acts as a permanent hold (VOP_INACTIVE is never called on it).
412 */
413 vn_reinit(&negative_cache_vnode);
414
415 /*
416  * Initialise kstats - both the old compatibility raw kind and
417 * the more extensive named stats.
418 */
419 ksp = kstat_create("unix", 0, "ncstats", "misc", KSTAT_TYPE_RAW,
420 sizeof (struct ncstats), KSTAT_FLAG_VIRTUAL);
421 if (ksp) {
422 ksp->ks_data = (void *) &ncstats;
423 kstat_install(ksp);
424 }
425 ksp = kstat_create("unix", 0, "dnlcstats", "misc", KSTAT_TYPE_NAMED,
426 sizeof (ncs) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
427 if (ksp) {
428 ksp->ks_data = (void *) &ncs;
429 kstat_install(ksp);
430 }
431 }
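
/*
 * The kstats installed above can be inspected from userland, for
 * example (assuming the standard kstat(1M) utility and libkstat):
 *
 *	$ kstat -n dnlcstats
 *	$ kstat -n ncstats
 *
 * or programmatically via kstat_lookup(kc, "unix", 0, "dnlcstats").
 */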
432
433 /*
434 * Add a name to the directory cache.
435 */
436 void
437 dnlc_enter(vnode_t *dp, const char *name, vnode_t *vp)
438 {
439 ncache_t *ncp;
440 nc_hash_t *hp;
441 uchar_t namlen;
442 int hash;
443
444 TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_enter_start:");
445
446 if (!doingcache) {
447 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
448 "dnlc_enter_end:(%S) %d", "not caching", 0);
449 return;
450 }
451
452 /*
453 * Get a new dnlc entry. Assume the entry won't be in the cache
454  * and initialize it now.
455 */
456 DNLCHASH(name, dp, hash, namlen);
457 if ((ncp = dnlc_get(namlen)) == NULL)
458 return;
459 ncp->dp = dp;
460 VN_HOLD_DNLC(dp);
461 ncp->vp = vp;
462 VN_HOLD_DNLC(vp);
463 bcopy(name, ncp->name, namlen);
464 ncp->hash = hash;
465 hp = &nc_hash[hash & nc_hashmask];
466
467 mutex_enter(&hp->hash_lock);
468 if (dnlc_search(dp, name, namlen, hash) != NULL) {
469 mutex_exit(&hp->hash_lock);
470 ncstats.dbl_enters++;
471 ncs.ncs_dbl_enters.value.ui64++;
472 VN_RELE_DNLC(dp);
473 VN_RELE_DNLC(vp);
474 dnlc_free(ncp); /* crfree done here */
475 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
476 "dnlc_enter_end:(%S) %d", "dbl enter", ncstats.dbl_enters);
477 return;
478 }
479 /*
480  * Insert the new entry into the hash chain.
481 */
482 nc_inshash(ncp, hp);
483 mutex_exit(&hp->hash_lock);
484 ncstats.enters++;
485 ncs.ncs_enters.value.ui64++;
486 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
487 "dnlc_enter_end:(%S) %d", "done", ncstats.enters);
488 }
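
/*
 * Typical use (a hedged sketch of a filesystem lookup path, not code
 * from this file): after a directory scan resolves "name", cache the
 * result, or cache a negative entry so that repeated failing lookups
 * are cheap:
 *
 *	error = (scan directory dvp for name, setting vp);
 *	if (error == 0)
 *		dnlc_enter(dvp, name, vp);
 *	else if (error == ENOENT)
 *		dnlc_enter(dvp, name, DNLC_NO_VNODE);
 */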
489
490 /*
491 * Add a name to the directory cache.
492 *
493  * This function is basically identical to
494  * dnlc_enter(). The difference is that when the
495 * desired dnlc entry is found, the vnode in the
496 * ncache is compared with the vnode passed in.
497 *
498 * If they are not equal then the ncache is
499 * updated with the passed in vnode. Otherwise
500 * it just frees up the newly allocated dnlc entry.
501 */
502 void
503 dnlc_update(vnode_t *dp, const char *name, vnode_t *vp)
504 {
505 ncache_t *ncp;
506 ncache_t *tcp;
507 vnode_t *tvp;
508 nc_hash_t *hp;
509 int hash;
510 uchar_t namlen;
511
512 TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_update_start:");
513
514 if (!doingcache) {
515 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
516 "dnlc_update_end:(%S) %d", "not caching", 0);
517 return;
518 }
519
520 /*
521 * Get a new dnlc entry and initialize it now.
522 * If we fail to get a new entry, call dnlc_remove() to purge
523 * any existing dnlc entry including negative cache (DNLC_NO_VNODE)
524 * entry.
525 * Failure to clear an existing entry could result in false dnlc
526 * lookup (negative/stale entry).
527 */
528 DNLCHASH(name, dp, hash, namlen);
529 if ((ncp = dnlc_get(namlen)) == NULL) {
530 dnlc_remove(dp, name);
531 return;
532 }
533 ncp->dp = dp;
534 VN_HOLD_DNLC(dp);
535 ncp->vp = vp;
536 VN_HOLD_DNLC(vp);
537 bcopy(name, ncp->name, namlen);
538 ncp->hash = hash;
539 hp = &nc_hash[hash & nc_hashmask];
540
541 mutex_enter(&hp->hash_lock);
542 if ((tcp = dnlc_search(dp, name, namlen, hash)) != NULL) {
543 if (tcp->vp != vp) {
544 tvp = tcp->vp;
545 tcp->vp = vp;
546 mutex_exit(&hp->hash_lock);
547 VN_RELE_DNLC(tvp);
548 ncstats.enters++;
549 ncs.ncs_enters.value.ui64++;
550 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
551 "dnlc_update_end:(%S) %d", "done", ncstats.enters);
552 } else {
553 mutex_exit(&hp->hash_lock);
554 VN_RELE_DNLC(vp);
555 ncstats.dbl_enters++;
556 ncs.ncs_dbl_enters.value.ui64++;
557 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
558 "dnlc_update_end:(%S) %d",
559 "dbl enter", ncstats.dbl_enters);
560 }
561 VN_RELE_DNLC(dp);
562 dnlc_free(ncp); /* crfree done here */
563 return;
564 }
565 /*
566 * insert the new entry, since it is not in dnlc yet
567 */
568 nc_inshash(ncp, hp);
569 mutex_exit(&hp->hash_lock);
570 ncstats.enters++;
571 ncs.ncs_enters.value.ui64++;
572 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
573 "dnlc_update_end:(%S) %d", "done", ncstats.enters);
574 }
575
576 /*
577 * Look up a name in the directory name cache.
578 *
579 * Return a doubly-held vnode if found: one hold so that it may
580  * remain in the cache for other users, the other hold so that
581  * the cache entry is not recycled and the identity of the vnode
582  * lost before the caller can use the vnode.
583 */
584 vnode_t *
585 dnlc_lookup(vnode_t *dp, const char *name)
586 {
587 ncache_t *ncp;
588 nc_hash_t *hp;
589 vnode_t *vp;
590 int hash, depth;
591 uchar_t namlen;
592
593 TRACE_2(TR_FAC_NFS, TR_DNLC_LOOKUP_START,
594 "dnlc_lookup_start:dp %x name %s", dp, name);
595
596 if (!doingcache) {
597 TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
598 "dnlc_lookup_end:%S %d vp %x name %s",
599 "not_caching", 0, NULL, name);
600 return (NULL);
601 }
602
603 DNLCHASH(name, dp, hash, namlen);
604 depth = 1;
605 hp = &nc_hash[hash & nc_hashmask];
606 mutex_enter(&hp->hash_lock);
607
608 for (ncp = hp->hash_next; ncp != (ncache_t *)hp;
609 ncp = ncp->hash_next) {
610 if (ncp->hash == hash && /* fast signature check */
611 ncp->dp == dp &&
612 ncp->namlen == namlen &&
613 bcmp(ncp->name, name, namlen) == 0) {
614 /*
615 * Move this entry to the head of its hash chain
616 * if it's not already close.
617 */
618 if (depth > NC_MOVETOFRONT) {
619 ncache_t *next = ncp->hash_next;
620 ncache_t *prev = ncp->hash_prev;
621
622 prev->hash_next = next;
623 next->hash_prev = prev;
624 ncp->hash_next = next = hp->hash_next;
625 ncp->hash_prev = (ncache_t *)hp;
626 next->hash_prev = ncp;
627 hp->hash_next = ncp;
628
629 ncstats.move_to_front++;
630 }
631
632 /*
633 * Put a hold on the vnode now so its identity
634 * can't change before the caller has a chance to
635 * put a hold on it.
636 */
637 vp = ncp->vp;
638 VN_HOLD_CALLER(vp);
639 mutex_exit(&hp->hash_lock);
640 ncstats.hits++;
641 ncs.ncs_hits.value.ui64++;
642 if (vp == DNLC_NO_VNODE) {
643 ncs.ncs_neg_hits.value.ui64++;
644 }
645 TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
646 "dnlc_lookup_end:%S %d vp %x name %s", "hit",
647 ncstats.hits, vp, name);
648 return (vp);
649 }
650 depth++;
651 }
652
653 mutex_exit(&hp->hash_lock);
654 ncstats.misses++;
655 ncs.ncs_misses.value.ui64++;
656 TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
657 "dnlc_lookup_end:%S %d vp %x name %s", "miss", ncstats.misses,
658 NULL, name);
659 return (NULL);
660 }
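
/*
 * Typical caller pattern (illustrative only): check for a negative
 * cache hit, and remember that a returned vnode is already held:
 *
 *	vp = dnlc_lookup(dvp, name);
 *	if (vp == DNLC_NO_VNODE) {
 *		VN_RELE(vp);
 *		return (ENOENT);	(cached negative entry)
 *	} else if (vp != NULL) {
 *		*vpp = vp;		(hold is transferred to caller)
 *		return (0);
 *	}
 *	(otherwise fall through and scan the directory on disk)
 */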
661
662 /*
663 * Remove an entry in the directory name cache.
664 */
665 void
666 dnlc_remove(vnode_t *dp, const char *name)
667 {
668 ncache_t *ncp;
669 nc_hash_t *hp;
670 uchar_t namlen;
671 int hash;
672
673 if (!doingcache)
674 return;
675 DNLCHASH(name, dp, hash, namlen);
676 hp = &nc_hash[hash & nc_hashmask];
677
678 mutex_enter(&hp->hash_lock);
679 if (ncp = dnlc_search(dp, name, namlen, hash)) {
680 /*
681 * Free up the entry
682 */
683 nc_rmhash(ncp);
684 mutex_exit(&hp->hash_lock);
685 VN_RELE_DNLC(ncp->vp);
686 VN_RELE_DNLC(ncp->dp);
687 dnlc_free(ncp);
688 return;
689 }
690 mutex_exit(&hp->hash_lock);
691 }
692
693 /*
694 * Purge the entire cache.
695 */
696 void
697 dnlc_purge()
698 {
699 nc_hash_t *nch;
700 ncache_t *ncp;
701 int index;
702 int i;
703 vnode_t *nc_rele[DNLC_MAX_RELE];
704
705 if (!doingcache)
706 return;
707
708 ncstats.purges++;
709 ncs.ncs_purge_all.value.ui64++;
710
711 for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
712 index = 0;
713 mutex_enter(&nch->hash_lock);
714 ncp = nch->hash_next;
715 while (ncp != (ncache_t *)nch) {
716 ncache_t *np;
717
718 np = ncp->hash_next;
719 nc_rele[index++] = ncp->vp;
720 nc_rele[index++] = ncp->dp;
721
722 nc_rmhash(ncp);
723 dnlc_free(ncp);
724 ncp = np;
725 ncs.ncs_purge_total.value.ui64++;
726 if (index == DNLC_MAX_RELE)
727 break;
728 }
729 mutex_exit(&nch->hash_lock);
730
731 /* Release holds on all the vnodes now that we have no locks */
732 for (i = 0; i < index; i++) {
733 VN_RELE_DNLC(nc_rele[i]);
734 }
735 if (ncp != (ncache_t *)nch) {
736 nch--; /* Do current hash chain again */
737 }
738 }
739 }
740
741 /*
742 * Purge any cache entries referencing a vnode. Exit as soon as the dnlc
743 * reference count goes to zero (the caller still holds a reference).
744 */
745 void
746 dnlc_purge_vp(vnode_t *vp)
747 {
748 nc_hash_t *nch;
749 ncache_t *ncp;
750 int index;
751 vnode_t *nc_rele[DNLC_MAX_RELE];
752
753 ASSERT(vp->v_count > 0);
754 if (vp->v_count_dnlc == 0) {
755 return;
756 }
757
758 if (!doingcache)
759 return;
760
761 ncstats.purges++;
762 ncs.ncs_purge_vp.value.ui64++;
763
764 for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
765 index = 0;
766 mutex_enter(&nch->hash_lock);
767 ncp = nch->hash_next;
768 while (ncp != (ncache_t *)nch) {
769 ncache_t *np;
770
771 np = ncp->hash_next;
772 if (ncp->dp == vp || ncp->vp == vp) {
773 nc_rele[index++] = ncp->vp;
774 nc_rele[index++] = ncp->dp;
775 nc_rmhash(ncp);
776 dnlc_free(ncp);
777 ncs.ncs_purge_total.value.ui64++;
778 if (index == DNLC_MAX_RELE) {
779 ncp = np;
780 break;
781 }
782 }
783 ncp = np;
784 }
785 mutex_exit(&nch->hash_lock);
786
787 /* Release holds on all the vnodes now that we have no locks */
788 while (index) {
789 VN_RELE_DNLC(nc_rele[--index]);
790 }
791
792 if (vp->v_count_dnlc == 0) {
793 return;
794 }
795
796 if (ncp != (ncache_t *)nch) {
797 nch--; /* Do current hash chain again */
798 }
799 }
800 }
801
802 /*
803 * Purge cache entries referencing a vfsp. Caller supplies a count
804 * of entries to purge; up to that many will be freed. A count of
805 * zero indicates that all such entries should be purged. Returns
806 * the number of entries that were purged.
807 */
808 int
809 dnlc_purge_vfsp(vfs_t *vfsp, int count)
810 {
811 nc_hash_t *nch;
812 ncache_t *ncp;
813 int n = 0;
814 int index;
815 int i;
816 vnode_t *nc_rele[DNLC_MAX_RELE];
817
818 if (!doingcache)
819 return (0);
820
821 ncstats.purges++;
822 ncs.ncs_purge_vfs.value.ui64++;
823
824 for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
825 index = 0;
826 mutex_enter(&nch->hash_lock);
827 ncp = nch->hash_next;
828 while (ncp != (ncache_t *)nch) {
829 ncache_t *np;
830
831 np = ncp->hash_next;
832 ASSERT(ncp->dp != NULL);
833 ASSERT(ncp->vp != NULL);
834 if ((ncp->dp->v_vfsp == vfsp) ||
835 (ncp->vp->v_vfsp == vfsp)) {
836 n++;
837 nc_rele[index++] = ncp->vp;
838 nc_rele[index++] = ncp->dp;
839 nc_rmhash(ncp);
840 dnlc_free(ncp);
841 ncs.ncs_purge_total.value.ui64++;
842 if (index == DNLC_MAX_RELE) {
843 ncp = np;
844 break;
845 }
846 if (count != 0 && n >= count) {
847 break;
848 }
849 }
850 ncp = np;
851 }
852 mutex_exit(&nch->hash_lock);
853 /* Release holds on all the vnodes now that we have no locks */
854 for (i = 0; i < index; i++) {
855 VN_RELE_DNLC(nc_rele[i]);
856 }
857 if (count != 0 && n >= count) {
858 return (n);
859 }
860 if (ncp != (ncache_t *)nch) {
861 nch--; /* Do current hash chain again */
862 }
863 }
864 return (n);
865 }
866
867 /*
868 * Purge 1 entry from the dnlc that is part of the filesystem(s)
869 * represented by 'vop'. The purpose of this routine is to allow
870 * users of the dnlc to free a vnode that is being held by the dnlc.
871 *
872  * Return 1 if we find and release a vnode whose release will
873  * result in freeing the underlying vnode (its count was 1);
874  * return 0 if no appropriate vnode is found.
875 *
876 * Note, vop is not the 'right' identifier for a filesystem.
877 */
878 int
879 dnlc_fs_purge1(vnodeops_t *vop)
880 {
881 nc_hash_t *end;
882 nc_hash_t *hp;
883 ncache_t *ncp;
884 vnode_t *vp;
885
886 if (!doingcache)
887 return (0);
888
889 ncs.ncs_purge_fs1.value.ui64++;
890
891 /*
892 * Scan the dnlc entries looking for a likely candidate.
893 */
894 hp = end = dnlc_purge_fs1_rotor;
895
896 do {
897 if (++hp == &nc_hash[nc_hashsz])
898 hp = nc_hash;
899 dnlc_purge_fs1_rotor = hp;
900 if (hp->hash_next == (ncache_t *)hp)
901 continue;
902 mutex_enter(&hp->hash_lock);
903 for (ncp = hp->hash_prev;
904 ncp != (ncache_t *)hp;
905 ncp = ncp->hash_prev) {
906 vp = ncp->vp;
907 if (!vn_has_cached_data(vp) && (vp->v_count == 1) &&
908 vn_matchops(vp, vop))
909 break;
910 }
911 if (ncp != (ncache_t *)hp) {
912 nc_rmhash(ncp);
913 mutex_exit(&hp->hash_lock);
914 VN_RELE_DNLC(ncp->dp);
915 			VN_RELE_DNLC(vp);
916 dnlc_free(ncp);
917 ncs.ncs_purge_total.value.ui64++;
918 return (1);
919 }
920 mutex_exit(&hp->hash_lock);
921 } while (hp != end);
922 return (0);
923 }
924
925 /*
926 * Utility routine to search for a cache entry. Return the
927 * ncache entry if found, NULL otherwise.
928 */
929 static ncache_t *
930 dnlc_search(vnode_t *dp, const char *name, uchar_t namlen, int hash)
931 {
932 nc_hash_t *hp;
933 ncache_t *ncp;
934
935 hp = &nc_hash[hash & nc_hashmask];
936
937 for (ncp = hp->hash_next; ncp != (ncache_t *)hp; ncp = ncp->hash_next) {
938 if (ncp->hash == hash &&
939 ncp->dp == dp &&
940 ncp->namlen == namlen &&
941 bcmp(ncp->name, name, namlen) == 0)
942 return (ncp);
943 }
944 return (NULL);
945 }
946
947 #if ((1 << NBBY) - 1) < (MAXNAMELEN - 1)
948 #error ncache_t name length representation is too small
949 #endif
950
951 void
952 dnlc_reduce_cache(void *reduce_percent)
953 {
954 if (dnlc_reduce_idle && (dnlc_nentries >= ncsize || reduce_percent)) {
955 dnlc_reduce_idle = 0;
956 if ((taskq_dispatch(system_taskq, do_dnlc_reduce_cache,
957 reduce_percent, TQ_NOSLEEP)) == TASKQID_INVALID)
958 dnlc_reduce_idle = 1;
959 }
960 }
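
/*
 * Note that reduce_percent is a percentage smuggled through the
 * taskq's void * argument, not a pointer. A caller wanting a 3%
 * reduction would do, for example (illustrative only):
 *
 *	dnlc_reduce_cache((void *)(uintptr_t)3);
 *
 * do_dnlc_reduce_cache() below converts it back with
 * (uint_t)(uintptr_t)reduce_percent.
 */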
961
962 /*
963 * Get a new name cache entry.
964  * If the dnlc_reduce_cache() taskq isn't keeping up with demand, or
965  * memory is short, just return NULL. If we're over ncsize then kick off
966  * a thread to free some in-use entries down to dnlc_nentries_low_water.
967  * Caller must initialise all fields except namlen.
968  * Component names are defined to be less than MAXNAMELEN,
969  * which includes the terminating null.
970 */
971 static ncache_t *
dnlc_get(uchar_t namlen)972 dnlc_get(uchar_t namlen)
973 {
974 ncache_t *ncp;
975
976 if (dnlc_nentries > dnlc_max_nentries) {
977 dnlc_max_nentries_cnt++; /* keep a statistic */
978 return (NULL);
979 }
980 ncp = kmem_alloc(NCACHE_SIZE(namlen), KM_NOSLEEP);
981 if (ncp == NULL) {
982 return (NULL);
983 }
984 ncp->namlen = namlen;
985 atomic_inc_32(&dnlc_nentries);
986 dnlc_reduce_cache(NULL);
987 return (ncp);
988 }
989
990 /*
991 * Taskq routine to free up name cache entries to reduce the
992 * cache size to the low water mark if "reduce_percent" is not provided.
993 * If "reduce_percent" is provided, reduce cache size by
994 * (ncsize_onepercent * reduce_percent).
995 */
996 /*ARGSUSED*/
997 static void
998 do_dnlc_reduce_cache(void *reduce_percent)
999 {
1000 nc_hash_t *hp = dnlc_free_rotor, *start_hp = hp;
1001 vnode_t *vp;
1002 ncache_t *ncp;
1003 int cnt;
1004 uint_t low_water = dnlc_nentries_low_water;
1005
1006 if (reduce_percent) {
1007 uint_t reduce_cnt;
1008
1009 /*
1010 * Never try to reduce the current number
1011 * of cache entries below 3% of ncsize.
1012 */
1013 if (dnlc_nentries <= ncsize_min_percent) {
1014 dnlc_reduce_idle = 1;
1015 return;
1016 }
1017 reduce_cnt = ncsize_onepercent *
1018 (uint_t)(uintptr_t)reduce_percent;
1019
1020 if (reduce_cnt > dnlc_nentries ||
1021 dnlc_nentries - reduce_cnt < ncsize_min_percent)
1022 low_water = ncsize_min_percent;
1023 else
1024 low_water = dnlc_nentries - reduce_cnt;
1025 }
1026
1027 do {
1028 /*
1029 		 * Find the first non-empty hash queue without locking.
1030 * Only look at each hash queue once to avoid an infinite loop.
1031 */
1032 do {
1033 if (++hp == &nc_hash[nc_hashsz])
1034 hp = nc_hash;
1035 } while (hp->hash_next == (ncache_t *)hp && hp != start_hp);
1036
1037 /* return if all hash queues are empty. */
1038 if (hp->hash_next == (ncache_t *)hp) {
1039 dnlc_reduce_idle = 1;
1040 return;
1041 }
1042
1043 mutex_enter(&hp->hash_lock);
1044 for (cnt = 0, ncp = hp->hash_prev; ncp != (ncache_t *)hp;
1045 ncp = ncp->hash_prev, cnt++) {
1046 vp = ncp->vp;
1047 /*
1048 * A name cache entry with a reference count
1049 * of one is only referenced by the dnlc.
1050 * Also negative cache entries are purged first.
1051 */
1052 if (!vn_has_cached_data(vp) &&
1053 ((vp->v_count == 1) || (vp == DNLC_NO_VNODE))) {
1054 ncs.ncs_pick_heur.value.ui64++;
1055 goto found;
1056 }
1057 /*
1058 * Remove from the end of the chain if the
1059 * chain is too long
1060 */
1061 if (cnt > dnlc_long_chain) {
1062 ncp = hp->hash_prev;
1063 ncs.ncs_pick_last.value.ui64++;
1064 vp = ncp->vp;
1065 goto found;
1066 }
1067 }
1068 /* check for race and continue */
1069 if (hp->hash_next == (ncache_t *)hp) {
1070 mutex_exit(&hp->hash_lock);
1071 continue;
1072 }
1073
1074 ncp = hp->hash_prev; /* pick the last one in the hash queue */
1075 ncs.ncs_pick_last.value.ui64++;
1076 vp = ncp->vp;
1077 found:
1078 /*
1079 * Remove from hash chain.
1080 */
1081 nc_rmhash(ncp);
1082 mutex_exit(&hp->hash_lock);
1083 VN_RELE_DNLC(vp);
1084 VN_RELE_DNLC(ncp->dp);
1085 dnlc_free(ncp);
1086 } while (dnlc_nentries > low_water);
1087
1088 dnlc_free_rotor = hp;
1089 dnlc_reduce_idle = 1;
1090 }
1091
1092 /*
1093 * Directory caching routines
1094 * ==========================
1095 *
1096 * See dnlc.h for details of the interfaces below.
1097 */
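
/*
 * A sketch of the expected calling sequence for a filesystem caching
 * one directory (illustrative; dnlc.h holds the authoritative
 * interface contracts):
 *
 *	switch (dnlc_dir_start(dcap, estimated_entries)) {
 *	case DOK:
 *		(for each name found by the directory scan)
 *			(void) dnlc_dir_add_entry(dcap, name, handle);
 *		(for each run of free space)
 *			(void) dnlc_dir_add_space(dcap, len, handle);
 *		dnlc_dir_complete(dcap);
 *		break;
 *	case DNOCACHE:	(too small, or caching disabled)
 *	case DTOOBIG:	(too big to be worth caching)
 *	case DNOMEM:	(a previous cache was discarded for memory)
 *		break;
 *	}
 *
 *	(on later lookups)
 *	if (dnlc_dir_lookup(dcap, name, &handle) == DFOUND)
 *		(use handle)
 */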
1098
1099 /*
1100  * Look up an entry in a complete or partial directory cache.
1101 */
1102 dcret_t
1103 dnlc_dir_lookup(dcanchor_t *dcap, const char *name, uint64_t *handle)
1104 {
1105 dircache_t *dcp;
1106 dcentry_t *dep;
1107 int hash;
1108 int ret;
1109 uchar_t namlen;
1110
1111 /*
1112 * can test without lock as we are only a cache
1113 */
1114 if (!VALID_DIR_CACHE(dcap->dca_dircache)) {
1115 ncs.ncs_dir_misses.value.ui64++;
1116 return (DNOCACHE);
1117 }
1118
1119 if (!dnlc_dir_enable) {
1120 return (DNOCACHE);
1121 }
1122
1123 mutex_enter(&dcap->dca_lock);
1124 dcp = (dircache_t *)dcap->dca_dircache;
1125 if (VALID_DIR_CACHE(dcp)) {
1126 dcp->dc_actime = ddi_get_lbolt64();
1127 DNLC_DIR_HASH(name, hash, namlen);
1128 dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1129 while (dep != NULL) {
1130 if ((dep->de_hash == hash) &&
1131 (namlen == dep->de_namelen) &&
1132 bcmp(dep->de_name, name, namlen) == 0) {
1133 *handle = dep->de_handle;
1134 mutex_exit(&dcap->dca_lock);
1135 ncs.ncs_dir_hits.value.ui64++;
1136 return (DFOUND);
1137 }
1138 dep = dep->de_next;
1139 }
1140 if (dcp->dc_complete) {
1141 ret = DNOENT;
1142 } else {
1143 ret = DNOCACHE;
1144 }
1145 mutex_exit(&dcap->dca_lock);
1146 return (ret);
1147 } else {
1148 mutex_exit(&dcap->dca_lock);
1149 ncs.ncs_dir_misses.value.ui64++;
1150 return (DNOCACHE);
1151 }
1152 }
1153
1154 /*
1155 * Start a new directory cache. An estimate of the number of
1156  * entries is provided as a quick check to ensure the directory
1157 * is cacheable.
1158 */
1159 dcret_t
1160 dnlc_dir_start(dcanchor_t *dcap, uint_t num_entries)
1161 {
1162 dircache_t *dcp;
1163
1164 if (!dnlc_dir_enable ||
1165 (num_entries < dnlc_dir_min_size)) {
1166 return (DNOCACHE);
1167 }
1168
1169 if (num_entries > dnlc_dir_max_size) {
1170 return (DTOOBIG);
1171 }
1172
1173 mutex_enter(&dc_head.dch_lock);
1174 mutex_enter(&dcap->dca_lock);
1175
1176 if (dcap->dca_dircache == DC_RET_LOW_MEM) {
1177 dcap->dca_dircache = NULL;
1178 mutex_exit(&dcap->dca_lock);
1179 mutex_exit(&dc_head.dch_lock);
1180 return (DNOMEM);
1181 }
1182
1183 /*
1184 * Check if there's currently a cache.
1185 * This probably only occurs on a race.
1186 */
1187 if (dcap->dca_dircache != NULL) {
1188 mutex_exit(&dcap->dca_lock);
1189 mutex_exit(&dc_head.dch_lock);
1190 return (DNOCACHE);
1191 }
1192
1193 /*
1194 * Allocate the dircache struct, entry and free space hash tables.
1195 * These tables are initially just one entry but dynamically resize
1196 * when entries and free space are added or removed.
1197 */
1198 if ((dcp = kmem_zalloc(sizeof (dircache_t), KM_NOSLEEP)) == NULL) {
1199 goto error;
1200 }
1201 if ((dcp->dc_namehash = kmem_zalloc(sizeof (dcentry_t *),
1202 KM_NOSLEEP)) == NULL) {
1203 goto error;
1204 }
1205 if ((dcp->dc_freehash = kmem_zalloc(sizeof (dcfree_t *),
1206 KM_NOSLEEP)) == NULL) {
1207 goto error;
1208 }
1209
1210 dcp->dc_anchor = dcap; /* set back pointer to anchor */
1211 dcap->dca_dircache = dcp;
1212
1213 /* add into head of global chain */
1214 dcp->dc_next = dc_head.dch_next;
1215 dcp->dc_prev = (dircache_t *)&dc_head;
1216 dcp->dc_next->dc_prev = dcp;
1217 dc_head.dch_next = dcp;
1218
1219 mutex_exit(&dcap->dca_lock);
1220 mutex_exit(&dc_head.dch_lock);
1221 ncs.ncs_cur_dirs.value.ui64++;
1222 ncs.ncs_dirs_cached.value.ui64++;
1223 return (DOK);
1224 error:
1225 if (dcp != NULL) {
1226 if (dcp->dc_namehash) {
1227 kmem_free(dcp->dc_namehash, sizeof (dcentry_t *));
1228 }
1229 kmem_free(dcp, sizeof (dircache_t));
1230 }
1231 /*
1232 * Must also kmem_free dcp->dc_freehash if more error cases are added
1233 */
1234 mutex_exit(&dcap->dca_lock);
1235 mutex_exit(&dc_head.dch_lock);
1236 ncs.ncs_dir_start_nm.value.ui64++;
1237 return (DNOCACHE);
1238 }
1239
1240 /*
1241  * Add a directory entry to a partial or complete directory cache.
1242 */
1243 dcret_t
1244 dnlc_dir_add_entry(dcanchor_t *dcap, const char *name, uint64_t handle)
1245 {
1246 dircache_t *dcp;
1247 dcentry_t **hp, *dep;
1248 int hash;
1249 uint_t capacity;
1250 uchar_t namlen;
1251
1252 /*
1253 * Allocate the dcentry struct, including the variable
1254 * size name. Note, the null terminator is not copied.
1255 *
1256 * We do this outside the lock to avoid possible deadlock if
1257 * dnlc_dir_reclaim() is called as a result of memory shortage.
1258 */
1259 DNLC_DIR_HASH(name, hash, namlen);
1260 dep = kmem_alloc(DCENTTRY_SIZE(namlen), KM_NOSLEEP);
1261 if (dep == NULL) {
1262 #ifdef DEBUG
1263 /*
1264 * The kmem allocator generates random failures for
1265 		 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE).
1266 		 * So try again before we blow away a perfectly good cache.
1267 		 * This is done not to cover an error but purely for
1268 		 * performance when running a debug kernel.
1269 * This random error only occurs in debug mode.
1270 */
1271 dep = kmem_alloc(DCENTTRY_SIZE(namlen), KM_NOSLEEP);
1272 if (dep != NULL)
1273 goto ok;
1274 #endif
1275 ncs.ncs_dir_add_nm.value.ui64++;
1276 /*
1277 * Free a directory cache. This may be the one we are
1278 * called with.
1279 */
1280 dnlc_dir_reclaim(NULL);
1281 dep = kmem_alloc(DCENTTRY_SIZE(namlen), KM_NOSLEEP);
1282 if (dep == NULL) {
1283 /*
1284 * still no memory, better delete this cache
1285 */
1286 mutex_enter(&dcap->dca_lock);
1287 dcp = (dircache_t *)dcap->dca_dircache;
1288 if (VALID_DIR_CACHE(dcp)) {
1289 dnlc_dir_abort(dcp);
1290 dcap->dca_dircache = DC_RET_LOW_MEM;
1291 }
1292 mutex_exit(&dcap->dca_lock);
1293 ncs.ncs_dir_addabort.value.ui64++;
1294 return (DNOCACHE);
1295 }
1296 /*
1297 * fall through as if the 1st kmem_alloc had worked
1298 */
1299 }
1300 #ifdef DEBUG
1301 ok:
1302 #endif
1303 mutex_enter(&dcap->dca_lock);
1304 dcp = (dircache_t *)dcap->dca_dircache;
1305 if (VALID_DIR_CACHE(dcp)) {
1306 /*
1307 * If the total number of entries goes above the max
1308 * then free this cache
1309 */
1310 if ((dcp->dc_num_entries + dcp->dc_num_free) >
1311 dnlc_dir_max_size) {
1312 mutex_exit(&dcap->dca_lock);
1313 dnlc_dir_purge(dcap);
1314 kmem_free(dep, DCENTTRY_SIZE(namlen));
1315 ncs.ncs_dir_add_max.value.ui64++;
1316 return (DTOOBIG);
1317 }
1318 dcp->dc_num_entries++;
1319 capacity = (dcp->dc_nhash_mask + 1) << dnlc_dir_hash_size_shift;
1320 if (dcp->dc_num_entries >=
1321 (capacity << dnlc_dir_hash_resize_shift)) {
1322 dnlc_dir_adjust_nhash(dcp);
1323 }
1324 hp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1325
1326 /*
1327 * Initialise and chain in new entry
1328 */
1329 dep->de_handle = handle;
1330 dep->de_hash = hash;
1331 /*
1332 * Note de_namelen is a uchar_t to conserve space
1333 * and alignment padding. The max length of any
1334 * pathname component is defined as MAXNAMELEN
1335 * which is 256 (including the terminating null).
1336 		 * So as long as this doesn't change, we don't include the
1337 		 * null, we always use bcmp to compare strings, and we don't
1338 		 * start storing full names, we are ok.
1339 * The space savings is worth it.
1340 */
1341 dep->de_namelen = namlen;
1342 bcopy(name, dep->de_name, namlen);
1343 dep->de_next = *hp;
1344 *hp = dep;
1345 dcp->dc_actime = ddi_get_lbolt64();
1346 mutex_exit(&dcap->dca_lock);
1347 ncs.ncs_dir_num_ents.value.ui64++;
1348 return (DOK);
1349 } else {
1350 mutex_exit(&dcap->dca_lock);
1351 kmem_free(dep, DCENTTRY_SIZE(namlen));
1352 return (DNOCACHE);
1353 }
1354 }
1355
1356 /*
1357 * Add free space to a partial or complete directory cache.
1358 */
1359 dcret_t
1360 dnlc_dir_add_space(dcanchor_t *dcap, uint_t len, uint64_t handle)
1361 {
1362 dircache_t *dcp;
1363 dcfree_t *dfp, **hp;
1364 uint_t capacity;
1365
1366 /*
1367 * We kmem_alloc outside the lock to avoid possible deadlock if
1368 * dnlc_dir_reclaim() is called as a result of memory shortage.
1369 */
1370 dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1371 if (dfp == NULL) {
1372 #ifdef DEBUG
1373 /*
1374 * The kmem allocator generates random failures for
1375 		 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE).
1376 		 * So try again before we blow away a perfectly good cache.
1377 		 * This random error only occurs in debug mode.
1378 */
1379 dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1380 if (dfp != NULL)
1381 goto ok;
1382 #endif
1383 ncs.ncs_dir_add_nm.value.ui64++;
1384 /*
1385 * Free a directory cache. This may be the one we are
1386 * called with.
1387 */
1388 dnlc_dir_reclaim(NULL);
1389 dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1390 if (dfp == NULL) {
1391 /*
1392 * still no memory, better delete this cache
1393 */
1394 mutex_enter(&dcap->dca_lock);
1395 dcp = (dircache_t *)dcap->dca_dircache;
1396 if (VALID_DIR_CACHE(dcp)) {
1397 dnlc_dir_abort(dcp);
1398 dcap->dca_dircache = DC_RET_LOW_MEM;
1399 }
1400 mutex_exit(&dcap->dca_lock);
1401 ncs.ncs_dir_addabort.value.ui64++;
1402 return (DNOCACHE);
1403 }
1404 /*
1405 * fall through as if the 1st kmem_alloc had worked
1406 */
1407 }
1408
1409 #ifdef DEBUG
1410 ok:
1411 #endif
1412 mutex_enter(&dcap->dca_lock);
1413 dcp = (dircache_t *)dcap->dca_dircache;
1414 if (VALID_DIR_CACHE(dcp)) {
1415 if ((dcp->dc_num_entries + dcp->dc_num_free) >
1416 dnlc_dir_max_size) {
1417 mutex_exit(&dcap->dca_lock);
1418 dnlc_dir_purge(dcap);
1419 kmem_cache_free(dnlc_dir_space_cache, dfp);
1420 ncs.ncs_dir_add_max.value.ui64++;
1421 return (DTOOBIG);
1422 }
1423 dcp->dc_num_free++;
1424 capacity = (dcp->dc_fhash_mask + 1) << dnlc_dir_hash_size_shift;
1425 if (dcp->dc_num_free >=
1426 (capacity << dnlc_dir_hash_resize_shift)) {
1427 dnlc_dir_adjust_fhash(dcp);
1428 }
1429 /*
1430 * Initialise and chain a new entry
1431 */
1432 dfp->df_handle = handle;
1433 dfp->df_len = len;
1434 dcp->dc_actime = ddi_get_lbolt64();
1435 hp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
1436 dfp->df_next = *hp;
1437 *hp = dfp;
1438 mutex_exit(&dcap->dca_lock);
1439 ncs.ncs_dir_num_ents.value.ui64++;
1440 return (DOK);
1441 } else {
1442 mutex_exit(&dcap->dca_lock);
1443 kmem_cache_free(dnlc_dir_space_cache, dfp);
1444 return (DNOCACHE);
1445 }
1446 }
1447
1448 /*
1449 * Mark a directory cache as complete.
1450 */
1451 void
1452 dnlc_dir_complete(dcanchor_t *dcap)
1453 {
1454 dircache_t *dcp;
1455
1456 mutex_enter(&dcap->dca_lock);
1457 dcp = (dircache_t *)dcap->dca_dircache;
1458 if (VALID_DIR_CACHE(dcp)) {
1459 dcp->dc_complete = B_TRUE;
1460 }
1461 mutex_exit(&dcap->dca_lock);
1462 }
1463
1464 /*
1465 * Internal routine to delete a partial or full directory cache.
1466 * No additional locking needed.
1467 */
1468 static void
1469 dnlc_dir_abort(dircache_t *dcp)
1470 {
1471 dcentry_t *dep, *nhp;
1472 dcfree_t *fep, *fhp;
1473 uint_t nhtsize = dcp->dc_nhash_mask + 1; /* name hash table size */
1474 uint_t fhtsize = dcp->dc_fhash_mask + 1; /* free hash table size */
1475 uint_t i;
1476
1477 /*
1478 * Free up the cached name entries and hash table
1479 */
1480 for (i = 0; i < nhtsize; i++) { /* for each hash bucket */
1481 nhp = dcp->dc_namehash[i];
1482 while (nhp != NULL) { /* for each chained entry */
1483 dep = nhp->de_next;
1484 kmem_free(nhp, DCENTTRY_SIZE(nhp->de_namelen));
1485 nhp = dep;
1486 }
1487 }
1488 kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * nhtsize);
1489
1490 /*
1491 * Free up the free space entries and hash table
1492 */
1493 for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
1494 fhp = dcp->dc_freehash[i];
1495 while (fhp != NULL) { /* for each chained entry */
1496 fep = fhp->df_next;
1497 kmem_cache_free(dnlc_dir_space_cache, fhp);
1498 fhp = fep;
1499 }
1500 }
1501 kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * fhtsize);
1502
1503 /*
1504 * Finally free the directory cache structure itself
1505 */
1506 ncs.ncs_dir_num_ents.value.ui64 -= (dcp->dc_num_entries +
1507 dcp->dc_num_free);
1508 kmem_free(dcp, sizeof (dircache_t));
1509 ncs.ncs_cur_dirs.value.ui64--;
1510 }
1511
1512 /*
1513 * Remove a partial or complete directory cache
1514 */
1515 void
1516 dnlc_dir_purge(dcanchor_t *dcap)
1517 {
1518 dircache_t *dcp;
1519
1520 mutex_enter(&dc_head.dch_lock);
1521 mutex_enter(&dcap->dca_lock);
1522 dcp = (dircache_t *)dcap->dca_dircache;
1523 if (!VALID_DIR_CACHE(dcp)) {
1524 mutex_exit(&dcap->dca_lock);
1525 mutex_exit(&dc_head.dch_lock);
1526 return;
1527 }
1528 dcap->dca_dircache = NULL;
1529 /*
1530 * Unchain from global list
1531 */
1532 dcp->dc_prev->dc_next = dcp->dc_next;
1533 dcp->dc_next->dc_prev = dcp->dc_prev;
1534 mutex_exit(&dcap->dca_lock);
1535 mutex_exit(&dc_head.dch_lock);
1536 dnlc_dir_abort(dcp);
1537 }
1538
1539 /*
1540 * Remove an entry from a complete or partial directory cache.
1541  * Return the handle through *handlep if handlep is non-null.
1542 */
1543 dcret_t
1544 dnlc_dir_rem_entry(dcanchor_t *dcap, const char *name, uint64_t *handlep)
1545 {
1546 dircache_t *dcp;
1547 dcentry_t **prevpp, *te;
1548 uint_t capacity;
1549 int hash;
1550 int ret;
1551 uchar_t namlen;
1552
1553 if (!dnlc_dir_enable) {
1554 return (DNOCACHE);
1555 }
1556
1557 mutex_enter(&dcap->dca_lock);
1558 dcp = (dircache_t *)dcap->dca_dircache;
1559 if (VALID_DIR_CACHE(dcp)) {
1560 dcp->dc_actime = ddi_get_lbolt64();
1561 if (dcp->dc_nhash_mask > 0) { /* ie not minimum */
1562 capacity = (dcp->dc_nhash_mask + 1) <<
1563 dnlc_dir_hash_size_shift;
1564 if (dcp->dc_num_entries <=
1565 (capacity >> dnlc_dir_hash_resize_shift)) {
1566 dnlc_dir_adjust_nhash(dcp);
1567 }
1568 }
1569 DNLC_DIR_HASH(name, hash, namlen);
1570 prevpp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1571 while (*prevpp != NULL) {
1572 if (((*prevpp)->de_hash == hash) &&
1573 (namlen == (*prevpp)->de_namelen) &&
1574 bcmp((*prevpp)->de_name, name, namlen) == 0) {
1575 if (handlep != NULL) {
1576 *handlep = (*prevpp)->de_handle;
1577 }
1578 te = *prevpp;
1579 *prevpp = (*prevpp)->de_next;
1580 kmem_free(te, DCENTTRY_SIZE(te->de_namelen));
1581
1582 /*
1583 * If the total number of entries
1584 * falls below half the minimum number
1585 * of entries then free this cache.
1586 */
1587 if (--dcp->dc_num_entries <
1588 (dnlc_dir_min_size >> 1)) {
1589 mutex_exit(&dcap->dca_lock);
1590 dnlc_dir_purge(dcap);
1591 } else {
1592 mutex_exit(&dcap->dca_lock);
1593 }
1594 ncs.ncs_dir_num_ents.value.ui64--;
1595 return (DFOUND);
1596 }
1597 prevpp = &((*prevpp)->de_next);
1598 }
1599 if (dcp->dc_complete) {
1600 ncs.ncs_dir_reme_fai.value.ui64++;
1601 ret = DNOENT;
1602 } else {
1603 ret = DNOCACHE;
1604 }
1605 mutex_exit(&dcap->dca_lock);
1606 return (ret);
1607 } else {
1608 mutex_exit(&dcap->dca_lock);
1609 return (DNOCACHE);
1610 }
1611 }
1612
1613
1614 /*
1615 * Remove free space of at least the given length from a complete
1616 * or partial directory cache.
1617 */
1618 dcret_t
1619 dnlc_dir_rem_space_by_len(dcanchor_t *dcap, uint_t len, uint64_t *handlep)
1620 {
1621 dircache_t *dcp;
1622 dcfree_t **prevpp, *tfp;
1623 uint_t fhtsize; /* free hash table size */
1624 uint_t i;
1625 uint_t capacity;
1626 int ret;
1627
1628 if (!dnlc_dir_enable) {
1629 return (DNOCACHE);
1630 }
1631
1632 mutex_enter(&dcap->dca_lock);
1633 dcp = (dircache_t *)dcap->dca_dircache;
1634 if (VALID_DIR_CACHE(dcp)) {
1635 dcp->dc_actime = ddi_get_lbolt64();
1636 if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
1637 capacity = (dcp->dc_fhash_mask + 1) <<
1638 dnlc_dir_hash_size_shift;
1639 if (dcp->dc_num_free <=
1640 (capacity >> dnlc_dir_hash_resize_shift)) {
1641 dnlc_dir_adjust_fhash(dcp);
1642 }
1643 }
1644 /*
1645 * Search for an entry of the appropriate size
1646 * on a first fit basis.
1647 */
1648 fhtsize = dcp->dc_fhash_mask + 1;
1649 for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
1650 prevpp = &(dcp->dc_freehash[i]);
1651 while (*prevpp != NULL) {
1652 if ((*prevpp)->df_len >= len) {
1653 *handlep = (*prevpp)->df_handle;
1654 tfp = *prevpp;
1655 *prevpp = (*prevpp)->df_next;
1656 dcp->dc_num_free--;
1657 mutex_exit(&dcap->dca_lock);
1658 kmem_cache_free(dnlc_dir_space_cache,
1659 tfp);
1660 ncs.ncs_dir_num_ents.value.ui64--;
1661 return (DFOUND);
1662 }
1663 prevpp = &((*prevpp)->df_next);
1664 }
1665 }
1666 if (dcp->dc_complete) {
1667 ret = DNOENT;
1668 } else {
1669 ret = DNOCACHE;
1670 }
1671 mutex_exit(&dcap->dca_lock);
1672 return (ret);
1673 } else {
1674 mutex_exit(&dcap->dca_lock);
1675 return (DNOCACHE);
1676 }
1677 }
1678
1679 /*
1680 * Remove free space with the given handle from a complete or partial
1681 * directory cache.
1682 */
1683 dcret_t
1684 dnlc_dir_rem_space_by_handle(dcanchor_t *dcap, uint64_t handle)
1685 {
1686 dircache_t *dcp;
1687 dcfree_t **prevpp, *tfp;
1688 uint_t capacity;
1689 int ret;
1690
1691 if (!dnlc_dir_enable) {
1692 return (DNOCACHE);
1693 }
1694
1695 mutex_enter(&dcap->dca_lock);
1696 dcp = (dircache_t *)dcap->dca_dircache;
1697 if (VALID_DIR_CACHE(dcp)) {
1698 dcp->dc_actime = ddi_get_lbolt64();
1699 if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
1700 capacity = (dcp->dc_fhash_mask + 1) <<
1701 dnlc_dir_hash_size_shift;
1702 if (dcp->dc_num_free <=
1703 (capacity >> dnlc_dir_hash_resize_shift)) {
1704 dnlc_dir_adjust_fhash(dcp);
1705 }
1706 }
1707
1708 /*
1709 * search for the exact entry
1710 */
1711 prevpp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
1712 while (*prevpp != NULL) {
1713 if ((*prevpp)->df_handle == handle) {
1714 tfp = *prevpp;
1715 *prevpp = (*prevpp)->df_next;
1716 dcp->dc_num_free--;
1717 mutex_exit(&dcap->dca_lock);
1718 kmem_cache_free(dnlc_dir_space_cache, tfp);
1719 ncs.ncs_dir_num_ents.value.ui64--;
1720 return (DFOUND);
1721 }
1722 prevpp = &((*prevpp)->df_next);
1723 }
1724 if (dcp->dc_complete) {
1725 ncs.ncs_dir_rems_fai.value.ui64++;
1726 ret = DNOENT;
1727 } else {
1728 ret = DNOCACHE;
1729 }
1730 mutex_exit(&dcap->dca_lock);
1731 return (ret);
1732 } else {
1733 mutex_exit(&dcap->dca_lock);
1734 return (DNOCACHE);
1735 }
1736 }
1737
1738 /*
1739  * Update the handle of a directory cache entry.
1740 */
1741 dcret_t
1742 dnlc_dir_update(dcanchor_t *dcap, const char *name, uint64_t handle)
1743 {
1744 dircache_t *dcp;
1745 dcentry_t *dep;
1746 int hash;
1747 int ret;
1748 uchar_t namlen;
1749
1750 if (!dnlc_dir_enable) {
1751 return (DNOCACHE);
1752 }
1753
1754 mutex_enter(&dcap->dca_lock);
1755 dcp = (dircache_t *)dcap->dca_dircache;
1756 if (VALID_DIR_CACHE(dcp)) {
1757 dcp->dc_actime = ddi_get_lbolt64();
1758 DNLC_DIR_HASH(name, hash, namlen);
1759 dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1760 while (dep != NULL) {
1761 if ((dep->de_hash == hash) &&
1762 (namlen == dep->de_namelen) &&
1763 bcmp(dep->de_name, name, namlen) == 0) {
1764 dep->de_handle = handle;
1765 mutex_exit(&dcap->dca_lock);
1766 return (DFOUND);
1767 }
1768 dep = dep->de_next;
1769 }
1770 if (dcp->dc_complete) {
1771 ncs.ncs_dir_upd_fail.value.ui64++;
1772 ret = DNOENT;
1773 } else {
1774 ret = DNOCACHE;
1775 }
1776 mutex_exit(&dcap->dca_lock);
1777 return (ret);
1778 } else {
1779 mutex_exit(&dcap->dca_lock);
1780 return (DNOCACHE);
1781 }
1782 }
1783
1784 void
1785 dnlc_dir_fini(dcanchor_t *dcap)
1786 {
1787 dircache_t *dcp;
1788
1789 mutex_enter(&dc_head.dch_lock);
1790 mutex_enter(&dcap->dca_lock);
1791 dcp = (dircache_t *)dcap->dca_dircache;
1792 if (VALID_DIR_CACHE(dcp)) {
1793 /*
1794 * Unchain from global list
1795 */
1796 ncs.ncs_dir_finipurg.value.ui64++;
1797 dcp->dc_prev->dc_next = dcp->dc_next;
1798 dcp->dc_next->dc_prev = dcp->dc_prev;
1799 } else {
1800 dcp = NULL;
1801 }
1802 dcap->dca_dircache = NULL;
1803 mutex_exit(&dcap->dca_lock);
1804 mutex_exit(&dc_head.dch_lock);
1805 mutex_destroy(&dcap->dca_lock);
1806 if (dcp) {
1807 dnlc_dir_abort(dcp);
1808 }
1809 }
1810
1811 /*
1812 * Reclaim callback for dnlc directory caching.
1813 * Invoked by the kernel memory allocator when memory gets tight.
1814  * This is a pretty serious condition and can easily lead to system
1815 * hangs if not enough space is returned.
1816 *
1817 * Deciding which directory (or directories) to purge is tricky.
1818  * Purging everything is overkill, but purging just the oldest used
1819 * was found to lead to hangs. The largest cached directories use the
1820 * most memory, but take the most effort to rebuild, whereas the smaller
1821 * ones have little value and give back little space. So what to do?
1822 *
1823 * The current policy is to continue purging the oldest used directories
1824 * until at least dnlc_dir_min_reclaim directory entries have been purged.
1825 */
1826 /*ARGSUSED*/
1827 static void
1828 dnlc_dir_reclaim(void *unused)
1829 {
1830 dircache_t *dcp, *oldest;
1831 uint_t dirent_cnt = 0;
1832
1833 mutex_enter(&dc_head.dch_lock);
1834 while (dirent_cnt < dnlc_dir_min_reclaim) {
1835 dcp = dc_head.dch_next;
1836 oldest = NULL;
1837 while (dcp != (dircache_t *)&dc_head) {
1838 if (oldest == NULL) {
1839 oldest = dcp;
1840 } else {
1841 if (dcp->dc_actime < oldest->dc_actime) {
1842 oldest = dcp;
1843 }
1844 }
1845 dcp = dcp->dc_next;
1846 }
1847 if (oldest == NULL) {
1848 /* nothing to delete */
1849 mutex_exit(&dc_head.dch_lock);
1850 return;
1851 }
1852 /*
1853 * remove from directory chain and purge
1854 */
1855 oldest->dc_prev->dc_next = oldest->dc_next;
1856 oldest->dc_next->dc_prev = oldest->dc_prev;
1857 mutex_enter(&oldest->dc_anchor->dca_lock);
1858 /*
1859 		 * If this was the last directory cache, then it must be
1860 		 * too large.
1860 * Mark it as such by saving a special dircache_t
1861 * pointer (DC_RET_LOW_MEM) in the anchor. The error DNOMEM
1862 * will be presented to the caller of dnlc_dir_start()
1863 */
1864 if (oldest->dc_next == oldest->dc_prev) {
1865 oldest->dc_anchor->dca_dircache = DC_RET_LOW_MEM;
1866 ncs.ncs_dir_rec_last.value.ui64++;
1867 } else {
1868 oldest->dc_anchor->dca_dircache = NULL;
1869 ncs.ncs_dir_recl_any.value.ui64++;
1870 }
1871 mutex_exit(&oldest->dc_anchor->dca_lock);
1872 dirent_cnt += oldest->dc_num_entries;
1873 dnlc_dir_abort(oldest);
1874 }
1875 mutex_exit(&dc_head.dch_lock);
1876 }
1877
1878 /*
1879 * Dynamically grow or shrink the size of the name hash table
1880 */
1881 static void
1882 dnlc_dir_adjust_nhash(dircache_t *dcp)
1883 {
1884 dcentry_t **newhash, *dep, **nhp, *tep;
1885 uint_t newsize;
1886 uint_t oldsize;
1887 uint_t newsizemask;
1888 int i;
1889
1890 /*
1891 * Allocate new hash table
1892 */
1893 newsize = dcp->dc_num_entries >> dnlc_dir_hash_size_shift;
1894 newhash = kmem_zalloc(sizeof (dcentry_t *) * newsize, KM_NOSLEEP);
1895 if (newhash == NULL) {
1896 /*
1897 		 * System is short on memory, so just return.
1898 		 * Note, the old hash table is still usable.
1899 		 * This return is unlikely to occur repeatedly, because
1900 		 * either some other directory caches will be reclaimed
1901 		 * due to memory shortage, thus freeing memory, or this
1902 		 * directory cache will be reclaimed.
1903 */
1904 return;
1905 }
1906 oldsize = dcp->dc_nhash_mask + 1;
1907 dcp->dc_nhash_mask = newsizemask = newsize - 1;
1908
1909 /*
1910 * Move entries from the old table to the new
1911 */
1912 for (i = 0; i < oldsize; i++) { /* for each hash bucket */
1913 dep = dcp->dc_namehash[i];
1914 while (dep != NULL) { /* for each chained entry */
1915 tep = dep;
1916 dep = dep->de_next;
1917 nhp = &newhash[tep->de_hash & newsizemask];
1918 tep->de_next = *nhp;
1919 *nhp = tep;
1920 }
1921 }
1922
1923 /*
1924 * delete old hash table and set new one in place
1925 */
1926 kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * oldsize);
1927 dcp->dc_namehash = newhash;
1928 }
1929
1930 /*
1931 * Dynamically grow or shrink the size of the free space hash table
1932 */
1933 static void
1934 dnlc_dir_adjust_fhash(dircache_t *dcp)
1935 {
1936 dcfree_t **newhash, *dfp, **nhp, *tfp;
1937 uint_t newsize;
1938 uint_t oldsize;
1939 int i;
1940
1941 /*
1942 * Allocate new hash table
1943 */
1944 newsize = dcp->dc_num_free >> dnlc_dir_hash_size_shift;
1945 newhash = kmem_zalloc(sizeof (dcfree_t *) * newsize, KM_NOSLEEP);
1946 if (newhash == NULL) {
1947 /*
1948 		 * System is short on memory, so just return.
1949 		 * Note, the old hash table is still usable.
1950 		 * This return is unlikely to occur repeatedly, because
1951 		 * either some other directory caches will be reclaimed
1952 		 * due to memory shortage, thus freeing memory, or this
1953 		 * directory cache will be reclaimed.
1954 */
1955 return;
1956 }
1957 oldsize = dcp->dc_fhash_mask + 1;
1958 dcp->dc_fhash_mask = newsize - 1;
1959
1960 /*
1961 * Move entries from the old table to the new
1962 */
1963 for (i = 0; i < oldsize; i++) { /* for each hash bucket */
1964 dfp = dcp->dc_freehash[i];
1965 while (dfp != NULL) { /* for each chained entry */
1966 tfp = dfp;
1967 dfp = dfp->df_next;
1968 nhp = &newhash[DDFHASH(tfp->df_handle, dcp)];
1969 tfp->df_next = *nhp;
1970 *nhp = tfp;
1971 }
1972 }
1973
1974 /*
1975 * delete old hash table and set new one in place
1976 */
1977 kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * oldsize);
1978 dcp->dc_freehash = newhash;
1979 }
1980