/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2017 by Delphix. All rights reserved.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/dnlc.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/vtrace.h>
#include <sys/bitmap.h>
#include <sys/var.h>
#include <sys/sysmacros.h>
#include <sys/kstat.h>
#include <sys/atomic.h>
#include <sys/taskq.h>

/*
 * Directory name lookup cache.
 * Based on code originally done by Robert Elz at Melbourne.
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  Each hash chain is ordered by LRU.
 * The cache is indexed by the hash value obtained from (vp, name),
 * where the vp refers to the directory containing the name.
 */

/*
 * We want to be able to identify files that are referenced only by the DNLC.
 * When adding a reference from the DNLC, call VN_HOLD_DNLC instead of VN_HOLD,
 * since multiple DNLC references should only be counted once in v_count. This
 * file contains only two (2) calls to VN_HOLD, renamed VN_HOLD_CALLER in the
 * hope that no one will mistakenly add a VN_HOLD to this file. (Unfortunately
 * it is not possible to #undef VN_HOLD and retain VN_HOLD_CALLER. Ideally a
 * Makefile rule would grep uncommented C tokens to check that VN_HOLD is
 * referenced only once in this file, to define VN_HOLD_CALLER.)
 */
#define	VN_HOLD_CALLER	VN_HOLD
#define	VN_HOLD_DNLC(vp)	{	\
	mutex_enter(&(vp)->v_lock);	\
	if ((vp)->v_count_dnlc == 0) {	\
		VN_HOLD_LOCKED(vp);	\
	}	\
	(vp)->v_count_dnlc++;	\
	mutex_exit(&(vp)->v_lock);	\
}
#define	VN_RELE_DNLC(vp)	{	\
	vn_rele_dnlc(vp);	\
}

/*
 * Tunable nc_hashavelen is the desired average hash chain length, from
 * which the size of the nc_hash table is derived at create time.
 */
#define	NC_HASHAVELEN_DEFAULT	4
int nc_hashavelen = NC_HASHAVELEN_DEFAULT;
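
/*
 * For illustration only: with ncsize set to 4000 and the default
 * nc_hashavelen of 4, dnlc_init() below computes nc_hashsz = 4000 / 4 =
 * 1000, which is then rounded to the next power of two (1024), for an
 * average chain length of roughly four entries per bucket.
 */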

/*
 * NC_MOVETOFRONT is the move-to-front threshold: if the hash lookup
 * depth exceeds this value, we move the looked-up entry to the front of
 * its hash chain.  The idea is to make sure that the most frequently
 * accessed entries are found most quickly (by keeping them near the
 * front of their hash chains).
 */
#define	NC_MOVETOFRONT	2

/*
 * DNLC_MAX_RELE is used to size an array on the stack when releasing
 * vnodes. This array is used rather than calling VN_RELE() inline because
 * all dnlc locks must be dropped by that time in order to avoid a
 * possible deadlock. This deadlock occurs when the dnlc holds the last
 * reference to the vnode and so the VOP_INACTIVE vector is called which
 * can in turn call back into the dnlc. A global array was used but had
 * many problems:
 *	1) It had no upper bound on the array size, as entries can be
 *	   added after starting the purge.
 *	2) The locking scheme could cause a hang.
 *	3) It caused serialisation on the global lock.
 *	4) The array was often unnecessarily huge.
 *
 * Note the current value 8 allows up to 4 cache entries (to be purged
 * from each hash chain), before having to cycle around and retry.
 * This ought to be ample given that nc_hashavelen is typically very small.
 */
#define	DNLC_MAX_RELE	8 /* must be even */

/*
 * Hash table of name cache entries for fast lookup, dynamically
 * allocated at startup.
 */
nc_hash_t *nc_hash;

/*
 * Rotors. Used to select entries on a round-robin basis.
 */
static nc_hash_t *dnlc_purge_fs1_rotor;
static nc_hash_t *dnlc_free_rotor;

/*
 * # of dnlc entries (uninitialized)
 *
 * the initial value was chosen as being
 * a random string of bits, probably not
 * normally chosen by a systems administrator
 */
int ncsize = -1;
volatile uint32_t dnlc_nentries = 0;	/* current num of name cache entries */
static int nc_hashsz;			/* size of hash table */
static int nc_hashmask;			/* size of hash table minus 1 */

/*
 * The dnlc_reduce_cache() taskq queue is activated when there are
 * ncsize name cache entries and, if no parameter is provided, it reduces
 * the size down to dnlc_nentries_low_water, which is by default one
 * hundredth less than (i.e. 99% of) ncsize.
 *
 * If a parameter is provided to dnlc_reduce_cache(), then we reduce
 * the size down based on ncsize_onepercent - where ncsize_onepercent
 * is 1% of ncsize; however, we never let dnlc_reduce_cache() reduce
 * the size below 3% of ncsize (ncsize_min_percent).
 */
#define	DNLC_LOW_WATER_DIVISOR_DEFAULT	100
uint_t dnlc_low_water_divisor = DNLC_LOW_WATER_DIVISOR_DEFAULT;
uint_t dnlc_nentries_low_water;
int dnlc_reduce_idle = 1;	/* no locking needed */
uint_t ncsize_onepercent;
uint_t ncsize_min_percent;
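
/*
 * A worked example of the sizing above (illustrative only): if an
 * administrator sets ncsize to 10000, dnlc_init() leaves ncsize alone and
 * derives dnlc_nentries_low_water = 10000 - (10000 / 100) = 9900,
 * ncsize_onepercent = 100, ncsize_min_percent = 300 and
 * dnlc_max_nentries = 20000.
 */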

/*
 * If dnlc_nentries hits dnlc_max_nentries (twice ncsize)
 * then this means the dnlc_reduce_cache() taskq is failing to
 * keep up. In this case we refuse to add new entries to the dnlc
 * until the taskq catches up.
 */
uint_t dnlc_max_nentries;	/* twice ncsize */
uint64_t dnlc_max_nentries_cnt = 0;	/* statistic on times we failed */

/*
 * Tunable to define when we should just remove items from
 * the end of the chain.
 */
#define	DNLC_LONG_CHAIN	8
uint_t dnlc_long_chain = DNLC_LONG_CHAIN;

/*
 * ncstats has been deprecated, due to the integer size of the counters
 * which can easily overflow in the dnlc.
 * It is maintained (at some expense) for compatibility.
 * The preferred interface is the kstat accessible nc_stats below.
 */
struct ncstats ncstats;

struct nc_stats ncs = {
	{ "hits",			KSTAT_DATA_UINT64 },
	{ "misses",			KSTAT_DATA_UINT64 },
	{ "negative_cache_hits",	KSTAT_DATA_UINT64 },
	{ "enters",			KSTAT_DATA_UINT64 },
	{ "double_enters",		KSTAT_DATA_UINT64 },
	{ "purge_total_entries",	KSTAT_DATA_UINT64 },
	{ "purge_all",			KSTAT_DATA_UINT64 },
	{ "purge_vp",			KSTAT_DATA_UINT64 },
	{ "purge_vfs",			KSTAT_DATA_UINT64 },
	{ "purge_fs1",			KSTAT_DATA_UINT64 },
	{ "pick_free",			KSTAT_DATA_UINT64 },
	{ "pick_heuristic",		KSTAT_DATA_UINT64 },
	{ "pick_last",			KSTAT_DATA_UINT64 },

	/* directory caching stats */

	{ "dir_hits",			KSTAT_DATA_UINT64 },
	{ "dir_misses",			KSTAT_DATA_UINT64 },
	{ "dir_cached_current",		KSTAT_DATA_UINT64 },
	{ "dir_entries_cached_current",	KSTAT_DATA_UINT64 },
	{ "dir_cached_total",		KSTAT_DATA_UINT64 },
	{ "dir_start_no_memory",	KSTAT_DATA_UINT64 },
	{ "dir_add_no_memory",		KSTAT_DATA_UINT64 },
	{ "dir_add_abort",		KSTAT_DATA_UINT64 },
	{ "dir_add_max",		KSTAT_DATA_UINT64 },
	{ "dir_remove_entry_fail",	KSTAT_DATA_UINT64 },
	{ "dir_remove_space_fail",	KSTAT_DATA_UINT64 },
	{ "dir_update_fail",		KSTAT_DATA_UINT64 },
	{ "dir_fini_purge",		KSTAT_DATA_UINT64 },
	{ "dir_reclaim_last",		KSTAT_DATA_UINT64 },
	{ "dir_reclaim_any",		KSTAT_DATA_UINT64 },
};

static int doingcache = 1;

vnode_t negative_cache_vnode;

/*
 * Insert entry at the front of the queue
 */
#define	nc_inshash(ncp, hp)	\
{	\
	(ncp)->hash_next = (hp)->hash_next;	\
	(ncp)->hash_prev = (ncache_t *)(hp);	\
	(hp)->hash_next->hash_prev = (ncp);	\
	(hp)->hash_next = (ncp);	\
}

/*
 * Remove entry from hash queue
 */
#define	nc_rmhash(ncp)	\
{	\
	(ncp)->hash_prev->hash_next = (ncp)->hash_next;	\
	(ncp)->hash_next->hash_prev = (ncp)->hash_prev;	\
	(ncp)->hash_prev = NULL;	\
	(ncp)->hash_next = NULL;	\
}

/*
 * Free an entry.
 */
#define	dnlc_free(ncp)	\
{	\
	kmem_free((ncp), sizeof (ncache_t) + (ncp)->namlen);	\
	atomic_dec_32(&dnlc_nentries);	\
}
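
/*
 * The hash chains manipulated by these macros are circular doubly-linked
 * lists whose head is the nc_hash_t bucket itself cast to ncache_t. An
 * empty chain therefore satisfies hp->hash_next == (ncache_t *)hp, which
 * is the emptiness test used throughout this file, and insertion and
 * removal never need to special-case the ends of the list.
 */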

/*
 * Cached directory info.
 * ======================
 */

/*
 * Cached directory free space hash function.
 * Needs the free space handle and the dcp to get the hash table size.
 * Returns the hash index.
 */
#define	DDFHASH(handle, dcp)	((handle >> 2) & (dcp)->dc_fhash_mask)

/*
 * Cached directory name entry hash function.
 * Uses the name and returns, in the input arguments, the hash and the
 * name length.
 */
#define	DNLC_DIR_HASH(name, hash, namelen)	\
{	\
	char Xc;	\
	const char *Xcp;	\
	hash = *name;	\
	for (Xcp = (name + 1); (Xc = *Xcp) != 0; Xcp++)	\
		hash = (hash << 4) + hash + Xc;	\
	ASSERT((Xcp - (name)) <= ((1 << NBBY) - 1));	\
	namelen = Xcp - (name);	\
}
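
/*
 * Since (hash << 4) + hash == hash * 17, the loop above is the classic
 * multiplicative string hash. A standalone sketch of the same computation
 * (for illustration only; the kernel uses the macro form so that the name
 * length falls out of the same pass):
 *
 *	int hash = name[0];
 *	for (const char *cp = name + 1; *cp != '\0'; cp++)
 *		hash = hash * 17 + *cp;
 */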

/* special dircache_t pointer to indicate error should be returned */
/*
 * The anchor directory cache pointer can contain 3 types of values,
 * 1) NULL: No directory cache
 * 2) DC_RET_LOW_MEM (-1): There was a directory cache that was found to
 *    be too big or a memory shortage occurred. This value remains in the
 *    pointer until a dnlc_dir_start() which returns a DNOMEM error.
 *    This is kludgy but efficient and only visible in this source file.
 * 3) A valid cache pointer.
 */
#define	DC_RET_LOW_MEM	(dircache_t *)1
#define	VALID_DIR_CACHE(dcp)	((dircache_t *)(dcp) > DC_RET_LOW_MEM)

/* Tunables */
uint_t dnlc_dir_enable = 1; /* disable caching directories by setting to 0 */
uint_t dnlc_dir_min_size = 40; /* min no of directory entries before caching */
uint_t dnlc_dir_max_size = UINT_MAX; /* ditto maximum */
uint_t dnlc_dir_hash_size_shift = 3; /* 8 entries per hash bucket */
uint_t dnlc_dir_min_reclaim = 350000; /* approx 1MB of dcentrys */
/*
 * dnlc_dir_hash_resize_shift determines when the hash tables
 * get re-adjusted due to growth or shrinkage
 * - currently 2 indicating that there can be at most 4
 * times or at least one quarter the number of entries
 * before hash table readjustment. Note that with
 * dnlc_dir_hash_size_shift above set at 3 this would
 * mean readjustment would occur if the average number
 * of entries went above 32 or below 2
 */
uint_t dnlc_dir_hash_resize_shift = 2; /* readjust rate */

static kmem_cache_t *dnlc_dir_space_cache; /* free space entry cache */
static dchead_t dc_head; /* anchor of cached directories */

/* Prototypes */
static ncache_t *dnlc_get(uchar_t namlen);
static ncache_t *dnlc_search(vnode_t *dp, const char *name, uchar_t namlen,
    int hash);
static void dnlc_dir_reclaim(void *unused);
static void dnlc_dir_abort(dircache_t *dcp);
static void dnlc_dir_adjust_fhash(dircache_t *dcp);
static void dnlc_dir_adjust_nhash(dircache_t *dcp);
static void do_dnlc_reduce_cache(void *);


/*
 * Initialize the directory cache.
 */
void
dnlc_init()
{
	nc_hash_t *hp;
	kstat_t *ksp;
	int i;

	/*
	 * Set up the size of the dnlc (ncsize) and its low water mark.
	 */
	if (ncsize == -1) {
		/* calculate a reasonable size for the low water */
		dnlc_nentries_low_water = 4 * (v.v_proc + maxusers) + 320;
		ncsize = dnlc_nentries_low_water +
		    (dnlc_nentries_low_water / dnlc_low_water_divisor);
	} else {
		/* don't change the user specified ncsize */
		dnlc_nentries_low_water =
		    ncsize - (ncsize / dnlc_low_water_divisor);
	}
	if (ncsize <= 0) {
		doingcache = 0;
		dnlc_dir_enable = 0; /* also disable directory caching */
		ncsize = 0;
		cmn_err(CE_NOTE, "name cache (dnlc) disabled");
		return;
	}
	dnlc_max_nentries = ncsize * 2;
	ncsize_onepercent = ncsize / 100;
	ncsize_min_percent = ncsize_onepercent * 3;

	/*
	 * Initialise the hash table.
	 * Compute hash size rounding to the next power of two.
	 */
	nc_hashsz = ncsize / nc_hashavelen;
	nc_hashsz = 1 << highbit(nc_hashsz);
	nc_hashmask = nc_hashsz - 1;
	nc_hash = kmem_zalloc(nc_hashsz * sizeof (*nc_hash), KM_SLEEP);
	for (i = 0; i < nc_hashsz; i++) {
		hp = (nc_hash_t *)&nc_hash[i];
		mutex_init(&hp->hash_lock, NULL, MUTEX_DEFAULT, NULL);
		hp->hash_next = (ncache_t *)hp;
		hp->hash_prev = (ncache_t *)hp;
	}

	/*
	 * Initialize rotors
	 */
	dnlc_free_rotor = dnlc_purge_fs1_rotor = &nc_hash[0];

	/*
	 * Set up the directory caching to use kmem_cache_alloc
	 * for its free space entries so that we can get a callback
	 * when the system is short on memory, to allow us to free
	 * up some memory. We don't use the constructor/destructor
	 * functions.
	 */
	dnlc_dir_space_cache = kmem_cache_create("dnlc_space_cache",
	    sizeof (dcfree_t), 0, NULL, NULL, dnlc_dir_reclaim, NULL,
	    NULL, 0);

	/*
	 * Initialise the head of the cached directory structures
	 */
	mutex_init(&dc_head.dch_lock, NULL, MUTEX_DEFAULT, NULL);
	dc_head.dch_next = (dircache_t *)&dc_head;
	dc_head.dch_prev = (dircache_t *)&dc_head;

	/*
	 * Put a hold on the negative cache vnode so that it never goes away
	 * (VOP_INACTIVE isn't called on it).
	 */
	vn_reinit(&negative_cache_vnode);

	/*
	 * Initialise kstats - both the old compatibility raw kind and
	 * the more extensive named stats.
	 */
	ksp = kstat_create("unix", 0, "ncstats", "misc", KSTAT_TYPE_RAW,
	    sizeof (struct ncstats), KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *) &ncstats;
		kstat_install(ksp);
	}
	ksp = kstat_create("unix", 0, "dnlcstats", "misc", KSTAT_TYPE_NAMED,
	    sizeof (ncs) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *) &ncs;
		kstat_install(ksp);
	}
}

/*
 * Add a name to the directory cache.
 */
void
dnlc_enter(vnode_t *dp, const char *name, vnode_t *vp)
{
	ncache_t *ncp;
	nc_hash_t *hp;
	uchar_t namlen;
	int hash;

	TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_enter_start:");

	if (!doingcache) {
		TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
		    "dnlc_enter_end:(%S) %d", "not caching", 0);
		return;
	}

	/*
	 * Get a new dnlc entry. Assume the entry won't be in the cache
	 * and initialize it now
	 */
	DNLCHASH(name, dp, hash, namlen);
	if ((ncp = dnlc_get(namlen)) == NULL)
		return;
	ncp->dp = dp;
	VN_HOLD_DNLC(dp);
	ncp->vp = vp;
	VN_HOLD_DNLC(vp);
	bcopy(name, ncp->name, namlen + 1); /* name and null */
	ncp->hash = hash;
	hp = &nc_hash[hash & nc_hashmask];

	mutex_enter(&hp->hash_lock);
	if (dnlc_search(dp, name, namlen, hash) != NULL) {
		mutex_exit(&hp->hash_lock);
		ncstats.dbl_enters++;
		ncs.ncs_dbl_enters.value.ui64++;
		VN_RELE_DNLC(dp);
		VN_RELE_DNLC(vp);
		dnlc_free(ncp);		/* crfree done here */
		TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
		    "dnlc_enter_end:(%S) %d", "dbl enter", ncstats.dbl_enters);
		return;
	}
	/*
	 * Insert back into the hash chain.
	 */
	nc_inshash(ncp, hp);
	mutex_exit(&hp->hash_lock);
	ncstats.enters++;
	ncs.ncs_enters.value.ui64++;
	TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
	    "dnlc_enter_end:(%S) %d", "done", ncstats.enters);
}
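
/*
 * Typical caller pattern, sketched for illustration (myfs_scan_dir() is a
 * hypothetical filesystem routine, not part of this interface): on a cache
 * miss the filesystem scans the directory itself and then primes the dnlc,
 * using DNLC_NO_VNODE to cache the fact that a name does not exist.
 *
 *	if ((vp = dnlc_lookup(dvp, nm)) == NULL) {
 *		vp = myfs_scan_dir(dvp, nm);
 *		dnlc_enter(dvp, nm, (vp != NULL) ? vp : DNLC_NO_VNODE);
 *	} else if (vp == DNLC_NO_VNODE) {
 *		VN_RELE(vp);	<- negative cache hit: name does not exist
 *	}
 */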

/*
 * Add a name to the directory cache.
 *
 * This function is basically identical to dnlc_enter().
 * The difference is that when the desired dnlc entry is found, the
 * vnode in the ncache is compared with the vnode passed in.
 *
 * If they are not equal then the ncache is updated with the passed-in
 * vnode. Otherwise it just frees up the newly allocated dnlc entry.
 */
void
dnlc_update(vnode_t *dp, const char *name, vnode_t *vp)
{
	ncache_t *ncp;
	ncache_t *tcp;
	vnode_t *tvp;
	nc_hash_t *hp;
	int hash;
	uchar_t namlen;

	TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_update_start:");

	if (!doingcache) {
		TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
		    "dnlc_update_end:(%S) %d", "not caching", 0);
		return;
	}

	/*
	 * Get a new dnlc entry and initialize it now.
	 * If we fail to get a new entry, call dnlc_remove() to purge
	 * any existing dnlc entry including negative cache (DNLC_NO_VNODE)
	 * entry.
	 * Failure to clear an existing entry could result in a false dnlc
	 * lookup (negative/stale entry).
	 */
	DNLCHASH(name, dp, hash, namlen);
	if ((ncp = dnlc_get(namlen)) == NULL) {
		dnlc_remove(dp, name);
		return;
	}
	ncp->dp = dp;
	VN_HOLD_DNLC(dp);
	ncp->vp = vp;
	VN_HOLD_DNLC(vp);
	bcopy(name, ncp->name, namlen + 1); /* name and null */
	ncp->hash = hash;
	hp = &nc_hash[hash & nc_hashmask];

	mutex_enter(&hp->hash_lock);
	if ((tcp = dnlc_search(dp, name, namlen, hash)) != NULL) {
		if (tcp->vp != vp) {
			tvp = tcp->vp;
			tcp->vp = vp;
			mutex_exit(&hp->hash_lock);
			VN_RELE_DNLC(tvp);
			ncstats.enters++;
			ncs.ncs_enters.value.ui64++;
			TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
			    "dnlc_update_end:(%S) %d", "done", ncstats.enters);
		} else {
			mutex_exit(&hp->hash_lock);
			VN_RELE_DNLC(vp);
			ncstats.dbl_enters++;
			ncs.ncs_dbl_enters.value.ui64++;
			TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
			    "dnlc_update_end:(%S) %d",
			    "dbl enter", ncstats.dbl_enters);
		}
		VN_RELE_DNLC(dp);
		dnlc_free(ncp);		/* crfree done here */
		return;
	}
	/*
	 * insert the new entry, since it is not in dnlc yet
	 */
	nc_inshash(ncp, hp);
	mutex_exit(&hp->hash_lock);
	ncstats.enters++;
	ncs.ncs_enters.value.ui64++;
	TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
	    "dnlc_update_end:(%S) %d", "done", ncstats.enters);
}

/*
 * Look up a name in the directory name cache.
 *
 * Return a doubly-held vnode if found: one hold so that it may
 * remain in the cache for other users, the other hold so that
 * the cache entry is not recycled and the identity of the vnode
 * lost before the caller can use the vnode.
 */
vnode_t *
dnlc_lookup(vnode_t *dp, const char *name)
{
	ncache_t *ncp;
	nc_hash_t *hp;
	vnode_t *vp;
	int hash, depth;
	uchar_t namlen;

	TRACE_2(TR_FAC_NFS, TR_DNLC_LOOKUP_START,
	    "dnlc_lookup_start:dp %x name %s", dp, name);

	if (!doingcache) {
		TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
		    "dnlc_lookup_end:%S %d vp %x name %s",
		    "not_caching", 0, NULL, name);
		return (NULL);
	}

	DNLCHASH(name, dp, hash, namlen);
	depth = 1;
	hp = &nc_hash[hash & nc_hashmask];
	mutex_enter(&hp->hash_lock);

	for (ncp = hp->hash_next; ncp != (ncache_t *)hp;
	    ncp = ncp->hash_next) {
		if (ncp->hash == hash &&	/* fast signature check */
		    ncp->dp == dp &&
		    ncp->namlen == namlen &&
		    bcmp(ncp->name, name, namlen) == 0) {
			/*
			 * Move this entry to the head of its hash chain
			 * if it's not already close.
			 */
			if (depth > NC_MOVETOFRONT) {
				ncache_t *next = ncp->hash_next;
				ncache_t *prev = ncp->hash_prev;

				prev->hash_next = next;
				next->hash_prev = prev;
				ncp->hash_next = next = hp->hash_next;
				ncp->hash_prev = (ncache_t *)hp;
				next->hash_prev = ncp;
				hp->hash_next = ncp;

				ncstats.move_to_front++;
			}

			/*
			 * Put a hold on the vnode now so its identity
			 * can't change before the caller has a chance to
			 * put a hold on it.
			 */
			vp = ncp->vp;
			VN_HOLD_CALLER(vp); /* VN_HOLD 1 of 2 in this file */
			mutex_exit(&hp->hash_lock);
			ncstats.hits++;
			ncs.ncs_hits.value.ui64++;
			if (vp == DNLC_NO_VNODE) {
				ncs.ncs_neg_hits.value.ui64++;
			}
			TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
			    "dnlc_lookup_end:%S %d vp %x name %s", "hit",
			    ncstats.hits, vp, name);
			return (vp);
		}
		depth++;
	}

	mutex_exit(&hp->hash_lock);
	ncstats.misses++;
	ncs.ncs_misses.value.ui64++;
	TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
	    "dnlc_lookup_end:%S %d vp %x name %s", "miss", ncstats.misses,
	    NULL, name);
	return (NULL);
}

/*
 * Remove an entry in the directory name cache.
 */
void
dnlc_remove(vnode_t *dp, const char *name)
{
	ncache_t *ncp;
	nc_hash_t *hp;
	uchar_t namlen;
	int hash;

	if (!doingcache)
		return;
	DNLCHASH(name, dp, hash, namlen);
	hp = &nc_hash[hash & nc_hashmask];

	mutex_enter(&hp->hash_lock);
	if ((ncp = dnlc_search(dp, name, namlen, hash)) != NULL) {
		/*
		 * Free up the entry
		 */
		nc_rmhash(ncp);
		mutex_exit(&hp->hash_lock);
		VN_RELE_DNLC(ncp->vp);
		VN_RELE_DNLC(ncp->dp);
		dnlc_free(ncp);
		return;
	}
	mutex_exit(&hp->hash_lock);
}

/*
 * Purge the entire cache.
 */
void
dnlc_purge()
{
	nc_hash_t *nch;
	ncache_t *ncp;
	int index;
	int i;
	vnode_t *nc_rele[DNLC_MAX_RELE];

	if (!doingcache)
		return;

	ncstats.purges++;
	ncs.ncs_purge_all.value.ui64++;

	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
		index = 0;
		mutex_enter(&nch->hash_lock);
		ncp = nch->hash_next;
		while (ncp != (ncache_t *)nch) {
			ncache_t *np;

			np = ncp->hash_next;
			nc_rele[index++] = ncp->vp;
			nc_rele[index++] = ncp->dp;

			nc_rmhash(ncp);
			dnlc_free(ncp);
			ncp = np;
			ncs.ncs_purge_total.value.ui64++;
			if (index == DNLC_MAX_RELE)
				break;
		}
		mutex_exit(&nch->hash_lock);

		/* Release holds on all the vnodes now that we have no locks */
		for (i = 0; i < index; i++) {
			VN_RELE_DNLC(nc_rele[i]);
		}
		if (ncp != (ncache_t *)nch) {
			nch--; /* Do current hash chain again */
		}
	}
}
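
/*
 * The batching pattern above recurs in the purge routines that follow:
 * at most DNLC_MAX_RELE / 2 entries are unhashed per pass while the chain
 * lock is held, their vnode and directory holds are stashed in nc_rele[],
 * and the holds are only released once the lock has been dropped. If the
 * array fills before the chain is empty, the chain is revisited (nch--).
 * This keeps VN_RELE_DNLC(), and thus possibly VOP_INACTIVE, from being
 * called with any dnlc lock held.
 */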

/*
 * Purge any cache entries referencing a vnode. Exit as soon as the dnlc
 * reference count goes to zero (the caller still holds a reference).
 */
void
dnlc_purge_vp(vnode_t *vp)
{
	nc_hash_t *nch;
	ncache_t *ncp;
	int index;
	vnode_t *nc_rele[DNLC_MAX_RELE];

	ASSERT(vp->v_count > 0);
	if (vp->v_count_dnlc == 0) {
		return;
	}

	if (!doingcache)
		return;

	ncstats.purges++;
	ncs.ncs_purge_vp.value.ui64++;

	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
		index = 0;
		mutex_enter(&nch->hash_lock);
		ncp = nch->hash_next;
		while (ncp != (ncache_t *)nch) {
			ncache_t *np;

			np = ncp->hash_next;
			if (ncp->dp == vp || ncp->vp == vp) {
				nc_rele[index++] = ncp->vp;
				nc_rele[index++] = ncp->dp;
				nc_rmhash(ncp);
				dnlc_free(ncp);
				ncs.ncs_purge_total.value.ui64++;
				if (index == DNLC_MAX_RELE) {
					ncp = np;
					break;
				}
			}
			ncp = np;
		}
		mutex_exit(&nch->hash_lock);

		/* Release holds on all the vnodes now that we have no locks */
		while (index) {
			VN_RELE_DNLC(nc_rele[--index]);
		}

		if (vp->v_count_dnlc == 0) {
			return;
		}

		if (ncp != (ncache_t *)nch) {
			nch--; /* Do current hash chain again */
		}
	}
}

/*
 * Purge cache entries referencing a vfsp. Caller supplies a count
 * of entries to purge; up to that many will be freed. A count of
 * zero indicates that all such entries should be purged. Returns
 * the number of entries that were purged.
 */
int
dnlc_purge_vfsp(vfs_t *vfsp, int count)
{
	nc_hash_t *nch;
	ncache_t *ncp;
	int n = 0;
	int index;
	int i;
	vnode_t *nc_rele[DNLC_MAX_RELE];

	if (!doingcache)
		return (0);

	ncstats.purges++;
	ncs.ncs_purge_vfs.value.ui64++;

	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
		index = 0;
		mutex_enter(&nch->hash_lock);
		ncp = nch->hash_next;
		while (ncp != (ncache_t *)nch) {
			ncache_t *np;

			np = ncp->hash_next;
			ASSERT(ncp->dp != NULL);
			ASSERT(ncp->vp != NULL);
			if ((ncp->dp->v_vfsp == vfsp) ||
			    (ncp->vp->v_vfsp == vfsp)) {
				n++;
				nc_rele[index++] = ncp->vp;
				nc_rele[index++] = ncp->dp;
				nc_rmhash(ncp);
				dnlc_free(ncp);
				ncs.ncs_purge_total.value.ui64++;
				if (index == DNLC_MAX_RELE) {
					ncp = np;
					break;
				}
				if (count != 0 && n >= count) {
					break;
				}
			}
			ncp = np;
		}
		mutex_exit(&nch->hash_lock);
		/* Release holds on all the vnodes now that we have no locks */
		for (i = 0; i < index; i++) {
			VN_RELE_DNLC(nc_rele[i]);
		}
		if (count != 0 && n >= count) {
			return (n);
		}
		if (ncp != (ncache_t *)nch) {
			nch--; /* Do current hash chain again */
		}
	}
	return (n);
}
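
/*
 * Usage note (not a new requirement of the interface): dnlc_purge_vfsp()
 * is typically used by a filesystem that is being unmounted, to drop the
 * dnlc holds on its vnodes; a count of zero is the usual "purge
 * everything for this vfs" form.
 */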

/*
 * Purge 1 entry from the dnlc that is part of the filesystem(s)
 * represented by 'vop'. The purpose of this routine is to allow
 * users of the dnlc to free a vnode that is being held by the dnlc.
 *
 * If we find a vnode that we release which will result in
 * freeing the underlying vnode (count was 1), return 1; return 0
 * if no appropriate vnodes were found.
 *
 * Note, vop is not the 'right' identifier for a filesystem.
 */
int
dnlc_fs_purge1(vnodeops_t *vop)
{
	nc_hash_t *end;
	nc_hash_t *hp;
	ncache_t *ncp;
	vnode_t *vp;

	if (!doingcache)
		return (0);

	ncs.ncs_purge_fs1.value.ui64++;

	/*
	 * Scan the dnlc entries looking for a likely candidate.
	 */
	hp = end = dnlc_purge_fs1_rotor;

	do {
		if (++hp == &nc_hash[nc_hashsz])
			hp = nc_hash;
		dnlc_purge_fs1_rotor = hp;
		if (hp->hash_next == (ncache_t *)hp)
			continue;
		mutex_enter(&hp->hash_lock);
		for (ncp = hp->hash_prev;
		    ncp != (ncache_t *)hp;
		    ncp = ncp->hash_prev) {
			vp = ncp->vp;
			if (!vn_has_cached_data(vp) && (vp->v_count == 1) &&
			    vn_matchops(vp, vop))
				break;
		}
		if (ncp != (ncache_t *)hp) {
			nc_rmhash(ncp);
			mutex_exit(&hp->hash_lock);
			VN_RELE_DNLC(ncp->dp);
			VN_RELE_DNLC(vp);
			dnlc_free(ncp);
			ncs.ncs_purge_total.value.ui64++;
			return (1);
		}
		mutex_exit(&hp->hash_lock);
	} while (hp != end);
	return (0);
}

/*
 * Perform a reverse lookup in the DNLC. This will find the first occurrence of
 * the vnode. If successful, it will return the vnode of the parent, and the
 * name of the entry in the given buffer. If it cannot be found, or the buffer
 * is too small, then it will return NULL. Note that this is a highly
 * inefficient function, since the DNLC is constructed solely for forward
 * lookups.
 */
vnode_t *
dnlc_reverse_lookup(vnode_t *vp, char *buf, size_t buflen)
{
	nc_hash_t *nch;
	ncache_t *ncp;
	vnode_t *pvp;

	if (!doingcache)
		return (NULL);

	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
		mutex_enter(&nch->hash_lock);
		ncp = nch->hash_next;
		while (ncp != (ncache_t *)nch) {
			/*
			 * We ignore '..' entries since it can create
			 * confusion and infinite loops.
			 */
			if (ncp->vp == vp && !(ncp->namlen == 2 &&
			    0 == bcmp(ncp->name, "..", 2)) &&
			    ncp->namlen < buflen) {
				bcopy(ncp->name, buf, ncp->namlen);
				buf[ncp->namlen] = '\0';
				pvp = ncp->dp;
				/* VN_HOLD 2 of 2 in this file */
				VN_HOLD_CALLER(pvp);
				mutex_exit(&nch->hash_lock);
				return (pvp);
			}
			ncp = ncp->hash_next;
		}
		mutex_exit(&nch->hash_lock);
	}

	return (NULL);
}

/*
 * Utility routine to search for a cache entry. Return the
 * ncache entry if found, NULL otherwise.
 */
static ncache_t *
dnlc_search(vnode_t *dp, const char *name, uchar_t namlen, int hash)
{
	nc_hash_t *hp;
	ncache_t *ncp;

	hp = &nc_hash[hash & nc_hashmask];

	for (ncp = hp->hash_next; ncp != (ncache_t *)hp;
	    ncp = ncp->hash_next) {
		if (ncp->hash == hash &&
		    ncp->dp == dp &&
		    ncp->namlen == namlen &&
		    bcmp(ncp->name, name, namlen) == 0)
			return (ncp);
	}
	return (NULL);
}

#if ((1 << NBBY) - 1) < (MAXNAMELEN - 1)
#error ncache_t name length representation is too small
#endif

void
dnlc_reduce_cache(void *reduce_percent)
{
	if (dnlc_reduce_idle && (dnlc_nentries >= ncsize || reduce_percent)) {
		dnlc_reduce_idle = 0;
		if ((taskq_dispatch(system_taskq, do_dnlc_reduce_cache,
		    reduce_percent, TQ_NOSLEEP)) == NULL)
			dnlc_reduce_idle = 1;
	}
}
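
/*
 * The reduce_percent argument is a small integer smuggled through the
 * void * (see the (uint_t)(uintptr_t) cast in do_dnlc_reduce_cache()
 * below). For example, a memory-pressure path wanting to shed roughly
 * 3% of ncsize would call
 *
 *	dnlc_reduce_cache((void *)(uintptr_t)3);
 *
 * while a NULL argument requests the default reduction down to
 * dnlc_nentries_low_water.
 */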

/*
 * Get a new name cache entry.
 * If the dnlc_reduce_cache() taskq isn't keeping up with demand, or memory
 * is short, then just return NULL. If we're over ncsize then kick off a
 * thread to free some in-use entries down to dnlc_nentries_low_water.
 * Caller must initialise all fields except namlen.
 * Component names are defined to be less than MAXNAMELEN
 * which includes a null.
 */
static ncache_t *
dnlc_get(uchar_t namlen)
{
	ncache_t *ncp;

	if (dnlc_nentries > dnlc_max_nentries) {
		dnlc_max_nentries_cnt++; /* keep a statistic */
		return (NULL);
	}
	ncp = kmem_alloc(sizeof (ncache_t) + namlen, KM_NOSLEEP);
	if (ncp == NULL) {
		return (NULL);
	}
	ncp->namlen = namlen;
	atomic_inc_32(&dnlc_nentries);
	dnlc_reduce_cache(NULL);
	return (ncp);
}

/*
 * Taskq routine to free up name cache entries to reduce the
 * cache size to the low water mark if "reduce_percent" is not provided.
 * If "reduce_percent" is provided, reduce cache size by
 * (ncsize_onepercent * reduce_percent).
 */
/*ARGSUSED*/
static void
do_dnlc_reduce_cache(void *reduce_percent)
{
	nc_hash_t *hp = dnlc_free_rotor, *start_hp = hp;
	vnode_t *vp;
	ncache_t *ncp;
	int cnt;
	uint_t low_water = dnlc_nentries_low_water;

	if (reduce_percent) {
		uint_t reduce_cnt;

		/*
		 * Never try to reduce the current number
		 * of cache entries below 3% of ncsize.
		 */
		if (dnlc_nentries <= ncsize_min_percent) {
			dnlc_reduce_idle = 1;
			return;
		}
		reduce_cnt = ncsize_onepercent *
		    (uint_t)(uintptr_t)reduce_percent;

		if (reduce_cnt > dnlc_nentries ||
		    dnlc_nentries - reduce_cnt < ncsize_min_percent)
			low_water = ncsize_min_percent;
		else
			low_water = dnlc_nentries - reduce_cnt;
	}

	do {
		/*
		 * Find the first non empty hash queue without locking.
		 * Only look at each hash queue once to avoid an infinite loop.
		 */
		do {
			if (++hp == &nc_hash[nc_hashsz])
				hp = nc_hash;
		} while (hp->hash_next == (ncache_t *)hp && hp != start_hp);

		/* return if all hash queues are empty. */
		if (hp->hash_next == (ncache_t *)hp) {
			dnlc_reduce_idle = 1;
			return;
		}

		mutex_enter(&hp->hash_lock);
		for (cnt = 0, ncp = hp->hash_prev; ncp != (ncache_t *)hp;
		    ncp = ncp->hash_prev, cnt++) {
			vp = ncp->vp;
			/*
			 * A name cache entry with a reference count
			 * of one is only referenced by the dnlc.
			 * Also negative cache entries are purged first.
			 */
			if (!vn_has_cached_data(vp) &&
			    ((vp->v_count == 1) || (vp == DNLC_NO_VNODE))) {
				ncs.ncs_pick_heur.value.ui64++;
				goto found;
			}
			/*
			 * Remove from the end of the chain if the
			 * chain is too long
			 */
			if (cnt > dnlc_long_chain) {
				ncp = hp->hash_prev;
				ncs.ncs_pick_last.value.ui64++;
				vp = ncp->vp;
				goto found;
			}
		}
		/* check for race and continue */
		if (hp->hash_next == (ncache_t *)hp) {
			mutex_exit(&hp->hash_lock);
			continue;
		}

		ncp = hp->hash_prev; /* pick the last one in the hash queue */
		ncs.ncs_pick_last.value.ui64++;
		vp = ncp->vp;
found:
		/*
		 * Remove from hash chain.
		 */
		nc_rmhash(ncp);
		mutex_exit(&hp->hash_lock);
		VN_RELE_DNLC(vp);
		VN_RELE_DNLC(ncp->dp);
		dnlc_free(ncp);
	} while (dnlc_nentries > low_water);

	dnlc_free_rotor = hp;
	dnlc_reduce_idle = 1;
}

/*
 * Directory caching routines
 * ==========================
 *
 * See dnlc.h for details of the interfaces below.
 */
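
/*
 * Overview, for orientation (a sketch of the expected call sequence; the
 * authoritative contract is in dnlc.h): a filesystem embeds a dcanchor_t
 * in its per-directory state and then drives the cache roughly as
 *
 *	dnlc_dir_start(dcap, est_entries);	begin caching one directory
 *	dnlc_dir_add_entry(dcap, name, handle);	for each name found
 *	dnlc_dir_add_space(dcap, len, handle);	for each free slot found
 *	dnlc_dir_complete(dcap);		whole directory now cached
 *
 * after which dnlc_dir_lookup() can answer DFOUND/DNOENT/DNOCACHE, and
 * dnlc_dir_purge() or dnlc_dir_fini() tears the cache down.
 */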

/*
 * Look up an entry in a complete or partial directory cache.
 */
dcret_t
dnlc_dir_lookup(dcanchor_t *dcap, const char *name, uint64_t *handle)
{
	dircache_t *dcp;
	dcentry_t *dep;
	int hash;
	int ret;
	uchar_t namlen;

	/*
	 * can test without lock as we are only a cache
	 */
	if (!VALID_DIR_CACHE(dcap->dca_dircache)) {
		ncs.ncs_dir_misses.value.ui64++;
		return (DNOCACHE);
	}

	if (!dnlc_dir_enable) {
		return (DNOCACHE);
	}

	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		dcp->dc_actime = ddi_get_lbolt64();
		DNLC_DIR_HASH(name, hash, namlen);
		dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
		while (dep != NULL) {
			if ((dep->de_hash == hash) &&
			    (namlen == dep->de_namelen) &&
			    bcmp(dep->de_name, name, namlen) == 0) {
				*handle = dep->de_handle;
				mutex_exit(&dcap->dca_lock);
				ncs.ncs_dir_hits.value.ui64++;
				return (DFOUND);
			}
			dep = dep->de_next;
		}
		if (dcp->dc_complete) {
			ret = DNOENT;
		} else {
			ret = DNOCACHE;
		}
		mutex_exit(&dcap->dca_lock);
		return (ret);
	} else {
		mutex_exit(&dcap->dca_lock);
		ncs.ncs_dir_misses.value.ui64++;
		return (DNOCACHE);
	}
}
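
/*
 * Note the role of dc_complete above: an authoritative DNOENT ("the name
 * definitely does not exist") can only be returned once the whole
 * directory has been scanned and dnlc_dir_complete() called; a miss on a
 * partial cache is merely DNOCACHE, and the caller must fall back to
 * scanning the directory itself.
 */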

/*
 * Start a new directory cache. An estimate of the number of
 * entries is provided as a quick check to ensure the directory
 * is cacheable.
 */
dcret_t
dnlc_dir_start(dcanchor_t *dcap, uint_t num_entries)
{
	dircache_t *dcp;

	if (!dnlc_dir_enable ||
	    (num_entries < dnlc_dir_min_size)) {
		return (DNOCACHE);
	}

	if (num_entries > dnlc_dir_max_size) {
		return (DTOOBIG);
	}

	mutex_enter(&dc_head.dch_lock);
	mutex_enter(&dcap->dca_lock);

	if (dcap->dca_dircache == DC_RET_LOW_MEM) {
		dcap->dca_dircache = NULL;
		mutex_exit(&dcap->dca_lock);
		mutex_exit(&dc_head.dch_lock);
		return (DNOMEM);
	}

	/*
	 * Check if there's currently a cache.
	 * This probably only occurs on a race.
	 */
	if (dcap->dca_dircache != NULL) {
		mutex_exit(&dcap->dca_lock);
		mutex_exit(&dc_head.dch_lock);
		return (DNOCACHE);
	}

	/*
	 * Allocate the dircache struct, entry and free space hash tables.
	 * These tables are initially just one entry but dynamically resize
	 * when entries and free space are added or removed.
	 */
	if ((dcp = kmem_zalloc(sizeof (dircache_t), KM_NOSLEEP)) == NULL) {
		goto error;
	}
	if ((dcp->dc_namehash = kmem_zalloc(sizeof (dcentry_t *),
	    KM_NOSLEEP)) == NULL) {
		goto error;
	}
	if ((dcp->dc_freehash = kmem_zalloc(sizeof (dcfree_t *),
	    KM_NOSLEEP)) == NULL) {
		goto error;
	}

	dcp->dc_anchor = dcap; /* set back pointer to anchor */
	dcap->dca_dircache = dcp;

	/* add into head of global chain */
	dcp->dc_next = dc_head.dch_next;
	dcp->dc_prev = (dircache_t *)&dc_head;
	dcp->dc_next->dc_prev = dcp;
	dc_head.dch_next = dcp;

	mutex_exit(&dcap->dca_lock);
	mutex_exit(&dc_head.dch_lock);
	ncs.ncs_cur_dirs.value.ui64++;
	ncs.ncs_dirs_cached.value.ui64++;
	return (DOK);
error:
	if (dcp != NULL) {
		if (dcp->dc_namehash) {
			kmem_free(dcp->dc_namehash, sizeof (dcentry_t *));
		}
		kmem_free(dcp, sizeof (dircache_t));
	}
	/*
	 * Must also kmem_free dcp->dc_freehash if more error cases are added
	 */
	mutex_exit(&dcap->dca_lock);
	mutex_exit(&dc_head.dch_lock);
	ncs.ncs_dir_start_nm.value.ui64++;
	return (DNOCACHE);
}
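
/*
 * Lock ordering note: whenever both locks are required, dc_head.dch_lock
 * is taken before the per-anchor dca_lock (as in dnlc_dir_start() above
 * and dnlc_dir_purge()/dnlc_dir_fini() below), so the global-chain walk
 * in dnlc_dir_reclaim() can safely acquire anchor locks while holding
 * dch_lock.
 */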

/*
 * Add a directory entry to a partial or complete directory cache.
 */
dcret_t
dnlc_dir_add_entry(dcanchor_t *dcap, const char *name, uint64_t handle)
{
	dircache_t *dcp;
	dcentry_t **hp, *dep;
	int hash;
	uint_t capacity;
	uchar_t namlen;

	/*
	 * Allocate the dcentry struct, including the variable
	 * size name. Note, the null terminator is not copied.
	 *
	 * We do this outside the lock to avoid possible deadlock if
	 * dnlc_dir_reclaim() is called as a result of memory shortage.
	 */
	DNLC_DIR_HASH(name, hash, namlen);
	dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
	if (dep == NULL) {
#ifdef DEBUG
		/*
		 * The kmem allocator generates random failures for
		 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
		 * So try again before we blow away a perfectly good cache.
		 * This is done not to cover an error but purely for
		 * performance running a debug kernel.
		 * This random error only occurs in debug mode.
		 */
		dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
		if (dep != NULL)
			goto ok;
#endif
		ncs.ncs_dir_add_nm.value.ui64++;
		/*
		 * Free a directory cache. This may be the one we are
		 * called with.
		 */
		dnlc_dir_reclaim(NULL);
		dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
		if (dep == NULL) {
			/*
			 * still no memory, better delete this cache
			 */
			mutex_enter(&dcap->dca_lock);
			dcp = (dircache_t *)dcap->dca_dircache;
			if (VALID_DIR_CACHE(dcp)) {
				dnlc_dir_abort(dcp);
				dcap->dca_dircache = DC_RET_LOW_MEM;
			}
			mutex_exit(&dcap->dca_lock);
			ncs.ncs_dir_addabort.value.ui64++;
			return (DNOCACHE);
		}
		/*
		 * fall through as if the 1st kmem_alloc had worked
		 */
	}
#ifdef DEBUG
ok:
#endif
	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		/*
		 * If the total number of entries goes above the max
		 * then free this cache
		 */
		if ((dcp->dc_num_entries + dcp->dc_num_free) >
		    dnlc_dir_max_size) {
			mutex_exit(&dcap->dca_lock);
			dnlc_dir_purge(dcap);
			kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
			ncs.ncs_dir_add_max.value.ui64++;
			return (DTOOBIG);
		}
		dcp->dc_num_entries++;
		capacity = (dcp->dc_nhash_mask + 1) <<
		    dnlc_dir_hash_size_shift;
		if (dcp->dc_num_entries >=
		    (capacity << dnlc_dir_hash_resize_shift)) {
			dnlc_dir_adjust_nhash(dcp);
		}
		hp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];

		/*
		 * Initialise and chain in new entry
		 */
		dep->de_handle = handle;
		dep->de_hash = hash;
		/*
		 * Note de_namelen is a uchar_t to conserve space
		 * and alignment padding. The max length of any
		 * pathname component is defined as MAXNAMELEN
		 * which is 256 (including the terminating null).
		 * So provided this doesn't change, we don't include the null,
		 * we always use bcmp to compare strings, and we don't
		 * start storing full names, then we are ok.
		 * The space savings is worth it.
		 */
		dep->de_namelen = namlen;
		bcopy(name, dep->de_name, namlen);
		dep->de_next = *hp;
		*hp = dep;
		dcp->dc_actime = ddi_get_lbolt64();
		mutex_exit(&dcap->dca_lock);
		ncs.ncs_dir_num_ents.value.ui64++;
		return (DOK);
	} else {
		mutex_exit(&dcap->dca_lock);
		kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
		return (DNOCACHE);
	}
}
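
/*
 * Illustrative caller pattern while scanning a directory to build the
 * cache (sketch only; what each arm does depends on the filesystem):
 *
 *	switch (dnlc_dir_add_entry(dcap, nm, handle)) {
 *	case DOK:	keep scanning
 *	case DTOOBIG:	directory grew past dnlc_dir_max_size; cache gone
 *	case DNOCACHE:	cache was lost (e.g. reclaimed); stop adding
 *	}
 */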

/*
 * Add free space to a partial or complete directory cache.
 */
dcret_t
dnlc_dir_add_space(dcanchor_t *dcap, uint_t len, uint64_t handle)
{
	dircache_t *dcp;
	dcfree_t *dfp, **hp;
	uint_t capacity;

	/*
	 * We kmem_alloc outside the lock to avoid possible deadlock if
	 * dnlc_dir_reclaim() is called as a result of memory shortage.
	 */
	dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
	if (dfp == NULL) {
#ifdef DEBUG
		/*
		 * The kmem allocator generates random failures for
		 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
		 * So try again before we blow away a perfectly good cache.
		 * This random error only occurs in debug mode
		 */
		dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
		if (dfp != NULL)
			goto ok;
#endif
		ncs.ncs_dir_add_nm.value.ui64++;
		/*
		 * Free a directory cache. This may be the one we are
		 * called with.
		 */
		dnlc_dir_reclaim(NULL);
		dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
		if (dfp == NULL) {
			/*
			 * still no memory, better delete this cache
			 */
			mutex_enter(&dcap->dca_lock);
			dcp = (dircache_t *)dcap->dca_dircache;
			if (VALID_DIR_CACHE(dcp)) {
				dnlc_dir_abort(dcp);
				dcap->dca_dircache = DC_RET_LOW_MEM;
			}
			mutex_exit(&dcap->dca_lock);
			ncs.ncs_dir_addabort.value.ui64++;
			return (DNOCACHE);
		}
		/*
		 * fall through as if the 1st kmem_alloc had worked
		 */
	}

#ifdef DEBUG
ok:
#endif
	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		if ((dcp->dc_num_entries + dcp->dc_num_free) >
		    dnlc_dir_max_size) {
			mutex_exit(&dcap->dca_lock);
			dnlc_dir_purge(dcap);
			kmem_cache_free(dnlc_dir_space_cache, dfp);
			ncs.ncs_dir_add_max.value.ui64++;
			return (DTOOBIG);
		}
		dcp->dc_num_free++;
		capacity = (dcp->dc_fhash_mask + 1) <<
		    dnlc_dir_hash_size_shift;
		if (dcp->dc_num_free >=
		    (capacity << dnlc_dir_hash_resize_shift)) {
			dnlc_dir_adjust_fhash(dcp);
		}
		/*
		 * Initialise and chain a new entry
		 */
		dfp->df_handle = handle;
		dfp->df_len = len;
		dcp->dc_actime = ddi_get_lbolt64();
		hp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
		dfp->df_next = *hp;
		*hp = dfp;
		mutex_exit(&dcap->dca_lock);
		ncs.ncs_dir_num_ents.value.ui64++;
		return (DOK);
	} else {
		mutex_exit(&dcap->dca_lock);
		kmem_cache_free(dnlc_dir_space_cache, dfp);
		return (DNOCACHE);
	}
}

/*
 * Mark a directory cache as complete.
 */
void
dnlc_dir_complete(dcanchor_t *dcap)
{
	dircache_t *dcp;

	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		dcp->dc_complete = B_TRUE;
	}
	mutex_exit(&dcap->dca_lock);
}
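
/*
 * The free space entries added above let a filesystem answer "where can I
 * put a new name of this length?" from the cache:
 * dnlc_dir_rem_space_by_len() below hands back (and removes) the first
 * cached slot that is big enough, and dnlc_dir_rem_space_by_handle()
 * removes a specific slot once it has been consumed.
 */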

/*
 * Internal routine to delete a partial or full directory cache.
 * No additional locking needed.
 */
static void
dnlc_dir_abort(dircache_t *dcp)
{
	dcentry_t *dep, *nhp;
	dcfree_t *fep, *fhp;
	uint_t nhtsize = dcp->dc_nhash_mask + 1; /* name hash table size */
	uint_t fhtsize = dcp->dc_fhash_mask + 1; /* free hash table size */
	uint_t i;

	/*
	 * Free up the cached name entries and hash table
	 */
	for (i = 0; i < nhtsize; i++) { /* for each hash bucket */
		nhp = dcp->dc_namehash[i];
		while (nhp != NULL) { /* for each chained entry */
			dep = nhp->de_next;
			kmem_free(nhp, sizeof (dcentry_t) - 1 +
			    nhp->de_namelen);
			nhp = dep;
		}
	}
	kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * nhtsize);

	/*
	 * Free up the free space entries and hash table
	 */
	for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
		fhp = dcp->dc_freehash[i];
		while (fhp != NULL) { /* for each chained entry */
			fep = fhp->df_next;
			kmem_cache_free(dnlc_dir_space_cache, fhp);
			fhp = fep;
		}
	}
	kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * fhtsize);

	/*
	 * Finally free the directory cache structure itself
	 */
	ncs.ncs_dir_num_ents.value.ui64 -= (dcp->dc_num_entries +
	    dcp->dc_num_free);
	kmem_free(dcp, sizeof (dircache_t));
	ncs.ncs_cur_dirs.value.ui64--;
}

/*
 * Remove a partial or complete directory cache
 */
void
dnlc_dir_purge(dcanchor_t *dcap)
{
	dircache_t *dcp;

	mutex_enter(&dc_head.dch_lock);
	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (!VALID_DIR_CACHE(dcp)) {
		mutex_exit(&dcap->dca_lock);
		mutex_exit(&dc_head.dch_lock);
		return;
	}
	dcap->dca_dircache = NULL;
	/*
	 * Unchain from global list
	 */
	dcp->dc_prev->dc_next = dcp->dc_next;
	dcp->dc_next->dc_prev = dcp->dc_prev;
	mutex_exit(&dcap->dca_lock);
	mutex_exit(&dc_head.dch_lock);
	dnlc_dir_abort(dcp);
}
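
/*
 * Note that dnlc_dir_purge() only unchains the dircache_t while holding
 * the locks; once the anchor and the global list no longer reference it,
 * the structure is private to this thread, and dnlc_dir_abort() can walk
 * and free it without any locking, which is why dnlc_dir_abort() needs
 * none.
 */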

/*
 * Remove an entry from a complete or partial directory cache.
 * Return the handle if it's non null.
 */
dcret_t
dnlc_dir_rem_entry(dcanchor_t *dcap, const char *name, uint64_t *handlep)
{
	dircache_t *dcp;
	dcentry_t **prevpp, *te;
	uint_t capacity;
	int hash;
	int ret;
	uchar_t namlen;

	if (!dnlc_dir_enable) {
		return (DNOCACHE);
	}

	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		dcp->dc_actime = ddi_get_lbolt64();
		if (dcp->dc_nhash_mask > 0) { /* ie not minimum */
			capacity = (dcp->dc_nhash_mask + 1) <<
			    dnlc_dir_hash_size_shift;
			if (dcp->dc_num_entries <=
			    (capacity >> dnlc_dir_hash_resize_shift)) {
				dnlc_dir_adjust_nhash(dcp);
			}
		}
		DNLC_DIR_HASH(name, hash, namlen);
		prevpp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];
		while (*prevpp != NULL) {
			if (((*prevpp)->de_hash == hash) &&
			    (namlen == (*prevpp)->de_namelen) &&
			    bcmp((*prevpp)->de_name, name, namlen) == 0) {
				if (handlep != NULL) {
					*handlep = (*prevpp)->de_handle;
				}
				te = *prevpp;
				*prevpp = (*prevpp)->de_next;
				kmem_free(te, sizeof (dcentry_t) - 1 +
				    te->de_namelen);

				/*
				 * If the total number of entries
				 * falls below half the minimum number
				 * of entries then free this cache.
				 */
				if (--dcp->dc_num_entries <
				    (dnlc_dir_min_size >> 1)) {
					mutex_exit(&dcap->dca_lock);
					dnlc_dir_purge(dcap);
				} else {
					mutex_exit(&dcap->dca_lock);
				}
				ncs.ncs_dir_num_ents.value.ui64--;
				return (DFOUND);
			}
			prevpp = &((*prevpp)->de_next);
		}
		if (dcp->dc_complete) {
			ncs.ncs_dir_reme_fai.value.ui64++;
			ret = DNOENT;
		} else {
			ret = DNOCACHE;
		}
		mutex_exit(&dcap->dca_lock);
		return (ret);
	} else {
		mutex_exit(&dcap->dca_lock);
		return (DNOCACHE);
	}
}


/*
 * Remove free space of at least the given length from a complete
 * or partial directory cache.
 */
dcret_t
dnlc_dir_rem_space_by_len(dcanchor_t *dcap, uint_t len, uint64_t *handlep)
{
	dircache_t *dcp;
	dcfree_t **prevpp, *tfp;
	uint_t fhtsize; /* free hash table size */
	uint_t i;
	uint_t capacity;
	int ret;

	if (!dnlc_dir_enable) {
		return (DNOCACHE);
	}

	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		dcp->dc_actime = ddi_get_lbolt64();
		if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
			capacity = (dcp->dc_fhash_mask + 1) <<
			    dnlc_dir_hash_size_shift;
			if (dcp->dc_num_free <=
			    (capacity >> dnlc_dir_hash_resize_shift)) {
				dnlc_dir_adjust_fhash(dcp);
			}
		}
		/*
		 * Search for an entry of the appropriate size
		 * on a first fit basis.
		 */
		fhtsize = dcp->dc_fhash_mask + 1;
		for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
			prevpp = &(dcp->dc_freehash[i]);
			while (*prevpp != NULL) {
				if ((*prevpp)->df_len >= len) {
					*handlep = (*prevpp)->df_handle;
					tfp = *prevpp;
					*prevpp = (*prevpp)->df_next;
					dcp->dc_num_free--;
					mutex_exit(&dcap->dca_lock);
					kmem_cache_free(dnlc_dir_space_cache,
					    tfp);
					ncs.ncs_dir_num_ents.value.ui64--;
					return (DFOUND);
				}
				prevpp = &((*prevpp)->df_next);
			}
		}
		if (dcp->dc_complete) {
			ret = DNOENT;
		} else {
			ret = DNOCACHE;
		}
		mutex_exit(&dcap->dca_lock);
		return (ret);
	} else {
		mutex_exit(&dcap->dca_lock);
		return (DNOCACHE);
	}
}

/*
 * Remove free space with the given handle from a complete or partial
 * directory cache.
 */
dcret_t
dnlc_dir_rem_space_by_handle(dcanchor_t *dcap, uint64_t handle)
{
	dircache_t *dcp;
	dcfree_t **prevpp, *tfp;
	uint_t capacity;
	int ret;

	if (!dnlc_dir_enable) {
		return (DNOCACHE);
	}

	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		dcp->dc_actime = ddi_get_lbolt64();
		if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
			capacity = (dcp->dc_fhash_mask + 1) <<
			    dnlc_dir_hash_size_shift;
			if (dcp->dc_num_free <=
			    (capacity >> dnlc_dir_hash_resize_shift)) {
				dnlc_dir_adjust_fhash(dcp);
			}
		}

		/*
		 * search for the exact entry
		 */
		prevpp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
		while (*prevpp != NULL) {
			if ((*prevpp)->df_handle == handle) {
				tfp = *prevpp;
				*prevpp = (*prevpp)->df_next;
				dcp->dc_num_free--;
				mutex_exit(&dcap->dca_lock);
				kmem_cache_free(dnlc_dir_space_cache, tfp);
				ncs.ncs_dir_num_ents.value.ui64--;
				return (DFOUND);
			}
			prevpp = &((*prevpp)->df_next);
		}
		if (dcp->dc_complete) {
			ncs.ncs_dir_rems_fai.value.ui64++;
			ret = DNOENT;
		} else {
			ret = DNOCACHE;
		}
		mutex_exit(&dcap->dca_lock);
		return (ret);
	} else {
		mutex_exit(&dcap->dca_lock);
		return (DNOCACHE);
	}
}

/*
 * Update the handle of a directory cache entry.
 */
dcret_t
dnlc_dir_update(dcanchor_t *dcap, const char *name, uint64_t handle)
{
	dircache_t *dcp;
	dcentry_t *dep;
	int hash;
	int ret;
	uchar_t namlen;

	if (!dnlc_dir_enable) {
		return (DNOCACHE);
	}

	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		dcp->dc_actime = ddi_get_lbolt64();
		DNLC_DIR_HASH(name, hash, namlen);
		dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
		while (dep != NULL) {
			if ((dep->de_hash == hash) &&
			    (namlen == dep->de_namelen) &&
			    bcmp(dep->de_name, name, namlen) == 0) {
				dep->de_handle = handle;
				mutex_exit(&dcap->dca_lock);
				return (DFOUND);
			}
			dep = dep->de_next;
		}
		if (dcp->dc_complete) {
			ncs.ncs_dir_upd_fail.value.ui64++;
			ret = DNOENT;
		} else {
			ret = DNOCACHE;
		}
		mutex_exit(&dcap->dca_lock);
		return (ret);
	} else {
		mutex_exit(&dcap->dca_lock);
		return (DNOCACHE);
	}
}

void
dnlc_dir_fini(dcanchor_t *dcap)
{
	dircache_t *dcp;

	mutex_enter(&dc_head.dch_lock);
	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		/*
		 * Unchain from global list
		 */
		ncs.ncs_dir_finipurg.value.ui64++;
		dcp->dc_prev->dc_next = dcp->dc_next;
		dcp->dc_next->dc_prev = dcp->dc_prev;
	} else {
		dcp = NULL;
	}
	dcap->dca_dircache = NULL;
	mutex_exit(&dcap->dca_lock);
	mutex_exit(&dc_head.dch_lock);
	mutex_destroy(&dcap->dca_lock);
	if (dcp) {
		dnlc_dir_abort(dcp);
	}
}
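
/*
 * Unlike dnlc_dir_purge(), dnlc_dir_fini() also destroys the anchor's
 * mutex, so it is the final teardown call: it is used when the structure
 * embedding the dcanchor_t is itself being destroyed, and the anchor must
 * not be used again afterwards.
 */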

/*
 * Reclaim callback for dnlc directory caching.
 * Invoked by the kernel memory allocator when memory gets tight.
 * This is a pretty serious condition and can easily lead to system
 * hangs if not enough space is returned.
 *
 * Deciding which directory (or directories) to purge is tricky.
 * Purging everything is overkill, but purging just the oldest used
 * was found to lead to hangs. The largest cached directories use the
 * most memory, but take the most effort to rebuild, whereas the smaller
 * ones have little value and give back little space. So what to do?
 *
 * The current policy is to continue purging the oldest used directories
 * until at least dnlc_dir_min_reclaim directory entries have been purged.
 */
/*ARGSUSED*/
static void
dnlc_dir_reclaim(void *unused)
{
	dircache_t *dcp, *oldest;
	uint_t dirent_cnt = 0;

	mutex_enter(&dc_head.dch_lock);
	while (dirent_cnt < dnlc_dir_min_reclaim) {
		dcp = dc_head.dch_next;
		oldest = NULL;
		while (dcp != (dircache_t *)&dc_head) {
			if (oldest == NULL) {
				oldest = dcp;
			} else {
				if (dcp->dc_actime < oldest->dc_actime) {
					oldest = dcp;
				}
			}
			dcp = dcp->dc_next;
		}
		if (oldest == NULL) {
			/* nothing to delete */
			mutex_exit(&dc_head.dch_lock);
			return;
		}
		/*
		 * remove from directory chain and purge
		 */
		oldest->dc_prev->dc_next = oldest->dc_next;
		oldest->dc_next->dc_prev = oldest->dc_prev;
		mutex_enter(&oldest->dc_anchor->dca_lock);
		/*
		 * If this was the last entry then it must be too large.
		 * Mark it as such by saving a special dircache_t
		 * pointer (DC_RET_LOW_MEM) in the anchor. The error DNOMEM
		 * will be presented to the caller of dnlc_dir_start()
		 */
		if (oldest->dc_next == oldest->dc_prev) {
			oldest->dc_anchor->dca_dircache = DC_RET_LOW_MEM;
			ncs.ncs_dir_rec_last.value.ui64++;
		} else {
			oldest->dc_anchor->dca_dircache = NULL;
			ncs.ncs_dir_recl_any.value.ui64++;
		}
		mutex_exit(&oldest->dc_anchor->dca_lock);
		dirent_cnt += oldest->dc_num_entries;
		dnlc_dir_abort(oldest);
	}
	mutex_exit(&dc_head.dch_lock);
}
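
/*
 * A worked example of the resize thresholds used by the adjust routines
 * below (illustrative only): with dnlc_dir_hash_size_shift = 3 and
 * dnlc_dir_hash_resize_shift = 2, a table of b buckets has capacity
 * b * 8; it grows when the entry count reaches (b * 8) << 2, an average
 * of 32 entries per bucket, and shrinks when the count drops to
 * (b * 8) >> 2, an average of 2 per bucket, matching the commentary
 * beside dnlc_dir_hash_resize_shift above.
 */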

/*
 * Dynamically grow or shrink the size of the name hash table
 */
static void
dnlc_dir_adjust_nhash(dircache_t *dcp)
{
	dcentry_t **newhash, *dep, **nhp, *tep;
	uint_t newsize;
	uint_t oldsize;
	uint_t newsizemask;
	int i;

	/*
	 * Allocate new hash table
	 */
	newsize = dcp->dc_num_entries >> dnlc_dir_hash_size_shift;
	newhash = kmem_zalloc(sizeof (dcentry_t *) * newsize, KM_NOSLEEP);
	if (newhash == NULL) {
		/*
		 * System is short on memory, so just return.
		 * Note, the old hash table is still usable.
		 * This return is unlikely to repeatedly occur, because
		 * either some other directory caches will be reclaimed
		 * due to memory shortage, thus freeing memory, or this
		 * directory cache will be reclaimed.
		 */
		return;
	}
	oldsize = dcp->dc_nhash_mask + 1;
	dcp->dc_nhash_mask = newsizemask = newsize - 1;

	/*
	 * Move entries from the old table to the new
	 */
	for (i = 0; i < oldsize; i++) { /* for each hash bucket */
		dep = dcp->dc_namehash[i];
		while (dep != NULL) { /* for each chained entry */
			tep = dep;
			dep = dep->de_next;
			nhp = &newhash[tep->de_hash & newsizemask];
			tep->de_next = *nhp;
			*nhp = tep;
		}
	}

	/*
	 * delete old hash table and set new one in place
	 */
	kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * oldsize);
	dcp->dc_namehash = newhash;
}

/*
 * Dynamically grow or shrink the size of the free space hash table
 */
static void
dnlc_dir_adjust_fhash(dircache_t *dcp)
{
	dcfree_t **newhash, *dfp, **nhp, *tfp;
	uint_t newsize;
	uint_t oldsize;
	int i;

	/*
	 * Allocate new hash table
	 */
	newsize = dcp->dc_num_free >> dnlc_dir_hash_size_shift;
	newhash = kmem_zalloc(sizeof (dcfree_t *) * newsize, KM_NOSLEEP);
	if (newhash == NULL) {
		/*
		 * System is short on memory, so just return.
		 * Note, the old hash table is still usable.
		 * This return is unlikely to repeatedly occur, because
		 * either some other directory caches will be reclaimed
		 * due to memory shortage, thus freeing memory, or this
		 * directory cache will be reclaimed.
		 */
		return;
	}
	oldsize = dcp->dc_fhash_mask + 1;
	dcp->dc_fhash_mask = newsize - 1;

	/*
	 * Move entries from the old table to the new
	 */
	for (i = 0; i < oldsize; i++) { /* for each hash bucket */
		dfp = dcp->dc_freehash[i];
		while (dfp != NULL) { /* for each chained entry */
			tfp = dfp;
			dfp = dfp->df_next;
			nhp = &newhash[DDFHASH(tfp->df_handle, dcp)];
			tfp->df_next = *nhp;
			*nhp = tfp;
		}
	}

	/*
	 * delete old hash table and set new one in place
	 */
	kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * oldsize);
	dcp->dc_freehash = newhash;
}