1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * negative cache handling for the /dev fs 30 */ 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/t_lock.h> 35 #include <sys/systm.h> 36 #include <sys/sysmacros.h> 37 #include <sys/user.h> 38 #include <sys/time.h> 39 #include <sys/vfs.h> 40 #include <sys/vnode.h> 41 #include <sys/file.h> 42 #include <sys/fcntl.h> 43 #include <sys/flock.h> 44 #include <sys/kmem.h> 45 #include <sys/uio.h> 46 #include <sys/errno.h> 47 #include <sys/stat.h> 48 #include <sys/cred.h> 49 #include <sys/cmn_err.h> 50 #include <sys/debug.h> 51 #include <sys/mode.h> 52 #include <sys/policy.h> 53 #include <fs/fs_subr.h> 54 #include <sys/mount.h> 55 #include <sys/fs/snode.h> 56 #include <sys/fs/dv_node.h> 57 #include <sys/fs/sdev_node.h> 58 #include <sys/sunndi.h> 59 #include <sys/sunmdi.h> 60 #include <sys/ddi.h> 61 #include <sys/modctl.h> 62 #include <sys/devcache.h> 63 64 65 /* 66 * ncache is a negative cache of failed lookups. 
An entry 67 * is added after an attempt to configure a device by that 68 * name failed. An accumulation of these entries over time 69 * gives us a set of device name for which implicit reconfiguration 70 * does not need to be attempted. If a name is created matching 71 * an entry in ncache, that entry is removed, with the 72 * persistent store updated. 73 * 74 * Implicit reconfig is initiated for any name during lookup that 75 * can't be resolved from the backing store and that isn't 76 * present in the negative cache. This functionality is 77 * enabled during system startup once communication with devfsadm 78 * can be achieved. Since readdir is more general, implicit 79 * reconfig initiated by reading a directory isn't enabled until 80 * the system is more fully booted, at the time of the multi-user 81 * milestone, corresponding to init state 2. 82 * 83 * A maximum is imposed on the number of entries in the cache 84 * to limit some script going wild and as a defense against attack. 85 * The default limit is 64 and can be adjusted via sdev_nc_max_entries. 86 * 87 * Each entry also has a expiration count. When looked up a name in 88 * the cache is set to the default. Subsequent boots will decrement 89 * the count if a name isn't referenced. This permits a once-only 90 * entry to eventually be removed over time. 91 * 92 * sdev_reconfig_delay implements a "debounce" of the timing beyond 93 * system available indication, providing what the filesystem considers 94 * to be the system-is-fully-booted state. This is provided to adjust 95 * the timing if some application startup is performing a readdir 96 * in /dev that initiates a troublesome implicit reconfig on every boot. 97 * 98 * sdev_nc_disable_reset can be used to disable clearing the negative cache 99 * on reconfig boot. The default is to clear the cache on reconfig boot. 100 * sdev_nc_disable can be used to disable the negative cache itself. 
101 * 102 * sdev_reconfig_disable can be used to disable implicit reconfig. 103 * The default is that implicit reconfig is enabled. 104 */ 105 106 /* tunables and defaults */ 107 #define SDEV_NC_EXPIRECNT 4 108 #define SDEV_NC_MAX_ENTRIES 64 109 #define SEV_RECONFIG_DELAY 6 /* seconds */ 110 111 /* tunables */ 112 int sdev_nc_expirecnt = SDEV_NC_EXPIRECNT; 113 int sdev_nc_max_entries = SDEV_NC_MAX_ENTRIES; 114 int sdev_reconfig_delay = SEV_RECONFIG_DELAY; 115 int sdev_reconfig_verbose = 0; 116 int sdev_reconfig_disable = 0; 117 int sdev_nc_disable = 0; 118 int sdev_nc_disable_reset = 0; 119 int sdev_nc_verbose = 0; 120 int sdev_cache_read_disable = 0; 121 int sdev_cache_write_disable = 0; 122 123 /* globals */ 124 int sdev_boot_state = SDEV_BOOT_STATE_INITIAL; 125 int sdev_reconfig_boot = 0; 126 sdev_nc_list_t *sdev_ncache; 127 static nvf_handle_t sdevfd_handle; 128 129 /* static prototypes */ 130 static void sdev_ncache_write_complete(nvf_handle_t); 131 static void sdev_ncache_write(void); 132 static void sdev_ncache_process_store(void); 133 static sdev_nc_list_t *sdev_nc_newlist(void); 134 static void sdev_nc_free_unlinked_node(sdev_nc_node_t *); 135 static sdev_nc_node_t *sdev_nc_findpath(sdev_nc_list_t *, char *); 136 static void sdev_nc_insertnode(sdev_nc_list_t *, sdev_nc_node_t *); 137 static void sdev_nc_free_bootonly(void); 138 static int sdev_ncache_unpack_nvlist(nvf_handle_t, nvlist_t *, char *); 139 static int sdev_ncache_pack_list(nvf_handle_t, nvlist_t **); 140 static void sdev_ncache_list_free(nvf_handle_t); 141 static void sdev_nvp_free(nvp_devname_t *); 142 143 /* 144 * Registration for /etc/devices/devname_cache 145 */ 146 static nvf_ops_t sdev_cache_ops = { 147 "/etc/devices/devname_cache", /* path to cache */ 148 sdev_ncache_unpack_nvlist, /* read: unpack nvlist */ 149 sdev_ncache_pack_list, /* write: pack list */ 150 sdev_ncache_list_free, /* free data list */ 151 sdev_ncache_write_complete /* write complete callback */ 152 }; 153 154 /* 155 * 
called once at filesystem initialization 156 */ 157 void 158 sdev_ncache_init(void) 159 { 160 sdev_ncache = sdev_nc_newlist(); 161 } 162 163 /* 164 * called at mount of the global instance 165 * currently the global instance is never unmounted 166 */ 167 void 168 sdev_ncache_setup(void) 169 { 170 sdevfd_handle = nvf_register_file(&sdev_cache_ops); 171 ASSERT(sdevfd_handle); 172 173 list_create(nvf_list(sdevfd_handle), sizeof (nvp_devname_t), 174 offsetof(nvp_devname_t, nvp_link)); 175 176 rw_enter(nvf_lock(sdevfd_handle), RW_WRITER); 177 if (!sdev_cache_read_disable) { 178 (void) nvf_read_file(sdevfd_handle); 179 } 180 sdev_ncache_process_store(); 181 rw_exit(nvf_lock(sdevfd_handle)); 182 183 sdev_devstate_change(); 184 } 185 186 static void 187 sdev_nvp_free(nvp_devname_t *dp) 188 { 189 int i; 190 char **p; 191 192 if (dp->nvp_npaths > 0) { 193 p = dp->nvp_paths; 194 for (i = 0; i < dp->nvp_npaths; i++, p++) { 195 kmem_free(*p, strlen(*p)+1); 196 } 197 kmem_free(dp->nvp_paths, 198 dp->nvp_npaths * sizeof (char *)); 199 kmem_free(dp->nvp_expirecnts, 200 dp->nvp_npaths * sizeof (int)); 201 } 202 203 kmem_free(dp, sizeof (nvp_devname_t)); 204 } 205 206 static void 207 sdev_ncache_list_free(nvf_handle_t fd) 208 { 209 list_t *listp; 210 nvp_devname_t *dp; 211 212 ASSERT(fd == sdevfd_handle); 213 ASSERT(RW_WRITE_HELD(nvf_lock(fd))); 214 215 listp = nvf_list(fd); 216 if ((dp = list_head(listp)) != NULL) { 217 list_remove(listp, dp); 218 sdev_nvp_free(dp); 219 } 220 } 221 222 /* 223 * Unpack a device path/nvlist pair to internal data list format. 224 * Used to decode the nvlist format into the internal representation 225 * when reading /etc/devices/devname_cache. 226 * Note that the expiration counts are optional, for compatibility 227 * with earlier instances of the cache. If not present, the 228 * expire counts are initialized to defaults. 
229 */ 230 static int 231 sdev_ncache_unpack_nvlist(nvf_handle_t fd, nvlist_t *nvl, char *name) 232 { 233 nvp_devname_t *np; 234 char **strs; 235 int *cnts; 236 uint_t nstrs, ncnts; 237 int rval, i; 238 239 ASSERT(fd == sdevfd_handle); 240 ASSERT(RW_WRITE_HELD(nvf_lock(fd))); 241 242 /* name of the sublist must match what we created */ 243 if (strcmp(name, DP_DEVNAME_ID) != 0) { 244 return (-1); 245 } 246 247 np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP); 248 249 rval = nvlist_lookup_string_array(nvl, 250 DP_DEVNAME_NCACHE_ID, &strs, &nstrs); 251 if (rval) { 252 kmem_free(np, sizeof (nvp_devname_t)); 253 return (-1); 254 } 255 256 np->nvp_npaths = nstrs; 257 np->nvp_paths = kmem_zalloc(nstrs * sizeof (char *), KM_SLEEP); 258 for (i = 0; i < nstrs; i++) { 259 np->nvp_paths[i] = i_ddi_strdup(strs[i], KM_SLEEP); 260 } 261 np->nvp_expirecnts = kmem_zalloc(nstrs * sizeof (int), KM_SLEEP); 262 for (i = 0; i < nstrs; i++) { 263 np->nvp_expirecnts[i] = sdev_nc_expirecnt; 264 } 265 266 rval = nvlist_lookup_int32_array(nvl, 267 DP_DEVNAME_NC_EXPIRECNT_ID, &cnts, &ncnts); 268 if (rval == 0) { 269 ASSERT(ncnts == nstrs); 270 ncnts = min(ncnts, nstrs); 271 for (i = 0; i < nstrs; i++) { 272 np->nvp_expirecnts[i] = cnts[i]; 273 } 274 } 275 276 list_insert_tail(nvf_list(sdevfd_handle), np); 277 278 return (0); 279 } 280 281 /* 282 * Pack internal format cache data to a single nvlist. 283 * Used when writing the nvlist file. 284 * Note this is called indirectly by the nvpflush daemon. 
 */
static int
sdev_ncache_pack_list(nvf_handle_t fd, nvlist_t **ret_nvl)
{
	nvlist_t	*nvl, *sub_nvl;
	nvp_devname_t	*np;
	int	rval;
	list_t	*listp;

	ASSERT(fd == sdevfd_handle);
	ASSERT(RW_WRITE_HELD(nvf_lock(fd)));

	rval = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
	if (rval != 0) {
		nvf_error("%s: nvlist alloc error %d\n",
		    nvf_cache_name(fd), rval);
		return (DDI_FAILURE);
	}

	/* the data list carries at most a single element */
	listp = nvf_list(sdevfd_handle);
	if ((np = list_head(listp)) != NULL) {
		ASSERT(list_next(listp, np) == NULL);

		rval = nvlist_alloc(&sub_nvl, NV_UNIQUE_NAME, KM_SLEEP);
		if (rval != 0) {
			nvf_error("%s: nvlist alloc error %d\n",
			    nvf_cache_name(fd), rval);
			sub_nvl = NULL;
			goto err;
		}

		rval = nvlist_add_string_array(sub_nvl,
		    DP_DEVNAME_NCACHE_ID, np->nvp_paths, np->nvp_npaths);
		if (rval != 0) {
			nvf_error("%s: nvlist add error %d (sdev)\n",
			    nvf_cache_name(fd), rval);
			goto err;
		}

		rval = nvlist_add_int32_array(sub_nvl,
		    DP_DEVNAME_NC_EXPIRECNT_ID,
		    np->nvp_expirecnts, np->nvp_npaths);
		if (rval != 0) {
			nvf_error("%s: nvlist add error %d (sdev)\n",
			    nvf_cache_name(fd), rval);
			goto err;
		}

		rval = nvlist_add_nvlist(nvl, DP_DEVNAME_ID, sub_nvl);
		if (rval != 0) {
			nvf_error("%s: nvlist add error %d (sublist)\n",
			    nvf_cache_name(fd), rval);
			goto err;
		}
		nvlist_free(sub_nvl);
	}

	/* ownership of nvl passes to the caller on success */
	*ret_nvl = nvl;
	return (DDI_SUCCESS);

err:
	if (sub_nvl)
		nvlist_free(sub_nvl);
	nvlist_free(nvl);
	*ret_nvl = NULL;
	return (DDI_FAILURE);
}

/*
 * Run through the data read from the backing cache store
 * to establish the initial state of the neg. cache.
 */
static void
sdev_ncache_process_store(void)
{
	sdev_nc_list_t	*ncl = sdev_ncache;
	nvp_devname_t	*np;
	sdev_nc_node_t	*lp;
	char	*path;
	int	i, n;
	list_t	*listp;

	if (sdev_nc_disable)
		return;

	ASSERT(RW_WRITE_HELD(nvf_lock(sdevfd_handle)));

	/*
	 * Copy each path read from /etc/devices/devname_cache into
	 * an in-core negative cache node, up to the configured
	 * maximum number of entries.
	 */
	listp = nvf_list(sdevfd_handle);
	for (np = list_head(listp); np; np = list_next(listp, np)) {
		for (i = 0; i < np->nvp_npaths; i++) {
			sdcmn_err5((" %s %d\n",
			    np->nvp_paths[i], np->nvp_expirecnts[i]));
			if (ncl->ncl_nentries < sdev_nc_max_entries) {
				path = np->nvp_paths[i];
				n = strlen(path) + 1;
				lp = kmem_alloc(sizeof (sdev_nc_node_t),
				    KM_SLEEP);
				lp->ncn_name = kmem_alloc(n, KM_SLEEP);
				bcopy(path, lp->ncn_name, n);
				/* from store: won't re-dirty the list */
				lp->ncn_flags = NCN_SRC_STORE;
				lp->ncn_expirecnt = np->nvp_expirecnts[i];
				sdev_nc_insertnode(ncl, lp);
			} else if (sdev_nc_verbose) {
				cmn_err(CE_CONT,
				    "?%s: truncating from ncache (max %d)\n",
				    np->nvp_paths[i], sdev_nc_max_entries);
			}
		}
	}
}

/*
 * called by nvpflush daemon to inform us that an update of
 * the cache file has been completed.
 */
static void
sdev_ncache_write_complete(nvf_handle_t fd)
{
	sdev_nc_list_t	*ncl = sdev_ncache;

	ASSERT(fd == sdevfd_handle);

	mutex_enter(&ncl->ncl_mutex);

	ASSERT(ncl->ncl_flags & NCL_LIST_WRITING);

	if (ncl->ncl_flags & NCL_LIST_DIRTY) {
		/* cache changed while the write was in flight; go again */
		sdcmn_err5(("ncache write complete but dirty again\n"));
		ncl->ncl_flags &= ~NCL_LIST_DIRTY;
		mutex_exit(&ncl->ncl_mutex);
		sdev_ncache_write();
	} else {
		sdcmn_err5(("ncache write complete\n"));
		ncl->ncl_flags &= ~NCL_LIST_WRITING;
		mutex_exit(&ncl->ncl_mutex);
		rw_enter(nvf_lock(fd), RW_WRITER);
		sdev_ncache_list_free(fd);
		rw_exit(nvf_lock(fd));
	}
}

/*
 * Prepare to perform an update of the neg. cache backing store.
 */
static void
sdev_ncache_write(void)
{
	sdev_nc_list_t	*ncl = sdev_ncache;
	nvp_devname_t	*np;
	sdev_nc_node_t	*lp;
	int	n, i;

	if (sdev_cache_write_disable) {
		mutex_enter(&ncl->ncl_mutex);
		ncl->ncl_flags &= ~NCL_LIST_WRITING;
		mutex_exit(&ncl->ncl_mutex);
		return;
	}

	/* proper lock ordering here is essential */
	rw_enter(nvf_lock(sdevfd_handle), RW_WRITER);
	sdev_ncache_list_free(sdevfd_handle);

	rw_enter(&ncl->ncl_lock, RW_READER);
	n = ncl->ncl_nentries;
	ASSERT(n <= sdev_nc_max_entries);

	/* snapshot the in-core cache into a single nvp_devname_t */
	np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP);
	np->nvp_npaths = n;
	np->nvp_paths = kmem_zalloc(n * sizeof (char *), KM_SLEEP);
	np->nvp_expirecnts = kmem_zalloc(n * sizeof (int), KM_SLEEP);

	i = 0;
	for (lp = list_head(&ncl->ncl_list); lp;
	    lp = list_next(&ncl->ncl_list, lp)) {
		np->nvp_paths[i] = i_ddi_strdup(lp->ncn_name, KM_SLEEP);
		np->nvp_expirecnts[i] = lp->ncn_expirecnt;
		sdcmn_err5((" %s %d\n",
		    np->nvp_paths[i], np->nvp_expirecnts[i]));
		i++;
	}

	rw_exit(&ncl->ncl_lock);

	/* queue the snapshot; the nvpflush daemon does the actual write */
	nvf_mark_dirty(sdevfd_handle);
	list_insert_tail(nvf_list(sdevfd_handle), np);
	rw_exit(nvf_lock(sdevfd_handle));

	nvf_wake_daemon();
}

/*
 * Kick off a write of the cache if it is dirty, write-enabled,
 * and no write is already in progress.
 */
static void
sdev_nc_flush_updates(void)
{
	sdev_nc_list_t *ncl = sdev_ncache;

	if (sdev_nc_disable || sdev_cache_write_disable)
		return;

	mutex_enter(&ncl->ncl_mutex);
	if (((ncl->ncl_flags &
	    (NCL_LIST_DIRTY | NCL_LIST_WENABLE | NCL_LIST_WRITING)) ==
	    (NCL_LIST_DIRTY | NCL_LIST_WENABLE))) {
		/* transition dirty -> writing before dropping the mutex */
		ncl->ncl_flags &= ~NCL_LIST_DIRTY;
		ncl->ncl_flags |= NCL_LIST_WRITING;
		mutex_exit(&ncl->ncl_mutex);
		sdev_ncache_write();
	} else {
		mutex_exit(&ncl->ncl_mutex);
	}
}

/*
 * Flush pending updates, but only once past the initial boot
 * state and once writing has been enabled (NCL_LIST_WENABLE).
 */
static void
sdev_nc_flush_boot_update(void)
{
	sdev_nc_list_t *ncl = sdev_ncache;

	if (sdev_nc_disable || sdev_cache_write_disable ||
	    (sdev_boot_state == SDEV_BOOT_STATE_INITIAL)) {
		return;
	}
	mutex_enter(&ncl->ncl_mutex);
	if (ncl->ncl_flags & NCL_LIST_WENABLE) {
		mutex_exit(&ncl->ncl_mutex);
		sdev_nc_flush_updates();
	} else {
		mutex_exit(&ncl->ncl_mutex);
	}

}

static void
sdev_state_boot_complete()
{
	sdev_nc_list_t	*ncl = sdev_ncache;
	sdev_nc_node_t	*lp, *next;

	/*
	 * Once boot is complete, decrement the expire count of each entry
	 * in the cache not touched by a reference.  Remove any that
	 * goes to zero.  This effectively removes random entries over
	 * time.
	 */
	rw_enter(&ncl->ncl_lock, RW_WRITER);
	mutex_enter(&ncl->ncl_mutex);

	for (lp = list_head(&ncl->ncl_list); lp; lp = next) {
		next = list_next(&ncl->ncl_list, lp);
		if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0) {
			if (lp->ncn_flags & NCN_ACTIVE) {
				/* referenced this boot: reset to full count */
				if (lp->ncn_expirecnt != sdev_nc_expirecnt) {
					lp->ncn_expirecnt = sdev_nc_expirecnt;
					ncl->ncl_flags |= NCL_LIST_DIRTY;
				}
			} else {
				/* unreferenced: age it, dropping at zero */
				if (--lp->ncn_expirecnt == 0) {
					list_remove(&ncl->ncl_list, lp);
					sdev_nc_free_unlinked_node(lp);
					ncl->ncl_nentries--;
				}
				ncl->ncl_flags |= NCL_LIST_DIRTY;
			}
		}
	}

	mutex_exit(&ncl->ncl_mutex);
	rw_exit(&ncl->ncl_lock);

	sdev_nc_flush_boot_update();
	sdev_boot_state = SDEV_BOOT_STATE_COMPLETE;
}

/*
 * Upon transition to the login state on a reconfigure boot,
 * a debounce timer is set up so that we cache all the nonsense
 * lookups we're hit with by the windowing system startup.
 */

/*ARGSUSED*/
static void
sdev_state_timeout(void *arg)
{
	sdev_state_boot_complete();
}

/*
 * System-available transition: enable cache write-out and start
 * the debounce timer (sdev_reconfig_delay seconds) after which
 * boot is considered complete.
 */
static void
sdev_state_sysavail()
{
	sdev_nc_list_t	*ncl = sdev_ncache;
	clock_t	nticks;
	int	nsecs;

	mutex_enter(&ncl->ncl_mutex);
	ncl->ncl_flags |= NCL_LIST_WENABLE;
	mutex_exit(&ncl->ncl_mutex);

	nsecs = sdev_reconfig_delay;
	if (nsecs == 0) {
		/* no debounce configured: complete immediately */
		sdev_state_boot_complete();
	} else {
		nticks = drv_usectohz(1000000 * nsecs);
		sdcmn_err5(("timeout initiated %ld\n", nticks));
		(void) timeout(sdev_state_timeout, NULL, nticks);
		sdev_nc_flush_boot_update();
	}
}

/*
 * Called to inform the filesystem of progress during boot,
 * either a notice of reconfiguration boot or an indication of
 * system boot complete.  At system boot complete, set up a
 * timer at the expiration of which no further failed lookups
 * will be added to the negative cache.
 *
 * The dev filesystem infers from reconfig boot that implicit
 * reconfig need not be invoked at all as all available devices
 * will have already been named.
 *
 * The dev filesystem infers from "system available" that devfsadmd
 * can now be run and hence implicit reconfiguration may be initiated.
 * During early stages of system startup, implicit reconfig is
 * not done to avoid impacting boot performance.
 */
void
sdev_devstate_change(void)
{
	int new_state;

	/*
	 * Track system state and manage interesting transitions
	 */
	new_state = SDEV_BOOT_STATE_INITIAL;
	if (i_ddi_reconfig())
		new_state = SDEV_BOOT_STATE_RECONFIG;
	if (i_ddi_sysavail())
		new_state = SDEV_BOOT_STATE_SYSAVAIL;

	/* state only ever advances */
	if (sdev_boot_state < new_state) {
		switch (new_state) {
		case SDEV_BOOT_STATE_RECONFIG:
			sdcmn_err5(("state change: reconfigure boot\n"));
			sdev_boot_state = new_state;
			sdev_reconfig_boot = 1;
			if (!sdev_nc_disable_reset)
				sdev_nc_free_bootonly();
			break;
		case SDEV_BOOT_STATE_SYSAVAIL:
			sdcmn_err5(("system available\n"));
			sdev_boot_state = new_state;
			sdev_state_sysavail();
			break;
		}
	}
}

/*
 * Lookup: filter out entries in the negative cache
 * Return 1 if the lookup should not cause a reconfig.
 */
int
sdev_lookup_filter(sdev_node_t *dv, char *nm)
{
	int	n;
	sdev_nc_list_t	*ncl = sdev_ncache;
	sdev_nc_node_t	*lp;
	char	*path;
	int	rval = 0;
	int	changed = 0;

	ASSERT(i_ddi_io_initialized());
	ASSERT(SDEVTOV(dv)->v_type == VDIR);

	if (sdev_nc_disable)
		return (0);

	/* build the full "<dir>/<name>" path to look up */
	n = strlen(dv->sdev_path) + strlen(nm) + 2;
	path = kmem_alloc(n, KM_SLEEP);
	(void) sprintf(path, "%s/%s", dv->sdev_path, nm);

	rw_enter(&ncl->ncl_lock, RW_READER);
	if ((lp = sdev_nc_findpath(ncl, path)) != NULL) {
		sdcmn_err5(("%s/%s: lookup by %s cached, no reconfig\n",
		    dv->sdev_name, nm, curproc->p_user.u_comm));
		if (sdev_nc_verbose) {
			cmn_err(CE_CONT,
			    "?%s/%s: lookup by %s cached, no reconfig\n",
			    dv->sdev_name, nm, curproc->p_user.u_comm);
		}
		/* mark the entry referenced and refresh its expire count */
		mutex_enter(&ncl->ncl_mutex);
		lp->ncn_flags |= NCN_ACTIVE;
		if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0 &&
		    lp->ncn_expirecnt < sdev_nc_expirecnt) {
			lp->ncn_expirecnt = sdev_nc_expirecnt;
			ncl->ncl_flags |= NCL_LIST_DIRTY;
			changed = 1;
		}
		mutex_exit(&ncl->ncl_mutex);
		rval = 1;
	}
	rw_exit(&ncl->ncl_lock);
	kmem_free(path, n);
	if (changed)
		sdev_nc_flush_boot_update();
	return (rval);
}

void
sdev_lookup_failed(sdev_node_t *dv, char *nm, int failed_flags)
{
	if (sdev_nc_disable)
		return;

	/*
	 * If we're still in the initial boot stage, always update
	 * the cache - we may not have received notice of the
	 * reconfig boot state yet.  On a reconfigure boot, entries
	 * from the backing store are not re-persisted on update,
	 * but new entries are marked as needing an update.
	 * Never cache dynamic or non-global nodes.
	 */
	if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) &&
	    !SDEV_IS_NO_NCACHE(dv) &&
	    ((failed_flags & SLF_NO_NCACHE) == 0) &&
	    ((sdev_reconfig_boot &&
	    (sdev_boot_state != SDEV_BOOT_STATE_COMPLETE)) ||
	    (!sdev_reconfig_boot && ((failed_flags & SLF_REBUILT))))) {
		sdev_nc_addname(sdev_ncache,
		    dv, nm, NCN_SRC_CURRENT|NCN_ACTIVE);
	}
}

/*
 * Allocate and initialize an empty negative-cache list.
 */
static sdev_nc_list_t *
sdev_nc_newlist(void)
{
	sdev_nc_list_t	*ncl;

	ncl = kmem_zalloc(sizeof (sdev_nc_list_t), KM_SLEEP);

	rw_init(&ncl->ncl_lock, NULL, RW_DEFAULT, NULL);
	mutex_init(&ncl->ncl_mutex, NULL, MUTEX_DEFAULT, NULL);
	list_create(&ncl->ncl_list, sizeof (sdev_nc_node_t),
	    offsetof(sdev_nc_node_t, ncn_link));

	return (ncl);
}

/*
 * Free a node that is not linked on any list.
 */
static void
sdev_nc_free_unlinked_node(sdev_nc_node_t *lp)
{
	kmem_free(lp->ncn_name, strlen(lp->ncn_name) + 1);
	kmem_free(lp, sizeof (sdev_nc_node_t));
}

/*
 * Linear search of the cache for path; caller holds ncl_lock.
 */
static sdev_nc_node_t *
sdev_nc_findpath(sdev_nc_list_t *ncl, char *path)
{
	sdev_nc_node_t *lp;

	ASSERT(RW_LOCK_HELD(&ncl->ncl_lock));

	for (lp = list_head(&ncl->ncl_list); lp;
	    lp = list_next(&ncl->ncl_list, lp)) {
		if (strcmp(path, lp->ncn_name) == 0)
			return (lp);
	}

	return (NULL);
}

/*
 * Insert a new node into the negative cache, or merge its flags
 * into an existing entry with the same name.  Consumes 'new'.
 */
static void
sdev_nc_insertnode(sdev_nc_list_t *ncl, sdev_nc_node_t *new)
{
	sdev_nc_node_t *lp;

	rw_enter(&ncl->ncl_lock, RW_WRITER);

	lp = sdev_nc_findpath(ncl, new->ncn_name);
	if (lp == NULL) {
		if (ncl->ncl_nentries == sdev_nc_max_entries) {
			/* cache full: drop the new entry on the floor */
			sdcmn_err5((
			    "%s by %s: not adding to ncache (max %d)\n",
			    new->ncn_name, curproc->p_user.u_comm,
			    ncl->ncl_nentries));
			if (sdev_nc_verbose) {
				cmn_err(CE_CONT, "?%s by %s: "
				    "not adding to ncache (max %d)\n",
				    new->ncn_name, curproc->p_user.u_comm,
				    ncl->ncl_nentries);
			}
			rw_exit(&ncl->ncl_lock);
			sdev_nc_free_unlinked_node(new);
		} else {

			list_insert_tail(&ncl->ncl_list, new);
			ncl->ncl_nentries++;

			/* don't mark list dirty for nodes from store */
			mutex_enter(&ncl->ncl_mutex);
			if ((new->ncn_flags & NCN_SRC_STORE) == 0) {
				sdcmn_err5(("%s by %s: add to ncache\n",
				    new->ncn_name, curproc->p_user.u_comm));
				if (sdev_nc_verbose) {
					cmn_err(CE_CONT,
					    "?%s by %s: add to ncache\n",
					    new->ncn_name,
					    curproc->p_user.u_comm);
				}
				ncl->ncl_flags |= NCL_LIST_DIRTY;
			}
			mutex_exit(&ncl->ncl_mutex);
			rw_exit(&ncl->ncl_lock);
			lp = new;
			sdev_nc_flush_boot_update();
		}
	} else {
		/* already cached: merge flags, discard the duplicate */
		mutex_enter(&ncl->ncl_mutex);
		lp->ncn_flags |= new->ncn_flags;
		mutex_exit(&ncl->ncl_mutex);
		rw_exit(&ncl->ncl_lock);
		sdev_nc_free_unlinked_node(new);
	}
}

/*
 * Build the full path for nm under directory node dv and insert
 * it into the negative cache with the given flags.
 */
void
sdev_nc_addname(sdev_nc_list_t *ncl, sdev_node_t *dv, char *nm, int flags)
{
	int	n;
	sdev_nc_node_t	*lp;

	ASSERT(SDEVTOV(dv)->v_type == VDIR);

	lp = kmem_zalloc(sizeof (sdev_nc_node_t), KM_SLEEP);

	n = strlen(dv->sdev_path) + strlen(nm) + 2;
	lp->ncn_name = kmem_alloc(n, KM_SLEEP);
	(void) sprintf(lp->ncn_name, "%s/%s",
	    dv->sdev_path, nm);
	lp->ncn_flags = flags;
	lp->ncn_expirecnt = sdev_nc_expirecnt;
	sdev_nc_insertnode(ncl, lp);
}

/*
 * A node has come into existence; drop any matching entry from
 * the negative cache.
 */
void
sdev_nc_node_exists(sdev_node_t *dv)
{
	/* dynamic and non-global nodes are never cached */
	if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) &&
	    !SDEV_IS_NO_NCACHE(dv)) {
		sdev_nc_path_exists(sdev_ncache, dv->sdev_path);
	}
}

/*
 * Remove path from the negative cache if present, marking the
 * list dirty and flushing to the backing store when writing is
 * enabled.
 */
void
sdev_nc_path_exists(sdev_nc_list_t *ncl, char *path)
{
	sdev_nc_node_t *lp;

	if (sdev_nc_disable)
		return;

	rw_enter(&ncl->ncl_lock, RW_READER);
	if ((lp = sdev_nc_findpath(ncl, path)) == NULL) {
		rw_exit(&ncl->ncl_lock);
		return;
	}
	/*
	 * Upgrade to writer for the removal.  If the upgrade fails
	 * the lock was dropped, so the entry must be looked up again
	 * (it may have been removed by someone else meanwhile).
	 */
	if (rw_tryupgrade(&ncl->ncl_lock) == 0) {
		rw_exit(&ncl->ncl_lock);
		rw_enter(&ncl->ncl_lock, RW_WRITER);
		lp = sdev_nc_findpath(ncl, path);
	}
	if (lp) {
		list_remove(&ncl->ncl_list, lp);
		ncl->ncl_nentries--;
		mutex_enter(&ncl->ncl_mutex);
		ncl->ncl_flags |= NCL_LIST_DIRTY;
		if (ncl->ncl_flags & NCL_LIST_WENABLE) {
			mutex_exit(&ncl->ncl_mutex);
			rw_exit(&ncl->ncl_lock);
			sdev_nc_flush_updates();
		} else {
			mutex_exit(&ncl->ncl_mutex);
			rw_exit(&ncl->ncl_lock);
		}
		sdev_nc_free_unlinked_node(lp);
		sdcmn_err5(("%s by %s: removed from ncache\n",
		    path, curproc->p_user.u_comm));
		if (sdev_nc_verbose) {
			cmn_err(CE_CONT, "?%s by %s: removed from ncache\n",
			    path, curproc->p_user.u_comm);
		}
	} else
		rw_exit(&ncl->ncl_lock);
}

/*
 * Reconfigure boot: reset the cache by freeing every entry that
 * did not originate in the current boot (NCN_SRC_CURRENT unset).
 */
static void
sdev_nc_free_bootonly(void)
{
	sdev_nc_list_t	*ncl = sdev_ncache;
	sdev_nc_node_t	*lp;
	sdev_nc_node_t	*next;

	ASSERT(sdev_reconfig_boot);

	rw_enter(&ncl->ncl_lock, RW_WRITER);

	for (lp = list_head(&ncl->ncl_list); lp; lp = next) {
		next = list_next(&ncl->ncl_list, lp);
		if ((lp->ncn_flags & NCN_SRC_CURRENT) == 0) {
			sdcmn_err5(("freeing %s\n", lp->ncn_name));
			mutex_enter(&ncl->ncl_mutex);
			ncl->ncl_flags |= NCL_LIST_DIRTY;
			mutex_exit(&ncl->ncl_mutex);
			list_remove(&ncl->ncl_list, lp);
			sdev_nc_free_unlinked_node(lp);
			ncl->ncl_nentries--;
		}
	}

	rw_exit(&ncl->ncl_lock);
}