/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * negative cache handling for the /dev fs
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/user.h>
#include <sys/time.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/kmem.h>
#include <sys/uio.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/cred.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/mode.h>
#include <sys/policy.h>
#include <fs/fs_subr.h>
#include <sys/mount.h>
#include <sys/fs/snode.h>
#include <sys/fs/dv_node.h>
#include <sys/fs/sdev_impl.h>
#include <sys/sunndi.h>
#include <sys/sunmdi.h>
#include <sys/ddi.h>
#include <sys/modctl.h>
#include <sys/devcache.h>


/*
 * ncache is a negative cache of failed lookups.  An entry
 * is added after an attempt to configure a device by that
 * name failed.  An accumulation of these entries over time
 * gives us a set of device names for which implicit reconfiguration
 * does not need to be attempted.  If a name is created matching
 * an entry in ncache, that entry is removed, with the
 * persistent store updated.
 *
 * Implicit reconfig is initiated for any name during lookup that
 * can't be resolved from the backing store and that isn't
 * present in the negative cache.  This functionality is
 * enabled during system startup once communication with devfsadm
 * can be achieved.  Since readdir is more general, implicit
 * reconfig initiated by reading a directory isn't enabled until
 * the system is more fully booted, at the time of the multi-user
 * milestone, corresponding to init state 2.
 *
 * A maximum is imposed on the number of entries in the cache
 * to limit the damage of a script gone wild and as a defense
 * against attack.  The default limit is 64 and can be adjusted
 * via sdev_nc_max_entries.
 *
 * Each entry also has an expiration count.  When a name in the cache
 * is looked up, its count is reset to the default.  Subsequent boots
 * decrement the count if a name isn't referenced.  This permits a
 * once-only entry to eventually be removed over time.
 *
 * sdev_reconfig_delay implements a "debounce" of the timing beyond the
 * system-available indication, providing what the filesystem considers
 * to be the system-is-fully-booted state.  This is provided to adjust
 * the timing if some application startup is performing a readdir
 * in /dev that initiates a troublesome implicit reconfig on every boot.
 *
 * sdev_nc_disable_reset can be used to disable clearing the negative cache
 * on reconfig boot.  The default is to clear the cache on reconfig boot.
 * sdev_nc_disable can be used to disable the negative cache itself.
 *
 * sdev_reconfig_disable can be used to disable implicit reconfig.
 * The default is that implicit reconfig is enabled.
 */

/* tunables and defaults */
#define SDEV_NC_EXPIRECNT       4
#define SDEV_NC_MAX_ENTRIES     64
#define SEV_RECONFIG_DELAY      6       /* seconds */

/* tunables */
int sdev_nc_expirecnt = SDEV_NC_EXPIRECNT;
int sdev_nc_max_entries = SDEV_NC_MAX_ENTRIES;
int sdev_reconfig_delay = SEV_RECONFIG_DELAY;
int sdev_reconfig_verbose = 0;
int sdev_reconfig_disable = 0;
int sdev_nc_disable = 0;
int sdev_nc_disable_reset = 0;
int sdev_nc_verbose = 0;
int sdev_cache_read_disable = 0;
int sdev_cache_write_disable = 0;

/* globals */
int sdev_boot_state = SDEV_BOOT_STATE_INITIAL;
int sdev_reconfig_boot = 0;
sdev_nc_list_t *sdev_ncache;
static nvf_handle_t sdevfd_handle;

/* static prototypes */
static void sdev_ncache_write_complete(nvf_handle_t);
static void sdev_ncache_write(void);
static void sdev_ncache_process_store(void);
static sdev_nc_list_t *sdev_nc_newlist(void);
static void sdev_nc_free_unlinked_node(sdev_nc_node_t *);
static sdev_nc_node_t *sdev_nc_findpath(sdev_nc_list_t *, char *);
static void sdev_nc_insertnode(sdev_nc_list_t *, sdev_nc_node_t *);
static void sdev_nc_free_bootonly(void);
static int sdev_ncache_unpack_nvlist(nvf_handle_t, nvlist_t *, char *);
static int sdev_ncache_pack_list(nvf_handle_t, nvlist_t **);
static void sdev_ncache_list_free(nvf_handle_t);
static void sdev_nvp_free(nvp_devname_t *);

/*
 * Registration for /etc/devices/devname_cache
 */
static nvf_ops_t sdev_cache_ops = {
        "/etc/devices/devname_cache",   /* path to cache */
        sdev_ncache_unpack_nvlist,      /* read: unpack nvlist */
        sdev_ncache_pack_list,          /* write: pack list */
        sdev_ncache_list_free,          /* free data list */
        sdev_ncache_write_complete      /* write complete callback */
};

/*
 * called once at filesystem initialization
 */
void
sdev_ncache_init(void)
{
        sdev_ncache = sdev_nc_newlist();
}

/*
 * called at mount of the global instance
 * currently the global instance is never unmounted
 */
void
sdev_ncache_setup(void)
{
        sdevfd_handle = nvf_register_file(&sdev_cache_ops);
        ASSERT(sdevfd_handle);

        list_create(nvf_list(sdevfd_handle), sizeof (nvp_devname_t),
            offsetof(nvp_devname_t, nvp_link));

        rw_enter(nvf_lock(sdevfd_handle), RW_WRITER);
        if (!sdev_cache_read_disable) {
                (void) nvf_read_file(sdevfd_handle);
        }
        sdev_ncache_process_store();
        rw_exit(nvf_lock(sdevfd_handle));

        sdev_devstate_change();
}

static void
sdev_nvp_free(nvp_devname_t *dp)
{
        int i;
        char **p;

        if (dp->nvp_npaths > 0) {
                p = dp->nvp_paths;
                for (i = 0; i < dp->nvp_npaths; i++, p++) {
                        kmem_free(*p, strlen(*p)+1);
                }
                kmem_free(dp->nvp_paths,
                    dp->nvp_npaths * sizeof (char *));
                kmem_free(dp->nvp_expirecnts,
                    dp->nvp_npaths * sizeof (int));
        }

        kmem_free(dp, sizeof (nvp_devname_t));
}

static void
sdev_ncache_list_free(nvf_handle_t fd)
{
        list_t *listp;
        nvp_devname_t *dp;

        ASSERT(fd == sdevfd_handle);
        ASSERT(RW_WRITE_HELD(nvf_lock(fd)));

        listp = nvf_list(fd);
        if ((dp = list_head(listp)) != NULL) {
                list_remove(listp, dp);
                sdev_nvp_free(dp);
        }
}

/*
 * Unpack a device path/nvlist pair to internal data list format.
 * Used to decode the nvlist format into the internal representation
 * when reading /etc/devices/devname_cache.
 * Note that the expiration counts are optional, for compatibility
 * with earlier instances of the cache.  If not present, the
 * expire counts are initialized to defaults.
 */
static int
sdev_ncache_unpack_nvlist(nvf_handle_t fd, nvlist_t *nvl, char *name)
{
        nvp_devname_t *np;
        char **strs;
        int *cnts;
        uint_t nstrs, ncnts;
        int rval, i;

        ASSERT(fd == sdevfd_handle);
        ASSERT(RW_WRITE_HELD(nvf_lock(fd)));

        /* name of the sublist must match what we created */
        if (strcmp(name, DP_DEVNAME_ID) != 0) {
                return (-1);
        }

        np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP);

        rval = nvlist_lookup_string_array(nvl,
            DP_DEVNAME_NCACHE_ID, &strs, &nstrs);
        if (rval) {
                kmem_free(np, sizeof (nvp_devname_t));
                return (-1);
        }

        np->nvp_npaths = nstrs;
        np->nvp_paths = kmem_zalloc(nstrs * sizeof (char *), KM_SLEEP);
        for (i = 0; i < nstrs; i++) {
                np->nvp_paths[i] = i_ddi_strdup(strs[i], KM_SLEEP);
        }
        np->nvp_expirecnts = kmem_zalloc(nstrs * sizeof (int), KM_SLEEP);
        for (i = 0; i < nstrs; i++) {
                np->nvp_expirecnts[i] = sdev_nc_expirecnt;
        }

        rval = nvlist_lookup_int32_array(nvl,
            DP_DEVNAME_NC_EXPIRECNT_ID, &cnts, &ncnts);
        if (rval == 0) {
                ASSERT(ncnts == nstrs);
                ncnts = min(ncnts, nstrs);
                for (i = 0; i < ncnts; i++) {
                        np->nvp_expirecnts[i] = cnts[i];
                }
        }

        list_insert_tail(nvf_list(sdevfd_handle), np);

        return (0);
}

/*
 * Pack internal format cache data to a single nvlist.
 * Used when writing the nvlist file.
 * Note this is called indirectly by the nvpflush daemon.
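 *
 * The packed layout is roughly:
 *      DP_DEVNAME_ID (nvlist)
 *              DP_DEVNAME_NCACHE_ID (string array of cached /dev paths)
 *              DP_DEVNAME_NC_EXPIRECNT_ID (int32 array of expire counts)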
 */
static int
sdev_ncache_pack_list(nvf_handle_t fd, nvlist_t **ret_nvl)
{
        nvlist_t *nvl, *sub_nvl;
        nvp_devname_t *np;
        int rval;
        list_t *listp;

        ASSERT(fd == sdevfd_handle);
        ASSERT(RW_WRITE_HELD(nvf_lock(fd)));

        rval = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
        if (rval != 0) {
                nvf_error("%s: nvlist alloc error %d\n",
                    nvf_cache_name(fd), rval);
                return (DDI_FAILURE);
        }

        listp = nvf_list(sdevfd_handle);
        if ((np = list_head(listp)) != NULL) {
                ASSERT(list_next(listp, np) == NULL);

                rval = nvlist_alloc(&sub_nvl, NV_UNIQUE_NAME, KM_SLEEP);
                if (rval != 0) {
                        nvf_error("%s: nvlist alloc error %d\n",
                            nvf_cache_name(fd), rval);
                        sub_nvl = NULL;
                        goto err;
                }

                rval = nvlist_add_string_array(sub_nvl,
                    DP_DEVNAME_NCACHE_ID, np->nvp_paths, np->nvp_npaths);
                if (rval != 0) {
                        nvf_error("%s: nvlist add error %d (sdev)\n",
                            nvf_cache_name(fd), rval);
                        goto err;
                }

                rval = nvlist_add_int32_array(sub_nvl,
                    DP_DEVNAME_NC_EXPIRECNT_ID,
                    np->nvp_expirecnts, np->nvp_npaths);
                if (rval != 0) {
                        nvf_error("%s: nvlist add error %d (sdev)\n",
                            nvf_cache_name(fd), rval);
                        goto err;
                }

                rval = nvlist_add_nvlist(nvl, DP_DEVNAME_ID, sub_nvl);
                if (rval != 0) {
                        nvf_error("%s: nvlist add error %d (sublist)\n",
                            nvf_cache_name(fd), rval);
                        goto err;
                }
                nvlist_free(sub_nvl);
        }

        *ret_nvl = nvl;
        return (DDI_SUCCESS);

err:
        nvlist_free(sub_nvl);
        nvlist_free(nvl);
        *ret_nvl = NULL;
        return (DDI_FAILURE);
}

/*
 * Run through the data read from the backing cache store
 * to establish the initial state of the neg. cache.
 */
static void
sdev_ncache_process_store(void)
{
        sdev_nc_list_t *ncl = sdev_ncache;
        nvp_devname_t *np;
        sdev_nc_node_t *lp;
        char *path;
        int i, n;
        list_t *listp;

        if (sdev_nc_disable)
                return;

        ASSERT(RW_WRITE_HELD(nvf_lock(sdevfd_handle)));

        listp = nvf_list(sdevfd_handle);
        for (np = list_head(listp); np; np = list_next(listp, np)) {
                for (i = 0; i < np->nvp_npaths; i++) {
                        sdcmn_err5((" %s %d\n",
                            np->nvp_paths[i], np->nvp_expirecnts[i]));
                        if (ncl->ncl_nentries < sdev_nc_max_entries) {
                                path = np->nvp_paths[i];
                                n = strlen(path) + 1;
                                lp = kmem_alloc(sizeof (sdev_nc_node_t),
                                    KM_SLEEP);
                                lp->ncn_name = kmem_alloc(n, KM_SLEEP);
                                bcopy(path, lp->ncn_name, n);
                                lp->ncn_flags = NCN_SRC_STORE;
                                lp->ncn_expirecnt = np->nvp_expirecnts[i];
                                sdev_nc_insertnode(ncl, lp);
                        } else if (sdev_nc_verbose) {
                                cmn_err(CE_CONT,
                                    "?%s: truncating from ncache (max %d)\n",
                                    np->nvp_paths[i], sdev_nc_max_entries);
                        }
                }
        }
}

/*
 * called by nvpflush daemon to inform us that an update of
 * the cache file has been completed.
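 * If the in-core list was marked dirty again while the write was in
 * progress, another write is initiated; otherwise the packed snapshot
 * handed to nvpflush is freed.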
 */
static void
sdev_ncache_write_complete(nvf_handle_t fd)
{
        sdev_nc_list_t *ncl = sdev_ncache;

        ASSERT(fd == sdevfd_handle);

        mutex_enter(&ncl->ncl_mutex);

        ASSERT(ncl->ncl_flags & NCL_LIST_WRITING);

        if (ncl->ncl_flags & NCL_LIST_DIRTY) {
                sdcmn_err5(("ncache write complete but dirty again\n"));
                ncl->ncl_flags &= ~NCL_LIST_DIRTY;
                mutex_exit(&ncl->ncl_mutex);
                sdev_ncache_write();
        } else {
                sdcmn_err5(("ncache write complete\n"));
                ncl->ncl_flags &= ~NCL_LIST_WRITING;
                mutex_exit(&ncl->ncl_mutex);
                rw_enter(nvf_lock(fd), RW_WRITER);
                sdev_ncache_list_free(fd);
                rw_exit(nvf_lock(fd));
        }
}

/*
 * Prepare to perform an update of the neg. cache backing store.
 */
static void
sdev_ncache_write(void)
{
        sdev_nc_list_t *ncl = sdev_ncache;
        nvp_devname_t *np;
        sdev_nc_node_t *lp;
        int n, i;

        if (sdev_cache_write_disable) {
                mutex_enter(&ncl->ncl_mutex);
                ncl->ncl_flags &= ~NCL_LIST_WRITING;
                mutex_exit(&ncl->ncl_mutex);
                return;
        }

        /* proper lock ordering here is essential */
        rw_enter(nvf_lock(sdevfd_handle), RW_WRITER);
        sdev_ncache_list_free(sdevfd_handle);

        rw_enter(&ncl->ncl_lock, RW_READER);
        n = ncl->ncl_nentries;
        ASSERT(n <= sdev_nc_max_entries);

        np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP);
        np->nvp_npaths = n;
        np->nvp_paths = kmem_zalloc(n * sizeof (char *), KM_SLEEP);
        np->nvp_expirecnts = kmem_zalloc(n * sizeof (int), KM_SLEEP);

        i = 0;
        for (lp = list_head(&ncl->ncl_list); lp;
            lp = list_next(&ncl->ncl_list, lp)) {
                np->nvp_paths[i] = i_ddi_strdup(lp->ncn_name, KM_SLEEP);
                np->nvp_expirecnts[i] = lp->ncn_expirecnt;
                sdcmn_err5((" %s %d\n",
                    np->nvp_paths[i], np->nvp_expirecnts[i]));
                i++;
        }

        rw_exit(&ncl->ncl_lock);

        nvf_mark_dirty(sdevfd_handle);
        list_insert_tail(nvf_list(sdevfd_handle), np);
        rw_exit(nvf_lock(sdevfd_handle));

        nvf_wake_daemon();
}

static void
sdev_nc_flush_updates(void)
{
        sdev_nc_list_t *ncl = sdev_ncache;

        if (sdev_nc_disable || sdev_cache_write_disable)
                return;

        mutex_enter(&ncl->ncl_mutex);
        if (((ncl->ncl_flags &
            (NCL_LIST_DIRTY | NCL_LIST_WENABLE | NCL_LIST_WRITING)) ==
            (NCL_LIST_DIRTY | NCL_LIST_WENABLE))) {
                ncl->ncl_flags &= ~NCL_LIST_DIRTY;
                ncl->ncl_flags |= NCL_LIST_WRITING;
                mutex_exit(&ncl->ncl_mutex);
                sdev_ncache_write();
        } else {
                mutex_exit(&ncl->ncl_mutex);
        }
}

static void
sdev_nc_flush_boot_update(void)
{
        sdev_nc_list_t *ncl = sdev_ncache;

        if (sdev_nc_disable || sdev_cache_write_disable ||
            (sdev_boot_state == SDEV_BOOT_STATE_INITIAL)) {
                return;
        }
        mutex_enter(&ncl->ncl_mutex);
        if (ncl->ncl_flags & NCL_LIST_WENABLE) {
                mutex_exit(&ncl->ncl_mutex);
                sdev_nc_flush_updates();
        } else {
                mutex_exit(&ncl->ncl_mutex);
        }
}

static void
sdev_state_boot_complete(void)
{
        sdev_nc_list_t *ncl = sdev_ncache;
        sdev_nc_node_t *lp, *next;

        /*
         * Once boot is complete, decrement the expire count of each entry
         * in the cache not touched by a reference.  Remove any that
         * go to zero.  This allows once-only entries to age out over
         * time.
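         *
         * For example, with the default sdev_nc_expirecnt of 4, an entry
         * that is never referenced again is dropped after roughly four
         * boots, while an entry that is looked up has its count reset
         * to the default.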
         */
        rw_enter(&ncl->ncl_lock, RW_WRITER);
        mutex_enter(&ncl->ncl_mutex);

        for (lp = list_head(&ncl->ncl_list); lp; lp = next) {
                next = list_next(&ncl->ncl_list, lp);
                if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0) {
                        if (lp->ncn_flags & NCN_ACTIVE) {
                                if (lp->ncn_expirecnt != sdev_nc_expirecnt) {
                                        lp->ncn_expirecnt = sdev_nc_expirecnt;
                                        ncl->ncl_flags |= NCL_LIST_DIRTY;
                                }
                        } else {
                                if (--lp->ncn_expirecnt == 0) {
                                        list_remove(&ncl->ncl_list, lp);
                                        sdev_nc_free_unlinked_node(lp);
                                        ncl->ncl_nentries--;
                                }
                                ncl->ncl_flags |= NCL_LIST_DIRTY;
                        }
                }
        }

        mutex_exit(&ncl->ncl_mutex);
        rw_exit(&ncl->ncl_lock);

        sdev_nc_flush_boot_update();
        sdev_boot_state = SDEV_BOOT_STATE_COMPLETE;
}

/*
 * Upon transition to the login state on a reconfigure boot,
 * a debounce timer is set up so that we cache all the nonsense
 * lookups we're hit with by the windowing system startup.
 */

/*ARGSUSED*/
static void
sdev_state_timeout(void *arg)
{
        sdev_state_boot_complete();
}

static void
sdev_state_sysavail(void)
{
        sdev_nc_list_t *ncl = sdev_ncache;
        clock_t nticks;
        int nsecs;

        mutex_enter(&ncl->ncl_mutex);
        ncl->ncl_flags |= NCL_LIST_WENABLE;
        mutex_exit(&ncl->ncl_mutex);

        nsecs = sdev_reconfig_delay;
        if (nsecs == 0) {
                sdev_state_boot_complete();
        } else {
                nticks = drv_usectohz(1000000 * nsecs);
                sdcmn_err5(("timeout initiated %ld\n", nticks));
                (void) timeout(sdev_state_timeout, NULL, nticks);
                sdev_nc_flush_boot_update();
        }
}

/*
 * Called to inform the filesystem of progress during boot,
 * either a notice of reconfiguration boot or an indication of
 * system boot complete.  At the system-available indication, set up
 * a timer at the expiration of which no further failed lookups
 * will be added to the negative cache.
 *
 * The dev filesystem infers from reconfig boot that implicit
 * reconfig need not be invoked at all as all available devices
 * will have already been named.
 *
 * The dev filesystem infers from "system available" that devfsadmd
 * can now be run and hence implicit reconfiguration may be initiated.
 * During early stages of system startup, implicit reconfig is
 * not done to avoid impacting boot performance.
 */
void
sdev_devstate_change(void)
{
        int new_state;

        /*
         * Track system state and manage interesting transitions
         */
        new_state = SDEV_BOOT_STATE_INITIAL;
        if (i_ddi_reconfig())
                new_state = SDEV_BOOT_STATE_RECONFIG;
        if (i_ddi_sysavail())
                new_state = SDEV_BOOT_STATE_SYSAVAIL;

        if (sdev_boot_state < new_state) {
                switch (new_state) {
                case SDEV_BOOT_STATE_RECONFIG:
                        sdcmn_err5(("state change: reconfigure boot\n"));
                        sdev_boot_state = new_state;
                        /*
                         * The /dev filesystem fills a hot-plug vs.
                         * public-namespace gap by invoking 'devfsadm' once
                         * as a result of the first /dev lookup failure
                         * (or getdents/readdir).  Originally, it was thought
                         * that a reconfig reboot did not have a hot-plug gap,
                         * but this is not true - the gap is just smaller:
                         * it exists from the time the smf invocation of
                         * devfsadm completes its forced devinfo snapshot,
                         * to the time when the smf devfsadmd daemon invocation
                         * is set up and listening for hotplug sysevents.
                         * Since there is still a gap with reconfig reboot,
                         * we no longer set 'sdev_reconfig_boot'.
                         */
                        if (!sdev_nc_disable_reset)
                                sdev_nc_free_bootonly();
                        break;
                case SDEV_BOOT_STATE_SYSAVAIL:
                        sdcmn_err5(("system available\n"));
                        sdev_boot_state = new_state;
                        sdev_state_sysavail();
                        break;
                }
        }
}

/*
 * Lookup: filter out entries in the negative cache.
 * Return 1 if the lookup should not cause a reconfig.
 */
int
sdev_lookup_filter(sdev_node_t *dv, char *nm)
{
        int n;
        sdev_nc_list_t *ncl = sdev_ncache;
        sdev_nc_node_t *lp;
        char *path;
        int rval = 0;
        int changed = 0;

        ASSERT(i_ddi_io_initialized());
        ASSERT(SDEVTOV(dv)->v_type == VDIR);

        if (sdev_nc_disable)
                return (0);

        n = strlen(dv->sdev_path) + strlen(nm) + 2;
        path = kmem_alloc(n, KM_SLEEP);
        (void) sprintf(path, "%s/%s", dv->sdev_path, nm);

        rw_enter(&ncl->ncl_lock, RW_READER);
        if ((lp = sdev_nc_findpath(ncl, path)) != NULL) {
                sdcmn_err5(("%s/%s: lookup by %s cached, no reconfig\n",
                    dv->sdev_name, nm, curproc->p_user.u_comm));
                if (sdev_nc_verbose) {
                        cmn_err(CE_CONT,
                            "?%s/%s: lookup by %s cached, no reconfig\n",
                            dv->sdev_name, nm, curproc->p_user.u_comm);
                }
                mutex_enter(&ncl->ncl_mutex);
                lp->ncn_flags |= NCN_ACTIVE;
                if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0 &&
                    lp->ncn_expirecnt < sdev_nc_expirecnt) {
                        lp->ncn_expirecnt = sdev_nc_expirecnt;
                        ncl->ncl_flags |= NCL_LIST_DIRTY;
                        changed = 1;
                }
                mutex_exit(&ncl->ncl_mutex);
                rval = 1;
        }
        rw_exit(&ncl->ncl_lock);
        kmem_free(path, n);
        if (changed)
                sdev_nc_flush_boot_update();
        return (rval);
}

void
sdev_lookup_failed(sdev_node_t *dv, char *nm, int failed_flags)
{
        if (sdev_nc_disable)
                return;

        /*
         * If we're still in the initial boot stage, always update
         * the cache - we may not have received notice of the
         * reconfig boot state yet.  On a reconfigure boot, entries
         * from the backing store are not re-persisted on update,
         * but new entries are marked as needing an update.
         * Never cache dynamic or non-global nodes.
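         *
         * For example, a failed lookup of a (hypothetical) name such as
         * /dev/foo under these conditions adds "/dev/foo", so later
         * boots skip the implicit devfsadm run for that name.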
         */
        if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) &&
            !SDEV_IS_NO_NCACHE(dv) &&
            ((failed_flags & SLF_NO_NCACHE) == 0) &&
            ((sdev_reconfig_boot &&
            (sdev_boot_state != SDEV_BOOT_STATE_COMPLETE)) ||
            (!sdev_reconfig_boot && ((failed_flags & SLF_REBUILT))))) {
                sdev_nc_addname(sdev_ncache,
                    dv, nm, NCN_SRC_CURRENT|NCN_ACTIVE);
        }
}

static sdev_nc_list_t *
sdev_nc_newlist(void)
{
        sdev_nc_list_t *ncl;

        ncl = kmem_zalloc(sizeof (sdev_nc_list_t), KM_SLEEP);

        rw_init(&ncl->ncl_lock, NULL, RW_DEFAULT, NULL);
        mutex_init(&ncl->ncl_mutex, NULL, MUTEX_DEFAULT, NULL);
        list_create(&ncl->ncl_list, sizeof (sdev_nc_node_t),
            offsetof(sdev_nc_node_t, ncn_link));

        return (ncl);
}

static void
sdev_nc_free_unlinked_node(sdev_nc_node_t *lp)
{
        kmem_free(lp->ncn_name, strlen(lp->ncn_name) + 1);
        kmem_free(lp, sizeof (sdev_nc_node_t));
}

static sdev_nc_node_t *
sdev_nc_findpath(sdev_nc_list_t *ncl, char *path)
{
        sdev_nc_node_t *lp;

        ASSERT(RW_LOCK_HELD(&ncl->ncl_lock));

        for (lp = list_head(&ncl->ncl_list); lp;
            lp = list_next(&ncl->ncl_list, lp)) {
                if (strcmp(path, lp->ncn_name) == 0)
                        return (lp);
        }

        return (NULL);
}

static void
sdev_nc_insertnode(sdev_nc_list_t *ncl, sdev_nc_node_t *new)
{
        sdev_nc_node_t *lp;

        rw_enter(&ncl->ncl_lock, RW_WRITER);

        lp = sdev_nc_findpath(ncl, new->ncn_name);
        if (lp == NULL) {
                if (ncl->ncl_nentries == sdev_nc_max_entries) {
                        sdcmn_err5((
                            "%s by %s: not adding to ncache (max %d)\n",
                            new->ncn_name, curproc->p_user.u_comm,
                            ncl->ncl_nentries));
                        if (sdev_nc_verbose) {
                                cmn_err(CE_CONT, "?%s by %s: "
                                    "not adding to ncache (max %d)\n",
                                    new->ncn_name, curproc->p_user.u_comm,
                                    ncl->ncl_nentries);
                        }
                        rw_exit(&ncl->ncl_lock);
                        sdev_nc_free_unlinked_node(new);
                } else {
                        list_insert_tail(&ncl->ncl_list, new);
                        ncl->ncl_nentries++;

                        /* don't mark list dirty for nodes from store */
                        mutex_enter(&ncl->ncl_mutex);
                        if ((new->ncn_flags & NCN_SRC_STORE) == 0) {
                                sdcmn_err5(("%s by %s: add to ncache\n",
                                    new->ncn_name, curproc->p_user.u_comm));
                                if (sdev_nc_verbose) {
                                        cmn_err(CE_CONT,
                                            "?%s by %s: add to ncache\n",
                                            new->ncn_name,
                                            curproc->p_user.u_comm);
                                }
                                ncl->ncl_flags |= NCL_LIST_DIRTY;
                        }
                        mutex_exit(&ncl->ncl_mutex);
                        rw_exit(&ncl->ncl_lock);
                        lp = new;
                        sdev_nc_flush_boot_update();
                }
        } else {
                mutex_enter(&ncl->ncl_mutex);
                lp->ncn_flags |= new->ncn_flags;
                mutex_exit(&ncl->ncl_mutex);
                rw_exit(&ncl->ncl_lock);
                sdev_nc_free_unlinked_node(new);
        }
}

void
sdev_nc_addname(sdev_nc_list_t *ncl, sdev_node_t *dv, char *nm, int flags)
{
        int n;
        sdev_nc_node_t *lp;

        ASSERT(SDEVTOV(dv)->v_type == VDIR);

        lp = kmem_zalloc(sizeof (sdev_nc_node_t), KM_SLEEP);

        n = strlen(dv->sdev_path) + strlen(nm) + 2;
        lp->ncn_name = kmem_alloc(n, KM_SLEEP);
        (void) sprintf(lp->ncn_name, "%s/%s",
            dv->sdev_path, nm);
        lp->ncn_flags = flags;
        lp->ncn_expirecnt = sdev_nc_expirecnt;
        sdev_nc_insertnode(ncl, lp);
}

void
sdev_nc_node_exists(sdev_node_t *dv)
{
        /* dynamic and non-global nodes are never cached */
        if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) &&
            !SDEV_IS_NO_NCACHE(dv)) {
                sdev_nc_path_exists(sdev_ncache, dv->sdev_path);
        }
}

void
sdev_nc_path_exists(sdev_nc_list_t *ncl, char *path)
{
        sdev_nc_node_t *lp;

        if (sdev_nc_disable)
                return;

        rw_enter(&ncl->ncl_lock, RW_READER);
        if ((lp = sdev_nc_findpath(ncl, path)) == NULL) {
                rw_exit(&ncl->ncl_lock);
                return;
        }
        if (rw_tryupgrade(&ncl->ncl_lock) == 0) {
                rw_exit(&ncl->ncl_lock);
                rw_enter(&ncl->ncl_lock, RW_WRITER);
                lp = sdev_nc_findpath(ncl, path);
        }
        if (lp) {
                list_remove(&ncl->ncl_list, lp);
                ncl->ncl_nentries--;
                mutex_enter(&ncl->ncl_mutex);
                ncl->ncl_flags |= NCL_LIST_DIRTY;
                if (ncl->ncl_flags & NCL_LIST_WENABLE) {
                        mutex_exit(&ncl->ncl_mutex);
                        rw_exit(&ncl->ncl_lock);
                        sdev_nc_flush_updates();
                } else {
                        mutex_exit(&ncl->ncl_mutex);
                        rw_exit(&ncl->ncl_lock);
                }
                sdev_nc_free_unlinked_node(lp);
                sdcmn_err5(("%s by %s: removed from ncache\n",
                    path, curproc->p_user.u_comm));
                if (sdev_nc_verbose) {
                        cmn_err(CE_CONT, "?%s by %s: removed from ncache\n",
                            path, curproc->p_user.u_comm);
                }
        } else
                rw_exit(&ncl->ncl_lock);
}

static void
sdev_nc_free_bootonly(void)
{
        sdev_nc_list_t *ncl = sdev_ncache;
        sdev_nc_node_t *lp;
        sdev_nc_node_t *next;

        rw_enter(&ncl->ncl_lock, RW_WRITER);

        for (lp = list_head(&ncl->ncl_list); lp; lp = next) {
                next = list_next(&ncl->ncl_list, lp);
                if ((lp->ncn_flags & NCN_SRC_CURRENT) == 0) {
                        sdcmn_err5(("freeing %s\n", lp->ncn_name));
                        mutex_enter(&ncl->ncl_mutex);
                        ncl->ncl_flags |= NCL_LIST_DIRTY;
                        mutex_exit(&ncl->ncl_mutex);
                        list_remove(&ncl->ncl_list, lp);
                        sdev_nc_free_unlinked_node(lp);
                        ncl->ncl_nentries--;
                }
        }

        rw_exit(&ncl->ncl_lock);
}