1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * negative cache handling for the /dev fs
28 */
29
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/t_lock.h>
33 #include <sys/systm.h>
34 #include <sys/sysmacros.h>
35 #include <sys/user.h>
36 #include <sys/time.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/file.h>
40 #include <sys/fcntl.h>
41 #include <sys/flock.h>
42 #include <sys/kmem.h>
43 #include <sys/uio.h>
44 #include <sys/errno.h>
45 #include <sys/stat.h>
46 #include <sys/cred.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/mode.h>
50 #include <sys/policy.h>
51 #include <fs/fs_subr.h>
52 #include <sys/mount.h>
53 #include <sys/fs/snode.h>
54 #include <sys/fs/dv_node.h>
55 #include <sys/fs/sdev_impl.h>
56 #include <sys/sunndi.h>
57 #include <sys/sunmdi.h>
58 #include <sys/ddi.h>
59 #include <sys/modctl.h>
60 #include <sys/devcache.h>
61
62
63 /*
64 * ncache is a negative cache of failed lookups. An entry
65 * is added after an attempt to configure a device by that
66 * name failed. An accumulation of these entries over time
67 * gives us a set of device names for which implicit reconfiguration
68 * does not need to be attempted. If a name is created matching
69 * an entry in ncache, that entry is removed, and the
70 * persistent store is updated.
71 *
72 * Implicit reconfig is initiated for any name during lookup that
73 * can't be resolved from the backing store and that isn't
74 * present in the negative cache. This functionality is
75 * enabled during system startup once communication with devfsadm
76 * can be achieved. Since readdir is more general, implicit
77 * reconfig initiated by reading a directory isn't enabled until
78 * the system is more fully booted, at the time of the multi-user
79 * milestone, corresponding to init state 2.
80 *
81 * A maximum is imposed on the number of entries in the cache
82 * to limit some script going wild and as a defense against attack.
83 * The default limit is 64 and can be adjusted via sdev_nc_max_entries.
84 *
85 * Each entry also has an expiration count. When a name is looked up
86 * in the cache, its count is reset to the default. Each subsequent boot
87 * decrements the count if the name isn't referenced. This permits a
88 * once-only entry to eventually be removed over time.
89 *
90 * sdev_reconfig_delay implements a "debounce" of the timing beyond
91 * system available indication, providing what the filesystem considers
92 * to be the system-is-fully-booted state. This is provided to adjust
93 * the timing if some application startup is performing a readdir
94 * in /dev that initiates a troublesome implicit reconfig on every boot.
95 *
96 * sdev_nc_disable_reset can be used to disable clearing the negative cache
97 * on reconfig boot. The default is to clear the cache on reconfig boot.
98 * sdev_nc_disable can be used to disable the negative cache itself.
99 *
100 * sdev_reconfig_disable can be used to disable implicit reconfig.
101 * The default is that implicit reconfig is enabled.
102 */
103
104 /* tunables and defaults */
105 #define SDEV_NC_EXPIRECNT 4
106 #define SDEV_NC_MAX_ENTRIES 64
107 #define SEV_RECONFIG_DELAY 6 /* seconds */
108
109 /* tunables */
110 int sdev_nc_expirecnt = SDEV_NC_EXPIRECNT;
111 int sdev_nc_max_entries = SDEV_NC_MAX_ENTRIES;
112 int sdev_reconfig_delay = SEV_RECONFIG_DELAY;
113 int sdev_reconfig_verbose = 0;
114 int sdev_reconfig_disable = 0;
115 int sdev_nc_disable = 0;
116 int sdev_nc_disable_reset = 0;
117 int sdev_nc_verbose = 0;
118 int sdev_cache_read_disable = 0;
119 int sdev_cache_write_disable = 0;
120
121 /* globals */
122 int sdev_boot_state = SDEV_BOOT_STATE_INITIAL;
123 int sdev_reconfig_boot = 0;
124 sdev_nc_list_t *sdev_ncache;
125 static nvf_handle_t sdevfd_handle;
126
127 /* static prototypes */
128 static void sdev_ncache_write_complete(nvf_handle_t);
129 static void sdev_ncache_write(void);
130 static void sdev_ncache_process_store(void);
131 static sdev_nc_list_t *sdev_nc_newlist(void);
132 static void sdev_nc_free_unlinked_node(sdev_nc_node_t *);
133 static sdev_nc_node_t *sdev_nc_findpath(sdev_nc_list_t *, char *);
134 static void sdev_nc_insertnode(sdev_nc_list_t *, sdev_nc_node_t *);
135 static void sdev_nc_free_bootonly(void);
136 static int sdev_ncache_unpack_nvlist(nvf_handle_t, nvlist_t *, char *);
137 static int sdev_ncache_pack_list(nvf_handle_t, nvlist_t **);
138 static void sdev_ncache_list_free(nvf_handle_t);
139 static void sdev_nvp_free(nvp_devname_t *);
140
141 /*
142 * Registration for /etc/devices/devname_cache
143 */
144 static nvf_ops_t sdev_cache_ops = {
145 "/etc/devices/devname_cache", /* path to cache */
146 sdev_ncache_unpack_nvlist, /* read: unpack nvlist */
147 sdev_ncache_pack_list, /* write: pack list */
148 sdev_ncache_list_free, /* free data list */
149 sdev_ncache_write_complete /* write complete callback */
150 };
151
152 /*
153 * called once at filesystem initialization
154 */
155 void
sdev_ncache_init(void)156 sdev_ncache_init(void)
157 {
158 sdev_ncache = sdev_nc_newlist();
159 }
160
161 /*
162 * called at mount of the global instance
163 * currently the global instance is never unmounted
164 */
165 void
sdev_ncache_setup(void)166 sdev_ncache_setup(void)
167 {
168 sdevfd_handle = nvf_register_file(&sdev_cache_ops);
169 ASSERT(sdevfd_handle);
170
171 list_create(nvf_list(sdevfd_handle), sizeof (nvp_devname_t),
172 offsetof(nvp_devname_t, nvp_link));
173
174 rw_enter(nvf_lock(sdevfd_handle), RW_WRITER);
175 if (!sdev_cache_read_disable) {
176 (void) nvf_read_file(sdevfd_handle);
177 }
178 sdev_ncache_process_store();
179 rw_exit(nvf_lock(sdevfd_handle));
180
181 sdev_devstate_change();
182 }
183
184 static void
sdev_nvp_free(nvp_devname_t * dp)185 sdev_nvp_free(nvp_devname_t *dp)
186 {
187 int i;
188 char **p;
189
190 if (dp->nvp_npaths > 0) {
191 p = dp->nvp_paths;
192 for (i = 0; i < dp->nvp_npaths; i++, p++) {
193 kmem_free(*p, strlen(*p)+1);
194 }
195 kmem_free(dp->nvp_paths,
196 dp->nvp_npaths * sizeof (char *));
197 kmem_free(dp->nvp_expirecnts,
198 dp->nvp_npaths * sizeof (int));
199 }
200
201 kmem_free(dp, sizeof (nvp_devname_t));
202 }
203
204 static void
sdev_ncache_list_free(nvf_handle_t fd)205 sdev_ncache_list_free(nvf_handle_t fd)
206 {
207 list_t *listp;
208 nvp_devname_t *dp;
209
210 ASSERT(fd == sdevfd_handle);
211 ASSERT(RW_WRITE_HELD(nvf_lock(fd)));
212
213 listp = nvf_list(fd);
214 if ((dp = list_head(listp)) != NULL) {
215 list_remove(listp, dp);
216 sdev_nvp_free(dp);
217 }
218 }
219
220 /*
221 * Unpack a device path/nvlist pair to internal data list format.
222 * Used to decode the nvlist format into the internal representation
223 * when reading /etc/devices/devname_cache.
224 * Note that the expiration counts are optional, for compatibility
225 * with earlier instances of the cache. If not present, the
226 * expire counts are initialized to defaults.
227 */
228 static int
sdev_ncache_unpack_nvlist(nvf_handle_t fd,nvlist_t * nvl,char * name)229 sdev_ncache_unpack_nvlist(nvf_handle_t fd, nvlist_t *nvl, char *name)
230 {
231 nvp_devname_t *np;
232 char **strs;
233 int *cnts;
234 uint_t nstrs, ncnts;
235 int rval, i;
236
237 ASSERT(fd == sdevfd_handle);
238 ASSERT(RW_WRITE_HELD(nvf_lock(fd)));
239
240 /* name of the sublist must match what we created */
241 if (strcmp(name, DP_DEVNAME_ID) != 0) {
242 return (-1);
243 }
244
245 np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP);
246
247 rval = nvlist_lookup_string_array(nvl,
248 DP_DEVNAME_NCACHE_ID, &strs, &nstrs);
249 if (rval) {
250 kmem_free(np, sizeof (nvp_devname_t));
251 return (-1);
252 }
253
254 np->nvp_npaths = nstrs;
255 np->nvp_paths = kmem_zalloc(nstrs * sizeof (char *), KM_SLEEP);
256 for (i = 0; i < nstrs; i++) {
257 np->nvp_paths[i] = i_ddi_strdup(strs[i], KM_SLEEP);
258 }
259 np->nvp_expirecnts = kmem_zalloc(nstrs * sizeof (int), KM_SLEEP);
260 for (i = 0; i < nstrs; i++) {
261 np->nvp_expirecnts[i] = sdev_nc_expirecnt;
262 }
263
264 rval = nvlist_lookup_int32_array(nvl,
265 DP_DEVNAME_NC_EXPIRECNT_ID, &cnts, &ncnts);
266 if (rval == 0) {
267 ASSERT(ncnts == nstrs);
268 ncnts = min(ncnts, nstrs);
269 for (i = 0; i < nstrs; i++) {
270 np->nvp_expirecnts[i] = cnts[i];
271 }
272 }
273
274 list_insert_tail(nvf_list(sdevfd_handle), np);
275
276 return (0);
277 }
278
279 /*
280 * Pack internal format cache data to a single nvlist.
281 * Used when writing the nvlist file.
282 * Note this is called indirectly by the nvpflush daemon.
283 */
284 static int
sdev_ncache_pack_list(nvf_handle_t fd,nvlist_t ** ret_nvl)285 sdev_ncache_pack_list(nvf_handle_t fd, nvlist_t **ret_nvl)
286 {
287 nvlist_t *nvl, *sub_nvl;
288 nvp_devname_t *np;
289 int rval;
290 list_t *listp;
291
292 ASSERT(fd == sdevfd_handle);
293 ASSERT(RW_WRITE_HELD(nvf_lock(fd)));
294
295 rval = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
296 if (rval != 0) {
297 nvf_error("%s: nvlist alloc error %d\n",
298 nvf_cache_name(fd), rval);
299 return (DDI_FAILURE);
300 }
301
302 listp = nvf_list(sdevfd_handle);
303 if ((np = list_head(listp)) != NULL) {
304 ASSERT(list_next(listp, np) == NULL);
305
306 rval = nvlist_alloc(&sub_nvl, NV_UNIQUE_NAME, KM_SLEEP);
307 if (rval != 0) {
308 nvf_error("%s: nvlist alloc error %d\n",
309 nvf_cache_name(fd), rval);
310 sub_nvl = NULL;
311 goto err;
312 }
313
314 rval = nvlist_add_string_array(sub_nvl,
315 DP_DEVNAME_NCACHE_ID, np->nvp_paths, np->nvp_npaths);
316 if (rval != 0) {
317 nvf_error("%s: nvlist add error %d (sdev)\n",
318 nvf_cache_name(fd), rval);
319 goto err;
320 }
321
322 rval = nvlist_add_int32_array(sub_nvl,
323 DP_DEVNAME_NC_EXPIRECNT_ID,
324 np->nvp_expirecnts, np->nvp_npaths);
325 if (rval != 0) {
326 nvf_error("%s: nvlist add error %d (sdev)\n",
327 nvf_cache_name(fd), rval);
328 goto err;
329 }
330
331 rval = nvlist_add_nvlist(nvl, DP_DEVNAME_ID, sub_nvl);
332 if (rval != 0) {
333 nvf_error("%s: nvlist add error %d (sublist)\n",
334 nvf_cache_name(fd), rval);
335 goto err;
336 }
337 nvlist_free(sub_nvl);
338 }
339
340 *ret_nvl = nvl;
341 return (DDI_SUCCESS);
342
343 err:
344 nvlist_free(sub_nvl);
345 nvlist_free(nvl);
346 *ret_nvl = NULL;
347 return (DDI_FAILURE);
348 }
349
350 /*
351 * Run through the data read from the backing cache store
352 * to establish the initial state of the neg. cache.
353 */
354 static void
sdev_ncache_process_store(void)355 sdev_ncache_process_store(void)
356 {
357 sdev_nc_list_t *ncl = sdev_ncache;
358 nvp_devname_t *np;
359 sdev_nc_node_t *lp;
360 char *path;
361 int i, n;
362 list_t *listp;
363
364 if (sdev_nc_disable)
365 return;
366
367 ASSERT(RW_WRITE_HELD(nvf_lock(sdevfd_handle)));
368
369 listp = nvf_list(sdevfd_handle);
370 for (np = list_head(listp); np; np = list_next(listp, np)) {
371 for (i = 0; i < np->nvp_npaths; i++) {
372 sdcmn_err5((" %s %d\n",
373 np->nvp_paths[i], np->nvp_expirecnts[i]));
374 if (ncl->ncl_nentries < sdev_nc_max_entries) {
375 path = np->nvp_paths[i];
376 n = strlen(path) + 1;
377 lp = kmem_alloc(sizeof (sdev_nc_node_t),
378 KM_SLEEP);
379 lp->ncn_name = kmem_alloc(n, KM_SLEEP);
380 bcopy(path, lp->ncn_name, n);
381 lp->ncn_flags = NCN_SRC_STORE;
382 lp->ncn_expirecnt = np->nvp_expirecnts[i];
383 sdev_nc_insertnode(ncl, lp);
384 } else if (sdev_nc_verbose) {
385 cmn_err(CE_CONT,
386 "?%s: truncating from ncache (max %d)\n",
387 np->nvp_paths[i], sdev_nc_max_entries);
388 }
389 }
390 }
391 }
392
393 /*
394 * called by nvpflush daemon to inform us that an update of
395 * the cache file has been completed.
396 */
397 static void
sdev_ncache_write_complete(nvf_handle_t fd)398 sdev_ncache_write_complete(nvf_handle_t fd)
399 {
400 sdev_nc_list_t *ncl = sdev_ncache;
401
402 ASSERT(fd == sdevfd_handle);
403
404 mutex_enter(&ncl->ncl_mutex);
405
406 ASSERT(ncl->ncl_flags & NCL_LIST_WRITING);
407
408 if (ncl->ncl_flags & NCL_LIST_DIRTY) {
409 sdcmn_err5(("ncache write complete but dirty again\n"));
410 ncl->ncl_flags &= ~NCL_LIST_DIRTY;
411 mutex_exit(&ncl->ncl_mutex);
412 sdev_ncache_write();
413 } else {
414 sdcmn_err5(("ncache write complete\n"));
415 ncl->ncl_flags &= ~NCL_LIST_WRITING;
416 mutex_exit(&ncl->ncl_mutex);
417 rw_enter(nvf_lock(fd), RW_WRITER);
418 sdev_ncache_list_free(fd);
419 rw_exit(nvf_lock(fd));
420 }
421 }
422
423 /*
424 * Prepare to perform an update of the neg. cache backing store.
425 */
426 static void
sdev_ncache_write(void)427 sdev_ncache_write(void)
428 {
429 sdev_nc_list_t *ncl = sdev_ncache;
430 nvp_devname_t *np;
431 sdev_nc_node_t *lp;
432 int n, i;
433
434 if (sdev_cache_write_disable) {
435 mutex_enter(&ncl->ncl_mutex);
436 ncl->ncl_flags &= ~NCL_LIST_WRITING;
437 mutex_exit(&ncl->ncl_mutex);
438 return;
439 }
440
441 /* proper lock ordering here is essential */
442 rw_enter(nvf_lock(sdevfd_handle), RW_WRITER);
443 sdev_ncache_list_free(sdevfd_handle);
444
445 rw_enter(&ncl->ncl_lock, RW_READER);
446 n = ncl->ncl_nentries;
447 ASSERT(n <= sdev_nc_max_entries);
448
449 np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP);
450 np->nvp_npaths = n;
451 np->nvp_paths = kmem_zalloc(n * sizeof (char *), KM_SLEEP);
452 np->nvp_expirecnts = kmem_zalloc(n * sizeof (int), KM_SLEEP);
453
454 i = 0;
455 for (lp = list_head(&ncl->ncl_list); lp;
456 lp = list_next(&ncl->ncl_list, lp)) {
457 np->nvp_paths[i] = i_ddi_strdup(lp->ncn_name, KM_SLEEP);
458 np->nvp_expirecnts[i] = lp->ncn_expirecnt;
459 sdcmn_err5((" %s %d\n",
460 np->nvp_paths[i], np->nvp_expirecnts[i]));
461 i++;
462 }
463
464 rw_exit(&ncl->ncl_lock);
465
466 nvf_mark_dirty(sdevfd_handle);
467 list_insert_tail(nvf_list(sdevfd_handle), np);
468 rw_exit(nvf_lock(sdevfd_handle));
469
470 nvf_wake_daemon();
471 }
472
473 static void
sdev_nc_flush_updates(void)474 sdev_nc_flush_updates(void)
475 {
476 sdev_nc_list_t *ncl = sdev_ncache;
477
478 if (sdev_nc_disable || sdev_cache_write_disable)
479 return;
480
481 mutex_enter(&ncl->ncl_mutex);
482 if (((ncl->ncl_flags &
483 (NCL_LIST_DIRTY | NCL_LIST_WENABLE | NCL_LIST_WRITING)) ==
484 (NCL_LIST_DIRTY | NCL_LIST_WENABLE))) {
485 ncl->ncl_flags &= ~NCL_LIST_DIRTY;
486 ncl->ncl_flags |= NCL_LIST_WRITING;
487 mutex_exit(&ncl->ncl_mutex);
488 sdev_ncache_write();
489 } else {
490 mutex_exit(&ncl->ncl_mutex);
491 }
492 }
493
494 static void
sdev_nc_flush_boot_update(void)495 sdev_nc_flush_boot_update(void)
496 {
497 sdev_nc_list_t *ncl = sdev_ncache;
498
499 if (sdev_nc_disable || sdev_cache_write_disable ||
500 (sdev_boot_state == SDEV_BOOT_STATE_INITIAL)) {
501 return;
502 }
503 mutex_enter(&ncl->ncl_mutex);
504 if (ncl->ncl_flags & NCL_LIST_WENABLE) {
505 mutex_exit(&ncl->ncl_mutex);
506 sdev_nc_flush_updates();
507 } else {
508 mutex_exit(&ncl->ncl_mutex);
509 }
510
511 }
512
513 static void
sdev_state_boot_complete()514 sdev_state_boot_complete()
515 {
516 sdev_nc_list_t *ncl = sdev_ncache;
517 sdev_nc_node_t *lp, *next;
518
519 /*
520 * Once boot is complete, decrement the expire count of each entry
521 * in the cache not touched by a reference. Remove any that
522 * goes to zero. This effectively removes random entries over
523 * time.
524 */
525 rw_enter(&ncl->ncl_lock, RW_WRITER);
526 mutex_enter(&ncl->ncl_mutex);
527
528 for (lp = list_head(&ncl->ncl_list); lp; lp = next) {
529 next = list_next(&ncl->ncl_list, lp);
530 if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0) {
531 if (lp->ncn_flags & NCN_ACTIVE) {
532 if (lp->ncn_expirecnt != sdev_nc_expirecnt) {
533 lp->ncn_expirecnt = sdev_nc_expirecnt;
534 ncl->ncl_flags |= NCL_LIST_DIRTY;
535 }
536 } else {
537 if (--lp->ncn_expirecnt == 0) {
538 list_remove(&ncl->ncl_list, lp);
539 sdev_nc_free_unlinked_node(lp);
540 ncl->ncl_nentries--;
541 }
542 ncl->ncl_flags |= NCL_LIST_DIRTY;
543 }
544 }
545 }
546
547 mutex_exit(&ncl->ncl_mutex);
548 rw_exit(&ncl->ncl_lock);
549
550 sdev_nc_flush_boot_update();
551 sdev_boot_state = SDEV_BOOT_STATE_COMPLETE;
552 }
553
554 /*
555 * Upon transition to the login state on a reconfigure boot,
556 * a debounce timer is set up so that we cache all the nonsense
557 * lookups we're hit with by the windowing system startup.
558 */
559
/*
 * Handler given to timeout() by sdev_state_sysavail(); the argument
 * is unused.  Simply completes the boot-state transition.
 */
/*ARGSUSED*/
static void
sdev_state_timeout(void *arg)
{
	sdev_state_boot_complete();
}
566
567 static void
sdev_state_sysavail()568 sdev_state_sysavail()
569 {
570 sdev_nc_list_t *ncl = sdev_ncache;
571 clock_t nticks;
572 int nsecs;
573
574 mutex_enter(&ncl->ncl_mutex);
575 ncl->ncl_flags |= NCL_LIST_WENABLE;
576 mutex_exit(&ncl->ncl_mutex);
577
578 nsecs = sdev_reconfig_delay;
579 if (nsecs == 0) {
580 sdev_state_boot_complete();
581 } else {
582 nticks = drv_usectohz(1000000 * nsecs);
583 sdcmn_err5(("timeout initiated %ld\n", nticks));
584 (void) timeout(sdev_state_timeout, NULL, nticks);
585 sdev_nc_flush_boot_update();
586 }
587 }
588
589 /*
590 * Called to inform the filesystem of progress during boot,
591 * either a notice of reconfiguration boot or an indication of
592 * system boot complete. At system boot complete, set up a
593 * timer at the expiration of which no further failed lookups
594 * will be added to the negative cache.
595 *
596 * The dev filesystem infers from reconfig boot that implicit
597 * reconfig need not be invoked at all as all available devices
598 * will have already been named.
599 *
600 * The dev filesystem infers from "system available" that devfsadmd
601 * can now be run and hence implicit reconfiguration may be initiated.
602 * During early stages of system startup, implicit reconfig is
603 * not done to avoid impacting boot performance.
604 */
605 void
sdev_devstate_change(void)606 sdev_devstate_change(void)
607 {
608 int new_state;
609
610 /*
611 * Track system state and manage interesting transitions
612 */
613 new_state = SDEV_BOOT_STATE_INITIAL;
614 if (i_ddi_reconfig())
615 new_state = SDEV_BOOT_STATE_RECONFIG;
616 if (i_ddi_sysavail())
617 new_state = SDEV_BOOT_STATE_SYSAVAIL;
618
619 if (sdev_boot_state < new_state) {
620 switch (new_state) {
621 case SDEV_BOOT_STATE_RECONFIG:
622 sdcmn_err5(("state change: reconfigure boot\n"));
623 sdev_boot_state = new_state;
624 /*
625 * The /dev filesystem fills a hot-plug .vs.
626 * public-namespace gap by invoking 'devfsadm' once
627 * as a result of the first /dev lookup failure
628 * (or getdents/readdir). Originally, it was thought
629 * that a reconfig reboot did not have a hot-plug gap,
630 * but this is not true - the gap is just smaller:
631 * it exists from the the time the smf invocation of
632 * devfsadm completes its forced devinfo snapshot,
633 * to the time when the smf devfsadmd daemon invocation
634 * is set up and listening for hotplug sysevents.
635 * Since there is still a gap with reconfig reboot,
636 * we no longer set 'sdev_reconfig_boot'.
637 */
638 if (!sdev_nc_disable_reset)
639 sdev_nc_free_bootonly();
640 break;
641 case SDEV_BOOT_STATE_SYSAVAIL:
642 sdcmn_err5(("system available\n"));
643 sdev_boot_state = new_state;
644 sdev_state_sysavail();
645 break;
646 }
647 }
648 }
649
650 /*
651 * Lookup: filter out entries in the negative cache
652 * Return 1 if the lookup should not cause a reconfig.
653 */
654 int
sdev_lookup_filter(sdev_node_t * dv,char * nm)655 sdev_lookup_filter(sdev_node_t *dv, char *nm)
656 {
657 int n;
658 sdev_nc_list_t *ncl = sdev_ncache;
659 sdev_nc_node_t *lp;
660 char *path;
661 int rval = 0;
662 int changed = 0;
663
664 ASSERT(i_ddi_io_initialized());
665 ASSERT(SDEVTOV(dv)->v_type == VDIR);
666
667 if (sdev_nc_disable)
668 return (0);
669
670 n = strlen(dv->sdev_path) + strlen(nm) + 2;
671 path = kmem_alloc(n, KM_SLEEP);
672 (void) sprintf(path, "%s/%s", dv->sdev_path, nm);
673
674 rw_enter(&ncl->ncl_lock, RW_READER);
675 if ((lp = sdev_nc_findpath(ncl, path)) != NULL) {
676 sdcmn_err5(("%s/%s: lookup by %s cached, no reconfig\n",
677 dv->sdev_name, nm, curproc->p_user.u_comm));
678 if (sdev_nc_verbose) {
679 cmn_err(CE_CONT,
680 "?%s/%s: lookup by %s cached, no reconfig\n",
681 dv->sdev_name, nm, curproc->p_user.u_comm);
682 }
683 mutex_enter(&ncl->ncl_mutex);
684 lp->ncn_flags |= NCN_ACTIVE;
685 if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0 &&
686 lp->ncn_expirecnt < sdev_nc_expirecnt) {
687 lp->ncn_expirecnt = sdev_nc_expirecnt;
688 ncl->ncl_flags |= NCL_LIST_DIRTY;
689 changed = 1;
690 }
691 mutex_exit(&ncl->ncl_mutex);
692 rval = 1;
693 }
694 rw_exit(&ncl->ncl_lock);
695 kmem_free(path, n);
696 if (changed)
697 sdev_nc_flush_boot_update();
698 return (rval);
699 }
700
701 void
sdev_lookup_failed(sdev_node_t * dv,char * nm,int failed_flags)702 sdev_lookup_failed(sdev_node_t *dv, char *nm, int failed_flags)
703 {
704 if (sdev_nc_disable)
705 return;
706
707 /*
708 * If we're still in the initial boot stage, always update
709 * the cache - we may not have received notice of the
710 * reconfig boot state yet. On a reconfigure boot, entries
711 * from the backing store are not re-persisted on update,
712 * but new entries are marked as needing an update.
713 * Never cache dynamic or non-global nodes.
714 */
715 if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) &&
716 !SDEV_IS_NO_NCACHE(dv) &&
717 ((failed_flags & SLF_NO_NCACHE) == 0) &&
718 ((sdev_reconfig_boot &&
719 (sdev_boot_state != SDEV_BOOT_STATE_COMPLETE)) ||
720 (!sdev_reconfig_boot && ((failed_flags & SLF_REBUILT))))) {
721 sdev_nc_addname(sdev_ncache,
722 dv, nm, NCN_SRC_CURRENT|NCN_ACTIVE);
723 }
724 }
725
726 static sdev_nc_list_t *
sdev_nc_newlist(void)727 sdev_nc_newlist(void)
728 {
729 sdev_nc_list_t *ncl;
730
731 ncl = kmem_zalloc(sizeof (sdev_nc_list_t), KM_SLEEP);
732
733 rw_init(&ncl->ncl_lock, NULL, RW_DEFAULT, NULL);
734 mutex_init(&ncl->ncl_mutex, NULL, MUTEX_DEFAULT, NULL);
735 list_create(&ncl->ncl_list, sizeof (sdev_nc_node_t),
736 offsetof(sdev_nc_node_t, ncn_link));
737
738 return (ncl);
739 }
740
741 static void
sdev_nc_free_unlinked_node(sdev_nc_node_t * lp)742 sdev_nc_free_unlinked_node(sdev_nc_node_t *lp)
743 {
744 kmem_free(lp->ncn_name, strlen(lp->ncn_name) + 1);
745 kmem_free(lp, sizeof (sdev_nc_node_t));
746 }
747
748 static sdev_nc_node_t *
sdev_nc_findpath(sdev_nc_list_t * ncl,char * path)749 sdev_nc_findpath(sdev_nc_list_t *ncl, char *path)
750 {
751 sdev_nc_node_t *lp;
752
753 ASSERT(RW_LOCK_HELD(&ncl->ncl_lock));
754
755 for (lp = list_head(&ncl->ncl_list); lp;
756 lp = list_next(&ncl->ncl_list, lp)) {
757 if (strcmp(path, lp->ncn_name) == 0)
758 return (lp);
759 }
760
761 return (NULL);
762 }
763
764 static void
sdev_nc_insertnode(sdev_nc_list_t * ncl,sdev_nc_node_t * new)765 sdev_nc_insertnode(sdev_nc_list_t *ncl, sdev_nc_node_t *new)
766 {
767 sdev_nc_node_t *lp;
768
769 rw_enter(&ncl->ncl_lock, RW_WRITER);
770
771 lp = sdev_nc_findpath(ncl, new->ncn_name);
772 if (lp == NULL) {
773 if (ncl->ncl_nentries == sdev_nc_max_entries) {
774 sdcmn_err5((
775 "%s by %s: not adding to ncache (max %d)\n",
776 new->ncn_name, curproc->p_user.u_comm,
777 ncl->ncl_nentries));
778 if (sdev_nc_verbose) {
779 cmn_err(CE_CONT, "?%s by %s: "
780 "not adding to ncache (max %d)\n",
781 new->ncn_name, curproc->p_user.u_comm,
782 ncl->ncl_nentries);
783 }
784 rw_exit(&ncl->ncl_lock);
785 sdev_nc_free_unlinked_node(new);
786 } else {
787
788 list_insert_tail(&ncl->ncl_list, new);
789 ncl->ncl_nentries++;
790
791 /* don't mark list dirty for nodes from store */
792 mutex_enter(&ncl->ncl_mutex);
793 if ((new->ncn_flags & NCN_SRC_STORE) == 0) {
794 sdcmn_err5(("%s by %s: add to ncache\n",
795 new->ncn_name, curproc->p_user.u_comm));
796 if (sdev_nc_verbose) {
797 cmn_err(CE_CONT,
798 "?%s by %s: add to ncache\n",
799 new->ncn_name,
800 curproc->p_user.u_comm);
801 }
802 ncl->ncl_flags |= NCL_LIST_DIRTY;
803 }
804 mutex_exit(&ncl->ncl_mutex);
805 rw_exit(&ncl->ncl_lock);
806 lp = new;
807 sdev_nc_flush_boot_update();
808 }
809 } else {
810 mutex_enter(&ncl->ncl_mutex);
811 lp->ncn_flags |= new->ncn_flags;
812 mutex_exit(&ncl->ncl_mutex);
813 rw_exit(&ncl->ncl_lock);
814 sdev_nc_free_unlinked_node(new);
815 }
816 }
817
818 void
sdev_nc_addname(sdev_nc_list_t * ncl,sdev_node_t * dv,char * nm,int flags)819 sdev_nc_addname(sdev_nc_list_t *ncl, sdev_node_t *dv, char *nm, int flags)
820 {
821 int n;
822 sdev_nc_node_t *lp;
823
824 ASSERT(SDEVTOV(dv)->v_type == VDIR);
825
826 lp = kmem_zalloc(sizeof (sdev_nc_node_t), KM_SLEEP);
827
828 n = strlen(dv->sdev_path) + strlen(nm) + 2;
829 lp->ncn_name = kmem_alloc(n, KM_SLEEP);
830 (void) sprintf(lp->ncn_name, "%s/%s",
831 dv->sdev_path, nm);
832 lp->ncn_flags = flags;
833 lp->ncn_expirecnt = sdev_nc_expirecnt;
834 sdev_nc_insertnode(ncl, lp);
835 }
836
837 void
sdev_nc_node_exists(sdev_node_t * dv)838 sdev_nc_node_exists(sdev_node_t *dv)
839 {
840 /* dynamic and non-global nodes are never cached */
841 if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) &&
842 !SDEV_IS_NO_NCACHE(dv)) {
843 sdev_nc_path_exists(sdev_ncache, dv->sdev_path);
844 }
845 }
846
847 void
sdev_nc_path_exists(sdev_nc_list_t * ncl,char * path)848 sdev_nc_path_exists(sdev_nc_list_t *ncl, char *path)
849 {
850 sdev_nc_node_t *lp;
851
852 if (sdev_nc_disable)
853 return;
854
855 rw_enter(&ncl->ncl_lock, RW_READER);
856 if ((lp = sdev_nc_findpath(ncl, path)) == NULL) {
857 rw_exit(&ncl->ncl_lock);
858 return;
859 }
860 if (rw_tryupgrade(&ncl->ncl_lock) == 0) {
861 rw_exit(&ncl->ncl_lock);
862 rw_enter(&ncl->ncl_lock, RW_WRITER);
863 lp = sdev_nc_findpath(ncl, path);
864 }
865 if (lp) {
866 list_remove(&ncl->ncl_list, lp);
867 ncl->ncl_nentries--;
868 mutex_enter(&ncl->ncl_mutex);
869 ncl->ncl_flags |= NCL_LIST_DIRTY;
870 if (ncl->ncl_flags & NCL_LIST_WENABLE) {
871 mutex_exit(&ncl->ncl_mutex);
872 rw_exit(&ncl->ncl_lock);
873 sdev_nc_flush_updates();
874 } else {
875 mutex_exit(&ncl->ncl_mutex);
876 rw_exit(&ncl->ncl_lock);
877 }
878 sdev_nc_free_unlinked_node(lp);
879 sdcmn_err5(("%s by %s: removed from ncache\n",
880 path, curproc->p_user.u_comm));
881 if (sdev_nc_verbose) {
882 cmn_err(CE_CONT, "?%s by %s: removed from ncache\n",
883 path, curproc->p_user.u_comm);
884 }
885 } else
886 rw_exit(&ncl->ncl_lock);
887 }
888
889 static void
sdev_nc_free_bootonly(void)890 sdev_nc_free_bootonly(void)
891 {
892 sdev_nc_list_t *ncl = sdev_ncache;
893 sdev_nc_node_t *lp;
894 sdev_nc_node_t *next;
895
896 rw_enter(&ncl->ncl_lock, RW_WRITER);
897
898 for (lp = list_head(&ncl->ncl_list); lp; lp = next) {
899 next = list_next(&ncl->ncl_list, lp);
900 if ((lp->ncn_flags & NCN_SRC_CURRENT) == 0) {
901 sdcmn_err5(("freeing %s\n", lp->ncn_name));
902 mutex_enter(&ncl->ncl_mutex);
903 ncl->ncl_flags |= NCL_LIST_DIRTY;
904 mutex_exit(&ncl->ncl_mutex);
905 list_remove(&ncl->ncl_list, lp);
906 sdev_nc_free_unlinked_node(lp);
907 ncl->ncl_nentries--;
908 }
909 }
910
911 rw_exit(&ncl->ncl_lock);
912 }
913