1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * negative cache handling for the /dev fs
28 */
29
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/t_lock.h>
33 #include <sys/systm.h>
34 #include <sys/sysmacros.h>
35 #include <sys/user.h>
36 #include <sys/time.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/file.h>
40 #include <sys/fcntl.h>
41 #include <sys/flock.h>
42 #include <sys/kmem.h>
43 #include <sys/uio.h>
44 #include <sys/errno.h>
45 #include <sys/stat.h>
46 #include <sys/cred.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/mode.h>
50 #include <sys/policy.h>
51 #include <fs/fs_subr.h>
52 #include <sys/mount.h>
53 #include <sys/fs/snode.h>
54 #include <sys/fs/dv_node.h>
55 #include <sys/fs/sdev_impl.h>
56 #include <sys/sunndi.h>
57 #include <sys/sunmdi.h>
58 #include <sys/ddi.h>
59 #include <sys/modctl.h>
60 #include <sys/devcache.h>
61
62
63 /*
 * ncache is a negative cache of failed lookups.  An entry
 * is added after an attempt to configure a device by that
 * name failed.  An accumulation of these entries over time
 * gives us a set of device names for which implicit reconfiguration
 * does not need to be attempted.  If a name is created matching
 * an entry in ncache, that entry is removed, with the
 * persistent store updated.
71 *
72 * Implicit reconfig is initiated for any name during lookup that
73 * can't be resolved from the backing store and that isn't
74 * present in the negative cache. This functionality is
75 * enabled during system startup once communication with devfsadm
76 * can be achieved. Since readdir is more general, implicit
77 * reconfig initiated by reading a directory isn't enabled until
78 * the system is more fully booted, at the time of the multi-user
79 * milestone, corresponding to init state 2.
80 *
81 * A maximum is imposed on the number of entries in the cache
82 * to limit some script going wild and as a defense against attack.
83 * The default limit is 64 and can be adjusted via sdev_nc_max_entries.
84 *
 * Each entry also has an expiration count.  When a name in the cache
 * is looked up, its count is reset to the default.  Subsequent boots
 * will decrement the count if the name isn't referenced.  This permits
 * a once-only entry to eventually be removed over time.
89 *
90 * sdev_reconfig_delay implements a "debounce" of the timing beyond
91 * system available indication, providing what the filesystem considers
92 * to be the system-is-fully-booted state. This is provided to adjust
93 * the timing if some application startup is performing a readdir
94 * in /dev that initiates a troublesome implicit reconfig on every boot.
95 *
96 * sdev_nc_disable_reset can be used to disable clearing the negative cache
97 * on reconfig boot. The default is to clear the cache on reconfig boot.
98 * sdev_nc_disable can be used to disable the negative cache itself.
99 *
100 * sdev_reconfig_disable can be used to disable implicit reconfig.
101 * The default is that implicit reconfig is enabled.
102 */
103
104 /* tunables and defaults */
105 #define SDEV_NC_EXPIRECNT 4
106 #define SDEV_NC_MAX_ENTRIES 64
107 #define SEV_RECONFIG_DELAY 6 /* seconds */
108
109 /* tunables */
110 int sdev_nc_expirecnt = SDEV_NC_EXPIRECNT;
111 int sdev_nc_max_entries = SDEV_NC_MAX_ENTRIES;
112 int sdev_reconfig_delay = SEV_RECONFIG_DELAY;
113 int sdev_reconfig_verbose = 0;
114 int sdev_reconfig_disable = 0;
115 int sdev_nc_disable = 0;
116 int sdev_nc_disable_reset = 0;
117 int sdev_nc_verbose = 0;
118 int sdev_cache_read_disable = 0;
119 int sdev_cache_write_disable = 0;
120
121 /* globals */
122 int sdev_boot_state = SDEV_BOOT_STATE_INITIAL;
123 int sdev_reconfig_boot = 0;
124 sdev_nc_list_t *sdev_ncache;
125 static nvf_handle_t sdevfd_handle;
126
127 /* static prototypes */
128 static void sdev_ncache_write_complete(nvf_handle_t);
129 static void sdev_ncache_write(void);
130 static void sdev_ncache_process_store(void);
131 static sdev_nc_list_t *sdev_nc_newlist(void);
132 static void sdev_nc_free_unlinked_node(sdev_nc_node_t *);
133 static sdev_nc_node_t *sdev_nc_findpath(sdev_nc_list_t *, char *);
134 static void sdev_nc_insertnode(sdev_nc_list_t *, sdev_nc_node_t *);
135 static void sdev_nc_free_bootonly(void);
136 static int sdev_ncache_unpack_nvlist(nvf_handle_t, nvlist_t *, char *);
137 static int sdev_ncache_pack_list(nvf_handle_t, nvlist_t **);
138 static void sdev_ncache_list_free(nvf_handle_t);
139 static void sdev_nvp_free(nvp_devname_t *);
140
141 /*
142 * Registration for /etc/devices/devname_cache
143 */
144 static nvf_ops_t sdev_cache_ops = {
145 "/etc/devices/devname_cache", /* path to cache */
146 sdev_ncache_unpack_nvlist, /* read: unpack nvlist */
147 sdev_ncache_pack_list, /* write: pack list */
148 sdev_ncache_list_free, /* free data list */
149 sdev_ncache_write_complete /* write complete callback */
150 };
151
152 /*
153 * called once at filesystem initialization
154 */
155 void
sdev_ncache_init(void)156 sdev_ncache_init(void)
157 {
158 sdev_ncache = sdev_nc_newlist();
159 }
160
161 /*
162 * called at mount of the global instance
163 * currently the global instance is never unmounted
164 */
165 void
sdev_ncache_setup(void)166 sdev_ncache_setup(void)
167 {
168 sdevfd_handle = nvf_register_file(&sdev_cache_ops);
169 ASSERT(sdevfd_handle);
170
171 list_create(nvf_list(sdevfd_handle), sizeof (nvp_devname_t),
172 offsetof(nvp_devname_t, nvp_link));
173
174 rw_enter(nvf_lock(sdevfd_handle), RW_WRITER);
175 if (!sdev_cache_read_disable) {
176 (void) nvf_read_file(sdevfd_handle);
177 }
178 sdev_ncache_process_store();
179 rw_exit(nvf_lock(sdevfd_handle));
180
181 sdev_devstate_change();
182 }
183
184 static void
sdev_nvp_free(nvp_devname_t * dp)185 sdev_nvp_free(nvp_devname_t *dp)
186 {
187 int i;
188 char **p;
189
190 if (dp->nvp_npaths > 0) {
191 p = dp->nvp_paths;
192 for (i = 0; i < dp->nvp_npaths; i++, p++) {
193 kmem_free(*p, strlen(*p)+1);
194 }
195 kmem_free(dp->nvp_paths,
196 dp->nvp_npaths * sizeof (char *));
197 kmem_free(dp->nvp_expirecnts,
198 dp->nvp_npaths * sizeof (int));
199 }
200
201 kmem_free(dp, sizeof (nvp_devname_t));
202 }
203
204 static void
sdev_ncache_list_free(nvf_handle_t fd)205 sdev_ncache_list_free(nvf_handle_t fd)
206 {
207 list_t *listp;
208 nvp_devname_t *dp;
209
210 ASSERT(fd == sdevfd_handle);
211 ASSERT(RW_WRITE_HELD(nvf_lock(fd)));
212
213 listp = nvf_list(fd);
214 if ((dp = list_head(listp)) != NULL) {
215 list_remove(listp, dp);
216 sdev_nvp_free(dp);
217 }
218 }
219
220 /*
221 * Unpack a device path/nvlist pair to internal data list format.
222 * Used to decode the nvlist format into the internal representation
223 * when reading /etc/devices/devname_cache.
224 * Note that the expiration counts are optional, for compatibility
225 * with earlier instances of the cache. If not present, the
226 * expire counts are initialized to defaults.
227 */
228 static int
sdev_ncache_unpack_nvlist(nvf_handle_t fd,nvlist_t * nvl,char * name)229 sdev_ncache_unpack_nvlist(nvf_handle_t fd, nvlist_t *nvl, char *name)
230 {
231 nvp_devname_t *np;
232 char **strs;
233 int *cnts;
234 uint_t nstrs, ncnts;
235 int rval, i;
236
237 ASSERT(fd == sdevfd_handle);
238 ASSERT(RW_WRITE_HELD(nvf_lock(fd)));
239
240 /* name of the sublist must match what we created */
241 if (strcmp(name, DP_DEVNAME_ID) != 0) {
242 return (-1);
243 }
244
245 np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP);
246
247 rval = nvlist_lookup_string_array(nvl,
248 DP_DEVNAME_NCACHE_ID, &strs, &nstrs);
249 if (rval) {
250 kmem_free(np, sizeof (nvp_devname_t));
251 return (-1);
252 }
253
254 np->nvp_npaths = nstrs;
255 np->nvp_paths = kmem_zalloc(nstrs * sizeof (char *), KM_SLEEP);
256 for (i = 0; i < nstrs; i++) {
257 np->nvp_paths[i] = i_ddi_strdup(strs[i], KM_SLEEP);
258 }
259 np->nvp_expirecnts = kmem_zalloc(nstrs * sizeof (int), KM_SLEEP);
260 for (i = 0; i < nstrs; i++) {
261 np->nvp_expirecnts[i] = sdev_nc_expirecnt;
262 }
263
264 rval = nvlist_lookup_int32_array(nvl,
265 DP_DEVNAME_NC_EXPIRECNT_ID, &cnts, &ncnts);
266 if (rval == 0) {
267 ASSERT(ncnts == nstrs);
268 ncnts = min(ncnts, nstrs);
269 for (i = 0; i < nstrs; i++) {
270 np->nvp_expirecnts[i] = cnts[i];
271 }
272 }
273
274 list_insert_tail(nvf_list(sdevfd_handle), np);
275
276 return (0);
277 }
278
279 /*
280 * Pack internal format cache data to a single nvlist.
281 * Used when writing the nvlist file.
282 * Note this is called indirectly by the nvpflush daemon.
283 */
284 static int
sdev_ncache_pack_list(nvf_handle_t fd,nvlist_t ** ret_nvl)285 sdev_ncache_pack_list(nvf_handle_t fd, nvlist_t **ret_nvl)
286 {
287 nvlist_t *nvl, *sub_nvl;
288 nvp_devname_t *np;
289 int rval;
290 list_t *listp;
291
292 ASSERT(fd == sdevfd_handle);
293 ASSERT(RW_WRITE_HELD(nvf_lock(fd)));
294
295 rval = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
296 if (rval != 0) {
297 nvf_error("%s: nvlist alloc error %d\n",
298 nvf_cache_name(fd), rval);
299 return (DDI_FAILURE);
300 }
301
302 listp = nvf_list(sdevfd_handle);
303 if ((np = list_head(listp)) != NULL) {
304 ASSERT(list_next(listp, np) == NULL);
305
306 rval = nvlist_alloc(&sub_nvl, NV_UNIQUE_NAME, KM_SLEEP);
307 if (rval != 0) {
308 nvf_error("%s: nvlist alloc error %d\n",
309 nvf_cache_name(fd), rval);
310 sub_nvl = NULL;
311 goto err;
312 }
313
314 rval = nvlist_add_string_array(sub_nvl,
315 DP_DEVNAME_NCACHE_ID, np->nvp_paths, np->nvp_npaths);
316 if (rval != 0) {
317 nvf_error("%s: nvlist add error %d (sdev)\n",
318 nvf_cache_name(fd), rval);
319 goto err;
320 }
321
322 rval = nvlist_add_int32_array(sub_nvl,
323 DP_DEVNAME_NC_EXPIRECNT_ID,
324 np->nvp_expirecnts, np->nvp_npaths);
325 if (rval != 0) {
326 nvf_error("%s: nvlist add error %d (sdev)\n",
327 nvf_cache_name(fd), rval);
328 goto err;
329 }
330
331 rval = nvlist_add_nvlist(nvl, DP_DEVNAME_ID, sub_nvl);
332 if (rval != 0) {
333 nvf_error("%s: nvlist add error %d (sublist)\n",
334 nvf_cache_name(fd), rval);
335 goto err;
336 }
337 nvlist_free(sub_nvl);
338 }
339
340 *ret_nvl = nvl;
341 return (DDI_SUCCESS);
342
343 err:
344 if (sub_nvl)
345 nvlist_free(sub_nvl);
346 nvlist_free(nvl);
347 *ret_nvl = NULL;
348 return (DDI_FAILURE);
349 }
350
351 /*
352 * Run through the data read from the backing cache store
353 * to establish the initial state of the neg. cache.
354 */
355 static void
sdev_ncache_process_store(void)356 sdev_ncache_process_store(void)
357 {
358 sdev_nc_list_t *ncl = sdev_ncache;
359 nvp_devname_t *np;
360 sdev_nc_node_t *lp;
361 char *path;
362 int i, n;
363 list_t *listp;
364
365 if (sdev_nc_disable)
366 return;
367
368 ASSERT(RW_WRITE_HELD(nvf_lock(sdevfd_handle)));
369
370 listp = nvf_list(sdevfd_handle);
371 for (np = list_head(listp); np; np = list_next(listp, np)) {
372 for (i = 0; i < np->nvp_npaths; i++) {
373 sdcmn_err5((" %s %d\n",
374 np->nvp_paths[i], np->nvp_expirecnts[i]));
375 if (ncl->ncl_nentries < sdev_nc_max_entries) {
376 path = np->nvp_paths[i];
377 n = strlen(path) + 1;
378 lp = kmem_alloc(sizeof (sdev_nc_node_t),
379 KM_SLEEP);
380 lp->ncn_name = kmem_alloc(n, KM_SLEEP);
381 bcopy(path, lp->ncn_name, n);
382 lp->ncn_flags = NCN_SRC_STORE;
383 lp->ncn_expirecnt = np->nvp_expirecnts[i];
384 sdev_nc_insertnode(ncl, lp);
385 } else if (sdev_nc_verbose) {
386 cmn_err(CE_CONT,
387 "?%s: truncating from ncache (max %d)\n",
388 np->nvp_paths[i], sdev_nc_max_entries);
389 }
390 }
391 }
392 }
393
394 /*
395 * called by nvpflush daemon to inform us that an update of
396 * the cache file has been completed.
397 */
398 static void
sdev_ncache_write_complete(nvf_handle_t fd)399 sdev_ncache_write_complete(nvf_handle_t fd)
400 {
401 sdev_nc_list_t *ncl = sdev_ncache;
402
403 ASSERT(fd == sdevfd_handle);
404
405 mutex_enter(&ncl->ncl_mutex);
406
407 ASSERT(ncl->ncl_flags & NCL_LIST_WRITING);
408
409 if (ncl->ncl_flags & NCL_LIST_DIRTY) {
410 sdcmn_err5(("ncache write complete but dirty again\n"));
411 ncl->ncl_flags &= ~NCL_LIST_DIRTY;
412 mutex_exit(&ncl->ncl_mutex);
413 sdev_ncache_write();
414 } else {
415 sdcmn_err5(("ncache write complete\n"));
416 ncl->ncl_flags &= ~NCL_LIST_WRITING;
417 mutex_exit(&ncl->ncl_mutex);
418 rw_enter(nvf_lock(fd), RW_WRITER);
419 sdev_ncache_list_free(fd);
420 rw_exit(nvf_lock(fd));
421 }
422 }
423
424 /*
425 * Prepare to perform an update of the neg. cache backing store.
426 */
427 static void
sdev_ncache_write(void)428 sdev_ncache_write(void)
429 {
430 sdev_nc_list_t *ncl = sdev_ncache;
431 nvp_devname_t *np;
432 sdev_nc_node_t *lp;
433 int n, i;
434
435 if (sdev_cache_write_disable) {
436 mutex_enter(&ncl->ncl_mutex);
437 ncl->ncl_flags &= ~NCL_LIST_WRITING;
438 mutex_exit(&ncl->ncl_mutex);
439 return;
440 }
441
442 /* proper lock ordering here is essential */
443 rw_enter(nvf_lock(sdevfd_handle), RW_WRITER);
444 sdev_ncache_list_free(sdevfd_handle);
445
446 rw_enter(&ncl->ncl_lock, RW_READER);
447 n = ncl->ncl_nentries;
448 ASSERT(n <= sdev_nc_max_entries);
449
450 np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP);
451 np->nvp_npaths = n;
452 np->nvp_paths = kmem_zalloc(n * sizeof (char *), KM_SLEEP);
453 np->nvp_expirecnts = kmem_zalloc(n * sizeof (int), KM_SLEEP);
454
455 i = 0;
456 for (lp = list_head(&ncl->ncl_list); lp;
457 lp = list_next(&ncl->ncl_list, lp)) {
458 np->nvp_paths[i] = i_ddi_strdup(lp->ncn_name, KM_SLEEP);
459 np->nvp_expirecnts[i] = lp->ncn_expirecnt;
460 sdcmn_err5((" %s %d\n",
461 np->nvp_paths[i], np->nvp_expirecnts[i]));
462 i++;
463 }
464
465 rw_exit(&ncl->ncl_lock);
466
467 nvf_mark_dirty(sdevfd_handle);
468 list_insert_tail(nvf_list(sdevfd_handle), np);
469 rw_exit(nvf_lock(sdevfd_handle));
470
471 nvf_wake_daemon();
472 }
473
474 static void
sdev_nc_flush_updates(void)475 sdev_nc_flush_updates(void)
476 {
477 sdev_nc_list_t *ncl = sdev_ncache;
478
479 if (sdev_nc_disable || sdev_cache_write_disable)
480 return;
481
482 mutex_enter(&ncl->ncl_mutex);
483 if (((ncl->ncl_flags &
484 (NCL_LIST_DIRTY | NCL_LIST_WENABLE | NCL_LIST_WRITING)) ==
485 (NCL_LIST_DIRTY | NCL_LIST_WENABLE))) {
486 ncl->ncl_flags &= ~NCL_LIST_DIRTY;
487 ncl->ncl_flags |= NCL_LIST_WRITING;
488 mutex_exit(&ncl->ncl_mutex);
489 sdev_ncache_write();
490 } else {
491 mutex_exit(&ncl->ncl_mutex);
492 }
493 }
494
495 static void
sdev_nc_flush_boot_update(void)496 sdev_nc_flush_boot_update(void)
497 {
498 sdev_nc_list_t *ncl = sdev_ncache;
499
500 if (sdev_nc_disable || sdev_cache_write_disable ||
501 (sdev_boot_state == SDEV_BOOT_STATE_INITIAL)) {
502 return;
503 }
504 mutex_enter(&ncl->ncl_mutex);
505 if (ncl->ncl_flags & NCL_LIST_WENABLE) {
506 mutex_exit(&ncl->ncl_mutex);
507 sdev_nc_flush_updates();
508 } else {
509 mutex_exit(&ncl->ncl_mutex);
510 }
511
512 }
513
514 static void
sdev_state_boot_complete()515 sdev_state_boot_complete()
516 {
517 sdev_nc_list_t *ncl = sdev_ncache;
518 sdev_nc_node_t *lp, *next;
519
520 /*
521 * Once boot is complete, decrement the expire count of each entry
522 * in the cache not touched by a reference. Remove any that
523 * goes to zero. This effectively removes random entries over
524 * time.
525 */
526 rw_enter(&ncl->ncl_lock, RW_WRITER);
527 mutex_enter(&ncl->ncl_mutex);
528
529 for (lp = list_head(&ncl->ncl_list); lp; lp = next) {
530 next = list_next(&ncl->ncl_list, lp);
531 if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0) {
532 if (lp->ncn_flags & NCN_ACTIVE) {
533 if (lp->ncn_expirecnt != sdev_nc_expirecnt) {
534 lp->ncn_expirecnt = sdev_nc_expirecnt;
535 ncl->ncl_flags |= NCL_LIST_DIRTY;
536 }
537 } else {
538 if (--lp->ncn_expirecnt == 0) {
539 list_remove(&ncl->ncl_list, lp);
540 sdev_nc_free_unlinked_node(lp);
541 ncl->ncl_nentries--;
542 }
543 ncl->ncl_flags |= NCL_LIST_DIRTY;
544 }
545 }
546 }
547
548 mutex_exit(&ncl->ncl_mutex);
549 rw_exit(&ncl->ncl_lock);
550
551 sdev_nc_flush_boot_update();
552 sdev_boot_state = SDEV_BOOT_STATE_COMPLETE;
553 }
554
/*
 * Expiry handler for the debounce timer armed by
 * sdev_state_sysavail().  The timer delays the boot-complete
 * transition so that the nonsense lookups we're hit with by the
 * windowing system startup are still cached.  The argument is unused.
 */

/*ARGSUSED*/
static void
sdev_state_timeout(void *arg)
{
	sdev_state_boot_complete();
}
567
568 static void
sdev_state_sysavail()569 sdev_state_sysavail()
570 {
571 sdev_nc_list_t *ncl = sdev_ncache;
572 clock_t nticks;
573 int nsecs;
574
575 mutex_enter(&ncl->ncl_mutex);
576 ncl->ncl_flags |= NCL_LIST_WENABLE;
577 mutex_exit(&ncl->ncl_mutex);
578
579 nsecs = sdev_reconfig_delay;
580 if (nsecs == 0) {
581 sdev_state_boot_complete();
582 } else {
583 nticks = drv_usectohz(1000000 * nsecs);
584 sdcmn_err5(("timeout initiated %ld\n", nticks));
585 (void) timeout(sdev_state_timeout, NULL, nticks);
586 sdev_nc_flush_boot_update();
587 }
588 }
589
590 /*
591 * Called to inform the filesystem of progress during boot,
592 * either a notice of reconfiguration boot or an indication of
593 * system boot complete. At system boot complete, set up a
594 * timer at the expiration of which no further failed lookups
595 * will be added to the negative cache.
596 *
597 * The dev filesystem infers from reconfig boot that implicit
598 * reconfig need not be invoked at all as all available devices
599 * will have already been named.
600 *
601 * The dev filesystem infers from "system available" that devfsadmd
602 * can now be run and hence implicit reconfiguration may be initiated.
603 * During early stages of system startup, implicit reconfig is
604 * not done to avoid impacting boot performance.
605 */
606 void
sdev_devstate_change(void)607 sdev_devstate_change(void)
608 {
609 int new_state;
610
611 /*
612 * Track system state and manage interesting transitions
613 */
614 new_state = SDEV_BOOT_STATE_INITIAL;
615 if (i_ddi_reconfig())
616 new_state = SDEV_BOOT_STATE_RECONFIG;
617 if (i_ddi_sysavail())
618 new_state = SDEV_BOOT_STATE_SYSAVAIL;
619
620 if (sdev_boot_state < new_state) {
621 switch (new_state) {
622 case SDEV_BOOT_STATE_RECONFIG:
623 sdcmn_err5(("state change: reconfigure boot\n"));
624 sdev_boot_state = new_state;
625 /*
626 * The /dev filesystem fills a hot-plug .vs.
627 * public-namespace gap by invoking 'devfsadm' once
628 * as a result of the first /dev lookup failure
629 * (or getdents/readdir). Originally, it was thought
630 * that a reconfig reboot did not have a hot-plug gap,
631 * but this is not true - the gap is just smaller:
632 * it exists from the the time the smf invocation of
633 * devfsadm completes its forced devinfo snapshot,
634 * to the time when the smf devfsadmd daemon invocation
635 * is set up and listening for hotplug sysevents.
636 * Since there is still a gap with reconfig reboot,
637 * we no longer set 'sdev_reconfig_boot'.
638 */
639 if (!sdev_nc_disable_reset)
640 sdev_nc_free_bootonly();
641 break;
642 case SDEV_BOOT_STATE_SYSAVAIL:
643 sdcmn_err5(("system available\n"));
644 sdev_boot_state = new_state;
645 sdev_state_sysavail();
646 break;
647 }
648 }
649 }
650
651 /*
652 * Lookup: filter out entries in the negative cache
653 * Return 1 if the lookup should not cause a reconfig.
654 */
655 int
sdev_lookup_filter(sdev_node_t * dv,char * nm)656 sdev_lookup_filter(sdev_node_t *dv, char *nm)
657 {
658 int n;
659 sdev_nc_list_t *ncl = sdev_ncache;
660 sdev_nc_node_t *lp;
661 char *path;
662 int rval = 0;
663 int changed = 0;
664
665 ASSERT(i_ddi_io_initialized());
666 ASSERT(SDEVTOV(dv)->v_type == VDIR);
667
668 if (sdev_nc_disable)
669 return (0);
670
671 n = strlen(dv->sdev_path) + strlen(nm) + 2;
672 path = kmem_alloc(n, KM_SLEEP);
673 (void) sprintf(path, "%s/%s", dv->sdev_path, nm);
674
675 rw_enter(&ncl->ncl_lock, RW_READER);
676 if ((lp = sdev_nc_findpath(ncl, path)) != NULL) {
677 sdcmn_err5(("%s/%s: lookup by %s cached, no reconfig\n",
678 dv->sdev_name, nm, curproc->p_user.u_comm));
679 if (sdev_nc_verbose) {
680 cmn_err(CE_CONT,
681 "?%s/%s: lookup by %s cached, no reconfig\n",
682 dv->sdev_name, nm, curproc->p_user.u_comm);
683 }
684 mutex_enter(&ncl->ncl_mutex);
685 lp->ncn_flags |= NCN_ACTIVE;
686 if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0 &&
687 lp->ncn_expirecnt < sdev_nc_expirecnt) {
688 lp->ncn_expirecnt = sdev_nc_expirecnt;
689 ncl->ncl_flags |= NCL_LIST_DIRTY;
690 changed = 1;
691 }
692 mutex_exit(&ncl->ncl_mutex);
693 rval = 1;
694 }
695 rw_exit(&ncl->ncl_lock);
696 kmem_free(path, n);
697 if (changed)
698 sdev_nc_flush_boot_update();
699 return (rval);
700 }
701
702 void
sdev_lookup_failed(sdev_node_t * dv,char * nm,int failed_flags)703 sdev_lookup_failed(sdev_node_t *dv, char *nm, int failed_flags)
704 {
705 if (sdev_nc_disable)
706 return;
707
708 /*
709 * If we're still in the initial boot stage, always update
710 * the cache - we may not have received notice of the
711 * reconfig boot state yet. On a reconfigure boot, entries
712 * from the backing store are not re-persisted on update,
713 * but new entries are marked as needing an update.
714 * Never cache dynamic or non-global nodes.
715 */
716 if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) &&
717 !SDEV_IS_NO_NCACHE(dv) &&
718 ((failed_flags & SLF_NO_NCACHE) == 0) &&
719 ((sdev_reconfig_boot &&
720 (sdev_boot_state != SDEV_BOOT_STATE_COMPLETE)) ||
721 (!sdev_reconfig_boot && ((failed_flags & SLF_REBUILT))))) {
722 sdev_nc_addname(sdev_ncache,
723 dv, nm, NCN_SRC_CURRENT|NCN_ACTIVE);
724 }
725 }
726
727 static sdev_nc_list_t *
sdev_nc_newlist(void)728 sdev_nc_newlist(void)
729 {
730 sdev_nc_list_t *ncl;
731
732 ncl = kmem_zalloc(sizeof (sdev_nc_list_t), KM_SLEEP);
733
734 rw_init(&ncl->ncl_lock, NULL, RW_DEFAULT, NULL);
735 mutex_init(&ncl->ncl_mutex, NULL, MUTEX_DEFAULT, NULL);
736 list_create(&ncl->ncl_list, sizeof (sdev_nc_node_t),
737 offsetof(sdev_nc_node_t, ncn_link));
738
739 return (ncl);
740 }
741
742 static void
sdev_nc_free_unlinked_node(sdev_nc_node_t * lp)743 sdev_nc_free_unlinked_node(sdev_nc_node_t *lp)
744 {
745 kmem_free(lp->ncn_name, strlen(lp->ncn_name) + 1);
746 kmem_free(lp, sizeof (sdev_nc_node_t));
747 }
748
749 static sdev_nc_node_t *
sdev_nc_findpath(sdev_nc_list_t * ncl,char * path)750 sdev_nc_findpath(sdev_nc_list_t *ncl, char *path)
751 {
752 sdev_nc_node_t *lp;
753
754 ASSERT(RW_LOCK_HELD(&ncl->ncl_lock));
755
756 for (lp = list_head(&ncl->ncl_list); lp;
757 lp = list_next(&ncl->ncl_list, lp)) {
758 if (strcmp(path, lp->ncn_name) == 0)
759 return (lp);
760 }
761
762 return (NULL);
763 }
764
765 static void
sdev_nc_insertnode(sdev_nc_list_t * ncl,sdev_nc_node_t * new)766 sdev_nc_insertnode(sdev_nc_list_t *ncl, sdev_nc_node_t *new)
767 {
768 sdev_nc_node_t *lp;
769
770 rw_enter(&ncl->ncl_lock, RW_WRITER);
771
772 lp = sdev_nc_findpath(ncl, new->ncn_name);
773 if (lp == NULL) {
774 if (ncl->ncl_nentries == sdev_nc_max_entries) {
775 sdcmn_err5((
776 "%s by %s: not adding to ncache (max %d)\n",
777 new->ncn_name, curproc->p_user.u_comm,
778 ncl->ncl_nentries));
779 if (sdev_nc_verbose) {
780 cmn_err(CE_CONT, "?%s by %s: "
781 "not adding to ncache (max %d)\n",
782 new->ncn_name, curproc->p_user.u_comm,
783 ncl->ncl_nentries);
784 }
785 rw_exit(&ncl->ncl_lock);
786 sdev_nc_free_unlinked_node(new);
787 } else {
788
789 list_insert_tail(&ncl->ncl_list, new);
790 ncl->ncl_nentries++;
791
792 /* don't mark list dirty for nodes from store */
793 mutex_enter(&ncl->ncl_mutex);
794 if ((new->ncn_flags & NCN_SRC_STORE) == 0) {
795 sdcmn_err5(("%s by %s: add to ncache\n",
796 new->ncn_name, curproc->p_user.u_comm));
797 if (sdev_nc_verbose) {
798 cmn_err(CE_CONT,
799 "?%s by %s: add to ncache\n",
800 new->ncn_name,
801 curproc->p_user.u_comm);
802 }
803 ncl->ncl_flags |= NCL_LIST_DIRTY;
804 }
805 mutex_exit(&ncl->ncl_mutex);
806 rw_exit(&ncl->ncl_lock);
807 lp = new;
808 sdev_nc_flush_boot_update();
809 }
810 } else {
811 mutex_enter(&ncl->ncl_mutex);
812 lp->ncn_flags |= new->ncn_flags;
813 mutex_exit(&ncl->ncl_mutex);
814 rw_exit(&ncl->ncl_lock);
815 sdev_nc_free_unlinked_node(new);
816 }
817 }
818
819 void
sdev_nc_addname(sdev_nc_list_t * ncl,sdev_node_t * dv,char * nm,int flags)820 sdev_nc_addname(sdev_nc_list_t *ncl, sdev_node_t *dv, char *nm, int flags)
821 {
822 int n;
823 sdev_nc_node_t *lp;
824
825 ASSERT(SDEVTOV(dv)->v_type == VDIR);
826
827 lp = kmem_zalloc(sizeof (sdev_nc_node_t), KM_SLEEP);
828
829 n = strlen(dv->sdev_path) + strlen(nm) + 2;
830 lp->ncn_name = kmem_alloc(n, KM_SLEEP);
831 (void) sprintf(lp->ncn_name, "%s/%s",
832 dv->sdev_path, nm);
833 lp->ncn_flags = flags;
834 lp->ncn_expirecnt = sdev_nc_expirecnt;
835 sdev_nc_insertnode(ncl, lp);
836 }
837
838 void
sdev_nc_node_exists(sdev_node_t * dv)839 sdev_nc_node_exists(sdev_node_t *dv)
840 {
841 /* dynamic and non-global nodes are never cached */
842 if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) &&
843 !SDEV_IS_NO_NCACHE(dv)) {
844 sdev_nc_path_exists(sdev_ncache, dv->sdev_path);
845 }
846 }
847
848 void
sdev_nc_path_exists(sdev_nc_list_t * ncl,char * path)849 sdev_nc_path_exists(sdev_nc_list_t *ncl, char *path)
850 {
851 sdev_nc_node_t *lp;
852
853 if (sdev_nc_disable)
854 return;
855
856 rw_enter(&ncl->ncl_lock, RW_READER);
857 if ((lp = sdev_nc_findpath(ncl, path)) == NULL) {
858 rw_exit(&ncl->ncl_lock);
859 return;
860 }
861 if (rw_tryupgrade(&ncl->ncl_lock) == 0) {
862 rw_exit(&ncl->ncl_lock);
863 rw_enter(&ncl->ncl_lock, RW_WRITER);
864 lp = sdev_nc_findpath(ncl, path);
865 }
866 if (lp) {
867 list_remove(&ncl->ncl_list, lp);
868 ncl->ncl_nentries--;
869 mutex_enter(&ncl->ncl_mutex);
870 ncl->ncl_flags |= NCL_LIST_DIRTY;
871 if (ncl->ncl_flags & NCL_LIST_WENABLE) {
872 mutex_exit(&ncl->ncl_mutex);
873 rw_exit(&ncl->ncl_lock);
874 sdev_nc_flush_updates();
875 } else {
876 mutex_exit(&ncl->ncl_mutex);
877 rw_exit(&ncl->ncl_lock);
878 }
879 sdev_nc_free_unlinked_node(lp);
880 sdcmn_err5(("%s by %s: removed from ncache\n",
881 path, curproc->p_user.u_comm));
882 if (sdev_nc_verbose) {
883 cmn_err(CE_CONT, "?%s by %s: removed from ncache\n",
884 path, curproc->p_user.u_comm);
885 }
886 } else
887 rw_exit(&ncl->ncl_lock);
888 }
889
890 static void
sdev_nc_free_bootonly(void)891 sdev_nc_free_bootonly(void)
892 {
893 sdev_nc_list_t *ncl = sdev_ncache;
894 sdev_nc_node_t *lp;
895 sdev_nc_node_t *next;
896
897 rw_enter(&ncl->ncl_lock, RW_WRITER);
898
899 for (lp = list_head(&ncl->ncl_list); lp; lp = next) {
900 next = list_next(&ncl->ncl_list, lp);
901 if ((lp->ncn_flags & NCN_SRC_CURRENT) == 0) {
902 sdcmn_err5(("freeing %s\n", lp->ncn_name));
903 mutex_enter(&ncl->ncl_mutex);
904 ncl->ncl_flags |= NCL_LIST_DIRTY;
905 mutex_exit(&ncl->ncl_mutex);
906 list_remove(&ncl->ncl_list, lp);
907 sdev_nc_free_unlinked_node(lp);
908 ncl->ncl_nentries--;
909 }
910 }
911
912 rw_exit(&ncl->ncl_lock);
913 }
914