xref: /titanic_50/usr/src/uts/common/fs/dev/sdev_ncache.c (revision e12a8a13c5492eeed938960ff7c68a46f982288b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * negative cache handling for the /dev fs
30  */
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/t_lock.h>
35 #include <sys/systm.h>
36 #include <sys/sysmacros.h>
37 #include <sys/user.h>
38 #include <sys/time.h>
39 #include <sys/vfs.h>
40 #include <sys/vnode.h>
41 #include <sys/file.h>
42 #include <sys/fcntl.h>
43 #include <sys/flock.h>
44 #include <sys/kmem.h>
45 #include <sys/uio.h>
46 #include <sys/errno.h>
47 #include <sys/stat.h>
48 #include <sys/cred.h>
49 #include <sys/cmn_err.h>
50 #include <sys/debug.h>
51 #include <sys/mode.h>
52 #include <sys/policy.h>
53 #include <fs/fs_subr.h>
54 #include <sys/mount.h>
55 #include <sys/fs/snode.h>
56 #include <sys/fs/dv_node.h>
57 #include <sys/fs/sdev_node.h>
58 #include <sys/sunndi.h>
59 #include <sys/sunmdi.h>
60 #include <sys/ddi.h>
61 #include <sys/modctl.h>
62 #include <sys/devctl_impl.h>
63 
64 
65 /*
66  * ncache is a negative cache of failed lookups.  An entry
67  * is added after an attempt to configure a device by that
68  * name failed.  An accumulation of these entries over time
 * gives us a set of device names for which implicit reconfiguration
70  * does not need to be attempted.  If a name is created matching
71  * an entry in ncache, that entry is removed, with the
72  * persistent store updated.
73  *
74  * Implicit reconfig is initiated for any name during lookup that
75  * can't be resolved from the backing store and that isn't
76  * present in the negative cache.  This functionality is
77  * enabled during system startup once communication with devfsadm
78  * can be achieved.  Since readdir is more general, implicit
79  * reconfig initiated by reading a directory isn't enabled until
80  * the system is more fully booted, at the time of the multi-user
81  * milestone, corresponding to init state 2.
82  *
83  * A maximum is imposed on the number of entries in the cache
84  * to limit some script going wild and as a defense against attack.
85  * The default limit is 64 and can be adjusted via sdev_nc_max_entries.
86  *
 * Each entry also has an expiration count.  When a name is looked up
 * in the cache, its count is reset to the default.  Subsequent boots
 * will decrement the count if a name isn't referenced.  This permits
 * a once-only entry to eventually be removed over time.
91  *
92  * sdev_reconfig_delay implements a "debounce" of the timing beyond
93  * system available indication, providing what the filesystem considers
94  * to be the system-is-fully-booted state.  This is provided to adjust
95  * the timing if some application startup is performing a readdir
96  * in /dev that initiates a troublesome implicit reconfig on every boot.
97  *
98  * sdev_nc_disable_reset can be used to disable clearing the negative cache
99  * on reconfig boot.  The default is to clear the cache on reconfig boot.
100  * sdev_nc_disable can be used to disable the negative cache itself.
101  *
102  * sdev_reconfig_disable can be used to disable implicit reconfig.
103  * The default is that implicit reconfig is enabled.
104  */
105 
/*
 * Tunables and their compiled-in defaults.
 */
#define	SDEV_NC_EXPIRECNT	4
#define	SDEV_NC_MAX_ENTRIES	64
#define	SDEV_RECONFIG_DELAY	6	/* seconds */

/* expiration count given to an entry when it is added or referenced */
int			sdev_nc_expirecnt = SDEV_NC_EXPIRECNT;
/* upper bound on the number of entries held in the negative cache */
int			sdev_nc_max_entries = SDEV_NC_MAX_ENTRIES;
/* debounce, in seconds, between "system available" and boot complete */
int			sdev_reconfig_delay = SDEV_RECONFIG_DELAY;
/* not referenced in this file; presumably verbose reconfig logging */
int			sdev_reconfig_verbose = 0;
/* non-zero disables implicit reconfig (not consulted in this file) */
int			sdev_reconfig_disable = 0;
/* non-zero disables the negative cache altogether */
int			sdev_nc_disable = 0;
/* non-zero keeps stored entries from being cleared on a reconfig boot */
int			sdev_nc_disable_reset = 0;
/* non-zero logs cache hits, additions and removals via cmn_err() */
int			sdev_nc_verbose = 0;
119 
/* globals */
/* the negative cache list; allocated by sdev_ncache_init() */
sdev_nc_list_t		*sdev_ncache;
/* boot progress as tracked by sdev_devstate_change() and friends */
int			sdev_boot_state = SDEV_BOOT_STATE_INITIAL;
/* set to 1 by sdev_devstate_change() once a reconfigure boot is seen */
int			sdev_reconfig_boot = 0;
/* id of the debounce timeout armed by sdev_state_sysavail(); 0 once fired */
static timeout_id_t	sdev_timeout_id = 0;
125 
/* forward declarations of file-local routines defined later in this file */
static void sdev_ncache_write_complete(nvfd_t *);
static void sdev_ncache_write(void);
static void sdev_ncache_process_store(void);
static sdev_nc_list_t *sdev_nc_newlist(void);
static void sdev_nc_free_unlinked_node(sdev_nc_node_t *);
static void sdev_nc_free_all_nodes(sdev_nc_list_t *);
static void sdev_nc_freelist(sdev_nc_list_t *);
static sdev_nc_node_t *sdev_nc_findpath(sdev_nc_list_t *, char *);
static void sdev_nc_insertnode(sdev_nc_list_t *, sdev_nc_node_t *);
static void sdev_nc_free_bootonly(void);
137 
138 
139 /*
140  * called once at filesystem initialization
141  */
142 void
143 sdev_ncache_init(void)
144 {
145 	sdev_ncache = sdev_nc_newlist();
146 }
147 
148 /*
149  * called at mount of the global instance
150  * currently the global instance is never unmounted
151  */
152 void
153 sdev_ncache_setup(void)
154 {
155 	nvfd_t	*nvf = sdevfd;
156 
157 	nvf_register_write_complete(nvf, sdev_ncache_write_complete);
158 
159 	i_ddi_read_devname_file();
160 	sdev_ncache_process_store();
161 	sdev_devstate_change();
162 }
163 
164 static void
165 sdev_nvp_cache_free(nvfd_t *nvf)
166 {
167 	nvp_devname_t	*np;
168 	nvp_devname_t	*next;
169 
170 	for (np = NVF_DEVNAME_LIST(nvf); np; np = next) {
171 		next = NVP_DEVNAME_NEXT(np);
172 		nfd_nvp_free_and_unlink(nvf, NVPLIST(np));
173 	}
174 }
175 
176 static void
177 sdev_ncache_process_store(void)
178 {
179 	nvfd_t		*nvf = sdevfd;
180 	sdev_nc_list_t	*ncl = sdev_ncache;
181 	nvp_devname_t	*np;
182 	sdev_nc_node_t	*lp;
183 	char		*path;
184 	int		i, n;
185 
186 	if (sdev_nc_disable)
187 		return;
188 
189 	for (np = NVF_DEVNAME_LIST(nvf); np; np = NVP_DEVNAME_NEXT(np)) {
190 		for (i = 0; i < np->nvp_npaths; i++) {
191 			sdcmn_err5(("    %s %d\n",
192 			    np->nvp_paths[i], np->nvp_expirecnts[i]));
193 			if (ncl->ncl_nentries < sdev_nc_max_entries) {
194 				path = np->nvp_paths[i];
195 				n = strlen(path) + 1;
196 				lp = kmem_alloc(sizeof (sdev_nc_node_t),
197 				    KM_SLEEP);
198 				lp->ncn_name = kmem_alloc(n, KM_SLEEP);
199 				bcopy(path, lp->ncn_name, n);
200 				lp->ncn_flags = NCN_SRC_STORE;
201 				lp->ncn_expirecnt = np->nvp_expirecnts[i];
202 				sdev_nc_insertnode(ncl, lp);
203 			} else if (sdev_nc_verbose) {
204 				cmn_err(CE_CONT,
205 				    "?%s: truncating from ncache (max %d)\n",
206 				    np->nvp_paths[i], sdev_nc_max_entries);
207 			}
208 		}
209 	}
210 }
211 
212 static void
213 sdev_ncache_write_complete(nvfd_t *nvf)
214 {
215 	sdev_nc_list_t	*ncl = sdev_ncache;
216 
217 	mutex_enter(&ncl->ncl_mutex);
218 
219 	ASSERT(ncl->ncl_flags & NCL_LIST_WRITING);
220 
221 	if (ncl->ncl_flags & NCL_LIST_DIRTY) {
222 		sdcmn_err5(("ncache write complete but dirty again\n"));
223 		ncl->ncl_flags &= ~NCL_LIST_DIRTY;
224 		mutex_exit(&ncl->ncl_mutex);
225 		sdev_ncache_write();
226 	} else {
227 		sdcmn_err5(("ncache write complete\n"));
228 		ncl->ncl_flags &= ~NCL_LIST_WRITING;
229 		mutex_exit(&ncl->ncl_mutex);
230 		rw_enter(&nvf->nvf_lock, RW_WRITER);
231 		sdev_nvp_cache_free(nvf);
232 		rw_exit(&nvf->nvf_lock);
233 	}
234 }
235 
236 static void
237 sdev_ncache_write(void)
238 {
239 	nvfd_t		*nvf = sdevfd;
240 	sdev_nc_list_t	*ncl = sdev_ncache;
241 	nvp_devname_t	*np;
242 	sdev_nc_node_t	*lp;
243 	int		n, i;
244 
245 	if (sdev_cache_write_disable) {
246 		mutex_enter(&ncl->ncl_mutex);
247 		ncl->ncl_flags &= ~NCL_LIST_WRITING;
248 		mutex_exit(&ncl->ncl_mutex);
249 		return;
250 	}
251 
252 	/* proper lock ordering here is essential */
253 	rw_enter(&nvf->nvf_lock, RW_WRITER);
254 	sdev_nvp_cache_free(nvf);
255 
256 	rw_enter(&ncl->ncl_lock, RW_READER);
257 	n = ncl->ncl_nentries;
258 	ASSERT(n <= sdev_nc_max_entries);
259 
260 	np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP);
261 	np->nvp_npaths = n;
262 	np->nvp_paths = kmem_zalloc(n * sizeof (char *), KM_SLEEP);
263 	np->nvp_expirecnts = kmem_zalloc(n * sizeof (int), KM_SLEEP);
264 
265 	i = 0;
266 	for (lp = list_head(&ncl->ncl_list); lp;
267 	    lp = list_next(&ncl->ncl_list, lp)) {
268 		np->nvp_paths[i] = i_ddi_strdup(lp->ncn_name, KM_SLEEP);
269 		np->nvp_expirecnts[i] = lp->ncn_expirecnt;
270 		sdcmn_err5(("    %s %d\n",
271 		    np->nvp_paths[i], np->nvp_expirecnts[i]));
272 		i++;
273 	}
274 
275 	rw_exit(&ncl->ncl_lock);
276 
277 	NVF_MARK_DIRTY(nvf);
278 	nfd_nvp_link(nvf, NVPLIST(np));
279 	rw_exit(&nvf->nvf_lock);
280 
281 	wake_nvpflush_daemon();
282 }
283 
284 static void
285 sdev_nc_flush_updates(void)
286 {
287 	sdev_nc_list_t *ncl = sdev_ncache;
288 
289 	if (sdev_nc_disable || sdev_cache_write_disable)
290 		return;
291 
292 	mutex_enter(&ncl->ncl_mutex);
293 	if (((ncl->ncl_flags &
294 	    (NCL_LIST_DIRTY | NCL_LIST_WENABLE | NCL_LIST_WRITING)) ==
295 	    (NCL_LIST_DIRTY | NCL_LIST_WENABLE))) {
296 		ncl->ncl_flags &= ~NCL_LIST_DIRTY;
297 		ncl->ncl_flags |= NCL_LIST_WRITING;
298 		mutex_exit(&ncl->ncl_mutex);
299 		sdev_ncache_write();
300 	} else {
301 		mutex_exit(&ncl->ncl_mutex);
302 	}
303 }
304 
305 static void
306 sdev_nc_flush_boot_update(void)
307 {
308 	sdev_nc_list_t *ncl = sdev_ncache;
309 
310 	if (sdev_nc_disable || sdev_cache_write_disable ||
311 	    (sdev_boot_state == SDEV_BOOT_STATE_INITIAL)) {
312 		return;
313 	}
314 	mutex_enter(&ncl->ncl_mutex);
315 	if (ncl->ncl_flags & NCL_LIST_WENABLE) {
316 		mutex_exit(&ncl->ncl_mutex);
317 		sdev_nc_flush_updates();
318 	} else {
319 		mutex_exit(&ncl->ncl_mutex);
320 	}
321 
322 }
323 
324 static void
325 sdev_state_boot_complete()
326 {
327 	sdev_nc_list_t	*ncl = sdev_ncache;
328 	sdev_nc_node_t	*lp, *next;
329 
330 	/*
331 	 * Once boot is complete, decrement the expire count of each entry
332 	 * in the cache not touched by a reference.  Remove any that
333 	 * goes to zero.  This effectively removes random entries over
334 	 * time.
335 	 */
336 	rw_enter(&ncl->ncl_lock, RW_WRITER);
337 	mutex_enter(&ncl->ncl_mutex);
338 
339 	for (lp = list_head(&ncl->ncl_list); lp; lp = next) {
340 		next = list_next(&ncl->ncl_list, lp);
341 		if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0) {
342 			if (lp->ncn_flags & NCN_ACTIVE) {
343 				if (lp->ncn_expirecnt != sdev_nc_expirecnt) {
344 					lp->ncn_expirecnt = sdev_nc_expirecnt;
345 					ncl->ncl_flags |= NCL_LIST_DIRTY;
346 				}
347 			} else {
348 				if (--lp->ncn_expirecnt == 0) {
349 					list_remove(&ncl->ncl_list, lp);
350 					sdev_nc_free_unlinked_node(lp);
351 					ncl->ncl_nentries--;
352 				}
353 				ncl->ncl_flags |= NCL_LIST_DIRTY;
354 			}
355 		}
356 	}
357 
358 	mutex_exit(&ncl->ncl_mutex);
359 	rw_exit(&ncl->ncl_lock);
360 
361 	sdev_nc_flush_boot_update();
362 	sdev_boot_state = SDEV_BOOT_STATE_COMPLETE;
363 }
364 
365 /*
366  * Upon transition to the login state on a reconfigure boot,
367  * a debounce timer is set up so that we cache all the nonsense
368  * lookups we're hit with by the windowing system startup.
369  */
370 
371 /*ARGSUSED*/
372 static void
373 sdev_state_timeout(void *arg)
374 {
375 	sdev_timeout_id = 0;
376 	sdev_state_boot_complete();
377 }
378 
379 static void
380 sdev_state_sysavail()
381 {
382 	sdev_nc_list_t *ncl = sdev_ncache;
383 	clock_t	nticks;
384 	int nsecs;
385 
386 	mutex_enter(&ncl->ncl_mutex);
387 	ncl->ncl_flags |= NCL_LIST_WENABLE;
388 	mutex_exit(&ncl->ncl_mutex);
389 
390 	nsecs = sdev_reconfig_delay;
391 	if (nsecs == 0) {
392 		sdev_state_boot_complete();
393 	} else {
394 		nticks = drv_usectohz(1000000 * nsecs);
395 		sdcmn_err5(("timeout initiated %ld\n", nticks));
396 		sdev_timeout_id = timeout(sdev_state_timeout, NULL, nticks);
397 		sdev_nc_flush_boot_update();
398 	}
399 }
400 
401 /*
402  * Called to inform the filesystem of progress during boot,
403  * either a notice of reconfiguration boot or an indication of
404  * system boot complete.  At system boot complete, set up a
405  * timer at the expiration of which no further failed lookups
406  * will be added to the negative cache.
407  *
408  * The dev filesystem infers from reconfig boot that implicit
409  * reconfig need not be invoked at all as all available devices
410  * will have already been named.
411  *
412  * The dev filesystem infers from "system available" that devfsadmd
413  * can now be run and hence implicit reconfiguration may be initiated.
414  * During early stages of system startup, implicit reconfig is
415  * not done to avoid impacting boot performance.
416  */
417 void
418 sdev_devstate_change(void)
419 {
420 	int new_state;
421 
422 	/*
423 	 * Track system state and manage interesting transitions
424 	 */
425 	new_state = SDEV_BOOT_STATE_INITIAL;
426 	if (i_ddi_reconfig())
427 		new_state = SDEV_BOOT_STATE_RECONFIG;
428 	if (i_ddi_sysavail())
429 		new_state = SDEV_BOOT_STATE_SYSAVAIL;
430 
431 	if (sdev_boot_state < new_state) {
432 		switch (new_state) {
433 		case SDEV_BOOT_STATE_RECONFIG:
434 			sdcmn_err5(("state change: reconfigure boot\n"));
435 			sdev_boot_state = new_state;
436 			sdev_reconfig_boot = 1;
437 			if (!sdev_nc_disable_reset)
438 				sdev_nc_free_bootonly();
439 			break;
440 		case SDEV_BOOT_STATE_SYSAVAIL:
441 			sdcmn_err5(("system available\n"));
442 			sdev_boot_state = new_state;
443 			sdev_state_sysavail();
444 			break;
445 		}
446 	}
447 }
448 
449 /*
450  * Lookup: filter out entries in the negative cache
451  * Return 1 if the lookup should not cause a reconfig.
452  */
453 int
454 sdev_lookup_filter(sdev_node_t *dv, char *nm)
455 {
456 	int n;
457 	sdev_nc_list_t *ncl = sdev_ncache;
458 	sdev_nc_node_t *lp;
459 	char *path;
460 	int rval = 0;
461 	int changed = 0;
462 
463 	ASSERT(i_ddi_io_initialized());
464 	ASSERT(SDEVTOV(dv)->v_type == VDIR);
465 
466 	if (sdev_nc_disable)
467 		return (0);
468 
469 	n = strlen(dv->sdev_path) + strlen(nm) + 2;
470 	path = kmem_alloc(n, KM_SLEEP);
471 	(void) sprintf(path, "%s/%s", dv->sdev_path, nm);
472 
473 	rw_enter(&ncl->ncl_lock, RW_READER);
474 	if ((lp = sdev_nc_findpath(ncl, path)) != NULL) {
475 		sdcmn_err5(("%s/%s: lookup by %s cached, no reconfig\n",
476 		    dv->sdev_name, nm, curproc->p_user.u_comm));
477 		if (sdev_nc_verbose) {
478 			cmn_err(CE_CONT,
479 			    "?%s/%s: lookup by %s cached, no reconfig\n",
480 			    dv->sdev_name, nm, curproc->p_user.u_comm);
481 		}
482 		mutex_enter(&ncl->ncl_mutex);
483 		lp->ncn_flags |= NCN_ACTIVE;
484 		if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0 &&
485 		    lp->ncn_expirecnt < sdev_nc_expirecnt) {
486 			lp->ncn_expirecnt = sdev_nc_expirecnt;
487 			ncl->ncl_flags |= NCL_LIST_DIRTY;
488 			changed = 1;
489 		}
490 		mutex_exit(&ncl->ncl_mutex);
491 		rval = 1;
492 	}
493 	rw_exit(&ncl->ncl_lock);
494 	kmem_free(path, n);
495 	if (changed)
496 		sdev_nc_flush_boot_update();
497 	return (rval);
498 }
499 
500 void
501 sdev_lookup_failed(sdev_node_t *dv, char *nm, int failed_flags)
502 {
503 	if (sdev_nc_disable)
504 		return;
505 
506 	/*
507 	 * If we're still in the initial boot stage, always update
508 	 * the cache - we may not have received notice of the
509 	 * reconfig boot state yet.  On a reconfigure boot, entries
510 	 * from the backing store are not re-persisted on update,
511 	 * but new entries are marked as needing an update.
512 	 * Never cache dynamic or non-global nodes.
513 	 */
514 	if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) &&
515 	    !SDEV_IS_NO_NCACHE(dv) &&
516 	    ((failed_flags & SLF_NO_NCACHE) == 0) &&
517 	    ((sdev_reconfig_boot &&
518 		(sdev_boot_state != SDEV_BOOT_STATE_COMPLETE)) ||
519 	    (!sdev_reconfig_boot && ((failed_flags & SLF_REBUILT))))) {
520 			sdev_nc_addname(sdev_ncache,
521 			    dv, nm, NCN_SRC_CURRENT|NCN_ACTIVE);
522 	}
523 }
524 
525 static sdev_nc_list_t *
526 sdev_nc_newlist(void)
527 {
528 	sdev_nc_list_t	*ncl;
529 
530 	ncl = kmem_zalloc(sizeof (sdev_nc_list_t), KM_SLEEP);
531 
532 	rw_init(&ncl->ncl_lock, NULL, RW_DEFAULT, NULL);
533 	mutex_init(&ncl->ncl_mutex, NULL, MUTEX_DEFAULT, NULL);
534 	list_create(&ncl->ncl_list, sizeof (sdev_nc_node_t),
535 	    offsetof(sdev_nc_node_t, ncn_link));
536 
537 	return (ncl);
538 }
539 
540 static void
541 sdev_nc_free_unlinked_node(sdev_nc_node_t *lp)
542 {
543 	kmem_free(lp->ncn_name, strlen(lp->ncn_name) + 1);
544 	kmem_free(lp, sizeof (sdev_nc_node_t));
545 }
546 
547 static void
548 sdev_nc_free_all_nodes(sdev_nc_list_t *ncl)
549 {
550 	sdev_nc_node_t *lp;
551 
552 	while ((lp = list_head(&ncl->ncl_list)) != NULL) {
553 		list_remove(&ncl->ncl_list, lp);
554 		sdev_nc_free_unlinked_node(lp);
555 		ncl->ncl_nentries--;
556 	}
557 	ASSERT(ncl->ncl_nentries == 0);
558 }
559 
560 static void
561 sdev_nc_freelist(sdev_nc_list_t *ncl)
562 {
563 	if (!list_is_empty(&ncl->ncl_list))
564 		sdev_nc_free_all_nodes(ncl);
565 	ASSERT(list_is_empty(&ncl->ncl_list));
566 	ASSERT(ncl->ncl_nentries == 0);
567 
568 	mutex_destroy(&ncl->ncl_mutex);
569 	rw_destroy(&ncl->ncl_lock);
570 	list_destroy(&ncl->ncl_list);
571 	kmem_free(ncl, sizeof (sdev_nc_list_t));
572 }
573 
574 static sdev_nc_node_t *
575 sdev_nc_findpath(sdev_nc_list_t *ncl, char *path)
576 {
577 	sdev_nc_node_t *lp;
578 
579 	ASSERT(RW_LOCK_HELD(&ncl->ncl_lock));
580 
581 	for (lp = list_head(&ncl->ncl_list); lp;
582 	    lp = list_next(&ncl->ncl_list, lp)) {
583 		if (strcmp(path, lp->ncn_name) == 0)
584 			return (lp);
585 	}
586 
587 	return (NULL);
588 }
589 
590 static void
591 sdev_nc_insertnode(sdev_nc_list_t *ncl, sdev_nc_node_t *new)
592 {
593 	sdev_nc_node_t *lp;
594 
595 	rw_enter(&ncl->ncl_lock, RW_WRITER);
596 
597 	lp = sdev_nc_findpath(ncl, new->ncn_name);
598 	if (lp == NULL) {
599 		if (ncl->ncl_nentries == sdev_nc_max_entries) {
600 			sdcmn_err5((
601 			    "%s by %s: not adding to ncache (max %d)\n",
602 			    new->ncn_name, curproc->p_user.u_comm,
603 			    ncl->ncl_nentries));
604 			if (sdev_nc_verbose) {
605 				cmn_err(CE_CONT, "?%s by %s: "
606 				    "not adding to ncache (max %d)\n",
607 				    new->ncn_name, curproc->p_user.u_comm,
608 				    ncl->ncl_nentries);
609 			}
610 			rw_exit(&ncl->ncl_lock);
611 			sdev_nc_free_unlinked_node(new);
612 		} else {
613 
614 			list_insert_tail(&ncl->ncl_list, new);
615 			ncl->ncl_nentries++;
616 
617 			/* don't mark list dirty for nodes from store */
618 			mutex_enter(&ncl->ncl_mutex);
619 			if ((new->ncn_flags & NCN_SRC_STORE) == 0) {
620 				sdcmn_err5(("%s by %s: add to ncache\n",
621 				    new->ncn_name, curproc->p_user.u_comm));
622 				if (sdev_nc_verbose) {
623 					cmn_err(CE_CONT,
624 					    "?%s by %s: add to ncache\n",
625 					    new->ncn_name,
626 					    curproc->p_user.u_comm);
627 				}
628 				ncl->ncl_flags |= NCL_LIST_DIRTY;
629 			}
630 			mutex_exit(&ncl->ncl_mutex);
631 			rw_exit(&ncl->ncl_lock);
632 			lp = new;
633 			sdev_nc_flush_boot_update();
634 		}
635 	} else {
636 		mutex_enter(&ncl->ncl_mutex);
637 		lp->ncn_flags |= new->ncn_flags;
638 		mutex_exit(&ncl->ncl_mutex);
639 		rw_exit(&ncl->ncl_lock);
640 		sdev_nc_free_unlinked_node(new);
641 	}
642 }
643 
644 void
645 sdev_nc_addname(sdev_nc_list_t *ncl, sdev_node_t *dv, char *nm, int flags)
646 {
647 	int n;
648 	sdev_nc_node_t *lp;
649 
650 	ASSERT(SDEVTOV(dv)->v_type == VDIR);
651 
652 	lp = kmem_zalloc(sizeof (sdev_nc_node_t), KM_SLEEP);
653 
654 	n = strlen(dv->sdev_path) + strlen(nm) + 2;
655 	lp->ncn_name = kmem_alloc(n, KM_SLEEP);
656 	(void) sprintf(lp->ncn_name, "%s/%s",
657 		dv->sdev_path, nm);
658 	lp->ncn_flags = flags;
659 	lp->ncn_expirecnt = sdev_nc_expirecnt;
660 	sdev_nc_insertnode(ncl, lp);
661 }
662 
663 void
664 sdev_nc_node_exists(sdev_node_t *dv)
665 {
666 	/* dynamic and non-global nodes are never cached */
667 	if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) &&
668 	    !SDEV_IS_NO_NCACHE(dv)) {
669 		sdev_nc_path_exists(sdev_ncache, dv->sdev_path);
670 	}
671 }
672 
673 void
674 sdev_nc_path_exists(sdev_nc_list_t *ncl, char *path)
675 {
676 	sdev_nc_node_t *lp;
677 
678 	if (sdev_nc_disable)
679 		return;
680 
681 	rw_enter(&ncl->ncl_lock, RW_READER);
682 	if ((lp = sdev_nc_findpath(ncl, path)) == NULL) {
683 		rw_exit(&ncl->ncl_lock);
684 		return;
685 	}
686 	if (rw_tryupgrade(&ncl->ncl_lock) == 0) {
687 		rw_exit(&ncl->ncl_lock);
688 		rw_enter(&ncl->ncl_lock, RW_WRITER);
689 		lp = sdev_nc_findpath(ncl, path);
690 	}
691 	if (lp) {
692 		list_remove(&ncl->ncl_list, lp);
693 		ncl->ncl_nentries--;
694 		mutex_enter(&ncl->ncl_mutex);
695 		ncl->ncl_flags |= NCL_LIST_DIRTY;
696 		if (ncl->ncl_flags & NCL_LIST_WENABLE) {
697 			mutex_exit(&ncl->ncl_mutex);
698 			rw_exit(&ncl->ncl_lock);
699 			sdev_nc_flush_updates();
700 		} else {
701 			mutex_exit(&ncl->ncl_mutex);
702 			rw_exit(&ncl->ncl_lock);
703 		}
704 		sdev_nc_free_unlinked_node(lp);
705 		sdcmn_err5(("%s by %s: removed from ncache\n",
706 		    path, curproc->p_user.u_comm));
707 		if (sdev_nc_verbose) {
708 			cmn_err(CE_CONT, "?%s by %s: removed from ncache\n",
709 			    path, curproc->p_user.u_comm);
710 		}
711 	} else
712 		rw_exit(&ncl->ncl_lock);
713 }
714 
715 static void
716 sdev_nc_free_bootonly(void)
717 {
718 	sdev_nc_list_t	*ncl = sdev_ncache;
719 	sdev_nc_node_t *lp;
720 	sdev_nc_node_t *next;
721 
722 	ASSERT(sdev_reconfig_boot);
723 
724 	rw_enter(&ncl->ncl_lock, RW_WRITER);
725 
726 	for (lp = list_head(&ncl->ncl_list); lp; lp = next) {
727 		next = list_next(&ncl->ncl_list, lp);
728 		if ((lp->ncn_flags & NCN_SRC_CURRENT) == 0) {
729 			sdcmn_err5(("freeing %s\n", lp->ncn_name));
730 			mutex_enter(&ncl->ncl_mutex);
731 			ncl->ncl_flags |= NCL_LIST_DIRTY;
732 			mutex_exit(&ncl->ncl_mutex);
733 			list_remove(&ncl->ncl_list, lp);
734 			sdev_nc_free_unlinked_node(lp);
735 			ncl->ncl_nentries--;
736 		}
737 	}
738 
739 	rw_exit(&ncl->ncl_lock);
740 }
741