xref: /titanic_50/usr/src/uts/common/fs/dev/sdev_ncache.c (revision aab83bb83be7342f6cfccaed8d5fe0b2f404855d)
1facf4a8dSllai1 /*
2facf4a8dSllai1  * CDDL HEADER START
3facf4a8dSllai1  *
4facf4a8dSllai1  * The contents of this file are subject to the terms of the
5facf4a8dSllai1  * Common Development and Distribution License (the "License").
6facf4a8dSllai1  * You may not use this file except in compliance with the License.
7facf4a8dSllai1  *
8facf4a8dSllai1  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9facf4a8dSllai1  * or http://www.opensolaris.org/os/licensing.
10facf4a8dSllai1  * See the License for the specific language governing permissions
11facf4a8dSllai1  * and limitations under the License.
12facf4a8dSllai1  *
13facf4a8dSllai1  * When distributing Covered Code, include this CDDL HEADER in each
14facf4a8dSllai1  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15facf4a8dSllai1  * If applicable, add the following below this CDDL HEADER, with the
16facf4a8dSllai1  * fields enclosed by brackets "[]" replaced with your own identifying
17facf4a8dSllai1  * information: Portions Copyright [yyyy] [name of copyright owner]
18facf4a8dSllai1  *
19facf4a8dSllai1  * CDDL HEADER END
20facf4a8dSllai1  */
21facf4a8dSllai1 /*
22*89dfdb3fSSrikanth, Ramana  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23facf4a8dSllai1  * Use is subject to license terms.
24facf4a8dSllai1  */
25facf4a8dSllai1 
26facf4a8dSllai1 /*
27facf4a8dSllai1  * negative cache handling for the /dev fs
28facf4a8dSllai1  */
29facf4a8dSllai1 
30facf4a8dSllai1 #include <sys/types.h>
31facf4a8dSllai1 #include <sys/param.h>
32facf4a8dSllai1 #include <sys/t_lock.h>
33facf4a8dSllai1 #include <sys/systm.h>
34facf4a8dSllai1 #include <sys/sysmacros.h>
35facf4a8dSllai1 #include <sys/user.h>
36facf4a8dSllai1 #include <sys/time.h>
37facf4a8dSllai1 #include <sys/vfs.h>
38facf4a8dSllai1 #include <sys/vnode.h>
39facf4a8dSllai1 #include <sys/file.h>
40facf4a8dSllai1 #include <sys/fcntl.h>
41facf4a8dSllai1 #include <sys/flock.h>
42facf4a8dSllai1 #include <sys/kmem.h>
43facf4a8dSllai1 #include <sys/uio.h>
44facf4a8dSllai1 #include <sys/errno.h>
45facf4a8dSllai1 #include <sys/stat.h>
46facf4a8dSllai1 #include <sys/cred.h>
47facf4a8dSllai1 #include <sys/cmn_err.h>
48facf4a8dSllai1 #include <sys/debug.h>
49facf4a8dSllai1 #include <sys/mode.h>
50facf4a8dSllai1 #include <sys/policy.h>
51facf4a8dSllai1 #include <fs/fs_subr.h>
52facf4a8dSllai1 #include <sys/mount.h>
53facf4a8dSllai1 #include <sys/fs/snode.h>
54facf4a8dSllai1 #include <sys/fs/dv_node.h>
553c5e027bSEric Taylor #include <sys/fs/sdev_impl.h>
56facf4a8dSllai1 #include <sys/sunndi.h>
57facf4a8dSllai1 #include <sys/sunmdi.h>
58facf4a8dSllai1 #include <sys/ddi.h>
59facf4a8dSllai1 #include <sys/modctl.h>
6083c4dfe9Sjg #include <sys/devcache.h>
61facf4a8dSllai1 
62facf4a8dSllai1 
/*
 * ncache is a negative cache of failed lookups.  An entry
 * is added after an attempt to configure a device by that
 * name failed.  An accumulation of these entries over time
 * gives us a set of device names for which implicit reconfiguration
 * does not need to be attempted.  If a name is created matching
 * an entry in ncache, that entry is removed, with the
 * persistent store updated.
 *
 * Implicit reconfig is initiated for any name during lookup that
 * can't be resolved from the backing store and that isn't
 * present in the negative cache.  This functionality is
 * enabled during system startup once communication with devfsadm
 * can be achieved.  Since readdir is more general, implicit
 * reconfig initiated by reading a directory isn't enabled until
 * the system is more fully booted, at the time of the multi-user
 * milestone, corresponding to init state 2.
 *
 * A maximum is imposed on the number of entries in the cache
 * to limit some script going wild and as a defense against attack.
 * The default limit is 64 and can be adjusted via sdev_nc_max_entries.
 *
 * Each entry also has an expiration count.  When a name in the cache
 * is looked up, its count is reset to the default.  Subsequent boots
 * will decrement the count if a name isn't referenced.  This permits
 * a once-only entry to eventually be removed over time.
 *
 * sdev_reconfig_delay implements a "debounce" of the timing beyond
 * system available indication, providing what the filesystem considers
 * to be the system-is-fully-booted state.  This is provided to adjust
 * the timing if some application startup is performing a readdir
 * in /dev that initiates a troublesome implicit reconfig on every boot.
 *
 * sdev_nc_disable_reset can be used to disable clearing the negative cache
 * on reconfig boot.  The default is to clear the cache on reconfig boot.
 * sdev_nc_disable can be used to disable the negative cache itself.
 *
 * sdev_reconfig_disable can be used to disable implicit reconfig.
 * The default is that implicit reconfig is enabled.
 */
103facf4a8dSllai1 
104facf4a8dSllai1 /* tunables and defaults */
105facf4a8dSllai1 #define	SDEV_NC_EXPIRECNT	4
106facf4a8dSllai1 #define	SDEV_NC_MAX_ENTRIES	64
107facf4a8dSllai1 #define	SEV_RECONFIG_DELAY	6	/* seconds */
108facf4a8dSllai1 
10983c4dfe9Sjg /* tunables */
110facf4a8dSllai1 int	sdev_nc_expirecnt = SDEV_NC_EXPIRECNT;
111facf4a8dSllai1 int	sdev_nc_max_entries = SDEV_NC_MAX_ENTRIES;
112facf4a8dSllai1 int	sdev_reconfig_delay = SEV_RECONFIG_DELAY;
113facf4a8dSllai1 int	sdev_reconfig_verbose = 0;
114facf4a8dSllai1 int	sdev_reconfig_disable = 0;
115facf4a8dSllai1 int	sdev_nc_disable = 0;
116facf4a8dSllai1 int	sdev_nc_disable_reset = 0;
117facf4a8dSllai1 int	sdev_nc_verbose = 0;
11883c4dfe9Sjg int	sdev_cache_read_disable = 0;
11983c4dfe9Sjg int	sdev_cache_write_disable = 0;
120facf4a8dSllai1 
121facf4a8dSllai1 /* globals */
122facf4a8dSllai1 int	sdev_boot_state = SDEV_BOOT_STATE_INITIAL;
123facf4a8dSllai1 int	sdev_reconfig_boot = 0;
12483c4dfe9Sjg sdev_nc_list_t *sdev_ncache;
12583c4dfe9Sjg static nvf_handle_t sdevfd_handle;
126facf4a8dSllai1 
127facf4a8dSllai1 /* static prototypes */
12883c4dfe9Sjg static void sdev_ncache_write_complete(nvf_handle_t);
129facf4a8dSllai1 static void sdev_ncache_write(void);
130facf4a8dSllai1 static void sdev_ncache_process_store(void);
131facf4a8dSllai1 static sdev_nc_list_t *sdev_nc_newlist(void);
132facf4a8dSllai1 static void sdev_nc_free_unlinked_node(sdev_nc_node_t *);
133facf4a8dSllai1 static sdev_nc_node_t *sdev_nc_findpath(sdev_nc_list_t *, char *);
134facf4a8dSllai1 static void sdev_nc_insertnode(sdev_nc_list_t *, sdev_nc_node_t *);
135facf4a8dSllai1 static void sdev_nc_free_bootonly(void);
13683c4dfe9Sjg static int sdev_ncache_unpack_nvlist(nvf_handle_t, nvlist_t *, char *);
13783c4dfe9Sjg static int sdev_ncache_pack_list(nvf_handle_t, nvlist_t **);
13883c4dfe9Sjg static void sdev_ncache_list_free(nvf_handle_t);
13983c4dfe9Sjg static void sdev_nvp_free(nvp_devname_t *);
140facf4a8dSllai1 
14183c4dfe9Sjg /*
14283c4dfe9Sjg  * Registration for /etc/devices/devname_cache
14383c4dfe9Sjg  */
14483c4dfe9Sjg static nvf_ops_t sdev_cache_ops = {
14583c4dfe9Sjg 	"/etc/devices/devname_cache",		/* path to cache */
14683c4dfe9Sjg 	sdev_ncache_unpack_nvlist,		/* read: unpack nvlist */
14783c4dfe9Sjg 	sdev_ncache_pack_list,			/* write: pack list */
14883c4dfe9Sjg 	sdev_ncache_list_free,			/* free data list */
14983c4dfe9Sjg 	sdev_ncache_write_complete		/* write complete callback */
15083c4dfe9Sjg };
151facf4a8dSllai1 
152facf4a8dSllai1 /*
153facf4a8dSllai1  * called once at filesystem initialization
154facf4a8dSllai1  */
155facf4a8dSllai1 void
sdev_ncache_init(void)156facf4a8dSllai1 sdev_ncache_init(void)
157facf4a8dSllai1 {
158facf4a8dSllai1 	sdev_ncache = sdev_nc_newlist();
159facf4a8dSllai1 }
160facf4a8dSllai1 
161facf4a8dSllai1 /*
162facf4a8dSllai1  * called at mount of the global instance
163facf4a8dSllai1  * currently the global instance is never unmounted
164facf4a8dSllai1  */
165facf4a8dSllai1 void
sdev_ncache_setup(void)166facf4a8dSllai1 sdev_ncache_setup(void)
167facf4a8dSllai1 {
16883c4dfe9Sjg 	sdevfd_handle = nvf_register_file(&sdev_cache_ops);
16983c4dfe9Sjg 	ASSERT(sdevfd_handle);
170facf4a8dSllai1 
17183c4dfe9Sjg 	list_create(nvf_list(sdevfd_handle), sizeof (nvp_devname_t),
17283c4dfe9Sjg 	    offsetof(nvp_devname_t, nvp_link));
173facf4a8dSllai1 
17483c4dfe9Sjg 	rw_enter(nvf_lock(sdevfd_handle), RW_WRITER);
17583c4dfe9Sjg 	if (!sdev_cache_read_disable) {
17683c4dfe9Sjg 		(void) nvf_read_file(sdevfd_handle);
17783c4dfe9Sjg 	}
178facf4a8dSllai1 	sdev_ncache_process_store();
17983c4dfe9Sjg 	rw_exit(nvf_lock(sdevfd_handle));
18083c4dfe9Sjg 
181facf4a8dSllai1 	sdev_devstate_change();
182facf4a8dSllai1 }
183facf4a8dSllai1 
184facf4a8dSllai1 static void
sdev_nvp_free(nvp_devname_t * dp)18583c4dfe9Sjg sdev_nvp_free(nvp_devname_t *dp)
186facf4a8dSllai1 {
18783c4dfe9Sjg 	int	i;
18883c4dfe9Sjg 	char	**p;
189facf4a8dSllai1 
19083c4dfe9Sjg 	if (dp->nvp_npaths > 0) {
19183c4dfe9Sjg 		p = dp->nvp_paths;
19283c4dfe9Sjg 		for (i = 0; i < dp->nvp_npaths; i++, p++) {
19383c4dfe9Sjg 			kmem_free(*p, strlen(*p)+1);
194facf4a8dSllai1 		}
19583c4dfe9Sjg 		kmem_free(dp->nvp_paths,
19683c4dfe9Sjg 		    dp->nvp_npaths * sizeof (char *));
19783c4dfe9Sjg 		kmem_free(dp->nvp_expirecnts,
19883c4dfe9Sjg 		    dp->nvp_npaths * sizeof (int));
19983c4dfe9Sjg 	}
20083c4dfe9Sjg 
20183c4dfe9Sjg 	kmem_free(dp, sizeof (nvp_devname_t));
202facf4a8dSllai1 }
203facf4a8dSllai1 
204facf4a8dSllai1 static void
sdev_ncache_list_free(nvf_handle_t fd)20583c4dfe9Sjg sdev_ncache_list_free(nvf_handle_t fd)
20683c4dfe9Sjg {
20783c4dfe9Sjg 	list_t		*listp;
20883c4dfe9Sjg 	nvp_devname_t	*dp;
20983c4dfe9Sjg 
21083c4dfe9Sjg 	ASSERT(fd == sdevfd_handle);
21183c4dfe9Sjg 	ASSERT(RW_WRITE_HELD(nvf_lock(fd)));
21283c4dfe9Sjg 
21383c4dfe9Sjg 	listp = nvf_list(fd);
21483c4dfe9Sjg 	if ((dp = list_head(listp)) != NULL) {
21583c4dfe9Sjg 		list_remove(listp, dp);
21683c4dfe9Sjg 		sdev_nvp_free(dp);
21783c4dfe9Sjg 	}
21883c4dfe9Sjg }
21983c4dfe9Sjg 
22083c4dfe9Sjg /*
22183c4dfe9Sjg  * Unpack a device path/nvlist pair to internal data list format.
22283c4dfe9Sjg  * Used to decode the nvlist format into the internal representation
22383c4dfe9Sjg  * when reading /etc/devices/devname_cache.
22483c4dfe9Sjg  * Note that the expiration counts are optional, for compatibility
22583c4dfe9Sjg  * with earlier instances of the cache.  If not present, the
22683c4dfe9Sjg  * expire counts are initialized to defaults.
22783c4dfe9Sjg  */
22883c4dfe9Sjg static int
sdev_ncache_unpack_nvlist(nvf_handle_t fd,nvlist_t * nvl,char * name)22983c4dfe9Sjg sdev_ncache_unpack_nvlist(nvf_handle_t fd, nvlist_t *nvl, char *name)
23083c4dfe9Sjg {
23183c4dfe9Sjg 	nvp_devname_t *np;
23283c4dfe9Sjg 	char	**strs;
23383c4dfe9Sjg 	int	*cnts;
23483c4dfe9Sjg 	uint_t	nstrs, ncnts;
23583c4dfe9Sjg 	int	rval, i;
23683c4dfe9Sjg 
23783c4dfe9Sjg 	ASSERT(fd == sdevfd_handle);
23883c4dfe9Sjg 	ASSERT(RW_WRITE_HELD(nvf_lock(fd)));
23983c4dfe9Sjg 
24083c4dfe9Sjg 	/* name of the sublist must match what we created */
24183c4dfe9Sjg 	if (strcmp(name, DP_DEVNAME_ID) != 0) {
24283c4dfe9Sjg 		return (-1);
24383c4dfe9Sjg 	}
24483c4dfe9Sjg 
24583c4dfe9Sjg 	np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP);
24683c4dfe9Sjg 
24783c4dfe9Sjg 	rval = nvlist_lookup_string_array(nvl,
24883c4dfe9Sjg 	    DP_DEVNAME_NCACHE_ID, &strs, &nstrs);
24983c4dfe9Sjg 	if (rval) {
25083c4dfe9Sjg 		kmem_free(np, sizeof (nvp_devname_t));
25183c4dfe9Sjg 		return (-1);
25283c4dfe9Sjg 	}
25383c4dfe9Sjg 
25483c4dfe9Sjg 	np->nvp_npaths = nstrs;
25583c4dfe9Sjg 	np->nvp_paths = kmem_zalloc(nstrs * sizeof (char *), KM_SLEEP);
25683c4dfe9Sjg 	for (i = 0; i < nstrs; i++) {
25783c4dfe9Sjg 		np->nvp_paths[i] = i_ddi_strdup(strs[i], KM_SLEEP);
25883c4dfe9Sjg 	}
25983c4dfe9Sjg 	np->nvp_expirecnts = kmem_zalloc(nstrs * sizeof (int), KM_SLEEP);
26083c4dfe9Sjg 	for (i = 0; i < nstrs; i++) {
26183c4dfe9Sjg 		np->nvp_expirecnts[i] = sdev_nc_expirecnt;
26283c4dfe9Sjg 	}
26383c4dfe9Sjg 
26483c4dfe9Sjg 	rval = nvlist_lookup_int32_array(nvl,
26583c4dfe9Sjg 	    DP_DEVNAME_NC_EXPIRECNT_ID, &cnts, &ncnts);
26683c4dfe9Sjg 	if (rval == 0) {
26783c4dfe9Sjg 		ASSERT(ncnts == nstrs);
26883c4dfe9Sjg 		ncnts = min(ncnts, nstrs);
26983c4dfe9Sjg 		for (i = 0; i < nstrs; i++) {
27083c4dfe9Sjg 			np->nvp_expirecnts[i] = cnts[i];
27183c4dfe9Sjg 		}
27283c4dfe9Sjg 	}
27383c4dfe9Sjg 
27483c4dfe9Sjg 	list_insert_tail(nvf_list(sdevfd_handle), np);
27583c4dfe9Sjg 
27683c4dfe9Sjg 	return (0);
27783c4dfe9Sjg }
27883c4dfe9Sjg 
27983c4dfe9Sjg /*
28083c4dfe9Sjg  * Pack internal format cache data to a single nvlist.
28183c4dfe9Sjg  * Used when writing the nvlist file.
28283c4dfe9Sjg  * Note this is called indirectly by the nvpflush daemon.
28383c4dfe9Sjg  */
28483c4dfe9Sjg static int
sdev_ncache_pack_list(nvf_handle_t fd,nvlist_t ** ret_nvl)28583c4dfe9Sjg sdev_ncache_pack_list(nvf_handle_t fd, nvlist_t **ret_nvl)
28683c4dfe9Sjg {
28783c4dfe9Sjg 	nvlist_t	*nvl, *sub_nvl;
28883c4dfe9Sjg 	nvp_devname_t	*np;
28983c4dfe9Sjg 	int		rval;
29083c4dfe9Sjg 	list_t		*listp;
29183c4dfe9Sjg 
29283c4dfe9Sjg 	ASSERT(fd == sdevfd_handle);
29383c4dfe9Sjg 	ASSERT(RW_WRITE_HELD(nvf_lock(fd)));
29483c4dfe9Sjg 
29583c4dfe9Sjg 	rval = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
29683c4dfe9Sjg 	if (rval != 0) {
29783c4dfe9Sjg 		nvf_error("%s: nvlist alloc error %d\n",
29883c4dfe9Sjg 		    nvf_cache_name(fd), rval);
29983c4dfe9Sjg 		return (DDI_FAILURE);
30083c4dfe9Sjg 	}
30183c4dfe9Sjg 
30283c4dfe9Sjg 	listp = nvf_list(sdevfd_handle);
30383c4dfe9Sjg 	if ((np = list_head(listp)) != NULL) {
30483c4dfe9Sjg 		ASSERT(list_next(listp, np) == NULL);
30583c4dfe9Sjg 
30683c4dfe9Sjg 		rval = nvlist_alloc(&sub_nvl, NV_UNIQUE_NAME, KM_SLEEP);
30783c4dfe9Sjg 		if (rval != 0) {
30883c4dfe9Sjg 			nvf_error("%s: nvlist alloc error %d\n",
30983c4dfe9Sjg 			    nvf_cache_name(fd), rval);
31083c4dfe9Sjg 			sub_nvl = NULL;
31183c4dfe9Sjg 			goto err;
31283c4dfe9Sjg 		}
31383c4dfe9Sjg 
31483c4dfe9Sjg 		rval = nvlist_add_string_array(sub_nvl,
31583c4dfe9Sjg 		    DP_DEVNAME_NCACHE_ID, np->nvp_paths, np->nvp_npaths);
31683c4dfe9Sjg 		if (rval != 0) {
31783c4dfe9Sjg 			nvf_error("%s: nvlist add error %d (sdev)\n",
31883c4dfe9Sjg 			    nvf_cache_name(fd), rval);
31983c4dfe9Sjg 			goto err;
32083c4dfe9Sjg 		}
32183c4dfe9Sjg 
32283c4dfe9Sjg 		rval = nvlist_add_int32_array(sub_nvl,
32383c4dfe9Sjg 		    DP_DEVNAME_NC_EXPIRECNT_ID,
32483c4dfe9Sjg 		    np->nvp_expirecnts, np->nvp_npaths);
32583c4dfe9Sjg 		if (rval != 0) {
32683c4dfe9Sjg 			nvf_error("%s: nvlist add error %d (sdev)\n",
32783c4dfe9Sjg 			    nvf_cache_name(fd), rval);
32883c4dfe9Sjg 			goto err;
32983c4dfe9Sjg 		}
33083c4dfe9Sjg 
33183c4dfe9Sjg 		rval = nvlist_add_nvlist(nvl, DP_DEVNAME_ID, sub_nvl);
33283c4dfe9Sjg 		if (rval != 0) {
33383c4dfe9Sjg 			nvf_error("%s: nvlist add error %d (sublist)\n",
33483c4dfe9Sjg 			    nvf_cache_name(fd), rval);
33583c4dfe9Sjg 			goto err;
33683c4dfe9Sjg 		}
33783c4dfe9Sjg 		nvlist_free(sub_nvl);
33883c4dfe9Sjg 	}
33983c4dfe9Sjg 
34083c4dfe9Sjg 	*ret_nvl = nvl;
34183c4dfe9Sjg 	return (DDI_SUCCESS);
34283c4dfe9Sjg 
34383c4dfe9Sjg err:
34483c4dfe9Sjg 	nvlist_free(sub_nvl);
34583c4dfe9Sjg 	nvlist_free(nvl);
34683c4dfe9Sjg 	*ret_nvl = NULL;
34783c4dfe9Sjg 	return (DDI_FAILURE);
34883c4dfe9Sjg }
34983c4dfe9Sjg 
35083c4dfe9Sjg /*
35183c4dfe9Sjg  * Run through the data read from the backing cache store
35283c4dfe9Sjg  * to establish the initial state of the neg. cache.
35383c4dfe9Sjg  */
35483c4dfe9Sjg static void
sdev_ncache_process_store(void)355facf4a8dSllai1 sdev_ncache_process_store(void)
356facf4a8dSllai1 {
357facf4a8dSllai1 	sdev_nc_list_t	*ncl = sdev_ncache;
358facf4a8dSllai1 	nvp_devname_t	*np;
359facf4a8dSllai1 	sdev_nc_node_t	*lp;
360facf4a8dSllai1 	char		*path;
361facf4a8dSllai1 	int		i, n;
36283c4dfe9Sjg 	list_t		*listp;
363facf4a8dSllai1 
364facf4a8dSllai1 	if (sdev_nc_disable)
365facf4a8dSllai1 		return;
366facf4a8dSllai1 
36783c4dfe9Sjg 	ASSERT(RW_WRITE_HELD(nvf_lock(sdevfd_handle)));
36883c4dfe9Sjg 
36983c4dfe9Sjg 	listp = nvf_list(sdevfd_handle);
37083c4dfe9Sjg 	for (np = list_head(listp); np; np = list_next(listp, np)) {
371facf4a8dSllai1 		for (i = 0; i < np->nvp_npaths; i++) {
372facf4a8dSllai1 			sdcmn_err5(("    %s %d\n",
373facf4a8dSllai1 			    np->nvp_paths[i], np->nvp_expirecnts[i]));
374facf4a8dSllai1 			if (ncl->ncl_nentries < sdev_nc_max_entries) {
375facf4a8dSllai1 				path = np->nvp_paths[i];
376facf4a8dSllai1 				n = strlen(path) + 1;
377facf4a8dSllai1 				lp = kmem_alloc(sizeof (sdev_nc_node_t),
378facf4a8dSllai1 				    KM_SLEEP);
379facf4a8dSllai1 				lp->ncn_name = kmem_alloc(n, KM_SLEEP);
380facf4a8dSllai1 				bcopy(path, lp->ncn_name, n);
381facf4a8dSllai1 				lp->ncn_flags = NCN_SRC_STORE;
382facf4a8dSllai1 				lp->ncn_expirecnt = np->nvp_expirecnts[i];
383facf4a8dSllai1 				sdev_nc_insertnode(ncl, lp);
384facf4a8dSllai1 			} else if (sdev_nc_verbose) {
385facf4a8dSllai1 				cmn_err(CE_CONT,
386facf4a8dSllai1 				    "?%s: truncating from ncache (max %d)\n",
387facf4a8dSllai1 				    np->nvp_paths[i], sdev_nc_max_entries);
388facf4a8dSllai1 			}
389facf4a8dSllai1 		}
390facf4a8dSllai1 	}
391facf4a8dSllai1 }
392facf4a8dSllai1 
39383c4dfe9Sjg /*
39483c4dfe9Sjg  * called by nvpflush daemon to inform us that an update of
39583c4dfe9Sjg  * the cache file has been completed.
39683c4dfe9Sjg  */
397facf4a8dSllai1 static void
sdev_ncache_write_complete(nvf_handle_t fd)39883c4dfe9Sjg sdev_ncache_write_complete(nvf_handle_t fd)
399facf4a8dSllai1 {
400facf4a8dSllai1 	sdev_nc_list_t	*ncl = sdev_ncache;
401facf4a8dSllai1 
40283c4dfe9Sjg 	ASSERT(fd == sdevfd_handle);
40383c4dfe9Sjg 
404facf4a8dSllai1 	mutex_enter(&ncl->ncl_mutex);
405facf4a8dSllai1 
406facf4a8dSllai1 	ASSERT(ncl->ncl_flags & NCL_LIST_WRITING);
407facf4a8dSllai1 
408facf4a8dSllai1 	if (ncl->ncl_flags & NCL_LIST_DIRTY) {
409facf4a8dSllai1 		sdcmn_err5(("ncache write complete but dirty again\n"));
410facf4a8dSllai1 		ncl->ncl_flags &= ~NCL_LIST_DIRTY;
411facf4a8dSllai1 		mutex_exit(&ncl->ncl_mutex);
412facf4a8dSllai1 		sdev_ncache_write();
413facf4a8dSllai1 	} else {
414facf4a8dSllai1 		sdcmn_err5(("ncache write complete\n"));
415facf4a8dSllai1 		ncl->ncl_flags &= ~NCL_LIST_WRITING;
416facf4a8dSllai1 		mutex_exit(&ncl->ncl_mutex);
41783c4dfe9Sjg 		rw_enter(nvf_lock(fd), RW_WRITER);
41883c4dfe9Sjg 		sdev_ncache_list_free(fd);
41983c4dfe9Sjg 		rw_exit(nvf_lock(fd));
420facf4a8dSllai1 	}
421facf4a8dSllai1 }
422facf4a8dSllai1 
42383c4dfe9Sjg /*
42483c4dfe9Sjg  * Prepare to perform an update of the neg. cache backing store.
42583c4dfe9Sjg  */
426facf4a8dSllai1 static void
sdev_ncache_write(void)427facf4a8dSllai1 sdev_ncache_write(void)
428facf4a8dSllai1 {
429facf4a8dSllai1 	sdev_nc_list_t	*ncl = sdev_ncache;
430facf4a8dSllai1 	nvp_devname_t	*np;
431facf4a8dSllai1 	sdev_nc_node_t	*lp;
432facf4a8dSllai1 	int		n, i;
433facf4a8dSllai1 
434facf4a8dSllai1 	if (sdev_cache_write_disable) {
435facf4a8dSllai1 		mutex_enter(&ncl->ncl_mutex);
436facf4a8dSllai1 		ncl->ncl_flags &= ~NCL_LIST_WRITING;
437facf4a8dSllai1 		mutex_exit(&ncl->ncl_mutex);
438facf4a8dSllai1 		return;
439facf4a8dSllai1 	}
440facf4a8dSllai1 
441facf4a8dSllai1 	/* proper lock ordering here is essential */
44283c4dfe9Sjg 	rw_enter(nvf_lock(sdevfd_handle), RW_WRITER);
44383c4dfe9Sjg 	sdev_ncache_list_free(sdevfd_handle);
444facf4a8dSllai1 
445facf4a8dSllai1 	rw_enter(&ncl->ncl_lock, RW_READER);
446facf4a8dSllai1 	n = ncl->ncl_nentries;
447facf4a8dSllai1 	ASSERT(n <= sdev_nc_max_entries);
448facf4a8dSllai1 
449facf4a8dSllai1 	np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP);
450facf4a8dSllai1 	np->nvp_npaths = n;
451facf4a8dSllai1 	np->nvp_paths = kmem_zalloc(n * sizeof (char *), KM_SLEEP);
452facf4a8dSllai1 	np->nvp_expirecnts = kmem_zalloc(n * sizeof (int), KM_SLEEP);
453facf4a8dSllai1 
454facf4a8dSllai1 	i = 0;
455facf4a8dSllai1 	for (lp = list_head(&ncl->ncl_list); lp;
456facf4a8dSllai1 	    lp = list_next(&ncl->ncl_list, lp)) {
457facf4a8dSllai1 		np->nvp_paths[i] = i_ddi_strdup(lp->ncn_name, KM_SLEEP);
458facf4a8dSllai1 		np->nvp_expirecnts[i] = lp->ncn_expirecnt;
459facf4a8dSllai1 		sdcmn_err5(("    %s %d\n",
460facf4a8dSllai1 		    np->nvp_paths[i], np->nvp_expirecnts[i]));
461facf4a8dSllai1 		i++;
462facf4a8dSllai1 	}
463facf4a8dSllai1 
464facf4a8dSllai1 	rw_exit(&ncl->ncl_lock);
465facf4a8dSllai1 
46683c4dfe9Sjg 	nvf_mark_dirty(sdevfd_handle);
46783c4dfe9Sjg 	list_insert_tail(nvf_list(sdevfd_handle), np);
46883c4dfe9Sjg 	rw_exit(nvf_lock(sdevfd_handle));
469facf4a8dSllai1 
47083c4dfe9Sjg 	nvf_wake_daemon();
471facf4a8dSllai1 }
472facf4a8dSllai1 
473facf4a8dSllai1 static void
sdev_nc_flush_updates(void)474facf4a8dSllai1 sdev_nc_flush_updates(void)
475facf4a8dSllai1 {
476facf4a8dSllai1 	sdev_nc_list_t *ncl = sdev_ncache;
477facf4a8dSllai1 
478facf4a8dSllai1 	if (sdev_nc_disable || sdev_cache_write_disable)
479facf4a8dSllai1 		return;
480facf4a8dSllai1 
481facf4a8dSllai1 	mutex_enter(&ncl->ncl_mutex);
482facf4a8dSllai1 	if (((ncl->ncl_flags &
483facf4a8dSllai1 	    (NCL_LIST_DIRTY | NCL_LIST_WENABLE | NCL_LIST_WRITING)) ==
484facf4a8dSllai1 	    (NCL_LIST_DIRTY | NCL_LIST_WENABLE))) {
485facf4a8dSllai1 		ncl->ncl_flags &= ~NCL_LIST_DIRTY;
486facf4a8dSllai1 		ncl->ncl_flags |= NCL_LIST_WRITING;
487facf4a8dSllai1 		mutex_exit(&ncl->ncl_mutex);
488facf4a8dSllai1 		sdev_ncache_write();
489facf4a8dSllai1 	} else {
490facf4a8dSllai1 		mutex_exit(&ncl->ncl_mutex);
491facf4a8dSllai1 	}
492facf4a8dSllai1 }
493facf4a8dSllai1 
494facf4a8dSllai1 static void
sdev_nc_flush_boot_update(void)495facf4a8dSllai1 sdev_nc_flush_boot_update(void)
496facf4a8dSllai1 {
497facf4a8dSllai1 	sdev_nc_list_t *ncl = sdev_ncache;
498facf4a8dSllai1 
499facf4a8dSllai1 	if (sdev_nc_disable || sdev_cache_write_disable ||
500facf4a8dSllai1 	    (sdev_boot_state == SDEV_BOOT_STATE_INITIAL)) {
501facf4a8dSllai1 		return;
502facf4a8dSllai1 	}
503facf4a8dSllai1 	mutex_enter(&ncl->ncl_mutex);
504facf4a8dSllai1 	if (ncl->ncl_flags & NCL_LIST_WENABLE) {
505facf4a8dSllai1 		mutex_exit(&ncl->ncl_mutex);
506facf4a8dSllai1 		sdev_nc_flush_updates();
507facf4a8dSllai1 	} else {
508facf4a8dSllai1 		mutex_exit(&ncl->ncl_mutex);
509facf4a8dSllai1 	}
510facf4a8dSllai1 
511facf4a8dSllai1 }
512facf4a8dSllai1 
513facf4a8dSllai1 static void
sdev_state_boot_complete()514facf4a8dSllai1 sdev_state_boot_complete()
515facf4a8dSllai1 {
516facf4a8dSllai1 	sdev_nc_list_t	*ncl = sdev_ncache;
517facf4a8dSllai1 	sdev_nc_node_t	*lp, *next;
518facf4a8dSllai1 
519facf4a8dSllai1 	/*
520facf4a8dSllai1 	 * Once boot is complete, decrement the expire count of each entry
521facf4a8dSllai1 	 * in the cache not touched by a reference.  Remove any that
522facf4a8dSllai1 	 * goes to zero.  This effectively removes random entries over
523facf4a8dSllai1 	 * time.
524facf4a8dSllai1 	 */
525facf4a8dSllai1 	rw_enter(&ncl->ncl_lock, RW_WRITER);
526facf4a8dSllai1 	mutex_enter(&ncl->ncl_mutex);
527facf4a8dSllai1 
528facf4a8dSllai1 	for (lp = list_head(&ncl->ncl_list); lp; lp = next) {
529facf4a8dSllai1 		next = list_next(&ncl->ncl_list, lp);
530facf4a8dSllai1 		if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0) {
531facf4a8dSllai1 			if (lp->ncn_flags & NCN_ACTIVE) {
532facf4a8dSllai1 				if (lp->ncn_expirecnt != sdev_nc_expirecnt) {
533facf4a8dSllai1 					lp->ncn_expirecnt = sdev_nc_expirecnt;
534facf4a8dSllai1 					ncl->ncl_flags |= NCL_LIST_DIRTY;
535facf4a8dSllai1 				}
536facf4a8dSllai1 			} else {
537facf4a8dSllai1 				if (--lp->ncn_expirecnt == 0) {
538facf4a8dSllai1 					list_remove(&ncl->ncl_list, lp);
539facf4a8dSllai1 					sdev_nc_free_unlinked_node(lp);
540facf4a8dSllai1 					ncl->ncl_nentries--;
541facf4a8dSllai1 				}
542facf4a8dSllai1 				ncl->ncl_flags |= NCL_LIST_DIRTY;
543facf4a8dSllai1 			}
544facf4a8dSllai1 		}
545facf4a8dSllai1 	}
546facf4a8dSllai1 
547facf4a8dSllai1 	mutex_exit(&ncl->ncl_mutex);
548facf4a8dSllai1 	rw_exit(&ncl->ncl_lock);
549facf4a8dSllai1 
550facf4a8dSllai1 	sdev_nc_flush_boot_update();
551facf4a8dSllai1 	sdev_boot_state = SDEV_BOOT_STATE_COMPLETE;
552facf4a8dSllai1 }
553facf4a8dSllai1 
/*
 * Upon transition to the login state on a reconfigure boot,
 * a debounce timer is set up so that we cache all the nonsense
 * lookups we're hit with by the windowing system startup.
 *
 * This is the handler for that timer: when it fires, boot is
 * considered complete.  The argument is unused.
 */

/*ARGSUSED*/
static void
sdev_state_timeout(void *arg)
{
	sdev_state_boot_complete();
}
566facf4a8dSllai1 
567facf4a8dSllai1 static void
sdev_state_sysavail()568facf4a8dSllai1 sdev_state_sysavail()
569facf4a8dSllai1 {
570facf4a8dSllai1 	sdev_nc_list_t *ncl = sdev_ncache;
571facf4a8dSllai1 	clock_t	nticks;
572facf4a8dSllai1 	int nsecs;
573facf4a8dSllai1 
574facf4a8dSllai1 	mutex_enter(&ncl->ncl_mutex);
575facf4a8dSllai1 	ncl->ncl_flags |= NCL_LIST_WENABLE;
576facf4a8dSllai1 	mutex_exit(&ncl->ncl_mutex);
577facf4a8dSllai1 
578facf4a8dSllai1 	nsecs = sdev_reconfig_delay;
579facf4a8dSllai1 	if (nsecs == 0) {
580facf4a8dSllai1 		sdev_state_boot_complete();
581facf4a8dSllai1 	} else {
582facf4a8dSllai1 		nticks = drv_usectohz(1000000 * nsecs);
583facf4a8dSllai1 		sdcmn_err5(("timeout initiated %ld\n", nticks));
584bc1009abSjg 		(void) timeout(sdev_state_timeout, NULL, nticks);
585facf4a8dSllai1 		sdev_nc_flush_boot_update();
586facf4a8dSllai1 	}
587facf4a8dSllai1 }
588facf4a8dSllai1 
589facf4a8dSllai1 /*
590facf4a8dSllai1  * Called to inform the filesystem of progress during boot,
591facf4a8dSllai1  * either a notice of reconfiguration boot or an indication of
592facf4a8dSllai1  * system boot complete.  At system boot complete, set up a
593facf4a8dSllai1  * timer at the expiration of which no further failed lookups
594facf4a8dSllai1  * will be added to the negative cache.
595facf4a8dSllai1  *
596facf4a8dSllai1  * The dev filesystem infers from reconfig boot that implicit
597facf4a8dSllai1  * reconfig need not be invoked at all as all available devices
598facf4a8dSllai1  * will have already been named.
599facf4a8dSllai1  *
600facf4a8dSllai1  * The dev filesystem infers from "system available" that devfsadmd
601facf4a8dSllai1  * can now be run and hence implicit reconfiguration may be initiated.
602facf4a8dSllai1  * During early stages of system startup, implicit reconfig is
603facf4a8dSllai1  * not done to avoid impacting boot performance.
604facf4a8dSllai1  */
605facf4a8dSllai1 void
sdev_devstate_change(void)606facf4a8dSllai1 sdev_devstate_change(void)
607facf4a8dSllai1 {
608facf4a8dSllai1 	int new_state;
609facf4a8dSllai1 
610facf4a8dSllai1 	/*
611facf4a8dSllai1 	 * Track system state and manage interesting transitions
612facf4a8dSllai1 	 */
613facf4a8dSllai1 	new_state = SDEV_BOOT_STATE_INITIAL;
614facf4a8dSllai1 	if (i_ddi_reconfig())
615facf4a8dSllai1 		new_state = SDEV_BOOT_STATE_RECONFIG;
616facf4a8dSllai1 	if (i_ddi_sysavail())
617facf4a8dSllai1 		new_state = SDEV_BOOT_STATE_SYSAVAIL;
618facf4a8dSllai1 
619facf4a8dSllai1 	if (sdev_boot_state < new_state) {
620facf4a8dSllai1 		switch (new_state) {
621facf4a8dSllai1 		case SDEV_BOOT_STATE_RECONFIG:
622facf4a8dSllai1 			sdcmn_err5(("state change: reconfigure boot\n"));
623facf4a8dSllai1 			sdev_boot_state = new_state;
624*89dfdb3fSSrikanth, Ramana 			/*
625*89dfdb3fSSrikanth, Ramana 			 * The /dev filesystem fills a hot-plug .vs.
626*89dfdb3fSSrikanth, Ramana 			 * public-namespace gap by invoking 'devfsadm' once
627*89dfdb3fSSrikanth, Ramana 			 * as a result of the first /dev lookup failure
628*89dfdb3fSSrikanth, Ramana 			 * (or getdents/readdir). Originally, it was thought
629*89dfdb3fSSrikanth, Ramana 			 * that a reconfig reboot did not have a hot-plug gap,
630*89dfdb3fSSrikanth, Ramana 			 * but this is not true - the gap is just smaller:
631*89dfdb3fSSrikanth, Ramana 			 * it exists from the the time the smf invocation of
632*89dfdb3fSSrikanth, Ramana 			 * devfsadm completes its forced devinfo snapshot,
633*89dfdb3fSSrikanth, Ramana 			 * to the time when the smf devfsadmd daemon invocation
634*89dfdb3fSSrikanth, Ramana 			 * is set up and listening for hotplug sysevents.
635*89dfdb3fSSrikanth, Ramana 			 * Since there is still a gap with reconfig reboot,
636*89dfdb3fSSrikanth, Ramana 			 * we no longer set 'sdev_reconfig_boot'.
637*89dfdb3fSSrikanth, Ramana 			 */
638facf4a8dSllai1 			if (!sdev_nc_disable_reset)
639facf4a8dSllai1 				sdev_nc_free_bootonly();
640facf4a8dSllai1 			break;
641facf4a8dSllai1 		case SDEV_BOOT_STATE_SYSAVAIL:
642facf4a8dSllai1 			sdcmn_err5(("system available\n"));
643facf4a8dSllai1 			sdev_boot_state = new_state;
644facf4a8dSllai1 			sdev_state_sysavail();
645facf4a8dSllai1 			break;
646facf4a8dSllai1 		}
647facf4a8dSllai1 	}
648facf4a8dSllai1 }
649facf4a8dSllai1 
650facf4a8dSllai1 /*
651facf4a8dSllai1  * Lookup: filter out entries in the negative cache
652facf4a8dSllai1  * Return 1 if the lookup should not cause a reconfig.
653facf4a8dSllai1  */
654facf4a8dSllai1 int
sdev_lookup_filter(sdev_node_t * dv,char * nm)655facf4a8dSllai1 sdev_lookup_filter(sdev_node_t *dv, char *nm)
656facf4a8dSllai1 {
657facf4a8dSllai1 	int n;
658facf4a8dSllai1 	sdev_nc_list_t *ncl = sdev_ncache;
659facf4a8dSllai1 	sdev_nc_node_t *lp;
660facf4a8dSllai1 	char *path;
661facf4a8dSllai1 	int rval = 0;
662facf4a8dSllai1 	int changed = 0;
663facf4a8dSllai1 
664facf4a8dSllai1 	ASSERT(i_ddi_io_initialized());
665facf4a8dSllai1 	ASSERT(SDEVTOV(dv)->v_type == VDIR);
666facf4a8dSllai1 
667facf4a8dSllai1 	if (sdev_nc_disable)
668facf4a8dSllai1 		return (0);
669facf4a8dSllai1 
670facf4a8dSllai1 	n = strlen(dv->sdev_path) + strlen(nm) + 2;
671facf4a8dSllai1 	path = kmem_alloc(n, KM_SLEEP);
672facf4a8dSllai1 	(void) sprintf(path, "%s/%s", dv->sdev_path, nm);
673facf4a8dSllai1 
674facf4a8dSllai1 	rw_enter(&ncl->ncl_lock, RW_READER);
675facf4a8dSllai1 	if ((lp = sdev_nc_findpath(ncl, path)) != NULL) {
676facf4a8dSllai1 		sdcmn_err5(("%s/%s: lookup by %s cached, no reconfig\n",
677facf4a8dSllai1 		    dv->sdev_name, nm, curproc->p_user.u_comm));
678facf4a8dSllai1 		if (sdev_nc_verbose) {
679facf4a8dSllai1 			cmn_err(CE_CONT,
680facf4a8dSllai1 			    "?%s/%s: lookup by %s cached, no reconfig\n",
681facf4a8dSllai1 			    dv->sdev_name, nm, curproc->p_user.u_comm);
682facf4a8dSllai1 		}
683facf4a8dSllai1 		mutex_enter(&ncl->ncl_mutex);
684facf4a8dSllai1 		lp->ncn_flags |= NCN_ACTIVE;
685facf4a8dSllai1 		if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0 &&
686facf4a8dSllai1 		    lp->ncn_expirecnt < sdev_nc_expirecnt) {
687facf4a8dSllai1 			lp->ncn_expirecnt = sdev_nc_expirecnt;
688facf4a8dSllai1 			ncl->ncl_flags |= NCL_LIST_DIRTY;
689facf4a8dSllai1 			changed = 1;
690facf4a8dSllai1 		}
691facf4a8dSllai1 		mutex_exit(&ncl->ncl_mutex);
692facf4a8dSllai1 		rval = 1;
693facf4a8dSllai1 	}
694facf4a8dSllai1 	rw_exit(&ncl->ncl_lock);
695facf4a8dSllai1 	kmem_free(path, n);
696facf4a8dSllai1 	if (changed)
697facf4a8dSllai1 		sdev_nc_flush_boot_update();
698facf4a8dSllai1 	return (rval);
699facf4a8dSllai1 }
700facf4a8dSllai1 
701facf4a8dSllai1 void
sdev_lookup_failed(sdev_node_t * dv,char * nm,int failed_flags)702facf4a8dSllai1 sdev_lookup_failed(sdev_node_t *dv, char *nm, int failed_flags)
703facf4a8dSllai1 {
704facf4a8dSllai1 	if (sdev_nc_disable)
705facf4a8dSllai1 		return;
706facf4a8dSllai1 
707facf4a8dSllai1 	/*
708facf4a8dSllai1 	 * If we're still in the initial boot stage, always update
709facf4a8dSllai1 	 * the cache - we may not have received notice of the
710facf4a8dSllai1 	 * reconfig boot state yet.  On a reconfigure boot, entries
711facf4a8dSllai1 	 * from the backing store are not re-persisted on update,
712facf4a8dSllai1 	 * but new entries are marked as needing an update.
713facf4a8dSllai1 	 * Never cache dynamic or non-global nodes.
714facf4a8dSllai1 	 */
715facf4a8dSllai1 	if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) &&
716facf4a8dSllai1 	    !SDEV_IS_NO_NCACHE(dv) &&
717facf4a8dSllai1 	    ((failed_flags & SLF_NO_NCACHE) == 0) &&
718facf4a8dSllai1 	    ((sdev_reconfig_boot &&
719facf4a8dSllai1 	    (sdev_boot_state != SDEV_BOOT_STATE_COMPLETE)) ||
720facf4a8dSllai1 	    (!sdev_reconfig_boot && ((failed_flags & SLF_REBUILT))))) {
721facf4a8dSllai1 			sdev_nc_addname(sdev_ncache,
722facf4a8dSllai1 			    dv, nm, NCN_SRC_CURRENT|NCN_ACTIVE);
723facf4a8dSllai1 	}
724facf4a8dSllai1 }
725facf4a8dSllai1 
726facf4a8dSllai1 static sdev_nc_list_t *
sdev_nc_newlist(void)727facf4a8dSllai1 sdev_nc_newlist(void)
728facf4a8dSllai1 {
729facf4a8dSllai1 	sdev_nc_list_t	*ncl;
730facf4a8dSllai1 
731facf4a8dSllai1 	ncl = kmem_zalloc(sizeof (sdev_nc_list_t), KM_SLEEP);
732facf4a8dSllai1 
733facf4a8dSllai1 	rw_init(&ncl->ncl_lock, NULL, RW_DEFAULT, NULL);
734facf4a8dSllai1 	mutex_init(&ncl->ncl_mutex, NULL, MUTEX_DEFAULT, NULL);
735facf4a8dSllai1 	list_create(&ncl->ncl_list, sizeof (sdev_nc_node_t),
736facf4a8dSllai1 	    offsetof(sdev_nc_node_t, ncn_link));
737facf4a8dSllai1 
738facf4a8dSllai1 	return (ncl);
739facf4a8dSllai1 }
740facf4a8dSllai1 
741facf4a8dSllai1 static void
sdev_nc_free_unlinked_node(sdev_nc_node_t * lp)742facf4a8dSllai1 sdev_nc_free_unlinked_node(sdev_nc_node_t *lp)
743facf4a8dSllai1 {
744facf4a8dSllai1 	kmem_free(lp->ncn_name, strlen(lp->ncn_name) + 1);
745facf4a8dSllai1 	kmem_free(lp, sizeof (sdev_nc_node_t));
746facf4a8dSllai1 }
747facf4a8dSllai1 
748facf4a8dSllai1 static sdev_nc_node_t *
sdev_nc_findpath(sdev_nc_list_t * ncl,char * path)749facf4a8dSllai1 sdev_nc_findpath(sdev_nc_list_t *ncl, char *path)
750facf4a8dSllai1 {
751facf4a8dSllai1 	sdev_nc_node_t *lp;
752facf4a8dSllai1 
753facf4a8dSllai1 	ASSERT(RW_LOCK_HELD(&ncl->ncl_lock));
754facf4a8dSllai1 
755facf4a8dSllai1 	for (lp = list_head(&ncl->ncl_list); lp;
756facf4a8dSllai1 	    lp = list_next(&ncl->ncl_list, lp)) {
757facf4a8dSllai1 		if (strcmp(path, lp->ncn_name) == 0)
758facf4a8dSllai1 			return (lp);
759facf4a8dSllai1 	}
760facf4a8dSllai1 
761facf4a8dSllai1 	return (NULL);
762facf4a8dSllai1 }
763facf4a8dSllai1 
764facf4a8dSllai1 static void
sdev_nc_insertnode(sdev_nc_list_t * ncl,sdev_nc_node_t * new)765facf4a8dSllai1 sdev_nc_insertnode(sdev_nc_list_t *ncl, sdev_nc_node_t *new)
766facf4a8dSllai1 {
767facf4a8dSllai1 	sdev_nc_node_t *lp;
768facf4a8dSllai1 
769facf4a8dSllai1 	rw_enter(&ncl->ncl_lock, RW_WRITER);
770facf4a8dSllai1 
771facf4a8dSllai1 	lp = sdev_nc_findpath(ncl, new->ncn_name);
772facf4a8dSllai1 	if (lp == NULL) {
773facf4a8dSllai1 		if (ncl->ncl_nentries == sdev_nc_max_entries) {
774facf4a8dSllai1 			sdcmn_err5((
775facf4a8dSllai1 			    "%s by %s: not adding to ncache (max %d)\n",
776facf4a8dSllai1 			    new->ncn_name, curproc->p_user.u_comm,
777facf4a8dSllai1 			    ncl->ncl_nentries));
778facf4a8dSllai1 			if (sdev_nc_verbose) {
779facf4a8dSllai1 				cmn_err(CE_CONT, "?%s by %s: "
780facf4a8dSllai1 				    "not adding to ncache (max %d)\n",
781facf4a8dSllai1 				    new->ncn_name, curproc->p_user.u_comm,
782facf4a8dSllai1 				    ncl->ncl_nentries);
783facf4a8dSllai1 			}
784facf4a8dSllai1 			rw_exit(&ncl->ncl_lock);
785facf4a8dSllai1 			sdev_nc_free_unlinked_node(new);
786facf4a8dSllai1 		} else {
787facf4a8dSllai1 
788facf4a8dSllai1 			list_insert_tail(&ncl->ncl_list, new);
789facf4a8dSllai1 			ncl->ncl_nentries++;
790facf4a8dSllai1 
791facf4a8dSllai1 			/* don't mark list dirty for nodes from store */
792facf4a8dSllai1 			mutex_enter(&ncl->ncl_mutex);
793facf4a8dSllai1 			if ((new->ncn_flags & NCN_SRC_STORE) == 0) {
794facf4a8dSllai1 				sdcmn_err5(("%s by %s: add to ncache\n",
795facf4a8dSllai1 				    new->ncn_name, curproc->p_user.u_comm));
796facf4a8dSllai1 				if (sdev_nc_verbose) {
797facf4a8dSllai1 					cmn_err(CE_CONT,
798facf4a8dSllai1 					    "?%s by %s: add to ncache\n",
799facf4a8dSllai1 					    new->ncn_name,
800facf4a8dSllai1 					    curproc->p_user.u_comm);
801facf4a8dSllai1 				}
802facf4a8dSllai1 				ncl->ncl_flags |= NCL_LIST_DIRTY;
803facf4a8dSllai1 			}
804facf4a8dSllai1 			mutex_exit(&ncl->ncl_mutex);
805facf4a8dSllai1 			rw_exit(&ncl->ncl_lock);
806facf4a8dSllai1 			lp = new;
807facf4a8dSllai1 			sdev_nc_flush_boot_update();
808facf4a8dSllai1 		}
809facf4a8dSllai1 	} else {
810facf4a8dSllai1 		mutex_enter(&ncl->ncl_mutex);
811facf4a8dSllai1 		lp->ncn_flags |= new->ncn_flags;
812facf4a8dSllai1 		mutex_exit(&ncl->ncl_mutex);
813facf4a8dSllai1 		rw_exit(&ncl->ncl_lock);
814facf4a8dSllai1 		sdev_nc_free_unlinked_node(new);
815facf4a8dSllai1 	}
816facf4a8dSllai1 }
817facf4a8dSllai1 
818facf4a8dSllai1 void
sdev_nc_addname(sdev_nc_list_t * ncl,sdev_node_t * dv,char * nm,int flags)819facf4a8dSllai1 sdev_nc_addname(sdev_nc_list_t *ncl, sdev_node_t *dv, char *nm, int flags)
820facf4a8dSllai1 {
821facf4a8dSllai1 	int n;
822facf4a8dSllai1 	sdev_nc_node_t *lp;
823facf4a8dSllai1 
824facf4a8dSllai1 	ASSERT(SDEVTOV(dv)->v_type == VDIR);
825facf4a8dSllai1 
826facf4a8dSllai1 	lp = kmem_zalloc(sizeof (sdev_nc_node_t), KM_SLEEP);
827facf4a8dSllai1 
828facf4a8dSllai1 	n = strlen(dv->sdev_path) + strlen(nm) + 2;
829facf4a8dSllai1 	lp->ncn_name = kmem_alloc(n, KM_SLEEP);
830facf4a8dSllai1 	(void) sprintf(lp->ncn_name, "%s/%s",
831facf4a8dSllai1 	    dv->sdev_path, nm);
832facf4a8dSllai1 	lp->ncn_flags = flags;
833facf4a8dSllai1 	lp->ncn_expirecnt = sdev_nc_expirecnt;
834facf4a8dSllai1 	sdev_nc_insertnode(ncl, lp);
835facf4a8dSllai1 }
836facf4a8dSllai1 
837facf4a8dSllai1 void
sdev_nc_node_exists(sdev_node_t * dv)838facf4a8dSllai1 sdev_nc_node_exists(sdev_node_t *dv)
839facf4a8dSllai1 {
840facf4a8dSllai1 	/* dynamic and non-global nodes are never cached */
841facf4a8dSllai1 	if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) &&
842facf4a8dSllai1 	    !SDEV_IS_NO_NCACHE(dv)) {
843facf4a8dSllai1 		sdev_nc_path_exists(sdev_ncache, dv->sdev_path);
844facf4a8dSllai1 	}
845facf4a8dSllai1 }
846facf4a8dSllai1 
847facf4a8dSllai1 void
sdev_nc_path_exists(sdev_nc_list_t * ncl,char * path)848facf4a8dSllai1 sdev_nc_path_exists(sdev_nc_list_t *ncl, char *path)
849facf4a8dSllai1 {
850facf4a8dSllai1 	sdev_nc_node_t *lp;
851facf4a8dSllai1 
852facf4a8dSllai1 	if (sdev_nc_disable)
853facf4a8dSllai1 		return;
854facf4a8dSllai1 
855facf4a8dSllai1 	rw_enter(&ncl->ncl_lock, RW_READER);
856facf4a8dSllai1 	if ((lp = sdev_nc_findpath(ncl, path)) == NULL) {
857facf4a8dSllai1 		rw_exit(&ncl->ncl_lock);
858facf4a8dSllai1 		return;
859facf4a8dSllai1 	}
860facf4a8dSllai1 	if (rw_tryupgrade(&ncl->ncl_lock) == 0) {
861facf4a8dSllai1 		rw_exit(&ncl->ncl_lock);
862facf4a8dSllai1 		rw_enter(&ncl->ncl_lock, RW_WRITER);
863facf4a8dSllai1 		lp = sdev_nc_findpath(ncl, path);
864facf4a8dSllai1 	}
865facf4a8dSllai1 	if (lp) {
866facf4a8dSllai1 		list_remove(&ncl->ncl_list, lp);
867facf4a8dSllai1 		ncl->ncl_nentries--;
868facf4a8dSllai1 		mutex_enter(&ncl->ncl_mutex);
869facf4a8dSllai1 		ncl->ncl_flags |= NCL_LIST_DIRTY;
870facf4a8dSllai1 		if (ncl->ncl_flags & NCL_LIST_WENABLE) {
871facf4a8dSllai1 			mutex_exit(&ncl->ncl_mutex);
872facf4a8dSllai1 			rw_exit(&ncl->ncl_lock);
873facf4a8dSllai1 			sdev_nc_flush_updates();
874facf4a8dSllai1 		} else {
875facf4a8dSllai1 			mutex_exit(&ncl->ncl_mutex);
876facf4a8dSllai1 			rw_exit(&ncl->ncl_lock);
877facf4a8dSllai1 		}
878facf4a8dSllai1 		sdev_nc_free_unlinked_node(lp);
879facf4a8dSllai1 		sdcmn_err5(("%s by %s: removed from ncache\n",
880facf4a8dSllai1 		    path, curproc->p_user.u_comm));
881facf4a8dSllai1 		if (sdev_nc_verbose) {
882facf4a8dSllai1 			cmn_err(CE_CONT, "?%s by %s: removed from ncache\n",
883facf4a8dSllai1 			    path, curproc->p_user.u_comm);
884facf4a8dSllai1 		}
885facf4a8dSllai1 	} else
886facf4a8dSllai1 		rw_exit(&ncl->ncl_lock);
887facf4a8dSllai1 }
888facf4a8dSllai1 
889facf4a8dSllai1 static void
sdev_nc_free_bootonly(void)890facf4a8dSllai1 sdev_nc_free_bootonly(void)
891facf4a8dSllai1 {
892facf4a8dSllai1 	sdev_nc_list_t	*ncl = sdev_ncache;
893facf4a8dSllai1 	sdev_nc_node_t *lp;
894facf4a8dSllai1 	sdev_nc_node_t *next;
895facf4a8dSllai1 
896facf4a8dSllai1 	rw_enter(&ncl->ncl_lock, RW_WRITER);
897facf4a8dSllai1 
898facf4a8dSllai1 	for (lp = list_head(&ncl->ncl_list); lp; lp = next) {
899facf4a8dSllai1 		next = list_next(&ncl->ncl_list, lp);
900facf4a8dSllai1 		if ((lp->ncn_flags & NCN_SRC_CURRENT) == 0) {
901facf4a8dSllai1 			sdcmn_err5(("freeing %s\n", lp->ncn_name));
902facf4a8dSllai1 			mutex_enter(&ncl->ncl_mutex);
903facf4a8dSllai1 			ncl->ncl_flags |= NCL_LIST_DIRTY;
904facf4a8dSllai1 			mutex_exit(&ncl->ncl_mutex);
905facf4a8dSllai1 			list_remove(&ncl->ncl_list, lp);
906facf4a8dSllai1 			sdev_nc_free_unlinked_node(lp);
907facf4a8dSllai1 			ncl->ncl_nentries--;
908facf4a8dSllai1 		}
909facf4a8dSllai1 	}
910facf4a8dSllai1 
911facf4a8dSllai1 	rw_exit(&ncl->ncl_lock);
912facf4a8dSllai1 }
913