xref: /titanic_44/usr/src/uts/common/fs/dev/sdev_subr.c (revision 2dd2efa5a06a9befe46075cf41e16f57533c9f98)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * utility routines for the /dev fs
30  */
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/t_lock.h>
35 #include <sys/systm.h>
36 #include <sys/sysmacros.h>
37 #include <sys/user.h>
38 #include <sys/time.h>
39 #include <sys/vfs.h>
40 #include <sys/vnode.h>
41 #include <sys/file.h>
42 #include <sys/fcntl.h>
43 #include <sys/flock.h>
44 #include <sys/kmem.h>
45 #include <sys/uio.h>
46 #include <sys/errno.h>
47 #include <sys/stat.h>
48 #include <sys/cred.h>
49 #include <sys/dirent.h>
50 #include <sys/pathname.h>
51 #include <sys/cmn_err.h>
52 #include <sys/debug.h>
53 #include <sys/mode.h>
54 #include <sys/policy.h>
55 #include <fs/fs_subr.h>
56 #include <sys/mount.h>
57 #include <sys/fs/snode.h>
58 #include <sys/fs/dv_node.h>
59 #include <sys/fs/sdev_impl.h>
60 #include <sys/fs/sdev_node.h>
61 #include <sys/sunndi.h>
62 #include <sys/sunmdi.h>
63 #include <sys/conf.h>
64 #include <sys/proc.h>
65 #include <sys/user.h>
66 #include <sys/modctl.h>
67 
#ifdef DEBUG
/*
 * DEBUG-only knobs.  sdev_debug_cache_flags is handed to
 * kmem_cache_create() by sdev_node_cache_init(); sdev_debug starts out
 * with only its lowest bit set.
 */
int sdev_debug_cache_flags = 0;
int sdev_debug = 0x1;
#endif	/* DEBUG */
72 
73 /*
74  * globals
75  */
76 /* prototype memory vattrs */
77 vattr_t sdev_vattr_dir = {
78 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
79 	VDIR,					/* va_type */
80 	SDEV_DIRMODE_DEFAULT,			/* va_mode */
81 	SDEV_UID_DEFAULT,			/* va_uid */
82 	SDEV_GID_DEFAULT,			/* va_gid */
83 	0,					/* va_fsid */
84 	0,					/* va_nodeid */
85 	0,					/* va_nlink */
86 	0,					/* va_size */
87 	0,					/* va_atime */
88 	0,					/* va_mtime */
89 	0,					/* va_ctime */
90 	0,					/* va_rdev */
91 	0,					/* va_blksize */
92 	0,					/* va_nblocks */
93 	0					/* va_vcode */
94 };
95 
96 vattr_t sdev_vattr_lnk = {
97 	AT_TYPE|AT_MODE,			/* va_mask */
98 	VLNK,					/* va_type */
99 	SDEV_LNKMODE_DEFAULT,			/* va_mode */
100 	SDEV_UID_DEFAULT,			/* va_uid */
101 	SDEV_GID_DEFAULT,			/* va_gid */
102 	0,					/* va_fsid */
103 	0,					/* va_nodeid */
104 	0,					/* va_nlink */
105 	0,					/* va_size */
106 	0,					/* va_atime */
107 	0,					/* va_mtime */
108 	0,					/* va_ctime */
109 	0,					/* va_rdev */
110 	0,					/* va_blksize */
111 	0,					/* va_nblocks */
112 	0					/* va_vcode */
113 };
114 
115 vattr_t sdev_vattr_blk = {
116 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
117 	VBLK,					/* va_type */
118 	S_IFBLK | SDEV_DEVMODE_DEFAULT,		/* va_mode */
119 	SDEV_UID_DEFAULT,			/* va_uid */
120 	SDEV_GID_DEFAULT,			/* va_gid */
121 	0,					/* va_fsid */
122 	0,					/* va_nodeid */
123 	0,					/* va_nlink */
124 	0,					/* va_size */
125 	0,					/* va_atime */
126 	0,					/* va_mtime */
127 	0,					/* va_ctime */
128 	0,					/* va_rdev */
129 	0,					/* va_blksize */
130 	0,					/* va_nblocks */
131 	0					/* va_vcode */
132 };
133 
134 vattr_t sdev_vattr_chr = {
135 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
136 	VCHR,					/* va_type */
137 	S_IFCHR | SDEV_DEVMODE_DEFAULT,		/* va_mode */
138 	SDEV_UID_DEFAULT,			/* va_uid */
139 	SDEV_GID_DEFAULT,			/* va_gid */
140 	0,					/* va_fsid */
141 	0,					/* va_nodeid */
142 	0,					/* va_nlink */
143 	0,					/* va_size */
144 	0,					/* va_atime */
145 	0,					/* va_mtime */
146 	0,					/* va_ctime */
147 	0,					/* va_rdev */
148 	0,					/* va_blksize */
149 	0,					/* va_nblocks */
150 	0					/* va_vcode */
151 };
152 
153 kmem_cache_t	*sdev_node_cache;	/* sdev_node cache */
154 int		devtype;		/* fstype */
155 
156 struct devname_ops *devname_ns_ops;	/* default name service directory ops */
157 kmutex_t devname_nsmaps_lock;	/* protect devname_nsmaps */
158 
159 /* static */
160 static struct devname_nsmap *devname_nsmaps = NULL;
161 				/* contents from /etc/dev/devname_master */
162 static int devname_nsmaps_invalidated = 0; /* "devfsadm -m" has run */
163 
164 static struct vnodeops *sdev_get_vop(struct sdev_node *);
165 static void sdev_set_no_nocache(struct sdev_node *);
166 static int sdev_get_moduleops(struct sdev_node *);
167 static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
168 static void sdev_free_vtab(fs_operation_def_t *);
169 
170 static void
171 sdev_prof_free(struct sdev_node *dv)
172 {
173 	ASSERT(!SDEV_IS_GLOBAL(dv));
174 	if (dv->sdev_prof.dev_name)
175 		nvlist_free(dv->sdev_prof.dev_name);
176 	if (dv->sdev_prof.dev_map)
177 		nvlist_free(dv->sdev_prof.dev_map);
178 	if (dv->sdev_prof.dev_symlink)
179 		nvlist_free(dv->sdev_prof.dev_symlink);
180 	if (dv->sdev_prof.dev_glob_incdir)
181 		nvlist_free(dv->sdev_prof.dev_glob_incdir);
182 	if (dv->sdev_prof.dev_glob_excdir)
183 		nvlist_free(dv->sdev_prof.dev_glob_excdir);
184 	bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
185 }
186 
187 /*
188  * sdev_node cache constructor
189  */
190 /*ARGSUSED1*/
191 static int
192 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
193 {
194 	struct sdev_node *dv = (struct sdev_node *)buf;
195 	struct vnode *vp;
196 
197 	ASSERT(flag == KM_SLEEP);
198 
199 	bzero(buf, sizeof (struct sdev_node));
200 	rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
201 	dv->sdev_vnode = vn_alloc(KM_SLEEP);
202 	vp = SDEVTOV(dv);
203 	vp->v_data = (caddr_t)dv;
204 	return (0);
205 }
206 
207 /* sdev_node destructor for kmem cache */
208 /*ARGSUSED1*/
209 static void
210 i_sdev_node_dtor(void *buf, void *arg)
211 {
212 	struct sdev_node *dv = (struct sdev_node *)buf;
213 	struct vnode *vp = SDEVTOV(dv);
214 
215 	rw_destroy(&dv->sdev_contents);
216 	vn_free(vp);
217 }
218 
219 /* initialize sdev_node cache */
220 void
221 sdev_node_cache_init()
222 {
223 	int flags = 0;
224 
225 #ifdef	DEBUG
226 	flags = sdev_debug_cache_flags;
227 	if (flags)
228 		sdcmn_err(("cache debug flags 0x%x\n", flags));
229 #endif	/* DEBUG */
230 
231 	ASSERT(sdev_node_cache == NULL);
232 	sdev_node_cache = kmem_cache_create("sdev_node_cache",
233 	    sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
234 	    NULL, NULL, NULL, flags);
235 }
236 
237 /* destroy sdev_node cache */
238 void
239 sdev_node_cache_fini()
240 {
241 	ASSERT(sdev_node_cache != NULL);
242 	kmem_cache_destroy(sdev_node_cache);
243 	sdev_node_cache = NULL;
244 }
245 
246 void
247 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
248 {
249 	ASSERT(dv);
250 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
251 	dv->sdev_state = state;
252 }
253 
254 static void
255 sdev_attrinit(struct sdev_node *dv, vattr_t *vap)
256 {
257 	timestruc_t now;
258 
259 	ASSERT(vap);
260 
261 	dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
262 	*dv->sdev_attr = *vap;
263 
264 	dv->sdev_attr->va_mode = MAKEIMODE(vap->va_type, vap->va_mode);
265 
266 	gethrestime(&now);
267 	dv->sdev_attr->va_atime = now;
268 	dv->sdev_attr->va_mtime = now;
269 	dv->sdev_attr->va_ctime = now;
270 }
271 
272 /* alloc and initialize a sdev_node */
273 int
274 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
275     vattr_t *vap)
276 {
277 	struct sdev_node *dv = NULL;
278 	struct vnode *vp;
279 	size_t nmlen, len;
280 	devname_handle_t  *dhl;
281 
282 	nmlen = strlen(nm) + 1;
283 	if (nmlen > MAXNAMELEN) {
284 		sdcmn_err9(("sdev_nodeinit: node name %s"
285 		    " too long\n", nm));
286 		*newdv = NULL;
287 		return (ENAMETOOLONG);
288 	}
289 
290 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
291 
292 	dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
293 	bcopy(nm, dv->sdev_name, nmlen);
294 	dv->sdev_namelen = nmlen - 1;	/* '\0' not included */
295 	len = strlen(ddv->sdev_path) + strlen(nm) + 2;
296 	dv->sdev_path = kmem_alloc(len, KM_SLEEP);
297 	(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
298 	/* overwritten for VLNK nodes */
299 	dv->sdev_symlink = NULL;
300 
301 	vp = SDEVTOV(dv);
302 	vn_reinit(vp);
303 	vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
304 	if (vap)
305 		vp->v_type = vap->va_type;
306 
307 	/*
308 	 * initialized to the parent's vnodeops.
309 	 * maybe overwriten for a VDIR
310 	 */
311 	vn_setops(vp, vn_getops(SDEVTOV(ddv)));
312 	vn_exists(vp);
313 
314 	dv->sdev_dotdot = NULL;
315 	dv->sdev_dot = NULL;
316 	dv->sdev_next = NULL;
317 	dv->sdev_attrvp = NULL;
318 	if (vap) {
319 		sdev_attrinit(dv, vap);
320 	} else {
321 		dv->sdev_attr = NULL;
322 	}
323 
324 	dv->sdev_ino = sdev_mkino(dv);
325 	dv->sdev_nlink = 0;		/* updated on insert */
326 	dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
327 	dv->sdev_flags |= SDEV_BUILD;
328 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
329 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
330 	if (SDEV_IS_GLOBAL(ddv)) {
331 		dv->sdev_flags |= SDEV_GLOBAL;
332 		dv->sdev_mapinfo = NULL;
333 		dhl = &(dv->sdev_handle);
334 		dhl->dh_data = dv;
335 		dhl->dh_spec = DEVNAME_NS_NONE;
336 		dhl->dh_args = NULL;
337 		sdev_set_no_nocache(dv);
338 		dv->sdev_gdir_gen = 0;
339 	} else {
340 		dv->sdev_flags &= ~SDEV_GLOBAL;
341 		dv->sdev_origin = NULL; /* set later */
342 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
343 		dv->sdev_ldir_gen = 0;
344 		dv->sdev_devtree_gen = 0;
345 	}
346 
347 	rw_enter(&dv->sdev_contents, RW_WRITER);
348 	sdev_set_nodestate(dv, SDEV_INIT);
349 	rw_exit(&dv->sdev_contents);
350 	*newdv = dv;
351 
352 	return (0);
353 }
354 
355 /*
356  * transition a sdev_node into SDEV_READY state
357  */
358 int
359 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
360     void *args, struct cred *cred)
361 {
362 	int error = 0;
363 	struct vnode *vp = SDEVTOV(dv);
364 	vtype_t type;
365 
366 	ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
367 
368 	type = vap->va_type;
369 	vp->v_type = type;
370 	vp->v_rdev = vap->va_rdev;
371 	rw_enter(&dv->sdev_contents, RW_WRITER);
372 	if (type == VDIR) {
373 		dv->sdev_nlink = 2;
374 		dv->sdev_flags &= ~SDEV_PERSIST;
375 		dv->sdev_flags &= ~SDEV_DYNAMIC;
376 		vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
377 		error = sdev_get_moduleops(dv); /* from plug-in module */
378 		ASSERT(dv->sdev_dotdot);
379 		ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
380 		vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
381 	} else if (type == VLNK) {
382 		ASSERT(args);
383 		dv->sdev_nlink = 1;
384 		dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
385 	} else {
386 		dv->sdev_nlink = 1;
387 	}
388 
389 	if (!(SDEV_IS_GLOBAL(dv))) {
390 		dv->sdev_origin = (struct sdev_node *)args;
391 		dv->sdev_flags &= ~SDEV_PERSIST;
392 	}
393 
394 	/*
395 	 * shadow node is created here OR
396 	 * if failed (indicated by dv->sdev_attrvp == NULL),
397 	 * created later in sdev_setattr
398 	 */
399 	if (avp) {
400 		dv->sdev_attrvp = avp;
401 	} else {
402 		if (dv->sdev_attr == NULL)
403 			sdev_attrinit(dv, vap);
404 		else
405 			*dv->sdev_attr = *vap;
406 
407 		if ((SDEV_IS_PERSIST(dv) && (dv->sdev_attrvp == NULL)) ||
408 		    ((SDEVTOV(dv)->v_type == VDIR) &&
409 		    (dv->sdev_attrvp == NULL)))
410 			error = sdev_shadow_node(dv, cred);
411 	}
412 
413 	/* transition to READY state */
414 	sdev_set_nodestate(dv, SDEV_READY);
415 	sdev_nc_node_exists(dv);
416 	rw_exit(&dv->sdev_contents);
417 	return (error);
418 }
419 
420 /*
421  * setting ZOMBIE state
422  */
423 static int
424 sdev_nodezombied(struct sdev_node *dv)
425 {
426 	rw_enter(&dv->sdev_contents, RW_WRITER);
427 	sdev_set_nodestate(dv, SDEV_ZOMBIE);
428 	rw_exit(&dv->sdev_contents);
429 	return (0);
430 }
431 
432 /*
433  * Build the VROOT sdev_node.
434  */
435 /*ARGSUSED*/
436 struct sdev_node *
437 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
438     struct vnode *avp, struct cred *cred)
439 {
440 	struct sdev_node *dv;
441 	struct vnode *vp;
442 	char devdir[] = "/dev";
443 
444 	ASSERT(sdev_node_cache != NULL);
445 	ASSERT(avp);
446 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
447 	vp = SDEVTOV(dv);
448 	vn_reinit(vp);
449 	vp->v_flag |= VROOT;
450 	vp->v_vfsp = vfsp;
451 	vp->v_type = VDIR;
452 	vp->v_rdev = devdev;
453 	vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
454 	vn_exists(vp);
455 
456 	if (vfsp->vfs_mntpt)
457 		dv->sdev_name = i_ddi_strdup(
458 		    (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
459 	else
460 		/* vfs_mountdev1 set mount point later */
461 		dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
462 	dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
463 	dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
464 	dv->sdev_ino = SDEV_ROOTINO;
465 	dv->sdev_nlink = 2;		/* name + . (no sdev_insert) */
466 	dv->sdev_dotdot = dv;		/* .. == self */
467 	dv->sdev_attrvp = avp;
468 	dv->sdev_attr = NULL;
469 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
470 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
471 	if (strcmp(dv->sdev_name, "/dev") == 0) {
472 		mutex_init(&devname_nsmaps_lock, NULL, MUTEX_DEFAULT, NULL);
473 		dv->sdev_mapinfo = NULL;
474 		dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
475 		bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
476 		dv->sdev_gdir_gen = 0;
477 	} else {
478 		dv->sdev_flags = SDEV_BUILD;
479 		dv->sdev_flags &= ~SDEV_PERSIST;
480 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
481 		dv->sdev_ldir_gen = 0;
482 		dv->sdev_devtree_gen = 0;
483 	}
484 
485 	rw_enter(&dv->sdev_contents, RW_WRITER);
486 	sdev_set_nodestate(dv, SDEV_READY);
487 	rw_exit(&dv->sdev_contents);
488 	sdev_nc_node_exists(dv);
489 	return (dv);
490 }
491 
492 /*
493  *  1. load the module
494  *  2. modload invokes sdev_module_register, which in turn sets
495  *     the dv->sdev_mapinfo->dir_ops
496  *
497  * note: locking order:
498  *	dv->sdev_contents -> map->dir_lock
499  */
500 static int
501 sdev_get_moduleops(struct sdev_node *dv)
502 {
503 	int error = 0;
504 	struct devname_nsmap *map = NULL;
505 	char *module;
506 	char *path;
507 	int load = 1;
508 
509 	ASSERT(SDEVTOV(dv)->v_type == VDIR);
510 
511 	if (devname_nsmaps == NULL)
512 		return (0);
513 
514 	if (!sdev_nsmaps_loaded() && !sdev_nsmaps_reloaded())
515 		return (0);
516 
517 
518 	path = dv->sdev_path;
519 	if ((map = sdev_get_nsmap_by_dir(path, 0))) {
520 		rw_enter(&map->dir_lock, RW_READER);
521 		if (map->dir_invalid) {
522 			if (map->dir_module && map->dir_newmodule &&
523 			    (strcmp(map->dir_module,
524 					map->dir_newmodule) == 0)) {
525 				load = 0;
526 			}
527 			sdev_replace_nsmap(map, map->dir_newmodule,
528 			    map->dir_newmap);
529 		}
530 
531 		module = map->dir_module;
532 		if (module && load) {
533 			sdcmn_err6(("sdev_get_moduleops: "
534 			    "load module %s", module));
535 			rw_exit(&map->dir_lock);
536 			error = modload("devname", module);
537 			sdcmn_err6(("sdev_get_moduleops: error %d\n", error));
538 			if (error < 0) {
539 				return (-1);
540 			}
541 		} else if (module == NULL) {
542 			/*
543 			 * loading the module ops for name services
544 			 */
545 			if (devname_ns_ops == NULL) {
546 				sdcmn_err6((
547 				    "sdev_get_moduleops: modload default\n"));
548 				error = modload("devname", DEVNAME_NSCONFIG);
549 				sdcmn_err6((
550 				    "sdev_get_moduleops: error %d\n", error));
551 				if (error < 0) {
552 					return (-1);
553 				}
554 			}
555 
556 			if (!rw_tryupgrade(&map->dir_lock)) {
557 				rw_exit(&map->dir_lock);
558 				rw_enter(&map->dir_lock, RW_WRITER);
559 			}
560 			ASSERT(devname_ns_ops);
561 			map->dir_ops = devname_ns_ops;
562 			rw_exit(&map->dir_lock);
563 		}
564 	}
565 
566 	dv->sdev_mapinfo = map;
567 	return (0);
568 }
569 
570 /* directory dependent vop table */
571 struct sdev_vop_table {
572 	char *vt_name;				/* subdirectory name */
573 	const fs_operation_def_t *vt_service;	/* vnodeops table */
574 	struct vnodeops *vt_vops;		/* constructed vop */
575 	struct vnodeops **vt_global_vops;	/* global container for vop */
576 	int (*vt_vtor)(struct sdev_node *);	/* validate sdev_node */
577 	int vt_flags;
578 };
579 
580 /*
581  * A nice improvement would be to provide a plug-in mechanism
582  * for this table instead of a const table.
583  */
584 static struct sdev_vop_table vtab[] =
585 {
586 	{ "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
587 	SDEV_DYNAMIC | SDEV_VTOR },
588 
589 	{ "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },
590 
591 	{ "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
592 	SDEV_DYNAMIC | SDEV_VTOR },
593 
594 	{ NULL, NULL, NULL, NULL, NULL, 0}
595 };
596 
597 
598 /*
599  *  sets a directory's vnodeops if the directory is in the vtab;
600  */
601 static struct vnodeops *
602 sdev_get_vop(struct sdev_node *dv)
603 {
604 	int i;
605 	char *path;
606 
607 	path = dv->sdev_path;
608 	ASSERT(path);
609 
610 	/* gets the relative path to /dev/ */
611 	path += 5;
612 
613 	/* gets the vtab entry if matches */
614 	for (i = 0; vtab[i].vt_name; i++) {
615 		if (strcmp(vtab[i].vt_name, path) != 0)
616 			continue;
617 		dv->sdev_flags |= vtab[i].vt_flags;
618 
619 		if (vtab[i].vt_vops) {
620 			if (vtab[i].vt_global_vops)
621 				*(vtab[i].vt_global_vops) = vtab[i].vt_vops;
622 			return (vtab[i].vt_vops);
623 		}
624 
625 		if (vtab[i].vt_service) {
626 			fs_operation_def_t *templ;
627 			templ = sdev_merge_vtab(vtab[i].vt_service);
628 			if (vn_make_ops(vtab[i].vt_name,
629 			    (const fs_operation_def_t *)templ,
630 			    &vtab[i].vt_vops) != 0) {
631 				cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
632 				    vtab[i].vt_name);
633 				/*NOTREACHED*/
634 			}
635 			if (vtab[i].vt_global_vops) {
636 				*(vtab[i].vt_global_vops) = vtab[i].vt_vops;
637 			}
638 			sdev_free_vtab(templ);
639 			return (vtab[i].vt_vops);
640 		}
641 		return (sdev_vnodeops);
642 	}
643 
644 	/* child inherits the persistence of the parent */
645 	if (SDEV_IS_PERSIST(dv->sdev_dotdot))
646 		dv->sdev_flags |= SDEV_PERSIST;
647 
648 	return (sdev_vnodeops);
649 }
650 
651 static void
652 sdev_set_no_nocache(struct sdev_node *dv)
653 {
654 	int i;
655 	char *path;
656 
657 	ASSERT(dv->sdev_path);
658 	path = dv->sdev_path + strlen("/dev/");
659 
660 	for (i = 0; vtab[i].vt_name; i++) {
661 		if (strcmp(vtab[i].vt_name, path) == 0) {
662 			if (vtab[i].vt_flags & SDEV_NO_NCACHE)
663 				dv->sdev_flags |= SDEV_NO_NCACHE;
664 			break;
665 		}
666 	}
667 }
668 
669 void *
670 sdev_get_vtor(struct sdev_node *dv)
671 {
672 	int i;
673 
674 	for (i = 0; vtab[i].vt_name; i++) {
675 		if (strcmp(vtab[i].vt_name, dv->sdev_name) != 0)
676 			continue;
677 		return ((void *)vtab[i].vt_vtor);
678 	}
679 	return (NULL);
680 }
681 
682 /*
683  * Build the base root inode
684  */
685 ino_t
686 sdev_mkino(struct sdev_node *dv)
687 {
688 	ino_t	ino;
689 
690 	/*
691 	 * for now, follow the lead of tmpfs here
692 	 * need to someday understand the requirements here
693 	 */
694 	ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
695 	ino += SDEV_ROOTINO + 1;
696 
697 	return (ino);
698 }
699 
700 static int
701 sdev_getlink(struct vnode *linkvp, char **link)
702 {
703 	int err;
704 	char *buf;
705 	struct uio uio = {0};
706 	struct iovec iov = {0};
707 
708 	if (linkvp == NULL)
709 		return (ENOENT);
710 	ASSERT(linkvp->v_type == VLNK);
711 
712 	buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
713 	iov.iov_base = buf;
714 	iov.iov_len = MAXPATHLEN;
715 	uio.uio_iov = &iov;
716 	uio.uio_iovcnt = 1;
717 	uio.uio_resid = MAXPATHLEN;
718 	uio.uio_segflg = UIO_SYSSPACE;
719 	uio.uio_llimit = MAXOFFSET_T;
720 
721 	err = VOP_READLINK(linkvp, &uio, kcred, NULL);
722 	if (err) {
723 		cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
724 		kmem_free(buf, MAXPATHLEN);
725 		return (ENOENT);
726 	}
727 
728 	/* mission complete */
729 	*link = i_ddi_strdup(buf, KM_SLEEP);
730 	kmem_free(buf, MAXPATHLEN);
731 	return (0);
732 }
733 
734 /*
735  * A convenient wrapper to get the devfs node vnode for a device
736  * minor functionality: readlink() of a /dev symlink
737  * Place the link into dv->sdev_symlink
738  */
739 static int
740 sdev_follow_link(struct sdev_node *dv)
741 {
742 	int err;
743 	struct vnode *linkvp;
744 	char *link = NULL;
745 
746 	linkvp = SDEVTOV(dv);
747 	if (linkvp == NULL)
748 		return (ENOENT);
749 	ASSERT(linkvp->v_type == VLNK);
750 	err = sdev_getlink(linkvp, &link);
751 	if (err) {
752 		(void) sdev_nodezombied(dv);
753 		dv->sdev_symlink = NULL;
754 		return (ENOENT);
755 	}
756 
757 	ASSERT(link != NULL);
758 	dv->sdev_symlink = link;
759 	return (0);
760 }
761 
762 static int
763 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
764 {
765 	vtype_t otype = SDEVTOV(dv)->v_type;
766 
767 	/*
768 	 * existing sdev_node has a different type.
769 	 */
770 	if (otype != nvap->va_type) {
771 		sdcmn_err9(("sdev_node_check: existing node "
772 		    "  %s type %d does not match new node type %d\n",
773 		    dv->sdev_name, otype, nvap->va_type));
774 		return (EEXIST);
775 	}
776 
777 	/*
778 	 * For a symlink, the target should be the same.
779 	 */
780 	if (otype == VLNK) {
781 		ASSERT(nargs != NULL);
782 		ASSERT(dv->sdev_symlink != NULL);
783 		if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
784 			sdcmn_err9(("sdev_node_check: existing node "
785 			    " %s has different symlink %s as new node "
786 			    " %s\n", dv->sdev_name, dv->sdev_symlink,
787 			    (char *)nargs));
788 			return (EEXIST);
789 		}
790 	}
791 
792 	return (0);
793 }
794 
795 /*
796  * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
797  *
798  * arguments:
799  *	- ddv (parent)
800  *	- nm (child name)
801  *	- newdv (sdev_node for nm is returned here)
802  *	- vap (vattr for the node to be created, va_type should be set.
803  *	  the defaults should be used if unknown)
804  *	- cred
805  *	- args
806  *	    . tnm (for VLNK)
807  *	    . global sdev_node (for !SDEV_GLOBAL)
808  * 	- state: SDEV_INIT, SDEV_READY
809  *
810  * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
811  *
812  * NOTE:  directory contents writers lock needs to be held before
813  *	  calling this routine.
814  */
815 int
816 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
817     struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
818     sdev_node_state_t state)
819 {
820 	int error = 0;
821 	sdev_node_state_t node_state;
822 	struct sdev_node *dv = NULL;
823 
824 	ASSERT(state != SDEV_ZOMBIE);
825 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
826 
827 	if (*newdv) {
828 		dv = *newdv;
829 	} else {
830 		/* allocate and initialize a sdev_node */
831 		if (ddv->sdev_state == SDEV_ZOMBIE) {
832 			sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
833 			    ddv->sdev_path));
834 			return (ENOENT);
835 		}
836 
837 		error = sdev_nodeinit(ddv, nm, &dv, vap);
838 		if (error != 0) {
839 			sdcmn_err9(("sdev_mknode: error %d,"
840 			    " name %s can not be initialized\n",
841 			    error, nm));
842 			return (ENOENT);
843 		}
844 		ASSERT(dv);
845 
846 		/* insert into the directory cache */
847 		error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
848 		if (error) {
849 			sdcmn_err9(("sdev_mknode: node %s can not"
850 			    " be added into directory cache\n", nm));
851 			return (ENOENT);
852 		}
853 	}
854 
855 	ASSERT(dv);
856 	node_state = dv->sdev_state;
857 	ASSERT(node_state != SDEV_ZOMBIE);
858 
859 	if (state == SDEV_READY) {
860 		switch (node_state) {
861 		case SDEV_INIT:
862 			error = sdev_nodeready(dv, vap, avp, args, cred);
863 			/*
864 			 * masking the errors with ENOENT
865 			 */
866 			if (error) {
867 				sdcmn_err9(("sdev_mknode: node %s can NOT"
868 				    " be transitioned into READY state, "
869 				    "error %d\n", nm, error));
870 				error = ENOENT;
871 			}
872 			break;
873 		case SDEV_READY:
874 			/*
875 			 * Do some sanity checking to make sure
876 			 * the existing sdev_node is what has been
877 			 * asked for.
878 			 */
879 			error = sdev_node_check(dv, vap, args);
880 			break;
881 		default:
882 			break;
883 		}
884 	}
885 
886 	if (!error) {
887 		*newdv = dv;
888 		ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
889 	} else {
890 		SDEV_SIMPLE_RELE(dv);
891 		*newdv = NULL;
892 	}
893 
894 	return (error);
895 }
896 
897 /*
898  * convenient wrapper to change vp's ATIME, CTIME and ATIME
899  */
900 void
901 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
902 {
903 	struct vattr attr;
904 	timestruc_t now;
905 	int err;
906 
907 	ASSERT(vp);
908 	gethrestime(&now);
909 	if (mask & AT_CTIME)
910 		attr.va_ctime = now;
911 	if (mask & AT_MTIME)
912 		attr.va_mtime = now;
913 	if (mask & AT_ATIME)
914 		attr.va_atime = now;
915 
916 	attr.va_mask = (mask & AT_TIMES);
917 	err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
918 	if (err && (err != EROFS)) {
919 		sdcmn_err(("update timestamps error %d\n", err));
920 	}
921 }
922 
923 /*
924  * the backing store vnode is released here
925  */
926 /*ARGSUSED1*/
927 void
928 sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
929 {
930 	/* no references */
931 	ASSERT(dv->sdev_nlink == 0);
932 
933 	if (dv->sdev_attrvp != NULLVP) {
934 		VN_RELE(dv->sdev_attrvp);
935 		/*
936 		 * reset the attrvp so that no more
937 		 * references can be made on this already
938 		 * vn_rele() vnode
939 		 */
940 		dv->sdev_attrvp = NULLVP;
941 	}
942 
943 	if (dv->sdev_attr != NULL) {
944 		kmem_free(dv->sdev_attr, sizeof (struct vattr));
945 		dv->sdev_attr = NULL;
946 	}
947 
948 	if (dv->sdev_name != NULL) {
949 		kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
950 		dv->sdev_name = NULL;
951 	}
952 
953 	if (dv->sdev_symlink != NULL) {
954 		kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
955 		dv->sdev_symlink = NULL;
956 	}
957 
958 	if (dv->sdev_path) {
959 		kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
960 		dv->sdev_path = NULL;
961 	}
962 
963 	if (!SDEV_IS_GLOBAL(dv))
964 		sdev_prof_free(dv);
965 
966 	mutex_destroy(&dv->sdev_lookup_lock);
967 	cv_destroy(&dv->sdev_lookup_cv);
968 
969 	/* return node to initial state as per constructor */
970 	(void) memset((void *)&dv->sdev_instance_data, 0,
971 	    sizeof (dv->sdev_instance_data));
972 	vn_invalid(SDEVTOV(dv));
973 	kmem_cache_free(sdev_node_cache, dv);
974 }
975 
976 /*
977  * DIRECTORY CACHE lookup
978  */
979 struct sdev_node *
980 sdev_findbyname(struct sdev_node *ddv, char *nm)
981 {
982 	struct sdev_node *dv;
983 	size_t	nmlen = strlen(nm);
984 
985 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
986 	for (dv = ddv->sdev_dot; dv; dv = dv->sdev_next) {
987 		if (dv->sdev_namelen != nmlen) {
988 			continue;
989 		}
990 
991 		/*
992 		 * Can't lookup stale nodes
993 		 */
994 		if (dv->sdev_flags & SDEV_STALE) {
995 			sdcmn_err9((
996 			    "sdev_findbyname: skipped stale node: %s\n",
997 			    dv->sdev_name));
998 			continue;
999 		}
1000 
1001 		if (strcmp(dv->sdev_name, nm) == 0) {
1002 			SDEV_HOLD(dv);
1003 			return (dv);
1004 		}
1005 	}
1006 	return (NULL);
1007 }
1008 
1009 /*
1010  * Inserts a new sdev_node in a parent directory
1011  */
1012 void
1013 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
1014 {
1015 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1016 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
1017 	ASSERT(ddv->sdev_nlink >= 2);
1018 	ASSERT(dv->sdev_nlink == 0);
1019 
1020 	dv->sdev_dotdot = ddv;
1021 	dv->sdev_next = ddv->sdev_dot;
1022 	ddv->sdev_dot = dv;
1023 	ddv->sdev_nlink++;
1024 }
1025 
1026 /*
1027  * The following check is needed because while sdev_nodes are linked
1028  * in SDEV_INIT state, they have their link counts incremented only
1029  * in SDEV_READY state.
1030  */
1031 static void
1032 decr_link(struct sdev_node *dv)
1033 {
1034 	if (dv->sdev_state != SDEV_INIT)
1035 		dv->sdev_nlink--;
1036 	else
1037 		ASSERT(dv->sdev_nlink == 0);
1038 }
1039 
1040 /*
1041  * Delete an existing dv from directory cache
1042  *
1043  * In the case of a node is still held by non-zero reference count,
1044  *     the node is put into ZOMBIE state. Once the reference count
1045  *     reaches "0", the node is unlinked and destroyed,
1046  *     in sdev_inactive().
1047  */
1048 static int
1049 sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
1050 {
1051 	struct sdev_node *idv;
1052 	struct sdev_node *prev = NULL;
1053 	struct vnode *vp;
1054 
1055 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1056 
1057 	vp = SDEVTOV(dv);
1058 	mutex_enter(&vp->v_lock);
1059 
1060 	/* dv is held still */
1061 	if (vp->v_count > 1) {
1062 		rw_enter(&dv->sdev_contents, RW_WRITER);
1063 		if (dv->sdev_state == SDEV_READY) {
1064 			sdcmn_err9((
1065 			    "sdev_delete: node %s busy with count %d\n",
1066 			    dv->sdev_name, vp->v_count));
1067 			dv->sdev_state = SDEV_ZOMBIE;
1068 		}
1069 		rw_exit(&dv->sdev_contents);
1070 		--vp->v_count;
1071 		mutex_exit(&vp->v_lock);
1072 		return (EBUSY);
1073 	}
1074 	ASSERT(vp->v_count == 1);
1075 
1076 	/* unlink from the memory cache */
1077 	ddv->sdev_nlink--;	/* .. to above */
1078 	if (vp->v_type == VDIR) {
1079 		decr_link(dv);		/* . to self */
1080 	}
1081 
1082 	for (idv = ddv->sdev_dot; idv && idv != dv;
1083 	    prev = idv, idv = idv->sdev_next)
1084 		;
1085 	ASSERT(idv == dv);	/* node to be deleted must exist */
1086 	if (prev == NULL)
1087 		ddv->sdev_dot = dv->sdev_next;
1088 	else
1089 		prev->sdev_next = dv->sdev_next;
1090 	dv->sdev_next = NULL;
1091 	decr_link(dv);	/* name, back to zero */
1092 	vp->v_count--;
1093 	mutex_exit(&vp->v_lock);
1094 
1095 	/* destroy the node */
1096 	sdev_nodedestroy(dv, 0);
1097 	return (0);
1098 }
1099 
1100 /*
1101  * check if the source is in the path of the target
1102  *
1103  * source and target are different
1104  */
1105 /*ARGSUSED2*/
1106 static int
1107 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
1108 {
1109 	int error = 0;
1110 	struct sdev_node *dotdot, *dir;
1111 
1112 	dotdot = tdv->sdev_dotdot;
1113 	ASSERT(dotdot);
1114 
1115 	/* fs root */
1116 	if (dotdot == tdv) {
1117 		return (0);
1118 	}
1119 
1120 	for (;;) {
1121 		/*
1122 		 * avoid error cases like
1123 		 *	mv a a/b
1124 		 *	mv a a/b/c
1125 		 *	etc.
1126 		 */
1127 		if (dotdot == sdv) {
1128 			error = EINVAL;
1129 			break;
1130 		}
1131 
1132 		dir = dotdot;
1133 		dotdot = dir->sdev_dotdot;
1134 
1135 		/* done checking because root is reached */
1136 		if (dir == dotdot) {
1137 			break;
1138 		}
1139 	}
1140 	return (error);
1141 }
1142 
1143 int
1144 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
1145     struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
1146     struct cred *cred)
1147 {
1148 	int error = 0;
1149 	struct vnode *ovp = SDEVTOV(odv);
1150 	struct vnode *nvp;
1151 	struct vattr vattr;
1152 	int doingdir = (ovp->v_type == VDIR);
1153 	char *link = NULL;
1154 	int samedir = (oddv == nddv) ? 1 : 0;
1155 	int bkstore = 0;
1156 	struct sdev_node *idv = NULL;
1157 	struct sdev_node *ndv = NULL;
1158 	timestruc_t now;
1159 
1160 	vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1161 	error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
1162 	if (error)
1163 		return (error);
1164 
1165 	if (!samedir)
1166 		rw_enter(&oddv->sdev_contents, RW_WRITER);
1167 	rw_enter(&nddv->sdev_contents, RW_WRITER);
1168 
1169 	/*
1170 	 * the source may have been deleted by another thread before
1171 	 * we gets here.
1172 	 */
1173 	if (odv->sdev_state != SDEV_READY) {
1174 		error = ENOENT;
1175 		goto err_out;
1176 	}
1177 
1178 	if (doingdir && (odv == nddv)) {
1179 		error = EINVAL;
1180 		goto err_out;
1181 	}
1182 
1183 	/*
1184 	 * If renaming a directory, and the parents are different (".." must be
1185 	 * changed) then the source dir must not be in the dir hierarchy above
1186 	 * the target since it would orphan everything below the source dir.
1187 	 */
1188 	if (doingdir && (oddv != nddv)) {
1189 		error = sdev_checkpath(odv, nddv, cred);
1190 		if (error)
1191 			goto err_out;
1192 	}
1193 
1194 	/* destination existing */
1195 	if (*ndvp) {
1196 		nvp = SDEVTOV(*ndvp);
1197 		ASSERT(nvp);
1198 
1199 		/* handling renaming to itself */
1200 		if (odv == *ndvp) {
1201 			error = 0;
1202 			goto err_out;
1203 		}
1204 
1205 		if (nvp->v_type == VDIR) {
1206 			if (!doingdir) {
1207 				error = EISDIR;
1208 				goto err_out;
1209 			}
1210 
1211 			if (vn_vfswlock(nvp)) {
1212 				error = EBUSY;
1213 				goto err_out;
1214 			}
1215 
1216 			if (vn_mountedvfs(nvp) != NULL) {
1217 				vn_vfsunlock(nvp);
1218 				error = EBUSY;
1219 				goto err_out;
1220 			}
1221 
1222 			/* in case dir1 exists in dir2 and "mv dir1 dir2" */
1223 			if ((*ndvp)->sdev_nlink > 2) {
1224 				vn_vfsunlock(nvp);
1225 				error = EEXIST;
1226 				goto err_out;
1227 			}
1228 			vn_vfsunlock(nvp);
1229 
1230 			(void) sdev_dirdelete(nddv, *ndvp);
1231 			*ndvp = NULL;
1232 			error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1233 				    nddv->sdev_attrvp, cred, NULL, 0);
1234 			if (error)
1235 				goto err_out;
1236 		} else {
1237 			if (doingdir) {
1238 				error = ENOTDIR;
1239 				goto err_out;
1240 			}
1241 
1242 			if (SDEV_IS_PERSIST((*ndvp))) {
1243 				bkstore = 1;
1244 			}
1245 
1246 			/*
1247 			 * get rid of the node from the directory cache
1248 			 * note, in case EBUSY is returned, the ZOMBIE
1249 			 * node is taken care in sdev_mknode.
1250 			 */
1251 			(void) sdev_dirdelete(nddv, *ndvp);
1252 			*ndvp = NULL;
1253 			if (bkstore) {
1254 				error = VOP_REMOVE(nddv->sdev_attrvp,
1255 				    nnm, cred, NULL, 0);
1256 				if (error)
1257 				    goto err_out;
1258 			}
1259 		}
1260 	}
1261 
1262 	/* fix the source for a symlink */
1263 	if (vattr.va_type == VLNK) {
1264 		if (odv->sdev_symlink == NULL) {
1265 			error = sdev_follow_link(odv);
1266 			if (error) {
1267 				error = ENOENT;
1268 				goto err_out;
1269 			}
1270 		}
1271 		ASSERT(odv->sdev_symlink);
1272 		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1273 	}
1274 
1275 	/*
1276 	 * make a fresh node from the source attrs
1277 	 */
1278 	ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1279 	error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1280 	    NULL, (void *)link, cred, SDEV_READY);
1281 
1282 	if (link)
1283 		kmem_free(link, strlen(link) + 1);
1284 
1285 	if (error)
1286 		goto err_out;
1287 	ASSERT(*ndvp);
1288 	ASSERT((*ndvp)->sdev_state == SDEV_READY);
1289 
1290 	/* move dir contents */
1291 	if (doingdir) {
1292 		for (idv = odv->sdev_dot; idv; idv = idv->sdev_next) {
1293 			error = sdev_rnmnode(odv, idv,
1294 			    (struct sdev_node *)(*ndvp), &ndv,
1295 			    idv->sdev_name, cred);
1296 
1297 			if (error)
1298 				goto err_out;
1299 			ndv = NULL;
1300 		}
1301 
1302 	}
1303 
1304 	if ((*ndvp)->sdev_attrvp) {
1305 		sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1306 		    AT_CTIME|AT_ATIME);
1307 	} else {
1308 		ASSERT((*ndvp)->sdev_attr);
1309 		gethrestime(&now);
1310 		(*ndvp)->sdev_attr->va_ctime = now;
1311 		(*ndvp)->sdev_attr->va_atime = now;
1312 	}
1313 
1314 	if (nddv->sdev_attrvp) {
1315 		sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1316 		    AT_MTIME|AT_ATIME);
1317 	} else {
1318 		ASSERT(nddv->sdev_attr);
1319 		gethrestime(&now);
1320 		nddv->sdev_attr->va_mtime = now;
1321 		nddv->sdev_attr->va_atime = now;
1322 	}
1323 	rw_exit(&nddv->sdev_contents);
1324 	if (!samedir)
1325 		rw_exit(&oddv->sdev_contents);
1326 
1327 	SDEV_RELE(*ndvp);
1328 	return (error);
1329 
1330 err_out:
1331 	rw_exit(&nddv->sdev_contents);
1332 	if (!samedir)
1333 		rw_exit(&oddv->sdev_contents);
1334 	return (error);
1335 }
1336 
1337 /*
1338  * Merge sdev_node specific information into an attribute structure.
1339  *
1340  * note: sdev_node is not locked here
1341  */
1342 void
1343 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1344 {
1345 	struct vnode *vp = SDEVTOV(dv);
1346 
1347 	vap->va_nlink = dv->sdev_nlink;
1348 	vap->va_nodeid = dv->sdev_ino;
1349 	vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1350 	vap->va_type = vp->v_type;
1351 
1352 	if (vp->v_type == VDIR) {
1353 		vap->va_rdev = 0;
1354 		vap->va_fsid = vp->v_rdev;
1355 	} else if (vp->v_type == VLNK) {
1356 		vap->va_rdev = 0;
1357 		vap->va_mode  &= ~S_IFMT;
1358 		vap->va_mode |= S_IFLNK;
1359 	} else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1360 		vap->va_rdev = vp->v_rdev;
1361 		vap->va_mode &= ~S_IFMT;
1362 		if (vap->va_type == VCHR)
1363 			vap->va_mode |= S_IFCHR;
1364 		else
1365 			vap->va_mode |= S_IFBLK;
1366 	} else {
1367 		vap->va_rdev = 0;
1368 	}
1369 }
1370 
1371 static struct vattr *
1372 sdev_getdefault_attr(enum vtype type)
1373 {
1374 	if (type == VDIR)
1375 		return (&sdev_vattr_dir);
1376 	else if (type == VCHR)
1377 		return (&sdev_vattr_chr);
1378 	else if (type == VBLK)
1379 		return (&sdev_vattr_blk);
1380 	else if (type == VLNK)
1381 		return (&sdev_vattr_lnk);
1382 	else
1383 		return (NULL);
1384 }
1385 int
1386 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1387 {
1388 	int rv = 0;
1389 	struct vnode *vp = SDEVTOV(dv);
1390 
1391 	switch (vp->v_type) {
1392 	case VCHR:
1393 	case VBLK:
1394 		/*
1395 		 * If vnode is a device, return special vnode instead
1396 		 * (though it knows all about -us- via sp->s_realvp)
1397 		 */
1398 		*vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1399 		VN_RELE(vp);
1400 		if (*vpp == NULLVP)
1401 			rv = ENOSYS;
1402 		break;
1403 	default:	/* most types are returned as is */
1404 		*vpp = vp;
1405 		break;
1406 	}
1407 	return (rv);
1408 }
1409 
1410 /*
1411  * loopback into sdev_lookup()
1412  */
1413 static struct vnode *
1414 devname_find_by_devpath(char *devpath, struct vattr *vattr)
1415 {
1416 	int error = 0;
1417 	struct vnode *vp;
1418 
1419 	error = lookupname(devpath, UIO_SYSSPACE, NO_FOLLOW, NULLVPP, &vp);
1420 	if (error) {
1421 		return (NULL);
1422 	}
1423 
1424 	if (vattr)
1425 		(void) VOP_GETATTR(vp, vattr, 0, kcred, NULL);
1426 	return (vp);
1427 }
1428 
1429 /*
1430  * the junction between devname and devfs
1431  */
1432 static struct vnode *
1433 devname_configure_by_path(char *physpath, struct vattr *vattr)
1434 {
1435 	int error = 0;
1436 	struct vnode *vp;
1437 
1438 	ASSERT(strncmp(physpath, "/devices/", sizeof ("/devices/") - 1)
1439 	    == 0);
1440 
1441 	error = devfs_lookupname(physpath + sizeof ("/devices/") - 1,
1442 	    NULLVPP, &vp);
1443 	if (error != 0) {
1444 		if (error == ENODEV) {
1445 			cmn_err(CE_CONT, "%s: not found (line %d)\n",
1446 			    physpath, __LINE__);
1447 		}
1448 
1449 		return (NULL);
1450 	}
1451 
1452 	if (vattr)
1453 		(void) VOP_GETATTR(vp, vattr, 0, kcred, NULL);
1454 	return (vp);
1455 }
1456 
1457 /*
1458  * junction between devname and root file system, e.g. ufs
1459  */
1460 int
1461 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1462 {
1463 	struct vnode *rdvp = ddv->sdev_attrvp;
1464 	int rval = 0;
1465 
1466 	ASSERT(rdvp);
1467 
1468 	rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1469 	    NULL);
1470 	return (rval);
1471 }
1472 
1473 static int
1474 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1475 {
1476 	struct sdev_node *dv = NULL;
1477 	char	*nm;
1478 	struct vnode *dirvp;
1479 	int	error;
1480 	vnode_t	*vp;
1481 	int eof;
1482 	struct iovec iov;
1483 	struct uio uio;
1484 	struct dirent64 *dp;
1485 	dirent64_t *dbuf;
1486 	size_t dbuflen;
1487 	struct vattr vattr;
1488 	char *link = NULL;
1489 
1490 	if (ddv->sdev_attrvp == NULL)
1491 		return (0);
1492 	if (!(ddv->sdev_flags & SDEV_BUILD))
1493 		return (0);
1494 
1495 	dirvp = ddv->sdev_attrvp;
1496 	VN_HOLD(dirvp);
1497 	dbuf = kmem_zalloc(dlen, KM_SLEEP);
1498 
1499 	uio.uio_iov = &iov;
1500 	uio.uio_iovcnt = 1;
1501 	uio.uio_segflg = UIO_SYSSPACE;
1502 	uio.uio_fmode = 0;
1503 	uio.uio_extflg = UIO_COPY_CACHED;
1504 	uio.uio_loffset = 0;
1505 	uio.uio_llimit = MAXOFFSET_T;
1506 
1507 	eof = 0;
1508 	error = 0;
1509 	while (!error && !eof) {
1510 		uio.uio_resid = dlen;
1511 		iov.iov_base = (char *)dbuf;
1512 		iov.iov_len = dlen;
1513 		(void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1514 		error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1515 		VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1516 
1517 		dbuflen = dlen - uio.uio_resid;
1518 		if (error || dbuflen == 0)
1519 			break;
1520 
1521 		if (!(ddv->sdev_flags & SDEV_BUILD)) {
1522 			error = 0;
1523 			break;
1524 		}
1525 
1526 		for (dp = dbuf; ((intptr_t)dp <
1527 		    (intptr_t)dbuf + dbuflen);
1528 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1529 			nm = dp->d_name;
1530 
1531 			if (strcmp(nm, ".") == 0 ||
1532 			    strcmp(nm, "..") == 0)
1533 				continue;
1534 
1535 			vp = NULLVP;
1536 			dv = sdev_cache_lookup(ddv, nm);
1537 			if (dv) {
1538 				if (dv->sdev_state != SDEV_ZOMBIE) {
1539 					SDEV_SIMPLE_RELE(dv);
1540 				} else {
1541 					/*
1542 					 * A ZOMBIE node may not have been
1543 					 * cleaned up from the backing store,
1544 					 * bypass this entry in this case,
1545 					 * and clean it up from the directory
1546 					 * cache if this is the last call.
1547 					 */
1548 					(void) sdev_dirdelete(ddv, dv);
1549 				}
1550 				continue;
1551 			}
1552 
1553 			/* refill the cache if not already */
1554 			error = devname_backstore_lookup(ddv, nm, &vp);
1555 			if (error)
1556 				continue;
1557 
1558 			vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1559 			error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1560 			if (error)
1561 				continue;
1562 
1563 			if (vattr.va_type == VLNK) {
1564 				error = sdev_getlink(vp, &link);
1565 				if (error) {
1566 					continue;
1567 				}
1568 				ASSERT(link != NULL);
1569 			}
1570 
1571 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1572 				rw_exit(&ddv->sdev_contents);
1573 				rw_enter(&ddv->sdev_contents, RW_WRITER);
1574 			}
1575 			error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1576 			    cred, SDEV_READY);
1577 			rw_downgrade(&ddv->sdev_contents);
1578 
1579 			if (link != NULL) {
1580 				kmem_free(link, strlen(link) + 1);
1581 				link = NULL;
1582 			}
1583 
1584 			if (!error) {
1585 				ASSERT(dv);
1586 				ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1587 				SDEV_SIMPLE_RELE(dv);
1588 			}
1589 			vp = NULL;
1590 			dv = NULL;
1591 		}
1592 	}
1593 
1594 done:
1595 	VN_RELE(dirvp);
1596 	kmem_free(dbuf, dlen);
1597 
1598 	return (error);
1599 }
1600 
1601 void
1602 sdev_filldir_dynamic(struct sdev_node *ddv)
1603 {
1604 	int error;
1605 	int i;
1606 	struct vattr *vap;
1607 	char *nm = NULL;
1608 	struct sdev_node *dv = NULL;
1609 
1610 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1611 	ASSERT((ddv->sdev_flags & SDEV_BUILD));
1612 
1613 	vap = sdev_getdefault_attr(VDIR);
1614 	for (i = 0; vtab[i].vt_name != NULL; i++) {
1615 		nm = vtab[i].vt_name;
1616 		ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1617 		dv = NULL;
1618 		error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1619 		    NULL, kcred, SDEV_READY);
1620 		if (error) {
1621 			cmn_err(CE_WARN, "%s/%s: error %d\n",
1622 			    ddv->sdev_name, nm, error);
1623 		} else {
1624 			ASSERT(dv);
1625 			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1626 			SDEV_SIMPLE_RELE(dv);
1627 		}
1628 	}
1629 }
1630 
1631 /*
1632  * Creating a backing store entry based on sdev_attr.
1633  * This is called either as part of node creation in a persistent directory
1634  * or from setattr/setsecattr to persist access attributes across reboot.
1635  */
1636 int
1637 sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1638 {
1639 	int error = 0;
1640 	struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1641 	struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1642 	struct vattr *vap = dv->sdev_attr;
1643 	char *nm = dv->sdev_name;
1644 	struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1645 
1646 	ASSERT(dv && dv->sdev_name && rdvp);
1647 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1648 
1649 lookup:
1650 	/* try to find it in the backing store */
1651 	error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1652 	    NULL);
1653 	if (error == 0) {
1654 		if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1655 			VN_HOLD(rrvp);
1656 			VN_RELE(*rvp);
1657 			*rvp = rrvp;
1658 		}
1659 
1660 		kmem_free(dv->sdev_attr, sizeof (vattr_t));
1661 		dv->sdev_attr = NULL;
1662 		dv->sdev_attrvp = *rvp;
1663 		return (0);
1664 	}
1665 
1666 	/* let's try to persist the node */
1667 	gethrestime(&vap->va_atime);
1668 	vap->va_mtime = vap->va_atime;
1669 	vap->va_ctime = vap->va_atime;
1670 	vap->va_mask |= AT_TYPE|AT_MODE;
1671 	switch (vap->va_type) {
1672 	case VDIR:
1673 		error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1674 		sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1675 		    (void *)(*rvp), error));
1676 		break;
1677 	case VCHR:
1678 	case VBLK:
1679 	case VREG:
1680 	case VDOOR:
1681 		error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1682 		    rvp, cred, 0, NULL, NULL);
1683 		sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1684 		    (void *)(*rvp), error));
1685 		if (!error)
1686 			VN_RELE(*rvp);
1687 		break;
1688 	case VLNK:
1689 		ASSERT(dv->sdev_symlink);
1690 		error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1691 		    NULL, 0);
1692 		sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1693 		    error));
1694 		break;
1695 	default:
1696 		cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1697 		    "create\n", nm);
1698 		/*NOTREACHED*/
1699 	}
1700 
1701 	/* go back to lookup to factor out spec node and set attrvp */
1702 	if (error == 0)
1703 		goto lookup;
1704 
1705 	return (error);
1706 }
1707 
1708 static int
1709 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1710 {
1711 	int error = 0;
1712 	struct sdev_node *dup = NULL;
1713 
1714 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1715 	if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1716 		sdev_direnter(ddv, *dv);
1717 	} else {
1718 		if (dup->sdev_state == SDEV_ZOMBIE) {
1719 			error = sdev_dirdelete(ddv, dup);
1720 			/*
1721 			 * The ZOMBIE node is still hanging
1722 			 * around with more than one reference counts.
1723 			 * Fail the new node creation so that
1724 			 * the directory cache won't have
1725 			 * duplicate entries for the same named node
1726 			 */
1727 			if (error == EBUSY) {
1728 				SDEV_SIMPLE_RELE(*dv);
1729 				sdev_nodedestroy(*dv, 0);
1730 				*dv = NULL;
1731 				return (error);
1732 			}
1733 			sdev_direnter(ddv, *dv);
1734 		} else {
1735 			ASSERT((*dv)->sdev_state != SDEV_ZOMBIE);
1736 			SDEV_SIMPLE_RELE(*dv);
1737 			sdev_nodedestroy(*dv, 0);
1738 			*dv = dup;
1739 		}
1740 	}
1741 
1742 	return (0);
1743 }
1744 
1745 static int
1746 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1747 {
1748 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1749 	return (sdev_dirdelete(ddv, *dv));
1750 }
1751 
1752 /*
1753  * update the in-core directory cache
1754  */
1755 int
1756 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1757     sdev_cache_ops_t ops)
1758 {
1759 	int error = 0;
1760 
1761 	ASSERT((SDEV_HELD(*dv)));
1762 
1763 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1764 	switch (ops) {
1765 	case SDEV_CACHE_ADD:
1766 		error = sdev_cache_add(ddv, dv, nm);
1767 		break;
1768 	case SDEV_CACHE_DELETE:
1769 		error = sdev_cache_delete(ddv, dv);
1770 		break;
1771 	default:
1772 		break;
1773 	}
1774 
1775 	return (error);
1776 }
1777 
1778 /*
1779  * retrieve the named entry from the directory cache
1780  */
1781 struct sdev_node *
1782 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1783 {
1784 	struct sdev_node *dv = NULL;
1785 
1786 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1787 	dv = sdev_findbyname(ddv, nm);
1788 
1789 	return (dv);
1790 }
1791 
1792 /*
1793  * Implicit reconfig for nodes constructed by a link generator
1794  * Start devfsadm if needed, or if devfsadm is in progress,
1795  * prepare to block on devfsadm either completing or
1796  * constructing the desired node.  As devfsadmd is global
1797  * in scope, constructing all necessary nodes, we only
1798  * need to initiate it once.
1799  */
1800 static int
1801 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1802 {
1803 	int error = 0;
1804 
1805 	if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1806 		sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1807 		    ddv->sdev_name, nm, devfsadm_state));
1808 		mutex_enter(&dv->sdev_lookup_lock);
1809 		SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1810 		mutex_exit(&dv->sdev_lookup_lock);
1811 		error = 0;
1812 	} else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1813 		sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1814 			ddv->sdev_name, nm, devfsadm_state));
1815 
1816 		sdev_devfsadmd_thread(ddv, dv, kcred);
1817 		mutex_enter(&dv->sdev_lookup_lock);
1818 		SDEV_BLOCK_OTHERS(dv,
1819 		    (SDEV_LOOKUP | SDEV_LGWAITING));
1820 		mutex_exit(&dv->sdev_lookup_lock);
1821 		error = 0;
1822 	} else {
1823 		error = -1;
1824 	}
1825 
1826 	return (error);
1827 }
1828 
1829 static int
1830 sdev_call_modulelookup(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1831     int (*fn)(char *, devname_handle_t *, struct cred *), struct cred *cred)
1832 {
1833 	struct vnode *rvp = NULL;
1834 	int error = 0;
1835 	struct vattr *vap;
1836 	devname_spec_t spec;
1837 	devname_handle_t *hdl;
1838 	void *args = NULL;
1839 	struct sdev_node *dv = *dvp;
1840 
1841 	ASSERT(dv && ddv);
1842 	hdl = &(dv->sdev_handle);
1843 	ASSERT(hdl->dh_data == dv);
1844 	mutex_enter(&dv->sdev_lookup_lock);
1845 	SDEV_BLOCK_OTHERS(dv, SDEV_LOOKUP);
1846 	mutex_exit(&dv->sdev_lookup_lock);
1847 	error = (*fn)(nm, hdl, cred);
1848 	if (error) {
1849 		return (error);
1850 	}
1851 
1852 	spec = hdl->dh_spec;
1853 	args = hdl->dh_args;
1854 	ASSERT(args);
1855 
1856 	switch (spec) {
1857 	case DEVNAME_NS_PATH:
1858 		/*
1859 		 * symlink of:
1860 		 *	/dev/dir/nm -> /device/...
1861 		 */
1862 		rvp = devname_configure_by_path((char *)args, NULL);
1863 		break;
1864 	case DEVNAME_NS_DEV:
1865 		/*
1866 		 * symlink of:
1867 		 *	/dev/dir/nm -> /dev/...
1868 		 */
1869 		rvp = devname_find_by_devpath((char *)args, NULL);
1870 		break;
1871 	default:
1872 		if (args)
1873 			kmem_free((char *)args, strlen(args) + 1);
1874 		return (ENOENT);
1875 
1876 	}
1877 
1878 	if (rvp == NULL) {
1879 		if (args)
1880 			kmem_free((char *)args, strlen(args) + 1);
1881 		return (ENOENT);
1882 	} else {
1883 		vap = sdev_getdefault_attr(VLNK);
1884 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1885 		/*
1886 		 * Could sdev_mknode return a different dv_node
1887 		 * once the lock is dropped?
1888 		 */
1889 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
1890 			rw_exit(&ddv->sdev_contents);
1891 			rw_enter(&ddv->sdev_contents, RW_WRITER);
1892 		}
1893 		error = sdev_mknode(ddv, nm, &dv, vap, NULL, args, cred,
1894 		    SDEV_READY);
1895 		rw_downgrade(&ddv->sdev_contents);
1896 		if (error) {
1897 			if (args)
1898 				kmem_free((char *)args, strlen(args) + 1);
1899 			return (error);
1900 		} else {
1901 			mutex_enter(&dv->sdev_lookup_lock);
1902 			SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
1903 			mutex_exit(&dv->sdev_lookup_lock);
1904 			error = 0;
1905 		}
1906 	}
1907 
1908 	if (args)
1909 		kmem_free((char *)args, strlen(args) + 1);
1910 
1911 	*dvp = dv;
1912 	return (0);
1913 }
1914 
1915 /*
1916  *  Support for specialized device naming construction mechanisms
1917  */
1918 static int
1919 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1920     int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1921     void *, char *), int flags, struct cred *cred)
1922 {
1923 	int rv = 0;
1924 	char *physpath = NULL;
1925 	struct vnode *rvp = NULL;
1926 	struct vattr vattr;
1927 	struct vattr *vap;
1928 	struct sdev_node *dv = *dvp;
1929 
1930 	mutex_enter(&dv->sdev_lookup_lock);
1931 	SDEV_BLOCK_OTHERS(dv, SDEV_LOOKUP);
1932 	mutex_exit(&dv->sdev_lookup_lock);
1933 
1934 	/* for non-devfsadm devices */
1935 	if (flags & SDEV_PATH) {
1936 		physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1937 		rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1938 		    NULL);
1939 		if (rv) {
1940 			kmem_free(physpath, MAXPATHLEN);
1941 			return (-1);
1942 		}
1943 
1944 		ASSERT(physpath);
1945 		rvp = devname_configure_by_path(physpath, NULL);
1946 		if (rvp == NULL) {
1947 			sdcmn_err3(("devname_configure_by_path: "
1948 			    "failed for /dev/%s/%s\n",
1949 			    ddv->sdev_name, nm));
1950 			kmem_free(physpath, MAXPATHLEN);
1951 			rv = -1;
1952 		} else {
1953 			vap = sdev_getdefault_attr(VLNK);
1954 			ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1955 
1956 			/*
1957 			 * Sdev_mknode may return back a different sdev_node
1958 			 * that was created by another thread that
1959 			 * raced to the directroy cache before this thread.
1960 			 *
1961 			 * With current directory cache mechanism
1962 			 * (linked list with the sdev_node name as
1963 			 * the entity key), this is a way to make sure
1964 			 * only one entry exists for the same name
1965 			 * in the same directory. The outcome is
1966 			 * the winner wins.
1967 			 */
1968 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1969 				rw_exit(&ddv->sdev_contents);
1970 				rw_enter(&ddv->sdev_contents, RW_WRITER);
1971 			}
1972 			rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1973 			    (void *)physpath, cred, SDEV_READY);
1974 			rw_downgrade(&ddv->sdev_contents);
1975 			kmem_free(physpath, MAXPATHLEN);
1976 			if (rv) {
1977 				return (rv);
1978 			} else {
1979 				mutex_enter(&dv->sdev_lookup_lock);
1980 				SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
1981 				mutex_exit(&dv->sdev_lookup_lock);
1982 				return (0);
1983 			}
1984 		}
1985 	} else if (flags & SDEV_VNODE) {
1986 		/*
1987 		 * DBNR has its own way to create the device
1988 		 * and return a backing store vnode in rvp
1989 		 */
1990 		ASSERT(callback);
1991 		rv = callback(ddv, nm, (void *)&rvp, kcred, NULL, NULL);
1992 		if (rv || (rvp == NULL)) {
1993 			sdcmn_err3(("devname_lookup_func: SDEV_VNODE "
1994 			    "callback failed \n"));
1995 			return (-1);
1996 		}
1997 		vap = sdev_getdefault_attr(rvp->v_type);
1998 		if (vap == NULL)
1999 			return (-1);
2000 
2001 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2002 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2003 			rw_exit(&ddv->sdev_contents);
2004 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2005 		}
2006 		rv = sdev_mknode(ddv, nm, &dv, vap, rvp, NULL,
2007 		    cred, SDEV_READY);
2008 		rw_downgrade(&ddv->sdev_contents);
2009 		if (rv)
2010 			return (rv);
2011 
2012 		mutex_enter(&dv->sdev_lookup_lock);
2013 		SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2014 		mutex_exit(&dv->sdev_lookup_lock);
2015 		return (0);
2016 	} else if (flags & SDEV_VATTR) {
2017 		/*
2018 		 * /dev/pts
2019 		 *
2020 		 * DBNR has its own way to create the device
2021 		 * "0" is returned upon success.
2022 		 *
2023 		 * callback is responsible to set the basic attributes,
2024 		 * e.g. va_type/va_uid/va_gid/
2025 		 *    dev_t if VCHR or VBLK/
2026 		 */
2027 		ASSERT(callback);
2028 		rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
2029 		if (rv) {
2030 			sdcmn_err3(("devname_lookup_func: SDEV_NONE "
2031 			    "callback failed \n"));
2032 			return (-1);
2033 		}
2034 
2035 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2036 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2037 			rw_exit(&ddv->sdev_contents);
2038 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2039 		}
2040 		rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
2041 		    cred, SDEV_READY);
2042 		rw_downgrade(&ddv->sdev_contents);
2043 
2044 		if (rv)
2045 			return (rv);
2046 
2047 		mutex_enter(&dv->sdev_lookup_lock);
2048 		SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2049 		mutex_exit(&dv->sdev_lookup_lock);
2050 		return (0);
2051 	} else {
2052 		impossible(("lookup: %s/%s by %s not supported (%d)\n",
2053 		    SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
2054 		    __LINE__));
2055 		rv = -1;
2056 	}
2057 
2058 	*dvp = dv;
2059 	return (rv);
2060 }
2061 
/*
 * Tell whether exec_name is exactly "devfsadm": 1 if so, 0 otherwise.
 *
 * note: because devfsadmd -> /usr/sbin/devfsadm
 * it is safe to use "devfsadm" to capture the lookups
 * from devfsadm and its daemon version.
 */
static int
is_devfsadm_thread(char *exec_name)
{
	return ((strcmp(exec_name, "devfsadm") == 0) ? 1 : 0);
}
2074 
2075 
2076 /*
2077  * Lookup Order:
2078  *	sdev_node cache;
2079  *	backing store (SDEV_PERSIST);
2080  *	DBNR: a. dir_ops implemented in the loadable modules;
2081  *	      b. vnode ops in vtab.
2082  */
2083 int
2084 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
2085     struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
2086     struct cred *, void *, char *), int flags)
2087 {
2088 	int rv = 0, nmlen;
2089 	struct vnode *rvp = NULL;
2090 	struct sdev_node *dv = NULL;
2091 	int	retried = 0;
2092 	int	error = 0;
2093 	struct devname_nsmap *map = NULL;
2094 	struct devname_ops *dirops = NULL;
2095 	int (*fn)(char *, devname_handle_t *, struct cred *) = NULL;
2096 	struct vattr vattr;
2097 	char *lookup_thread = curproc->p_user.u_comm;
2098 	int failed_flags = 0;
2099 	int (*vtor)(struct sdev_node *) = NULL;
2100 	int state;
2101 	int parent_state;
2102 	char *link = NULL;
2103 
2104 	if (SDEVTOV(ddv)->v_type != VDIR)
2105 		return (ENOTDIR);
2106 
2107 	/*
2108 	 * Empty name or ., return node itself.
2109 	 */
2110 	nmlen = strlen(nm);
2111 	if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
2112 		*vpp = SDEVTOV(ddv);
2113 		VN_HOLD(*vpp);
2114 		return (0);
2115 	}
2116 
2117 	/*
2118 	 * .., return the parent directory
2119 	 */
2120 	if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
2121 		*vpp = SDEVTOV(ddv->sdev_dotdot);
2122 		VN_HOLD(*vpp);
2123 		return (0);
2124 	}
2125 
2126 	rw_enter(&ddv->sdev_contents, RW_READER);
2127 	if (ddv->sdev_flags & SDEV_VTOR) {
2128 		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2129 		ASSERT(vtor);
2130 	}
2131 
2132 tryagain:
2133 	/*
2134 	 * (a) directory cache lookup:
2135 	 */
2136 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2137 	parent_state = ddv->sdev_state;
2138 	dv = sdev_cache_lookup(ddv, nm);
2139 	if (dv) {
2140 		state = dv->sdev_state;
2141 		switch (state) {
2142 		case SDEV_INIT:
2143 			if (is_devfsadm_thread(lookup_thread))
2144 				break;
2145 
2146 			/* ZOMBIED parent won't allow node creation */
2147 			if (parent_state == SDEV_ZOMBIE) {
2148 				SD_TRACE_FAILED_LOOKUP(ddv, nm,
2149 				    retried);
2150 				goto nolock_notfound;
2151 			}
2152 
2153 			mutex_enter(&dv->sdev_lookup_lock);
2154 			/* compensate the threads started after devfsadm */
2155 			if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2156 			    !(SDEV_IS_LOOKUP(dv)))
2157 				SDEV_BLOCK_OTHERS(dv,
2158 				    (SDEV_LOOKUP | SDEV_LGWAITING));
2159 
2160 			if (SDEV_IS_LOOKUP(dv)) {
2161 				failed_flags |= SLF_REBUILT;
2162 				rw_exit(&ddv->sdev_contents);
2163 				error = sdev_wait4lookup(dv, SDEV_LOOKUP);
2164 				mutex_exit(&dv->sdev_lookup_lock);
2165 				rw_enter(&ddv->sdev_contents, RW_READER);
2166 
2167 				if (error != 0) {
2168 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2169 					    retried);
2170 					goto nolock_notfound;
2171 				}
2172 
2173 				state = dv->sdev_state;
2174 				if (state == SDEV_INIT) {
2175 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2176 					    retried);
2177 					goto nolock_notfound;
2178 				} else if (state == SDEV_READY) {
2179 					goto found;
2180 				} else if (state == SDEV_ZOMBIE) {
2181 					rw_exit(&ddv->sdev_contents);
2182 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2183 					    retried);
2184 					SDEV_RELE(dv);
2185 					goto lookup_failed;
2186 				}
2187 			} else {
2188 				mutex_exit(&dv->sdev_lookup_lock);
2189 			}
2190 			break;
2191 		case SDEV_READY:
2192 			goto found;
2193 		case SDEV_ZOMBIE:
2194 			rw_exit(&ddv->sdev_contents);
2195 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2196 			SDEV_RELE(dv);
2197 			goto lookup_failed;
2198 		default:
2199 			rw_exit(&ddv->sdev_contents);
2200 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2201 			sdev_lookup_failed(ddv, nm, failed_flags);
2202 			*vpp = NULLVP;
2203 			return (ENOENT);
2204 		}
2205 	}
2206 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2207 
2208 	/*
2209 	 * ZOMBIED parent does not allow new node creation.
2210 	 * bail out early
2211 	 */
2212 	if (parent_state == SDEV_ZOMBIE) {
2213 		rw_exit(&ddv->sdev_contents);
2214 		*vpp = NULL;
2215 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2216 		return (ENOENT);
2217 	}
2218 
2219 	/*
2220 	 * (b0): backing store lookup
2221 	 *	SDEV_PERSIST is default except:
2222 	 *		1) pts nodes
2223 	 *		2) non-chmod'ed local nodes
2224 	 */
2225 	if (SDEV_IS_PERSIST(ddv)) {
2226 		error = devname_backstore_lookup(ddv, nm, &rvp);
2227 
2228 		if (!error) {
2229 			sdcmn_err3(("devname_backstore_lookup: "
2230 			    "found attrvp %p for %s\n", (void *)rvp, nm));
2231 
2232 			vattr.va_mask = AT_MODE|AT_UID|AT_GID;
2233 			error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
2234 			if (error) {
2235 				rw_exit(&ddv->sdev_contents);
2236 				if (dv)
2237 					SDEV_RELE(dv);
2238 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2239 				sdev_lookup_failed(ddv, nm, failed_flags);
2240 				*vpp = NULLVP;
2241 				return (ENOENT);
2242 			}
2243 
2244 			if (vattr.va_type == VLNK) {
2245 				error = sdev_getlink(rvp, &link);
2246 				if (error) {
2247 					rw_exit(&ddv->sdev_contents);
2248 					if (dv)
2249 						SDEV_RELE(dv);
2250 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2251 					    retried);
2252 					sdev_lookup_failed(ddv, nm,
2253 					    failed_flags);
2254 					*vpp = NULLVP;
2255 					return (ENOENT);
2256 				}
2257 				ASSERT(link != NULL);
2258 			}
2259 
2260 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
2261 				rw_exit(&ddv->sdev_contents);
2262 				rw_enter(&ddv->sdev_contents, RW_WRITER);
2263 			}
2264 			error = sdev_mknode(ddv, nm, &dv, &vattr,
2265 			    rvp, link, cred, SDEV_READY);
2266 			rw_downgrade(&ddv->sdev_contents);
2267 
2268 			if (link != NULL) {
2269 				kmem_free(link, strlen(link) + 1);
2270 				link = NULL;
2271 			}
2272 
2273 			if (error) {
2274 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2275 				rw_exit(&ddv->sdev_contents);
2276 				if (dv)
2277 					SDEV_RELE(dv);
2278 				goto lookup_failed;
2279 			} else {
2280 				goto found;
2281 			}
2282 		} else if (retried) {
2283 			rw_exit(&ddv->sdev_contents);
2284 			sdcmn_err3(("retry of lookup of %s/%s: failed\n",
2285 			    ddv->sdev_name, nm));
2286 			if (dv)
2287 				SDEV_RELE(dv);
2288 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2289 			sdev_lookup_failed(ddv, nm, failed_flags);
2290 			*vpp = NULLVP;
2291 			return (ENOENT);
2292 		}
2293 	}
2294 
2295 
2296 	/* first thread that is doing the lookup on this node */
2297 	if (!dv) {
2298 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2299 			rw_exit(&ddv->sdev_contents);
2300 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2301 		}
2302 		error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
2303 		    cred, SDEV_INIT);
2304 		if (!dv) {
2305 			rw_exit(&ddv->sdev_contents);
2306 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2307 			sdev_lookup_failed(ddv, nm, failed_flags);
2308 			*vpp = NULLVP;
2309 			return (ENOENT);
2310 		}
2311 		rw_downgrade(&ddv->sdev_contents);
2312 	}
2313 	ASSERT(dv);
2314 	ASSERT(SDEV_HELD(dv));
2315 
2316 	if (SDEV_IS_NO_NCACHE(dv)) {
2317 		failed_flags |= SLF_NO_NCACHE;
2318 	}
2319 
2320 	if (SDEV_IS_GLOBAL(ddv)) {
2321 		map = sdev_get_map(ddv, 1);
2322 		dirops = map ? map->dir_ops : NULL;
2323 		fn = dirops ? dirops->devnops_lookup : NULL;
2324 	}
2325 
2326 	/*
2327 	 * (b1) invoking devfsadm once per life time for devfsadm nodes
2328 	 */
2329 	if ((fn == NULL) && !callback) {
2330 
2331 		if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
2332 		    SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
2333 		    ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
2334 			ASSERT(SDEV_HELD(dv));
2335 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2336 			goto nolock_notfound;
2337 		}
2338 
2339 		/*
2340 		 * filter out known non-existent devices recorded
2341 		 * during initial reconfiguration boot for which
2342 		 * reconfig should not be done and lookup may
2343 		 * be short-circuited now.
2344 		 */
2345 		if (sdev_lookup_filter(ddv, nm)) {
2346 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2347 			goto nolock_notfound;
2348 		}
2349 
2350 		/* bypassing devfsadm internal nodes */
2351 		if (is_devfsadm_thread(lookup_thread)) {
2352 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2353 			goto nolock_notfound;
2354 		}
2355 
2356 		if (sdev_reconfig_disable) {
2357 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2358 			goto nolock_notfound;
2359 		}
2360 
2361 		error = sdev_call_devfsadmd(ddv, dv, nm);
2362 		if (error == 0) {
2363 			sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
2364 			    ddv->sdev_name, nm, curproc->p_user.u_comm));
2365 			if (sdev_reconfig_verbose) {
2366 				cmn_err(CE_CONT,
2367 				    "?lookup of %s/%s by %s: reconfig\n",
2368 				    ddv->sdev_name, nm, curproc->p_user.u_comm);
2369 			}
2370 			retried = 1;
2371 			failed_flags |= SLF_REBUILT;
2372 			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
2373 			SDEV_SIMPLE_RELE(dv);
2374 			goto tryagain;
2375 		} else {
2376 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2377 			goto nolock_notfound;
2378 		}
2379 	}
2380 
2381 	/*
2382 	 * (b2) Directory Based Name Resolution (DBNR):
2383 	 *	ddv	- parent
2384 	 *	nm	- /dev/(ddv->sdev_name)/nm
2385 	 *
2386 	 *	note: module vnode ops take precedence than the build-in ones
2387 	 */
2388 	if (fn) {
2389 		error = sdev_call_modulelookup(ddv, &dv, nm, fn, cred);
2390 		if (error) {
2391 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2392 			goto notfound;
2393 		} else {
2394 			goto found;
2395 		}
2396 	} else if (callback) {
2397 		error = sdev_call_dircallback(ddv, &dv, nm, callback,
2398 		    flags, cred);
2399 		if (error == 0) {
2400 			goto found;
2401 		} else {
2402 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2403 			goto notfound;
2404 		}
2405 	}
2406 	ASSERT(rvp);
2407 
2408 found:
2409 	ASSERT(!(dv->sdev_flags & SDEV_STALE));
2410 	ASSERT(dv->sdev_state == SDEV_READY);
2411 	if (vtor) {
2412 		/*
2413 		 * Check validity of returned node
2414 		 */
2415 		switch (vtor(dv)) {
2416 		case SDEV_VTOR_VALID:
2417 			break;
2418 		case SDEV_VTOR_INVALID:
2419 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2420 			sdcmn_err7(("lookup: destroy invalid "
2421 			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
2422 			goto nolock_notfound;
2423 		case SDEV_VTOR_SKIP:
2424 			sdcmn_err7(("lookup: node not applicable - "
2425 			    "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
2426 			rw_exit(&ddv->sdev_contents);
2427 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2428 			SDEV_RELE(dv);
2429 			goto lookup_failed;
2430 		default:
2431 			cmn_err(CE_PANIC,
2432 			    "dev fs: validator failed: %s(%p)\n",
2433 			    dv->sdev_name, (void *)dv);
2434 			break;
2435 			/*NOTREACHED*/
2436 		}
2437 	}
2438 
2439 	if ((SDEVTOV(dv)->v_type == VDIR) && SDEV_IS_GLOBAL(dv)) {
2440 		rw_enter(&dv->sdev_contents, RW_READER);
2441 		(void) sdev_get_map(dv, 1);
2442 		rw_exit(&dv->sdev_contents);
2443 	}
2444 	rw_exit(&ddv->sdev_contents);
2445 	rv = sdev_to_vp(dv, vpp);
2446 	sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
2447 	    "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
2448 	    dv->sdev_state, nm, rv));
2449 	return (rv);
2450 
2451 notfound:
2452 	mutex_enter(&dv->sdev_lookup_lock);
2453 	SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2454 	mutex_exit(&dv->sdev_lookup_lock);
2455 nolock_notfound:
2456 	/*
2457 	 * Destroy the node that is created for synchronization purposes.
2458 	 */
2459 	sdcmn_err3(("devname_lookup_func: %s with state %d\n",
2460 	    nm, dv->sdev_state));
2461 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2462 	if (dv->sdev_state == SDEV_INIT) {
2463 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2464 			rw_exit(&ddv->sdev_contents);
2465 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2466 		}
2467 
2468 		/*
2469 		 * Node state may have changed during the lock
2470 		 * changes. Re-check.
2471 		 */
2472 		if (dv->sdev_state == SDEV_INIT) {
2473 			(void) sdev_dirdelete(ddv, dv);
2474 			rw_exit(&ddv->sdev_contents);
2475 			sdev_lookup_failed(ddv, nm, failed_flags);
2476 			*vpp = NULL;
2477 			return (ENOENT);
2478 		}
2479 	}
2480 
2481 	rw_exit(&ddv->sdev_contents);
2482 	SDEV_RELE(dv);
2483 
2484 lookup_failed:
2485 	sdev_lookup_failed(ddv, nm, failed_flags);
2486 	*vpp = NULL;
2487 	return (ENOENT);
2488 }
2489 
2490 /*
2491  * Given a directory node, mark all nodes beneath as
2492  * STALE, i.e. nodes that don't exist as far as new
2493  * consumers are concerned
2494  */
2495 void
2496 sdev_stale(struct sdev_node *ddv)
2497 {
2498 	struct sdev_node *dv;
2499 	struct vnode *vp;
2500 
2501 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2502 
2503 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2504 	for (dv = ddv->sdev_dot; dv; dv = dv->sdev_next) {
2505 		vp = SDEVTOV(dv);
2506 		if (vp->v_type == VDIR)
2507 			sdev_stale(dv);
2508 
2509 		sdcmn_err9(("sdev_stale: setting stale %s\n",
2510 		    dv->sdev_name));
2511 		dv->sdev_flags |= SDEV_STALE;
2512 	}
2513 	ddv->sdev_flags |= SDEV_BUILD;
2514 	rw_exit(&ddv->sdev_contents);
2515 }
2516 
2517 /*
2518  * Given a directory node, clean out all the nodes beneath.
2519  * If expr is specified, clean node with names matching expr.
2520  * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2521  *	so they are excluded from future lookups.
2522  */
2523 int
2524 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2525 {
2526 	int error = 0;
2527 	int busy = 0;
2528 	struct vnode *vp;
2529 	struct sdev_node *dv, *next = NULL;
2530 	int bkstore = 0;
2531 	int len = 0;
2532 	char *bks_name = NULL;
2533 
2534 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2535 
2536 	/*
2537 	 * We try our best to destroy all unused sdev_node's
2538 	 */
2539 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2540 	for (dv = ddv->sdev_dot; dv; dv = next) {
2541 		next = dv->sdev_next;
2542 		vp = SDEVTOV(dv);
2543 
2544 		if (expr && gmatch(dv->sdev_name, expr) == 0)
2545 			continue;
2546 
2547 		if (vp->v_type == VDIR &&
2548 		    sdev_cleandir(dv, NULL, flags) != 0) {
2549 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2550 			    dv->sdev_name));
2551 			busy++;
2552 			continue;
2553 		}
2554 
2555 		if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2556 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2557 			    dv->sdev_name));
2558 			busy++;
2559 			continue;
2560 		}
2561 
2562 		/*
2563 		 * at this point, either dv is not held or SDEV_ENFORCE
2564 		 * is specified. In either case, dv needs to be deleted
2565 		 */
2566 		SDEV_HOLD(dv);
2567 
2568 		bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2569 		if (bkstore && (vp->v_type == VDIR))
2570 			bkstore += 1;
2571 
2572 		if (bkstore) {
2573 			len = strlen(dv->sdev_name) + 1;
2574 			bks_name = kmem_alloc(len, KM_SLEEP);
2575 			bcopy(dv->sdev_name, bks_name, len);
2576 		}
2577 
2578 		error = sdev_dirdelete(ddv, dv);
2579 
2580 		if (error == EBUSY) {
2581 			sdcmn_err9(("sdev_cleandir: dir busy\n"));
2582 			busy++;
2583 		}
2584 
2585 		/* take care the backing store clean up */
2586 		if (bkstore && (error == 0)) {
2587 			ASSERT(bks_name);
2588 			ASSERT(ddv->sdev_attrvp);
2589 
2590 			if (bkstore == 1) {
2591 				error = VOP_REMOVE(ddv->sdev_attrvp,
2592 				    bks_name, kcred, NULL, 0);
2593 			} else if (bkstore == 2) {
2594 				error = VOP_RMDIR(ddv->sdev_attrvp,
2595 				    bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2596 			}
2597 
2598 			/* do not propagate the backing store errors */
2599 			if (error) {
2600 				sdcmn_err9(("sdev_cleandir: backing store"
2601 				    "not cleaned\n"));
2602 				error = 0;
2603 			}
2604 
2605 			bkstore = 0;
2606 			kmem_free(bks_name, len);
2607 			bks_name = NULL;
2608 			len = 0;
2609 		}
2610 	}
2611 
2612 	ddv->sdev_flags |= SDEV_BUILD;
2613 	rw_exit(&ddv->sdev_contents);
2614 
2615 	if (busy) {
2616 		error = EBUSY;
2617 	}
2618 
2619 	return (error);
2620 }
2621 
2622 /*
2623  * a convenient wrapper for readdir() funcs
2624  */
2625 size_t
2626 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2627 {
2628 	size_t reclen = DIRENT64_RECLEN(strlen(nm));
2629 	if (reclen > size)
2630 		return (0);
2631 
2632 	de->d_ino = (ino64_t)ino;
2633 	de->d_off = (off64_t)off + 1;
2634 	de->d_reclen = (ushort_t)reclen;
2635 	(void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2636 	return (reclen);
2637 }
2638 
2639 /*
2640  * sdev_mount service routines
2641  */
2642 int
2643 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2644 {
2645 	int	error;
2646 
2647 	if (uap->datalen != sizeof (*args))
2648 		return (EINVAL);
2649 
2650 	if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2651 		cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2652 		    "get user data. error %d\n", error);
2653 		return (EFAULT);
2654 	}
2655 
2656 	return (0);
2657 }
2658 
/*
 * nextdp(dp): address of the dirent64 record that immediately follows dp
 * in a packed buffer, found by advancing dp by its own d_reclen bytes.
 * Any earlier definition of nextdp is discarded first.
 */
#ifdef nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *) \
			    (intptr_t)((char *)(dp) + (dp)->d_reclen))
2664 
/*
 * readdir helper func
 *
 * Copies directory entries of the /dev directory vp into uiop, starting
 * at directory offset uiop->uio_loffset.  Offset 0 is ".", offset 1 is
 * "..", and the child nodes chained from ddv->sdev_dot follow from
 * offset 2; each emitted record's d_off is the offset of the entry after
 * it.  On success uio_loffset is advanced to the offset reached.
 *
 * The caller must hold ddv->sdev_contents as reader (asserted).  For a
 * global directory that lock is dropped and re-acquired as reader around
 * the wait for an outstanding SDEV_READDIR.
 *
 *	vp	- directory vnode being read
 *	uiop	- destination; must describe exactly one iovec
 *	cred	- handed to the module readdir callback and to
 *		  sdev_filldir_from_store()
 *	eofp	- if non-NULL, set to 1 once the child list was exhausted
 *		  (or the starting offset is already >= MAXOFF_T), else 0
 *	flags	- SDEV_BROWSE is one of the preconditions for starting a
 *		  devfsadm reconfiguration from here
 *
 * Returns 0 on success, EINVAL if uio_iovcnt != 1 or the buffer cannot
 * hold the "." / ".." record, ENOTDIR if vp is not a directory, or the
 * error from uiomove().
 *
 * NOTE(review): when nothing is copied out (outcount == 0), a non-zero
 * error left behind by sdev_filldir_from_store() or by the module
 * callback on the last node visited is returned as-is - confirm that
 * this is intended.
 */
int
devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
    int flags)
{
	struct sdev_node *ddv = VTOSDEV(vp);
	struct sdev_node *dv;
	dirent64_t	*dp;
	ulong_t		outcount = 0;
	size_t		namelen;
	ulong_t		alloc_count;
	void		*outbuf;
	struct iovec	*iovp;
	int		error = 0;
	size_t		reclen;
	offset_t	diroff;
	offset_t	soff;
	int		this_reclen;
	struct devname_nsmap	*map = NULL;
	struct devname_ops	*dirops = NULL;
	int (*fn)(devname_handle_t *, struct cred *) = NULL;
	int (*vtor)(struct sdev_node *) = NULL;
	struct vattr attr;
	timestruc_t now;

	ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
	ASSERT(RW_READ_HELD(&ddv->sdev_contents));

	/* nothing can live at or beyond MAXOFF_T: report end of directory */
	if (uiop->uio_loffset >= MAXOFF_T) {
		if (eofp)
			*eofp = 1;
		return (0);
	}

	if (uiop->uio_iovcnt != 1)
		return (EINVAL);

	if (vp->v_type != VDIR)
		return (ENOTDIR);

	/* pick up the directory's node validator, if it has one */
	if (ddv->sdev_flags & SDEV_VTOR) {
		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
		ASSERT(vtor);
	}

	if (eofp != NULL)
		*eofp = 0;

	/*
	 * Records are staged in a kernel buffer the size of the caller's
	 * iovec and moved out in one uiomove() at the end.
	 */
	soff = uiop->uio_loffset;
	iovp = uiop->uio_iov;
	alloc_count = iovp->iov_len;
	dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
	outcount = 0;

	/* a zombie directory is listed straight from what is cached */
	if (ddv->sdev_state == SDEV_ZOMBIE)
		goto get_cache;

	if (SDEV_IS_GLOBAL(ddv)) {
		map = sdev_get_map(ddv, 0);
		dirops = map ? map->dir_ops : NULL;
		fn = dirops ? dirops->devnops_readdir : NULL;

		if (map && map->dir_map) {
			/*
			 * load the name mapping rule database
			 * through invoking devfsadm and symlink
			 * all the entries in the map
			 */
			devname_rdr_result_t rdr_result;
			int do_thread = 0;

			rw_enter(&map->dir_lock, RW_READER);
			do_thread = map->dir_maploaded ? 0 : 1;
			rw_exit(&map->dir_lock);

			if (do_thread) {
				mutex_enter(&ddv->sdev_lookup_lock);
				SDEV_BLOCK_OTHERS(ddv, SDEV_READDIR);
				mutex_exit(&ddv->sdev_lookup_lock);

				sdev_dispatch_to_nsrdr_thread(ddv,
				    map->dir_map, &rdr_result);
			}
		} else if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
		    !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
		    !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
		    ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
		    !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
		    !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
		    !sdev_reconfig_disable) {
			/*
			 * invoking "devfsadm" to do system device reconfig
			 */
			mutex_enter(&ddv->sdev_lookup_lock);
			SDEV_BLOCK_OTHERS(ddv,
			    (SDEV_READDIR|SDEV_LGWAITING));
			mutex_exit(&ddv->sdev_lookup_lock);

			sdcmn_err8(("readdir of %s by %s: reconfig\n",
			    ddv->sdev_path, curproc->p_user.u_comm));
			if (sdev_reconfig_verbose) {
				cmn_err(CE_CONT,
				    "?readdir of %s by %s: reconfig\n",
				    ddv->sdev_path, curproc->p_user.u_comm);
			}

			sdev_devfsadmd_thread(ddv, NULL, kcred);
		} else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
			/*
			 * compensate the "ls" started later than "devfsadm"
			 */
			mutex_enter(&ddv->sdev_lookup_lock);
			SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
			mutex_exit(&ddv->sdev_lookup_lock);
		}

		/*
		 * release the contents lock so that
		 * the cache may be updated by devfsadmd
		 */
		rw_exit(&ddv->sdev_contents);
		mutex_enter(&ddv->sdev_lookup_lock);
		if (SDEV_IS_READDIR(ddv))
			(void) sdev_wait4lookup(ddv, SDEV_READDIR);
		mutex_exit(&ddv->sdev_lookup_lock);
		rw_enter(&ddv->sdev_contents, RW_READER);

		sdcmn_err4(("readdir of directory %s by %s\n",
		    ddv->sdev_name, curproc->p_user.u_comm));
		/*
		 * SDEV_BUILD pending: refill a persistent directory from
		 * its backing store, then clear the flag either way.
		 */
		if (ddv->sdev_flags & SDEV_BUILD) {
			if (SDEV_IS_PERSIST(ddv)) {
				error = sdev_filldir_from_store(ddv,
				    alloc_count, cred);
			}
			ddv->sdev_flags &= ~SDEV_BUILD;
		}
	}

get_cache:
	/* handle "." and ".." */
	diroff = 0;
	if (soff == 0) {
		/* first time */
		this_reclen = DIRENT64_RECLEN(1);
		if (alloc_count < this_reclen) {
			error = EINVAL;
			goto done;
		}

		dp->d_ino = (ino64_t)ddv->sdev_ino;
		dp->d_off = (off64_t)1;
		dp->d_reclen = (ushort_t)this_reclen;

		(void) strncpy(dp->d_name, ".",
		    DIRENT64_NAMELEN(this_reclen));
		outcount += dp->d_reclen;
		dp = nextdp(dp);
	}

	diroff++;
	if (soff <= 1) {
		this_reclen = DIRENT64_RECLEN(2);
		if (alloc_count < outcount + this_reclen) {
			error = EINVAL;
			goto done;
		}

		dp->d_reclen = (ushort_t)this_reclen;
		dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
		dp->d_off = (off64_t)2;

		(void) strncpy(dp->d_name, "..",
		    DIRENT64_NAMELEN(this_reclen));
		outcount += dp->d_reclen;

		dp = nextdp(dp);
	}


	/* gets the cache */
	diroff++;
	/* diroff counts every child, emitted or not, so offsets stay stable */
	for (dv = ddv->sdev_dot; dv; dv = dv->sdev_next, diroff++) {
		sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
		    diroff, soff, dv->sdev_name));

		/* bypassing pre-matured nodes */
		if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
			sdcmn_err3(("sdev_readdir: pre-mature node  "
			    "%s\n", dv->sdev_name));
			continue;
		}

		/* don't list stale nodes */
		if (dv->sdev_flags & SDEV_STALE) {
			sdcmn_err4(("sdev_readdir: STALE node  "
			    "%s\n", dv->sdev_name));
			continue;
		}

		/*
		 * Check validity of node
		 */
		if (vtor) {
			switch (vtor(dv)) {
			case SDEV_VTOR_VALID:
				break;
			case SDEV_VTOR_INVALID:
			case SDEV_VTOR_SKIP:
				continue;
			default:
				cmn_err(CE_PANIC,
				    "dev fs: validator failed: %s(%p)\n",
				    dv->sdev_name, (void *)dv);
				break;
			/*NOTREACHED*/
			}
		}

		/*
		 * call back into the module for the validity/bookkeeping
		 * of this entry
		 */
		if (fn) {
			error = (*fn)(&(dv->sdev_handle), cred);
			if (error) {
				sdcmn_err4(("sdev_readdir: module did not "
				    "validate %s\n", dv->sdev_name));
				continue;
			}
		}

		namelen = strlen(dv->sdev_name);
		reclen = DIRENT64_RECLEN(namelen);
		/* staging buffer exhausted: stop with dv still non-NULL */
		if (outcount + reclen > alloc_count) {
			goto full;
		}
		dp->d_reclen = (ushort_t)reclen;
		dp->d_ino = (ino64_t)dv->sdev_ino;
		dp->d_off = (off64_t)diroff + 1;
		(void) strncpy(dp->d_name, dv->sdev_name,
		    DIRENT64_NAMELEN(reclen));
		outcount += reclen;
		dp = nextdp(dp);
	}

full:
	sdcmn_err4(("sdev_readdir: moving %lu bytes: "
	    "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
	    (void *)dv));

	if (outcount)
		error = uiomove(outbuf, outcount, UIO_READ, uiop);

	/* dv is NULL only if the loop ran off the end of the child list */
	if (!error) {
		uiop->uio_loffset = diroff;
		if (eofp)
			*eofp = dv ? 0 : 1;
	}


	/* stamp ctime/atime on the backing store vnode; result is ignored */
	if (ddv->sdev_attrvp) {
		gethrestime(&now);
		attr.va_ctime = now;
		attr.va_atime = now;
		attr.va_mask = AT_CTIME|AT_ATIME;

		(void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
	}
done:
	kmem_free(outbuf, alloc_count);
	return (error);
}
2939 
2940 
2941 static int
2942 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2943 {
2944 	vnode_t *vp;
2945 	vnode_t *cvp;
2946 	struct sdev_node *svp;
2947 	char *nm;
2948 	struct pathname pn;
2949 	int error;
2950 	int persisted = 0;
2951 
2952 	if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2953 		return (error);
2954 	nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2955 
2956 	vp = rootdir;
2957 	VN_HOLD(vp);
2958 
2959 	while (pn_pathleft(&pn)) {
2960 		ASSERT(vp->v_type == VDIR);
2961 		(void) pn_getcomponent(&pn, nm);
2962 		error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2963 		    NULL, NULL);
2964 		VN_RELE(vp);
2965 
2966 		if (error)
2967 			break;
2968 
2969 		/* traverse mount points encountered on our journey */
2970 		if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2971 			VN_RELE(cvp);
2972 			break;
2973 		}
2974 
2975 		/*
2976 		 * Direct the operation to the persisting filesystem
2977 		 * underlying /dev.  Bail if we encounter a
2978 		 * non-persistent dev entity here.
2979 		 */
2980 		if (cvp->v_vfsp->vfs_fstype == devtype) {
2981 
2982 			if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2983 				error = ENOENT;
2984 				VN_RELE(cvp);
2985 				break;
2986 			}
2987 
2988 			if (VTOSDEV(cvp) == NULL) {
2989 				error = ENOENT;
2990 				VN_RELE(cvp);
2991 				break;
2992 			}
2993 			svp = VTOSDEV(cvp);
2994 			if ((vp = svp->sdev_attrvp) == NULL) {
2995 				error = ENOENT;
2996 				VN_RELE(cvp);
2997 				break;
2998 			}
2999 			persisted = 1;
3000 			VN_HOLD(vp);
3001 			VN_RELE(cvp);
3002 			cvp = vp;
3003 		}
3004 
3005 		vp = cvp;
3006 		pn_skipslash(&pn);
3007 	}
3008 
3009 	kmem_free(nm, MAXNAMELEN);
3010 	pn_free(&pn);
3011 
3012 	if (error)
3013 		return (error);
3014 
3015 	/*
3016 	 * Only return persisted nodes in the filesystem underlying /dev.
3017 	 */
3018 	if (!persisted) {
3019 		VN_RELE(vp);
3020 		return (ENOENT);
3021 	}
3022 
3023 	*r_vp = vp;
3024 	return (0);
3025 }
3026 
3027 int
3028 sdev_modctl_readdir(const char *dir, char ***dirlistp,
3029 	int *npathsp, int *npathsp_alloc)
3030 {
3031 	char	**pathlist = NULL;
3032 	char	**newlist = NULL;
3033 	int	npaths = 0;
3034 	int	npaths_alloc = 0;
3035 	dirent64_t *dbuf = NULL;
3036 	int	n;
3037 	char	*s;
3038 	int error;
3039 	vnode_t *vp;
3040 	int eof;
3041 	struct iovec iov;
3042 	struct uio uio;
3043 	struct dirent64 *dp;
3044 	size_t dlen;
3045 	size_t dbuflen;
3046 	int ndirents = 64;
3047 	char *nm;
3048 
3049 	error = sdev_modctl_lookup(dir, &vp);
3050 	sdcmn_err11(("modctl readdir: %s by %s: %s\n",
3051 	    dir, curproc->p_user.u_comm,
3052 	    (error == 0) ? "ok" : "failed"));
3053 	if (error)
3054 		return (error);
3055 
3056 	dlen = ndirents * (sizeof (*dbuf));
3057 	dbuf = kmem_alloc(dlen, KM_SLEEP);
3058 
3059 	uio.uio_iov = &iov;
3060 	uio.uio_iovcnt = 1;
3061 	uio.uio_segflg = UIO_SYSSPACE;
3062 	uio.uio_fmode = 0;
3063 	uio.uio_extflg = UIO_COPY_CACHED;
3064 	uio.uio_loffset = 0;
3065 	uio.uio_llimit = MAXOFFSET_T;
3066 
3067 	eof = 0;
3068 	error = 0;
3069 	while (!error && !eof) {
3070 		uio.uio_resid = dlen;
3071 		iov.iov_base = (char *)dbuf;
3072 		iov.iov_len = dlen;
3073 
3074 		(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3075 		error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
3076 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3077 
3078 		dbuflen = dlen - uio.uio_resid;
3079 
3080 		if (error || dbuflen == 0)
3081 			break;
3082 
3083 		for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
3084 			dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
3085 
3086 			nm = dp->d_name;
3087 
3088 			if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
3089 				continue;
3090 
3091 			if (npaths == npaths_alloc) {
3092 				npaths_alloc += 64;
3093 				newlist = (char **)
3094 				    kmem_zalloc((npaths_alloc + 1) *
3095 					sizeof (char *), KM_SLEEP);
3096 				if (pathlist) {
3097 					bcopy(pathlist, newlist,
3098 					    npaths * sizeof (char *));
3099 					kmem_free(pathlist,
3100 					    (npaths + 1) * sizeof (char *));
3101 				}
3102 				pathlist = newlist;
3103 			}
3104 			n = strlen(nm) + 1;
3105 			s = kmem_alloc(n, KM_SLEEP);
3106 			bcopy(nm, s, n);
3107 			pathlist[npaths++] = s;
3108 			sdcmn_err11(("  %s/%s\n", dir, s));
3109 		}
3110 	}
3111 
3112 exit:
3113 	VN_RELE(vp);
3114 
3115 	if (dbuf)
3116 		kmem_free(dbuf, dlen);
3117 
3118 	if (error)
3119 		return (error);
3120 
3121 	*dirlistp = pathlist;
3122 	*npathsp = npaths;
3123 	*npathsp_alloc = npaths_alloc;
3124 
3125 	return (0);
3126 }
3127 
/*
 * Release a name list produced by sdev_modctl_readdir().
 *
 *	pathlist	- array of name strings, or NULL for an empty result
 *	npaths		- number of strings stored in pathlist
 *	npaths_alloc	- element count the array was sized for; the array
 *			  was allocated with one slot more than this
 */
void
sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
{
	int	i, n;

	/*
	 * sdev_modctl_readdir() returns a NULL list (with npaths_alloc 0)
	 * when it found no names; there is nothing to free in that case.
	 */
	if (pathlist == NULL)
		return;

	for (i = 0; i < npaths; i++) {
		n = strlen(pathlist[i]) + 1;
		kmem_free(pathlist[i], n);
	}

	kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
}
3140 
3141 int
3142 sdev_modctl_devexists(const char *path)
3143 {
3144 	vnode_t *vp;
3145 	int error;
3146 
3147 	error = sdev_modctl_lookup(path, &vp);
3148 	sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
3149 	    path, curproc->p_user.u_comm,
3150 	    (error == 0) ? "ok" : "failed"));
3151 	if (error == 0)
3152 		VN_RELE(vp);
3153 
3154 	return (error);
3155 }
3156 
3157 void
3158 sdev_update_newnsmap(struct devname_nsmap *map, char *module, char *mapname)
3159 {
3160 	rw_enter(&map->dir_lock, RW_WRITER);
3161 	if (module) {
3162 		ASSERT(map->dir_newmodule == NULL);
3163 		map->dir_newmodule = i_ddi_strdup(module, KM_SLEEP);
3164 	}
3165 	if (mapname) {
3166 		ASSERT(map->dir_newmap == NULL);
3167 		map->dir_newmap = i_ddi_strdup(mapname, KM_SLEEP);
3168 	}
3169 
3170 	map->dir_invalid = 1;
3171 	rw_exit(&map->dir_lock);
3172 }
3173 
3174 void
3175 sdev_replace_nsmap(struct devname_nsmap *map, char *module, char *mapname)
3176 {
3177 	char *old_module = NULL;
3178 	char *old_map = NULL;
3179 
3180 	ASSERT(RW_LOCK_HELD(&map->dir_lock));
3181 	if (!rw_tryupgrade(&map->dir_lock)) {
3182 		rw_exit(&map->dir_lock);
3183 		rw_enter(&map->dir_lock, RW_WRITER);
3184 	}
3185 
3186 	old_module = map->dir_module;
3187 	if (module) {
3188 		if (old_module && strcmp(old_module, module) != 0) {
3189 			kmem_free(old_module, strlen(old_module) + 1);
3190 		}
3191 		map->dir_module = module;
3192 		map->dir_newmodule = NULL;
3193 	}
3194 
3195 	old_map = map->dir_map;
3196 	if (mapname) {
3197 		if (old_map && strcmp(old_map, mapname) != 0) {
3198 			kmem_free(old_map, strlen(old_map) + 1);
3199 		}
3200 
3201 		map->dir_map = mapname;
3202 		map->dir_newmap = NULL;
3203 	}
3204 	map->dir_maploaded = 0;
3205 	map->dir_invalid = 0;
3206 	rw_downgrade(&map->dir_lock);
3207 }
3208 
3209 /*
3210  * dir_name should have at least one attribute,
3211  *	dir_module
3212  *	or dir_map
3213  *	or both
3214  * caller holds the devname_nsmaps_lock
3215  */
3216 void
3217 sdev_insert_nsmap(char *dir_name, char *dir_module, char *dir_map)
3218 {
3219 	struct devname_nsmap *map;
3220 	int len = 0;
3221 
3222 	ASSERT(dir_name);
3223 	ASSERT(dir_module || dir_map);
3224 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3225 
3226 	if (map = sdev_get_nsmap_by_dir(dir_name, 1)) {
3227 		sdev_update_newnsmap(map, dir_module, dir_map);
3228 		return;
3229 	}
3230 
3231 	map = (struct devname_nsmap *)kmem_zalloc(sizeof (*map), KM_SLEEP);
3232 	map->dir_name = i_ddi_strdup(dir_name, KM_SLEEP);
3233 	if (dir_module) {
3234 		map->dir_module = i_ddi_strdup(dir_module, KM_SLEEP);
3235 	}
3236 
3237 	if (dir_map) {
3238 		if (dir_map[0] != '/') {
3239 			len = strlen(ETC_DEV_DIR) + strlen(dir_map) + 2;
3240 			map->dir_map = kmem_zalloc(len, KM_SLEEP);
3241 			(void) snprintf(map->dir_map, len, "%s/%s", ETC_DEV_DIR,
3242 			    dir_map);
3243 		} else {
3244 			map->dir_map = i_ddi_strdup(dir_map, KM_SLEEP);
3245 		}
3246 	}
3247 
3248 	map->dir_ops = NULL;
3249 	map->dir_maploaded = 0;
3250 	map->dir_invalid = 0;
3251 	rw_init(&map->dir_lock, NULL, RW_DEFAULT, NULL);
3252 
3253 	map->next = devname_nsmaps;
3254 	map->prev = NULL;
3255 	if (devname_nsmaps) {
3256 		devname_nsmaps->prev = map;
3257 	}
3258 	devname_nsmaps = map;
3259 }
3260 
3261 struct devname_nsmap *
3262 sdev_get_nsmap_by_dir(char *dir_path, int locked)
3263 {
3264 	struct devname_nsmap *map = NULL;
3265 
3266 	if (!locked)
3267 		mutex_enter(&devname_nsmaps_lock);
3268 	for (map = devname_nsmaps; map; map = map->next) {
3269 		sdcmn_err6(("sdev_get_nsmap_by_dir: dir %s\n", map->dir_name));
3270 		if (strcmp(map->dir_name, dir_path) == 0) {
3271 			if (!locked)
3272 				mutex_exit(&devname_nsmaps_lock);
3273 			return (map);
3274 		}
3275 	}
3276 	if (!locked)
3277 		mutex_exit(&devname_nsmaps_lock);
3278 	return (NULL);
3279 }
3280 
3281 struct devname_nsmap *
3282 sdev_get_nsmap_by_module(char *mod_name)
3283 {
3284 	struct devname_nsmap *map = NULL;
3285 
3286 	mutex_enter(&devname_nsmaps_lock);
3287 	for (map = devname_nsmaps; map; map = map->next) {
3288 		sdcmn_err7(("sdev_get_nsmap_by_module: module %s\n",
3289 		    map->dir_module));
3290 		if (map->dir_module && strcmp(map->dir_module, mod_name) == 0) {
3291 			mutex_exit(&devname_nsmaps_lock);
3292 			return (map);
3293 		}
3294 	}
3295 	mutex_exit(&devname_nsmaps_lock);
3296 	return (NULL);
3297 }
3298 
3299 void
3300 sdev_invalidate_nsmaps()
3301 {
3302 	struct devname_nsmap *map = NULL;
3303 
3304 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3305 
3306 	if (devname_nsmaps == NULL)
3307 		return;
3308 
3309 	for (map = devname_nsmaps; map; map = map->next) {
3310 		rw_enter(&map->dir_lock, RW_WRITER);
3311 		map->dir_invalid = 1;
3312 		rw_exit(&map->dir_lock);
3313 	}
3314 	devname_nsmaps_invalidated = 1;
3315 }
3316 
3317 
3318 int
3319 sdev_nsmaps_loaded()
3320 {
3321 	int ret = 0;
3322 
3323 	mutex_enter(&devname_nsmaps_lock);
3324 	if (devname_nsmaps_loaded)
3325 		ret = 1;
3326 
3327 	mutex_exit(&devname_nsmaps_lock);
3328 	return (ret);
3329 }
3330 
3331 int
3332 sdev_nsmaps_reloaded()
3333 {
3334 	int ret = 0;
3335 
3336 	mutex_enter(&devname_nsmaps_lock);
3337 	if (devname_nsmaps_invalidated)
3338 		ret = 1;
3339 
3340 	mutex_exit(&devname_nsmaps_lock);
3341 	return (ret);
3342 }
3343 
3344 static void
3345 sdev_free_nsmap(struct devname_nsmap *map)
3346 {
3347 	ASSERT(map);
3348 	if (map->dir_name)
3349 		kmem_free(map->dir_name, strlen(map->dir_name) + 1);
3350 	if (map->dir_module)
3351 		kmem_free(map->dir_module, strlen(map->dir_module) + 1);
3352 	if (map->dir_map)
3353 		kmem_free(map->dir_map, strlen(map->dir_map) + 1);
3354 	rw_destroy(&map->dir_lock);
3355 	kmem_free(map, sizeof (*map));
3356 }
3357 
3358 void
3359 sdev_validate_nsmaps()
3360 {
3361 	struct devname_nsmap *map = NULL;
3362 	struct devname_nsmap *oldmap = NULL;
3363 
3364 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3365 	map = devname_nsmaps;
3366 	while (map) {
3367 		rw_enter(&map->dir_lock, RW_READER);
3368 		if ((map->dir_invalid == 1) && (map->dir_newmodule == NULL) &&
3369 		    (map->dir_newmap == NULL)) {
3370 			oldmap = map;
3371 			rw_exit(&map->dir_lock);
3372 			if (map->prev)
3373 				map->prev->next = oldmap->next;
3374 			if (map == devname_nsmaps)
3375 				devname_nsmaps = oldmap->next;
3376 
3377 			map = oldmap->next;
3378 			if (map)
3379 				map->prev = oldmap->prev;
3380 			sdev_free_nsmap(oldmap);
3381 			oldmap = NULL;
3382 		} else {
3383 			rw_exit(&map->dir_lock);
3384 			map = map->next;
3385 		}
3386 	}
3387 	devname_nsmaps_invalidated = 0;
3388 }
3389 
3390 static int
3391 sdev_map_is_invalid(struct devname_nsmap *map)
3392 {
3393 	int ret = 0;
3394 
3395 	ASSERT(map);
3396 	rw_enter(&map->dir_lock, RW_READER);
3397 	if (map->dir_invalid)
3398 		ret = 1;
3399 	rw_exit(&map->dir_lock);
3400 	return (ret);
3401 }
3402 
3403 static int
3404 sdev_check_map(struct devname_nsmap *map)
3405 {
3406 	struct devname_nsmap *mapp;
3407 
3408 	mutex_enter(&devname_nsmaps_lock);
3409 	if (devname_nsmaps == NULL) {
3410 		mutex_exit(&devname_nsmaps_lock);
3411 		return (1);
3412 	}
3413 
3414 	for (mapp = devname_nsmaps; mapp; mapp = mapp->next) {
3415 		if (mapp == map) {
3416 			mutex_exit(&devname_nsmaps_lock);
3417 			return (0);
3418 		}
3419 	}
3420 
3421 	mutex_exit(&devname_nsmaps_lock);
3422 	return (1);
3423 
3424 }
3425 
3426 struct devname_nsmap *
3427 sdev_get_map(struct sdev_node *dv, int validate)
3428 {
3429 	struct devname_nsmap *map;
3430 	int error;
3431 
3432 	ASSERT(RW_READ_HELD(&dv->sdev_contents));
3433 	map = dv->sdev_mapinfo;
3434 	if (map && sdev_check_map(map)) {
3435 		if (!rw_tryupgrade(&dv->sdev_contents)) {
3436 			rw_exit(&dv->sdev_contents);
3437 			rw_enter(&dv->sdev_contents, RW_WRITER);
3438 		}
3439 		dv->sdev_mapinfo = NULL;
3440 		rw_downgrade(&dv->sdev_contents);
3441 		return (NULL);
3442 	}
3443 
3444 	if (validate && (!map || (map && sdev_map_is_invalid(map)))) {
3445 		if (!rw_tryupgrade(&dv->sdev_contents)) {
3446 			rw_exit(&dv->sdev_contents);
3447 			rw_enter(&dv->sdev_contents, RW_WRITER);
3448 		}
3449 		error = sdev_get_moduleops(dv);
3450 		if (!error)
3451 			map = dv->sdev_mapinfo;
3452 		rw_downgrade(&dv->sdev_contents);
3453 	}
3454 	return (map);
3455 }
3456 
3457 extern int sdev_vnodeops_tbl_size;
3458 
3459 /*
3460  * construct a new template with overrides from vtab
3461  */
3462 static fs_operation_def_t *
3463 sdev_merge_vtab(const fs_operation_def_t tab[])
3464 {
3465 	fs_operation_def_t *new;
3466 	const fs_operation_def_t *tab_entry;
3467 
3468 	/* make a copy of standard vnode ops table */
3469 	new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
3470 	bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
3471 
3472 	/* replace the overrides from tab */
3473 	for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
3474 		fs_operation_def_t *std_entry = new;
3475 		while (std_entry->name) {
3476 			if (strcmp(tab_entry->name, std_entry->name) == 0) {
3477 				std_entry->func = tab_entry->func;
3478 				break;
3479 			}
3480 			std_entry++;
3481 		}
3482 		if (std_entry->name == NULL)
3483 			cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
3484 			    tab_entry->name);
3485 	}
3486 
3487 	return (new);
3488 }
3489 
3490 /* free memory allocated by sdev_merge_vtab */
3491 static void
3492 sdev_free_vtab(fs_operation_def_t *new)
3493 {
3494 	kmem_free(new, sdev_vnodeops_tbl_size);
3495 }
3496 
3497 void
3498 devname_get_vnode(devname_handle_t *hdl, vnode_t **vpp)
3499 {
3500 	struct sdev_node *dv = hdl->dh_data;
3501 
3502 	ASSERT(dv);
3503 
3504 	rw_enter(&dv->sdev_contents, RW_READER);
3505 	*vpp = SDEVTOV(dv);
3506 	rw_exit(&dv->sdev_contents);
3507 }
3508 
3509 int
3510 devname_get_path(devname_handle_t *hdl, char **path)
3511 {
3512 	struct sdev_node *dv = hdl->dh_data;
3513 
3514 	ASSERT(dv);
3515 
3516 	rw_enter(&dv->sdev_contents, RW_READER);
3517 	*path = dv->sdev_path;
3518 	rw_exit(&dv->sdev_contents);
3519 	return (0);
3520 }
3521 
3522 int
3523 devname_get_name(devname_handle_t *hdl, char **entry)
3524 {
3525 	struct sdev_node *dv = hdl->dh_data;
3526 
3527 	ASSERT(dv);
3528 	rw_enter(&dv->sdev_contents, RW_READER);
3529 	*entry = dv->sdev_name;
3530 	rw_exit(&dv->sdev_contents);
3531 	return (0);
3532 }
3533 
3534 void
3535 devname_get_dir_vnode(devname_handle_t *hdl, vnode_t **vpp)
3536 {
3537 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3538 
3539 	ASSERT(dv);
3540 
3541 	rw_enter(&dv->sdev_contents, RW_READER);
3542 	*vpp = SDEVTOV(dv);
3543 	rw_exit(&dv->sdev_contents);
3544 }
3545 
3546 int
3547 devname_get_dir_path(devname_handle_t *hdl, char **path)
3548 {
3549 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3550 
3551 	ASSERT(dv);
3552 	rw_enter(&dv->sdev_contents, RW_READER);
3553 	*path = dv->sdev_path;
3554 	rw_exit(&dv->sdev_contents);
3555 	return (0);
3556 }
3557 
3558 int
3559 devname_get_dir_name(devname_handle_t *hdl, char **entry)
3560 {
3561 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3562 
3563 	ASSERT(dv);
3564 	rw_enter(&dv->sdev_contents, RW_READER);
3565 	*entry = dv->sdev_name;
3566 	rw_exit(&dv->sdev_contents);
3567 	return (0);
3568 }
3569 
3570 int
3571 devname_get_dir_nsmap(devname_handle_t *hdl, struct devname_nsmap **map)
3572 {
3573 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3574 
3575 	ASSERT(dv);
3576 	rw_enter(&dv->sdev_contents, RW_READER);
3577 	*map = dv->sdev_mapinfo;
3578 	rw_exit(&dv->sdev_contents);
3579 	return (0);
3580 }
3581 
3582 int
3583 devname_get_dir_handle(devname_handle_t *hdl, devname_handle_t **dir_hdl)
3584 {
3585 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3586 
3587 	ASSERT(dv);
3588 	rw_enter(&dv->sdev_contents, RW_READER);
3589 	*dir_hdl = &(dv->sdev_handle);
3590 	rw_exit(&dv->sdev_contents);
3591 	return (0);
3592 }
3593 
3594 void
3595 devname_set_nodetype(devname_handle_t *hdl, void *args, int spec)
3596 {
3597 	struct sdev_node *dv = hdl->dh_data;
3598 
3599 	ASSERT(dv);
3600 	rw_enter(&dv->sdev_contents, RW_WRITER);
3601 	hdl->dh_spec = (devname_spec_t)spec;
3602 	hdl->dh_args = (void *)i_ddi_strdup((char *)args, KM_SLEEP);
3603 	rw_exit(&dv->sdev_contents);
3604 }
3605 
3606 /*
3607  * a generic setattr() function
3608  *
3609  * note: flags only supports AT_UID and AT_GID.
3610  *	 Future enhancements can be done for other types, e.g. AT_MODE
3611  */
3612 int
3613 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
3614     struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
3615     int), int protocol)
3616 {
3617 	struct sdev_node	*dv = VTOSDEV(vp);
3618 	struct sdev_node	*parent = dv->sdev_dotdot;
3619 	struct vattr		*get;
3620 	uint_t			mask = vap->va_mask;
3621 	int 			error;
3622 
3623 	/* some sanity checks */
3624 	if (vap->va_mask & AT_NOSET)
3625 		return (EINVAL);
3626 
3627 	if (vap->va_mask & AT_SIZE) {
3628 		if (vp->v_type == VDIR) {
3629 			return (EISDIR);
3630 		}
3631 	}
3632 
3633 	/* no need to set attribute, but do not fail either */
3634 	ASSERT(parent);
3635 	rw_enter(&parent->sdev_contents, RW_READER);
3636 	if (dv->sdev_state == SDEV_ZOMBIE) {
3637 		rw_exit(&parent->sdev_contents);
3638 		return (0);
3639 	}
3640 
3641 	/* If backing store exists, just set it. */
3642 	if (dv->sdev_attrvp) {
3643 		rw_exit(&parent->sdev_contents);
3644 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3645 	}
3646 
3647 	/*
3648 	 * Otherwise, for nodes with the persistence attribute, create it.
3649 	 */
3650 	ASSERT(dv->sdev_attr);
3651 	if (SDEV_IS_PERSIST(dv) ||
3652 	    ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
3653 		sdev_vattr_merge(dv, vap);
3654 		rw_enter(&dv->sdev_contents, RW_WRITER);
3655 		error = sdev_shadow_node(dv, cred);
3656 		rw_exit(&dv->sdev_contents);
3657 		rw_exit(&parent->sdev_contents);
3658 
3659 		if (error)
3660 			return (error);
3661 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3662 	}
3663 
3664 
3665 	/*
3666 	 * sdev_attr was allocated in sdev_mknode
3667 	 */
3668 	rw_enter(&dv->sdev_contents, RW_WRITER);
3669 	error = secpolicy_vnode_setattr(cred, vp, vap,
3670 	    dv->sdev_attr, flags, sdev_unlocked_access, dv);
3671 	if (error) {
3672 		rw_exit(&dv->sdev_contents);
3673 		rw_exit(&parent->sdev_contents);
3674 		return (error);
3675 	}
3676 
3677 	get = dv->sdev_attr;
3678 	if (mask & AT_MODE) {
3679 		get->va_mode &= S_IFMT;
3680 		get->va_mode |= vap->va_mode & ~S_IFMT;
3681 	}
3682 
3683 	if ((mask & AT_UID) || (mask & AT_GID)) {
3684 		if (mask & AT_UID)
3685 			get->va_uid = vap->va_uid;
3686 		if (mask & AT_GID)
3687 			get->va_gid = vap->va_gid;
3688 		/*
3689 		 * a callback must be provided if the protocol is set
3690 		 */
3691 		if ((protocol & AT_UID) || (protocol & AT_GID)) {
3692 			ASSERT(callback);
3693 			error = callback(dv, get, protocol);
3694 			if (error) {
3695 				rw_exit(&dv->sdev_contents);
3696 				rw_exit(&parent->sdev_contents);
3697 				return (error);
3698 			}
3699 		}
3700 	}
3701 
3702 	if (mask & AT_ATIME)
3703 		get->va_atime = vap->va_atime;
3704 	if (mask & AT_MTIME)
3705 		get->va_mtime = vap->va_mtime;
3706 	if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
3707 		gethrestime(&get->va_ctime);
3708 	}
3709 
3710 	sdev_vattr_merge(dv, get);
3711 	rw_exit(&dv->sdev_contents);
3712 	rw_exit(&parent->sdev_contents);
3713 	return (0);
3714 }
3715 
3716 /*
3717  * a generic inactive() function
3718  */
3719 void
3720 devname_inactive_func(struct vnode *vp, struct cred *cred,
3721     void (*callback)(struct vnode *))
3722 {
3723 	int clean;
3724 	struct sdev_node *dv = VTOSDEV(vp);
3725 	struct sdev_node *ddv = dv->sdev_dotdot;
3726 	struct sdev_node *idv;
3727 	struct sdev_node *prev = NULL;
3728 	int state;
3729 	struct devname_nsmap *map = NULL;
3730 	struct devname_ops *dirops = NULL;
3731 	void (*fn)(devname_handle_t *, struct cred *) = NULL;
3732 
3733 	rw_enter(&ddv->sdev_contents, RW_WRITER);
3734 	state = dv->sdev_state;
3735 
3736 	mutex_enter(&vp->v_lock);
3737 	ASSERT(vp->v_count >= 1);
3738 
3739 	if (vp->v_count == 1 && callback != NULL)
3740 		callback(vp);
3741 
3742 	clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
3743 
3744 	/*
3745 	 * last ref count on the ZOMBIE node is released.
3746 	 * clean up the sdev_node, and
3747 	 * release the hold on the backing store node so that
3748 	 * the ZOMBIE backing stores also cleaned out.
3749 	 */
3750 	if (clean) {
3751 		ASSERT(ddv);
3752 		if (SDEV_IS_GLOBAL(dv)) {
3753 			map = ddv->sdev_mapinfo;
3754 			dirops = map ? map->dir_ops : NULL;
3755 			if (dirops && (fn = dirops->devnops_inactive))
3756 				(*fn)(&(dv->sdev_handle), cred);
3757 		}
3758 
3759 		ddv->sdev_nlink--;
3760 		if (vp->v_type == VDIR) {
3761 			dv->sdev_nlink--;
3762 		}
3763 		for (idv = ddv->sdev_dot; idv && idv != dv;
3764 		    prev = idv, idv = idv->sdev_next)
3765 			;
3766 		ASSERT(idv == dv);
3767 		if (prev == NULL)
3768 			ddv->sdev_dot = dv->sdev_next;
3769 		else
3770 			prev->sdev_next = dv->sdev_next;
3771 		dv->sdev_next = NULL;
3772 		dv->sdev_nlink--;
3773 		--vp->v_count;
3774 		mutex_exit(&vp->v_lock);
3775 		sdev_nodedestroy(dv, 0);
3776 	} else {
3777 		--vp->v_count;
3778 		mutex_exit(&vp->v_lock);
3779 	}
3780 	rw_exit(&ddv->sdev_contents);
3781 }
3782