xref: /titanic_44/usr/src/uts/common/fs/dev/sdev_subr.c (revision b9bc7f7832704fda46b4d6b04f3f7be1227dc644)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * utility routines for the /dev fs
30  */
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/t_lock.h>
35 #include <sys/systm.h>
36 #include <sys/sysmacros.h>
37 #include <sys/user.h>
38 #include <sys/time.h>
39 #include <sys/vfs.h>
40 #include <sys/vnode.h>
41 #include <sys/file.h>
42 #include <sys/fcntl.h>
43 #include <sys/flock.h>
44 #include <sys/kmem.h>
45 #include <sys/uio.h>
46 #include <sys/errno.h>
47 #include <sys/stat.h>
48 #include <sys/cred.h>
49 #include <sys/dirent.h>
50 #include <sys/pathname.h>
51 #include <sys/cmn_err.h>
52 #include <sys/debug.h>
53 #include <sys/mode.h>
54 #include <sys/policy.h>
55 #include <fs/fs_subr.h>
56 #include <sys/mount.h>
57 #include <sys/fs/snode.h>
58 #include <sys/fs/dv_node.h>
59 #include <sys/fs/sdev_impl.h>
60 #include <sys/fs/sdev_node.h>
61 #include <sys/sunndi.h>
62 #include <sys/sunmdi.h>
63 #include <sys/conf.h>
64 #include <sys/proc.h>
65 #include <sys/user.h>
66 #include <sys/modctl.h>
67 
#ifdef DEBUG
/*
 * Debug-only tunables.
 *
 * sdev_debug: presumably the message mask consulted by the sdcmn_err*()
 * macros -- TODO confirm against the macro definitions.
 * sdev_debug_cache_flags: handed to kmem_cache_create() as the cache flags
 * when the sdev_node cache is set up in sdev_node_cache_init().
 */
int sdev_debug = 0x00000001;
int sdev_debug_cache_flags = 0;
#endif
72 
73 /*
74  * globals
75  */
/*
 * prototype memory vattrs
 *
 * Default attributes for a directory node.  va_mask flags the type, mode,
 * uid and gid fields as the ones carrying meaningful values; every other
 * field is left zero.
 */
vattr_t sdev_vattr_dir = {
	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
	VDIR,					/* va_type */
	SDEV_DIRMODE_DEFAULT,			/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
95 
/*
 * Default attributes for a symbolic link node.  Unlike the other prototype
 * vattrs, va_mask here flags only the type and mode; the uid/gid defaults
 * are filled in but not marked in the mask.
 */
vattr_t sdev_vattr_lnk = {
	AT_TYPE|AT_MODE,			/* va_mask */
	VLNK,					/* va_type */
	SDEV_LNKMODE_DEFAULT,			/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
114 
/*
 * Default attributes for a block device node.  The mode already carries
 * the S_IFBLK file-type bits in addition to the default permission bits.
 */
vattr_t sdev_vattr_blk = {
	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
	VBLK,					/* va_type */
	S_IFBLK | SDEV_DEVMODE_DEFAULT,		/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
133 
/*
 * Default attributes for a character device node.  The mode already
 * carries the S_IFCHR file-type bits in addition to the default
 * permission bits.
 */
vattr_t sdev_vattr_chr = {
	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
	VCHR,					/* va_type */
	S_IFCHR | SDEV_DEVMODE_DEFAULT,		/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
152 
/*
 * File-wide state.
 *
 * sdev_node_cache is created by sdev_node_cache_init() and torn down by
 * sdev_node_cache_fini(); every sdev_node is allocated from it.
 */
kmem_cache_t	*sdev_node_cache;	/* sdev_node cache */
int		devtype;		/* fstype */

/*
 * devname_ns_ops is installed as a map's dir_ops by sdev_get_moduleops()
 * when the map names no module of its own.
 * devname_nsmaps_lock is initialized in sdev_mkroot() when the "/dev"
 * root node is built.
 */
struct devname_ops *devname_ns_ops;	/* default name service directory ops */
kmutex_t devname_nsmaps_lock;	/* protect devname_nsmaps */

/* static */
static struct devname_nsmap *devname_nsmaps = NULL;
				/* contents from /etc/dev/devname_master */
static int devname_nsmaps_invalidated = 0; /* "devfsadm -m" has run */
163 
164 static struct vnodeops *sdev_get_vop(struct sdev_node *);
165 static void sdev_set_no_nocache(struct sdev_node *);
166 static int sdev_get_moduleops(struct sdev_node *);
167 static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
168 static void sdev_free_vtab(fs_operation_def_t *);
169 
170 static void
171 sdev_prof_free(struct sdev_node *dv)
172 {
173 	ASSERT(!SDEV_IS_GLOBAL(dv));
174 	if (dv->sdev_prof.dev_name)
175 		nvlist_free(dv->sdev_prof.dev_name);
176 	if (dv->sdev_prof.dev_map)
177 		nvlist_free(dv->sdev_prof.dev_map);
178 	if (dv->sdev_prof.dev_symlink)
179 		nvlist_free(dv->sdev_prof.dev_symlink);
180 	if (dv->sdev_prof.dev_glob_incdir)
181 		nvlist_free(dv->sdev_prof.dev_glob_incdir);
182 	if (dv->sdev_prof.dev_glob_excdir)
183 		nvlist_free(dv->sdev_prof.dev_glob_excdir);
184 	bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
185 }
186 
187 /*
188  * sdev_node cache constructor
189  */
190 /*ARGSUSED1*/
191 static int
192 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
193 {
194 	struct sdev_node *dv = (struct sdev_node *)buf;
195 	struct vnode *vp;
196 
197 	ASSERT(flag == KM_SLEEP);
198 
199 	bzero(buf, sizeof (struct sdev_node));
200 	rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
201 	dv->sdev_vnode = vn_alloc(KM_SLEEP);
202 	vp = SDEVTOV(dv);
203 	vp->v_data = (caddr_t)dv;
204 	return (0);
205 }
206 
207 /* sdev_node destructor for kmem cache */
208 /*ARGSUSED1*/
209 static void
210 i_sdev_node_dtor(void *buf, void *arg)
211 {
212 	struct sdev_node *dv = (struct sdev_node *)buf;
213 	struct vnode *vp = SDEVTOV(dv);
214 
215 	rw_destroy(&dv->sdev_contents);
216 	vn_free(vp);
217 }
218 
219 /* initialize sdev_node cache */
220 void
221 sdev_node_cache_init()
222 {
223 	int flags = 0;
224 
225 #ifdef	DEBUG
226 	flags = sdev_debug_cache_flags;
227 	if (flags)
228 		sdcmn_err(("cache debug flags 0x%x\n", flags));
229 #endif	/* DEBUG */
230 
231 	ASSERT(sdev_node_cache == NULL);
232 	sdev_node_cache = kmem_cache_create("sdev_node_cache",
233 	    sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
234 	    NULL, NULL, NULL, flags);
235 }
236 
237 /* destroy sdev_node cache */
238 void
239 sdev_node_cache_fini()
240 {
241 	ASSERT(sdev_node_cache != NULL);
242 	kmem_cache_destroy(sdev_node_cache);
243 	sdev_node_cache = NULL;
244 }
245 
246 void
247 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
248 {
249 	ASSERT(dv);
250 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
251 	dv->sdev_state = state;
252 }
253 
254 static void
255 sdev_attrinit(struct sdev_node *dv, vattr_t *vap)
256 {
257 	timestruc_t now;
258 
259 	ASSERT(vap);
260 
261 	dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
262 	*dv->sdev_attr = *vap;
263 
264 	dv->sdev_attr->va_mode = MAKEIMODE(vap->va_type, vap->va_mode);
265 
266 	gethrestime(&now);
267 	dv->sdev_attr->va_atime = now;
268 	dv->sdev_attr->va_mtime = now;
269 	dv->sdev_attr->va_ctime = now;
270 }
271 
272 /* alloc and initialize a sdev_node */
273 int
274 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
275     vattr_t *vap)
276 {
277 	struct sdev_node *dv = NULL;
278 	struct vnode *vp;
279 	size_t nmlen, len;
280 	devname_handle_t  *dhl;
281 
282 	nmlen = strlen(nm) + 1;
283 	if (nmlen > MAXNAMELEN) {
284 		sdcmn_err9(("sdev_nodeinit: node name %s"
285 		    " too long\n", nm));
286 		*newdv = NULL;
287 		return (ENAMETOOLONG);
288 	}
289 
290 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
291 
292 	dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
293 	bcopy(nm, dv->sdev_name, nmlen);
294 	dv->sdev_namelen = nmlen - 1;	/* '\0' not included */
295 	len = strlen(ddv->sdev_path) + strlen(nm) + 2;
296 	dv->sdev_path = kmem_alloc(len, KM_SLEEP);
297 	(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
298 	/* overwritten for VLNK nodes */
299 	dv->sdev_symlink = NULL;
300 
301 	vp = SDEVTOV(dv);
302 	vn_reinit(vp);
303 	vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
304 	if (vap)
305 		vp->v_type = vap->va_type;
306 
307 	/*
308 	 * initialized to the parent's vnodeops.
309 	 * maybe overwriten for a VDIR
310 	 */
311 	vn_setops(vp, vn_getops(SDEVTOV(ddv)));
312 	vn_exists(vp);
313 
314 	dv->sdev_dotdot = NULL;
315 	dv->sdev_dot = NULL;
316 	dv->sdev_next = NULL;
317 	dv->sdev_attrvp = NULL;
318 	if (vap) {
319 		sdev_attrinit(dv, vap);
320 	} else {
321 		dv->sdev_attr = NULL;
322 	}
323 
324 	dv->sdev_ino = sdev_mkino(dv);
325 	dv->sdev_nlink = 0;		/* updated on insert */
326 	dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
327 	dv->sdev_flags |= SDEV_BUILD;
328 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
329 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
330 	if (SDEV_IS_GLOBAL(ddv)) {
331 		dv->sdev_flags |= SDEV_GLOBAL;
332 		dv->sdev_mapinfo = NULL;
333 		dhl = &(dv->sdev_handle);
334 		dhl->dh_data = dv;
335 		dhl->dh_spec = DEVNAME_NS_NONE;
336 		dhl->dh_args = NULL;
337 		sdev_set_no_nocache(dv);
338 		dv->sdev_gdir_gen = 0;
339 	} else {
340 		dv->sdev_flags &= ~SDEV_GLOBAL;
341 		dv->sdev_origin = NULL; /* set later */
342 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
343 		dv->sdev_ldir_gen = 0;
344 		dv->sdev_devtree_gen = 0;
345 	}
346 
347 	rw_enter(&dv->sdev_contents, RW_WRITER);
348 	sdev_set_nodestate(dv, SDEV_INIT);
349 	rw_exit(&dv->sdev_contents);
350 	*newdv = dv;
351 
352 	return (0);
353 }
354 
355 /*
356  * transition a sdev_node into SDEV_READY state
357  */
358 int
359 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
360     void *args, struct cred *cred)
361 {
362 	int error = 0;
363 	struct vnode *vp = SDEVTOV(dv);
364 	vtype_t type;
365 
366 	ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
367 
368 	type = vap->va_type;
369 	vp->v_type = type;
370 	vp->v_rdev = vap->va_rdev;
371 	rw_enter(&dv->sdev_contents, RW_WRITER);
372 	if (type == VDIR) {
373 		dv->sdev_nlink = 2;
374 		dv->sdev_flags &= ~SDEV_PERSIST;
375 		dv->sdev_flags &= ~SDEV_DYNAMIC;
376 		vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
377 		error = sdev_get_moduleops(dv); /* from plug-in module */
378 		ASSERT(dv->sdev_dotdot);
379 		ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
380 		vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
381 	} else if (type == VLNK) {
382 		ASSERT(args);
383 		dv->sdev_nlink = 1;
384 		dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
385 	} else {
386 		dv->sdev_nlink = 1;
387 	}
388 
389 	if (!(SDEV_IS_GLOBAL(dv))) {
390 		dv->sdev_origin = (struct sdev_node *)args;
391 		dv->sdev_flags &= ~SDEV_PERSIST;
392 	}
393 
394 	/*
395 	 * shadow node is created here OR
396 	 * if failed (indicated by dv->sdev_attrvp == NULL),
397 	 * created later in sdev_setattr
398 	 */
399 	if (avp) {
400 		dv->sdev_attrvp = avp;
401 	} else {
402 		if (dv->sdev_attr == NULL)
403 			sdev_attrinit(dv, vap);
404 		else
405 			*dv->sdev_attr = *vap;
406 
407 		if ((SDEV_IS_PERSIST(dv) && (dv->sdev_attrvp == NULL)) ||
408 		    ((SDEVTOV(dv)->v_type == VDIR) &&
409 		    (dv->sdev_attrvp == NULL)))
410 			error = sdev_shadow_node(dv, cred);
411 	}
412 
413 	/* transition to READY state */
414 	sdev_set_nodestate(dv, SDEV_READY);
415 	sdev_nc_node_exists(dv);
416 	rw_exit(&dv->sdev_contents);
417 	return (error);
418 }
419 
420 /*
421  * setting ZOMBIE state
422  */
423 static int
424 sdev_nodezombied(struct sdev_node *dv)
425 {
426 	rw_enter(&dv->sdev_contents, RW_WRITER);
427 	sdev_set_nodestate(dv, SDEV_ZOMBIE);
428 	rw_exit(&dv->sdev_contents);
429 	return (0);
430 }
431 
432 /*
433  * Build the VROOT sdev_node.
434  */
435 /*ARGSUSED*/
436 struct sdev_node *
437 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
438     struct vnode *avp, struct cred *cred)
439 {
440 	struct sdev_node *dv;
441 	struct vnode *vp;
442 	char devdir[] = "/dev";
443 
444 	ASSERT(sdev_node_cache != NULL);
445 	ASSERT(avp);
446 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
447 	vp = SDEVTOV(dv);
448 	vn_reinit(vp);
449 	vp->v_flag |= VROOT;
450 	vp->v_vfsp = vfsp;
451 	vp->v_type = VDIR;
452 	vp->v_rdev = devdev;
453 	vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
454 	vn_exists(vp);
455 
456 	if (vfsp->vfs_mntpt)
457 		dv->sdev_name = i_ddi_strdup(
458 		    (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
459 	else
460 		/* vfs_mountdev1 set mount point later */
461 		dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
462 	dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
463 	dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
464 	dv->sdev_ino = SDEV_ROOTINO;
465 	dv->sdev_nlink = 2;		/* name + . (no sdev_insert) */
466 	dv->sdev_dotdot = dv;		/* .. == self */
467 	dv->sdev_attrvp = avp;
468 	dv->sdev_attr = NULL;
469 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
470 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
471 	if (strcmp(dv->sdev_name, "/dev") == 0) {
472 		mutex_init(&devname_nsmaps_lock, NULL, MUTEX_DEFAULT, NULL);
473 		dv->sdev_mapinfo = NULL;
474 		dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
475 		bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
476 		dv->sdev_gdir_gen = 0;
477 	} else {
478 		dv->sdev_flags = SDEV_BUILD;
479 		dv->sdev_flags &= ~SDEV_PERSIST;
480 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
481 		dv->sdev_ldir_gen = 0;
482 		dv->sdev_devtree_gen = 0;
483 	}
484 
485 	rw_enter(&dv->sdev_contents, RW_WRITER);
486 	sdev_set_nodestate(dv, SDEV_READY);
487 	rw_exit(&dv->sdev_contents);
488 	sdev_nc_node_exists(dv);
489 	return (dv);
490 }
491 
492 /*
493  *  1. load the module
494  *  2. modload invokes sdev_module_register, which in turn sets
495  *     the dv->sdev_mapinfo->dir_ops
496  *
497  * note: locking order:
498  *	dv->sdev_contents -> map->dir_lock
499  */
500 static int
501 sdev_get_moduleops(struct sdev_node *dv)
502 {
503 	int error = 0;
504 	struct devname_nsmap *map = NULL;
505 	char *module;
506 	char *path;
507 	int load = 1;
508 
509 	ASSERT(SDEVTOV(dv)->v_type == VDIR);
510 
511 	if (devname_nsmaps == NULL)
512 		return (0);
513 
514 	if (!sdev_nsmaps_loaded() && !sdev_nsmaps_reloaded())
515 		return (0);
516 
517 
518 	path = dv->sdev_path;
519 	if ((map = sdev_get_nsmap_by_dir(path, 0))) {
520 		rw_enter(&map->dir_lock, RW_READER);
521 		if (map->dir_invalid) {
522 			if (map->dir_module && map->dir_newmodule &&
523 			    (strcmp(map->dir_module,
524 					map->dir_newmodule) == 0)) {
525 				load = 0;
526 			}
527 			sdev_replace_nsmap(map, map->dir_newmodule,
528 			    map->dir_newmap);
529 		}
530 
531 		module = map->dir_module;
532 		if (module && load) {
533 			sdcmn_err6(("sdev_get_moduleops: "
534 			    "load module %s", module));
535 			rw_exit(&map->dir_lock);
536 			error = modload("devname", module);
537 			sdcmn_err6(("sdev_get_moduleops: error %d\n", error));
538 			if (error < 0) {
539 				return (-1);
540 			}
541 		} else if (module == NULL) {
542 			/*
543 			 * loading the module ops for name services
544 			 */
545 			if (devname_ns_ops == NULL) {
546 				sdcmn_err6((
547 				    "sdev_get_moduleops: modload default\n"));
548 				error = modload("devname", DEVNAME_NSCONFIG);
549 				sdcmn_err6((
550 				    "sdev_get_moduleops: error %d\n", error));
551 				if (error < 0) {
552 					return (-1);
553 				}
554 			}
555 
556 			if (!rw_tryupgrade(&map->dir_lock)) {
557 				rw_exit(&map->dir_lock);
558 				rw_enter(&map->dir_lock, RW_WRITER);
559 			}
560 			ASSERT(devname_ns_ops);
561 			map->dir_ops = devname_ns_ops;
562 			rw_exit(&map->dir_lock);
563 		}
564 	}
565 
566 	dv->sdev_mapinfo = map;
567 	return (0);
568 }
569 
/*
 * directory dependent vop table
 *
 * One entry per /dev subdirectory that needs special treatment.  Entries
 * are matched by name against the directory's path relative to "/dev/"
 * (sdev_get_vop, sdev_set_no_nocache) or against the node name
 * (sdev_get_vtor).
 */
struct sdev_vop_table {
	char *vt_name;				/* subdirectory name */
	const fs_operation_def_t *vt_service;	/* vnodeops table */
	struct vnodeops *vt_vops;		/* constructed vop */
	struct vnodeops **vt_global_vops;	/* global container for vop */
	int (*vt_vtor)(struct sdev_node *);	/* validate sdev_node */
	int vt_flags;				/* OR-ed into sdev_flags */
};
579 
/*
 * Table of /dev subdirectories with directory-specific behavior.
 * The list is terminated by an entry whose vt_name is NULL.
 *
 * A nice improvement would be to provide a plug-in mechanism
 * for this table instead of a const table.
 */
static struct sdev_vop_table vtab[] =
{
	/* "pts": own vnodeops template and a validator */
	{ "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
	SDEV_DYNAMIC | SDEV_VTOR },

	/* "zcons": default vnodeops, only the SDEV_NO_NCACHE flag applies */
	{ "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },

	/* terminator */
	{ NULL, NULL, NULL, NULL, NULL, 0}
};
593 
594 
595 /*
596  *  sets a directory's vnodeops if the directory is in the vtab;
597  */
598 static struct vnodeops *
599 sdev_get_vop(struct sdev_node *dv)
600 {
601 	int i;
602 	char *path;
603 
604 	path = dv->sdev_path;
605 	ASSERT(path);
606 
607 	/* gets the relative path to /dev/ */
608 	path += 5;
609 
610 	/* gets the vtab entry if matches */
611 	for (i = 0; vtab[i].vt_name; i++) {
612 		if (strcmp(vtab[i].vt_name, path) != 0)
613 			continue;
614 		dv->sdev_flags |= vtab[i].vt_flags;
615 
616 		if (vtab[i].vt_vops) {
617 			if (vtab[i].vt_global_vops)
618 				*(vtab[i].vt_global_vops) = vtab[i].vt_vops;
619 			return (vtab[i].vt_vops);
620 		}
621 
622 		if (vtab[i].vt_service) {
623 			fs_operation_def_t *templ;
624 			templ = sdev_merge_vtab(vtab[i].vt_service);
625 			if (vn_make_ops(vtab[i].vt_name,
626 			    (const fs_operation_def_t *)templ,
627 			    &vtab[i].vt_vops) != 0) {
628 				cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
629 				    vtab[i].vt_name);
630 				/*NOTREACHED*/
631 			}
632 			if (vtab[i].vt_global_vops) {
633 				*(vtab[i].vt_global_vops) = vtab[i].vt_vops;
634 			}
635 			sdev_free_vtab(templ);
636 			return (vtab[i].vt_vops);
637 		}
638 		return (sdev_vnodeops);
639 	}
640 
641 	/* child inherits the persistence of the parent */
642 	if (SDEV_IS_PERSIST(dv->sdev_dotdot))
643 		dv->sdev_flags |= SDEV_PERSIST;
644 
645 	return (sdev_vnodeops);
646 }
647 
648 static void
649 sdev_set_no_nocache(struct sdev_node *dv)
650 {
651 	int i;
652 	char *path;
653 
654 	ASSERT(dv->sdev_path);
655 	path = dv->sdev_path + strlen("/dev/");
656 
657 	for (i = 0; vtab[i].vt_name; i++) {
658 		if (strcmp(vtab[i].vt_name, path) == 0) {
659 			if (vtab[i].vt_flags & SDEV_NO_NCACHE)
660 				dv->sdev_flags |= SDEV_NO_NCACHE;
661 			break;
662 		}
663 	}
664 }
665 
666 void *
667 sdev_get_vtor(struct sdev_node *dv)
668 {
669 	int i;
670 
671 	for (i = 0; vtab[i].vt_name; i++) {
672 		if (strcmp(vtab[i].vt_name, dv->sdev_name) != 0)
673 			continue;
674 		return ((void *)vtab[i].vt_vtor);
675 	}
676 	return (NULL);
677 }
678 
679 /*
680  * Build the base root inode
681  */
682 ino_t
683 sdev_mkino(struct sdev_node *dv)
684 {
685 	ino_t	ino;
686 
687 	/*
688 	 * for now, follow the lead of tmpfs here
689 	 * need to someday understand the requirements here
690 	 */
691 	ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
692 	ino += SDEV_ROOTINO + 1;
693 
694 	return (ino);
695 }
696 
697 static int
698 sdev_getlink(struct vnode *linkvp, char **link)
699 {
700 	int err;
701 	char *buf;
702 	struct uio uio = {0};
703 	struct iovec iov = {0};
704 
705 	if (linkvp == NULL)
706 		return (ENOENT);
707 	ASSERT(linkvp->v_type == VLNK);
708 
709 	buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
710 	iov.iov_base = buf;
711 	iov.iov_len = MAXPATHLEN;
712 	uio.uio_iov = &iov;
713 	uio.uio_iovcnt = 1;
714 	uio.uio_resid = MAXPATHLEN;
715 	uio.uio_segflg = UIO_SYSSPACE;
716 	uio.uio_llimit = MAXOFFSET_T;
717 
718 	err = VOP_READLINK(linkvp, &uio, kcred, NULL);
719 	if (err) {
720 		cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
721 		kmem_free(buf, MAXPATHLEN);
722 		return (ENOENT);
723 	}
724 
725 	/* mission complete */
726 	*link = i_ddi_strdup(buf, KM_SLEEP);
727 	kmem_free(buf, MAXPATHLEN);
728 	return (0);
729 }
730 
731 /*
732  * A convenient wrapper to get the devfs node vnode for a device
733  * minor functionality: readlink() of a /dev symlink
734  * Place the link into dv->sdev_symlink
735  */
736 static int
737 sdev_follow_link(struct sdev_node *dv)
738 {
739 	int err;
740 	struct vnode *linkvp;
741 	char *link = NULL;
742 
743 	linkvp = SDEVTOV(dv);
744 	if (linkvp == NULL)
745 		return (ENOENT);
746 	ASSERT(linkvp->v_type == VLNK);
747 	err = sdev_getlink(linkvp, &link);
748 	if (err) {
749 		(void) sdev_nodezombied(dv);
750 		dv->sdev_symlink = NULL;
751 		return (ENOENT);
752 	}
753 
754 	ASSERT(link != NULL);
755 	dv->sdev_symlink = link;
756 	return (0);
757 }
758 
759 static int
760 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
761 {
762 	vtype_t otype = SDEVTOV(dv)->v_type;
763 
764 	/*
765 	 * existing sdev_node has a different type.
766 	 */
767 	if (otype != nvap->va_type) {
768 		sdcmn_err9(("sdev_node_check: existing node "
769 		    "  %s type %d does not match new node type %d\n",
770 		    dv->sdev_name, otype, nvap->va_type));
771 		return (EEXIST);
772 	}
773 
774 	/*
775 	 * For a symlink, the target should be the same.
776 	 */
777 	if (otype == VLNK) {
778 		ASSERT(nargs != NULL);
779 		ASSERT(dv->sdev_symlink != NULL);
780 		if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
781 			sdcmn_err9(("sdev_node_check: existing node "
782 			    " %s has different symlink %s as new node "
783 			    " %s\n", dv->sdev_name, dv->sdev_symlink,
784 			    (char *)nargs));
785 			return (EEXIST);
786 		}
787 	}
788 
789 	return (0);
790 }
791 
792 /*
793  * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
794  *
795  * arguments:
796  *	- ddv (parent)
797  *	- nm (child name)
798  *	- newdv (sdev_node for nm is returned here)
799  *	- vap (vattr for the node to be created, va_type should be set.
800  *	  the defaults should be used if unknown)
801  *	- cred
802  *	- args
803  *	    . tnm (for VLNK)
804  *	    . global sdev_node (for !SDEV_GLOBAL)
805  * 	- state: SDEV_INIT, SDEV_READY
806  *
807  * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
808  *
809  * NOTE:  directory contents writers lock needs to be held before
810  *	  calling this routine.
811  */
812 int
813 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
814     struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
815     sdev_node_state_t state)
816 {
817 	int error = 0;
818 	sdev_node_state_t node_state;
819 	struct sdev_node *dv = NULL;
820 
821 	ASSERT(state != SDEV_ZOMBIE);
822 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
823 
824 	if (*newdv) {
825 		dv = *newdv;
826 	} else {
827 		/* allocate and initialize a sdev_node */
828 		if (ddv->sdev_state == SDEV_ZOMBIE) {
829 			sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
830 			    ddv->sdev_path));
831 			return (ENOENT);
832 		}
833 
834 		error = sdev_nodeinit(ddv, nm, &dv, vap);
835 		if (error != 0) {
836 			sdcmn_err9(("sdev_mknode: error %d,"
837 			    " name %s can not be initialized\n",
838 			    error, nm));
839 			return (ENOENT);
840 		}
841 		ASSERT(dv);
842 
843 		/* insert into the directory cache */
844 		error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
845 		if (error) {
846 			sdcmn_err9(("sdev_mknode: node %s can not"
847 			    " be added into directory cache\n", nm));
848 			return (ENOENT);
849 		}
850 	}
851 
852 	ASSERT(dv);
853 	node_state = dv->sdev_state;
854 	ASSERT(node_state != SDEV_ZOMBIE);
855 
856 	if (state == SDEV_READY) {
857 		switch (node_state) {
858 		case SDEV_INIT:
859 			error = sdev_nodeready(dv, vap, avp, args, cred);
860 			/*
861 			 * masking the errors with ENOENT
862 			 */
863 			if (error) {
864 				sdcmn_err9(("sdev_mknode: node %s can NOT"
865 				    " be transitioned into READY state, "
866 				    "error %d\n", nm, error));
867 				error = ENOENT;
868 			}
869 			break;
870 		case SDEV_READY:
871 			/*
872 			 * Do some sanity checking to make sure
873 			 * the existing sdev_node is what has been
874 			 * asked for.
875 			 */
876 			error = sdev_node_check(dv, vap, args);
877 			break;
878 		default:
879 			break;
880 		}
881 	}
882 
883 	if (!error) {
884 		*newdv = dv;
885 		ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
886 	} else {
887 		SDEV_SIMPLE_RELE(dv);
888 		*newdv = NULL;
889 	}
890 
891 	return (error);
892 }
893 
894 /*
895  * convenient wrapper to change vp's ATIME, CTIME and ATIME
896  */
897 void
898 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
899 {
900 	struct vattr attr;
901 	timestruc_t now;
902 	int err;
903 
904 	ASSERT(vp);
905 	gethrestime(&now);
906 	if (mask & AT_CTIME)
907 		attr.va_ctime = now;
908 	if (mask & AT_MTIME)
909 		attr.va_mtime = now;
910 	if (mask & AT_ATIME)
911 		attr.va_atime = now;
912 
913 	attr.va_mask = (mask & AT_TIMES);
914 	err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
915 	if (err && (err != EROFS)) {
916 		sdcmn_err(("update timestamps error %d\n", err));
917 	}
918 }
919 
920 /*
921  * the backing store vnode is released here
922  */
923 /*ARGSUSED1*/
924 void
925 sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
926 {
927 	/* no references */
928 	ASSERT(dv->sdev_nlink == 0);
929 
930 	if (dv->sdev_attrvp != NULLVP) {
931 		VN_RELE(dv->sdev_attrvp);
932 		/*
933 		 * reset the attrvp so that no more
934 		 * references can be made on this already
935 		 * vn_rele() vnode
936 		 */
937 		dv->sdev_attrvp = NULLVP;
938 	}
939 
940 	if (dv->sdev_attr != NULL) {
941 		kmem_free(dv->sdev_attr, sizeof (struct vattr));
942 		dv->sdev_attr = NULL;
943 	}
944 
945 	if (dv->sdev_name != NULL) {
946 		kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
947 		dv->sdev_name = NULL;
948 	}
949 
950 	if (dv->sdev_symlink != NULL) {
951 		kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
952 		dv->sdev_symlink = NULL;
953 	}
954 
955 	if (dv->sdev_path) {
956 		kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
957 		dv->sdev_path = NULL;
958 	}
959 
960 	if (!SDEV_IS_GLOBAL(dv))
961 		sdev_prof_free(dv);
962 
963 	mutex_destroy(&dv->sdev_lookup_lock);
964 	cv_destroy(&dv->sdev_lookup_cv);
965 
966 	/* return node to initial state as per constructor */
967 	(void) memset((void *)&dv->sdev_instance_data, 0,
968 	    sizeof (dv->sdev_instance_data));
969 	vn_invalid(SDEVTOV(dv));
970 	kmem_cache_free(sdev_node_cache, dv);
971 }
972 
973 /*
974  * DIRECTORY CACHE lookup
975  */
976 struct sdev_node *
977 sdev_findbyname(struct sdev_node *ddv, char *nm)
978 {
979 	struct sdev_node *dv;
980 	size_t	nmlen = strlen(nm);
981 
982 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
983 	for (dv = ddv->sdev_dot; dv; dv = dv->sdev_next) {
984 		if (dv->sdev_namelen != nmlen) {
985 			continue;
986 		}
987 
988 		/*
989 		 * Can't lookup stale nodes
990 		 */
991 		if (dv->sdev_flags & SDEV_STALE) {
992 			sdcmn_err9((
993 			    "sdev_findbyname: skipped stale node: %s\n",
994 			    dv->sdev_name));
995 			continue;
996 		}
997 
998 		if (strcmp(dv->sdev_name, nm) == 0) {
999 			SDEV_HOLD(dv);
1000 			return (dv);
1001 		}
1002 	}
1003 	return (NULL);
1004 }
1005 
1006 /*
1007  * Inserts a new sdev_node in a parent directory
1008  */
1009 void
1010 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
1011 {
1012 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1013 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
1014 	ASSERT(ddv->sdev_nlink >= 2);
1015 	ASSERT(dv->sdev_nlink == 0);
1016 
1017 	dv->sdev_dotdot = ddv;
1018 	dv->sdev_next = ddv->sdev_dot;
1019 	ddv->sdev_dot = dv;
1020 	ddv->sdev_nlink++;
1021 }
1022 
1023 /*
1024  * The following check is needed because while sdev_nodes are linked
1025  * in SDEV_INIT state, they have their link counts incremented only
1026  * in SDEV_READY state.
1027  */
1028 static void
1029 decr_link(struct sdev_node *dv)
1030 {
1031 	if (dv->sdev_state != SDEV_INIT)
1032 		dv->sdev_nlink--;
1033 	else
1034 		ASSERT(dv->sdev_nlink == 0);
1035 }
1036 
/*
 * Delete an existing dv from directory cache
 *
 * In the case of a node is still held by non-zero reference count,
 *     the node is put into ZOMBIE state. Once the reference count
 *     reaches "0", the node is unlinked and destroyed,
 *     in sdev_inactive().
 *
 * The caller must hold ddv->sdev_contents as writer.  One reference on
 * dv's vnode is dropped on both paths.
 *
 * Returns 0 when dv was unlinked and destroyed, EBUSY when other
 * references remain and dv was left in place.
 */
static int
sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
{
	struct sdev_node *idv;
	struct sdev_node *prev = NULL;
	struct vnode *vp;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));

	vp = SDEVTOV(dv);
	/* v_lock is held across the count checks and the unlink below */
	mutex_enter(&vp->v_lock);

	/* dv is held still */
	if (vp->v_count > 1) {
		rw_enter(&dv->sdev_contents, RW_WRITER);
		/* only a READY node is turned into a ZOMBIE */
		if (dv->sdev_state == SDEV_READY) {
			sdcmn_err9((
			    "sdev_delete: node %s busy with count %d\n",
			    dv->sdev_name, vp->v_count));
			dv->sdev_state = SDEV_ZOMBIE;
		}
		rw_exit(&dv->sdev_contents);
		/* drop one reference; the node stays linked */
		--vp->v_count;
		mutex_exit(&vp->v_lock);
		return (EBUSY);
	}
	ASSERT(vp->v_count == 1);

	/* unlink from the memory cache */
	ddv->sdev_nlink--;	/* .. to above */
	if (vp->v_type == VDIR) {
		decr_link(dv);		/* . to self */
	}

	/* walk the parent's child list to find dv and its predecessor */
	for (idv = ddv->sdev_dot; idv && idv != dv;
	    prev = idv, idv = idv->sdev_next)
		;
	ASSERT(idv == dv);	/* node to be deleted must exist */
	if (prev == NULL)
		ddv->sdev_dot = dv->sdev_next;	/* dv was the list head */
	else
		prev->sdev_next = dv->sdev_next;
	dv->sdev_next = NULL;
	decr_link(dv);	/* name, back to zero */
	vp->v_count--;
	mutex_exit(&vp->v_lock);

	/* destroy the node */
	sdev_nodedestroy(dv, 0);
	return (0);
}
1096 
1097 /*
1098  * check if the source is in the path of the target
1099  *
1100  * source and target are different
1101  */
1102 /*ARGSUSED2*/
1103 static int
1104 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
1105 {
1106 	int error = 0;
1107 	struct sdev_node *dotdot, *dir;
1108 
1109 	dotdot = tdv->sdev_dotdot;
1110 	ASSERT(dotdot);
1111 
1112 	/* fs root */
1113 	if (dotdot == tdv) {
1114 		return (0);
1115 	}
1116 
1117 	for (;;) {
1118 		/*
1119 		 * avoid error cases like
1120 		 *	mv a a/b
1121 		 *	mv a a/b/c
1122 		 *	etc.
1123 		 */
1124 		if (dotdot == sdv) {
1125 			error = EINVAL;
1126 			break;
1127 		}
1128 
1129 		dir = dotdot;
1130 		dotdot = dir->sdev_dotdot;
1131 
1132 		/* done checking because root is reached */
1133 		if (dir == dotdot) {
1134 			break;
1135 		}
1136 	}
1137 	return (error);
1138 }
1139 
1140 int
1141 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
1142     struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
1143     struct cred *cred)
1144 {
1145 	int error = 0;
1146 	struct vnode *ovp = SDEVTOV(odv);
1147 	struct vnode *nvp;
1148 	struct vattr vattr;
1149 	int doingdir = (ovp->v_type == VDIR);
1150 	char *link = NULL;
1151 	int samedir = (oddv == nddv) ? 1 : 0;
1152 	int bkstore = 0;
1153 	struct sdev_node *idv = NULL;
1154 	struct sdev_node *ndv = NULL;
1155 	timestruc_t now;
1156 
1157 	vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1158 	error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
1159 	if (error)
1160 		return (error);
1161 
1162 	if (!samedir)
1163 		rw_enter(&oddv->sdev_contents, RW_WRITER);
1164 	rw_enter(&nddv->sdev_contents, RW_WRITER);
1165 
1166 	/*
1167 	 * the source may have been deleted by another thread before
1168 	 * we gets here.
1169 	 */
1170 	if (odv->sdev_state != SDEV_READY) {
1171 		error = ENOENT;
1172 		goto err_out;
1173 	}
1174 
1175 	if (doingdir && (odv == nddv)) {
1176 		error = EINVAL;
1177 		goto err_out;
1178 	}
1179 
1180 	/*
1181 	 * If renaming a directory, and the parents are different (".." must be
1182 	 * changed) then the source dir must not be in the dir hierarchy above
1183 	 * the target since it would orphan everything below the source dir.
1184 	 */
1185 	if (doingdir && (oddv != nddv)) {
1186 		error = sdev_checkpath(odv, nddv, cred);
1187 		if (error)
1188 			goto err_out;
1189 	}
1190 
1191 	/* destination existing */
1192 	if (*ndvp) {
1193 		nvp = SDEVTOV(*ndvp);
1194 		ASSERT(nvp);
1195 
1196 		/* handling renaming to itself */
1197 		if (odv == *ndvp) {
1198 			error = 0;
1199 			goto err_out;
1200 		}
1201 
1202 		if (nvp->v_type == VDIR) {
1203 			if (!doingdir) {
1204 				error = EISDIR;
1205 				goto err_out;
1206 			}
1207 
1208 			if (vn_vfswlock(nvp)) {
1209 				error = EBUSY;
1210 				goto err_out;
1211 			}
1212 
1213 			if (vn_mountedvfs(nvp) != NULL) {
1214 				vn_vfsunlock(nvp);
1215 				error = EBUSY;
1216 				goto err_out;
1217 			}
1218 
1219 			/* in case dir1 exists in dir2 and "mv dir1 dir2" */
1220 			if ((*ndvp)->sdev_nlink > 2) {
1221 				vn_vfsunlock(nvp);
1222 				error = EEXIST;
1223 				goto err_out;
1224 			}
1225 			vn_vfsunlock(nvp);
1226 
1227 			(void) sdev_dirdelete(nddv, *ndvp);
1228 			*ndvp = NULL;
1229 			error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1230 				    nddv->sdev_attrvp, cred, NULL, 0);
1231 			if (error)
1232 				goto err_out;
1233 		} else {
1234 			if (doingdir) {
1235 				error = ENOTDIR;
1236 				goto err_out;
1237 			}
1238 
1239 			if (SDEV_IS_PERSIST((*ndvp))) {
1240 				bkstore = 1;
1241 			}
1242 
1243 			/*
1244 			 * get rid of the node from the directory cache
1245 			 * note, in case EBUSY is returned, the ZOMBIE
1246 			 * node is taken care in sdev_mknode.
1247 			 */
1248 			(void) sdev_dirdelete(nddv, *ndvp);
1249 			*ndvp = NULL;
1250 			if (bkstore) {
1251 				error = VOP_REMOVE(nddv->sdev_attrvp,
1252 				    nnm, cred, NULL, 0);
1253 				if (error)
1254 				    goto err_out;
1255 			}
1256 		}
1257 	}
1258 
1259 	/* fix the source for a symlink */
1260 	if (vattr.va_type == VLNK) {
1261 		if (odv->sdev_symlink == NULL) {
1262 			error = sdev_follow_link(odv);
1263 			if (error) {
1264 				error = ENOENT;
1265 				goto err_out;
1266 			}
1267 		}
1268 		ASSERT(odv->sdev_symlink);
1269 		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1270 	}
1271 
1272 	/*
1273 	 * make a fresh node from the source attrs
1274 	 */
1275 	ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1276 	error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1277 	    NULL, (void *)link, cred, SDEV_READY);
1278 
1279 	if (link)
1280 		kmem_free(link, strlen(link) + 1);
1281 
1282 	if (error)
1283 		goto err_out;
1284 	ASSERT(*ndvp);
1285 	ASSERT((*ndvp)->sdev_state == SDEV_READY);
1286 
1287 	/* move dir contents */
1288 	if (doingdir) {
1289 		for (idv = odv->sdev_dot; idv; idv = idv->sdev_next) {
1290 			error = sdev_rnmnode(odv, idv,
1291 			    (struct sdev_node *)(*ndvp), &ndv,
1292 			    idv->sdev_name, cred);
1293 
1294 			if (error)
1295 				goto err_out;
1296 			ndv = NULL;
1297 		}
1298 
1299 	}
1300 
1301 	if ((*ndvp)->sdev_attrvp) {
1302 		sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1303 		    AT_CTIME|AT_ATIME);
1304 	} else {
1305 		ASSERT((*ndvp)->sdev_attr);
1306 		gethrestime(&now);
1307 		(*ndvp)->sdev_attr->va_ctime = now;
1308 		(*ndvp)->sdev_attr->va_atime = now;
1309 	}
1310 
1311 	if (nddv->sdev_attrvp) {
1312 		sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1313 		    AT_MTIME|AT_ATIME);
1314 	} else {
1315 		ASSERT(nddv->sdev_attr);
1316 		gethrestime(&now);
1317 		nddv->sdev_attr->va_mtime = now;
1318 		nddv->sdev_attr->va_atime = now;
1319 	}
1320 	rw_exit(&nddv->sdev_contents);
1321 	if (!samedir)
1322 		rw_exit(&oddv->sdev_contents);
1323 
1324 	SDEV_RELE(*ndvp);
1325 	return (error);
1326 
1327 err_out:
1328 	rw_exit(&nddv->sdev_contents);
1329 	if (!samedir)
1330 		rw_exit(&oddv->sdev_contents);
1331 	return (error);
1332 }
1333 
1334 /*
1335  * Merge sdev_node specific information into an attribute structure.
1336  *
1337  * note: sdev_node is not locked here
1338  */
1339 void
1340 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1341 {
1342 	struct vnode *vp = SDEVTOV(dv);
1343 
1344 	vap->va_nlink = dv->sdev_nlink;
1345 	vap->va_nodeid = dv->sdev_ino;
1346 	vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1347 	vap->va_type = vp->v_type;
1348 
1349 	if (vp->v_type == VDIR) {
1350 		vap->va_rdev = 0;
1351 		vap->va_fsid = vp->v_rdev;
1352 	} else if (vp->v_type == VLNK) {
1353 		vap->va_rdev = 0;
1354 		vap->va_mode  &= ~S_IFMT;
1355 		vap->va_mode |= S_IFLNK;
1356 	} else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1357 		vap->va_rdev = vp->v_rdev;
1358 		vap->va_mode &= ~S_IFMT;
1359 		if (vap->va_type == VCHR)
1360 			vap->va_mode |= S_IFCHR;
1361 		else
1362 			vap->va_mode |= S_IFBLK;
1363 	} else {
1364 		vap->va_rdev = 0;
1365 	}
1366 }
1367 
1368 static struct vattr *
1369 sdev_getdefault_attr(enum vtype type)
1370 {
1371 	if (type == VDIR)
1372 		return (&sdev_vattr_dir);
1373 	else if (type == VCHR)
1374 		return (&sdev_vattr_chr);
1375 	else if (type == VBLK)
1376 		return (&sdev_vattr_blk);
1377 	else if (type == VLNK)
1378 		return (&sdev_vattr_lnk);
1379 	else
1380 		return (NULL);
1381 }
1382 int
1383 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1384 {
1385 	int rv = 0;
1386 	struct vnode *vp = SDEVTOV(dv);
1387 
1388 	switch (vp->v_type) {
1389 	case VCHR:
1390 	case VBLK:
1391 		/*
1392 		 * If vnode is a device, return special vnode instead
1393 		 * (though it knows all about -us- via sp->s_realvp)
1394 		 */
1395 		*vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1396 		VN_RELE(vp);
1397 		if (*vpp == NULLVP)
1398 			rv = ENOSYS;
1399 		break;
1400 	default:	/* most types are returned as is */
1401 		*vpp = vp;
1402 		break;
1403 	}
1404 	return (rv);
1405 }
1406 
1407 /*
1408  * loopback into sdev_lookup()
1409  */
1410 static struct vnode *
1411 devname_find_by_devpath(char *devpath, struct vattr *vattr)
1412 {
1413 	int error = 0;
1414 	struct vnode *vp;
1415 
1416 	error = lookupname(devpath, UIO_SYSSPACE, NO_FOLLOW, NULLVPP, &vp);
1417 	if (error) {
1418 		return (NULL);
1419 	}
1420 
1421 	if (vattr)
1422 		(void) VOP_GETATTR(vp, vattr, 0, kcred, NULL);
1423 	return (vp);
1424 }
1425 
1426 /*
1427  * the junction between devname and devfs
1428  */
1429 static struct vnode *
1430 devname_configure_by_path(char *physpath, struct vattr *vattr)
1431 {
1432 	int error = 0;
1433 	struct vnode *vp;
1434 
1435 	ASSERT(strncmp(physpath, "/devices/", sizeof ("/devices/") - 1)
1436 	    == 0);
1437 
1438 	error = devfs_lookupname(physpath + sizeof ("/devices/") - 1,
1439 	    NULLVPP, &vp);
1440 	if (error != 0) {
1441 		if (error == ENODEV) {
1442 			cmn_err(CE_CONT, "%s: not found (line %d)\n",
1443 			    physpath, __LINE__);
1444 		}
1445 
1446 		return (NULL);
1447 	}
1448 
1449 	if (vattr)
1450 		(void) VOP_GETATTR(vp, vattr, 0, kcred, NULL);
1451 	return (vp);
1452 }
1453 
1454 /*
1455  * junction between devname and root file system, e.g. ufs
1456  */
1457 int
1458 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1459 {
1460 	struct vnode *rdvp = ddv->sdev_attrvp;
1461 	int rval = 0;
1462 
1463 	ASSERT(rdvp);
1464 
1465 	rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1466 	    NULL);
1467 	return (rval);
1468 }
1469 
1470 static int
1471 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1472 {
1473 	struct sdev_node *dv = NULL;
1474 	char	*nm;
1475 	struct vnode *dirvp;
1476 	int	error;
1477 	vnode_t	*vp;
1478 	int eof;
1479 	struct iovec iov;
1480 	struct uio uio;
1481 	struct dirent64 *dp;
1482 	dirent64_t *dbuf;
1483 	size_t dbuflen;
1484 	struct vattr vattr;
1485 	char *link = NULL;
1486 
1487 	if (ddv->sdev_attrvp == NULL)
1488 		return (0);
1489 	if (!(ddv->sdev_flags & SDEV_BUILD))
1490 		return (0);
1491 
1492 	dirvp = ddv->sdev_attrvp;
1493 	VN_HOLD(dirvp);
1494 	dbuf = kmem_zalloc(dlen, KM_SLEEP);
1495 
1496 	uio.uio_iov = &iov;
1497 	uio.uio_iovcnt = 1;
1498 	uio.uio_segflg = UIO_SYSSPACE;
1499 	uio.uio_fmode = 0;
1500 	uio.uio_extflg = UIO_COPY_CACHED;
1501 	uio.uio_loffset = 0;
1502 	uio.uio_llimit = MAXOFFSET_T;
1503 
1504 	eof = 0;
1505 	error = 0;
1506 	while (!error && !eof) {
1507 		uio.uio_resid = dlen;
1508 		iov.iov_base = (char *)dbuf;
1509 		iov.iov_len = dlen;
1510 		(void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1511 		error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1512 		VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1513 
1514 		dbuflen = dlen - uio.uio_resid;
1515 		if (error || dbuflen == 0)
1516 			break;
1517 
1518 		if (!(ddv->sdev_flags & SDEV_BUILD)) {
1519 			error = 0;
1520 			break;
1521 		}
1522 
1523 		for (dp = dbuf; ((intptr_t)dp <
1524 		    (intptr_t)dbuf + dbuflen);
1525 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1526 			nm = dp->d_name;
1527 
1528 			if (strcmp(nm, ".") == 0 ||
1529 			    strcmp(nm, "..") == 0)
1530 				continue;
1531 
1532 			vp = NULLVP;
1533 			dv = sdev_cache_lookup(ddv, nm);
1534 			if (dv) {
1535 				if (dv->sdev_state != SDEV_ZOMBIE) {
1536 					SDEV_SIMPLE_RELE(dv);
1537 				} else {
1538 					/*
1539 					 * A ZOMBIE node may not have been
1540 					 * cleaned up from the backing store,
1541 					 * bypass this entry in this case,
1542 					 * and clean it up from the directory
1543 					 * cache if this is the last call.
1544 					 */
1545 					(void) sdev_dirdelete(ddv, dv);
1546 				}
1547 				continue;
1548 			}
1549 
1550 			/* refill the cache if not already */
1551 			error = devname_backstore_lookup(ddv, nm, &vp);
1552 			if (error)
1553 				continue;
1554 
1555 			vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1556 			error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1557 			if (error)
1558 				continue;
1559 
1560 			if (vattr.va_type == VLNK) {
1561 				error = sdev_getlink(vp, &link);
1562 				if (error) {
1563 					continue;
1564 				}
1565 				ASSERT(link != NULL);
1566 			}
1567 
1568 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1569 				rw_exit(&ddv->sdev_contents);
1570 				rw_enter(&ddv->sdev_contents, RW_WRITER);
1571 			}
1572 			error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1573 			    cred, SDEV_READY);
1574 			rw_downgrade(&ddv->sdev_contents);
1575 
1576 			if (link != NULL) {
1577 				kmem_free(link, strlen(link) + 1);
1578 				link = NULL;
1579 			}
1580 
1581 			if (!error) {
1582 				ASSERT(dv);
1583 				ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1584 				SDEV_SIMPLE_RELE(dv);
1585 			}
1586 			vp = NULL;
1587 			dv = NULL;
1588 		}
1589 	}
1590 
1591 done:
1592 	VN_RELE(dirvp);
1593 	kmem_free(dbuf, dlen);
1594 
1595 	return (error);
1596 }
1597 
1598 void
1599 sdev_filldir_dynamic(struct sdev_node *ddv)
1600 {
1601 	int error;
1602 	int i;
1603 	struct vattr *vap;
1604 	char *nm = NULL;
1605 	struct sdev_node *dv = NULL;
1606 
1607 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1608 	ASSERT((ddv->sdev_flags & SDEV_BUILD));
1609 
1610 	vap = sdev_getdefault_attr(VDIR);
1611 	for (i = 0; vtab[i].vt_name != NULL; i++) {
1612 		nm = vtab[i].vt_name;
1613 		ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1614 		dv = NULL;
1615 		error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1616 		    NULL, kcred, SDEV_READY);
1617 		if (error) {
1618 			cmn_err(CE_WARN, "%s/%s: error %d\n",
1619 			    ddv->sdev_name, nm, error);
1620 		} else {
1621 			ASSERT(dv);
1622 			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1623 			SDEV_SIMPLE_RELE(dv);
1624 		}
1625 	}
1626 }
1627 
1628 /*
1629  * Creating a backing store entry based on sdev_attr.
1630  * This is called either as part of node creation in a persistent directory
1631  * or from setattr/setsecattr to persist access attributes across reboot.
1632  */
1633 int
1634 sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1635 {
1636 	int error = 0;
1637 	struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1638 	struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1639 	struct vattr *vap = dv->sdev_attr;
1640 	char *nm = dv->sdev_name;
1641 	struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1642 
1643 	ASSERT(dv && dv->sdev_name && rdvp);
1644 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1645 
1646 lookup:
1647 	/* try to find it in the backing store */
1648 	error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1649 	    NULL);
1650 	if (error == 0) {
1651 		if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1652 			VN_HOLD(rrvp);
1653 			VN_RELE(*rvp);
1654 			*rvp = rrvp;
1655 		}
1656 
1657 		kmem_free(dv->sdev_attr, sizeof (vattr_t));
1658 		dv->sdev_attr = NULL;
1659 		dv->sdev_attrvp = *rvp;
1660 		return (0);
1661 	}
1662 
1663 	/* let's try to persist the node */
1664 	gethrestime(&vap->va_atime);
1665 	vap->va_mtime = vap->va_atime;
1666 	vap->va_ctime = vap->va_atime;
1667 	vap->va_mask |= AT_TYPE|AT_MODE;
1668 	switch (vap->va_type) {
1669 	case VDIR:
1670 		error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1671 		sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1672 		    (void *)(*rvp), error));
1673 		break;
1674 	case VCHR:
1675 	case VBLK:
1676 	case VREG:
1677 	case VDOOR:
1678 		error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1679 		    rvp, cred, 0, NULL, NULL);
1680 		sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1681 		    (void *)(*rvp), error));
1682 		if (!error)
1683 			VN_RELE(*rvp);
1684 		break;
1685 	case VLNK:
1686 		ASSERT(dv->sdev_symlink);
1687 		error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1688 		    NULL, 0);
1689 		sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1690 		    error));
1691 		break;
1692 	default:
1693 		cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1694 		    "create\n", nm);
1695 		/*NOTREACHED*/
1696 	}
1697 
1698 	/* go back to lookup to factor out spec node and set attrvp */
1699 	if (error == 0)
1700 		goto lookup;
1701 
1702 	return (error);
1703 }
1704 
1705 static int
1706 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1707 {
1708 	int error = 0;
1709 	struct sdev_node *dup = NULL;
1710 
1711 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1712 	if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1713 		sdev_direnter(ddv, *dv);
1714 	} else {
1715 		if (dup->sdev_state == SDEV_ZOMBIE) {
1716 			error = sdev_dirdelete(ddv, dup);
1717 			/*
1718 			 * The ZOMBIE node is still hanging
1719 			 * around with more than one reference counts.
1720 			 * Fail the new node creation so that
1721 			 * the directory cache won't have
1722 			 * duplicate entries for the same named node
1723 			 */
1724 			if (error == EBUSY) {
1725 				SDEV_SIMPLE_RELE(*dv);
1726 				sdev_nodedestroy(*dv, 0);
1727 				*dv = NULL;
1728 				return (error);
1729 			}
1730 			sdev_direnter(ddv, *dv);
1731 		} else {
1732 			ASSERT((*dv)->sdev_state != SDEV_ZOMBIE);
1733 			SDEV_SIMPLE_RELE(*dv);
1734 			sdev_nodedestroy(*dv, 0);
1735 			*dv = dup;
1736 		}
1737 	}
1738 
1739 	return (0);
1740 }
1741 
1742 static int
1743 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1744 {
1745 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1746 	return (sdev_dirdelete(ddv, *dv));
1747 }
1748 
1749 /*
1750  * update the in-core directory cache
1751  */
1752 int
1753 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1754     sdev_cache_ops_t ops)
1755 {
1756 	int error = 0;
1757 
1758 	ASSERT((SDEV_HELD(*dv)));
1759 
1760 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1761 	switch (ops) {
1762 	case SDEV_CACHE_ADD:
1763 		error = sdev_cache_add(ddv, dv, nm);
1764 		break;
1765 	case SDEV_CACHE_DELETE:
1766 		error = sdev_cache_delete(ddv, dv);
1767 		break;
1768 	default:
1769 		break;
1770 	}
1771 
1772 	return (error);
1773 }
1774 
1775 /*
1776  * retrieve the named entry from the directory cache
1777  */
1778 struct sdev_node *
1779 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1780 {
1781 	struct sdev_node *dv = NULL;
1782 
1783 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1784 	dv = sdev_findbyname(ddv, nm);
1785 
1786 	return (dv);
1787 }
1788 
1789 /*
1790  * Implicit reconfig for nodes constructed by a link generator
1791  * Start devfsadm if needed, or if devfsadm is in progress,
1792  * prepare to block on devfsadm either completing or
1793  * constructing the desired node.  As devfsadmd is global
1794  * in scope, constructing all necessary nodes, we only
1795  * need to initiate it once.
1796  */
1797 static int
1798 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1799 {
1800 	int error = 0;
1801 
1802 	if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1803 		sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1804 		    ddv->sdev_name, nm, devfsadm_state));
1805 		mutex_enter(&dv->sdev_lookup_lock);
1806 		SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1807 		mutex_exit(&dv->sdev_lookup_lock);
1808 		error = 0;
1809 	} else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1810 		sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1811 			ddv->sdev_name, nm, devfsadm_state));
1812 
1813 		sdev_devfsadmd_thread(ddv, dv, kcred);
1814 		mutex_enter(&dv->sdev_lookup_lock);
1815 		SDEV_BLOCK_OTHERS(dv,
1816 		    (SDEV_LOOKUP | SDEV_LGWAITING));
1817 		mutex_exit(&dv->sdev_lookup_lock);
1818 		error = 0;
1819 	} else {
1820 		error = -1;
1821 	}
1822 
1823 	return (error);
1824 }
1825 
1826 static int
1827 sdev_call_modulelookup(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1828     int (*fn)(char *, devname_handle_t *, struct cred *), struct cred *cred)
1829 {
1830 	struct vnode *rvp = NULL;
1831 	int error = 0;
1832 	struct vattr *vap;
1833 	devname_spec_t spec;
1834 	devname_handle_t *hdl;
1835 	void *args = NULL;
1836 	struct sdev_node *dv = *dvp;
1837 
1838 	ASSERT(dv && ddv);
1839 	hdl = &(dv->sdev_handle);
1840 	ASSERT(hdl->dh_data == dv);
1841 	mutex_enter(&dv->sdev_lookup_lock);
1842 	SDEV_BLOCK_OTHERS(dv, SDEV_LOOKUP);
1843 	mutex_exit(&dv->sdev_lookup_lock);
1844 	error = (*fn)(nm, hdl, cred);
1845 	if (error) {
1846 		return (error);
1847 	}
1848 
1849 	spec = hdl->dh_spec;
1850 	args = hdl->dh_args;
1851 	ASSERT(args);
1852 
1853 	switch (spec) {
1854 	case DEVNAME_NS_PATH:
1855 		/*
1856 		 * symlink of:
1857 		 *	/dev/dir/nm -> /device/...
1858 		 */
1859 		rvp = devname_configure_by_path((char *)args, NULL);
1860 		break;
1861 	case DEVNAME_NS_DEV:
1862 		/*
1863 		 * symlink of:
1864 		 *	/dev/dir/nm -> /dev/...
1865 		 */
1866 		rvp = devname_find_by_devpath((char *)args, NULL);
1867 		break;
1868 	default:
1869 		if (args)
1870 			kmem_free((char *)args, strlen(args) + 1);
1871 		return (ENOENT);
1872 
1873 	}
1874 
1875 	if (rvp == NULL) {
1876 		if (args)
1877 			kmem_free((char *)args, strlen(args) + 1);
1878 		return (ENOENT);
1879 	} else {
1880 		vap = sdev_getdefault_attr(VLNK);
1881 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1882 		/*
1883 		 * Could sdev_mknode return a different dv_node
1884 		 * once the lock is dropped?
1885 		 */
1886 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
1887 			rw_exit(&ddv->sdev_contents);
1888 			rw_enter(&ddv->sdev_contents, RW_WRITER);
1889 		}
1890 		error = sdev_mknode(ddv, nm, &dv, vap, NULL, args, cred,
1891 		    SDEV_READY);
1892 		rw_downgrade(&ddv->sdev_contents);
1893 		if (error) {
1894 			if (args)
1895 				kmem_free((char *)args, strlen(args) + 1);
1896 			return (error);
1897 		} else {
1898 			mutex_enter(&dv->sdev_lookup_lock);
1899 			SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
1900 			mutex_exit(&dv->sdev_lookup_lock);
1901 			error = 0;
1902 		}
1903 	}
1904 
1905 	if (args)
1906 		kmem_free((char *)args, strlen(args) + 1);
1907 
1908 	*dvp = dv;
1909 	return (0);
1910 }
1911 
1912 /*
1913  *  Support for specialized device naming construction mechanisms
1914  */
1915 static int
1916 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1917     int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1918     void *, char *), int flags, struct cred *cred)
1919 {
1920 	int rv = 0;
1921 	char *physpath = NULL;
1922 	struct vnode *rvp = NULL;
1923 	struct vattr vattr;
1924 	struct vattr *vap;
1925 	struct sdev_node *dv = *dvp;
1926 
1927 	mutex_enter(&dv->sdev_lookup_lock);
1928 	SDEV_BLOCK_OTHERS(dv, SDEV_LOOKUP);
1929 	mutex_exit(&dv->sdev_lookup_lock);
1930 
1931 	/* for non-devfsadm devices */
1932 	if (flags & SDEV_PATH) {
1933 		physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1934 		rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1935 		    NULL);
1936 		if (rv) {
1937 			kmem_free(physpath, MAXPATHLEN);
1938 			return (-1);
1939 		}
1940 
1941 		ASSERT(physpath);
1942 		rvp = devname_configure_by_path(physpath, NULL);
1943 		if (rvp == NULL) {
1944 			sdcmn_err3(("devname_configure_by_path: "
1945 			    "failed for /dev/%s/%s\n",
1946 			    ddv->sdev_name, nm));
1947 			kmem_free(physpath, MAXPATHLEN);
1948 			rv = -1;
1949 		} else {
1950 			vap = sdev_getdefault_attr(VLNK);
1951 			ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1952 
1953 			/*
1954 			 * Sdev_mknode may return back a different sdev_node
1955 			 * that was created by another thread that
1956 			 * raced to the directroy cache before this thread.
1957 			 *
1958 			 * With current directory cache mechanism
1959 			 * (linked list with the sdev_node name as
1960 			 * the entity key), this is a way to make sure
1961 			 * only one entry exists for the same name
1962 			 * in the same directory. The outcome is
1963 			 * the winner wins.
1964 			 */
1965 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1966 				rw_exit(&ddv->sdev_contents);
1967 				rw_enter(&ddv->sdev_contents, RW_WRITER);
1968 			}
1969 			rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1970 			    (void *)physpath, cred, SDEV_READY);
1971 			rw_downgrade(&ddv->sdev_contents);
1972 			kmem_free(physpath, MAXPATHLEN);
1973 			if (rv) {
1974 				return (rv);
1975 			} else {
1976 				mutex_enter(&dv->sdev_lookup_lock);
1977 				SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
1978 				mutex_exit(&dv->sdev_lookup_lock);
1979 				return (0);
1980 			}
1981 		}
1982 	} else if (flags & SDEV_VNODE) {
1983 		/*
1984 		 * DBNR has its own way to create the device
1985 		 * and return a backing store vnode in rvp
1986 		 */
1987 		ASSERT(callback);
1988 		rv = callback(ddv, nm, (void *)&rvp, kcred, NULL, NULL);
1989 		if (rv || (rvp == NULL)) {
1990 			sdcmn_err3(("devname_lookup_func: SDEV_VNODE "
1991 			    "callback failed \n"));
1992 			return (-1);
1993 		}
1994 		vap = sdev_getdefault_attr(rvp->v_type);
1995 		if (vap == NULL)
1996 			return (-1);
1997 
1998 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1999 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2000 			rw_exit(&ddv->sdev_contents);
2001 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2002 		}
2003 		rv = sdev_mknode(ddv, nm, &dv, vap, rvp, NULL,
2004 		    cred, SDEV_READY);
2005 		rw_downgrade(&ddv->sdev_contents);
2006 		if (rv)
2007 			return (rv);
2008 
2009 		mutex_enter(&dv->sdev_lookup_lock);
2010 		SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2011 		mutex_exit(&dv->sdev_lookup_lock);
2012 		return (0);
2013 	} else if (flags & SDEV_VATTR) {
2014 		/*
2015 		 * /dev/pts
2016 		 *
2017 		 * DBNR has its own way to create the device
2018 		 * "0" is returned upon success.
2019 		 *
2020 		 * callback is responsible to set the basic attributes,
2021 		 * e.g. va_type/va_uid/va_gid/
2022 		 *    dev_t if VCHR or VBLK/
2023 		 */
2024 		ASSERT(callback);
2025 		rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
2026 		if (rv) {
2027 			sdcmn_err3(("devname_lookup_func: SDEV_NONE "
2028 			    "callback failed \n"));
2029 			return (-1);
2030 		}
2031 
2032 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2033 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2034 			rw_exit(&ddv->sdev_contents);
2035 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2036 		}
2037 		rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
2038 		    cred, SDEV_READY);
2039 		rw_downgrade(&ddv->sdev_contents);
2040 
2041 		if (rv)
2042 			return (rv);
2043 
2044 		mutex_enter(&dv->sdev_lookup_lock);
2045 		SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2046 		mutex_exit(&dv->sdev_lookup_lock);
2047 		return (0);
2048 	} else {
2049 		impossible(("lookup: %s/%s by %s not supported (%d)\n",
2050 		    SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
2051 		    __LINE__));
2052 		rv = -1;
2053 	}
2054 
2055 	*dvp = dv;
2056 	return (rv);
2057 }
2058 
/*
 * Tell whether exec_name is the name of the devfsadm executable.
 *
 * note: because devfsadmd -> /usr/sbin/devfsadm
 * it is safe to use "devfsadm" to capture the lookups
 * from devfsadm and its daemon version.
 *
 * Returns 1 on an exact match with "devfsadm", 0 otherwise.
 */
static int
is_devfsadm_thread(char *exec_name)
{
	return ((strcmp(exec_name, "devfsadm") == 0) ? 1 : 0);
}
2071 
2072 
2073 /*
2074  * Lookup Order:
2075  *	sdev_node cache;
2076  *	backing store (SDEV_PERSIST);
2077  *	DBNR: a. dir_ops implemented in the loadable modules;
2078  *	      b. vnode ops in vtab.
2079  */
2080 int
2081 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
2082     struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
2083     struct cred *, void *, char *), int flags)
2084 {
2085 	int rv = 0, nmlen;
2086 	struct vnode *rvp = NULL;
2087 	struct sdev_node *dv = NULL;
2088 	int	retried = 0;
2089 	int	error = 0;
2090 	struct devname_nsmap *map = NULL;
2091 	struct devname_ops *dirops = NULL;
2092 	int (*fn)(char *, devname_handle_t *, struct cred *) = NULL;
2093 	struct vattr vattr;
2094 	char *lookup_thread = curproc->p_user.u_comm;
2095 	int failed_flags = 0;
2096 	int (*vtor)(struct sdev_node *) = NULL;
2097 	int state;
2098 	int parent_state;
2099 	char *link = NULL;
2100 
2101 	if (SDEVTOV(ddv)->v_type != VDIR)
2102 		return (ENOTDIR);
2103 
2104 	/*
2105 	 * Empty name or ., return node itself.
2106 	 */
2107 	nmlen = strlen(nm);
2108 	if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
2109 		*vpp = SDEVTOV(ddv);
2110 		VN_HOLD(*vpp);
2111 		return (0);
2112 	}
2113 
2114 	/*
2115 	 * .., return the parent directory
2116 	 */
2117 	if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
2118 		*vpp = SDEVTOV(ddv->sdev_dotdot);
2119 		VN_HOLD(*vpp);
2120 		return (0);
2121 	}
2122 
2123 	rw_enter(&ddv->sdev_contents, RW_READER);
2124 	if (ddv->sdev_flags & SDEV_VTOR) {
2125 		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2126 		ASSERT(vtor);
2127 	}
2128 
2129 tryagain:
2130 	/*
2131 	 * (a) directory cache lookup:
2132 	 */
2133 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2134 	parent_state = ddv->sdev_state;
2135 	dv = sdev_cache_lookup(ddv, nm);
2136 	if (dv) {
2137 		state = dv->sdev_state;
2138 		switch (state) {
2139 		case SDEV_INIT:
2140 			if (is_devfsadm_thread(lookup_thread))
2141 				break;
2142 
2143 			/* ZOMBIED parent won't allow node creation */
2144 			if (parent_state == SDEV_ZOMBIE) {
2145 				SD_TRACE_FAILED_LOOKUP(ddv, nm,
2146 				    retried);
2147 				goto nolock_notfound;
2148 			}
2149 
2150 			mutex_enter(&dv->sdev_lookup_lock);
2151 			/* compensate the threads started after devfsadm */
2152 			if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2153 			    !(SDEV_IS_LOOKUP(dv)))
2154 				SDEV_BLOCK_OTHERS(dv,
2155 				    (SDEV_LOOKUP | SDEV_LGWAITING));
2156 
2157 			if (SDEV_IS_LOOKUP(dv)) {
2158 				failed_flags |= SLF_REBUILT;
2159 				rw_exit(&ddv->sdev_contents);
2160 				error = sdev_wait4lookup(dv, SDEV_LOOKUP);
2161 				mutex_exit(&dv->sdev_lookup_lock);
2162 				rw_enter(&ddv->sdev_contents, RW_READER);
2163 
2164 				if (error != 0) {
2165 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2166 					    retried);
2167 					goto nolock_notfound;
2168 				}
2169 
2170 				state = dv->sdev_state;
2171 				if (state == SDEV_INIT) {
2172 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2173 					    retried);
2174 					goto nolock_notfound;
2175 				} else if (state == SDEV_READY) {
2176 					goto found;
2177 				} else if (state == SDEV_ZOMBIE) {
2178 					rw_exit(&ddv->sdev_contents);
2179 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2180 					    retried);
2181 					SDEV_RELE(dv);
2182 					goto lookup_failed;
2183 				}
2184 			} else {
2185 				mutex_exit(&dv->sdev_lookup_lock);
2186 			}
2187 			break;
2188 		case SDEV_READY:
2189 			goto found;
2190 		case SDEV_ZOMBIE:
2191 			rw_exit(&ddv->sdev_contents);
2192 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2193 			SDEV_RELE(dv);
2194 			goto lookup_failed;
2195 		default:
2196 			rw_exit(&ddv->sdev_contents);
2197 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2198 			sdev_lookup_failed(ddv, nm, failed_flags);
2199 			*vpp = NULLVP;
2200 			return (ENOENT);
2201 		}
2202 	}
2203 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2204 
2205 	/*
2206 	 * ZOMBIED parent does not allow new node creation.
2207 	 * bail out early
2208 	 */
2209 	if (parent_state == SDEV_ZOMBIE) {
2210 		rw_exit(&ddv->sdev_contents);
2211 		*vpp = NULL;
2212 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2213 		return (ENOENT);
2214 	}
2215 
2216 	/*
2217 	 * (b0): backing store lookup
2218 	 *	SDEV_PERSIST is default except:
2219 	 *		1) pts nodes
2220 	 *		2) non-chmod'ed local nodes
2221 	 */
2222 	if (SDEV_IS_PERSIST(ddv)) {
2223 		error = devname_backstore_lookup(ddv, nm, &rvp);
2224 
2225 		if (!error) {
2226 			sdcmn_err3(("devname_backstore_lookup: "
2227 			    "found attrvp %p for %s\n", (void *)rvp, nm));
2228 
2229 			vattr.va_mask = AT_MODE|AT_UID|AT_GID;
2230 			error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
2231 			if (error) {
2232 				rw_exit(&ddv->sdev_contents);
2233 				if (dv)
2234 					SDEV_RELE(dv);
2235 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2236 				sdev_lookup_failed(ddv, nm, failed_flags);
2237 				*vpp = NULLVP;
2238 				return (ENOENT);
2239 			}
2240 
2241 			if (vattr.va_type == VLNK) {
2242 				error = sdev_getlink(rvp, &link);
2243 				if (error) {
2244 					rw_exit(&ddv->sdev_contents);
2245 					if (dv)
2246 						SDEV_RELE(dv);
2247 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2248 					    retried);
2249 					sdev_lookup_failed(ddv, nm,
2250 					    failed_flags);
2251 					*vpp = NULLVP;
2252 					return (ENOENT);
2253 				}
2254 				ASSERT(link != NULL);
2255 			}
2256 
2257 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
2258 				rw_exit(&ddv->sdev_contents);
2259 				rw_enter(&ddv->sdev_contents, RW_WRITER);
2260 			}
2261 			error = sdev_mknode(ddv, nm, &dv, &vattr,
2262 			    rvp, link, cred, SDEV_READY);
2263 			rw_downgrade(&ddv->sdev_contents);
2264 
2265 			if (link != NULL) {
2266 				kmem_free(link, strlen(link) + 1);
2267 				link = NULL;
2268 			}
2269 
2270 			if (error) {
2271 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2272 				rw_exit(&ddv->sdev_contents);
2273 				if (dv)
2274 					SDEV_RELE(dv);
2275 				goto lookup_failed;
2276 			} else {
2277 				goto found;
2278 			}
2279 		} else if (retried) {
2280 			rw_exit(&ddv->sdev_contents);
2281 			sdcmn_err3(("retry of lookup of %s/%s: failed\n",
2282 			    ddv->sdev_name, nm));
2283 			if (dv)
2284 				SDEV_RELE(dv);
2285 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2286 			sdev_lookup_failed(ddv, nm, failed_flags);
2287 			*vpp = NULLVP;
2288 			return (ENOENT);
2289 		}
2290 	}
2291 
2292 
2293 	/* first thread that is doing the lookup on this node */
2294 	if (!dv) {
2295 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2296 			rw_exit(&ddv->sdev_contents);
2297 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2298 		}
2299 		error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
2300 		    cred, SDEV_INIT);
2301 		if (!dv) {
2302 			rw_exit(&ddv->sdev_contents);
2303 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2304 			sdev_lookup_failed(ddv, nm, failed_flags);
2305 			*vpp = NULLVP;
2306 			return (ENOENT);
2307 		}
2308 		rw_downgrade(&ddv->sdev_contents);
2309 	}
2310 	ASSERT(dv);
2311 	ASSERT(SDEV_HELD(dv));
2312 
2313 	if (SDEV_IS_NO_NCACHE(dv)) {
2314 		failed_flags |= SLF_NO_NCACHE;
2315 	}
2316 
2317 	if (SDEV_IS_GLOBAL(ddv)) {
2318 		map = sdev_get_map(ddv, 1);
2319 		dirops = map ? map->dir_ops : NULL;
2320 		fn = dirops ? dirops->devnops_lookup : NULL;
2321 	}
2322 
2323 	/*
2324 	 * (b1) invoking devfsadm once per life time for devfsadm nodes
2325 	 */
2326 	if ((fn == NULL) && !callback) {
2327 
2328 		if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
2329 		    SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
2330 		    ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
2331 			ASSERT(SDEV_HELD(dv));
2332 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2333 			goto nolock_notfound;
2334 		}
2335 
2336 		/*
2337 		 * filter out known non-existent devices recorded
2338 		 * during initial reconfiguration boot for which
2339 		 * reconfig should not be done and lookup may
2340 		 * be short-circuited now.
2341 		 */
2342 		if (sdev_lookup_filter(ddv, nm)) {
2343 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2344 			goto nolock_notfound;
2345 		}
2346 
2347 		/* bypassing devfsadm internal nodes */
2348 		if (is_devfsadm_thread(lookup_thread)) {
2349 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2350 			goto nolock_notfound;
2351 		}
2352 
2353 		if (sdev_reconfig_disable) {
2354 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2355 			goto nolock_notfound;
2356 		}
2357 
2358 		error = sdev_call_devfsadmd(ddv, dv, nm);
2359 		if (error == 0) {
2360 			sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
2361 			    ddv->sdev_name, nm, curproc->p_user.u_comm));
2362 			if (sdev_reconfig_verbose) {
2363 				cmn_err(CE_CONT,
2364 				    "?lookup of %s/%s by %s: reconfig\n",
2365 				    ddv->sdev_name, nm, curproc->p_user.u_comm);
2366 			}
2367 			retried = 1;
2368 			failed_flags |= SLF_REBUILT;
2369 			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
2370 			SDEV_SIMPLE_RELE(dv);
2371 			goto tryagain;
2372 		} else {
2373 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2374 			goto nolock_notfound;
2375 		}
2376 	}
2377 
2378 	/*
2379 	 * (b2) Directory Based Name Resolution (DBNR):
2380 	 *	ddv	- parent
2381 	 *	nm	- /dev/(ddv->sdev_name)/nm
2382 	 *
2383 	 *	note: module vnode ops take precedence than the build-in ones
2384 	 */
2385 	if (fn) {
2386 		error = sdev_call_modulelookup(ddv, &dv, nm, fn, cred);
2387 		if (error) {
2388 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2389 			goto notfound;
2390 		} else {
2391 			goto found;
2392 		}
2393 	} else if (callback) {
2394 		error = sdev_call_dircallback(ddv, &dv, nm, callback,
2395 		    flags, cred);
2396 		if (error == 0) {
2397 			goto found;
2398 		} else {
2399 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2400 			goto notfound;
2401 		}
2402 	}
2403 	ASSERT(rvp);
2404 
2405 found:
2406 	ASSERT(!(dv->sdev_flags & SDEV_STALE));
2407 	ASSERT(dv->sdev_state == SDEV_READY);
2408 	if (vtor) {
2409 		/*
2410 		 * Check validity of returned node
2411 		 */
2412 		switch (vtor(dv)) {
2413 		case SDEV_VTOR_VALID:
2414 			break;
2415 		case SDEV_VTOR_INVALID:
2416 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2417 			sdcmn_err7(("lookup: destroy invalid "
2418 			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
2419 			goto nolock_notfound;
2420 		case SDEV_VTOR_SKIP:
2421 			sdcmn_err7(("lookup: node not applicable - "
2422 			    "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
2423 			rw_exit(&ddv->sdev_contents);
2424 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2425 			SDEV_RELE(dv);
2426 			goto lookup_failed;
2427 		default:
2428 			cmn_err(CE_PANIC,
2429 			    "dev fs: validator failed: %s(%p)\n",
2430 			    dv->sdev_name, (void *)dv);
2431 			break;
2432 			/*NOTREACHED*/
2433 		}
2434 	}
2435 
2436 	if ((SDEVTOV(dv)->v_type == VDIR) && SDEV_IS_GLOBAL(dv)) {
2437 		rw_enter(&dv->sdev_contents, RW_READER);
2438 		(void) sdev_get_map(dv, 1);
2439 		rw_exit(&dv->sdev_contents);
2440 	}
2441 	rw_exit(&ddv->sdev_contents);
2442 	rv = sdev_to_vp(dv, vpp);
2443 	sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
2444 	    "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
2445 	    dv->sdev_state, nm, rv));
2446 	return (rv);
2447 
2448 notfound:
2449 	mutex_enter(&dv->sdev_lookup_lock);
2450 	SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2451 	mutex_exit(&dv->sdev_lookup_lock);
2452 nolock_notfound:
2453 	/*
2454 	 * Destroy the node that is created for synchronization purposes.
2455 	 */
2456 	sdcmn_err3(("devname_lookup_func: %s with state %d\n",
2457 	    nm, dv->sdev_state));
2458 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2459 	if (dv->sdev_state == SDEV_INIT) {
2460 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2461 			rw_exit(&ddv->sdev_contents);
2462 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2463 		}
2464 
2465 		/*
2466 		 * Node state may have changed during the lock
2467 		 * changes. Re-check.
2468 		 */
2469 		if (dv->sdev_state == SDEV_INIT) {
2470 			(void) sdev_dirdelete(ddv, dv);
2471 			rw_exit(&ddv->sdev_contents);
2472 			sdev_lookup_failed(ddv, nm, failed_flags);
2473 			*vpp = NULL;
2474 			return (ENOENT);
2475 		}
2476 	}
2477 
2478 	rw_exit(&ddv->sdev_contents);
2479 	SDEV_RELE(dv);
2480 
2481 lookup_failed:
2482 	sdev_lookup_failed(ddv, nm, failed_flags);
2483 	*vpp = NULL;
2484 	return (ENOENT);
2485 }
2486 
2487 /*
2488  * Given a directory node, mark all nodes beneath as
2489  * STALE, i.e. nodes that don't exist as far as new
2490  * consumers are concerned
2491  */
2492 void
2493 sdev_stale(struct sdev_node *ddv)
2494 {
2495 	struct sdev_node *dv;
2496 	struct vnode *vp;
2497 
2498 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2499 
2500 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2501 	for (dv = ddv->sdev_dot; dv; dv = dv->sdev_next) {
2502 		vp = SDEVTOV(dv);
2503 		if (vp->v_type == VDIR)
2504 			sdev_stale(dv);
2505 
2506 		sdcmn_err9(("sdev_stale: setting stale %s\n",
2507 		    dv->sdev_name));
2508 		dv->sdev_flags |= SDEV_STALE;
2509 	}
2510 	ddv->sdev_flags |= SDEV_BUILD;
2511 	rw_exit(&ddv->sdev_contents);
2512 }
2513 
2514 /*
2515  * Given a directory node, clean out all the nodes beneath.
2516  * If expr is specified, clean node with names matching expr.
2517  * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2518  *	so they are excluded from future lookups.
2519  */
2520 int
2521 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2522 {
2523 	int error = 0;
2524 	int busy = 0;
2525 	struct vnode *vp;
2526 	struct sdev_node *dv, *next = NULL;
2527 	int bkstore = 0;
2528 	int len = 0;
2529 	char *bks_name = NULL;
2530 
2531 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2532 
2533 	/*
2534 	 * We try our best to destroy all unused sdev_node's
2535 	 */
2536 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2537 	for (dv = ddv->sdev_dot; dv; dv = next) {
2538 		next = dv->sdev_next;
2539 		vp = SDEVTOV(dv);
2540 
2541 		if (expr && gmatch(dv->sdev_name, expr) == 0)
2542 			continue;
2543 
2544 		if (vp->v_type == VDIR &&
2545 		    sdev_cleandir(dv, NULL, flags) != 0) {
2546 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2547 			    dv->sdev_name));
2548 			busy++;
2549 			continue;
2550 		}
2551 
2552 		if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2553 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2554 			    dv->sdev_name));
2555 			busy++;
2556 			continue;
2557 		}
2558 
2559 		/*
2560 		 * at this point, either dv is not held or SDEV_ENFORCE
2561 		 * is specified. In either case, dv needs to be deleted
2562 		 */
2563 		SDEV_HOLD(dv);
2564 
2565 		bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2566 		if (bkstore && (vp->v_type == VDIR))
2567 			bkstore += 1;
2568 
2569 		if (bkstore) {
2570 			len = strlen(dv->sdev_name) + 1;
2571 			bks_name = kmem_alloc(len, KM_SLEEP);
2572 			bcopy(dv->sdev_name, bks_name, len);
2573 		}
2574 
2575 		error = sdev_dirdelete(ddv, dv);
2576 
2577 		if (error == EBUSY) {
2578 			sdcmn_err9(("sdev_cleandir: dir busy\n"));
2579 			busy++;
2580 		}
2581 
2582 		/* take care the backing store clean up */
2583 		if (bkstore && (error == 0)) {
2584 			ASSERT(bks_name);
2585 			ASSERT(ddv->sdev_attrvp);
2586 
2587 			if (bkstore == 1) {
2588 				error = VOP_REMOVE(ddv->sdev_attrvp,
2589 				    bks_name, kcred, NULL, 0);
2590 			} else if (bkstore == 2) {
2591 				error = VOP_RMDIR(ddv->sdev_attrvp,
2592 				    bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2593 			}
2594 
2595 			/* do not propagate the backing store errors */
2596 			if (error) {
2597 				sdcmn_err9(("sdev_cleandir: backing store"
2598 				    "not cleaned\n"));
2599 				error = 0;
2600 			}
2601 
2602 			bkstore = 0;
2603 			kmem_free(bks_name, len);
2604 			bks_name = NULL;
2605 			len = 0;
2606 		}
2607 	}
2608 
2609 	ddv->sdev_flags |= SDEV_BUILD;
2610 	rw_exit(&ddv->sdev_contents);
2611 
2612 	if (busy) {
2613 		error = EBUSY;
2614 	}
2615 
2616 	return (error);
2617 }
2618 
2619 /*
2620  * a convenient wrapper for readdir() funcs
2621  */
2622 size_t
2623 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2624 {
2625 	size_t reclen = DIRENT64_RECLEN(strlen(nm));
2626 	if (reclen > size)
2627 		return (0);
2628 
2629 	de->d_ino = (ino64_t)ino;
2630 	de->d_off = (off64_t)off + 1;
2631 	de->d_reclen = (ushort_t)reclen;
2632 	(void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2633 	return (reclen);
2634 }
2635 
2636 /*
2637  * sdev_mount service routines
2638  */
2639 int
2640 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2641 {
2642 	int	error;
2643 
2644 	if (uap->datalen != sizeof (*args))
2645 		return (EINVAL);
2646 
2647 	if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2648 		cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2649 		    "get user data. error %d\n", error);
2650 		return (EFAULT);
2651 	}
2652 
2653 	return (0);
2654 }
2655 
/*
 * nextdp(dp): address of the dirent64 record that follows dp in a
 * packed buffer, computed by advancing d_reclen bytes from dp.
 * Any definition of nextdp already in effect is discarded first.
 */
#ifdef nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *) \
			    (intptr_t)((char *)(dp) + (dp)->d_reclen))
2661 
2662 /*
2663  * readdir helper func
2664  */
2665 int
2666 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
2667     int flags)
2668 {
2669 	struct sdev_node *ddv = VTOSDEV(vp);
2670 	struct sdev_node *dv;
2671 	dirent64_t	*dp;
2672 	ulong_t		outcount = 0;
2673 	size_t		namelen;
2674 	ulong_t		alloc_count;
2675 	void		*outbuf;
2676 	struct iovec	*iovp;
2677 	int		error = 0;
2678 	size_t		reclen;
2679 	offset_t	diroff;
2680 	offset_t	soff;
2681 	int		this_reclen;
2682 	struct devname_nsmap	*map = NULL;
2683 	struct devname_ops	*dirops = NULL;
2684 	int (*fn)(devname_handle_t *, struct cred *) = NULL;
2685 	int (*vtor)(struct sdev_node *) = NULL;
2686 	struct vattr attr;
2687 	timestruc_t now;
2688 
2689 	ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
2690 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2691 
2692 	if (uiop->uio_loffset >= MAXOFF_T) {
2693 		if (eofp)
2694 			*eofp = 1;
2695 		return (0);
2696 	}
2697 
2698 	if (uiop->uio_iovcnt != 1)
2699 		return (EINVAL);
2700 
2701 	if (vp->v_type != VDIR)
2702 		return (ENOTDIR);
2703 
2704 	if (ddv->sdev_flags & SDEV_VTOR) {
2705 		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2706 		ASSERT(vtor);
2707 	}
2708 
2709 	if (eofp != NULL)
2710 		*eofp = 0;
2711 
2712 	soff = uiop->uio_loffset;
2713 	iovp = uiop->uio_iov;
2714 	alloc_count = iovp->iov_len;
2715 	dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
2716 	outcount = 0;
2717 
2718 	if (ddv->sdev_state == SDEV_ZOMBIE)
2719 		goto get_cache;
2720 
2721 	if (SDEV_IS_GLOBAL(ddv)) {
2722 		map = sdev_get_map(ddv, 0);
2723 		dirops = map ? map->dir_ops : NULL;
2724 		fn = dirops ? dirops->devnops_readdir : NULL;
2725 
2726 		if (map && map->dir_map) {
2727 			/*
2728 			 * load the name mapping rule database
2729 			 * through invoking devfsadm and symlink
2730 			 * all the entries in the map
2731 			 */
2732 			devname_rdr_result_t rdr_result;
2733 			int do_thread = 0;
2734 
2735 			rw_enter(&map->dir_lock, RW_READER);
2736 			do_thread = map->dir_maploaded ? 0 : 1;
2737 			rw_exit(&map->dir_lock);
2738 
2739 			if (do_thread) {
2740 				mutex_enter(&ddv->sdev_lookup_lock);
2741 				SDEV_BLOCK_OTHERS(ddv, SDEV_READDIR);
2742 				mutex_exit(&ddv->sdev_lookup_lock);
2743 
2744 				sdev_dispatch_to_nsrdr_thread(ddv,
2745 				    map->dir_map, &rdr_result);
2746 			}
2747 		} else if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
2748 		    !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
2749 		    !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
2750 		    ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
2751 		    !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
2752 		    !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2753 		    !sdev_reconfig_disable) {
2754 			/*
2755 			 * invoking "devfsadm" to do system device reconfig
2756 			 */
2757 			mutex_enter(&ddv->sdev_lookup_lock);
2758 			SDEV_BLOCK_OTHERS(ddv,
2759 			    (SDEV_READDIR|SDEV_LGWAITING));
2760 			mutex_exit(&ddv->sdev_lookup_lock);
2761 
2762 			sdcmn_err8(("readdir of %s by %s: reconfig\n",
2763 			    ddv->sdev_path, curproc->p_user.u_comm));
2764 			if (sdev_reconfig_verbose) {
2765 				cmn_err(CE_CONT,
2766 				    "?readdir of %s by %s: reconfig\n",
2767 				    ddv->sdev_path, curproc->p_user.u_comm);
2768 			}
2769 
2770 			sdev_devfsadmd_thread(ddv, NULL, kcred);
2771 		} else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
2772 			/*
2773 			 * compensate the "ls" started later than "devfsadm"
2774 			 */
2775 			mutex_enter(&ddv->sdev_lookup_lock);
2776 			SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
2777 			mutex_exit(&ddv->sdev_lookup_lock);
2778 		}
2779 
2780 		/*
2781 		 * release the contents lock so that
2782 		 * the cache may be updated by devfsadmd
2783 		 */
2784 		rw_exit(&ddv->sdev_contents);
2785 		mutex_enter(&ddv->sdev_lookup_lock);
2786 		if (SDEV_IS_READDIR(ddv))
2787 			(void) sdev_wait4lookup(ddv, SDEV_READDIR);
2788 		mutex_exit(&ddv->sdev_lookup_lock);
2789 		rw_enter(&ddv->sdev_contents, RW_READER);
2790 
2791 		sdcmn_err4(("readdir of directory %s by %s\n",
2792 		    ddv->sdev_name, curproc->p_user.u_comm));
2793 		if (ddv->sdev_flags & SDEV_BUILD) {
2794 			if (SDEV_IS_PERSIST(ddv)) {
2795 				error = sdev_filldir_from_store(ddv,
2796 				    alloc_count, cred);
2797 			}
2798 			ddv->sdev_flags &= ~SDEV_BUILD;
2799 		}
2800 	}
2801 
2802 get_cache:
2803 	/* handle "." and ".." */
2804 	diroff = 0;
2805 	if (soff == 0) {
2806 		/* first time */
2807 		this_reclen = DIRENT64_RECLEN(1);
2808 		if (alloc_count < this_reclen) {
2809 			error = EINVAL;
2810 			goto done;
2811 		}
2812 
2813 		dp->d_ino = (ino64_t)ddv->sdev_ino;
2814 		dp->d_off = (off64_t)1;
2815 		dp->d_reclen = (ushort_t)this_reclen;
2816 
2817 		(void) strncpy(dp->d_name, ".",
2818 		    DIRENT64_NAMELEN(this_reclen));
2819 		outcount += dp->d_reclen;
2820 		dp = nextdp(dp);
2821 	}
2822 
2823 	diroff++;
2824 	if (soff <= 1) {
2825 		this_reclen = DIRENT64_RECLEN(2);
2826 		if (alloc_count < outcount + this_reclen) {
2827 			error = EINVAL;
2828 			goto done;
2829 		}
2830 
2831 		dp->d_reclen = (ushort_t)this_reclen;
2832 		dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
2833 		dp->d_off = (off64_t)2;
2834 
2835 		(void) strncpy(dp->d_name, "..",
2836 		    DIRENT64_NAMELEN(this_reclen));
2837 		outcount += dp->d_reclen;
2838 
2839 		dp = nextdp(dp);
2840 	}
2841 
2842 
2843 	/* gets the cache */
2844 	diroff++;
2845 	for (dv = ddv->sdev_dot; dv; dv = dv->sdev_next, diroff++) {
2846 		sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
2847 		    diroff, soff, dv->sdev_name));
2848 
2849 		/* bypassing pre-matured nodes */
2850 		if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
2851 			sdcmn_err3(("sdev_readdir: pre-mature node  "
2852 			    "%s\n", dv->sdev_name));
2853 			continue;
2854 		}
2855 
2856 		/* don't list stale nodes */
2857 		if (dv->sdev_flags & SDEV_STALE) {
2858 			sdcmn_err4(("sdev_readdir: STALE node  "
2859 			    "%s\n", dv->sdev_name));
2860 			continue;
2861 		}
2862 
2863 		/*
2864 		 * Check validity of node
2865 		 */
2866 		if (vtor) {
2867 			switch (vtor(dv)) {
2868 			case SDEV_VTOR_VALID:
2869 				break;
2870 			case SDEV_VTOR_INVALID:
2871 			case SDEV_VTOR_SKIP:
2872 				continue;
2873 			default:
2874 				cmn_err(CE_PANIC,
2875 				    "dev fs: validator failed: %s(%p)\n",
2876 				    dv->sdev_name, (void *)dv);
2877 				break;
2878 			/*NOTREACHED*/
2879 			}
2880 		}
2881 
2882 		/*
2883 		 * call back into the module for the validity/bookkeeping
2884 		 * of this entry
2885 		 */
2886 		if (fn) {
2887 			error = (*fn)(&(dv->sdev_handle), cred);
2888 			if (error) {
2889 				sdcmn_err4(("sdev_readdir: module did not "
2890 				    "validate %s\n", dv->sdev_name));
2891 				continue;
2892 			}
2893 		}
2894 
2895 		namelen = strlen(dv->sdev_name);
2896 		reclen = DIRENT64_RECLEN(namelen);
2897 		if (outcount + reclen > alloc_count) {
2898 			goto full;
2899 		}
2900 		dp->d_reclen = (ushort_t)reclen;
2901 		dp->d_ino = (ino64_t)dv->sdev_ino;
2902 		dp->d_off = (off64_t)diroff + 1;
2903 		(void) strncpy(dp->d_name, dv->sdev_name,
2904 		    DIRENT64_NAMELEN(reclen));
2905 		outcount += reclen;
2906 		dp = nextdp(dp);
2907 	}
2908 
2909 full:
2910 	sdcmn_err4(("sdev_readdir: moving %lu bytes: "
2911 	    "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
2912 	    (void *)dv));
2913 
2914 	if (outcount)
2915 		error = uiomove(outbuf, outcount, UIO_READ, uiop);
2916 
2917 	if (!error) {
2918 		uiop->uio_loffset = diroff;
2919 		if (eofp)
2920 			*eofp = dv ? 0 : 1;
2921 	}
2922 
2923 
2924 	if (ddv->sdev_attrvp) {
2925 		gethrestime(&now);
2926 		attr.va_ctime = now;
2927 		attr.va_atime = now;
2928 		attr.va_mask = AT_CTIME|AT_ATIME;
2929 
2930 		(void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
2931 	}
2932 done:
2933 	kmem_free(outbuf, alloc_count);
2934 	return (error);
2935 }
2936 
2937 
2938 static int
2939 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2940 {
2941 	vnode_t *vp;
2942 	vnode_t *cvp;
2943 	struct sdev_node *svp;
2944 	char *nm;
2945 	struct pathname pn;
2946 	int error;
2947 	int persisted = 0;
2948 
2949 	if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2950 		return (error);
2951 	nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2952 
2953 	vp = rootdir;
2954 	VN_HOLD(vp);
2955 
2956 	while (pn_pathleft(&pn)) {
2957 		ASSERT(vp->v_type == VDIR);
2958 		(void) pn_getcomponent(&pn, nm);
2959 		error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2960 		    NULL, NULL);
2961 		VN_RELE(vp);
2962 
2963 		if (error)
2964 			break;
2965 
2966 		/* traverse mount points encountered on our journey */
2967 		if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2968 			VN_RELE(cvp);
2969 			break;
2970 		}
2971 
2972 		/*
2973 		 * Direct the operation to the persisting filesystem
2974 		 * underlying /dev.  Bail if we encounter a
2975 		 * non-persistent dev entity here.
2976 		 */
2977 		if (cvp->v_vfsp->vfs_fstype == devtype) {
2978 
2979 			if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2980 				error = ENOENT;
2981 				VN_RELE(cvp);
2982 				break;
2983 			}
2984 
2985 			if (VTOSDEV(cvp) == NULL) {
2986 				error = ENOENT;
2987 				VN_RELE(cvp);
2988 				break;
2989 			}
2990 			svp = VTOSDEV(cvp);
2991 			if ((vp = svp->sdev_attrvp) == NULL) {
2992 				error = ENOENT;
2993 				VN_RELE(cvp);
2994 				break;
2995 			}
2996 			persisted = 1;
2997 			VN_HOLD(vp);
2998 			VN_RELE(cvp);
2999 			cvp = vp;
3000 		}
3001 
3002 		vp = cvp;
3003 		pn_skipslash(&pn);
3004 	}
3005 
3006 	kmem_free(nm, MAXNAMELEN);
3007 	pn_free(&pn);
3008 
3009 	if (error)
3010 		return (error);
3011 
3012 	/*
3013 	 * Only return persisted nodes in the filesystem underlying /dev.
3014 	 */
3015 	if (!persisted) {
3016 		VN_RELE(vp);
3017 		return (ENOENT);
3018 	}
3019 
3020 	*r_vp = vp;
3021 	return (0);
3022 }
3023 
3024 int
3025 sdev_modctl_readdir(const char *dir, char ***dirlistp,
3026 	int *npathsp, int *npathsp_alloc)
3027 {
3028 	char	**pathlist = NULL;
3029 	char	**newlist = NULL;
3030 	int	npaths = 0;
3031 	int	npaths_alloc = 0;
3032 	dirent64_t *dbuf = NULL;
3033 	int	n;
3034 	char	*s;
3035 	int error;
3036 	vnode_t *vp;
3037 	int eof;
3038 	struct iovec iov;
3039 	struct uio uio;
3040 	struct dirent64 *dp;
3041 	size_t dlen;
3042 	size_t dbuflen;
3043 	int ndirents = 64;
3044 	char *nm;
3045 
3046 	error = sdev_modctl_lookup(dir, &vp);
3047 	sdcmn_err11(("modctl readdir: %s by %s: %s\n",
3048 	    dir, curproc->p_user.u_comm,
3049 	    (error == 0) ? "ok" : "failed"));
3050 	if (error)
3051 		return (error);
3052 
3053 	dlen = ndirents * (sizeof (*dbuf));
3054 	dbuf = kmem_alloc(dlen, KM_SLEEP);
3055 
3056 	uio.uio_iov = &iov;
3057 	uio.uio_iovcnt = 1;
3058 	uio.uio_segflg = UIO_SYSSPACE;
3059 	uio.uio_fmode = 0;
3060 	uio.uio_extflg = UIO_COPY_CACHED;
3061 	uio.uio_loffset = 0;
3062 	uio.uio_llimit = MAXOFFSET_T;
3063 
3064 	eof = 0;
3065 	error = 0;
3066 	while (!error && !eof) {
3067 		uio.uio_resid = dlen;
3068 		iov.iov_base = (char *)dbuf;
3069 		iov.iov_len = dlen;
3070 
3071 		(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3072 		error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
3073 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3074 
3075 		dbuflen = dlen - uio.uio_resid;
3076 
3077 		if (error || dbuflen == 0)
3078 			break;
3079 
3080 		for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
3081 			dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
3082 
3083 			nm = dp->d_name;
3084 
3085 			if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
3086 				continue;
3087 
3088 			if (npaths == npaths_alloc) {
3089 				npaths_alloc += 64;
3090 				newlist = (char **)
3091 				    kmem_zalloc((npaths_alloc + 1) *
3092 					sizeof (char *), KM_SLEEP);
3093 				if (pathlist) {
3094 					bcopy(pathlist, newlist,
3095 					    npaths * sizeof (char *));
3096 					kmem_free(pathlist,
3097 					    (npaths + 1) * sizeof (char *));
3098 				}
3099 				pathlist = newlist;
3100 			}
3101 			n = strlen(nm) + 1;
3102 			s = kmem_alloc(n, KM_SLEEP);
3103 			bcopy(nm, s, n);
3104 			pathlist[npaths++] = s;
3105 			sdcmn_err11(("  %s/%s\n", dir, s));
3106 		}
3107 	}
3108 
3109 exit:
3110 	VN_RELE(vp);
3111 
3112 	if (dbuf)
3113 		kmem_free(dbuf, dlen);
3114 
3115 	if (error)
3116 		return (error);
3117 
3118 	*dirlistp = pathlist;
3119 	*npathsp = npaths;
3120 	*npathsp_alloc = npaths_alloc;
3121 
3122 	return (0);
3123 }
3124 
/*
 * Release a name list produced by sdev_modctl_readdir(): the npaths
 * name strings and the array itself, which was allocated with
 * (npaths_alloc + 1) pointer slots.
 *
 * A NULL pathlist (what sdev_modctl_readdir() returns for a directory
 * without entries) is accepted and ignored.
 */
void
sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
{
	int	i, n;

	/* nothing was allocated for an empty listing */
	if (pathlist == NULL)
		return;

	for (i = 0; i < npaths; i++) {
		n = strlen(pathlist[i]) + 1;
		kmem_free(pathlist[i], n);
	}

	kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
}
3137 
3138 int
3139 sdev_modctl_devexists(const char *path)
3140 {
3141 	vnode_t *vp;
3142 	int error;
3143 
3144 	error = sdev_modctl_lookup(path, &vp);
3145 	sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
3146 	    path, curproc->p_user.u_comm,
3147 	    (error == 0) ? "ok" : "failed"));
3148 	if (error == 0)
3149 		VN_RELE(vp);
3150 
3151 	return (error);
3152 }
3153 
3154 void
3155 sdev_update_newnsmap(struct devname_nsmap *map, char *module, char *mapname)
3156 {
3157 	rw_enter(&map->dir_lock, RW_WRITER);
3158 	if (module) {
3159 		ASSERT(map->dir_newmodule == NULL);
3160 		map->dir_newmodule = i_ddi_strdup(module, KM_SLEEP);
3161 	}
3162 	if (mapname) {
3163 		ASSERT(map->dir_newmap == NULL);
3164 		map->dir_newmap = i_ddi_strdup(mapname, KM_SLEEP);
3165 	}
3166 
3167 	map->dir_invalid = 1;
3168 	rw_exit(&map->dir_lock);
3169 }
3170 
3171 void
3172 sdev_replace_nsmap(struct devname_nsmap *map, char *module, char *mapname)
3173 {
3174 	char *old_module = NULL;
3175 	char *old_map = NULL;
3176 
3177 	ASSERT(RW_LOCK_HELD(&map->dir_lock));
3178 	if (!rw_tryupgrade(&map->dir_lock)) {
3179 		rw_exit(&map->dir_lock);
3180 		rw_enter(&map->dir_lock, RW_WRITER);
3181 	}
3182 
3183 	old_module = map->dir_module;
3184 	if (module) {
3185 		if (old_module && strcmp(old_module, module) != 0) {
3186 			kmem_free(old_module, strlen(old_module) + 1);
3187 		}
3188 		map->dir_module = module;
3189 		map->dir_newmodule = NULL;
3190 	}
3191 
3192 	old_map = map->dir_map;
3193 	if (mapname) {
3194 		if (old_map && strcmp(old_map, mapname) != 0) {
3195 			kmem_free(old_map, strlen(old_map) + 1);
3196 		}
3197 
3198 		map->dir_map = mapname;
3199 		map->dir_newmap = NULL;
3200 	}
3201 	map->dir_maploaded = 0;
3202 	map->dir_invalid = 0;
3203 	rw_downgrade(&map->dir_lock);
3204 }
3205 
3206 /*
3207  * dir_name should have at least one attribute,
3208  *	dir_module
3209  *	or dir_map
3210  *	or both
3211  * caller holds the devname_nsmaps_lock
3212  */
3213 void
3214 sdev_insert_nsmap(char *dir_name, char *dir_module, char *dir_map)
3215 {
3216 	struct devname_nsmap *map;
3217 	int len = 0;
3218 
3219 	ASSERT(dir_name);
3220 	ASSERT(dir_module || dir_map);
3221 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3222 
3223 	if (map = sdev_get_nsmap_by_dir(dir_name, 1)) {
3224 		sdev_update_newnsmap(map, dir_module, dir_map);
3225 		return;
3226 	}
3227 
3228 	map = (struct devname_nsmap *)kmem_zalloc(sizeof (*map), KM_SLEEP);
3229 	map->dir_name = i_ddi_strdup(dir_name, KM_SLEEP);
3230 	if (dir_module) {
3231 		map->dir_module = i_ddi_strdup(dir_module, KM_SLEEP);
3232 	}
3233 
3234 	if (dir_map) {
3235 		if (dir_map[0] != '/') {
3236 			len = strlen(ETC_DEV_DIR) + strlen(dir_map) + 2;
3237 			map->dir_map = kmem_zalloc(len, KM_SLEEP);
3238 			(void) snprintf(map->dir_map, len, "%s/%s", ETC_DEV_DIR,
3239 			    dir_map);
3240 		} else {
3241 			map->dir_map = i_ddi_strdup(dir_map, KM_SLEEP);
3242 		}
3243 	}
3244 
3245 	map->dir_ops = NULL;
3246 	map->dir_maploaded = 0;
3247 	map->dir_invalid = 0;
3248 	rw_init(&map->dir_lock, NULL, RW_DEFAULT, NULL);
3249 
3250 	map->next = devname_nsmaps;
3251 	map->prev = NULL;
3252 	if (devname_nsmaps) {
3253 		devname_nsmaps->prev = map;
3254 	}
3255 	devname_nsmaps = map;
3256 }
3257 
3258 struct devname_nsmap *
3259 sdev_get_nsmap_by_dir(char *dir_path, int locked)
3260 {
3261 	struct devname_nsmap *map = NULL;
3262 
3263 	if (!locked)
3264 		mutex_enter(&devname_nsmaps_lock);
3265 	for (map = devname_nsmaps; map; map = map->next) {
3266 		sdcmn_err6(("sdev_get_nsmap_by_dir: dir %s\n", map->dir_name));
3267 		if (strcmp(map->dir_name, dir_path) == 0) {
3268 			if (!locked)
3269 				mutex_exit(&devname_nsmaps_lock);
3270 			return (map);
3271 		}
3272 	}
3273 	if (!locked)
3274 		mutex_exit(&devname_nsmaps_lock);
3275 	return (NULL);
3276 }
3277 
3278 struct devname_nsmap *
3279 sdev_get_nsmap_by_module(char *mod_name)
3280 {
3281 	struct devname_nsmap *map = NULL;
3282 
3283 	mutex_enter(&devname_nsmaps_lock);
3284 	for (map = devname_nsmaps; map; map = map->next) {
3285 		sdcmn_err7(("sdev_get_nsmap_by_module: module %s\n",
3286 		    map->dir_module));
3287 		if (map->dir_module && strcmp(map->dir_module, mod_name) == 0) {
3288 			mutex_exit(&devname_nsmaps_lock);
3289 			return (map);
3290 		}
3291 	}
3292 	mutex_exit(&devname_nsmaps_lock);
3293 	return (NULL);
3294 }
3295 
3296 void
3297 sdev_invalidate_nsmaps()
3298 {
3299 	struct devname_nsmap *map = NULL;
3300 
3301 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3302 
3303 	if (devname_nsmaps == NULL)
3304 		return;
3305 
3306 	for (map = devname_nsmaps; map; map = map->next) {
3307 		rw_enter(&map->dir_lock, RW_WRITER);
3308 		map->dir_invalid = 1;
3309 		rw_exit(&map->dir_lock);
3310 	}
3311 	devname_nsmaps_invalidated = 1;
3312 }
3313 
3314 
3315 int
3316 sdev_nsmaps_loaded()
3317 {
3318 	int ret = 0;
3319 
3320 	mutex_enter(&devname_nsmaps_lock);
3321 	if (devname_nsmaps_loaded)
3322 		ret = 1;
3323 
3324 	mutex_exit(&devname_nsmaps_lock);
3325 	return (ret);
3326 }
3327 
3328 int
3329 sdev_nsmaps_reloaded()
3330 {
3331 	int ret = 0;
3332 
3333 	mutex_enter(&devname_nsmaps_lock);
3334 	if (devname_nsmaps_invalidated)
3335 		ret = 1;
3336 
3337 	mutex_exit(&devname_nsmaps_lock);
3338 	return (ret);
3339 }
3340 
3341 static void
3342 sdev_free_nsmap(struct devname_nsmap *map)
3343 {
3344 	ASSERT(map);
3345 	if (map->dir_name)
3346 		kmem_free(map->dir_name, strlen(map->dir_name) + 1);
3347 	if (map->dir_module)
3348 		kmem_free(map->dir_module, strlen(map->dir_module) + 1);
3349 	if (map->dir_map)
3350 		kmem_free(map->dir_map, strlen(map->dir_map) + 1);
3351 	rw_destroy(&map->dir_lock);
3352 	kmem_free(map, sizeof (*map));
3353 }
3354 
3355 void
3356 sdev_validate_nsmaps()
3357 {
3358 	struct devname_nsmap *map = NULL;
3359 	struct devname_nsmap *oldmap = NULL;
3360 
3361 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3362 	map = devname_nsmaps;
3363 	while (map) {
3364 		rw_enter(&map->dir_lock, RW_READER);
3365 		if ((map->dir_invalid == 1) && (map->dir_newmodule == NULL) &&
3366 		    (map->dir_newmap == NULL)) {
3367 			oldmap = map;
3368 			rw_exit(&map->dir_lock);
3369 			if (map->prev)
3370 				map->prev->next = oldmap->next;
3371 			if (map == devname_nsmaps)
3372 				devname_nsmaps = oldmap->next;
3373 
3374 			map = oldmap->next;
3375 			if (map)
3376 				map->prev = oldmap->prev;
3377 			sdev_free_nsmap(oldmap);
3378 			oldmap = NULL;
3379 		} else {
3380 			rw_exit(&map->dir_lock);
3381 			map = map->next;
3382 		}
3383 	}
3384 	devname_nsmaps_invalidated = 0;
3385 }
3386 
3387 static int
3388 sdev_map_is_invalid(struct devname_nsmap *map)
3389 {
3390 	int ret = 0;
3391 
3392 	ASSERT(map);
3393 	rw_enter(&map->dir_lock, RW_READER);
3394 	if (map->dir_invalid)
3395 		ret = 1;
3396 	rw_exit(&map->dir_lock);
3397 	return (ret);
3398 }
3399 
3400 static int
3401 sdev_check_map(struct devname_nsmap *map)
3402 {
3403 	struct devname_nsmap *mapp;
3404 
3405 	mutex_enter(&devname_nsmaps_lock);
3406 	if (devname_nsmaps == NULL) {
3407 		mutex_exit(&devname_nsmaps_lock);
3408 		return (1);
3409 	}
3410 
3411 	for (mapp = devname_nsmaps; mapp; mapp = mapp->next) {
3412 		if (mapp == map) {
3413 			mutex_exit(&devname_nsmaps_lock);
3414 			return (0);
3415 		}
3416 	}
3417 
3418 	mutex_exit(&devname_nsmaps_lock);
3419 	return (1);
3420 
3421 }
3422 
3423 struct devname_nsmap *
3424 sdev_get_map(struct sdev_node *dv, int validate)
3425 {
3426 	struct devname_nsmap *map;
3427 	int error;
3428 
3429 	ASSERT(RW_READ_HELD(&dv->sdev_contents));
3430 	map = dv->sdev_mapinfo;
3431 	if (map && sdev_check_map(map)) {
3432 		if (!rw_tryupgrade(&dv->sdev_contents)) {
3433 			rw_exit(&dv->sdev_contents);
3434 			rw_enter(&dv->sdev_contents, RW_WRITER);
3435 		}
3436 		dv->sdev_mapinfo = NULL;
3437 		rw_downgrade(&dv->sdev_contents);
3438 		return (NULL);
3439 	}
3440 
3441 	if (validate && (!map || (map && sdev_map_is_invalid(map)))) {
3442 		if (!rw_tryupgrade(&dv->sdev_contents)) {
3443 			rw_exit(&dv->sdev_contents);
3444 			rw_enter(&dv->sdev_contents, RW_WRITER);
3445 		}
3446 		error = sdev_get_moduleops(dv);
3447 		if (!error)
3448 			map = dv->sdev_mapinfo;
3449 		rw_downgrade(&dv->sdev_contents);
3450 	}
3451 	return (map);
3452 }
3453 
3454 extern int sdev_vnodeops_tbl_size;
3455 
3456 /*
3457  * construct a new template with overrides from vtab
3458  */
3459 static fs_operation_def_t *
3460 sdev_merge_vtab(const fs_operation_def_t tab[])
3461 {
3462 	fs_operation_def_t *new;
3463 	const fs_operation_def_t *tab_entry;
3464 
3465 	/* make a copy of standard vnode ops table */
3466 	new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
3467 	bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
3468 
3469 	/* replace the overrides from tab */
3470 	for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
3471 		fs_operation_def_t *std_entry = new;
3472 		while (std_entry->name) {
3473 			if (strcmp(tab_entry->name, std_entry->name) == 0) {
3474 				std_entry->func = tab_entry->func;
3475 				break;
3476 			}
3477 			std_entry++;
3478 		}
3479 		if (std_entry->name == NULL)
3480 			cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
3481 			    tab_entry->name);
3482 	}
3483 
3484 	return (new);
3485 }
3486 
3487 /* free memory allocated by sdev_merge_vtab */
3488 static void
3489 sdev_free_vtab(fs_operation_def_t *new)
3490 {
3491 	kmem_free(new, sdev_vnodeops_tbl_size);
3492 }
3493 
3494 void
3495 devname_get_vnode(devname_handle_t *hdl, vnode_t **vpp)
3496 {
3497 	struct sdev_node *dv = hdl->dh_data;
3498 
3499 	ASSERT(dv);
3500 
3501 	rw_enter(&dv->sdev_contents, RW_READER);
3502 	*vpp = SDEVTOV(dv);
3503 	rw_exit(&dv->sdev_contents);
3504 }
3505 
3506 int
3507 devname_get_path(devname_handle_t *hdl, char **path)
3508 {
3509 	struct sdev_node *dv = hdl->dh_data;
3510 
3511 	ASSERT(dv);
3512 
3513 	rw_enter(&dv->sdev_contents, RW_READER);
3514 	*path = dv->sdev_path;
3515 	rw_exit(&dv->sdev_contents);
3516 	return (0);
3517 }
3518 
3519 int
3520 devname_get_name(devname_handle_t *hdl, char **entry)
3521 {
3522 	struct sdev_node *dv = hdl->dh_data;
3523 
3524 	ASSERT(dv);
3525 	rw_enter(&dv->sdev_contents, RW_READER);
3526 	*entry = dv->sdev_name;
3527 	rw_exit(&dv->sdev_contents);
3528 	return (0);
3529 }
3530 
3531 void
3532 devname_get_dir_vnode(devname_handle_t *hdl, vnode_t **vpp)
3533 {
3534 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3535 
3536 	ASSERT(dv);
3537 
3538 	rw_enter(&dv->sdev_contents, RW_READER);
3539 	*vpp = SDEVTOV(dv);
3540 	rw_exit(&dv->sdev_contents);
3541 }
3542 
3543 int
3544 devname_get_dir_path(devname_handle_t *hdl, char **path)
3545 {
3546 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3547 
3548 	ASSERT(dv);
3549 	rw_enter(&dv->sdev_contents, RW_READER);
3550 	*path = dv->sdev_path;
3551 	rw_exit(&dv->sdev_contents);
3552 	return (0);
3553 }
3554 
3555 int
3556 devname_get_dir_name(devname_handle_t *hdl, char **entry)
3557 {
3558 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3559 
3560 	ASSERT(dv);
3561 	rw_enter(&dv->sdev_contents, RW_READER);
3562 	*entry = dv->sdev_name;
3563 	rw_exit(&dv->sdev_contents);
3564 	return (0);
3565 }
3566 
3567 int
3568 devname_get_dir_nsmap(devname_handle_t *hdl, struct devname_nsmap **map)
3569 {
3570 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3571 
3572 	ASSERT(dv);
3573 	rw_enter(&dv->sdev_contents, RW_READER);
3574 	*map = dv->sdev_mapinfo;
3575 	rw_exit(&dv->sdev_contents);
3576 	return (0);
3577 }
3578 
3579 int
3580 devname_get_dir_handle(devname_handle_t *hdl, devname_handle_t **dir_hdl)
3581 {
3582 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3583 
3584 	ASSERT(dv);
3585 	rw_enter(&dv->sdev_contents, RW_READER);
3586 	*dir_hdl = &(dv->sdev_handle);
3587 	rw_exit(&dv->sdev_contents);
3588 	return (0);
3589 }
3590 
3591 void
3592 devname_set_nodetype(devname_handle_t *hdl, void *args, int spec)
3593 {
3594 	struct sdev_node *dv = hdl->dh_data;
3595 
3596 	ASSERT(dv);
3597 	rw_enter(&dv->sdev_contents, RW_WRITER);
3598 	hdl->dh_spec = (devname_spec_t)spec;
3599 	hdl->dh_args = (void *)i_ddi_strdup((char *)args, KM_SLEEP);
3600 	rw_exit(&dv->sdev_contents);
3601 }
3602 
3603 /*
3604  * a generic setattr() function
3605  *
3606  * note: flags only supports AT_UID and AT_GID.
3607  *	 Future enhancements can be done for other types, e.g. AT_MODE
3608  */
3609 int
3610 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
3611     struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
3612     int), int protocol)
3613 {
3614 	struct sdev_node	*dv = VTOSDEV(vp);
3615 	struct sdev_node	*parent = dv->sdev_dotdot;
3616 	struct vattr		*get;
3617 	uint_t			mask = vap->va_mask;
3618 	int 			error;
3619 
3620 	/* some sanity checks */
3621 	if (vap->va_mask & AT_NOSET)
3622 		return (EINVAL);
3623 
3624 	if (vap->va_mask & AT_SIZE) {
3625 		if (vp->v_type == VDIR) {
3626 			return (EISDIR);
3627 		}
3628 	}
3629 
3630 	/* no need to set attribute, but do not fail either */
3631 	ASSERT(parent);
3632 	rw_enter(&parent->sdev_contents, RW_READER);
3633 	if (dv->sdev_state == SDEV_ZOMBIE) {
3634 		rw_exit(&parent->sdev_contents);
3635 		return (0);
3636 	}
3637 
3638 	/* If backing store exists, just set it. */
3639 	if (dv->sdev_attrvp) {
3640 		rw_exit(&parent->sdev_contents);
3641 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3642 	}
3643 
3644 	/*
3645 	 * Otherwise, for nodes with the persistence attribute, create it.
3646 	 */
3647 	ASSERT(dv->sdev_attr);
3648 	if (SDEV_IS_PERSIST(dv) ||
3649 	    ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
3650 		sdev_vattr_merge(dv, vap);
3651 		rw_enter(&dv->sdev_contents, RW_WRITER);
3652 		error = sdev_shadow_node(dv, cred);
3653 		rw_exit(&dv->sdev_contents);
3654 		rw_exit(&parent->sdev_contents);
3655 
3656 		if (error)
3657 			return (error);
3658 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3659 	}
3660 
3661 
3662 	/*
3663 	 * sdev_attr was allocated in sdev_mknode
3664 	 */
3665 	rw_enter(&dv->sdev_contents, RW_WRITER);
3666 	error = secpolicy_vnode_setattr(cred, vp, vap,
3667 	    dv->sdev_attr, flags, sdev_unlocked_access, dv);
3668 	if (error) {
3669 		rw_exit(&dv->sdev_contents);
3670 		rw_exit(&parent->sdev_contents);
3671 		return (error);
3672 	}
3673 
3674 	get = dv->sdev_attr;
3675 	if (mask & AT_MODE) {
3676 		get->va_mode &= S_IFMT;
3677 		get->va_mode |= vap->va_mode & ~S_IFMT;
3678 	}
3679 
3680 	if ((mask & AT_UID) || (mask & AT_GID)) {
3681 		if (mask & AT_UID)
3682 			get->va_uid = vap->va_uid;
3683 		if (mask & AT_GID)
3684 			get->va_gid = vap->va_gid;
3685 		/*
3686 		 * a callback must be provided if the protocol is set
3687 		 */
3688 		if ((protocol & AT_UID) || (protocol & AT_GID)) {
3689 			ASSERT(callback);
3690 			error = callback(dv, get, protocol);
3691 			if (error) {
3692 				rw_exit(&dv->sdev_contents);
3693 				rw_exit(&parent->sdev_contents);
3694 				return (error);
3695 			}
3696 		}
3697 	}
3698 
3699 	if (mask & AT_ATIME)
3700 		get->va_atime = vap->va_atime;
3701 	if (mask & AT_MTIME)
3702 		get->va_mtime = vap->va_mtime;
3703 	if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
3704 		gethrestime(&get->va_ctime);
3705 	}
3706 
3707 	sdev_vattr_merge(dv, get);
3708 	rw_exit(&dv->sdev_contents);
3709 	rw_exit(&parent->sdev_contents);
3710 	return (0);
3711 }
3712