xref: /illumos-gate/usr/src/uts/common/fs/dev/sdev_subr.c (revision 842727c2f41f01b380de4f5e787d905702870f23)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * utility routines for the /dev fs
28  */
29 
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/t_lock.h>
33 #include <sys/systm.h>
34 #include <sys/sysmacros.h>
35 #include <sys/user.h>
36 #include <sys/time.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/file.h>
40 #include <sys/fcntl.h>
41 #include <sys/flock.h>
42 #include <sys/kmem.h>
43 #include <sys/uio.h>
44 #include <sys/errno.h>
45 #include <sys/stat.h>
46 #include <sys/cred.h>
47 #include <sys/dirent.h>
48 #include <sys/pathname.h>
49 #include <sys/cmn_err.h>
50 #include <sys/debug.h>
51 #include <sys/mode.h>
52 #include <sys/policy.h>
53 #include <fs/fs_subr.h>
54 #include <sys/mount.h>
55 #include <sys/fs/snode.h>
56 #include <sys/fs/dv_node.h>
57 #include <sys/fs/sdev_impl.h>
58 #include <sys/sunndi.h>
59 #include <sys/sunmdi.h>
60 #include <sys/conf.h>
61 #include <sys/proc.h>
62 #include <sys/user.h>
63 #include <sys/modctl.h>
64 
#if defined(DEBUG)
/*
 * Debug message mask.
 * NOTE(review): presumably consulted by the sdcmn_err* macros - confirm.
 */
int sdev_debug = 0x1;
/* Flags passed to kmem_cache_create() for sdev_node_cache (DEBUG only). */
int sdev_debug_cache_flags = 0;
#endif	/* DEBUG */
69 
70 /*
71  * globals
72  */
73 /* prototype memory vattrs */
74 vattr_t sdev_vattr_dir = {
75 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
76 	VDIR,					/* va_type */
77 	SDEV_DIRMODE_DEFAULT,			/* va_mode */
78 	SDEV_UID_DEFAULT,			/* va_uid */
79 	SDEV_GID_DEFAULT,			/* va_gid */
80 	0,					/* va_fsid */
81 	0,					/* va_nodeid */
82 	0,					/* va_nlink */
83 	0,					/* va_size */
84 	0,					/* va_atime */
85 	0,					/* va_mtime */
86 	0,					/* va_ctime */
87 	0,					/* va_rdev */
88 	0,					/* va_blksize */
89 	0,					/* va_nblocks */
90 	0					/* va_vcode */
91 };
92 
93 vattr_t sdev_vattr_lnk = {
94 	AT_TYPE|AT_MODE,			/* va_mask */
95 	VLNK,					/* va_type */
96 	SDEV_LNKMODE_DEFAULT,			/* va_mode */
97 	SDEV_UID_DEFAULT,			/* va_uid */
98 	SDEV_GID_DEFAULT,			/* va_gid */
99 	0,					/* va_fsid */
100 	0,					/* va_nodeid */
101 	0,					/* va_nlink */
102 	0,					/* va_size */
103 	0,					/* va_atime */
104 	0,					/* va_mtime */
105 	0,					/* va_ctime */
106 	0,					/* va_rdev */
107 	0,					/* va_blksize */
108 	0,					/* va_nblocks */
109 	0					/* va_vcode */
110 };
111 
112 vattr_t sdev_vattr_blk = {
113 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
114 	VBLK,					/* va_type */
115 	S_IFBLK | SDEV_DEVMODE_DEFAULT,		/* va_mode */
116 	SDEV_UID_DEFAULT,			/* va_uid */
117 	SDEV_GID_DEFAULT,			/* va_gid */
118 	0,					/* va_fsid */
119 	0,					/* va_nodeid */
120 	0,					/* va_nlink */
121 	0,					/* va_size */
122 	0,					/* va_atime */
123 	0,					/* va_mtime */
124 	0,					/* va_ctime */
125 	0,					/* va_rdev */
126 	0,					/* va_blksize */
127 	0,					/* va_nblocks */
128 	0					/* va_vcode */
129 };
130 
131 vattr_t sdev_vattr_chr = {
132 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
133 	VCHR,					/* va_type */
134 	S_IFCHR | SDEV_DEVMODE_DEFAULT,		/* va_mode */
135 	SDEV_UID_DEFAULT,			/* va_uid */
136 	SDEV_GID_DEFAULT,			/* va_gid */
137 	0,					/* va_fsid */
138 	0,					/* va_nodeid */
139 	0,					/* va_nlink */
140 	0,					/* va_size */
141 	0,					/* va_atime */
142 	0,					/* va_mtime */
143 	0,					/* va_ctime */
144 	0,					/* va_rdev */
145 	0,					/* va_blksize */
146 	0,					/* va_nblocks */
147 	0					/* va_vcode */
148 };
149 
150 kmem_cache_t	*sdev_node_cache;	/* sdev_node cache */
151 int		devtype;		/* fstype */
152 
153 /* static */
154 static struct vnodeops *sdev_get_vop(struct sdev_node *);
155 static void sdev_set_no_nocache(struct sdev_node *);
156 static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
157 static void sdev_free_vtab(fs_operation_def_t *);
158 
159 static void
160 sdev_prof_free(struct sdev_node *dv)
161 {
162 	ASSERT(!SDEV_IS_GLOBAL(dv));
163 	if (dv->sdev_prof.dev_name)
164 		nvlist_free(dv->sdev_prof.dev_name);
165 	if (dv->sdev_prof.dev_map)
166 		nvlist_free(dv->sdev_prof.dev_map);
167 	if (dv->sdev_prof.dev_symlink)
168 		nvlist_free(dv->sdev_prof.dev_symlink);
169 	if (dv->sdev_prof.dev_glob_incdir)
170 		nvlist_free(dv->sdev_prof.dev_glob_incdir);
171 	if (dv->sdev_prof.dev_glob_excdir)
172 		nvlist_free(dv->sdev_prof.dev_glob_excdir);
173 	bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
174 }
175 
176 /* sdev_node cache constructor */
177 /*ARGSUSED1*/
178 static int
179 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
180 {
181 	struct sdev_node *dv = (struct sdev_node *)buf;
182 	struct vnode *vp;
183 
184 	bzero(buf, sizeof (struct sdev_node));
185 	vp = dv->sdev_vnode = vn_alloc(flag);
186 	if (vp == NULL) {
187 		return (-1);
188 	}
189 	vp->v_data = dv;
190 	rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
191 	return (0);
192 }
193 
194 /* sdev_node cache destructor */
195 /*ARGSUSED1*/
196 static void
197 i_sdev_node_dtor(void *buf, void *arg)
198 {
199 	struct sdev_node *dv = (struct sdev_node *)buf;
200 	struct vnode *vp = SDEVTOV(dv);
201 
202 	rw_destroy(&dv->sdev_contents);
203 	vn_free(vp);
204 }
205 
206 /* initialize sdev_node cache */
207 void
208 sdev_node_cache_init()
209 {
210 	int flags = 0;
211 
212 #ifdef	DEBUG
213 	flags = sdev_debug_cache_flags;
214 	if (flags)
215 		sdcmn_err(("cache debug flags 0x%x\n", flags));
216 #endif	/* DEBUG */
217 
218 	ASSERT(sdev_node_cache == NULL);
219 	sdev_node_cache = kmem_cache_create("sdev_node_cache",
220 	    sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
221 	    NULL, NULL, NULL, flags);
222 }
223 
224 /* destroy sdev_node cache */
225 void
226 sdev_node_cache_fini()
227 {
228 	ASSERT(sdev_node_cache != NULL);
229 	kmem_cache_destroy(sdev_node_cache);
230 	sdev_node_cache = NULL;
231 }
232 
233 /*
234  * Compare two nodes lexographically to balance avl tree
235  */
236 static int
237 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
238 {
239 	int rv;
240 	if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
241 		return (0);
242 	return ((rv < 0) ? -1 : 1);
243 }
244 
245 void
246 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
247 {
248 	ASSERT(dv);
249 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
250 	dv->sdev_state = state;
251 }
252 
253 static void
254 sdev_attrinit(struct sdev_node *dv, vattr_t *vap)
255 {
256 	timestruc_t now;
257 
258 	ASSERT(vap);
259 
260 	dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
261 	*dv->sdev_attr = *vap;
262 
263 	dv->sdev_attr->va_mode = MAKEIMODE(vap->va_type, vap->va_mode);
264 
265 	gethrestime(&now);
266 	dv->sdev_attr->va_atime = now;
267 	dv->sdev_attr->va_mtime = now;
268 	dv->sdev_attr->va_ctime = now;
269 }
270 
271 /* alloc and initialize a sdev_node */
272 int
273 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
274     vattr_t *vap)
275 {
276 	struct sdev_node *dv = NULL;
277 	struct vnode *vp;
278 	size_t nmlen, len;
279 	devname_handle_t  *dhl;
280 
281 	nmlen = strlen(nm) + 1;
282 	if (nmlen > MAXNAMELEN) {
283 		sdcmn_err9(("sdev_nodeinit: node name %s"
284 		    " too long\n", nm));
285 		*newdv = NULL;
286 		return (ENAMETOOLONG);
287 	}
288 
289 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
290 
291 	dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
292 	bcopy(nm, dv->sdev_name, nmlen);
293 	dv->sdev_namelen = nmlen - 1;	/* '\0' not included */
294 	len = strlen(ddv->sdev_path) + strlen(nm) + 2;
295 	dv->sdev_path = kmem_alloc(len, KM_SLEEP);
296 	(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
297 	/* overwritten for VLNK nodes */
298 	dv->sdev_symlink = NULL;
299 
300 	vp = SDEVTOV(dv);
301 	vn_reinit(vp);
302 	vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
303 	if (vap)
304 		vp->v_type = vap->va_type;
305 
306 	/*
307 	 * initialized to the parent's vnodeops.
308 	 * maybe overwriten for a VDIR
309 	 */
310 	vn_setops(vp, vn_getops(SDEVTOV(ddv)));
311 	vn_exists(vp);
312 
313 	dv->sdev_dotdot = NULL;
314 	dv->sdev_attrvp = NULL;
315 	if (vap) {
316 		sdev_attrinit(dv, vap);
317 	} else {
318 		dv->sdev_attr = NULL;
319 	}
320 
321 	dv->sdev_ino = sdev_mkino(dv);
322 	dv->sdev_nlink = 0;		/* updated on insert */
323 	dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
324 	dv->sdev_flags |= SDEV_BUILD;
325 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
326 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
327 	if (SDEV_IS_GLOBAL(ddv)) {
328 		dv->sdev_flags |= SDEV_GLOBAL;
329 		dhl = &(dv->sdev_handle);
330 		dhl->dh_data = dv;
331 		dhl->dh_args = NULL;
332 		sdev_set_no_nocache(dv);
333 		dv->sdev_gdir_gen = 0;
334 	} else {
335 		dv->sdev_flags &= ~SDEV_GLOBAL;
336 		dv->sdev_origin = NULL; /* set later */
337 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
338 		dv->sdev_ldir_gen = 0;
339 		dv->sdev_devtree_gen = 0;
340 	}
341 
342 	rw_enter(&dv->sdev_contents, RW_WRITER);
343 	sdev_set_nodestate(dv, SDEV_INIT);
344 	rw_exit(&dv->sdev_contents);
345 	*newdv = dv;
346 
347 	return (0);
348 }
349 
350 /*
351  * transition a sdev_node into SDEV_READY state
352  */
353 int
354 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
355     void *args, struct cred *cred)
356 {
357 	int error = 0;
358 	struct vnode *vp = SDEVTOV(dv);
359 	vtype_t type;
360 
361 	ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
362 
363 	type = vap->va_type;
364 	vp->v_type = type;
365 	vp->v_rdev = vap->va_rdev;
366 	rw_enter(&dv->sdev_contents, RW_WRITER);
367 	if (type == VDIR) {
368 		dv->sdev_nlink = 2;
369 		dv->sdev_flags &= ~SDEV_PERSIST;
370 		dv->sdev_flags &= ~SDEV_DYNAMIC;
371 		vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
372 		ASSERT(dv->sdev_dotdot);
373 		ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
374 		vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
375 		avl_create(&dv->sdev_entries,
376 		    (int (*)(const void *, const void *))sdev_compare_nodes,
377 		    sizeof (struct sdev_node),
378 		    offsetof(struct sdev_node, sdev_avllink));
379 	} else if (type == VLNK) {
380 		ASSERT(args);
381 		dv->sdev_nlink = 1;
382 		dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
383 	} else {
384 		dv->sdev_nlink = 1;
385 	}
386 
387 	if (!(SDEV_IS_GLOBAL(dv))) {
388 		dv->sdev_origin = (struct sdev_node *)args;
389 		dv->sdev_flags &= ~SDEV_PERSIST;
390 	}
391 
392 	/*
393 	 * shadow node is created here OR
394 	 * if failed (indicated by dv->sdev_attrvp == NULL),
395 	 * created later in sdev_setattr
396 	 */
397 	if (avp) {
398 		dv->sdev_attrvp = avp;
399 	} else {
400 		if (dv->sdev_attr == NULL)
401 			sdev_attrinit(dv, vap);
402 		else
403 			*dv->sdev_attr = *vap;
404 
405 		if ((SDEV_IS_PERSIST(dv) && (dv->sdev_attrvp == NULL)) ||
406 		    ((SDEVTOV(dv)->v_type == VDIR) &&
407 		    (dv->sdev_attrvp == NULL))) {
408 			error = sdev_shadow_node(dv, cred);
409 		}
410 	}
411 
412 	if (error == 0) {
413 		/* transition to READY state */
414 		sdev_set_nodestate(dv, SDEV_READY);
415 		sdev_nc_node_exists(dv);
416 	} else {
417 		sdev_set_nodestate(dv, SDEV_ZOMBIE);
418 	}
419 	rw_exit(&dv->sdev_contents);
420 	return (error);
421 }
422 
423 /*
424  * setting ZOMBIE state
425  */
426 static int
427 sdev_nodezombied(struct sdev_node *dv)
428 {
429 	rw_enter(&dv->sdev_contents, RW_WRITER);
430 	sdev_set_nodestate(dv, SDEV_ZOMBIE);
431 	rw_exit(&dv->sdev_contents);
432 	return (0);
433 }
434 
435 /*
436  * Build the VROOT sdev_node.
437  */
438 /*ARGSUSED*/
439 struct sdev_node *
440 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
441     struct vnode *avp, struct cred *cred)
442 {
443 	struct sdev_node *dv;
444 	struct vnode *vp;
445 	char devdir[] = "/dev";
446 
447 	ASSERT(sdev_node_cache != NULL);
448 	ASSERT(avp);
449 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
450 	vp = SDEVTOV(dv);
451 	vn_reinit(vp);
452 	vp->v_flag |= VROOT;
453 	vp->v_vfsp = vfsp;
454 	vp->v_type = VDIR;
455 	vp->v_rdev = devdev;
456 	vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
457 	vn_exists(vp);
458 
459 	if (vfsp->vfs_mntpt)
460 		dv->sdev_name = i_ddi_strdup(
461 		    (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
462 	else
463 		/* vfs_mountdev1 set mount point later */
464 		dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
465 	dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
466 	dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
467 	dv->sdev_ino = SDEV_ROOTINO;
468 	dv->sdev_nlink = 2;		/* name + . (no sdev_insert) */
469 	dv->sdev_dotdot = dv;		/* .. == self */
470 	dv->sdev_attrvp = avp;
471 	dv->sdev_attr = NULL;
472 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
473 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
474 	if (strcmp(dv->sdev_name, "/dev") == 0) {
475 		dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
476 		bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
477 		dv->sdev_gdir_gen = 0;
478 	} else {
479 		dv->sdev_flags = SDEV_BUILD;
480 		dv->sdev_flags &= ~SDEV_PERSIST;
481 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
482 		dv->sdev_ldir_gen = 0;
483 		dv->sdev_devtree_gen = 0;
484 	}
485 
486 	avl_create(&dv->sdev_entries,
487 	    (int (*)(const void *, const void *))sdev_compare_nodes,
488 	    sizeof (struct sdev_node),
489 	    offsetof(struct sdev_node, sdev_avllink));
490 
491 	rw_enter(&dv->sdev_contents, RW_WRITER);
492 	sdev_set_nodestate(dv, SDEV_READY);
493 	rw_exit(&dv->sdev_contents);
494 	sdev_nc_node_exists(dv);
495 	return (dv);
496 }
497 
498 /* directory dependent vop table */
499 struct sdev_vop_table {
500 	char *vt_name;				/* subdirectory name */
501 	const fs_operation_def_t *vt_service;	/* vnodeops table */
502 	struct vnodeops *vt_vops;		/* constructed vop */
503 	struct vnodeops **vt_global_vops;	/* global container for vop */
504 	int (*vt_vtor)(struct sdev_node *);	/* validate sdev_node */
505 	int vt_flags;
506 };
507 
508 /*
509  * A nice improvement would be to provide a plug-in mechanism
510  * for this table instead of a const table.
511  */
512 static struct sdev_vop_table vtab[] =
513 {
514 	{ "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
515 	SDEV_DYNAMIC | SDEV_VTOR },
516 
517 	{ "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate,
518 	SDEV_DYNAMIC | SDEV_VTOR },
519 
520 	{ "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },
521 
522 	{ "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
523 	SDEV_DYNAMIC | SDEV_VTOR },
524 
525 	{ "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops,
526 	devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE },
527 
528 	{ NULL, NULL, NULL, NULL, NULL, 0}
529 };
530 
531 
532 /*
533  *  sets a directory's vnodeops if the directory is in the vtab;
534  */
535 static struct vnodeops *
536 sdev_get_vop(struct sdev_node *dv)
537 {
538 	int i;
539 	char *path;
540 
541 	path = dv->sdev_path;
542 	ASSERT(path);
543 
544 	/* gets the relative path to /dev/ */
545 	path += 5;
546 
547 	/* gets the vtab entry if matches */
548 	for (i = 0; vtab[i].vt_name; i++) {
549 		if (strcmp(vtab[i].vt_name, path) != 0)
550 			continue;
551 		dv->sdev_flags |= vtab[i].vt_flags;
552 
553 		if (vtab[i].vt_vops) {
554 			if (vtab[i].vt_global_vops)
555 				*(vtab[i].vt_global_vops) = vtab[i].vt_vops;
556 			return (vtab[i].vt_vops);
557 		}
558 
559 		if (vtab[i].vt_service) {
560 			fs_operation_def_t *templ;
561 			templ = sdev_merge_vtab(vtab[i].vt_service);
562 			if (vn_make_ops(vtab[i].vt_name,
563 			    (const fs_operation_def_t *)templ,
564 			    &vtab[i].vt_vops) != 0) {
565 				cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
566 				    vtab[i].vt_name);
567 				/*NOTREACHED*/
568 			}
569 			if (vtab[i].vt_global_vops) {
570 				*(vtab[i].vt_global_vops) = vtab[i].vt_vops;
571 			}
572 			sdev_free_vtab(templ);
573 			return (vtab[i].vt_vops);
574 		}
575 		return (sdev_vnodeops);
576 	}
577 
578 	/* child inherits the persistence of the parent */
579 	if (SDEV_IS_PERSIST(dv->sdev_dotdot))
580 		dv->sdev_flags |= SDEV_PERSIST;
581 
582 	return (sdev_vnodeops);
583 }
584 
585 static void
586 sdev_set_no_nocache(struct sdev_node *dv)
587 {
588 	int i;
589 	char *path;
590 
591 	ASSERT(dv->sdev_path);
592 	path = dv->sdev_path + strlen("/dev/");
593 
594 	for (i = 0; vtab[i].vt_name; i++) {
595 		if (strcmp(vtab[i].vt_name, path) == 0) {
596 			if (vtab[i].vt_flags & SDEV_NO_NCACHE)
597 				dv->sdev_flags |= SDEV_NO_NCACHE;
598 			break;
599 		}
600 	}
601 }
602 
603 void *
604 sdev_get_vtor(struct sdev_node *dv)
605 {
606 	int i;
607 
608 	for (i = 0; vtab[i].vt_name; i++) {
609 		if (strcmp(vtab[i].vt_name, dv->sdev_name) != 0)
610 			continue;
611 		return ((void *)vtab[i].vt_vtor);
612 	}
613 	return (NULL);
614 }
615 
616 /*
617  * Build the base root inode
618  */
619 ino_t
620 sdev_mkino(struct sdev_node *dv)
621 {
622 	ino_t	ino;
623 
624 	/*
625 	 * for now, follow the lead of tmpfs here
626 	 * need to someday understand the requirements here
627 	 */
628 	ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
629 	ino += SDEV_ROOTINO + 1;
630 
631 	return (ino);
632 }
633 
634 static int
635 sdev_getlink(struct vnode *linkvp, char **link)
636 {
637 	int err;
638 	char *buf;
639 	struct uio uio = {0};
640 	struct iovec iov = {0};
641 
642 	if (linkvp == NULL)
643 		return (ENOENT);
644 	ASSERT(linkvp->v_type == VLNK);
645 
646 	buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
647 	iov.iov_base = buf;
648 	iov.iov_len = MAXPATHLEN;
649 	uio.uio_iov = &iov;
650 	uio.uio_iovcnt = 1;
651 	uio.uio_resid = MAXPATHLEN;
652 	uio.uio_segflg = UIO_SYSSPACE;
653 	uio.uio_llimit = MAXOFFSET_T;
654 
655 	err = VOP_READLINK(linkvp, &uio, kcred, NULL);
656 	if (err) {
657 		cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
658 		kmem_free(buf, MAXPATHLEN);
659 		return (ENOENT);
660 	}
661 
662 	/* mission complete */
663 	*link = i_ddi_strdup(buf, KM_SLEEP);
664 	kmem_free(buf, MAXPATHLEN);
665 	return (0);
666 }
667 
668 /*
669  * A convenient wrapper to get the devfs node vnode for a device
670  * minor functionality: readlink() of a /dev symlink
671  * Place the link into dv->sdev_symlink
672  */
673 static int
674 sdev_follow_link(struct sdev_node *dv)
675 {
676 	int err;
677 	struct vnode *linkvp;
678 	char *link = NULL;
679 
680 	linkvp = SDEVTOV(dv);
681 	if (linkvp == NULL)
682 		return (ENOENT);
683 	ASSERT(linkvp->v_type == VLNK);
684 	err = sdev_getlink(linkvp, &link);
685 	if (err) {
686 		(void) sdev_nodezombied(dv);
687 		dv->sdev_symlink = NULL;
688 		return (ENOENT);
689 	}
690 
691 	ASSERT(link != NULL);
692 	dv->sdev_symlink = link;
693 	return (0);
694 }
695 
696 static int
697 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
698 {
699 	vtype_t otype = SDEVTOV(dv)->v_type;
700 
701 	/*
702 	 * existing sdev_node has a different type.
703 	 */
704 	if (otype != nvap->va_type) {
705 		sdcmn_err9(("sdev_node_check: existing node "
706 		    "  %s type %d does not match new node type %d\n",
707 		    dv->sdev_name, otype, nvap->va_type));
708 		return (EEXIST);
709 	}
710 
711 	/*
712 	 * For a symlink, the target should be the same.
713 	 */
714 	if (otype == VLNK) {
715 		ASSERT(nargs != NULL);
716 		ASSERT(dv->sdev_symlink != NULL);
717 		if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
718 			sdcmn_err9(("sdev_node_check: existing node "
719 			    " %s has different symlink %s as new node "
720 			    " %s\n", dv->sdev_name, dv->sdev_symlink,
721 			    (char *)nargs));
722 			return (EEXIST);
723 		}
724 	}
725 
726 	return (0);
727 }
728 
729 /*
730  * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
731  *
732  * arguments:
733  *	- ddv (parent)
734  *	- nm (child name)
735  *	- newdv (sdev_node for nm is returned here)
736  *	- vap (vattr for the node to be created, va_type should be set.
737  *	- avp (attribute vnode)
738  *	  the defaults should be used if unknown)
739  *	- cred
740  *	- args
741  *	    . tnm (for VLNK)
742  *	    . global sdev_node (for !SDEV_GLOBAL)
743  * 	- state: SDEV_INIT, SDEV_READY
744  *
745  * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
746  *
747  * NOTE:  directory contents writers lock needs to be held before
748  *	  calling this routine.
749  */
750 int
751 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
752     struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
753     sdev_node_state_t state)
754 {
755 	int error = 0;
756 	sdev_node_state_t node_state;
757 	struct sdev_node *dv = NULL;
758 
759 	ASSERT(state != SDEV_ZOMBIE);
760 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
761 
762 	if (*newdv) {
763 		dv = *newdv;
764 	} else {
765 		/* allocate and initialize a sdev_node */
766 		if (ddv->sdev_state == SDEV_ZOMBIE) {
767 			sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
768 			    ddv->sdev_path));
769 			return (ENOENT);
770 		}
771 
772 		error = sdev_nodeinit(ddv, nm, &dv, vap);
773 		if (error != 0) {
774 			sdcmn_err9(("sdev_mknode: error %d,"
775 			    " name %s can not be initialized\n",
776 			    error, nm));
777 			return (error);
778 		}
779 		ASSERT(dv);
780 
781 		/* insert into the directory cache */
782 		error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
783 		if (error) {
784 			sdcmn_err9(("sdev_mknode: node %s can not"
785 			    " be added into directory cache\n", nm));
786 			return (ENOENT);
787 		}
788 	}
789 
790 	ASSERT(dv);
791 	node_state = dv->sdev_state;
792 	ASSERT(node_state != SDEV_ZOMBIE);
793 
794 	if (state == SDEV_READY) {
795 		switch (node_state) {
796 		case SDEV_INIT:
797 			error = sdev_nodeready(dv, vap, avp, args, cred);
798 			if (error) {
799 				sdcmn_err9(("sdev_mknode: node %s can NOT"
800 				    " be transitioned into READY state, "
801 				    "error %d\n", nm, error));
802 			}
803 			break;
804 		case SDEV_READY:
805 			/*
806 			 * Do some sanity checking to make sure
807 			 * the existing sdev_node is what has been
808 			 * asked for.
809 			 */
810 			error = sdev_node_check(dv, vap, args);
811 			break;
812 		default:
813 			break;
814 		}
815 	}
816 
817 	if (!error) {
818 		*newdv = dv;
819 		ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
820 	} else {
821 		SDEV_SIMPLE_RELE(dv);
822 		*newdv = NULL;
823 	}
824 
825 	return (error);
826 }
827 
828 /*
829  * convenient wrapper to change vp's ATIME, CTIME and MTIME
830  */
831 void
832 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
833 {
834 	struct vattr attr;
835 	timestruc_t now;
836 	int err;
837 
838 	ASSERT(vp);
839 	gethrestime(&now);
840 	if (mask & AT_CTIME)
841 		attr.va_ctime = now;
842 	if (mask & AT_MTIME)
843 		attr.va_mtime = now;
844 	if (mask & AT_ATIME)
845 		attr.va_atime = now;
846 
847 	attr.va_mask = (mask & AT_TIMES);
848 	err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
849 	if (err && (err != EROFS)) {
850 		sdcmn_err(("update timestamps error %d\n", err));
851 	}
852 }
853 
854 /*
855  * the backing store vnode is released here
856  */
857 /*ARGSUSED1*/
858 void
859 sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
860 {
861 	/* no references */
862 	ASSERT(dv->sdev_nlink == 0);
863 
864 	if (dv->sdev_attrvp != NULLVP) {
865 		VN_RELE(dv->sdev_attrvp);
866 		/*
867 		 * reset the attrvp so that no more
868 		 * references can be made on this already
869 		 * vn_rele() vnode
870 		 */
871 		dv->sdev_attrvp = NULLVP;
872 	}
873 
874 	if (dv->sdev_attr != NULL) {
875 		kmem_free(dv->sdev_attr, sizeof (struct vattr));
876 		dv->sdev_attr = NULL;
877 	}
878 
879 	if (dv->sdev_name != NULL) {
880 		kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
881 		dv->sdev_name = NULL;
882 	}
883 
884 	if (dv->sdev_symlink != NULL) {
885 		kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
886 		dv->sdev_symlink = NULL;
887 	}
888 
889 	if (dv->sdev_path) {
890 		kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
891 		dv->sdev_path = NULL;
892 	}
893 
894 	if (!SDEV_IS_GLOBAL(dv))
895 		sdev_prof_free(dv);
896 
897 	if (SDEVTOV(dv)->v_type == VDIR) {
898 		ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
899 		avl_destroy(&dv->sdev_entries);
900 	}
901 
902 	mutex_destroy(&dv->sdev_lookup_lock);
903 	cv_destroy(&dv->sdev_lookup_cv);
904 
905 	/* return node to initial state as per constructor */
906 	(void) memset((void *)&dv->sdev_instance_data, 0,
907 	    sizeof (dv->sdev_instance_data));
908 	vn_invalid(SDEVTOV(dv));
909 	kmem_cache_free(sdev_node_cache, dv);
910 }
911 
912 /*
913  * DIRECTORY CACHE lookup
914  */
915 struct sdev_node *
916 sdev_findbyname(struct sdev_node *ddv, char *nm)
917 {
918 	struct sdev_node *dv;
919 	struct sdev_node dvtmp;
920 	avl_index_t	where;
921 
922 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
923 
924 	dvtmp.sdev_name = nm;
925 	dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
926 	if (dv) {
927 		ASSERT(dv->sdev_dotdot == ddv);
928 		ASSERT(strcmp(dv->sdev_name, nm) == 0);
929 		SDEV_HOLD(dv);
930 		return (dv);
931 	}
932 	return (NULL);
933 }
934 
935 /*
936  * Inserts a new sdev_node in a parent directory
937  */
938 void
939 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
940 {
941 	avl_index_t where;
942 
943 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
944 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
945 	ASSERT(ddv->sdev_nlink >= 2);
946 	ASSERT(dv->sdev_nlink == 0);
947 
948 	dv->sdev_dotdot = ddv;
949 	VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
950 	avl_insert(&ddv->sdev_entries, dv, where);
951 	ddv->sdev_nlink++;
952 }
953 
954 /*
955  * The following check is needed because while sdev_nodes are linked
956  * in SDEV_INIT state, they have their link counts incremented only
957  * in SDEV_READY state.
958  */
959 static void
960 decr_link(struct sdev_node *dv)
961 {
962 	if (dv->sdev_state != SDEV_INIT)
963 		dv->sdev_nlink--;
964 	else
965 		ASSERT(dv->sdev_nlink == 0);
966 }
967 
968 /*
969  * Delete an existing dv from directory cache
970  *
971  * In the case of a node is still held by non-zero reference count,
972  *     the node is put into ZOMBIE state. Once the reference count
973  *     reaches "0", the node is unlinked and destroyed,
974  *     in sdev_inactive().
975  */
976 static int
977 sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
978 {
979 	struct vnode *vp;
980 
981 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
982 
983 	vp = SDEVTOV(dv);
984 	mutex_enter(&vp->v_lock);
985 
986 	/* dv is held still */
987 	if (vp->v_count > 1) {
988 		rw_enter(&dv->sdev_contents, RW_WRITER);
989 		if (dv->sdev_state == SDEV_READY) {
990 			sdcmn_err9((
991 			    "sdev_delete: node %s busy with count %d\n",
992 			    dv->sdev_name, vp->v_count));
993 			dv->sdev_state = SDEV_ZOMBIE;
994 		}
995 		rw_exit(&dv->sdev_contents);
996 		--vp->v_count;
997 		mutex_exit(&vp->v_lock);
998 		return (EBUSY);
999 	}
1000 	ASSERT(vp->v_count == 1);
1001 
1002 	/* unlink from the memory cache */
1003 	ddv->sdev_nlink--;	/* .. to above */
1004 	if (vp->v_type == VDIR) {
1005 		decr_link(dv);		/* . to self */
1006 	}
1007 
1008 	avl_remove(&ddv->sdev_entries, dv);
1009 	decr_link(dv);	/* name, back to zero */
1010 	vp->v_count--;
1011 	mutex_exit(&vp->v_lock);
1012 
1013 	/* destroy the node */
1014 	sdev_nodedestroy(dv, 0);
1015 	return (0);
1016 }
1017 
1018 /*
1019  * check if the source is in the path of the target
1020  *
1021  * source and target are different
1022  */
1023 /*ARGSUSED2*/
1024 static int
1025 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
1026 {
1027 	int error = 0;
1028 	struct sdev_node *dotdot, *dir;
1029 
1030 	dotdot = tdv->sdev_dotdot;
1031 	ASSERT(dotdot);
1032 
1033 	/* fs root */
1034 	if (dotdot == tdv) {
1035 		return (0);
1036 	}
1037 
1038 	for (;;) {
1039 		/*
1040 		 * avoid error cases like
1041 		 *	mv a a/b
1042 		 *	mv a a/b/c
1043 		 *	etc.
1044 		 */
1045 		if (dotdot == sdv) {
1046 			error = EINVAL;
1047 			break;
1048 		}
1049 
1050 		dir = dotdot;
1051 		dotdot = dir->sdev_dotdot;
1052 
1053 		/* done checking because root is reached */
1054 		if (dir == dotdot) {
1055 			break;
1056 		}
1057 	}
1058 	return (error);
1059 }
1060 
1061 int
1062 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
1063     struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
1064     struct cred *cred)
1065 {
1066 	int error = 0;
1067 	struct vnode *ovp = SDEVTOV(odv);
1068 	struct vnode *nvp;
1069 	struct vattr vattr;
1070 	int doingdir = (ovp->v_type == VDIR);
1071 	char *link = NULL;
1072 	int samedir = (oddv == nddv) ? 1 : 0;
1073 	int bkstore = 0;
1074 	struct sdev_node *idv = NULL;
1075 	struct sdev_node *ndv = NULL;
1076 	timestruc_t now;
1077 
1078 	vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1079 	error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
1080 	if (error)
1081 		return (error);
1082 
1083 	if (!samedir)
1084 		rw_enter(&oddv->sdev_contents, RW_WRITER);
1085 	rw_enter(&nddv->sdev_contents, RW_WRITER);
1086 
1087 	/*
1088 	 * the source may have been deleted by another thread before
1089 	 * we gets here.
1090 	 */
1091 	if (odv->sdev_state != SDEV_READY) {
1092 		error = ENOENT;
1093 		goto err_out;
1094 	}
1095 
1096 	if (doingdir && (odv == nddv)) {
1097 		error = EINVAL;
1098 		goto err_out;
1099 	}
1100 
1101 	/*
1102 	 * If renaming a directory, and the parents are different (".." must be
1103 	 * changed) then the source dir must not be in the dir hierarchy above
1104 	 * the target since it would orphan everything below the source dir.
1105 	 */
1106 	if (doingdir && (oddv != nddv)) {
1107 		error = sdev_checkpath(odv, nddv, cred);
1108 		if (error)
1109 			goto err_out;
1110 	}
1111 
1112 	/* destination existing */
1113 	if (*ndvp) {
1114 		nvp = SDEVTOV(*ndvp);
1115 		ASSERT(nvp);
1116 
1117 		/* handling renaming to itself */
1118 		if (odv == *ndvp) {
1119 			error = 0;
1120 			goto err_out;
1121 		}
1122 
1123 		if (nvp->v_type == VDIR) {
1124 			if (!doingdir) {
1125 				error = EISDIR;
1126 				goto err_out;
1127 			}
1128 
1129 			if (vn_vfswlock(nvp)) {
1130 				error = EBUSY;
1131 				goto err_out;
1132 			}
1133 
1134 			if (vn_mountedvfs(nvp) != NULL) {
1135 				vn_vfsunlock(nvp);
1136 				error = EBUSY;
1137 				goto err_out;
1138 			}
1139 
1140 			/* in case dir1 exists in dir2 and "mv dir1 dir2" */
1141 			if ((*ndvp)->sdev_nlink > 2) {
1142 				vn_vfsunlock(nvp);
1143 				error = EEXIST;
1144 				goto err_out;
1145 			}
1146 			vn_vfsunlock(nvp);
1147 
1148 			(void) sdev_dirdelete(nddv, *ndvp);
1149 			*ndvp = NULL;
1150 			ASSERT(nddv->sdev_attrvp);
1151 			error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1152 			    nddv->sdev_attrvp, cred, NULL, 0);
1153 			if (error)
1154 				goto err_out;
1155 		} else {
1156 			if (doingdir) {
1157 				error = ENOTDIR;
1158 				goto err_out;
1159 			}
1160 
1161 			if (SDEV_IS_PERSIST((*ndvp))) {
1162 				bkstore = 1;
1163 			}
1164 
1165 			/*
1166 			 * get rid of the node from the directory cache
1167 			 * note, in case EBUSY is returned, the ZOMBIE
1168 			 * node is taken care in sdev_mknode.
1169 			 */
1170 			(void) sdev_dirdelete(nddv, *ndvp);
1171 			*ndvp = NULL;
1172 			if (bkstore) {
1173 				ASSERT(nddv->sdev_attrvp);
1174 				error = VOP_REMOVE(nddv->sdev_attrvp,
1175 				    nnm, cred, NULL, 0);
1176 				if (error)
1177 					goto err_out;
1178 			}
1179 		}
1180 	}
1181 
1182 	/* fix the source for a symlink */
1183 	if (vattr.va_type == VLNK) {
1184 		if (odv->sdev_symlink == NULL) {
1185 			error = sdev_follow_link(odv);
1186 			if (error) {
1187 				error = ENOENT;
1188 				goto err_out;
1189 			}
1190 		}
1191 		ASSERT(odv->sdev_symlink);
1192 		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1193 	}
1194 
1195 	/*
1196 	 * make a fresh node from the source attrs
1197 	 */
1198 	ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1199 	error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1200 	    NULL, (void *)link, cred, SDEV_READY);
1201 
1202 	if (link)
1203 		kmem_free(link, strlen(link) + 1);
1204 
1205 	if (error)
1206 		goto err_out;
1207 	ASSERT(*ndvp);
1208 	ASSERT((*ndvp)->sdev_state == SDEV_READY);
1209 
1210 	/* move dir contents */
1211 	if (doingdir) {
1212 		for (idv = SDEV_FIRST_ENTRY(odv); idv;
1213 		    idv = SDEV_NEXT_ENTRY(odv, idv)) {
1214 			error = sdev_rnmnode(odv, idv,
1215 			    (struct sdev_node *)(*ndvp), &ndv,
1216 			    idv->sdev_name, cred);
1217 			if (error)
1218 				goto err_out;
1219 			ndv = NULL;
1220 		}
1221 	}
1222 
1223 	if ((*ndvp)->sdev_attrvp) {
1224 		sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1225 		    AT_CTIME|AT_ATIME);
1226 	} else {
1227 		ASSERT((*ndvp)->sdev_attr);
1228 		gethrestime(&now);
1229 		(*ndvp)->sdev_attr->va_ctime = now;
1230 		(*ndvp)->sdev_attr->va_atime = now;
1231 	}
1232 
1233 	if (nddv->sdev_attrvp) {
1234 		sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1235 		    AT_MTIME|AT_ATIME);
1236 	} else {
1237 		ASSERT(nddv->sdev_attr);
1238 		gethrestime(&now);
1239 		nddv->sdev_attr->va_mtime = now;
1240 		nddv->sdev_attr->va_atime = now;
1241 	}
1242 	rw_exit(&nddv->sdev_contents);
1243 	if (!samedir)
1244 		rw_exit(&oddv->sdev_contents);
1245 
1246 	SDEV_RELE(*ndvp);
1247 	return (error);
1248 
1249 err_out:
1250 	rw_exit(&nddv->sdev_contents);
1251 	if (!samedir)
1252 		rw_exit(&oddv->sdev_contents);
1253 	return (error);
1254 }
1255 
1256 /*
1257  * Merge sdev_node specific information into an attribute structure.
1258  *
1259  * note: sdev_node is not locked here
1260  */
1261 void
1262 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1263 {
1264 	struct vnode *vp = SDEVTOV(dv);
1265 
1266 	vap->va_nlink = dv->sdev_nlink;
1267 	vap->va_nodeid = dv->sdev_ino;
1268 	vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1269 	vap->va_type = vp->v_type;
1270 
1271 	if (vp->v_type == VDIR) {
1272 		vap->va_rdev = 0;
1273 		vap->va_fsid = vp->v_rdev;
1274 	} else if (vp->v_type == VLNK) {
1275 		vap->va_rdev = 0;
1276 		vap->va_mode  &= ~S_IFMT;
1277 		vap->va_mode |= S_IFLNK;
1278 	} else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1279 		vap->va_rdev = vp->v_rdev;
1280 		vap->va_mode &= ~S_IFMT;
1281 		if (vap->va_type == VCHR)
1282 			vap->va_mode |= S_IFCHR;
1283 		else
1284 			vap->va_mode |= S_IFBLK;
1285 	} else {
1286 		vap->va_rdev = 0;
1287 	}
1288 }
1289 
1290 static struct vattr *
1291 sdev_getdefault_attr(enum vtype type)
1292 {
1293 	if (type == VDIR)
1294 		return (&sdev_vattr_dir);
1295 	else if (type == VCHR)
1296 		return (&sdev_vattr_chr);
1297 	else if (type == VBLK)
1298 		return (&sdev_vattr_blk);
1299 	else if (type == VLNK)
1300 		return (&sdev_vattr_lnk);
1301 	else
1302 		return (NULL);
1303 }
1304 int
1305 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1306 {
1307 	int rv = 0;
1308 	struct vnode *vp = SDEVTOV(dv);
1309 
1310 	switch (vp->v_type) {
1311 	case VCHR:
1312 	case VBLK:
1313 		/*
1314 		 * If vnode is a device, return special vnode instead
1315 		 * (though it knows all about -us- via sp->s_realvp)
1316 		 */
1317 		*vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1318 		VN_RELE(vp);
1319 		if (*vpp == NULLVP)
1320 			rv = ENOSYS;
1321 		break;
1322 	default:	/* most types are returned as is */
1323 		*vpp = vp;
1324 		break;
1325 	}
1326 	return (rv);
1327 }
1328 
1329 /*
1330  * the junction between devname and devfs
1331  */
1332 static struct vnode *
1333 devname_configure_by_path(char *physpath, struct vattr *vattr)
1334 {
1335 	int error = 0;
1336 	struct vnode *vp;
1337 
1338 	ASSERT(strncmp(physpath, "/devices/", sizeof ("/devices/") - 1)
1339 	    == 0);
1340 
1341 	error = devfs_lookupname(physpath + sizeof ("/devices/") - 1,
1342 	    NULLVPP, &vp);
1343 	if (error != 0) {
1344 		if (error == ENODEV) {
1345 			cmn_err(CE_CONT, "%s: not found (line %d)\n",
1346 			    physpath, __LINE__);
1347 		}
1348 
1349 		return (NULL);
1350 	}
1351 
1352 	if (vattr)
1353 		(void) VOP_GETATTR(vp, vattr, 0, kcred, NULL);
1354 	return (vp);
1355 }
1356 
1357 /*
1358  * junction between devname and root file system, e.g. ufs
1359  */
1360 int
1361 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1362 {
1363 	struct vnode *rdvp = ddv->sdev_attrvp;
1364 	int rval = 0;
1365 
1366 	ASSERT(rdvp);
1367 
1368 	rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1369 	    NULL);
1370 	return (rval);
1371 }
1372 
1373 static int
1374 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1375 {
1376 	struct sdev_node *dv = NULL;
1377 	char	*nm;
1378 	struct vnode *dirvp;
1379 	int	error;
1380 	vnode_t	*vp;
1381 	int eof;
1382 	struct iovec iov;
1383 	struct uio uio;
1384 	struct dirent64 *dp;
1385 	dirent64_t *dbuf;
1386 	size_t dbuflen;
1387 	struct vattr vattr;
1388 	char *link = NULL;
1389 
1390 	if (ddv->sdev_attrvp == NULL)
1391 		return (0);
1392 	if (!(ddv->sdev_flags & SDEV_BUILD))
1393 		return (0);
1394 
1395 	dirvp = ddv->sdev_attrvp;
1396 	VN_HOLD(dirvp);
1397 	dbuf = kmem_zalloc(dlen, KM_SLEEP);
1398 
1399 	uio.uio_iov = &iov;
1400 	uio.uio_iovcnt = 1;
1401 	uio.uio_segflg = UIO_SYSSPACE;
1402 	uio.uio_fmode = 0;
1403 	uio.uio_extflg = UIO_COPY_CACHED;
1404 	uio.uio_loffset = 0;
1405 	uio.uio_llimit = MAXOFFSET_T;
1406 
1407 	eof = 0;
1408 	error = 0;
1409 	while (!error && !eof) {
1410 		uio.uio_resid = dlen;
1411 		iov.iov_base = (char *)dbuf;
1412 		iov.iov_len = dlen;
1413 		(void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1414 		error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1415 		VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1416 
1417 		dbuflen = dlen - uio.uio_resid;
1418 		if (error || dbuflen == 0)
1419 			break;
1420 
1421 		if (!(ddv->sdev_flags & SDEV_BUILD)) {
1422 			error = 0;
1423 			break;
1424 		}
1425 
1426 		for (dp = dbuf; ((intptr_t)dp <
1427 		    (intptr_t)dbuf + dbuflen);
1428 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1429 			nm = dp->d_name;
1430 
1431 			if (strcmp(nm, ".") == 0 ||
1432 			    strcmp(nm, "..") == 0)
1433 				continue;
1434 
1435 			vp = NULLVP;
1436 			dv = sdev_cache_lookup(ddv, nm);
1437 			if (dv) {
1438 				if (dv->sdev_state != SDEV_ZOMBIE) {
1439 					SDEV_SIMPLE_RELE(dv);
1440 				} else {
1441 					/*
1442 					 * A ZOMBIE node may not have been
1443 					 * cleaned up from the backing store,
1444 					 * bypass this entry in this case,
1445 					 * and clean it up from the directory
1446 					 * cache if this is the last call.
1447 					 */
1448 					(void) sdev_dirdelete(ddv, dv);
1449 				}
1450 				continue;
1451 			}
1452 
1453 			/* refill the cache if not already */
1454 			error = devname_backstore_lookup(ddv, nm, &vp);
1455 			if (error)
1456 				continue;
1457 
1458 			vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1459 			error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1460 			if (error)
1461 				continue;
1462 
1463 			if (vattr.va_type == VLNK) {
1464 				error = sdev_getlink(vp, &link);
1465 				if (error) {
1466 					continue;
1467 				}
1468 				ASSERT(link != NULL);
1469 			}
1470 
1471 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1472 				rw_exit(&ddv->sdev_contents);
1473 				rw_enter(&ddv->sdev_contents, RW_WRITER);
1474 			}
1475 			error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1476 			    cred, SDEV_READY);
1477 			rw_downgrade(&ddv->sdev_contents);
1478 
1479 			if (link != NULL) {
1480 				kmem_free(link, strlen(link) + 1);
1481 				link = NULL;
1482 			}
1483 
1484 			if (!error) {
1485 				ASSERT(dv);
1486 				ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1487 				SDEV_SIMPLE_RELE(dv);
1488 			}
1489 			vp = NULL;
1490 			dv = NULL;
1491 		}
1492 	}
1493 
1494 done:
1495 	VN_RELE(dirvp);
1496 	kmem_free(dbuf, dlen);
1497 
1498 	return (error);
1499 }
1500 
1501 void
1502 sdev_filldir_dynamic(struct sdev_node *ddv)
1503 {
1504 	int error;
1505 	int i;
1506 	struct vattr *vap;
1507 	char *nm = NULL;
1508 	struct sdev_node *dv = NULL;
1509 
1510 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1511 	ASSERT((ddv->sdev_flags & SDEV_BUILD));
1512 
1513 	vap = sdev_getdefault_attr(VDIR);
1514 	for (i = 0; vtab[i].vt_name != NULL; i++) {
1515 		nm = vtab[i].vt_name;
1516 		ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1517 		dv = NULL;
1518 		error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1519 		    NULL, kcred, SDEV_READY);
1520 		if (error) {
1521 			cmn_err(CE_WARN, "%s/%s: error %d\n",
1522 			    ddv->sdev_name, nm, error);
1523 		} else {
1524 			ASSERT(dv);
1525 			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1526 			SDEV_SIMPLE_RELE(dv);
1527 		}
1528 	}
1529 }
1530 
1531 /*
1532  * Creating a backing store entry based on sdev_attr.
1533  * This is called either as part of node creation in a persistent directory
1534  * or from setattr/setsecattr to persist access attributes across reboot.
1535  */
1536 int
1537 sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1538 {
1539 	int error = 0;
1540 	struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1541 	struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1542 	struct vattr *vap = dv->sdev_attr;
1543 	char *nm = dv->sdev_name;
1544 	struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1545 
1546 	ASSERT(dv && dv->sdev_name && rdvp);
1547 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1548 
1549 lookup:
1550 	/* try to find it in the backing store */
1551 	error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1552 	    NULL);
1553 	if (error == 0) {
1554 		if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1555 			VN_HOLD(rrvp);
1556 			VN_RELE(*rvp);
1557 			*rvp = rrvp;
1558 		}
1559 
1560 		kmem_free(dv->sdev_attr, sizeof (vattr_t));
1561 		dv->sdev_attr = NULL;
1562 		dv->sdev_attrvp = *rvp;
1563 		return (0);
1564 	}
1565 
1566 	/* let's try to persist the node */
1567 	gethrestime(&vap->va_atime);
1568 	vap->va_mtime = vap->va_atime;
1569 	vap->va_ctime = vap->va_atime;
1570 	vap->va_mask |= AT_TYPE|AT_MODE;
1571 	switch (vap->va_type) {
1572 	case VDIR:
1573 		error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1574 		sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1575 		    (void *)(*rvp), error));
1576 		break;
1577 	case VCHR:
1578 	case VBLK:
1579 	case VREG:
1580 	case VDOOR:
1581 		error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1582 		    rvp, cred, 0, NULL, NULL);
1583 		sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1584 		    (void *)(*rvp), error));
1585 		if (!error)
1586 			VN_RELE(*rvp);
1587 		break;
1588 	case VLNK:
1589 		ASSERT(dv->sdev_symlink);
1590 		error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1591 		    NULL, 0);
1592 		sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1593 		    error));
1594 		break;
1595 	default:
1596 		cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1597 		    "create\n", nm);
1598 		/*NOTREACHED*/
1599 	}
1600 
1601 	/* go back to lookup to factor out spec node and set attrvp */
1602 	if (error == 0)
1603 		goto lookup;
1604 
1605 	sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error));
1606 	return (error);
1607 }
1608 
1609 static int
1610 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1611 {
1612 	int error = 0;
1613 	struct sdev_node *dup = NULL;
1614 
1615 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1616 	if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1617 		sdev_direnter(ddv, *dv);
1618 	} else {
1619 		if (dup->sdev_state == SDEV_ZOMBIE) {
1620 			error = sdev_dirdelete(ddv, dup);
1621 			/*
1622 			 * The ZOMBIE node is still hanging
1623 			 * around with more than one reference counts.
1624 			 * Fail the new node creation so that
1625 			 * the directory cache won't have
1626 			 * duplicate entries for the same named node
1627 			 */
1628 			if (error == EBUSY) {
1629 				SDEV_SIMPLE_RELE(*dv);
1630 				sdev_nodedestroy(*dv, 0);
1631 				*dv = NULL;
1632 				return (error);
1633 			}
1634 			sdev_direnter(ddv, *dv);
1635 		} else {
1636 			ASSERT((*dv)->sdev_state != SDEV_ZOMBIE);
1637 			SDEV_SIMPLE_RELE(*dv);
1638 			sdev_nodedestroy(*dv, 0);
1639 			*dv = dup;
1640 		}
1641 	}
1642 
1643 	return (0);
1644 }
1645 
1646 static int
1647 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1648 {
1649 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1650 	return (sdev_dirdelete(ddv, *dv));
1651 }
1652 
1653 /*
1654  * update the in-core directory cache
1655  */
1656 int
1657 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1658     sdev_cache_ops_t ops)
1659 {
1660 	int error = 0;
1661 
1662 	ASSERT((SDEV_HELD(*dv)));
1663 
1664 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1665 	switch (ops) {
1666 	case SDEV_CACHE_ADD:
1667 		error = sdev_cache_add(ddv, dv, nm);
1668 		break;
1669 	case SDEV_CACHE_DELETE:
1670 		error = sdev_cache_delete(ddv, dv);
1671 		break;
1672 	default:
1673 		break;
1674 	}
1675 
1676 	return (error);
1677 }
1678 
1679 /*
1680  * retrieve the named entry from the directory cache
1681  */
1682 struct sdev_node *
1683 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1684 {
1685 	struct sdev_node *dv = NULL;
1686 
1687 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1688 	dv = sdev_findbyname(ddv, nm);
1689 
1690 	return (dv);
1691 }
1692 
1693 /*
1694  * Implicit reconfig for nodes constructed by a link generator
1695  * Start devfsadm if needed, or if devfsadm is in progress,
1696  * prepare to block on devfsadm either completing or
1697  * constructing the desired node.  As devfsadmd is global
1698  * in scope, constructing all necessary nodes, we only
1699  * need to initiate it once.
1700  */
1701 static int
1702 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1703 {
1704 	int error = 0;
1705 
1706 	if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1707 		sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1708 		    ddv->sdev_name, nm, devfsadm_state));
1709 		mutex_enter(&dv->sdev_lookup_lock);
1710 		SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1711 		mutex_exit(&dv->sdev_lookup_lock);
1712 		error = 0;
1713 	} else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1714 		sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1715 		    ddv->sdev_name, nm, devfsadm_state));
1716 
1717 		sdev_devfsadmd_thread(ddv, dv, kcred);
1718 		mutex_enter(&dv->sdev_lookup_lock);
1719 		SDEV_BLOCK_OTHERS(dv,
1720 		    (SDEV_LOOKUP | SDEV_LGWAITING));
1721 		mutex_exit(&dv->sdev_lookup_lock);
1722 		error = 0;
1723 	} else {
1724 		error = -1;
1725 	}
1726 
1727 	return (error);
1728 }
1729 
/*
 * Support for specialized device naming construction mechanisms
 *
 * Invoke the directory-specific callback for the name nm in ddv and turn
 * its result into an SDEV_READY node via sdev_mknode().  The flags select
 * what the callback produces:
 *
 *	SDEV_PATH	- a physical path, which is configured through
 *			  devname_configure_by_path() and then recorded
 *			  as a symlink node
 *	SDEV_VLINK	- a link target, recorded as a symlink node
 *	SDEV_VNODE	- a backing store vnode
 *	SDEV_VATTR	- a filled-in attribute structure
 *
 *	ddv		- parent directory node
 *	dvp		- in: the node being looked up; it is written back
 *			  only on the paths that fall through to the end
 *			  of the function
 *	nm		- name being looked up
 *	callback	- directory callback
 *	flags		- one of the SDEV_* selectors above
 *	cred		- credentials passed to sdev_mknode()
 *
 * On entry the node is marked SDEV_LOOKUP so other lookups block on it.
 * The successful paths clear that mark and return 0.  The failing paths
 * return -1 or the sdev_mknode() error without clearing it here; the
 * "notfound" label in devname_lookup_func() clears it for that caller.
 *
 * ddv->sdev_contents is asserted to be held as reader before each
 * sdev_mknode() call, is upgraded to writer around the call, and is
 * downgraded to reader again afterwards.
 */
static int
sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
    int (*callback)(struct sdev_node *, char *, void **, struct cred *,
    void *, char *), int flags, struct cred *cred)
{
	int rv = 0;
	char *physpath = NULL;
	struct vnode *rvp = NULL;
	struct vattr vattr;
	struct vattr *vap;
	struct sdev_node *dv = *dvp;

	/* make concurrent lookups of this node wait for us */
	mutex_enter(&dv->sdev_lookup_lock);
	SDEV_BLOCK_OTHERS(dv, SDEV_LOOKUP);
	mutex_exit(&dv->sdev_lookup_lock);

	/* for non-devfsadm devices */
	if (flags & SDEV_PATH) {
		physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
		rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
		    NULL);
		if (rv) {
			kmem_free(physpath, MAXPATHLEN);
			return (-1);
		}

		/*
		 * NOTE(review): rvp is only tested against NULL below and
		 * is not passed on or released in this function - confirm
		 * whether keeping that reference is intentional.
		 */
		rvp = devname_configure_by_path(physpath, NULL);
		if (rvp == NULL) {
			sdcmn_err3(("devname_configure_by_path: "
			    "failed for /dev/%s/%s\n",
			    ddv->sdev_name, nm));
			kmem_free(physpath, MAXPATHLEN);
			rv = -1;
		} else {
			vap = sdev_getdefault_attr(VLNK);
			ASSERT(RW_READ_HELD(&ddv->sdev_contents));

			/*
			 * Sdev_mknode may return back a different sdev_node
			 * that was created by another thread that
			 * raced to the directory cache before this thread.
			 *
			 * With current directory cache mechanism
			 * (linked list with the sdev_node name as
			 * the entity key), this is a way to make sure
			 * only one entry exists for the same name
			 * in the same directory. The outcome is
			 * the winner wins.
			 */
			if (!rw_tryupgrade(&ddv->sdev_contents)) {
				rw_exit(&ddv->sdev_contents);
				rw_enter(&ddv->sdev_contents, RW_WRITER);
			}
			rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
			    (void *)physpath, cred, SDEV_READY);
			rw_downgrade(&ddv->sdev_contents);
			kmem_free(physpath, MAXPATHLEN);
			if (rv) {
				return (rv);
			} else {
				mutex_enter(&dv->sdev_lookup_lock);
				SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
				mutex_exit(&dv->sdev_lookup_lock);
				return (0);
			}
		}
	} else if (flags & SDEV_VLINK) {
		physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
		rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
		    NULL);
		if (rv) {
			kmem_free(physpath, MAXPATHLEN);
			return (-1);
		}

		/*
		 * NOTE(review): vap is the pointer returned by
		 * sdev_getdefault_attr(), so the va_size store below writes
		 * into that returned structure rather than a private copy -
		 * confirm this is safe against concurrent lookups.
		 */
		vap = sdev_getdefault_attr(VLNK);
		vap->va_size = strlen(physpath);
		ASSERT(RW_READ_HELD(&ddv->sdev_contents));

		if (!rw_tryupgrade(&ddv->sdev_contents)) {
			rw_exit(&ddv->sdev_contents);
			rw_enter(&ddv->sdev_contents, RW_WRITER);
		}
		rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
		    (void *)physpath, cred, SDEV_READY);
		rw_downgrade(&ddv->sdev_contents);
		kmem_free(physpath, MAXPATHLEN);
		if (rv)
			return (rv);

		mutex_enter(&dv->sdev_lookup_lock);
		SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
		mutex_exit(&dv->sdev_lookup_lock);
		return (0);
	} else if (flags & SDEV_VNODE) {
		/*
		 * DBNR has its own way to create the device
		 * and return a backing store vnode in rvp
		 */
		ASSERT(callback);
		rv = callback(ddv, nm, (void *)&rvp, kcred, NULL, NULL);
		if (rv || (rvp == NULL)) {
			sdcmn_err3(("devname_lookup_func: SDEV_VNODE "
			    "callback failed \n"));
			return (-1);
		}
		/* no default attributes for this vnode type: give up */
		vap = sdev_getdefault_attr(rvp->v_type);
		if (vap == NULL)
			return (-1);

		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
		if (!rw_tryupgrade(&ddv->sdev_contents)) {
			rw_exit(&ddv->sdev_contents);
			rw_enter(&ddv->sdev_contents, RW_WRITER);
		}
		rv = sdev_mknode(ddv, nm, &dv, vap, rvp, NULL,
		    cred, SDEV_READY);
		rw_downgrade(&ddv->sdev_contents);
		if (rv)
			return (rv);

		mutex_enter(&dv->sdev_lookup_lock);
		SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
		mutex_exit(&dv->sdev_lookup_lock);
		return (0);
	} else if (flags & SDEV_VATTR) {
		/*
		 * /dev/pts
		 *
		 * DBNR has its own way to create the device
		 * "0" is returned upon success.
		 *
		 * callback is responsible to set the basic attributes,
		 * e.g. va_type/va_uid/va_gid/
		 *    dev_t if VCHR or VBLK/
		 */
		ASSERT(callback);
		rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
		if (rv) {
			sdcmn_err3(("devname_lookup_func: SDEV_NONE "
			    "callback failed \n"));
			return (-1);
		}

		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
		if (!rw_tryupgrade(&ddv->sdev_contents)) {
			rw_exit(&ddv->sdev_contents);
			rw_enter(&ddv->sdev_contents, RW_WRITER);
		}
		rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
		    cred, SDEV_READY);
		rw_downgrade(&ddv->sdev_contents);

		if (rv)
			return (rv);

		mutex_enter(&dv->sdev_lookup_lock);
		SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
		mutex_exit(&dv->sdev_lookup_lock);
		return (0);
	} else {
		impossible(("lookup: %s/%s by %s not supported (%d)\n",
		    SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
		    __LINE__));
		rv = -1;
	}

	/* only the SDEV_PATH configure failure and the unsupported case */
	*dvp = dv;
	return (rv);
}
1903 
/*
 * Report whether exec_name names the devfsadm executable.
 * Returns 1 on an exact match with "devfsadm", 0 otherwise.
 */
static int
is_devfsadm_thread(char *exec_name)
{
	/*
	 * note: because devfsadmd -> /usr/sbin/devfsadm
	 * it is safe to use "devfsadm" to capture the lookups
	 * from devfsadm and its daemon version.
	 */
	return (strcmp(exec_name, "devfsadm") == 0);
}
1916 
1917 
/*
 * Lookup Order:
 *	sdev_node cache;
 *	backing store (SDEV_PERSIST);
 *	DBNR: a. dir_ops implemented in the loadable modules;
 *	      b. vnode ops in vtab.
 *
 * Look up the name nm in the directory ddv and return its vnode.
 *
 *	ddv		- directory to search; must be a VDIR
 *	nm		- name to look up
 *	vpp		- out: resulting vnode on success, NULL on failure
 *	cred		- credentials for attribute fetch and node creation
 *	callback	- optional directory callback used for Directory
 *			  Based Name Resolution; when NULL the devfsadm
 *			  reconfiguration path is used instead
 *	flags		- passed through to sdev_call_dircallback()
 *
 * Returns 0 on success, ENOTDIR if ddv is not a directory, ENOENT when
 * the name cannot be resolved, or the result of sdev_to_vp().
 *
 * ddv->sdev_contents is taken as reader here, upgraded to writer where
 * the directory cache is modified, and dropped before returning.
 */
int
devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
    struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
    struct cred *, void *, char *), int flags)
{
	int rv = 0, nmlen;
	struct vnode *rvp = NULL;
	struct sdev_node *dv = NULL;
	int	retried = 0;
	int	error = 0;
	struct vattr vattr;
	char *lookup_thread = curproc->p_user.u_comm;
	int failed_flags = 0;
	int (*vtor)(struct sdev_node *) = NULL;
	int state;
	int parent_state;
	char *link = NULL;

	if (SDEVTOV(ddv)->v_type != VDIR)
		return (ENOTDIR);

	/*
	 * Empty name or ., return node itself.
	 */
	nmlen = strlen(nm);
	if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
		*vpp = SDEVTOV(ddv);
		VN_HOLD(*vpp);
		return (0);
	}

	/*
	 * .., return the parent directory
	 */
	if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
		*vpp = SDEVTOV(ddv->sdev_dotdot);
		VN_HOLD(*vpp);
		return (0);
	}

	rw_enter(&ddv->sdev_contents, RW_READER);
	/* pick up the directory's node validator, if it has one */
	if (ddv->sdev_flags & SDEV_VTOR) {
		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
		ASSERT(vtor);
	}

tryagain:
	/*
	 * (a) directory cache lookup:
	 */
	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
	parent_state = ddv->sdev_state;
	dv = sdev_cache_lookup(ddv, nm);
	if (dv) {
		state = dv->sdev_state;
		switch (state) {
		case SDEV_INIT:
			/* devfsadm itself never waits on an INIT node */
			if (is_devfsadm_thread(lookup_thread))
				break;

			/* ZOMBIED parent won't allow node creation */
			if (parent_state == SDEV_ZOMBIE) {
				SD_TRACE_FAILED_LOOKUP(ddv, nm,
				    retried);
				goto nolock_notfound;
			}

			mutex_enter(&dv->sdev_lookup_lock);
			/* compensate the threads started after devfsadm */
			if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
			    !(SDEV_IS_LOOKUP(dv)))
				SDEV_BLOCK_OTHERS(dv,
				    (SDEV_LOOKUP | SDEV_LGWAITING));

			if (SDEV_IS_LOOKUP(dv)) {
				/*
				 * Another lookup is in progress on this
				 * node; drop the directory lock while
				 * waiting for it and re-take it afterwards.
				 */
				failed_flags |= SLF_REBUILT;
				rw_exit(&ddv->sdev_contents);
				error = sdev_wait4lookup(dv, SDEV_LOOKUP);
				mutex_exit(&dv->sdev_lookup_lock);
				rw_enter(&ddv->sdev_contents, RW_READER);

				if (error != 0) {
					SD_TRACE_FAILED_LOOKUP(ddv, nm,
					    retried);
					goto nolock_notfound;
				}

				/* re-read the state now that the wait is over */
				state = dv->sdev_state;
				if (state == SDEV_INIT) {
					SD_TRACE_FAILED_LOOKUP(ddv, nm,
					    retried);
					goto nolock_notfound;
				} else if (state == SDEV_READY) {
					goto found;
				} else if (state == SDEV_ZOMBIE) {
					rw_exit(&ddv->sdev_contents);
					SD_TRACE_FAILED_LOOKUP(ddv, nm,
					    retried);
					SDEV_RELE(dv);
					goto lookup_failed;
				}
			} else {
				mutex_exit(&dv->sdev_lookup_lock);
			}
			break;
		case SDEV_READY:
			goto found;
		case SDEV_ZOMBIE:
			rw_exit(&ddv->sdev_contents);
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			SDEV_RELE(dv);
			goto lookup_failed;
		default:
			/*
			 * NOTE(review): dv is not released on this path,
			 * unlike the ZOMBIE case above - confirm.
			 */
			rw_exit(&ddv->sdev_contents);
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			sdev_lookup_failed(ddv, nm, failed_flags);
			*vpp = NULLVP;
			return (ENOENT);
		}
	}
	ASSERT(RW_READ_HELD(&ddv->sdev_contents));

	/*
	 * ZOMBIED parent does not allow new node creation.
	 * bail out early
	 */
	if (parent_state == SDEV_ZOMBIE) {
		rw_exit(&ddv->sdev_contents);
		*vpp = NULL;
		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
		return (ENOENT);
	}

	/*
	 * (b0): backing store lookup
	 *	SDEV_PERSIST is default except:
	 *		1) pts nodes
	 *		2) non-chmod'ed local nodes
	 */
	if (SDEV_IS_PERSIST(ddv)) {
		error = devname_backstore_lookup(ddv, nm, &rvp);

		if (!error) {
			sdcmn_err3(("devname_backstore_lookup: "
			    "found attrvp %p for %s\n", (void *)rvp, nm));

			/*
			 * NOTE(review): va_type is read below although
			 * AT_TYPE is not part of the requested mask, and
			 * rvp is not released on the two error returns
			 * that follow - confirm both.
			 */
			vattr.va_mask = AT_MODE|AT_UID|AT_GID;
			error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
			if (error) {
				rw_exit(&ddv->sdev_contents);
				if (dv)
					SDEV_RELE(dv);
				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
				sdev_lookup_failed(ddv, nm, failed_flags);
				*vpp = NULLVP;
				return (ENOENT);
			}

			if (vattr.va_type == VLNK) {
				error = sdev_getlink(rvp, &link);
				if (error) {
					rw_exit(&ddv->sdev_contents);
					if (dv)
						SDEV_RELE(dv);
					SD_TRACE_FAILED_LOOKUP(ddv, nm,
					    retried);
					sdev_lookup_failed(ddv, nm,
					    failed_flags);
					*vpp = NULLVP;
					return (ENOENT);
				}
				ASSERT(link != NULL);
			}

			/* node creation needs the directory write lock */
			if (!rw_tryupgrade(&ddv->sdev_contents)) {
				rw_exit(&ddv->sdev_contents);
				rw_enter(&ddv->sdev_contents, RW_WRITER);
			}
			error = sdev_mknode(ddv, nm, &dv, &vattr,
			    rvp, link, cred, SDEV_READY);
			rw_downgrade(&ddv->sdev_contents);

			if (link != NULL) {
				kmem_free(link, strlen(link) + 1);
				link = NULL;
			}

			if (error) {
				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
				rw_exit(&ddv->sdev_contents);
				if (dv)
					SDEV_RELE(dv);
				goto lookup_failed;
			} else {
				goto found;
			}
		} else if (retried) {
			/* already went through devfsadm once: give up */
			rw_exit(&ddv->sdev_contents);
			sdcmn_err3(("retry of lookup of %s/%s: failed\n",
			    ddv->sdev_name, nm));
			if (dv)
				SDEV_RELE(dv);
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			sdev_lookup_failed(ddv, nm, failed_flags);
			*vpp = NULLVP;
			return (ENOENT);
		}
	}

lookup_create_node:
	/* first thread that is doing the lookup on this node */
	if (!dv) {
		if (!rw_tryupgrade(&ddv->sdev_contents)) {
			rw_exit(&ddv->sdev_contents);
			rw_enter(&ddv->sdev_contents, RW_WRITER);
		}
		/* create an SDEV_INIT placeholder for the name */
		error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
		    cred, SDEV_INIT);
		if (!dv) {
			rw_exit(&ddv->sdev_contents);
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			sdev_lookup_failed(ddv, nm, failed_flags);
			*vpp = NULLVP;
			return (ENOENT);
		}
		rw_downgrade(&ddv->sdev_contents);
	}
	ASSERT(dv);
	ASSERT(SDEV_HELD(dv));

	if (SDEV_IS_NO_NCACHE(dv)) {
		failed_flags |= SLF_NO_NCACHE;
	}

	/*
	 * (b1) invoking devfsadm once per life time for devfsadm nodes
	 */
	if (!callback) {

		if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
		    SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
		    ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
			ASSERT(SDEV_HELD(dv));
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			goto nolock_notfound;
		}

		/*
		 * filter out known non-existent devices recorded
		 * during initial reconfiguration boot for which
		 * reconfig should not be done and lookup may
		 * be short-circuited now.
		 */
		if (sdev_lookup_filter(ddv, nm)) {
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			goto nolock_notfound;
		}

		/* bypassing devfsadm internal nodes */
		if (is_devfsadm_thread(lookup_thread)) {
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			goto nolock_notfound;
		}

		if (sdev_reconfig_disable) {
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			goto nolock_notfound;
		}

		error = sdev_call_devfsadmd(ddv, dv, nm);
		if (error == 0) {
			sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
			    ddv->sdev_name, nm, curproc->p_user.u_comm));
			if (sdev_reconfig_verbose) {
				cmn_err(CE_CONT,
				    "?lookup of %s/%s by %s: reconfig\n",
				    ddv->sdev_name, nm, curproc->p_user.u_comm);
			}
			/*
			 * Start over from the cache lookup; the INIT case
			 * there waits on the node marked by
			 * sdev_call_devfsadmd().
			 */
			retried = 1;
			failed_flags |= SLF_REBUILT;
			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
			SDEV_SIMPLE_RELE(dv);
			goto tryagain;
		} else {
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			goto nolock_notfound;
		}
	}

	/*
	 * (b2) Directory Based Name Resolution (DBNR):
	 *	ddv	- parent
	 *	nm	- /dev/(ddv->sdev_name)/nm
	 *
	 *	note: module vnode ops take precedence than the built-in ones
	 */
	if (callback) {
		error = sdev_call_dircallback(ddv, &dv, nm, callback,
		    flags, cred);
		if (error == 0) {
			goto found;
		} else {
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			goto notfound;
		}
	}
	/*
	 * NOTE(review): both the !callback and the callback branches above
	 * leave through a goto, so this assertion appears unreachable.
	 */
	ASSERT(rvp);

found:
	ASSERT(!(dv->sdev_flags & SDEV_STALE));
	ASSERT(dv->sdev_state == SDEV_READY);
	if (vtor) {
		/*
		 * Check validity of returned node
		 */
		switch (vtor(dv)) {
		case SDEV_VTOR_VALID:
			break;
		case SDEV_VTOR_STALE:
			/*
			 * The name exists, but the cache entry is
			 * stale and needs to be re-created.
			 */
			ASSERT(RW_READ_HELD(&ddv->sdev_contents));
			if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
				rw_exit(&ddv->sdev_contents);
				rw_enter(&ddv->sdev_contents, RW_WRITER);
			}
			error = sdev_cache_update(ddv, &dv, nm,
			    SDEV_CACHE_DELETE);
			rw_downgrade(&ddv->sdev_contents);
			if (error == 0) {
				dv = NULL;
				goto lookup_create_node;
			}
			/* FALLTHRU */
		case SDEV_VTOR_INVALID:
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			sdcmn_err7(("lookup: destroy invalid "
			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
			goto nolock_notfound;
		case SDEV_VTOR_SKIP:
			sdcmn_err7(("lookup: node not applicable - "
			    "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
			rw_exit(&ddv->sdev_contents);
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			SDEV_RELE(dv);
			goto lookup_failed;
		default:
			cmn_err(CE_PANIC,
			    "dev fs: validator failed: %s(%p)\n",
			    dv->sdev_name, (void *)dv);
			break;
			/*NOTREACHED*/
		}
	}

	/* success: convert the node into the vnode handed to the caller */
	rw_exit(&ddv->sdev_contents);
	rv = sdev_to_vp(dv, vpp);
	sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
	    "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
	    dv->sdev_state, nm, rv));
	return (rv);

notfound:
	/* reached after a failed directory callback: clear SDEV_LOOKUP */
	mutex_enter(&dv->sdev_lookup_lock);
	SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
	mutex_exit(&dv->sdev_lookup_lock);
nolock_notfound:
	/*
	 * Destroy the node that is created for synchronization purposes.
	 */
	sdcmn_err3(("devname_lookup_func: %s with state %d\n",
	    nm, dv->sdev_state));
	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
	if (dv->sdev_state == SDEV_INIT) {
		if (!rw_tryupgrade(&ddv->sdev_contents)) {
			rw_exit(&ddv->sdev_contents);
			rw_enter(&ddv->sdev_contents, RW_WRITER);
		}

		/*
		 * Node state may have changed during the lock
		 * changes. Re-check.
		 */
		if (dv->sdev_state == SDEV_INIT) {
			(void) sdev_dirdelete(ddv, dv);
			rw_exit(&ddv->sdev_contents);
			sdev_lookup_failed(ddv, nm, failed_flags);
			*vpp = NULL;
			return (ENOENT);
		}
	}

	/* the node is no longer INIT: just drop our hold on it */
	rw_exit(&ddv->sdev_contents);
	SDEV_RELE(dv);

lookup_failed:
	/* common failure exit; the directory lock is already dropped */
	sdev_lookup_failed(ddv, nm, failed_flags);
	*vpp = NULL;
	return (ENOENT);
}
2327 
2328 /*
2329  * Given a directory node, mark all nodes beneath as
2330  * STALE, i.e. nodes that don't exist as far as new
2331  * consumers are concerned.  Remove them from the
2332  * list of directory entries so that no lookup or
2333  * directory traversal will find them.  The node
2334  * not deallocated so existing holds are not affected.
2335  */
2336 void
2337 sdev_stale(struct sdev_node *ddv)
2338 {
2339 	struct sdev_node *dv;
2340 	struct vnode *vp;
2341 
2342 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2343 
2344 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2345 	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = SDEV_NEXT_ENTRY(ddv, dv)) {
2346 		vp = SDEVTOV(dv);
2347 		if (vp->v_type == VDIR)
2348 			sdev_stale(dv);
2349 
2350 		sdcmn_err9(("sdev_stale: setting stale %s\n",
2351 		    dv->sdev_path));
2352 		dv->sdev_flags |= SDEV_STALE;
2353 		avl_remove(&ddv->sdev_entries, dv);
2354 	}
2355 	ddv->sdev_flags |= SDEV_BUILD;
2356 	rw_exit(&ddv->sdev_contents);
2357 }
2358 
2359 /*
2360  * Given a directory node, clean out all the nodes beneath.
2361  * If expr is specified, clean node with names matching expr.
2362  * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2363  *	so they are excluded from future lookups.
2364  */
2365 int
2366 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2367 {
2368 	int error = 0;
2369 	int busy = 0;
2370 	struct vnode *vp;
2371 	struct sdev_node *dv, *next = NULL;
2372 	int bkstore = 0;
2373 	int len = 0;
2374 	char *bks_name = NULL;
2375 
2376 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2377 
2378 	/*
2379 	 * We try our best to destroy all unused sdev_node's
2380 	 */
2381 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2382 	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) {
2383 		next = SDEV_NEXT_ENTRY(ddv, dv);
2384 		vp = SDEVTOV(dv);
2385 
2386 		if (expr && gmatch(dv->sdev_name, expr) == 0)
2387 			continue;
2388 
2389 		if (vp->v_type == VDIR &&
2390 		    sdev_cleandir(dv, NULL, flags) != 0) {
2391 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2392 			    dv->sdev_name));
2393 			busy++;
2394 			continue;
2395 		}
2396 
2397 		if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2398 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2399 			    dv->sdev_name));
2400 			busy++;
2401 			continue;
2402 		}
2403 
2404 		/*
2405 		 * at this point, either dv is not held or SDEV_ENFORCE
2406 		 * is specified. In either case, dv needs to be deleted
2407 		 */
2408 		SDEV_HOLD(dv);
2409 
2410 		bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2411 		if (bkstore && (vp->v_type == VDIR))
2412 			bkstore += 1;
2413 
2414 		if (bkstore) {
2415 			len = strlen(dv->sdev_name) + 1;
2416 			bks_name = kmem_alloc(len, KM_SLEEP);
2417 			bcopy(dv->sdev_name, bks_name, len);
2418 		}
2419 
2420 		error = sdev_dirdelete(ddv, dv);
2421 
2422 		if (error == EBUSY) {
2423 			sdcmn_err9(("sdev_cleandir: dir busy\n"));
2424 			busy++;
2425 		}
2426 
2427 		/* take care the backing store clean up */
2428 		if (bkstore && (error == 0)) {
2429 			ASSERT(bks_name);
2430 			ASSERT(ddv->sdev_attrvp);
2431 
2432 			if (bkstore == 1) {
2433 				error = VOP_REMOVE(ddv->sdev_attrvp,
2434 				    bks_name, kcred, NULL, 0);
2435 			} else if (bkstore == 2) {
2436 				error = VOP_RMDIR(ddv->sdev_attrvp,
2437 				    bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2438 			}
2439 
2440 			/* do not propagate the backing store errors */
2441 			if (error) {
2442 				sdcmn_err9(("sdev_cleandir: backing store"
2443 				    "not cleaned\n"));
2444 				error = 0;
2445 			}
2446 
2447 			bkstore = 0;
2448 			kmem_free(bks_name, len);
2449 			bks_name = NULL;
2450 			len = 0;
2451 		}
2452 	}
2453 
2454 	ddv->sdev_flags |= SDEV_BUILD;
2455 	rw_exit(&ddv->sdev_contents);
2456 
2457 	if (busy) {
2458 		error = EBUSY;
2459 	}
2460 
2461 	return (error);
2462 }
2463 
2464 /*
2465  * a convenient wrapper for readdir() funcs
2466  */
2467 size_t
2468 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2469 {
2470 	size_t reclen = DIRENT64_RECLEN(strlen(nm));
2471 	if (reclen > size)
2472 		return (0);
2473 
2474 	de->d_ino = (ino64_t)ino;
2475 	de->d_off = (off64_t)off + 1;
2476 	de->d_reclen = (ushort_t)reclen;
2477 	(void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2478 	return (reclen);
2479 }
2480 
2481 /*
2482  * sdev_mount service routines
2483  */
2484 int
2485 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2486 {
2487 	int	error;
2488 
2489 	if (uap->datalen != sizeof (*args))
2490 		return (EINVAL);
2491 
2492 	if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2493 		cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2494 		    "get user data. error %d\n", error);
2495 		return (EFAULT);
2496 	}
2497 
2498 	return (0);
2499 }
2500 
/*
 * Step from one dirent64 record to the one packed directly behind it,
 * i.e. advance dp by its own d_reclen bytes.  Any earlier definition of
 * nextdp is discarded first.  Note that dp is evaluated twice.
 */
#undef nextdp
#define	nextdp(dp)	\
	((struct dirent64 *)(intptr_t)((char *)(dp) + (dp)->d_reclen))
2506 
2507 /*
2508  * readdir helper func
2509  */
2510 int
2511 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
2512     int flags)
2513 {
2514 	struct sdev_node *ddv = VTOSDEV(vp);
2515 	struct sdev_node *dv;
2516 	dirent64_t	*dp;
2517 	ulong_t		outcount = 0;
2518 	size_t		namelen;
2519 	ulong_t		alloc_count;
2520 	void		*outbuf;
2521 	struct iovec	*iovp;
2522 	int		error = 0;
2523 	size_t		reclen;
2524 	offset_t	diroff;
2525 	offset_t	soff;
2526 	int		this_reclen;
2527 	int (*vtor)(struct sdev_node *) = NULL;
2528 	struct vattr attr;
2529 	timestruc_t now;
2530 
2531 	ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
2532 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2533 
2534 	if (uiop->uio_loffset >= MAXOFF_T) {
2535 		if (eofp)
2536 			*eofp = 1;
2537 		return (0);
2538 	}
2539 
2540 	if (uiop->uio_iovcnt != 1)
2541 		return (EINVAL);
2542 
2543 	if (vp->v_type != VDIR)
2544 		return (ENOTDIR);
2545 
2546 	if (ddv->sdev_flags & SDEV_VTOR) {
2547 		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2548 		ASSERT(vtor);
2549 	}
2550 
2551 	if (eofp != NULL)
2552 		*eofp = 0;
2553 
2554 	soff = uiop->uio_loffset;
2555 	iovp = uiop->uio_iov;
2556 	alloc_count = iovp->iov_len;
2557 	dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
2558 	outcount = 0;
2559 
2560 	if (ddv->sdev_state == SDEV_ZOMBIE)
2561 		goto get_cache;
2562 
2563 	if (SDEV_IS_GLOBAL(ddv)) {
2564 
2565 		if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
2566 		    !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
2567 		    !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
2568 		    ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
2569 		    !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
2570 		    !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2571 		    !sdev_reconfig_disable) {
2572 			/*
2573 			 * invoking "devfsadm" to do system device reconfig
2574 			 */
2575 			mutex_enter(&ddv->sdev_lookup_lock);
2576 			SDEV_BLOCK_OTHERS(ddv,
2577 			    (SDEV_READDIR|SDEV_LGWAITING));
2578 			mutex_exit(&ddv->sdev_lookup_lock);
2579 
2580 			sdcmn_err8(("readdir of %s by %s: reconfig\n",
2581 			    ddv->sdev_path, curproc->p_user.u_comm));
2582 			if (sdev_reconfig_verbose) {
2583 				cmn_err(CE_CONT,
2584 				    "?readdir of %s by %s: reconfig\n",
2585 				    ddv->sdev_path, curproc->p_user.u_comm);
2586 			}
2587 
2588 			sdev_devfsadmd_thread(ddv, NULL, kcred);
2589 		} else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
2590 			/*
2591 			 * compensate the "ls" started later than "devfsadm"
2592 			 */
2593 			mutex_enter(&ddv->sdev_lookup_lock);
2594 			SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
2595 			mutex_exit(&ddv->sdev_lookup_lock);
2596 		}
2597 
2598 		/*
2599 		 * release the contents lock so that
2600 		 * the cache may be updated by devfsadmd
2601 		 */
2602 		rw_exit(&ddv->sdev_contents);
2603 		mutex_enter(&ddv->sdev_lookup_lock);
2604 		if (SDEV_IS_READDIR(ddv))
2605 			(void) sdev_wait4lookup(ddv, SDEV_READDIR);
2606 		mutex_exit(&ddv->sdev_lookup_lock);
2607 		rw_enter(&ddv->sdev_contents, RW_READER);
2608 
2609 		sdcmn_err4(("readdir of directory %s by %s\n",
2610 		    ddv->sdev_name, curproc->p_user.u_comm));
2611 		if (ddv->sdev_flags & SDEV_BUILD) {
2612 			if (SDEV_IS_PERSIST(ddv)) {
2613 				error = sdev_filldir_from_store(ddv,
2614 				    alloc_count, cred);
2615 			}
2616 			ddv->sdev_flags &= ~SDEV_BUILD;
2617 		}
2618 	}
2619 
2620 get_cache:
2621 	/* handle "." and ".." */
2622 	diroff = 0;
2623 	if (soff == 0) {
2624 		/* first time */
2625 		this_reclen = DIRENT64_RECLEN(1);
2626 		if (alloc_count < this_reclen) {
2627 			error = EINVAL;
2628 			goto done;
2629 		}
2630 
2631 		dp->d_ino = (ino64_t)ddv->sdev_ino;
2632 		dp->d_off = (off64_t)1;
2633 		dp->d_reclen = (ushort_t)this_reclen;
2634 
2635 		(void) strncpy(dp->d_name, ".",
2636 		    DIRENT64_NAMELEN(this_reclen));
2637 		outcount += dp->d_reclen;
2638 		dp = nextdp(dp);
2639 	}
2640 
2641 	diroff++;
2642 	if (soff <= 1) {
2643 		this_reclen = DIRENT64_RECLEN(2);
2644 		if (alloc_count < outcount + this_reclen) {
2645 			error = EINVAL;
2646 			goto done;
2647 		}
2648 
2649 		dp->d_reclen = (ushort_t)this_reclen;
2650 		dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
2651 		dp->d_off = (off64_t)2;
2652 
2653 		(void) strncpy(dp->d_name, "..",
2654 		    DIRENT64_NAMELEN(this_reclen));
2655 		outcount += dp->d_reclen;
2656 
2657 		dp = nextdp(dp);
2658 	}
2659 
2660 
2661 	/* gets the cache */
2662 	diroff++;
2663 	for (dv = SDEV_FIRST_ENTRY(ddv); dv;
2664 	    dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
2665 		sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
2666 		    diroff, soff, dv->sdev_name));
2667 
2668 		/* bypassing pre-matured nodes */
2669 		if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
2670 			sdcmn_err3(("sdev_readdir: pre-mature node  "
2671 			    "%s\n", dv->sdev_name));
2672 			continue;
2673 		}
2674 
2675 		/*
2676 		 * Check validity of node
2677 		 */
2678 		if (vtor) {
2679 			switch (vtor(dv)) {
2680 			case SDEV_VTOR_VALID:
2681 				break;
2682 			case SDEV_VTOR_INVALID:
2683 			case SDEV_VTOR_SKIP:
2684 				continue;
2685 			default:
2686 				cmn_err(CE_PANIC,
2687 				    "dev fs: validator failed: %s(%p)\n",
2688 				    dv->sdev_name, (void *)dv);
2689 				break;
2690 			/*NOTREACHED*/
2691 			}
2692 		}
2693 
2694 		namelen = strlen(dv->sdev_name);
2695 		reclen = DIRENT64_RECLEN(namelen);
2696 		if (outcount + reclen > alloc_count) {
2697 			goto full;
2698 		}
2699 		dp->d_reclen = (ushort_t)reclen;
2700 		dp->d_ino = (ino64_t)dv->sdev_ino;
2701 		dp->d_off = (off64_t)diroff + 1;
2702 		(void) strncpy(dp->d_name, dv->sdev_name,
2703 		    DIRENT64_NAMELEN(reclen));
2704 		outcount += reclen;
2705 		dp = nextdp(dp);
2706 	}
2707 
2708 full:
2709 	sdcmn_err4(("sdev_readdir: moving %lu bytes: "
2710 	    "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
2711 	    (void *)dv));
2712 
2713 	if (outcount)
2714 		error = uiomove(outbuf, outcount, UIO_READ, uiop);
2715 
2716 	if (!error) {
2717 		uiop->uio_loffset = diroff;
2718 		if (eofp)
2719 			*eofp = dv ? 0 : 1;
2720 	}
2721 
2722 
2723 	if (ddv->sdev_attrvp) {
2724 		gethrestime(&now);
2725 		attr.va_ctime = now;
2726 		attr.va_atime = now;
2727 		attr.va_mask = AT_CTIME|AT_ATIME;
2728 
2729 		(void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
2730 	}
2731 done:
2732 	kmem_free(outbuf, alloc_count);
2733 	return (error);
2734 }
2735 
2736 static int
2737 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2738 {
2739 	vnode_t *vp;
2740 	vnode_t *cvp;
2741 	struct sdev_node *svp;
2742 	char *nm;
2743 	struct pathname pn;
2744 	int error;
2745 	int persisted = 0;
2746 
2747 	ASSERT(INGLOBALZONE(curproc));
2748 
2749 	if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2750 		return (error);
2751 	nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2752 
2753 	vp = rootdir;
2754 	VN_HOLD(vp);
2755 
2756 	while (pn_pathleft(&pn)) {
2757 		ASSERT(vp->v_type == VDIR || vp->v_type == VLNK);
2758 		(void) pn_getcomponent(&pn, nm);
2759 
2760 		/*
2761 		 * Deal with the .. special case where we may be
2762 		 * traversing up across a mount point, to the
2763 		 * root of this filesystem or global root.
2764 		 */
2765 		if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) {
2766 checkforroot:
2767 			if (VN_CMP(vp, rootdir)) {
2768 				nm[1] = 0;
2769 			} else if (vp->v_flag & VROOT) {
2770 				vfs_t *vfsp;
2771 				cvp = vp;
2772 				vfsp = cvp->v_vfsp;
2773 				vfs_rlock_wait(vfsp);
2774 				vp = cvp->v_vfsp->vfs_vnodecovered;
2775 				if (vp == NULL ||
2776 				    (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
2777 					vfs_unlock(vfsp);
2778 					VN_RELE(cvp);
2779 					error = EIO;
2780 					break;
2781 				}
2782 				VN_HOLD(vp);
2783 				vfs_unlock(vfsp);
2784 				VN_RELE(cvp);
2785 				cvp = NULL;
2786 				goto checkforroot;
2787 			}
2788 		}
2789 
2790 		error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2791 		    NULL, NULL);
2792 		if (error) {
2793 			VN_RELE(vp);
2794 			break;
2795 		}
2796 
2797 		/* traverse mount points encountered on our journey */
2798 		if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2799 			VN_RELE(vp);
2800 			VN_RELE(cvp);
2801 			break;
2802 		}
2803 
2804 		/*
2805 		 * symbolic link, can be either relative and absolute
2806 		 */
2807 		if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) {
2808 			struct pathname linkpath;
2809 			pn_alloc(&linkpath);
2810 			if (error = pn_getsymlink(cvp, &linkpath, kcred)) {
2811 				pn_free(&linkpath);
2812 				break;
2813 			}
2814 			if (pn_pathleft(&linkpath) == 0)
2815 				(void) pn_set(&linkpath, ".");
2816 			error = pn_insert(&pn, &linkpath, strlen(nm));
2817 			pn_free(&linkpath);
2818 			if (pn.pn_pathlen == 0) {
2819 				VN_RELE(vp);
2820 				return (ENOENT);
2821 			}
2822 			if (pn.pn_path[0] == '/') {
2823 				pn_skipslash(&pn);
2824 				VN_RELE(vp);
2825 				VN_RELE(cvp);
2826 				vp = rootdir;
2827 				VN_HOLD(vp);
2828 			} else {
2829 				VN_RELE(cvp);
2830 			}
2831 			continue;
2832 		}
2833 
2834 		VN_RELE(vp);
2835 
2836 		/*
2837 		 * Direct the operation to the persisting filesystem
2838 		 * underlying /dev.  Bail if we encounter a
2839 		 * non-persistent dev entity here.
2840 		 */
2841 		if (cvp->v_vfsp->vfs_fstype == devtype) {
2842 
2843 			if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2844 				error = ENOENT;
2845 				VN_RELE(cvp);
2846 				break;
2847 			}
2848 
2849 			if (VTOSDEV(cvp) == NULL) {
2850 				error = ENOENT;
2851 				VN_RELE(cvp);
2852 				break;
2853 			}
2854 			svp = VTOSDEV(cvp);
2855 			if ((vp = svp->sdev_attrvp) == NULL) {
2856 				error = ENOENT;
2857 				VN_RELE(cvp);
2858 				break;
2859 			}
2860 			persisted = 1;
2861 			VN_HOLD(vp);
2862 			VN_RELE(cvp);
2863 			cvp = vp;
2864 		}
2865 
2866 		vp = cvp;
2867 		pn_skipslash(&pn);
2868 	}
2869 
2870 	kmem_free(nm, MAXNAMELEN);
2871 	pn_free(&pn);
2872 
2873 	if (error)
2874 		return (error);
2875 
2876 	/*
2877 	 * Only return persisted nodes in the filesystem underlying /dev.
2878 	 */
2879 	if (!persisted) {
2880 		VN_RELE(vp);
2881 		return (ENOENT);
2882 	}
2883 
2884 	*r_vp = vp;
2885 	return (0);
2886 }
2887 
2888 int
2889 sdev_modctl_readdir(const char *dir, char ***dirlistp,
2890 	int *npathsp, int *npathsp_alloc, int checking_empty)
2891 {
2892 	char	**pathlist = NULL;
2893 	char	**newlist = NULL;
2894 	int	npaths = 0;
2895 	int	npaths_alloc = 0;
2896 	dirent64_t *dbuf = NULL;
2897 	int	n;
2898 	char	*s;
2899 	int error;
2900 	vnode_t *vp;
2901 	int eof;
2902 	struct iovec iov;
2903 	struct uio uio;
2904 	struct dirent64 *dp;
2905 	size_t dlen;
2906 	size_t dbuflen;
2907 	int ndirents = 64;
2908 	char *nm;
2909 
2910 	error = sdev_modctl_lookup(dir, &vp);
2911 	sdcmn_err11(("modctl readdir: %s by %s: %s\n",
2912 	    dir, curproc->p_user.u_comm,
2913 	    (error == 0) ? "ok" : "failed"));
2914 	if (error)
2915 		return (error);
2916 
2917 	dlen = ndirents * (sizeof (*dbuf));
2918 	dbuf = kmem_alloc(dlen, KM_SLEEP);
2919 
2920 	uio.uio_iov = &iov;
2921 	uio.uio_iovcnt = 1;
2922 	uio.uio_segflg = UIO_SYSSPACE;
2923 	uio.uio_fmode = 0;
2924 	uio.uio_extflg = UIO_COPY_CACHED;
2925 	uio.uio_loffset = 0;
2926 	uio.uio_llimit = MAXOFFSET_T;
2927 
2928 	eof = 0;
2929 	error = 0;
2930 	while (!error && !eof) {
2931 		uio.uio_resid = dlen;
2932 		iov.iov_base = (char *)dbuf;
2933 		iov.iov_len = dlen;
2934 
2935 		(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2936 		error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
2937 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2938 
2939 		dbuflen = dlen - uio.uio_resid;
2940 
2941 		if (error || dbuflen == 0)
2942 			break;
2943 
2944 		for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
2945 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
2946 
2947 			nm = dp->d_name;
2948 
2949 			if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
2950 				continue;
2951 			if (npaths == npaths_alloc) {
2952 				npaths_alloc += 64;
2953 				newlist = (char **)
2954 				    kmem_zalloc((npaths_alloc + 1) *
2955 				    sizeof (char *), KM_SLEEP);
2956 				if (pathlist) {
2957 					bcopy(pathlist, newlist,
2958 					    npaths * sizeof (char *));
2959 					kmem_free(pathlist,
2960 					    (npaths + 1) * sizeof (char *));
2961 				}
2962 				pathlist = newlist;
2963 			}
2964 			n = strlen(nm) + 1;
2965 			s = kmem_alloc(n, KM_SLEEP);
2966 			bcopy(nm, s, n);
2967 			pathlist[npaths++] = s;
2968 			sdcmn_err11(("  %s/%s\n", dir, s));
2969 
2970 			/* if checking empty, one entry is as good as many */
2971 			if (checking_empty) {
2972 				eof = 1;
2973 				break;
2974 			}
2975 		}
2976 	}
2977 
2978 exit:
2979 	VN_RELE(vp);
2980 
2981 	if (dbuf)
2982 		kmem_free(dbuf, dlen);
2983 
2984 	if (error)
2985 		return (error);
2986 
2987 	*dirlistp = pathlist;
2988 	*npathsp = npaths;
2989 	*npathsp_alloc = npaths_alloc;
2990 
2991 	return (0);
2992 }
2993 
/*
 * Free a name list produced by sdev_modctl_readdir().
 *
 *	pathlist	- array of name strings; may be NULL
 *	npaths		- number of names stored in the array
 *	npaths_alloc	- slot count reported by sdev_modctl_readdir(); the
 *			  array itself was allocated with one extra slot
 *
 * sdev_modctl_readdir() reports a NULL list (with both counts 0) when it
 * found no names, so a NULL pathlist is accepted and ignored rather than
 * being handed to kmem_free() with a non-zero size.
 */
void
sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
{
	int	i, n;

	if (pathlist == NULL)
		return;

	/* every name was allocated as strlen + 1 bytes */
	for (i = 0; i < npaths; i++) {
		n = strlen(pathlist[i]) + 1;
		kmem_free(pathlist[i], n);
	}

	kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
}
3006 
3007 int
3008 sdev_modctl_devexists(const char *path)
3009 {
3010 	vnode_t *vp;
3011 	int error;
3012 
3013 	error = sdev_modctl_lookup(path, &vp);
3014 	sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
3015 	    path, curproc->p_user.u_comm,
3016 	    (error == 0) ? "ok" : "failed"));
3017 	if (error == 0)
3018 		VN_RELE(vp);
3019 
3020 	return (error);
3021 }
3022 
3023 extern int sdev_vnodeops_tbl_size;
3024 
3025 /*
3026  * construct a new template with overrides from vtab
3027  */
3028 static fs_operation_def_t *
3029 sdev_merge_vtab(const fs_operation_def_t tab[])
3030 {
3031 	fs_operation_def_t *new;
3032 	const fs_operation_def_t *tab_entry;
3033 
3034 	/* make a copy of standard vnode ops table */
3035 	new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
3036 	bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
3037 
3038 	/* replace the overrides from tab */
3039 	for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
3040 		fs_operation_def_t *std_entry = new;
3041 		while (std_entry->name) {
3042 			if (strcmp(tab_entry->name, std_entry->name) == 0) {
3043 				std_entry->func = tab_entry->func;
3044 				break;
3045 			}
3046 			std_entry++;
3047 		}
3048 		if (std_entry->name == NULL)
3049 			cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
3050 			    tab_entry->name);
3051 	}
3052 
3053 	return (new);
3054 }
3055 
3056 /* free memory allocated by sdev_merge_vtab */
3057 static void
3058 sdev_free_vtab(fs_operation_def_t *new)
3059 {
3060 	kmem_free(new, sdev_vnodeops_tbl_size);
3061 }
3062 
3063 /*
3064  * a generic setattr() function
3065  *
3066  * note: flags only supports AT_UID and AT_GID.
3067  *	 Future enhancements can be done for other types, e.g. AT_MODE
3068  */
3069 int
3070 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
3071     struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
3072     int), int protocol)
3073 {
3074 	struct sdev_node	*dv = VTOSDEV(vp);
3075 	struct sdev_node	*parent = dv->sdev_dotdot;
3076 	struct vattr		*get;
3077 	uint_t			mask = vap->va_mask;
3078 	int 			error;
3079 
3080 	/* some sanity checks */
3081 	if (vap->va_mask & AT_NOSET)
3082 		return (EINVAL);
3083 
3084 	if (vap->va_mask & AT_SIZE) {
3085 		if (vp->v_type == VDIR) {
3086 			return (EISDIR);
3087 		}
3088 	}
3089 
3090 	/* no need to set attribute, but do not fail either */
3091 	ASSERT(parent);
3092 	rw_enter(&parent->sdev_contents, RW_READER);
3093 	if (dv->sdev_state == SDEV_ZOMBIE) {
3094 		rw_exit(&parent->sdev_contents);
3095 		return (0);
3096 	}
3097 
3098 	/* If backing store exists, just set it. */
3099 	if (dv->sdev_attrvp) {
3100 		rw_exit(&parent->sdev_contents);
3101 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3102 	}
3103 
3104 	/*
3105 	 * Otherwise, for nodes with the persistence attribute, create it.
3106 	 */
3107 	ASSERT(dv->sdev_attr);
3108 	if (SDEV_IS_PERSIST(dv) ||
3109 	    ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
3110 		sdev_vattr_merge(dv, vap);
3111 		rw_enter(&dv->sdev_contents, RW_WRITER);
3112 		error = sdev_shadow_node(dv, cred);
3113 		rw_exit(&dv->sdev_contents);
3114 		rw_exit(&parent->sdev_contents);
3115 
3116 		if (error)
3117 			return (error);
3118 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3119 	}
3120 
3121 
3122 	/*
3123 	 * sdev_attr was allocated in sdev_mknode
3124 	 */
3125 	rw_enter(&dv->sdev_contents, RW_WRITER);
3126 	error = secpolicy_vnode_setattr(cred, vp, vap,
3127 	    dv->sdev_attr, flags, sdev_unlocked_access, dv);
3128 	if (error) {
3129 		rw_exit(&dv->sdev_contents);
3130 		rw_exit(&parent->sdev_contents);
3131 		return (error);
3132 	}
3133 
3134 	get = dv->sdev_attr;
3135 	if (mask & AT_MODE) {
3136 		get->va_mode &= S_IFMT;
3137 		get->va_mode |= vap->va_mode & ~S_IFMT;
3138 	}
3139 
3140 	if ((mask & AT_UID) || (mask & AT_GID)) {
3141 		if (mask & AT_UID)
3142 			get->va_uid = vap->va_uid;
3143 		if (mask & AT_GID)
3144 			get->va_gid = vap->va_gid;
3145 		/*
3146 		 * a callback must be provided if the protocol is set
3147 		 */
3148 		if ((protocol & AT_UID) || (protocol & AT_GID)) {
3149 			ASSERT(callback);
3150 			error = callback(dv, get, protocol);
3151 			if (error) {
3152 				rw_exit(&dv->sdev_contents);
3153 				rw_exit(&parent->sdev_contents);
3154 				return (error);
3155 			}
3156 		}
3157 	}
3158 
3159 	if (mask & AT_ATIME)
3160 		get->va_atime = vap->va_atime;
3161 	if (mask & AT_MTIME)
3162 		get->va_mtime = vap->va_mtime;
3163 	if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
3164 		gethrestime(&get->va_ctime);
3165 	}
3166 
3167 	sdev_vattr_merge(dv, get);
3168 	rw_exit(&dv->sdev_contents);
3169 	rw_exit(&parent->sdev_contents);
3170 	return (0);
3171 }
3172 
3173 /*
3174  * a generic inactive() function
3175  */
3176 /*ARGSUSED*/
3177 void
3178 devname_inactive_func(struct vnode *vp, struct cred *cred,
3179     void (*callback)(struct vnode *))
3180 {
3181 	int clean;
3182 	struct sdev_node *dv = VTOSDEV(vp);
3183 	struct sdev_node *ddv = dv->sdev_dotdot;
3184 	int state;
3185 
3186 	rw_enter(&ddv->sdev_contents, RW_WRITER);
3187 	state = dv->sdev_state;
3188 
3189 	mutex_enter(&vp->v_lock);
3190 	ASSERT(vp->v_count >= 1);
3191 
3192 	if (vp->v_count == 1 && callback != NULL)
3193 		callback(vp);
3194 
3195 	clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
3196 
3197 	/*
3198 	 * last ref count on the ZOMBIE node is released.
3199 	 * clean up the sdev_node, and
3200 	 * release the hold on the backing store node so that
3201 	 * the ZOMBIE backing stores also cleaned out.
3202 	 */
3203 	if (clean) {
3204 		ASSERT(ddv);
3205 
3206 		ddv->sdev_nlink--;
3207 		if (vp->v_type == VDIR) {
3208 			dv->sdev_nlink--;
3209 		}
3210 		if ((dv->sdev_flags & SDEV_STALE) == 0)
3211 			avl_remove(&ddv->sdev_entries, dv);
3212 		dv->sdev_nlink--;
3213 		--vp->v_count;
3214 		mutex_exit(&vp->v_lock);
3215 		sdev_nodedestroy(dv, 0);
3216 	} else {
3217 		--vp->v_count;
3218 		mutex_exit(&vp->v_lock);
3219 	}
3220 	rw_exit(&ddv->sdev_contents);
3221 }
3222