xref: /illumos-gate/usr/src/uts/common/fs/dev/sdev_subr.c (revision 46b592853d0f4f11781b6b0a7533f267c6aee132)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * utility routines for the /dev fs
28  */
29 
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/t_lock.h>
33 #include <sys/systm.h>
34 #include <sys/sysmacros.h>
35 #include <sys/user.h>
36 #include <sys/time.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/file.h>
40 #include <sys/fcntl.h>
41 #include <sys/flock.h>
42 #include <sys/kmem.h>
43 #include <sys/uio.h>
44 #include <sys/errno.h>
45 #include <sys/stat.h>
46 #include <sys/cred.h>
47 #include <sys/dirent.h>
48 #include <sys/pathname.h>
49 #include <sys/cmn_err.h>
50 #include <sys/debug.h>
51 #include <sys/mode.h>
52 #include <sys/policy.h>
53 #include <fs/fs_subr.h>
54 #include <sys/mount.h>
55 #include <sys/fs/snode.h>
56 #include <sys/fs/dv_node.h>
57 #include <sys/fs/sdev_impl.h>
58 #include <sys/sunndi.h>
59 #include <sys/sunmdi.h>
60 #include <sys/conf.h>
61 #include <sys/proc.h>
62 #include <sys/user.h>
63 #include <sys/modctl.h>
64 
#ifdef DEBUG
/* debug message control; presumably the mask tested by sdcmn_err*() -- confirm */
int sdev_debug = 0x00000001;
/* extra kmem_cache_create() flags applied to sdev_node_cache on DEBUG kernels */
int sdev_debug_cache_flags = 0;
#endif
69 
70 /*
71  * globals
72  */
73 /* prototype memory vattrs */
/* prototype attributes for a directory (VDIR) node */
vattr_t sdev_vattr_dir = {
	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
	VDIR,					/* va_type */
	SDEV_DIRMODE_DEFAULT,			/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
92 
/*
 * prototype attributes for a symbolic link (VLNK) node
 *
 * NOTE(review): unlike the other prototypes, va_mask here omits
 * AT_UID|AT_GID even though default uid/gid values are supplied;
 * confirm whether that is intentional.
 */
vattr_t sdev_vattr_lnk = {
	AT_TYPE|AT_MODE,			/* va_mask */
	VLNK,					/* va_type */
	SDEV_LNKMODE_DEFAULT,			/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
111 
/* prototype attributes for a block device (VBLK) node */
vattr_t sdev_vattr_blk = {
	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
	VBLK,					/* va_type */
	S_IFBLK | SDEV_DEVMODE_DEFAULT,		/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
130 
/* prototype attributes for a character device (VCHR) node */
vattr_t sdev_vattr_chr = {
	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
	VCHR,					/* va_type */
	S_IFCHR | SDEV_DEVMODE_DEFAULT,		/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
149 
/* kmem cache all sdev_nodes are allocated from; see sdev_node_cache_init() */
kmem_cache_t	*sdev_node_cache;	/* sdev_node cache */
int		devtype;		/* fstype */

/* forward declarations of file-local helpers defined further down */
static struct vnodeops *sdev_get_vop(struct sdev_node *);
static void sdev_set_no_negcache(struct sdev_node *);
static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
static void sdev_free_vtab(fs_operation_def_t *);
158 
159 static void
160 sdev_prof_free(struct sdev_node *dv)
161 {
162 	ASSERT(!SDEV_IS_GLOBAL(dv));
163 	if (dv->sdev_prof.dev_name)
164 		nvlist_free(dv->sdev_prof.dev_name);
165 	if (dv->sdev_prof.dev_map)
166 		nvlist_free(dv->sdev_prof.dev_map);
167 	if (dv->sdev_prof.dev_symlink)
168 		nvlist_free(dv->sdev_prof.dev_symlink);
169 	if (dv->sdev_prof.dev_glob_incdir)
170 		nvlist_free(dv->sdev_prof.dev_glob_incdir);
171 	if (dv->sdev_prof.dev_glob_excdir)
172 		nvlist_free(dv->sdev_prof.dev_glob_excdir);
173 	bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
174 }
175 
176 /* sdev_node cache constructor */
177 /*ARGSUSED1*/
178 static int
179 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
180 {
181 	struct sdev_node *dv = (struct sdev_node *)buf;
182 	struct vnode *vp;
183 
184 	bzero(buf, sizeof (struct sdev_node));
185 	vp = dv->sdev_vnode = vn_alloc(flag);
186 	if (vp == NULL) {
187 		return (-1);
188 	}
189 	vp->v_data = dv;
190 	rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
191 	return (0);
192 }
193 
194 /* sdev_node cache destructor */
195 /*ARGSUSED1*/
196 static void
197 i_sdev_node_dtor(void *buf, void *arg)
198 {
199 	struct sdev_node *dv = (struct sdev_node *)buf;
200 	struct vnode *vp = SDEVTOV(dv);
201 
202 	rw_destroy(&dv->sdev_contents);
203 	vn_free(vp);
204 }
205 
206 /* initialize sdev_node cache */
207 void
208 sdev_node_cache_init()
209 {
210 	int flags = 0;
211 
212 #ifdef	DEBUG
213 	flags = sdev_debug_cache_flags;
214 	if (flags)
215 		sdcmn_err(("cache debug flags 0x%x\n", flags));
216 #endif	/* DEBUG */
217 
218 	ASSERT(sdev_node_cache == NULL);
219 	sdev_node_cache = kmem_cache_create("sdev_node_cache",
220 	    sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
221 	    NULL, NULL, NULL, flags);
222 }
223 
224 /* destroy sdev_node cache */
225 void
226 sdev_node_cache_fini()
227 {
228 	ASSERT(sdev_node_cache != NULL);
229 	kmem_cache_destroy(sdev_node_cache);
230 	sdev_node_cache = NULL;
231 }
232 
233 /*
234  * Compare two nodes lexographically to balance avl tree
235  */
236 static int
237 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
238 {
239 	int rv;
240 	if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
241 		return (0);
242 	return ((rv < 0) ? -1 : 1);
243 }
244 
245 void
246 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
247 {
248 	ASSERT(dv);
249 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
250 	dv->sdev_state = state;
251 }
252 
253 static void
254 sdev_attrinit(struct sdev_node *dv, vattr_t *vap)
255 {
256 	timestruc_t now;
257 
258 	ASSERT(vap);
259 
260 	dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
261 	*dv->sdev_attr = *vap;
262 
263 	dv->sdev_attr->va_mode = MAKEIMODE(vap->va_type, vap->va_mode);
264 
265 	gethrestime(&now);
266 	dv->sdev_attr->va_atime = now;
267 	dv->sdev_attr->va_mtime = now;
268 	dv->sdev_attr->va_ctime = now;
269 }
270 
/*
 * Allocate and initialize a sdev_node named `nm' under parent `ddv'.
 *
 * The node is taken from sdev_node_cache.  Its name and its path
 * ("<parent path>/<nm>") are copied into freshly allocated buffers.
 * Flags are inherited from the parent with SDEV_BUILD added, and
 * SDEV_GLOBAL is set or cleared to match the parent.  If vap is
 * non-NULL the vnode type and an in-memory attribute copy are taken
 * from it.  The node is left in SDEV_INIT state and handed back
 * through *newdv; it is not linked into the parent here.
 *
 * Returns 0 on success, or ENAMETOOLONG (with *newdv set to NULL) when
 * the name plus its terminator exceeds MAXNAMELEN.
 */
int
sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
    vattr_t *vap)
{
	struct sdev_node *dv = NULL;
	struct vnode *vp;
	size_t nmlen, len;
	devname_handle_t  *dhl;

	/* nmlen counts the terminating '\0' */
	nmlen = strlen(nm) + 1;
	if (nmlen > MAXNAMELEN) {
		sdcmn_err9(("sdev_nodeinit: node name %s"
		    " too long\n", nm));
		*newdv = NULL;
		return (ENAMETOOLONG);
	}

	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);

	dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
	bcopy(nm, dv->sdev_name, nmlen);
	dv->sdev_namelen = nmlen - 1;	/* '\0' not included */
	/* room for parent path, '/', name and the terminator */
	len = strlen(ddv->sdev_path) + strlen(nm) + 2;
	dv->sdev_path = kmem_alloc(len, KM_SLEEP);
	(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
	/* overwritten for VLNK nodes */
	dv->sdev_symlink = NULL;

	vp = SDEVTOV(dv);
	vn_reinit(vp);
	vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
	if (vap)
		vp->v_type = vap->va_type;

	/*
	 * initialized to the parent's vnodeops.
	 * may be overwritten for a VDIR
	 */
	vn_setops(vp, vn_getops(SDEVTOV(ddv)));
	vn_exists(vp);

	dv->sdev_dotdot = NULL;
	dv->sdev_attrvp = NULL;
	if (vap) {
		sdev_attrinit(dv, vap);
	} else {
		dv->sdev_attr = NULL;
	}

	dv->sdev_ino = sdev_mkino(dv);
	dv->sdev_nlink = 0;		/* updated on insert */
	dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
	dv->sdev_flags |= SDEV_BUILD;
	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
	if (SDEV_IS_GLOBAL(ddv)) {
		/* global node: set up the devname handle and negcache flag */
		dv->sdev_flags |= SDEV_GLOBAL;
		dhl = &(dv->sdev_handle);
		dhl->dh_data = dv;
		dhl->dh_args = NULL;
		sdev_set_no_negcache(dv);
		dv->sdev_gdir_gen = 0;
	} else {
		/* non-global node: start with an empty profile */
		dv->sdev_flags &= ~SDEV_GLOBAL;
		dv->sdev_origin = NULL; /* set later */
		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
		dv->sdev_ldir_gen = 0;
		dv->sdev_devtree_gen = 0;
	}

	/* sdev_set_nodestate() requires the contents lock held as writer */
	rw_enter(&dv->sdev_contents, RW_WRITER);
	sdev_set_nodestate(dv, SDEV_INIT);
	rw_exit(&dv->sdev_contents);
	*newdv = dv;

	return (0);
}
349 
/*
 * transition a sdev_node into SDEV_READY state
 *
 * arguments:
 *	- dv (node to make ready; must not already be SDEV_READY)
 *	- vap (attributes; va_type and va_rdev are copied to the vnode)
 *	- avp (attribute vnode, or NULL)
 *	- args (symlink target string for VLNK; stored as sdev_origin
 *	  for a non-global node)
 *	- cred (passed to sdev_shadow_node())
 *
 * Returns 0 with the node in SDEV_READY state, or the error from
 * sdev_shadow_node() with the node marked SDEV_ZOMBIE.
 *
 * NOTE(review): for a non-global VLNK node, args is used both as the
 * link target string and as sdev_origin -- confirm against callers.
 */
int
sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
    void *args, struct cred *cred)
{
	int error = 0;
	struct vnode *vp = SDEVTOV(dv);
	vtype_t type;

	ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);

	type = vap->va_type;
	vp->v_type = type;
	vp->v_rdev = vap->va_rdev;
	rw_enter(&dv->sdev_contents, RW_WRITER);
	if (type == VDIR) {
		/* a directory starts with two links */
		dv->sdev_nlink = 2;
		dv->sdev_flags &= ~SDEV_PERSIST;
		dv->sdev_flags &= ~SDEV_DYNAMIC;
		vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
		ASSERT(dv->sdev_dotdot);
		ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
		/* a directory takes its v_rdev from the parent directory */
		vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
		avl_create(&dv->sdev_entries,
		    (int (*)(const void *, const void *))sdev_compare_nodes,
		    sizeof (struct sdev_node),
		    offsetof(struct sdev_node, sdev_avllink));
	} else if (type == VLNK) {
		ASSERT(args);
		dv->sdev_nlink = 1;
		/* keep a private copy of the link target */
		dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
	} else {
		dv->sdev_nlink = 1;
	}

	if (!(SDEV_IS_GLOBAL(dv))) {
		dv->sdev_origin = (struct sdev_node *)args;
		dv->sdev_flags &= ~SDEV_PERSIST;
	}

	/*
	 * shadow node is created here OR
	 * if failed (indicated by dv->sdev_attrvp == NULL),
	 * created later in sdev_setattr
	 */
	if (avp) {
		dv->sdev_attrvp = avp;
	} else {
		/* no attribute vnode: keep the attributes in memory */
		if (dv->sdev_attr == NULL)
			sdev_attrinit(dv, vap);
		else
			*dv->sdev_attr = *vap;

		if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv))
			error = sdev_shadow_node(dv, cred);
	}

	if (error == 0) {
		/* transition to READY state */
		sdev_set_nodestate(dv, SDEV_READY);
		sdev_nc_node_exists(dv);
	} else {
		sdev_set_nodestate(dv, SDEV_ZOMBIE);
	}
	rw_exit(&dv->sdev_contents);
	return (error);
}
419 
420 /*
421  * setting ZOMBIE state
422  */
423 static int
424 sdev_nodezombied(struct sdev_node *dv)
425 {
426 	rw_enter(&dv->sdev_contents, RW_WRITER);
427 	sdev_set_nodestate(dv, SDEV_ZOMBIE);
428 	rw_exit(&dv->sdev_contents);
429 	return (0);
430 }
431 
/*
 * Build the VROOT sdev_node.
 *
 * arguments:
 *	- vfsp (vfs the root belongs to; its mount point, if set,
 *	  becomes the node name)
 *	- devdev (stored as the root vnode's v_rdev)
 *	- mvp (unused here)
 *	- avp (attribute vnode for the root; must be non-NULL)
 *	- cred (unused here)
 *
 * The root is its own parent, has a link count of 2, always has the
 * path "/dev", and is returned in SDEV_READY state.  A root named
 * "/dev" is flagged global and persistent; any other name gets a
 * non-global, non-persistent root with an empty profile.
 */
/*ARGSUSED*/
struct sdev_node *
sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
    struct vnode *avp, struct cred *cred)
{
	struct sdev_node *dv;
	struct vnode *vp;
	char devdir[] = "/dev";

	ASSERT(sdev_node_cache != NULL);
	ASSERT(avp);
	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
	vp = SDEVTOV(dv);
	vn_reinit(vp);
	vp->v_flag |= VROOT;
	vp->v_vfsp = vfsp;
	vp->v_type = VDIR;
	vp->v_rdev = devdev;
	vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
	vn_exists(vp);

	if (vfsp->vfs_mntpt)
		dv->sdev_name = i_ddi_strdup(
		    (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
	else
		/* vfs_mountdev1 set mount point later */
		dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
	dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
	dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
	dv->sdev_ino = SDEV_ROOTINO;
	dv->sdev_nlink = 2;		/* name + . (no sdev_insert) */
	dv->sdev_dotdot = dv;		/* .. == self */
	dv->sdev_attrvp = avp;
	dv->sdev_attr = NULL;
	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
	/* the name decides whether this root is the global one */
	if (strcmp(dv->sdev_name, "/dev") == 0) {
		dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
		bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
		dv->sdev_gdir_gen = 0;
	} else {
		dv->sdev_flags = SDEV_BUILD;
		dv->sdev_flags &= ~SDEV_PERSIST;
		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
		dv->sdev_ldir_gen = 0;
		dv->sdev_devtree_gen = 0;
	}

	/* directory entries are kept in an AVL tree ordered by name */
	avl_create(&dv->sdev_entries,
	    (int (*)(const void *, const void *))sdev_compare_nodes,
	    sizeof (struct sdev_node),
	    offsetof(struct sdev_node, sdev_avllink));

	rw_enter(&dv->sdev_contents, RW_WRITER);
	sdev_set_nodestate(dv, SDEV_READY);
	rw_exit(&dv->sdev_contents);
	sdev_nc_node_exists(dv);
	return (dv);
}
494 
/*
 * directory dependent vop table
 *
 * One entry describes a directory name that gets its own vnodeops,
 * validator and node flags.
 */
struct sdev_vop_table {
	char *vt_name;				/* subdirectory name */
	const fs_operation_def_t *vt_service;	/* vnodeops table */
	struct vnodeops *vt_vops;		/* constructed vop */
	struct vnodeops **vt_global_vops;	/* global container for vop */
	int (*vt_vtor)(struct sdev_node *);	/* validate sdev_node */
	int vt_flags;				/* SDEV_* flags for the node */
};
504 
/*
 * A nice improvement would be to provide a plug-in mechanism
 * for this table instead of a const table.
 *
 * Each entry's vt_vops starts out NULL and is filled in by
 * sdev_get_vop() the first time the vnodeops are needed.  The table
 * ends with an entry whose vt_name is NULL.
 */
static struct sdev_vop_table vtab[] =
{
	{ "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
	SDEV_DYNAMIC | SDEV_VTOR },

	{ "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate,
	SDEV_DYNAMIC | SDEV_VTOR },

	{ "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops,
	devzvol_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },

	/* no vnodeops of its own; only sets SDEV_NO_NCACHE */
	{ "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },

	{ "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
	SDEV_DYNAMIC | SDEV_VTOR },

	{ "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops,
	devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE },

	{ NULL, NULL, NULL, NULL, NULL, 0}
};
530 
531 struct sdev_vop_table *
532 sdev_match(struct sdev_node *dv)
533 {
534 	int vlen;
535 	int i;
536 
537 	for (i = 0; vtab[i].vt_name; i++) {
538 		if (strcmp(vtab[i].vt_name, dv->sdev_name) == 0)
539 			return (&vtab[i]);
540 		if (vtab[i].vt_flags & SDEV_SUBDIR) {
541 			char *ptr;
542 
543 			ASSERT(strlen(dv->sdev_path) > 5);
544 			ptr = dv->sdev_path + 5;
545 			vlen = strlen(vtab[i].vt_name);
546 			if ((strncmp(vtab[i].vt_name, ptr,
547 			    vlen - 1) == 0) && ptr[vlen] == '/')
548 				return (&vtab[i]);
549 		}
550 
551 	}
552 	return (NULL);
553 }
554 
555 /*
556  *  sets a directory's vnodeops if the directory is in the vtab;
557  */
558 static struct vnodeops *
559 sdev_get_vop(struct sdev_node *dv)
560 {
561 	struct sdev_vop_table *vtp;
562 	char *path;
563 
564 	path = dv->sdev_path;
565 	ASSERT(path);
566 
567 	/* gets the relative path to /dev/ */
568 	path += 5;
569 
570 	/* gets the vtab entry it matches */
571 	if ((vtp = sdev_match(dv)) != NULL) {
572 		dv->sdev_flags |= vtp->vt_flags;
573 
574 		if (vtp->vt_vops) {
575 			if (vtp->vt_global_vops)
576 				*(vtp->vt_global_vops) = vtp->vt_vops;
577 			return (vtp->vt_vops);
578 		}
579 
580 		if (vtp->vt_service) {
581 			fs_operation_def_t *templ;
582 			templ = sdev_merge_vtab(vtp->vt_service);
583 			if (vn_make_ops(vtp->vt_name,
584 			    (const fs_operation_def_t *)templ,
585 			    &vtp->vt_vops) != 0) {
586 				cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
587 				    vtp->vt_name);
588 				/*NOTREACHED*/
589 			}
590 			if (vtp->vt_global_vops) {
591 				*(vtp->vt_global_vops) = vtp->vt_vops;
592 			}
593 			sdev_free_vtab(templ);
594 			return (vtp->vt_vops);
595 		}
596 		return (sdev_vnodeops);
597 	}
598 
599 	/* child inherits the persistence of the parent */
600 	if (SDEV_IS_PERSIST(dv->sdev_dotdot))
601 		dv->sdev_flags |= SDEV_PERSIST;
602 
603 	return (sdev_vnodeops);
604 }
605 
606 static void
607 sdev_set_no_negcache(struct sdev_node *dv)
608 {
609 	int i;
610 	char *path;
611 
612 	ASSERT(dv->sdev_path);
613 	path = dv->sdev_path + strlen("/dev/");
614 
615 	for (i = 0; vtab[i].vt_name; i++) {
616 		if (strcmp(vtab[i].vt_name, path) == 0) {
617 			if (vtab[i].vt_flags & SDEV_NO_NCACHE)
618 				dv->sdev_flags |= SDEV_NO_NCACHE;
619 			break;
620 		}
621 	}
622 }
623 
624 void *
625 sdev_get_vtor(struct sdev_node *dv)
626 {
627 	struct sdev_vop_table *vtp;
628 
629 	vtp = sdev_match(dv);
630 	if (vtp)
631 		return ((void *)vtp->vt_vtor);
632 	else
633 		return (NULL);
634 }
635 
636 /*
637  * Build the base root inode
638  */
639 ino_t
640 sdev_mkino(struct sdev_node *dv)
641 {
642 	ino_t	ino;
643 
644 	/*
645 	 * for now, follow the lead of tmpfs here
646 	 * need to someday understand the requirements here
647 	 */
648 	ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
649 	ino += SDEV_ROOTINO + 1;
650 
651 	return (ino);
652 }
653 
654 int
655 sdev_getlink(struct vnode *linkvp, char **link)
656 {
657 	int err;
658 	char *buf;
659 	struct uio uio = {0};
660 	struct iovec iov = {0};
661 
662 	if (linkvp == NULL)
663 		return (ENOENT);
664 	ASSERT(linkvp->v_type == VLNK);
665 
666 	buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
667 	iov.iov_base = buf;
668 	iov.iov_len = MAXPATHLEN;
669 	uio.uio_iov = &iov;
670 	uio.uio_iovcnt = 1;
671 	uio.uio_resid = MAXPATHLEN;
672 	uio.uio_segflg = UIO_SYSSPACE;
673 	uio.uio_llimit = MAXOFFSET_T;
674 
675 	err = VOP_READLINK(linkvp, &uio, kcred, NULL);
676 	if (err) {
677 		cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
678 		kmem_free(buf, MAXPATHLEN);
679 		return (ENOENT);
680 	}
681 
682 	/* mission complete */
683 	*link = i_ddi_strdup(buf, KM_SLEEP);
684 	kmem_free(buf, MAXPATHLEN);
685 	return (0);
686 }
687 
688 /*
689  * A convenient wrapper to get the devfs node vnode for a device
690  * minor functionality: readlink() of a /dev symlink
691  * Place the link into dv->sdev_symlink
692  */
693 static int
694 sdev_follow_link(struct sdev_node *dv)
695 {
696 	int err;
697 	struct vnode *linkvp;
698 	char *link = NULL;
699 
700 	linkvp = SDEVTOV(dv);
701 	if (linkvp == NULL)
702 		return (ENOENT);
703 	ASSERT(linkvp->v_type == VLNK);
704 	err = sdev_getlink(linkvp, &link);
705 	if (err) {
706 		(void) sdev_nodezombied(dv);
707 		dv->sdev_symlink = NULL;
708 		return (ENOENT);
709 	}
710 
711 	ASSERT(link != NULL);
712 	dv->sdev_symlink = link;
713 	return (0);
714 }
715 
716 static int
717 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
718 {
719 	vtype_t otype = SDEVTOV(dv)->v_type;
720 
721 	/*
722 	 * existing sdev_node has a different type.
723 	 */
724 	if (otype != nvap->va_type) {
725 		sdcmn_err9(("sdev_node_check: existing node "
726 		    "  %s type %d does not match new node type %d\n",
727 		    dv->sdev_name, otype, nvap->va_type));
728 		return (EEXIST);
729 	}
730 
731 	/*
732 	 * For a symlink, the target should be the same.
733 	 */
734 	if (otype == VLNK) {
735 		ASSERT(nargs != NULL);
736 		ASSERT(dv->sdev_symlink != NULL);
737 		if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
738 			sdcmn_err9(("sdev_node_check: existing node "
739 			    " %s has different symlink %s as new node "
740 			    " %s\n", dv->sdev_name, dv->sdev_symlink,
741 			    (char *)nargs));
742 			return (EEXIST);
743 		}
744 	}
745 
746 	return (0);
747 }
748 
/*
 * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
 *
 * arguments:
 *	- ddv (parent)
 *	- nm (child name)
 *	- newdv (in/out: when *newdv is non-NULL that existing node is
 *	  used; otherwise a node is allocated and added to ddv's
 *	  directory cache.  The resulting node is returned here)
 *	- vap (vattr for the node to be created, va_type should be set.
 *	  the defaults should be used if unknown)
 *	- avp (attribute vnode)
 *	- cred
 *	- args
 *	    . tnm (for VLNK)
 *	    . global sdev_node (for !SDEV_GLOBAL)
 *	- state: SDEV_INIT, SDEV_READY
 *
 * only ddv, nm, newdv, vap, cred are required for sdev_mknode(SDEV_INIT)
 *
 * return value:
 *	- 0 on success
 *	- ENOENT if the parent is a ZOMBIE or the node cannot be added
 *	  to the directory cache
 *	- the error from sdev_nodeinit(), sdev_nodeready() or
 *	  sdev_node_check(); for the latter two the node is released
 *	  with SDEV_SIMPLE_RELE() and *newdv is set to NULL
 *
 * NOTE:  directory contents writers lock needs to be held before
 *	  calling this routine.
 */
int
sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
    struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
    sdev_node_state_t state)
{
	int error = 0;
	sdev_node_state_t node_state;
	struct sdev_node *dv = NULL;

	ASSERT(state != SDEV_ZOMBIE);
	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));

	if (*newdv) {
		/* caller supplied an existing node */
		dv = *newdv;
	} else {
		/* allocate and initialize a sdev_node */
		if (ddv->sdev_state == SDEV_ZOMBIE) {
			sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
			    ddv->sdev_path));
			return (ENOENT);
		}

		error = sdev_nodeinit(ddv, nm, &dv, vap);
		if (error != 0) {
			sdcmn_err9(("sdev_mknode: error %d,"
			    " name %s can not be initialized\n",
			    error, nm));
			return (error);
		}
		ASSERT(dv);

		/* insert into the directory cache */
		error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
		if (error) {
			sdcmn_err9(("sdev_mknode: node %s can not"
			    " be added into directory cache\n", nm));
			return (ENOENT);
		}
	}

	ASSERT(dv);
	node_state = dv->sdev_state;
	ASSERT(node_state != SDEV_ZOMBIE);

	/* nothing more to do unless the caller wants a READY node */
	if (state == SDEV_READY) {
		switch (node_state) {
		case SDEV_INIT:
			error = sdev_nodeready(dv, vap, avp, args, cred);
			if (error) {
				sdcmn_err9(("sdev_mknode: node %s can NOT"
				    " be transitioned into READY state, "
				    "error %d\n", nm, error));
			}
			break;
		case SDEV_READY:
			/*
			 * Do some sanity checking to make sure
			 * the existing sdev_node is what has been
			 * asked for.
			 */
			error = sdev_node_check(dv, vap, args);
			break;
		default:
			break;
		}
	}

	if (!error) {
		*newdv = dv;
		ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
	} else {
		/* drop the hold and report no node to the caller */
		SDEV_SIMPLE_RELE(dv);
		*newdv = NULL;
	}

	return (error);
}
847 
848 /*
849  * convenient wrapper to change vp's ATIME, CTIME and MTIME
850  */
851 void
852 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
853 {
854 	struct vattr attr;
855 	timestruc_t now;
856 	int err;
857 
858 	ASSERT(vp);
859 	gethrestime(&now);
860 	if (mask & AT_CTIME)
861 		attr.va_ctime = now;
862 	if (mask & AT_MTIME)
863 		attr.va_mtime = now;
864 	if (mask & AT_ATIME)
865 		attr.va_atime = now;
866 
867 	attr.va_mask = (mask & AT_TIMES);
868 	err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
869 	if (err && (err != EROFS)) {
870 		sdcmn_err(("update timestamps error %d\n", err));
871 	}
872 }
873 
/*
 * Destroy a sdev_node and return it to sdev_node_cache.
 *
 * The node must have a link count of zero.  The backing store vnode
 * is released here, and the in-memory attributes, name, symlink
 * target, path and (for a non-global node) profile are freed.  A
 * directory must already be empty.  The flags argument is unused.
 */
/*ARGSUSED1*/
void
sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
{
	/* no references */
	ASSERT(dv->sdev_nlink == 0);

	if (dv->sdev_attrvp != NULLVP) {
		VN_RELE(dv->sdev_attrvp);
		/*
		 * reset the attrvp so that no more
		 * references can be made on this already
		 * vn_rele() vnode
		 */
		dv->sdev_attrvp = NULLVP;
	}

	if (dv->sdev_attr != NULL) {
		kmem_free(dv->sdev_attr, sizeof (struct vattr));
		dv->sdev_attr = NULL;
	}

	if (dv->sdev_name != NULL) {
		/* sdev_namelen excludes the terminator */
		kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
		dv->sdev_name = NULL;
	}

	if (dv->sdev_symlink != NULL) {
		kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
		dv->sdev_symlink = NULL;
	}

	if (dv->sdev_path) {
		kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
		dv->sdev_path = NULL;
	}

	if (!SDEV_IS_GLOBAL(dv))
		sdev_prof_free(dv);

	if (SDEVTOV(dv)->v_type == VDIR) {
		ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
		avl_destroy(&dv->sdev_entries);
	}

	mutex_destroy(&dv->sdev_lookup_lock);
	cv_destroy(&dv->sdev_lookup_cv);

	/* return node to initial state as per constructor */
	(void) memset((void *)&dv->sdev_instance_data, 0,
	    sizeof (dv->sdev_instance_data));
	vn_invalid(SDEVTOV(dv));
	kmem_cache_free(sdev_node_cache, dv);
}
931 
932 /*
933  * DIRECTORY CACHE lookup
934  */
935 struct sdev_node *
936 sdev_findbyname(struct sdev_node *ddv, char *nm)
937 {
938 	struct sdev_node *dv;
939 	struct sdev_node dvtmp;
940 	avl_index_t	where;
941 
942 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
943 
944 	dvtmp.sdev_name = nm;
945 	dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
946 	if (dv) {
947 		ASSERT(dv->sdev_dotdot == ddv);
948 		ASSERT(strcmp(dv->sdev_name, nm) == 0);
949 		SDEV_HOLD(dv);
950 		return (dv);
951 	}
952 	return (NULL);
953 }
954 
/*
 * Inserts a new sdev_node in a parent directory
 *
 * The caller must hold ddv's contents lock as writer.  dv must not be
 * linked yet (link count zero) and no entry with the same name may
 * already exist in ddv; a duplicate trips the VERIFY.  The parent's
 * link count is incremented; dv's own link count is not changed here.
 */
void
sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
{
	avl_index_t where;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
	ASSERT(ddv->sdev_nlink >= 2);
	ASSERT(dv->sdev_nlink == 0);

	dv->sdev_dotdot = ddv;
	/* the lookup also yields the insertion point used below */
	VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
	avl_insert(&ddv->sdev_entries, dv, where);
	ddv->sdev_nlink++;
}
973 
974 /*
975  * The following check is needed because while sdev_nodes are linked
976  * in SDEV_INIT state, they have their link counts incremented only
977  * in SDEV_READY state.
978  */
979 static void
980 decr_link(struct sdev_node *dv)
981 {
982 	if (dv->sdev_state != SDEV_INIT)
983 		dv->sdev_nlink--;
984 	else
985 		ASSERT(dv->sdev_nlink == 0);
986 }
987 
/*
 * Delete an existing dv from directory cache
 *
 * In the case of a node is still held by non-zero reference count,
 *     the node is put into ZOMBIE state. Once the reference count
 *     reaches "0", the node is unlinked and destroyed,
 *     in sdev_inactive().
 *
 * The caller must hold ddv's contents lock as writer.  One reference
 * on dv's vnode is dropped on either path.
 *
 * Returns EBUSY if other references remain (the node stays in the
 * directory), or 0 once the node has been unlinked and destroyed.
 */
static int
sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
{
	struct vnode *vp;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));

	vp = SDEVTOV(dv);
	/* v_lock protects v_count while it is examined and adjusted */
	mutex_enter(&vp->v_lock);

	/* dv is held still */
	if (vp->v_count > 1) {
		rw_enter(&dv->sdev_contents, RW_WRITER);
		if (dv->sdev_state == SDEV_READY) {
			sdcmn_err9((
			    "sdev_dirdelete: node %s busy with count %d\n",
			    dv->sdev_name, vp->v_count));
			dv->sdev_state = SDEV_ZOMBIE;
		}
		rw_exit(&dv->sdev_contents);
		/* drop this caller's reference only */
		--vp->v_count;
		mutex_exit(&vp->v_lock);
		return (EBUSY);
	}
	ASSERT(vp->v_count == 1);

	/* unlink from the memory cache */
	ddv->sdev_nlink--;	/* .. to above */
	if (vp->v_type == VDIR) {
		decr_link(dv);		/* . to self */
	}

	avl_remove(&ddv->sdev_entries, dv);
	decr_link(dv);	/* name, back to zero */
	vp->v_count--;
	mutex_exit(&vp->v_lock);

	/* destroy the node */
	sdev_nodedestroy(dv, 0);
	return (0);
}
1037 
1038 /*
1039  * check if the source is in the path of the target
1040  *
1041  * source and target are different
1042  */
1043 /*ARGSUSED2*/
1044 static int
1045 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
1046 {
1047 	int error = 0;
1048 	struct sdev_node *dotdot, *dir;
1049 
1050 	dotdot = tdv->sdev_dotdot;
1051 	ASSERT(dotdot);
1052 
1053 	/* fs root */
1054 	if (dotdot == tdv) {
1055 		return (0);
1056 	}
1057 
1058 	for (;;) {
1059 		/*
1060 		 * avoid error cases like
1061 		 *	mv a a/b
1062 		 *	mv a a/b/c
1063 		 *	etc.
1064 		 */
1065 		if (dotdot == sdv) {
1066 			error = EINVAL;
1067 			break;
1068 		}
1069 
1070 		dir = dotdot;
1071 		dotdot = dir->sdev_dotdot;
1072 
1073 		/* done checking because root is reached */
1074 		if (dir == dotdot) {
1075 			break;
1076 		}
1077 	}
1078 	return (error);
1079 }
1080 
1081 int
1082 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
1083     struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
1084     struct cred *cred)
1085 {
1086 	int error = 0;
1087 	struct vnode *ovp = SDEVTOV(odv);
1088 	struct vnode *nvp;
1089 	struct vattr vattr;
1090 	int doingdir = (ovp->v_type == VDIR);
1091 	char *link = NULL;
1092 	int samedir = (oddv == nddv) ? 1 : 0;
1093 	int bkstore = 0;
1094 	struct sdev_node *idv = NULL;
1095 	struct sdev_node *ndv = NULL;
1096 	timestruc_t now;
1097 
1098 	vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1099 	error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
1100 	if (error)
1101 		return (error);
1102 
1103 	if (!samedir)
1104 		rw_enter(&oddv->sdev_contents, RW_WRITER);
1105 	rw_enter(&nddv->sdev_contents, RW_WRITER);
1106 
1107 	/*
1108 	 * the source may have been deleted by another thread before
1109 	 * we gets here.
1110 	 */
1111 	if (odv->sdev_state != SDEV_READY) {
1112 		error = ENOENT;
1113 		goto err_out;
1114 	}
1115 
1116 	if (doingdir && (odv == nddv)) {
1117 		error = EINVAL;
1118 		goto err_out;
1119 	}
1120 
1121 	/*
1122 	 * If renaming a directory, and the parents are different (".." must be
1123 	 * changed) then the source dir must not be in the dir hierarchy above
1124 	 * the target since it would orphan everything below the source dir.
1125 	 */
1126 	if (doingdir && (oddv != nddv)) {
1127 		error = sdev_checkpath(odv, nddv, cred);
1128 		if (error)
1129 			goto err_out;
1130 	}
1131 
1132 	/* destination existing */
1133 	if (*ndvp) {
1134 		nvp = SDEVTOV(*ndvp);
1135 		ASSERT(nvp);
1136 
1137 		/* handling renaming to itself */
1138 		if (odv == *ndvp) {
1139 			error = 0;
1140 			goto err_out;
1141 		}
1142 
1143 		if (nvp->v_type == VDIR) {
1144 			if (!doingdir) {
1145 				error = EISDIR;
1146 				goto err_out;
1147 			}
1148 
1149 			if (vn_vfswlock(nvp)) {
1150 				error = EBUSY;
1151 				goto err_out;
1152 			}
1153 
1154 			if (vn_mountedvfs(nvp) != NULL) {
1155 				vn_vfsunlock(nvp);
1156 				error = EBUSY;
1157 				goto err_out;
1158 			}
1159 
1160 			/* in case dir1 exists in dir2 and "mv dir1 dir2" */
1161 			if ((*ndvp)->sdev_nlink > 2) {
1162 				vn_vfsunlock(nvp);
1163 				error = EEXIST;
1164 				goto err_out;
1165 			}
1166 			vn_vfsunlock(nvp);
1167 
1168 			(void) sdev_dirdelete(nddv, *ndvp);
1169 			*ndvp = NULL;
1170 			ASSERT(nddv->sdev_attrvp);
1171 			error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1172 			    nddv->sdev_attrvp, cred, NULL, 0);
1173 			if (error)
1174 				goto err_out;
1175 		} else {
1176 			if (doingdir) {
1177 				error = ENOTDIR;
1178 				goto err_out;
1179 			}
1180 
1181 			if (SDEV_IS_PERSIST((*ndvp))) {
1182 				bkstore = 1;
1183 			}
1184 
1185 			/*
1186 			 * get rid of the node from the directory cache
1187 			 * note, in case EBUSY is returned, the ZOMBIE
1188 			 * node is taken care in sdev_mknode.
1189 			 */
1190 			(void) sdev_dirdelete(nddv, *ndvp);
1191 			*ndvp = NULL;
1192 			if (bkstore) {
1193 				ASSERT(nddv->sdev_attrvp);
1194 				error = VOP_REMOVE(nddv->sdev_attrvp,
1195 				    nnm, cred, NULL, 0);
1196 				if (error)
1197 					goto err_out;
1198 			}
1199 		}
1200 	}
1201 
1202 	/* fix the source for a symlink */
1203 	if (vattr.va_type == VLNK) {
1204 		if (odv->sdev_symlink == NULL) {
1205 			error = sdev_follow_link(odv);
1206 			if (error) {
1207 				error = ENOENT;
1208 				goto err_out;
1209 			}
1210 		}
1211 		ASSERT(odv->sdev_symlink);
1212 		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1213 	}
1214 
1215 	/*
1216 	 * make a fresh node from the source attrs
1217 	 */
1218 	ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1219 	error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1220 	    NULL, (void *)link, cred, SDEV_READY);
1221 
1222 	if (link)
1223 		kmem_free(link, strlen(link) + 1);
1224 
1225 	if (error)
1226 		goto err_out;
1227 	ASSERT(*ndvp);
1228 	ASSERT((*ndvp)->sdev_state == SDEV_READY);
1229 
1230 	/* move dir contents */
1231 	if (doingdir) {
1232 		for (idv = SDEV_FIRST_ENTRY(odv); idv;
1233 		    idv = SDEV_NEXT_ENTRY(odv, idv)) {
1234 			error = sdev_rnmnode(odv, idv,
1235 			    (struct sdev_node *)(*ndvp), &ndv,
1236 			    idv->sdev_name, cred);
1237 			if (error)
1238 				goto err_out;
1239 			ndv = NULL;
1240 		}
1241 	}
1242 
1243 	if ((*ndvp)->sdev_attrvp) {
1244 		sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1245 		    AT_CTIME|AT_ATIME);
1246 	} else {
1247 		ASSERT((*ndvp)->sdev_attr);
1248 		gethrestime(&now);
1249 		(*ndvp)->sdev_attr->va_ctime = now;
1250 		(*ndvp)->sdev_attr->va_atime = now;
1251 	}
1252 
1253 	if (nddv->sdev_attrvp) {
1254 		sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1255 		    AT_MTIME|AT_ATIME);
1256 	} else {
1257 		ASSERT(nddv->sdev_attr);
1258 		gethrestime(&now);
1259 		nddv->sdev_attr->va_mtime = now;
1260 		nddv->sdev_attr->va_atime = now;
1261 	}
1262 	rw_exit(&nddv->sdev_contents);
1263 	if (!samedir)
1264 		rw_exit(&oddv->sdev_contents);
1265 
1266 	SDEV_RELE(*ndvp);
1267 	return (error);
1268 
1269 err_out:
1270 	rw_exit(&nddv->sdev_contents);
1271 	if (!samedir)
1272 		rw_exit(&oddv->sdev_contents);
1273 	return (error);
1274 }
1275 
1276 /*
1277  * Merge sdev_node specific information into an attribute structure.
1278  *
1279  * note: sdev_node is not locked here
1280  */
1281 void
1282 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1283 {
1284 	struct vnode *vp = SDEVTOV(dv);
1285 
1286 	vap->va_nlink = dv->sdev_nlink;
1287 	vap->va_nodeid = dv->sdev_ino;
1288 	vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1289 	vap->va_type = vp->v_type;
1290 
1291 	if (vp->v_type == VDIR) {
1292 		vap->va_rdev = 0;
1293 		vap->va_fsid = vp->v_rdev;
1294 	} else if (vp->v_type == VLNK) {
1295 		vap->va_rdev = 0;
1296 		vap->va_mode  &= ~S_IFMT;
1297 		vap->va_mode |= S_IFLNK;
1298 	} else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1299 		vap->va_rdev = vp->v_rdev;
1300 		vap->va_mode &= ~S_IFMT;
1301 		if (vap->va_type == VCHR)
1302 			vap->va_mode |= S_IFCHR;
1303 		else
1304 			vap->va_mode |= S_IFBLK;
1305 	} else {
1306 		vap->va_rdev = 0;
1307 	}
1308 }
1309 
1310 struct vattr *
1311 sdev_getdefault_attr(enum vtype type)
1312 {
1313 	if (type == VDIR)
1314 		return (&sdev_vattr_dir);
1315 	else if (type == VCHR)
1316 		return (&sdev_vattr_chr);
1317 	else if (type == VBLK)
1318 		return (&sdev_vattr_blk);
1319 	else if (type == VLNK)
1320 		return (&sdev_vattr_lnk);
1321 	else
1322 		return (NULL);
1323 }
1324 int
1325 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1326 {
1327 	int rv = 0;
1328 	struct vnode *vp = SDEVTOV(dv);
1329 
1330 	switch (vp->v_type) {
1331 	case VCHR:
1332 	case VBLK:
1333 		/*
1334 		 * If vnode is a device, return special vnode instead
1335 		 * (though it knows all about -us- via sp->s_realvp)
1336 		 */
1337 		*vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1338 		VN_RELE(vp);
1339 		if (*vpp == NULLVP)
1340 			rv = ENOSYS;
1341 		break;
1342 	default:	/* most types are returned as is */
1343 		*vpp = vp;
1344 		break;
1345 	}
1346 	return (rv);
1347 }
1348 
1349 /*
1350  * junction between devname and root file system, e.g. ufs
1351  */
1352 int
1353 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1354 {
1355 	struct vnode *rdvp = ddv->sdev_attrvp;
1356 	int rval = 0;
1357 
1358 	ASSERT(rdvp);
1359 
1360 	rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1361 	    NULL);
1362 	return (rval);
1363 }
1364 
1365 static int
1366 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1367 {
1368 	struct sdev_node *dv = NULL;
1369 	char	*nm;
1370 	struct vnode *dirvp;
1371 	int	error;
1372 	vnode_t	*vp;
1373 	int eof;
1374 	struct iovec iov;
1375 	struct uio uio;
1376 	struct dirent64 *dp;
1377 	dirent64_t *dbuf;
1378 	size_t dbuflen;
1379 	struct vattr vattr;
1380 	char *link = NULL;
1381 
1382 	if (ddv->sdev_attrvp == NULL)
1383 		return (0);
1384 	if (!(ddv->sdev_flags & SDEV_BUILD))
1385 		return (0);
1386 
1387 	dirvp = ddv->sdev_attrvp;
1388 	VN_HOLD(dirvp);
1389 	dbuf = kmem_zalloc(dlen, KM_SLEEP);
1390 
1391 	uio.uio_iov = &iov;
1392 	uio.uio_iovcnt = 1;
1393 	uio.uio_segflg = UIO_SYSSPACE;
1394 	uio.uio_fmode = 0;
1395 	uio.uio_extflg = UIO_COPY_CACHED;
1396 	uio.uio_loffset = 0;
1397 	uio.uio_llimit = MAXOFFSET_T;
1398 
1399 	eof = 0;
1400 	error = 0;
1401 	while (!error && !eof) {
1402 		uio.uio_resid = dlen;
1403 		iov.iov_base = (char *)dbuf;
1404 		iov.iov_len = dlen;
1405 		(void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1406 		error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1407 		VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1408 
1409 		dbuflen = dlen - uio.uio_resid;
1410 		if (error || dbuflen == 0)
1411 			break;
1412 
1413 		if (!(ddv->sdev_flags & SDEV_BUILD))
1414 			break;
1415 
1416 		for (dp = dbuf; ((intptr_t)dp <
1417 		    (intptr_t)dbuf + dbuflen);
1418 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1419 			nm = dp->d_name;
1420 
1421 			if (strcmp(nm, ".") == 0 ||
1422 			    strcmp(nm, "..") == 0)
1423 				continue;
1424 
1425 			vp = NULLVP;
1426 			dv = sdev_cache_lookup(ddv, nm);
1427 			if (dv) {
1428 				if (dv->sdev_state != SDEV_ZOMBIE) {
1429 					SDEV_SIMPLE_RELE(dv);
1430 				} else {
1431 					/*
1432 					 * A ZOMBIE node may not have been
1433 					 * cleaned up from the backing store,
1434 					 * bypass this entry in this case,
1435 					 * and clean it up from the directory
1436 					 * cache if this is the last call.
1437 					 */
1438 					(void) sdev_dirdelete(ddv, dv);
1439 				}
1440 				continue;
1441 			}
1442 
1443 			/* refill the cache if not already */
1444 			error = devname_backstore_lookup(ddv, nm, &vp);
1445 			if (error)
1446 				continue;
1447 
1448 			vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1449 			error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1450 			if (error)
1451 				continue;
1452 
1453 			if (vattr.va_type == VLNK) {
1454 				error = sdev_getlink(vp, &link);
1455 				if (error) {
1456 					continue;
1457 				}
1458 				ASSERT(link != NULL);
1459 			}
1460 
1461 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1462 				rw_exit(&ddv->sdev_contents);
1463 				rw_enter(&ddv->sdev_contents, RW_WRITER);
1464 			}
1465 			error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1466 			    cred, SDEV_READY);
1467 			rw_downgrade(&ddv->sdev_contents);
1468 
1469 			if (link != NULL) {
1470 				kmem_free(link, strlen(link) + 1);
1471 				link = NULL;
1472 			}
1473 
1474 			if (!error) {
1475 				ASSERT(dv);
1476 				ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1477 				SDEV_SIMPLE_RELE(dv);
1478 			}
1479 			vp = NULL;
1480 			dv = NULL;
1481 		}
1482 	}
1483 
1484 done:
1485 	VN_RELE(dirvp);
1486 	kmem_free(dbuf, dlen);
1487 
1488 	return (error);
1489 }
1490 
1491 void
1492 sdev_filldir_dynamic(struct sdev_node *ddv)
1493 {
1494 	int error;
1495 	int i;
1496 	struct vattr *vap;
1497 	char *nm = NULL;
1498 	struct sdev_node *dv = NULL;
1499 
1500 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1501 	ASSERT((ddv->sdev_flags & SDEV_BUILD));
1502 
1503 	vap = sdev_getdefault_attr(VDIR);
1504 	gethrestime(&vap->va_atime);
1505 	vap->va_mtime = vap->va_atime;
1506 	vap->va_ctime = vap->va_atime;
1507 	for (i = 0; vtab[i].vt_name != NULL; i++) {
1508 		nm = vtab[i].vt_name;
1509 		ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1510 		dv = NULL;
1511 		error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1512 		    NULL, kcred, SDEV_READY);
1513 		if (error) {
1514 			cmn_err(CE_WARN, "%s/%s: error %d\n",
1515 			    ddv->sdev_name, nm, error);
1516 		} else {
1517 			ASSERT(dv);
1518 			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1519 			SDEV_SIMPLE_RELE(dv);
1520 		}
1521 	}
1522 }
1523 
1524 /*
1525  * Creating a backing store entry based on sdev_attr.
1526  * This is called either as part of node creation in a persistent directory
1527  * or from setattr/setsecattr to persist access attributes across reboot.
1528  */
1529 int
1530 sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1531 {
1532 	int error = 0;
1533 	struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1534 	struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1535 	struct vattr *vap = dv->sdev_attr;
1536 	char *nm = dv->sdev_name;
1537 	struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1538 
1539 	ASSERT(dv && dv->sdev_name && rdvp);
1540 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1541 
1542 lookup:
1543 	/* try to find it in the backing store */
1544 	error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1545 	    NULL);
1546 	if (error == 0) {
1547 		if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1548 			VN_HOLD(rrvp);
1549 			VN_RELE(*rvp);
1550 			*rvp = rrvp;
1551 		}
1552 
1553 		kmem_free(dv->sdev_attr, sizeof (vattr_t));
1554 		dv->sdev_attr = NULL;
1555 		dv->sdev_attrvp = *rvp;
1556 		return (0);
1557 	}
1558 
1559 	/* let's try to persist the node */
1560 	gethrestime(&vap->va_atime);
1561 	vap->va_mtime = vap->va_atime;
1562 	vap->va_ctime = vap->va_atime;
1563 	vap->va_mask |= AT_TYPE|AT_MODE;
1564 	switch (vap->va_type) {
1565 	case VDIR:
1566 		error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1567 		sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1568 		    (void *)(*rvp), error));
1569 		break;
1570 	case VCHR:
1571 	case VBLK:
1572 	case VREG:
1573 	case VDOOR:
1574 		error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1575 		    rvp, cred, 0, NULL, NULL);
1576 		sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1577 		    (void *)(*rvp), error));
1578 		if (!error)
1579 			VN_RELE(*rvp);
1580 		break;
1581 	case VLNK:
1582 		ASSERT(dv->sdev_symlink);
1583 		error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1584 		    NULL, 0);
1585 		sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1586 		    error));
1587 		break;
1588 	default:
1589 		cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1590 		    "create\n", nm);
1591 		/*NOTREACHED*/
1592 	}
1593 
1594 	/* go back to lookup to factor out spec node and set attrvp */
1595 	if (error == 0)
1596 		goto lookup;
1597 
1598 	sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error));
1599 	return (error);
1600 }
1601 
1602 static int
1603 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1604 {
1605 	int error = 0;
1606 	struct sdev_node *dup = NULL;
1607 
1608 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1609 	if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1610 		sdev_direnter(ddv, *dv);
1611 	} else {
1612 		if (dup->sdev_state == SDEV_ZOMBIE) {
1613 			error = sdev_dirdelete(ddv, dup);
1614 			/*
1615 			 * The ZOMBIE node is still hanging
1616 			 * around with more than one reference counts.
1617 			 * Fail the new node creation so that
1618 			 * the directory cache won't have
1619 			 * duplicate entries for the same named node
1620 			 */
1621 			if (error == EBUSY) {
1622 				SDEV_SIMPLE_RELE(*dv);
1623 				sdev_nodedestroy(*dv, 0);
1624 				*dv = NULL;
1625 				return (error);
1626 			}
1627 			sdev_direnter(ddv, *dv);
1628 		} else {
1629 			ASSERT((*dv)->sdev_state != SDEV_ZOMBIE);
1630 			SDEV_SIMPLE_RELE(*dv);
1631 			sdev_nodedestroy(*dv, 0);
1632 			*dv = dup;
1633 		}
1634 	}
1635 
1636 	return (0);
1637 }
1638 
1639 static int
1640 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1641 {
1642 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1643 	return (sdev_dirdelete(ddv, *dv));
1644 }
1645 
1646 /*
1647  * update the in-core directory cache
1648  */
1649 int
1650 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1651     sdev_cache_ops_t ops)
1652 {
1653 	int error = 0;
1654 
1655 	ASSERT((SDEV_HELD(*dv)));
1656 
1657 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1658 	switch (ops) {
1659 	case SDEV_CACHE_ADD:
1660 		error = sdev_cache_add(ddv, dv, nm);
1661 		break;
1662 	case SDEV_CACHE_DELETE:
1663 		error = sdev_cache_delete(ddv, dv);
1664 		break;
1665 	default:
1666 		break;
1667 	}
1668 
1669 	return (error);
1670 }
1671 
1672 /*
1673  * retrieve the named entry from the directory cache
1674  */
1675 struct sdev_node *
1676 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1677 {
1678 	struct sdev_node *dv = NULL;
1679 
1680 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1681 	dv = sdev_findbyname(ddv, nm);
1682 
1683 	return (dv);
1684 }
1685 
1686 /*
1687  * Implicit reconfig for nodes constructed by a link generator
1688  * Start devfsadm if needed, or if devfsadm is in progress,
1689  * prepare to block on devfsadm either completing or
1690  * constructing the desired node.  As devfsadmd is global
1691  * in scope, constructing all necessary nodes, we only
1692  * need to initiate it once.
1693  */
1694 static int
1695 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1696 {
1697 	int error = 0;
1698 
1699 	if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1700 		sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1701 		    ddv->sdev_name, nm, devfsadm_state));
1702 		mutex_enter(&dv->sdev_lookup_lock);
1703 		SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1704 		mutex_exit(&dv->sdev_lookup_lock);
1705 		error = 0;
1706 	} else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1707 		sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1708 		    ddv->sdev_name, nm, devfsadm_state));
1709 
1710 		sdev_devfsadmd_thread(ddv, dv, kcred);
1711 		mutex_enter(&dv->sdev_lookup_lock);
1712 		SDEV_BLOCK_OTHERS(dv,
1713 		    (SDEV_LOOKUP | SDEV_LGWAITING));
1714 		mutex_exit(&dv->sdev_lookup_lock);
1715 		error = 0;
1716 	} else {
1717 		error = -1;
1718 	}
1719 
1720 	return (error);
1721 }
1722 
1723 /*
1724  *  Support for specialized device naming construction mechanisms
1725  */
1726 static int
1727 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1728     int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1729     void *, char *), int flags, struct cred *cred)
1730 {
1731 	int rv = 0;
1732 	char *physpath = NULL;
1733 	struct vattr vattr;
1734 	struct vattr *vap;
1735 	struct sdev_node *dv = NULL;
1736 
1737 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1738 	if (flags & SDEV_VLINK) {
1739 		physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1740 		rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1741 		    NULL);
1742 		if (rv) {
1743 			kmem_free(physpath, MAXPATHLEN);
1744 			return (-1);
1745 		}
1746 
1747 		vap = sdev_getdefault_attr(VLNK);
1748 		vap->va_size = strlen(physpath);
1749 		gethrestime(&vap->va_atime);
1750 		vap->va_mtime = vap->va_atime;
1751 		vap->va_ctime = vap->va_atime;
1752 
1753 		rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1754 		    (void *)physpath, cred, SDEV_READY);
1755 		kmem_free(physpath, MAXPATHLEN);
1756 		if (rv)
1757 			return (rv);
1758 	} else if (flags & SDEV_VATTR) {
1759 		/*
1760 		 * /dev/pts
1761 		 *
1762 		 * callback is responsible to set the basic attributes,
1763 		 * e.g. va_type/va_uid/va_gid/
1764 		 *    dev_t if VCHR or VBLK/
1765 		 */
1766 		ASSERT(callback);
1767 		rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
1768 		if (rv) {
1769 			sdcmn_err3(("devname_lookup_func: SDEV_NONE "
1770 			    "callback failed \n"));
1771 			return (-1);
1772 		}
1773 
1774 		rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
1775 		    cred, SDEV_READY);
1776 
1777 		if (rv)
1778 			return (rv);
1779 
1780 	} else {
1781 		impossible(("lookup: %s/%s by %s not supported (%d)\n",
1782 		    SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
1783 		    __LINE__));
1784 		rv = -1;
1785 	}
1786 
1787 	*dvp = dv;
1788 	return (rv);
1789 }
1790 
/*
 * Return 1 if exec_name is exactly "devfsadm", 0 otherwise.
 */
static int
is_devfsadm_thread(char *exec_name)
{
	/*
	 * note: because devfsadmd -> /usr/sbin/devfsadm
	 * it is safe to use "devfsadm" to capture the lookups
	 * from devfsadm and its daemon version.
	 */
	return ((strcmp(exec_name, "devfsadm") == 0) ? 1 : 0);
}
1803 
1804 /*
1805  * Lookup Order:
1806  *	sdev_node cache;
1807  *	backing store (SDEV_PERSIST);
1808  *	DBNR: a. dir_ops implemented in the loadable modules;
1809  *	      b. vnode ops in vtab.
1810  */
1811 int
1812 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
1813     struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
1814     struct cred *, void *, char *), int flags)
1815 {
1816 	int rv = 0, nmlen;
1817 	struct vnode *rvp = NULL;
1818 	struct sdev_node *dv = NULL;
1819 	int	retried = 0;
1820 	int	error = 0;
1821 	struct vattr vattr;
1822 	char *lookup_thread = curproc->p_user.u_comm;
1823 	int failed_flags = 0;
1824 	int (*vtor)(struct sdev_node *) = NULL;
1825 	int state;
1826 	int parent_state;
1827 	char *link = NULL;
1828 
1829 	if (SDEVTOV(ddv)->v_type != VDIR)
1830 		return (ENOTDIR);
1831 
1832 	/*
1833 	 * Empty name or ., return node itself.
1834 	 */
1835 	nmlen = strlen(nm);
1836 	if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
1837 		*vpp = SDEVTOV(ddv);
1838 		VN_HOLD(*vpp);
1839 		return (0);
1840 	}
1841 
1842 	/*
1843 	 * .., return the parent directory
1844 	 */
1845 	if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
1846 		*vpp = SDEVTOV(ddv->sdev_dotdot);
1847 		VN_HOLD(*vpp);
1848 		return (0);
1849 	}
1850 
1851 	rw_enter(&ddv->sdev_contents, RW_READER);
1852 	if (ddv->sdev_flags & SDEV_VTOR) {
1853 		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
1854 		ASSERT(vtor);
1855 	}
1856 
1857 tryagain:
1858 	/*
1859 	 * (a) directory cache lookup:
1860 	 */
1861 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1862 	parent_state = ddv->sdev_state;
1863 	dv = sdev_cache_lookup(ddv, nm);
1864 	if (dv) {
1865 		state = dv->sdev_state;
1866 		switch (state) {
1867 		case SDEV_INIT:
1868 			if (is_devfsadm_thread(lookup_thread))
1869 				break;
1870 
1871 			/* ZOMBIED parent won't allow node creation */
1872 			if (parent_state == SDEV_ZOMBIE) {
1873 				SD_TRACE_FAILED_LOOKUP(ddv, nm,
1874 				    retried);
1875 				goto nolock_notfound;
1876 			}
1877 
1878 			mutex_enter(&dv->sdev_lookup_lock);
1879 			/* compensate the threads started after devfsadm */
1880 			if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
1881 			    !(SDEV_IS_LOOKUP(dv)))
1882 				SDEV_BLOCK_OTHERS(dv,
1883 				    (SDEV_LOOKUP | SDEV_LGWAITING));
1884 
1885 			if (SDEV_IS_LOOKUP(dv)) {
1886 				failed_flags |= SLF_REBUILT;
1887 				rw_exit(&ddv->sdev_contents);
1888 				error = sdev_wait4lookup(dv, SDEV_LOOKUP);
1889 				mutex_exit(&dv->sdev_lookup_lock);
1890 				rw_enter(&ddv->sdev_contents, RW_READER);
1891 
1892 				if (error != 0) {
1893 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
1894 					    retried);
1895 					goto nolock_notfound;
1896 				}
1897 
1898 				state = dv->sdev_state;
1899 				if (state == SDEV_INIT) {
1900 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
1901 					    retried);
1902 					goto nolock_notfound;
1903 				} else if (state == SDEV_READY) {
1904 					goto found;
1905 				} else if (state == SDEV_ZOMBIE) {
1906 					rw_exit(&ddv->sdev_contents);
1907 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
1908 					    retried);
1909 					SDEV_RELE(dv);
1910 					goto lookup_failed;
1911 				}
1912 			} else {
1913 				mutex_exit(&dv->sdev_lookup_lock);
1914 			}
1915 			break;
1916 		case SDEV_READY:
1917 			goto found;
1918 		case SDEV_ZOMBIE:
1919 			rw_exit(&ddv->sdev_contents);
1920 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1921 			SDEV_RELE(dv);
1922 			goto lookup_failed;
1923 		default:
1924 			rw_exit(&ddv->sdev_contents);
1925 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1926 			sdev_lookup_failed(ddv, nm, failed_flags);
1927 			*vpp = NULLVP;
1928 			return (ENOENT);
1929 		}
1930 	}
1931 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1932 
1933 	/*
1934 	 * ZOMBIED parent does not allow new node creation.
1935 	 * bail out early
1936 	 */
1937 	if (parent_state == SDEV_ZOMBIE) {
1938 		rw_exit(&ddv->sdev_contents);
1939 		*vpp = NULLVP;
1940 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1941 		return (ENOENT);
1942 	}
1943 
1944 	/*
1945 	 * (b0): backing store lookup
1946 	 *	SDEV_PERSIST is default except:
1947 	 *		1) pts nodes
1948 	 *		2) non-chmod'ed local nodes
1949 	 *		3) zvol nodes
1950 	 */
1951 	if (SDEV_IS_PERSIST(ddv)) {
1952 		error = devname_backstore_lookup(ddv, nm, &rvp);
1953 
1954 		if (!error) {
1955 
1956 			vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1957 			error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
1958 			if (error) {
1959 				rw_exit(&ddv->sdev_contents);
1960 				if (dv)
1961 					SDEV_RELE(dv);
1962 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1963 				sdev_lookup_failed(ddv, nm, failed_flags);
1964 				*vpp = NULLVP;
1965 				return (ENOENT);
1966 			}
1967 
1968 			if (vattr.va_type == VLNK) {
1969 				error = sdev_getlink(rvp, &link);
1970 				if (error) {
1971 					rw_exit(&ddv->sdev_contents);
1972 					if (dv)
1973 						SDEV_RELE(dv);
1974 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
1975 					    retried);
1976 					sdev_lookup_failed(ddv, nm,
1977 					    failed_flags);
1978 					*vpp = NULLVP;
1979 					return (ENOENT);
1980 				}
1981 				ASSERT(link != NULL);
1982 			}
1983 
1984 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1985 				rw_exit(&ddv->sdev_contents);
1986 				rw_enter(&ddv->sdev_contents, RW_WRITER);
1987 			}
1988 			error = sdev_mknode(ddv, nm, &dv, &vattr,
1989 			    rvp, link, cred, SDEV_READY);
1990 			rw_downgrade(&ddv->sdev_contents);
1991 
1992 			if (link != NULL) {
1993 				kmem_free(link, strlen(link) + 1);
1994 				link = NULL;
1995 			}
1996 
1997 			if (error) {
1998 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1999 				rw_exit(&ddv->sdev_contents);
2000 				if (dv)
2001 					SDEV_RELE(dv);
2002 				goto lookup_failed;
2003 			} else {
2004 				goto found;
2005 			}
2006 		} else if (retried) {
2007 			rw_exit(&ddv->sdev_contents);
2008 			sdcmn_err3(("retry of lookup of %s/%s: failed\n",
2009 			    ddv->sdev_name, nm));
2010 			if (dv)
2011 				SDEV_RELE(dv);
2012 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2013 			sdev_lookup_failed(ddv, nm, failed_flags);
2014 			*vpp = NULLVP;
2015 			return (ENOENT);
2016 		}
2017 	}
2018 
2019 lookup_create_node:
2020 	/* first thread that is doing the lookup on this node */
2021 	if (callback) {
2022 		ASSERT(dv == NULL);
2023 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2024 			rw_exit(&ddv->sdev_contents);
2025 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2026 		}
2027 		error = sdev_call_dircallback(ddv, &dv, nm, callback,
2028 		    flags, cred);
2029 		rw_downgrade(&ddv->sdev_contents);
2030 		if (error == 0) {
2031 			goto found;
2032 		} else {
2033 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2034 			rw_exit(&ddv->sdev_contents);
2035 			goto lookup_failed;
2036 		}
2037 	}
2038 	if (!dv) {
2039 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2040 			rw_exit(&ddv->sdev_contents);
2041 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2042 		}
2043 		error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
2044 		    cred, SDEV_INIT);
2045 		if (!dv) {
2046 			rw_exit(&ddv->sdev_contents);
2047 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2048 			sdev_lookup_failed(ddv, nm, failed_flags);
2049 			*vpp = NULLVP;
2050 			return (ENOENT);
2051 		}
2052 		rw_downgrade(&ddv->sdev_contents);
2053 	}
2054 
2055 	/*
2056 	 * (b1) invoking devfsadm once per life time for devfsadm nodes
2057 	 */
2058 	ASSERT(SDEV_HELD(dv));
2059 
2060 	if (SDEV_IS_NO_NCACHE(dv))
2061 		failed_flags |= SLF_NO_NCACHE;
2062 	if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
2063 	    SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
2064 	    ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
2065 		ASSERT(SDEV_HELD(dv));
2066 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2067 		goto nolock_notfound;
2068 	}
2069 
2070 	/*
2071 	 * filter out known non-existent devices recorded
2072 	 * during initial reconfiguration boot for which
2073 	 * reconfig should not be done and lookup may
2074 	 * be short-circuited now.
2075 	 */
2076 	if (sdev_lookup_filter(ddv, nm)) {
2077 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2078 		goto nolock_notfound;
2079 	}
2080 
2081 	/* bypassing devfsadm internal nodes */
2082 	if (is_devfsadm_thread(lookup_thread)) {
2083 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2084 		goto nolock_notfound;
2085 	}
2086 
2087 	if (sdev_reconfig_disable) {
2088 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2089 		goto nolock_notfound;
2090 	}
2091 
2092 	error = sdev_call_devfsadmd(ddv, dv, nm);
2093 	if (error == 0) {
2094 		sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
2095 		    ddv->sdev_name, nm, curproc->p_user.u_comm));
2096 		if (sdev_reconfig_verbose) {
2097 			cmn_err(CE_CONT,
2098 			    "?lookup of %s/%s by %s: reconfig\n",
2099 			    ddv->sdev_name, nm, curproc->p_user.u_comm);
2100 		}
2101 		retried = 1;
2102 		failed_flags |= SLF_REBUILT;
2103 		ASSERT(dv->sdev_state != SDEV_ZOMBIE);
2104 		SDEV_SIMPLE_RELE(dv);
2105 		goto tryagain;
2106 	} else {
2107 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2108 		goto nolock_notfound;
2109 	}
2110 
2111 found:
2112 	ASSERT(!(dv->sdev_flags & SDEV_STALE));
2113 	ASSERT(dv->sdev_state == SDEV_READY);
2114 	if (vtor) {
2115 		/*
2116 		 * Check validity of returned node
2117 		 */
2118 		switch (vtor(dv)) {
2119 		case SDEV_VTOR_VALID:
2120 			break;
2121 		case SDEV_VTOR_STALE:
2122 			/*
2123 			 * The name exists, but the cache entry is
2124 			 * stale and needs to be re-created.
2125 			 */
2126 			ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2127 			if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
2128 				rw_exit(&ddv->sdev_contents);
2129 				rw_enter(&ddv->sdev_contents, RW_WRITER);
2130 			}
2131 			error = sdev_cache_update(ddv, &dv, nm,
2132 			    SDEV_CACHE_DELETE);
2133 			rw_downgrade(&ddv->sdev_contents);
2134 			if (error == 0) {
2135 				dv = NULL;
2136 				goto lookup_create_node;
2137 			}
2138 			/* FALLTHRU */
2139 		case SDEV_VTOR_INVALID:
2140 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2141 			sdcmn_err7(("lookup: destroy invalid "
2142 			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
2143 			goto nolock_notfound;
2144 		case SDEV_VTOR_SKIP:
2145 			sdcmn_err7(("lookup: node not applicable - "
2146 			    "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
2147 			rw_exit(&ddv->sdev_contents);
2148 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2149 			SDEV_RELE(dv);
2150 			goto lookup_failed;
2151 		default:
2152 			cmn_err(CE_PANIC,
2153 			    "dev fs: validator failed: %s(%p)\n",
2154 			    dv->sdev_name, (void *)dv);
2155 			break;
2156 		}
2157 	}
2158 
2159 	rw_exit(&ddv->sdev_contents);
2160 	rv = sdev_to_vp(dv, vpp);
2161 	sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
2162 	    "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
2163 	    dv->sdev_state, nm, rv));
2164 	return (rv);
2165 
2166 nolock_notfound:
2167 	/*
2168 	 * Destroy the node that is created for synchronization purposes.
2169 	 */
2170 	sdcmn_err3(("devname_lookup_func: %s with state %d\n",
2171 	    nm, dv->sdev_state));
2172 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2173 	if (dv->sdev_state == SDEV_INIT) {
2174 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2175 			rw_exit(&ddv->sdev_contents);
2176 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2177 		}
2178 
2179 		/*
2180 		 * Node state may have changed during the lock
2181 		 * changes. Re-check.
2182 		 */
2183 		if (dv->sdev_state == SDEV_INIT) {
2184 			(void) sdev_dirdelete(ddv, dv);
2185 			rw_exit(&ddv->sdev_contents);
2186 			sdev_lookup_failed(ddv, nm, failed_flags);
2187 			*vpp = NULL;
2188 			return (ENOENT);
2189 		}
2190 	}
2191 
2192 	rw_exit(&ddv->sdev_contents);
2193 	SDEV_RELE(dv);
2194 
2195 lookup_failed:
2196 	sdev_lookup_failed(ddv, nm, failed_flags);
2197 	*vpp = NULL;
2198 	return (ENOENT);
2199 }
2200 
2201 /*
2202  * Given a directory node, mark all nodes beneath as
2203  * STALE, i.e. nodes that don't exist as far as new
2204  * consumers are concerned.  Remove them from the
2205  * list of directory entries so that no lookup or
2206  * directory traversal will find them.  The node
2207  * not deallocated so existing holds are not affected.
2208  */
2209 void
2210 sdev_stale(struct sdev_node *ddv)
2211 {
2212 	struct sdev_node *dv;
2213 	struct vnode *vp;
2214 
2215 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2216 
2217 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2218 	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = SDEV_NEXT_ENTRY(ddv, dv)) {
2219 		vp = SDEVTOV(dv);
2220 		if (vp->v_type == VDIR)
2221 			sdev_stale(dv);
2222 
2223 		sdcmn_err9(("sdev_stale: setting stale %s\n",
2224 		    dv->sdev_path));
2225 		dv->sdev_flags |= SDEV_STALE;
2226 		avl_remove(&ddv->sdev_entries, dv);
2227 	}
2228 	ddv->sdev_flags |= SDEV_BUILD;
2229 	rw_exit(&ddv->sdev_contents);
2230 }
2231 
2232 /*
2233  * Given a directory node, clean out all the nodes beneath.
2234  * If expr is specified, clean node with names matching expr.
2235  * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2236  *	so they are excluded from future lookups.
2237  */
2238 int
2239 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2240 {
2241 	int error = 0;
2242 	int busy = 0;
2243 	struct vnode *vp;
2244 	struct sdev_node *dv, *next = NULL;
2245 	int bkstore = 0;
2246 	int len = 0;
2247 	char *bks_name = NULL;
2248 
2249 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2250 
2251 	/*
2252 	 * We try our best to destroy all unused sdev_node's
2253 	 */
2254 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2255 	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) {
2256 		next = SDEV_NEXT_ENTRY(ddv, dv);
2257 		vp = SDEVTOV(dv);
2258 
2259 		if (expr && gmatch(dv->sdev_name, expr) == 0)
2260 			continue;
2261 
2262 		if (vp->v_type == VDIR &&
2263 		    sdev_cleandir(dv, NULL, flags) != 0) {
2264 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2265 			    dv->sdev_name));
2266 			busy++;
2267 			continue;
2268 		}
2269 
2270 		if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2271 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2272 			    dv->sdev_name));
2273 			busy++;
2274 			continue;
2275 		}
2276 
2277 		/*
2278 		 * at this point, either dv is not held or SDEV_ENFORCE
2279 		 * is specified. In either case, dv needs to be deleted
2280 		 */
2281 		SDEV_HOLD(dv);
2282 
2283 		bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2284 		if (bkstore && (vp->v_type == VDIR))
2285 			bkstore += 1;
2286 
2287 		if (bkstore) {
2288 			len = strlen(dv->sdev_name) + 1;
2289 			bks_name = kmem_alloc(len, KM_SLEEP);
2290 			bcopy(dv->sdev_name, bks_name, len);
2291 		}
2292 
2293 		error = sdev_dirdelete(ddv, dv);
2294 
2295 		if (error == EBUSY) {
2296 			sdcmn_err9(("sdev_cleandir: dir busy\n"));
2297 			busy++;
2298 		}
2299 
2300 		/* take care the backing store clean up */
2301 		if (bkstore && (error == 0)) {
2302 			ASSERT(bks_name);
2303 			ASSERT(ddv->sdev_attrvp);
2304 
2305 			if (bkstore == 1) {
2306 				error = VOP_REMOVE(ddv->sdev_attrvp,
2307 				    bks_name, kcred, NULL, 0);
2308 			} else if (bkstore == 2) {
2309 				error = VOP_RMDIR(ddv->sdev_attrvp,
2310 				    bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2311 			}
2312 
2313 			/* do not propagate the backing store errors */
2314 			if (error) {
2315 				sdcmn_err9(("sdev_cleandir: backing store"
2316 				    "not cleaned\n"));
2317 				error = 0;
2318 			}
2319 
2320 			bkstore = 0;
2321 			kmem_free(bks_name, len);
2322 			bks_name = NULL;
2323 			len = 0;
2324 		}
2325 	}
2326 
2327 	ddv->sdev_flags |= SDEV_BUILD;
2328 	rw_exit(&ddv->sdev_contents);
2329 
2330 	if (busy) {
2331 		error = EBUSY;
2332 	}
2333 
2334 	return (error);
2335 }
2336 
2337 /*
2338  * a convenient wrapper for readdir() funcs
2339  */
2340 size_t
2341 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2342 {
2343 	size_t reclen = DIRENT64_RECLEN(strlen(nm));
2344 	if (reclen > size)
2345 		return (0);
2346 
2347 	de->d_ino = (ino64_t)ino;
2348 	de->d_off = (off64_t)off + 1;
2349 	de->d_reclen = (ushort_t)reclen;
2350 	(void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2351 	return (reclen);
2352 }
2353 
2354 /*
2355  * sdev_mount service routines
2356  */
2357 int
2358 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2359 {
2360 	int	error;
2361 
2362 	if (uap->datalen != sizeof (*args))
2363 		return (EINVAL);
2364 
2365 	if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2366 		cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2367 		    "get user data. error %d\n", error);
2368 		return (EFAULT);
2369 	}
2370 
2371 	return (0);
2372 }
2373 
/*
 * nextdp(dp): address of the dirent64 record that follows dp in a packed
 * buffer, computed by advancing d_reclen bytes from dp.  Any definition
 * of nextdp already in effect is discarded first.
 */
#ifdef nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *) \
			    (intptr_t)((char *)(dp) + (dp)->d_reclen))
2379 
/*
 * readdir helper func
 *
 * Fills the caller's single-iovec uio with dirent64 records for the
 * directory vp.  Directory offsets are entry indices: 0 is ".", 1 is
 * "..", and cached child nodes are numbered from 2 in the order they are
 * returned by SDEV_FIRST_ENTRY/SDEV_NEXT_ENTRY.
 *
 * The caller must hold ddv->sdev_contents as reader (asserted below).
 * For a global directory that lock is dropped and re-acquired as reader
 * inside this function, so the directory contents may change during the
 * call.
 *
 * Returns 0 on success, EINVAL if the uio does not have exactly one iovec
 * or the buffer cannot hold "." / "..", ENOTDIR if vp is not a directory,
 * or an error from sdev_filldir_from_store()/uiomove().  When eofp is not
 * NULL and no error occurred, *eofp is set to 1 if the whole cache was
 * walked and 0 otherwise.
 */
int
devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
    int flags)
{
	struct sdev_node *ddv = VTOSDEV(vp);
	struct sdev_node *dv;
	dirent64_t	*dp;
	ulong_t		outcount = 0;
	size_t		namelen;
	ulong_t		alloc_count;
	void		*outbuf;
	struct iovec	*iovp;
	int		error = 0;
	size_t		reclen;
	offset_t	diroff;
	offset_t	soff;
	int		this_reclen;
	int (*vtor)(struct sdev_node *) = NULL;
	struct vattr attr;
	timestruc_t now;

	ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
	ASSERT(RW_READ_HELD(&ddv->sdev_contents));

	/* an offset at or beyond MAXOFF_T is reported as end of directory */
	if (uiop->uio_loffset >= MAXOFF_T) {
		if (eofp)
			*eofp = 1;
		return (0);
	}

	if (uiop->uio_iovcnt != 1)
		return (EINVAL);

	if (vp->v_type != VDIR)
		return (ENOTDIR);

	/* directories flagged SDEV_VTOR supply a per-node validator */
	if (ddv->sdev_flags & SDEV_VTOR) {
		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
		ASSERT(vtor);
	}

	if (eofp != NULL)
		*eofp = 0;

	/*
	 * Records are assembled in a kernel buffer the size of the caller's
	 * iovec and copied out in one uiomove() at the end.
	 */
	soff = uiop->uio_loffset;
	iovp = uiop->uio_iov;
	alloc_count = iovp->iov_len;
	dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
	outcount = 0;

	/* a zombie directory is listed straight from whatever is cached */
	if (ddv->sdev_state == SDEV_ZOMBIE)
		goto get_cache;

	if (SDEV_IS_GLOBAL(ddv)) {

		if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
		    !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
		    !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
		    ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
		    !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
		    !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
		    !sdev_reconfig_disable) {
			/*
			 * invoking "devfsadm" to do system device reconfig
			 */
			mutex_enter(&ddv->sdev_lookup_lock);
			SDEV_BLOCK_OTHERS(ddv,
			    (SDEV_READDIR|SDEV_LGWAITING));
			mutex_exit(&ddv->sdev_lookup_lock);

			sdcmn_err8(("readdir of %s by %s: reconfig\n",
			    ddv->sdev_path, curproc->p_user.u_comm));
			if (sdev_reconfig_verbose) {
				cmn_err(CE_CONT,
				    "?readdir of %s by %s: reconfig\n",
				    ddv->sdev_path, curproc->p_user.u_comm);
			}

			sdev_devfsadmd_thread(ddv, NULL, kcred);
		} else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
			/*
			 * compensate the "ls" started later than "devfsadm"
			 */
			mutex_enter(&ddv->sdev_lookup_lock);
			SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
			mutex_exit(&ddv->sdev_lookup_lock);
		}

		/*
		 * release the contents lock so that
		 * the cache may be updated by devfsadmd
		 */
		rw_exit(&ddv->sdev_contents);
		mutex_enter(&ddv->sdev_lookup_lock);
		if (SDEV_IS_READDIR(ddv))
			(void) sdev_wait4lookup(ddv, SDEV_READDIR);
		mutex_exit(&ddv->sdev_lookup_lock);
		rw_enter(&ddv->sdev_contents, RW_READER);

		sdcmn_err4(("readdir of directory %s by %s\n",
		    ddv->sdev_name, curproc->p_user.u_comm));
		/*
		 * SDEV_BUILD is cleared after this pass; a persistent
		 * directory is first refilled from its backing store.
		 * Any error from the refill is kept in 'error' while the
		 * cached entries are still returned below.
		 */
		if (ddv->sdev_flags & SDEV_BUILD) {
			if (SDEV_IS_PERSIST(ddv)) {
				error = sdev_filldir_from_store(ddv,
				    alloc_count, cred);
			}
			ddv->sdev_flags &= ~SDEV_BUILD;
		}
	}

get_cache:
	/* handle "." and ".." */
	diroff = 0;
	if (soff == 0) {
		/* first time */
		this_reclen = DIRENT64_RECLEN(1);
		if (alloc_count < this_reclen) {
			error = EINVAL;
			goto done;
		}

		dp->d_ino = (ino64_t)ddv->sdev_ino;
		dp->d_off = (off64_t)1;
		dp->d_reclen = (ushort_t)this_reclen;

		(void) strncpy(dp->d_name, ".",
		    DIRENT64_NAMELEN(this_reclen));
		outcount += dp->d_reclen;
		dp = nextdp(dp);
	}

	diroff++;
	if (soff <= 1) {
		this_reclen = DIRENT64_RECLEN(2);
		if (alloc_count < outcount + this_reclen) {
			error = EINVAL;
			goto done;
		}

		dp->d_reclen = (ushort_t)this_reclen;
		dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
		dp->d_off = (off64_t)2;

		(void) strncpy(dp->d_name, "..",
		    DIRENT64_NAMELEN(this_reclen));
		outcount += dp->d_reclen;

		dp = nextdp(dp);
	}


	/* gets the cache */
	diroff++;
	for (dv = SDEV_FIRST_ENTRY(ddv); dv;
	    dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
		sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
		    diroff, soff, dv->sdev_name));

		/*
		 * skip entries before the requested offset and nodes
		 * that are not yet in the SDEV_READY state
		 */
		if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
			sdcmn_err3(("sdev_readdir: pre-mature node  "
			    "%s %d\n", dv->sdev_name, dv->sdev_state));
			continue;
		}

		/*
		 * Check validity of node
		 */
		if (vtor) {
			switch (vtor(dv)) {
			case SDEV_VTOR_VALID:
				break;
			case SDEV_VTOR_INVALID:
			case SDEV_VTOR_SKIP:
				continue;
			default:
				cmn_err(CE_PANIC,
				    "dev fs: validator failed: %s(%p)\n",
				    dv->sdev_name, (void *)dv);
				break;
			/*NOTREACHED*/
			}
		}

		/* stop, leaving dv non-NULL, once the next record won't fit */
		namelen = strlen(dv->sdev_name);
		reclen = DIRENT64_RECLEN(namelen);
		if (outcount + reclen > alloc_count) {
			goto full;
		}
		dp->d_reclen = (ushort_t)reclen;
		dp->d_ino = (ino64_t)dv->sdev_ino;
		dp->d_off = (off64_t)diroff + 1;
		(void) strncpy(dp->d_name, dv->sdev_name,
		    DIRENT64_NAMELEN(reclen));
		outcount += reclen;
		dp = nextdp(dp);
	}

full:
	sdcmn_err4(("sdev_readdir: moving %lu bytes: "
	    "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
	    (void *)dv));

	if (outcount)
		error = uiomove(outbuf, outcount, UIO_READ, uiop);

	/*
	 * On success the next read resumes at diroff; dv is NULL only when
	 * the loop above ran off the end of the cache, i.e. at EOF.
	 */
	if (!error) {
		uiop->uio_loffset = diroff;
		if (eofp)
			*eofp = dv ? 0 : 1;
	}


	/*
	 * Stamp ctime/atime on the backing store node, if there is one;
	 * the result of the update is deliberately ignored.
	 */
	if (ddv->sdev_attrvp) {
		gethrestime(&now);
		attr.va_ctime = now;
		attr.va_atime = now;
		attr.va_mask = AT_CTIME|AT_ATIME;

		(void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
	}
done:
	kmem_free(outbuf, alloc_count);
	return (error);
}
2608 
2609 static int
2610 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2611 {
2612 	vnode_t *vp;
2613 	vnode_t *cvp;
2614 	struct sdev_node *svp;
2615 	char *nm;
2616 	struct pathname pn;
2617 	int error;
2618 	int persisted = 0;
2619 
2620 	ASSERT(INGLOBALZONE(curproc));
2621 
2622 	if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2623 		return (error);
2624 	nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2625 
2626 	vp = rootdir;
2627 	VN_HOLD(vp);
2628 
2629 	while (pn_pathleft(&pn)) {
2630 		ASSERT(vp->v_type == VDIR || vp->v_type == VLNK);
2631 		(void) pn_getcomponent(&pn, nm);
2632 
2633 		/*
2634 		 * Deal with the .. special case where we may be
2635 		 * traversing up across a mount point, to the
2636 		 * root of this filesystem or global root.
2637 		 */
2638 		if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) {
2639 checkforroot:
2640 			if (VN_CMP(vp, rootdir)) {
2641 				nm[1] = 0;
2642 			} else if (vp->v_flag & VROOT) {
2643 				vfs_t *vfsp;
2644 				cvp = vp;
2645 				vfsp = cvp->v_vfsp;
2646 				vfs_rlock_wait(vfsp);
2647 				vp = cvp->v_vfsp->vfs_vnodecovered;
2648 				if (vp == NULL ||
2649 				    (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
2650 					vfs_unlock(vfsp);
2651 					VN_RELE(cvp);
2652 					error = EIO;
2653 					break;
2654 				}
2655 				VN_HOLD(vp);
2656 				vfs_unlock(vfsp);
2657 				VN_RELE(cvp);
2658 				cvp = NULL;
2659 				goto checkforroot;
2660 			}
2661 		}
2662 
2663 		error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2664 		    NULL, NULL);
2665 		if (error) {
2666 			VN_RELE(vp);
2667 			break;
2668 		}
2669 
2670 		/* traverse mount points encountered on our journey */
2671 		if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2672 			VN_RELE(vp);
2673 			VN_RELE(cvp);
2674 			break;
2675 		}
2676 
2677 		/*
2678 		 * symbolic link, can be either relative and absolute
2679 		 */
2680 		if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) {
2681 			struct pathname linkpath;
2682 			pn_alloc(&linkpath);
2683 			if (error = pn_getsymlink(cvp, &linkpath, kcred)) {
2684 				pn_free(&linkpath);
2685 				break;
2686 			}
2687 			if (pn_pathleft(&linkpath) == 0)
2688 				(void) pn_set(&linkpath, ".");
2689 			error = pn_insert(&pn, &linkpath, strlen(nm));
2690 			pn_free(&linkpath);
2691 			if (pn.pn_pathlen == 0) {
2692 				VN_RELE(vp);
2693 				return (ENOENT);
2694 			}
2695 			if (pn.pn_path[0] == '/') {
2696 				pn_skipslash(&pn);
2697 				VN_RELE(vp);
2698 				VN_RELE(cvp);
2699 				vp = rootdir;
2700 				VN_HOLD(vp);
2701 			} else {
2702 				VN_RELE(cvp);
2703 			}
2704 			continue;
2705 		}
2706 
2707 		VN_RELE(vp);
2708 
2709 		/*
2710 		 * Direct the operation to the persisting filesystem
2711 		 * underlying /dev.  Bail if we encounter a
2712 		 * non-persistent dev entity here.
2713 		 */
2714 		if (cvp->v_vfsp->vfs_fstype == devtype) {
2715 
2716 			if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2717 				error = ENOENT;
2718 				VN_RELE(cvp);
2719 				break;
2720 			}
2721 
2722 			if (VTOSDEV(cvp) == NULL) {
2723 				error = ENOENT;
2724 				VN_RELE(cvp);
2725 				break;
2726 			}
2727 			svp = VTOSDEV(cvp);
2728 			if ((vp = svp->sdev_attrvp) == NULL) {
2729 				error = ENOENT;
2730 				VN_RELE(cvp);
2731 				break;
2732 			}
2733 			persisted = 1;
2734 			VN_HOLD(vp);
2735 			VN_RELE(cvp);
2736 			cvp = vp;
2737 		}
2738 
2739 		vp = cvp;
2740 		pn_skipslash(&pn);
2741 	}
2742 
2743 	kmem_free(nm, MAXNAMELEN);
2744 	pn_free(&pn);
2745 
2746 	if (error)
2747 		return (error);
2748 
2749 	/*
2750 	 * Only return persisted nodes in the filesystem underlying /dev.
2751 	 */
2752 	if (!persisted) {
2753 		VN_RELE(vp);
2754 		return (ENOENT);
2755 	}
2756 
2757 	*r_vp = vp;
2758 	return (0);
2759 }
2760 
2761 int
2762 sdev_modctl_readdir(const char *dir, char ***dirlistp,
2763 	int *npathsp, int *npathsp_alloc, int checking_empty)
2764 {
2765 	char	**pathlist = NULL;
2766 	char	**newlist = NULL;
2767 	int	npaths = 0;
2768 	int	npaths_alloc = 0;
2769 	dirent64_t *dbuf = NULL;
2770 	int	n;
2771 	char	*s;
2772 	int error;
2773 	vnode_t *vp;
2774 	int eof;
2775 	struct iovec iov;
2776 	struct uio uio;
2777 	struct dirent64 *dp;
2778 	size_t dlen;
2779 	size_t dbuflen;
2780 	int ndirents = 64;
2781 	char *nm;
2782 
2783 	error = sdev_modctl_lookup(dir, &vp);
2784 	sdcmn_err11(("modctl readdir: %s by %s: %s\n",
2785 	    dir, curproc->p_user.u_comm,
2786 	    (error == 0) ? "ok" : "failed"));
2787 	if (error)
2788 		return (error);
2789 
2790 	dlen = ndirents * (sizeof (*dbuf));
2791 	dbuf = kmem_alloc(dlen, KM_SLEEP);
2792 
2793 	uio.uio_iov = &iov;
2794 	uio.uio_iovcnt = 1;
2795 	uio.uio_segflg = UIO_SYSSPACE;
2796 	uio.uio_fmode = 0;
2797 	uio.uio_extflg = UIO_COPY_CACHED;
2798 	uio.uio_loffset = 0;
2799 	uio.uio_llimit = MAXOFFSET_T;
2800 
2801 	eof = 0;
2802 	error = 0;
2803 	while (!error && !eof) {
2804 		uio.uio_resid = dlen;
2805 		iov.iov_base = (char *)dbuf;
2806 		iov.iov_len = dlen;
2807 
2808 		(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2809 		error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
2810 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2811 
2812 		dbuflen = dlen - uio.uio_resid;
2813 
2814 		if (error || dbuflen == 0)
2815 			break;
2816 
2817 		for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
2818 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
2819 
2820 			nm = dp->d_name;
2821 
2822 			if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
2823 				continue;
2824 			if (npaths == npaths_alloc) {
2825 				npaths_alloc += 64;
2826 				newlist = (char **)
2827 				    kmem_zalloc((npaths_alloc + 1) *
2828 				    sizeof (char *), KM_SLEEP);
2829 				if (pathlist) {
2830 					bcopy(pathlist, newlist,
2831 					    npaths * sizeof (char *));
2832 					kmem_free(pathlist,
2833 					    (npaths + 1) * sizeof (char *));
2834 				}
2835 				pathlist = newlist;
2836 			}
2837 			n = strlen(nm) + 1;
2838 			s = kmem_alloc(n, KM_SLEEP);
2839 			bcopy(nm, s, n);
2840 			pathlist[npaths++] = s;
2841 			sdcmn_err11(("  %s/%s\n", dir, s));
2842 
2843 			/* if checking empty, one entry is as good as many */
2844 			if (checking_empty) {
2845 				eof = 1;
2846 				break;
2847 			}
2848 		}
2849 	}
2850 
2851 exit:
2852 	VN_RELE(vp);
2853 
2854 	if (dbuf)
2855 		kmem_free(dbuf, dlen);
2856 
2857 	if (error)
2858 		return (error);
2859 
2860 	*dirlistp = pathlist;
2861 	*npathsp = npaths;
2862 	*npathsp_alloc = npaths_alloc;
2863 
2864 	return (0);
2865 }
2866 
2867 void
2868 sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
2869 {
2870 	int	i, n;
2871 
2872 	for (i = 0; i < npaths; i++) {
2873 		n = strlen(pathlist[i]) + 1;
2874 		kmem_free(pathlist[i], n);
2875 	}
2876 
2877 	kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
2878 }
2879 
2880 int
2881 sdev_modctl_devexists(const char *path)
2882 {
2883 	vnode_t *vp;
2884 	int error;
2885 
2886 	error = sdev_modctl_lookup(path, &vp);
2887 	sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
2888 	    path, curproc->p_user.u_comm,
2889 	    (error == 0) ? "ok" : "failed"));
2890 	if (error == 0)
2891 		VN_RELE(vp);
2892 
2893 	return (error);
2894 }
2895 
2896 extern int sdev_vnodeops_tbl_size;
2897 
2898 /*
2899  * construct a new template with overrides from vtab
2900  */
2901 static fs_operation_def_t *
2902 sdev_merge_vtab(const fs_operation_def_t tab[])
2903 {
2904 	fs_operation_def_t *new;
2905 	const fs_operation_def_t *tab_entry;
2906 
2907 	/* make a copy of standard vnode ops table */
2908 	new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
2909 	bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
2910 
2911 	/* replace the overrides from tab */
2912 	for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
2913 		fs_operation_def_t *std_entry = new;
2914 		while (std_entry->name) {
2915 			if (strcmp(tab_entry->name, std_entry->name) == 0) {
2916 				std_entry->func = tab_entry->func;
2917 				break;
2918 			}
2919 			std_entry++;
2920 		}
2921 		if (std_entry->name == NULL)
2922 			cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
2923 			    tab_entry->name);
2924 	}
2925 
2926 	return (new);
2927 }
2928 
2929 /* free memory allocated by sdev_merge_vtab */
2930 static void
2931 sdev_free_vtab(fs_operation_def_t *new)
2932 {
2933 	kmem_free(new, sdev_vnodeops_tbl_size);
2934 }
2935 
2936 /*
2937  * a generic setattr() function
2938  *
2939  * note: flags only supports AT_UID and AT_GID.
2940  *	 Future enhancements can be done for other types, e.g. AT_MODE
2941  */
2942 int
2943 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
2944     struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
2945     int), int protocol)
2946 {
2947 	struct sdev_node	*dv = VTOSDEV(vp);
2948 	struct sdev_node	*parent = dv->sdev_dotdot;
2949 	struct vattr		*get;
2950 	uint_t			mask = vap->va_mask;
2951 	int 			error;
2952 
2953 	/* some sanity checks */
2954 	if (vap->va_mask & AT_NOSET)
2955 		return (EINVAL);
2956 
2957 	if (vap->va_mask & AT_SIZE) {
2958 		if (vp->v_type == VDIR) {
2959 			return (EISDIR);
2960 		}
2961 	}
2962 
2963 	/* no need to set attribute, but do not fail either */
2964 	ASSERT(parent);
2965 	rw_enter(&parent->sdev_contents, RW_READER);
2966 	if (dv->sdev_state == SDEV_ZOMBIE) {
2967 		rw_exit(&parent->sdev_contents);
2968 		return (0);
2969 	}
2970 
2971 	/* If backing store exists, just set it. */
2972 	if (dv->sdev_attrvp) {
2973 		rw_exit(&parent->sdev_contents);
2974 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
2975 	}
2976 
2977 	/*
2978 	 * Otherwise, for nodes with the persistence attribute, create it.
2979 	 */
2980 	ASSERT(dv->sdev_attr);
2981 	if (SDEV_IS_PERSIST(dv) ||
2982 	    ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
2983 		sdev_vattr_merge(dv, vap);
2984 		rw_enter(&dv->sdev_contents, RW_WRITER);
2985 		error = sdev_shadow_node(dv, cred);
2986 		rw_exit(&dv->sdev_contents);
2987 		rw_exit(&parent->sdev_contents);
2988 
2989 		if (error)
2990 			return (error);
2991 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
2992 	}
2993 
2994 
2995 	/*
2996 	 * sdev_attr was allocated in sdev_mknode
2997 	 */
2998 	rw_enter(&dv->sdev_contents, RW_WRITER);
2999 	error = secpolicy_vnode_setattr(cred, vp, vap,
3000 	    dv->sdev_attr, flags, sdev_unlocked_access, dv);
3001 	if (error) {
3002 		rw_exit(&dv->sdev_contents);
3003 		rw_exit(&parent->sdev_contents);
3004 		return (error);
3005 	}
3006 
3007 	get = dv->sdev_attr;
3008 	if (mask & AT_MODE) {
3009 		get->va_mode &= S_IFMT;
3010 		get->va_mode |= vap->va_mode & ~S_IFMT;
3011 	}
3012 
3013 	if ((mask & AT_UID) || (mask & AT_GID)) {
3014 		if (mask & AT_UID)
3015 			get->va_uid = vap->va_uid;
3016 		if (mask & AT_GID)
3017 			get->va_gid = vap->va_gid;
3018 		/*
3019 		 * a callback must be provided if the protocol is set
3020 		 */
3021 		if ((protocol & AT_UID) || (protocol & AT_GID)) {
3022 			ASSERT(callback);
3023 			error = callback(dv, get, protocol);
3024 			if (error) {
3025 				rw_exit(&dv->sdev_contents);
3026 				rw_exit(&parent->sdev_contents);
3027 				return (error);
3028 			}
3029 		}
3030 	}
3031 
3032 	if (mask & AT_ATIME)
3033 		get->va_atime = vap->va_atime;
3034 	if (mask & AT_MTIME)
3035 		get->va_mtime = vap->va_mtime;
3036 	if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
3037 		gethrestime(&get->va_ctime);
3038 	}
3039 
3040 	sdev_vattr_merge(dv, get);
3041 	rw_exit(&dv->sdev_contents);
3042 	rw_exit(&parent->sdev_contents);
3043 	return (0);
3044 }
3045 
3046 /*
3047  * a generic inactive() function
3048  */
3049 /*ARGSUSED*/
3050 void
3051 devname_inactive_func(struct vnode *vp, struct cred *cred,
3052     void (*callback)(struct vnode *))
3053 {
3054 	int clean;
3055 	struct sdev_node *dv = VTOSDEV(vp);
3056 	struct sdev_node *ddv = dv->sdev_dotdot;
3057 	int state;
3058 
3059 	rw_enter(&ddv->sdev_contents, RW_WRITER);
3060 	state = dv->sdev_state;
3061 
3062 	mutex_enter(&vp->v_lock);
3063 	ASSERT(vp->v_count >= 1);
3064 
3065 	if (vp->v_count == 1 && callback != NULL)
3066 		callback(vp);
3067 
3068 	clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
3069 
3070 	/*
3071 	 * last ref count on the ZOMBIE node is released.
3072 	 * clean up the sdev_node, and
3073 	 * release the hold on the backing store node so that
3074 	 * the ZOMBIE backing stores also cleaned out.
3075 	 */
3076 	if (clean) {
3077 		ASSERT(ddv);
3078 
3079 		ddv->sdev_nlink--;
3080 		if (vp->v_type == VDIR) {
3081 			dv->sdev_nlink--;
3082 		}
3083 		if ((dv->sdev_flags & SDEV_STALE) == 0)
3084 			avl_remove(&ddv->sdev_entries, dv);
3085 		dv->sdev_nlink--;
3086 		--vp->v_count;
3087 		mutex_exit(&vp->v_lock);
3088 		sdev_nodedestroy(dv, 0);
3089 	} else {
3090 		--vp->v_count;
3091 		mutex_exit(&vp->v_lock);
3092 	}
3093 	rw_exit(&ddv->sdev_contents);
3094 }
3095