xref: /titanic_41/usr/src/uts/common/fs/dev/sdev_subr.c (revision 82629e3015252bf18319ba3815c773df23e21436)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * utility routines for the /dev fs
28  */
29 
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/t_lock.h>
33 #include <sys/systm.h>
34 #include <sys/sysmacros.h>
35 #include <sys/user.h>
36 #include <sys/time.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/file.h>
40 #include <sys/fcntl.h>
41 #include <sys/flock.h>
42 #include <sys/kmem.h>
43 #include <sys/uio.h>
44 #include <sys/errno.h>
45 #include <sys/stat.h>
46 #include <sys/cred.h>
47 #include <sys/dirent.h>
48 #include <sys/pathname.h>
49 #include <sys/cmn_err.h>
50 #include <sys/debug.h>
51 #include <sys/mode.h>
52 #include <sys/policy.h>
53 #include <fs/fs_subr.h>
54 #include <sys/mount.h>
55 #include <sys/fs/snode.h>
56 #include <sys/fs/dv_node.h>
57 #include <sys/fs/sdev_impl.h>
58 #include <sys/sunndi.h>
59 #include <sys/sunmdi.h>
60 #include <sys/conf.h>
61 #include <sys/proc.h>
62 #include <sys/user.h>
63 #include <sys/modctl.h>
64 
#ifdef DEBUG
/*
 * DEBUG-only tunables.  sdev_debug_cache_flags is handed to
 * kmem_cache_create() by sdev_node_cache_init(); sdev_debug is
 * presumably the message mask consulted by the sdcmn_err*() macros
 * (their definition is not in this file).
 */
int sdev_debug_cache_flags = 0;
int sdev_debug = 0x1;
#endif
69 
70 /*
71  * globals
72  */
73 /* prototype memory vattrs */
74 vattr_t sdev_vattr_dir = {
75 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
76 	VDIR,					/* va_type */
77 	SDEV_DIRMODE_DEFAULT,			/* va_mode */
78 	SDEV_UID_DEFAULT,			/* va_uid */
79 	SDEV_GID_DEFAULT,			/* va_gid */
80 	0,					/* va_fsid */
81 	0,					/* va_nodeid */
82 	0,					/* va_nlink */
83 	0,					/* va_size */
84 	0,					/* va_atime */
85 	0,					/* va_mtime */
86 	0,					/* va_ctime */
87 	0,					/* va_rdev */
88 	0,					/* va_blksize */
89 	0,					/* va_nblocks */
90 	0					/* va_vcode */
91 };
92 
93 vattr_t sdev_vattr_lnk = {
94 	AT_TYPE|AT_MODE,			/* va_mask */
95 	VLNK,					/* va_type */
96 	SDEV_LNKMODE_DEFAULT,			/* va_mode */
97 	SDEV_UID_DEFAULT,			/* va_uid */
98 	SDEV_GID_DEFAULT,			/* va_gid */
99 	0,					/* va_fsid */
100 	0,					/* va_nodeid */
101 	0,					/* va_nlink */
102 	0,					/* va_size */
103 	0,					/* va_atime */
104 	0,					/* va_mtime */
105 	0,					/* va_ctime */
106 	0,					/* va_rdev */
107 	0,					/* va_blksize */
108 	0,					/* va_nblocks */
109 	0					/* va_vcode */
110 };
111 
112 vattr_t sdev_vattr_blk = {
113 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
114 	VBLK,					/* va_type */
115 	S_IFBLK | SDEV_DEVMODE_DEFAULT,		/* va_mode */
116 	SDEV_UID_DEFAULT,			/* va_uid */
117 	SDEV_GID_DEFAULT,			/* va_gid */
118 	0,					/* va_fsid */
119 	0,					/* va_nodeid */
120 	0,					/* va_nlink */
121 	0,					/* va_size */
122 	0,					/* va_atime */
123 	0,					/* va_mtime */
124 	0,					/* va_ctime */
125 	0,					/* va_rdev */
126 	0,					/* va_blksize */
127 	0,					/* va_nblocks */
128 	0					/* va_vcode */
129 };
130 
131 vattr_t sdev_vattr_chr = {
132 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
133 	VCHR,					/* va_type */
134 	S_IFCHR | SDEV_DEVMODE_DEFAULT,		/* va_mode */
135 	SDEV_UID_DEFAULT,			/* va_uid */
136 	SDEV_GID_DEFAULT,			/* va_gid */
137 	0,					/* va_fsid */
138 	0,					/* va_nodeid */
139 	0,					/* va_nlink */
140 	0,					/* va_size */
141 	0,					/* va_atime */
142 	0,					/* va_mtime */
143 	0,					/* va_ctime */
144 	0,					/* va_rdev */
145 	0,					/* va_blksize */
146 	0,					/* va_nblocks */
147 	0					/* va_vcode */
148 };
149 
150 kmem_cache_t	*sdev_node_cache;	/* sdev_node cache */
151 int		devtype;		/* fstype */
152 
153 /* static */
154 static struct vnodeops *sdev_get_vop(struct sdev_node *);
155 static void sdev_set_no_negcache(struct sdev_node *);
156 static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
157 static void sdev_free_vtab(fs_operation_def_t *);
158 
159 static void
160 sdev_prof_free(struct sdev_node *dv)
161 {
162 	ASSERT(!SDEV_IS_GLOBAL(dv));
163 	if (dv->sdev_prof.dev_name)
164 		nvlist_free(dv->sdev_prof.dev_name);
165 	if (dv->sdev_prof.dev_map)
166 		nvlist_free(dv->sdev_prof.dev_map);
167 	if (dv->sdev_prof.dev_symlink)
168 		nvlist_free(dv->sdev_prof.dev_symlink);
169 	if (dv->sdev_prof.dev_glob_incdir)
170 		nvlist_free(dv->sdev_prof.dev_glob_incdir);
171 	if (dv->sdev_prof.dev_glob_excdir)
172 		nvlist_free(dv->sdev_prof.dev_glob_excdir);
173 	bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
174 }
175 
176 /* sdev_node cache constructor */
177 /*ARGSUSED1*/
178 static int
179 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
180 {
181 	struct sdev_node *dv = (struct sdev_node *)buf;
182 	struct vnode *vp;
183 
184 	bzero(buf, sizeof (struct sdev_node));
185 	vp = dv->sdev_vnode = vn_alloc(flag);
186 	if (vp == NULL) {
187 		return (-1);
188 	}
189 	vp->v_data = dv;
190 	rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
191 	return (0);
192 }
193 
194 /* sdev_node cache destructor */
195 /*ARGSUSED1*/
196 static void
197 i_sdev_node_dtor(void *buf, void *arg)
198 {
199 	struct sdev_node *dv = (struct sdev_node *)buf;
200 	struct vnode *vp = SDEVTOV(dv);
201 
202 	rw_destroy(&dv->sdev_contents);
203 	vn_free(vp);
204 }
205 
206 /* initialize sdev_node cache */
207 void
208 sdev_node_cache_init()
209 {
210 	int flags = 0;
211 
212 #ifdef	DEBUG
213 	flags = sdev_debug_cache_flags;
214 	if (flags)
215 		sdcmn_err(("cache debug flags 0x%x\n", flags));
216 #endif	/* DEBUG */
217 
218 	ASSERT(sdev_node_cache == NULL);
219 	sdev_node_cache = kmem_cache_create("sdev_node_cache",
220 	    sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
221 	    NULL, NULL, NULL, flags);
222 }
223 
224 /* destroy sdev_node cache */
225 void
226 sdev_node_cache_fini()
227 {
228 	ASSERT(sdev_node_cache != NULL);
229 	kmem_cache_destroy(sdev_node_cache);
230 	sdev_node_cache = NULL;
231 }
232 
233 /*
234  * Compare two nodes lexographically to balance avl tree
235  */
236 static int
237 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
238 {
239 	int rv;
240 	if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
241 		return (0);
242 	return ((rv < 0) ? -1 : 1);
243 }
244 
245 void
246 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
247 {
248 	ASSERT(dv);
249 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
250 	dv->sdev_state = state;
251 }
252 
253 static void
254 sdev_attr_update(struct sdev_node *dv, vattr_t *vap)
255 {
256 	timestruc_t	now;
257 	struct vattr	*attrp;
258 	uint_t		mask;
259 
260 	ASSERT(dv->sdev_attr);
261 	ASSERT(vap);
262 
263 	attrp = dv->sdev_attr;
264 	mask = vap->va_mask;
265 	if (mask & AT_TYPE)
266 		attrp->va_type = vap->va_type;
267 	if (mask & AT_MODE)
268 		attrp->va_mode = vap->va_mode;
269 	if (mask & AT_UID)
270 		attrp->va_uid = vap->va_uid;
271 	if (mask & AT_GID)
272 		attrp->va_gid = vap->va_gid;
273 	if (mask & AT_RDEV)
274 		attrp->va_rdev = vap->va_rdev;
275 
276 	gethrestime(&now);
277 	attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now;
278 	attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now;
279 	attrp->va_ctime = (mask & AT_CTIME) ? vap->va_ctime : now;
280 }
281 
282 static void
283 sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap)
284 {
285 	ASSERT(dv->sdev_attr == NULL);
286 	ASSERT(vap->va_mask & AT_TYPE);
287 	ASSERT(vap->va_mask & AT_MODE);
288 
289 	dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
290 	sdev_attr_update(dv, vap);
291 }
292 
293 /* alloc and initialize a sdev_node */
294 int
295 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
296     vattr_t *vap)
297 {
298 	struct sdev_node *dv = NULL;
299 	struct vnode *vp;
300 	size_t nmlen, len;
301 	devname_handle_t  *dhl;
302 
303 	nmlen = strlen(nm) + 1;
304 	if (nmlen > MAXNAMELEN) {
305 		sdcmn_err9(("sdev_nodeinit: node name %s"
306 		    " too long\n", nm));
307 		*newdv = NULL;
308 		return (ENAMETOOLONG);
309 	}
310 
311 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
312 
313 	dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
314 	bcopy(nm, dv->sdev_name, nmlen);
315 	dv->sdev_namelen = nmlen - 1;	/* '\0' not included */
316 	len = strlen(ddv->sdev_path) + strlen(nm) + 2;
317 	dv->sdev_path = kmem_alloc(len, KM_SLEEP);
318 	(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
319 	/* overwritten for VLNK nodes */
320 	dv->sdev_symlink = NULL;
321 
322 	vp = SDEVTOV(dv);
323 	vn_reinit(vp);
324 	vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
325 	if (vap)
326 		vp->v_type = vap->va_type;
327 
328 	/*
329 	 * initialized to the parent's vnodeops.
330 	 * maybe overwriten for a VDIR
331 	 */
332 	vn_setops(vp, vn_getops(SDEVTOV(ddv)));
333 	vn_exists(vp);
334 
335 	dv->sdev_dotdot = NULL;
336 	dv->sdev_attrvp = NULL;
337 	if (vap) {
338 		sdev_attr_alloc(dv, vap);
339 	} else {
340 		dv->sdev_attr = NULL;
341 	}
342 
343 	dv->sdev_ino = sdev_mkino(dv);
344 	dv->sdev_nlink = 0;		/* updated on insert */
345 	dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
346 	dv->sdev_flags |= SDEV_BUILD;
347 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
348 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
349 	if (SDEV_IS_GLOBAL(ddv)) {
350 		dv->sdev_flags |= SDEV_GLOBAL;
351 		dhl = &(dv->sdev_handle);
352 		dhl->dh_data = dv;
353 		dhl->dh_args = NULL;
354 		sdev_set_no_negcache(dv);
355 		dv->sdev_gdir_gen = 0;
356 	} else {
357 		dv->sdev_flags &= ~SDEV_GLOBAL;
358 		dv->sdev_origin = NULL; /* set later */
359 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
360 		dv->sdev_ldir_gen = 0;
361 		dv->sdev_devtree_gen = 0;
362 	}
363 
364 	rw_enter(&dv->sdev_contents, RW_WRITER);
365 	sdev_set_nodestate(dv, SDEV_INIT);
366 	rw_exit(&dv->sdev_contents);
367 	*newdv = dv;
368 
369 	return (0);
370 }
371 
372 /*
373  * transition a sdev_node into SDEV_READY state
374  */
375 int
376 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
377     void *args, struct cred *cred)
378 {
379 	int error = 0;
380 	struct vnode *vp = SDEVTOV(dv);
381 	vtype_t type;
382 
383 	ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
384 
385 	type = vap->va_type;
386 	vp->v_type = type;
387 	vp->v_rdev = vap->va_rdev;
388 	rw_enter(&dv->sdev_contents, RW_WRITER);
389 	if (type == VDIR) {
390 		dv->sdev_nlink = 2;
391 		dv->sdev_flags &= ~SDEV_PERSIST;
392 		dv->sdev_flags &= ~SDEV_DYNAMIC;
393 		vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
394 		ASSERT(dv->sdev_dotdot);
395 		ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
396 		vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
397 		avl_create(&dv->sdev_entries,
398 		    (int (*)(const void *, const void *))sdev_compare_nodes,
399 		    sizeof (struct sdev_node),
400 		    offsetof(struct sdev_node, sdev_avllink));
401 	} else if (type == VLNK) {
402 		ASSERT(args);
403 		dv->sdev_nlink = 1;
404 		dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
405 	} else {
406 		dv->sdev_nlink = 1;
407 	}
408 
409 	if (!(SDEV_IS_GLOBAL(dv))) {
410 		dv->sdev_origin = (struct sdev_node *)args;
411 		dv->sdev_flags &= ~SDEV_PERSIST;
412 	}
413 
414 	/*
415 	 * shadow node is created here OR
416 	 * if failed (indicated by dv->sdev_attrvp == NULL),
417 	 * created later in sdev_setattr
418 	 */
419 	if (avp) {
420 		dv->sdev_attrvp = avp;
421 	} else {
422 		if (dv->sdev_attr == NULL) {
423 			sdev_attr_alloc(dv, vap);
424 		} else {
425 			sdev_attr_update(dv, vap);
426 		}
427 
428 		if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv))
429 			error = sdev_shadow_node(dv, cred);
430 	}
431 
432 	if (error == 0) {
433 		/* transition to READY state */
434 		sdev_set_nodestate(dv, SDEV_READY);
435 		sdev_nc_node_exists(dv);
436 	} else {
437 		sdev_set_nodestate(dv, SDEV_ZOMBIE);
438 	}
439 	rw_exit(&dv->sdev_contents);
440 	return (error);
441 }
442 
443 /*
444  * setting ZOMBIE state
445  */
446 static int
447 sdev_nodezombied(struct sdev_node *dv)
448 {
449 	rw_enter(&dv->sdev_contents, RW_WRITER);
450 	sdev_set_nodestate(dv, SDEV_ZOMBIE);
451 	rw_exit(&dv->sdev_contents);
452 	return (0);
453 }
454 
455 /*
456  * Build the VROOT sdev_node.
457  */
458 /*ARGSUSED*/
459 struct sdev_node *
460 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
461     struct vnode *avp, struct cred *cred)
462 {
463 	struct sdev_node *dv;
464 	struct vnode *vp;
465 	char devdir[] = "/dev";
466 
467 	ASSERT(sdev_node_cache != NULL);
468 	ASSERT(avp);
469 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
470 	vp = SDEVTOV(dv);
471 	vn_reinit(vp);
472 	vp->v_flag |= VROOT;
473 	vp->v_vfsp = vfsp;
474 	vp->v_type = VDIR;
475 	vp->v_rdev = devdev;
476 	vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
477 	vn_exists(vp);
478 
479 	if (vfsp->vfs_mntpt)
480 		dv->sdev_name = i_ddi_strdup(
481 		    (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
482 	else
483 		/* vfs_mountdev1 set mount point later */
484 		dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
485 	dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
486 	dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
487 	dv->sdev_ino = SDEV_ROOTINO;
488 	dv->sdev_nlink = 2;		/* name + . (no sdev_insert) */
489 	dv->sdev_dotdot = dv;		/* .. == self */
490 	dv->sdev_attrvp = avp;
491 	dv->sdev_attr = NULL;
492 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
493 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
494 	if (strcmp(dv->sdev_name, "/dev") == 0) {
495 		dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
496 		bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
497 		dv->sdev_gdir_gen = 0;
498 	} else {
499 		dv->sdev_flags = SDEV_BUILD;
500 		dv->sdev_flags &= ~SDEV_PERSIST;
501 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
502 		dv->sdev_ldir_gen = 0;
503 		dv->sdev_devtree_gen = 0;
504 	}
505 
506 	avl_create(&dv->sdev_entries,
507 	    (int (*)(const void *, const void *))sdev_compare_nodes,
508 	    sizeof (struct sdev_node),
509 	    offsetof(struct sdev_node, sdev_avllink));
510 
511 	rw_enter(&dv->sdev_contents, RW_WRITER);
512 	sdev_set_nodestate(dv, SDEV_READY);
513 	rw_exit(&dv->sdev_contents);
514 	sdev_nc_node_exists(dv);
515 	return (dv);
516 }
517 
518 /* directory dependent vop table */
519 struct sdev_vop_table {
520 	char *vt_name;				/* subdirectory name */
521 	const fs_operation_def_t *vt_service;	/* vnodeops table */
522 	struct vnodeops *vt_vops;		/* constructed vop */
523 	struct vnodeops **vt_global_vops;	/* global container for vop */
524 	int (*vt_vtor)(struct sdev_node *);	/* validate sdev_node */
525 	int vt_flags;
526 };
527 
528 /*
529  * A nice improvement would be to provide a plug-in mechanism
530  * for this table instead of a const table.
531  */
532 static struct sdev_vop_table vtab[] =
533 {
534 	{ "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
535 	SDEV_DYNAMIC | SDEV_VTOR },
536 
537 	{ "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate,
538 	SDEV_DYNAMIC | SDEV_VTOR },
539 
540 	{ "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops,
541 	devzvol_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },
542 
543 	{ "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },
544 
545 	{ "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
546 	SDEV_DYNAMIC | SDEV_VTOR },
547 
548 	{ "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops,
549 	devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE },
550 
551 	{ NULL, NULL, NULL, NULL, NULL, 0}
552 };
553 
554 struct sdev_vop_table *
555 sdev_match(struct sdev_node *dv)
556 {
557 	int vlen;
558 	int i;
559 
560 	for (i = 0; vtab[i].vt_name; i++) {
561 		if (strcmp(vtab[i].vt_name, dv->sdev_name) == 0)
562 			return (&vtab[i]);
563 		if (vtab[i].vt_flags & SDEV_SUBDIR) {
564 			char *ptr;
565 
566 			ASSERT(strlen(dv->sdev_path) > 5);
567 			ptr = dv->sdev_path + 5;
568 			vlen = strlen(vtab[i].vt_name);
569 			if ((strncmp(vtab[i].vt_name, ptr,
570 			    vlen - 1) == 0) && ptr[vlen] == '/')
571 				return (&vtab[i]);
572 		}
573 
574 	}
575 	return (NULL);
576 }
577 
578 /*
579  *  sets a directory's vnodeops if the directory is in the vtab;
580  */
581 static struct vnodeops *
582 sdev_get_vop(struct sdev_node *dv)
583 {
584 	struct sdev_vop_table *vtp;
585 	char *path;
586 
587 	path = dv->sdev_path;
588 	ASSERT(path);
589 
590 	/* gets the relative path to /dev/ */
591 	path += 5;
592 
593 	/* gets the vtab entry it matches */
594 	if ((vtp = sdev_match(dv)) != NULL) {
595 		dv->sdev_flags |= vtp->vt_flags;
596 
597 		if (vtp->vt_vops) {
598 			if (vtp->vt_global_vops)
599 				*(vtp->vt_global_vops) = vtp->vt_vops;
600 			return (vtp->vt_vops);
601 		}
602 
603 		if (vtp->vt_service) {
604 			fs_operation_def_t *templ;
605 			templ = sdev_merge_vtab(vtp->vt_service);
606 			if (vn_make_ops(vtp->vt_name,
607 			    (const fs_operation_def_t *)templ,
608 			    &vtp->vt_vops) != 0) {
609 				cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
610 				    vtp->vt_name);
611 				/*NOTREACHED*/
612 			}
613 			if (vtp->vt_global_vops) {
614 				*(vtp->vt_global_vops) = vtp->vt_vops;
615 			}
616 			sdev_free_vtab(templ);
617 			return (vtp->vt_vops);
618 		}
619 		return (sdev_vnodeops);
620 	}
621 
622 	/* child inherits the persistence of the parent */
623 	if (SDEV_IS_PERSIST(dv->sdev_dotdot))
624 		dv->sdev_flags |= SDEV_PERSIST;
625 
626 	return (sdev_vnodeops);
627 }
628 
629 static void
630 sdev_set_no_negcache(struct sdev_node *dv)
631 {
632 	int i;
633 	char *path;
634 
635 	ASSERT(dv->sdev_path);
636 	path = dv->sdev_path + strlen("/dev/");
637 
638 	for (i = 0; vtab[i].vt_name; i++) {
639 		if (strcmp(vtab[i].vt_name, path) == 0) {
640 			if (vtab[i].vt_flags & SDEV_NO_NCACHE)
641 				dv->sdev_flags |= SDEV_NO_NCACHE;
642 			break;
643 		}
644 	}
645 }
646 
647 void *
648 sdev_get_vtor(struct sdev_node *dv)
649 {
650 	struct sdev_vop_table *vtp;
651 
652 	vtp = sdev_match(dv);
653 	if (vtp)
654 		return ((void *)vtp->vt_vtor);
655 	else
656 		return (NULL);
657 }
658 
659 /*
660  * Build the base root inode
661  */
662 ino_t
663 sdev_mkino(struct sdev_node *dv)
664 {
665 	ino_t	ino;
666 
667 	/*
668 	 * for now, follow the lead of tmpfs here
669 	 * need to someday understand the requirements here
670 	 */
671 	ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
672 	ino += SDEV_ROOTINO + 1;
673 
674 	return (ino);
675 }
676 
677 int
678 sdev_getlink(struct vnode *linkvp, char **link)
679 {
680 	int err;
681 	char *buf;
682 	struct uio uio = {0};
683 	struct iovec iov = {0};
684 
685 	if (linkvp == NULL)
686 		return (ENOENT);
687 	ASSERT(linkvp->v_type == VLNK);
688 
689 	buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
690 	iov.iov_base = buf;
691 	iov.iov_len = MAXPATHLEN;
692 	uio.uio_iov = &iov;
693 	uio.uio_iovcnt = 1;
694 	uio.uio_resid = MAXPATHLEN;
695 	uio.uio_segflg = UIO_SYSSPACE;
696 	uio.uio_llimit = MAXOFFSET_T;
697 
698 	err = VOP_READLINK(linkvp, &uio, kcred, NULL);
699 	if (err) {
700 		cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
701 		kmem_free(buf, MAXPATHLEN);
702 		return (ENOENT);
703 	}
704 
705 	/* mission complete */
706 	*link = i_ddi_strdup(buf, KM_SLEEP);
707 	kmem_free(buf, MAXPATHLEN);
708 	return (0);
709 }
710 
711 /*
712  * A convenient wrapper to get the devfs node vnode for a device
713  * minor functionality: readlink() of a /dev symlink
714  * Place the link into dv->sdev_symlink
715  */
716 static int
717 sdev_follow_link(struct sdev_node *dv)
718 {
719 	int err;
720 	struct vnode *linkvp;
721 	char *link = NULL;
722 
723 	linkvp = SDEVTOV(dv);
724 	if (linkvp == NULL)
725 		return (ENOENT);
726 	ASSERT(linkvp->v_type == VLNK);
727 	err = sdev_getlink(linkvp, &link);
728 	if (err) {
729 		(void) sdev_nodezombied(dv);
730 		dv->sdev_symlink = NULL;
731 		return (ENOENT);
732 	}
733 
734 	ASSERT(link != NULL);
735 	dv->sdev_symlink = link;
736 	return (0);
737 }
738 
739 static int
740 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
741 {
742 	vtype_t otype = SDEVTOV(dv)->v_type;
743 
744 	/*
745 	 * existing sdev_node has a different type.
746 	 */
747 	if (otype != nvap->va_type) {
748 		sdcmn_err9(("sdev_node_check: existing node "
749 		    "  %s type %d does not match new node type %d\n",
750 		    dv->sdev_name, otype, nvap->va_type));
751 		return (EEXIST);
752 	}
753 
754 	/*
755 	 * For a symlink, the target should be the same.
756 	 */
757 	if (otype == VLNK) {
758 		ASSERT(nargs != NULL);
759 		ASSERT(dv->sdev_symlink != NULL);
760 		if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
761 			sdcmn_err9(("sdev_node_check: existing node "
762 			    " %s has different symlink %s as new node "
763 			    " %s\n", dv->sdev_name, dv->sdev_symlink,
764 			    (char *)nargs));
765 			return (EEXIST);
766 		}
767 	}
768 
769 	return (0);
770 }
771 
772 /*
773  * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
774  *
775  * arguments:
776  *	- ddv (parent)
777  *	- nm (child name)
778  *	- newdv (sdev_node for nm is returned here)
779  *	- vap (vattr for the node to be created, va_type should be set.
780  *	- avp (attribute vnode)
781  *	  the defaults should be used if unknown)
782  *	- cred
783  *	- args
784  *	    . tnm (for VLNK)
785  *	    . global sdev_node (for !SDEV_GLOBAL)
786  * 	- state: SDEV_INIT, SDEV_READY
787  *
788  * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
789  *
790  * NOTE:  directory contents writers lock needs to be held before
791  *	  calling this routine.
792  */
793 int
794 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
795     struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
796     sdev_node_state_t state)
797 {
798 	int error = 0;
799 	sdev_node_state_t node_state;
800 	struct sdev_node *dv = NULL;
801 
802 	ASSERT(state != SDEV_ZOMBIE);
803 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
804 
805 	if (*newdv) {
806 		dv = *newdv;
807 	} else {
808 		/* allocate and initialize a sdev_node */
809 		if (ddv->sdev_state == SDEV_ZOMBIE) {
810 			sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
811 			    ddv->sdev_path));
812 			return (ENOENT);
813 		}
814 
815 		error = sdev_nodeinit(ddv, nm, &dv, vap);
816 		if (error != 0) {
817 			sdcmn_err9(("sdev_mknode: error %d,"
818 			    " name %s can not be initialized\n",
819 			    error, nm));
820 			return (error);
821 		}
822 		ASSERT(dv);
823 
824 		/* insert into the directory cache */
825 		error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
826 		if (error) {
827 			sdcmn_err9(("sdev_mknode: node %s can not"
828 			    " be added into directory cache\n", nm));
829 			return (ENOENT);
830 		}
831 	}
832 
833 	ASSERT(dv);
834 	node_state = dv->sdev_state;
835 	ASSERT(node_state != SDEV_ZOMBIE);
836 
837 	if (state == SDEV_READY) {
838 		switch (node_state) {
839 		case SDEV_INIT:
840 			error = sdev_nodeready(dv, vap, avp, args, cred);
841 			if (error) {
842 				sdcmn_err9(("sdev_mknode: node %s can NOT"
843 				    " be transitioned into READY state, "
844 				    "error %d\n", nm, error));
845 			}
846 			break;
847 		case SDEV_READY:
848 			/*
849 			 * Do some sanity checking to make sure
850 			 * the existing sdev_node is what has been
851 			 * asked for.
852 			 */
853 			error = sdev_node_check(dv, vap, args);
854 			break;
855 		default:
856 			break;
857 		}
858 	}
859 
860 	if (!error) {
861 		*newdv = dv;
862 		ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
863 	} else {
864 		SDEV_SIMPLE_RELE(dv);
865 		*newdv = NULL;
866 	}
867 
868 	return (error);
869 }
870 
871 /*
872  * convenient wrapper to change vp's ATIME, CTIME and MTIME
873  */
874 void
875 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
876 {
877 	struct vattr attr;
878 	timestruc_t now;
879 	int err;
880 
881 	ASSERT(vp);
882 	gethrestime(&now);
883 	if (mask & AT_CTIME)
884 		attr.va_ctime = now;
885 	if (mask & AT_MTIME)
886 		attr.va_mtime = now;
887 	if (mask & AT_ATIME)
888 		attr.va_atime = now;
889 
890 	attr.va_mask = (mask & AT_TIMES);
891 	err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
892 	if (err && (err != EROFS)) {
893 		sdcmn_err(("update timestamps error %d\n", err));
894 	}
895 }
896 
897 /*
898  * the backing store vnode is released here
899  */
900 /*ARGSUSED1*/
901 void
902 sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
903 {
904 	/* no references */
905 	ASSERT(dv->sdev_nlink == 0);
906 
907 	if (dv->sdev_attrvp != NULLVP) {
908 		VN_RELE(dv->sdev_attrvp);
909 		/*
910 		 * reset the attrvp so that no more
911 		 * references can be made on this already
912 		 * vn_rele() vnode
913 		 */
914 		dv->sdev_attrvp = NULLVP;
915 	}
916 
917 	if (dv->sdev_attr != NULL) {
918 		kmem_free(dv->sdev_attr, sizeof (struct vattr));
919 		dv->sdev_attr = NULL;
920 	}
921 
922 	if (dv->sdev_name != NULL) {
923 		kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
924 		dv->sdev_name = NULL;
925 	}
926 
927 	if (dv->sdev_symlink != NULL) {
928 		kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
929 		dv->sdev_symlink = NULL;
930 	}
931 
932 	if (dv->sdev_path) {
933 		kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
934 		dv->sdev_path = NULL;
935 	}
936 
937 	if (!SDEV_IS_GLOBAL(dv))
938 		sdev_prof_free(dv);
939 
940 	if (SDEVTOV(dv)->v_type == VDIR) {
941 		ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
942 		avl_destroy(&dv->sdev_entries);
943 	}
944 
945 	mutex_destroy(&dv->sdev_lookup_lock);
946 	cv_destroy(&dv->sdev_lookup_cv);
947 
948 	/* return node to initial state as per constructor */
949 	(void) memset((void *)&dv->sdev_instance_data, 0,
950 	    sizeof (dv->sdev_instance_data));
951 	vn_invalid(SDEVTOV(dv));
952 	kmem_cache_free(sdev_node_cache, dv);
953 }
954 
955 /*
956  * DIRECTORY CACHE lookup
957  */
958 struct sdev_node *
959 sdev_findbyname(struct sdev_node *ddv, char *nm)
960 {
961 	struct sdev_node *dv;
962 	struct sdev_node dvtmp;
963 	avl_index_t	where;
964 
965 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
966 
967 	dvtmp.sdev_name = nm;
968 	dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
969 	if (dv) {
970 		ASSERT(dv->sdev_dotdot == ddv);
971 		ASSERT(strcmp(dv->sdev_name, nm) == 0);
972 		SDEV_HOLD(dv);
973 		return (dv);
974 	}
975 	return (NULL);
976 }
977 
978 /*
979  * Inserts a new sdev_node in a parent directory
980  */
981 void
982 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
983 {
984 	avl_index_t where;
985 
986 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
987 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
988 	ASSERT(ddv->sdev_nlink >= 2);
989 	ASSERT(dv->sdev_nlink == 0);
990 
991 	dv->sdev_dotdot = ddv;
992 	VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
993 	avl_insert(&ddv->sdev_entries, dv, where);
994 	ddv->sdev_nlink++;
995 }
996 
997 /*
998  * The following check is needed because while sdev_nodes are linked
999  * in SDEV_INIT state, they have their link counts incremented only
1000  * in SDEV_READY state.
1001  */
1002 static void
1003 decr_link(struct sdev_node *dv)
1004 {
1005 	if (dv->sdev_state != SDEV_INIT)
1006 		dv->sdev_nlink--;
1007 	else
1008 		ASSERT(dv->sdev_nlink == 0);
1009 }
1010 
1011 /*
1012  * Delete an existing dv from directory cache
1013  *
1014  * In the case of a node is still held by non-zero reference count,
1015  *     the node is put into ZOMBIE state. Once the reference count
1016  *     reaches "0", the node is unlinked and destroyed,
1017  *     in sdev_inactive().
1018  */
1019 static int
1020 sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
1021 {
1022 	struct vnode *vp;
1023 
1024 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1025 
1026 	vp = SDEVTOV(dv);
1027 	mutex_enter(&vp->v_lock);
1028 
1029 	/* dv is held still */
1030 	if (vp->v_count > 1) {
1031 		rw_enter(&dv->sdev_contents, RW_WRITER);
1032 		if (dv->sdev_state == SDEV_READY) {
1033 			sdcmn_err9((
1034 			    "sdev_dirdelete: node %s busy with count %d\n",
1035 			    dv->sdev_name, vp->v_count));
1036 			dv->sdev_state = SDEV_ZOMBIE;
1037 		}
1038 		rw_exit(&dv->sdev_contents);
1039 		--vp->v_count;
1040 		mutex_exit(&vp->v_lock);
1041 		return (EBUSY);
1042 	}
1043 	ASSERT(vp->v_count == 1);
1044 
1045 	/* unlink from the memory cache */
1046 	ddv->sdev_nlink--;	/* .. to above */
1047 	if (vp->v_type == VDIR) {
1048 		decr_link(dv);		/* . to self */
1049 	}
1050 
1051 	avl_remove(&ddv->sdev_entries, dv);
1052 	decr_link(dv);	/* name, back to zero */
1053 	vp->v_count--;
1054 	mutex_exit(&vp->v_lock);
1055 
1056 	/* destroy the node */
1057 	sdev_nodedestroy(dv, 0);
1058 	return (0);
1059 }
1060 
1061 /*
1062  * check if the source is in the path of the target
1063  *
1064  * source and target are different
1065  */
1066 /*ARGSUSED2*/
1067 static int
1068 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
1069 {
1070 	int error = 0;
1071 	struct sdev_node *dotdot, *dir;
1072 
1073 	dotdot = tdv->sdev_dotdot;
1074 	ASSERT(dotdot);
1075 
1076 	/* fs root */
1077 	if (dotdot == tdv) {
1078 		return (0);
1079 	}
1080 
1081 	for (;;) {
1082 		/*
1083 		 * avoid error cases like
1084 		 *	mv a a/b
1085 		 *	mv a a/b/c
1086 		 *	etc.
1087 		 */
1088 		if (dotdot == sdv) {
1089 			error = EINVAL;
1090 			break;
1091 		}
1092 
1093 		dir = dotdot;
1094 		dotdot = dir->sdev_dotdot;
1095 
1096 		/* done checking because root is reached */
1097 		if (dir == dotdot) {
1098 			break;
1099 		}
1100 	}
1101 	return (error);
1102 }
1103 
/*
 * Create the destination side of a rename: build a new node named nnm in
 * directory nddv from the attributes of the source node odv (an entry of
 * directory oddv).
 *
 *	oddv	parent directory of the source
 *	odv	source node
 *	nddv	parent directory of the destination
 *	ndvp	in: existing destination node, or a pointer to NULL;
 *		out: the newly created node on success
 *	nnm	destination name
 *	cred	credentials for the attribute and backing store operations
 *
 * Both parents' sdev_contents locks are taken as writer for the duration
 * of the call (only once when oddv == nddv) and dropped before returning.
 * An existing destination is removed from the directory cache and, for a
 * directory or a persistent node, from the backing store.  When the source
 * is a directory, each of its entries is recreated under the new node by
 * recursion.
 *
 * Returns 0 on success, otherwise an errno value: ENOENT (source no longer
 * READY, or its symlink target cannot be obtained), EINVAL (directory
 * renamed into itself), EISDIR/ENOTDIR (type mismatch with an existing
 * destination), EBUSY/EEXIST (destination directory locked, mounted on or
 * not empty), or whatever VOP_GETATTR, sdev_checkpath, VOP_RMDIR,
 * VOP_REMOVE or sdev_mknode returned.
 *
 * NOTE(review): nothing in this function deletes odv itself; presumably
 * the caller removes the source after a successful return - confirm.
 */
int
sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
    struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
    struct cred *cred)
{
	int error = 0;
	struct vnode *ovp = SDEVTOV(odv);
	struct vnode *nvp;
	struct vattr vattr;
	int doingdir = (ovp->v_type == VDIR);
	char *link = NULL;
	int samedir = (oddv == nddv) ? 1 : 0;
	int bkstore = 0;
	struct sdev_node *idv = NULL;
	struct sdev_node *ndv = NULL;
	timestruc_t now;

	/* the new node inherits type, mode and ownership from the source */
	vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
	error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
	if (error)
		return (error);

	/* lock the source parent first, then the destination parent */
	if (!samedir)
		rw_enter(&oddv->sdev_contents, RW_WRITER);
	rw_enter(&nddv->sdev_contents, RW_WRITER);

	/*
	 * the source may have been deleted by another thread before
	 * we get here.
	 */
	if (odv->sdev_state != SDEV_READY) {
		error = ENOENT;
		goto err_out;
	}

	/* a directory cannot be moved into itself */
	if (doingdir && (odv == nddv)) {
		error = EINVAL;
		goto err_out;
	}

	/*
	 * If renaming a directory, and the parents are different (".." must be
	 * changed) then the source dir must not be in the dir hierarchy above
	 * the target since it would orphan everything below the source dir.
	 */
	if (doingdir && (oddv != nddv)) {
		error = sdev_checkpath(odv, nddv, cred);
		if (error)
			goto err_out;
	}

	/* destination existing */
	if (*ndvp) {
		nvp = SDEVTOV(*ndvp);
		ASSERT(nvp);

		/* handling renaming to itself */
		if (odv == *ndvp) {
			error = 0;
			goto err_out;
		}

		if (nvp->v_type == VDIR) {
			if (!doingdir) {
				error = EISDIR;
				goto err_out;
			}

			if (vn_vfswlock(nvp)) {
				error = EBUSY;
				goto err_out;
			}

			/* refuse to replace a directory that is mounted on */
			if (vn_mountedvfs(nvp) != NULL) {
				vn_vfsunlock(nvp);
				error = EBUSY;
				goto err_out;
			}

			/* in case dir1 exists in dir2 and "mv dir1 dir2" */
			if ((*ndvp)->sdev_nlink > 2) {
				vn_vfsunlock(nvp);
				error = EEXIST;
				goto err_out;
			}
			vn_vfsunlock(nvp);

			/* drop the old directory from the cache and the store */
			(void) sdev_dirdelete(nddv, *ndvp);
			*ndvp = NULL;
			ASSERT(nddv->sdev_attrvp);
			error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
			    nddv->sdev_attrvp, cred, NULL, 0);
			if (error)
				goto err_out;
		} else {
			if (doingdir) {
				error = ENOTDIR;
				goto err_out;
			}

			/* remember persistence before the node is deleted */
			if (SDEV_IS_PERSIST((*ndvp))) {
				bkstore = 1;
			}

			/*
			 * get rid of the node from the directory cache
			 * note, in case EBUSY is returned, the ZOMBIE
			 * node is taken care in sdev_mknode.
			 */
			(void) sdev_dirdelete(nddv, *ndvp);
			*ndvp = NULL;
			if (bkstore) {
				ASSERT(nddv->sdev_attrvp);
				error = VOP_REMOVE(nddv->sdev_attrvp,
				    nnm, cred, NULL, 0);
				if (error)
					goto err_out;
			}
		}
	}

	/* fix the source for a symlink */
	if (vattr.va_type == VLNK) {
		if (odv->sdev_symlink == NULL) {
			error = sdev_follow_link(odv);
			if (error) {
				error = ENOENT;
				goto err_out;
			}
		}
		ASSERT(odv->sdev_symlink);
		/* private copy of the target, freed right after sdev_mknode */
		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
	}

	/*
	 * make a fresh node from the source attrs
	 */
	ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
	error = sdev_mknode(nddv, nnm, ndvp, &vattr,
	    NULL, (void *)link, cred, SDEV_READY);

	if (link)
		kmem_free(link, strlen(link) + 1);

	if (error)
		goto err_out;
	ASSERT(*ndvp);
	ASSERT((*ndvp)->sdev_state == SDEV_READY);

	/* move dir contents */
	if (doingdir) {
		for (idv = SDEV_FIRST_ENTRY(odv); idv;
		    idv = SDEV_NEXT_ENTRY(odv, idv)) {
			/* recreate each child of odv under the new directory */
			error = sdev_rnmnode(odv, idv,
			    (struct sdev_node *)(*ndvp), &ndv,
			    idv->sdev_name, cred);
			/*
			 * NOTE(review): this error exit skips the
			 * SDEV_RELE(*ndvp) done on the success path - confirm
			 * whether the caller releases *ndvp on failure.
			 */
			if (error)
				goto err_out;
			ndv = NULL;
		}
	}

	/* stamp the new node: backing store if it has one, else in core */
	if ((*ndvp)->sdev_attrvp) {
		sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
		    AT_CTIME|AT_ATIME);
	} else {
		ASSERT((*ndvp)->sdev_attr);
		gethrestime(&now);
		(*ndvp)->sdev_attr->va_ctime = now;
		(*ndvp)->sdev_attr->va_atime = now;
	}

	/* the destination directory was modified as well */
	if (nddv->sdev_attrvp) {
		sdev_update_timestamps(nddv->sdev_attrvp, kcred,
		    AT_MTIME|AT_ATIME);
	} else {
		ASSERT(nddv->sdev_attr);
		gethrestime(&now);
		nddv->sdev_attr->va_mtime = now;
		nddv->sdev_attr->va_atime = now;
	}
	rw_exit(&nddv->sdev_contents);
	if (!samedir)
		rw_exit(&oddv->sdev_contents);

	SDEV_RELE(*ndvp);
	return (error);

err_out:
	/* common exit for failures (and rename-to-self): drop the locks */
	rw_exit(&nddv->sdev_contents);
	if (!samedir)
		rw_exit(&oddv->sdev_contents);
	return (error);
}
1298 
1299 /*
1300  * Merge sdev_node specific information into an attribute structure.
1301  *
1302  * note: sdev_node is not locked here
1303  */
1304 void
1305 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1306 {
1307 	struct vnode *vp = SDEVTOV(dv);
1308 
1309 	vap->va_nlink = dv->sdev_nlink;
1310 	vap->va_nodeid = dv->sdev_ino;
1311 	vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1312 	vap->va_type = vp->v_type;
1313 
1314 	if (vp->v_type == VDIR) {
1315 		vap->va_rdev = 0;
1316 		vap->va_fsid = vp->v_rdev;
1317 	} else if (vp->v_type == VLNK) {
1318 		vap->va_rdev = 0;
1319 		vap->va_mode  &= ~S_IFMT;
1320 		vap->va_mode |= S_IFLNK;
1321 	} else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1322 		vap->va_rdev = vp->v_rdev;
1323 		vap->va_mode &= ~S_IFMT;
1324 		if (vap->va_type == VCHR)
1325 			vap->va_mode |= S_IFCHR;
1326 		else
1327 			vap->va_mode |= S_IFBLK;
1328 	} else {
1329 		vap->va_rdev = 0;
1330 	}
1331 }
1332 
1333 struct vattr *
1334 sdev_getdefault_attr(enum vtype type)
1335 {
1336 	if (type == VDIR)
1337 		return (&sdev_vattr_dir);
1338 	else if (type == VCHR)
1339 		return (&sdev_vattr_chr);
1340 	else if (type == VBLK)
1341 		return (&sdev_vattr_blk);
1342 	else if (type == VLNK)
1343 		return (&sdev_vattr_lnk);
1344 	else
1345 		return (NULL);
1346 }
1347 int
1348 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1349 {
1350 	int rv = 0;
1351 	struct vnode *vp = SDEVTOV(dv);
1352 
1353 	switch (vp->v_type) {
1354 	case VCHR:
1355 	case VBLK:
1356 		/*
1357 		 * If vnode is a device, return special vnode instead
1358 		 * (though it knows all about -us- via sp->s_realvp)
1359 		 */
1360 		*vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1361 		VN_RELE(vp);
1362 		if (*vpp == NULLVP)
1363 			rv = ENOSYS;
1364 		break;
1365 	default:	/* most types are returned as is */
1366 		*vpp = vp;
1367 		break;
1368 	}
1369 	return (rv);
1370 }
1371 
1372 /*
1373  * junction between devname and root file system, e.g. ufs
1374  */
1375 int
1376 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1377 {
1378 	struct vnode *rdvp = ddv->sdev_attrvp;
1379 	int rval = 0;
1380 
1381 	ASSERT(rdvp);
1382 
1383 	rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1384 	    NULL);
1385 	return (rval);
1386 }
1387 
1388 static int
1389 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1390 {
1391 	struct sdev_node *dv = NULL;
1392 	char	*nm;
1393 	struct vnode *dirvp;
1394 	int	error;
1395 	vnode_t	*vp;
1396 	int eof;
1397 	struct iovec iov;
1398 	struct uio uio;
1399 	struct dirent64 *dp;
1400 	dirent64_t *dbuf;
1401 	size_t dbuflen;
1402 	struct vattr vattr;
1403 	char *link = NULL;
1404 
1405 	if (ddv->sdev_attrvp == NULL)
1406 		return (0);
1407 	if (!(ddv->sdev_flags & SDEV_BUILD))
1408 		return (0);
1409 
1410 	dirvp = ddv->sdev_attrvp;
1411 	VN_HOLD(dirvp);
1412 	dbuf = kmem_zalloc(dlen, KM_SLEEP);
1413 
1414 	uio.uio_iov = &iov;
1415 	uio.uio_iovcnt = 1;
1416 	uio.uio_segflg = UIO_SYSSPACE;
1417 	uio.uio_fmode = 0;
1418 	uio.uio_extflg = UIO_COPY_CACHED;
1419 	uio.uio_loffset = 0;
1420 	uio.uio_llimit = MAXOFFSET_T;
1421 
1422 	eof = 0;
1423 	error = 0;
1424 	while (!error && !eof) {
1425 		uio.uio_resid = dlen;
1426 		iov.iov_base = (char *)dbuf;
1427 		iov.iov_len = dlen;
1428 		(void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1429 		error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1430 		VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1431 
1432 		dbuflen = dlen - uio.uio_resid;
1433 		if (error || dbuflen == 0)
1434 			break;
1435 
1436 		if (!(ddv->sdev_flags & SDEV_BUILD))
1437 			break;
1438 
1439 		for (dp = dbuf; ((intptr_t)dp <
1440 		    (intptr_t)dbuf + dbuflen);
1441 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1442 			nm = dp->d_name;
1443 
1444 			if (strcmp(nm, ".") == 0 ||
1445 			    strcmp(nm, "..") == 0)
1446 				continue;
1447 
1448 			vp = NULLVP;
1449 			dv = sdev_cache_lookup(ddv, nm);
1450 			if (dv) {
1451 				if (dv->sdev_state != SDEV_ZOMBIE) {
1452 					SDEV_SIMPLE_RELE(dv);
1453 				} else {
1454 					/*
1455 					 * A ZOMBIE node may not have been
1456 					 * cleaned up from the backing store,
1457 					 * bypass this entry in this case,
1458 					 * and clean it up from the directory
1459 					 * cache if this is the last call.
1460 					 */
1461 					(void) sdev_dirdelete(ddv, dv);
1462 				}
1463 				continue;
1464 			}
1465 
1466 			/* refill the cache if not already */
1467 			error = devname_backstore_lookup(ddv, nm, &vp);
1468 			if (error)
1469 				continue;
1470 
1471 			vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1472 			error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1473 			if (error)
1474 				continue;
1475 
1476 			if (vattr.va_type == VLNK) {
1477 				error = sdev_getlink(vp, &link);
1478 				if (error) {
1479 					continue;
1480 				}
1481 				ASSERT(link != NULL);
1482 			}
1483 
1484 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1485 				rw_exit(&ddv->sdev_contents);
1486 				rw_enter(&ddv->sdev_contents, RW_WRITER);
1487 			}
1488 			error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1489 			    cred, SDEV_READY);
1490 			rw_downgrade(&ddv->sdev_contents);
1491 
1492 			if (link != NULL) {
1493 				kmem_free(link, strlen(link) + 1);
1494 				link = NULL;
1495 			}
1496 
1497 			if (!error) {
1498 				ASSERT(dv);
1499 				ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1500 				SDEV_SIMPLE_RELE(dv);
1501 			}
1502 			vp = NULL;
1503 			dv = NULL;
1504 		}
1505 	}
1506 
1507 done:
1508 	VN_RELE(dirvp);
1509 	kmem_free(dbuf, dlen);
1510 
1511 	return (error);
1512 }
1513 
1514 void
1515 sdev_filldir_dynamic(struct sdev_node *ddv)
1516 {
1517 	int error;
1518 	int i;
1519 	struct vattr vattr;
1520 	struct vattr *vap = &vattr;
1521 	char *nm = NULL;
1522 	struct sdev_node *dv = NULL;
1523 
1524 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1525 	ASSERT((ddv->sdev_flags & SDEV_BUILD));
1526 
1527 	*vap = *sdev_getdefault_attr(VDIR);	/* note structure copy here */
1528 	gethrestime(&vap->va_atime);
1529 	vap->va_mtime = vap->va_atime;
1530 	vap->va_ctime = vap->va_atime;
1531 	for (i = 0; vtab[i].vt_name != NULL; i++) {
1532 		nm = vtab[i].vt_name;
1533 		ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1534 		dv = NULL;
1535 		error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1536 		    NULL, kcred, SDEV_READY);
1537 		if (error) {
1538 			cmn_err(CE_WARN, "%s/%s: error %d\n",
1539 			    ddv->sdev_name, nm, error);
1540 		} else {
1541 			ASSERT(dv);
1542 			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1543 			SDEV_SIMPLE_RELE(dv);
1544 		}
1545 	}
1546 }
1547 
1548 /*
1549  * Creating a backing store entry based on sdev_attr.
1550  * This is called either as part of node creation in a persistent directory
1551  * or from setattr/setsecattr to persist access attributes across reboot.
1552  */
1553 int
1554 sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1555 {
1556 	int error = 0;
1557 	struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1558 	struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1559 	struct vattr *vap = dv->sdev_attr;
1560 	char *nm = dv->sdev_name;
1561 	struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1562 
1563 	ASSERT(dv && dv->sdev_name && rdvp);
1564 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1565 
1566 lookup:
1567 	/* try to find it in the backing store */
1568 	error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1569 	    NULL);
1570 	if (error == 0) {
1571 		if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1572 			VN_HOLD(rrvp);
1573 			VN_RELE(*rvp);
1574 			*rvp = rrvp;
1575 		}
1576 
1577 		kmem_free(dv->sdev_attr, sizeof (vattr_t));
1578 		dv->sdev_attr = NULL;
1579 		dv->sdev_attrvp = *rvp;
1580 		return (0);
1581 	}
1582 
1583 	/* let's try to persist the node */
1584 	gethrestime(&vap->va_atime);
1585 	vap->va_mtime = vap->va_atime;
1586 	vap->va_ctime = vap->va_atime;
1587 	vap->va_mask |= AT_TYPE|AT_MODE;
1588 	switch (vap->va_type) {
1589 	case VDIR:
1590 		error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1591 		sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1592 		    (void *)(*rvp), error));
1593 		break;
1594 	case VCHR:
1595 	case VBLK:
1596 	case VREG:
1597 	case VDOOR:
1598 		error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1599 		    rvp, cred, 0, NULL, NULL);
1600 		sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1601 		    (void *)(*rvp), error));
1602 		if (!error)
1603 			VN_RELE(*rvp);
1604 		break;
1605 	case VLNK:
1606 		ASSERT(dv->sdev_symlink);
1607 		error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1608 		    NULL, 0);
1609 		sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1610 		    error));
1611 		break;
1612 	default:
1613 		cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1614 		    "create\n", nm);
1615 		/*NOTREACHED*/
1616 	}
1617 
1618 	/* go back to lookup to factor out spec node and set attrvp */
1619 	if (error == 0)
1620 		goto lookup;
1621 
1622 	sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error));
1623 	return (error);
1624 }
1625 
1626 static int
1627 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1628 {
1629 	int error = 0;
1630 	struct sdev_node *dup = NULL;
1631 
1632 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1633 	if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1634 		sdev_direnter(ddv, *dv);
1635 	} else {
1636 		if (dup->sdev_state == SDEV_ZOMBIE) {
1637 			error = sdev_dirdelete(ddv, dup);
1638 			/*
1639 			 * The ZOMBIE node is still hanging
1640 			 * around with more than one reference counts.
1641 			 * Fail the new node creation so that
1642 			 * the directory cache won't have
1643 			 * duplicate entries for the same named node
1644 			 */
1645 			if (error == EBUSY) {
1646 				SDEV_SIMPLE_RELE(*dv);
1647 				sdev_nodedestroy(*dv, 0);
1648 				*dv = NULL;
1649 				return (error);
1650 			}
1651 			sdev_direnter(ddv, *dv);
1652 		} else {
1653 			ASSERT((*dv)->sdev_state != SDEV_ZOMBIE);
1654 			SDEV_SIMPLE_RELE(*dv);
1655 			sdev_nodedestroy(*dv, 0);
1656 			*dv = dup;
1657 		}
1658 	}
1659 
1660 	return (0);
1661 }
1662 
1663 static int
1664 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1665 {
1666 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1667 	return (sdev_dirdelete(ddv, *dv));
1668 }
1669 
1670 /*
1671  * update the in-core directory cache
1672  */
1673 int
1674 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1675     sdev_cache_ops_t ops)
1676 {
1677 	int error = 0;
1678 
1679 	ASSERT((SDEV_HELD(*dv)));
1680 
1681 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1682 	switch (ops) {
1683 	case SDEV_CACHE_ADD:
1684 		error = sdev_cache_add(ddv, dv, nm);
1685 		break;
1686 	case SDEV_CACHE_DELETE:
1687 		error = sdev_cache_delete(ddv, dv);
1688 		break;
1689 	default:
1690 		break;
1691 	}
1692 
1693 	return (error);
1694 }
1695 
1696 /*
1697  * retrieve the named entry from the directory cache
1698  */
1699 struct sdev_node *
1700 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1701 {
1702 	struct sdev_node *dv = NULL;
1703 
1704 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1705 	dv = sdev_findbyname(ddv, nm);
1706 
1707 	return (dv);
1708 }
1709 
1710 /*
1711  * Implicit reconfig for nodes constructed by a link generator
1712  * Start devfsadm if needed, or if devfsadm is in progress,
1713  * prepare to block on devfsadm either completing or
1714  * constructing the desired node.  As devfsadmd is global
1715  * in scope, constructing all necessary nodes, we only
1716  * need to initiate it once.
1717  */
1718 static int
1719 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1720 {
1721 	int error = 0;
1722 
1723 	if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1724 		sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1725 		    ddv->sdev_name, nm, devfsadm_state));
1726 		mutex_enter(&dv->sdev_lookup_lock);
1727 		SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1728 		mutex_exit(&dv->sdev_lookup_lock);
1729 		error = 0;
1730 	} else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1731 		sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1732 		    ddv->sdev_name, nm, devfsadm_state));
1733 
1734 		sdev_devfsadmd_thread(ddv, dv, kcred);
1735 		mutex_enter(&dv->sdev_lookup_lock);
1736 		SDEV_BLOCK_OTHERS(dv,
1737 		    (SDEV_LOOKUP | SDEV_LGWAITING));
1738 		mutex_exit(&dv->sdev_lookup_lock);
1739 		error = 0;
1740 	} else {
1741 		error = -1;
1742 	}
1743 
1744 	return (error);
1745 }
1746 
1747 /*
1748  *  Support for specialized device naming construction mechanisms
1749  */
1750 static int
1751 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1752     int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1753     void *, char *), int flags, struct cred *cred)
1754 {
1755 	int rv = 0;
1756 	char *physpath = NULL;
1757 	struct vattr vattr;
1758 	struct vattr *vap = &vattr;
1759 	struct sdev_node *dv = NULL;
1760 
1761 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1762 	if (flags & SDEV_VLINK) {
1763 		physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1764 		rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1765 		    NULL);
1766 		if (rv) {
1767 			kmem_free(physpath, MAXPATHLEN);
1768 			return (-1);
1769 		}
1770 
1771 		*vap = *sdev_getdefault_attr(VLNK);	/* structure copy */
1772 		vap->va_size = strlen(physpath);
1773 		gethrestime(&vap->va_atime);
1774 		vap->va_mtime = vap->va_atime;
1775 		vap->va_ctime = vap->va_atime;
1776 
1777 		rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1778 		    (void *)physpath, cred, SDEV_READY);
1779 		kmem_free(physpath, MAXPATHLEN);
1780 		if (rv)
1781 			return (rv);
1782 	} else if (flags & SDEV_VATTR) {
1783 		/*
1784 		 * /dev/pts
1785 		 *
1786 		 * callback is responsible to set the basic attributes,
1787 		 * e.g. va_type/va_uid/va_gid/
1788 		 *    dev_t if VCHR or VBLK/
1789 		 */
1790 		ASSERT(callback);
1791 		rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
1792 		if (rv) {
1793 			sdcmn_err3(("devname_lookup_func: SDEV_NONE "
1794 			    "callback failed \n"));
1795 			return (-1);
1796 		}
1797 
1798 		rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
1799 		    cred, SDEV_READY);
1800 
1801 		if (rv)
1802 			return (rv);
1803 
1804 	} else {
1805 		impossible(("lookup: %s/%s by %s not supported (%d)\n",
1806 		    SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
1807 		    __LINE__));
1808 		rv = -1;
1809 	}
1810 
1811 	*dvp = dv;
1812 	return (rv);
1813 }
1814 
/*
 * Return 1 if exec_name is exactly "devfsadm", 0 otherwise.
 */
static int
is_devfsadm_thread(char *exec_name)
{
	/*
	 * note: because devfsadmd -> /usr/sbin/devfsadm
	 * it is safe to use "devfsadm" to capture the lookups
	 * from devfsadm and its daemon version.
	 */
	return ((strcmp(exec_name, "devfsadm") == 0) ? 1 : 0);
}
1827 
1828 /*
1829  * Lookup Order:
1830  *	sdev_node cache;
1831  *	backing store (SDEV_PERSIST);
1832  *	DBNR: a. dir_ops implemented in the loadable modules;
1833  *	      b. vnode ops in vtab.
1834  */
1835 int
1836 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
1837     struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
1838     struct cred *, void *, char *), int flags)
1839 {
1840 	int rv = 0, nmlen;
1841 	struct vnode *rvp = NULL;
1842 	struct sdev_node *dv = NULL;
1843 	int	retried = 0;
1844 	int	error = 0;
1845 	struct vattr vattr;
1846 	char *lookup_thread = curproc->p_user.u_comm;
1847 	int failed_flags = 0;
1848 	int (*vtor)(struct sdev_node *) = NULL;
1849 	int state;
1850 	int parent_state;
1851 	char *link = NULL;
1852 
1853 	if (SDEVTOV(ddv)->v_type != VDIR)
1854 		return (ENOTDIR);
1855 
1856 	/*
1857 	 * Empty name or ., return node itself.
1858 	 */
1859 	nmlen = strlen(nm);
1860 	if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
1861 		*vpp = SDEVTOV(ddv);
1862 		VN_HOLD(*vpp);
1863 		return (0);
1864 	}
1865 
1866 	/*
1867 	 * .., return the parent directory
1868 	 */
1869 	if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
1870 		*vpp = SDEVTOV(ddv->sdev_dotdot);
1871 		VN_HOLD(*vpp);
1872 		return (0);
1873 	}
1874 
1875 	rw_enter(&ddv->sdev_contents, RW_READER);
1876 	if (ddv->sdev_flags & SDEV_VTOR) {
1877 		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
1878 		ASSERT(vtor);
1879 	}
1880 
1881 tryagain:
1882 	/*
1883 	 * (a) directory cache lookup:
1884 	 */
1885 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1886 	parent_state = ddv->sdev_state;
1887 	dv = sdev_cache_lookup(ddv, nm);
1888 	if (dv) {
1889 		state = dv->sdev_state;
1890 		switch (state) {
1891 		case SDEV_INIT:
1892 			if (is_devfsadm_thread(lookup_thread))
1893 				break;
1894 
1895 			/* ZOMBIED parent won't allow node creation */
1896 			if (parent_state == SDEV_ZOMBIE) {
1897 				SD_TRACE_FAILED_LOOKUP(ddv, nm,
1898 				    retried);
1899 				goto nolock_notfound;
1900 			}
1901 
1902 			mutex_enter(&dv->sdev_lookup_lock);
1903 			/* compensate the threads started after devfsadm */
1904 			if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
1905 			    !(SDEV_IS_LOOKUP(dv)))
1906 				SDEV_BLOCK_OTHERS(dv,
1907 				    (SDEV_LOOKUP | SDEV_LGWAITING));
1908 
1909 			if (SDEV_IS_LOOKUP(dv)) {
1910 				failed_flags |= SLF_REBUILT;
1911 				rw_exit(&ddv->sdev_contents);
1912 				error = sdev_wait4lookup(dv, SDEV_LOOKUP);
1913 				mutex_exit(&dv->sdev_lookup_lock);
1914 				rw_enter(&ddv->sdev_contents, RW_READER);
1915 
1916 				if (error != 0) {
1917 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
1918 					    retried);
1919 					goto nolock_notfound;
1920 				}
1921 
1922 				state = dv->sdev_state;
1923 				if (state == SDEV_INIT) {
1924 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
1925 					    retried);
1926 					goto nolock_notfound;
1927 				} else if (state == SDEV_READY) {
1928 					goto found;
1929 				} else if (state == SDEV_ZOMBIE) {
1930 					rw_exit(&ddv->sdev_contents);
1931 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
1932 					    retried);
1933 					SDEV_RELE(dv);
1934 					goto lookup_failed;
1935 				}
1936 			} else {
1937 				mutex_exit(&dv->sdev_lookup_lock);
1938 			}
1939 			break;
1940 		case SDEV_READY:
1941 			goto found;
1942 		case SDEV_ZOMBIE:
1943 			rw_exit(&ddv->sdev_contents);
1944 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1945 			SDEV_RELE(dv);
1946 			goto lookup_failed;
1947 		default:
1948 			rw_exit(&ddv->sdev_contents);
1949 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1950 			sdev_lookup_failed(ddv, nm, failed_flags);
1951 			*vpp = NULLVP;
1952 			return (ENOENT);
1953 		}
1954 	}
1955 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1956 
1957 	/*
1958 	 * ZOMBIED parent does not allow new node creation.
1959 	 * bail out early
1960 	 */
1961 	if (parent_state == SDEV_ZOMBIE) {
1962 		rw_exit(&ddv->sdev_contents);
1963 		*vpp = NULLVP;
1964 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1965 		return (ENOENT);
1966 	}
1967 
1968 	/*
1969 	 * (b0): backing store lookup
1970 	 *	SDEV_PERSIST is default except:
1971 	 *		1) pts nodes
1972 	 *		2) non-chmod'ed local nodes
1973 	 *		3) zvol nodes
1974 	 */
1975 	if (SDEV_IS_PERSIST(ddv)) {
1976 		error = devname_backstore_lookup(ddv, nm, &rvp);
1977 
1978 		if (!error) {
1979 
1980 			vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1981 			error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
1982 			if (error) {
1983 				rw_exit(&ddv->sdev_contents);
1984 				if (dv)
1985 					SDEV_RELE(dv);
1986 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1987 				sdev_lookup_failed(ddv, nm, failed_flags);
1988 				*vpp = NULLVP;
1989 				return (ENOENT);
1990 			}
1991 
1992 			if (vattr.va_type == VLNK) {
1993 				error = sdev_getlink(rvp, &link);
1994 				if (error) {
1995 					rw_exit(&ddv->sdev_contents);
1996 					if (dv)
1997 						SDEV_RELE(dv);
1998 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
1999 					    retried);
2000 					sdev_lookup_failed(ddv, nm,
2001 					    failed_flags);
2002 					*vpp = NULLVP;
2003 					return (ENOENT);
2004 				}
2005 				ASSERT(link != NULL);
2006 			}
2007 
2008 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
2009 				rw_exit(&ddv->sdev_contents);
2010 				rw_enter(&ddv->sdev_contents, RW_WRITER);
2011 			}
2012 			error = sdev_mknode(ddv, nm, &dv, &vattr,
2013 			    rvp, link, cred, SDEV_READY);
2014 			rw_downgrade(&ddv->sdev_contents);
2015 
2016 			if (link != NULL) {
2017 				kmem_free(link, strlen(link) + 1);
2018 				link = NULL;
2019 			}
2020 
2021 			if (error) {
2022 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2023 				rw_exit(&ddv->sdev_contents);
2024 				if (dv)
2025 					SDEV_RELE(dv);
2026 				goto lookup_failed;
2027 			} else {
2028 				goto found;
2029 			}
2030 		} else if (retried) {
2031 			rw_exit(&ddv->sdev_contents);
2032 			sdcmn_err3(("retry of lookup of %s/%s: failed\n",
2033 			    ddv->sdev_name, nm));
2034 			if (dv)
2035 				SDEV_RELE(dv);
2036 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2037 			sdev_lookup_failed(ddv, nm, failed_flags);
2038 			*vpp = NULLVP;
2039 			return (ENOENT);
2040 		}
2041 	}
2042 
2043 lookup_create_node:
2044 	/* first thread that is doing the lookup on this node */
2045 	if (callback) {
2046 		ASSERT(dv == NULL);
2047 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2048 			rw_exit(&ddv->sdev_contents);
2049 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2050 		}
2051 		error = sdev_call_dircallback(ddv, &dv, nm, callback,
2052 		    flags, cred);
2053 		rw_downgrade(&ddv->sdev_contents);
2054 		if (error == 0) {
2055 			goto found;
2056 		} else {
2057 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2058 			rw_exit(&ddv->sdev_contents);
2059 			goto lookup_failed;
2060 		}
2061 	}
2062 	if (!dv) {
2063 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2064 			rw_exit(&ddv->sdev_contents);
2065 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2066 		}
2067 		error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
2068 		    cred, SDEV_INIT);
2069 		if (!dv) {
2070 			rw_exit(&ddv->sdev_contents);
2071 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2072 			sdev_lookup_failed(ddv, nm, failed_flags);
2073 			*vpp = NULLVP;
2074 			return (ENOENT);
2075 		}
2076 		rw_downgrade(&ddv->sdev_contents);
2077 	}
2078 
2079 	/*
2080 	 * (b1) invoking devfsadm once per life time for devfsadm nodes
2081 	 */
2082 	ASSERT(SDEV_HELD(dv));
2083 
2084 	if (SDEV_IS_NO_NCACHE(dv))
2085 		failed_flags |= SLF_NO_NCACHE;
2086 	if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
2087 	    SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
2088 	    ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
2089 		ASSERT(SDEV_HELD(dv));
2090 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2091 		goto nolock_notfound;
2092 	}
2093 
2094 	/*
2095 	 * filter out known non-existent devices recorded
2096 	 * during initial reconfiguration boot for which
2097 	 * reconfig should not be done and lookup may
2098 	 * be short-circuited now.
2099 	 */
2100 	if (sdev_lookup_filter(ddv, nm)) {
2101 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2102 		goto nolock_notfound;
2103 	}
2104 
2105 	/* bypassing devfsadm internal nodes */
2106 	if (is_devfsadm_thread(lookup_thread)) {
2107 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2108 		goto nolock_notfound;
2109 	}
2110 
2111 	if (sdev_reconfig_disable) {
2112 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2113 		goto nolock_notfound;
2114 	}
2115 
2116 	error = sdev_call_devfsadmd(ddv, dv, nm);
2117 	if (error == 0) {
2118 		sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
2119 		    ddv->sdev_name, nm, curproc->p_user.u_comm));
2120 		if (sdev_reconfig_verbose) {
2121 			cmn_err(CE_CONT,
2122 			    "?lookup of %s/%s by %s: reconfig\n",
2123 			    ddv->sdev_name, nm, curproc->p_user.u_comm);
2124 		}
2125 		retried = 1;
2126 		failed_flags |= SLF_REBUILT;
2127 		ASSERT(dv->sdev_state != SDEV_ZOMBIE);
2128 		SDEV_SIMPLE_RELE(dv);
2129 		goto tryagain;
2130 	} else {
2131 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2132 		goto nolock_notfound;
2133 	}
2134 
2135 found:
2136 	ASSERT(!(dv->sdev_flags & SDEV_STALE));
2137 	ASSERT(dv->sdev_state == SDEV_READY);
2138 	if (vtor) {
2139 		/*
2140 		 * Check validity of returned node
2141 		 */
2142 		switch (vtor(dv)) {
2143 		case SDEV_VTOR_VALID:
2144 			break;
2145 		case SDEV_VTOR_STALE:
2146 			/*
2147 			 * The name exists, but the cache entry is
2148 			 * stale and needs to be re-created.
2149 			 */
2150 			ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2151 			if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
2152 				rw_exit(&ddv->sdev_contents);
2153 				rw_enter(&ddv->sdev_contents, RW_WRITER);
2154 			}
2155 			error = sdev_cache_update(ddv, &dv, nm,
2156 			    SDEV_CACHE_DELETE);
2157 			rw_downgrade(&ddv->sdev_contents);
2158 			if (error == 0) {
2159 				dv = NULL;
2160 				goto lookup_create_node;
2161 			}
2162 			/* FALLTHRU */
2163 		case SDEV_VTOR_INVALID:
2164 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2165 			sdcmn_err7(("lookup: destroy invalid "
2166 			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
2167 			goto nolock_notfound;
2168 		case SDEV_VTOR_SKIP:
2169 			sdcmn_err7(("lookup: node not applicable - "
2170 			    "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
2171 			rw_exit(&ddv->sdev_contents);
2172 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2173 			SDEV_RELE(dv);
2174 			goto lookup_failed;
2175 		default:
2176 			cmn_err(CE_PANIC,
2177 			    "dev fs: validator failed: %s(%p)\n",
2178 			    dv->sdev_name, (void *)dv);
2179 			break;
2180 		}
2181 	}
2182 
2183 	rw_exit(&ddv->sdev_contents);
2184 	rv = sdev_to_vp(dv, vpp);
2185 	sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
2186 	    "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
2187 	    dv->sdev_state, nm, rv));
2188 	return (rv);
2189 
2190 nolock_notfound:
2191 	/*
2192 	 * Destroy the node that is created for synchronization purposes.
2193 	 */
2194 	sdcmn_err3(("devname_lookup_func: %s with state %d\n",
2195 	    nm, dv->sdev_state));
2196 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2197 	if (dv->sdev_state == SDEV_INIT) {
2198 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2199 			rw_exit(&ddv->sdev_contents);
2200 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2201 		}
2202 
2203 		/*
2204 		 * Node state may have changed during the lock
2205 		 * changes. Re-check.
2206 		 */
2207 		if (dv->sdev_state == SDEV_INIT) {
2208 			(void) sdev_dirdelete(ddv, dv);
2209 			rw_exit(&ddv->sdev_contents);
2210 			sdev_lookup_failed(ddv, nm, failed_flags);
2211 			*vpp = NULL;
2212 			return (ENOENT);
2213 		}
2214 	}
2215 
2216 	rw_exit(&ddv->sdev_contents);
2217 	SDEV_RELE(dv);
2218 
2219 lookup_failed:
2220 	sdev_lookup_failed(ddv, nm, failed_flags);
2221 	*vpp = NULL;
2222 	return (ENOENT);
2223 }
2224 
2225 /*
2226  * Given a directory node, mark all nodes beneath as
2227  * STALE, i.e. nodes that don't exist as far as new
2228  * consumers are concerned.  Remove them from the
2229  * list of directory entries so that no lookup or
2230  * directory traversal will find them.  The node
2231  * not deallocated so existing holds are not affected.
2232  */
2233 void
2234 sdev_stale(struct sdev_node *ddv)
2235 {
2236 	struct sdev_node *dv;
2237 	struct vnode *vp;
2238 
2239 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2240 
2241 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2242 	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = SDEV_NEXT_ENTRY(ddv, dv)) {
2243 		vp = SDEVTOV(dv);
2244 		if (vp->v_type == VDIR)
2245 			sdev_stale(dv);
2246 
2247 		sdcmn_err9(("sdev_stale: setting stale %s\n",
2248 		    dv->sdev_path));
2249 		dv->sdev_flags |= SDEV_STALE;
2250 		avl_remove(&ddv->sdev_entries, dv);
2251 	}
2252 	ddv->sdev_flags |= SDEV_BUILD;
2253 	rw_exit(&ddv->sdev_contents);
2254 }
2255 
2256 /*
2257  * Given a directory node, clean out all the nodes beneath.
2258  * If expr is specified, clean node with names matching expr.
2259  * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2260  *	so they are excluded from future lookups.
2261  */
2262 int
2263 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2264 {
2265 	int error = 0;
2266 	int busy = 0;
2267 	struct vnode *vp;
2268 	struct sdev_node *dv, *next = NULL;
2269 	int bkstore = 0;
2270 	int len = 0;
2271 	char *bks_name = NULL;
2272 
2273 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2274 
2275 	/*
2276 	 * We try our best to destroy all unused sdev_node's
2277 	 */
2278 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2279 	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) {
2280 		next = SDEV_NEXT_ENTRY(ddv, dv);
2281 		vp = SDEVTOV(dv);
2282 
2283 		if (expr && gmatch(dv->sdev_name, expr) == 0)
2284 			continue;
2285 
2286 		if (vp->v_type == VDIR &&
2287 		    sdev_cleandir(dv, NULL, flags) != 0) {
2288 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2289 			    dv->sdev_name));
2290 			busy++;
2291 			continue;
2292 		}
2293 
2294 		if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2295 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2296 			    dv->sdev_name));
2297 			busy++;
2298 			continue;
2299 		}
2300 
2301 		/*
2302 		 * at this point, either dv is not held or SDEV_ENFORCE
2303 		 * is specified. In either case, dv needs to be deleted
2304 		 */
2305 		SDEV_HOLD(dv);
2306 
2307 		bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2308 		if (bkstore && (vp->v_type == VDIR))
2309 			bkstore += 1;
2310 
2311 		if (bkstore) {
2312 			len = strlen(dv->sdev_name) + 1;
2313 			bks_name = kmem_alloc(len, KM_SLEEP);
2314 			bcopy(dv->sdev_name, bks_name, len);
2315 		}
2316 
2317 		error = sdev_dirdelete(ddv, dv);
2318 
2319 		if (error == EBUSY) {
2320 			sdcmn_err9(("sdev_cleandir: dir busy\n"));
2321 			busy++;
2322 		}
2323 
2324 		/* take care the backing store clean up */
2325 		if (bkstore && (error == 0)) {
2326 			ASSERT(bks_name);
2327 			ASSERT(ddv->sdev_attrvp);
2328 
2329 			if (bkstore == 1) {
2330 				error = VOP_REMOVE(ddv->sdev_attrvp,
2331 				    bks_name, kcred, NULL, 0);
2332 			} else if (bkstore == 2) {
2333 				error = VOP_RMDIR(ddv->sdev_attrvp,
2334 				    bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2335 			}
2336 
2337 			/* do not propagate the backing store errors */
2338 			if (error) {
2339 				sdcmn_err9(("sdev_cleandir: backing store"
2340 				    "not cleaned\n"));
2341 				error = 0;
2342 			}
2343 
2344 			bkstore = 0;
2345 			kmem_free(bks_name, len);
2346 			bks_name = NULL;
2347 			len = 0;
2348 		}
2349 	}
2350 
2351 	ddv->sdev_flags |= SDEV_BUILD;
2352 	rw_exit(&ddv->sdev_contents);
2353 
2354 	if (busy) {
2355 		error = EBUSY;
2356 	}
2357 
2358 	return (error);
2359 }
2360 
2361 /*
2362  * a convenient wrapper for readdir() funcs
2363  */
2364 size_t
2365 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2366 {
2367 	size_t reclen = DIRENT64_RECLEN(strlen(nm));
2368 	if (reclen > size)
2369 		return (0);
2370 
2371 	de->d_ino = (ino64_t)ino;
2372 	de->d_off = (off64_t)off + 1;
2373 	de->d_reclen = (ushort_t)reclen;
2374 	(void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2375 	return (reclen);
2376 }
2377 
2378 /*
2379  * sdev_mount service routines
2380  */
2381 int
2382 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2383 {
2384 	int	error;
2385 
2386 	if (uap->datalen != sizeof (*args))
2387 		return (EINVAL);
2388 
2389 	if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2390 		cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2391 		    "get user data. error %d\n", error);
2392 		return (EFAULT);
2393 	}
2394 
2395 	return (0);
2396 }
2397 
/*
 * nextdp(dp): address of the dirent64 record that follows dp in a
 * packed buffer, found by stepping dp->d_reclen bytes forward.  Any
 * definition of nextdp inherited from a header is discarded first.
 */
#undef nextdp
#define	nextdp(dp)	\
	((struct dirent64 *)(intptr_t)((char *)(dp) + (dp)->d_reclen))
2403 
/*
 * readdir helper func
 *
 * Produces dirent64 records for the directory vp into the single iovec
 * described by uiop.  Records are first assembled in a kernel buffer of
 * the same size as the caller's iovec and then moved out with uiomove().
 *
 * Directory offsets are entry indices: 0 is ".", 1 is "..", and the
 * cached children start at 2.  uiop->uio_loffset is the index to resume
 * from on entry and is updated to the next index on success.
 *
 * The caller must hold ddv->sdev_contents as reader (asserted below).
 * For nodes in the global /dev instance the lock is dropped and
 * re-acquired as reader while waiting for any pending reconfiguration.
 *
 * Returns 0 on success (also when the offset is already >= MAXOFF_T, in
 * which case *eofp is set to 1), EINVAL when uiop does not carry exactly
 * one iovec or the buffer cannot hold "." / "..", ENOTDIR when vp is not
 * a directory, or the error reported by uiomove().  When eofp is
 * non-NULL it is set to 1 once the walk has run off the end of the
 * cached entries, 0 otherwise.
 */
int
devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
    int flags)
{
	struct sdev_node *ddv = VTOSDEV(vp);
	struct sdev_node *dv;
	dirent64_t	*dp;
	ulong_t		outcount = 0;
	size_t		namelen;
	ulong_t		alloc_count;
	void		*outbuf;
	struct iovec	*iovp;
	int		error = 0;
	size_t		reclen;
	offset_t	diroff;
	offset_t	soff;
	int		this_reclen;
	int (*vtor)(struct sdev_node *) = NULL;
	struct vattr attr;
	timestruc_t now;

	ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
	ASSERT(RW_READ_HELD(&ddv->sdev_contents));

	/* nothing can live at or beyond MAXOFF_T: report end of directory */
	if (uiop->uio_loffset >= MAXOFF_T) {
		if (eofp)
			*eofp = 1;
		return (0);
	}

	if (uiop->uio_iovcnt != 1)
		return (EINVAL);

	if (vp->v_type != VDIR)
		return (ENOTDIR);

	/* directories flagged SDEV_VTOR supply a per-node validator */
	if (ddv->sdev_flags & SDEV_VTOR) {
		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
		ASSERT(vtor);
	}

	if (eofp != NULL)
		*eofp = 0;

	/*
	 * soff is the entry index to resume from; the staging buffer is
	 * sized to match the caller's iovec.
	 */
	soff = uiop->uio_loffset;
	iovp = uiop->uio_iov;
	alloc_count = iovp->iov_len;
	dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
	outcount = 0;

	/* a zombie directory only reports what is already cached */
	if (ddv->sdev_state == SDEV_ZOMBIE)
		goto get_cache;

	if (SDEV_IS_GLOBAL(ddv)) {

		if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
		    !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
		    !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
		    ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
		    !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
		    !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
		    !sdev_reconfig_disable) {
			/*
			 * invoking "devfsadm" to do system device reconfig
			 */
			mutex_enter(&ddv->sdev_lookup_lock);
			SDEV_BLOCK_OTHERS(ddv,
			    (SDEV_READDIR|SDEV_LGWAITING));
			mutex_exit(&ddv->sdev_lookup_lock);

			sdcmn_err8(("readdir of %s by %s: reconfig\n",
			    ddv->sdev_path, curproc->p_user.u_comm));
			if (sdev_reconfig_verbose) {
				cmn_err(CE_CONT,
				    "?readdir of %s by %s: reconfig\n",
				    ddv->sdev_path, curproc->p_user.u_comm);
			}

			sdev_devfsadmd_thread(ddv, NULL, kcred);
		} else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
			/*
			 * compensate the "ls" started later than "devfsadm"
			 */
			mutex_enter(&ddv->sdev_lookup_lock);
			SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
			mutex_exit(&ddv->sdev_lookup_lock);
		}

		/*
		 * release the contents lock so that
		 * the cache may be updated by devfsadmd
		 */
		rw_exit(&ddv->sdev_contents);
		mutex_enter(&ddv->sdev_lookup_lock);
		if (SDEV_IS_READDIR(ddv))
			(void) sdev_wait4lookup(ddv, SDEV_READDIR);
		mutex_exit(&ddv->sdev_lookup_lock);
		rw_enter(&ddv->sdev_contents, RW_READER);

		sdcmn_err4(("readdir of directory %s by %s\n",
		    ddv->sdev_name, curproc->p_user.u_comm));
		/*
		 * A directory still flagged SDEV_BUILD is populated from
		 * its backing store (persistent directories only) and the
		 * flag is then cleared.
		 * NOTE(review): a failure from sdev_filldir_from_store()
		 * is not acted upon here, and is overwritten by the
		 * uiomove() result whenever any entry is produced below.
		 */
		if (ddv->sdev_flags & SDEV_BUILD) {
			if (SDEV_IS_PERSIST(ddv)) {
				error = sdev_filldir_from_store(ddv,
				    alloc_count, cred);
			}
			ddv->sdev_flags &= ~SDEV_BUILD;
		}
	}

get_cache:
	/* handle "." and ".." */
	diroff = 0;
	if (soff == 0) {
		/* first time */
		this_reclen = DIRENT64_RECLEN(1);
		if (alloc_count < this_reclen) {
			error = EINVAL;
			goto done;
		}

		dp->d_ino = (ino64_t)ddv->sdev_ino;
		dp->d_off = (off64_t)1;
		dp->d_reclen = (ushort_t)this_reclen;

		(void) strncpy(dp->d_name, ".",
		    DIRENT64_NAMELEN(this_reclen));
		outcount += dp->d_reclen;
		dp = nextdp(dp);
	}

	/* index 1 is "..", emitted when resuming at or before it */
	diroff++;
	if (soff <= 1) {
		this_reclen = DIRENT64_RECLEN(2);
		if (alloc_count < outcount + this_reclen) {
			error = EINVAL;
			goto done;
		}

		dp->d_reclen = (ushort_t)this_reclen;
		dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
		dp->d_off = (off64_t)2;

		(void) strncpy(dp->d_name, "..",
		    DIRENT64_NAMELEN(this_reclen));
		outcount += dp->d_reclen;

		dp = nextdp(dp);
	}


	/* gets the cache */
	diroff++;
	for (dv = SDEV_FIRST_ENTRY(ddv); dv;
	    dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
		sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
		    diroff, soff, dv->sdev_name));

		/*
		 * skip entries before the resume index as well as
		 * nodes that are not yet in the SDEV_READY state
		 */
		if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
			sdcmn_err3(("sdev_readdir: pre-mature node  "
			    "%s %d\n", dv->sdev_name, dv->sdev_state));
			continue;
		}

		/*
		 * Check validity of node
		 */
		if (vtor) {
			switch (vtor(dv)) {
			case SDEV_VTOR_VALID:
				break;
			case SDEV_VTOR_INVALID:
			case SDEV_VTOR_SKIP:
				/* not listed, but still counted in diroff */
				continue;
			default:
				cmn_err(CE_PANIC,
				    "dev fs: validator failed: %s(%p)\n",
				    dv->sdev_name, (void *)dv);
				break;
			/*NOTREACHED*/
			}
		}

		namelen = strlen(dv->sdev_name);
		reclen = DIRENT64_RECLEN(namelen);
		/* stop at the first entry that no longer fits */
		if (outcount + reclen > alloc_count) {
			goto full;
		}
		dp->d_reclen = (ushort_t)reclen;
		dp->d_ino = (ino64_t)dv->sdev_ino;
		dp->d_off = (off64_t)diroff + 1;
		(void) strncpy(dp->d_name, dv->sdev_name,
		    DIRENT64_NAMELEN(reclen));
		outcount += reclen;
		dp = nextdp(dp);
	}

	/*
	 * Reached either because the buffer filled up (dv is the first
	 * entry that did not fit) or because the walk finished (dv is NULL).
	 */
full:
	sdcmn_err4(("sdev_readdir: moving %lu bytes: "
	    "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
	    (void *)dv));

	if (outcount)
		error = uiomove(outbuf, outcount, UIO_READ, uiop);

	/* on success record where the next call should resume */
	if (!error) {
		uiop->uio_loffset = diroff;
		if (eofp)
			*eofp = dv ? 0 : 1;
	}


	/*
	 * Best effort: stamp ctime and atime on the backing store node;
	 * the VOP_SETATTR() result is deliberately ignored.
	 */
	if (ddv->sdev_attrvp) {
		gethrestime(&now);
		attr.va_ctime = now;
		attr.va_atime = now;
		attr.va_mask = AT_CTIME|AT_ATIME;

		(void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
	}
done:
	/* every path that allocated outbuf leaves through here */
	kmem_free(outbuf, alloc_count);
	return (error);
}
2632 
2633 static int
2634 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2635 {
2636 	vnode_t *vp;
2637 	vnode_t *cvp;
2638 	struct sdev_node *svp;
2639 	char *nm;
2640 	struct pathname pn;
2641 	int error;
2642 	int persisted = 0;
2643 
2644 	ASSERT(INGLOBALZONE(curproc));
2645 
2646 	if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2647 		return (error);
2648 	nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2649 
2650 	vp = rootdir;
2651 	VN_HOLD(vp);
2652 
2653 	while (pn_pathleft(&pn)) {
2654 		ASSERT(vp->v_type == VDIR || vp->v_type == VLNK);
2655 		(void) pn_getcomponent(&pn, nm);
2656 
2657 		/*
2658 		 * Deal with the .. special case where we may be
2659 		 * traversing up across a mount point, to the
2660 		 * root of this filesystem or global root.
2661 		 */
2662 		if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) {
2663 checkforroot:
2664 			if (VN_CMP(vp, rootdir)) {
2665 				nm[1] = 0;
2666 			} else if (vp->v_flag & VROOT) {
2667 				vfs_t *vfsp;
2668 				cvp = vp;
2669 				vfsp = cvp->v_vfsp;
2670 				vfs_rlock_wait(vfsp);
2671 				vp = cvp->v_vfsp->vfs_vnodecovered;
2672 				if (vp == NULL ||
2673 				    (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
2674 					vfs_unlock(vfsp);
2675 					VN_RELE(cvp);
2676 					error = EIO;
2677 					break;
2678 				}
2679 				VN_HOLD(vp);
2680 				vfs_unlock(vfsp);
2681 				VN_RELE(cvp);
2682 				cvp = NULL;
2683 				goto checkforroot;
2684 			}
2685 		}
2686 
2687 		error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2688 		    NULL, NULL);
2689 		if (error) {
2690 			VN_RELE(vp);
2691 			break;
2692 		}
2693 
2694 		/* traverse mount points encountered on our journey */
2695 		if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2696 			VN_RELE(vp);
2697 			VN_RELE(cvp);
2698 			break;
2699 		}
2700 
2701 		/*
2702 		 * symbolic link, can be either relative and absolute
2703 		 */
2704 		if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) {
2705 			struct pathname linkpath;
2706 			pn_alloc(&linkpath);
2707 			if (error = pn_getsymlink(cvp, &linkpath, kcred)) {
2708 				pn_free(&linkpath);
2709 				break;
2710 			}
2711 			if (pn_pathleft(&linkpath) == 0)
2712 				(void) pn_set(&linkpath, ".");
2713 			error = pn_insert(&pn, &linkpath, strlen(nm));
2714 			pn_free(&linkpath);
2715 			if (pn.pn_pathlen == 0) {
2716 				VN_RELE(vp);
2717 				return (ENOENT);
2718 			}
2719 			if (pn.pn_path[0] == '/') {
2720 				pn_skipslash(&pn);
2721 				VN_RELE(vp);
2722 				VN_RELE(cvp);
2723 				vp = rootdir;
2724 				VN_HOLD(vp);
2725 			} else {
2726 				VN_RELE(cvp);
2727 			}
2728 			continue;
2729 		}
2730 
2731 		VN_RELE(vp);
2732 
2733 		/*
2734 		 * Direct the operation to the persisting filesystem
2735 		 * underlying /dev.  Bail if we encounter a
2736 		 * non-persistent dev entity here.
2737 		 */
2738 		if (cvp->v_vfsp->vfs_fstype == devtype) {
2739 
2740 			if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2741 				error = ENOENT;
2742 				VN_RELE(cvp);
2743 				break;
2744 			}
2745 
2746 			if (VTOSDEV(cvp) == NULL) {
2747 				error = ENOENT;
2748 				VN_RELE(cvp);
2749 				break;
2750 			}
2751 			svp = VTOSDEV(cvp);
2752 			if ((vp = svp->sdev_attrvp) == NULL) {
2753 				error = ENOENT;
2754 				VN_RELE(cvp);
2755 				break;
2756 			}
2757 			persisted = 1;
2758 			VN_HOLD(vp);
2759 			VN_RELE(cvp);
2760 			cvp = vp;
2761 		}
2762 
2763 		vp = cvp;
2764 		pn_skipslash(&pn);
2765 	}
2766 
2767 	kmem_free(nm, MAXNAMELEN);
2768 	pn_free(&pn);
2769 
2770 	if (error)
2771 		return (error);
2772 
2773 	/*
2774 	 * Only return persisted nodes in the filesystem underlying /dev.
2775 	 */
2776 	if (!persisted) {
2777 		VN_RELE(vp);
2778 		return (ENOENT);
2779 	}
2780 
2781 	*r_vp = vp;
2782 	return (0);
2783 }
2784 
2785 int
2786 sdev_modctl_readdir(const char *dir, char ***dirlistp,
2787 	int *npathsp, int *npathsp_alloc, int checking_empty)
2788 {
2789 	char	**pathlist = NULL;
2790 	char	**newlist = NULL;
2791 	int	npaths = 0;
2792 	int	npaths_alloc = 0;
2793 	dirent64_t *dbuf = NULL;
2794 	int	n;
2795 	char	*s;
2796 	int error;
2797 	vnode_t *vp;
2798 	int eof;
2799 	struct iovec iov;
2800 	struct uio uio;
2801 	struct dirent64 *dp;
2802 	size_t dlen;
2803 	size_t dbuflen;
2804 	int ndirents = 64;
2805 	char *nm;
2806 
2807 	error = sdev_modctl_lookup(dir, &vp);
2808 	sdcmn_err11(("modctl readdir: %s by %s: %s\n",
2809 	    dir, curproc->p_user.u_comm,
2810 	    (error == 0) ? "ok" : "failed"));
2811 	if (error)
2812 		return (error);
2813 
2814 	dlen = ndirents * (sizeof (*dbuf));
2815 	dbuf = kmem_alloc(dlen, KM_SLEEP);
2816 
2817 	uio.uio_iov = &iov;
2818 	uio.uio_iovcnt = 1;
2819 	uio.uio_segflg = UIO_SYSSPACE;
2820 	uio.uio_fmode = 0;
2821 	uio.uio_extflg = UIO_COPY_CACHED;
2822 	uio.uio_loffset = 0;
2823 	uio.uio_llimit = MAXOFFSET_T;
2824 
2825 	eof = 0;
2826 	error = 0;
2827 	while (!error && !eof) {
2828 		uio.uio_resid = dlen;
2829 		iov.iov_base = (char *)dbuf;
2830 		iov.iov_len = dlen;
2831 
2832 		(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2833 		error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
2834 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2835 
2836 		dbuflen = dlen - uio.uio_resid;
2837 
2838 		if (error || dbuflen == 0)
2839 			break;
2840 
2841 		for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
2842 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
2843 
2844 			nm = dp->d_name;
2845 
2846 			if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
2847 				continue;
2848 			if (npaths == npaths_alloc) {
2849 				npaths_alloc += 64;
2850 				newlist = (char **)
2851 				    kmem_zalloc((npaths_alloc + 1) *
2852 				    sizeof (char *), KM_SLEEP);
2853 				if (pathlist) {
2854 					bcopy(pathlist, newlist,
2855 					    npaths * sizeof (char *));
2856 					kmem_free(pathlist,
2857 					    (npaths + 1) * sizeof (char *));
2858 				}
2859 				pathlist = newlist;
2860 			}
2861 			n = strlen(nm) + 1;
2862 			s = kmem_alloc(n, KM_SLEEP);
2863 			bcopy(nm, s, n);
2864 			pathlist[npaths++] = s;
2865 			sdcmn_err11(("  %s/%s\n", dir, s));
2866 
2867 			/* if checking empty, one entry is as good as many */
2868 			if (checking_empty) {
2869 				eof = 1;
2870 				break;
2871 			}
2872 		}
2873 	}
2874 
2875 exit:
2876 	VN_RELE(vp);
2877 
2878 	if (dbuf)
2879 		kmem_free(dbuf, dlen);
2880 
2881 	if (error)
2882 		return (error);
2883 
2884 	*dirlistp = pathlist;
2885 	*npathsp = npaths;
2886 	*npathsp_alloc = npaths_alloc;
2887 
2888 	return (0);
2889 }
2890 
2891 void
2892 sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
2893 {
2894 	int	i, n;
2895 
2896 	for (i = 0; i < npaths; i++) {
2897 		n = strlen(pathlist[i]) + 1;
2898 		kmem_free(pathlist[i], n);
2899 	}
2900 
2901 	kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
2902 }
2903 
2904 int
2905 sdev_modctl_devexists(const char *path)
2906 {
2907 	vnode_t *vp;
2908 	int error;
2909 
2910 	error = sdev_modctl_lookup(path, &vp);
2911 	sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
2912 	    path, curproc->p_user.u_comm,
2913 	    (error == 0) ? "ok" : "failed"));
2914 	if (error == 0)
2915 		VN_RELE(vp);
2916 
2917 	return (error);
2918 }
2919 
2920 extern int sdev_vnodeops_tbl_size;
2921 
2922 /*
2923  * construct a new template with overrides from vtab
2924  */
2925 static fs_operation_def_t *
2926 sdev_merge_vtab(const fs_operation_def_t tab[])
2927 {
2928 	fs_operation_def_t *new;
2929 	const fs_operation_def_t *tab_entry;
2930 
2931 	/* make a copy of standard vnode ops table */
2932 	new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
2933 	bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
2934 
2935 	/* replace the overrides from tab */
2936 	for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
2937 		fs_operation_def_t *std_entry = new;
2938 		while (std_entry->name) {
2939 			if (strcmp(tab_entry->name, std_entry->name) == 0) {
2940 				std_entry->func = tab_entry->func;
2941 				break;
2942 			}
2943 			std_entry++;
2944 		}
2945 		if (std_entry->name == NULL)
2946 			cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
2947 			    tab_entry->name);
2948 	}
2949 
2950 	return (new);
2951 }
2952 
2953 /* free memory allocated by sdev_merge_vtab */
2954 static void
2955 sdev_free_vtab(fs_operation_def_t *new)
2956 {
2957 	kmem_free(new, sdev_vnodeops_tbl_size);
2958 }
2959 
2960 /*
2961  * a generic setattr() function
2962  *
2963  * note: flags only supports AT_UID and AT_GID.
2964  *	 Future enhancements can be done for other types, e.g. AT_MODE
2965  */
2966 int
2967 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
2968     struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
2969     int), int protocol)
2970 {
2971 	struct sdev_node	*dv = VTOSDEV(vp);
2972 	struct sdev_node	*parent = dv->sdev_dotdot;
2973 	struct vattr		*get;
2974 	uint_t			mask = vap->va_mask;
2975 	int 			error;
2976 
2977 	/* some sanity checks */
2978 	if (vap->va_mask & AT_NOSET)
2979 		return (EINVAL);
2980 
2981 	if (vap->va_mask & AT_SIZE) {
2982 		if (vp->v_type == VDIR) {
2983 			return (EISDIR);
2984 		}
2985 	}
2986 
2987 	/* no need to set attribute, but do not fail either */
2988 	ASSERT(parent);
2989 	rw_enter(&parent->sdev_contents, RW_READER);
2990 	if (dv->sdev_state == SDEV_ZOMBIE) {
2991 		rw_exit(&parent->sdev_contents);
2992 		return (0);
2993 	}
2994 
2995 	/* If backing store exists, just set it. */
2996 	if (dv->sdev_attrvp) {
2997 		rw_exit(&parent->sdev_contents);
2998 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
2999 	}
3000 
3001 	/*
3002 	 * Otherwise, for nodes with the persistence attribute, create it.
3003 	 */
3004 	ASSERT(dv->sdev_attr);
3005 	if (SDEV_IS_PERSIST(dv) ||
3006 	    ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
3007 		sdev_vattr_merge(dv, vap);
3008 		rw_enter(&dv->sdev_contents, RW_WRITER);
3009 		error = sdev_shadow_node(dv, cred);
3010 		rw_exit(&dv->sdev_contents);
3011 		rw_exit(&parent->sdev_contents);
3012 
3013 		if (error)
3014 			return (error);
3015 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3016 	}
3017 
3018 
3019 	/*
3020 	 * sdev_attr was allocated in sdev_mknode
3021 	 */
3022 	rw_enter(&dv->sdev_contents, RW_WRITER);
3023 	error = secpolicy_vnode_setattr(cred, vp, vap,
3024 	    dv->sdev_attr, flags, sdev_unlocked_access, dv);
3025 	if (error) {
3026 		rw_exit(&dv->sdev_contents);
3027 		rw_exit(&parent->sdev_contents);
3028 		return (error);
3029 	}
3030 
3031 	get = dv->sdev_attr;
3032 	if (mask & AT_MODE) {
3033 		get->va_mode &= S_IFMT;
3034 		get->va_mode |= vap->va_mode & ~S_IFMT;
3035 	}
3036 
3037 	if ((mask & AT_UID) || (mask & AT_GID)) {
3038 		if (mask & AT_UID)
3039 			get->va_uid = vap->va_uid;
3040 		if (mask & AT_GID)
3041 			get->va_gid = vap->va_gid;
3042 		/*
3043 		 * a callback must be provided if the protocol is set
3044 		 */
3045 		if ((protocol & AT_UID) || (protocol & AT_GID)) {
3046 			ASSERT(callback);
3047 			error = callback(dv, get, protocol);
3048 			if (error) {
3049 				rw_exit(&dv->sdev_contents);
3050 				rw_exit(&parent->sdev_contents);
3051 				return (error);
3052 			}
3053 		}
3054 	}
3055 
3056 	if (mask & AT_ATIME)
3057 		get->va_atime = vap->va_atime;
3058 	if (mask & AT_MTIME)
3059 		get->va_mtime = vap->va_mtime;
3060 	if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
3061 		gethrestime(&get->va_ctime);
3062 	}
3063 
3064 	sdev_vattr_merge(dv, get);
3065 	rw_exit(&dv->sdev_contents);
3066 	rw_exit(&parent->sdev_contents);
3067 	return (0);
3068 }
3069 
3070 /*
3071  * a generic inactive() function
3072  */
3073 /*ARGSUSED*/
3074 void
3075 devname_inactive_func(struct vnode *vp, struct cred *cred,
3076     void (*callback)(struct vnode *))
3077 {
3078 	int clean;
3079 	struct sdev_node *dv = VTOSDEV(vp);
3080 	struct sdev_node *ddv = dv->sdev_dotdot;
3081 	int state;
3082 
3083 	rw_enter(&ddv->sdev_contents, RW_WRITER);
3084 	state = dv->sdev_state;
3085 
3086 	mutex_enter(&vp->v_lock);
3087 	ASSERT(vp->v_count >= 1);
3088 
3089 	if (vp->v_count == 1 && callback != NULL)
3090 		callback(vp);
3091 
3092 	clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
3093 
3094 	/*
3095 	 * last ref count on the ZOMBIE node is released.
3096 	 * clean up the sdev_node, and
3097 	 * release the hold on the backing store node so that
3098 	 * the ZOMBIE backing stores also cleaned out.
3099 	 */
3100 	if (clean) {
3101 		ASSERT(ddv);
3102 
3103 		ddv->sdev_nlink--;
3104 		if (vp->v_type == VDIR) {
3105 			dv->sdev_nlink--;
3106 		}
3107 		if ((dv->sdev_flags & SDEV_STALE) == 0)
3108 			avl_remove(&ddv->sdev_entries, dv);
3109 		dv->sdev_nlink--;
3110 		--vp->v_count;
3111 		mutex_exit(&vp->v_lock);
3112 		sdev_nodedestroy(dv, 0);
3113 	} else {
3114 		--vp->v_count;
3115 		mutex_exit(&vp->v_lock);
3116 	}
3117 	rw_exit(&ddv->sdev_contents);
3118 }
3119