xref: /titanic_50/usr/src/uts/common/fs/dev/sdev_subr.c (revision 6c7d2bac378b037c98f53a2f7a51c5f3c7741dc3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * utility routines for the /dev fs
30  */
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/t_lock.h>
35 #include <sys/systm.h>
36 #include <sys/sysmacros.h>
37 #include <sys/user.h>
38 #include <sys/time.h>
39 #include <sys/vfs.h>
40 #include <sys/vnode.h>
41 #include <sys/file.h>
42 #include <sys/fcntl.h>
43 #include <sys/flock.h>
44 #include <sys/kmem.h>
45 #include <sys/uio.h>
46 #include <sys/errno.h>
47 #include <sys/stat.h>
48 #include <sys/cred.h>
49 #include <sys/dirent.h>
50 #include <sys/pathname.h>
51 #include <sys/cmn_err.h>
52 #include <sys/debug.h>
53 #include <sys/mode.h>
54 #include <sys/policy.h>
55 #include <fs/fs_subr.h>
56 #include <sys/mount.h>
57 #include <sys/fs/snode.h>
58 #include <sys/fs/dv_node.h>
59 #include <sys/fs/sdev_impl.h>
60 #include <sys/fs/sdev_node.h>
61 #include <sys/sunndi.h>
62 #include <sys/sunmdi.h>
63 #include <sys/conf.h>
64 #include <sys/proc.h>
65 #include <sys/user.h>
66 #include <sys/modctl.h>
67 
#ifdef DEBUG
/*
 * Debug message mask.
 * NOTE(review): presumably tested by the sdcmn_err*() macros -- confirm
 * against their definition.
 */
int sdev_debug = 0x00000001;

/* extra cache flags handed to kmem_cache_create() for sdev_node_cache */
int sdev_debug_cache_flags = 0;
#endif
72 
73 /*
74  * globals
75  */
76 /* prototype memory vattrs */
77 vattr_t sdev_vattr_dir = {
78 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
79 	VDIR,					/* va_type */
80 	SDEV_DIRMODE_DEFAULT,			/* va_mode */
81 	SDEV_UID_DEFAULT,			/* va_uid */
82 	SDEV_GID_DEFAULT,			/* va_gid */
83 	0,					/* va_fsid */
84 	0,					/* va_nodeid */
85 	0,					/* va_nlink */
86 	0,					/* va_size */
87 	0,					/* va_atime */
88 	0,					/* va_mtime */
89 	0,					/* va_ctime */
90 	0,					/* va_rdev */
91 	0,					/* va_blksize */
92 	0,					/* va_nblocks */
93 	0					/* va_vcode */
94 };
95 
96 vattr_t sdev_vattr_lnk = {
97 	AT_TYPE|AT_MODE,			/* va_mask */
98 	VLNK,					/* va_type */
99 	SDEV_LNKMODE_DEFAULT,			/* va_mode */
100 	SDEV_UID_DEFAULT,			/* va_uid */
101 	SDEV_GID_DEFAULT,			/* va_gid */
102 	0,					/* va_fsid */
103 	0,					/* va_nodeid */
104 	0,					/* va_nlink */
105 	0,					/* va_size */
106 	0,					/* va_atime */
107 	0,					/* va_mtime */
108 	0,					/* va_ctime */
109 	0,					/* va_rdev */
110 	0,					/* va_blksize */
111 	0,					/* va_nblocks */
112 	0					/* va_vcode */
113 };
114 
115 vattr_t sdev_vattr_blk = {
116 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
117 	VBLK,					/* va_type */
118 	S_IFBLK | SDEV_DEVMODE_DEFAULT,		/* va_mode */
119 	SDEV_UID_DEFAULT,			/* va_uid */
120 	SDEV_GID_DEFAULT,			/* va_gid */
121 	0,					/* va_fsid */
122 	0,					/* va_nodeid */
123 	0,					/* va_nlink */
124 	0,					/* va_size */
125 	0,					/* va_atime */
126 	0,					/* va_mtime */
127 	0,					/* va_ctime */
128 	0,					/* va_rdev */
129 	0,					/* va_blksize */
130 	0,					/* va_nblocks */
131 	0					/* va_vcode */
132 };
133 
134 vattr_t sdev_vattr_chr = {
135 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
136 	VCHR,					/* va_type */
137 	S_IFCHR | SDEV_DEVMODE_DEFAULT,		/* va_mode */
138 	SDEV_UID_DEFAULT,			/* va_uid */
139 	SDEV_GID_DEFAULT,			/* va_gid */
140 	0,					/* va_fsid */
141 	0,					/* va_nodeid */
142 	0,					/* va_nlink */
143 	0,					/* va_size */
144 	0,					/* va_atime */
145 	0,					/* va_mtime */
146 	0,					/* va_ctime */
147 	0,					/* va_rdev */
148 	0,					/* va_blksize */
149 	0,					/* va_nblocks */
150 	0					/* va_vcode */
151 };
152 
153 kmem_cache_t	*sdev_node_cache;	/* sdev_node cache */
154 int		devtype;		/* fstype */
155 
156 struct devname_ops *devname_ns_ops;	/* default name service directory ops */
157 kmutex_t devname_nsmaps_lock;	/* protect devname_nsmaps */
158 
159 /* static */
160 static struct devname_nsmap *devname_nsmaps = NULL;
161 				/* contents from /etc/dev/devname_master */
162 static int devname_nsmaps_invalidated = 0; /* "devfsadm -m" has run */
163 
164 static struct vnodeops *sdev_get_vop(struct sdev_node *);
165 static void sdev_set_no_nocache(struct sdev_node *);
166 static int sdev_get_moduleops(struct sdev_node *);
167 static void sdev_handle_alloc(struct sdev_node *);
168 static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
169 static void sdev_free_vtab(fs_operation_def_t *);
170 
171 static void
172 sdev_prof_free(struct sdev_node *dv)
173 {
174 	ASSERT(!SDEV_IS_GLOBAL(dv));
175 	if (dv->sdev_prof.dev_name)
176 		nvlist_free(dv->sdev_prof.dev_name);
177 	if (dv->sdev_prof.dev_map)
178 		nvlist_free(dv->sdev_prof.dev_map);
179 	if (dv->sdev_prof.dev_symlink)
180 		nvlist_free(dv->sdev_prof.dev_symlink);
181 	if (dv->sdev_prof.dev_glob_incdir)
182 		nvlist_free(dv->sdev_prof.dev_glob_incdir);
183 	if (dv->sdev_prof.dev_glob_excdir)
184 		nvlist_free(dv->sdev_prof.dev_glob_excdir);
185 	bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
186 }
187 
188 /*
189  * sdev_node cache constructor
190  */
191 /*ARGSUSED1*/
192 static int
193 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
194 {
195 	struct sdev_node *dv = (struct sdev_node *)buf;
196 	struct vnode *vp;
197 
198 	ASSERT(flag == KM_SLEEP);
199 
200 	bzero(buf, sizeof (struct sdev_node));
201 	rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
202 	dv->sdev_vnode = vn_alloc(KM_SLEEP);
203 	vp = SDEVTOV(dv);
204 	vp->v_data = (caddr_t)dv;
205 	return (0);
206 }
207 
208 /* sdev_node destructor for kmem cache */
209 /*ARGSUSED1*/
210 static void
211 i_sdev_node_dtor(void *buf, void *arg)
212 {
213 	struct sdev_node *dv = (struct sdev_node *)buf;
214 	struct vnode *vp = SDEVTOV(dv);
215 
216 	rw_destroy(&dv->sdev_contents);
217 	vn_free(vp);
218 }
219 
220 /* initialize sdev_node cache */
221 void
222 sdev_node_cache_init()
223 {
224 	int flags = 0;
225 
226 #ifdef	DEBUG
227 	flags = sdev_debug_cache_flags;
228 	if (flags)
229 		sdcmn_err(("cache debug flags 0x%x\n", flags));
230 #endif	/* DEBUG */
231 
232 	ASSERT(sdev_node_cache == NULL);
233 	sdev_node_cache = kmem_cache_create("sdev_node_cache",
234 	    sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
235 	    NULL, NULL, NULL, flags);
236 }
237 
238 /* destroy sdev_node cache */
239 void
240 sdev_node_cache_fini()
241 {
242 	ASSERT(sdev_node_cache != NULL);
243 	kmem_cache_destroy(sdev_node_cache);
244 	sdev_node_cache = NULL;
245 }
246 
247 void
248 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
249 {
250 	ASSERT(dv);
251 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
252 	dv->sdev_state = state;
253 }
254 
255 static void
256 sdev_attrinit(struct sdev_node *dv, vattr_t *vap)
257 {
258 	timestruc_t now;
259 
260 	ASSERT(vap);
261 
262 	dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
263 	*dv->sdev_attr = *vap;
264 
265 	dv->sdev_attr->va_mode = MAKEIMODE(vap->va_type, vap->va_mode);
266 
267 	gethrestime(&now);
268 	dv->sdev_attr->va_atime = now;
269 	dv->sdev_attr->va_mtime = now;
270 	dv->sdev_attr->va_ctime = now;
271 }
272 
273 /* alloc and initialize a sdev_node */
274 int
275 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
276     vattr_t *vap)
277 {
278 	struct sdev_node *dv = NULL;
279 	struct vnode *vp;
280 	size_t nmlen, len;
281 	devname_handle_t  *dhl;
282 
283 	nmlen = strlen(nm) + 1;
284 	if (nmlen > MAXNAMELEN) {
285 		sdcmn_err9(("sdev_nodeinit: node name %s"
286 		    " too long\n", nm));
287 		*newdv = NULL;
288 		return (ENAMETOOLONG);
289 	}
290 
291 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
292 
293 	dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
294 	bcopy(nm, dv->sdev_name, nmlen);
295 	dv->sdev_namelen = nmlen - 1;	/* '\0' not included */
296 	len = strlen(ddv->sdev_path) + strlen(nm) + 2;
297 	dv->sdev_path = kmem_alloc(len, KM_SLEEP);
298 	(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
299 	/* overwritten for VLNK nodes */
300 	dv->sdev_symlink = NULL;
301 
302 	vp = SDEVTOV(dv);
303 	vn_reinit(vp);
304 	vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
305 	if (vap)
306 		vp->v_type = vap->va_type;
307 
308 	/*
309 	 * initialized to the parent's vnodeops.
310 	 * maybe overwriten for a VDIR
311 	 */
312 	vn_setops(vp, vn_getops(SDEVTOV(ddv)));
313 	vn_exists(vp);
314 
315 	dv->sdev_dotdot = NULL;
316 	dv->sdev_dot = NULL;
317 	dv->sdev_next = NULL;
318 	dv->sdev_attrvp = NULL;
319 	if (vap) {
320 		sdev_attrinit(dv, vap);
321 	} else {
322 		dv->sdev_attr = NULL;
323 	}
324 
325 	dv->sdev_ino = sdev_mkino(dv);
326 	dv->sdev_nlink = 0;		/* updated on insert */
327 	dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
328 	dv->sdev_flags |= SDEV_BUILD;
329 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
330 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
331 	if (SDEV_IS_GLOBAL(ddv)) {
332 		dv->sdev_flags |= SDEV_GLOBAL;
333 		dv->sdev_mapinfo = NULL;
334 		dhl = &(dv->sdev_handle);
335 		dhl->dh_data = dv;
336 		dhl->dh_spec = DEVNAME_NS_NONE;
337 		dhl->dh_args = NULL;
338 		sdev_set_no_nocache(dv);
339 		dv->sdev_gdir_gen = 0;
340 	} else {
341 		dv->sdev_flags &= ~SDEV_GLOBAL;
342 		dv->sdev_origin = NULL; /* set later */
343 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
344 		dv->sdev_ldir_gen = 0;
345 		dv->sdev_devtree_gen = 0;
346 	}
347 
348 	rw_enter(&dv->sdev_contents, RW_WRITER);
349 	sdev_set_nodestate(dv, SDEV_INIT);
350 	rw_exit(&dv->sdev_contents);
351 	*newdv = dv;
352 
353 	return (0);
354 }
355 
356 /*
357  * transition a sdev_node into SDEV_READY state
358  */
359 int
360 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
361     void *args, struct cred *cred)
362 {
363 	int error = 0;
364 	struct vnode *vp = SDEVTOV(dv);
365 	vtype_t type;
366 
367 	ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
368 
369 	type = vap->va_type;
370 	vp->v_type = type;
371 	vp->v_rdev = vap->va_rdev;
372 	rw_enter(&dv->sdev_contents, RW_WRITER);
373 	if (type == VDIR) {
374 		dv->sdev_nlink = 2;
375 		dv->sdev_flags &= ~SDEV_PERSIST;
376 		dv->sdev_flags &= ~SDEV_DYNAMIC;
377 		vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
378 		error = sdev_get_moduleops(dv); /* from plug-in module */
379 		ASSERT(dv->sdev_dotdot);
380 		ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
381 		vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
382 	} else if (type == VLNK) {
383 		ASSERT(args);
384 		dv->sdev_nlink = 1;
385 		dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
386 	} else {
387 		dv->sdev_nlink = 1;
388 	}
389 
390 	if (!(SDEV_IS_GLOBAL(dv))) {
391 		dv->sdev_origin = (struct sdev_node *)args;
392 		dv->sdev_flags &= ~SDEV_PERSIST;
393 	}
394 
395 	/*
396 	 * shadow node is created here OR
397 	 * if failed (indicated by dv->sdev_attrvp == NULL),
398 	 * created later in sdev_setattr
399 	 */
400 	if (avp) {
401 		dv->sdev_attrvp = avp;
402 	} else {
403 		if (dv->sdev_attr == NULL)
404 			sdev_attrinit(dv, vap);
405 		else
406 			*dv->sdev_attr = *vap;
407 
408 		if ((SDEV_IS_PERSIST(dv) && (dv->sdev_attrvp == NULL)) ||
409 		    ((SDEVTOV(dv)->v_type == VDIR) &&
410 		    (dv->sdev_attrvp == NULL)))
411 			error = sdev_shadow_node(dv, cred);
412 	}
413 
414 	/* transition to READY state */
415 	sdev_set_nodestate(dv, SDEV_READY);
416 	sdev_nc_node_exists(dv);
417 	rw_exit(&dv->sdev_contents);
418 	return (error);
419 }
420 
421 /*
422  * setting ZOMBIE state
423  */
424 static int
425 sdev_nodezombied(struct sdev_node *dv)
426 {
427 	rw_enter(&dv->sdev_contents, RW_WRITER);
428 	sdev_set_nodestate(dv, SDEV_ZOMBIE);
429 	rw_exit(&dv->sdev_contents);
430 	return (0);
431 }
432 
433 /*
434  * Build the VROOT sdev_node.
435  */
436 /*ARGSUSED*/
437 struct sdev_node *
438 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
439     struct vnode *avp, struct cred *cred)
440 {
441 	struct sdev_node *dv;
442 	struct vnode *vp;
443 	char devdir[] = "/dev";
444 
445 	ASSERT(sdev_node_cache != NULL);
446 	ASSERT(avp);
447 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
448 	vp = SDEVTOV(dv);
449 	vn_reinit(vp);
450 	vp->v_flag |= VROOT;
451 	vp->v_vfsp = vfsp;
452 	vp->v_type = VDIR;
453 	vp->v_rdev = devdev;
454 	vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
455 	vn_exists(vp);
456 
457 	if (vfsp->vfs_mntpt)
458 		dv->sdev_name = i_ddi_strdup(
459 		    (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
460 	else
461 		/* vfs_mountdev1 set mount point later */
462 		dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
463 	dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
464 	dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
465 	dv->sdev_ino = SDEV_ROOTINO;
466 	dv->sdev_nlink = 2;		/* name + . (no sdev_insert) */
467 	dv->sdev_dotdot = dv;		/* .. == self */
468 	dv->sdev_attrvp = avp;
469 	dv->sdev_attr = NULL;
470 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
471 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
472 	if (strcmp(dv->sdev_name, "/dev") == 0) {
473 		mutex_init(&devname_nsmaps_lock, NULL, MUTEX_DEFAULT, NULL);
474 		dv->sdev_mapinfo = NULL;
475 		dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
476 		bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
477 		dv->sdev_gdir_gen = 0;
478 	} else {
479 		dv->sdev_flags = SDEV_BUILD;
480 		dv->sdev_flags &= ~SDEV_PERSIST;
481 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
482 		dv->sdev_ldir_gen = 0;
483 		dv->sdev_devtree_gen = 0;
484 	}
485 
486 	rw_enter(&dv->sdev_contents, RW_WRITER);
487 	sdev_set_nodestate(dv, SDEV_READY);
488 	rw_exit(&dv->sdev_contents);
489 	sdev_nc_node_exists(dv);
490 	return (dv);
491 }
492 
493 /*
494  *  1. load the module
495  *  2. modload invokes sdev_module_register, which in turn sets
496  *     the dv->sdev_mapinfo->dir_ops
497  *
498  * note: locking order:
499  *	dv->sdev_contents -> map->dir_lock
500  */
501 static int
502 sdev_get_moduleops(struct sdev_node *dv)
503 {
504 	int error = 0;
505 	struct devname_nsmap *map = NULL;
506 	char *module;
507 	char *path;
508 	int load = 1;
509 
510 	ASSERT(SDEVTOV(dv)->v_type == VDIR);
511 
512 	if (devname_nsmaps == NULL)
513 		return (0);
514 
515 	if (!sdev_nsmaps_loaded() && !sdev_nsmaps_reloaded())
516 		return (0);
517 
518 
519 	path = dv->sdev_path;
520 	if ((map = sdev_get_nsmap_by_dir(path, 0))) {
521 		rw_enter(&map->dir_lock, RW_READER);
522 		if (map->dir_invalid) {
523 			if (map->dir_module && map->dir_newmodule &&
524 			    (strcmp(map->dir_module,
525 					map->dir_newmodule) == 0)) {
526 				load = 0;
527 			}
528 			sdev_replace_nsmap(map, map->dir_newmodule,
529 			    map->dir_newmap);
530 		}
531 
532 		module = map->dir_module;
533 		if (module && load) {
534 			sdcmn_err6(("sdev_get_moduleops: "
535 			    "load module %s", module));
536 			rw_exit(&map->dir_lock);
537 			error = modload("devname", module);
538 			sdcmn_err6(("sdev_get_moduleops: error %d\n", error));
539 			if (error < 0) {
540 				return (-1);
541 			}
542 		} else if (module == NULL) {
543 			/*
544 			 * loading the module ops for name services
545 			 */
546 			if (devname_ns_ops == NULL) {
547 				sdcmn_err6((
548 				    "sdev_get_moduleops: modload default\n"));
549 				error = modload("devname", DEVNAME_NSCONFIG);
550 				sdcmn_err6((
551 				    "sdev_get_moduleops: error %d\n", error));
552 				if (error < 0) {
553 					return (-1);
554 				}
555 			}
556 
557 			if (!rw_tryupgrade(&map->dir_lock)) {
558 				rw_exit(&map->dir_lock);
559 				rw_enter(&map->dir_lock, RW_WRITER);
560 			}
561 			ASSERT(devname_ns_ops);
562 			map->dir_ops = devname_ns_ops;
563 			rw_exit(&map->dir_lock);
564 		}
565 	}
566 
567 	dv->sdev_mapinfo = map;
568 	return (0);
569 }
570 
571 /* directory dependent vop table */
572 struct sdev_vop_table {
573 	char *vt_name;				/* subdirectory name */
574 	const fs_operation_def_t *vt_service;	/* vnodeops table */
575 	struct vnodeops *vt_vops;		/* constructed vop */
576 	struct vnodeops **vt_global_vops;	/* global container for vop */
577 	int (*vt_vtor)(struct sdev_node *);	/* validate sdev_node */
578 	int vt_flags;
579 };
580 
581 /*
582  * A nice improvement would be to provide a plug-in mechanism
583  * for this table instead of a const table.
584  */
585 static struct sdev_vop_table vtab[] =
586 {
587 	{ "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
588 	SDEV_DYNAMIC | SDEV_VTOR },
589 
590 	{ "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },
591 
592 	{ NULL, NULL, NULL, NULL, NULL, 0}
593 };
594 
595 
596 /*
597  *  sets a directory's vnodeops if the directory is in the vtab;
598  */
599 static struct vnodeops *
600 sdev_get_vop(struct sdev_node *dv)
601 {
602 	int i;
603 	char *path;
604 
605 	path = dv->sdev_path;
606 	ASSERT(path);
607 
608 	/* gets the relative path to /dev/ */
609 	path += 5;
610 
611 	/* gets the vtab entry if matches */
612 	for (i = 0; vtab[i].vt_name; i++) {
613 		if (strcmp(vtab[i].vt_name, path) != 0)
614 			continue;
615 		dv->sdev_flags |= vtab[i].vt_flags;
616 
617 		if (vtab[i].vt_vops) {
618 			if (vtab[i].vt_global_vops)
619 				*(vtab[i].vt_global_vops) = vtab[i].vt_vops;
620 			return (vtab[i].vt_vops);
621 		}
622 
623 		if (vtab[i].vt_service) {
624 			fs_operation_def_t *templ;
625 			templ = sdev_merge_vtab(vtab[i].vt_service);
626 			if (vn_make_ops(vtab[i].vt_name,
627 			    (const fs_operation_def_t *)templ,
628 			    &vtab[i].vt_vops) != 0) {
629 				cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
630 				    vtab[i].vt_name);
631 				/*NOTREACHED*/
632 			}
633 			if (vtab[i].vt_global_vops) {
634 				*(vtab[i].vt_global_vops) = vtab[i].vt_vops;
635 			}
636 			sdev_free_vtab(templ);
637 			return (vtab[i].vt_vops);
638 		}
639 		return (sdev_vnodeops);
640 	}
641 
642 	/* child inherits the persistence of the parent */
643 	if (SDEV_IS_PERSIST(dv->sdev_dotdot))
644 		dv->sdev_flags |= SDEV_PERSIST;
645 
646 	return (sdev_vnodeops);
647 }
648 
649 static void
650 sdev_set_no_nocache(struct sdev_node *dv)
651 {
652 	int i;
653 	char *path;
654 
655 	ASSERT(dv->sdev_path);
656 	path = dv->sdev_path + strlen("/dev/");
657 
658 	for (i = 0; vtab[i].vt_name; i++) {
659 		if (strcmp(vtab[i].vt_name, path) == 0) {
660 			if (vtab[i].vt_flags & SDEV_NO_NCACHE)
661 				dv->sdev_flags |= SDEV_NO_NCACHE;
662 			break;
663 		}
664 	}
665 }
666 
667 void *
668 sdev_get_vtor(struct sdev_node *dv)
669 {
670 	int i;
671 
672 	for (i = 0; vtab[i].vt_name; i++) {
673 		if (strcmp(vtab[i].vt_name, dv->sdev_name) != 0)
674 			continue;
675 		return ((void *)vtab[i].vt_vtor);
676 	}
677 	return (NULL);
678 }
679 
680 /*
681  * Build the base root inode
682  */
683 ino_t
684 sdev_mkino(struct sdev_node *dv)
685 {
686 	ino_t	ino;
687 
688 	/*
689 	 * for now, follow the lead of tmpfs here
690 	 * need to someday understand the requirements here
691 	 */
692 	ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
693 	ino += SDEV_ROOTINO + 1;
694 
695 	return (ino);
696 }
697 
698 static int
699 sdev_getlink(struct vnode *linkvp, char **link)
700 {
701 	int err;
702 	char *buf;
703 	struct uio uio = {0};
704 	struct iovec iov = {0};
705 
706 	if (linkvp == NULL)
707 		return (ENOENT);
708 	ASSERT(linkvp->v_type == VLNK);
709 
710 	buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
711 	iov.iov_base = buf;
712 	iov.iov_len = MAXPATHLEN;
713 	uio.uio_iov = &iov;
714 	uio.uio_iovcnt = 1;
715 	uio.uio_resid = MAXPATHLEN;
716 	uio.uio_segflg = UIO_SYSSPACE;
717 	uio.uio_llimit = MAXOFFSET_T;
718 
719 	err = VOP_READLINK(linkvp, &uio, kcred);
720 	if (err) {
721 		cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
722 		kmem_free(buf, MAXPATHLEN);
723 		return (ENOENT);
724 	}
725 
726 	/* mission complete */
727 	*link = i_ddi_strdup(buf, KM_SLEEP);
728 	kmem_free(buf, MAXPATHLEN);
729 	return (0);
730 }
731 
732 /*
733  * A convenient wrapper to get the devfs node vnode for a device
734  * minor functionality: readlink() of a /dev symlink
735  * Place the link into dv->sdev_symlink
736  */
737 static int
738 sdev_follow_link(struct sdev_node *dv)
739 {
740 	int err;
741 	struct vnode *linkvp;
742 	char *link = NULL;
743 
744 	linkvp = SDEVTOV(dv);
745 	if (linkvp == NULL)
746 		return (ENOENT);
747 	ASSERT(linkvp->v_type == VLNK);
748 	err = sdev_getlink(linkvp, &link);
749 	if (err) {
750 		(void) sdev_nodezombied(dv);
751 		dv->sdev_symlink = NULL;
752 		return (ENOENT);
753 	}
754 
755 	ASSERT(link != NULL);
756 	dv->sdev_symlink = link;
757 	return (0);
758 }
759 
760 static int
761 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
762 {
763 	vtype_t otype = SDEVTOV(dv)->v_type;
764 
765 	/*
766 	 * existing sdev_node has a different type.
767 	 */
768 	if (otype != nvap->va_type) {
769 		sdcmn_err9(("sdev_node_check: existing node "
770 		    "  %s type %d does not match new node type %d\n",
771 		    dv->sdev_name, otype, nvap->va_type));
772 		return (EEXIST);
773 	}
774 
775 	/*
776 	 * For a symlink, the target should be the same.
777 	 */
778 	if (otype == VLNK) {
779 		ASSERT(nargs != NULL);
780 		ASSERT(dv->sdev_symlink != NULL);
781 		if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
782 			sdcmn_err9(("sdev_node_check: existing node "
783 			    " %s has different symlink %s as new node "
784 			    " %s\n", dv->sdev_name, dv->sdev_symlink,
785 			    (char *)nargs));
786 			return (EEXIST);
787 		}
788 	}
789 
790 	return (0);
791 }
792 
793 /*
794  * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
795  *
796  * arguments:
797  *	- ddv (parent)
798  *	- nm (child name)
799  *	- newdv (sdev_node for nm is returned here)
800  *	- vap (vattr for the node to be created, va_type should be set.
801  *	  the defaults should be used if unknown)
802  *	- cred
803  *	- args
804  *	    . tnm (for VLNK)
805  *	    . global sdev_node (for !SDEV_GLOBAL)
806  * 	- state: SDEV_INIT, SDEV_READY
807  *
808  * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
809  *
810  * NOTE:  directory contents writers lock needs to be held before
811  *	  calling this routine.
812  */
813 int
814 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
815     struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
816     sdev_node_state_t state)
817 {
818 	int error = 0;
819 	sdev_node_state_t node_state;
820 	struct sdev_node *dv = NULL;
821 
822 	ASSERT(state != SDEV_ZOMBIE);
823 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
824 
825 	if (*newdv) {
826 		dv = *newdv;
827 	} else {
828 		/* allocate and initialize a sdev_node */
829 		if (ddv->sdev_state == SDEV_ZOMBIE) {
830 			sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
831 			    ddv->sdev_path));
832 			return (ENOENT);
833 		}
834 
835 		error = sdev_nodeinit(ddv, nm, &dv, vap);
836 		if (error != 0) {
837 			sdcmn_err9(("sdev_mknode: error %d,"
838 			    " name %s can not be initialized\n",
839 			    error, nm));
840 			return (ENOENT);
841 		}
842 		ASSERT(dv);
843 
844 		/* insert into the directory cache */
845 		error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
846 		if (error) {
847 			sdcmn_err9(("sdev_mknode: node %s can not"
848 			    " be added into directory cache\n", nm));
849 			return (ENOENT);
850 		}
851 	}
852 
853 	ASSERT(dv);
854 	node_state = dv->sdev_state;
855 	ASSERT(node_state != SDEV_ZOMBIE);
856 
857 	if (state == SDEV_READY) {
858 		switch (node_state) {
859 		case SDEV_INIT:
860 			error = sdev_nodeready(dv, vap, avp, args, cred);
861 			/*
862 			 * masking the errors with ENOENT
863 			 */
864 			if (error) {
865 				sdcmn_err9(("sdev_mknode: node %s can NOT"
866 				    " be transitioned into READY state, "
867 				    "error %d\n", nm, error));
868 				error = ENOENT;
869 			}
870 			break;
871 		case SDEV_READY:
872 			/*
873 			 * Do some sanity checking to make sure
874 			 * the existing sdev_node is what has been
875 			 * asked for.
876 			 */
877 			error = sdev_node_check(dv, vap, args);
878 			break;
879 		default:
880 			break;
881 		}
882 	}
883 
884 	if (!error) {
885 		*newdv = dv;
886 		ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
887 	} else {
888 		SDEV_SIMPLE_RELE(dv);
889 		*newdv = NULL;
890 	}
891 
892 	return (error);
893 }
894 
895 /*
896  * convenient wrapper to change vp's ATIME, CTIME and ATIME
897  */
898 void
899 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
900 {
901 	struct vattr attr;
902 	timestruc_t now;
903 	int err;
904 
905 	ASSERT(vp);
906 	gethrestime(&now);
907 	if (mask & AT_CTIME)
908 		attr.va_ctime = now;
909 	if (mask & AT_MTIME)
910 		attr.va_mtime = now;
911 	if (mask & AT_ATIME)
912 		attr.va_atime = now;
913 
914 	attr.va_mask = (mask & AT_TIMES);
915 	err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
916 	if (err && (err != EROFS)) {
917 		sdcmn_err(("update timestamps error %d\n", err));
918 	}
919 }
920 
921 /*
922  * the backing store vnode is released here
923  */
924 /*ARGSUSED1*/
925 void
926 sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
927 {
928 	/* no references */
929 	ASSERT(dv->sdev_nlink == 0);
930 
931 	if (dv->sdev_attrvp != NULLVP) {
932 		VN_RELE(dv->sdev_attrvp);
933 		/*
934 		 * reset the attrvp so that no more
935 		 * references can be made on this already
936 		 * vn_rele() vnode
937 		 */
938 		dv->sdev_attrvp = NULLVP;
939 	}
940 
941 	if (dv->sdev_attr != NULL) {
942 		kmem_free(dv->sdev_attr, sizeof (struct vattr));
943 		dv->sdev_attr = NULL;
944 	}
945 
946 	if (dv->sdev_name != NULL) {
947 		kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
948 		dv->sdev_name = NULL;
949 	}
950 
951 	if (dv->sdev_symlink != NULL) {
952 		kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
953 		dv->sdev_symlink = NULL;
954 	}
955 
956 	if (dv->sdev_path) {
957 		kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
958 		dv->sdev_path = NULL;
959 	}
960 
961 	if (!SDEV_IS_GLOBAL(dv))
962 		sdev_prof_free(dv);
963 
964 	mutex_destroy(&dv->sdev_lookup_lock);
965 	cv_destroy(&dv->sdev_lookup_cv);
966 
967 	/* return node to initial state as per constructor */
968 	(void) memset((void *)&dv->sdev_instance_data, 0,
969 	    sizeof (dv->sdev_instance_data));
970 	vn_invalid(SDEVTOV(dv));
971 	kmem_cache_free(sdev_node_cache, dv);
972 }
973 
974 /*
975  * DIRECTORY CACHE lookup
976  */
977 struct sdev_node *
978 sdev_findbyname(struct sdev_node *ddv, char *nm)
979 {
980 	struct sdev_node *dv;
981 	size_t	nmlen = strlen(nm);
982 
983 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
984 	for (dv = ddv->sdev_dot; dv; dv = dv->sdev_next) {
985 		if (dv->sdev_namelen != nmlen) {
986 			continue;
987 		}
988 
989 		/*
990 		 * Can't lookup stale nodes
991 		 */
992 		if (dv->sdev_flags & SDEV_STALE) {
993 			sdcmn_err9((
994 			    "sdev_findbyname: skipped stale node: %s\n",
995 			    dv->sdev_name));
996 			continue;
997 		}
998 
999 		if (strcmp(dv->sdev_name, nm) == 0) {
1000 			SDEV_HOLD(dv);
1001 			return (dv);
1002 		}
1003 	}
1004 	return (NULL);
1005 }
1006 
1007 /*
1008  * Inserts a new sdev_node in a parent directory
1009  */
1010 void
1011 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
1012 {
1013 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1014 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
1015 	ASSERT(ddv->sdev_nlink >= 2);
1016 	ASSERT(dv->sdev_nlink == 0);
1017 
1018 	dv->sdev_dotdot = ddv;
1019 	dv->sdev_next = ddv->sdev_dot;
1020 	ddv->sdev_dot = dv;
1021 	ddv->sdev_nlink++;
1022 }
1023 
1024 /*
1025  * The following check is needed because while sdev_nodes are linked
1026  * in SDEV_INIT state, they have their link counts incremented only
1027  * in SDEV_READY state.
1028  */
1029 static void
1030 decr_link(struct sdev_node *dv)
1031 {
1032 	if (dv->sdev_state != SDEV_INIT)
1033 		dv->sdev_nlink--;
1034 	else
1035 		ASSERT(dv->sdev_nlink == 0);
1036 }
1037 
1038 /*
1039  * Delete an existing dv from directory cache
1040  *
1041  * In the case of a node is still held by non-zero reference count,
1042  *     the node is put into ZOMBIE state. Once the reference count
1043  *     reaches "0", the node is unlinked and destroyed,
1044  *     in sdev_inactive().
1045  */
1046 static int
1047 sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
1048 {
1049 	struct sdev_node *idv;
1050 	struct sdev_node *prev = NULL;
1051 	struct vnode *vp;
1052 
1053 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1054 
1055 	vp = SDEVTOV(dv);
1056 	mutex_enter(&vp->v_lock);
1057 
1058 	/* dv is held still */
1059 	if (vp->v_count > 1) {
1060 		rw_enter(&dv->sdev_contents, RW_WRITER);
1061 		if (dv->sdev_state == SDEV_READY) {
1062 			sdcmn_err9((
1063 			    "sdev_delete: node %s busy with count %d\n",
1064 			    dv->sdev_name, vp->v_count));
1065 			dv->sdev_state = SDEV_ZOMBIE;
1066 		}
1067 		rw_exit(&dv->sdev_contents);
1068 		--vp->v_count;
1069 		mutex_exit(&vp->v_lock);
1070 		return (EBUSY);
1071 	}
1072 	ASSERT(vp->v_count == 1);
1073 
1074 	/* unlink from the memory cache */
1075 	ddv->sdev_nlink--;	/* .. to above */
1076 	if (vp->v_type == VDIR) {
1077 		decr_link(dv);		/* . to self */
1078 	}
1079 
1080 	for (idv = ddv->sdev_dot; idv && idv != dv;
1081 	    prev = idv, idv = idv->sdev_next)
1082 		;
1083 	ASSERT(idv == dv);	/* node to be deleted must exist */
1084 	if (prev == NULL)
1085 		ddv->sdev_dot = dv->sdev_next;
1086 	else
1087 		prev->sdev_next = dv->sdev_next;
1088 	dv->sdev_next = NULL;
1089 	decr_link(dv);	/* name, back to zero */
1090 	vp->v_count--;
1091 	mutex_exit(&vp->v_lock);
1092 
1093 	/* destroy the node */
1094 	sdev_nodedestroy(dv, 0);
1095 	return (0);
1096 }
1097 
1098 /*
1099  * check if the source is in the path of the target
1100  *
1101  * source and target are different
1102  */
1103 /*ARGSUSED2*/
1104 static int
1105 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
1106 {
1107 	int error = 0;
1108 	struct sdev_node *dotdot, *dir;
1109 
1110 	dotdot = tdv->sdev_dotdot;
1111 	ASSERT(dotdot);
1112 
1113 	/* fs root */
1114 	if (dotdot == tdv) {
1115 		return (0);
1116 	}
1117 
1118 	for (;;) {
1119 		/*
1120 		 * avoid error cases like
1121 		 *	mv a a/b
1122 		 *	mv a a/b/c
1123 		 *	etc.
1124 		 */
1125 		if (dotdot == sdv) {
1126 			error = EINVAL;
1127 			break;
1128 		}
1129 
1130 		dir = dotdot;
1131 		dotdot = dir->sdev_dotdot;
1132 
1133 		/* done checking because root is reached */
1134 		if (dir == dotdot) {
1135 			break;
1136 		}
1137 	}
1138 	return (error);
1139 }
1140 
1141 int
1142 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
1143     struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
1144     struct cred *cred)
1145 {
1146 	int error = 0;
1147 	struct vnode *ovp = SDEVTOV(odv);
1148 	struct vnode *nvp;
1149 	struct vattr vattr;
1150 	int doingdir = (ovp->v_type == VDIR);
1151 	char *link = NULL;
1152 	int samedir = (oddv == nddv) ? 1 : 0;
1153 	int bkstore = 0;
1154 	int bypass = 0;
1155 	struct sdev_node *idv = NULL;
1156 	struct sdev_node *ndv = NULL;
1157 	timestruc_t now;
1158 
1159 	vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1160 	error = VOP_GETATTR(ovp, &vattr, 0, cred);
1161 	if (error)
1162 		return (error);
1163 
1164 	if (!samedir)
1165 		rw_enter(&oddv->sdev_contents, RW_WRITER);
1166 	rw_enter(&nddv->sdev_contents, RW_WRITER);
1167 
1168 	/*
1169 	 * the source may have been deleted by another thread before
1170 	 * we gets here.
1171 	 */
1172 	if (odv->sdev_state != SDEV_READY) {
1173 		error = ENOENT;
1174 		goto err_out;
1175 	}
1176 
1177 	if (doingdir && (odv == nddv)) {
1178 		error = EINVAL;
1179 		goto err_out;
1180 	}
1181 
1182 	/*
1183 	 * If renaming a directory, and the parents are different (".." must be
1184 	 * changed) then the source dir must not be in the dir hierarchy above
1185 	 * the target since it would orphan everything below the source dir.
1186 	 */
1187 	if (doingdir && (oddv != nddv)) {
1188 		error = sdev_checkpath(odv, nddv, cred);
1189 		if (error)
1190 			goto err_out;
1191 	}
1192 
1193 	/* destination existing */
1194 	if (*ndvp) {
1195 		nvp = SDEVTOV(*ndvp);
1196 		ASSERT(nvp);
1197 
1198 		/* handling renaming to itself */
1199 		if (odv == *ndvp) {
1200 			error = 0;
1201 			goto err_out;
1202 		}
1203 
1204 		if (nvp->v_type == VDIR) {
1205 			if (!doingdir) {
1206 				error = EISDIR;
1207 				goto err_out;
1208 			}
1209 
1210 			if (vn_vfswlock(nvp)) {
1211 				error = EBUSY;
1212 				goto err_out;
1213 			}
1214 
1215 			if (vn_mountedvfs(nvp) != NULL) {
1216 				vn_vfsunlock(nvp);
1217 				error = EBUSY;
1218 				goto err_out;
1219 			}
1220 
1221 			/* in case dir1 exists in dir2 and "mv dir1 dir2" */
1222 			if ((*ndvp)->sdev_nlink > 2) {
1223 				vn_vfsunlock(nvp);
1224 				error = EEXIST;
1225 				goto err_out;
1226 			}
1227 			vn_vfsunlock(nvp);
1228 
1229 			(void) sdev_dirdelete(nddv, *ndvp);
1230 			*ndvp = NULL;
1231 			error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1232 				    nddv->sdev_attrvp, cred);
1233 			if (error)
1234 				goto err_out;
1235 		} else {
1236 			if (doingdir) {
1237 				error = ENOTDIR;
1238 				goto err_out;
1239 			}
1240 
1241 			if (SDEV_IS_PERSIST((*ndvp))) {
1242 				bkstore = 1;
1243 			}
1244 
1245 			/*
1246 			 * get rid of the node from the directory cache
1247 			 * note, in case EBUSY is returned, the ZOMBIE
1248 			 * node is taken care in sdev_mknode.
1249 			 */
1250 			(void) sdev_dirdelete(nddv, *ndvp);
1251 			*ndvp = NULL;
1252 			if (bkstore) {
1253 				error = VOP_REMOVE(nddv->sdev_attrvp,
1254 				    nnm, cred);
1255 				if (error)
1256 				    goto err_out;
1257 			}
1258 		}
1259 	}
1260 
1261 	/* fix the source for a symlink */
1262 	if (vattr.va_type == VLNK) {
1263 		if (odv->sdev_symlink == NULL) {
1264 			error = sdev_follow_link(odv);
1265 			if (error) {
1266 				error = ENOENT;
1267 				goto err_out;
1268 			}
1269 		}
1270 		ASSERT(odv->sdev_symlink);
1271 		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1272 	}
1273 
1274 	/*
1275 	 * make a fresh node from the source attrs
1276 	 */
1277 	ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1278 	error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1279 	    NULL, (void *)link, cred, SDEV_READY);
1280 
1281 	if (link)
1282 		kmem_free(link, strlen(link) + 1);
1283 
1284 	if (error)
1285 		goto err_out;
1286 	ASSERT(*ndvp);
1287 	ASSERT((*ndvp)->sdev_state == SDEV_READY);
1288 
1289 	/* move dir contents */
1290 	if (doingdir) {
1291 		for (idv = odv->sdev_dot; idv; idv = idv->sdev_next) {
1292 			error = sdev_rnmnode(odv, idv,
1293 			    (struct sdev_node *)(*ndvp), &ndv,
1294 			    idv->sdev_name, cred);
1295 
1296 			if (error)
1297 				goto err_out;
1298 			ndv = NULL;
1299 		}
1300 
1301 	}
1302 
1303 	if ((*ndvp)->sdev_attrvp) {
1304 		sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1305 		    AT_CTIME|AT_ATIME);
1306 	} else {
1307 		ASSERT((*ndvp)->sdev_attr);
1308 		gethrestime(&now);
1309 		(*ndvp)->sdev_attr->va_ctime = now;
1310 		(*ndvp)->sdev_attr->va_atime = now;
1311 	}
1312 
1313 	if (nddv->sdev_attrvp) {
1314 		sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1315 		    AT_MTIME|AT_ATIME);
1316 	} else {
1317 		ASSERT(nddv->sdev_attr);
1318 		gethrestime(&now);
1319 		nddv->sdev_attr->va_mtime = now;
1320 		nddv->sdev_attr->va_atime = now;
1321 	}
1322 	rw_exit(&nddv->sdev_contents);
1323 	if (!samedir)
1324 		rw_exit(&oddv->sdev_contents);
1325 
1326 	SDEV_RELE(*ndvp);
1327 	return (error);
1328 
1329 err_out:
1330 	rw_exit(&nddv->sdev_contents);
1331 	if (!samedir)
1332 		rw_exit(&oddv->sdev_contents);
1333 	return (error);
1334 }
1335 
1336 /*
1337  * Merge sdev_node specific information into an attribute structure.
1338  *
1339  * note: sdev_node is not locked here
1340  */
1341 void
1342 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1343 {
1344 	struct vnode *vp = SDEVTOV(dv);
1345 
1346 	vap->va_nlink = dv->sdev_nlink;
1347 	vap->va_nodeid = dv->sdev_ino;
1348 	vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1349 	vap->va_type = vp->v_type;
1350 
1351 	if (vp->v_type == VDIR) {
1352 		vap->va_rdev = 0;
1353 		vap->va_fsid = vp->v_rdev;
1354 	} else if (vp->v_type == VLNK) {
1355 		vap->va_rdev = 0;
1356 		vap->va_mode  &= ~S_IFMT;
1357 		vap->va_mode |= S_IFLNK;
1358 	} else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1359 		vap->va_rdev = vp->v_rdev;
1360 		vap->va_mode &= ~S_IFMT;
1361 		if (vap->va_type == VCHR)
1362 			vap->va_mode |= S_IFCHR;
1363 		else
1364 			vap->va_mode |= S_IFBLK;
1365 	} else {
1366 		vap->va_rdev = 0;
1367 	}
1368 }
1369 
1370 static struct vattr *
1371 sdev_getdefault_attr(enum vtype type)
1372 {
1373 	if (type == VDIR)
1374 		return (&sdev_vattr_dir);
1375 	else if (type == VCHR)
1376 		return (&sdev_vattr_chr);
1377 	else if (type == VBLK)
1378 		return (&sdev_vattr_blk);
1379 	else if (type == VLNK)
1380 		return (&sdev_vattr_lnk);
1381 	else
1382 		return (NULL);
1383 }
1384 int
1385 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1386 {
1387 	int rv = 0;
1388 	struct vnode *vp = SDEVTOV(dv);
1389 
1390 	switch (vp->v_type) {
1391 	case VCHR:
1392 	case VBLK:
1393 		/*
1394 		 * If vnode is a device, return special vnode instead
1395 		 * (though it knows all about -us- via sp->s_realvp)
1396 		 */
1397 		*vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1398 		VN_RELE(vp);
1399 		if (*vpp == NULLVP)
1400 			rv = ENOSYS;
1401 		break;
1402 	default:	/* most types are returned as is */
1403 		*vpp = vp;
1404 		break;
1405 	}
1406 	return (rv);
1407 }
1408 
1409 /*
1410  * loopback into sdev_lookup()
1411  */
1412 static struct vnode *
1413 devname_find_by_devpath(char *devpath, struct vattr *vattr)
1414 {
1415 	int error = 0;
1416 	struct vnode *vp;
1417 
1418 	error = lookupname(devpath, UIO_SYSSPACE, NO_FOLLOW, NULLVPP, &vp);
1419 	if (error) {
1420 		return (NULL);
1421 	}
1422 
1423 	if (vattr)
1424 		(void) VOP_GETATTR(vp, vattr, 0, kcred);
1425 	return (vp);
1426 }
1427 
1428 /*
1429  * the junction between devname and devfs
1430  */
1431 static struct vnode *
1432 devname_configure_by_path(char *physpath, struct vattr *vattr)
1433 {
1434 	int error = 0;
1435 	struct vnode *vp;
1436 
1437 	ASSERT(strncmp(physpath, "/devices/", sizeof ("/devices/") - 1)
1438 	    == 0);
1439 
1440 	error = devfs_lookupname(physpath + sizeof ("/devices/") - 1,
1441 	    NULLVPP, &vp);
1442 	if (error != 0) {
1443 		if (error == ENODEV) {
1444 			cmn_err(CE_CONT, "%s: not found (line %d)\n",
1445 			    physpath, __LINE__);
1446 		}
1447 
1448 		return (NULL);
1449 	}
1450 
1451 	if (vattr)
1452 		(void) VOP_GETATTR(vp, vattr, 0, kcred);
1453 	return (vp);
1454 }
1455 
1456 /*
1457  * junction between devname and root file system, e.g. ufs
1458  */
1459 int
1460 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1461 {
1462 	struct vnode *rdvp = ddv->sdev_attrvp;
1463 	int rval = 0;
1464 
1465 	ASSERT(rdvp);
1466 
1467 	rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred);
1468 	return (rval);
1469 }
1470 
1471 static int
1472 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1473 {
1474 	struct sdev_node *dv = NULL;
1475 	char	*nm;
1476 	struct vnode *dirvp;
1477 	int	error;
1478 	vnode_t	*vp;
1479 	int eof;
1480 	struct iovec iov;
1481 	struct uio uio;
1482 	struct dirent64 *dp;
1483 	dirent64_t *dbuf;
1484 	size_t dbuflen;
1485 	struct vattr vattr;
1486 	char *link = NULL;
1487 
1488 	if (ddv->sdev_attrvp == NULL)
1489 		return (0);
1490 	if (!(ddv->sdev_flags & SDEV_BUILD))
1491 		return (0);
1492 
1493 	dirvp = ddv->sdev_attrvp;
1494 	VN_HOLD(dirvp);
1495 	dbuf = kmem_zalloc(dlen, KM_SLEEP);
1496 
1497 	uio.uio_iov = &iov;
1498 	uio.uio_iovcnt = 1;
1499 	uio.uio_segflg = UIO_SYSSPACE;
1500 	uio.uio_fmode = 0;
1501 	uio.uio_extflg = UIO_COPY_CACHED;
1502 	uio.uio_loffset = 0;
1503 	uio.uio_llimit = MAXOFFSET_T;
1504 
1505 	eof = 0;
1506 	error = 0;
1507 	while (!error && !eof) {
1508 		uio.uio_resid = dlen;
1509 		iov.iov_base = (char *)dbuf;
1510 		iov.iov_len = dlen;
1511 		(void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1512 		error = VOP_READDIR(dirvp, &uio, kcred, &eof);
1513 		VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1514 
1515 		dbuflen = dlen - uio.uio_resid;
1516 		if (error || dbuflen == 0)
1517 			break;
1518 
1519 		if (!(ddv->sdev_flags & SDEV_BUILD)) {
1520 			error = 0;
1521 			break;
1522 		}
1523 
1524 		for (dp = dbuf; ((intptr_t)dp <
1525 		    (intptr_t)dbuf + dbuflen);
1526 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1527 			nm = dp->d_name;
1528 
1529 			if (strcmp(nm, ".") == 0 ||
1530 			    strcmp(nm, "..") == 0)
1531 				continue;
1532 
1533 			vp = NULLVP;
1534 			dv = sdev_cache_lookup(ddv, nm);
1535 			if (dv) {
1536 				if (dv->sdev_state != SDEV_ZOMBIE) {
1537 					SDEV_SIMPLE_RELE(dv);
1538 				} else {
1539 					/*
1540 					 * A ZOMBIE node may not have been
1541 					 * cleaned up from the backing store,
1542 					 * bypass this entry in this case,
1543 					 * and clean it up from the directory
1544 					 * cache if this is the last call.
1545 					 */
1546 					(void) sdev_dirdelete(ddv, dv);
1547 				}
1548 				continue;
1549 			}
1550 
1551 			/* refill the cache if not already */
1552 			error = devname_backstore_lookup(ddv, nm, &vp);
1553 			if (error)
1554 				continue;
1555 
1556 			vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1557 			error = VOP_GETATTR(vp, &vattr, 0, cred);
1558 			if (error)
1559 				continue;
1560 
1561 			if (vattr.va_type == VLNK) {
1562 				error = sdev_getlink(vp, &link);
1563 				if (error) {
1564 					continue;
1565 				}
1566 				ASSERT(link != NULL);
1567 			}
1568 
1569 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1570 				rw_exit(&ddv->sdev_contents);
1571 				rw_enter(&ddv->sdev_contents, RW_WRITER);
1572 			}
1573 			error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1574 			    cred, SDEV_READY);
1575 			rw_downgrade(&ddv->sdev_contents);
1576 
1577 			if (link != NULL) {
1578 				kmem_free(link, strlen(link) + 1);
1579 				link = NULL;
1580 			}
1581 
1582 			if (!error) {
1583 				ASSERT(dv);
1584 				ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1585 				SDEV_SIMPLE_RELE(dv);
1586 			}
1587 			vp = NULL;
1588 			dv = NULL;
1589 		}
1590 	}
1591 
1592 done:
1593 	VN_RELE(dirvp);
1594 	kmem_free(dbuf, dlen);
1595 
1596 	return (error);
1597 }
1598 
1599 static int
1600 sdev_filldir_dynamic(struct sdev_node *ddv)
1601 {
1602 	int error;
1603 	int i;
1604 	struct vattr *vap;
1605 	char *nm = NULL;
1606 	struct sdev_node *dv = NULL;
1607 
1608 	if (!(ddv->sdev_flags & SDEV_BUILD)) {
1609 		return (0);
1610 	}
1611 
1612 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1613 	if (!rw_tryupgrade(&ddv->sdev_contents)) {
1614 		rw_exit(&ddv->sdev_contents);
1615 		rw_enter(&ddv->sdev_contents, RW_WRITER);
1616 	}
1617 
1618 	vap = sdev_getdefault_attr(VDIR);
1619 	for (i = 0; vtab[i].vt_name != NULL; i++) {
1620 		nm = vtab[i].vt_name;
1621 		ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1622 		error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1623 		    NULL, kcred, SDEV_READY);
1624 		if (error)
1625 			continue;
1626 		ASSERT(dv);
1627 		ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1628 		SDEV_SIMPLE_RELE(dv);
1629 		dv = NULL;
1630 	}
1631 	rw_downgrade(&ddv->sdev_contents);
1632 	return (0);
1633 }
1634 
1635 /*
1636  * Creating a backing store entry based on sdev_attr.
1637  * This is called either as part of node creation in a persistent directory
1638  * or from setattr/setsecattr to persist access attributes across reboot.
1639  */
1640 int
1641 sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1642 {
1643 	int error = 0;
1644 	struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1645 	struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1646 	struct vattr *vap = dv->sdev_attr;
1647 	char *nm = dv->sdev_name;
1648 	struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1649 
1650 	ASSERT(dv && dv->sdev_name && rdvp);
1651 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1652 
1653 lookup:
1654 	/* try to find it in the backing store */
1655 	error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred);
1656 	if (error == 0) {
1657 		if (VOP_REALVP(*rvp, &rrvp) == 0) {
1658 			VN_HOLD(rrvp);
1659 			VN_RELE(*rvp);
1660 			*rvp = rrvp;
1661 		}
1662 
1663 		kmem_free(dv->sdev_attr, sizeof (vattr_t));
1664 		dv->sdev_attr = NULL;
1665 		dv->sdev_attrvp = *rvp;
1666 		return (0);
1667 	}
1668 
1669 	/* let's try to persist the node */
1670 	gethrestime(&vap->va_atime);
1671 	vap->va_mtime = vap->va_atime;
1672 	vap->va_ctime = vap->va_atime;
1673 	vap->va_mask |= AT_TYPE|AT_MODE;
1674 	switch (vap->va_type) {
1675 	case VDIR:
1676 		error = VOP_MKDIR(rdvp, nm, vap, rvp, cred);
1677 		sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1678 		    (void *)(*rvp), error));
1679 		break;
1680 	case VCHR:
1681 	case VBLK:
1682 	case VREG:
1683 	case VDOOR:
1684 		error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1685 		    rvp, cred, 0);
1686 		sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1687 		    (void *)(*rvp), error));
1688 		if (!error)
1689 			VN_RELE(*rvp);
1690 		break;
1691 	case VLNK:
1692 		ASSERT(dv->sdev_symlink);
1693 		error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred);
1694 		sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1695 		    error));
1696 		break;
1697 	default:
1698 		cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1699 		    "create\n", nm);
1700 		/*NOTREACHED*/
1701 	}
1702 
1703 	/* go back to lookup to factor out spec node and set attrvp */
1704 	if (error == 0)
1705 		goto lookup;
1706 
1707 	return (error);
1708 }
1709 
1710 static int
1711 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1712 {
1713 	int error = 0;
1714 	struct sdev_node *dup = NULL;
1715 
1716 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1717 	if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1718 		sdev_direnter(ddv, *dv);
1719 	} else {
1720 		if (dup->sdev_state == SDEV_ZOMBIE) {
1721 			error = sdev_dirdelete(ddv, dup);
1722 			/*
1723 			 * The ZOMBIE node is still hanging
1724 			 * around with more than one reference counts.
1725 			 * Fail the new node creation so that
1726 			 * the directory cache won't have
1727 			 * duplicate entries for the same named node
1728 			 */
1729 			if (error == EBUSY) {
1730 				SDEV_SIMPLE_RELE(*dv);
1731 				sdev_nodedestroy(*dv, 0);
1732 				*dv = NULL;
1733 				return (error);
1734 			}
1735 			sdev_direnter(ddv, *dv);
1736 		} else {
1737 			ASSERT((*dv)->sdev_state != SDEV_ZOMBIE);
1738 			SDEV_SIMPLE_RELE(*dv);
1739 			sdev_nodedestroy(*dv, 0);
1740 			*dv = dup;
1741 		}
1742 	}
1743 
1744 	return (0);
1745 }
1746 
1747 static int
1748 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1749 {
1750 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1751 	return (sdev_dirdelete(ddv, *dv));
1752 }
1753 
1754 /*
1755  * update the in-core directory cache
1756  */
1757 int
1758 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1759     sdev_cache_ops_t ops)
1760 {
1761 	int error = 0;
1762 
1763 	ASSERT((SDEV_HELD(*dv)));
1764 
1765 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1766 	switch (ops) {
1767 	case SDEV_CACHE_ADD:
1768 		error = sdev_cache_add(ddv, dv, nm);
1769 		break;
1770 	case SDEV_CACHE_DELETE:
1771 		error = sdev_cache_delete(ddv, dv);
1772 		break;
1773 	default:
1774 		break;
1775 	}
1776 
1777 	return (error);
1778 }
1779 
1780 /*
1781  * retrive the named entry from the directory cache
1782  */
1783 struct sdev_node *
1784 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1785 {
1786 	struct sdev_node *dv = NULL;
1787 
1788 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1789 	dv = sdev_findbyname(ddv, nm);
1790 
1791 	return (dv);
1792 }
1793 
1794 /*
1795  * Implicit reconfig for nodes constructed by a link generator
1796  * Start devfsadm if needed, or if devfsadm is in progress,
1797  * prepare to block on devfsadm either completing or
1798  * constructing the desired node.  As devfsadmd is global
1799  * in scope, constructing all necessary nodes, we only
1800  * need to initiate it once.
1801  */
1802 static int
1803 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1804 {
1805 	int error = 0;
1806 
1807 	if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1808 		sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1809 		    ddv->sdev_name, nm, devfsadm_state));
1810 		mutex_enter(&dv->sdev_lookup_lock);
1811 		SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1812 		mutex_exit(&dv->sdev_lookup_lock);
1813 		error = 0;
1814 	} else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1815 		sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1816 			ddv->sdev_name, nm, devfsadm_state));
1817 
1818 		sdev_devfsadmd_thread(ddv, dv, kcred);
1819 		mutex_enter(&dv->sdev_lookup_lock);
1820 		SDEV_BLOCK_OTHERS(dv,
1821 		    (SDEV_LOOKUP | SDEV_LGWAITING));
1822 		mutex_exit(&dv->sdev_lookup_lock);
1823 		error = 0;
1824 	} else {
1825 		error = -1;
1826 	}
1827 
1828 	return (error);
1829 }
1830 
1831 static int
1832 sdev_call_modulelookup(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1833     int (*fn)(char *, devname_handle_t *, struct cred *), struct cred *cred)
1834 {
1835 	struct vnode *rvp = NULL;
1836 	int error = 0;
1837 	struct vattr *vap;
1838 	devname_spec_t spec;
1839 	devname_handle_t *hdl;
1840 	void *args = NULL;
1841 	struct sdev_node *dv = *dvp;
1842 
1843 	ASSERT(dv && ddv);
1844 	hdl = &(dv->sdev_handle);
1845 	ASSERT(hdl->dh_data == dv);
1846 	mutex_enter(&dv->sdev_lookup_lock);
1847 	SDEV_BLOCK_OTHERS(dv, SDEV_LOOKUP);
1848 	mutex_exit(&dv->sdev_lookup_lock);
1849 	error = (*fn)(nm, hdl, cred);
1850 	if (error) {
1851 		return (error);
1852 	}
1853 
1854 	spec = hdl->dh_spec;
1855 	args = hdl->dh_args;
1856 	ASSERT(args);
1857 
1858 	switch (spec) {
1859 	case DEVNAME_NS_PATH:
1860 		/*
1861 		 * symlink of:
1862 		 *	/dev/dir/nm -> /device/...
1863 		 */
1864 		rvp = devname_configure_by_path((char *)args, NULL);
1865 		break;
1866 	case DEVNAME_NS_DEV:
1867 		/*
1868 		 * symlink of:
1869 		 *	/dev/dir/nm -> /dev/...
1870 		 */
1871 		rvp = devname_find_by_devpath((char *)args, NULL);
1872 		break;
1873 	default:
1874 		if (args)
1875 			kmem_free((char *)args, strlen(args) + 1);
1876 		return (ENOENT);
1877 
1878 	}
1879 
1880 	if (rvp == NULL) {
1881 		if (args)
1882 			kmem_free((char *)args, strlen(args) + 1);
1883 		return (ENOENT);
1884 	} else {
1885 		vap = sdev_getdefault_attr(VLNK);
1886 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1887 		/*
1888 		 * Could sdev_mknode return a different dv_node
1889 		 * once the lock is dropped?
1890 		 */
1891 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
1892 			rw_exit(&ddv->sdev_contents);
1893 			rw_enter(&ddv->sdev_contents, RW_WRITER);
1894 		}
1895 		error = sdev_mknode(ddv, nm, &dv, vap, NULL, args, cred,
1896 		    SDEV_READY);
1897 		rw_downgrade(&ddv->sdev_contents);
1898 		if (error) {
1899 			if (args)
1900 				kmem_free((char *)args, strlen(args) + 1);
1901 			return (error);
1902 		} else {
1903 			mutex_enter(&dv->sdev_lookup_lock);
1904 			SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
1905 			mutex_exit(&dv->sdev_lookup_lock);
1906 			error = 0;
1907 		}
1908 	}
1909 
1910 	if (args)
1911 		kmem_free((char *)args, strlen(args) + 1);
1912 
1913 	*dvp = dv;
1914 	return (0);
1915 }
1916 
1917 /*
1918  *  Support for specialized device naming construction mechanisms
1919  */
1920 static int
1921 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1922     int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1923     void *, char *), int flags, struct cred *cred)
1924 {
1925 	int rv = 0;
1926 	char *physpath = NULL;
1927 	struct vnode *rvp = NULL;
1928 	struct vattr vattr;
1929 	struct vattr *vap;
1930 	struct sdev_node *dv = *dvp;
1931 
1932 	mutex_enter(&dv->sdev_lookup_lock);
1933 	SDEV_BLOCK_OTHERS(dv, SDEV_LOOKUP);
1934 	mutex_exit(&dv->sdev_lookup_lock);
1935 
1936 	/* for non-devfsadm devices */
1937 	if (flags & SDEV_PATH) {
1938 		physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1939 		rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1940 		    NULL);
1941 		if (rv) {
1942 			kmem_free(physpath, MAXPATHLEN);
1943 			return (-1);
1944 		}
1945 
1946 		ASSERT(physpath);
1947 		rvp = devname_configure_by_path(physpath, NULL);
1948 		if (rvp == NULL) {
1949 			sdcmn_err3(("devname_configure_by_path: "
1950 			    "failed for /dev/%s/%s\n",
1951 			    ddv->sdev_name, nm));
1952 			kmem_free(physpath, MAXPATHLEN);
1953 			rv = -1;
1954 		} else {
1955 			vap = sdev_getdefault_attr(VLNK);
1956 			ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1957 
1958 			/*
1959 			 * Sdev_mknode may return back a different sdev_node
1960 			 * that was created by another thread that
1961 			 * raced to the directroy cache before this thread.
1962 			 *
1963 			 * With current directory cache mechanism
1964 			 * (linked list with the sdev_node name as
1965 			 * the entity key), this is a way to make sure
1966 			 * only one entry exists for the same name
1967 			 * in the same directory. The outcome is
1968 			 * the winner wins.
1969 			 */
1970 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1971 				rw_exit(&ddv->sdev_contents);
1972 				rw_enter(&ddv->sdev_contents, RW_WRITER);
1973 			}
1974 			rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1975 			    (void *)physpath, cred, SDEV_READY);
1976 			rw_downgrade(&ddv->sdev_contents);
1977 			kmem_free(physpath, MAXPATHLEN);
1978 			if (rv) {
1979 				return (rv);
1980 			} else {
1981 				mutex_enter(&dv->sdev_lookup_lock);
1982 				SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
1983 				mutex_exit(&dv->sdev_lookup_lock);
1984 				return (0);
1985 			}
1986 		}
1987 	} else if (flags & SDEV_VNODE) {
1988 		/*
1989 		 * DBNR has its own way to create the device
1990 		 * and return a backing store vnode in rvp
1991 		 */
1992 		ASSERT(callback);
1993 		rv = callback(ddv, nm, (void *)&rvp, kcred, NULL, NULL);
1994 		if (rv || (rvp == NULL)) {
1995 			sdcmn_err3(("devname_lookup_func: SDEV_VNODE "
1996 			    "callback failed \n"));
1997 			return (-1);
1998 		}
1999 		vap = sdev_getdefault_attr(rvp->v_type);
2000 		if (vap == NULL)
2001 			return (-1);
2002 
2003 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2004 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2005 			rw_exit(&ddv->sdev_contents);
2006 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2007 		}
2008 		rv = sdev_mknode(ddv, nm, &dv, vap, rvp, NULL,
2009 		    cred, SDEV_READY);
2010 		rw_downgrade(&ddv->sdev_contents);
2011 		if (rv)
2012 			return (rv);
2013 
2014 		mutex_enter(&dv->sdev_lookup_lock);
2015 		SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2016 		mutex_exit(&dv->sdev_lookup_lock);
2017 		return (0);
2018 	} else if (flags & SDEV_VATTR) {
2019 		/*
2020 		 * /dev/pts
2021 		 *
2022 		 * DBNR has its own way to create the device
2023 		 * "0" is returned upon success.
2024 		 *
2025 		 * callback is responsible to set the basic attributes,
2026 		 * e.g. va_type/va_uid/va_gid/
2027 		 *    dev_t if VCHR or VBLK/
2028 		 */
2029 		ASSERT(callback);
2030 		rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
2031 		if (rv) {
2032 			sdcmn_err3(("devname_lookup_func: SDEV_NONE "
2033 			    "callback failed \n"));
2034 			return (-1);
2035 		}
2036 
2037 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2038 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2039 			rw_exit(&ddv->sdev_contents);
2040 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2041 		}
2042 		rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
2043 		    cred, SDEV_READY);
2044 		rw_downgrade(&ddv->sdev_contents);
2045 
2046 		if (rv)
2047 			return (rv);
2048 
2049 		mutex_enter(&dv->sdev_lookup_lock);
2050 		SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2051 		mutex_exit(&dv->sdev_lookup_lock);
2052 		return (0);
2053 	} else {
2054 		impossible(("lookup: %s/%s by %s not supported (%d)\n",
2055 		    SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
2056 		    __LINE__));
2057 		rv = -1;
2058 	}
2059 
2060 	*dvp = dv;
2061 	return (rv);
2062 }
2063 
/*
 * Report whether exec_name is exactly "devfsadm".
 * Returns 1 if so, 0 otherwise.
 */
static int
is_devfsadm_thread(char *exec_name)
{
	/*
	 * note: because devfsadmd -> /usr/sbin/devfsadm
	 * it is safe to use "devfsadm" to capture the lookups
	 * from devfsadm and its daemon version.
	 */
	return (strcmp(exec_name, "devfsadm") == 0);
}
2076 
2077 
2078 /*
2079  * Lookup Order:
2080  *	sdev_node cache;
2081  *	backing store (SDEV_PERSIST);
2082  *	DBNR: a. dir_ops implemented in the loadable modules;
2083  *	      b. vnode ops in vtab.
2084  */
2085 int
2086 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
2087     struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
2088     struct cred *, void *, char *), int flags)
2089 {
2090 	int rv = 0, nmlen;
2091 	struct vnode *rvp = NULL;
2092 	struct sdev_node *dv = NULL;
2093 	int	retried = 0;
2094 	int	error = 0;
2095 	struct devname_nsmap *map = NULL;
2096 	struct devname_ops *dirops = NULL;
2097 	int (*fn)(char *, devname_handle_t *, struct cred *) = NULL;
2098 	struct vattr vattr;
2099 	char *lookup_thread = curproc->p_user.u_comm;
2100 	int failed_flags = 0;
2101 	int (*vtor)(struct sdev_node *) = NULL;
2102 	int state;
2103 	int parent_state;
2104 	char *link = NULL;
2105 
2106 	if (SDEVTOV(ddv)->v_type != VDIR)
2107 		return (ENOTDIR);
2108 
2109 	/*
2110 	 * Empty name or ., return node itself.
2111 	 */
2112 	nmlen = strlen(nm);
2113 	if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
2114 		*vpp = SDEVTOV(ddv);
2115 		VN_HOLD(*vpp);
2116 		return (0);
2117 	}
2118 
2119 	/*
2120 	 * .., return the parent directory
2121 	 */
2122 	if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
2123 		*vpp = SDEVTOV(ddv->sdev_dotdot);
2124 		VN_HOLD(*vpp);
2125 		return (0);
2126 	}
2127 
2128 	rw_enter(&ddv->sdev_contents, RW_READER);
2129 	if (ddv->sdev_flags & SDEV_VTOR) {
2130 		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2131 		ASSERT(vtor);
2132 	}
2133 
2134 tryagain:
2135 	/*
2136 	 * (a) directory cache lookup:
2137 	 */
2138 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2139 	parent_state = ddv->sdev_state;
2140 	dv = sdev_cache_lookup(ddv, nm);
2141 	if (dv) {
2142 		state = dv->sdev_state;
2143 		switch (state) {
2144 		case SDEV_INIT:
2145 			if (is_devfsadm_thread(lookup_thread))
2146 				break;
2147 
2148 			/* ZOMBIED parent won't allow node creation */
2149 			if (parent_state == SDEV_ZOMBIE) {
2150 				SD_TRACE_FAILED_LOOKUP(ddv, nm,
2151 				    retried);
2152 				goto nolock_notfound;
2153 			}
2154 
2155 			mutex_enter(&dv->sdev_lookup_lock);
2156 			/* compensate the threads started after devfsadm */
2157 			if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2158 			    !(SDEV_IS_LOOKUP(dv)))
2159 				SDEV_BLOCK_OTHERS(dv,
2160 				    (SDEV_LOOKUP | SDEV_LGWAITING));
2161 
2162 			if (SDEV_IS_LOOKUP(dv)) {
2163 				failed_flags |= SLF_REBUILT;
2164 				rw_exit(&ddv->sdev_contents);
2165 				error = sdev_wait4lookup(dv, SDEV_LOOKUP);
2166 				mutex_exit(&dv->sdev_lookup_lock);
2167 				rw_enter(&ddv->sdev_contents, RW_READER);
2168 
2169 				if (error != 0) {
2170 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2171 					    retried);
2172 					goto nolock_notfound;
2173 				}
2174 
2175 				state = dv->sdev_state;
2176 				if (state == SDEV_INIT) {
2177 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2178 					    retried);
2179 					goto nolock_notfound;
2180 				} else if (state == SDEV_READY) {
2181 					goto found;
2182 				} else if (state == SDEV_ZOMBIE) {
2183 					rw_exit(&ddv->sdev_contents);
2184 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2185 					    retried);
2186 					SDEV_RELE(dv);
2187 					goto lookup_failed;
2188 				}
2189 			} else {
2190 				mutex_exit(&dv->sdev_lookup_lock);
2191 			}
2192 			break;
2193 		case SDEV_READY:
2194 			goto found;
2195 		case SDEV_ZOMBIE:
2196 			rw_exit(&ddv->sdev_contents);
2197 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2198 			SDEV_RELE(dv);
2199 			goto lookup_failed;
2200 		default:
2201 			rw_exit(&ddv->sdev_contents);
2202 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2203 			sdev_lookup_failed(ddv, nm, failed_flags);
2204 			*vpp = NULLVP;
2205 			return (ENOENT);
2206 		}
2207 	}
2208 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2209 
2210 	/*
2211 	 * ZOMBIED parent does not allow new node creation.
2212 	 * bail out early
2213 	 */
2214 	if (parent_state == SDEV_ZOMBIE) {
2215 		rw_exit(&ddv->sdev_contents);
2216 		*vpp = NULL;
2217 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2218 		return (ENOENT);
2219 	}
2220 
2221 	/*
2222 	 * (b0): backing store lookup
2223 	 *	SDEV_PERSIST is default except:
2224 	 *		1) pts nodes
2225 	 *		2) non-chmod'ed local nodes
2226 	 */
2227 	if (SDEV_IS_PERSIST(ddv)) {
2228 		error = devname_backstore_lookup(ddv, nm, &rvp);
2229 
2230 		if (!error) {
2231 			sdcmn_err3(("devname_backstore_lookup: "
2232 			    "found attrvp %p for %s\n", (void *)rvp, nm));
2233 
2234 			vattr.va_mask = AT_MODE|AT_UID|AT_GID;
2235 			error = VOP_GETATTR(rvp, &vattr, 0, cred);
2236 			if (error) {
2237 				rw_exit(&ddv->sdev_contents);
2238 				if (dv)
2239 					SDEV_RELE(dv);
2240 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2241 				sdev_lookup_failed(ddv, nm, failed_flags);
2242 				*vpp = NULLVP;
2243 				return (ENOENT);
2244 			}
2245 
2246 			if (vattr.va_type == VLNK) {
2247 				error = sdev_getlink(rvp, &link);
2248 				if (error) {
2249 					rw_exit(&ddv->sdev_contents);
2250 					if (dv)
2251 						SDEV_RELE(dv);
2252 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2253 					    retried);
2254 					sdev_lookup_failed(ddv, nm,
2255 					    failed_flags);
2256 					*vpp = NULLVP;
2257 					return (ENOENT);
2258 				}
2259 				ASSERT(link != NULL);
2260 			}
2261 
2262 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
2263 				rw_exit(&ddv->sdev_contents);
2264 				rw_enter(&ddv->sdev_contents, RW_WRITER);
2265 			}
2266 			error = sdev_mknode(ddv, nm, &dv, &vattr,
2267 			    rvp, link, cred, SDEV_READY);
2268 			rw_downgrade(&ddv->sdev_contents);
2269 
2270 			if (link != NULL) {
2271 				kmem_free(link, strlen(link) + 1);
2272 				link = NULL;
2273 			}
2274 
2275 			if (error) {
2276 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2277 				rw_exit(&ddv->sdev_contents);
2278 				if (dv)
2279 					SDEV_RELE(dv);
2280 				goto lookup_failed;
2281 			} else {
2282 				goto found;
2283 			}
2284 		} else if (retried) {
2285 			rw_exit(&ddv->sdev_contents);
2286 			sdcmn_err3(("retry of lookup of %s/%s: failed\n",
2287 			    ddv->sdev_name, nm));
2288 			if (dv)
2289 				SDEV_RELE(dv);
2290 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2291 			sdev_lookup_failed(ddv, nm, failed_flags);
2292 			*vpp = NULLVP;
2293 			return (ENOENT);
2294 		}
2295 	}
2296 
2297 
2298 	/* first thread that is doing the lookup on this node */
2299 	if (!dv) {
2300 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2301 			rw_exit(&ddv->sdev_contents);
2302 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2303 		}
2304 		error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
2305 		    cred, SDEV_INIT);
2306 		if (!dv) {
2307 			rw_exit(&ddv->sdev_contents);
2308 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2309 			sdev_lookup_failed(ddv, nm, failed_flags);
2310 			*vpp = NULLVP;
2311 			return (ENOENT);
2312 		}
2313 		rw_downgrade(&ddv->sdev_contents);
2314 	}
2315 	ASSERT(dv);
2316 	ASSERT(SDEV_HELD(dv));
2317 
2318 	if (SDEV_IS_NO_NCACHE(dv)) {
2319 		failed_flags |= SLF_NO_NCACHE;
2320 	}
2321 
2322 	if (SDEV_IS_GLOBAL(ddv)) {
2323 		map = sdev_get_map(ddv, 1);
2324 		dirops = map ? map->dir_ops : NULL;
2325 		fn = dirops ? dirops->devnops_lookup : NULL;
2326 	}
2327 
2328 	/*
2329 	 * (b1) invoking devfsadm once per life time for devfsadm nodes
2330 	 */
2331 	if ((fn == NULL) && !callback) {
2332 
2333 		if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
2334 		    SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
2335 		    ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
2336 			ASSERT(SDEV_HELD(dv));
2337 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2338 			goto nolock_notfound;
2339 		}
2340 
2341 		/*
2342 		 * filter out known non-existent devices recorded
2343 		 * during initial reconfiguration boot for which
2344 		 * reconfig should not be done and lookup may
2345 		 * be short-circuited now.
2346 		 */
2347 		if (sdev_lookup_filter(ddv, nm)) {
2348 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2349 			goto nolock_notfound;
2350 		}
2351 
2352 		/* bypassing devfsadm internal nodes */
2353 		if (is_devfsadm_thread(lookup_thread)) {
2354 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2355 			goto nolock_notfound;
2356 		}
2357 
2358 		if (sdev_reconfig_disable) {
2359 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2360 			goto nolock_notfound;
2361 		}
2362 
2363 		error = sdev_call_devfsadmd(ddv, dv, nm);
2364 		if (error == 0) {
2365 			sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
2366 			    ddv->sdev_name, nm, curproc->p_user.u_comm));
2367 			if (sdev_reconfig_verbose) {
2368 				cmn_err(CE_CONT,
2369 				    "?lookup of %s/%s by %s: reconfig\n",
2370 				    ddv->sdev_name, nm, curproc->p_user.u_comm);
2371 			}
2372 			retried = 1;
2373 			failed_flags |= SLF_REBUILT;
2374 			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
2375 			SDEV_SIMPLE_RELE(dv);
2376 			goto tryagain;
2377 		} else {
2378 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2379 			goto nolock_notfound;
2380 		}
2381 	}
2382 
2383 	/*
2384 	 * (b2) Directory Based Name Resolution (DBNR):
2385 	 *	ddv	- parent
2386 	 *	nm	- /dev/(ddv->sdev_name)/nm
2387 	 *
2388 	 *	note: module vnode ops take precedence than the build-in ones
2389 	 */
2390 	if (fn) {
2391 		error = sdev_call_modulelookup(ddv, &dv, nm, fn, cred);
2392 		if (error) {
2393 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2394 			goto notfound;
2395 		} else {
2396 			goto found;
2397 		}
2398 	} else if (callback) {
2399 		error = sdev_call_dircallback(ddv, &dv, nm, callback,
2400 		    flags, cred);
2401 		if (error == 0) {
2402 			goto found;
2403 		} else {
2404 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2405 			goto notfound;
2406 		}
2407 	}
2408 	ASSERT(rvp);
2409 
2410 found:
2411 	ASSERT(!(dv->sdev_flags & SDEV_STALE));
2412 	ASSERT(dv->sdev_state == SDEV_READY);
2413 	if (vtor) {
2414 		/*
2415 		 * Check validity of returned node
2416 		 */
2417 		switch (vtor(dv)) {
2418 		case SDEV_VTOR_VALID:
2419 			break;
2420 		case SDEV_VTOR_INVALID:
2421 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2422 			sdcmn_err7(("lookup: destroy invalid "
2423 			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
2424 			goto nolock_notfound;
2425 		case SDEV_VTOR_SKIP:
2426 			sdcmn_err7(("lookup: node not applicable - "
2427 			    "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
2428 			rw_exit(&ddv->sdev_contents);
2429 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2430 			SDEV_RELE(dv);
2431 			goto lookup_failed;
2432 		default:
2433 			cmn_err(CE_PANIC,
2434 			    "dev fs: validator failed: %s(%p)\n",
2435 			    dv->sdev_name, (void *)dv);
2436 			break;
2437 			/*NOTREACHED*/
2438 		}
2439 	}
2440 
2441 	if ((SDEVTOV(dv)->v_type == VDIR) && SDEV_IS_GLOBAL(dv)) {
2442 		rw_enter(&dv->sdev_contents, RW_READER);
2443 		(void) sdev_get_map(dv, 1);
2444 		rw_exit(&dv->sdev_contents);
2445 	}
2446 	rw_exit(&ddv->sdev_contents);
2447 	rv = sdev_to_vp(dv, vpp);
2448 	sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
2449 	    "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
2450 	    dv->sdev_state, nm, rv));
2451 	return (rv);
2452 
2453 notfound:
2454 	mutex_enter(&dv->sdev_lookup_lock);
2455 	SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2456 	mutex_exit(&dv->sdev_lookup_lock);
2457 nolock_notfound:
2458 	/*
2459 	 * Destroy the node that is created for synchronization purposes.
2460 	 */
2461 	sdcmn_err3(("devname_lookup_func: %s with state %d\n",
2462 	    nm, dv->sdev_state));
2463 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2464 	if (dv->sdev_state == SDEV_INIT) {
2465 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2466 			rw_exit(&ddv->sdev_contents);
2467 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2468 		}
2469 
2470 		/*
2471 		 * Node state may have changed during the lock
2472 		 * changes. Re-check.
2473 		 */
2474 		if (dv->sdev_state == SDEV_INIT) {
2475 			(void) sdev_dirdelete(ddv, dv);
2476 			rw_exit(&ddv->sdev_contents);
2477 			sdev_lookup_failed(ddv, nm, failed_flags);
2478 			*vpp = NULL;
2479 			return (ENOENT);
2480 		}
2481 	}
2482 
2483 	rw_exit(&ddv->sdev_contents);
2484 	SDEV_RELE(dv);
2485 
2486 lookup_failed:
2487 	sdev_lookup_failed(ddv, nm, failed_flags);
2488 	*vpp = NULL;
2489 	return (ENOENT);
2490 }
2491 
2492 /*
2493  * Given a directory node, mark all nodes beneath as
2494  * STALE, i.e. nodes that don't exist as far as new
2495  * consumers are concerned
2496  */
2497 void
2498 sdev_stale(struct sdev_node *ddv)
2499 {
2500 	struct sdev_node *dv;
2501 	struct vnode *vp;
2502 
2503 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2504 
2505 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2506 	for (dv = ddv->sdev_dot; dv; dv = dv->sdev_next) {
2507 		vp = SDEVTOV(dv);
2508 		if (vp->v_type == VDIR)
2509 			sdev_stale(dv);
2510 
2511 		sdcmn_err9(("sdev_stale: setting stale %s\n",
2512 		    dv->sdev_name));
2513 		dv->sdev_flags |= SDEV_STALE;
2514 	}
2515 	ddv->sdev_flags |= SDEV_BUILD;
2516 	rw_exit(&ddv->sdev_contents);
2517 }
2518 
2519 /*
2520  * Given a directory node, clean out all the nodes beneath.
2521  * If expr is specified, clean node with names matching expr.
2522  * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2523  *	so they are excluded from future lookups.
2524  */
2525 int
2526 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2527 {
2528 	int error = 0;
2529 	int busy = 0;
2530 	struct vnode *vp;
2531 	struct sdev_node *dv, *next = NULL;
2532 	int bkstore = 0;
2533 	int len = 0;
2534 	char *bks_name = NULL;
2535 
2536 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2537 
2538 	/*
2539 	 * We try our best to destroy all unused sdev_node's
2540 	 */
2541 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2542 	for (dv = ddv->sdev_dot; dv; dv = next) {
2543 		next = dv->sdev_next;
2544 		vp = SDEVTOV(dv);
2545 
2546 		if (expr && gmatch(dv->sdev_name, expr) == 0)
2547 			continue;
2548 
2549 		if (vp->v_type == VDIR &&
2550 		    sdev_cleandir(dv, NULL, flags) != 0) {
2551 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2552 			    dv->sdev_name));
2553 			busy++;
2554 			continue;
2555 		}
2556 
2557 		if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2558 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2559 			    dv->sdev_name));
2560 			busy++;
2561 			continue;
2562 		}
2563 
2564 		/*
2565 		 * at this point, either dv is not held or SDEV_ENFORCE
2566 		 * is specified. In either case, dv needs to be deleted
2567 		 */
2568 		SDEV_HOLD(dv);
2569 
2570 		bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2571 		if (bkstore && (vp->v_type == VDIR))
2572 			bkstore += 1;
2573 
2574 		if (bkstore) {
2575 			len = strlen(dv->sdev_name) + 1;
2576 			bks_name = kmem_alloc(len, KM_SLEEP);
2577 			bcopy(dv->sdev_name, bks_name, len);
2578 		}
2579 
2580 		error = sdev_dirdelete(ddv, dv);
2581 
2582 		if (error == EBUSY) {
2583 			sdcmn_err9(("sdev_cleandir: dir busy\n"));
2584 			busy++;
2585 		}
2586 
2587 		/* take care the backing store clean up */
2588 		if (bkstore && (error == 0)) {
2589 			ASSERT(bks_name);
2590 			ASSERT(ddv->sdev_attrvp);
2591 
2592 			if (bkstore == 1) {
2593 				error = VOP_REMOVE(ddv->sdev_attrvp,
2594 				    bks_name, kcred);
2595 			} else if (bkstore == 2) {
2596 				error = VOP_RMDIR(ddv->sdev_attrvp,
2597 				    bks_name, ddv->sdev_attrvp, kcred);
2598 			}
2599 
2600 			/* do not propagate the backing store errors */
2601 			if (error) {
2602 				sdcmn_err9(("sdev_cleandir: backing store"
2603 				    "not cleaned\n"));
2604 				error = 0;
2605 			}
2606 
2607 			bkstore = 0;
2608 			kmem_free(bks_name, len);
2609 			bks_name = NULL;
2610 			len = 0;
2611 		}
2612 	}
2613 
2614 	ddv->sdev_flags |= SDEV_BUILD;
2615 	rw_exit(&ddv->sdev_contents);
2616 
2617 	if (busy) {
2618 		error = EBUSY;
2619 	}
2620 
2621 	return (error);
2622 }
2623 
2624 /*
2625  * a convenient wrapper for readdir() funcs
2626  */
2627 size_t
2628 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2629 {
2630 	size_t reclen = DIRENT64_RECLEN(strlen(nm));
2631 	if (reclen > size)
2632 		return (0);
2633 
2634 	de->d_ino = (ino64_t)ino;
2635 	de->d_off = (off64_t)off + 1;
2636 	de->d_reclen = (ushort_t)reclen;
2637 	(void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2638 	return (reclen);
2639 }
2640 
2641 /*
2642  * sdev_mount service routines
2643  */
2644 int
2645 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2646 {
2647 	int	error;
2648 
2649 	if (uap->datalen != sizeof (*args))
2650 		return (EINVAL);
2651 
2652 	if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2653 		cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2654 		    "get user data. error %d\n", error);
2655 		return (EFAULT);
2656 	}
2657 
2658 	return (0);
2659 }
2660 
2661 #ifdef nextdp
2662 #undef nextdp
2663 #endif
2664 #define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
2665 
2666 /*
2667  * readdir helper func
2668  */
2669 int
2670 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
2671     int flags)
2672 {
2673 	struct sdev_node *ddv = VTOSDEV(vp);
2674 	struct sdev_node *dv;
2675 	dirent64_t	*dp;
2676 	ulong_t		outcount = 0;
2677 	size_t		namelen;
2678 	ulong_t		alloc_count;
2679 	void		*outbuf;
2680 	struct iovec	*iovp;
2681 	int		error = 0;
2682 	size_t		reclen;
2683 	offset_t	diroff;
2684 	offset_t	soff;
2685 	int		this_reclen;
2686 	struct devname_nsmap	*map = NULL;
2687 	struct devname_ops	*dirops = NULL;
2688 	int (*fn)(devname_handle_t *, struct cred *) = NULL;
2689 	int (*vtor)(struct sdev_node *) = NULL;
2690 	struct vattr attr;
2691 	timestruc_t now;
2692 
2693 	ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
2694 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2695 
2696 	if (uiop->uio_loffset >= MAXOFF_T) {
2697 		if (eofp)
2698 			*eofp = 1;
2699 		return (0);
2700 	}
2701 
2702 	if (uiop->uio_iovcnt != 1)
2703 		return (EINVAL);
2704 
2705 	if (vp->v_type != VDIR)
2706 		return (ENOTDIR);
2707 
2708 	if (ddv->sdev_flags & SDEV_VTOR) {
2709 		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2710 		ASSERT(vtor);
2711 	}
2712 
2713 	if (eofp != NULL)
2714 		*eofp = 0;
2715 
2716 	soff = uiop->uio_offset;
2717 	iovp = uiop->uio_iov;
2718 	alloc_count = iovp->iov_len;
2719 	dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
2720 	outcount = 0;
2721 
2722 	if (ddv->sdev_state == SDEV_ZOMBIE)
2723 		goto get_cache;
2724 
2725 	if (SDEV_IS_GLOBAL(ddv)) {
2726 		map = sdev_get_map(ddv, 0);
2727 		dirops = map ? map->dir_ops : NULL;
2728 		fn = dirops ? dirops->devnops_readdir : NULL;
2729 
2730 		if (map && map->dir_map) {
2731 			/*
2732 			 * load the name mapping rule database
2733 			 * through invoking devfsadm and symlink
2734 			 * all the entries in the map
2735 			 */
2736 			devname_rdr_result_t rdr_result;
2737 			int do_thread = 0;
2738 
2739 			rw_enter(&map->dir_lock, RW_READER);
2740 			do_thread = map->dir_maploaded ? 0 : 1;
2741 			rw_exit(&map->dir_lock);
2742 
2743 			if (do_thread) {
2744 				mutex_enter(&ddv->sdev_lookup_lock);
2745 				SDEV_BLOCK_OTHERS(ddv, SDEV_READDIR);
2746 				mutex_exit(&ddv->sdev_lookup_lock);
2747 
2748 				sdev_dispatch_to_nsrdr_thread(ddv,
2749 				    map->dir_map, &rdr_result);
2750 			}
2751 		} else if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
2752 		    !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
2753 		    !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
2754 		    ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
2755 		    !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
2756 		    !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2757 		    !sdev_reconfig_disable) {
2758 			/*
2759 			 * invoking "devfsadm" to do system device reconfig
2760 			 */
2761 			mutex_enter(&ddv->sdev_lookup_lock);
2762 			SDEV_BLOCK_OTHERS(ddv,
2763 			    (SDEV_READDIR|SDEV_LGWAITING));
2764 			mutex_exit(&ddv->sdev_lookup_lock);
2765 
2766 			sdcmn_err8(("readdir of %s by %s: reconfig\n",
2767 			    ddv->sdev_path, curproc->p_user.u_comm));
2768 			if (sdev_reconfig_verbose) {
2769 				cmn_err(CE_CONT,
2770 				    "?readdir of %s by %s: reconfig\n",
2771 				    ddv->sdev_path, curproc->p_user.u_comm);
2772 			}
2773 
2774 			sdev_devfsadmd_thread(ddv, NULL, kcred);
2775 		} else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
2776 			/*
2777 			 * compensate the "ls" started later than "devfsadm"
2778 			 */
2779 			mutex_enter(&ddv->sdev_lookup_lock);
2780 			SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
2781 			mutex_exit(&ddv->sdev_lookup_lock);
2782 		}
2783 
2784 		/*
2785 		 * release the contents lock so that
2786 		 * the cache maybe updated by devfsadmd
2787 		 */
2788 		rw_exit(&ddv->sdev_contents);
2789 		mutex_enter(&ddv->sdev_lookup_lock);
2790 		if (SDEV_IS_READDIR(ddv))
2791 			(void) sdev_wait4lookup(ddv, SDEV_READDIR);
2792 		mutex_exit(&ddv->sdev_lookup_lock);
2793 		rw_enter(&ddv->sdev_contents, RW_READER);
2794 
2795 		sdcmn_err4(("readdir of directory %s by %s\n",
2796 		    ddv->sdev_name, curproc->p_user.u_comm));
2797 		while (ddv->sdev_flags & SDEV_BUILD) {
2798 			if (SDEV_IS_PERSIST(ddv)) {
2799 				error = sdev_filldir_from_store(ddv,
2800 				    alloc_count, cred);
2801 			}
2802 
2803 			/*
2804 			 * pre-creating the directories
2805 			 * defined in vtab
2806 			 */
2807 			if (SDEVTOV(ddv)->v_flag & VROOT) {
2808 				error = sdev_filldir_dynamic(ddv);
2809 			}
2810 
2811 			if (!error)
2812 				ddv->sdev_flags &= ~SDEV_BUILD;
2813 		}
2814 	}
2815 
2816 get_cache:
2817 	/* handle "." and ".." */
2818 	diroff = 0;
2819 	if (soff == 0) {
2820 		/* first time */
2821 		this_reclen = DIRENT64_RECLEN(1);
2822 		if (alloc_count < this_reclen) {
2823 			error = EINVAL;
2824 			goto done;
2825 		}
2826 
2827 		dp->d_ino = (ino64_t)ddv->sdev_ino;
2828 		dp->d_off = (off64_t)1;
2829 		dp->d_reclen = (ushort_t)this_reclen;
2830 
2831 		(void) strncpy(dp->d_name, ".",
2832 		    DIRENT64_NAMELEN(this_reclen));
2833 		outcount += dp->d_reclen;
2834 		dp = nextdp(dp);
2835 	}
2836 
2837 	diroff++;
2838 	if (soff <= 1) {
2839 		this_reclen = DIRENT64_RECLEN(2);
2840 		if (alloc_count < outcount + this_reclen) {
2841 			error = EINVAL;
2842 			goto done;
2843 		}
2844 
2845 		dp->d_reclen = (ushort_t)this_reclen;
2846 		dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
2847 		dp->d_off = (off64_t)2;
2848 
2849 		(void) strncpy(dp->d_name, "..",
2850 		    DIRENT64_NAMELEN(this_reclen));
2851 		outcount += dp->d_reclen;
2852 
2853 		dp = nextdp(dp);
2854 	}
2855 
2856 
2857 	/* gets the cache */
2858 	diroff++;
2859 	for (dv = ddv->sdev_dot; dv; dv = dv->sdev_next, diroff++) {
2860 		sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
2861 		    diroff, soff, dv->sdev_name));
2862 
2863 		/* bypassing pre-matured nodes */
2864 		if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
2865 			sdcmn_err3(("sdev_readdir: pre-mature node  "
2866 			    "%s\n", dv->sdev_name));
2867 			continue;
2868 		}
2869 
2870 		/* don't list stale nodes */
2871 		if (dv->sdev_flags & SDEV_STALE) {
2872 			sdcmn_err4(("sdev_readdir: STALE node  "
2873 			    "%s\n", dv->sdev_name));
2874 			continue;
2875 		}
2876 
2877 		/*
2878 		 * Check validity of node
2879 		 */
2880 		if (vtor) {
2881 			switch (vtor(dv)) {
2882 			case SDEV_VTOR_VALID:
2883 				break;
2884 			case SDEV_VTOR_INVALID:
2885 			case SDEV_VTOR_SKIP:
2886 				continue;
2887 			default:
2888 				cmn_err(CE_PANIC,
2889 				    "dev fs: validator failed: %s(%p)\n",
2890 				    dv->sdev_name, (void *)dv);
2891 				break;
2892 			/*NOTREACHED*/
2893 			}
2894 		}
2895 
2896 		/*
2897 		 * call back into the module for the validity/bookkeeping
2898 		 * of this entry
2899 		 */
2900 		if (fn) {
2901 			error = (*fn)(&(dv->sdev_handle), cred);
2902 			if (error) {
2903 				sdcmn_err4(("sdev_readdir: module did not "
2904 				    "validate %s\n", dv->sdev_name));
2905 				continue;
2906 			}
2907 		}
2908 
2909 		namelen = strlen(dv->sdev_name);
2910 		reclen = DIRENT64_RECLEN(namelen);
2911 		if (outcount + reclen > alloc_count) {
2912 			goto full;
2913 		}
2914 		dp->d_reclen = (ushort_t)reclen;
2915 		dp->d_ino = (ino64_t)dv->sdev_ino;
2916 		dp->d_off = (off64_t)diroff + 1;
2917 		(void) strncpy(dp->d_name, dv->sdev_name,
2918 		    DIRENT64_NAMELEN(reclen));
2919 		outcount += reclen;
2920 		dp = nextdp(dp);
2921 	}
2922 
2923 full:
2924 	sdcmn_err4(("sdev_readdir: moving %lu bytes: "
2925 	    "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
2926 	    (void *)dv));
2927 
2928 	if (outcount)
2929 		error = uiomove(outbuf, outcount, UIO_READ, uiop);
2930 
2931 	if (!error) {
2932 		uiop->uio_offset = diroff;
2933 		if (eofp)
2934 			*eofp = dv ? 0 : 1;
2935 	}
2936 
2937 
2938 	if (ddv->sdev_attrvp) {
2939 		gethrestime(&now);
2940 		attr.va_ctime = now;
2941 		attr.va_atime = now;
2942 		attr.va_mask = AT_CTIME|AT_ATIME;
2943 
2944 		(void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
2945 	}
2946 done:
2947 	kmem_free(outbuf, alloc_count);
2948 	return (error);
2949 }
2950 
2951 
2952 static int
2953 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2954 {
2955 	vnode_t *vp;
2956 	vnode_t *cvp;
2957 	struct sdev_node *svp;
2958 	char *nm;
2959 	struct pathname pn;
2960 	int error;
2961 	int persisted = 0;
2962 
2963 	if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2964 		return (error);
2965 	nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2966 
2967 	vp = rootdir;
2968 	VN_HOLD(vp);
2969 
2970 	while (pn_pathleft(&pn)) {
2971 		ASSERT(vp->v_type == VDIR);
2972 		(void) pn_getcomponent(&pn, nm);
2973 		error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred);
2974 		VN_RELE(vp);
2975 
2976 		if (error)
2977 			break;
2978 
2979 		/* traverse mount points encountered on our journey */
2980 		if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2981 			VN_RELE(cvp);
2982 			break;
2983 		}
2984 
2985 		/*
2986 		 * Direct the operation to the persisting filesystem
2987 		 * underlying /dev.  Bail if we encounter a
2988 		 * non-persistent dev entity here.
2989 		 */
2990 		if (cvp->v_vfsp->vfs_fstype == devtype) {
2991 
2992 			if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2993 				error = ENOENT;
2994 				VN_RELE(cvp);
2995 				break;
2996 			}
2997 
2998 			if (VTOSDEV(cvp) == NULL) {
2999 				error = ENOENT;
3000 				VN_RELE(cvp);
3001 				break;
3002 			}
3003 			svp = VTOSDEV(cvp);
3004 			if ((vp = svp->sdev_attrvp) == NULL) {
3005 				error = ENOENT;
3006 				VN_RELE(cvp);
3007 				break;
3008 			}
3009 			persisted = 1;
3010 			VN_HOLD(vp);
3011 			VN_RELE(cvp);
3012 			cvp = vp;
3013 		}
3014 
3015 		vp = cvp;
3016 		pn_skipslash(&pn);
3017 	}
3018 
3019 	kmem_free(nm, MAXNAMELEN);
3020 	pn_free(&pn);
3021 
3022 	if (error)
3023 		return (error);
3024 
3025 	/*
3026 	 * Only return persisted nodes in the filesystem underlying /dev.
3027 	 */
3028 	if (!persisted) {
3029 		VN_RELE(vp);
3030 		return (ENOENT);
3031 	}
3032 
3033 	*r_vp = vp;
3034 	return (0);
3035 }
3036 
3037 int
3038 sdev_modctl_readdir(const char *dir, char ***dirlistp,
3039 	int *npathsp, int *npathsp_alloc)
3040 {
3041 	char	**pathlist = NULL;
3042 	char	**newlist = NULL;
3043 	int	npaths = 0;
3044 	int	npaths_alloc = 0;
3045 	dirent64_t *dbuf = NULL;
3046 	int	n;
3047 	char	*s;
3048 	int error;
3049 	vnode_t *vp;
3050 	int eof;
3051 	struct iovec iov;
3052 	struct uio uio;
3053 	struct dirent64 *dp;
3054 	size_t dlen;
3055 	size_t dbuflen;
3056 	int ndirents = 64;
3057 	char *nm;
3058 
3059 	error = sdev_modctl_lookup(dir, &vp);
3060 	sdcmn_err11(("modctl readdir: %s by %s: %s\n",
3061 	    dir, curproc->p_user.u_comm,
3062 	    (error == 0) ? "ok" : "failed"));
3063 	if (error)
3064 		return (error);
3065 
3066 	dlen = ndirents * (sizeof (*dbuf));
3067 	dbuf = kmem_alloc(dlen, KM_SLEEP);
3068 
3069 	uio.uio_iov = &iov;
3070 	uio.uio_iovcnt = 1;
3071 	uio.uio_segflg = UIO_SYSSPACE;
3072 	uio.uio_fmode = 0;
3073 	uio.uio_extflg = UIO_COPY_CACHED;
3074 	uio.uio_loffset = 0;
3075 	uio.uio_llimit = MAXOFFSET_T;
3076 
3077 	eof = 0;
3078 	error = 0;
3079 	while (!error && !eof) {
3080 		uio.uio_resid = dlen;
3081 		iov.iov_base = (char *)dbuf;
3082 		iov.iov_len = dlen;
3083 
3084 		(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3085 		error = VOP_READDIR(vp, &uio, kcred, &eof);
3086 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3087 
3088 		dbuflen = dlen - uio.uio_resid;
3089 
3090 		if (error || dbuflen == 0)
3091 			break;
3092 
3093 		for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
3094 			dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
3095 
3096 			nm = dp->d_name;
3097 
3098 			if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
3099 				continue;
3100 
3101 			if (npaths == npaths_alloc) {
3102 				npaths_alloc += 64;
3103 				newlist = (char **)
3104 				    kmem_zalloc((npaths_alloc + 1) *
3105 					sizeof (char *), KM_SLEEP);
3106 				if (pathlist) {
3107 					bcopy(pathlist, newlist,
3108 					    npaths * sizeof (char *));
3109 					kmem_free(pathlist,
3110 					    (npaths + 1) * sizeof (char *));
3111 				}
3112 				pathlist = newlist;
3113 			}
3114 			n = strlen(nm) + 1;
3115 			s = kmem_alloc(n, KM_SLEEP);
3116 			bcopy(nm, s, n);
3117 			pathlist[npaths++] = s;
3118 			sdcmn_err11(("  %s/%s\n", dir, s));
3119 		}
3120 	}
3121 
3122 exit:
3123 	VN_RELE(vp);
3124 
3125 	if (dbuf)
3126 		kmem_free(dbuf, dlen);
3127 
3128 	if (error)
3129 		return (error);
3130 
3131 	*dirlistp = pathlist;
3132 	*npathsp = npaths;
3133 	*npathsp_alloc = npaths_alloc;
3134 
3135 	return (0);
3136 }
3137 
/*
 * Release a path list produced by sdev_modctl_readdir().
 *
 *	pathlist	- array of strings, may be NULL
 *	npaths		- number of strings in the array
 *	npaths_alloc	- slot count the array was allocated for; the
 *			  allocation holds one additional slot
 *
 * sdev_modctl_readdir() returns a NULL list for a directory without
 * entries; that case is a no-op here rather than a kmem_free() of a
 * NULL pointer with a non-zero size.
 */
void
sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
{
	int	i, n;

	if (pathlist == NULL)
		return;

	for (i = 0; i < npaths; i++) {
		n = strlen(pathlist[i]) + 1;
		kmem_free(pathlist[i], n);
	}

	kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
}
3150 
3151 int
3152 sdev_modctl_devexists(const char *path)
3153 {
3154 	vnode_t *vp;
3155 	int error;
3156 
3157 	error = sdev_modctl_lookup(path, &vp);
3158 	sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
3159 	    path, curproc->p_user.u_comm,
3160 	    (error == 0) ? "ok" : "failed"));
3161 	if (error == 0)
3162 		VN_RELE(vp);
3163 
3164 	return (error);
3165 }
3166 
3167 void
3168 sdev_update_newnsmap(struct devname_nsmap *map, char *module, char *mapname)
3169 {
3170 	rw_enter(&map->dir_lock, RW_WRITER);
3171 	if (module) {
3172 		ASSERT(map->dir_newmodule == NULL);
3173 		map->dir_newmodule = i_ddi_strdup(module, KM_SLEEP);
3174 	}
3175 	if (mapname) {
3176 		ASSERT(map->dir_newmap == NULL);
3177 		map->dir_newmap = i_ddi_strdup(mapname, KM_SLEEP);
3178 	}
3179 
3180 	map->dir_invalid = 1;
3181 	rw_exit(&map->dir_lock);
3182 }
3183 
3184 void
3185 sdev_replace_nsmap(struct devname_nsmap *map, char *module, char *mapname)
3186 {
3187 	char *old_module = NULL;
3188 	char *old_map = NULL;
3189 
3190 	ASSERT(RW_LOCK_HELD(&map->dir_lock));
3191 	if (!rw_tryupgrade(&map->dir_lock)) {
3192 		rw_exit(&map->dir_lock);
3193 		rw_enter(&map->dir_lock, RW_WRITER);
3194 	}
3195 
3196 	old_module = map->dir_module;
3197 	if (module) {
3198 		if (old_module && strcmp(old_module, module) != 0) {
3199 			kmem_free(old_module, strlen(old_module) + 1);
3200 		}
3201 		map->dir_module = module;
3202 		map->dir_newmodule = NULL;
3203 	}
3204 
3205 	old_map = map->dir_map;
3206 	if (mapname) {
3207 		if (old_map && strcmp(old_map, mapname) != 0) {
3208 			kmem_free(old_map, strlen(old_map) + 1);
3209 		}
3210 
3211 		map->dir_map = mapname;
3212 		map->dir_newmap = NULL;
3213 	}
3214 	map->dir_maploaded = 0;
3215 	map->dir_invalid = 0;
3216 	rw_downgrade(&map->dir_lock);
3217 }
3218 
3219 /*
3220  * dir_name should have at least one attribute,
3221  *	dir_module
3222  *	or dir_map
3223  *	or both
3224  * caller holds the devname_nsmaps_lock
3225  */
3226 void
3227 sdev_insert_nsmap(char *dir_name, char *dir_module, char *dir_map)
3228 {
3229 	struct devname_nsmap *map;
3230 	int len = 0;
3231 
3232 	ASSERT(dir_name);
3233 	ASSERT(dir_module || dir_map);
3234 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3235 
3236 	if (map = sdev_get_nsmap_by_dir(dir_name, 1)) {
3237 		sdev_update_newnsmap(map, dir_module, dir_map);
3238 		return;
3239 	}
3240 
3241 	map = (struct devname_nsmap *)kmem_zalloc(sizeof (*map), KM_SLEEP);
3242 	map->dir_name = i_ddi_strdup(dir_name, KM_SLEEP);
3243 	if (dir_module) {
3244 		map->dir_module = i_ddi_strdup(dir_module, KM_SLEEP);
3245 	}
3246 
3247 	if (dir_map) {
3248 		if (dir_map[0] != '/') {
3249 			len = strlen(ETC_DEV_DIR) + strlen(dir_map) + 2;
3250 			map->dir_map = kmem_zalloc(len, KM_SLEEP);
3251 			(void) snprintf(map->dir_map, len, "%s/%s", ETC_DEV_DIR,
3252 			    dir_map);
3253 		} else {
3254 			map->dir_map = i_ddi_strdup(dir_map, KM_SLEEP);
3255 		}
3256 	}
3257 
3258 	map->dir_ops = NULL;
3259 	map->dir_maploaded = 0;
3260 	map->dir_invalid = 0;
3261 	rw_init(&map->dir_lock, NULL, RW_DEFAULT, NULL);
3262 
3263 	map->next = devname_nsmaps;
3264 	map->prev = NULL;
3265 	if (devname_nsmaps) {
3266 		devname_nsmaps->prev = map;
3267 	}
3268 	devname_nsmaps = map;
3269 }
3270 
3271 struct devname_nsmap *
3272 sdev_get_nsmap_by_dir(char *dir_path, int locked)
3273 {
3274 	struct devname_nsmap *map = NULL;
3275 
3276 	if (!locked)
3277 		mutex_enter(&devname_nsmaps_lock);
3278 	for (map = devname_nsmaps; map; map = map->next) {
3279 		sdcmn_err6(("sdev_get_nsmap_by_dir: dir %s\n", map->dir_name));
3280 		if (strcmp(map->dir_name, dir_path) == 0) {
3281 			if (!locked)
3282 				mutex_exit(&devname_nsmaps_lock);
3283 			return (map);
3284 		}
3285 	}
3286 	if (!locked)
3287 		mutex_exit(&devname_nsmaps_lock);
3288 	return (NULL);
3289 }
3290 
3291 struct devname_nsmap *
3292 sdev_get_nsmap_by_module(char *mod_name)
3293 {
3294 	struct devname_nsmap *map = NULL;
3295 
3296 	mutex_enter(&devname_nsmaps_lock);
3297 	for (map = devname_nsmaps; map; map = map->next) {
3298 		sdcmn_err7(("sdev_get_nsmap_by_module: module %s\n",
3299 		    map->dir_module));
3300 		if (map->dir_module && strcmp(map->dir_module, mod_name) == 0) {
3301 			mutex_exit(&devname_nsmaps_lock);
3302 			return (map);
3303 		}
3304 	}
3305 	mutex_exit(&devname_nsmaps_lock);
3306 	return (NULL);
3307 }
3308 
3309 void
3310 sdev_invalidate_nsmaps()
3311 {
3312 	struct devname_nsmap *map = NULL;
3313 
3314 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3315 
3316 	if (devname_nsmaps == NULL)
3317 		return;
3318 
3319 	for (map = devname_nsmaps; map; map = map->next) {
3320 		rw_enter(&map->dir_lock, RW_WRITER);
3321 		map->dir_invalid = 1;
3322 		rw_exit(&map->dir_lock);
3323 	}
3324 	devname_nsmaps_invalidated = 1;
3325 }
3326 
3327 
3328 int
3329 sdev_nsmaps_loaded()
3330 {
3331 	int ret = 0;
3332 
3333 	mutex_enter(&devname_nsmaps_lock);
3334 	if (devname_nsmaps_loaded)
3335 		ret = 1;
3336 
3337 	mutex_exit(&devname_nsmaps_lock);
3338 	return (ret);
3339 }
3340 
3341 int
3342 sdev_nsmaps_reloaded()
3343 {
3344 	int ret = 0;
3345 
3346 	mutex_enter(&devname_nsmaps_lock);
3347 	if (devname_nsmaps_invalidated)
3348 		ret = 1;
3349 
3350 	mutex_exit(&devname_nsmaps_lock);
3351 	return (ret);
3352 }
3353 
3354 static void
3355 sdev_free_nsmap(struct devname_nsmap *map)
3356 {
3357 	ASSERT(map);
3358 	if (map->dir_name)
3359 		kmem_free(map->dir_name, strlen(map->dir_name) + 1);
3360 	if (map->dir_module)
3361 		kmem_free(map->dir_module, strlen(map->dir_module) + 1);
3362 	if (map->dir_map)
3363 		kmem_free(map->dir_map, strlen(map->dir_map) + 1);
3364 	rw_destroy(&map->dir_lock);
3365 	kmem_free(map, sizeof (*map));
3366 }
3367 
3368 void
3369 sdev_validate_nsmaps()
3370 {
3371 	struct devname_nsmap *map = NULL;
3372 	struct devname_nsmap *oldmap = NULL;
3373 
3374 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3375 	map = devname_nsmaps;
3376 	while (map) {
3377 		rw_enter(&map->dir_lock, RW_READER);
3378 		if ((map->dir_invalid == 1) && (map->dir_newmodule == NULL) &&
3379 		    (map->dir_newmap == NULL)) {
3380 			oldmap = map;
3381 			rw_exit(&map->dir_lock);
3382 			if (map->prev)
3383 				map->prev->next = oldmap->next;
3384 			if (map == devname_nsmaps)
3385 				devname_nsmaps = oldmap->next;
3386 
3387 			map = oldmap->next;
3388 			if (map)
3389 				map->prev = oldmap->prev;
3390 			sdev_free_nsmap(oldmap);
3391 			oldmap = NULL;
3392 		} else {
3393 			rw_exit(&map->dir_lock);
3394 			map = map->next;
3395 		}
3396 	}
3397 	devname_nsmaps_invalidated = 0;
3398 }
3399 
3400 static int
3401 sdev_map_is_invalid(struct devname_nsmap *map)
3402 {
3403 	int ret = 0;
3404 
3405 	ASSERT(map);
3406 	rw_enter(&map->dir_lock, RW_READER);
3407 	if (map->dir_invalid)
3408 		ret = 1;
3409 	rw_exit(&map->dir_lock);
3410 	return (ret);
3411 }
3412 
3413 static int
3414 sdev_check_map(struct devname_nsmap *map)
3415 {
3416 	struct devname_nsmap *mapp;
3417 
3418 	mutex_enter(&devname_nsmaps_lock);
3419 	if (devname_nsmaps == NULL) {
3420 		mutex_exit(&devname_nsmaps_lock);
3421 		return (1);
3422 	}
3423 
3424 	for (mapp = devname_nsmaps; mapp; mapp = mapp->next) {
3425 		if (mapp == map) {
3426 			mutex_exit(&devname_nsmaps_lock);
3427 			return (0);
3428 		}
3429 	}
3430 
3431 	mutex_exit(&devname_nsmaps_lock);
3432 	return (1);
3433 
3434 }
3435 
3436 struct devname_nsmap *
3437 sdev_get_map(struct sdev_node *dv, int validate)
3438 {
3439 	struct devname_nsmap *map;
3440 	int error;
3441 
3442 	ASSERT(RW_READ_HELD(&dv->sdev_contents));
3443 	map = dv->sdev_mapinfo;
3444 	if (map && sdev_check_map(map)) {
3445 		if (!rw_tryupgrade(&dv->sdev_contents)) {
3446 			rw_exit(&dv->sdev_contents);
3447 			rw_enter(&dv->sdev_contents, RW_WRITER);
3448 		}
3449 		dv->sdev_mapinfo = NULL;
3450 		rw_downgrade(&dv->sdev_contents);
3451 		return (NULL);
3452 	}
3453 
3454 	if (validate && (!map || (map && sdev_map_is_invalid(map)))) {
3455 		if (!rw_tryupgrade(&dv->sdev_contents)) {
3456 			rw_exit(&dv->sdev_contents);
3457 			rw_enter(&dv->sdev_contents, RW_WRITER);
3458 		}
3459 		error = sdev_get_moduleops(dv);
3460 		if (!error)
3461 			map = dv->sdev_mapinfo;
3462 		rw_downgrade(&dv->sdev_contents);
3463 	}
3464 	return (map);
3465 }
3466 
3467 void
3468 sdev_handle_alloc(struct sdev_node *dv)
3469 {
3470 	rw_enter(&dv->sdev_contents, RW_WRITER);
3471 	dv->sdev_handle.dh_data = dv;
3472 	rw_exit(&dv->sdev_contents);
3473 }
3474 
3475 
3476 extern int sdev_vnodeops_tbl_size;
3477 
3478 /*
3479  * construct a new template with overrides from vtab
3480  */
3481 static fs_operation_def_t *
3482 sdev_merge_vtab(const fs_operation_def_t tab[])
3483 {
3484 	fs_operation_def_t *new;
3485 	const fs_operation_def_t *tab_entry;
3486 
3487 	/* make a copy of standard vnode ops table */
3488 	new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
3489 	bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
3490 
3491 	/* replace the overrides from tab */
3492 	for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
3493 		fs_operation_def_t *std_entry = new;
3494 		while (std_entry->name) {
3495 			if (strcmp(tab_entry->name, std_entry->name) == 0) {
3496 				std_entry->func = tab_entry->func;
3497 				break;
3498 			}
3499 			std_entry++;
3500 		}
3501 		if (std_entry->name == NULL)
3502 			cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
3503 			    tab_entry->name);
3504 	}
3505 
3506 	return (new);
3507 }
3508 
3509 /* free memory allocated by sdev_merge_vtab */
3510 static void
3511 sdev_free_vtab(fs_operation_def_t *new)
3512 {
3513 	kmem_free(new, sdev_vnodeops_tbl_size);
3514 }
3515 
3516 void
3517 devname_get_vnode(devname_handle_t *hdl, vnode_t **vpp)
3518 {
3519 	struct sdev_node *dv = hdl->dh_data;
3520 
3521 	ASSERT(dv);
3522 
3523 	rw_enter(&dv->sdev_contents, RW_READER);
3524 	*vpp = SDEVTOV(dv);
3525 	rw_exit(&dv->sdev_contents);
3526 }
3527 
3528 int
3529 devname_get_path(devname_handle_t *hdl, char **path)
3530 {
3531 	struct sdev_node *dv = hdl->dh_data;
3532 
3533 	ASSERT(dv);
3534 
3535 	rw_enter(&dv->sdev_contents, RW_READER);
3536 	*path = dv->sdev_path;
3537 	rw_exit(&dv->sdev_contents);
3538 	return (0);
3539 }
3540 
3541 int
3542 devname_get_name(devname_handle_t *hdl, char **entry)
3543 {
3544 	struct sdev_node *dv = hdl->dh_data;
3545 
3546 	ASSERT(dv);
3547 	rw_enter(&dv->sdev_contents, RW_READER);
3548 	*entry = dv->sdev_name;
3549 	rw_exit(&dv->sdev_contents);
3550 	return (0);
3551 }
3552 
3553 void
3554 devname_get_dir_vnode(devname_handle_t *hdl, vnode_t **vpp)
3555 {
3556 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3557 
3558 	ASSERT(dv);
3559 
3560 	rw_enter(&dv->sdev_contents, RW_READER);
3561 	*vpp = SDEVTOV(dv);
3562 	rw_exit(&dv->sdev_contents);
3563 }
3564 
3565 int
3566 devname_get_dir_path(devname_handle_t *hdl, char **path)
3567 {
3568 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3569 
3570 	ASSERT(dv);
3571 	rw_enter(&dv->sdev_contents, RW_READER);
3572 	*path = dv->sdev_path;
3573 	rw_exit(&dv->sdev_contents);
3574 	return (0);
3575 }
3576 
3577 int
3578 devname_get_dir_name(devname_handle_t *hdl, char **entry)
3579 {
3580 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3581 
3582 	ASSERT(dv);
3583 	rw_enter(&dv->sdev_contents, RW_READER);
3584 	*entry = dv->sdev_name;
3585 	rw_exit(&dv->sdev_contents);
3586 	return (0);
3587 }
3588 
3589 int
3590 devname_get_dir_nsmap(devname_handle_t *hdl, struct devname_nsmap **map)
3591 {
3592 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3593 
3594 	ASSERT(dv);
3595 	rw_enter(&dv->sdev_contents, RW_READER);
3596 	*map = dv->sdev_mapinfo;
3597 	rw_exit(&dv->sdev_contents);
3598 	return (0);
3599 }
3600 
3601 int
3602 devname_get_dir_handle(devname_handle_t *hdl, devname_handle_t **dir_hdl)
3603 {
3604 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3605 
3606 	ASSERT(dv);
3607 	rw_enter(&dv->sdev_contents, RW_READER);
3608 	*dir_hdl = &(dv->sdev_handle);
3609 	rw_exit(&dv->sdev_contents);
3610 	return (0);
3611 }
3612 
3613 void
3614 devname_set_nodetype(devname_handle_t *hdl, void *args, int spec)
3615 {
3616 	struct sdev_node *dv = hdl->dh_data;
3617 
3618 	ASSERT(dv);
3619 	rw_enter(&dv->sdev_contents, RW_WRITER);
3620 	hdl->dh_spec = (devname_spec_t)spec;
3621 	hdl->dh_args = (void *)i_ddi_strdup((char *)args, KM_SLEEP);
3622 	rw_exit(&dv->sdev_contents);
3623 }
3624 
3625 /*
3626  * a generic setattr() function
3627  *
3628  * note: flags only supports AT_UID and AT_GID.
3629  *	 Future enhancements can be done for other types, e.g. AT_MODE
3630  */
3631 int
3632 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
3633     struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
3634     int), int protocol)
3635 {
3636 	struct sdev_node	*dv = VTOSDEV(vp);
3637 	struct sdev_node	*parent = dv->sdev_dotdot;
3638 	struct vattr		*get;
3639 	uint_t			mask = vap->va_mask;
3640 	int 			error;
3641 
3642 	/* some sanity checks */
3643 	if (vap->va_mask & AT_NOSET)
3644 		return (EINVAL);
3645 
3646 	if (vap->va_mask & AT_SIZE) {
3647 		if (vp->v_type == VDIR) {
3648 			return (EISDIR);
3649 		}
3650 	}
3651 
3652 	/* no need to set attribute, but do not fail either */
3653 	ASSERT(parent);
3654 	rw_enter(&parent->sdev_contents, RW_READER);
3655 	if (dv->sdev_state == SDEV_ZOMBIE) {
3656 		rw_exit(&parent->sdev_contents);
3657 		return (0);
3658 	}
3659 
3660 	/* If backing store exists, just set it. */
3661 	if (dv->sdev_attrvp) {
3662 		rw_exit(&parent->sdev_contents);
3663 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3664 	}
3665 
3666 	/*
3667 	 * Otherwise, for nodes with the persistence attribute, create it.
3668 	 */
3669 	ASSERT(dv->sdev_attr);
3670 	if (SDEV_IS_PERSIST(dv) ||
3671 	    ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
3672 		sdev_vattr_merge(dv, vap);
3673 		rw_enter(&dv->sdev_contents, RW_WRITER);
3674 		error = sdev_shadow_node(dv, cred);
3675 		rw_exit(&dv->sdev_contents);
3676 		rw_exit(&parent->sdev_contents);
3677 
3678 		if (error)
3679 			return (error);
3680 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3681 	}
3682 
3683 
3684 	/*
3685 	 * sdev_attr was allocated in sdev_mknode
3686 	 */
3687 	rw_enter(&dv->sdev_contents, RW_WRITER);
3688 	error = secpolicy_vnode_setattr(cred, vp, vap, dv->sdev_attr,
3689 	    flags, sdev_unlocked_access, dv);
3690 	if (error) {
3691 		rw_exit(&dv->sdev_contents);
3692 		rw_exit(&parent->sdev_contents);
3693 		return (error);
3694 	}
3695 
3696 	get = dv->sdev_attr;
3697 	if (mask & AT_MODE) {
3698 		get->va_mode &= S_IFMT;
3699 		get->va_mode |= vap->va_mode & ~S_IFMT;
3700 	}
3701 
3702 	if ((mask & AT_UID) || (mask & AT_GID)) {
3703 		if (mask & AT_UID)
3704 			get->va_uid = vap->va_uid;
3705 		if (mask & AT_GID)
3706 			get->va_gid = vap->va_gid;
3707 		/*
3708 		 * a callback must be provided if the protocol is set
3709 		 */
3710 		if ((protocol & AT_UID) || (protocol & AT_GID)) {
3711 			ASSERT(callback);
3712 			error = callback(dv, get, protocol);
3713 			if (error) {
3714 				rw_exit(&dv->sdev_contents);
3715 				rw_exit(&parent->sdev_contents);
3716 				return (error);
3717 			}
3718 		}
3719 	}
3720 
3721 	if (mask & AT_ATIME)
3722 		get->va_atime = vap->va_atime;
3723 	if (mask & AT_MTIME)
3724 		get->va_mtime = vap->va_mtime;
3725 	if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
3726 		gethrestime(&get->va_ctime);
3727 	}
3728 
3729 	sdev_vattr_merge(dv, get);
3730 	rw_exit(&dv->sdev_contents);
3731 	rw_exit(&parent->sdev_contents);
3732 	return (0);
3733 }
3734