xref: /titanic_50/usr/src/uts/common/fs/dev/sdev_subr.c (revision d876c67df282cf8c0136415d482d92d16ee59a0b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * utility routines for the /dev fs
30  */
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/t_lock.h>
35 #include <sys/systm.h>
36 #include <sys/sysmacros.h>
37 #include <sys/user.h>
38 #include <sys/time.h>
39 #include <sys/vfs.h>
40 #include <sys/vnode.h>
41 #include <sys/file.h>
42 #include <sys/fcntl.h>
43 #include <sys/flock.h>
44 #include <sys/kmem.h>
45 #include <sys/uio.h>
46 #include <sys/errno.h>
47 #include <sys/stat.h>
48 #include <sys/cred.h>
49 #include <sys/dirent.h>
50 #include <sys/pathname.h>
51 #include <sys/cmn_err.h>
52 #include <sys/debug.h>
53 #include <sys/mode.h>
54 #include <sys/policy.h>
55 #include <fs/fs_subr.h>
56 #include <sys/mount.h>
57 #include <sys/fs/snode.h>
58 #include <sys/fs/dv_node.h>
59 #include <sys/fs/sdev_impl.h>
60 #include <sys/fs/sdev_node.h>
61 #include <sys/sunndi.h>
62 #include <sys/sunmdi.h>
63 #include <sys/conf.h>
64 #include <sys/proc.h>
65 #include <sys/user.h>
66 #include <sys/modctl.h>
67 
#ifdef DEBUG
/* NOTE(review): presumably the message mask tested by sdcmn_err*(); confirm */
int sdev_debug = 0x00000001;
/* cache flags passed to kmem_cache_create() by sdev_node_cache_init() */
int sdev_debug_cache_flags = 0;
#endif
72 
73 /*
74  * globals
75  */
/* prototype memory vattrs */
/*
 * Template attributes for a directory node.  va_mask flags the type,
 * mode, uid and gid fields; every remaining field is zero.
 */
vattr_t sdev_vattr_dir = {
	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
	VDIR,					/* va_type */
	SDEV_DIRMODE_DEFAULT,			/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
95 
/*
 * Template attributes for a symbolic link node.  Only the type and mode
 * are flagged in va_mask, although default uid/gid values are filled in.
 */
vattr_t sdev_vattr_lnk = {
	AT_TYPE|AT_MODE,			/* va_mask */
	VLNK,					/* va_type */
	SDEV_LNKMODE_DEFAULT,			/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
114 
/*
 * Template attributes for a block device node.  The mode carries the
 * S_IFBLK file-type bits in addition to the default device permissions.
 */
vattr_t sdev_vattr_blk = {
	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
	VBLK,					/* va_type */
	S_IFBLK | SDEV_DEVMODE_DEFAULT,		/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
133 
/*
 * Template attributes for a character device node.  The mode carries the
 * S_IFCHR file-type bits in addition to the default device permissions.
 */
vattr_t sdev_vattr_chr = {
	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
	VCHR,					/* va_type */
	S_IFCHR | SDEV_DEVMODE_DEFAULT,		/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
152 
/* created by sdev_node_cache_init(), torn down by sdev_node_cache_fini() */
kmem_cache_t	*sdev_node_cache;	/* sdev_node cache */
int		devtype;		/* fstype */

struct devname_ops *devname_ns_ops;	/* default name service directory ops */
/* initialized in sdev_mkroot() when the root being built is named "/dev" */
kmutex_t devname_nsmaps_lock;	/* protect devname_nsmaps */

/* static */
static struct devname_nsmap *devname_nsmaps = NULL;
				/* contents from /etc/dev/devname_master */
static int devname_nsmaps_invalidated = 0; /* "devfsadm -m" has run */

/* forward declarations of file-local helpers used before their definition */
static struct vnodeops *sdev_get_vop(struct sdev_node *);
static void sdev_set_no_nocache(struct sdev_node *);
static int sdev_get_moduleops(struct sdev_node *);
static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
static void sdev_free_vtab(fs_operation_def_t *);
169 
170 static void
171 sdev_prof_free(struct sdev_node *dv)
172 {
173 	ASSERT(!SDEV_IS_GLOBAL(dv));
174 	if (dv->sdev_prof.dev_name)
175 		nvlist_free(dv->sdev_prof.dev_name);
176 	if (dv->sdev_prof.dev_map)
177 		nvlist_free(dv->sdev_prof.dev_map);
178 	if (dv->sdev_prof.dev_symlink)
179 		nvlist_free(dv->sdev_prof.dev_symlink);
180 	if (dv->sdev_prof.dev_glob_incdir)
181 		nvlist_free(dv->sdev_prof.dev_glob_incdir);
182 	if (dv->sdev_prof.dev_glob_excdir)
183 		nvlist_free(dv->sdev_prof.dev_glob_excdir);
184 	bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
185 }
186 
187 /*
188  * sdev_node cache constructor
189  */
190 /*ARGSUSED1*/
191 static int
192 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
193 {
194 	struct sdev_node *dv = (struct sdev_node *)buf;
195 	struct vnode *vp;
196 
197 	ASSERT(flag == KM_SLEEP);
198 
199 	bzero(buf, sizeof (struct sdev_node));
200 	rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
201 	dv->sdev_vnode = vn_alloc(KM_SLEEP);
202 	vp = SDEVTOV(dv);
203 	vp->v_data = (caddr_t)dv;
204 	return (0);
205 }
206 
207 /* sdev_node destructor for kmem cache */
208 /*ARGSUSED1*/
209 static void
210 i_sdev_node_dtor(void *buf, void *arg)
211 {
212 	struct sdev_node *dv = (struct sdev_node *)buf;
213 	struct vnode *vp = SDEVTOV(dv);
214 
215 	rw_destroy(&dv->sdev_contents);
216 	vn_free(vp);
217 }
218 
219 /* initialize sdev_node cache */
220 void
221 sdev_node_cache_init()
222 {
223 	int flags = 0;
224 
225 #ifdef	DEBUG
226 	flags = sdev_debug_cache_flags;
227 	if (flags)
228 		sdcmn_err(("cache debug flags 0x%x\n", flags));
229 #endif	/* DEBUG */
230 
231 	ASSERT(sdev_node_cache == NULL);
232 	sdev_node_cache = kmem_cache_create("sdev_node_cache",
233 	    sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
234 	    NULL, NULL, NULL, flags);
235 }
236 
237 /* destroy sdev_node cache */
238 void
239 sdev_node_cache_fini()
240 {
241 	ASSERT(sdev_node_cache != NULL);
242 	kmem_cache_destroy(sdev_node_cache);
243 	sdev_node_cache = NULL;
244 }
245 
246 /*
247  * Compare two nodes lexographically to balance avl tree
248  */
249 static int
250 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
251 {
252 	int rv;
253 	if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
254 		return (0);
255 	return ((rv < 0) ? -1 : 1);
256 }
257 
258 void
259 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
260 {
261 	ASSERT(dv);
262 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
263 	dv->sdev_state = state;
264 }
265 
266 static void
267 sdev_attrinit(struct sdev_node *dv, vattr_t *vap)
268 {
269 	timestruc_t now;
270 
271 	ASSERT(vap);
272 
273 	dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
274 	*dv->sdev_attr = *vap;
275 
276 	dv->sdev_attr->va_mode = MAKEIMODE(vap->va_type, vap->va_mode);
277 
278 	gethrestime(&now);
279 	dv->sdev_attr->va_atime = now;
280 	dv->sdev_attr->va_mtime = now;
281 	dv->sdev_attr->va_ctime = now;
282 }
283 
284 /* alloc and initialize a sdev_node */
285 int
286 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
287     vattr_t *vap)
288 {
289 	struct sdev_node *dv = NULL;
290 	struct vnode *vp;
291 	size_t nmlen, len;
292 	devname_handle_t  *dhl;
293 
294 	nmlen = strlen(nm) + 1;
295 	if (nmlen > MAXNAMELEN) {
296 		sdcmn_err9(("sdev_nodeinit: node name %s"
297 		    " too long\n", nm));
298 		*newdv = NULL;
299 		return (ENAMETOOLONG);
300 	}
301 
302 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
303 
304 	dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
305 	bcopy(nm, dv->sdev_name, nmlen);
306 	dv->sdev_namelen = nmlen - 1;	/* '\0' not included */
307 	len = strlen(ddv->sdev_path) + strlen(nm) + 2;
308 	dv->sdev_path = kmem_alloc(len, KM_SLEEP);
309 	(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
310 	/* overwritten for VLNK nodes */
311 	dv->sdev_symlink = NULL;
312 
313 	vp = SDEVTOV(dv);
314 	vn_reinit(vp);
315 	vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
316 	if (vap)
317 		vp->v_type = vap->va_type;
318 
319 	/*
320 	 * initialized to the parent's vnodeops.
321 	 * maybe overwriten for a VDIR
322 	 */
323 	vn_setops(vp, vn_getops(SDEVTOV(ddv)));
324 	vn_exists(vp);
325 
326 	dv->sdev_dotdot = NULL;
327 	dv->sdev_attrvp = NULL;
328 	if (vap) {
329 		sdev_attrinit(dv, vap);
330 	} else {
331 		dv->sdev_attr = NULL;
332 	}
333 
334 	dv->sdev_ino = sdev_mkino(dv);
335 	dv->sdev_nlink = 0;		/* updated on insert */
336 	dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
337 	dv->sdev_flags |= SDEV_BUILD;
338 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
339 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
340 	if (SDEV_IS_GLOBAL(ddv)) {
341 		dv->sdev_flags |= SDEV_GLOBAL;
342 		dv->sdev_mapinfo = NULL;
343 		dhl = &(dv->sdev_handle);
344 		dhl->dh_data = dv;
345 		dhl->dh_spec = DEVNAME_NS_NONE;
346 		dhl->dh_args = NULL;
347 		sdev_set_no_nocache(dv);
348 		dv->sdev_gdir_gen = 0;
349 	} else {
350 		dv->sdev_flags &= ~SDEV_GLOBAL;
351 		dv->sdev_origin = NULL; /* set later */
352 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
353 		dv->sdev_ldir_gen = 0;
354 		dv->sdev_devtree_gen = 0;
355 	}
356 
357 	rw_enter(&dv->sdev_contents, RW_WRITER);
358 	sdev_set_nodestate(dv, SDEV_INIT);
359 	rw_exit(&dv->sdev_contents);
360 	*newdv = dv;
361 
362 	return (0);
363 }
364 
365 /*
366  * transition a sdev_node into SDEV_READY state
367  */
368 int
369 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
370     void *args, struct cred *cred)
371 {
372 	int error = 0;
373 	struct vnode *vp = SDEVTOV(dv);
374 	vtype_t type;
375 
376 	ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
377 
378 	type = vap->va_type;
379 	vp->v_type = type;
380 	vp->v_rdev = vap->va_rdev;
381 	rw_enter(&dv->sdev_contents, RW_WRITER);
382 	if (type == VDIR) {
383 		dv->sdev_nlink = 2;
384 		dv->sdev_flags &= ~SDEV_PERSIST;
385 		dv->sdev_flags &= ~SDEV_DYNAMIC;
386 		vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
387 		error = sdev_get_moduleops(dv); /* from plug-in module */
388 		ASSERT(dv->sdev_dotdot);
389 		ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
390 		vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
391 		avl_create(&dv->sdev_entries,
392 		    (int (*)(const void *, const void *))sdev_compare_nodes,
393 		    sizeof (struct sdev_node),
394 		    offsetof(struct sdev_node, sdev_avllink));
395 	} else if (type == VLNK) {
396 		ASSERT(args);
397 		dv->sdev_nlink = 1;
398 		dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
399 	} else {
400 		dv->sdev_nlink = 1;
401 	}
402 
403 	if (!(SDEV_IS_GLOBAL(dv))) {
404 		dv->sdev_origin = (struct sdev_node *)args;
405 		dv->sdev_flags &= ~SDEV_PERSIST;
406 	}
407 
408 	/*
409 	 * shadow node is created here OR
410 	 * if failed (indicated by dv->sdev_attrvp == NULL),
411 	 * created later in sdev_setattr
412 	 */
413 	if (avp) {
414 		dv->sdev_attrvp = avp;
415 	} else {
416 		if (dv->sdev_attr == NULL)
417 			sdev_attrinit(dv, vap);
418 		else
419 			*dv->sdev_attr = *vap;
420 
421 		if ((SDEV_IS_PERSIST(dv) && (dv->sdev_attrvp == NULL)) ||
422 		    ((SDEVTOV(dv)->v_type == VDIR) &&
423 		    (dv->sdev_attrvp == NULL)))
424 			error = sdev_shadow_node(dv, cred);
425 	}
426 
427 	/* transition to READY state */
428 	sdev_set_nodestate(dv, SDEV_READY);
429 	sdev_nc_node_exists(dv);
430 	rw_exit(&dv->sdev_contents);
431 	return (error);
432 }
433 
434 /*
435  * setting ZOMBIE state
436  */
437 static int
438 sdev_nodezombied(struct sdev_node *dv)
439 {
440 	rw_enter(&dv->sdev_contents, RW_WRITER);
441 	sdev_set_nodestate(dv, SDEV_ZOMBIE);
442 	rw_exit(&dv->sdev_contents);
443 	return (0);
444 }
445 
446 /*
447  * Build the VROOT sdev_node.
448  */
449 /*ARGSUSED*/
450 struct sdev_node *
451 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
452     struct vnode *avp, struct cred *cred)
453 {
454 	struct sdev_node *dv;
455 	struct vnode *vp;
456 	char devdir[] = "/dev";
457 
458 	ASSERT(sdev_node_cache != NULL);
459 	ASSERT(avp);
460 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
461 	vp = SDEVTOV(dv);
462 	vn_reinit(vp);
463 	vp->v_flag |= VROOT;
464 	vp->v_vfsp = vfsp;
465 	vp->v_type = VDIR;
466 	vp->v_rdev = devdev;
467 	vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
468 	vn_exists(vp);
469 
470 	if (vfsp->vfs_mntpt)
471 		dv->sdev_name = i_ddi_strdup(
472 		    (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
473 	else
474 		/* vfs_mountdev1 set mount point later */
475 		dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
476 	dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
477 	dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
478 	dv->sdev_ino = SDEV_ROOTINO;
479 	dv->sdev_nlink = 2;		/* name + . (no sdev_insert) */
480 	dv->sdev_dotdot = dv;		/* .. == self */
481 	dv->sdev_attrvp = avp;
482 	dv->sdev_attr = NULL;
483 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
484 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
485 	if (strcmp(dv->sdev_name, "/dev") == 0) {
486 		mutex_init(&devname_nsmaps_lock, NULL, MUTEX_DEFAULT, NULL);
487 		dv->sdev_mapinfo = NULL;
488 		dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
489 		bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
490 		dv->sdev_gdir_gen = 0;
491 	} else {
492 		dv->sdev_flags = SDEV_BUILD;
493 		dv->sdev_flags &= ~SDEV_PERSIST;
494 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
495 		dv->sdev_ldir_gen = 0;
496 		dv->sdev_devtree_gen = 0;
497 	}
498 
499 	avl_create(&dv->sdev_entries,
500 	    (int (*)(const void *, const void *))sdev_compare_nodes,
501 	    sizeof (struct sdev_node),
502 	    offsetof(struct sdev_node, sdev_avllink));
503 
504 	rw_enter(&dv->sdev_contents, RW_WRITER);
505 	sdev_set_nodestate(dv, SDEV_READY);
506 	rw_exit(&dv->sdev_contents);
507 	sdev_nc_node_exists(dv);
508 	return (dv);
509 }
510 
511 /*
512  *  1. load the module
513  *  2. modload invokes sdev_module_register, which in turn sets
514  *     the dv->sdev_mapinfo->dir_ops
515  *
516  * note: locking order:
517  *	dv->sdev_contents -> map->dir_lock
518  */
519 static int
520 sdev_get_moduleops(struct sdev_node *dv)
521 {
522 	int error = 0;
523 	struct devname_nsmap *map = NULL;
524 	char *module;
525 	char *path;
526 	int load = 1;
527 
528 	ASSERT(SDEVTOV(dv)->v_type == VDIR);
529 
530 	if (devname_nsmaps == NULL)
531 		return (0);
532 
533 	if (!sdev_nsmaps_loaded() && !sdev_nsmaps_reloaded())
534 		return (0);
535 
536 
537 	path = dv->sdev_path;
538 	if ((map = sdev_get_nsmap_by_dir(path, 0))) {
539 		rw_enter(&map->dir_lock, RW_READER);
540 		if (map->dir_invalid) {
541 			if (map->dir_module && map->dir_newmodule &&
542 			    (strcmp(map->dir_module,
543 			    map->dir_newmodule) == 0)) {
544 				load = 0;
545 			}
546 			sdev_replace_nsmap(map, map->dir_newmodule,
547 			    map->dir_newmap);
548 		}
549 
550 		module = map->dir_module;
551 		if (module && load) {
552 			sdcmn_err6(("sdev_get_moduleops: "
553 			    "load module %s", module));
554 			rw_exit(&map->dir_lock);
555 			error = modload("devname", module);
556 			sdcmn_err6(("sdev_get_moduleops: error %d\n", error));
557 			if (error < 0) {
558 				return (-1);
559 			}
560 		} else if (module == NULL) {
561 			/*
562 			 * loading the module ops for name services
563 			 */
564 			if (devname_ns_ops == NULL) {
565 				sdcmn_err6((
566 				    "sdev_get_moduleops: modload default\n"));
567 				error = modload("devname", DEVNAME_NSCONFIG);
568 				sdcmn_err6((
569 				    "sdev_get_moduleops: error %d\n", error));
570 				if (error < 0) {
571 					return (-1);
572 				}
573 			}
574 
575 			if (!rw_tryupgrade(&map->dir_lock)) {
576 				rw_exit(&map->dir_lock);
577 				rw_enter(&map->dir_lock, RW_WRITER);
578 			}
579 			ASSERT(devname_ns_ops);
580 			map->dir_ops = devname_ns_ops;
581 			rw_exit(&map->dir_lock);
582 		}
583 	}
584 
585 	dv->sdev_mapinfo = map;
586 	return (0);
587 }
588 
/*
 * directory dependent vop table
 *
 * One entry per /dev subdirectory that needs its own vnodeops, node
 * validator and/or node flags.  The table is terminated by an entry
 * whose vt_name is NULL.
 */
struct sdev_vop_table {
	char *vt_name;				/* subdirectory name */
	const fs_operation_def_t *vt_service;	/* vnodeops table */
	struct vnodeops *vt_vops;		/* constructed vop */
	struct vnodeops **vt_global_vops;	/* global container for vop */
	int (*vt_vtor)(struct sdev_node *);	/* validate sdev_node */
	int vt_flags;				/* flags OR-ed into sdev_flags */
};
598 
/*
 * A nice improvement would be to provide a plug-in mechanism
 * for this table instead of a const table.
 *
 * vt_vops starts out NULL for every entry; sdev_get_vop() fills it in
 * the first time an entry with a vt_service template is matched.
 */
static struct sdev_vop_table vtab[] =
{
	{ "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
	SDEV_DYNAMIC | SDEV_VTOR },

	/* no private vnodeops; only keeps the directory out of the ncache */
	{ "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },

	{ "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
	SDEV_DYNAMIC | SDEV_VTOR },

	/* terminator */
	{ NULL, NULL, NULL, NULL, NULL, 0}
};
615 
616 
617 /*
618  *  sets a directory's vnodeops if the directory is in the vtab;
619  */
620 static struct vnodeops *
621 sdev_get_vop(struct sdev_node *dv)
622 {
623 	int i;
624 	char *path;
625 
626 	path = dv->sdev_path;
627 	ASSERT(path);
628 
629 	/* gets the relative path to /dev/ */
630 	path += 5;
631 
632 	/* gets the vtab entry if matches */
633 	for (i = 0; vtab[i].vt_name; i++) {
634 		if (strcmp(vtab[i].vt_name, path) != 0)
635 			continue;
636 		dv->sdev_flags |= vtab[i].vt_flags;
637 
638 		if (vtab[i].vt_vops) {
639 			if (vtab[i].vt_global_vops)
640 				*(vtab[i].vt_global_vops) = vtab[i].vt_vops;
641 			return (vtab[i].vt_vops);
642 		}
643 
644 		if (vtab[i].vt_service) {
645 			fs_operation_def_t *templ;
646 			templ = sdev_merge_vtab(vtab[i].vt_service);
647 			if (vn_make_ops(vtab[i].vt_name,
648 			    (const fs_operation_def_t *)templ,
649 			    &vtab[i].vt_vops) != 0) {
650 				cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
651 				    vtab[i].vt_name);
652 				/*NOTREACHED*/
653 			}
654 			if (vtab[i].vt_global_vops) {
655 				*(vtab[i].vt_global_vops) = vtab[i].vt_vops;
656 			}
657 			sdev_free_vtab(templ);
658 			return (vtab[i].vt_vops);
659 		}
660 		return (sdev_vnodeops);
661 	}
662 
663 	/* child inherits the persistence of the parent */
664 	if (SDEV_IS_PERSIST(dv->sdev_dotdot))
665 		dv->sdev_flags |= SDEV_PERSIST;
666 
667 	return (sdev_vnodeops);
668 }
669 
670 static void
671 sdev_set_no_nocache(struct sdev_node *dv)
672 {
673 	int i;
674 	char *path;
675 
676 	ASSERT(dv->sdev_path);
677 	path = dv->sdev_path + strlen("/dev/");
678 
679 	for (i = 0; vtab[i].vt_name; i++) {
680 		if (strcmp(vtab[i].vt_name, path) == 0) {
681 			if (vtab[i].vt_flags & SDEV_NO_NCACHE)
682 				dv->sdev_flags |= SDEV_NO_NCACHE;
683 			break;
684 		}
685 	}
686 }
687 
688 void *
689 sdev_get_vtor(struct sdev_node *dv)
690 {
691 	int i;
692 
693 	for (i = 0; vtab[i].vt_name; i++) {
694 		if (strcmp(vtab[i].vt_name, dv->sdev_name) != 0)
695 			continue;
696 		return ((void *)vtab[i].vt_vtor);
697 	}
698 	return (NULL);
699 }
700 
701 /*
702  * Build the base root inode
703  */
704 ino_t
705 sdev_mkino(struct sdev_node *dv)
706 {
707 	ino_t	ino;
708 
709 	/*
710 	 * for now, follow the lead of tmpfs here
711 	 * need to someday understand the requirements here
712 	 */
713 	ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
714 	ino += SDEV_ROOTINO + 1;
715 
716 	return (ino);
717 }
718 
719 static int
720 sdev_getlink(struct vnode *linkvp, char **link)
721 {
722 	int err;
723 	char *buf;
724 	struct uio uio = {0};
725 	struct iovec iov = {0};
726 
727 	if (linkvp == NULL)
728 		return (ENOENT);
729 	ASSERT(linkvp->v_type == VLNK);
730 
731 	buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
732 	iov.iov_base = buf;
733 	iov.iov_len = MAXPATHLEN;
734 	uio.uio_iov = &iov;
735 	uio.uio_iovcnt = 1;
736 	uio.uio_resid = MAXPATHLEN;
737 	uio.uio_segflg = UIO_SYSSPACE;
738 	uio.uio_llimit = MAXOFFSET_T;
739 
740 	err = VOP_READLINK(linkvp, &uio, kcred, NULL);
741 	if (err) {
742 		cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
743 		kmem_free(buf, MAXPATHLEN);
744 		return (ENOENT);
745 	}
746 
747 	/* mission complete */
748 	*link = i_ddi_strdup(buf, KM_SLEEP);
749 	kmem_free(buf, MAXPATHLEN);
750 	return (0);
751 }
752 
753 /*
754  * A convenient wrapper to get the devfs node vnode for a device
755  * minor functionality: readlink() of a /dev symlink
756  * Place the link into dv->sdev_symlink
757  */
758 static int
759 sdev_follow_link(struct sdev_node *dv)
760 {
761 	int err;
762 	struct vnode *linkvp;
763 	char *link = NULL;
764 
765 	linkvp = SDEVTOV(dv);
766 	if (linkvp == NULL)
767 		return (ENOENT);
768 	ASSERT(linkvp->v_type == VLNK);
769 	err = sdev_getlink(linkvp, &link);
770 	if (err) {
771 		(void) sdev_nodezombied(dv);
772 		dv->sdev_symlink = NULL;
773 		return (ENOENT);
774 	}
775 
776 	ASSERT(link != NULL);
777 	dv->sdev_symlink = link;
778 	return (0);
779 }
780 
781 static int
782 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
783 {
784 	vtype_t otype = SDEVTOV(dv)->v_type;
785 
786 	/*
787 	 * existing sdev_node has a different type.
788 	 */
789 	if (otype != nvap->va_type) {
790 		sdcmn_err9(("sdev_node_check: existing node "
791 		    "  %s type %d does not match new node type %d\n",
792 		    dv->sdev_name, otype, nvap->va_type));
793 		return (EEXIST);
794 	}
795 
796 	/*
797 	 * For a symlink, the target should be the same.
798 	 */
799 	if (otype == VLNK) {
800 		ASSERT(nargs != NULL);
801 		ASSERT(dv->sdev_symlink != NULL);
802 		if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
803 			sdcmn_err9(("sdev_node_check: existing node "
804 			    " %s has different symlink %s as new node "
805 			    " %s\n", dv->sdev_name, dv->sdev_symlink,
806 			    (char *)nargs));
807 			return (EEXIST);
808 		}
809 	}
810 
811 	return (0);
812 }
813 
814 /*
815  * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
816  *
817  * arguments:
818  *	- ddv (parent)
819  *	- nm (child name)
820  *	- newdv (sdev_node for nm is returned here)
821  *	- vap (vattr for the node to be created, va_type should be set.
822  *	  the defaults should be used if unknown)
823  *	- cred
824  *	- args
825  *	    . tnm (for VLNK)
826  *	    . global sdev_node (for !SDEV_GLOBAL)
827  * 	- state: SDEV_INIT, SDEV_READY
828  *
829  * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
830  *
831  * NOTE:  directory contents writers lock needs to be held before
832  *	  calling this routine.
833  */
834 int
835 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
836     struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
837     sdev_node_state_t state)
838 {
839 	int error = 0;
840 	sdev_node_state_t node_state;
841 	struct sdev_node *dv = NULL;
842 
843 	ASSERT(state != SDEV_ZOMBIE);
844 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
845 
846 	if (*newdv) {
847 		dv = *newdv;
848 	} else {
849 		/* allocate and initialize a sdev_node */
850 		if (ddv->sdev_state == SDEV_ZOMBIE) {
851 			sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
852 			    ddv->sdev_path));
853 			return (ENOENT);
854 		}
855 
856 		error = sdev_nodeinit(ddv, nm, &dv, vap);
857 		if (error != 0) {
858 			sdcmn_err9(("sdev_mknode: error %d,"
859 			    " name %s can not be initialized\n",
860 			    error, nm));
861 			return (ENOENT);
862 		}
863 		ASSERT(dv);
864 
865 		/* insert into the directory cache */
866 		error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
867 		if (error) {
868 			sdcmn_err9(("sdev_mknode: node %s can not"
869 			    " be added into directory cache\n", nm));
870 			return (ENOENT);
871 		}
872 	}
873 
874 	ASSERT(dv);
875 	node_state = dv->sdev_state;
876 	ASSERT(node_state != SDEV_ZOMBIE);
877 
878 	if (state == SDEV_READY) {
879 		switch (node_state) {
880 		case SDEV_INIT:
881 			error = sdev_nodeready(dv, vap, avp, args, cred);
882 			/*
883 			 * masking the errors with ENOENT
884 			 */
885 			if (error) {
886 				sdcmn_err9(("sdev_mknode: node %s can NOT"
887 				    " be transitioned into READY state, "
888 				    "error %d\n", nm, error));
889 				error = ENOENT;
890 			}
891 			break;
892 		case SDEV_READY:
893 			/*
894 			 * Do some sanity checking to make sure
895 			 * the existing sdev_node is what has been
896 			 * asked for.
897 			 */
898 			error = sdev_node_check(dv, vap, args);
899 			break;
900 		default:
901 			break;
902 		}
903 	}
904 
905 	if (!error) {
906 		*newdv = dv;
907 		ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
908 	} else {
909 		SDEV_SIMPLE_RELE(dv);
910 		*newdv = NULL;
911 	}
912 
913 	return (error);
914 }
915 
916 /*
917  * convenient wrapper to change vp's ATIME, CTIME and ATIME
918  */
919 void
920 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
921 {
922 	struct vattr attr;
923 	timestruc_t now;
924 	int err;
925 
926 	ASSERT(vp);
927 	gethrestime(&now);
928 	if (mask & AT_CTIME)
929 		attr.va_ctime = now;
930 	if (mask & AT_MTIME)
931 		attr.va_mtime = now;
932 	if (mask & AT_ATIME)
933 		attr.va_atime = now;
934 
935 	attr.va_mask = (mask & AT_TIMES);
936 	err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
937 	if (err && (err != EROFS)) {
938 		sdcmn_err(("update timestamps error %d\n", err));
939 	}
940 }
941 
942 /*
943  * the backing store vnode is released here
944  */
945 /*ARGSUSED1*/
946 void
947 sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
948 {
949 	/* no references */
950 	ASSERT(dv->sdev_nlink == 0);
951 
952 	if (dv->sdev_attrvp != NULLVP) {
953 		VN_RELE(dv->sdev_attrvp);
954 		/*
955 		 * reset the attrvp so that no more
956 		 * references can be made on this already
957 		 * vn_rele() vnode
958 		 */
959 		dv->sdev_attrvp = NULLVP;
960 	}
961 
962 	if (dv->sdev_attr != NULL) {
963 		kmem_free(dv->sdev_attr, sizeof (struct vattr));
964 		dv->sdev_attr = NULL;
965 	}
966 
967 	if (dv->sdev_name != NULL) {
968 		kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
969 		dv->sdev_name = NULL;
970 	}
971 
972 	if (dv->sdev_symlink != NULL) {
973 		kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
974 		dv->sdev_symlink = NULL;
975 	}
976 
977 	if (dv->sdev_path) {
978 		kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
979 		dv->sdev_path = NULL;
980 	}
981 
982 	if (!SDEV_IS_GLOBAL(dv))
983 		sdev_prof_free(dv);
984 
985 	if (SDEVTOV(dv)->v_type == VDIR) {
986 		ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
987 		avl_destroy(&dv->sdev_entries);
988 	}
989 
990 	mutex_destroy(&dv->sdev_lookup_lock);
991 	cv_destroy(&dv->sdev_lookup_cv);
992 
993 	/* return node to initial state as per constructor */
994 	(void) memset((void *)&dv->sdev_instance_data, 0,
995 	    sizeof (dv->sdev_instance_data));
996 	vn_invalid(SDEVTOV(dv));
997 	kmem_cache_free(sdev_node_cache, dv);
998 }
999 
1000 /*
1001  * DIRECTORY CACHE lookup
1002  */
1003 struct sdev_node *
1004 sdev_findbyname(struct sdev_node *ddv, char *nm)
1005 {
1006 	struct sdev_node *dv;
1007 	struct sdev_node dvtmp;
1008 	avl_index_t	where;
1009 
1010 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1011 
1012 	dvtmp.sdev_name = nm;
1013 	dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
1014 	if (dv) {
1015 		ASSERT(dv->sdev_dotdot == ddv);
1016 		ASSERT(strcmp(dv->sdev_name, nm) == 0);
1017 		/* Can't lookup stale nodes */
1018 		if (dv->sdev_flags & SDEV_STALE) {
1019 			sdcmn_err9((
1020 			    "sdev_findbyname: skipped stale node: %s\n", nm));
1021 		} else {
1022 			SDEV_HOLD(dv);
1023 			return (dv);
1024 		}
1025 	}
1026 	return (NULL);
1027 }
1028 
1029 /*
1030  * Inserts a new sdev_node in a parent directory
1031  */
1032 void
1033 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
1034 {
1035 	avl_index_t where;
1036 
1037 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1038 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
1039 	ASSERT(ddv->sdev_nlink >= 2);
1040 	ASSERT(dv->sdev_nlink == 0);
1041 
1042 	dv->sdev_dotdot = ddv;
1043 	VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
1044 	avl_insert(&ddv->sdev_entries, dv, where);
1045 	ddv->sdev_nlink++;
1046 }
1047 
1048 /*
1049  * The following check is needed because while sdev_nodes are linked
1050  * in SDEV_INIT state, they have their link counts incremented only
1051  * in SDEV_READY state.
1052  */
1053 static void
1054 decr_link(struct sdev_node *dv)
1055 {
1056 	if (dv->sdev_state != SDEV_INIT)
1057 		dv->sdev_nlink--;
1058 	else
1059 		ASSERT(dv->sdev_nlink == 0);
1060 }
1061 
/*
 * Delete an existing dv from directory cache
 *
 * In the case of a node is still held by non-zero reference count,
 *     the node is put into ZOMBIE state. Once the reference count
 *     reaches "0", the node is unlinked and destroyed,
 *     in sdev_inactive().
 *
 * The caller must hold ddv->sdev_contents as writer.  One vnode
 * reference on dv is dropped on both return paths.
 *
 * Returns EBUSY when other references remain (a READY node is then
 * marked ZOMBIE and stays linked), 0 once the node has been unlinked
 * and destroyed.
 */
static int
sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
{
	struct vnode *vp;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));

	vp = SDEVTOV(dv);
	/* v_lock is held across the v_count test and update */
	mutex_enter(&vp->v_lock);

	/* dv is held still */
	if (vp->v_count > 1) {
		rw_enter(&dv->sdev_contents, RW_WRITER);
		if (dv->sdev_state == SDEV_READY) {
			sdcmn_err9((
			    "sdev_delete: node %s busy with count %d\n",
			    dv->sdev_name, vp->v_count));
			dv->sdev_state = SDEV_ZOMBIE;
		}
		rw_exit(&dv->sdev_contents);
		/* drop one reference; the remaining holders keep it alive */
		--vp->v_count;
		mutex_exit(&vp->v_lock);
		return (EBUSY);
	}
	ASSERT(vp->v_count == 1);

	/* unlink from the memory cache */
	ddv->sdev_nlink--;	/* .. to above */
	if (vp->v_type == VDIR) {
		decr_link(dv);		/* . to self */
	}

	avl_remove(&ddv->sdev_entries, dv);
	decr_link(dv);	/* name, back to zero */
	/* last reference goes away before the node is destroyed */
	vp->v_count--;
	mutex_exit(&vp->v_lock);

	/* destroy the node */
	sdev_nodedestroy(dv, 0);
	return (0);
}
1111 
1112 /*
1113  * check if the source is in the path of the target
1114  *
1115  * source and target are different
1116  */
1117 /*ARGSUSED2*/
1118 static int
1119 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
1120 {
1121 	int error = 0;
1122 	struct sdev_node *dotdot, *dir;
1123 
1124 	dotdot = tdv->sdev_dotdot;
1125 	ASSERT(dotdot);
1126 
1127 	/* fs root */
1128 	if (dotdot == tdv) {
1129 		return (0);
1130 	}
1131 
1132 	for (;;) {
1133 		/*
1134 		 * avoid error cases like
1135 		 *	mv a a/b
1136 		 *	mv a a/b/c
1137 		 *	etc.
1138 		 */
1139 		if (dotdot == sdv) {
1140 			error = EINVAL;
1141 			break;
1142 		}
1143 
1144 		dir = dotdot;
1145 		dotdot = dir->sdev_dotdot;
1146 
1147 		/* done checking because root is reached */
1148 		if (dir == dotdot) {
1149 			break;
1150 		}
1151 	}
1152 	return (error);
1153 }
1154 
1155 int
1156 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
1157     struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
1158     struct cred *cred)
1159 {
1160 	int error = 0;
1161 	struct vnode *ovp = SDEVTOV(odv);
1162 	struct vnode *nvp;
1163 	struct vattr vattr;
1164 	int doingdir = (ovp->v_type == VDIR);
1165 	char *link = NULL;
1166 	int samedir = (oddv == nddv) ? 1 : 0;
1167 	int bkstore = 0;
1168 	struct sdev_node *idv = NULL;
1169 	struct sdev_node *ndv = NULL;
1170 	timestruc_t now;
1171 
1172 	vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1173 	error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
1174 	if (error)
1175 		return (error);
1176 
1177 	if (!samedir)
1178 		rw_enter(&oddv->sdev_contents, RW_WRITER);
1179 	rw_enter(&nddv->sdev_contents, RW_WRITER);
1180 
1181 	/*
1182 	 * the source may have been deleted by another thread before
1183 	 * we gets here.
1184 	 */
1185 	if (odv->sdev_state != SDEV_READY) {
1186 		error = ENOENT;
1187 		goto err_out;
1188 	}
1189 
1190 	if (doingdir && (odv == nddv)) {
1191 		error = EINVAL;
1192 		goto err_out;
1193 	}
1194 
1195 	/*
1196 	 * If renaming a directory, and the parents are different (".." must be
1197 	 * changed) then the source dir must not be in the dir hierarchy above
1198 	 * the target since it would orphan everything below the source dir.
1199 	 */
1200 	if (doingdir && (oddv != nddv)) {
1201 		error = sdev_checkpath(odv, nddv, cred);
1202 		if (error)
1203 			goto err_out;
1204 	}
1205 
1206 	/* destination existing */
1207 	if (*ndvp) {
1208 		nvp = SDEVTOV(*ndvp);
1209 		ASSERT(nvp);
1210 
1211 		/* handling renaming to itself */
1212 		if (odv == *ndvp) {
1213 			error = 0;
1214 			goto err_out;
1215 		}
1216 
1217 		if (nvp->v_type == VDIR) {
1218 			if (!doingdir) {
1219 				error = EISDIR;
1220 				goto err_out;
1221 			}
1222 
1223 			if (vn_vfswlock(nvp)) {
1224 				error = EBUSY;
1225 				goto err_out;
1226 			}
1227 
1228 			if (vn_mountedvfs(nvp) != NULL) {
1229 				vn_vfsunlock(nvp);
1230 				error = EBUSY;
1231 				goto err_out;
1232 			}
1233 
1234 			/* in case dir1 exists in dir2 and "mv dir1 dir2" */
1235 			if ((*ndvp)->sdev_nlink > 2) {
1236 				vn_vfsunlock(nvp);
1237 				error = EEXIST;
1238 				goto err_out;
1239 			}
1240 			vn_vfsunlock(nvp);
1241 
1242 			(void) sdev_dirdelete(nddv, *ndvp);
1243 			*ndvp = NULL;
1244 			error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1245 			    nddv->sdev_attrvp, cred, NULL, 0);
1246 			if (error)
1247 				goto err_out;
1248 		} else {
1249 			if (doingdir) {
1250 				error = ENOTDIR;
1251 				goto err_out;
1252 			}
1253 
1254 			if (SDEV_IS_PERSIST((*ndvp))) {
1255 				bkstore = 1;
1256 			}
1257 
1258 			/*
1259 			 * get rid of the node from the directory cache
1260 			 * note, in case EBUSY is returned, the ZOMBIE
1261 			 * node is taken care in sdev_mknode.
1262 			 */
1263 			(void) sdev_dirdelete(nddv, *ndvp);
1264 			*ndvp = NULL;
1265 			if (bkstore) {
1266 				error = VOP_REMOVE(nddv->sdev_attrvp,
1267 				    nnm, cred, NULL, 0);
1268 				if (error)
1269 					goto err_out;
1270 			}
1271 		}
1272 	}
1273 
1274 	/* fix the source for a symlink */
1275 	if (vattr.va_type == VLNK) {
1276 		if (odv->sdev_symlink == NULL) {
1277 			error = sdev_follow_link(odv);
1278 			if (error) {
1279 				error = ENOENT;
1280 				goto err_out;
1281 			}
1282 		}
1283 		ASSERT(odv->sdev_symlink);
1284 		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1285 	}
1286 
1287 	/*
1288 	 * make a fresh node from the source attrs
1289 	 */
1290 	ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1291 	error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1292 	    NULL, (void *)link, cred, SDEV_READY);
1293 
1294 	if (link)
1295 		kmem_free(link, strlen(link) + 1);
1296 
1297 	if (error)
1298 		goto err_out;
1299 	ASSERT(*ndvp);
1300 	ASSERT((*ndvp)->sdev_state == SDEV_READY);
1301 
1302 	/* move dir contents */
1303 	if (doingdir) {
1304 		for (idv = SDEV_FIRST_ENTRY(odv); idv;
1305 		    idv = SDEV_NEXT_ENTRY(odv, idv)) {
1306 			error = sdev_rnmnode(odv, idv,
1307 			    (struct sdev_node *)(*ndvp), &ndv,
1308 			    idv->sdev_name, cred);
1309 			if (error)
1310 				goto err_out;
1311 			ndv = NULL;
1312 		}
1313 	}
1314 
1315 	if ((*ndvp)->sdev_attrvp) {
1316 		sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1317 		    AT_CTIME|AT_ATIME);
1318 	} else {
1319 		ASSERT((*ndvp)->sdev_attr);
1320 		gethrestime(&now);
1321 		(*ndvp)->sdev_attr->va_ctime = now;
1322 		(*ndvp)->sdev_attr->va_atime = now;
1323 	}
1324 
1325 	if (nddv->sdev_attrvp) {
1326 		sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1327 		    AT_MTIME|AT_ATIME);
1328 	} else {
1329 		ASSERT(nddv->sdev_attr);
1330 		gethrestime(&now);
1331 		nddv->sdev_attr->va_mtime = now;
1332 		nddv->sdev_attr->va_atime = now;
1333 	}
1334 	rw_exit(&nddv->sdev_contents);
1335 	if (!samedir)
1336 		rw_exit(&oddv->sdev_contents);
1337 
1338 	SDEV_RELE(*ndvp);
1339 	return (error);
1340 
1341 err_out:
1342 	rw_exit(&nddv->sdev_contents);
1343 	if (!samedir)
1344 		rw_exit(&oddv->sdev_contents);
1345 	return (error);
1346 }
1347 
1348 /*
1349  * Merge sdev_node specific information into an attribute structure.
1350  *
1351  * note: sdev_node is not locked here
1352  */
1353 void
1354 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1355 {
1356 	struct vnode *vp = SDEVTOV(dv);
1357 
1358 	vap->va_nlink = dv->sdev_nlink;
1359 	vap->va_nodeid = dv->sdev_ino;
1360 	vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1361 	vap->va_type = vp->v_type;
1362 
1363 	if (vp->v_type == VDIR) {
1364 		vap->va_rdev = 0;
1365 		vap->va_fsid = vp->v_rdev;
1366 	} else if (vp->v_type == VLNK) {
1367 		vap->va_rdev = 0;
1368 		vap->va_mode  &= ~S_IFMT;
1369 		vap->va_mode |= S_IFLNK;
1370 	} else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1371 		vap->va_rdev = vp->v_rdev;
1372 		vap->va_mode &= ~S_IFMT;
1373 		if (vap->va_type == VCHR)
1374 			vap->va_mode |= S_IFCHR;
1375 		else
1376 			vap->va_mode |= S_IFBLK;
1377 	} else {
1378 		vap->va_rdev = 0;
1379 	}
1380 }
1381 
1382 static struct vattr *
1383 sdev_getdefault_attr(enum vtype type)
1384 {
1385 	if (type == VDIR)
1386 		return (&sdev_vattr_dir);
1387 	else if (type == VCHR)
1388 		return (&sdev_vattr_chr);
1389 	else if (type == VBLK)
1390 		return (&sdev_vattr_blk);
1391 	else if (type == VLNK)
1392 		return (&sdev_vattr_lnk);
1393 	else
1394 		return (NULL);
1395 }
1396 int
1397 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1398 {
1399 	int rv = 0;
1400 	struct vnode *vp = SDEVTOV(dv);
1401 
1402 	switch (vp->v_type) {
1403 	case VCHR:
1404 	case VBLK:
1405 		/*
1406 		 * If vnode is a device, return special vnode instead
1407 		 * (though it knows all about -us- via sp->s_realvp)
1408 		 */
1409 		*vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1410 		VN_RELE(vp);
1411 		if (*vpp == NULLVP)
1412 			rv = ENOSYS;
1413 		break;
1414 	default:	/* most types are returned as is */
1415 		*vpp = vp;
1416 		break;
1417 	}
1418 	return (rv);
1419 }
1420 
1421 /*
1422  * loopback into sdev_lookup()
1423  */
1424 static struct vnode *
1425 devname_find_by_devpath(char *devpath, struct vattr *vattr)
1426 {
1427 	int error = 0;
1428 	struct vnode *vp;
1429 
1430 	error = lookupname(devpath, UIO_SYSSPACE, NO_FOLLOW, NULLVPP, &vp);
1431 	if (error) {
1432 		return (NULL);
1433 	}
1434 
1435 	if (vattr)
1436 		(void) VOP_GETATTR(vp, vattr, 0, kcred, NULL);
1437 	return (vp);
1438 }
1439 
1440 /*
1441  * the junction between devname and devfs
1442  */
1443 static struct vnode *
1444 devname_configure_by_path(char *physpath, struct vattr *vattr)
1445 {
1446 	int error = 0;
1447 	struct vnode *vp;
1448 
1449 	ASSERT(strncmp(physpath, "/devices/", sizeof ("/devices/") - 1)
1450 	    == 0);
1451 
1452 	error = devfs_lookupname(physpath + sizeof ("/devices/") - 1,
1453 	    NULLVPP, &vp);
1454 	if (error != 0) {
1455 		if (error == ENODEV) {
1456 			cmn_err(CE_CONT, "%s: not found (line %d)\n",
1457 			    physpath, __LINE__);
1458 		}
1459 
1460 		return (NULL);
1461 	}
1462 
1463 	if (vattr)
1464 		(void) VOP_GETATTR(vp, vattr, 0, kcred, NULL);
1465 	return (vp);
1466 }
1467 
1468 /*
1469  * junction between devname and root file system, e.g. ufs
1470  */
1471 int
1472 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1473 {
1474 	struct vnode *rdvp = ddv->sdev_attrvp;
1475 	int rval = 0;
1476 
1477 	ASSERT(rdvp);
1478 
1479 	rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1480 	    NULL);
1481 	return (rval);
1482 }
1483 
1484 static int
1485 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1486 {
1487 	struct sdev_node *dv = NULL;
1488 	char	*nm;
1489 	struct vnode *dirvp;
1490 	int	error;
1491 	vnode_t	*vp;
1492 	int eof;
1493 	struct iovec iov;
1494 	struct uio uio;
1495 	struct dirent64 *dp;
1496 	dirent64_t *dbuf;
1497 	size_t dbuflen;
1498 	struct vattr vattr;
1499 	char *link = NULL;
1500 
1501 	if (ddv->sdev_attrvp == NULL)
1502 		return (0);
1503 	if (!(ddv->sdev_flags & SDEV_BUILD))
1504 		return (0);
1505 
1506 	dirvp = ddv->sdev_attrvp;
1507 	VN_HOLD(dirvp);
1508 	dbuf = kmem_zalloc(dlen, KM_SLEEP);
1509 
1510 	uio.uio_iov = &iov;
1511 	uio.uio_iovcnt = 1;
1512 	uio.uio_segflg = UIO_SYSSPACE;
1513 	uio.uio_fmode = 0;
1514 	uio.uio_extflg = UIO_COPY_CACHED;
1515 	uio.uio_loffset = 0;
1516 	uio.uio_llimit = MAXOFFSET_T;
1517 
1518 	eof = 0;
1519 	error = 0;
1520 	while (!error && !eof) {
1521 		uio.uio_resid = dlen;
1522 		iov.iov_base = (char *)dbuf;
1523 		iov.iov_len = dlen;
1524 		(void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1525 		error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1526 		VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1527 
1528 		dbuflen = dlen - uio.uio_resid;
1529 		if (error || dbuflen == 0)
1530 			break;
1531 
1532 		if (!(ddv->sdev_flags & SDEV_BUILD)) {
1533 			error = 0;
1534 			break;
1535 		}
1536 
1537 		for (dp = dbuf; ((intptr_t)dp <
1538 		    (intptr_t)dbuf + dbuflen);
1539 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1540 			nm = dp->d_name;
1541 
1542 			if (strcmp(nm, ".") == 0 ||
1543 			    strcmp(nm, "..") == 0)
1544 				continue;
1545 
1546 			vp = NULLVP;
1547 			dv = sdev_cache_lookup(ddv, nm);
1548 			if (dv) {
1549 				if (dv->sdev_state != SDEV_ZOMBIE) {
1550 					SDEV_SIMPLE_RELE(dv);
1551 				} else {
1552 					/*
1553 					 * A ZOMBIE node may not have been
1554 					 * cleaned up from the backing store,
1555 					 * bypass this entry in this case,
1556 					 * and clean it up from the directory
1557 					 * cache if this is the last call.
1558 					 */
1559 					(void) sdev_dirdelete(ddv, dv);
1560 				}
1561 				continue;
1562 			}
1563 
1564 			/* refill the cache if not already */
1565 			error = devname_backstore_lookup(ddv, nm, &vp);
1566 			if (error)
1567 				continue;
1568 
1569 			vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1570 			error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1571 			if (error)
1572 				continue;
1573 
1574 			if (vattr.va_type == VLNK) {
1575 				error = sdev_getlink(vp, &link);
1576 				if (error) {
1577 					continue;
1578 				}
1579 				ASSERT(link != NULL);
1580 			}
1581 
1582 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1583 				rw_exit(&ddv->sdev_contents);
1584 				rw_enter(&ddv->sdev_contents, RW_WRITER);
1585 			}
1586 			error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1587 			    cred, SDEV_READY);
1588 			rw_downgrade(&ddv->sdev_contents);
1589 
1590 			if (link != NULL) {
1591 				kmem_free(link, strlen(link) + 1);
1592 				link = NULL;
1593 			}
1594 
1595 			if (!error) {
1596 				ASSERT(dv);
1597 				ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1598 				SDEV_SIMPLE_RELE(dv);
1599 			}
1600 			vp = NULL;
1601 			dv = NULL;
1602 		}
1603 	}
1604 
1605 done:
1606 	VN_RELE(dirvp);
1607 	kmem_free(dbuf, dlen);
1608 
1609 	return (error);
1610 }
1611 
1612 void
1613 sdev_filldir_dynamic(struct sdev_node *ddv)
1614 {
1615 	int error;
1616 	int i;
1617 	struct vattr *vap;
1618 	char *nm = NULL;
1619 	struct sdev_node *dv = NULL;
1620 
1621 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1622 	ASSERT((ddv->sdev_flags & SDEV_BUILD));
1623 
1624 	vap = sdev_getdefault_attr(VDIR);
1625 	for (i = 0; vtab[i].vt_name != NULL; i++) {
1626 		nm = vtab[i].vt_name;
1627 		ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1628 		dv = NULL;
1629 		error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1630 		    NULL, kcred, SDEV_READY);
1631 		if (error) {
1632 			cmn_err(CE_WARN, "%s/%s: error %d\n",
1633 			    ddv->sdev_name, nm, error);
1634 		} else {
1635 			ASSERT(dv);
1636 			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1637 			SDEV_SIMPLE_RELE(dv);
1638 		}
1639 	}
1640 }
1641 
1642 /*
1643  * Creating a backing store entry based on sdev_attr.
1644  * This is called either as part of node creation in a persistent directory
1645  * or from setattr/setsecattr to persist access attributes across reboot.
1646  */
1647 int
1648 sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1649 {
1650 	int error = 0;
1651 	struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1652 	struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1653 	struct vattr *vap = dv->sdev_attr;
1654 	char *nm = dv->sdev_name;
1655 	struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1656 
1657 	ASSERT(dv && dv->sdev_name && rdvp);
1658 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1659 
1660 lookup:
1661 	/* try to find it in the backing store */
1662 	error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1663 	    NULL);
1664 	if (error == 0) {
1665 		if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1666 			VN_HOLD(rrvp);
1667 			VN_RELE(*rvp);
1668 			*rvp = rrvp;
1669 		}
1670 
1671 		kmem_free(dv->sdev_attr, sizeof (vattr_t));
1672 		dv->sdev_attr = NULL;
1673 		dv->sdev_attrvp = *rvp;
1674 		return (0);
1675 	}
1676 
1677 	/* let's try to persist the node */
1678 	gethrestime(&vap->va_atime);
1679 	vap->va_mtime = vap->va_atime;
1680 	vap->va_ctime = vap->va_atime;
1681 	vap->va_mask |= AT_TYPE|AT_MODE;
1682 	switch (vap->va_type) {
1683 	case VDIR:
1684 		error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1685 		sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1686 		    (void *)(*rvp), error));
1687 		break;
1688 	case VCHR:
1689 	case VBLK:
1690 	case VREG:
1691 	case VDOOR:
1692 		error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1693 		    rvp, cred, 0, NULL, NULL);
1694 		sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1695 		    (void *)(*rvp), error));
1696 		if (!error)
1697 			VN_RELE(*rvp);
1698 		break;
1699 	case VLNK:
1700 		ASSERT(dv->sdev_symlink);
1701 		error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1702 		    NULL, 0);
1703 		sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1704 		    error));
1705 		break;
1706 	default:
1707 		cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1708 		    "create\n", nm);
1709 		/*NOTREACHED*/
1710 	}
1711 
1712 	/* go back to lookup to factor out spec node and set attrvp */
1713 	if (error == 0)
1714 		goto lookup;
1715 
1716 	return (error);
1717 }
1718 
1719 static int
1720 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1721 {
1722 	int error = 0;
1723 	struct sdev_node *dup = NULL;
1724 
1725 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1726 	if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1727 		sdev_direnter(ddv, *dv);
1728 	} else {
1729 		if (dup->sdev_state == SDEV_ZOMBIE) {
1730 			error = sdev_dirdelete(ddv, dup);
1731 			/*
1732 			 * The ZOMBIE node is still hanging
1733 			 * around with more than one reference counts.
1734 			 * Fail the new node creation so that
1735 			 * the directory cache won't have
1736 			 * duplicate entries for the same named node
1737 			 */
1738 			if (error == EBUSY) {
1739 				SDEV_SIMPLE_RELE(*dv);
1740 				sdev_nodedestroy(*dv, 0);
1741 				*dv = NULL;
1742 				return (error);
1743 			}
1744 			sdev_direnter(ddv, *dv);
1745 		} else {
1746 			ASSERT((*dv)->sdev_state != SDEV_ZOMBIE);
1747 			SDEV_SIMPLE_RELE(*dv);
1748 			sdev_nodedestroy(*dv, 0);
1749 			*dv = dup;
1750 		}
1751 	}
1752 
1753 	return (0);
1754 }
1755 
1756 static int
1757 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1758 {
1759 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1760 	return (sdev_dirdelete(ddv, *dv));
1761 }
1762 
1763 /*
1764  * update the in-core directory cache
1765  */
1766 int
1767 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1768     sdev_cache_ops_t ops)
1769 {
1770 	int error = 0;
1771 
1772 	ASSERT((SDEV_HELD(*dv)));
1773 
1774 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1775 	switch (ops) {
1776 	case SDEV_CACHE_ADD:
1777 		error = sdev_cache_add(ddv, dv, nm);
1778 		break;
1779 	case SDEV_CACHE_DELETE:
1780 		error = sdev_cache_delete(ddv, dv);
1781 		break;
1782 	default:
1783 		break;
1784 	}
1785 
1786 	return (error);
1787 }
1788 
1789 /*
1790  * retrieve the named entry from the directory cache
1791  */
1792 struct sdev_node *
1793 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1794 {
1795 	struct sdev_node *dv = NULL;
1796 
1797 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1798 	dv = sdev_findbyname(ddv, nm);
1799 
1800 	return (dv);
1801 }
1802 
1803 /*
1804  * Implicit reconfig for nodes constructed by a link generator
1805  * Start devfsadm if needed, or if devfsadm is in progress,
1806  * prepare to block on devfsadm either completing or
1807  * constructing the desired node.  As devfsadmd is global
1808  * in scope, constructing all necessary nodes, we only
1809  * need to initiate it once.
1810  */
1811 static int
1812 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1813 {
1814 	int error = 0;
1815 
1816 	if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1817 		sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1818 		    ddv->sdev_name, nm, devfsadm_state));
1819 		mutex_enter(&dv->sdev_lookup_lock);
1820 		SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1821 		mutex_exit(&dv->sdev_lookup_lock);
1822 		error = 0;
1823 	} else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1824 		sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1825 		    ddv->sdev_name, nm, devfsadm_state));
1826 
1827 		sdev_devfsadmd_thread(ddv, dv, kcred);
1828 		mutex_enter(&dv->sdev_lookup_lock);
1829 		SDEV_BLOCK_OTHERS(dv,
1830 		    (SDEV_LOOKUP | SDEV_LGWAITING));
1831 		mutex_exit(&dv->sdev_lookup_lock);
1832 		error = 0;
1833 	} else {
1834 		error = -1;
1835 	}
1836 
1837 	return (error);
1838 }
1839 
1840 static int
1841 sdev_call_modulelookup(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1842     int (*fn)(char *, devname_handle_t *, struct cred *), struct cred *cred)
1843 {
1844 	struct vnode *rvp = NULL;
1845 	int error = 0;
1846 	struct vattr *vap;
1847 	devname_spec_t spec;
1848 	devname_handle_t *hdl;
1849 	void *args = NULL;
1850 	struct sdev_node *dv = *dvp;
1851 
1852 	ASSERT(dv && ddv);
1853 	hdl = &(dv->sdev_handle);
1854 	ASSERT(hdl->dh_data == dv);
1855 	mutex_enter(&dv->sdev_lookup_lock);
1856 	SDEV_BLOCK_OTHERS(dv, SDEV_LOOKUP);
1857 	mutex_exit(&dv->sdev_lookup_lock);
1858 	error = (*fn)(nm, hdl, cred);
1859 	if (error) {
1860 		return (error);
1861 	}
1862 
1863 	spec = hdl->dh_spec;
1864 	args = hdl->dh_args;
1865 	ASSERT(args);
1866 
1867 	switch (spec) {
1868 	case DEVNAME_NS_PATH:
1869 		/*
1870 		 * symlink of:
1871 		 *	/dev/dir/nm -> /device/...
1872 		 */
1873 		rvp = devname_configure_by_path((char *)args, NULL);
1874 		break;
1875 	case DEVNAME_NS_DEV:
1876 		/*
1877 		 * symlink of:
1878 		 *	/dev/dir/nm -> /dev/...
1879 		 */
1880 		rvp = devname_find_by_devpath((char *)args, NULL);
1881 		break;
1882 	default:
1883 		if (args)
1884 			kmem_free((char *)args, strlen(args) + 1);
1885 		return (ENOENT);
1886 
1887 	}
1888 
1889 	if (rvp == NULL) {
1890 		if (args)
1891 			kmem_free((char *)args, strlen(args) + 1);
1892 		return (ENOENT);
1893 	} else {
1894 		vap = sdev_getdefault_attr(VLNK);
1895 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1896 		/*
1897 		 * Could sdev_mknode return a different dv_node
1898 		 * once the lock is dropped?
1899 		 */
1900 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
1901 			rw_exit(&ddv->sdev_contents);
1902 			rw_enter(&ddv->sdev_contents, RW_WRITER);
1903 		}
1904 		error = sdev_mknode(ddv, nm, &dv, vap, NULL, args, cred,
1905 		    SDEV_READY);
1906 		rw_downgrade(&ddv->sdev_contents);
1907 		if (error) {
1908 			if (args)
1909 				kmem_free((char *)args, strlen(args) + 1);
1910 			return (error);
1911 		} else {
1912 			mutex_enter(&dv->sdev_lookup_lock);
1913 			SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
1914 			mutex_exit(&dv->sdev_lookup_lock);
1915 			error = 0;
1916 		}
1917 	}
1918 
1919 	if (args)
1920 		kmem_free((char *)args, strlen(args) + 1);
1921 
1922 	*dvp = dv;
1923 	return (0);
1924 }
1925 
1926 /*
1927  *  Support for specialized device naming construction mechanisms
1928  */
1929 static int
1930 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1931     int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1932     void *, char *), int flags, struct cred *cred)
1933 {
1934 	int rv = 0;
1935 	char *physpath = NULL;
1936 	struct vnode *rvp = NULL;
1937 	struct vattr vattr;
1938 	struct vattr *vap;
1939 	struct sdev_node *dv = *dvp;
1940 
1941 	mutex_enter(&dv->sdev_lookup_lock);
1942 	SDEV_BLOCK_OTHERS(dv, SDEV_LOOKUP);
1943 	mutex_exit(&dv->sdev_lookup_lock);
1944 
1945 	/* for non-devfsadm devices */
1946 	if (flags & SDEV_PATH) {
1947 		physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1948 		rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1949 		    NULL);
1950 		if (rv) {
1951 			kmem_free(physpath, MAXPATHLEN);
1952 			return (-1);
1953 		}
1954 
1955 		ASSERT(physpath);
1956 		rvp = devname_configure_by_path(physpath, NULL);
1957 		if (rvp == NULL) {
1958 			sdcmn_err3(("devname_configure_by_path: "
1959 			    "failed for /dev/%s/%s\n",
1960 			    ddv->sdev_name, nm));
1961 			kmem_free(physpath, MAXPATHLEN);
1962 			rv = -1;
1963 		} else {
1964 			vap = sdev_getdefault_attr(VLNK);
1965 			ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1966 
1967 			/*
1968 			 * Sdev_mknode may return back a different sdev_node
1969 			 * that was created by another thread that
1970 			 * raced to the directroy cache before this thread.
1971 			 *
1972 			 * With current directory cache mechanism
1973 			 * (linked list with the sdev_node name as
1974 			 * the entity key), this is a way to make sure
1975 			 * only one entry exists for the same name
1976 			 * in the same directory. The outcome is
1977 			 * the winner wins.
1978 			 */
1979 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1980 				rw_exit(&ddv->sdev_contents);
1981 				rw_enter(&ddv->sdev_contents, RW_WRITER);
1982 			}
1983 			rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1984 			    (void *)physpath, cred, SDEV_READY);
1985 			rw_downgrade(&ddv->sdev_contents);
1986 			kmem_free(physpath, MAXPATHLEN);
1987 			if (rv) {
1988 				return (rv);
1989 			} else {
1990 				mutex_enter(&dv->sdev_lookup_lock);
1991 				SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
1992 				mutex_exit(&dv->sdev_lookup_lock);
1993 				return (0);
1994 			}
1995 		}
1996 	} else if (flags & SDEV_VNODE) {
1997 		/*
1998 		 * DBNR has its own way to create the device
1999 		 * and return a backing store vnode in rvp
2000 		 */
2001 		ASSERT(callback);
2002 		rv = callback(ddv, nm, (void *)&rvp, kcred, NULL, NULL);
2003 		if (rv || (rvp == NULL)) {
2004 			sdcmn_err3(("devname_lookup_func: SDEV_VNODE "
2005 			    "callback failed \n"));
2006 			return (-1);
2007 		}
2008 		vap = sdev_getdefault_attr(rvp->v_type);
2009 		if (vap == NULL)
2010 			return (-1);
2011 
2012 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2013 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2014 			rw_exit(&ddv->sdev_contents);
2015 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2016 		}
2017 		rv = sdev_mknode(ddv, nm, &dv, vap, rvp, NULL,
2018 		    cred, SDEV_READY);
2019 		rw_downgrade(&ddv->sdev_contents);
2020 		if (rv)
2021 			return (rv);
2022 
2023 		mutex_enter(&dv->sdev_lookup_lock);
2024 		SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2025 		mutex_exit(&dv->sdev_lookup_lock);
2026 		return (0);
2027 	} else if (flags & SDEV_VATTR) {
2028 		/*
2029 		 * /dev/pts
2030 		 *
2031 		 * DBNR has its own way to create the device
2032 		 * "0" is returned upon success.
2033 		 *
2034 		 * callback is responsible to set the basic attributes,
2035 		 * e.g. va_type/va_uid/va_gid/
2036 		 *    dev_t if VCHR or VBLK/
2037 		 */
2038 		ASSERT(callback);
2039 		rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
2040 		if (rv) {
2041 			sdcmn_err3(("devname_lookup_func: SDEV_NONE "
2042 			    "callback failed \n"));
2043 			return (-1);
2044 		}
2045 
2046 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2047 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2048 			rw_exit(&ddv->sdev_contents);
2049 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2050 		}
2051 		rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
2052 		    cred, SDEV_READY);
2053 		rw_downgrade(&ddv->sdev_contents);
2054 
2055 		if (rv)
2056 			return (rv);
2057 
2058 		mutex_enter(&dv->sdev_lookup_lock);
2059 		SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2060 		mutex_exit(&dv->sdev_lookup_lock);
2061 		return (0);
2062 	} else {
2063 		impossible(("lookup: %s/%s by %s not supported (%d)\n",
2064 		    SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
2065 		    __LINE__));
2066 		rv = -1;
2067 	}
2068 
2069 	*dvp = dv;
2070 	return (rv);
2071 }
2072 
/*
 * Return 1 if exec_name is exactly "devfsadm", 0 otherwise.
 */
static int
is_devfsadm_thread(char *exec_name)
{
	/*
	 * note: because devfsadmd -> /usr/sbin/devfsadm
	 * it is safe to use "devfsadm" to capture the lookups
	 * from devfsadm and its daemon version.
	 */
	return (strcmp(exec_name, "devfsadm") == 0);
}
2085 
2086 
2087 /*
2088  * Lookup Order:
2089  *	sdev_node cache;
2090  *	backing store (SDEV_PERSIST);
2091  *	DBNR: a. dir_ops implemented in the loadable modules;
2092  *	      b. vnode ops in vtab.
2093  */
2094 int
2095 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
2096     struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
2097     struct cred *, void *, char *), int flags)
2098 {
2099 	int rv = 0, nmlen;
2100 	struct vnode *rvp = NULL;
2101 	struct sdev_node *dv = NULL;
2102 	int	retried = 0;
2103 	int	error = 0;
2104 	struct devname_nsmap *map = NULL;
2105 	struct devname_ops *dirops = NULL;
2106 	int (*fn)(char *, devname_handle_t *, struct cred *) = NULL;
2107 	struct vattr vattr;
2108 	char *lookup_thread = curproc->p_user.u_comm;
2109 	int failed_flags = 0;
2110 	int (*vtor)(struct sdev_node *) = NULL;
2111 	int state;
2112 	int parent_state;
2113 	char *link = NULL;
2114 
2115 	if (SDEVTOV(ddv)->v_type != VDIR)
2116 		return (ENOTDIR);
2117 
2118 	/*
2119 	 * Empty name or ., return node itself.
2120 	 */
2121 	nmlen = strlen(nm);
2122 	if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
2123 		*vpp = SDEVTOV(ddv);
2124 		VN_HOLD(*vpp);
2125 		return (0);
2126 	}
2127 
2128 	/*
2129 	 * .., return the parent directory
2130 	 */
2131 	if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
2132 		*vpp = SDEVTOV(ddv->sdev_dotdot);
2133 		VN_HOLD(*vpp);
2134 		return (0);
2135 	}
2136 
2137 	rw_enter(&ddv->sdev_contents, RW_READER);
2138 	if (ddv->sdev_flags & SDEV_VTOR) {
2139 		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2140 		ASSERT(vtor);
2141 	}
2142 
2143 tryagain:
2144 	/*
2145 	 * (a) directory cache lookup:
2146 	 */
2147 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2148 	parent_state = ddv->sdev_state;
2149 	dv = sdev_cache_lookup(ddv, nm);
2150 	if (dv) {
2151 		state = dv->sdev_state;
2152 		switch (state) {
2153 		case SDEV_INIT:
2154 			if (is_devfsadm_thread(lookup_thread))
2155 				break;
2156 
2157 			/* ZOMBIED parent won't allow node creation */
2158 			if (parent_state == SDEV_ZOMBIE) {
2159 				SD_TRACE_FAILED_LOOKUP(ddv, nm,
2160 				    retried);
2161 				goto nolock_notfound;
2162 			}
2163 
2164 			mutex_enter(&dv->sdev_lookup_lock);
2165 			/* compensate the threads started after devfsadm */
2166 			if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2167 			    !(SDEV_IS_LOOKUP(dv)))
2168 				SDEV_BLOCK_OTHERS(dv,
2169 				    (SDEV_LOOKUP | SDEV_LGWAITING));
2170 
2171 			if (SDEV_IS_LOOKUP(dv)) {
2172 				failed_flags |= SLF_REBUILT;
2173 				rw_exit(&ddv->sdev_contents);
2174 				error = sdev_wait4lookup(dv, SDEV_LOOKUP);
2175 				mutex_exit(&dv->sdev_lookup_lock);
2176 				rw_enter(&ddv->sdev_contents, RW_READER);
2177 
2178 				if (error != 0) {
2179 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2180 					    retried);
2181 					goto nolock_notfound;
2182 				}
2183 
2184 				state = dv->sdev_state;
2185 				if (state == SDEV_INIT) {
2186 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2187 					    retried);
2188 					goto nolock_notfound;
2189 				} else if (state == SDEV_READY) {
2190 					goto found;
2191 				} else if (state == SDEV_ZOMBIE) {
2192 					rw_exit(&ddv->sdev_contents);
2193 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2194 					    retried);
2195 					SDEV_RELE(dv);
2196 					goto lookup_failed;
2197 				}
2198 			} else {
2199 				mutex_exit(&dv->sdev_lookup_lock);
2200 			}
2201 			break;
2202 		case SDEV_READY:
2203 			goto found;
2204 		case SDEV_ZOMBIE:
2205 			rw_exit(&ddv->sdev_contents);
2206 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2207 			SDEV_RELE(dv);
2208 			goto lookup_failed;
2209 		default:
2210 			rw_exit(&ddv->sdev_contents);
2211 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2212 			sdev_lookup_failed(ddv, nm, failed_flags);
2213 			*vpp = NULLVP;
2214 			return (ENOENT);
2215 		}
2216 	}
2217 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2218 
2219 	/*
2220 	 * ZOMBIED parent does not allow new node creation.
2221 	 * bail out early
2222 	 */
2223 	if (parent_state == SDEV_ZOMBIE) {
2224 		rw_exit(&ddv->sdev_contents);
2225 		*vpp = NULL;
2226 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2227 		return (ENOENT);
2228 	}
2229 
2230 	/*
2231 	 * (b0): backing store lookup
2232 	 *	SDEV_PERSIST is default except:
2233 	 *		1) pts nodes
2234 	 *		2) non-chmod'ed local nodes
2235 	 */
2236 	if (SDEV_IS_PERSIST(ddv)) {
2237 		error = devname_backstore_lookup(ddv, nm, &rvp);
2238 
2239 		if (!error) {
2240 			sdcmn_err3(("devname_backstore_lookup: "
2241 			    "found attrvp %p for %s\n", (void *)rvp, nm));
2242 
2243 			vattr.va_mask = AT_MODE|AT_UID|AT_GID;
2244 			error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
2245 			if (error) {
2246 				rw_exit(&ddv->sdev_contents);
2247 				if (dv)
2248 					SDEV_RELE(dv);
2249 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2250 				sdev_lookup_failed(ddv, nm, failed_flags);
2251 				*vpp = NULLVP;
2252 				return (ENOENT);
2253 			}
2254 
2255 			if (vattr.va_type == VLNK) {
2256 				error = sdev_getlink(rvp, &link);
2257 				if (error) {
2258 					rw_exit(&ddv->sdev_contents);
2259 					if (dv)
2260 						SDEV_RELE(dv);
2261 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2262 					    retried);
2263 					sdev_lookup_failed(ddv, nm,
2264 					    failed_flags);
2265 					*vpp = NULLVP;
2266 					return (ENOENT);
2267 				}
2268 				ASSERT(link != NULL);
2269 			}
2270 
2271 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
2272 				rw_exit(&ddv->sdev_contents);
2273 				rw_enter(&ddv->sdev_contents, RW_WRITER);
2274 			}
2275 			error = sdev_mknode(ddv, nm, &dv, &vattr,
2276 			    rvp, link, cred, SDEV_READY);
2277 			rw_downgrade(&ddv->sdev_contents);
2278 
2279 			if (link != NULL) {
2280 				kmem_free(link, strlen(link) + 1);
2281 				link = NULL;
2282 			}
2283 
2284 			if (error) {
2285 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2286 				rw_exit(&ddv->sdev_contents);
2287 				if (dv)
2288 					SDEV_RELE(dv);
2289 				goto lookup_failed;
2290 			} else {
2291 				goto found;
2292 			}
2293 		} else if (retried) {
2294 			rw_exit(&ddv->sdev_contents);
2295 			sdcmn_err3(("retry of lookup of %s/%s: failed\n",
2296 			    ddv->sdev_name, nm));
2297 			if (dv)
2298 				SDEV_RELE(dv);
2299 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2300 			sdev_lookup_failed(ddv, nm, failed_flags);
2301 			*vpp = NULLVP;
2302 			return (ENOENT);
2303 		}
2304 	}
2305 
2306 
2307 	/* first thread that is doing the lookup on this node */
2308 	if (!dv) {
2309 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2310 			rw_exit(&ddv->sdev_contents);
2311 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2312 		}
2313 		error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
2314 		    cred, SDEV_INIT);
2315 		if (!dv) {
2316 			rw_exit(&ddv->sdev_contents);
2317 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2318 			sdev_lookup_failed(ddv, nm, failed_flags);
2319 			*vpp = NULLVP;
2320 			return (ENOENT);
2321 		}
2322 		rw_downgrade(&ddv->sdev_contents);
2323 	}
2324 	ASSERT(dv);
2325 	ASSERT(SDEV_HELD(dv));
2326 
2327 	if (SDEV_IS_NO_NCACHE(dv)) {
2328 		failed_flags |= SLF_NO_NCACHE;
2329 	}
2330 
2331 	if (SDEV_IS_GLOBAL(ddv)) {
2332 		map = sdev_get_map(ddv, 1);
2333 		dirops = map ? map->dir_ops : NULL;
2334 		fn = dirops ? dirops->devnops_lookup : NULL;
2335 	}
2336 
2337 	/*
2338 	 * (b1) invoking devfsadm once per life time for devfsadm nodes
2339 	 */
2340 	if ((fn == NULL) && !callback) {
2341 
2342 		if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
2343 		    SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
2344 		    ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
2345 			ASSERT(SDEV_HELD(dv));
2346 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2347 			goto nolock_notfound;
2348 		}
2349 
2350 		/*
2351 		 * filter out known non-existent devices recorded
2352 		 * during initial reconfiguration boot for which
2353 		 * reconfig should not be done and lookup may
2354 		 * be short-circuited now.
2355 		 */
2356 		if (sdev_lookup_filter(ddv, nm)) {
2357 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2358 			goto nolock_notfound;
2359 		}
2360 
2361 		/* bypassing devfsadm internal nodes */
2362 		if (is_devfsadm_thread(lookup_thread)) {
2363 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2364 			goto nolock_notfound;
2365 		}
2366 
2367 		if (sdev_reconfig_disable) {
2368 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2369 			goto nolock_notfound;
2370 		}
2371 
2372 		error = sdev_call_devfsadmd(ddv, dv, nm);
2373 		if (error == 0) {
2374 			sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
2375 			    ddv->sdev_name, nm, curproc->p_user.u_comm));
2376 			if (sdev_reconfig_verbose) {
2377 				cmn_err(CE_CONT,
2378 				    "?lookup of %s/%s by %s: reconfig\n",
2379 				    ddv->sdev_name, nm, curproc->p_user.u_comm);
2380 			}
2381 			retried = 1;
2382 			failed_flags |= SLF_REBUILT;
2383 			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
2384 			SDEV_SIMPLE_RELE(dv);
2385 			goto tryagain;
2386 		} else {
2387 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2388 			goto nolock_notfound;
2389 		}
2390 	}
2391 
2392 	/*
2393 	 * (b2) Directory Based Name Resolution (DBNR):
2394 	 *	ddv	- parent
2395 	 *	nm	- /dev/(ddv->sdev_name)/nm
2396 	 *
2397 	 *	note: module vnode ops take precedence than the build-in ones
2398 	 */
2399 	if (fn) {
2400 		error = sdev_call_modulelookup(ddv, &dv, nm, fn, cred);
2401 		if (error) {
2402 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2403 			goto notfound;
2404 		} else {
2405 			goto found;
2406 		}
2407 	} else if (callback) {
2408 		error = sdev_call_dircallback(ddv, &dv, nm, callback,
2409 		    flags, cred);
2410 		if (error == 0) {
2411 			goto found;
2412 		} else {
2413 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2414 			goto notfound;
2415 		}
2416 	}
2417 	ASSERT(rvp);
2418 
2419 found:
2420 	ASSERT(!(dv->sdev_flags & SDEV_STALE));
2421 	ASSERT(dv->sdev_state == SDEV_READY);
2422 	if (vtor) {
2423 		/*
2424 		 * Check validity of returned node
2425 		 */
2426 		switch (vtor(dv)) {
2427 		case SDEV_VTOR_VALID:
2428 			break;
2429 		case SDEV_VTOR_INVALID:
2430 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2431 			sdcmn_err7(("lookup: destroy invalid "
2432 			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
2433 			goto nolock_notfound;
2434 		case SDEV_VTOR_SKIP:
2435 			sdcmn_err7(("lookup: node not applicable - "
2436 			    "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
2437 			rw_exit(&ddv->sdev_contents);
2438 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2439 			SDEV_RELE(dv);
2440 			goto lookup_failed;
2441 		default:
2442 			cmn_err(CE_PANIC,
2443 			    "dev fs: validator failed: %s(%p)\n",
2444 			    dv->sdev_name, (void *)dv);
2445 			break;
2446 			/*NOTREACHED*/
2447 		}
2448 	}
2449 
2450 	if ((SDEVTOV(dv)->v_type == VDIR) && SDEV_IS_GLOBAL(dv)) {
2451 		rw_enter(&dv->sdev_contents, RW_READER);
2452 		(void) sdev_get_map(dv, 1);
2453 		rw_exit(&dv->sdev_contents);
2454 	}
2455 	rw_exit(&ddv->sdev_contents);
2456 	rv = sdev_to_vp(dv, vpp);
2457 	sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
2458 	    "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
2459 	    dv->sdev_state, nm, rv));
2460 	return (rv);
2461 
2462 notfound:
2463 	mutex_enter(&dv->sdev_lookup_lock);
2464 	SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2465 	mutex_exit(&dv->sdev_lookup_lock);
2466 nolock_notfound:
2467 	/*
2468 	 * Destroy the node that is created for synchronization purposes.
2469 	 */
2470 	sdcmn_err3(("devname_lookup_func: %s with state %d\n",
2471 	    nm, dv->sdev_state));
2472 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2473 	if (dv->sdev_state == SDEV_INIT) {
2474 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2475 			rw_exit(&ddv->sdev_contents);
2476 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2477 		}
2478 
2479 		/*
2480 		 * Node state may have changed during the lock
2481 		 * changes. Re-check.
2482 		 */
2483 		if (dv->sdev_state == SDEV_INIT) {
2484 			(void) sdev_dirdelete(ddv, dv);
2485 			rw_exit(&ddv->sdev_contents);
2486 			sdev_lookup_failed(ddv, nm, failed_flags);
2487 			*vpp = NULL;
2488 			return (ENOENT);
2489 		}
2490 	}
2491 
2492 	rw_exit(&ddv->sdev_contents);
2493 	SDEV_RELE(dv);
2494 
2495 lookup_failed:
2496 	sdev_lookup_failed(ddv, nm, failed_flags);
2497 	*vpp = NULL;
2498 	return (ENOENT);
2499 }
2500 
2501 /*
2502  * Given a directory node, mark all nodes beneath as
2503  * STALE, i.e. nodes that don't exist as far as new
2504  * consumers are concerned
2505  */
2506 void
2507 sdev_stale(struct sdev_node *ddv)
2508 {
2509 	struct sdev_node *dv;
2510 	struct vnode *vp;
2511 
2512 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2513 
2514 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2515 	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = SDEV_NEXT_ENTRY(ddv, dv)) {
2516 		vp = SDEVTOV(dv);
2517 		if (vp->v_type == VDIR)
2518 			sdev_stale(dv);
2519 
2520 		sdcmn_err9(("sdev_stale: setting stale %s\n",
2521 		    dv->sdev_name));
2522 		dv->sdev_flags |= SDEV_STALE;
2523 	}
2524 	ddv->sdev_flags |= SDEV_BUILD;
2525 	rw_exit(&ddv->sdev_contents);
2526 }
2527 
2528 /*
2529  * Given a directory node, clean out all the nodes beneath.
2530  * If expr is specified, clean node with names matching expr.
2531  * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2532  *	so they are excluded from future lookups.
2533  */
2534 int
2535 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2536 {
2537 	int error = 0;
2538 	int busy = 0;
2539 	struct vnode *vp;
2540 	struct sdev_node *dv, *next = NULL;
2541 	int bkstore = 0;
2542 	int len = 0;
2543 	char *bks_name = NULL;
2544 
2545 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2546 
2547 	/*
2548 	 * We try our best to destroy all unused sdev_node's
2549 	 */
2550 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2551 	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) {
2552 		next = SDEV_NEXT_ENTRY(ddv, dv);
2553 		vp = SDEVTOV(dv);
2554 
2555 		if (expr && gmatch(dv->sdev_name, expr) == 0)
2556 			continue;
2557 
2558 		if (vp->v_type == VDIR &&
2559 		    sdev_cleandir(dv, NULL, flags) != 0) {
2560 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2561 			    dv->sdev_name));
2562 			busy++;
2563 			continue;
2564 		}
2565 
2566 		if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2567 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2568 			    dv->sdev_name));
2569 			busy++;
2570 			continue;
2571 		}
2572 
2573 		/*
2574 		 * at this point, either dv is not held or SDEV_ENFORCE
2575 		 * is specified. In either case, dv needs to be deleted
2576 		 */
2577 		SDEV_HOLD(dv);
2578 
2579 		bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2580 		if (bkstore && (vp->v_type == VDIR))
2581 			bkstore += 1;
2582 
2583 		if (bkstore) {
2584 			len = strlen(dv->sdev_name) + 1;
2585 			bks_name = kmem_alloc(len, KM_SLEEP);
2586 			bcopy(dv->sdev_name, bks_name, len);
2587 		}
2588 
2589 		error = sdev_dirdelete(ddv, dv);
2590 
2591 		if (error == EBUSY) {
2592 			sdcmn_err9(("sdev_cleandir: dir busy\n"));
2593 			busy++;
2594 		}
2595 
2596 		/* take care the backing store clean up */
2597 		if (bkstore && (error == 0)) {
2598 			ASSERT(bks_name);
2599 			ASSERT(ddv->sdev_attrvp);
2600 
2601 			if (bkstore == 1) {
2602 				error = VOP_REMOVE(ddv->sdev_attrvp,
2603 				    bks_name, kcred, NULL, 0);
2604 			} else if (bkstore == 2) {
2605 				error = VOP_RMDIR(ddv->sdev_attrvp,
2606 				    bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2607 			}
2608 
2609 			/* do not propagate the backing store errors */
2610 			if (error) {
2611 				sdcmn_err9(("sdev_cleandir: backing store"
2612 				    "not cleaned\n"));
2613 				error = 0;
2614 			}
2615 
2616 			bkstore = 0;
2617 			kmem_free(bks_name, len);
2618 			bks_name = NULL;
2619 			len = 0;
2620 		}
2621 	}
2622 
2623 	ddv->sdev_flags |= SDEV_BUILD;
2624 	rw_exit(&ddv->sdev_contents);
2625 
2626 	if (busy) {
2627 		error = EBUSY;
2628 	}
2629 
2630 	return (error);
2631 }
2632 
2633 /*
2634  * a convenient wrapper for readdir() funcs
2635  */
2636 size_t
2637 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2638 {
2639 	size_t reclen = DIRENT64_RECLEN(strlen(nm));
2640 	if (reclen > size)
2641 		return (0);
2642 
2643 	de->d_ino = (ino64_t)ino;
2644 	de->d_off = (off64_t)off + 1;
2645 	de->d_reclen = (ushort_t)reclen;
2646 	(void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2647 	return (reclen);
2648 }
2649 
2650 /*
2651  * sdev_mount service routines
2652  */
2653 int
2654 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2655 {
2656 	int	error;
2657 
2658 	if (uap->datalen != sizeof (*args))
2659 		return (EINVAL);
2660 
2661 	if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2662 		cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2663 		    "get user data. error %d\n", error);
2664 		return (EFAULT);
2665 	}
2666 
2667 	return (0);
2668 }
2669 
/*
 * nextdp(dp): address of the dirent64 record that starts d_reclen
 * bytes after dp.  Any earlier definition of the name is discarded.
 */
#undef nextdp
#define	nextdp(dp)	((struct dirent64 *) \
			    (intptr_t)((char *)(dp) + (dp)->d_reclen))
2675 
2676 /*
2677  * readdir helper func
2678  */
2679 int
2680 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
2681     int flags)
2682 {
2683 	struct sdev_node *ddv = VTOSDEV(vp);
2684 	struct sdev_node *dv;
2685 	dirent64_t	*dp;
2686 	ulong_t		outcount = 0;
2687 	size_t		namelen;
2688 	ulong_t		alloc_count;
2689 	void		*outbuf;
2690 	struct iovec	*iovp;
2691 	int		error = 0;
2692 	size_t		reclen;
2693 	offset_t	diroff;
2694 	offset_t	soff;
2695 	int		this_reclen;
2696 	struct devname_nsmap	*map = NULL;
2697 	struct devname_ops	*dirops = NULL;
2698 	int (*fn)(devname_handle_t *, struct cred *) = NULL;
2699 	int (*vtor)(struct sdev_node *) = NULL;
2700 	struct vattr attr;
2701 	timestruc_t now;
2702 
2703 	ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
2704 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2705 
2706 	if (uiop->uio_loffset >= MAXOFF_T) {
2707 		if (eofp)
2708 			*eofp = 1;
2709 		return (0);
2710 	}
2711 
2712 	if (uiop->uio_iovcnt != 1)
2713 		return (EINVAL);
2714 
2715 	if (vp->v_type != VDIR)
2716 		return (ENOTDIR);
2717 
2718 	if (ddv->sdev_flags & SDEV_VTOR) {
2719 		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2720 		ASSERT(vtor);
2721 	}
2722 
2723 	if (eofp != NULL)
2724 		*eofp = 0;
2725 
2726 	soff = uiop->uio_loffset;
2727 	iovp = uiop->uio_iov;
2728 	alloc_count = iovp->iov_len;
2729 	dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
2730 	outcount = 0;
2731 
2732 	if (ddv->sdev_state == SDEV_ZOMBIE)
2733 		goto get_cache;
2734 
2735 	if (SDEV_IS_GLOBAL(ddv)) {
2736 		map = sdev_get_map(ddv, 0);
2737 		dirops = map ? map->dir_ops : NULL;
2738 		fn = dirops ? dirops->devnops_readdir : NULL;
2739 
2740 		if (map && map->dir_map) {
2741 			/*
2742 			 * load the name mapping rule database
2743 			 * through invoking devfsadm and symlink
2744 			 * all the entries in the map
2745 			 */
2746 			devname_rdr_result_t rdr_result;
2747 			int do_thread = 0;
2748 
2749 			rw_enter(&map->dir_lock, RW_READER);
2750 			do_thread = map->dir_maploaded ? 0 : 1;
2751 			rw_exit(&map->dir_lock);
2752 
2753 			if (do_thread) {
2754 				mutex_enter(&ddv->sdev_lookup_lock);
2755 				SDEV_BLOCK_OTHERS(ddv, SDEV_READDIR);
2756 				mutex_exit(&ddv->sdev_lookup_lock);
2757 
2758 				sdev_dispatch_to_nsrdr_thread(ddv,
2759 				    map->dir_map, &rdr_result);
2760 			}
2761 		} else if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
2762 		    !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
2763 		    !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
2764 		    ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
2765 		    !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
2766 		    !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2767 		    !sdev_reconfig_disable) {
2768 			/*
2769 			 * invoking "devfsadm" to do system device reconfig
2770 			 */
2771 			mutex_enter(&ddv->sdev_lookup_lock);
2772 			SDEV_BLOCK_OTHERS(ddv,
2773 			    (SDEV_READDIR|SDEV_LGWAITING));
2774 			mutex_exit(&ddv->sdev_lookup_lock);
2775 
2776 			sdcmn_err8(("readdir of %s by %s: reconfig\n",
2777 			    ddv->sdev_path, curproc->p_user.u_comm));
2778 			if (sdev_reconfig_verbose) {
2779 				cmn_err(CE_CONT,
2780 				    "?readdir of %s by %s: reconfig\n",
2781 				    ddv->sdev_path, curproc->p_user.u_comm);
2782 			}
2783 
2784 			sdev_devfsadmd_thread(ddv, NULL, kcred);
2785 		} else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
2786 			/*
2787 			 * compensate the "ls" started later than "devfsadm"
2788 			 */
2789 			mutex_enter(&ddv->sdev_lookup_lock);
2790 			SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
2791 			mutex_exit(&ddv->sdev_lookup_lock);
2792 		}
2793 
2794 		/*
2795 		 * release the contents lock so that
2796 		 * the cache may be updated by devfsadmd
2797 		 */
2798 		rw_exit(&ddv->sdev_contents);
2799 		mutex_enter(&ddv->sdev_lookup_lock);
2800 		if (SDEV_IS_READDIR(ddv))
2801 			(void) sdev_wait4lookup(ddv, SDEV_READDIR);
2802 		mutex_exit(&ddv->sdev_lookup_lock);
2803 		rw_enter(&ddv->sdev_contents, RW_READER);
2804 
2805 		sdcmn_err4(("readdir of directory %s by %s\n",
2806 		    ddv->sdev_name, curproc->p_user.u_comm));
2807 		if (ddv->sdev_flags & SDEV_BUILD) {
2808 			if (SDEV_IS_PERSIST(ddv)) {
2809 				error = sdev_filldir_from_store(ddv,
2810 				    alloc_count, cred);
2811 			}
2812 			ddv->sdev_flags &= ~SDEV_BUILD;
2813 		}
2814 	}
2815 
2816 get_cache:
2817 	/* handle "." and ".." */
2818 	diroff = 0;
2819 	if (soff == 0) {
2820 		/* first time */
2821 		this_reclen = DIRENT64_RECLEN(1);
2822 		if (alloc_count < this_reclen) {
2823 			error = EINVAL;
2824 			goto done;
2825 		}
2826 
2827 		dp->d_ino = (ino64_t)ddv->sdev_ino;
2828 		dp->d_off = (off64_t)1;
2829 		dp->d_reclen = (ushort_t)this_reclen;
2830 
2831 		(void) strncpy(dp->d_name, ".",
2832 		    DIRENT64_NAMELEN(this_reclen));
2833 		outcount += dp->d_reclen;
2834 		dp = nextdp(dp);
2835 	}
2836 
2837 	diroff++;
2838 	if (soff <= 1) {
2839 		this_reclen = DIRENT64_RECLEN(2);
2840 		if (alloc_count < outcount + this_reclen) {
2841 			error = EINVAL;
2842 			goto done;
2843 		}
2844 
2845 		dp->d_reclen = (ushort_t)this_reclen;
2846 		dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
2847 		dp->d_off = (off64_t)2;
2848 
2849 		(void) strncpy(dp->d_name, "..",
2850 		    DIRENT64_NAMELEN(this_reclen));
2851 		outcount += dp->d_reclen;
2852 
2853 		dp = nextdp(dp);
2854 	}
2855 
2856 
2857 	/* gets the cache */
2858 	diroff++;
2859 	for (dv = SDEV_FIRST_ENTRY(ddv); dv;
2860 	    dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
2861 		sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
2862 		    diroff, soff, dv->sdev_name));
2863 
2864 		/* bypassing pre-matured nodes */
2865 		if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
2866 			sdcmn_err3(("sdev_readdir: pre-mature node  "
2867 			    "%s\n", dv->sdev_name));
2868 			continue;
2869 		}
2870 
2871 		/* don't list stale nodes */
2872 		if (dv->sdev_flags & SDEV_STALE) {
2873 			sdcmn_err4(("sdev_readdir: STALE node  "
2874 			    "%s\n", dv->sdev_name));
2875 			continue;
2876 		}
2877 
2878 		/*
2879 		 * Check validity of node
2880 		 */
2881 		if (vtor) {
2882 			switch (vtor(dv)) {
2883 			case SDEV_VTOR_VALID:
2884 				break;
2885 			case SDEV_VTOR_INVALID:
2886 			case SDEV_VTOR_SKIP:
2887 				continue;
2888 			default:
2889 				cmn_err(CE_PANIC,
2890 				    "dev fs: validator failed: %s(%p)\n",
2891 				    dv->sdev_name, (void *)dv);
2892 				break;
2893 			/*NOTREACHED*/
2894 			}
2895 		}
2896 
2897 		/*
2898 		 * call back into the module for the validity/bookkeeping
2899 		 * of this entry
2900 		 */
2901 		if (fn) {
2902 			error = (*fn)(&(dv->sdev_handle), cred);
2903 			if (error) {
2904 				sdcmn_err4(("sdev_readdir: module did not "
2905 				    "validate %s\n", dv->sdev_name));
2906 				continue;
2907 			}
2908 		}
2909 
2910 		namelen = strlen(dv->sdev_name);
2911 		reclen = DIRENT64_RECLEN(namelen);
2912 		if (outcount + reclen > alloc_count) {
2913 			goto full;
2914 		}
2915 		dp->d_reclen = (ushort_t)reclen;
2916 		dp->d_ino = (ino64_t)dv->sdev_ino;
2917 		dp->d_off = (off64_t)diroff + 1;
2918 		(void) strncpy(dp->d_name, dv->sdev_name,
2919 		    DIRENT64_NAMELEN(reclen));
2920 		outcount += reclen;
2921 		dp = nextdp(dp);
2922 	}
2923 
2924 full:
2925 	sdcmn_err4(("sdev_readdir: moving %lu bytes: "
2926 	    "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
2927 	    (void *)dv));
2928 
2929 	if (outcount)
2930 		error = uiomove(outbuf, outcount, UIO_READ, uiop);
2931 
2932 	if (!error) {
2933 		uiop->uio_loffset = diroff;
2934 		if (eofp)
2935 			*eofp = dv ? 0 : 1;
2936 	}
2937 
2938 
2939 	if (ddv->sdev_attrvp) {
2940 		gethrestime(&now);
2941 		attr.va_ctime = now;
2942 		attr.va_atime = now;
2943 		attr.va_mask = AT_CTIME|AT_ATIME;
2944 
2945 		(void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
2946 	}
2947 done:
2948 	kmem_free(outbuf, alloc_count);
2949 	return (error);
2950 }
2951 
2952 
2953 static int
2954 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2955 {
2956 	vnode_t *vp;
2957 	vnode_t *cvp;
2958 	struct sdev_node *svp;
2959 	char *nm;
2960 	struct pathname pn;
2961 	int error;
2962 	int persisted = 0;
2963 
2964 	if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2965 		return (error);
2966 	nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2967 
2968 	vp = rootdir;
2969 	VN_HOLD(vp);
2970 
2971 	while (pn_pathleft(&pn)) {
2972 		ASSERT(vp->v_type == VDIR);
2973 		(void) pn_getcomponent(&pn, nm);
2974 		error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2975 		    NULL, NULL);
2976 		VN_RELE(vp);
2977 
2978 		if (error)
2979 			break;
2980 
2981 		/* traverse mount points encountered on our journey */
2982 		if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2983 			VN_RELE(cvp);
2984 			break;
2985 		}
2986 
2987 		/*
2988 		 * Direct the operation to the persisting filesystem
2989 		 * underlying /dev.  Bail if we encounter a
2990 		 * non-persistent dev entity here.
2991 		 */
2992 		if (cvp->v_vfsp->vfs_fstype == devtype) {
2993 
2994 			if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2995 				error = ENOENT;
2996 				VN_RELE(cvp);
2997 				break;
2998 			}
2999 
3000 			if (VTOSDEV(cvp) == NULL) {
3001 				error = ENOENT;
3002 				VN_RELE(cvp);
3003 				break;
3004 			}
3005 			svp = VTOSDEV(cvp);
3006 			if ((vp = svp->sdev_attrvp) == NULL) {
3007 				error = ENOENT;
3008 				VN_RELE(cvp);
3009 				break;
3010 			}
3011 			persisted = 1;
3012 			VN_HOLD(vp);
3013 			VN_RELE(cvp);
3014 			cvp = vp;
3015 		}
3016 
3017 		vp = cvp;
3018 		pn_skipslash(&pn);
3019 	}
3020 
3021 	kmem_free(nm, MAXNAMELEN);
3022 	pn_free(&pn);
3023 
3024 	if (error)
3025 		return (error);
3026 
3027 	/*
3028 	 * Only return persisted nodes in the filesystem underlying /dev.
3029 	 */
3030 	if (!persisted) {
3031 		VN_RELE(vp);
3032 		return (ENOENT);
3033 	}
3034 
3035 	*r_vp = vp;
3036 	return (0);
3037 }
3038 
3039 int
3040 sdev_modctl_readdir(const char *dir, char ***dirlistp,
3041 	int *npathsp, int *npathsp_alloc, int checking_empty)
3042 {
3043 	char	**pathlist = NULL;
3044 	char	**newlist = NULL;
3045 	int	npaths = 0;
3046 	int	npaths_alloc = 0;
3047 	dirent64_t *dbuf = NULL;
3048 	int	n;
3049 	char	*s;
3050 	int error;
3051 	vnode_t *vp;
3052 	int eof;
3053 	struct iovec iov;
3054 	struct uio uio;
3055 	struct dirent64 *dp;
3056 	size_t dlen;
3057 	size_t dbuflen;
3058 	int ndirents = 64;
3059 	char *nm;
3060 
3061 	error = sdev_modctl_lookup(dir, &vp);
3062 	sdcmn_err11(("modctl readdir: %s by %s: %s\n",
3063 	    dir, curproc->p_user.u_comm,
3064 	    (error == 0) ? "ok" : "failed"));
3065 	if (error)
3066 		return (error);
3067 
3068 	dlen = ndirents * (sizeof (*dbuf));
3069 	dbuf = kmem_alloc(dlen, KM_SLEEP);
3070 
3071 	uio.uio_iov = &iov;
3072 	uio.uio_iovcnt = 1;
3073 	uio.uio_segflg = UIO_SYSSPACE;
3074 	uio.uio_fmode = 0;
3075 	uio.uio_extflg = UIO_COPY_CACHED;
3076 	uio.uio_loffset = 0;
3077 	uio.uio_llimit = MAXOFFSET_T;
3078 
3079 	eof = 0;
3080 	error = 0;
3081 	while (!error && !eof) {
3082 		uio.uio_resid = dlen;
3083 		iov.iov_base = (char *)dbuf;
3084 		iov.iov_len = dlen;
3085 
3086 		(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3087 		error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
3088 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3089 
3090 		dbuflen = dlen - uio.uio_resid;
3091 
3092 		if (error || dbuflen == 0)
3093 			break;
3094 
3095 		for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
3096 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
3097 
3098 			nm = dp->d_name;
3099 
3100 			if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
3101 				continue;
3102 			if (npaths == npaths_alloc) {
3103 				npaths_alloc += 64;
3104 				newlist = (char **)
3105 				    kmem_zalloc((npaths_alloc + 1) *
3106 				    sizeof (char *), KM_SLEEP);
3107 				if (pathlist) {
3108 					bcopy(pathlist, newlist,
3109 					    npaths * sizeof (char *));
3110 					kmem_free(pathlist,
3111 					    (npaths + 1) * sizeof (char *));
3112 				}
3113 				pathlist = newlist;
3114 			}
3115 			n = strlen(nm) + 1;
3116 			s = kmem_alloc(n, KM_SLEEP);
3117 			bcopy(nm, s, n);
3118 			pathlist[npaths++] = s;
3119 			sdcmn_err11(("  %s/%s\n", dir, s));
3120 
3121 			/* if checking empty, one entry is as good as many */
3122 			if (checking_empty) {
3123 				eof = 1;
3124 				break;
3125 			}
3126 		}
3127 	}
3128 
3129 exit:
3130 	VN_RELE(vp);
3131 
3132 	if (dbuf)
3133 		kmem_free(dbuf, dlen);
3134 
3135 	if (error)
3136 		return (error);
3137 
3138 	*dirlistp = pathlist;
3139 	*npathsp = npaths;
3140 	*npathsp_alloc = npaths_alloc;
3141 
3142 	return (0);
3143 }
3144 
/*
 * Release a name list produced by sdev_modctl_readdir().
 *
 *	pathlist	- the array of names; NULL is accepted, which
 *			  is what sdev_modctl_readdir() hands back when
 *			  it found no names
 *	npaths		- number of names stored in the array
 *	npaths_alloc	- slot count reported with the array; the array
 *			  itself holds npaths_alloc + 1 pointers
 */
void
sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
{
	int	i;

	/* nothing was ever allocated for an empty result */
	if (pathlist == NULL)
		return;

	for (i = 0; i < npaths; i++)
		kmem_free(pathlist[i], strlen(pathlist[i]) + 1);

	kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
}
3157 
3158 int
3159 sdev_modctl_devexists(const char *path)
3160 {
3161 	vnode_t *vp;
3162 	int error;
3163 
3164 	error = sdev_modctl_lookup(path, &vp);
3165 	sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
3166 	    path, curproc->p_user.u_comm,
3167 	    (error == 0) ? "ok" : "failed"));
3168 	if (error == 0)
3169 		VN_RELE(vp);
3170 
3171 	return (error);
3172 }
3173 
3174 void
3175 sdev_update_newnsmap(struct devname_nsmap *map, char *module, char *mapname)
3176 {
3177 	rw_enter(&map->dir_lock, RW_WRITER);
3178 	if (module) {
3179 		ASSERT(map->dir_newmodule == NULL);
3180 		map->dir_newmodule = i_ddi_strdup(module, KM_SLEEP);
3181 	}
3182 	if (mapname) {
3183 		ASSERT(map->dir_newmap == NULL);
3184 		map->dir_newmap = i_ddi_strdup(mapname, KM_SLEEP);
3185 	}
3186 
3187 	map->dir_invalid = 1;
3188 	rw_exit(&map->dir_lock);
3189 }
3190 
3191 void
3192 sdev_replace_nsmap(struct devname_nsmap *map, char *module, char *mapname)
3193 {
3194 	char *old_module = NULL;
3195 	char *old_map = NULL;
3196 
3197 	ASSERT(RW_LOCK_HELD(&map->dir_lock));
3198 	if (!rw_tryupgrade(&map->dir_lock)) {
3199 		rw_exit(&map->dir_lock);
3200 		rw_enter(&map->dir_lock, RW_WRITER);
3201 	}
3202 
3203 	old_module = map->dir_module;
3204 	if (module) {
3205 		if (old_module && strcmp(old_module, module) != 0) {
3206 			kmem_free(old_module, strlen(old_module) + 1);
3207 		}
3208 		map->dir_module = module;
3209 		map->dir_newmodule = NULL;
3210 	}
3211 
3212 	old_map = map->dir_map;
3213 	if (mapname) {
3214 		if (old_map && strcmp(old_map, mapname) != 0) {
3215 			kmem_free(old_map, strlen(old_map) + 1);
3216 		}
3217 
3218 		map->dir_map = mapname;
3219 		map->dir_newmap = NULL;
3220 	}
3221 	map->dir_maploaded = 0;
3222 	map->dir_invalid = 0;
3223 	rw_downgrade(&map->dir_lock);
3224 }
3225 
3226 /*
3227  * dir_name should have at least one attribute,
3228  *	dir_module
3229  *	or dir_map
3230  *	or both
3231  * caller holds the devname_nsmaps_lock
3232  */
3233 void
3234 sdev_insert_nsmap(char *dir_name, char *dir_module, char *dir_map)
3235 {
3236 	struct devname_nsmap *map;
3237 	int len = 0;
3238 
3239 	ASSERT(dir_name);
3240 	ASSERT(dir_module || dir_map);
3241 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3242 
3243 	if (map = sdev_get_nsmap_by_dir(dir_name, 1)) {
3244 		sdev_update_newnsmap(map, dir_module, dir_map);
3245 		return;
3246 	}
3247 
3248 	map = (struct devname_nsmap *)kmem_zalloc(sizeof (*map), KM_SLEEP);
3249 	map->dir_name = i_ddi_strdup(dir_name, KM_SLEEP);
3250 	if (dir_module) {
3251 		map->dir_module = i_ddi_strdup(dir_module, KM_SLEEP);
3252 	}
3253 
3254 	if (dir_map) {
3255 		if (dir_map[0] != '/') {
3256 			len = strlen(ETC_DEV_DIR) + strlen(dir_map) + 2;
3257 			map->dir_map = kmem_zalloc(len, KM_SLEEP);
3258 			(void) snprintf(map->dir_map, len, "%s/%s", ETC_DEV_DIR,
3259 			    dir_map);
3260 		} else {
3261 			map->dir_map = i_ddi_strdup(dir_map, KM_SLEEP);
3262 		}
3263 	}
3264 
3265 	map->dir_ops = NULL;
3266 	map->dir_maploaded = 0;
3267 	map->dir_invalid = 0;
3268 	rw_init(&map->dir_lock, NULL, RW_DEFAULT, NULL);
3269 
3270 	map->next = devname_nsmaps;
3271 	map->prev = NULL;
3272 	if (devname_nsmaps) {
3273 		devname_nsmaps->prev = map;
3274 	}
3275 	devname_nsmaps = map;
3276 }
3277 
3278 struct devname_nsmap *
3279 sdev_get_nsmap_by_dir(char *dir_path, int locked)
3280 {
3281 	struct devname_nsmap *map = NULL;
3282 
3283 	if (!locked)
3284 		mutex_enter(&devname_nsmaps_lock);
3285 	for (map = devname_nsmaps; map; map = map->next) {
3286 		sdcmn_err6(("sdev_get_nsmap_by_dir: dir %s\n", map->dir_name));
3287 		if (strcmp(map->dir_name, dir_path) == 0) {
3288 			if (!locked)
3289 				mutex_exit(&devname_nsmaps_lock);
3290 			return (map);
3291 		}
3292 	}
3293 	if (!locked)
3294 		mutex_exit(&devname_nsmaps_lock);
3295 	return (NULL);
3296 }
3297 
3298 struct devname_nsmap *
3299 sdev_get_nsmap_by_module(char *mod_name)
3300 {
3301 	struct devname_nsmap *map = NULL;
3302 
3303 	mutex_enter(&devname_nsmaps_lock);
3304 	for (map = devname_nsmaps; map; map = map->next) {
3305 		sdcmn_err7(("sdev_get_nsmap_by_module: module %s\n",
3306 		    map->dir_module));
3307 		if (map->dir_module && strcmp(map->dir_module, mod_name) == 0) {
3308 			mutex_exit(&devname_nsmaps_lock);
3309 			return (map);
3310 		}
3311 	}
3312 	mutex_exit(&devname_nsmaps_lock);
3313 	return (NULL);
3314 }
3315 
3316 void
3317 sdev_invalidate_nsmaps()
3318 {
3319 	struct devname_nsmap *map = NULL;
3320 
3321 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3322 
3323 	if (devname_nsmaps == NULL)
3324 		return;
3325 
3326 	for (map = devname_nsmaps; map; map = map->next) {
3327 		rw_enter(&map->dir_lock, RW_WRITER);
3328 		map->dir_invalid = 1;
3329 		rw_exit(&map->dir_lock);
3330 	}
3331 	devname_nsmaps_invalidated = 1;
3332 }
3333 
3334 
3335 int
3336 sdev_nsmaps_loaded()
3337 {
3338 	int ret = 0;
3339 
3340 	mutex_enter(&devname_nsmaps_lock);
3341 	if (devname_nsmaps_loaded)
3342 		ret = 1;
3343 
3344 	mutex_exit(&devname_nsmaps_lock);
3345 	return (ret);
3346 }
3347 
3348 int
3349 sdev_nsmaps_reloaded()
3350 {
3351 	int ret = 0;
3352 
3353 	mutex_enter(&devname_nsmaps_lock);
3354 	if (devname_nsmaps_invalidated)
3355 		ret = 1;
3356 
3357 	mutex_exit(&devname_nsmaps_lock);
3358 	return (ret);
3359 }
3360 
3361 static void
3362 sdev_free_nsmap(struct devname_nsmap *map)
3363 {
3364 	ASSERT(map);
3365 	if (map->dir_name)
3366 		kmem_free(map->dir_name, strlen(map->dir_name) + 1);
3367 	if (map->dir_module)
3368 		kmem_free(map->dir_module, strlen(map->dir_module) + 1);
3369 	if (map->dir_map)
3370 		kmem_free(map->dir_map, strlen(map->dir_map) + 1);
3371 	rw_destroy(&map->dir_lock);
3372 	kmem_free(map, sizeof (*map));
3373 }
3374 
3375 void
3376 sdev_validate_nsmaps()
3377 {
3378 	struct devname_nsmap *map = NULL;
3379 	struct devname_nsmap *oldmap = NULL;
3380 
3381 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3382 	map = devname_nsmaps;
3383 	while (map) {
3384 		rw_enter(&map->dir_lock, RW_READER);
3385 		if ((map->dir_invalid == 1) && (map->dir_newmodule == NULL) &&
3386 		    (map->dir_newmap == NULL)) {
3387 			oldmap = map;
3388 			rw_exit(&map->dir_lock);
3389 			if (map->prev)
3390 				map->prev->next = oldmap->next;
3391 			if (map == devname_nsmaps)
3392 				devname_nsmaps = oldmap->next;
3393 
3394 			map = oldmap->next;
3395 			if (map)
3396 				map->prev = oldmap->prev;
3397 			sdev_free_nsmap(oldmap);
3398 			oldmap = NULL;
3399 		} else {
3400 			rw_exit(&map->dir_lock);
3401 			map = map->next;
3402 		}
3403 	}
3404 	devname_nsmaps_invalidated = 0;
3405 }
3406 
3407 static int
3408 sdev_map_is_invalid(struct devname_nsmap *map)
3409 {
3410 	int ret = 0;
3411 
3412 	ASSERT(map);
3413 	rw_enter(&map->dir_lock, RW_READER);
3414 	if (map->dir_invalid)
3415 		ret = 1;
3416 	rw_exit(&map->dir_lock);
3417 	return (ret);
3418 }
3419 
3420 static int
3421 sdev_check_map(struct devname_nsmap *map)
3422 {
3423 	struct devname_nsmap *mapp;
3424 
3425 	mutex_enter(&devname_nsmaps_lock);
3426 	if (devname_nsmaps == NULL) {
3427 		mutex_exit(&devname_nsmaps_lock);
3428 		return (1);
3429 	}
3430 
3431 	for (mapp = devname_nsmaps; mapp; mapp = mapp->next) {
3432 		if (mapp == map) {
3433 			mutex_exit(&devname_nsmaps_lock);
3434 			return (0);
3435 		}
3436 	}
3437 
3438 	mutex_exit(&devname_nsmaps_lock);
3439 	return (1);
3440 
3441 }
3442 
3443 struct devname_nsmap *
3444 sdev_get_map(struct sdev_node *dv, int validate)
3445 {
3446 	struct devname_nsmap *map;
3447 	int error;
3448 
3449 	ASSERT(RW_READ_HELD(&dv->sdev_contents));
3450 	map = dv->sdev_mapinfo;
3451 	if (map && sdev_check_map(map)) {
3452 		if (!rw_tryupgrade(&dv->sdev_contents)) {
3453 			rw_exit(&dv->sdev_contents);
3454 			rw_enter(&dv->sdev_contents, RW_WRITER);
3455 		}
3456 		dv->sdev_mapinfo = NULL;
3457 		rw_downgrade(&dv->sdev_contents);
3458 		return (NULL);
3459 	}
3460 
3461 	if (validate && (!map || (map && sdev_map_is_invalid(map)))) {
3462 		if (!rw_tryupgrade(&dv->sdev_contents)) {
3463 			rw_exit(&dv->sdev_contents);
3464 			rw_enter(&dv->sdev_contents, RW_WRITER);
3465 		}
3466 		error = sdev_get_moduleops(dv);
3467 		if (!error)
3468 			map = dv->sdev_mapinfo;
3469 		rw_downgrade(&dv->sdev_contents);
3470 	}
3471 	return (map);
3472 }
3473 
3474 extern int sdev_vnodeops_tbl_size;
3475 
3476 /*
3477  * construct a new template with overrides from vtab
3478  */
3479 static fs_operation_def_t *
3480 sdev_merge_vtab(const fs_operation_def_t tab[])
3481 {
3482 	fs_operation_def_t *new;
3483 	const fs_operation_def_t *tab_entry;
3484 
3485 	/* make a copy of standard vnode ops table */
3486 	new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
3487 	bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
3488 
3489 	/* replace the overrides from tab */
3490 	for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
3491 		fs_operation_def_t *std_entry = new;
3492 		while (std_entry->name) {
3493 			if (strcmp(tab_entry->name, std_entry->name) == 0) {
3494 				std_entry->func = tab_entry->func;
3495 				break;
3496 			}
3497 			std_entry++;
3498 		}
3499 		if (std_entry->name == NULL)
3500 			cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
3501 			    tab_entry->name);
3502 	}
3503 
3504 	return (new);
3505 }
3506 
3507 /* free memory allocated by sdev_merge_vtab */
3508 static void
3509 sdev_free_vtab(fs_operation_def_t *new)
3510 {
3511 	kmem_free(new, sdev_vnodeops_tbl_size);
3512 }
3513 
3514 void
3515 devname_get_vnode(devname_handle_t *hdl, vnode_t **vpp)
3516 {
3517 	struct sdev_node *dv = hdl->dh_data;
3518 
3519 	ASSERT(dv);
3520 
3521 	rw_enter(&dv->sdev_contents, RW_READER);
3522 	*vpp = SDEVTOV(dv);
3523 	rw_exit(&dv->sdev_contents);
3524 }
3525 
3526 int
3527 devname_get_path(devname_handle_t *hdl, char **path)
3528 {
3529 	struct sdev_node *dv = hdl->dh_data;
3530 
3531 	ASSERT(dv);
3532 
3533 	rw_enter(&dv->sdev_contents, RW_READER);
3534 	*path = dv->sdev_path;
3535 	rw_exit(&dv->sdev_contents);
3536 	return (0);
3537 }
3538 
3539 int
3540 devname_get_name(devname_handle_t *hdl, char **entry)
3541 {
3542 	struct sdev_node *dv = hdl->dh_data;
3543 
3544 	ASSERT(dv);
3545 	rw_enter(&dv->sdev_contents, RW_READER);
3546 	*entry = dv->sdev_name;
3547 	rw_exit(&dv->sdev_contents);
3548 	return (0);
3549 }
3550 
3551 void
3552 devname_get_dir_vnode(devname_handle_t *hdl, vnode_t **vpp)
3553 {
3554 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3555 
3556 	ASSERT(dv);
3557 
3558 	rw_enter(&dv->sdev_contents, RW_READER);
3559 	*vpp = SDEVTOV(dv);
3560 	rw_exit(&dv->sdev_contents);
3561 }
3562 
3563 int
3564 devname_get_dir_path(devname_handle_t *hdl, char **path)
3565 {
3566 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3567 
3568 	ASSERT(dv);
3569 	rw_enter(&dv->sdev_contents, RW_READER);
3570 	*path = dv->sdev_path;
3571 	rw_exit(&dv->sdev_contents);
3572 	return (0);
3573 }
3574 
3575 int
3576 devname_get_dir_name(devname_handle_t *hdl, char **entry)
3577 {
3578 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3579 
3580 	ASSERT(dv);
3581 	rw_enter(&dv->sdev_contents, RW_READER);
3582 	*entry = dv->sdev_name;
3583 	rw_exit(&dv->sdev_contents);
3584 	return (0);
3585 }
3586 
3587 int
3588 devname_get_dir_nsmap(devname_handle_t *hdl, struct devname_nsmap **map)
3589 {
3590 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3591 
3592 	ASSERT(dv);
3593 	rw_enter(&dv->sdev_contents, RW_READER);
3594 	*map = dv->sdev_mapinfo;
3595 	rw_exit(&dv->sdev_contents);
3596 	return (0);
3597 }
3598 
3599 int
3600 devname_get_dir_handle(devname_handle_t *hdl, devname_handle_t **dir_hdl)
3601 {
3602 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3603 
3604 	ASSERT(dv);
3605 	rw_enter(&dv->sdev_contents, RW_READER);
3606 	*dir_hdl = &(dv->sdev_handle);
3607 	rw_exit(&dv->sdev_contents);
3608 	return (0);
3609 }
3610 
3611 void
3612 devname_set_nodetype(devname_handle_t *hdl, void *args, int spec)
3613 {
3614 	struct sdev_node *dv = hdl->dh_data;
3615 
3616 	ASSERT(dv);
3617 	rw_enter(&dv->sdev_contents, RW_WRITER);
3618 	hdl->dh_spec = (devname_spec_t)spec;
3619 	hdl->dh_args = (void *)i_ddi_strdup((char *)args, KM_SLEEP);
3620 	rw_exit(&dv->sdev_contents);
3621 }
3622 
3623 /*
3624  * a generic setattr() function
3625  *
3626  * note: flags only supports AT_UID and AT_GID.
3627  *	 Future enhancements can be done for other types, e.g. AT_MODE
3628  */
3629 int
3630 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
3631     struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
3632     int), int protocol)
3633 {
3634 	struct sdev_node	*dv = VTOSDEV(vp);
3635 	struct sdev_node	*parent = dv->sdev_dotdot;
3636 	struct vattr		*get;
3637 	uint_t			mask = vap->va_mask;
3638 	int 			error;
3639 
3640 	/* some sanity checks */
3641 	if (vap->va_mask & AT_NOSET)
3642 		return (EINVAL);
3643 
3644 	if (vap->va_mask & AT_SIZE) {
3645 		if (vp->v_type == VDIR) {
3646 			return (EISDIR);
3647 		}
3648 	}
3649 
3650 	/* no need to set attribute, but do not fail either */
3651 	ASSERT(parent);
3652 	rw_enter(&parent->sdev_contents, RW_READER);
3653 	if (dv->sdev_state == SDEV_ZOMBIE) {
3654 		rw_exit(&parent->sdev_contents);
3655 		return (0);
3656 	}
3657 
3658 	/* If backing store exists, just set it. */
3659 	if (dv->sdev_attrvp) {
3660 		rw_exit(&parent->sdev_contents);
3661 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3662 	}
3663 
3664 	/*
3665 	 * Otherwise, for nodes with the persistence attribute, create it.
3666 	 */
3667 	ASSERT(dv->sdev_attr);
3668 	if (SDEV_IS_PERSIST(dv) ||
3669 	    ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
3670 		sdev_vattr_merge(dv, vap);
3671 		rw_enter(&dv->sdev_contents, RW_WRITER);
3672 		error = sdev_shadow_node(dv, cred);
3673 		rw_exit(&dv->sdev_contents);
3674 		rw_exit(&parent->sdev_contents);
3675 
3676 		if (error)
3677 			return (error);
3678 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3679 	}
3680 
3681 
3682 	/*
3683 	 * sdev_attr was allocated in sdev_mknode
3684 	 */
3685 	rw_enter(&dv->sdev_contents, RW_WRITER);
3686 	error = secpolicy_vnode_setattr(cred, vp, vap,
3687 	    dv->sdev_attr, flags, sdev_unlocked_access, dv);
3688 	if (error) {
3689 		rw_exit(&dv->sdev_contents);
3690 		rw_exit(&parent->sdev_contents);
3691 		return (error);
3692 	}
3693 
3694 	get = dv->sdev_attr;
3695 	if (mask & AT_MODE) {
3696 		get->va_mode &= S_IFMT;
3697 		get->va_mode |= vap->va_mode & ~S_IFMT;
3698 	}
3699 
3700 	if ((mask & AT_UID) || (mask & AT_GID)) {
3701 		if (mask & AT_UID)
3702 			get->va_uid = vap->va_uid;
3703 		if (mask & AT_GID)
3704 			get->va_gid = vap->va_gid;
3705 		/*
3706 		 * a callback must be provided if the protocol is set
3707 		 */
3708 		if ((protocol & AT_UID) || (protocol & AT_GID)) {
3709 			ASSERT(callback);
3710 			error = callback(dv, get, protocol);
3711 			if (error) {
3712 				rw_exit(&dv->sdev_contents);
3713 				rw_exit(&parent->sdev_contents);
3714 				return (error);
3715 			}
3716 		}
3717 	}
3718 
3719 	if (mask & AT_ATIME)
3720 		get->va_atime = vap->va_atime;
3721 	if (mask & AT_MTIME)
3722 		get->va_mtime = vap->va_mtime;
3723 	if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
3724 		gethrestime(&get->va_ctime);
3725 	}
3726 
3727 	sdev_vattr_merge(dv, get);
3728 	rw_exit(&dv->sdev_contents);
3729 	rw_exit(&parent->sdev_contents);
3730 	return (0);
3731 }
3732 
3733 /*
3734  * a generic inactive() function
3735  */
3736 void
3737 devname_inactive_func(struct vnode *vp, struct cred *cred,
3738     void (*callback)(struct vnode *))
3739 {
3740 	int clean;
3741 	struct sdev_node *dv = VTOSDEV(vp);
3742 	struct sdev_node *ddv = dv->sdev_dotdot;
3743 	int state;
3744 	struct devname_nsmap *map = NULL;
3745 	struct devname_ops *dirops = NULL;
3746 	void (*fn)(devname_handle_t *, struct cred *) = NULL;
3747 
3748 	rw_enter(&ddv->sdev_contents, RW_WRITER);
3749 	state = dv->sdev_state;
3750 
3751 	mutex_enter(&vp->v_lock);
3752 	ASSERT(vp->v_count >= 1);
3753 
3754 	if (vp->v_count == 1 && callback != NULL)
3755 		callback(vp);
3756 
3757 	clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
3758 
3759 	/*
3760 	 * last ref count on the ZOMBIE node is released.
3761 	 * clean up the sdev_node, and
3762 	 * release the hold on the backing store node so that
3763 	 * the ZOMBIE backing stores also cleaned out.
3764 	 */
3765 	if (clean) {
3766 		ASSERT(ddv);
3767 		if (SDEV_IS_GLOBAL(dv)) {
3768 			map = ddv->sdev_mapinfo;
3769 			dirops = map ? map->dir_ops : NULL;
3770 			if (dirops && (fn = dirops->devnops_inactive))
3771 				(*fn)(&(dv->sdev_handle), cred);
3772 		}
3773 
3774 		ddv->sdev_nlink--;
3775 		if (vp->v_type == VDIR) {
3776 			dv->sdev_nlink--;
3777 		}
3778 		avl_remove(&ddv->sdev_entries, dv);
3779 		dv->sdev_nlink--;
3780 		--vp->v_count;
3781 		mutex_exit(&vp->v_lock);
3782 		sdev_nodedestroy(dv, 0);
3783 	} else {
3784 		--vp->v_count;
3785 		mutex_exit(&vp->v_lock);
3786 	}
3787 	rw_exit(&ddv->sdev_contents);
3788 }
3789