xref: /titanic_51/usr/src/uts/common/fs/dev/sdev_subr.c (revision a563a037ee1e9e7c39304f3775eb7327ab86b914)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * utility routines for the /dev fs
30  */
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/t_lock.h>
35 #include <sys/systm.h>
36 #include <sys/sysmacros.h>
37 #include <sys/user.h>
38 #include <sys/time.h>
39 #include <sys/vfs.h>
40 #include <sys/vnode.h>
41 #include <sys/file.h>
42 #include <sys/fcntl.h>
43 #include <sys/flock.h>
44 #include <sys/kmem.h>
45 #include <sys/uio.h>
46 #include <sys/errno.h>
47 #include <sys/stat.h>
48 #include <sys/cred.h>
49 #include <sys/dirent.h>
50 #include <sys/pathname.h>
51 #include <sys/cmn_err.h>
52 #include <sys/debug.h>
53 #include <sys/mode.h>
54 #include <sys/policy.h>
55 #include <fs/fs_subr.h>
56 #include <sys/mount.h>
57 #include <sys/fs/snode.h>
58 #include <sys/fs/dv_node.h>
59 #include <sys/fs/sdev_impl.h>
60 #include <sys/fs/sdev_node.h>
61 #include <sys/sunndi.h>
62 #include <sys/sunmdi.h>
63 #include <sys/conf.h>
64 #include <sys/proc.h>
65 #include <sys/user.h>
66 #include <sys/modctl.h>
67 
#ifdef DEBUG
/* flags handed to kmem_cache_create() by sdev_node_cache_init() */
int sdev_debug_cache_flags = 0;
/* debug message control word for DEBUG kernels */
int sdev_debug = 0x00000001;
#endif
72 
73 /*
74  * globals
75  */
76 /* prototype memory vattrs */
77 vattr_t sdev_vattr_dir = {
78 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
79 	VDIR,					/* va_type */
80 	SDEV_DIRMODE_DEFAULT,			/* va_mode */
81 	SDEV_UID_DEFAULT,			/* va_uid */
82 	SDEV_GID_DEFAULT,			/* va_gid */
83 	0,					/* va_fsid */
84 	0,					/* va_nodeid */
85 	0,					/* va_nlink */
86 	0,					/* va_size */
87 	0,					/* va_atime */
88 	0,					/* va_mtime */
89 	0,					/* va_ctime */
90 	0,					/* va_rdev */
91 	0,					/* va_blksize */
92 	0,					/* va_nblocks */
93 	0					/* va_vcode */
94 };
95 
96 vattr_t sdev_vattr_lnk = {
97 	AT_TYPE|AT_MODE,			/* va_mask */
98 	VLNK,					/* va_type */
99 	SDEV_LNKMODE_DEFAULT,			/* va_mode */
100 	SDEV_UID_DEFAULT,			/* va_uid */
101 	SDEV_GID_DEFAULT,			/* va_gid */
102 	0,					/* va_fsid */
103 	0,					/* va_nodeid */
104 	0,					/* va_nlink */
105 	0,					/* va_size */
106 	0,					/* va_atime */
107 	0,					/* va_mtime */
108 	0,					/* va_ctime */
109 	0,					/* va_rdev */
110 	0,					/* va_blksize */
111 	0,					/* va_nblocks */
112 	0					/* va_vcode */
113 };
114 
115 vattr_t sdev_vattr_blk = {
116 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
117 	VBLK,					/* va_type */
118 	S_IFBLK | SDEV_DEVMODE_DEFAULT,		/* va_mode */
119 	SDEV_UID_DEFAULT,			/* va_uid */
120 	SDEV_GID_DEFAULT,			/* va_gid */
121 	0,					/* va_fsid */
122 	0,					/* va_nodeid */
123 	0,					/* va_nlink */
124 	0,					/* va_size */
125 	0,					/* va_atime */
126 	0,					/* va_mtime */
127 	0,					/* va_ctime */
128 	0,					/* va_rdev */
129 	0,					/* va_blksize */
130 	0,					/* va_nblocks */
131 	0					/* va_vcode */
132 };
133 
134 vattr_t sdev_vattr_chr = {
135 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
136 	VCHR,					/* va_type */
137 	S_IFCHR | SDEV_DEVMODE_DEFAULT,		/* va_mode */
138 	SDEV_UID_DEFAULT,			/* va_uid */
139 	SDEV_GID_DEFAULT,			/* va_gid */
140 	0,					/* va_fsid */
141 	0,					/* va_nodeid */
142 	0,					/* va_nlink */
143 	0,					/* va_size */
144 	0,					/* va_atime */
145 	0,					/* va_mtime */
146 	0,					/* va_ctime */
147 	0,					/* va_rdev */
148 	0,					/* va_blksize */
149 	0,					/* va_nblocks */
150 	0					/* va_vcode */
151 };
152 
153 kmem_cache_t	*sdev_node_cache;	/* sdev_node cache */
154 int		devtype;		/* fstype */
155 
156 struct devname_ops *devname_ns_ops;	/* default name service directory ops */
157 kmutex_t devname_nsmaps_lock;	/* protect devname_nsmaps */
158 
159 /* static */
160 static struct devname_nsmap *devname_nsmaps = NULL;
161 				/* contents from /etc/dev/devname_master */
162 static int devname_nsmaps_invalidated = 0; /* "devfsadm -m" has run */
163 
164 static struct vnodeops *sdev_get_vop(struct sdev_node *);
165 static void sdev_set_no_nocache(struct sdev_node *);
166 static int sdev_get_moduleops(struct sdev_node *);
167 static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
168 static void sdev_free_vtab(fs_operation_def_t *);
169 
170 static void
171 sdev_prof_free(struct sdev_node *dv)
172 {
173 	ASSERT(!SDEV_IS_GLOBAL(dv));
174 	if (dv->sdev_prof.dev_name)
175 		nvlist_free(dv->sdev_prof.dev_name);
176 	if (dv->sdev_prof.dev_map)
177 		nvlist_free(dv->sdev_prof.dev_map);
178 	if (dv->sdev_prof.dev_symlink)
179 		nvlist_free(dv->sdev_prof.dev_symlink);
180 	if (dv->sdev_prof.dev_glob_incdir)
181 		nvlist_free(dv->sdev_prof.dev_glob_incdir);
182 	if (dv->sdev_prof.dev_glob_excdir)
183 		nvlist_free(dv->sdev_prof.dev_glob_excdir);
184 	bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
185 }
186 
187 /*
188  * sdev_node cache constructor
189  */
190 /*ARGSUSED1*/
191 static int
192 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
193 {
194 	struct sdev_node *dv = (struct sdev_node *)buf;
195 	struct vnode *vp;
196 
197 	ASSERT(flag == KM_SLEEP);
198 
199 	bzero(buf, sizeof (struct sdev_node));
200 	rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
201 	dv->sdev_vnode = vn_alloc(KM_SLEEP);
202 	vp = SDEVTOV(dv);
203 	vp->v_data = (caddr_t)dv;
204 	return (0);
205 }
206 
207 /* sdev_node destructor for kmem cache */
208 /*ARGSUSED1*/
209 static void
210 i_sdev_node_dtor(void *buf, void *arg)
211 {
212 	struct sdev_node *dv = (struct sdev_node *)buf;
213 	struct vnode *vp = SDEVTOV(dv);
214 
215 	rw_destroy(&dv->sdev_contents);
216 	vn_free(vp);
217 }
218 
219 /* initialize sdev_node cache */
220 void
221 sdev_node_cache_init()
222 {
223 	int flags = 0;
224 
225 #ifdef	DEBUG
226 	flags = sdev_debug_cache_flags;
227 	if (flags)
228 		sdcmn_err(("cache debug flags 0x%x\n", flags));
229 #endif	/* DEBUG */
230 
231 	ASSERT(sdev_node_cache == NULL);
232 	sdev_node_cache = kmem_cache_create("sdev_node_cache",
233 	    sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
234 	    NULL, NULL, NULL, flags);
235 }
236 
237 /* destroy sdev_node cache */
238 void
239 sdev_node_cache_fini()
240 {
241 	ASSERT(sdev_node_cache != NULL);
242 	kmem_cache_destroy(sdev_node_cache);
243 	sdev_node_cache = NULL;
244 }
245 
246 /*
247  * Compare two nodes lexographically to balance avl tree
248  */
249 static int
250 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
251 {
252 	int rv;
253 	if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
254 		return (0);
255 	return ((rv < 0) ? -1 : 1);
256 }
257 
258 void
259 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
260 {
261 	ASSERT(dv);
262 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
263 	dv->sdev_state = state;
264 }
265 
266 static void
267 sdev_attrinit(struct sdev_node *dv, vattr_t *vap)
268 {
269 	timestruc_t now;
270 
271 	ASSERT(vap);
272 
273 	dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
274 	*dv->sdev_attr = *vap;
275 
276 	dv->sdev_attr->va_mode = MAKEIMODE(vap->va_type, vap->va_mode);
277 
278 	gethrestime(&now);
279 	dv->sdev_attr->va_atime = now;
280 	dv->sdev_attr->va_mtime = now;
281 	dv->sdev_attr->va_ctime = now;
282 }
283 
284 /* alloc and initialize a sdev_node */
285 int
286 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
287     vattr_t *vap)
288 {
289 	struct sdev_node *dv = NULL;
290 	struct vnode *vp;
291 	size_t nmlen, len;
292 	devname_handle_t  *dhl;
293 
294 	nmlen = strlen(nm) + 1;
295 	if (nmlen > MAXNAMELEN) {
296 		sdcmn_err9(("sdev_nodeinit: node name %s"
297 		    " too long\n", nm));
298 		*newdv = NULL;
299 		return (ENAMETOOLONG);
300 	}
301 
302 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
303 
304 	dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
305 	bcopy(nm, dv->sdev_name, nmlen);
306 	dv->sdev_namelen = nmlen - 1;	/* '\0' not included */
307 	len = strlen(ddv->sdev_path) + strlen(nm) + 2;
308 	dv->sdev_path = kmem_alloc(len, KM_SLEEP);
309 	(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
310 	/* overwritten for VLNK nodes */
311 	dv->sdev_symlink = NULL;
312 
313 	vp = SDEVTOV(dv);
314 	vn_reinit(vp);
315 	vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
316 	if (vap)
317 		vp->v_type = vap->va_type;
318 
319 	/*
320 	 * initialized to the parent's vnodeops.
321 	 * maybe overwriten for a VDIR
322 	 */
323 	vn_setops(vp, vn_getops(SDEVTOV(ddv)));
324 	vn_exists(vp);
325 
326 	dv->sdev_dotdot = NULL;
327 	dv->sdev_attrvp = NULL;
328 	if (vap) {
329 		sdev_attrinit(dv, vap);
330 	} else {
331 		dv->sdev_attr = NULL;
332 	}
333 
334 	dv->sdev_ino = sdev_mkino(dv);
335 	dv->sdev_nlink = 0;		/* updated on insert */
336 	dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
337 	dv->sdev_flags |= SDEV_BUILD;
338 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
339 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
340 	if (SDEV_IS_GLOBAL(ddv)) {
341 		dv->sdev_flags |= SDEV_GLOBAL;
342 		dv->sdev_mapinfo = NULL;
343 		dhl = &(dv->sdev_handle);
344 		dhl->dh_data = dv;
345 		dhl->dh_spec = DEVNAME_NS_NONE;
346 		dhl->dh_args = NULL;
347 		sdev_set_no_nocache(dv);
348 		dv->sdev_gdir_gen = 0;
349 	} else {
350 		dv->sdev_flags &= ~SDEV_GLOBAL;
351 		dv->sdev_origin = NULL; /* set later */
352 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
353 		dv->sdev_ldir_gen = 0;
354 		dv->sdev_devtree_gen = 0;
355 	}
356 
357 	rw_enter(&dv->sdev_contents, RW_WRITER);
358 	sdev_set_nodestate(dv, SDEV_INIT);
359 	rw_exit(&dv->sdev_contents);
360 	*newdv = dv;
361 
362 	return (0);
363 }
364 
365 /*
366  * transition a sdev_node into SDEV_READY state
367  */
368 int
369 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
370     void *args, struct cred *cred)
371 {
372 	int error = 0;
373 	struct vnode *vp = SDEVTOV(dv);
374 	vtype_t type;
375 
376 	ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
377 
378 	type = vap->va_type;
379 	vp->v_type = type;
380 	vp->v_rdev = vap->va_rdev;
381 	rw_enter(&dv->sdev_contents, RW_WRITER);
382 	if (type == VDIR) {
383 		dv->sdev_nlink = 2;
384 		dv->sdev_flags &= ~SDEV_PERSIST;
385 		dv->sdev_flags &= ~SDEV_DYNAMIC;
386 		vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
387 		error = sdev_get_moduleops(dv); /* from plug-in module */
388 		ASSERT(dv->sdev_dotdot);
389 		ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
390 		vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
391 		avl_create(&dv->sdev_entries,
392 		    (int (*)(const void *, const void *))sdev_compare_nodes,
393 		    sizeof (struct sdev_node),
394 		    offsetof(struct sdev_node, sdev_avllink));
395 	} else if (type == VLNK) {
396 		ASSERT(args);
397 		dv->sdev_nlink = 1;
398 		dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
399 	} else {
400 		dv->sdev_nlink = 1;
401 	}
402 
403 	if (!(SDEV_IS_GLOBAL(dv))) {
404 		dv->sdev_origin = (struct sdev_node *)args;
405 		dv->sdev_flags &= ~SDEV_PERSIST;
406 	}
407 
408 	/*
409 	 * shadow node is created here OR
410 	 * if failed (indicated by dv->sdev_attrvp == NULL),
411 	 * created later in sdev_setattr
412 	 */
413 	if (avp) {
414 		dv->sdev_attrvp = avp;
415 	} else {
416 		if (dv->sdev_attr == NULL)
417 			sdev_attrinit(dv, vap);
418 		else
419 			*dv->sdev_attr = *vap;
420 
421 		if ((SDEV_IS_PERSIST(dv) && (dv->sdev_attrvp == NULL)) ||
422 		    ((SDEVTOV(dv)->v_type == VDIR) &&
423 		    (dv->sdev_attrvp == NULL))) {
424 			error = sdev_shadow_node(dv, cred);
425 		}
426 	}
427 
428 	if (error == 0) {
429 		/* transition to READY state */
430 		sdev_set_nodestate(dv, SDEV_READY);
431 		sdev_nc_node_exists(dv);
432 	} else {
433 		sdev_set_nodestate(dv, SDEV_ZOMBIE);
434 	}
435 	rw_exit(&dv->sdev_contents);
436 	return (error);
437 }
438 
439 /*
440  * setting ZOMBIE state
441  */
442 static int
443 sdev_nodezombied(struct sdev_node *dv)
444 {
445 	rw_enter(&dv->sdev_contents, RW_WRITER);
446 	sdev_set_nodestate(dv, SDEV_ZOMBIE);
447 	rw_exit(&dv->sdev_contents);
448 	return (0);
449 }
450 
451 /*
452  * Build the VROOT sdev_node.
453  */
454 /*ARGSUSED*/
455 struct sdev_node *
456 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
457     struct vnode *avp, struct cred *cred)
458 {
459 	struct sdev_node *dv;
460 	struct vnode *vp;
461 	char devdir[] = "/dev";
462 
463 	ASSERT(sdev_node_cache != NULL);
464 	ASSERT(avp);
465 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
466 	vp = SDEVTOV(dv);
467 	vn_reinit(vp);
468 	vp->v_flag |= VROOT;
469 	vp->v_vfsp = vfsp;
470 	vp->v_type = VDIR;
471 	vp->v_rdev = devdev;
472 	vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
473 	vn_exists(vp);
474 
475 	if (vfsp->vfs_mntpt)
476 		dv->sdev_name = i_ddi_strdup(
477 		    (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
478 	else
479 		/* vfs_mountdev1 set mount point later */
480 		dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
481 	dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
482 	dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
483 	dv->sdev_ino = SDEV_ROOTINO;
484 	dv->sdev_nlink = 2;		/* name + . (no sdev_insert) */
485 	dv->sdev_dotdot = dv;		/* .. == self */
486 	dv->sdev_attrvp = avp;
487 	dv->sdev_attr = NULL;
488 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
489 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
490 	if (strcmp(dv->sdev_name, "/dev") == 0) {
491 		mutex_init(&devname_nsmaps_lock, NULL, MUTEX_DEFAULT, NULL);
492 		dv->sdev_mapinfo = NULL;
493 		dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
494 		bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
495 		dv->sdev_gdir_gen = 0;
496 	} else {
497 		dv->sdev_flags = SDEV_BUILD;
498 		dv->sdev_flags &= ~SDEV_PERSIST;
499 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
500 		dv->sdev_ldir_gen = 0;
501 		dv->sdev_devtree_gen = 0;
502 	}
503 
504 	avl_create(&dv->sdev_entries,
505 	    (int (*)(const void *, const void *))sdev_compare_nodes,
506 	    sizeof (struct sdev_node),
507 	    offsetof(struct sdev_node, sdev_avllink));
508 
509 	rw_enter(&dv->sdev_contents, RW_WRITER);
510 	sdev_set_nodestate(dv, SDEV_READY);
511 	rw_exit(&dv->sdev_contents);
512 	sdev_nc_node_exists(dv);
513 	return (dv);
514 }
515 
516 /*
517  *  1. load the module
518  *  2. modload invokes sdev_module_register, which in turn sets
519  *     the dv->sdev_mapinfo->dir_ops
520  *
521  * note: locking order:
522  *	dv->sdev_contents -> map->dir_lock
523  */
524 static int
525 sdev_get_moduleops(struct sdev_node *dv)
526 {
527 	int error = 0;
528 	struct devname_nsmap *map = NULL;
529 	char *module;
530 	char *path;
531 	int load = 1;
532 
533 	ASSERT(SDEVTOV(dv)->v_type == VDIR);
534 
535 	if (devname_nsmaps == NULL)
536 		return (0);
537 
538 	if (!sdev_nsmaps_loaded() && !sdev_nsmaps_reloaded())
539 		return (0);
540 
541 
542 	path = dv->sdev_path;
543 	if ((map = sdev_get_nsmap_by_dir(path, 0))) {
544 		rw_enter(&map->dir_lock, RW_READER);
545 		if (map->dir_invalid) {
546 			if (map->dir_module && map->dir_newmodule &&
547 			    (strcmp(map->dir_module,
548 			    map->dir_newmodule) == 0)) {
549 				load = 0;
550 			}
551 			sdev_replace_nsmap(map, map->dir_newmodule,
552 			    map->dir_newmap);
553 		}
554 
555 		module = map->dir_module;
556 		if (module && load) {
557 			sdcmn_err6(("sdev_get_moduleops: "
558 			    "load module %s", module));
559 			rw_exit(&map->dir_lock);
560 			error = modload("devname", module);
561 			sdcmn_err6(("sdev_get_moduleops: error %d\n", error));
562 			if (error < 0) {
563 				return (-1);
564 			}
565 		} else if (module == NULL) {
566 			/*
567 			 * loading the module ops for name services
568 			 */
569 			if (devname_ns_ops == NULL) {
570 				sdcmn_err6((
571 				    "sdev_get_moduleops: modload default\n"));
572 				error = modload("devname", DEVNAME_NSCONFIG);
573 				sdcmn_err6((
574 				    "sdev_get_moduleops: error %d\n", error));
575 				if (error < 0) {
576 					return (-1);
577 				}
578 			}
579 
580 			if (!rw_tryupgrade(&map->dir_lock)) {
581 				rw_exit(&map->dir_lock);
582 				rw_enter(&map->dir_lock, RW_WRITER);
583 			}
584 			ASSERT(devname_ns_ops);
585 			map->dir_ops = devname_ns_ops;
586 			rw_exit(&map->dir_lock);
587 		}
588 	}
589 
590 	dv->sdev_mapinfo = map;
591 	return (0);
592 }
593 
594 /* directory dependent vop table */
595 struct sdev_vop_table {
596 	char *vt_name;				/* subdirectory name */
597 	const fs_operation_def_t *vt_service;	/* vnodeops table */
598 	struct vnodeops *vt_vops;		/* constructed vop */
599 	struct vnodeops **vt_global_vops;	/* global container for vop */
600 	int (*vt_vtor)(struct sdev_node *);	/* validate sdev_node */
601 	int vt_flags;
602 };
603 
604 /*
605  * A nice improvement would be to provide a plug-in mechanism
606  * for this table instead of a const table.
607  */
608 static struct sdev_vop_table vtab[] =
609 {
610 	{ "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
611 	SDEV_DYNAMIC | SDEV_VTOR },
612 
613 	{ "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },
614 
615 	{ "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
616 	SDEV_DYNAMIC | SDEV_VTOR },
617 
618 	{ NULL, NULL, NULL, NULL, NULL, 0}
619 };
620 
621 
622 /*
623  *  sets a directory's vnodeops if the directory is in the vtab;
624  */
625 static struct vnodeops *
626 sdev_get_vop(struct sdev_node *dv)
627 {
628 	int i;
629 	char *path;
630 
631 	path = dv->sdev_path;
632 	ASSERT(path);
633 
634 	/* gets the relative path to /dev/ */
635 	path += 5;
636 
637 	/* gets the vtab entry if matches */
638 	for (i = 0; vtab[i].vt_name; i++) {
639 		if (strcmp(vtab[i].vt_name, path) != 0)
640 			continue;
641 		dv->sdev_flags |= vtab[i].vt_flags;
642 
643 		if (vtab[i].vt_vops) {
644 			if (vtab[i].vt_global_vops)
645 				*(vtab[i].vt_global_vops) = vtab[i].vt_vops;
646 			return (vtab[i].vt_vops);
647 		}
648 
649 		if (vtab[i].vt_service) {
650 			fs_operation_def_t *templ;
651 			templ = sdev_merge_vtab(vtab[i].vt_service);
652 			if (vn_make_ops(vtab[i].vt_name,
653 			    (const fs_operation_def_t *)templ,
654 			    &vtab[i].vt_vops) != 0) {
655 				cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
656 				    vtab[i].vt_name);
657 				/*NOTREACHED*/
658 			}
659 			if (vtab[i].vt_global_vops) {
660 				*(vtab[i].vt_global_vops) = vtab[i].vt_vops;
661 			}
662 			sdev_free_vtab(templ);
663 			return (vtab[i].vt_vops);
664 		}
665 		return (sdev_vnodeops);
666 	}
667 
668 	/* child inherits the persistence of the parent */
669 	if (SDEV_IS_PERSIST(dv->sdev_dotdot))
670 		dv->sdev_flags |= SDEV_PERSIST;
671 
672 	return (sdev_vnodeops);
673 }
674 
675 static void
676 sdev_set_no_nocache(struct sdev_node *dv)
677 {
678 	int i;
679 	char *path;
680 
681 	ASSERT(dv->sdev_path);
682 	path = dv->sdev_path + strlen("/dev/");
683 
684 	for (i = 0; vtab[i].vt_name; i++) {
685 		if (strcmp(vtab[i].vt_name, path) == 0) {
686 			if (vtab[i].vt_flags & SDEV_NO_NCACHE)
687 				dv->sdev_flags |= SDEV_NO_NCACHE;
688 			break;
689 		}
690 	}
691 }
692 
693 void *
694 sdev_get_vtor(struct sdev_node *dv)
695 {
696 	int i;
697 
698 	for (i = 0; vtab[i].vt_name; i++) {
699 		if (strcmp(vtab[i].vt_name, dv->sdev_name) != 0)
700 			continue;
701 		return ((void *)vtab[i].vt_vtor);
702 	}
703 	return (NULL);
704 }
705 
706 /*
707  * Build the base root inode
708  */
709 ino_t
710 sdev_mkino(struct sdev_node *dv)
711 {
712 	ino_t	ino;
713 
714 	/*
715 	 * for now, follow the lead of tmpfs here
716 	 * need to someday understand the requirements here
717 	 */
718 	ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
719 	ino += SDEV_ROOTINO + 1;
720 
721 	return (ino);
722 }
723 
724 static int
725 sdev_getlink(struct vnode *linkvp, char **link)
726 {
727 	int err;
728 	char *buf;
729 	struct uio uio = {0};
730 	struct iovec iov = {0};
731 
732 	if (linkvp == NULL)
733 		return (ENOENT);
734 	ASSERT(linkvp->v_type == VLNK);
735 
736 	buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
737 	iov.iov_base = buf;
738 	iov.iov_len = MAXPATHLEN;
739 	uio.uio_iov = &iov;
740 	uio.uio_iovcnt = 1;
741 	uio.uio_resid = MAXPATHLEN;
742 	uio.uio_segflg = UIO_SYSSPACE;
743 	uio.uio_llimit = MAXOFFSET_T;
744 
745 	err = VOP_READLINK(linkvp, &uio, kcred, NULL);
746 	if (err) {
747 		cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
748 		kmem_free(buf, MAXPATHLEN);
749 		return (ENOENT);
750 	}
751 
752 	/* mission complete */
753 	*link = i_ddi_strdup(buf, KM_SLEEP);
754 	kmem_free(buf, MAXPATHLEN);
755 	return (0);
756 }
757 
758 /*
759  * A convenient wrapper to get the devfs node vnode for a device
760  * minor functionality: readlink() of a /dev symlink
761  * Place the link into dv->sdev_symlink
762  */
763 static int
764 sdev_follow_link(struct sdev_node *dv)
765 {
766 	int err;
767 	struct vnode *linkvp;
768 	char *link = NULL;
769 
770 	linkvp = SDEVTOV(dv);
771 	if (linkvp == NULL)
772 		return (ENOENT);
773 	ASSERT(linkvp->v_type == VLNK);
774 	err = sdev_getlink(linkvp, &link);
775 	if (err) {
776 		(void) sdev_nodezombied(dv);
777 		dv->sdev_symlink = NULL;
778 		return (ENOENT);
779 	}
780 
781 	ASSERT(link != NULL);
782 	dv->sdev_symlink = link;
783 	return (0);
784 }
785 
786 static int
787 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
788 {
789 	vtype_t otype = SDEVTOV(dv)->v_type;
790 
791 	/*
792 	 * existing sdev_node has a different type.
793 	 */
794 	if (otype != nvap->va_type) {
795 		sdcmn_err9(("sdev_node_check: existing node "
796 		    "  %s type %d does not match new node type %d\n",
797 		    dv->sdev_name, otype, nvap->va_type));
798 		return (EEXIST);
799 	}
800 
801 	/*
802 	 * For a symlink, the target should be the same.
803 	 */
804 	if (otype == VLNK) {
805 		ASSERT(nargs != NULL);
806 		ASSERT(dv->sdev_symlink != NULL);
807 		if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
808 			sdcmn_err9(("sdev_node_check: existing node "
809 			    " %s has different symlink %s as new node "
810 			    " %s\n", dv->sdev_name, dv->sdev_symlink,
811 			    (char *)nargs));
812 			return (EEXIST);
813 		}
814 	}
815 
816 	return (0);
817 }
818 
819 /*
820  * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
821  *
822  * arguments:
823  *	- ddv (parent)
824  *	- nm (child name)
825  *	- newdv (sdev_node for nm is returned here)
826  *	- vap (vattr for the node to be created, va_type should be set.
827  *	- avp (attribute vnode)
828  *	  the defaults should be used if unknown)
829  *	- cred
830  *	- args
831  *	    . tnm (for VLNK)
832  *	    . global sdev_node (for !SDEV_GLOBAL)
833  * 	- state: SDEV_INIT, SDEV_READY
834  *
835  * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
836  *
837  * NOTE:  directory contents writers lock needs to be held before
838  *	  calling this routine.
839  */
840 int
841 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
842     struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
843     sdev_node_state_t state)
844 {
845 	int error = 0;
846 	sdev_node_state_t node_state;
847 	struct sdev_node *dv = NULL;
848 
849 	ASSERT(state != SDEV_ZOMBIE);
850 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
851 
852 	if (*newdv) {
853 		dv = *newdv;
854 	} else {
855 		/* allocate and initialize a sdev_node */
856 		if (ddv->sdev_state == SDEV_ZOMBIE) {
857 			sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
858 			    ddv->sdev_path));
859 			return (ENOENT);
860 		}
861 
862 		error = sdev_nodeinit(ddv, nm, &dv, vap);
863 		if (error != 0) {
864 			sdcmn_err9(("sdev_mknode: error %d,"
865 			    " name %s can not be initialized\n",
866 			    error, nm));
867 			return (error);
868 		}
869 		ASSERT(dv);
870 
871 		/* insert into the directory cache */
872 		error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
873 		if (error) {
874 			sdcmn_err9(("sdev_mknode: node %s can not"
875 			    " be added into directory cache\n", nm));
876 			return (ENOENT);
877 		}
878 	}
879 
880 	ASSERT(dv);
881 	node_state = dv->sdev_state;
882 	ASSERT(node_state != SDEV_ZOMBIE);
883 
884 	if (state == SDEV_READY) {
885 		switch (node_state) {
886 		case SDEV_INIT:
887 			error = sdev_nodeready(dv, vap, avp, args, cred);
888 			if (error) {
889 				sdcmn_err9(("sdev_mknode: node %s can NOT"
890 				    " be transitioned into READY state, "
891 				    "error %d\n", nm, error));
892 			}
893 			break;
894 		case SDEV_READY:
895 			/*
896 			 * Do some sanity checking to make sure
897 			 * the existing sdev_node is what has been
898 			 * asked for.
899 			 */
900 			error = sdev_node_check(dv, vap, args);
901 			break;
902 		default:
903 			break;
904 		}
905 	}
906 
907 	if (!error) {
908 		*newdv = dv;
909 		ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
910 	} else {
911 		SDEV_SIMPLE_RELE(dv);
912 		*newdv = NULL;
913 	}
914 
915 	return (error);
916 }
917 
918 /*
919  * convenient wrapper to change vp's ATIME, CTIME and MTIME
920  */
921 void
922 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
923 {
924 	struct vattr attr;
925 	timestruc_t now;
926 	int err;
927 
928 	ASSERT(vp);
929 	gethrestime(&now);
930 	if (mask & AT_CTIME)
931 		attr.va_ctime = now;
932 	if (mask & AT_MTIME)
933 		attr.va_mtime = now;
934 	if (mask & AT_ATIME)
935 		attr.va_atime = now;
936 
937 	attr.va_mask = (mask & AT_TIMES);
938 	err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
939 	if (err && (err != EROFS)) {
940 		sdcmn_err(("update timestamps error %d\n", err));
941 	}
942 }
943 
944 /*
945  * the backing store vnode is released here
946  */
947 /*ARGSUSED1*/
948 void
949 sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
950 {
951 	/* no references */
952 	ASSERT(dv->sdev_nlink == 0);
953 
954 	if (dv->sdev_attrvp != NULLVP) {
955 		VN_RELE(dv->sdev_attrvp);
956 		/*
957 		 * reset the attrvp so that no more
958 		 * references can be made on this already
959 		 * vn_rele() vnode
960 		 */
961 		dv->sdev_attrvp = NULLVP;
962 	}
963 
964 	if (dv->sdev_attr != NULL) {
965 		kmem_free(dv->sdev_attr, sizeof (struct vattr));
966 		dv->sdev_attr = NULL;
967 	}
968 
969 	if (dv->sdev_name != NULL) {
970 		kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
971 		dv->sdev_name = NULL;
972 	}
973 
974 	if (dv->sdev_symlink != NULL) {
975 		kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
976 		dv->sdev_symlink = NULL;
977 	}
978 
979 	if (dv->sdev_path) {
980 		kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
981 		dv->sdev_path = NULL;
982 	}
983 
984 	if (!SDEV_IS_GLOBAL(dv))
985 		sdev_prof_free(dv);
986 
987 	if (SDEVTOV(dv)->v_type == VDIR) {
988 		ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
989 		avl_destroy(&dv->sdev_entries);
990 	}
991 
992 	mutex_destroy(&dv->sdev_lookup_lock);
993 	cv_destroy(&dv->sdev_lookup_cv);
994 
995 	/* return node to initial state as per constructor */
996 	(void) memset((void *)&dv->sdev_instance_data, 0,
997 	    sizeof (dv->sdev_instance_data));
998 	vn_invalid(SDEVTOV(dv));
999 	kmem_cache_free(sdev_node_cache, dv);
1000 }
1001 
1002 /*
1003  * DIRECTORY CACHE lookup
1004  */
1005 struct sdev_node *
1006 sdev_findbyname(struct sdev_node *ddv, char *nm)
1007 {
1008 	struct sdev_node *dv;
1009 	struct sdev_node dvtmp;
1010 	avl_index_t	where;
1011 
1012 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1013 
1014 	dvtmp.sdev_name = nm;
1015 	dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
1016 	if (dv) {
1017 		ASSERT(dv->sdev_dotdot == ddv);
1018 		ASSERT(strcmp(dv->sdev_name, nm) == 0);
1019 		/* Can't lookup stale nodes */
1020 		if (dv->sdev_flags & SDEV_STALE) {
1021 			sdcmn_err9((
1022 			    "sdev_findbyname: skipped stale node: %s\n", nm));
1023 		} else {
1024 			SDEV_HOLD(dv);
1025 			return (dv);
1026 		}
1027 	}
1028 	return (NULL);
1029 }
1030 
1031 /*
1032  * Inserts a new sdev_node in a parent directory
1033  */
1034 void
1035 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
1036 {
1037 	avl_index_t where;
1038 
1039 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1040 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
1041 	ASSERT(ddv->sdev_nlink >= 2);
1042 	ASSERT(dv->sdev_nlink == 0);
1043 
1044 	dv->sdev_dotdot = ddv;
1045 	VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
1046 	avl_insert(&ddv->sdev_entries, dv, where);
1047 	ddv->sdev_nlink++;
1048 }
1049 
1050 /*
1051  * The following check is needed because while sdev_nodes are linked
1052  * in SDEV_INIT state, they have their link counts incremented only
1053  * in SDEV_READY state.
1054  */
1055 static void
1056 decr_link(struct sdev_node *dv)
1057 {
1058 	if (dv->sdev_state != SDEV_INIT)
1059 		dv->sdev_nlink--;
1060 	else
1061 		ASSERT(dv->sdev_nlink == 0);
1062 }
1063 
1064 /*
1065  * Delete an existing dv from directory cache
1066  *
1067  * In the case of a node is still held by non-zero reference count,
1068  *     the node is put into ZOMBIE state. Once the reference count
1069  *     reaches "0", the node is unlinked and destroyed,
1070  *     in sdev_inactive().
1071  */
1072 static int
1073 sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
1074 {
1075 	struct vnode *vp;
1076 
1077 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1078 
1079 	vp = SDEVTOV(dv);
1080 	mutex_enter(&vp->v_lock);
1081 
1082 	/* dv is held still */
1083 	if (vp->v_count > 1) {
1084 		rw_enter(&dv->sdev_contents, RW_WRITER);
1085 		if (dv->sdev_state == SDEV_READY) {
1086 			sdcmn_err9((
1087 			    "sdev_delete: node %s busy with count %d\n",
1088 			    dv->sdev_name, vp->v_count));
1089 			dv->sdev_state = SDEV_ZOMBIE;
1090 		}
1091 		rw_exit(&dv->sdev_contents);
1092 		--vp->v_count;
1093 		mutex_exit(&vp->v_lock);
1094 		return (EBUSY);
1095 	}
1096 	ASSERT(vp->v_count == 1);
1097 
1098 	/* unlink from the memory cache */
1099 	ddv->sdev_nlink--;	/* .. to above */
1100 	if (vp->v_type == VDIR) {
1101 		decr_link(dv);		/* . to self */
1102 	}
1103 
1104 	avl_remove(&ddv->sdev_entries, dv);
1105 	decr_link(dv);	/* name, back to zero */
1106 	vp->v_count--;
1107 	mutex_exit(&vp->v_lock);
1108 
1109 	/* destroy the node */
1110 	sdev_nodedestroy(dv, 0);
1111 	return (0);
1112 }
1113 
1114 /*
1115  * check if the source is in the path of the target
1116  *
1117  * source and target are different
1118  */
1119 /*ARGSUSED2*/
1120 static int
1121 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
1122 {
1123 	int error = 0;
1124 	struct sdev_node *dotdot, *dir;
1125 
1126 	dotdot = tdv->sdev_dotdot;
1127 	ASSERT(dotdot);
1128 
1129 	/* fs root */
1130 	if (dotdot == tdv) {
1131 		return (0);
1132 	}
1133 
1134 	for (;;) {
1135 		/*
1136 		 * avoid error cases like
1137 		 *	mv a a/b
1138 		 *	mv a a/b/c
1139 		 *	etc.
1140 		 */
1141 		if (dotdot == sdv) {
1142 			error = EINVAL;
1143 			break;
1144 		}
1145 
1146 		dir = dotdot;
1147 		dotdot = dir->sdev_dotdot;
1148 
1149 		/* done checking because root is reached */
1150 		if (dir == dotdot) {
1151 			break;
1152 		}
1153 	}
1154 	return (error);
1155 }
1156 
1157 int
1158 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
1159     struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
1160     struct cred *cred)
1161 {
1162 	int error = 0;
1163 	struct vnode *ovp = SDEVTOV(odv);
1164 	struct vnode *nvp;
1165 	struct vattr vattr;
1166 	int doingdir = (ovp->v_type == VDIR);
1167 	char *link = NULL;
1168 	int samedir = (oddv == nddv) ? 1 : 0;
1169 	int bkstore = 0;
1170 	struct sdev_node *idv = NULL;
1171 	struct sdev_node *ndv = NULL;
1172 	timestruc_t now;
1173 
1174 	vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1175 	error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
1176 	if (error)
1177 		return (error);
1178 
1179 	if (!samedir)
1180 		rw_enter(&oddv->sdev_contents, RW_WRITER);
1181 	rw_enter(&nddv->sdev_contents, RW_WRITER);
1182 
1183 	/*
1184 	 * the source may have been deleted by another thread before
1185 	 * we gets here.
1186 	 */
1187 	if (odv->sdev_state != SDEV_READY) {
1188 		error = ENOENT;
1189 		goto err_out;
1190 	}
1191 
1192 	if (doingdir && (odv == nddv)) {
1193 		error = EINVAL;
1194 		goto err_out;
1195 	}
1196 
1197 	/*
1198 	 * If renaming a directory, and the parents are different (".." must be
1199 	 * changed) then the source dir must not be in the dir hierarchy above
1200 	 * the target since it would orphan everything below the source dir.
1201 	 */
1202 	if (doingdir && (oddv != nddv)) {
1203 		error = sdev_checkpath(odv, nddv, cred);
1204 		if (error)
1205 			goto err_out;
1206 	}
1207 
1208 	/* destination existing */
1209 	if (*ndvp) {
1210 		nvp = SDEVTOV(*ndvp);
1211 		ASSERT(nvp);
1212 
1213 		/* handling renaming to itself */
1214 		if (odv == *ndvp) {
1215 			error = 0;
1216 			goto err_out;
1217 		}
1218 
1219 		if (nvp->v_type == VDIR) {
1220 			if (!doingdir) {
1221 				error = EISDIR;
1222 				goto err_out;
1223 			}
1224 
1225 			if (vn_vfswlock(nvp)) {
1226 				error = EBUSY;
1227 				goto err_out;
1228 			}
1229 
1230 			if (vn_mountedvfs(nvp) != NULL) {
1231 				vn_vfsunlock(nvp);
1232 				error = EBUSY;
1233 				goto err_out;
1234 			}
1235 
1236 			/* in case dir1 exists in dir2 and "mv dir1 dir2" */
1237 			if ((*ndvp)->sdev_nlink > 2) {
1238 				vn_vfsunlock(nvp);
1239 				error = EEXIST;
1240 				goto err_out;
1241 			}
1242 			vn_vfsunlock(nvp);
1243 
1244 			(void) sdev_dirdelete(nddv, *ndvp);
1245 			*ndvp = NULL;
1246 			ASSERT(nddv->sdev_attrvp);
1247 			error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1248 			    nddv->sdev_attrvp, cred, NULL, 0);
1249 			if (error)
1250 				goto err_out;
1251 		} else {
1252 			if (doingdir) {
1253 				error = ENOTDIR;
1254 				goto err_out;
1255 			}
1256 
1257 			if (SDEV_IS_PERSIST((*ndvp))) {
1258 				bkstore = 1;
1259 			}
1260 
1261 			/*
1262 			 * get rid of the node from the directory cache
1263 			 * note, in case EBUSY is returned, the ZOMBIE
1264 			 * node is taken care in sdev_mknode.
1265 			 */
1266 			(void) sdev_dirdelete(nddv, *ndvp);
1267 			*ndvp = NULL;
1268 			if (bkstore) {
1269 				ASSERT(nddv->sdev_attrvp);
1270 				error = VOP_REMOVE(nddv->sdev_attrvp,
1271 				    nnm, cred, NULL, 0);
1272 				if (error)
1273 					goto err_out;
1274 			}
1275 		}
1276 	}
1277 
1278 	/* fix the source for a symlink */
1279 	if (vattr.va_type == VLNK) {
1280 		if (odv->sdev_symlink == NULL) {
1281 			error = sdev_follow_link(odv);
1282 			if (error) {
1283 				error = ENOENT;
1284 				goto err_out;
1285 			}
1286 		}
1287 		ASSERT(odv->sdev_symlink);
1288 		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1289 	}
1290 
1291 	/*
1292 	 * make a fresh node from the source attrs
1293 	 */
1294 	ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1295 	error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1296 	    NULL, (void *)link, cred, SDEV_READY);
1297 
1298 	if (link)
1299 		kmem_free(link, strlen(link) + 1);
1300 
1301 	if (error)
1302 		goto err_out;
1303 	ASSERT(*ndvp);
1304 	ASSERT((*ndvp)->sdev_state == SDEV_READY);
1305 
1306 	/* move dir contents */
1307 	if (doingdir) {
1308 		for (idv = SDEV_FIRST_ENTRY(odv); idv;
1309 		    idv = SDEV_NEXT_ENTRY(odv, idv)) {
1310 			error = sdev_rnmnode(odv, idv,
1311 			    (struct sdev_node *)(*ndvp), &ndv,
1312 			    idv->sdev_name, cred);
1313 			if (error)
1314 				goto err_out;
1315 			ndv = NULL;
1316 		}
1317 	}
1318 
1319 	if ((*ndvp)->sdev_attrvp) {
1320 		sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1321 		    AT_CTIME|AT_ATIME);
1322 	} else {
1323 		ASSERT((*ndvp)->sdev_attr);
1324 		gethrestime(&now);
1325 		(*ndvp)->sdev_attr->va_ctime = now;
1326 		(*ndvp)->sdev_attr->va_atime = now;
1327 	}
1328 
1329 	if (nddv->sdev_attrvp) {
1330 		sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1331 		    AT_MTIME|AT_ATIME);
1332 	} else {
1333 		ASSERT(nddv->sdev_attr);
1334 		gethrestime(&now);
1335 		nddv->sdev_attr->va_mtime = now;
1336 		nddv->sdev_attr->va_atime = now;
1337 	}
1338 	rw_exit(&nddv->sdev_contents);
1339 	if (!samedir)
1340 		rw_exit(&oddv->sdev_contents);
1341 
1342 	SDEV_RELE(*ndvp);
1343 	return (error);
1344 
1345 err_out:
1346 	rw_exit(&nddv->sdev_contents);
1347 	if (!samedir)
1348 		rw_exit(&oddv->sdev_contents);
1349 	return (error);
1350 }
1351 
1352 /*
1353  * Merge sdev_node specific information into an attribute structure.
1354  *
1355  * note: sdev_node is not locked here
1356  */
1357 void
1358 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1359 {
1360 	struct vnode *vp = SDEVTOV(dv);
1361 
1362 	vap->va_nlink = dv->sdev_nlink;
1363 	vap->va_nodeid = dv->sdev_ino;
1364 	vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1365 	vap->va_type = vp->v_type;
1366 
1367 	if (vp->v_type == VDIR) {
1368 		vap->va_rdev = 0;
1369 		vap->va_fsid = vp->v_rdev;
1370 	} else if (vp->v_type == VLNK) {
1371 		vap->va_rdev = 0;
1372 		vap->va_mode  &= ~S_IFMT;
1373 		vap->va_mode |= S_IFLNK;
1374 	} else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1375 		vap->va_rdev = vp->v_rdev;
1376 		vap->va_mode &= ~S_IFMT;
1377 		if (vap->va_type == VCHR)
1378 			vap->va_mode |= S_IFCHR;
1379 		else
1380 			vap->va_mode |= S_IFBLK;
1381 	} else {
1382 		vap->va_rdev = 0;
1383 	}
1384 }
1385 
1386 static struct vattr *
1387 sdev_getdefault_attr(enum vtype type)
1388 {
1389 	if (type == VDIR)
1390 		return (&sdev_vattr_dir);
1391 	else if (type == VCHR)
1392 		return (&sdev_vattr_chr);
1393 	else if (type == VBLK)
1394 		return (&sdev_vattr_blk);
1395 	else if (type == VLNK)
1396 		return (&sdev_vattr_lnk);
1397 	else
1398 		return (NULL);
1399 }
1400 int
1401 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1402 {
1403 	int rv = 0;
1404 	struct vnode *vp = SDEVTOV(dv);
1405 
1406 	switch (vp->v_type) {
1407 	case VCHR:
1408 	case VBLK:
1409 		/*
1410 		 * If vnode is a device, return special vnode instead
1411 		 * (though it knows all about -us- via sp->s_realvp)
1412 		 */
1413 		*vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1414 		VN_RELE(vp);
1415 		if (*vpp == NULLVP)
1416 			rv = ENOSYS;
1417 		break;
1418 	default:	/* most types are returned as is */
1419 		*vpp = vp;
1420 		break;
1421 	}
1422 	return (rv);
1423 }
1424 
1425 /*
1426  * loopback into sdev_lookup()
1427  */
1428 static struct vnode *
1429 devname_find_by_devpath(char *devpath, struct vattr *vattr)
1430 {
1431 	int error = 0;
1432 	struct vnode *vp;
1433 
1434 	error = lookupname(devpath, UIO_SYSSPACE, NO_FOLLOW, NULLVPP, &vp);
1435 	if (error) {
1436 		return (NULL);
1437 	}
1438 
1439 	if (vattr)
1440 		(void) VOP_GETATTR(vp, vattr, 0, kcred, NULL);
1441 	return (vp);
1442 }
1443 
1444 /*
1445  * the junction between devname and devfs
1446  */
1447 static struct vnode *
1448 devname_configure_by_path(char *physpath, struct vattr *vattr)
1449 {
1450 	int error = 0;
1451 	struct vnode *vp;
1452 
1453 	ASSERT(strncmp(physpath, "/devices/", sizeof ("/devices/") - 1)
1454 	    == 0);
1455 
1456 	error = devfs_lookupname(physpath + sizeof ("/devices/") - 1,
1457 	    NULLVPP, &vp);
1458 	if (error != 0) {
1459 		if (error == ENODEV) {
1460 			cmn_err(CE_CONT, "%s: not found (line %d)\n",
1461 			    physpath, __LINE__);
1462 		}
1463 
1464 		return (NULL);
1465 	}
1466 
1467 	if (vattr)
1468 		(void) VOP_GETATTR(vp, vattr, 0, kcred, NULL);
1469 	return (vp);
1470 }
1471 
1472 /*
1473  * junction between devname and root file system, e.g. ufs
1474  */
1475 int
1476 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1477 {
1478 	struct vnode *rdvp = ddv->sdev_attrvp;
1479 	int rval = 0;
1480 
1481 	ASSERT(rdvp);
1482 
1483 	rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1484 	    NULL);
1485 	return (rval);
1486 }
1487 
1488 static int
1489 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1490 {
1491 	struct sdev_node *dv = NULL;
1492 	char	*nm;
1493 	struct vnode *dirvp;
1494 	int	error;
1495 	vnode_t	*vp;
1496 	int eof;
1497 	struct iovec iov;
1498 	struct uio uio;
1499 	struct dirent64 *dp;
1500 	dirent64_t *dbuf;
1501 	size_t dbuflen;
1502 	struct vattr vattr;
1503 	char *link = NULL;
1504 
1505 	if (ddv->sdev_attrvp == NULL)
1506 		return (0);
1507 	if (!(ddv->sdev_flags & SDEV_BUILD))
1508 		return (0);
1509 
1510 	dirvp = ddv->sdev_attrvp;
1511 	VN_HOLD(dirvp);
1512 	dbuf = kmem_zalloc(dlen, KM_SLEEP);
1513 
1514 	uio.uio_iov = &iov;
1515 	uio.uio_iovcnt = 1;
1516 	uio.uio_segflg = UIO_SYSSPACE;
1517 	uio.uio_fmode = 0;
1518 	uio.uio_extflg = UIO_COPY_CACHED;
1519 	uio.uio_loffset = 0;
1520 	uio.uio_llimit = MAXOFFSET_T;
1521 
1522 	eof = 0;
1523 	error = 0;
1524 	while (!error && !eof) {
1525 		uio.uio_resid = dlen;
1526 		iov.iov_base = (char *)dbuf;
1527 		iov.iov_len = dlen;
1528 		(void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1529 		error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1530 		VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1531 
1532 		dbuflen = dlen - uio.uio_resid;
1533 		if (error || dbuflen == 0)
1534 			break;
1535 
1536 		if (!(ddv->sdev_flags & SDEV_BUILD)) {
1537 			error = 0;
1538 			break;
1539 		}
1540 
1541 		for (dp = dbuf; ((intptr_t)dp <
1542 		    (intptr_t)dbuf + dbuflen);
1543 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1544 			nm = dp->d_name;
1545 
1546 			if (strcmp(nm, ".") == 0 ||
1547 			    strcmp(nm, "..") == 0)
1548 				continue;
1549 
1550 			vp = NULLVP;
1551 			dv = sdev_cache_lookup(ddv, nm);
1552 			if (dv) {
1553 				if (dv->sdev_state != SDEV_ZOMBIE) {
1554 					SDEV_SIMPLE_RELE(dv);
1555 				} else {
1556 					/*
1557 					 * A ZOMBIE node may not have been
1558 					 * cleaned up from the backing store,
1559 					 * bypass this entry in this case,
1560 					 * and clean it up from the directory
1561 					 * cache if this is the last call.
1562 					 */
1563 					(void) sdev_dirdelete(ddv, dv);
1564 				}
1565 				continue;
1566 			}
1567 
1568 			/* refill the cache if not already */
1569 			error = devname_backstore_lookup(ddv, nm, &vp);
1570 			if (error)
1571 				continue;
1572 
1573 			vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1574 			error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1575 			if (error)
1576 				continue;
1577 
1578 			if (vattr.va_type == VLNK) {
1579 				error = sdev_getlink(vp, &link);
1580 				if (error) {
1581 					continue;
1582 				}
1583 				ASSERT(link != NULL);
1584 			}
1585 
1586 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1587 				rw_exit(&ddv->sdev_contents);
1588 				rw_enter(&ddv->sdev_contents, RW_WRITER);
1589 			}
1590 			error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1591 			    cred, SDEV_READY);
1592 			rw_downgrade(&ddv->sdev_contents);
1593 
1594 			if (link != NULL) {
1595 				kmem_free(link, strlen(link) + 1);
1596 				link = NULL;
1597 			}
1598 
1599 			if (!error) {
1600 				ASSERT(dv);
1601 				ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1602 				SDEV_SIMPLE_RELE(dv);
1603 			}
1604 			vp = NULL;
1605 			dv = NULL;
1606 		}
1607 	}
1608 
1609 done:
1610 	VN_RELE(dirvp);
1611 	kmem_free(dbuf, dlen);
1612 
1613 	return (error);
1614 }
1615 
1616 void
1617 sdev_filldir_dynamic(struct sdev_node *ddv)
1618 {
1619 	int error;
1620 	int i;
1621 	struct vattr *vap;
1622 	char *nm = NULL;
1623 	struct sdev_node *dv = NULL;
1624 
1625 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1626 	ASSERT((ddv->sdev_flags & SDEV_BUILD));
1627 
1628 	vap = sdev_getdefault_attr(VDIR);
1629 	for (i = 0; vtab[i].vt_name != NULL; i++) {
1630 		nm = vtab[i].vt_name;
1631 		ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1632 		dv = NULL;
1633 		error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1634 		    NULL, kcred, SDEV_READY);
1635 		if (error) {
1636 			cmn_err(CE_WARN, "%s/%s: error %d\n",
1637 			    ddv->sdev_name, nm, error);
1638 		} else {
1639 			ASSERT(dv);
1640 			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1641 			SDEV_SIMPLE_RELE(dv);
1642 		}
1643 	}
1644 }
1645 
1646 /*
1647  * Creating a backing store entry based on sdev_attr.
1648  * This is called either as part of node creation in a persistent directory
1649  * or from setattr/setsecattr to persist access attributes across reboot.
1650  */
1651 int
1652 sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1653 {
1654 	int error = 0;
1655 	struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1656 	struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1657 	struct vattr *vap = dv->sdev_attr;
1658 	char *nm = dv->sdev_name;
1659 	struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1660 
1661 	ASSERT(dv && dv->sdev_name && rdvp);
1662 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1663 
1664 lookup:
1665 	/* try to find it in the backing store */
1666 	error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1667 	    NULL);
1668 	if (error == 0) {
1669 		if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1670 			VN_HOLD(rrvp);
1671 			VN_RELE(*rvp);
1672 			*rvp = rrvp;
1673 		}
1674 
1675 		kmem_free(dv->sdev_attr, sizeof (vattr_t));
1676 		dv->sdev_attr = NULL;
1677 		dv->sdev_attrvp = *rvp;
1678 		return (0);
1679 	}
1680 
1681 	/* let's try to persist the node */
1682 	gethrestime(&vap->va_atime);
1683 	vap->va_mtime = vap->va_atime;
1684 	vap->va_ctime = vap->va_atime;
1685 	vap->va_mask |= AT_TYPE|AT_MODE;
1686 	switch (vap->va_type) {
1687 	case VDIR:
1688 		error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1689 		sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1690 		    (void *)(*rvp), error));
1691 		break;
1692 	case VCHR:
1693 	case VBLK:
1694 	case VREG:
1695 	case VDOOR:
1696 		error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1697 		    rvp, cred, 0, NULL, NULL);
1698 		sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1699 		    (void *)(*rvp), error));
1700 		if (!error)
1701 			VN_RELE(*rvp);
1702 		break;
1703 	case VLNK:
1704 		ASSERT(dv->sdev_symlink);
1705 		error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1706 		    NULL, 0);
1707 		sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1708 		    error));
1709 		break;
1710 	default:
1711 		cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1712 		    "create\n", nm);
1713 		/*NOTREACHED*/
1714 	}
1715 
1716 	/* go back to lookup to factor out spec node and set attrvp */
1717 	if (error == 0)
1718 		goto lookup;
1719 
1720 	sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error));
1721 	return (error);
1722 }
1723 
1724 static int
1725 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1726 {
1727 	int error = 0;
1728 	struct sdev_node *dup = NULL;
1729 
1730 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1731 	if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1732 		sdev_direnter(ddv, *dv);
1733 	} else {
1734 		if (dup->sdev_state == SDEV_ZOMBIE) {
1735 			error = sdev_dirdelete(ddv, dup);
1736 			/*
1737 			 * The ZOMBIE node is still hanging
1738 			 * around with more than one reference counts.
1739 			 * Fail the new node creation so that
1740 			 * the directory cache won't have
1741 			 * duplicate entries for the same named node
1742 			 */
1743 			if (error == EBUSY) {
1744 				SDEV_SIMPLE_RELE(*dv);
1745 				sdev_nodedestroy(*dv, 0);
1746 				*dv = NULL;
1747 				return (error);
1748 			}
1749 			sdev_direnter(ddv, *dv);
1750 		} else {
1751 			ASSERT((*dv)->sdev_state != SDEV_ZOMBIE);
1752 			SDEV_SIMPLE_RELE(*dv);
1753 			sdev_nodedestroy(*dv, 0);
1754 			*dv = dup;
1755 		}
1756 	}
1757 
1758 	return (0);
1759 }
1760 
1761 static int
1762 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1763 {
1764 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1765 	return (sdev_dirdelete(ddv, *dv));
1766 }
1767 
1768 /*
1769  * update the in-core directory cache
1770  */
1771 int
1772 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1773     sdev_cache_ops_t ops)
1774 {
1775 	int error = 0;
1776 
1777 	ASSERT((SDEV_HELD(*dv)));
1778 
1779 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1780 	switch (ops) {
1781 	case SDEV_CACHE_ADD:
1782 		error = sdev_cache_add(ddv, dv, nm);
1783 		break;
1784 	case SDEV_CACHE_DELETE:
1785 		error = sdev_cache_delete(ddv, dv);
1786 		break;
1787 	default:
1788 		break;
1789 	}
1790 
1791 	return (error);
1792 }
1793 
1794 /*
1795  * retrieve the named entry from the directory cache
1796  */
1797 struct sdev_node *
1798 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1799 {
1800 	struct sdev_node *dv = NULL;
1801 
1802 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1803 	dv = sdev_findbyname(ddv, nm);
1804 
1805 	return (dv);
1806 }
1807 
1808 /*
1809  * Implicit reconfig for nodes constructed by a link generator
1810  * Start devfsadm if needed, or if devfsadm is in progress,
1811  * prepare to block on devfsadm either completing or
1812  * constructing the desired node.  As devfsadmd is global
1813  * in scope, constructing all necessary nodes, we only
1814  * need to initiate it once.
1815  */
1816 static int
1817 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1818 {
1819 	int error = 0;
1820 
1821 	if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1822 		sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1823 		    ddv->sdev_name, nm, devfsadm_state));
1824 		mutex_enter(&dv->sdev_lookup_lock);
1825 		SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1826 		mutex_exit(&dv->sdev_lookup_lock);
1827 		error = 0;
1828 	} else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1829 		sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1830 		    ddv->sdev_name, nm, devfsadm_state));
1831 
1832 		sdev_devfsadmd_thread(ddv, dv, kcred);
1833 		mutex_enter(&dv->sdev_lookup_lock);
1834 		SDEV_BLOCK_OTHERS(dv,
1835 		    (SDEV_LOOKUP | SDEV_LGWAITING));
1836 		mutex_exit(&dv->sdev_lookup_lock);
1837 		error = 0;
1838 	} else {
1839 		error = -1;
1840 	}
1841 
1842 	return (error);
1843 }
1844 
1845 static int
1846 sdev_call_modulelookup(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1847     int (*fn)(char *, devname_handle_t *, struct cred *), struct cred *cred)
1848 {
1849 	struct vnode *rvp = NULL;
1850 	int error = 0;
1851 	struct vattr *vap;
1852 	devname_spec_t spec;
1853 	devname_handle_t *hdl;
1854 	void *args = NULL;
1855 	struct sdev_node *dv = *dvp;
1856 
1857 	ASSERT(dv && ddv);
1858 	hdl = &(dv->sdev_handle);
1859 	ASSERT(hdl->dh_data == dv);
1860 	mutex_enter(&dv->sdev_lookup_lock);
1861 	SDEV_BLOCK_OTHERS(dv, SDEV_LOOKUP);
1862 	mutex_exit(&dv->sdev_lookup_lock);
1863 	error = (*fn)(nm, hdl, cred);
1864 	if (error) {
1865 		return (error);
1866 	}
1867 
1868 	spec = hdl->dh_spec;
1869 	args = hdl->dh_args;
1870 	ASSERT(args);
1871 
1872 	switch (spec) {
1873 	case DEVNAME_NS_PATH:
1874 		/*
1875 		 * symlink of:
1876 		 *	/dev/dir/nm -> /device/...
1877 		 */
1878 		rvp = devname_configure_by_path((char *)args, NULL);
1879 		break;
1880 	case DEVNAME_NS_DEV:
1881 		/*
1882 		 * symlink of:
1883 		 *	/dev/dir/nm -> /dev/...
1884 		 */
1885 		rvp = devname_find_by_devpath((char *)args, NULL);
1886 		break;
1887 	default:
1888 		if (args)
1889 			kmem_free((char *)args, strlen(args) + 1);
1890 		return (ENOENT);
1891 
1892 	}
1893 
1894 	if (rvp == NULL) {
1895 		if (args)
1896 			kmem_free((char *)args, strlen(args) + 1);
1897 		return (ENOENT);
1898 	} else {
1899 		vap = sdev_getdefault_attr(VLNK);
1900 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1901 		/*
1902 		 * Could sdev_mknode return a different dv_node
1903 		 * once the lock is dropped?
1904 		 */
1905 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
1906 			rw_exit(&ddv->sdev_contents);
1907 			rw_enter(&ddv->sdev_contents, RW_WRITER);
1908 		}
1909 		error = sdev_mknode(ddv, nm, &dv, vap, NULL, args, cred,
1910 		    SDEV_READY);
1911 		rw_downgrade(&ddv->sdev_contents);
1912 		if (error) {
1913 			if (args)
1914 				kmem_free((char *)args, strlen(args) + 1);
1915 			return (error);
1916 		} else {
1917 			mutex_enter(&dv->sdev_lookup_lock);
1918 			SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
1919 			mutex_exit(&dv->sdev_lookup_lock);
1920 			error = 0;
1921 		}
1922 	}
1923 
1924 	if (args)
1925 		kmem_free((char *)args, strlen(args) + 1);
1926 
1927 	*dvp = dv;
1928 	return (0);
1929 }
1930 
1931 /*
1932  *  Support for specialized device naming construction mechanisms
1933  */
1934 static int
1935 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1936     int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1937     void *, char *), int flags, struct cred *cred)
1938 {
1939 	int rv = 0;
1940 	char *physpath = NULL;
1941 	struct vnode *rvp = NULL;
1942 	struct vattr vattr;
1943 	struct vattr *vap;
1944 	struct sdev_node *dv = *dvp;
1945 
1946 	mutex_enter(&dv->sdev_lookup_lock);
1947 	SDEV_BLOCK_OTHERS(dv, SDEV_LOOKUP);
1948 	mutex_exit(&dv->sdev_lookup_lock);
1949 
1950 	/* for non-devfsadm devices */
1951 	if (flags & SDEV_PATH) {
1952 		physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1953 		rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1954 		    NULL);
1955 		if (rv) {
1956 			kmem_free(physpath, MAXPATHLEN);
1957 			return (-1);
1958 		}
1959 
1960 		ASSERT(physpath);
1961 		rvp = devname_configure_by_path(physpath, NULL);
1962 		if (rvp == NULL) {
1963 			sdcmn_err3(("devname_configure_by_path: "
1964 			    "failed for /dev/%s/%s\n",
1965 			    ddv->sdev_name, nm));
1966 			kmem_free(physpath, MAXPATHLEN);
1967 			rv = -1;
1968 		} else {
1969 			vap = sdev_getdefault_attr(VLNK);
1970 			ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1971 
1972 			/*
1973 			 * Sdev_mknode may return back a different sdev_node
1974 			 * that was created by another thread that
1975 			 * raced to the directroy cache before this thread.
1976 			 *
1977 			 * With current directory cache mechanism
1978 			 * (linked list with the sdev_node name as
1979 			 * the entity key), this is a way to make sure
1980 			 * only one entry exists for the same name
1981 			 * in the same directory. The outcome is
1982 			 * the winner wins.
1983 			 */
1984 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1985 				rw_exit(&ddv->sdev_contents);
1986 				rw_enter(&ddv->sdev_contents, RW_WRITER);
1987 			}
1988 			rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1989 			    (void *)physpath, cred, SDEV_READY);
1990 			rw_downgrade(&ddv->sdev_contents);
1991 			kmem_free(physpath, MAXPATHLEN);
1992 			if (rv) {
1993 				return (rv);
1994 			} else {
1995 				mutex_enter(&dv->sdev_lookup_lock);
1996 				SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
1997 				mutex_exit(&dv->sdev_lookup_lock);
1998 				return (0);
1999 			}
2000 		}
2001 	} else if (flags & SDEV_VNODE) {
2002 		/*
2003 		 * DBNR has its own way to create the device
2004 		 * and return a backing store vnode in rvp
2005 		 */
2006 		ASSERT(callback);
2007 		rv = callback(ddv, nm, (void *)&rvp, kcred, NULL, NULL);
2008 		if (rv || (rvp == NULL)) {
2009 			sdcmn_err3(("devname_lookup_func: SDEV_VNODE "
2010 			    "callback failed \n"));
2011 			return (-1);
2012 		}
2013 		vap = sdev_getdefault_attr(rvp->v_type);
2014 		if (vap == NULL)
2015 			return (-1);
2016 
2017 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2018 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2019 			rw_exit(&ddv->sdev_contents);
2020 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2021 		}
2022 		rv = sdev_mknode(ddv, nm, &dv, vap, rvp, NULL,
2023 		    cred, SDEV_READY);
2024 		rw_downgrade(&ddv->sdev_contents);
2025 		if (rv)
2026 			return (rv);
2027 
2028 		mutex_enter(&dv->sdev_lookup_lock);
2029 		SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2030 		mutex_exit(&dv->sdev_lookup_lock);
2031 		return (0);
2032 	} else if (flags & SDEV_VATTR) {
2033 		/*
2034 		 * /dev/pts
2035 		 *
2036 		 * DBNR has its own way to create the device
2037 		 * "0" is returned upon success.
2038 		 *
2039 		 * callback is responsible to set the basic attributes,
2040 		 * e.g. va_type/va_uid/va_gid/
2041 		 *    dev_t if VCHR or VBLK/
2042 		 */
2043 		ASSERT(callback);
2044 		rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
2045 		if (rv) {
2046 			sdcmn_err3(("devname_lookup_func: SDEV_NONE "
2047 			    "callback failed \n"));
2048 			return (-1);
2049 		}
2050 
2051 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2052 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2053 			rw_exit(&ddv->sdev_contents);
2054 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2055 		}
2056 		rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
2057 		    cred, SDEV_READY);
2058 		rw_downgrade(&ddv->sdev_contents);
2059 
2060 		if (rv)
2061 			return (rv);
2062 
2063 		mutex_enter(&dv->sdev_lookup_lock);
2064 		SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2065 		mutex_exit(&dv->sdev_lookup_lock);
2066 		return (0);
2067 	} else {
2068 		impossible(("lookup: %s/%s by %s not supported (%d)\n",
2069 		    SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
2070 		    __LINE__));
2071 		rv = -1;
2072 	}
2073 
2074 	*dvp = dv;
2075 	return (rv);
2076 }
2077 
/*
 * Tell whether the given executable name belongs to devfsadm.
 * Returns 1 on an exact match with "devfsadm", 0 otherwise.
 *
 * note: because devfsadmd -> /usr/sbin/devfsadm
 * it is safe to use "devfsadm" to capture the lookups
 * from devfsadm and its daemon version.
 */
static int
is_devfsadm_thread(char *exec_name)
{
	return ((strcmp(exec_name, "devfsadm") == 0) ? 1 : 0);
}
2090 
2091 
2092 /*
2093  * Lookup Order:
2094  *	sdev_node cache;
2095  *	backing store (SDEV_PERSIST);
2096  *	DBNR: a. dir_ops implemented in the loadable modules;
2097  *	      b. vnode ops in vtab.
2098  */
2099 int
2100 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
2101     struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
2102     struct cred *, void *, char *), int flags)
2103 {
2104 	int rv = 0, nmlen;
2105 	struct vnode *rvp = NULL;
2106 	struct sdev_node *dv = NULL;
2107 	int	retried = 0;
2108 	int	error = 0;
2109 	struct devname_nsmap *map = NULL;
2110 	struct devname_ops *dirops = NULL;
2111 	int (*fn)(char *, devname_handle_t *, struct cred *) = NULL;
2112 	struct vattr vattr;
2113 	char *lookup_thread = curproc->p_user.u_comm;
2114 	int failed_flags = 0;
2115 	int (*vtor)(struct sdev_node *) = NULL;
2116 	int state;
2117 	int parent_state;
2118 	char *link = NULL;
2119 
2120 	if (SDEVTOV(ddv)->v_type != VDIR)
2121 		return (ENOTDIR);
2122 
2123 	/*
2124 	 * Empty name or ., return node itself.
2125 	 */
2126 	nmlen = strlen(nm);
2127 	if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
2128 		*vpp = SDEVTOV(ddv);
2129 		VN_HOLD(*vpp);
2130 		return (0);
2131 	}
2132 
2133 	/*
2134 	 * .., return the parent directory
2135 	 */
2136 	if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
2137 		*vpp = SDEVTOV(ddv->sdev_dotdot);
2138 		VN_HOLD(*vpp);
2139 		return (0);
2140 	}
2141 
2142 	rw_enter(&ddv->sdev_contents, RW_READER);
2143 	if (ddv->sdev_flags & SDEV_VTOR) {
2144 		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2145 		ASSERT(vtor);
2146 	}
2147 
2148 tryagain:
2149 	/*
2150 	 * (a) directory cache lookup:
2151 	 */
2152 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2153 	parent_state = ddv->sdev_state;
2154 	dv = sdev_cache_lookup(ddv, nm);
2155 	if (dv) {
2156 		state = dv->sdev_state;
2157 		switch (state) {
2158 		case SDEV_INIT:
2159 			if (is_devfsadm_thread(lookup_thread))
2160 				break;
2161 
2162 			/* ZOMBIED parent won't allow node creation */
2163 			if (parent_state == SDEV_ZOMBIE) {
2164 				SD_TRACE_FAILED_LOOKUP(ddv, nm,
2165 				    retried);
2166 				goto nolock_notfound;
2167 			}
2168 
2169 			mutex_enter(&dv->sdev_lookup_lock);
2170 			/* compensate the threads started after devfsadm */
2171 			if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2172 			    !(SDEV_IS_LOOKUP(dv)))
2173 				SDEV_BLOCK_OTHERS(dv,
2174 				    (SDEV_LOOKUP | SDEV_LGWAITING));
2175 
2176 			if (SDEV_IS_LOOKUP(dv)) {
2177 				failed_flags |= SLF_REBUILT;
2178 				rw_exit(&ddv->sdev_contents);
2179 				error = sdev_wait4lookup(dv, SDEV_LOOKUP);
2180 				mutex_exit(&dv->sdev_lookup_lock);
2181 				rw_enter(&ddv->sdev_contents, RW_READER);
2182 
2183 				if (error != 0) {
2184 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2185 					    retried);
2186 					goto nolock_notfound;
2187 				}
2188 
2189 				state = dv->sdev_state;
2190 				if (state == SDEV_INIT) {
2191 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2192 					    retried);
2193 					goto nolock_notfound;
2194 				} else if (state == SDEV_READY) {
2195 					goto found;
2196 				} else if (state == SDEV_ZOMBIE) {
2197 					rw_exit(&ddv->sdev_contents);
2198 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2199 					    retried);
2200 					SDEV_RELE(dv);
2201 					goto lookup_failed;
2202 				}
2203 			} else {
2204 				mutex_exit(&dv->sdev_lookup_lock);
2205 			}
2206 			break;
2207 		case SDEV_READY:
2208 			goto found;
2209 		case SDEV_ZOMBIE:
2210 			rw_exit(&ddv->sdev_contents);
2211 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2212 			SDEV_RELE(dv);
2213 			goto lookup_failed;
2214 		default:
2215 			rw_exit(&ddv->sdev_contents);
2216 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2217 			sdev_lookup_failed(ddv, nm, failed_flags);
2218 			*vpp = NULLVP;
2219 			return (ENOENT);
2220 		}
2221 	}
2222 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2223 
2224 	/*
2225 	 * ZOMBIED parent does not allow new node creation.
2226 	 * bail out early
2227 	 */
2228 	if (parent_state == SDEV_ZOMBIE) {
2229 		rw_exit(&ddv->sdev_contents);
2230 		*vpp = NULL;
2231 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2232 		return (ENOENT);
2233 	}
2234 
2235 	/*
2236 	 * (b0): backing store lookup
2237 	 *	SDEV_PERSIST is default except:
2238 	 *		1) pts nodes
2239 	 *		2) non-chmod'ed local nodes
2240 	 */
2241 	if (SDEV_IS_PERSIST(ddv)) {
2242 		error = devname_backstore_lookup(ddv, nm, &rvp);
2243 
2244 		if (!error) {
2245 			sdcmn_err3(("devname_backstore_lookup: "
2246 			    "found attrvp %p for %s\n", (void *)rvp, nm));
2247 
2248 			vattr.va_mask = AT_MODE|AT_UID|AT_GID;
2249 			error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
2250 			if (error) {
2251 				rw_exit(&ddv->sdev_contents);
2252 				if (dv)
2253 					SDEV_RELE(dv);
2254 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2255 				sdev_lookup_failed(ddv, nm, failed_flags);
2256 				*vpp = NULLVP;
2257 				return (ENOENT);
2258 			}
2259 
2260 			if (vattr.va_type == VLNK) {
2261 				error = sdev_getlink(rvp, &link);
2262 				if (error) {
2263 					rw_exit(&ddv->sdev_contents);
2264 					if (dv)
2265 						SDEV_RELE(dv);
2266 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2267 					    retried);
2268 					sdev_lookup_failed(ddv, nm,
2269 					    failed_flags);
2270 					*vpp = NULLVP;
2271 					return (ENOENT);
2272 				}
2273 				ASSERT(link != NULL);
2274 			}
2275 
2276 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
2277 				rw_exit(&ddv->sdev_contents);
2278 				rw_enter(&ddv->sdev_contents, RW_WRITER);
2279 			}
2280 			error = sdev_mknode(ddv, nm, &dv, &vattr,
2281 			    rvp, link, cred, SDEV_READY);
2282 			rw_downgrade(&ddv->sdev_contents);
2283 
2284 			if (link != NULL) {
2285 				kmem_free(link, strlen(link) + 1);
2286 				link = NULL;
2287 			}
2288 
2289 			if (error) {
2290 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2291 				rw_exit(&ddv->sdev_contents);
2292 				if (dv)
2293 					SDEV_RELE(dv);
2294 				goto lookup_failed;
2295 			} else {
2296 				goto found;
2297 			}
2298 		} else if (retried) {
2299 			rw_exit(&ddv->sdev_contents);
2300 			sdcmn_err3(("retry of lookup of %s/%s: failed\n",
2301 			    ddv->sdev_name, nm));
2302 			if (dv)
2303 				SDEV_RELE(dv);
2304 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2305 			sdev_lookup_failed(ddv, nm, failed_flags);
2306 			*vpp = NULLVP;
2307 			return (ENOENT);
2308 		}
2309 	}
2310 
2311 
2312 	/* first thread that is doing the lookup on this node */
2313 	if (!dv) {
2314 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2315 			rw_exit(&ddv->sdev_contents);
2316 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2317 		}
2318 		error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
2319 		    cred, SDEV_INIT);
2320 		if (!dv) {
2321 			rw_exit(&ddv->sdev_contents);
2322 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2323 			sdev_lookup_failed(ddv, nm, failed_flags);
2324 			*vpp = NULLVP;
2325 			return (ENOENT);
2326 		}
2327 		rw_downgrade(&ddv->sdev_contents);
2328 	}
2329 	ASSERT(dv);
2330 	ASSERT(SDEV_HELD(dv));
2331 
2332 	if (SDEV_IS_NO_NCACHE(dv)) {
2333 		failed_flags |= SLF_NO_NCACHE;
2334 	}
2335 
2336 	if (SDEV_IS_GLOBAL(ddv)) {
2337 		map = sdev_get_map(ddv, 1);
2338 		dirops = map ? map->dir_ops : NULL;
2339 		fn = dirops ? dirops->devnops_lookup : NULL;
2340 	}
2341 
2342 	/*
2343 	 * (b1) invoking devfsadm once per life time for devfsadm nodes
2344 	 */
2345 	if ((fn == NULL) && !callback) {
2346 
2347 		if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
2348 		    SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
2349 		    ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
2350 			ASSERT(SDEV_HELD(dv));
2351 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2352 			goto nolock_notfound;
2353 		}
2354 
2355 		/*
2356 		 * filter out known non-existent devices recorded
2357 		 * during initial reconfiguration boot for which
2358 		 * reconfig should not be done and lookup may
2359 		 * be short-circuited now.
2360 		 */
2361 		if (sdev_lookup_filter(ddv, nm)) {
2362 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2363 			goto nolock_notfound;
2364 		}
2365 
2366 		/* bypassing devfsadm internal nodes */
2367 		if (is_devfsadm_thread(lookup_thread)) {
2368 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2369 			goto nolock_notfound;
2370 		}
2371 
2372 		if (sdev_reconfig_disable) {
2373 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2374 			goto nolock_notfound;
2375 		}
2376 
2377 		error = sdev_call_devfsadmd(ddv, dv, nm);
2378 		if (error == 0) {
2379 			sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
2380 			    ddv->sdev_name, nm, curproc->p_user.u_comm));
2381 			if (sdev_reconfig_verbose) {
2382 				cmn_err(CE_CONT,
2383 				    "?lookup of %s/%s by %s: reconfig\n",
2384 				    ddv->sdev_name, nm, curproc->p_user.u_comm);
2385 			}
2386 			retried = 1;
2387 			failed_flags |= SLF_REBUILT;
2388 			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
2389 			SDEV_SIMPLE_RELE(dv);
2390 			goto tryagain;
2391 		} else {
2392 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2393 			goto nolock_notfound;
2394 		}
2395 	}
2396 
2397 	/*
2398 	 * (b2) Directory Based Name Resolution (DBNR):
2399 	 *	ddv	- parent
2400 	 *	nm	- /dev/(ddv->sdev_name)/nm
2401 	 *
2402 	 *	note: module vnode ops take precedence than the build-in ones
2403 	 */
2404 	if (fn) {
2405 		error = sdev_call_modulelookup(ddv, &dv, nm, fn, cred);
2406 		if (error) {
2407 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2408 			goto notfound;
2409 		} else {
2410 			goto found;
2411 		}
2412 	} else if (callback) {
2413 		error = sdev_call_dircallback(ddv, &dv, nm, callback,
2414 		    flags, cred);
2415 		if (error == 0) {
2416 			goto found;
2417 		} else {
2418 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2419 			goto notfound;
2420 		}
2421 	}
2422 	ASSERT(rvp);
2423 
2424 found:
2425 	ASSERT(!(dv->sdev_flags & SDEV_STALE));
2426 	ASSERT(dv->sdev_state == SDEV_READY);
2427 	if (vtor) {
2428 		/*
2429 		 * Check validity of returned node
2430 		 */
2431 		switch (vtor(dv)) {
2432 		case SDEV_VTOR_VALID:
2433 			break;
2434 		case SDEV_VTOR_INVALID:
2435 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2436 			sdcmn_err7(("lookup: destroy invalid "
2437 			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
2438 			goto nolock_notfound;
2439 		case SDEV_VTOR_SKIP:
2440 			sdcmn_err7(("lookup: node not applicable - "
2441 			    "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
2442 			rw_exit(&ddv->sdev_contents);
2443 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2444 			SDEV_RELE(dv);
2445 			goto lookup_failed;
2446 		default:
2447 			cmn_err(CE_PANIC,
2448 			    "dev fs: validator failed: %s(%p)\n",
2449 			    dv->sdev_name, (void *)dv);
2450 			break;
2451 			/*NOTREACHED*/
2452 		}
2453 	}
2454 
2455 	if ((SDEVTOV(dv)->v_type == VDIR) && SDEV_IS_GLOBAL(dv)) {
2456 		rw_enter(&dv->sdev_contents, RW_READER);
2457 		(void) sdev_get_map(dv, 1);
2458 		rw_exit(&dv->sdev_contents);
2459 	}
2460 	rw_exit(&ddv->sdev_contents);
2461 	rv = sdev_to_vp(dv, vpp);
2462 	sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
2463 	    "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
2464 	    dv->sdev_state, nm, rv));
2465 	return (rv);
2466 
2467 notfound:
2468 	mutex_enter(&dv->sdev_lookup_lock);
2469 	SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2470 	mutex_exit(&dv->sdev_lookup_lock);
2471 nolock_notfound:
2472 	/*
2473 	 * Destroy the node that is created for synchronization purposes.
2474 	 */
2475 	sdcmn_err3(("devname_lookup_func: %s with state %d\n",
2476 	    nm, dv->sdev_state));
2477 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2478 	if (dv->sdev_state == SDEV_INIT) {
2479 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2480 			rw_exit(&ddv->sdev_contents);
2481 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2482 		}
2483 
2484 		/*
2485 		 * Node state may have changed during the lock
2486 		 * changes. Re-check.
2487 		 */
2488 		if (dv->sdev_state == SDEV_INIT) {
2489 			(void) sdev_dirdelete(ddv, dv);
2490 			rw_exit(&ddv->sdev_contents);
2491 			sdev_lookup_failed(ddv, nm, failed_flags);
2492 			*vpp = NULL;
2493 			return (ENOENT);
2494 		}
2495 	}
2496 
2497 	rw_exit(&ddv->sdev_contents);
2498 	SDEV_RELE(dv);
2499 
2500 lookup_failed:
2501 	sdev_lookup_failed(ddv, nm, failed_flags);
2502 	*vpp = NULL;
2503 	return (ENOENT);
2504 }
2505 
2506 /*
2507  * Given a directory node, mark all nodes beneath as
2508  * STALE, i.e. nodes that don't exist as far as new
2509  * consumers are concerned
2510  */
2511 void
2512 sdev_stale(struct sdev_node *ddv)
2513 {
2514 	struct sdev_node *dv;
2515 	struct vnode *vp;
2516 
2517 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2518 
2519 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2520 	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = SDEV_NEXT_ENTRY(ddv, dv)) {
2521 		vp = SDEVTOV(dv);
2522 		if (vp->v_type == VDIR)
2523 			sdev_stale(dv);
2524 
2525 		sdcmn_err9(("sdev_stale: setting stale %s\n",
2526 		    dv->sdev_name));
2527 		dv->sdev_flags |= SDEV_STALE;
2528 	}
2529 	ddv->sdev_flags |= SDEV_BUILD;
2530 	rw_exit(&ddv->sdev_contents);
2531 }
2532 
2533 /*
2534  * Given a directory node, clean out all the nodes beneath.
2535  * If expr is specified, clean node with names matching expr.
2536  * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2537  *	so they are excluded from future lookups.
2538  */
2539 int
2540 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2541 {
2542 	int error = 0;
2543 	int busy = 0;
2544 	struct vnode *vp;
2545 	struct sdev_node *dv, *next = NULL;
2546 	int bkstore = 0;
2547 	int len = 0;
2548 	char *bks_name = NULL;
2549 
2550 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2551 
2552 	/*
2553 	 * We try our best to destroy all unused sdev_node's
2554 	 */
2555 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2556 	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) {
2557 		next = SDEV_NEXT_ENTRY(ddv, dv);
2558 		vp = SDEVTOV(dv);
2559 
2560 		if (expr && gmatch(dv->sdev_name, expr) == 0)
2561 			continue;
2562 
2563 		if (vp->v_type == VDIR &&
2564 		    sdev_cleandir(dv, NULL, flags) != 0) {
2565 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2566 			    dv->sdev_name));
2567 			busy++;
2568 			continue;
2569 		}
2570 
2571 		if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2572 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2573 			    dv->sdev_name));
2574 			busy++;
2575 			continue;
2576 		}
2577 
2578 		/*
2579 		 * at this point, either dv is not held or SDEV_ENFORCE
2580 		 * is specified. In either case, dv needs to be deleted
2581 		 */
2582 		SDEV_HOLD(dv);
2583 
2584 		bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2585 		if (bkstore && (vp->v_type == VDIR))
2586 			bkstore += 1;
2587 
2588 		if (bkstore) {
2589 			len = strlen(dv->sdev_name) + 1;
2590 			bks_name = kmem_alloc(len, KM_SLEEP);
2591 			bcopy(dv->sdev_name, bks_name, len);
2592 		}
2593 
2594 		error = sdev_dirdelete(ddv, dv);
2595 
2596 		if (error == EBUSY) {
2597 			sdcmn_err9(("sdev_cleandir: dir busy\n"));
2598 			busy++;
2599 		}
2600 
2601 		/* take care the backing store clean up */
2602 		if (bkstore && (error == 0)) {
2603 			ASSERT(bks_name);
2604 			ASSERT(ddv->sdev_attrvp);
2605 
2606 			if (bkstore == 1) {
2607 				error = VOP_REMOVE(ddv->sdev_attrvp,
2608 				    bks_name, kcred, NULL, 0);
2609 			} else if (bkstore == 2) {
2610 				error = VOP_RMDIR(ddv->sdev_attrvp,
2611 				    bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2612 			}
2613 
2614 			/* do not propagate the backing store errors */
2615 			if (error) {
2616 				sdcmn_err9(("sdev_cleandir: backing store"
2617 				    "not cleaned\n"));
2618 				error = 0;
2619 			}
2620 
2621 			bkstore = 0;
2622 			kmem_free(bks_name, len);
2623 			bks_name = NULL;
2624 			len = 0;
2625 		}
2626 	}
2627 
2628 	ddv->sdev_flags |= SDEV_BUILD;
2629 	rw_exit(&ddv->sdev_contents);
2630 
2631 	if (busy) {
2632 		error = EBUSY;
2633 	}
2634 
2635 	return (error);
2636 }
2637 
2638 /*
2639  * a convenient wrapper for readdir() funcs
2640  */
2641 size_t
2642 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2643 {
2644 	size_t reclen = DIRENT64_RECLEN(strlen(nm));
2645 	if (reclen > size)
2646 		return (0);
2647 
2648 	de->d_ino = (ino64_t)ino;
2649 	de->d_off = (off64_t)off + 1;
2650 	de->d_reclen = (ushort_t)reclen;
2651 	(void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2652 	return (reclen);
2653 }
2654 
2655 /*
2656  * sdev_mount service routines
2657  */
2658 int
2659 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2660 {
2661 	int	error;
2662 
2663 	if (uap->datalen != sizeof (*args))
2664 		return (EINVAL);
2665 
2666 	if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2667 		cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2668 		    "get user data. error %d\n", error);
2669 		return (EFAULT);
2670 	}
2671 
2672 	return (0);
2673 }
2674 
/*
 * Step from one dirent64 record to the one that follows it in a packed
 * buffer, using the current record's d_reclen as the stride.  Any
 * earlier definition of nextdp is discarded first.
 */
#undef	nextdp
#define	nextdp(dp)	\
	((struct dirent64 *)(intptr_t)((char *)(dp) + (dp)->d_reclen))
2680 
2681 /*
2682  * readdir helper func
2683  */
2684 int
2685 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
2686     int flags)
2687 {
2688 	struct sdev_node *ddv = VTOSDEV(vp);
2689 	struct sdev_node *dv;
2690 	dirent64_t	*dp;
2691 	ulong_t		outcount = 0;
2692 	size_t		namelen;
2693 	ulong_t		alloc_count;
2694 	void		*outbuf;
2695 	struct iovec	*iovp;
2696 	int		error = 0;
2697 	size_t		reclen;
2698 	offset_t	diroff;
2699 	offset_t	soff;
2700 	int		this_reclen;
2701 	struct devname_nsmap	*map = NULL;
2702 	struct devname_ops	*dirops = NULL;
2703 	int (*fn)(devname_handle_t *, struct cred *) = NULL;
2704 	int (*vtor)(struct sdev_node *) = NULL;
2705 	struct vattr attr;
2706 	timestruc_t now;
2707 
2708 	ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
2709 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2710 
2711 	if (uiop->uio_loffset >= MAXOFF_T) {
2712 		if (eofp)
2713 			*eofp = 1;
2714 		return (0);
2715 	}
2716 
2717 	if (uiop->uio_iovcnt != 1)
2718 		return (EINVAL);
2719 
2720 	if (vp->v_type != VDIR)
2721 		return (ENOTDIR);
2722 
2723 	if (ddv->sdev_flags & SDEV_VTOR) {
2724 		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2725 		ASSERT(vtor);
2726 	}
2727 
2728 	if (eofp != NULL)
2729 		*eofp = 0;
2730 
2731 	soff = uiop->uio_loffset;
2732 	iovp = uiop->uio_iov;
2733 	alloc_count = iovp->iov_len;
2734 	dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
2735 	outcount = 0;
2736 
2737 	if (ddv->sdev_state == SDEV_ZOMBIE)
2738 		goto get_cache;
2739 
2740 	if (SDEV_IS_GLOBAL(ddv)) {
2741 		map = sdev_get_map(ddv, 0);
2742 		dirops = map ? map->dir_ops : NULL;
2743 		fn = dirops ? dirops->devnops_readdir : NULL;
2744 
2745 		if (map && map->dir_map) {
2746 			/*
2747 			 * load the name mapping rule database
2748 			 * through invoking devfsadm and symlink
2749 			 * all the entries in the map
2750 			 */
2751 			devname_rdr_result_t rdr_result;
2752 			int do_thread = 0;
2753 
2754 			rw_enter(&map->dir_lock, RW_READER);
2755 			do_thread = map->dir_maploaded ? 0 : 1;
2756 			rw_exit(&map->dir_lock);
2757 
2758 			if (do_thread) {
2759 				mutex_enter(&ddv->sdev_lookup_lock);
2760 				SDEV_BLOCK_OTHERS(ddv, SDEV_READDIR);
2761 				mutex_exit(&ddv->sdev_lookup_lock);
2762 
2763 				sdev_dispatch_to_nsrdr_thread(ddv,
2764 				    map->dir_map, &rdr_result);
2765 			}
2766 		} else if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
2767 		    !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
2768 		    !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
2769 		    ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
2770 		    !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
2771 		    !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2772 		    !sdev_reconfig_disable) {
2773 			/*
2774 			 * invoking "devfsadm" to do system device reconfig
2775 			 */
2776 			mutex_enter(&ddv->sdev_lookup_lock);
2777 			SDEV_BLOCK_OTHERS(ddv,
2778 			    (SDEV_READDIR|SDEV_LGWAITING));
2779 			mutex_exit(&ddv->sdev_lookup_lock);
2780 
2781 			sdcmn_err8(("readdir of %s by %s: reconfig\n",
2782 			    ddv->sdev_path, curproc->p_user.u_comm));
2783 			if (sdev_reconfig_verbose) {
2784 				cmn_err(CE_CONT,
2785 				    "?readdir of %s by %s: reconfig\n",
2786 				    ddv->sdev_path, curproc->p_user.u_comm);
2787 			}
2788 
2789 			sdev_devfsadmd_thread(ddv, NULL, kcred);
2790 		} else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
2791 			/*
2792 			 * compensate the "ls" started later than "devfsadm"
2793 			 */
2794 			mutex_enter(&ddv->sdev_lookup_lock);
2795 			SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
2796 			mutex_exit(&ddv->sdev_lookup_lock);
2797 		}
2798 
2799 		/*
2800 		 * release the contents lock so that
2801 		 * the cache may be updated by devfsadmd
2802 		 */
2803 		rw_exit(&ddv->sdev_contents);
2804 		mutex_enter(&ddv->sdev_lookup_lock);
2805 		if (SDEV_IS_READDIR(ddv))
2806 			(void) sdev_wait4lookup(ddv, SDEV_READDIR);
2807 		mutex_exit(&ddv->sdev_lookup_lock);
2808 		rw_enter(&ddv->sdev_contents, RW_READER);
2809 
2810 		sdcmn_err4(("readdir of directory %s by %s\n",
2811 		    ddv->sdev_name, curproc->p_user.u_comm));
2812 		if (ddv->sdev_flags & SDEV_BUILD) {
2813 			if (SDEV_IS_PERSIST(ddv)) {
2814 				error = sdev_filldir_from_store(ddv,
2815 				    alloc_count, cred);
2816 			}
2817 			ddv->sdev_flags &= ~SDEV_BUILD;
2818 		}
2819 	}
2820 
2821 get_cache:
2822 	/* handle "." and ".." */
2823 	diroff = 0;
2824 	if (soff == 0) {
2825 		/* first time */
2826 		this_reclen = DIRENT64_RECLEN(1);
2827 		if (alloc_count < this_reclen) {
2828 			error = EINVAL;
2829 			goto done;
2830 		}
2831 
2832 		dp->d_ino = (ino64_t)ddv->sdev_ino;
2833 		dp->d_off = (off64_t)1;
2834 		dp->d_reclen = (ushort_t)this_reclen;
2835 
2836 		(void) strncpy(dp->d_name, ".",
2837 		    DIRENT64_NAMELEN(this_reclen));
2838 		outcount += dp->d_reclen;
2839 		dp = nextdp(dp);
2840 	}
2841 
2842 	diroff++;
2843 	if (soff <= 1) {
2844 		this_reclen = DIRENT64_RECLEN(2);
2845 		if (alloc_count < outcount + this_reclen) {
2846 			error = EINVAL;
2847 			goto done;
2848 		}
2849 
2850 		dp->d_reclen = (ushort_t)this_reclen;
2851 		dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
2852 		dp->d_off = (off64_t)2;
2853 
2854 		(void) strncpy(dp->d_name, "..",
2855 		    DIRENT64_NAMELEN(this_reclen));
2856 		outcount += dp->d_reclen;
2857 
2858 		dp = nextdp(dp);
2859 	}
2860 
2861 
2862 	/* gets the cache */
2863 	diroff++;
2864 	for (dv = SDEV_FIRST_ENTRY(ddv); dv;
2865 	    dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
2866 		sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
2867 		    diroff, soff, dv->sdev_name));
2868 
2869 		/* bypassing pre-matured nodes */
2870 		if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
2871 			sdcmn_err3(("sdev_readdir: pre-mature node  "
2872 			    "%s\n", dv->sdev_name));
2873 			continue;
2874 		}
2875 
2876 		/* don't list stale nodes */
2877 		if (dv->sdev_flags & SDEV_STALE) {
2878 			sdcmn_err4(("sdev_readdir: STALE node  "
2879 			    "%s\n", dv->sdev_name));
2880 			continue;
2881 		}
2882 
2883 		/*
2884 		 * Check validity of node
2885 		 */
2886 		if (vtor) {
2887 			switch (vtor(dv)) {
2888 			case SDEV_VTOR_VALID:
2889 				break;
2890 			case SDEV_VTOR_INVALID:
2891 			case SDEV_VTOR_SKIP:
2892 				continue;
2893 			default:
2894 				cmn_err(CE_PANIC,
2895 				    "dev fs: validator failed: %s(%p)\n",
2896 				    dv->sdev_name, (void *)dv);
2897 				break;
2898 			/*NOTREACHED*/
2899 			}
2900 		}
2901 
2902 		/*
2903 		 * call back into the module for the validity/bookkeeping
2904 		 * of this entry
2905 		 */
2906 		if (fn) {
2907 			error = (*fn)(&(dv->sdev_handle), cred);
2908 			if (error) {
2909 				sdcmn_err4(("sdev_readdir: module did not "
2910 				    "validate %s\n", dv->sdev_name));
2911 				continue;
2912 			}
2913 		}
2914 
2915 		namelen = strlen(dv->sdev_name);
2916 		reclen = DIRENT64_RECLEN(namelen);
2917 		if (outcount + reclen > alloc_count) {
2918 			goto full;
2919 		}
2920 		dp->d_reclen = (ushort_t)reclen;
2921 		dp->d_ino = (ino64_t)dv->sdev_ino;
2922 		dp->d_off = (off64_t)diroff + 1;
2923 		(void) strncpy(dp->d_name, dv->sdev_name,
2924 		    DIRENT64_NAMELEN(reclen));
2925 		outcount += reclen;
2926 		dp = nextdp(dp);
2927 	}
2928 
2929 full:
2930 	sdcmn_err4(("sdev_readdir: moving %lu bytes: "
2931 	    "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
2932 	    (void *)dv));
2933 
2934 	if (outcount)
2935 		error = uiomove(outbuf, outcount, UIO_READ, uiop);
2936 
2937 	if (!error) {
2938 		uiop->uio_loffset = diroff;
2939 		if (eofp)
2940 			*eofp = dv ? 0 : 1;
2941 	}
2942 
2943 
2944 	if (ddv->sdev_attrvp) {
2945 		gethrestime(&now);
2946 		attr.va_ctime = now;
2947 		attr.va_atime = now;
2948 		attr.va_mask = AT_CTIME|AT_ATIME;
2949 
2950 		(void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
2951 	}
2952 done:
2953 	kmem_free(outbuf, alloc_count);
2954 	return (error);
2955 }
2956 
2957 
2958 static int
2959 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2960 {
2961 	vnode_t *vp;
2962 	vnode_t *cvp;
2963 	struct sdev_node *svp;
2964 	char *nm;
2965 	struct pathname pn;
2966 	int error;
2967 	int persisted = 0;
2968 
2969 	if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2970 		return (error);
2971 	nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2972 
2973 	vp = rootdir;
2974 	VN_HOLD(vp);
2975 
2976 	while (pn_pathleft(&pn)) {
2977 		ASSERT(vp->v_type == VDIR);
2978 		(void) pn_getcomponent(&pn, nm);
2979 		error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2980 		    NULL, NULL);
2981 		VN_RELE(vp);
2982 
2983 		if (error)
2984 			break;
2985 
2986 		/* traverse mount points encountered on our journey */
2987 		if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2988 			VN_RELE(cvp);
2989 			break;
2990 		}
2991 
2992 		/*
2993 		 * Direct the operation to the persisting filesystem
2994 		 * underlying /dev.  Bail if we encounter a
2995 		 * non-persistent dev entity here.
2996 		 */
2997 		if (cvp->v_vfsp->vfs_fstype == devtype) {
2998 
2999 			if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
3000 				error = ENOENT;
3001 				VN_RELE(cvp);
3002 				break;
3003 			}
3004 
3005 			if (VTOSDEV(cvp) == NULL) {
3006 				error = ENOENT;
3007 				VN_RELE(cvp);
3008 				break;
3009 			}
3010 			svp = VTOSDEV(cvp);
3011 			if ((vp = svp->sdev_attrvp) == NULL) {
3012 				error = ENOENT;
3013 				VN_RELE(cvp);
3014 				break;
3015 			}
3016 			persisted = 1;
3017 			VN_HOLD(vp);
3018 			VN_RELE(cvp);
3019 			cvp = vp;
3020 		}
3021 
3022 		vp = cvp;
3023 		pn_skipslash(&pn);
3024 	}
3025 
3026 	kmem_free(nm, MAXNAMELEN);
3027 	pn_free(&pn);
3028 
3029 	if (error)
3030 		return (error);
3031 
3032 	/*
3033 	 * Only return persisted nodes in the filesystem underlying /dev.
3034 	 */
3035 	if (!persisted) {
3036 		VN_RELE(vp);
3037 		return (ENOENT);
3038 	}
3039 
3040 	*r_vp = vp;
3041 	return (0);
3042 }
3043 
3044 int
3045 sdev_modctl_readdir(const char *dir, char ***dirlistp,
3046 	int *npathsp, int *npathsp_alloc, int checking_empty)
3047 {
3048 	char	**pathlist = NULL;
3049 	char	**newlist = NULL;
3050 	int	npaths = 0;
3051 	int	npaths_alloc = 0;
3052 	dirent64_t *dbuf = NULL;
3053 	int	n;
3054 	char	*s;
3055 	int error;
3056 	vnode_t *vp;
3057 	int eof;
3058 	struct iovec iov;
3059 	struct uio uio;
3060 	struct dirent64 *dp;
3061 	size_t dlen;
3062 	size_t dbuflen;
3063 	int ndirents = 64;
3064 	char *nm;
3065 
3066 	error = sdev_modctl_lookup(dir, &vp);
3067 	sdcmn_err11(("modctl readdir: %s by %s: %s\n",
3068 	    dir, curproc->p_user.u_comm,
3069 	    (error == 0) ? "ok" : "failed"));
3070 	if (error)
3071 		return (error);
3072 
3073 	dlen = ndirents * (sizeof (*dbuf));
3074 	dbuf = kmem_alloc(dlen, KM_SLEEP);
3075 
3076 	uio.uio_iov = &iov;
3077 	uio.uio_iovcnt = 1;
3078 	uio.uio_segflg = UIO_SYSSPACE;
3079 	uio.uio_fmode = 0;
3080 	uio.uio_extflg = UIO_COPY_CACHED;
3081 	uio.uio_loffset = 0;
3082 	uio.uio_llimit = MAXOFFSET_T;
3083 
3084 	eof = 0;
3085 	error = 0;
3086 	while (!error && !eof) {
3087 		uio.uio_resid = dlen;
3088 		iov.iov_base = (char *)dbuf;
3089 		iov.iov_len = dlen;
3090 
3091 		(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3092 		error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
3093 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3094 
3095 		dbuflen = dlen - uio.uio_resid;
3096 
3097 		if (error || dbuflen == 0)
3098 			break;
3099 
3100 		for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
3101 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
3102 
3103 			nm = dp->d_name;
3104 
3105 			if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
3106 				continue;
3107 			if (npaths == npaths_alloc) {
3108 				npaths_alloc += 64;
3109 				newlist = (char **)
3110 				    kmem_zalloc((npaths_alloc + 1) *
3111 				    sizeof (char *), KM_SLEEP);
3112 				if (pathlist) {
3113 					bcopy(pathlist, newlist,
3114 					    npaths * sizeof (char *));
3115 					kmem_free(pathlist,
3116 					    (npaths + 1) * sizeof (char *));
3117 				}
3118 				pathlist = newlist;
3119 			}
3120 			n = strlen(nm) + 1;
3121 			s = kmem_alloc(n, KM_SLEEP);
3122 			bcopy(nm, s, n);
3123 			pathlist[npaths++] = s;
3124 			sdcmn_err11(("  %s/%s\n", dir, s));
3125 
3126 			/* if checking empty, one entry is as good as many */
3127 			if (checking_empty) {
3128 				eof = 1;
3129 				break;
3130 			}
3131 		}
3132 	}
3133 
3134 exit:
3135 	VN_RELE(vp);
3136 
3137 	if (dbuf)
3138 		kmem_free(dbuf, dlen);
3139 
3140 	if (error)
3141 		return (error);
3142 
3143 	*dirlistp = pathlist;
3144 	*npathsp = npaths;
3145 	*npathsp_alloc = npaths_alloc;
3146 
3147 	return (0);
3148 }
3149 
/*
 * Release a name list produced by sdev_modctl_readdir().
 *
 *	pathlist	- the array of names; may be NULL, which is what
 *			  sdev_modctl_readdir() hands back for a
 *			  directory without entries
 *	npaths		- number of names stored in pathlist
 *	npaths_alloc	- slot count reported by sdev_modctl_readdir();
 *			  the array was allocated with one extra slot
 */
void
sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
{
	int	i, n;

	/* nothing was allocated for an empty listing */
	if (pathlist == NULL)
		return;

	for (i = 0; i < npaths; i++) {
		n = strlen(pathlist[i]) + 1;
		kmem_free(pathlist[i], n);
	}

	kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
}
3162 
3163 int
3164 sdev_modctl_devexists(const char *path)
3165 {
3166 	vnode_t *vp;
3167 	int error;
3168 
3169 	error = sdev_modctl_lookup(path, &vp);
3170 	sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
3171 	    path, curproc->p_user.u_comm,
3172 	    (error == 0) ? "ok" : "failed"));
3173 	if (error == 0)
3174 		VN_RELE(vp);
3175 
3176 	return (error);
3177 }
3178 
3179 void
3180 sdev_update_newnsmap(struct devname_nsmap *map, char *module, char *mapname)
3181 {
3182 	rw_enter(&map->dir_lock, RW_WRITER);
3183 	if (module) {
3184 		ASSERT(map->dir_newmodule == NULL);
3185 		map->dir_newmodule = i_ddi_strdup(module, KM_SLEEP);
3186 	}
3187 	if (mapname) {
3188 		ASSERT(map->dir_newmap == NULL);
3189 		map->dir_newmap = i_ddi_strdup(mapname, KM_SLEEP);
3190 	}
3191 
3192 	map->dir_invalid = 1;
3193 	rw_exit(&map->dir_lock);
3194 }
3195 
3196 void
3197 sdev_replace_nsmap(struct devname_nsmap *map, char *module, char *mapname)
3198 {
3199 	char *old_module = NULL;
3200 	char *old_map = NULL;
3201 
3202 	ASSERT(RW_LOCK_HELD(&map->dir_lock));
3203 	if (!rw_tryupgrade(&map->dir_lock)) {
3204 		rw_exit(&map->dir_lock);
3205 		rw_enter(&map->dir_lock, RW_WRITER);
3206 	}
3207 
3208 	old_module = map->dir_module;
3209 	if (module) {
3210 		if (old_module && strcmp(old_module, module) != 0) {
3211 			kmem_free(old_module, strlen(old_module) + 1);
3212 		}
3213 		map->dir_module = module;
3214 		map->dir_newmodule = NULL;
3215 	}
3216 
3217 	old_map = map->dir_map;
3218 	if (mapname) {
3219 		if (old_map && strcmp(old_map, mapname) != 0) {
3220 			kmem_free(old_map, strlen(old_map) + 1);
3221 		}
3222 
3223 		map->dir_map = mapname;
3224 		map->dir_newmap = NULL;
3225 	}
3226 	map->dir_maploaded = 0;
3227 	map->dir_invalid = 0;
3228 	rw_downgrade(&map->dir_lock);
3229 }
3230 
3231 /*
3232  * dir_name should have at least one attribute,
3233  *	dir_module
3234  *	or dir_map
3235  *	or both
3236  * caller holds the devname_nsmaps_lock
3237  */
3238 void
3239 sdev_insert_nsmap(char *dir_name, char *dir_module, char *dir_map)
3240 {
3241 	struct devname_nsmap *map;
3242 	int len = 0;
3243 
3244 	ASSERT(dir_name);
3245 	ASSERT(dir_module || dir_map);
3246 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3247 
3248 	if (map = sdev_get_nsmap_by_dir(dir_name, 1)) {
3249 		sdev_update_newnsmap(map, dir_module, dir_map);
3250 		return;
3251 	}
3252 
3253 	map = (struct devname_nsmap *)kmem_zalloc(sizeof (*map), KM_SLEEP);
3254 	map->dir_name = i_ddi_strdup(dir_name, KM_SLEEP);
3255 	if (dir_module) {
3256 		map->dir_module = i_ddi_strdup(dir_module, KM_SLEEP);
3257 	}
3258 
3259 	if (dir_map) {
3260 		if (dir_map[0] != '/') {
3261 			len = strlen(ETC_DEV_DIR) + strlen(dir_map) + 2;
3262 			map->dir_map = kmem_zalloc(len, KM_SLEEP);
3263 			(void) snprintf(map->dir_map, len, "%s/%s", ETC_DEV_DIR,
3264 			    dir_map);
3265 		} else {
3266 			map->dir_map = i_ddi_strdup(dir_map, KM_SLEEP);
3267 		}
3268 	}
3269 
3270 	map->dir_ops = NULL;
3271 	map->dir_maploaded = 0;
3272 	map->dir_invalid = 0;
3273 	rw_init(&map->dir_lock, NULL, RW_DEFAULT, NULL);
3274 
3275 	map->next = devname_nsmaps;
3276 	map->prev = NULL;
3277 	if (devname_nsmaps) {
3278 		devname_nsmaps->prev = map;
3279 	}
3280 	devname_nsmaps = map;
3281 }
3282 
3283 struct devname_nsmap *
3284 sdev_get_nsmap_by_dir(char *dir_path, int locked)
3285 {
3286 	struct devname_nsmap *map = NULL;
3287 
3288 	if (!locked)
3289 		mutex_enter(&devname_nsmaps_lock);
3290 	for (map = devname_nsmaps; map; map = map->next) {
3291 		sdcmn_err6(("sdev_get_nsmap_by_dir: dir %s\n", map->dir_name));
3292 		if (strcmp(map->dir_name, dir_path) == 0) {
3293 			if (!locked)
3294 				mutex_exit(&devname_nsmaps_lock);
3295 			return (map);
3296 		}
3297 	}
3298 	if (!locked)
3299 		mutex_exit(&devname_nsmaps_lock);
3300 	return (NULL);
3301 }
3302 
3303 struct devname_nsmap *
3304 sdev_get_nsmap_by_module(char *mod_name)
3305 {
3306 	struct devname_nsmap *map = NULL;
3307 
3308 	mutex_enter(&devname_nsmaps_lock);
3309 	for (map = devname_nsmaps; map; map = map->next) {
3310 		sdcmn_err7(("sdev_get_nsmap_by_module: module %s\n",
3311 		    map->dir_module));
3312 		if (map->dir_module && strcmp(map->dir_module, mod_name) == 0) {
3313 			mutex_exit(&devname_nsmaps_lock);
3314 			return (map);
3315 		}
3316 	}
3317 	mutex_exit(&devname_nsmaps_lock);
3318 	return (NULL);
3319 }
3320 
3321 void
3322 sdev_invalidate_nsmaps()
3323 {
3324 	struct devname_nsmap *map = NULL;
3325 
3326 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3327 
3328 	if (devname_nsmaps == NULL)
3329 		return;
3330 
3331 	for (map = devname_nsmaps; map; map = map->next) {
3332 		rw_enter(&map->dir_lock, RW_WRITER);
3333 		map->dir_invalid = 1;
3334 		rw_exit(&map->dir_lock);
3335 	}
3336 	devname_nsmaps_invalidated = 1;
3337 }
3338 
3339 
3340 int
3341 sdev_nsmaps_loaded()
3342 {
3343 	int ret = 0;
3344 
3345 	mutex_enter(&devname_nsmaps_lock);
3346 	if (devname_nsmaps_loaded)
3347 		ret = 1;
3348 
3349 	mutex_exit(&devname_nsmaps_lock);
3350 	return (ret);
3351 }
3352 
3353 int
3354 sdev_nsmaps_reloaded()
3355 {
3356 	int ret = 0;
3357 
3358 	mutex_enter(&devname_nsmaps_lock);
3359 	if (devname_nsmaps_invalidated)
3360 		ret = 1;
3361 
3362 	mutex_exit(&devname_nsmaps_lock);
3363 	return (ret);
3364 }
3365 
3366 static void
3367 sdev_free_nsmap(struct devname_nsmap *map)
3368 {
3369 	ASSERT(map);
3370 	if (map->dir_name)
3371 		kmem_free(map->dir_name, strlen(map->dir_name) + 1);
3372 	if (map->dir_module)
3373 		kmem_free(map->dir_module, strlen(map->dir_module) + 1);
3374 	if (map->dir_map)
3375 		kmem_free(map->dir_map, strlen(map->dir_map) + 1);
3376 	rw_destroy(&map->dir_lock);
3377 	kmem_free(map, sizeof (*map));
3378 }
3379 
3380 void
3381 sdev_validate_nsmaps()
3382 {
3383 	struct devname_nsmap *map = NULL;
3384 	struct devname_nsmap *oldmap = NULL;
3385 
3386 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3387 	map = devname_nsmaps;
3388 	while (map) {
3389 		rw_enter(&map->dir_lock, RW_READER);
3390 		if ((map->dir_invalid == 1) && (map->dir_newmodule == NULL) &&
3391 		    (map->dir_newmap == NULL)) {
3392 			oldmap = map;
3393 			rw_exit(&map->dir_lock);
3394 			if (map->prev)
3395 				map->prev->next = oldmap->next;
3396 			if (map == devname_nsmaps)
3397 				devname_nsmaps = oldmap->next;
3398 
3399 			map = oldmap->next;
3400 			if (map)
3401 				map->prev = oldmap->prev;
3402 			sdev_free_nsmap(oldmap);
3403 			oldmap = NULL;
3404 		} else {
3405 			rw_exit(&map->dir_lock);
3406 			map = map->next;
3407 		}
3408 	}
3409 	devname_nsmaps_invalidated = 0;
3410 }
3411 
3412 static int
3413 sdev_map_is_invalid(struct devname_nsmap *map)
3414 {
3415 	int ret = 0;
3416 
3417 	ASSERT(map);
3418 	rw_enter(&map->dir_lock, RW_READER);
3419 	if (map->dir_invalid)
3420 		ret = 1;
3421 	rw_exit(&map->dir_lock);
3422 	return (ret);
3423 }
3424 
3425 static int
3426 sdev_check_map(struct devname_nsmap *map)
3427 {
3428 	struct devname_nsmap *mapp;
3429 
3430 	mutex_enter(&devname_nsmaps_lock);
3431 	if (devname_nsmaps == NULL) {
3432 		mutex_exit(&devname_nsmaps_lock);
3433 		return (1);
3434 	}
3435 
3436 	for (mapp = devname_nsmaps; mapp; mapp = mapp->next) {
3437 		if (mapp == map) {
3438 			mutex_exit(&devname_nsmaps_lock);
3439 			return (0);
3440 		}
3441 	}
3442 
3443 	mutex_exit(&devname_nsmaps_lock);
3444 	return (1);
3445 
3446 }
3447 
3448 struct devname_nsmap *
3449 sdev_get_map(struct sdev_node *dv, int validate)
3450 {
3451 	struct devname_nsmap *map;
3452 	int error;
3453 
3454 	ASSERT(RW_READ_HELD(&dv->sdev_contents));
3455 	map = dv->sdev_mapinfo;
3456 	if (map && sdev_check_map(map)) {
3457 		if (!rw_tryupgrade(&dv->sdev_contents)) {
3458 			rw_exit(&dv->sdev_contents);
3459 			rw_enter(&dv->sdev_contents, RW_WRITER);
3460 		}
3461 		dv->sdev_mapinfo = NULL;
3462 		rw_downgrade(&dv->sdev_contents);
3463 		return (NULL);
3464 	}
3465 
3466 	if (validate && (!map || (map && sdev_map_is_invalid(map)))) {
3467 		if (!rw_tryupgrade(&dv->sdev_contents)) {
3468 			rw_exit(&dv->sdev_contents);
3469 			rw_enter(&dv->sdev_contents, RW_WRITER);
3470 		}
3471 		error = sdev_get_moduleops(dv);
3472 		if (!error)
3473 			map = dv->sdev_mapinfo;
3474 		rw_downgrade(&dv->sdev_contents);
3475 	}
3476 	return (map);
3477 }
3478 
3479 extern int sdev_vnodeops_tbl_size;
3480 
3481 /*
3482  * construct a new template with overrides from vtab
3483  */
3484 static fs_operation_def_t *
3485 sdev_merge_vtab(const fs_operation_def_t tab[])
3486 {
3487 	fs_operation_def_t *new;
3488 	const fs_operation_def_t *tab_entry;
3489 
3490 	/* make a copy of standard vnode ops table */
3491 	new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
3492 	bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
3493 
3494 	/* replace the overrides from tab */
3495 	for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
3496 		fs_operation_def_t *std_entry = new;
3497 		while (std_entry->name) {
3498 			if (strcmp(tab_entry->name, std_entry->name) == 0) {
3499 				std_entry->func = tab_entry->func;
3500 				break;
3501 			}
3502 			std_entry++;
3503 		}
3504 		if (std_entry->name == NULL)
3505 			cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
3506 			    tab_entry->name);
3507 	}
3508 
3509 	return (new);
3510 }
3511 
3512 /* free memory allocated by sdev_merge_vtab */
3513 static void
3514 sdev_free_vtab(fs_operation_def_t *new)
3515 {
3516 	kmem_free(new, sdev_vnodeops_tbl_size);
3517 }
3518 
3519 void
3520 devname_get_vnode(devname_handle_t *hdl, vnode_t **vpp)
3521 {
3522 	struct sdev_node *dv = hdl->dh_data;
3523 
3524 	ASSERT(dv);
3525 
3526 	rw_enter(&dv->sdev_contents, RW_READER);
3527 	*vpp = SDEVTOV(dv);
3528 	rw_exit(&dv->sdev_contents);
3529 }
3530 
3531 int
3532 devname_get_path(devname_handle_t *hdl, char **path)
3533 {
3534 	struct sdev_node *dv = hdl->dh_data;
3535 
3536 	ASSERT(dv);
3537 
3538 	rw_enter(&dv->sdev_contents, RW_READER);
3539 	*path = dv->sdev_path;
3540 	rw_exit(&dv->sdev_contents);
3541 	return (0);
3542 }
3543 
3544 int
3545 devname_get_name(devname_handle_t *hdl, char **entry)
3546 {
3547 	struct sdev_node *dv = hdl->dh_data;
3548 
3549 	ASSERT(dv);
3550 	rw_enter(&dv->sdev_contents, RW_READER);
3551 	*entry = dv->sdev_name;
3552 	rw_exit(&dv->sdev_contents);
3553 	return (0);
3554 }
3555 
3556 void
3557 devname_get_dir_vnode(devname_handle_t *hdl, vnode_t **vpp)
3558 {
3559 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3560 
3561 	ASSERT(dv);
3562 
3563 	rw_enter(&dv->sdev_contents, RW_READER);
3564 	*vpp = SDEVTOV(dv);
3565 	rw_exit(&dv->sdev_contents);
3566 }
3567 
3568 int
3569 devname_get_dir_path(devname_handle_t *hdl, char **path)
3570 {
3571 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3572 
3573 	ASSERT(dv);
3574 	rw_enter(&dv->sdev_contents, RW_READER);
3575 	*path = dv->sdev_path;
3576 	rw_exit(&dv->sdev_contents);
3577 	return (0);
3578 }
3579 
3580 int
3581 devname_get_dir_name(devname_handle_t *hdl, char **entry)
3582 {
3583 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3584 
3585 	ASSERT(dv);
3586 	rw_enter(&dv->sdev_contents, RW_READER);
3587 	*entry = dv->sdev_name;
3588 	rw_exit(&dv->sdev_contents);
3589 	return (0);
3590 }
3591 
3592 int
3593 devname_get_dir_nsmap(devname_handle_t *hdl, struct devname_nsmap **map)
3594 {
3595 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3596 
3597 	ASSERT(dv);
3598 	rw_enter(&dv->sdev_contents, RW_READER);
3599 	*map = dv->sdev_mapinfo;
3600 	rw_exit(&dv->sdev_contents);
3601 	return (0);
3602 }
3603 
3604 int
3605 devname_get_dir_handle(devname_handle_t *hdl, devname_handle_t **dir_hdl)
3606 {
3607 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3608 
3609 	ASSERT(dv);
3610 	rw_enter(&dv->sdev_contents, RW_READER);
3611 	*dir_hdl = &(dv->sdev_handle);
3612 	rw_exit(&dv->sdev_contents);
3613 	return (0);
3614 }
3615 
3616 void
3617 devname_set_nodetype(devname_handle_t *hdl, void *args, int spec)
3618 {
3619 	struct sdev_node *dv = hdl->dh_data;
3620 
3621 	ASSERT(dv);
3622 	rw_enter(&dv->sdev_contents, RW_WRITER);
3623 	hdl->dh_spec = (devname_spec_t)spec;
3624 	hdl->dh_args = (void *)i_ddi_strdup((char *)args, KM_SLEEP);
3625 	rw_exit(&dv->sdev_contents);
3626 }
3627 
3628 /*
3629  * a generic setattr() function
3630  *
3631  * note: flags only supports AT_UID and AT_GID.
3632  *	 Future enhancements can be done for other types, e.g. AT_MODE
3633  */
3634 int
3635 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
3636     struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
3637     int), int protocol)
3638 {
3639 	struct sdev_node	*dv = VTOSDEV(vp);
3640 	struct sdev_node	*parent = dv->sdev_dotdot;
3641 	struct vattr		*get;
3642 	uint_t			mask = vap->va_mask;
3643 	int 			error;
3644 
3645 	/* some sanity checks */
3646 	if (vap->va_mask & AT_NOSET)
3647 		return (EINVAL);
3648 
3649 	if (vap->va_mask & AT_SIZE) {
3650 		if (vp->v_type == VDIR) {
3651 			return (EISDIR);
3652 		}
3653 	}
3654 
3655 	/* no need to set attribute, but do not fail either */
3656 	ASSERT(parent);
3657 	rw_enter(&parent->sdev_contents, RW_READER);
3658 	if (dv->sdev_state == SDEV_ZOMBIE) {
3659 		rw_exit(&parent->sdev_contents);
3660 		return (0);
3661 	}
3662 
3663 	/* If backing store exists, just set it. */
3664 	if (dv->sdev_attrvp) {
3665 		rw_exit(&parent->sdev_contents);
3666 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3667 	}
3668 
3669 	/*
3670 	 * Otherwise, for nodes with the persistence attribute, create it.
3671 	 */
3672 	ASSERT(dv->sdev_attr);
3673 	if (SDEV_IS_PERSIST(dv) ||
3674 	    ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
3675 		sdev_vattr_merge(dv, vap);
3676 		rw_enter(&dv->sdev_contents, RW_WRITER);
3677 		error = sdev_shadow_node(dv, cred);
3678 		rw_exit(&dv->sdev_contents);
3679 		rw_exit(&parent->sdev_contents);
3680 
3681 		if (error)
3682 			return (error);
3683 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3684 	}
3685 
3686 
3687 	/*
3688 	 * sdev_attr was allocated in sdev_mknode
3689 	 */
3690 	rw_enter(&dv->sdev_contents, RW_WRITER);
3691 	error = secpolicy_vnode_setattr(cred, vp, vap,
3692 	    dv->sdev_attr, flags, sdev_unlocked_access, dv);
3693 	if (error) {
3694 		rw_exit(&dv->sdev_contents);
3695 		rw_exit(&parent->sdev_contents);
3696 		return (error);
3697 	}
3698 
3699 	get = dv->sdev_attr;
3700 	if (mask & AT_MODE) {
3701 		get->va_mode &= S_IFMT;
3702 		get->va_mode |= vap->va_mode & ~S_IFMT;
3703 	}
3704 
3705 	if ((mask & AT_UID) || (mask & AT_GID)) {
3706 		if (mask & AT_UID)
3707 			get->va_uid = vap->va_uid;
3708 		if (mask & AT_GID)
3709 			get->va_gid = vap->va_gid;
3710 		/*
3711 		 * a callback must be provided if the protocol is set
3712 		 */
3713 		if ((protocol & AT_UID) || (protocol & AT_GID)) {
3714 			ASSERT(callback);
3715 			error = callback(dv, get, protocol);
3716 			if (error) {
3717 				rw_exit(&dv->sdev_contents);
3718 				rw_exit(&parent->sdev_contents);
3719 				return (error);
3720 			}
3721 		}
3722 	}
3723 
3724 	if (mask & AT_ATIME)
3725 		get->va_atime = vap->va_atime;
3726 	if (mask & AT_MTIME)
3727 		get->va_mtime = vap->va_mtime;
3728 	if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
3729 		gethrestime(&get->va_ctime);
3730 	}
3731 
3732 	sdev_vattr_merge(dv, get);
3733 	rw_exit(&dv->sdev_contents);
3734 	rw_exit(&parent->sdev_contents);
3735 	return (0);
3736 }
3737 
3738 /*
3739  * a generic inactive() function
3740  */
3741 void
3742 devname_inactive_func(struct vnode *vp, struct cred *cred,
3743     void (*callback)(struct vnode *))
3744 {
3745 	int clean;
3746 	struct sdev_node *dv = VTOSDEV(vp);
3747 	struct sdev_node *ddv = dv->sdev_dotdot;
3748 	int state;
3749 	struct devname_nsmap *map = NULL;
3750 	struct devname_ops *dirops = NULL;
3751 	void (*fn)(devname_handle_t *, struct cred *) = NULL;
3752 
3753 	rw_enter(&ddv->sdev_contents, RW_WRITER);
3754 	state = dv->sdev_state;
3755 
3756 	mutex_enter(&vp->v_lock);
3757 	ASSERT(vp->v_count >= 1);
3758 
3759 	if (vp->v_count == 1 && callback != NULL)
3760 		callback(vp);
3761 
3762 	clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
3763 
3764 	/*
3765 	 * last ref count on the ZOMBIE node is released.
3766 	 * clean up the sdev_node, and
3767 	 * release the hold on the backing store node so that
3768 	 * the ZOMBIE backing stores also cleaned out.
3769 	 */
3770 	if (clean) {
3771 		ASSERT(ddv);
3772 		if (SDEV_IS_GLOBAL(dv)) {
3773 			map = ddv->sdev_mapinfo;
3774 			dirops = map ? map->dir_ops : NULL;
3775 			if (dirops && (fn = dirops->devnops_inactive))
3776 				(*fn)(&(dv->sdev_handle), cred);
3777 		}
3778 
3779 		ddv->sdev_nlink--;
3780 		if (vp->v_type == VDIR) {
3781 			dv->sdev_nlink--;
3782 		}
3783 		avl_remove(&ddv->sdev_entries, dv);
3784 		dv->sdev_nlink--;
3785 		--vp->v_count;
3786 		mutex_exit(&vp->v_lock);
3787 		sdev_nodedestroy(dv, 0);
3788 	} else {
3789 		--vp->v_count;
3790 		mutex_exit(&vp->v_lock);
3791 	}
3792 	rw_exit(&ddv->sdev_contents);
3793 }
3794