xref: /titanic_44/usr/src/uts/common/fs/dev/sdev_subr.c (revision ea01bd62c06264135e67699c4e213c6de9313abe)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * utility routines for the /dev fs
28  */
29 
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/t_lock.h>
33 #include <sys/systm.h>
34 #include <sys/sysmacros.h>
35 #include <sys/user.h>
36 #include <sys/time.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/file.h>
40 #include <sys/fcntl.h>
41 #include <sys/flock.h>
42 #include <sys/kmem.h>
43 #include <sys/uio.h>
44 #include <sys/errno.h>
45 #include <sys/stat.h>
46 #include <sys/cred.h>
47 #include <sys/dirent.h>
48 #include <sys/pathname.h>
49 #include <sys/cmn_err.h>
50 #include <sys/debug.h>
51 #include <sys/mode.h>
52 #include <sys/policy.h>
53 #include <fs/fs_subr.h>
54 #include <sys/mount.h>
55 #include <sys/fs/snode.h>
56 #include <sys/fs/dv_node.h>
57 #include <sys/fs/sdev_impl.h>
58 #include <sys/fs/sdev_node.h>
59 #include <sys/sunndi.h>
60 #include <sys/sunmdi.h>
61 #include <sys/conf.h>
62 #include <sys/proc.h>
63 #include <sys/user.h>
64 #include <sys/modctl.h>
65 
#ifdef DEBUG
/*
 * Debug knobs, only present in DEBUG kernels.
 * sdev_debug: debug message mask, bit 0 set by default
 *	(presumably consulted by the sdcmn_err*() macros -- confirm in
 *	sdev_impl.h).
 * sdev_debug_cache_flags: extra flags handed to kmem_cache_create()
 *	by sdev_node_cache_init().
 */
int sdev_debug = 0x00000001;
int sdev_debug_cache_flags = 0;
#endif
70 
71 /*
72  * globals
73  */
74 /* prototype memory vattrs */
/*
 * Template attributes for directory nodes: type VDIR with the default
 * directory mode, uid and gid.  Every remaining initializer is zero.
 */
vattr_t sdev_vattr_dir = {
	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
	VDIR,					/* va_type */
	SDEV_DIRMODE_DEFAULT,			/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
93 
/*
 * Template attributes for symbolic link nodes: type VLNK with the default
 * link mode.  Unlike the other templates, va_mask only advertises
 * AT_TYPE|AT_MODE even though the default uid and gid are filled in.
 */
vattr_t sdev_vattr_lnk = {
	AT_TYPE|AT_MODE,			/* va_mask */
	VLNK,					/* va_type */
	SDEV_LNKMODE_DEFAULT,			/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
112 
/*
 * Template attributes for block device nodes: type VBLK, mode
 * S_IFBLK combined with the default device mode, default uid and gid.
 * Every remaining initializer is zero.
 */
vattr_t sdev_vattr_blk = {
	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
	VBLK,					/* va_type */
	S_IFBLK | SDEV_DEVMODE_DEFAULT,		/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
131 
/*
 * Template attributes for character device nodes: type VCHR, mode
 * S_IFCHR combined with the default device mode, default uid and gid.
 * Every remaining initializer is zero.
 */
vattr_t sdev_vattr_chr = {
	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
	VCHR,					/* va_type */
	S_IFCHR | SDEV_DEVMODE_DEFAULT,		/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
150 
/*
 * sdev_node_cache is created by sdev_node_cache_init() and torn down by
 * sdev_node_cache_fini(); every sdev_node is allocated from it.
 */
kmem_cache_t	*sdev_node_cache;	/* sdev_node cache */
int		devtype;		/* fstype */

struct devname_ops *devname_ns_ops;	/* default name service directory ops */
/* initialized in sdev_mkroot() when the root node is named "/dev" */
kmutex_t devname_nsmaps_lock;	/* protect devname_nsmaps */

/* static */
static struct devname_nsmap *devname_nsmaps = NULL;
				/* contents from /etc/dev/devname_master */
static int devname_nsmaps_invalidated = 0; /* "devfsadm -m" has run */

/* forward declarations of file-local helpers used before their definition */
static struct vnodeops *sdev_get_vop(struct sdev_node *);
static void sdev_set_no_nocache(struct sdev_node *);
static int sdev_get_moduleops(struct sdev_node *);
static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
static void sdev_free_vtab(fs_operation_def_t *);
167 
168 static void
169 sdev_prof_free(struct sdev_node *dv)
170 {
171 	ASSERT(!SDEV_IS_GLOBAL(dv));
172 	if (dv->sdev_prof.dev_name)
173 		nvlist_free(dv->sdev_prof.dev_name);
174 	if (dv->sdev_prof.dev_map)
175 		nvlist_free(dv->sdev_prof.dev_map);
176 	if (dv->sdev_prof.dev_symlink)
177 		nvlist_free(dv->sdev_prof.dev_symlink);
178 	if (dv->sdev_prof.dev_glob_incdir)
179 		nvlist_free(dv->sdev_prof.dev_glob_incdir);
180 	if (dv->sdev_prof.dev_glob_excdir)
181 		nvlist_free(dv->sdev_prof.dev_glob_excdir);
182 	bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
183 }
184 
185 /* sdev_node cache constructor */
186 /*ARGSUSED1*/
187 static int
188 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
189 {
190 	struct sdev_node *dv = (struct sdev_node *)buf;
191 	struct vnode *vp;
192 
193 	bzero(buf, sizeof (struct sdev_node));
194 	vp = dv->sdev_vnode = vn_alloc(flag);
195 	if (vp == NULL) {
196 		return (-1);
197 	}
198 	vp->v_data = dv;
199 	rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
200 	return (0);
201 }
202 
203 /* sdev_node cache destructor */
204 /*ARGSUSED1*/
205 static void
206 i_sdev_node_dtor(void *buf, void *arg)
207 {
208 	struct sdev_node *dv = (struct sdev_node *)buf;
209 	struct vnode *vp = SDEVTOV(dv);
210 
211 	rw_destroy(&dv->sdev_contents);
212 	vn_free(vp);
213 }
214 
215 /* initialize sdev_node cache */
216 void
217 sdev_node_cache_init()
218 {
219 	int flags = 0;
220 
221 #ifdef	DEBUG
222 	flags = sdev_debug_cache_flags;
223 	if (flags)
224 		sdcmn_err(("cache debug flags 0x%x\n", flags));
225 #endif	/* DEBUG */
226 
227 	ASSERT(sdev_node_cache == NULL);
228 	sdev_node_cache = kmem_cache_create("sdev_node_cache",
229 	    sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
230 	    NULL, NULL, NULL, flags);
231 }
232 
/*
 * Destroy the sdev_node kmem cache created by sdev_node_cache_init()
 * and clear the global pointer so a later init passes its ASSERT.
 */
void
sdev_node_cache_fini()
{
	ASSERT(sdev_node_cache != NULL);
	kmem_cache_destroy(sdev_node_cache);
	sdev_node_cache = NULL;
}
241 
242 /*
243  * Compare two nodes lexographically to balance avl tree
244  */
245 static int
246 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
247 {
248 	int rv;
249 	if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
250 		return (0);
251 	return ((rv < 0) ? -1 : 1);
252 }
253 
/*
 * Record a new state on dv.
 * The caller must hold dv->sdev_contents as writer (asserted).
 */
void
sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
{
	ASSERT(dv);
	ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
	dv->sdev_state = state;
}
261 
262 static void
263 sdev_attrinit(struct sdev_node *dv, vattr_t *vap)
264 {
265 	timestruc_t now;
266 
267 	ASSERT(vap);
268 
269 	dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
270 	*dv->sdev_attr = *vap;
271 
272 	dv->sdev_attr->va_mode = MAKEIMODE(vap->va_type, vap->va_mode);
273 
274 	gethrestime(&now);
275 	dv->sdev_attr->va_atime = now;
276 	dv->sdev_attr->va_mtime = now;
277 	dv->sdev_attr->va_ctime = now;
278 }
279 
/*
 * Allocate and initialize a sdev_node named nm under parent ddv.
 *
 * The new node gets its own copies of the name and of the full path
 * ("<parent path>/<nm>"), inherits the parent's vfs, vnodeops and flags,
 * and is left in SDEV_INIT state with a link count of zero.  It is NOT
 * inserted into the parent's directory cache here.
 *
 * vap may be NULL; if given, it supplies the vnode type and the initial
 * in-memory attributes.
 *
 * Returns 0 with *newdv set, or ENAMETOOLONG with *newdv set to NULL when
 * nm (including its terminator) exceeds MAXNAMELEN.
 */
int
sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
    vattr_t *vap)
{
	struct sdev_node *dv = NULL;
	struct vnode *vp;
	size_t nmlen, len;
	devname_handle_t  *dhl;

	/* nmlen counts the terminating '\0' */
	nmlen = strlen(nm) + 1;
	if (nmlen > MAXNAMELEN) {
		sdcmn_err9(("sdev_nodeinit: node name %s"
		    " too long\n", nm));
		*newdv = NULL;
		return (ENAMETOOLONG);
	}

	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);

	dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
	bcopy(nm, dv->sdev_name, nmlen);
	dv->sdev_namelen = nmlen - 1;	/* '\0' not included */
	/* room for parent path, '/', name and the terminating '\0' */
	len = strlen(ddv->sdev_path) + strlen(nm) + 2;
	dv->sdev_path = kmem_alloc(len, KM_SLEEP);
	(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
	/* overwritten for VLNK nodes */
	dv->sdev_symlink = NULL;

	vp = SDEVTOV(dv);
	vn_reinit(vp);
	vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
	if (vap)
		vp->v_type = vap->va_type;

	/*
	 * initialized to the parent's vnodeops.
	 * may be overwritten for a VDIR
	 */
	vn_setops(vp, vn_getops(SDEVTOV(ddv)));
	vn_exists(vp);

	dv->sdev_dotdot = NULL;
	dv->sdev_attrvp = NULL;
	if (vap) {
		sdev_attrinit(dv, vap);
	} else {
		dv->sdev_attr = NULL;
	}

	dv->sdev_ino = sdev_mkino(dv);
	dv->sdev_nlink = 0;		/* updated on insert */
	dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
	dv->sdev_flags |= SDEV_BUILD;
	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
	if (SDEV_IS_GLOBAL(ddv)) {
		/* global node: reset the name-service handle and map */
		dv->sdev_flags |= SDEV_GLOBAL;
		dv->sdev_mapinfo = NULL;
		dhl = &(dv->sdev_handle);
		dhl->dh_data = dv;
		dhl->dh_spec = DEVNAME_NS_NONE;
		dhl->dh_args = NULL;
		sdev_set_no_nocache(dv);
		dv->sdev_gdir_gen = 0;
	} else {
		/* non-global node: start with an empty profile */
		dv->sdev_flags &= ~SDEV_GLOBAL;
		dv->sdev_origin = NULL; /* set later */
		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
		dv->sdev_ldir_gen = 0;
		dv->sdev_devtree_gen = 0;
	}

	/* sdev_set_nodestate() asserts the writer lock, so take it briefly */
	rw_enter(&dv->sdev_contents, RW_WRITER);
	sdev_set_nodestate(dv, SDEV_INIT);
	rw_exit(&dv->sdev_contents);
	*newdv = dv;

	return (0);
}
360 
/*
 * transition a sdev_node into SDEV_READY state
 *
 * arguments:
 *	- dv: node to transition; must not already be SDEV_READY
 *	- vap: attributes for the node; va_type and va_rdev are copied
 *	  onto the vnode
 *	- avp: backing attribute vnode, or NULL
 *	- args: symlink target string for VLNK; for a non-global node it
 *	  is stored as the origin sdev_node
 *	- cred: credentials passed to sdev_shadow_node()
 *
 * Returns 0 and leaves the node SDEV_READY, or a non-zero error and
 * leaves the node SDEV_ZOMBIE.  dv->sdev_contents is taken as writer
 * for the duration and released before returning.
 */
int
sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
    void *args, struct cred *cred)
{
	int error = 0;
	struct vnode *vp = SDEVTOV(dv);
	vtype_t type;

	ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);

	type = vap->va_type;
	vp->v_type = type;
	vp->v_rdev = vap->va_rdev;
	rw_enter(&dv->sdev_contents, RW_WRITER);
	if (type == VDIR) {
		/* a directory starts with two links and an empty AVL tree */
		dv->sdev_nlink = 2;
		dv->sdev_flags &= ~SDEV_PERSIST;
		dv->sdev_flags &= ~SDEV_DYNAMIC;
		vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
		/*
		 * NOTE(review): a failure recorded here can be overwritten
		 * by the sdev_shadow_node() result below when avp is NULL --
		 * confirm this is intended.
		 */
		error = sdev_get_moduleops(dv); /* from plug-in module */
		ASSERT(dv->sdev_dotdot);
		ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
		/* directories take their v_rdev from the parent directory */
		vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
		avl_create(&dv->sdev_entries,
		    (int (*)(const void *, const void *))sdev_compare_nodes,
		    sizeof (struct sdev_node),
		    offsetof(struct sdev_node, sdev_avllink));
	} else if (type == VLNK) {
		ASSERT(args);
		dv->sdev_nlink = 1;
		/* keep a private copy of the link target */
		dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
	} else {
		dv->sdev_nlink = 1;
	}

	if (!(SDEV_IS_GLOBAL(dv))) {
		dv->sdev_origin = (struct sdev_node *)args;
		dv->sdev_flags &= ~SDEV_PERSIST;
	}

	/*
	 * shadow node is created here OR
	 * if failed (indicated by dv->sdev_attrvp == NULL),
	 * created later in sdev_setattr
	 */
	if (avp) {
		dv->sdev_attrvp = avp;
	} else {
		if (dv->sdev_attr == NULL)
			sdev_attrinit(dv, vap);
		else
			*dv->sdev_attr = *vap;

		/* persistent nodes and directories get a shadow node */
		if ((SDEV_IS_PERSIST(dv) && (dv->sdev_attrvp == NULL)) ||
		    ((SDEVTOV(dv)->v_type == VDIR) &&
		    (dv->sdev_attrvp == NULL))) {
			error = sdev_shadow_node(dv, cred);
		}
	}

	if (error == 0) {
		/* transition to READY state */
		sdev_set_nodestate(dv, SDEV_READY);
		sdev_nc_node_exists(dv);
	} else {
		sdev_set_nodestate(dv, SDEV_ZOMBIE);
	}
	rw_exit(&dv->sdev_contents);
	return (error);
}
434 
/*
 * Put dv into SDEV_ZOMBIE state.
 * Takes dv->sdev_contents as writer around the state change, so the
 * caller must not already hold it.  Always returns 0.
 */
static int
sdev_nodezombied(struct sdev_node *dv)
{
	rw_enter(&dv->sdev_contents, RW_WRITER);
	sdev_set_nodestate(dv, SDEV_ZOMBIE);
	rw_exit(&dv->sdev_contents);
	return (0);
}
446 
/*
 * Build the VROOT sdev_node.
 *
 * arguments:
 *	- vfsp: the vfs the root belongs to; its mount point, when set,
 *	  becomes the node name
 *	- devdev: device number stored in the root vnode's v_rdev
 *	- mvp: unused here
 *	- avp: backing attribute vnode; must be non-NULL (asserted)
 *	- cred: unused here
 *
 * The root is its own parent, starts with two links and is returned in
 * SDEV_READY state.  A root named "/dev" is flagged global and
 * persistent; any other root gets an empty profile instead.
 */
/*ARGSUSED*/
struct sdev_node *
sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
    struct vnode *avp, struct cred *cred)
{
	struct sdev_node *dv;
	struct vnode *vp;
	char devdir[] = "/dev";

	ASSERT(sdev_node_cache != NULL);
	ASSERT(avp);
	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
	vp = SDEVTOV(dv);
	vn_reinit(vp);
	vp->v_flag |= VROOT;
	vp->v_vfsp = vfsp;
	vp->v_type = VDIR;
	vp->v_rdev = devdev;
	vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
	vn_exists(vp);

	if (vfsp->vfs_mntpt)
		dv->sdev_name = i_ddi_strdup(
		    (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
	else
		/* vfs_mountdev1 set mount point later */
		dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
	dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
	/* the path is always "/dev", whatever the mount point is */
	dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
	dv->sdev_ino = SDEV_ROOTINO;
	dv->sdev_nlink = 2;		/* name + . (no sdev_insert) */
	dv->sdev_dotdot = dv;		/* .. == self */
	dv->sdev_attrvp = avp;
	dv->sdev_attr = NULL;
	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
	if (strcmp(dv->sdev_name, "/dev") == 0) {
		/* global /dev: also sets up the nsmaps lock */
		mutex_init(&devname_nsmaps_lock, NULL, MUTEX_DEFAULT, NULL);
		dv->sdev_mapinfo = NULL;
		dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
		bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
		dv->sdev_gdir_gen = 0;
	} else {
		/* non-global instance: no persistence, empty profile */
		dv->sdev_flags = SDEV_BUILD;
		dv->sdev_flags &= ~SDEV_PERSIST;
		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
		dv->sdev_ldir_gen = 0;
		dv->sdev_devtree_gen = 0;
	}

	avl_create(&dv->sdev_entries,
	    (int (*)(const void *, const void *))sdev_compare_nodes,
	    sizeof (struct sdev_node),
	    offsetof(struct sdev_node, sdev_avllink));

	rw_enter(&dv->sdev_contents, RW_WRITER);
	sdev_set_nodestate(dv, SDEV_READY);
	rw_exit(&dv->sdev_contents);
	sdev_nc_node_exists(dv);
	return (dv);
}
511 
512 /*
513  *  1. load the module
514  *  2. modload invokes sdev_module_register, which in turn sets
515  *     the dv->sdev_mapinfo->dir_ops
516  *
517  * note: locking order:
518  *	dv->sdev_contents -> map->dir_lock
519  */
520 static int
521 sdev_get_moduleops(struct sdev_node *dv)
522 {
523 	int error = 0;
524 	struct devname_nsmap *map = NULL;
525 	char *module;
526 	char *path;
527 	int load = 1;
528 
529 	ASSERT(SDEVTOV(dv)->v_type == VDIR);
530 
531 	if (devname_nsmaps == NULL)
532 		return (0);
533 
534 	if (!sdev_nsmaps_loaded() && !sdev_nsmaps_reloaded())
535 		return (0);
536 
537 
538 	path = dv->sdev_path;
539 	if ((map = sdev_get_nsmap_by_dir(path, 0))) {
540 		rw_enter(&map->dir_lock, RW_READER);
541 		if (map->dir_invalid) {
542 			if (map->dir_module && map->dir_newmodule &&
543 			    (strcmp(map->dir_module,
544 			    map->dir_newmodule) == 0)) {
545 				load = 0;
546 			}
547 			sdev_replace_nsmap(map, map->dir_newmodule,
548 			    map->dir_newmap);
549 		}
550 
551 		module = map->dir_module;
552 		if (module && load) {
553 			sdcmn_err6(("sdev_get_moduleops: "
554 			    "load module %s", module));
555 			rw_exit(&map->dir_lock);
556 			error = modload("devname", module);
557 			sdcmn_err6(("sdev_get_moduleops: error %d\n", error));
558 			if (error < 0) {
559 				return (-1);
560 			}
561 		} else if (module == NULL) {
562 			/*
563 			 * loading the module ops for name services
564 			 */
565 			if (devname_ns_ops == NULL) {
566 				sdcmn_err6((
567 				    "sdev_get_moduleops: modload default\n"));
568 				error = modload("devname", DEVNAME_NSCONFIG);
569 				sdcmn_err6((
570 				    "sdev_get_moduleops: error %d\n", error));
571 				if (error < 0) {
572 					return (-1);
573 				}
574 			}
575 
576 			if (!rw_tryupgrade(&map->dir_lock)) {
577 				rw_exit(&map->dir_lock);
578 				rw_enter(&map->dir_lock, RW_WRITER);
579 			}
580 			ASSERT(devname_ns_ops);
581 			map->dir_ops = devname_ns_ops;
582 			rw_exit(&map->dir_lock);
583 		}
584 	}
585 
586 	dv->sdev_mapinfo = map;
587 	return (0);
588 }
589 
/*
 * directory dependent vop table
 *
 * One entry per specially handled directory.  Entries are matched by
 * name in sdev_get_vop(), sdev_set_no_nocache() and sdev_get_vtor().
 */
struct sdev_vop_table {
	char *vt_name;				/* subdirectory name */
	const fs_operation_def_t *vt_service;	/* vnodeops table */
	struct vnodeops *vt_vops;		/* constructed vop */
	struct vnodeops **vt_global_vops;	/* global container for vop */
	int (*vt_vtor)(struct sdev_node *);	/* validate sdev_node */
	int vt_flags;		/* SDEV_* flags OR'ed into sdev_flags */
};
599 
/*
 * Table of the directories that get their own vnodeops, validator
 * and/or flags.  vt_vops starts out NULL and is filled in by
 * sdev_get_vop() the first time the directory is set up.  The list is
 * terminated by an entry with a NULL vt_name.
 *
 * A nice improvement would be to provide a plug-in mechanism
 * for this table instead of a const table.
 */
static struct sdev_vop_table vtab[] =
{
	{ "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
	SDEV_DYNAMIC | SDEV_VTOR },

	{ "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate,
	SDEV_DYNAMIC | SDEV_VTOR },

	/* no private vnodeops, only the SDEV_NO_NCACHE flag */
	{ "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },

	{ "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
	SDEV_DYNAMIC | SDEV_VTOR },

	{ NULL, NULL, NULL, NULL, NULL, 0}
};
619 
620 
621 /*
622  *  sets a directory's vnodeops if the directory is in the vtab;
623  */
624 static struct vnodeops *
625 sdev_get_vop(struct sdev_node *dv)
626 {
627 	int i;
628 	char *path;
629 
630 	path = dv->sdev_path;
631 	ASSERT(path);
632 
633 	/* gets the relative path to /dev/ */
634 	path += 5;
635 
636 	/* gets the vtab entry if matches */
637 	for (i = 0; vtab[i].vt_name; i++) {
638 		if (strcmp(vtab[i].vt_name, path) != 0)
639 			continue;
640 		dv->sdev_flags |= vtab[i].vt_flags;
641 
642 		if (vtab[i].vt_vops) {
643 			if (vtab[i].vt_global_vops)
644 				*(vtab[i].vt_global_vops) = vtab[i].vt_vops;
645 			return (vtab[i].vt_vops);
646 		}
647 
648 		if (vtab[i].vt_service) {
649 			fs_operation_def_t *templ;
650 			templ = sdev_merge_vtab(vtab[i].vt_service);
651 			if (vn_make_ops(vtab[i].vt_name,
652 			    (const fs_operation_def_t *)templ,
653 			    &vtab[i].vt_vops) != 0) {
654 				cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
655 				    vtab[i].vt_name);
656 				/*NOTREACHED*/
657 			}
658 			if (vtab[i].vt_global_vops) {
659 				*(vtab[i].vt_global_vops) = vtab[i].vt_vops;
660 			}
661 			sdev_free_vtab(templ);
662 			return (vtab[i].vt_vops);
663 		}
664 		return (sdev_vnodeops);
665 	}
666 
667 	/* child inherits the persistence of the parent */
668 	if (SDEV_IS_PERSIST(dv->sdev_dotdot))
669 		dv->sdev_flags |= SDEV_PERSIST;
670 
671 	return (sdev_vnodeops);
672 }
673 
674 static void
675 sdev_set_no_nocache(struct sdev_node *dv)
676 {
677 	int i;
678 	char *path;
679 
680 	ASSERT(dv->sdev_path);
681 	path = dv->sdev_path + strlen("/dev/");
682 
683 	for (i = 0; vtab[i].vt_name; i++) {
684 		if (strcmp(vtab[i].vt_name, path) == 0) {
685 			if (vtab[i].vt_flags & SDEV_NO_NCACHE)
686 				dv->sdev_flags |= SDEV_NO_NCACHE;
687 			break;
688 		}
689 	}
690 }
691 
692 void *
693 sdev_get_vtor(struct sdev_node *dv)
694 {
695 	int i;
696 
697 	for (i = 0; vtab[i].vt_name; i++) {
698 		if (strcmp(vtab[i].vt_name, dv->sdev_name) != 0)
699 			continue;
700 		return ((void *)vtab[i].vt_vtor);
701 	}
702 	return (NULL);
703 }
704 
705 /*
706  * Build the base root inode
707  */
708 ino_t
709 sdev_mkino(struct sdev_node *dv)
710 {
711 	ino_t	ino;
712 
713 	/*
714 	 * for now, follow the lead of tmpfs here
715 	 * need to someday understand the requirements here
716 	 */
717 	ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
718 	ino += SDEV_ROOTINO + 1;
719 
720 	return (ino);
721 }
722 
723 static int
724 sdev_getlink(struct vnode *linkvp, char **link)
725 {
726 	int err;
727 	char *buf;
728 	struct uio uio = {0};
729 	struct iovec iov = {0};
730 
731 	if (linkvp == NULL)
732 		return (ENOENT);
733 	ASSERT(linkvp->v_type == VLNK);
734 
735 	buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
736 	iov.iov_base = buf;
737 	iov.iov_len = MAXPATHLEN;
738 	uio.uio_iov = &iov;
739 	uio.uio_iovcnt = 1;
740 	uio.uio_resid = MAXPATHLEN;
741 	uio.uio_segflg = UIO_SYSSPACE;
742 	uio.uio_llimit = MAXOFFSET_T;
743 
744 	err = VOP_READLINK(linkvp, &uio, kcred, NULL);
745 	if (err) {
746 		cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
747 		kmem_free(buf, MAXPATHLEN);
748 		return (ENOENT);
749 	}
750 
751 	/* mission complete */
752 	*link = i_ddi_strdup(buf, KM_SLEEP);
753 	kmem_free(buf, MAXPATHLEN);
754 	return (0);
755 }
756 
757 /*
758  * A convenient wrapper to get the devfs node vnode for a device
759  * minor functionality: readlink() of a /dev symlink
760  * Place the link into dv->sdev_symlink
761  */
762 static int
763 sdev_follow_link(struct sdev_node *dv)
764 {
765 	int err;
766 	struct vnode *linkvp;
767 	char *link = NULL;
768 
769 	linkvp = SDEVTOV(dv);
770 	if (linkvp == NULL)
771 		return (ENOENT);
772 	ASSERT(linkvp->v_type == VLNK);
773 	err = sdev_getlink(linkvp, &link);
774 	if (err) {
775 		(void) sdev_nodezombied(dv);
776 		dv->sdev_symlink = NULL;
777 		return (ENOENT);
778 	}
779 
780 	ASSERT(link != NULL);
781 	dv->sdev_symlink = link;
782 	return (0);
783 }
784 
785 static int
786 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
787 {
788 	vtype_t otype = SDEVTOV(dv)->v_type;
789 
790 	/*
791 	 * existing sdev_node has a different type.
792 	 */
793 	if (otype != nvap->va_type) {
794 		sdcmn_err9(("sdev_node_check: existing node "
795 		    "  %s type %d does not match new node type %d\n",
796 		    dv->sdev_name, otype, nvap->va_type));
797 		return (EEXIST);
798 	}
799 
800 	/*
801 	 * For a symlink, the target should be the same.
802 	 */
803 	if (otype == VLNK) {
804 		ASSERT(nargs != NULL);
805 		ASSERT(dv->sdev_symlink != NULL);
806 		if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
807 			sdcmn_err9(("sdev_node_check: existing node "
808 			    " %s has different symlink %s as new node "
809 			    " %s\n", dv->sdev_name, dv->sdev_symlink,
810 			    (char *)nargs));
811 			return (EEXIST);
812 		}
813 	}
814 
815 	return (0);
816 }
817 
/*
 * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
 *
 * arguments:
 *	- ddv (parent)
 *	- nm (child name)
 *	- newdv (sdev_node for nm is returned here; if *newdv is non-NULL
 *	  on entry, that existing node is used instead of creating one)
 *	- vap (vattr for the node to be created, va_type should be set.
 *	  the defaults should be used if unknown)
 *	- avp (attribute vnode)
 *	- args
 *	    . tnm (for VLNK)
 *	    . global sdev_node (for !SDEV_GLOBAL)
 *	- cred
 *	- state: SDEV_INIT, SDEV_READY
 *
 * only ddv, nm, newdv, vap, cred are required for sdev_mknode(SDEV_INIT)
 *
 * Returns 0 with *newdv set to the node.  Once a node is in hand, an
 * error releases the hold on it and sets *newdv to NULL; the earlier
 * failure returns (zombied parent, node init or cache insert failure)
 * leave *newdv as it was on entry.
 *
 * NOTE:  directory contents writers lock needs to be held before
 *	  calling this routine.
 */
int
sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
    struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
    sdev_node_state_t state)
{
	int error = 0;
	sdev_node_state_t node_state;
	struct sdev_node *dv = NULL;

	ASSERT(state != SDEV_ZOMBIE);
	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));

	if (*newdv) {
		/* the caller already has a node; just transition it */
		dv = *newdv;
	} else {
		/* allocate and initialize a sdev_node */
		if (ddv->sdev_state == SDEV_ZOMBIE) {
			sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
			    ddv->sdev_path));
			return (ENOENT);
		}

		error = sdev_nodeinit(ddv, nm, &dv, vap);
		if (error != 0) {
			sdcmn_err9(("sdev_mknode: error %d,"
			    " name %s can not be initialized\n",
			    error, nm));
			return (error);
		}
		ASSERT(dv);

		/* insert into the directory cache */
		error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
		if (error) {
			/* whatever the cause, this is reported as ENOENT */
			sdcmn_err9(("sdev_mknode: node %s can not"
			    " be added into directory cache\n", nm));
			return (ENOENT);
		}
	}

	ASSERT(dv);
	node_state = dv->sdev_state;
	ASSERT(node_state != SDEV_ZOMBIE);

	if (state == SDEV_READY) {
		switch (node_state) {
		case SDEV_INIT:
			error = sdev_nodeready(dv, vap, avp, args, cred);
			if (error) {
				sdcmn_err9(("sdev_mknode: node %s can NOT"
				    " be transitioned into READY state, "
				    "error %d\n", nm, error));
			}
			break;
		case SDEV_READY:
			/*
			 * Do some sanity checking to make sure
			 * the existing sdev_node is what has been
			 * asked for.
			 */
			error = sdev_node_check(dv, vap, args);
			break;
		default:
			break;
		}
	}

	if (!error) {
		*newdv = dv;
		ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
	} else {
		/* drop the hold on the node and report nothing back */
		SDEV_SIMPLE_RELE(dv);
		*newdv = NULL;
	}

	return (error);
}
916 
917 /*
918  * convenient wrapper to change vp's ATIME, CTIME and MTIME
919  */
920 void
921 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
922 {
923 	struct vattr attr;
924 	timestruc_t now;
925 	int err;
926 
927 	ASSERT(vp);
928 	gethrestime(&now);
929 	if (mask & AT_CTIME)
930 		attr.va_ctime = now;
931 	if (mask & AT_MTIME)
932 		attr.va_mtime = now;
933 	if (mask & AT_ATIME)
934 		attr.va_atime = now;
935 
936 	attr.va_mask = (mask & AT_TIMES);
937 	err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
938 	if (err && (err != EROFS)) {
939 		sdcmn_err(("update timestamps error %d\n", err));
940 	}
941 }
942 
/*
 * Free everything owned by dv and return it to the sdev_node cache.
 * The backing store vnode is released here.
 *
 * The node must have no links left (asserted).  flags is unused.
 */
/*ARGSUSED1*/
void
sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
{
	/* no references */
	ASSERT(dv->sdev_nlink == 0);

	if (dv->sdev_attrvp != NULLVP) {
		VN_RELE(dv->sdev_attrvp);
		/*
		 * reset the attrvp so that no more
		 * references can be made on this already
		 * vn_rele() vnode
		 */
		dv->sdev_attrvp = NULLVP;
	}

	if (dv->sdev_attr != NULL) {
		kmem_free(dv->sdev_attr, sizeof (struct vattr));
		dv->sdev_attr = NULL;
	}

	if (dv->sdev_name != NULL) {
		/* sdev_namelen excludes the terminating '\0' */
		kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
		dv->sdev_name = NULL;
	}

	if (dv->sdev_symlink != NULL) {
		kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
		dv->sdev_symlink = NULL;
	}

	if (dv->sdev_path) {
		kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
		dv->sdev_path = NULL;
	}

	if (!SDEV_IS_GLOBAL(dv))
		sdev_prof_free(dv);

	if (SDEVTOV(dv)->v_type == VDIR) {
		/* a directory must be empty before it is destroyed */
		ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
		avl_destroy(&dv->sdev_entries);
	}

	mutex_destroy(&dv->sdev_lookup_lock);
	cv_destroy(&dv->sdev_lookup_cv);

	/* return node to initial state as per constructor */
	(void) memset((void *)&dv->sdev_instance_data, 0,
	    sizeof (dv->sdev_instance_data));
	vn_invalid(SDEVTOV(dv));
	kmem_cache_free(sdev_node_cache, dv);
}
1000 
1001 /*
1002  * DIRECTORY CACHE lookup
1003  */
1004 struct sdev_node *
1005 sdev_findbyname(struct sdev_node *ddv, char *nm)
1006 {
1007 	struct sdev_node *dv;
1008 	struct sdev_node dvtmp;
1009 	avl_index_t	where;
1010 
1011 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1012 
1013 	dvtmp.sdev_name = nm;
1014 	dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
1015 	if (dv) {
1016 		ASSERT(dv->sdev_dotdot == ddv);
1017 		ASSERT(strcmp(dv->sdev_name, nm) == 0);
1018 		SDEV_HOLD(dv);
1019 		return (dv);
1020 	}
1021 	return (NULL);
1022 }
1023 
/*
 * Inserts a new sdev_node in a parent directory
 *
 * The caller holds ddv->sdev_contents as writer.  dv must not be linked
 * yet (link count zero) and no entry with the same name may exist in
 * ddv; the latter is enforced with VERIFY.  The parent's link count is
 * incremented, dv's own link count is left alone.
 */
void
sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
{
	avl_index_t where;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
	ASSERT(ddv->sdev_nlink >= 2);
	ASSERT(dv->sdev_nlink == 0);

	dv->sdev_dotdot = ddv;
	/* avl_find() also yields the insertion point used just below */
	VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
	avl_insert(&ddv->sdev_entries, dv, where);
	ddv->sdev_nlink++;
}
1042 
1043 /*
1044  * The following check is needed because while sdev_nodes are linked
1045  * in SDEV_INIT state, they have their link counts incremented only
1046  * in SDEV_READY state.
1047  */
1048 static void
1049 decr_link(struct sdev_node *dv)
1050 {
1051 	if (dv->sdev_state != SDEV_INIT)
1052 		dv->sdev_nlink--;
1053 	else
1054 		ASSERT(dv->sdev_nlink == 0);
1055 }
1056 
/*
 * Delete an existing dv from directory cache
 *
 * In the case of a node is still held by non-zero reference count,
 *     the node is put into ZOMBIE state. Once the reference count
 *     reaches "0", the node is unlinked and destroyed,
 *     in sdev_inactive().
 *
 * The caller holds ddv->sdev_contents as writer.  The hold the caller
 * has on dv is consumed on both paths.
 *
 * Returns 0 when dv was unlinked and destroyed, EBUSY when other holds
 * remain and the node was only marked ZOMBIE.
 */
static int
sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
{
	struct vnode *vp;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));

	vp = SDEVTOV(dv);
	/* v_lock is held across the v_count checks and updates */
	mutex_enter(&vp->v_lock);

	/* dv is held still */
	if (vp->v_count > 1) {
		rw_enter(&dv->sdev_contents, RW_WRITER);
		if (dv->sdev_state == SDEV_READY) {
			sdcmn_err9((
			    "sdev_delete: node %s busy with count %d\n",
			    dv->sdev_name, vp->v_count));
			dv->sdev_state = SDEV_ZOMBIE;
		}
		rw_exit(&dv->sdev_contents);
		/* give up our own hold; the node stays in the directory */
		--vp->v_count;
		mutex_exit(&vp->v_lock);
		return (EBUSY);
	}
	ASSERT(vp->v_count == 1);

	/* unlink from the memory cache */
	ddv->sdev_nlink--;	/* .. to above */
	if (vp->v_type == VDIR) {
		decr_link(dv);		/* . to self */
	}

	avl_remove(&ddv->sdev_entries, dv);
	decr_link(dv);	/* name, back to zero */
	vp->v_count--;
	mutex_exit(&vp->v_lock);

	/* destroy the node */
	sdev_nodedestroy(dv, 0);
	return (0);
}
1106 
1107 /*
1108  * check if the source is in the path of the target
1109  *
1110  * source and target are different
1111  */
1112 /*ARGSUSED2*/
1113 static int
1114 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
1115 {
1116 	int error = 0;
1117 	struct sdev_node *dotdot, *dir;
1118 
1119 	dotdot = tdv->sdev_dotdot;
1120 	ASSERT(dotdot);
1121 
1122 	/* fs root */
1123 	if (dotdot == tdv) {
1124 		return (0);
1125 	}
1126 
1127 	for (;;) {
1128 		/*
1129 		 * avoid error cases like
1130 		 *	mv a a/b
1131 		 *	mv a a/b/c
1132 		 *	etc.
1133 		 */
1134 		if (dotdot == sdv) {
1135 			error = EINVAL;
1136 			break;
1137 		}
1138 
1139 		dir = dotdot;
1140 		dotdot = dir->sdev_dotdot;
1141 
1142 		/* done checking because root is reached */
1143 		if (dir == dotdot) {
1144 			break;
1145 		}
1146 	}
1147 	return (error);
1148 }
1149 
1150 int
1151 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
1152     struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
1153     struct cred *cred)
1154 {
1155 	int error = 0;
1156 	struct vnode *ovp = SDEVTOV(odv);
1157 	struct vnode *nvp;
1158 	struct vattr vattr;
1159 	int doingdir = (ovp->v_type == VDIR);
1160 	char *link = NULL;
1161 	int samedir = (oddv == nddv) ? 1 : 0;
1162 	int bkstore = 0;
1163 	struct sdev_node *idv = NULL;
1164 	struct sdev_node *ndv = NULL;
1165 	timestruc_t now;
1166 
1167 	vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1168 	error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
1169 	if (error)
1170 		return (error);
1171 
1172 	if (!samedir)
1173 		rw_enter(&oddv->sdev_contents, RW_WRITER);
1174 	rw_enter(&nddv->sdev_contents, RW_WRITER);
1175 
1176 	/*
1177 	 * the source may have been deleted by another thread before
1178 	 * we gets here.
1179 	 */
1180 	if (odv->sdev_state != SDEV_READY) {
1181 		error = ENOENT;
1182 		goto err_out;
1183 	}
1184 
1185 	if (doingdir && (odv == nddv)) {
1186 		error = EINVAL;
1187 		goto err_out;
1188 	}
1189 
1190 	/*
1191 	 * If renaming a directory, and the parents are different (".." must be
1192 	 * changed) then the source dir must not be in the dir hierarchy above
1193 	 * the target since it would orphan everything below the source dir.
1194 	 */
1195 	if (doingdir && (oddv != nddv)) {
1196 		error = sdev_checkpath(odv, nddv, cred);
1197 		if (error)
1198 			goto err_out;
1199 	}
1200 
1201 	/* destination existing */
1202 	if (*ndvp) {
1203 		nvp = SDEVTOV(*ndvp);
1204 		ASSERT(nvp);
1205 
1206 		/* handling renaming to itself */
1207 		if (odv == *ndvp) {
1208 			error = 0;
1209 			goto err_out;
1210 		}
1211 
1212 		if (nvp->v_type == VDIR) {
1213 			if (!doingdir) {
1214 				error = EISDIR;
1215 				goto err_out;
1216 			}
1217 
1218 			if (vn_vfswlock(nvp)) {
1219 				error = EBUSY;
1220 				goto err_out;
1221 			}
1222 
1223 			if (vn_mountedvfs(nvp) != NULL) {
1224 				vn_vfsunlock(nvp);
1225 				error = EBUSY;
1226 				goto err_out;
1227 			}
1228 
1229 			/* in case dir1 exists in dir2 and "mv dir1 dir2" */
1230 			if ((*ndvp)->sdev_nlink > 2) {
1231 				vn_vfsunlock(nvp);
1232 				error = EEXIST;
1233 				goto err_out;
1234 			}
1235 			vn_vfsunlock(nvp);
1236 
1237 			(void) sdev_dirdelete(nddv, *ndvp);
1238 			*ndvp = NULL;
1239 			ASSERT(nddv->sdev_attrvp);
1240 			error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1241 			    nddv->sdev_attrvp, cred, NULL, 0);
1242 			if (error)
1243 				goto err_out;
1244 		} else {
1245 			if (doingdir) {
1246 				error = ENOTDIR;
1247 				goto err_out;
1248 			}
1249 
1250 			if (SDEV_IS_PERSIST((*ndvp))) {
1251 				bkstore = 1;
1252 			}
1253 
1254 			/*
1255 			 * get rid of the node from the directory cache
1256 			 * note, in case EBUSY is returned, the ZOMBIE
1257 			 * node is taken care in sdev_mknode.
1258 			 */
1259 			(void) sdev_dirdelete(nddv, *ndvp);
1260 			*ndvp = NULL;
1261 			if (bkstore) {
1262 				ASSERT(nddv->sdev_attrvp);
1263 				error = VOP_REMOVE(nddv->sdev_attrvp,
1264 				    nnm, cred, NULL, 0);
1265 				if (error)
1266 					goto err_out;
1267 			}
1268 		}
1269 	}
1270 
1271 	/* fix the source for a symlink */
1272 	if (vattr.va_type == VLNK) {
1273 		if (odv->sdev_symlink == NULL) {
1274 			error = sdev_follow_link(odv);
1275 			if (error) {
1276 				error = ENOENT;
1277 				goto err_out;
1278 			}
1279 		}
1280 		ASSERT(odv->sdev_symlink);
1281 		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1282 	}
1283 
1284 	/*
1285 	 * make a fresh node from the source attrs
1286 	 */
1287 	ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1288 	error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1289 	    NULL, (void *)link, cred, SDEV_READY);
1290 
1291 	if (link)
1292 		kmem_free(link, strlen(link) + 1);
1293 
1294 	if (error)
1295 		goto err_out;
1296 	ASSERT(*ndvp);
1297 	ASSERT((*ndvp)->sdev_state == SDEV_READY);
1298 
1299 	/* move dir contents */
1300 	if (doingdir) {
1301 		for (idv = SDEV_FIRST_ENTRY(odv); idv;
1302 		    idv = SDEV_NEXT_ENTRY(odv, idv)) {
1303 			error = sdev_rnmnode(odv, idv,
1304 			    (struct sdev_node *)(*ndvp), &ndv,
1305 			    idv->sdev_name, cred);
1306 			if (error)
1307 				goto err_out;
1308 			ndv = NULL;
1309 		}
1310 	}
1311 
1312 	if ((*ndvp)->sdev_attrvp) {
1313 		sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1314 		    AT_CTIME|AT_ATIME);
1315 	} else {
1316 		ASSERT((*ndvp)->sdev_attr);
1317 		gethrestime(&now);
1318 		(*ndvp)->sdev_attr->va_ctime = now;
1319 		(*ndvp)->sdev_attr->va_atime = now;
1320 	}
1321 
1322 	if (nddv->sdev_attrvp) {
1323 		sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1324 		    AT_MTIME|AT_ATIME);
1325 	} else {
1326 		ASSERT(nddv->sdev_attr);
1327 		gethrestime(&now);
1328 		nddv->sdev_attr->va_mtime = now;
1329 		nddv->sdev_attr->va_atime = now;
1330 	}
1331 	rw_exit(&nddv->sdev_contents);
1332 	if (!samedir)
1333 		rw_exit(&oddv->sdev_contents);
1334 
1335 	SDEV_RELE(*ndvp);
1336 	return (error);
1337 
1338 err_out:
1339 	rw_exit(&nddv->sdev_contents);
1340 	if (!samedir)
1341 		rw_exit(&oddv->sdev_contents);
1342 	return (error);
1343 }
1344 
1345 /*
1346  * Merge sdev_node specific information into an attribute structure.
1347  *
1348  * note: sdev_node is not locked here
1349  */
1350 void
1351 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1352 {
1353 	struct vnode *vp = SDEVTOV(dv);
1354 
1355 	vap->va_nlink = dv->sdev_nlink;
1356 	vap->va_nodeid = dv->sdev_ino;
1357 	vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1358 	vap->va_type = vp->v_type;
1359 
1360 	if (vp->v_type == VDIR) {
1361 		vap->va_rdev = 0;
1362 		vap->va_fsid = vp->v_rdev;
1363 	} else if (vp->v_type == VLNK) {
1364 		vap->va_rdev = 0;
1365 		vap->va_mode  &= ~S_IFMT;
1366 		vap->va_mode |= S_IFLNK;
1367 	} else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1368 		vap->va_rdev = vp->v_rdev;
1369 		vap->va_mode &= ~S_IFMT;
1370 		if (vap->va_type == VCHR)
1371 			vap->va_mode |= S_IFCHR;
1372 		else
1373 			vap->va_mode |= S_IFBLK;
1374 	} else {
1375 		vap->va_rdev = 0;
1376 	}
1377 }
1378 
1379 static struct vattr *
1380 sdev_getdefault_attr(enum vtype type)
1381 {
1382 	if (type == VDIR)
1383 		return (&sdev_vattr_dir);
1384 	else if (type == VCHR)
1385 		return (&sdev_vattr_chr);
1386 	else if (type == VBLK)
1387 		return (&sdev_vattr_blk);
1388 	else if (type == VLNK)
1389 		return (&sdev_vattr_lnk);
1390 	else
1391 		return (NULL);
1392 }
1393 int
1394 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1395 {
1396 	int rv = 0;
1397 	struct vnode *vp = SDEVTOV(dv);
1398 
1399 	switch (vp->v_type) {
1400 	case VCHR:
1401 	case VBLK:
1402 		/*
1403 		 * If vnode is a device, return special vnode instead
1404 		 * (though it knows all about -us- via sp->s_realvp)
1405 		 */
1406 		*vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1407 		VN_RELE(vp);
1408 		if (*vpp == NULLVP)
1409 			rv = ENOSYS;
1410 		break;
1411 	default:	/* most types are returned as is */
1412 		*vpp = vp;
1413 		break;
1414 	}
1415 	return (rv);
1416 }
1417 
1418 /*
1419  * loopback into sdev_lookup()
1420  */
1421 static struct vnode *
1422 devname_find_by_devpath(char *devpath, struct vattr *vattr)
1423 {
1424 	int error = 0;
1425 	struct vnode *vp;
1426 
1427 	error = lookupname(devpath, UIO_SYSSPACE, NO_FOLLOW, NULLVPP, &vp);
1428 	if (error) {
1429 		return (NULL);
1430 	}
1431 
1432 	if (vattr)
1433 		(void) VOP_GETATTR(vp, vattr, 0, kcred, NULL);
1434 	return (vp);
1435 }
1436 
1437 /*
1438  * the junction between devname and devfs
1439  */
1440 static struct vnode *
1441 devname_configure_by_path(char *physpath, struct vattr *vattr)
1442 {
1443 	int error = 0;
1444 	struct vnode *vp;
1445 
1446 	ASSERT(strncmp(physpath, "/devices/", sizeof ("/devices/") - 1)
1447 	    == 0);
1448 
1449 	error = devfs_lookupname(physpath + sizeof ("/devices/") - 1,
1450 	    NULLVPP, &vp);
1451 	if (error != 0) {
1452 		if (error == ENODEV) {
1453 			cmn_err(CE_CONT, "%s: not found (line %d)\n",
1454 			    physpath, __LINE__);
1455 		}
1456 
1457 		return (NULL);
1458 	}
1459 
1460 	if (vattr)
1461 		(void) VOP_GETATTR(vp, vattr, 0, kcred, NULL);
1462 	return (vp);
1463 }
1464 
1465 /*
1466  * junction between devname and root file system, e.g. ufs
1467  */
1468 int
1469 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1470 {
1471 	struct vnode *rdvp = ddv->sdev_attrvp;
1472 	int rval = 0;
1473 
1474 	ASSERT(rdvp);
1475 
1476 	rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1477 	    NULL);
1478 	return (rval);
1479 }
1480 
1481 static int
1482 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1483 {
1484 	struct sdev_node *dv = NULL;
1485 	char	*nm;
1486 	struct vnode *dirvp;
1487 	int	error;
1488 	vnode_t	*vp;
1489 	int eof;
1490 	struct iovec iov;
1491 	struct uio uio;
1492 	struct dirent64 *dp;
1493 	dirent64_t *dbuf;
1494 	size_t dbuflen;
1495 	struct vattr vattr;
1496 	char *link = NULL;
1497 
1498 	if (ddv->sdev_attrvp == NULL)
1499 		return (0);
1500 	if (!(ddv->sdev_flags & SDEV_BUILD))
1501 		return (0);
1502 
1503 	dirvp = ddv->sdev_attrvp;
1504 	VN_HOLD(dirvp);
1505 	dbuf = kmem_zalloc(dlen, KM_SLEEP);
1506 
1507 	uio.uio_iov = &iov;
1508 	uio.uio_iovcnt = 1;
1509 	uio.uio_segflg = UIO_SYSSPACE;
1510 	uio.uio_fmode = 0;
1511 	uio.uio_extflg = UIO_COPY_CACHED;
1512 	uio.uio_loffset = 0;
1513 	uio.uio_llimit = MAXOFFSET_T;
1514 
1515 	eof = 0;
1516 	error = 0;
1517 	while (!error && !eof) {
1518 		uio.uio_resid = dlen;
1519 		iov.iov_base = (char *)dbuf;
1520 		iov.iov_len = dlen;
1521 		(void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1522 		error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1523 		VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1524 
1525 		dbuflen = dlen - uio.uio_resid;
1526 		if (error || dbuflen == 0)
1527 			break;
1528 
1529 		if (!(ddv->sdev_flags & SDEV_BUILD)) {
1530 			error = 0;
1531 			break;
1532 		}
1533 
1534 		for (dp = dbuf; ((intptr_t)dp <
1535 		    (intptr_t)dbuf + dbuflen);
1536 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1537 			nm = dp->d_name;
1538 
1539 			if (strcmp(nm, ".") == 0 ||
1540 			    strcmp(nm, "..") == 0)
1541 				continue;
1542 
1543 			vp = NULLVP;
1544 			dv = sdev_cache_lookup(ddv, nm);
1545 			if (dv) {
1546 				if (dv->sdev_state != SDEV_ZOMBIE) {
1547 					SDEV_SIMPLE_RELE(dv);
1548 				} else {
1549 					/*
1550 					 * A ZOMBIE node may not have been
1551 					 * cleaned up from the backing store,
1552 					 * bypass this entry in this case,
1553 					 * and clean it up from the directory
1554 					 * cache if this is the last call.
1555 					 */
1556 					(void) sdev_dirdelete(ddv, dv);
1557 				}
1558 				continue;
1559 			}
1560 
1561 			/* refill the cache if not already */
1562 			error = devname_backstore_lookup(ddv, nm, &vp);
1563 			if (error)
1564 				continue;
1565 
1566 			vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1567 			error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1568 			if (error)
1569 				continue;
1570 
1571 			if (vattr.va_type == VLNK) {
1572 				error = sdev_getlink(vp, &link);
1573 				if (error) {
1574 					continue;
1575 				}
1576 				ASSERT(link != NULL);
1577 			}
1578 
1579 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1580 				rw_exit(&ddv->sdev_contents);
1581 				rw_enter(&ddv->sdev_contents, RW_WRITER);
1582 			}
1583 			error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1584 			    cred, SDEV_READY);
1585 			rw_downgrade(&ddv->sdev_contents);
1586 
1587 			if (link != NULL) {
1588 				kmem_free(link, strlen(link) + 1);
1589 				link = NULL;
1590 			}
1591 
1592 			if (!error) {
1593 				ASSERT(dv);
1594 				ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1595 				SDEV_SIMPLE_RELE(dv);
1596 			}
1597 			vp = NULL;
1598 			dv = NULL;
1599 		}
1600 	}
1601 
1602 done:
1603 	VN_RELE(dirvp);
1604 	kmem_free(dbuf, dlen);
1605 
1606 	return (error);
1607 }
1608 
1609 void
1610 sdev_filldir_dynamic(struct sdev_node *ddv)
1611 {
1612 	int error;
1613 	int i;
1614 	struct vattr *vap;
1615 	char *nm = NULL;
1616 	struct sdev_node *dv = NULL;
1617 
1618 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1619 	ASSERT((ddv->sdev_flags & SDEV_BUILD));
1620 
1621 	vap = sdev_getdefault_attr(VDIR);
1622 	for (i = 0; vtab[i].vt_name != NULL; i++) {
1623 		nm = vtab[i].vt_name;
1624 		ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1625 		dv = NULL;
1626 		error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1627 		    NULL, kcred, SDEV_READY);
1628 		if (error) {
1629 			cmn_err(CE_WARN, "%s/%s: error %d\n",
1630 			    ddv->sdev_name, nm, error);
1631 		} else {
1632 			ASSERT(dv);
1633 			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1634 			SDEV_SIMPLE_RELE(dv);
1635 		}
1636 	}
1637 }
1638 
1639 /*
1640  * Creating a backing store entry based on sdev_attr.
1641  * This is called either as part of node creation in a persistent directory
1642  * or from setattr/setsecattr to persist access attributes across reboot.
1643  */
1644 int
1645 sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1646 {
1647 	int error = 0;
1648 	struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1649 	struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1650 	struct vattr *vap = dv->sdev_attr;
1651 	char *nm = dv->sdev_name;
1652 	struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1653 
1654 	ASSERT(dv && dv->sdev_name && rdvp);
1655 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1656 
1657 lookup:
1658 	/* try to find it in the backing store */
1659 	error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1660 	    NULL);
1661 	if (error == 0) {
1662 		if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1663 			VN_HOLD(rrvp);
1664 			VN_RELE(*rvp);
1665 			*rvp = rrvp;
1666 		}
1667 
1668 		kmem_free(dv->sdev_attr, sizeof (vattr_t));
1669 		dv->sdev_attr = NULL;
1670 		dv->sdev_attrvp = *rvp;
1671 		return (0);
1672 	}
1673 
1674 	/* let's try to persist the node */
1675 	gethrestime(&vap->va_atime);
1676 	vap->va_mtime = vap->va_atime;
1677 	vap->va_ctime = vap->va_atime;
1678 	vap->va_mask |= AT_TYPE|AT_MODE;
1679 	switch (vap->va_type) {
1680 	case VDIR:
1681 		error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1682 		sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1683 		    (void *)(*rvp), error));
1684 		break;
1685 	case VCHR:
1686 	case VBLK:
1687 	case VREG:
1688 	case VDOOR:
1689 		error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1690 		    rvp, cred, 0, NULL, NULL);
1691 		sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1692 		    (void *)(*rvp), error));
1693 		if (!error)
1694 			VN_RELE(*rvp);
1695 		break;
1696 	case VLNK:
1697 		ASSERT(dv->sdev_symlink);
1698 		error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1699 		    NULL, 0);
1700 		sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1701 		    error));
1702 		break;
1703 	default:
1704 		cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1705 		    "create\n", nm);
1706 		/*NOTREACHED*/
1707 	}
1708 
1709 	/* go back to lookup to factor out spec node and set attrvp */
1710 	if (error == 0)
1711 		goto lookup;
1712 
1713 	sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error));
1714 	return (error);
1715 }
1716 
1717 static int
1718 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1719 {
1720 	int error = 0;
1721 	struct sdev_node *dup = NULL;
1722 
1723 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1724 	if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1725 		sdev_direnter(ddv, *dv);
1726 	} else {
1727 		if (dup->sdev_state == SDEV_ZOMBIE) {
1728 			error = sdev_dirdelete(ddv, dup);
1729 			/*
1730 			 * The ZOMBIE node is still hanging
1731 			 * around with more than one reference counts.
1732 			 * Fail the new node creation so that
1733 			 * the directory cache won't have
1734 			 * duplicate entries for the same named node
1735 			 */
1736 			if (error == EBUSY) {
1737 				SDEV_SIMPLE_RELE(*dv);
1738 				sdev_nodedestroy(*dv, 0);
1739 				*dv = NULL;
1740 				return (error);
1741 			}
1742 			sdev_direnter(ddv, *dv);
1743 		} else {
1744 			ASSERT((*dv)->sdev_state != SDEV_ZOMBIE);
1745 			SDEV_SIMPLE_RELE(*dv);
1746 			sdev_nodedestroy(*dv, 0);
1747 			*dv = dup;
1748 		}
1749 	}
1750 
1751 	return (0);
1752 }
1753 
1754 static int
1755 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1756 {
1757 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1758 	return (sdev_dirdelete(ddv, *dv));
1759 }
1760 
1761 /*
1762  * update the in-core directory cache
1763  */
1764 int
1765 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1766     sdev_cache_ops_t ops)
1767 {
1768 	int error = 0;
1769 
1770 	ASSERT((SDEV_HELD(*dv)));
1771 
1772 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1773 	switch (ops) {
1774 	case SDEV_CACHE_ADD:
1775 		error = sdev_cache_add(ddv, dv, nm);
1776 		break;
1777 	case SDEV_CACHE_DELETE:
1778 		error = sdev_cache_delete(ddv, dv);
1779 		break;
1780 	default:
1781 		break;
1782 	}
1783 
1784 	return (error);
1785 }
1786 
1787 /*
1788  * retrieve the named entry from the directory cache
1789  */
1790 struct sdev_node *
1791 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1792 {
1793 	struct sdev_node *dv = NULL;
1794 
1795 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1796 	dv = sdev_findbyname(ddv, nm);
1797 
1798 	return (dv);
1799 }
1800 
1801 /*
1802  * Implicit reconfig for nodes constructed by a link generator
1803  * Start devfsadm if needed, or if devfsadm is in progress,
1804  * prepare to block on devfsadm either completing or
1805  * constructing the desired node.  As devfsadmd is global
1806  * in scope, constructing all necessary nodes, we only
1807  * need to initiate it once.
1808  */
1809 static int
1810 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1811 {
1812 	int error = 0;
1813 
1814 	if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1815 		sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1816 		    ddv->sdev_name, nm, devfsadm_state));
1817 		mutex_enter(&dv->sdev_lookup_lock);
1818 		SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1819 		mutex_exit(&dv->sdev_lookup_lock);
1820 		error = 0;
1821 	} else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1822 		sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1823 		    ddv->sdev_name, nm, devfsadm_state));
1824 
1825 		sdev_devfsadmd_thread(ddv, dv, kcred);
1826 		mutex_enter(&dv->sdev_lookup_lock);
1827 		SDEV_BLOCK_OTHERS(dv,
1828 		    (SDEV_LOOKUP | SDEV_LGWAITING));
1829 		mutex_exit(&dv->sdev_lookup_lock);
1830 		error = 0;
1831 	} else {
1832 		error = -1;
1833 	}
1834 
1835 	return (error);
1836 }
1837 
1838 static int
1839 sdev_call_modulelookup(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1840     int (*fn)(char *, devname_handle_t *, struct cred *), struct cred *cred)
1841 {
1842 	struct vnode *rvp = NULL;
1843 	int error = 0;
1844 	struct vattr *vap;
1845 	devname_spec_t spec;
1846 	devname_handle_t *hdl;
1847 	void *args = NULL;
1848 	struct sdev_node *dv = *dvp;
1849 
1850 	ASSERT(dv && ddv);
1851 	hdl = &(dv->sdev_handle);
1852 	ASSERT(hdl->dh_data == dv);
1853 	mutex_enter(&dv->sdev_lookup_lock);
1854 	SDEV_BLOCK_OTHERS(dv, SDEV_LOOKUP);
1855 	mutex_exit(&dv->sdev_lookup_lock);
1856 	error = (*fn)(nm, hdl, cred);
1857 	if (error) {
1858 		return (error);
1859 	}
1860 
1861 	spec = hdl->dh_spec;
1862 	args = hdl->dh_args;
1863 	ASSERT(args);
1864 
1865 	switch (spec) {
1866 	case DEVNAME_NS_PATH:
1867 		/*
1868 		 * symlink of:
1869 		 *	/dev/dir/nm -> /device/...
1870 		 */
1871 		rvp = devname_configure_by_path((char *)args, NULL);
1872 		break;
1873 	case DEVNAME_NS_DEV:
1874 		/*
1875 		 * symlink of:
1876 		 *	/dev/dir/nm -> /dev/...
1877 		 */
1878 		rvp = devname_find_by_devpath((char *)args, NULL);
1879 		break;
1880 	default:
1881 		if (args)
1882 			kmem_free((char *)args, strlen(args) + 1);
1883 		return (ENOENT);
1884 
1885 	}
1886 
1887 	if (rvp == NULL) {
1888 		if (args)
1889 			kmem_free((char *)args, strlen(args) + 1);
1890 		return (ENOENT);
1891 	} else {
1892 		vap = sdev_getdefault_attr(VLNK);
1893 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1894 		/*
1895 		 * Could sdev_mknode return a different dv_node
1896 		 * once the lock is dropped?
1897 		 */
1898 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
1899 			rw_exit(&ddv->sdev_contents);
1900 			rw_enter(&ddv->sdev_contents, RW_WRITER);
1901 		}
1902 		error = sdev_mknode(ddv, nm, &dv, vap, NULL, args, cred,
1903 		    SDEV_READY);
1904 		rw_downgrade(&ddv->sdev_contents);
1905 		if (error) {
1906 			if (args)
1907 				kmem_free((char *)args, strlen(args) + 1);
1908 			return (error);
1909 		} else {
1910 			mutex_enter(&dv->sdev_lookup_lock);
1911 			SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
1912 			mutex_exit(&dv->sdev_lookup_lock);
1913 			error = 0;
1914 		}
1915 	}
1916 
1917 	if (args)
1918 		kmem_free((char *)args, strlen(args) + 1);
1919 
1920 	*dvp = dv;
1921 	return (0);
1922 }
1923 
1924 /*
1925  *  Support for specialized device naming construction mechanisms
1926  */
1927 static int
1928 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1929     int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1930     void *, char *), int flags, struct cred *cred)
1931 {
1932 	int rv = 0;
1933 	char *physpath = NULL;
1934 	struct vnode *rvp = NULL;
1935 	struct vattr vattr;
1936 	struct vattr *vap;
1937 	struct sdev_node *dv = *dvp;
1938 
1939 	mutex_enter(&dv->sdev_lookup_lock);
1940 	SDEV_BLOCK_OTHERS(dv, SDEV_LOOKUP);
1941 	mutex_exit(&dv->sdev_lookup_lock);
1942 
1943 	/* for non-devfsadm devices */
1944 	if (flags & SDEV_PATH) {
1945 		physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1946 		rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1947 		    NULL);
1948 		if (rv) {
1949 			kmem_free(physpath, MAXPATHLEN);
1950 			return (-1);
1951 		}
1952 
1953 		rvp = devname_configure_by_path(physpath, NULL);
1954 		if (rvp == NULL) {
1955 			sdcmn_err3(("devname_configure_by_path: "
1956 			    "failed for /dev/%s/%s\n",
1957 			    ddv->sdev_name, nm));
1958 			kmem_free(physpath, MAXPATHLEN);
1959 			rv = -1;
1960 		} else {
1961 			vap = sdev_getdefault_attr(VLNK);
1962 			ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1963 
1964 			/*
1965 			 * Sdev_mknode may return back a different sdev_node
1966 			 * that was created by another thread that
1967 			 * raced to the directroy cache before this thread.
1968 			 *
1969 			 * With current directory cache mechanism
1970 			 * (linked list with the sdev_node name as
1971 			 * the entity key), this is a way to make sure
1972 			 * only one entry exists for the same name
1973 			 * in the same directory. The outcome is
1974 			 * the winner wins.
1975 			 */
1976 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1977 				rw_exit(&ddv->sdev_contents);
1978 				rw_enter(&ddv->sdev_contents, RW_WRITER);
1979 			}
1980 			rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1981 			    (void *)physpath, cred, SDEV_READY);
1982 			rw_downgrade(&ddv->sdev_contents);
1983 			kmem_free(physpath, MAXPATHLEN);
1984 			if (rv) {
1985 				return (rv);
1986 			} else {
1987 				mutex_enter(&dv->sdev_lookup_lock);
1988 				SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
1989 				mutex_exit(&dv->sdev_lookup_lock);
1990 				return (0);
1991 			}
1992 		}
1993 	} else if (flags & SDEV_VLINK) {
1994 		physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1995 		rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1996 		    NULL);
1997 		if (rv) {
1998 			kmem_free(physpath, MAXPATHLEN);
1999 			return (-1);
2000 		}
2001 
2002 		vap = sdev_getdefault_attr(VLNK);
2003 		vap->va_size = strlen(physpath);
2004 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2005 
2006 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2007 			rw_exit(&ddv->sdev_contents);
2008 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2009 		}
2010 		rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
2011 		    (void *)physpath, cred, SDEV_READY);
2012 		rw_downgrade(&ddv->sdev_contents);
2013 		kmem_free(physpath, MAXPATHLEN);
2014 		if (rv)
2015 			return (rv);
2016 
2017 		mutex_enter(&dv->sdev_lookup_lock);
2018 		SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2019 		mutex_exit(&dv->sdev_lookup_lock);
2020 		return (0);
2021 	} else if (flags & SDEV_VNODE) {
2022 		/*
2023 		 * DBNR has its own way to create the device
2024 		 * and return a backing store vnode in rvp
2025 		 */
2026 		ASSERT(callback);
2027 		rv = callback(ddv, nm, (void *)&rvp, kcred, NULL, NULL);
2028 		if (rv || (rvp == NULL)) {
2029 			sdcmn_err3(("devname_lookup_func: SDEV_VNODE "
2030 			    "callback failed \n"));
2031 			return (-1);
2032 		}
2033 		vap = sdev_getdefault_attr(rvp->v_type);
2034 		if (vap == NULL)
2035 			return (-1);
2036 
2037 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2038 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2039 			rw_exit(&ddv->sdev_contents);
2040 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2041 		}
2042 		rv = sdev_mknode(ddv, nm, &dv, vap, rvp, NULL,
2043 		    cred, SDEV_READY);
2044 		rw_downgrade(&ddv->sdev_contents);
2045 		if (rv)
2046 			return (rv);
2047 
2048 		mutex_enter(&dv->sdev_lookup_lock);
2049 		SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2050 		mutex_exit(&dv->sdev_lookup_lock);
2051 		return (0);
2052 	} else if (flags & SDEV_VATTR) {
2053 		/*
2054 		 * /dev/pts
2055 		 *
2056 		 * DBNR has its own way to create the device
2057 		 * "0" is returned upon success.
2058 		 *
2059 		 * callback is responsible to set the basic attributes,
2060 		 * e.g. va_type/va_uid/va_gid/
2061 		 *    dev_t if VCHR or VBLK/
2062 		 */
2063 		ASSERT(callback);
2064 		rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
2065 		if (rv) {
2066 			sdcmn_err3(("devname_lookup_func: SDEV_NONE "
2067 			    "callback failed \n"));
2068 			return (-1);
2069 		}
2070 
2071 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2072 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2073 			rw_exit(&ddv->sdev_contents);
2074 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2075 		}
2076 		rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
2077 		    cred, SDEV_READY);
2078 		rw_downgrade(&ddv->sdev_contents);
2079 
2080 		if (rv)
2081 			return (rv);
2082 
2083 		mutex_enter(&dv->sdev_lookup_lock);
2084 		SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2085 		mutex_exit(&dv->sdev_lookup_lock);
2086 		return (0);
2087 	} else {
2088 		impossible(("lookup: %s/%s by %s not supported (%d)\n",
2089 		    SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
2090 		    __LINE__));
2091 		rv = -1;
2092 	}
2093 
2094 	*dvp = dv;
2095 	return (rv);
2096 }
2097 
/*
 * Return 1 when exec_name is exactly "devfsadm", 0 otherwise.
 *
 * note: because devfsadmd -> /usr/sbin/devfsadm
 * it is safe to use "devfsadm" to capture the lookups
 * from devfsadm and its daemon version.
 */
static int
is_devfsadm_thread(char *exec_name)
{
	return (strcmp(exec_name, "devfsadm") == 0);
}
2110 
2111 
2112 /*
2113  * Lookup Order:
2114  *	sdev_node cache;
2115  *	backing store (SDEV_PERSIST);
2116  *	DBNR: a. dir_ops implemented in the loadable modules;
2117  *	      b. vnode ops in vtab.
2118  */
2119 int
2120 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
2121     struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
2122     struct cred *, void *, char *), int flags)
2123 {
2124 	int rv = 0, nmlen;
2125 	struct vnode *rvp = NULL;
2126 	struct sdev_node *dv = NULL;
2127 	int	retried = 0;
2128 	int	error = 0;
2129 	struct devname_nsmap *map = NULL;
2130 	struct devname_ops *dirops = NULL;
2131 	int (*fn)(char *, devname_handle_t *, struct cred *) = NULL;
2132 	struct vattr vattr;
2133 	char *lookup_thread = curproc->p_user.u_comm;
2134 	int failed_flags = 0;
2135 	int (*vtor)(struct sdev_node *) = NULL;
2136 	int state;
2137 	int parent_state;
2138 	char *link = NULL;
2139 
2140 	if (SDEVTOV(ddv)->v_type != VDIR)
2141 		return (ENOTDIR);
2142 
2143 	/*
2144 	 * Empty name or ., return node itself.
2145 	 */
2146 	nmlen = strlen(nm);
2147 	if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
2148 		*vpp = SDEVTOV(ddv);
2149 		VN_HOLD(*vpp);
2150 		return (0);
2151 	}
2152 
2153 	/*
2154 	 * .., return the parent directory
2155 	 */
2156 	if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
2157 		*vpp = SDEVTOV(ddv->sdev_dotdot);
2158 		VN_HOLD(*vpp);
2159 		return (0);
2160 	}
2161 
2162 	rw_enter(&ddv->sdev_contents, RW_READER);
2163 	if (ddv->sdev_flags & SDEV_VTOR) {
2164 		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2165 		ASSERT(vtor);
2166 	}
2167 
2168 tryagain:
2169 	/*
2170 	 * (a) directory cache lookup:
2171 	 */
2172 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2173 	parent_state = ddv->sdev_state;
2174 	dv = sdev_cache_lookup(ddv, nm);
2175 	if (dv) {
2176 		state = dv->sdev_state;
2177 		switch (state) {
2178 		case SDEV_INIT:
2179 			if (is_devfsadm_thread(lookup_thread))
2180 				break;
2181 
2182 			/* ZOMBIED parent won't allow node creation */
2183 			if (parent_state == SDEV_ZOMBIE) {
2184 				SD_TRACE_FAILED_LOOKUP(ddv, nm,
2185 				    retried);
2186 				goto nolock_notfound;
2187 			}
2188 
2189 			mutex_enter(&dv->sdev_lookup_lock);
2190 			/* compensate the threads started after devfsadm */
2191 			if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2192 			    !(SDEV_IS_LOOKUP(dv)))
2193 				SDEV_BLOCK_OTHERS(dv,
2194 				    (SDEV_LOOKUP | SDEV_LGWAITING));
2195 
2196 			if (SDEV_IS_LOOKUP(dv)) {
2197 				failed_flags |= SLF_REBUILT;
2198 				rw_exit(&ddv->sdev_contents);
2199 				error = sdev_wait4lookup(dv, SDEV_LOOKUP);
2200 				mutex_exit(&dv->sdev_lookup_lock);
2201 				rw_enter(&ddv->sdev_contents, RW_READER);
2202 
2203 				if (error != 0) {
2204 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2205 					    retried);
2206 					goto nolock_notfound;
2207 				}
2208 
2209 				state = dv->sdev_state;
2210 				if (state == SDEV_INIT) {
2211 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2212 					    retried);
2213 					goto nolock_notfound;
2214 				} else if (state == SDEV_READY) {
2215 					goto found;
2216 				} else if (state == SDEV_ZOMBIE) {
2217 					rw_exit(&ddv->sdev_contents);
2218 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2219 					    retried);
2220 					SDEV_RELE(dv);
2221 					goto lookup_failed;
2222 				}
2223 			} else {
2224 				mutex_exit(&dv->sdev_lookup_lock);
2225 			}
2226 			break;
2227 		case SDEV_READY:
2228 			goto found;
2229 		case SDEV_ZOMBIE:
2230 			rw_exit(&ddv->sdev_contents);
2231 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2232 			SDEV_RELE(dv);
2233 			goto lookup_failed;
2234 		default:
2235 			rw_exit(&ddv->sdev_contents);
2236 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2237 			sdev_lookup_failed(ddv, nm, failed_flags);
2238 			*vpp = NULLVP;
2239 			return (ENOENT);
2240 		}
2241 	}
2242 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2243 
2244 	/*
2245 	 * ZOMBIED parent does not allow new node creation.
2246 	 * bail out early
2247 	 */
2248 	if (parent_state == SDEV_ZOMBIE) {
2249 		rw_exit(&ddv->sdev_contents);
2250 		*vpp = NULL;
2251 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2252 		return (ENOENT);
2253 	}
2254 
2255 	/*
2256 	 * (b0): backing store lookup
2257 	 *	SDEV_PERSIST is default except:
2258 	 *		1) pts nodes
2259 	 *		2) non-chmod'ed local nodes
2260 	 */
2261 	if (SDEV_IS_PERSIST(ddv)) {
2262 		error = devname_backstore_lookup(ddv, nm, &rvp);
2263 
2264 		if (!error) {
2265 			sdcmn_err3(("devname_backstore_lookup: "
2266 			    "found attrvp %p for %s\n", (void *)rvp, nm));
2267 
2268 			vattr.va_mask = AT_MODE|AT_UID|AT_GID;
2269 			error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
2270 			if (error) {
2271 				rw_exit(&ddv->sdev_contents);
2272 				if (dv)
2273 					SDEV_RELE(dv);
2274 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2275 				sdev_lookup_failed(ddv, nm, failed_flags);
2276 				*vpp = NULLVP;
2277 				return (ENOENT);
2278 			}
2279 
2280 			if (vattr.va_type == VLNK) {
2281 				error = sdev_getlink(rvp, &link);
2282 				if (error) {
2283 					rw_exit(&ddv->sdev_contents);
2284 					if (dv)
2285 						SDEV_RELE(dv);
2286 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2287 					    retried);
2288 					sdev_lookup_failed(ddv, nm,
2289 					    failed_flags);
2290 					*vpp = NULLVP;
2291 					return (ENOENT);
2292 				}
2293 				ASSERT(link != NULL);
2294 			}
2295 
2296 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
2297 				rw_exit(&ddv->sdev_contents);
2298 				rw_enter(&ddv->sdev_contents, RW_WRITER);
2299 			}
2300 			error = sdev_mknode(ddv, nm, &dv, &vattr,
2301 			    rvp, link, cred, SDEV_READY);
2302 			rw_downgrade(&ddv->sdev_contents);
2303 
2304 			if (link != NULL) {
2305 				kmem_free(link, strlen(link) + 1);
2306 				link = NULL;
2307 			}
2308 
2309 			if (error) {
2310 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2311 				rw_exit(&ddv->sdev_contents);
2312 				if (dv)
2313 					SDEV_RELE(dv);
2314 				goto lookup_failed;
2315 			} else {
2316 				goto found;
2317 			}
2318 		} else if (retried) {
2319 			rw_exit(&ddv->sdev_contents);
2320 			sdcmn_err3(("retry of lookup of %s/%s: failed\n",
2321 			    ddv->sdev_name, nm));
2322 			if (dv)
2323 				SDEV_RELE(dv);
2324 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2325 			sdev_lookup_failed(ddv, nm, failed_flags);
2326 			*vpp = NULLVP;
2327 			return (ENOENT);
2328 		}
2329 	}
2330 
2331 
2332 	/* first thread that is doing the lookup on this node */
2333 	if (!dv) {
2334 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2335 			rw_exit(&ddv->sdev_contents);
2336 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2337 		}
2338 		error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
2339 		    cred, SDEV_INIT);
2340 		if (!dv) {
2341 			rw_exit(&ddv->sdev_contents);
2342 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2343 			sdev_lookup_failed(ddv, nm, failed_flags);
2344 			*vpp = NULLVP;
2345 			return (ENOENT);
2346 		}
2347 		rw_downgrade(&ddv->sdev_contents);
2348 	}
2349 	ASSERT(dv);
2350 	ASSERT(SDEV_HELD(dv));
2351 
2352 	if (SDEV_IS_NO_NCACHE(dv)) {
2353 		failed_flags |= SLF_NO_NCACHE;
2354 	}
2355 
2356 	if (SDEV_IS_GLOBAL(ddv)) {
2357 		map = sdev_get_map(ddv, 1);
2358 		dirops = map ? map->dir_ops : NULL;
2359 		fn = dirops ? dirops->devnops_lookup : NULL;
2360 	}
2361 
2362 	/*
2363 	 * (b1) invoking devfsadm once per life time for devfsadm nodes
2364 	 */
2365 	if ((fn == NULL) && !callback) {
2366 
2367 		if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
2368 		    SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
2369 		    ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
2370 			ASSERT(SDEV_HELD(dv));
2371 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2372 			goto nolock_notfound;
2373 		}
2374 
2375 		/*
2376 		 * filter out known non-existent devices recorded
2377 		 * during initial reconfiguration boot for which
2378 		 * reconfig should not be done and lookup may
2379 		 * be short-circuited now.
2380 		 */
2381 		if (sdev_lookup_filter(ddv, nm)) {
2382 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2383 			goto nolock_notfound;
2384 		}
2385 
2386 		/* bypassing devfsadm internal nodes */
2387 		if (is_devfsadm_thread(lookup_thread)) {
2388 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2389 			goto nolock_notfound;
2390 		}
2391 
2392 		if (sdev_reconfig_disable) {
2393 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2394 			goto nolock_notfound;
2395 		}
2396 
2397 		error = sdev_call_devfsadmd(ddv, dv, nm);
2398 		if (error == 0) {
2399 			sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
2400 			    ddv->sdev_name, nm, curproc->p_user.u_comm));
2401 			if (sdev_reconfig_verbose) {
2402 				cmn_err(CE_CONT,
2403 				    "?lookup of %s/%s by %s: reconfig\n",
2404 				    ddv->sdev_name, nm, curproc->p_user.u_comm);
2405 			}
2406 			retried = 1;
2407 			failed_flags |= SLF_REBUILT;
2408 			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
2409 			SDEV_SIMPLE_RELE(dv);
2410 			goto tryagain;
2411 		} else {
2412 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2413 			goto nolock_notfound;
2414 		}
2415 	}
2416 
2417 	/*
2418 	 * (b2) Directory Based Name Resolution (DBNR):
2419 	 *	ddv	- parent
2420 	 *	nm	- /dev/(ddv->sdev_name)/nm
2421 	 *
2422 	 *	note: module vnode ops take precedence than the build-in ones
2423 	 */
2424 	if (fn) {
2425 		error = sdev_call_modulelookup(ddv, &dv, nm, fn, cred);
2426 		if (error) {
2427 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2428 			goto notfound;
2429 		} else {
2430 			goto found;
2431 		}
2432 	} else if (callback) {
2433 		error = sdev_call_dircallback(ddv, &dv, nm, callback,
2434 		    flags, cred);
2435 		if (error == 0) {
2436 			goto found;
2437 		} else {
2438 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2439 			goto notfound;
2440 		}
2441 	}
2442 	ASSERT(rvp);
2443 
2444 found:
2445 	ASSERT(!(dv->sdev_flags & SDEV_STALE));
2446 	ASSERT(dv->sdev_state == SDEV_READY);
2447 	if (vtor) {
2448 		/*
2449 		 * Check validity of returned node
2450 		 */
2451 		switch (vtor(dv)) {
2452 		case SDEV_VTOR_VALID:
2453 			break;
2454 		case SDEV_VTOR_INVALID:
2455 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2456 			sdcmn_err7(("lookup: destroy invalid "
2457 			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
2458 			goto nolock_notfound;
2459 		case SDEV_VTOR_SKIP:
2460 			sdcmn_err7(("lookup: node not applicable - "
2461 			    "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
2462 			rw_exit(&ddv->sdev_contents);
2463 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2464 			SDEV_RELE(dv);
2465 			goto lookup_failed;
2466 		default:
2467 			cmn_err(CE_PANIC,
2468 			    "dev fs: validator failed: %s(%p)\n",
2469 			    dv->sdev_name, (void *)dv);
2470 			break;
2471 			/*NOTREACHED*/
2472 		}
2473 	}
2474 
2475 	if ((SDEVTOV(dv)->v_type == VDIR) && SDEV_IS_GLOBAL(dv)) {
2476 		rw_enter(&dv->sdev_contents, RW_READER);
2477 		(void) sdev_get_map(dv, 1);
2478 		rw_exit(&dv->sdev_contents);
2479 	}
2480 	rw_exit(&ddv->sdev_contents);
2481 	rv = sdev_to_vp(dv, vpp);
2482 	sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
2483 	    "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
2484 	    dv->sdev_state, nm, rv));
2485 	return (rv);
2486 
2487 notfound:
2488 	mutex_enter(&dv->sdev_lookup_lock);
2489 	SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2490 	mutex_exit(&dv->sdev_lookup_lock);
2491 nolock_notfound:
2492 	/*
2493 	 * Destroy the node that is created for synchronization purposes.
2494 	 */
2495 	sdcmn_err3(("devname_lookup_func: %s with state %d\n",
2496 	    nm, dv->sdev_state));
2497 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2498 	if (dv->sdev_state == SDEV_INIT) {
2499 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2500 			rw_exit(&ddv->sdev_contents);
2501 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2502 		}
2503 
2504 		/*
2505 		 * Node state may have changed during the lock
2506 		 * changes. Re-check.
2507 		 */
2508 		if (dv->sdev_state == SDEV_INIT) {
2509 			(void) sdev_dirdelete(ddv, dv);
2510 			rw_exit(&ddv->sdev_contents);
2511 			sdev_lookup_failed(ddv, nm, failed_flags);
2512 			*vpp = NULL;
2513 			return (ENOENT);
2514 		}
2515 	}
2516 
2517 	rw_exit(&ddv->sdev_contents);
2518 	SDEV_RELE(dv);
2519 
2520 lookup_failed:
2521 	sdev_lookup_failed(ddv, nm, failed_flags);
2522 	*vpp = NULL;
2523 	return (ENOENT);
2524 }
2525 
2526 /*
2527  * Given a directory node, mark all nodes beneath as
2528  * STALE, i.e. nodes that don't exist as far as new
2529  * consumers are concerned.  Remove them from the
2530  * list of directory entries so that no lookup or
2531  * directory traversal will find them.  The node
2532  * not deallocated so existing holds are not affected.
2533  */
2534 void
2535 sdev_stale(struct sdev_node *ddv)
2536 {
2537 	struct sdev_node *dv;
2538 	struct vnode *vp;
2539 
2540 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2541 
2542 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2543 	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = SDEV_NEXT_ENTRY(ddv, dv)) {
2544 		vp = SDEVTOV(dv);
2545 		if (vp->v_type == VDIR)
2546 			sdev_stale(dv);
2547 
2548 		sdcmn_err9(("sdev_stale: setting stale %s\n",
2549 		    dv->sdev_path));
2550 		dv->sdev_flags |= SDEV_STALE;
2551 		avl_remove(&ddv->sdev_entries, dv);
2552 	}
2553 	ddv->sdev_flags |= SDEV_BUILD;
2554 	rw_exit(&ddv->sdev_contents);
2555 }
2556 
2557 /*
2558  * Given a directory node, clean out all the nodes beneath.
2559  * If expr is specified, clean node with names matching expr.
2560  * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2561  *	so they are excluded from future lookups.
2562  */
2563 int
2564 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2565 {
2566 	int error = 0;
2567 	int busy = 0;
2568 	struct vnode *vp;
2569 	struct sdev_node *dv, *next = NULL;
2570 	int bkstore = 0;
2571 	int len = 0;
2572 	char *bks_name = NULL;
2573 
2574 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2575 
2576 	/*
2577 	 * We try our best to destroy all unused sdev_node's
2578 	 */
2579 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2580 	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) {
2581 		next = SDEV_NEXT_ENTRY(ddv, dv);
2582 		vp = SDEVTOV(dv);
2583 
2584 		if (expr && gmatch(dv->sdev_name, expr) == 0)
2585 			continue;
2586 
2587 		if (vp->v_type == VDIR &&
2588 		    sdev_cleandir(dv, NULL, flags) != 0) {
2589 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2590 			    dv->sdev_name));
2591 			busy++;
2592 			continue;
2593 		}
2594 
2595 		if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2596 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2597 			    dv->sdev_name));
2598 			busy++;
2599 			continue;
2600 		}
2601 
2602 		/*
2603 		 * at this point, either dv is not held or SDEV_ENFORCE
2604 		 * is specified. In either case, dv needs to be deleted
2605 		 */
2606 		SDEV_HOLD(dv);
2607 
2608 		bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2609 		if (bkstore && (vp->v_type == VDIR))
2610 			bkstore += 1;
2611 
2612 		if (bkstore) {
2613 			len = strlen(dv->sdev_name) + 1;
2614 			bks_name = kmem_alloc(len, KM_SLEEP);
2615 			bcopy(dv->sdev_name, bks_name, len);
2616 		}
2617 
2618 		error = sdev_dirdelete(ddv, dv);
2619 
2620 		if (error == EBUSY) {
2621 			sdcmn_err9(("sdev_cleandir: dir busy\n"));
2622 			busy++;
2623 		}
2624 
2625 		/* take care the backing store clean up */
2626 		if (bkstore && (error == 0)) {
2627 			ASSERT(bks_name);
2628 			ASSERT(ddv->sdev_attrvp);
2629 
2630 			if (bkstore == 1) {
2631 				error = VOP_REMOVE(ddv->sdev_attrvp,
2632 				    bks_name, kcred, NULL, 0);
2633 			} else if (bkstore == 2) {
2634 				error = VOP_RMDIR(ddv->sdev_attrvp,
2635 				    bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2636 			}
2637 
2638 			/* do not propagate the backing store errors */
2639 			if (error) {
2640 				sdcmn_err9(("sdev_cleandir: backing store"
2641 				    "not cleaned\n"));
2642 				error = 0;
2643 			}
2644 
2645 			bkstore = 0;
2646 			kmem_free(bks_name, len);
2647 			bks_name = NULL;
2648 			len = 0;
2649 		}
2650 	}
2651 
2652 	ddv->sdev_flags |= SDEV_BUILD;
2653 	rw_exit(&ddv->sdev_contents);
2654 
2655 	if (busy) {
2656 		error = EBUSY;
2657 	}
2658 
2659 	return (error);
2660 }
2661 
2662 /*
2663  * a convenient wrapper for readdir() funcs
2664  */
2665 size_t
2666 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2667 {
2668 	size_t reclen = DIRENT64_RECLEN(strlen(nm));
2669 	if (reclen > size)
2670 		return (0);
2671 
2672 	de->d_ino = (ino64_t)ino;
2673 	de->d_off = (off64_t)off + 1;
2674 	de->d_reclen = (ushort_t)reclen;
2675 	(void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2676 	return (reclen);
2677 }
2678 
2679 /*
2680  * sdev_mount service routines
2681  */
2682 int
2683 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2684 {
2685 	int	error;
2686 
2687 	if (uap->datalen != sizeof (*args))
2688 		return (EINVAL);
2689 
2690 	if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2691 		cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2692 		    "get user data. error %d\n", error);
2693 		return (EFAULT);
2694 	}
2695 
2696 	return (0);
2697 }
2698 
/*
 * nextdp(dp): address of the dirent64 record that follows dp in a
 * packed buffer, i.e. dp advanced by its own d_reclen bytes.  Any
 * previous definition of nextdp is discarded first.
 */
#ifdef nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *) \
			    (intptr_t)((char *)(dp) + (dp)->d_reclen))
2705 /*
2706  * readdir helper func
2707  */
2708 int
2709 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
2710     int flags)
2711 {
2712 	struct sdev_node *ddv = VTOSDEV(vp);
2713 	struct sdev_node *dv;
2714 	dirent64_t	*dp;
2715 	ulong_t		outcount = 0;
2716 	size_t		namelen;
2717 	ulong_t		alloc_count;
2718 	void		*outbuf;
2719 	struct iovec	*iovp;
2720 	int		error = 0;
2721 	size_t		reclen;
2722 	offset_t	diroff;
2723 	offset_t	soff;
2724 	int		this_reclen;
2725 	struct devname_nsmap	*map = NULL;
2726 	struct devname_ops	*dirops = NULL;
2727 	int (*fn)(devname_handle_t *, struct cred *) = NULL;
2728 	int (*vtor)(struct sdev_node *) = NULL;
2729 	struct vattr attr;
2730 	timestruc_t now;
2731 
2732 	ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
2733 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2734 
2735 	if (uiop->uio_loffset >= MAXOFF_T) {
2736 		if (eofp)
2737 			*eofp = 1;
2738 		return (0);
2739 	}
2740 
2741 	if (uiop->uio_iovcnt != 1)
2742 		return (EINVAL);
2743 
2744 	if (vp->v_type != VDIR)
2745 		return (ENOTDIR);
2746 
2747 	if (ddv->sdev_flags & SDEV_VTOR) {
2748 		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2749 		ASSERT(vtor);
2750 	}
2751 
2752 	if (eofp != NULL)
2753 		*eofp = 0;
2754 
2755 	soff = uiop->uio_loffset;
2756 	iovp = uiop->uio_iov;
2757 	alloc_count = iovp->iov_len;
2758 	dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
2759 	outcount = 0;
2760 
2761 	if (ddv->sdev_state == SDEV_ZOMBIE)
2762 		goto get_cache;
2763 
2764 	if (SDEV_IS_GLOBAL(ddv)) {
2765 		map = sdev_get_map(ddv, 0);
2766 		dirops = map ? map->dir_ops : NULL;
2767 		fn = dirops ? dirops->devnops_readdir : NULL;
2768 
2769 		if (map && map->dir_map) {
2770 			/*
2771 			 * load the name mapping rule database
2772 			 * through invoking devfsadm and symlink
2773 			 * all the entries in the map
2774 			 */
2775 			devname_rdr_result_t rdr_result;
2776 			int do_thread = 0;
2777 
2778 			rw_enter(&map->dir_lock, RW_READER);
2779 			do_thread = map->dir_maploaded ? 0 : 1;
2780 			rw_exit(&map->dir_lock);
2781 
2782 			if (do_thread) {
2783 				mutex_enter(&ddv->sdev_lookup_lock);
2784 				SDEV_BLOCK_OTHERS(ddv, SDEV_READDIR);
2785 				mutex_exit(&ddv->sdev_lookup_lock);
2786 
2787 				sdev_dispatch_to_nsrdr_thread(ddv,
2788 				    map->dir_map, &rdr_result);
2789 			}
2790 		} else if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
2791 		    !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
2792 		    !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
2793 		    ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
2794 		    !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
2795 		    !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2796 		    !sdev_reconfig_disable) {
2797 			/*
2798 			 * invoking "devfsadm" to do system device reconfig
2799 			 */
2800 			mutex_enter(&ddv->sdev_lookup_lock);
2801 			SDEV_BLOCK_OTHERS(ddv,
2802 			    (SDEV_READDIR|SDEV_LGWAITING));
2803 			mutex_exit(&ddv->sdev_lookup_lock);
2804 
2805 			sdcmn_err8(("readdir of %s by %s: reconfig\n",
2806 			    ddv->sdev_path, curproc->p_user.u_comm));
2807 			if (sdev_reconfig_verbose) {
2808 				cmn_err(CE_CONT,
2809 				    "?readdir of %s by %s: reconfig\n",
2810 				    ddv->sdev_path, curproc->p_user.u_comm);
2811 			}
2812 
2813 			sdev_devfsadmd_thread(ddv, NULL, kcred);
2814 		} else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
2815 			/*
2816 			 * compensate the "ls" started later than "devfsadm"
2817 			 */
2818 			mutex_enter(&ddv->sdev_lookup_lock);
2819 			SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
2820 			mutex_exit(&ddv->sdev_lookup_lock);
2821 		}
2822 
2823 		/*
2824 		 * release the contents lock so that
2825 		 * the cache may be updated by devfsadmd
2826 		 */
2827 		rw_exit(&ddv->sdev_contents);
2828 		mutex_enter(&ddv->sdev_lookup_lock);
2829 		if (SDEV_IS_READDIR(ddv))
2830 			(void) sdev_wait4lookup(ddv, SDEV_READDIR);
2831 		mutex_exit(&ddv->sdev_lookup_lock);
2832 		rw_enter(&ddv->sdev_contents, RW_READER);
2833 
2834 		sdcmn_err4(("readdir of directory %s by %s\n",
2835 		    ddv->sdev_name, curproc->p_user.u_comm));
2836 		if (ddv->sdev_flags & SDEV_BUILD) {
2837 			if (SDEV_IS_PERSIST(ddv)) {
2838 				error = sdev_filldir_from_store(ddv,
2839 				    alloc_count, cred);
2840 			}
2841 			ddv->sdev_flags &= ~SDEV_BUILD;
2842 		}
2843 	}
2844 
2845 get_cache:
2846 	/* handle "." and ".." */
2847 	diroff = 0;
2848 	if (soff == 0) {
2849 		/* first time */
2850 		this_reclen = DIRENT64_RECLEN(1);
2851 		if (alloc_count < this_reclen) {
2852 			error = EINVAL;
2853 			goto done;
2854 		}
2855 
2856 		dp->d_ino = (ino64_t)ddv->sdev_ino;
2857 		dp->d_off = (off64_t)1;
2858 		dp->d_reclen = (ushort_t)this_reclen;
2859 
2860 		(void) strncpy(dp->d_name, ".",
2861 		    DIRENT64_NAMELEN(this_reclen));
2862 		outcount += dp->d_reclen;
2863 		dp = nextdp(dp);
2864 	}
2865 
2866 	diroff++;
2867 	if (soff <= 1) {
2868 		this_reclen = DIRENT64_RECLEN(2);
2869 		if (alloc_count < outcount + this_reclen) {
2870 			error = EINVAL;
2871 			goto done;
2872 		}
2873 
2874 		dp->d_reclen = (ushort_t)this_reclen;
2875 		dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
2876 		dp->d_off = (off64_t)2;
2877 
2878 		(void) strncpy(dp->d_name, "..",
2879 		    DIRENT64_NAMELEN(this_reclen));
2880 		outcount += dp->d_reclen;
2881 
2882 		dp = nextdp(dp);
2883 	}
2884 
2885 
2886 	/* gets the cache */
2887 	diroff++;
2888 	for (dv = SDEV_FIRST_ENTRY(ddv); dv;
2889 	    dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
2890 		sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
2891 		    diroff, soff, dv->sdev_name));
2892 
2893 		/* bypassing pre-matured nodes */
2894 		if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
2895 			sdcmn_err3(("sdev_readdir: pre-mature node  "
2896 			    "%s\n", dv->sdev_name));
2897 			continue;
2898 		}
2899 
2900 		/*
2901 		 * Check validity of node
2902 		 */
2903 		if (vtor) {
2904 			switch (vtor(dv)) {
2905 			case SDEV_VTOR_VALID:
2906 				break;
2907 			case SDEV_VTOR_INVALID:
2908 			case SDEV_VTOR_SKIP:
2909 				continue;
2910 			default:
2911 				cmn_err(CE_PANIC,
2912 				    "dev fs: validator failed: %s(%p)\n",
2913 				    dv->sdev_name, (void *)dv);
2914 				break;
2915 			/*NOTREACHED*/
2916 			}
2917 		}
2918 
2919 		/*
2920 		 * call back into the module for the validity/bookkeeping
2921 		 * of this entry
2922 		 */
2923 		if (fn) {
2924 			error = (*fn)(&(dv->sdev_handle), cred);
2925 			if (error) {
2926 				sdcmn_err4(("sdev_readdir: module did not "
2927 				    "validate %s\n", dv->sdev_name));
2928 				continue;
2929 			}
2930 		}
2931 
2932 		namelen = strlen(dv->sdev_name);
2933 		reclen = DIRENT64_RECLEN(namelen);
2934 		if (outcount + reclen > alloc_count) {
2935 			goto full;
2936 		}
2937 		dp->d_reclen = (ushort_t)reclen;
2938 		dp->d_ino = (ino64_t)dv->sdev_ino;
2939 		dp->d_off = (off64_t)diroff + 1;
2940 		(void) strncpy(dp->d_name, dv->sdev_name,
2941 		    DIRENT64_NAMELEN(reclen));
2942 		outcount += reclen;
2943 		dp = nextdp(dp);
2944 	}
2945 
2946 full:
2947 	sdcmn_err4(("sdev_readdir: moving %lu bytes: "
2948 	    "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
2949 	    (void *)dv));
2950 
2951 	if (outcount)
2952 		error = uiomove(outbuf, outcount, UIO_READ, uiop);
2953 
2954 	if (!error) {
2955 		uiop->uio_loffset = diroff;
2956 		if (eofp)
2957 			*eofp = dv ? 0 : 1;
2958 	}
2959 
2960 
2961 	if (ddv->sdev_attrvp) {
2962 		gethrestime(&now);
2963 		attr.va_ctime = now;
2964 		attr.va_atime = now;
2965 		attr.va_mask = AT_CTIME|AT_ATIME;
2966 
2967 		(void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
2968 	}
2969 done:
2970 	kmem_free(outbuf, alloc_count);
2971 	return (error);
2972 }
2973 
2974 
2975 static int
2976 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2977 {
2978 	vnode_t *vp;
2979 	vnode_t *cvp;
2980 	struct sdev_node *svp;
2981 	char *nm;
2982 	struct pathname pn;
2983 	int error;
2984 	int persisted = 0;
2985 
2986 	if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2987 		return (error);
2988 	nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2989 
2990 	vp = rootdir;
2991 	VN_HOLD(vp);
2992 
2993 	while (pn_pathleft(&pn)) {
2994 		ASSERT(vp->v_type == VDIR);
2995 		(void) pn_getcomponent(&pn, nm);
2996 		error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2997 		    NULL, NULL);
2998 		VN_RELE(vp);
2999 
3000 		if (error)
3001 			break;
3002 
3003 		/* traverse mount points encountered on our journey */
3004 		if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
3005 			VN_RELE(cvp);
3006 			break;
3007 		}
3008 
3009 		/*
3010 		 * Direct the operation to the persisting filesystem
3011 		 * underlying /dev.  Bail if we encounter a
3012 		 * non-persistent dev entity here.
3013 		 */
3014 		if (cvp->v_vfsp->vfs_fstype == devtype) {
3015 
3016 			if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
3017 				error = ENOENT;
3018 				VN_RELE(cvp);
3019 				break;
3020 			}
3021 
3022 			if (VTOSDEV(cvp) == NULL) {
3023 				error = ENOENT;
3024 				VN_RELE(cvp);
3025 				break;
3026 			}
3027 			svp = VTOSDEV(cvp);
3028 			if ((vp = svp->sdev_attrvp) == NULL) {
3029 				error = ENOENT;
3030 				VN_RELE(cvp);
3031 				break;
3032 			}
3033 			persisted = 1;
3034 			VN_HOLD(vp);
3035 			VN_RELE(cvp);
3036 			cvp = vp;
3037 		}
3038 
3039 		vp = cvp;
3040 		pn_skipslash(&pn);
3041 	}
3042 
3043 	kmem_free(nm, MAXNAMELEN);
3044 	pn_free(&pn);
3045 
3046 	if (error)
3047 		return (error);
3048 
3049 	/*
3050 	 * Only return persisted nodes in the filesystem underlying /dev.
3051 	 */
3052 	if (!persisted) {
3053 		VN_RELE(vp);
3054 		return (ENOENT);
3055 	}
3056 
3057 	*r_vp = vp;
3058 	return (0);
3059 }
3060 
3061 int
3062 sdev_modctl_readdir(const char *dir, char ***dirlistp,
3063 	int *npathsp, int *npathsp_alloc, int checking_empty)
3064 {
3065 	char	**pathlist = NULL;
3066 	char	**newlist = NULL;
3067 	int	npaths = 0;
3068 	int	npaths_alloc = 0;
3069 	dirent64_t *dbuf = NULL;
3070 	int	n;
3071 	char	*s;
3072 	int error;
3073 	vnode_t *vp;
3074 	int eof;
3075 	struct iovec iov;
3076 	struct uio uio;
3077 	struct dirent64 *dp;
3078 	size_t dlen;
3079 	size_t dbuflen;
3080 	int ndirents = 64;
3081 	char *nm;
3082 
3083 	error = sdev_modctl_lookup(dir, &vp);
3084 	sdcmn_err11(("modctl readdir: %s by %s: %s\n",
3085 	    dir, curproc->p_user.u_comm,
3086 	    (error == 0) ? "ok" : "failed"));
3087 	if (error)
3088 		return (error);
3089 
3090 	dlen = ndirents * (sizeof (*dbuf));
3091 	dbuf = kmem_alloc(dlen, KM_SLEEP);
3092 
3093 	uio.uio_iov = &iov;
3094 	uio.uio_iovcnt = 1;
3095 	uio.uio_segflg = UIO_SYSSPACE;
3096 	uio.uio_fmode = 0;
3097 	uio.uio_extflg = UIO_COPY_CACHED;
3098 	uio.uio_loffset = 0;
3099 	uio.uio_llimit = MAXOFFSET_T;
3100 
3101 	eof = 0;
3102 	error = 0;
3103 	while (!error && !eof) {
3104 		uio.uio_resid = dlen;
3105 		iov.iov_base = (char *)dbuf;
3106 		iov.iov_len = dlen;
3107 
3108 		(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3109 		error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
3110 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3111 
3112 		dbuflen = dlen - uio.uio_resid;
3113 
3114 		if (error || dbuflen == 0)
3115 			break;
3116 
3117 		for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
3118 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
3119 
3120 			nm = dp->d_name;
3121 
3122 			if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
3123 				continue;
3124 			if (npaths == npaths_alloc) {
3125 				npaths_alloc += 64;
3126 				newlist = (char **)
3127 				    kmem_zalloc((npaths_alloc + 1) *
3128 				    sizeof (char *), KM_SLEEP);
3129 				if (pathlist) {
3130 					bcopy(pathlist, newlist,
3131 					    npaths * sizeof (char *));
3132 					kmem_free(pathlist,
3133 					    (npaths + 1) * sizeof (char *));
3134 				}
3135 				pathlist = newlist;
3136 			}
3137 			n = strlen(nm) + 1;
3138 			s = kmem_alloc(n, KM_SLEEP);
3139 			bcopy(nm, s, n);
3140 			pathlist[npaths++] = s;
3141 			sdcmn_err11(("  %s/%s\n", dir, s));
3142 
3143 			/* if checking empty, one entry is as good as many */
3144 			if (checking_empty) {
3145 				eof = 1;
3146 				break;
3147 			}
3148 		}
3149 	}
3150 
3151 exit:
3152 	VN_RELE(vp);
3153 
3154 	if (dbuf)
3155 		kmem_free(dbuf, dlen);
3156 
3157 	if (error)
3158 		return (error);
3159 
3160 	*dirlistp = pathlist;
3161 	*npathsp = npaths;
3162 	*npathsp_alloc = npaths_alloc;
3163 
3164 	return (0);
3165 }
3166 
/*
 * Release a name list produced by sdev_modctl_readdir(): each of the
 * npaths strings, then the array of npaths_alloc + 1 slots.
 *
 * sdev_modctl_readdir() hands back a NULL list (with both counts 0)
 * when it found no names; that case is a no-op here.
 */
void
sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
{
	int	i, n;

	if (pathlist == NULL)
		return;

	for (i = 0; i < npaths; i++) {
		n = strlen(pathlist[i]) + 1;
		kmem_free(pathlist[i], n);
	}

	kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
}
3179 
3180 int
3181 sdev_modctl_devexists(const char *path)
3182 {
3183 	vnode_t *vp;
3184 	int error;
3185 
3186 	error = sdev_modctl_lookup(path, &vp);
3187 	sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
3188 	    path, curproc->p_user.u_comm,
3189 	    (error == 0) ? "ok" : "failed"));
3190 	if (error == 0)
3191 		VN_RELE(vp);
3192 
3193 	return (error);
3194 }
3195 
3196 void
3197 sdev_update_newnsmap(struct devname_nsmap *map, char *module, char *mapname)
3198 {
3199 	rw_enter(&map->dir_lock, RW_WRITER);
3200 	if (module) {
3201 		ASSERT(map->dir_newmodule == NULL);
3202 		map->dir_newmodule = i_ddi_strdup(module, KM_SLEEP);
3203 	}
3204 	if (mapname) {
3205 		ASSERT(map->dir_newmap == NULL);
3206 		map->dir_newmap = i_ddi_strdup(mapname, KM_SLEEP);
3207 	}
3208 
3209 	map->dir_invalid = 1;
3210 	rw_exit(&map->dir_lock);
3211 }
3212 
3213 void
3214 sdev_replace_nsmap(struct devname_nsmap *map, char *module, char *mapname)
3215 {
3216 	char *old_module = NULL;
3217 	char *old_map = NULL;
3218 
3219 	ASSERT(RW_LOCK_HELD(&map->dir_lock));
3220 	if (!rw_tryupgrade(&map->dir_lock)) {
3221 		rw_exit(&map->dir_lock);
3222 		rw_enter(&map->dir_lock, RW_WRITER);
3223 	}
3224 
3225 	old_module = map->dir_module;
3226 	if (module) {
3227 		if (old_module && strcmp(old_module, module) != 0) {
3228 			kmem_free(old_module, strlen(old_module) + 1);
3229 		}
3230 		map->dir_module = module;
3231 		map->dir_newmodule = NULL;
3232 	}
3233 
3234 	old_map = map->dir_map;
3235 	if (mapname) {
3236 		if (old_map && strcmp(old_map, mapname) != 0) {
3237 			kmem_free(old_map, strlen(old_map) + 1);
3238 		}
3239 
3240 		map->dir_map = mapname;
3241 		map->dir_newmap = NULL;
3242 	}
3243 	map->dir_maploaded = 0;
3244 	map->dir_invalid = 0;
3245 	rw_downgrade(&map->dir_lock);
3246 }
3247 
3248 /*
3249  * dir_name should have at least one attribute,
3250  *	dir_module
3251  *	or dir_map
3252  *	or both
3253  * caller holds the devname_nsmaps_lock
3254  */
3255 void
3256 sdev_insert_nsmap(char *dir_name, char *dir_module, char *dir_map)
3257 {
3258 	struct devname_nsmap *map;
3259 	int len = 0;
3260 
3261 	ASSERT(dir_name);
3262 	ASSERT(dir_module || dir_map);
3263 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3264 
3265 	if (map = sdev_get_nsmap_by_dir(dir_name, 1)) {
3266 		sdev_update_newnsmap(map, dir_module, dir_map);
3267 		return;
3268 	}
3269 
3270 	map = (struct devname_nsmap *)kmem_zalloc(sizeof (*map), KM_SLEEP);
3271 	map->dir_name = i_ddi_strdup(dir_name, KM_SLEEP);
3272 	if (dir_module) {
3273 		map->dir_module = i_ddi_strdup(dir_module, KM_SLEEP);
3274 	}
3275 
3276 	if (dir_map) {
3277 		if (dir_map[0] != '/') {
3278 			len = strlen(ETC_DEV_DIR) + strlen(dir_map) + 2;
3279 			map->dir_map = kmem_zalloc(len, KM_SLEEP);
3280 			(void) snprintf(map->dir_map, len, "%s/%s", ETC_DEV_DIR,
3281 			    dir_map);
3282 		} else {
3283 			map->dir_map = i_ddi_strdup(dir_map, KM_SLEEP);
3284 		}
3285 	}
3286 
3287 	map->dir_ops = NULL;
3288 	map->dir_maploaded = 0;
3289 	map->dir_invalid = 0;
3290 	rw_init(&map->dir_lock, NULL, RW_DEFAULT, NULL);
3291 
3292 	map->next = devname_nsmaps;
3293 	map->prev = NULL;
3294 	if (devname_nsmaps) {
3295 		devname_nsmaps->prev = map;
3296 	}
3297 	devname_nsmaps = map;
3298 }
3299 
3300 struct devname_nsmap *
3301 sdev_get_nsmap_by_dir(char *dir_path, int locked)
3302 {
3303 	struct devname_nsmap *map = NULL;
3304 
3305 	if (!locked)
3306 		mutex_enter(&devname_nsmaps_lock);
3307 	for (map = devname_nsmaps; map; map = map->next) {
3308 		sdcmn_err6(("sdev_get_nsmap_by_dir: dir %s\n", map->dir_name));
3309 		if (strcmp(map->dir_name, dir_path) == 0) {
3310 			if (!locked)
3311 				mutex_exit(&devname_nsmaps_lock);
3312 			return (map);
3313 		}
3314 	}
3315 	if (!locked)
3316 		mutex_exit(&devname_nsmaps_lock);
3317 	return (NULL);
3318 }
3319 
3320 struct devname_nsmap *
3321 sdev_get_nsmap_by_module(char *mod_name)
3322 {
3323 	struct devname_nsmap *map = NULL;
3324 
3325 	mutex_enter(&devname_nsmaps_lock);
3326 	for (map = devname_nsmaps; map; map = map->next) {
3327 		sdcmn_err7(("sdev_get_nsmap_by_module: module %s\n",
3328 		    map->dir_module));
3329 		if (map->dir_module && strcmp(map->dir_module, mod_name) == 0) {
3330 			mutex_exit(&devname_nsmaps_lock);
3331 			return (map);
3332 		}
3333 	}
3334 	mutex_exit(&devname_nsmaps_lock);
3335 	return (NULL);
3336 }
3337 
3338 void
3339 sdev_invalidate_nsmaps()
3340 {
3341 	struct devname_nsmap *map = NULL;
3342 
3343 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3344 
3345 	if (devname_nsmaps == NULL)
3346 		return;
3347 
3348 	for (map = devname_nsmaps; map; map = map->next) {
3349 		rw_enter(&map->dir_lock, RW_WRITER);
3350 		map->dir_invalid = 1;
3351 		rw_exit(&map->dir_lock);
3352 	}
3353 	devname_nsmaps_invalidated = 1;
3354 }
3355 
3356 
3357 int
3358 sdev_nsmaps_loaded()
3359 {
3360 	int ret = 0;
3361 
3362 	mutex_enter(&devname_nsmaps_lock);
3363 	if (devname_nsmaps_loaded)
3364 		ret = 1;
3365 
3366 	mutex_exit(&devname_nsmaps_lock);
3367 	return (ret);
3368 }
3369 
3370 int
3371 sdev_nsmaps_reloaded()
3372 {
3373 	int ret = 0;
3374 
3375 	mutex_enter(&devname_nsmaps_lock);
3376 	if (devname_nsmaps_invalidated)
3377 		ret = 1;
3378 
3379 	mutex_exit(&devname_nsmaps_lock);
3380 	return (ret);
3381 }
3382 
3383 static void
3384 sdev_free_nsmap(struct devname_nsmap *map)
3385 {
3386 	ASSERT(map);
3387 	if (map->dir_name)
3388 		kmem_free(map->dir_name, strlen(map->dir_name) + 1);
3389 	if (map->dir_module)
3390 		kmem_free(map->dir_module, strlen(map->dir_module) + 1);
3391 	if (map->dir_map)
3392 		kmem_free(map->dir_map, strlen(map->dir_map) + 1);
3393 	rw_destroy(&map->dir_lock);
3394 	kmem_free(map, sizeof (*map));
3395 }
3396 
3397 void
3398 sdev_validate_nsmaps()
3399 {
3400 	struct devname_nsmap *map = NULL;
3401 	struct devname_nsmap *oldmap = NULL;
3402 
3403 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3404 	map = devname_nsmaps;
3405 	while (map) {
3406 		rw_enter(&map->dir_lock, RW_READER);
3407 		if ((map->dir_invalid == 1) && (map->dir_newmodule == NULL) &&
3408 		    (map->dir_newmap == NULL)) {
3409 			oldmap = map;
3410 			rw_exit(&map->dir_lock);
3411 			if (map->prev)
3412 				map->prev->next = oldmap->next;
3413 			if (map == devname_nsmaps)
3414 				devname_nsmaps = oldmap->next;
3415 
3416 			map = oldmap->next;
3417 			if (map)
3418 				map->prev = oldmap->prev;
3419 			sdev_free_nsmap(oldmap);
3420 			oldmap = NULL;
3421 		} else {
3422 			rw_exit(&map->dir_lock);
3423 			map = map->next;
3424 		}
3425 	}
3426 	devname_nsmaps_invalidated = 0;
3427 }
3428 
3429 static int
3430 sdev_map_is_invalid(struct devname_nsmap *map)
3431 {
3432 	int ret = 0;
3433 
3434 	ASSERT(map);
3435 	rw_enter(&map->dir_lock, RW_READER);
3436 	if (map->dir_invalid)
3437 		ret = 1;
3438 	rw_exit(&map->dir_lock);
3439 	return (ret);
3440 }
3441 
3442 static int
3443 sdev_check_map(struct devname_nsmap *map)
3444 {
3445 	struct devname_nsmap *mapp;
3446 
3447 	mutex_enter(&devname_nsmaps_lock);
3448 	if (devname_nsmaps == NULL) {
3449 		mutex_exit(&devname_nsmaps_lock);
3450 		return (1);
3451 	}
3452 
3453 	for (mapp = devname_nsmaps; mapp; mapp = mapp->next) {
3454 		if (mapp == map) {
3455 			mutex_exit(&devname_nsmaps_lock);
3456 			return (0);
3457 		}
3458 	}
3459 
3460 	mutex_exit(&devname_nsmaps_lock);
3461 	return (1);
3462 
3463 }
3464 
3465 struct devname_nsmap *
3466 sdev_get_map(struct sdev_node *dv, int validate)
3467 {
3468 	struct devname_nsmap *map;
3469 	int error;
3470 
3471 	ASSERT(RW_READ_HELD(&dv->sdev_contents));
3472 	map = dv->sdev_mapinfo;
3473 	if (map && sdev_check_map(map)) {
3474 		if (!rw_tryupgrade(&dv->sdev_contents)) {
3475 			rw_exit(&dv->sdev_contents);
3476 			rw_enter(&dv->sdev_contents, RW_WRITER);
3477 		}
3478 		dv->sdev_mapinfo = NULL;
3479 		rw_downgrade(&dv->sdev_contents);
3480 		return (NULL);
3481 	}
3482 
3483 	if (validate && (!map || (map && sdev_map_is_invalid(map)))) {
3484 		if (!rw_tryupgrade(&dv->sdev_contents)) {
3485 			rw_exit(&dv->sdev_contents);
3486 			rw_enter(&dv->sdev_contents, RW_WRITER);
3487 		}
3488 		error = sdev_get_moduleops(dv);
3489 		if (!error)
3490 			map = dv->sdev_mapinfo;
3491 		rw_downgrade(&dv->sdev_contents);
3492 	}
3493 	return (map);
3494 }
3495 
3496 extern int sdev_vnodeops_tbl_size;
3497 
3498 /*
3499  * construct a new template with overrides from vtab
3500  */
3501 static fs_operation_def_t *
3502 sdev_merge_vtab(const fs_operation_def_t tab[])
3503 {
3504 	fs_operation_def_t *new;
3505 	const fs_operation_def_t *tab_entry;
3506 
3507 	/* make a copy of standard vnode ops table */
3508 	new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
3509 	bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
3510 
3511 	/* replace the overrides from tab */
3512 	for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
3513 		fs_operation_def_t *std_entry = new;
3514 		while (std_entry->name) {
3515 			if (strcmp(tab_entry->name, std_entry->name) == 0) {
3516 				std_entry->func = tab_entry->func;
3517 				break;
3518 			}
3519 			std_entry++;
3520 		}
3521 		if (std_entry->name == NULL)
3522 			cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
3523 			    tab_entry->name);
3524 	}
3525 
3526 	return (new);
3527 }
3528 
3529 /* free memory allocated by sdev_merge_vtab */
3530 static void
3531 sdev_free_vtab(fs_operation_def_t *new)
3532 {
3533 	kmem_free(new, sdev_vnodeops_tbl_size);
3534 }
3535 
3536 void
3537 devname_get_vnode(devname_handle_t *hdl, vnode_t **vpp)
3538 {
3539 	struct sdev_node *dv = hdl->dh_data;
3540 
3541 	ASSERT(dv);
3542 
3543 	rw_enter(&dv->sdev_contents, RW_READER);
3544 	*vpp = SDEVTOV(dv);
3545 	rw_exit(&dv->sdev_contents);
3546 }
3547 
3548 int
3549 devname_get_path(devname_handle_t *hdl, char **path)
3550 {
3551 	struct sdev_node *dv = hdl->dh_data;
3552 
3553 	ASSERT(dv);
3554 
3555 	rw_enter(&dv->sdev_contents, RW_READER);
3556 	*path = dv->sdev_path;
3557 	rw_exit(&dv->sdev_contents);
3558 	return (0);
3559 }
3560 
3561 int
3562 devname_get_name(devname_handle_t *hdl, char **entry)
3563 {
3564 	struct sdev_node *dv = hdl->dh_data;
3565 
3566 	ASSERT(dv);
3567 	rw_enter(&dv->sdev_contents, RW_READER);
3568 	*entry = dv->sdev_name;
3569 	rw_exit(&dv->sdev_contents);
3570 	return (0);
3571 }
3572 
3573 void
3574 devname_get_dir_vnode(devname_handle_t *hdl, vnode_t **vpp)
3575 {
3576 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3577 
3578 	ASSERT(dv);
3579 
3580 	rw_enter(&dv->sdev_contents, RW_READER);
3581 	*vpp = SDEVTOV(dv);
3582 	rw_exit(&dv->sdev_contents);
3583 }
3584 
3585 int
3586 devname_get_dir_path(devname_handle_t *hdl, char **path)
3587 {
3588 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3589 
3590 	ASSERT(dv);
3591 	rw_enter(&dv->sdev_contents, RW_READER);
3592 	*path = dv->sdev_path;
3593 	rw_exit(&dv->sdev_contents);
3594 	return (0);
3595 }
3596 
3597 int
3598 devname_get_dir_name(devname_handle_t *hdl, char **entry)
3599 {
3600 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3601 
3602 	ASSERT(dv);
3603 	rw_enter(&dv->sdev_contents, RW_READER);
3604 	*entry = dv->sdev_name;
3605 	rw_exit(&dv->sdev_contents);
3606 	return (0);
3607 }
3608 
3609 int
3610 devname_get_dir_nsmap(devname_handle_t *hdl, struct devname_nsmap **map)
3611 {
3612 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3613 
3614 	ASSERT(dv);
3615 	rw_enter(&dv->sdev_contents, RW_READER);
3616 	*map = dv->sdev_mapinfo;
3617 	rw_exit(&dv->sdev_contents);
3618 	return (0);
3619 }
3620 
3621 int
3622 devname_get_dir_handle(devname_handle_t *hdl, devname_handle_t **dir_hdl)
3623 {
3624 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3625 
3626 	ASSERT(dv);
3627 	rw_enter(&dv->sdev_contents, RW_READER);
3628 	*dir_hdl = &(dv->sdev_handle);
3629 	rw_exit(&dv->sdev_contents);
3630 	return (0);
3631 }
3632 
3633 void
3634 devname_set_nodetype(devname_handle_t *hdl, void *args, int spec)
3635 {
3636 	struct sdev_node *dv = hdl->dh_data;
3637 
3638 	ASSERT(dv);
3639 	rw_enter(&dv->sdev_contents, RW_WRITER);
3640 	hdl->dh_spec = (devname_spec_t)spec;
3641 	hdl->dh_args = (void *)i_ddi_strdup((char *)args, KM_SLEEP);
3642 	rw_exit(&dv->sdev_contents);
3643 }
3644 
3645 /*
3646  * a generic setattr() function
3647  *
3648  * note: flags only supports AT_UID and AT_GID.
3649  *	 Future enhancements can be done for other types, e.g. AT_MODE
3650  */
3651 int
3652 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
3653     struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
3654     int), int protocol)
3655 {
3656 	struct sdev_node	*dv = VTOSDEV(vp);
3657 	struct sdev_node	*parent = dv->sdev_dotdot;
3658 	struct vattr		*get;
3659 	uint_t			mask = vap->va_mask;
3660 	int 			error;
3661 
3662 	/* some sanity checks */
3663 	if (vap->va_mask & AT_NOSET)
3664 		return (EINVAL);
3665 
3666 	if (vap->va_mask & AT_SIZE) {
3667 		if (vp->v_type == VDIR) {
3668 			return (EISDIR);
3669 		}
3670 	}
3671 
3672 	/* no need to set attribute, but do not fail either */
3673 	ASSERT(parent);
3674 	rw_enter(&parent->sdev_contents, RW_READER);
3675 	if (dv->sdev_state == SDEV_ZOMBIE) {
3676 		rw_exit(&parent->sdev_contents);
3677 		return (0);
3678 	}
3679 
3680 	/* If backing store exists, just set it. */
3681 	if (dv->sdev_attrvp) {
3682 		rw_exit(&parent->sdev_contents);
3683 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3684 	}
3685 
3686 	/*
3687 	 * Otherwise, for nodes with the persistence attribute, create it.
3688 	 */
3689 	ASSERT(dv->sdev_attr);
3690 	if (SDEV_IS_PERSIST(dv) ||
3691 	    ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
3692 		sdev_vattr_merge(dv, vap);
3693 		rw_enter(&dv->sdev_contents, RW_WRITER);
3694 		error = sdev_shadow_node(dv, cred);
3695 		rw_exit(&dv->sdev_contents);
3696 		rw_exit(&parent->sdev_contents);
3697 
3698 		if (error)
3699 			return (error);
3700 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3701 	}
3702 
3703 
3704 	/*
3705 	 * sdev_attr was allocated in sdev_mknode
3706 	 */
3707 	rw_enter(&dv->sdev_contents, RW_WRITER);
3708 	error = secpolicy_vnode_setattr(cred, vp, vap,
3709 	    dv->sdev_attr, flags, sdev_unlocked_access, dv);
3710 	if (error) {
3711 		rw_exit(&dv->sdev_contents);
3712 		rw_exit(&parent->sdev_contents);
3713 		return (error);
3714 	}
3715 
3716 	get = dv->sdev_attr;
3717 	if (mask & AT_MODE) {
3718 		get->va_mode &= S_IFMT;
3719 		get->va_mode |= vap->va_mode & ~S_IFMT;
3720 	}
3721 
3722 	if ((mask & AT_UID) || (mask & AT_GID)) {
3723 		if (mask & AT_UID)
3724 			get->va_uid = vap->va_uid;
3725 		if (mask & AT_GID)
3726 			get->va_gid = vap->va_gid;
3727 		/*
3728 		 * a callback must be provided if the protocol is set
3729 		 */
3730 		if ((protocol & AT_UID) || (protocol & AT_GID)) {
3731 			ASSERT(callback);
3732 			error = callback(dv, get, protocol);
3733 			if (error) {
3734 				rw_exit(&dv->sdev_contents);
3735 				rw_exit(&parent->sdev_contents);
3736 				return (error);
3737 			}
3738 		}
3739 	}
3740 
3741 	if (mask & AT_ATIME)
3742 		get->va_atime = vap->va_atime;
3743 	if (mask & AT_MTIME)
3744 		get->va_mtime = vap->va_mtime;
3745 	if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
3746 		gethrestime(&get->va_ctime);
3747 	}
3748 
3749 	sdev_vattr_merge(dv, get);
3750 	rw_exit(&dv->sdev_contents);
3751 	rw_exit(&parent->sdev_contents);
3752 	return (0);
3753 }
3754 
3755 /*
3756  * a generic inactive() function
3757  */
3758 void
3759 devname_inactive_func(struct vnode *vp, struct cred *cred,
3760     void (*callback)(struct vnode *))
3761 {
3762 	int clean;
3763 	struct sdev_node *dv = VTOSDEV(vp);
3764 	struct sdev_node *ddv = dv->sdev_dotdot;
3765 	int state;
3766 	struct devname_nsmap *map = NULL;
3767 	struct devname_ops *dirops = NULL;
3768 	void (*fn)(devname_handle_t *, struct cred *) = NULL;
3769 
3770 	rw_enter(&ddv->sdev_contents, RW_WRITER);
3771 	state = dv->sdev_state;
3772 
3773 	mutex_enter(&vp->v_lock);
3774 	ASSERT(vp->v_count >= 1);
3775 
3776 	if (vp->v_count == 1 && callback != NULL)
3777 		callback(vp);
3778 
3779 	clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
3780 
3781 	/*
3782 	 * last ref count on the ZOMBIE node is released.
3783 	 * clean up the sdev_node, and
3784 	 * release the hold on the backing store node so that
3785 	 * the ZOMBIE backing stores also cleaned out.
3786 	 */
3787 	if (clean) {
3788 		ASSERT(ddv);
3789 		if (SDEV_IS_GLOBAL(dv)) {
3790 			map = ddv->sdev_mapinfo;
3791 			dirops = map ? map->dir_ops : NULL;
3792 			if (dirops && (fn = dirops->devnops_inactive))
3793 				(*fn)(&(dv->sdev_handle), cred);
3794 		}
3795 
3796 		ddv->sdev_nlink--;
3797 		if (vp->v_type == VDIR) {
3798 			dv->sdev_nlink--;
3799 		}
3800 		if ((dv->sdev_flags & SDEV_STALE) == 0)
3801 			avl_remove(&ddv->sdev_entries, dv);
3802 		dv->sdev_nlink--;
3803 		--vp->v_count;
3804 		mutex_exit(&vp->v_lock);
3805 		sdev_nodedestroy(dv, 0);
3806 	} else {
3807 		--vp->v_count;
3808 		mutex_exit(&vp->v_lock);
3809 	}
3810 	rw_exit(&ddv->sdev_contents);
3811 }
3812