xref: /titanic_52/usr/src/uts/common/fs/dev/sdev_subr.c (revision 22872efb9462b28180d11ea401344608e641a5aa)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * utility routines for the /dev fs
30  */
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/t_lock.h>
35 #include <sys/systm.h>
36 #include <sys/sysmacros.h>
37 #include <sys/user.h>
38 #include <sys/time.h>
39 #include <sys/vfs.h>
40 #include <sys/vnode.h>
41 #include <sys/file.h>
42 #include <sys/fcntl.h>
43 #include <sys/flock.h>
44 #include <sys/kmem.h>
45 #include <sys/uio.h>
46 #include <sys/errno.h>
47 #include <sys/stat.h>
48 #include <sys/cred.h>
49 #include <sys/dirent.h>
50 #include <sys/pathname.h>
51 #include <sys/cmn_err.h>
52 #include <sys/debug.h>
53 #include <sys/mode.h>
54 #include <sys/policy.h>
55 #include <fs/fs_subr.h>
56 #include <sys/mount.h>
57 #include <sys/fs/snode.h>
58 #include <sys/fs/dv_node.h>
59 #include <sys/fs/sdev_impl.h>
60 #include <sys/fs/sdev_node.h>
61 #include <sys/sunndi.h>
62 #include <sys/sunmdi.h>
63 #include <sys/conf.h>
64 #include <sys/proc.h>
65 #include <sys/user.h>
66 #include <sys/modctl.h>
67 
#ifdef DEBUG
/* extra flags handed to kmem_cache_create() by sdev_node_cache_init() */
int sdev_debug_cache_flags = 0;
/* debug message mask; presumably consulted by the sdcmn_err*() macros */
int sdev_debug = 0x00000001;
#endif
72 
73 /*
74  * globals
75  */
76 /* prototype memory vattrs */
77 vattr_t sdev_vattr_dir = {
78 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
79 	VDIR,					/* va_type */
80 	SDEV_DIRMODE_DEFAULT,			/* va_mode */
81 	SDEV_UID_DEFAULT,			/* va_uid */
82 	SDEV_GID_DEFAULT,			/* va_gid */
83 	0,					/* va_fsid */
84 	0,					/* va_nodeid */
85 	0,					/* va_nlink */
86 	0,					/* va_size */
87 	0,					/* va_atime */
88 	0,					/* va_mtime */
89 	0,					/* va_ctime */
90 	0,					/* va_rdev */
91 	0,					/* va_blksize */
92 	0,					/* va_nblocks */
93 	0					/* va_vcode */
94 };
95 
96 vattr_t sdev_vattr_lnk = {
97 	AT_TYPE|AT_MODE,			/* va_mask */
98 	VLNK,					/* va_type */
99 	SDEV_LNKMODE_DEFAULT,			/* va_mode */
100 	SDEV_UID_DEFAULT,			/* va_uid */
101 	SDEV_GID_DEFAULT,			/* va_gid */
102 	0,					/* va_fsid */
103 	0,					/* va_nodeid */
104 	0,					/* va_nlink */
105 	0,					/* va_size */
106 	0,					/* va_atime */
107 	0,					/* va_mtime */
108 	0,					/* va_ctime */
109 	0,					/* va_rdev */
110 	0,					/* va_blksize */
111 	0,					/* va_nblocks */
112 	0					/* va_vcode */
113 };
114 
115 vattr_t sdev_vattr_blk = {
116 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
117 	VBLK,					/* va_type */
118 	S_IFBLK | SDEV_DEVMODE_DEFAULT,		/* va_mode */
119 	SDEV_UID_DEFAULT,			/* va_uid */
120 	SDEV_GID_DEFAULT,			/* va_gid */
121 	0,					/* va_fsid */
122 	0,					/* va_nodeid */
123 	0,					/* va_nlink */
124 	0,					/* va_size */
125 	0,					/* va_atime */
126 	0,					/* va_mtime */
127 	0,					/* va_ctime */
128 	0,					/* va_rdev */
129 	0,					/* va_blksize */
130 	0,					/* va_nblocks */
131 	0					/* va_vcode */
132 };
133 
134 vattr_t sdev_vattr_chr = {
135 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
136 	VCHR,					/* va_type */
137 	S_IFCHR | SDEV_DEVMODE_DEFAULT,		/* va_mode */
138 	SDEV_UID_DEFAULT,			/* va_uid */
139 	SDEV_GID_DEFAULT,			/* va_gid */
140 	0,					/* va_fsid */
141 	0,					/* va_nodeid */
142 	0,					/* va_nlink */
143 	0,					/* va_size */
144 	0,					/* va_atime */
145 	0,					/* va_mtime */
146 	0,					/* va_ctime */
147 	0,					/* va_rdev */
148 	0,					/* va_blksize */
149 	0,					/* va_nblocks */
150 	0					/* va_vcode */
151 };
152 
153 kmem_cache_t	*sdev_node_cache;	/* sdev_node cache */
154 int		devtype;		/* fstype */
155 
156 struct devname_ops *devname_ns_ops;	/* default name service directory ops */
157 kmutex_t devname_nsmaps_lock;	/* protect devname_nsmaps */
158 
159 /* static */
160 static struct devname_nsmap *devname_nsmaps = NULL;
161 				/* contents from /etc/dev/devname_master */
162 static int devname_nsmaps_invalidated = 0; /* "devfsadm -m" has run */
163 
164 static struct vnodeops *sdev_get_vop(struct sdev_node *);
165 static void sdev_set_no_nocache(struct sdev_node *);
166 static int sdev_get_moduleops(struct sdev_node *);
167 static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
168 static void sdev_free_vtab(fs_operation_def_t *);
169 
170 static void
171 sdev_prof_free(struct sdev_node *dv)
172 {
173 	ASSERT(!SDEV_IS_GLOBAL(dv));
174 	if (dv->sdev_prof.dev_name)
175 		nvlist_free(dv->sdev_prof.dev_name);
176 	if (dv->sdev_prof.dev_map)
177 		nvlist_free(dv->sdev_prof.dev_map);
178 	if (dv->sdev_prof.dev_symlink)
179 		nvlist_free(dv->sdev_prof.dev_symlink);
180 	if (dv->sdev_prof.dev_glob_incdir)
181 		nvlist_free(dv->sdev_prof.dev_glob_incdir);
182 	if (dv->sdev_prof.dev_glob_excdir)
183 		nvlist_free(dv->sdev_prof.dev_glob_excdir);
184 	bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
185 }
186 
187 /* sdev_node cache constructor */
188 /*ARGSUSED1*/
189 static int
190 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
191 {
192 	struct sdev_node *dv = (struct sdev_node *)buf;
193 	struct vnode *vp;
194 
195 	bzero(buf, sizeof (struct sdev_node));
196 	vp = dv->sdev_vnode = vn_alloc(flag);
197 	if (vp == NULL) {
198 		return (-1);
199 	}
200 	vp->v_data = dv;
201 	rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
202 	return (0);
203 }
204 
205 /* sdev_node cache destructor */
206 /*ARGSUSED1*/
207 static void
208 i_sdev_node_dtor(void *buf, void *arg)
209 {
210 	struct sdev_node *dv = (struct sdev_node *)buf;
211 	struct vnode *vp = SDEVTOV(dv);
212 
213 	rw_destroy(&dv->sdev_contents);
214 	vn_free(vp);
215 }
216 
217 /* initialize sdev_node cache */
218 void
219 sdev_node_cache_init()
220 {
221 	int flags = 0;
222 
223 #ifdef	DEBUG
224 	flags = sdev_debug_cache_flags;
225 	if (flags)
226 		sdcmn_err(("cache debug flags 0x%x\n", flags));
227 #endif	/* DEBUG */
228 
229 	ASSERT(sdev_node_cache == NULL);
230 	sdev_node_cache = kmem_cache_create("sdev_node_cache",
231 	    sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
232 	    NULL, NULL, NULL, flags);
233 }
234 
235 /* destroy sdev_node cache */
236 void
237 sdev_node_cache_fini()
238 {
239 	ASSERT(sdev_node_cache != NULL);
240 	kmem_cache_destroy(sdev_node_cache);
241 	sdev_node_cache = NULL;
242 }
243 
244 /*
245  * Compare two nodes lexographically to balance avl tree
246  */
247 static int
248 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
249 {
250 	int rv;
251 	if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
252 		return (0);
253 	return ((rv < 0) ? -1 : 1);
254 }
255 
256 void
257 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
258 {
259 	ASSERT(dv);
260 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
261 	dv->sdev_state = state;
262 }
263 
264 static void
265 sdev_attrinit(struct sdev_node *dv, vattr_t *vap)
266 {
267 	timestruc_t now;
268 
269 	ASSERT(vap);
270 
271 	dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
272 	*dv->sdev_attr = *vap;
273 
274 	dv->sdev_attr->va_mode = MAKEIMODE(vap->va_type, vap->va_mode);
275 
276 	gethrestime(&now);
277 	dv->sdev_attr->va_atime = now;
278 	dv->sdev_attr->va_mtime = now;
279 	dv->sdev_attr->va_ctime = now;
280 }
281 
282 /* alloc and initialize a sdev_node */
283 int
284 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
285     vattr_t *vap)
286 {
287 	struct sdev_node *dv = NULL;
288 	struct vnode *vp;
289 	size_t nmlen, len;
290 	devname_handle_t  *dhl;
291 
292 	nmlen = strlen(nm) + 1;
293 	if (nmlen > MAXNAMELEN) {
294 		sdcmn_err9(("sdev_nodeinit: node name %s"
295 		    " too long\n", nm));
296 		*newdv = NULL;
297 		return (ENAMETOOLONG);
298 	}
299 
300 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
301 
302 	dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
303 	bcopy(nm, dv->sdev_name, nmlen);
304 	dv->sdev_namelen = nmlen - 1;	/* '\0' not included */
305 	len = strlen(ddv->sdev_path) + strlen(nm) + 2;
306 	dv->sdev_path = kmem_alloc(len, KM_SLEEP);
307 	(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
308 	/* overwritten for VLNK nodes */
309 	dv->sdev_symlink = NULL;
310 
311 	vp = SDEVTOV(dv);
312 	vn_reinit(vp);
313 	vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
314 	if (vap)
315 		vp->v_type = vap->va_type;
316 
317 	/*
318 	 * initialized to the parent's vnodeops.
319 	 * maybe overwriten for a VDIR
320 	 */
321 	vn_setops(vp, vn_getops(SDEVTOV(ddv)));
322 	vn_exists(vp);
323 
324 	dv->sdev_dotdot = NULL;
325 	dv->sdev_attrvp = NULL;
326 	if (vap) {
327 		sdev_attrinit(dv, vap);
328 	} else {
329 		dv->sdev_attr = NULL;
330 	}
331 
332 	dv->sdev_ino = sdev_mkino(dv);
333 	dv->sdev_nlink = 0;		/* updated on insert */
334 	dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
335 	dv->sdev_flags |= SDEV_BUILD;
336 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
337 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
338 	if (SDEV_IS_GLOBAL(ddv)) {
339 		dv->sdev_flags |= SDEV_GLOBAL;
340 		dv->sdev_mapinfo = NULL;
341 		dhl = &(dv->sdev_handle);
342 		dhl->dh_data = dv;
343 		dhl->dh_spec = DEVNAME_NS_NONE;
344 		dhl->dh_args = NULL;
345 		sdev_set_no_nocache(dv);
346 		dv->sdev_gdir_gen = 0;
347 	} else {
348 		dv->sdev_flags &= ~SDEV_GLOBAL;
349 		dv->sdev_origin = NULL; /* set later */
350 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
351 		dv->sdev_ldir_gen = 0;
352 		dv->sdev_devtree_gen = 0;
353 	}
354 
355 	rw_enter(&dv->sdev_contents, RW_WRITER);
356 	sdev_set_nodestate(dv, SDEV_INIT);
357 	rw_exit(&dv->sdev_contents);
358 	*newdv = dv;
359 
360 	return (0);
361 }
362 
363 /*
364  * transition a sdev_node into SDEV_READY state
365  */
366 int
367 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
368     void *args, struct cred *cred)
369 {
370 	int error = 0;
371 	struct vnode *vp = SDEVTOV(dv);
372 	vtype_t type;
373 
374 	ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
375 
376 	type = vap->va_type;
377 	vp->v_type = type;
378 	vp->v_rdev = vap->va_rdev;
379 	rw_enter(&dv->sdev_contents, RW_WRITER);
380 	if (type == VDIR) {
381 		dv->sdev_nlink = 2;
382 		dv->sdev_flags &= ~SDEV_PERSIST;
383 		dv->sdev_flags &= ~SDEV_DYNAMIC;
384 		vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
385 		error = sdev_get_moduleops(dv); /* from plug-in module */
386 		ASSERT(dv->sdev_dotdot);
387 		ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
388 		vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
389 		avl_create(&dv->sdev_entries,
390 		    (int (*)(const void *, const void *))sdev_compare_nodes,
391 		    sizeof (struct sdev_node),
392 		    offsetof(struct sdev_node, sdev_avllink));
393 	} else if (type == VLNK) {
394 		ASSERT(args);
395 		dv->sdev_nlink = 1;
396 		dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
397 	} else {
398 		dv->sdev_nlink = 1;
399 	}
400 
401 	if (!(SDEV_IS_GLOBAL(dv))) {
402 		dv->sdev_origin = (struct sdev_node *)args;
403 		dv->sdev_flags &= ~SDEV_PERSIST;
404 	}
405 
406 	/*
407 	 * shadow node is created here OR
408 	 * if failed (indicated by dv->sdev_attrvp == NULL),
409 	 * created later in sdev_setattr
410 	 */
411 	if (avp) {
412 		dv->sdev_attrvp = avp;
413 	} else {
414 		if (dv->sdev_attr == NULL)
415 			sdev_attrinit(dv, vap);
416 		else
417 			*dv->sdev_attr = *vap;
418 
419 		if ((SDEV_IS_PERSIST(dv) && (dv->sdev_attrvp == NULL)) ||
420 		    ((SDEVTOV(dv)->v_type == VDIR) &&
421 		    (dv->sdev_attrvp == NULL))) {
422 			error = sdev_shadow_node(dv, cred);
423 		}
424 	}
425 
426 	if (error == 0) {
427 		/* transition to READY state */
428 		sdev_set_nodestate(dv, SDEV_READY);
429 		sdev_nc_node_exists(dv);
430 	} else {
431 		sdev_set_nodestate(dv, SDEV_ZOMBIE);
432 	}
433 	rw_exit(&dv->sdev_contents);
434 	return (error);
435 }
436 
437 /*
438  * setting ZOMBIE state
439  */
440 static int
441 sdev_nodezombied(struct sdev_node *dv)
442 {
443 	rw_enter(&dv->sdev_contents, RW_WRITER);
444 	sdev_set_nodestate(dv, SDEV_ZOMBIE);
445 	rw_exit(&dv->sdev_contents);
446 	return (0);
447 }
448 
449 /*
450  * Build the VROOT sdev_node.
451  */
452 /*ARGSUSED*/
453 struct sdev_node *
454 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
455     struct vnode *avp, struct cred *cred)
456 {
457 	struct sdev_node *dv;
458 	struct vnode *vp;
459 	char devdir[] = "/dev";
460 
461 	ASSERT(sdev_node_cache != NULL);
462 	ASSERT(avp);
463 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
464 	vp = SDEVTOV(dv);
465 	vn_reinit(vp);
466 	vp->v_flag |= VROOT;
467 	vp->v_vfsp = vfsp;
468 	vp->v_type = VDIR;
469 	vp->v_rdev = devdev;
470 	vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
471 	vn_exists(vp);
472 
473 	if (vfsp->vfs_mntpt)
474 		dv->sdev_name = i_ddi_strdup(
475 		    (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
476 	else
477 		/* vfs_mountdev1 set mount point later */
478 		dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
479 	dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
480 	dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
481 	dv->sdev_ino = SDEV_ROOTINO;
482 	dv->sdev_nlink = 2;		/* name + . (no sdev_insert) */
483 	dv->sdev_dotdot = dv;		/* .. == self */
484 	dv->sdev_attrvp = avp;
485 	dv->sdev_attr = NULL;
486 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
487 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
488 	if (strcmp(dv->sdev_name, "/dev") == 0) {
489 		mutex_init(&devname_nsmaps_lock, NULL, MUTEX_DEFAULT, NULL);
490 		dv->sdev_mapinfo = NULL;
491 		dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
492 		bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
493 		dv->sdev_gdir_gen = 0;
494 	} else {
495 		dv->sdev_flags = SDEV_BUILD;
496 		dv->sdev_flags &= ~SDEV_PERSIST;
497 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
498 		dv->sdev_ldir_gen = 0;
499 		dv->sdev_devtree_gen = 0;
500 	}
501 
502 	avl_create(&dv->sdev_entries,
503 	    (int (*)(const void *, const void *))sdev_compare_nodes,
504 	    sizeof (struct sdev_node),
505 	    offsetof(struct sdev_node, sdev_avllink));
506 
507 	rw_enter(&dv->sdev_contents, RW_WRITER);
508 	sdev_set_nodestate(dv, SDEV_READY);
509 	rw_exit(&dv->sdev_contents);
510 	sdev_nc_node_exists(dv);
511 	return (dv);
512 }
513 
514 /*
515  *  1. load the module
516  *  2. modload invokes sdev_module_register, which in turn sets
517  *     the dv->sdev_mapinfo->dir_ops
518  *
519  * note: locking order:
520  *	dv->sdev_contents -> map->dir_lock
521  */
522 static int
523 sdev_get_moduleops(struct sdev_node *dv)
524 {
525 	int error = 0;
526 	struct devname_nsmap *map = NULL;
527 	char *module;
528 	char *path;
529 	int load = 1;
530 
531 	ASSERT(SDEVTOV(dv)->v_type == VDIR);
532 
533 	if (devname_nsmaps == NULL)
534 		return (0);
535 
536 	if (!sdev_nsmaps_loaded() && !sdev_nsmaps_reloaded())
537 		return (0);
538 
539 
540 	path = dv->sdev_path;
541 	if ((map = sdev_get_nsmap_by_dir(path, 0))) {
542 		rw_enter(&map->dir_lock, RW_READER);
543 		if (map->dir_invalid) {
544 			if (map->dir_module && map->dir_newmodule &&
545 			    (strcmp(map->dir_module,
546 			    map->dir_newmodule) == 0)) {
547 				load = 0;
548 			}
549 			sdev_replace_nsmap(map, map->dir_newmodule,
550 			    map->dir_newmap);
551 		}
552 
553 		module = map->dir_module;
554 		if (module && load) {
555 			sdcmn_err6(("sdev_get_moduleops: "
556 			    "load module %s", module));
557 			rw_exit(&map->dir_lock);
558 			error = modload("devname", module);
559 			sdcmn_err6(("sdev_get_moduleops: error %d\n", error));
560 			if (error < 0) {
561 				return (-1);
562 			}
563 		} else if (module == NULL) {
564 			/*
565 			 * loading the module ops for name services
566 			 */
567 			if (devname_ns_ops == NULL) {
568 				sdcmn_err6((
569 				    "sdev_get_moduleops: modload default\n"));
570 				error = modload("devname", DEVNAME_NSCONFIG);
571 				sdcmn_err6((
572 				    "sdev_get_moduleops: error %d\n", error));
573 				if (error < 0) {
574 					return (-1);
575 				}
576 			}
577 
578 			if (!rw_tryupgrade(&map->dir_lock)) {
579 				rw_exit(&map->dir_lock);
580 				rw_enter(&map->dir_lock, RW_WRITER);
581 			}
582 			ASSERT(devname_ns_ops);
583 			map->dir_ops = devname_ns_ops;
584 			rw_exit(&map->dir_lock);
585 		}
586 	}
587 
588 	dv->sdev_mapinfo = map;
589 	return (0);
590 }
591 
592 /* directory dependent vop table */
593 struct sdev_vop_table {
594 	char *vt_name;				/* subdirectory name */
595 	const fs_operation_def_t *vt_service;	/* vnodeops table */
596 	struct vnodeops *vt_vops;		/* constructed vop */
597 	struct vnodeops **vt_global_vops;	/* global container for vop */
598 	int (*vt_vtor)(struct sdev_node *);	/* validate sdev_node */
599 	int vt_flags;
600 };
601 
602 /*
603  * A nice improvement would be to provide a plug-in mechanism
604  * for this table instead of a const table.
605  */
606 static struct sdev_vop_table vtab[] =
607 {
608 	{ "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
609 	SDEV_DYNAMIC | SDEV_VTOR },
610 
611 	{ "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },
612 
613 	{ "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
614 	SDEV_DYNAMIC | SDEV_VTOR },
615 
616 	{ NULL, NULL, NULL, NULL, NULL, 0}
617 };
618 
619 
620 /*
621  *  sets a directory's vnodeops if the directory is in the vtab;
622  */
623 static struct vnodeops *
624 sdev_get_vop(struct sdev_node *dv)
625 {
626 	int i;
627 	char *path;
628 
629 	path = dv->sdev_path;
630 	ASSERT(path);
631 
632 	/* gets the relative path to /dev/ */
633 	path += 5;
634 
635 	/* gets the vtab entry if matches */
636 	for (i = 0; vtab[i].vt_name; i++) {
637 		if (strcmp(vtab[i].vt_name, path) != 0)
638 			continue;
639 		dv->sdev_flags |= vtab[i].vt_flags;
640 
641 		if (vtab[i].vt_vops) {
642 			if (vtab[i].vt_global_vops)
643 				*(vtab[i].vt_global_vops) = vtab[i].vt_vops;
644 			return (vtab[i].vt_vops);
645 		}
646 
647 		if (vtab[i].vt_service) {
648 			fs_operation_def_t *templ;
649 			templ = sdev_merge_vtab(vtab[i].vt_service);
650 			if (vn_make_ops(vtab[i].vt_name,
651 			    (const fs_operation_def_t *)templ,
652 			    &vtab[i].vt_vops) != 0) {
653 				cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
654 				    vtab[i].vt_name);
655 				/*NOTREACHED*/
656 			}
657 			if (vtab[i].vt_global_vops) {
658 				*(vtab[i].vt_global_vops) = vtab[i].vt_vops;
659 			}
660 			sdev_free_vtab(templ);
661 			return (vtab[i].vt_vops);
662 		}
663 		return (sdev_vnodeops);
664 	}
665 
666 	/* child inherits the persistence of the parent */
667 	if (SDEV_IS_PERSIST(dv->sdev_dotdot))
668 		dv->sdev_flags |= SDEV_PERSIST;
669 
670 	return (sdev_vnodeops);
671 }
672 
673 static void
674 sdev_set_no_nocache(struct sdev_node *dv)
675 {
676 	int i;
677 	char *path;
678 
679 	ASSERT(dv->sdev_path);
680 	path = dv->sdev_path + strlen("/dev/");
681 
682 	for (i = 0; vtab[i].vt_name; i++) {
683 		if (strcmp(vtab[i].vt_name, path) == 0) {
684 			if (vtab[i].vt_flags & SDEV_NO_NCACHE)
685 				dv->sdev_flags |= SDEV_NO_NCACHE;
686 			break;
687 		}
688 	}
689 }
690 
691 void *
692 sdev_get_vtor(struct sdev_node *dv)
693 {
694 	int i;
695 
696 	for (i = 0; vtab[i].vt_name; i++) {
697 		if (strcmp(vtab[i].vt_name, dv->sdev_name) != 0)
698 			continue;
699 		return ((void *)vtab[i].vt_vtor);
700 	}
701 	return (NULL);
702 }
703 
704 /*
705  * Build the base root inode
706  */
707 ino_t
708 sdev_mkino(struct sdev_node *dv)
709 {
710 	ino_t	ino;
711 
712 	/*
713 	 * for now, follow the lead of tmpfs here
714 	 * need to someday understand the requirements here
715 	 */
716 	ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
717 	ino += SDEV_ROOTINO + 1;
718 
719 	return (ino);
720 }
721 
722 static int
723 sdev_getlink(struct vnode *linkvp, char **link)
724 {
725 	int err;
726 	char *buf;
727 	struct uio uio = {0};
728 	struct iovec iov = {0};
729 
730 	if (linkvp == NULL)
731 		return (ENOENT);
732 	ASSERT(linkvp->v_type == VLNK);
733 
734 	buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
735 	iov.iov_base = buf;
736 	iov.iov_len = MAXPATHLEN;
737 	uio.uio_iov = &iov;
738 	uio.uio_iovcnt = 1;
739 	uio.uio_resid = MAXPATHLEN;
740 	uio.uio_segflg = UIO_SYSSPACE;
741 	uio.uio_llimit = MAXOFFSET_T;
742 
743 	err = VOP_READLINK(linkvp, &uio, kcred, NULL);
744 	if (err) {
745 		cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
746 		kmem_free(buf, MAXPATHLEN);
747 		return (ENOENT);
748 	}
749 
750 	/* mission complete */
751 	*link = i_ddi_strdup(buf, KM_SLEEP);
752 	kmem_free(buf, MAXPATHLEN);
753 	return (0);
754 }
755 
756 /*
757  * A convenient wrapper to get the devfs node vnode for a device
758  * minor functionality: readlink() of a /dev symlink
759  * Place the link into dv->sdev_symlink
760  */
761 static int
762 sdev_follow_link(struct sdev_node *dv)
763 {
764 	int err;
765 	struct vnode *linkvp;
766 	char *link = NULL;
767 
768 	linkvp = SDEVTOV(dv);
769 	if (linkvp == NULL)
770 		return (ENOENT);
771 	ASSERT(linkvp->v_type == VLNK);
772 	err = sdev_getlink(linkvp, &link);
773 	if (err) {
774 		(void) sdev_nodezombied(dv);
775 		dv->sdev_symlink = NULL;
776 		return (ENOENT);
777 	}
778 
779 	ASSERT(link != NULL);
780 	dv->sdev_symlink = link;
781 	return (0);
782 }
783 
784 static int
785 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
786 {
787 	vtype_t otype = SDEVTOV(dv)->v_type;
788 
789 	/*
790 	 * existing sdev_node has a different type.
791 	 */
792 	if (otype != nvap->va_type) {
793 		sdcmn_err9(("sdev_node_check: existing node "
794 		    "  %s type %d does not match new node type %d\n",
795 		    dv->sdev_name, otype, nvap->va_type));
796 		return (EEXIST);
797 	}
798 
799 	/*
800 	 * For a symlink, the target should be the same.
801 	 */
802 	if (otype == VLNK) {
803 		ASSERT(nargs != NULL);
804 		ASSERT(dv->sdev_symlink != NULL);
805 		if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
806 			sdcmn_err9(("sdev_node_check: existing node "
807 			    " %s has different symlink %s as new node "
808 			    " %s\n", dv->sdev_name, dv->sdev_symlink,
809 			    (char *)nargs));
810 			return (EEXIST);
811 		}
812 	}
813 
814 	return (0);
815 }
816 
817 /*
818  * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
819  *
820  * arguments:
821  *	- ddv (parent)
822  *	- nm (child name)
823  *	- newdv (sdev_node for nm is returned here)
824  *	- vap (vattr for the node to be created, va_type should be set.
825  *	- avp (attribute vnode)
826  *	  the defaults should be used if unknown)
827  *	- cred
828  *	- args
829  *	    . tnm (for VLNK)
830  *	    . global sdev_node (for !SDEV_GLOBAL)
831  * 	- state: SDEV_INIT, SDEV_READY
832  *
833  * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
834  *
835  * NOTE:  directory contents writers lock needs to be held before
836  *	  calling this routine.
837  */
838 int
839 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
840     struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
841     sdev_node_state_t state)
842 {
843 	int error = 0;
844 	sdev_node_state_t node_state;
845 	struct sdev_node *dv = NULL;
846 
847 	ASSERT(state != SDEV_ZOMBIE);
848 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
849 
850 	if (*newdv) {
851 		dv = *newdv;
852 	} else {
853 		/* allocate and initialize a sdev_node */
854 		if (ddv->sdev_state == SDEV_ZOMBIE) {
855 			sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
856 			    ddv->sdev_path));
857 			return (ENOENT);
858 		}
859 
860 		error = sdev_nodeinit(ddv, nm, &dv, vap);
861 		if (error != 0) {
862 			sdcmn_err9(("sdev_mknode: error %d,"
863 			    " name %s can not be initialized\n",
864 			    error, nm));
865 			return (error);
866 		}
867 		ASSERT(dv);
868 
869 		/* insert into the directory cache */
870 		error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
871 		if (error) {
872 			sdcmn_err9(("sdev_mknode: node %s can not"
873 			    " be added into directory cache\n", nm));
874 			return (ENOENT);
875 		}
876 	}
877 
878 	ASSERT(dv);
879 	node_state = dv->sdev_state;
880 	ASSERT(node_state != SDEV_ZOMBIE);
881 
882 	if (state == SDEV_READY) {
883 		switch (node_state) {
884 		case SDEV_INIT:
885 			error = sdev_nodeready(dv, vap, avp, args, cred);
886 			if (error) {
887 				sdcmn_err9(("sdev_mknode: node %s can NOT"
888 				    " be transitioned into READY state, "
889 				    "error %d\n", nm, error));
890 			}
891 			break;
892 		case SDEV_READY:
893 			/*
894 			 * Do some sanity checking to make sure
895 			 * the existing sdev_node is what has been
896 			 * asked for.
897 			 */
898 			error = sdev_node_check(dv, vap, args);
899 			break;
900 		default:
901 			break;
902 		}
903 	}
904 
905 	if (!error) {
906 		*newdv = dv;
907 		ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
908 	} else {
909 		SDEV_SIMPLE_RELE(dv);
910 		*newdv = NULL;
911 	}
912 
913 	return (error);
914 }
915 
916 /*
917  * convenient wrapper to change vp's ATIME, CTIME and MTIME
918  */
919 void
920 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
921 {
922 	struct vattr attr;
923 	timestruc_t now;
924 	int err;
925 
926 	ASSERT(vp);
927 	gethrestime(&now);
928 	if (mask & AT_CTIME)
929 		attr.va_ctime = now;
930 	if (mask & AT_MTIME)
931 		attr.va_mtime = now;
932 	if (mask & AT_ATIME)
933 		attr.va_atime = now;
934 
935 	attr.va_mask = (mask & AT_TIMES);
936 	err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
937 	if (err && (err != EROFS)) {
938 		sdcmn_err(("update timestamps error %d\n", err));
939 	}
940 }
941 
942 /*
943  * the backing store vnode is released here
944  */
945 /*ARGSUSED1*/
946 void
947 sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
948 {
949 	/* no references */
950 	ASSERT(dv->sdev_nlink == 0);
951 
952 	if (dv->sdev_attrvp != NULLVP) {
953 		VN_RELE(dv->sdev_attrvp);
954 		/*
955 		 * reset the attrvp so that no more
956 		 * references can be made on this already
957 		 * vn_rele() vnode
958 		 */
959 		dv->sdev_attrvp = NULLVP;
960 	}
961 
962 	if (dv->sdev_attr != NULL) {
963 		kmem_free(dv->sdev_attr, sizeof (struct vattr));
964 		dv->sdev_attr = NULL;
965 	}
966 
967 	if (dv->sdev_name != NULL) {
968 		kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
969 		dv->sdev_name = NULL;
970 	}
971 
972 	if (dv->sdev_symlink != NULL) {
973 		kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
974 		dv->sdev_symlink = NULL;
975 	}
976 
977 	if (dv->sdev_path) {
978 		kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
979 		dv->sdev_path = NULL;
980 	}
981 
982 	if (!SDEV_IS_GLOBAL(dv))
983 		sdev_prof_free(dv);
984 
985 	if (SDEVTOV(dv)->v_type == VDIR) {
986 		ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
987 		avl_destroy(&dv->sdev_entries);
988 	}
989 
990 	mutex_destroy(&dv->sdev_lookup_lock);
991 	cv_destroy(&dv->sdev_lookup_cv);
992 
993 	/* return node to initial state as per constructor */
994 	(void) memset((void *)&dv->sdev_instance_data, 0,
995 	    sizeof (dv->sdev_instance_data));
996 	vn_invalid(SDEVTOV(dv));
997 	kmem_cache_free(sdev_node_cache, dv);
998 }
999 
1000 /*
1001  * DIRECTORY CACHE lookup
1002  */
1003 struct sdev_node *
1004 sdev_findbyname(struct sdev_node *ddv, char *nm)
1005 {
1006 	struct sdev_node *dv;
1007 	struct sdev_node dvtmp;
1008 	avl_index_t	where;
1009 
1010 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1011 
1012 	dvtmp.sdev_name = nm;
1013 	dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
1014 	if (dv) {
1015 		ASSERT(dv->sdev_dotdot == ddv);
1016 		ASSERT(strcmp(dv->sdev_name, nm) == 0);
1017 		SDEV_HOLD(dv);
1018 		return (dv);
1019 	}
1020 	return (NULL);
1021 }
1022 
1023 /*
1024  * Inserts a new sdev_node in a parent directory
1025  */
1026 void
1027 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
1028 {
1029 	avl_index_t where;
1030 
1031 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1032 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
1033 	ASSERT(ddv->sdev_nlink >= 2);
1034 	ASSERT(dv->sdev_nlink == 0);
1035 
1036 	dv->sdev_dotdot = ddv;
1037 	VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
1038 	avl_insert(&ddv->sdev_entries, dv, where);
1039 	ddv->sdev_nlink++;
1040 }
1041 
1042 /*
1043  * The following check is needed because while sdev_nodes are linked
1044  * in SDEV_INIT state, they have their link counts incremented only
1045  * in SDEV_READY state.
1046  */
1047 static void
1048 decr_link(struct sdev_node *dv)
1049 {
1050 	if (dv->sdev_state != SDEV_INIT)
1051 		dv->sdev_nlink--;
1052 	else
1053 		ASSERT(dv->sdev_nlink == 0);
1054 }
1055 
1056 /*
1057  * Delete an existing dv from directory cache
1058  *
1059  * In the case of a node is still held by non-zero reference count,
1060  *     the node is put into ZOMBIE state. Once the reference count
1061  *     reaches "0", the node is unlinked and destroyed,
1062  *     in sdev_inactive().
1063  */
1064 static int
1065 sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
1066 {
1067 	struct vnode *vp;
1068 
1069 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1070 
1071 	vp = SDEVTOV(dv);
1072 	mutex_enter(&vp->v_lock);
1073 
1074 	/* dv is held still */
1075 	if (vp->v_count > 1) {
1076 		rw_enter(&dv->sdev_contents, RW_WRITER);
1077 		if (dv->sdev_state == SDEV_READY) {
1078 			sdcmn_err9((
1079 			    "sdev_delete: node %s busy with count %d\n",
1080 			    dv->sdev_name, vp->v_count));
1081 			dv->sdev_state = SDEV_ZOMBIE;
1082 		}
1083 		rw_exit(&dv->sdev_contents);
1084 		--vp->v_count;
1085 		mutex_exit(&vp->v_lock);
1086 		return (EBUSY);
1087 	}
1088 	ASSERT(vp->v_count == 1);
1089 
1090 	/* unlink from the memory cache */
1091 	ddv->sdev_nlink--;	/* .. to above */
1092 	if (vp->v_type == VDIR) {
1093 		decr_link(dv);		/* . to self */
1094 	}
1095 
1096 	avl_remove(&ddv->sdev_entries, dv);
1097 	decr_link(dv);	/* name, back to zero */
1098 	vp->v_count--;
1099 	mutex_exit(&vp->v_lock);
1100 
1101 	/* destroy the node */
1102 	sdev_nodedestroy(dv, 0);
1103 	return (0);
1104 }
1105 
1106 /*
1107  * check if the source is in the path of the target
1108  *
1109  * source and target are different
1110  */
1111 /*ARGSUSED2*/
1112 static int
1113 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
1114 {
1115 	int error = 0;
1116 	struct sdev_node *dotdot, *dir;
1117 
1118 	dotdot = tdv->sdev_dotdot;
1119 	ASSERT(dotdot);
1120 
1121 	/* fs root */
1122 	if (dotdot == tdv) {
1123 		return (0);
1124 	}
1125 
1126 	for (;;) {
1127 		/*
1128 		 * avoid error cases like
1129 		 *	mv a a/b
1130 		 *	mv a a/b/c
1131 		 *	etc.
1132 		 */
1133 		if (dotdot == sdv) {
1134 			error = EINVAL;
1135 			break;
1136 		}
1137 
1138 		dir = dotdot;
1139 		dotdot = dir->sdev_dotdot;
1140 
1141 		/* done checking because root is reached */
1142 		if (dir == dotdot) {
1143 			break;
1144 		}
1145 	}
1146 	return (error);
1147 }
1148 
1149 int
1150 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
1151     struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
1152     struct cred *cred)
1153 {
1154 	int error = 0;
1155 	struct vnode *ovp = SDEVTOV(odv);
1156 	struct vnode *nvp;
1157 	struct vattr vattr;
1158 	int doingdir = (ovp->v_type == VDIR);
1159 	char *link = NULL;
1160 	int samedir = (oddv == nddv) ? 1 : 0;
1161 	int bkstore = 0;
1162 	struct sdev_node *idv = NULL;
1163 	struct sdev_node *ndv = NULL;
1164 	timestruc_t now;
1165 
1166 	vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1167 	error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
1168 	if (error)
1169 		return (error);
1170 
1171 	if (!samedir)
1172 		rw_enter(&oddv->sdev_contents, RW_WRITER);
1173 	rw_enter(&nddv->sdev_contents, RW_WRITER);
1174 
1175 	/*
1176 	 * the source may have been deleted by another thread before
1177 	 * we gets here.
1178 	 */
1179 	if (odv->sdev_state != SDEV_READY) {
1180 		error = ENOENT;
1181 		goto err_out;
1182 	}
1183 
1184 	if (doingdir && (odv == nddv)) {
1185 		error = EINVAL;
1186 		goto err_out;
1187 	}
1188 
1189 	/*
1190 	 * If renaming a directory, and the parents are different (".." must be
1191 	 * changed) then the source dir must not be in the dir hierarchy above
1192 	 * the target since it would orphan everything below the source dir.
1193 	 */
1194 	if (doingdir && (oddv != nddv)) {
1195 		error = sdev_checkpath(odv, nddv, cred);
1196 		if (error)
1197 			goto err_out;
1198 	}
1199 
1200 	/* destination existing */
1201 	if (*ndvp) {
1202 		nvp = SDEVTOV(*ndvp);
1203 		ASSERT(nvp);
1204 
1205 		/* handling renaming to itself */
1206 		if (odv == *ndvp) {
1207 			error = 0;
1208 			goto err_out;
1209 		}
1210 
1211 		if (nvp->v_type == VDIR) {
1212 			if (!doingdir) {
1213 				error = EISDIR;
1214 				goto err_out;
1215 			}
1216 
1217 			if (vn_vfswlock(nvp)) {
1218 				error = EBUSY;
1219 				goto err_out;
1220 			}
1221 
1222 			if (vn_mountedvfs(nvp) != NULL) {
1223 				vn_vfsunlock(nvp);
1224 				error = EBUSY;
1225 				goto err_out;
1226 			}
1227 
1228 			/* in case dir1 exists in dir2 and "mv dir1 dir2" */
1229 			if ((*ndvp)->sdev_nlink > 2) {
1230 				vn_vfsunlock(nvp);
1231 				error = EEXIST;
1232 				goto err_out;
1233 			}
1234 			vn_vfsunlock(nvp);
1235 
1236 			(void) sdev_dirdelete(nddv, *ndvp);
1237 			*ndvp = NULL;
1238 			ASSERT(nddv->sdev_attrvp);
1239 			error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1240 			    nddv->sdev_attrvp, cred, NULL, 0);
1241 			if (error)
1242 				goto err_out;
1243 		} else {
1244 			if (doingdir) {
1245 				error = ENOTDIR;
1246 				goto err_out;
1247 			}
1248 
1249 			if (SDEV_IS_PERSIST((*ndvp))) {
1250 				bkstore = 1;
1251 			}
1252 
1253 			/*
1254 			 * get rid of the node from the directory cache
1255 			 * note, in case EBUSY is returned, the ZOMBIE
1256 			 * node is taken care in sdev_mknode.
1257 			 */
1258 			(void) sdev_dirdelete(nddv, *ndvp);
1259 			*ndvp = NULL;
1260 			if (bkstore) {
1261 				ASSERT(nddv->sdev_attrvp);
1262 				error = VOP_REMOVE(nddv->sdev_attrvp,
1263 				    nnm, cred, NULL, 0);
1264 				if (error)
1265 					goto err_out;
1266 			}
1267 		}
1268 	}
1269 
1270 	/* fix the source for a symlink */
1271 	if (vattr.va_type == VLNK) {
1272 		if (odv->sdev_symlink == NULL) {
1273 			error = sdev_follow_link(odv);
1274 			if (error) {
1275 				error = ENOENT;
1276 				goto err_out;
1277 			}
1278 		}
1279 		ASSERT(odv->sdev_symlink);
1280 		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1281 	}
1282 
1283 	/*
1284 	 * make a fresh node from the source attrs
1285 	 */
1286 	ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1287 	error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1288 	    NULL, (void *)link, cred, SDEV_READY);
1289 
1290 	if (link)
1291 		kmem_free(link, strlen(link) + 1);
1292 
1293 	if (error)
1294 		goto err_out;
1295 	ASSERT(*ndvp);
1296 	ASSERT((*ndvp)->sdev_state == SDEV_READY);
1297 
1298 	/* move dir contents */
1299 	if (doingdir) {
1300 		for (idv = SDEV_FIRST_ENTRY(odv); idv;
1301 		    idv = SDEV_NEXT_ENTRY(odv, idv)) {
1302 			error = sdev_rnmnode(odv, idv,
1303 			    (struct sdev_node *)(*ndvp), &ndv,
1304 			    idv->sdev_name, cred);
1305 			if (error)
1306 				goto err_out;
1307 			ndv = NULL;
1308 		}
1309 	}
1310 
1311 	if ((*ndvp)->sdev_attrvp) {
1312 		sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1313 		    AT_CTIME|AT_ATIME);
1314 	} else {
1315 		ASSERT((*ndvp)->sdev_attr);
1316 		gethrestime(&now);
1317 		(*ndvp)->sdev_attr->va_ctime = now;
1318 		(*ndvp)->sdev_attr->va_atime = now;
1319 	}
1320 
1321 	if (nddv->sdev_attrvp) {
1322 		sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1323 		    AT_MTIME|AT_ATIME);
1324 	} else {
1325 		ASSERT(nddv->sdev_attr);
1326 		gethrestime(&now);
1327 		nddv->sdev_attr->va_mtime = now;
1328 		nddv->sdev_attr->va_atime = now;
1329 	}
1330 	rw_exit(&nddv->sdev_contents);
1331 	if (!samedir)
1332 		rw_exit(&oddv->sdev_contents);
1333 
1334 	SDEV_RELE(*ndvp);
1335 	return (error);
1336 
1337 err_out:
1338 	rw_exit(&nddv->sdev_contents);
1339 	if (!samedir)
1340 		rw_exit(&oddv->sdev_contents);
1341 	return (error);
1342 }
1343 
1344 /*
1345  * Merge sdev_node specific information into an attribute structure.
1346  *
1347  * note: sdev_node is not locked here
1348  */
1349 void
1350 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1351 {
1352 	struct vnode *vp = SDEVTOV(dv);
1353 
1354 	vap->va_nlink = dv->sdev_nlink;
1355 	vap->va_nodeid = dv->sdev_ino;
1356 	vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1357 	vap->va_type = vp->v_type;
1358 
1359 	if (vp->v_type == VDIR) {
1360 		vap->va_rdev = 0;
1361 		vap->va_fsid = vp->v_rdev;
1362 	} else if (vp->v_type == VLNK) {
1363 		vap->va_rdev = 0;
1364 		vap->va_mode  &= ~S_IFMT;
1365 		vap->va_mode |= S_IFLNK;
1366 	} else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1367 		vap->va_rdev = vp->v_rdev;
1368 		vap->va_mode &= ~S_IFMT;
1369 		if (vap->va_type == VCHR)
1370 			vap->va_mode |= S_IFCHR;
1371 		else
1372 			vap->va_mode |= S_IFBLK;
1373 	} else {
1374 		vap->va_rdev = 0;
1375 	}
1376 }
1377 
1378 static struct vattr *
1379 sdev_getdefault_attr(enum vtype type)
1380 {
1381 	if (type == VDIR)
1382 		return (&sdev_vattr_dir);
1383 	else if (type == VCHR)
1384 		return (&sdev_vattr_chr);
1385 	else if (type == VBLK)
1386 		return (&sdev_vattr_blk);
1387 	else if (type == VLNK)
1388 		return (&sdev_vattr_lnk);
1389 	else
1390 		return (NULL);
1391 }
1392 int
1393 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1394 {
1395 	int rv = 0;
1396 	struct vnode *vp = SDEVTOV(dv);
1397 
1398 	switch (vp->v_type) {
1399 	case VCHR:
1400 	case VBLK:
1401 		/*
1402 		 * If vnode is a device, return special vnode instead
1403 		 * (though it knows all about -us- via sp->s_realvp)
1404 		 */
1405 		*vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1406 		VN_RELE(vp);
1407 		if (*vpp == NULLVP)
1408 			rv = ENOSYS;
1409 		break;
1410 	default:	/* most types are returned as is */
1411 		*vpp = vp;
1412 		break;
1413 	}
1414 	return (rv);
1415 }
1416 
1417 /*
1418  * loopback into sdev_lookup()
1419  */
1420 static struct vnode *
1421 devname_find_by_devpath(char *devpath, struct vattr *vattr)
1422 {
1423 	int error = 0;
1424 	struct vnode *vp;
1425 
1426 	error = lookupname(devpath, UIO_SYSSPACE, NO_FOLLOW, NULLVPP, &vp);
1427 	if (error) {
1428 		return (NULL);
1429 	}
1430 
1431 	if (vattr)
1432 		(void) VOP_GETATTR(vp, vattr, 0, kcred, NULL);
1433 	return (vp);
1434 }
1435 
1436 /*
1437  * the junction between devname and devfs
1438  */
1439 static struct vnode *
1440 devname_configure_by_path(char *physpath, struct vattr *vattr)
1441 {
1442 	int error = 0;
1443 	struct vnode *vp;
1444 
1445 	ASSERT(strncmp(physpath, "/devices/", sizeof ("/devices/") - 1)
1446 	    == 0);
1447 
1448 	error = devfs_lookupname(physpath + sizeof ("/devices/") - 1,
1449 	    NULLVPP, &vp);
1450 	if (error != 0) {
1451 		if (error == ENODEV) {
1452 			cmn_err(CE_CONT, "%s: not found (line %d)\n",
1453 			    physpath, __LINE__);
1454 		}
1455 
1456 		return (NULL);
1457 	}
1458 
1459 	if (vattr)
1460 		(void) VOP_GETATTR(vp, vattr, 0, kcred, NULL);
1461 	return (vp);
1462 }
1463 
1464 /*
1465  * junction between devname and root file system, e.g. ufs
1466  */
1467 int
1468 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1469 {
1470 	struct vnode *rdvp = ddv->sdev_attrvp;
1471 	int rval = 0;
1472 
1473 	ASSERT(rdvp);
1474 
1475 	rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1476 	    NULL);
1477 	return (rval);
1478 }
1479 
1480 static int
1481 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1482 {
1483 	struct sdev_node *dv = NULL;
1484 	char	*nm;
1485 	struct vnode *dirvp;
1486 	int	error;
1487 	vnode_t	*vp;
1488 	int eof;
1489 	struct iovec iov;
1490 	struct uio uio;
1491 	struct dirent64 *dp;
1492 	dirent64_t *dbuf;
1493 	size_t dbuflen;
1494 	struct vattr vattr;
1495 	char *link = NULL;
1496 
1497 	if (ddv->sdev_attrvp == NULL)
1498 		return (0);
1499 	if (!(ddv->sdev_flags & SDEV_BUILD))
1500 		return (0);
1501 
1502 	dirvp = ddv->sdev_attrvp;
1503 	VN_HOLD(dirvp);
1504 	dbuf = kmem_zalloc(dlen, KM_SLEEP);
1505 
1506 	uio.uio_iov = &iov;
1507 	uio.uio_iovcnt = 1;
1508 	uio.uio_segflg = UIO_SYSSPACE;
1509 	uio.uio_fmode = 0;
1510 	uio.uio_extflg = UIO_COPY_CACHED;
1511 	uio.uio_loffset = 0;
1512 	uio.uio_llimit = MAXOFFSET_T;
1513 
1514 	eof = 0;
1515 	error = 0;
1516 	while (!error && !eof) {
1517 		uio.uio_resid = dlen;
1518 		iov.iov_base = (char *)dbuf;
1519 		iov.iov_len = dlen;
1520 		(void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1521 		error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1522 		VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1523 
1524 		dbuflen = dlen - uio.uio_resid;
1525 		if (error || dbuflen == 0)
1526 			break;
1527 
1528 		if (!(ddv->sdev_flags & SDEV_BUILD)) {
1529 			error = 0;
1530 			break;
1531 		}
1532 
1533 		for (dp = dbuf; ((intptr_t)dp <
1534 		    (intptr_t)dbuf + dbuflen);
1535 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1536 			nm = dp->d_name;
1537 
1538 			if (strcmp(nm, ".") == 0 ||
1539 			    strcmp(nm, "..") == 0)
1540 				continue;
1541 
1542 			vp = NULLVP;
1543 			dv = sdev_cache_lookup(ddv, nm);
1544 			if (dv) {
1545 				if (dv->sdev_state != SDEV_ZOMBIE) {
1546 					SDEV_SIMPLE_RELE(dv);
1547 				} else {
1548 					/*
1549 					 * A ZOMBIE node may not have been
1550 					 * cleaned up from the backing store,
1551 					 * bypass this entry in this case,
1552 					 * and clean it up from the directory
1553 					 * cache if this is the last call.
1554 					 */
1555 					(void) sdev_dirdelete(ddv, dv);
1556 				}
1557 				continue;
1558 			}
1559 
1560 			/* refill the cache if not already */
1561 			error = devname_backstore_lookup(ddv, nm, &vp);
1562 			if (error)
1563 				continue;
1564 
1565 			vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1566 			error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1567 			if (error)
1568 				continue;
1569 
1570 			if (vattr.va_type == VLNK) {
1571 				error = sdev_getlink(vp, &link);
1572 				if (error) {
1573 					continue;
1574 				}
1575 				ASSERT(link != NULL);
1576 			}
1577 
1578 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1579 				rw_exit(&ddv->sdev_contents);
1580 				rw_enter(&ddv->sdev_contents, RW_WRITER);
1581 			}
1582 			error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1583 			    cred, SDEV_READY);
1584 			rw_downgrade(&ddv->sdev_contents);
1585 
1586 			if (link != NULL) {
1587 				kmem_free(link, strlen(link) + 1);
1588 				link = NULL;
1589 			}
1590 
1591 			if (!error) {
1592 				ASSERT(dv);
1593 				ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1594 				SDEV_SIMPLE_RELE(dv);
1595 			}
1596 			vp = NULL;
1597 			dv = NULL;
1598 		}
1599 	}
1600 
1601 done:
1602 	VN_RELE(dirvp);
1603 	kmem_free(dbuf, dlen);
1604 
1605 	return (error);
1606 }
1607 
1608 void
1609 sdev_filldir_dynamic(struct sdev_node *ddv)
1610 {
1611 	int error;
1612 	int i;
1613 	struct vattr *vap;
1614 	char *nm = NULL;
1615 	struct sdev_node *dv = NULL;
1616 
1617 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1618 	ASSERT((ddv->sdev_flags & SDEV_BUILD));
1619 
1620 	vap = sdev_getdefault_attr(VDIR);
1621 	for (i = 0; vtab[i].vt_name != NULL; i++) {
1622 		nm = vtab[i].vt_name;
1623 		ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1624 		dv = NULL;
1625 		error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1626 		    NULL, kcred, SDEV_READY);
1627 		if (error) {
1628 			cmn_err(CE_WARN, "%s/%s: error %d\n",
1629 			    ddv->sdev_name, nm, error);
1630 		} else {
1631 			ASSERT(dv);
1632 			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1633 			SDEV_SIMPLE_RELE(dv);
1634 		}
1635 	}
1636 }
1637 
1638 /*
1639  * Creating a backing store entry based on sdev_attr.
1640  * This is called either as part of node creation in a persistent directory
1641  * or from setattr/setsecattr to persist access attributes across reboot.
1642  */
1643 int
1644 sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1645 {
1646 	int error = 0;
1647 	struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1648 	struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1649 	struct vattr *vap = dv->sdev_attr;
1650 	char *nm = dv->sdev_name;
1651 	struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1652 
1653 	ASSERT(dv && dv->sdev_name && rdvp);
1654 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1655 
1656 lookup:
1657 	/* try to find it in the backing store */
1658 	error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1659 	    NULL);
1660 	if (error == 0) {
1661 		if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1662 			VN_HOLD(rrvp);
1663 			VN_RELE(*rvp);
1664 			*rvp = rrvp;
1665 		}
1666 
1667 		kmem_free(dv->sdev_attr, sizeof (vattr_t));
1668 		dv->sdev_attr = NULL;
1669 		dv->sdev_attrvp = *rvp;
1670 		return (0);
1671 	}
1672 
1673 	/* let's try to persist the node */
1674 	gethrestime(&vap->va_atime);
1675 	vap->va_mtime = vap->va_atime;
1676 	vap->va_ctime = vap->va_atime;
1677 	vap->va_mask |= AT_TYPE|AT_MODE;
1678 	switch (vap->va_type) {
1679 	case VDIR:
1680 		error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1681 		sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1682 		    (void *)(*rvp), error));
1683 		break;
1684 	case VCHR:
1685 	case VBLK:
1686 	case VREG:
1687 	case VDOOR:
1688 		error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1689 		    rvp, cred, 0, NULL, NULL);
1690 		sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1691 		    (void *)(*rvp), error));
1692 		if (!error)
1693 			VN_RELE(*rvp);
1694 		break;
1695 	case VLNK:
1696 		ASSERT(dv->sdev_symlink);
1697 		error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1698 		    NULL, 0);
1699 		sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1700 		    error));
1701 		break;
1702 	default:
1703 		cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1704 		    "create\n", nm);
1705 		/*NOTREACHED*/
1706 	}
1707 
1708 	/* go back to lookup to factor out spec node and set attrvp */
1709 	if (error == 0)
1710 		goto lookup;
1711 
1712 	sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error));
1713 	return (error);
1714 }
1715 
1716 static int
1717 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1718 {
1719 	int error = 0;
1720 	struct sdev_node *dup = NULL;
1721 
1722 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1723 	if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1724 		sdev_direnter(ddv, *dv);
1725 	} else {
1726 		if (dup->sdev_state == SDEV_ZOMBIE) {
1727 			error = sdev_dirdelete(ddv, dup);
1728 			/*
1729 			 * The ZOMBIE node is still hanging
1730 			 * around with more than one reference counts.
1731 			 * Fail the new node creation so that
1732 			 * the directory cache won't have
1733 			 * duplicate entries for the same named node
1734 			 */
1735 			if (error == EBUSY) {
1736 				SDEV_SIMPLE_RELE(*dv);
1737 				sdev_nodedestroy(*dv, 0);
1738 				*dv = NULL;
1739 				return (error);
1740 			}
1741 			sdev_direnter(ddv, *dv);
1742 		} else {
1743 			ASSERT((*dv)->sdev_state != SDEV_ZOMBIE);
1744 			SDEV_SIMPLE_RELE(*dv);
1745 			sdev_nodedestroy(*dv, 0);
1746 			*dv = dup;
1747 		}
1748 	}
1749 
1750 	return (0);
1751 }
1752 
1753 static int
1754 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1755 {
1756 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1757 	return (sdev_dirdelete(ddv, *dv));
1758 }
1759 
1760 /*
1761  * update the in-core directory cache
1762  */
1763 int
1764 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1765     sdev_cache_ops_t ops)
1766 {
1767 	int error = 0;
1768 
1769 	ASSERT((SDEV_HELD(*dv)));
1770 
1771 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1772 	switch (ops) {
1773 	case SDEV_CACHE_ADD:
1774 		error = sdev_cache_add(ddv, dv, nm);
1775 		break;
1776 	case SDEV_CACHE_DELETE:
1777 		error = sdev_cache_delete(ddv, dv);
1778 		break;
1779 	default:
1780 		break;
1781 	}
1782 
1783 	return (error);
1784 }
1785 
1786 /*
1787  * retrieve the named entry from the directory cache
1788  */
1789 struct sdev_node *
1790 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1791 {
1792 	struct sdev_node *dv = NULL;
1793 
1794 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1795 	dv = sdev_findbyname(ddv, nm);
1796 
1797 	return (dv);
1798 }
1799 
1800 /*
1801  * Implicit reconfig for nodes constructed by a link generator
1802  * Start devfsadm if needed, or if devfsadm is in progress,
1803  * prepare to block on devfsadm either completing or
1804  * constructing the desired node.  As devfsadmd is global
1805  * in scope, constructing all necessary nodes, we only
1806  * need to initiate it once.
1807  */
1808 static int
1809 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1810 {
1811 	int error = 0;
1812 
1813 	if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1814 		sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1815 		    ddv->sdev_name, nm, devfsadm_state));
1816 		mutex_enter(&dv->sdev_lookup_lock);
1817 		SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1818 		mutex_exit(&dv->sdev_lookup_lock);
1819 		error = 0;
1820 	} else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1821 		sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1822 		    ddv->sdev_name, nm, devfsadm_state));
1823 
1824 		sdev_devfsadmd_thread(ddv, dv, kcred);
1825 		mutex_enter(&dv->sdev_lookup_lock);
1826 		SDEV_BLOCK_OTHERS(dv,
1827 		    (SDEV_LOOKUP | SDEV_LGWAITING));
1828 		mutex_exit(&dv->sdev_lookup_lock);
1829 		error = 0;
1830 	} else {
1831 		error = -1;
1832 	}
1833 
1834 	return (error);
1835 }
1836 
1837 static int
1838 sdev_call_modulelookup(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1839     int (*fn)(char *, devname_handle_t *, struct cred *), struct cred *cred)
1840 {
1841 	struct vnode *rvp = NULL;
1842 	int error = 0;
1843 	struct vattr *vap;
1844 	devname_spec_t spec;
1845 	devname_handle_t *hdl;
1846 	void *args = NULL;
1847 	struct sdev_node *dv = *dvp;
1848 
1849 	ASSERT(dv && ddv);
1850 	hdl = &(dv->sdev_handle);
1851 	ASSERT(hdl->dh_data == dv);
1852 	mutex_enter(&dv->sdev_lookup_lock);
1853 	SDEV_BLOCK_OTHERS(dv, SDEV_LOOKUP);
1854 	mutex_exit(&dv->sdev_lookup_lock);
1855 	error = (*fn)(nm, hdl, cred);
1856 	if (error) {
1857 		return (error);
1858 	}
1859 
1860 	spec = hdl->dh_spec;
1861 	args = hdl->dh_args;
1862 	ASSERT(args);
1863 
1864 	switch (spec) {
1865 	case DEVNAME_NS_PATH:
1866 		/*
1867 		 * symlink of:
1868 		 *	/dev/dir/nm -> /device/...
1869 		 */
1870 		rvp = devname_configure_by_path((char *)args, NULL);
1871 		break;
1872 	case DEVNAME_NS_DEV:
1873 		/*
1874 		 * symlink of:
1875 		 *	/dev/dir/nm -> /dev/...
1876 		 */
1877 		rvp = devname_find_by_devpath((char *)args, NULL);
1878 		break;
1879 	default:
1880 		if (args)
1881 			kmem_free((char *)args, strlen(args) + 1);
1882 		return (ENOENT);
1883 
1884 	}
1885 
1886 	if (rvp == NULL) {
1887 		if (args)
1888 			kmem_free((char *)args, strlen(args) + 1);
1889 		return (ENOENT);
1890 	} else {
1891 		vap = sdev_getdefault_attr(VLNK);
1892 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1893 		/*
1894 		 * Could sdev_mknode return a different dv_node
1895 		 * once the lock is dropped?
1896 		 */
1897 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
1898 			rw_exit(&ddv->sdev_contents);
1899 			rw_enter(&ddv->sdev_contents, RW_WRITER);
1900 		}
1901 		error = sdev_mknode(ddv, nm, &dv, vap, NULL, args, cred,
1902 		    SDEV_READY);
1903 		rw_downgrade(&ddv->sdev_contents);
1904 		if (error) {
1905 			if (args)
1906 				kmem_free((char *)args, strlen(args) + 1);
1907 			return (error);
1908 		} else {
1909 			mutex_enter(&dv->sdev_lookup_lock);
1910 			SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
1911 			mutex_exit(&dv->sdev_lookup_lock);
1912 			error = 0;
1913 		}
1914 	}
1915 
1916 	if (args)
1917 		kmem_free((char *)args, strlen(args) + 1);
1918 
1919 	*dvp = dv;
1920 	return (0);
1921 }
1922 
1923 /*
1924  *  Support for specialized device naming construction mechanisms
1925  */
1926 static int
1927 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1928     int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1929     void *, char *), int flags, struct cred *cred)
1930 {
1931 	int rv = 0;
1932 	char *physpath = NULL;
1933 	struct vnode *rvp = NULL;
1934 	struct vattr vattr;
1935 	struct vattr *vap;
1936 	struct sdev_node *dv = *dvp;
1937 
1938 	mutex_enter(&dv->sdev_lookup_lock);
1939 	SDEV_BLOCK_OTHERS(dv, SDEV_LOOKUP);
1940 	mutex_exit(&dv->sdev_lookup_lock);
1941 
1942 	/* for non-devfsadm devices */
1943 	if (flags & SDEV_PATH) {
1944 		physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1945 		rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1946 		    NULL);
1947 		if (rv) {
1948 			kmem_free(physpath, MAXPATHLEN);
1949 			return (-1);
1950 		}
1951 
1952 		ASSERT(physpath);
1953 		rvp = devname_configure_by_path(physpath, NULL);
1954 		if (rvp == NULL) {
1955 			sdcmn_err3(("devname_configure_by_path: "
1956 			    "failed for /dev/%s/%s\n",
1957 			    ddv->sdev_name, nm));
1958 			kmem_free(physpath, MAXPATHLEN);
1959 			rv = -1;
1960 		} else {
1961 			vap = sdev_getdefault_attr(VLNK);
1962 			ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1963 
1964 			/*
1965 			 * Sdev_mknode may return back a different sdev_node
1966 			 * that was created by another thread that
1967 			 * raced to the directroy cache before this thread.
1968 			 *
1969 			 * With current directory cache mechanism
1970 			 * (linked list with the sdev_node name as
1971 			 * the entity key), this is a way to make sure
1972 			 * only one entry exists for the same name
1973 			 * in the same directory. The outcome is
1974 			 * the winner wins.
1975 			 */
1976 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1977 				rw_exit(&ddv->sdev_contents);
1978 				rw_enter(&ddv->sdev_contents, RW_WRITER);
1979 			}
1980 			rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1981 			    (void *)physpath, cred, SDEV_READY);
1982 			rw_downgrade(&ddv->sdev_contents);
1983 			kmem_free(physpath, MAXPATHLEN);
1984 			if (rv) {
1985 				return (rv);
1986 			} else {
1987 				mutex_enter(&dv->sdev_lookup_lock);
1988 				SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
1989 				mutex_exit(&dv->sdev_lookup_lock);
1990 				return (0);
1991 			}
1992 		}
1993 	} else if (flags & SDEV_VNODE) {
1994 		/*
1995 		 * DBNR has its own way to create the device
1996 		 * and return a backing store vnode in rvp
1997 		 */
1998 		ASSERT(callback);
1999 		rv = callback(ddv, nm, (void *)&rvp, kcred, NULL, NULL);
2000 		if (rv || (rvp == NULL)) {
2001 			sdcmn_err3(("devname_lookup_func: SDEV_VNODE "
2002 			    "callback failed \n"));
2003 			return (-1);
2004 		}
2005 		vap = sdev_getdefault_attr(rvp->v_type);
2006 		if (vap == NULL)
2007 			return (-1);
2008 
2009 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2010 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2011 			rw_exit(&ddv->sdev_contents);
2012 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2013 		}
2014 		rv = sdev_mknode(ddv, nm, &dv, vap, rvp, NULL,
2015 		    cred, SDEV_READY);
2016 		rw_downgrade(&ddv->sdev_contents);
2017 		if (rv)
2018 			return (rv);
2019 
2020 		mutex_enter(&dv->sdev_lookup_lock);
2021 		SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2022 		mutex_exit(&dv->sdev_lookup_lock);
2023 		return (0);
2024 	} else if (flags & SDEV_VATTR) {
2025 		/*
2026 		 * /dev/pts
2027 		 *
2028 		 * DBNR has its own way to create the device
2029 		 * "0" is returned upon success.
2030 		 *
2031 		 * callback is responsible to set the basic attributes,
2032 		 * e.g. va_type/va_uid/va_gid/
2033 		 *    dev_t if VCHR or VBLK/
2034 		 */
2035 		ASSERT(callback);
2036 		rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
2037 		if (rv) {
2038 			sdcmn_err3(("devname_lookup_func: SDEV_NONE "
2039 			    "callback failed \n"));
2040 			return (-1);
2041 		}
2042 
2043 		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2044 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2045 			rw_exit(&ddv->sdev_contents);
2046 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2047 		}
2048 		rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
2049 		    cred, SDEV_READY);
2050 		rw_downgrade(&ddv->sdev_contents);
2051 
2052 		if (rv)
2053 			return (rv);
2054 
2055 		mutex_enter(&dv->sdev_lookup_lock);
2056 		SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2057 		mutex_exit(&dv->sdev_lookup_lock);
2058 		return (0);
2059 	} else {
2060 		impossible(("lookup: %s/%s by %s not supported (%d)\n",
2061 		    SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
2062 		    __LINE__));
2063 		rv = -1;
2064 	}
2065 
2066 	*dvp = dv;
2067 	return (rv);
2068 }
2069 
2070 static int
2071 is_devfsadm_thread(char *exec_name)
2072 {
2073 	/*
2074 	 * note: because devfsadmd -> /usr/sbin/devfsadm
2075 	 * it is safe to use "devfsadm" to capture the lookups
2076 	 * from devfsadm and its daemon version.
2077 	 */
2078 	if (strcmp(exec_name, "devfsadm") == 0)
2079 		return (1);
2080 	return (0);
2081 }
2082 
2083 
2084 /*
2085  * Lookup Order:
2086  *	sdev_node cache;
2087  *	backing store (SDEV_PERSIST);
2088  *	DBNR: a. dir_ops implemented in the loadable modules;
2089  *	      b. vnode ops in vtab.
2090  */
2091 int
2092 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
2093     struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
2094     struct cred *, void *, char *), int flags)
2095 {
2096 	int rv = 0, nmlen;
2097 	struct vnode *rvp = NULL;
2098 	struct sdev_node *dv = NULL;
2099 	int	retried = 0;
2100 	int	error = 0;
2101 	struct devname_nsmap *map = NULL;
2102 	struct devname_ops *dirops = NULL;
2103 	int (*fn)(char *, devname_handle_t *, struct cred *) = NULL;
2104 	struct vattr vattr;
2105 	char *lookup_thread = curproc->p_user.u_comm;
2106 	int failed_flags = 0;
2107 	int (*vtor)(struct sdev_node *) = NULL;
2108 	int state;
2109 	int parent_state;
2110 	char *link = NULL;
2111 
2112 	if (SDEVTOV(ddv)->v_type != VDIR)
2113 		return (ENOTDIR);
2114 
2115 	/*
2116 	 * Empty name or ., return node itself.
2117 	 */
2118 	nmlen = strlen(nm);
2119 	if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
2120 		*vpp = SDEVTOV(ddv);
2121 		VN_HOLD(*vpp);
2122 		return (0);
2123 	}
2124 
2125 	/*
2126 	 * .., return the parent directory
2127 	 */
2128 	if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
2129 		*vpp = SDEVTOV(ddv->sdev_dotdot);
2130 		VN_HOLD(*vpp);
2131 		return (0);
2132 	}
2133 
2134 	rw_enter(&ddv->sdev_contents, RW_READER);
2135 	if (ddv->sdev_flags & SDEV_VTOR) {
2136 		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2137 		ASSERT(vtor);
2138 	}
2139 
2140 tryagain:
2141 	/*
2142 	 * (a) directory cache lookup:
2143 	 */
2144 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2145 	parent_state = ddv->sdev_state;
2146 	dv = sdev_cache_lookup(ddv, nm);
2147 	if (dv) {
2148 		state = dv->sdev_state;
2149 		switch (state) {
2150 		case SDEV_INIT:
2151 			if (is_devfsadm_thread(lookup_thread))
2152 				break;
2153 
2154 			/* ZOMBIED parent won't allow node creation */
2155 			if (parent_state == SDEV_ZOMBIE) {
2156 				SD_TRACE_FAILED_LOOKUP(ddv, nm,
2157 				    retried);
2158 				goto nolock_notfound;
2159 			}
2160 
2161 			mutex_enter(&dv->sdev_lookup_lock);
2162 			/* compensate the threads started after devfsadm */
2163 			if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2164 			    !(SDEV_IS_LOOKUP(dv)))
2165 				SDEV_BLOCK_OTHERS(dv,
2166 				    (SDEV_LOOKUP | SDEV_LGWAITING));
2167 
2168 			if (SDEV_IS_LOOKUP(dv)) {
2169 				failed_flags |= SLF_REBUILT;
2170 				rw_exit(&ddv->sdev_contents);
2171 				error = sdev_wait4lookup(dv, SDEV_LOOKUP);
2172 				mutex_exit(&dv->sdev_lookup_lock);
2173 				rw_enter(&ddv->sdev_contents, RW_READER);
2174 
2175 				if (error != 0) {
2176 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2177 					    retried);
2178 					goto nolock_notfound;
2179 				}
2180 
2181 				state = dv->sdev_state;
2182 				if (state == SDEV_INIT) {
2183 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2184 					    retried);
2185 					goto nolock_notfound;
2186 				} else if (state == SDEV_READY) {
2187 					goto found;
2188 				} else if (state == SDEV_ZOMBIE) {
2189 					rw_exit(&ddv->sdev_contents);
2190 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2191 					    retried);
2192 					SDEV_RELE(dv);
2193 					goto lookup_failed;
2194 				}
2195 			} else {
2196 				mutex_exit(&dv->sdev_lookup_lock);
2197 			}
2198 			break;
2199 		case SDEV_READY:
2200 			goto found;
2201 		case SDEV_ZOMBIE:
2202 			rw_exit(&ddv->sdev_contents);
2203 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2204 			SDEV_RELE(dv);
2205 			goto lookup_failed;
2206 		default:
2207 			rw_exit(&ddv->sdev_contents);
2208 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2209 			sdev_lookup_failed(ddv, nm, failed_flags);
2210 			*vpp = NULLVP;
2211 			return (ENOENT);
2212 		}
2213 	}
2214 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2215 
2216 	/*
2217 	 * ZOMBIED parent does not allow new node creation.
2218 	 * bail out early
2219 	 */
2220 	if (parent_state == SDEV_ZOMBIE) {
2221 		rw_exit(&ddv->sdev_contents);
2222 		*vpp = NULL;
2223 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2224 		return (ENOENT);
2225 	}
2226 
2227 	/*
2228 	 * (b0): backing store lookup
2229 	 *	SDEV_PERSIST is default except:
2230 	 *		1) pts nodes
2231 	 *		2) non-chmod'ed local nodes
2232 	 */
2233 	if (SDEV_IS_PERSIST(ddv)) {
2234 		error = devname_backstore_lookup(ddv, nm, &rvp);
2235 
2236 		if (!error) {
2237 			sdcmn_err3(("devname_backstore_lookup: "
2238 			    "found attrvp %p for %s\n", (void *)rvp, nm));
2239 
2240 			vattr.va_mask = AT_MODE|AT_UID|AT_GID;
2241 			error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
2242 			if (error) {
2243 				rw_exit(&ddv->sdev_contents);
2244 				if (dv)
2245 					SDEV_RELE(dv);
2246 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2247 				sdev_lookup_failed(ddv, nm, failed_flags);
2248 				*vpp = NULLVP;
2249 				return (ENOENT);
2250 			}
2251 
2252 			if (vattr.va_type == VLNK) {
2253 				error = sdev_getlink(rvp, &link);
2254 				if (error) {
2255 					rw_exit(&ddv->sdev_contents);
2256 					if (dv)
2257 						SDEV_RELE(dv);
2258 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2259 					    retried);
2260 					sdev_lookup_failed(ddv, nm,
2261 					    failed_flags);
2262 					*vpp = NULLVP;
2263 					return (ENOENT);
2264 				}
2265 				ASSERT(link != NULL);
2266 			}
2267 
2268 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
2269 				rw_exit(&ddv->sdev_contents);
2270 				rw_enter(&ddv->sdev_contents, RW_WRITER);
2271 			}
2272 			error = sdev_mknode(ddv, nm, &dv, &vattr,
2273 			    rvp, link, cred, SDEV_READY);
2274 			rw_downgrade(&ddv->sdev_contents);
2275 
2276 			if (link != NULL) {
2277 				kmem_free(link, strlen(link) + 1);
2278 				link = NULL;
2279 			}
2280 
2281 			if (error) {
2282 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2283 				rw_exit(&ddv->sdev_contents);
2284 				if (dv)
2285 					SDEV_RELE(dv);
2286 				goto lookup_failed;
2287 			} else {
2288 				goto found;
2289 			}
2290 		} else if (retried) {
2291 			rw_exit(&ddv->sdev_contents);
2292 			sdcmn_err3(("retry of lookup of %s/%s: failed\n",
2293 			    ddv->sdev_name, nm));
2294 			if (dv)
2295 				SDEV_RELE(dv);
2296 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2297 			sdev_lookup_failed(ddv, nm, failed_flags);
2298 			*vpp = NULLVP;
2299 			return (ENOENT);
2300 		}
2301 	}
2302 
2303 
2304 	/* first thread that is doing the lookup on this node */
2305 	if (!dv) {
2306 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2307 			rw_exit(&ddv->sdev_contents);
2308 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2309 		}
2310 		error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
2311 		    cred, SDEV_INIT);
2312 		if (!dv) {
2313 			rw_exit(&ddv->sdev_contents);
2314 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2315 			sdev_lookup_failed(ddv, nm, failed_flags);
2316 			*vpp = NULLVP;
2317 			return (ENOENT);
2318 		}
2319 		rw_downgrade(&ddv->sdev_contents);
2320 	}
2321 	ASSERT(dv);
2322 	ASSERT(SDEV_HELD(dv));
2323 
2324 	if (SDEV_IS_NO_NCACHE(dv)) {
2325 		failed_flags |= SLF_NO_NCACHE;
2326 	}
2327 
2328 	if (SDEV_IS_GLOBAL(ddv)) {
2329 		map = sdev_get_map(ddv, 1);
2330 		dirops = map ? map->dir_ops : NULL;
2331 		fn = dirops ? dirops->devnops_lookup : NULL;
2332 	}
2333 
2334 	/*
2335 	 * (b1) invoking devfsadm once per life time for devfsadm nodes
2336 	 */
2337 	if ((fn == NULL) && !callback) {
2338 
2339 		if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
2340 		    SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
2341 		    ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
2342 			ASSERT(SDEV_HELD(dv));
2343 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2344 			goto nolock_notfound;
2345 		}
2346 
2347 		/*
2348 		 * filter out known non-existent devices recorded
2349 		 * during initial reconfiguration boot for which
2350 		 * reconfig should not be done and lookup may
2351 		 * be short-circuited now.
2352 		 */
2353 		if (sdev_lookup_filter(ddv, nm)) {
2354 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2355 			goto nolock_notfound;
2356 		}
2357 
2358 		/* bypassing devfsadm internal nodes */
2359 		if (is_devfsadm_thread(lookup_thread)) {
2360 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2361 			goto nolock_notfound;
2362 		}
2363 
2364 		if (sdev_reconfig_disable) {
2365 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2366 			goto nolock_notfound;
2367 		}
2368 
2369 		error = sdev_call_devfsadmd(ddv, dv, nm);
2370 		if (error == 0) {
2371 			sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
2372 			    ddv->sdev_name, nm, curproc->p_user.u_comm));
2373 			if (sdev_reconfig_verbose) {
2374 				cmn_err(CE_CONT,
2375 				    "?lookup of %s/%s by %s: reconfig\n",
2376 				    ddv->sdev_name, nm, curproc->p_user.u_comm);
2377 			}
2378 			retried = 1;
2379 			failed_flags |= SLF_REBUILT;
2380 			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
2381 			SDEV_SIMPLE_RELE(dv);
2382 			goto tryagain;
2383 		} else {
2384 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2385 			goto nolock_notfound;
2386 		}
2387 	}
2388 
2389 	/*
2390 	 * (b2) Directory Based Name Resolution (DBNR):
2391 	 *	ddv	- parent
2392 	 *	nm	- /dev/(ddv->sdev_name)/nm
2393 	 *
2394 	 *	note: module vnode ops take precedence than the build-in ones
2395 	 */
2396 	if (fn) {
2397 		error = sdev_call_modulelookup(ddv, &dv, nm, fn, cred);
2398 		if (error) {
2399 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2400 			goto notfound;
2401 		} else {
2402 			goto found;
2403 		}
2404 	} else if (callback) {
2405 		error = sdev_call_dircallback(ddv, &dv, nm, callback,
2406 		    flags, cred);
2407 		if (error == 0) {
2408 			goto found;
2409 		} else {
2410 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2411 			goto notfound;
2412 		}
2413 	}
2414 	ASSERT(rvp);
2415 
2416 found:
2417 	ASSERT(!(dv->sdev_flags & SDEV_STALE));
2418 	ASSERT(dv->sdev_state == SDEV_READY);
2419 	if (vtor) {
2420 		/*
2421 		 * Check validity of returned node
2422 		 */
2423 		switch (vtor(dv)) {
2424 		case SDEV_VTOR_VALID:
2425 			break;
2426 		case SDEV_VTOR_INVALID:
2427 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2428 			sdcmn_err7(("lookup: destroy invalid "
2429 			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
2430 			goto nolock_notfound;
2431 		case SDEV_VTOR_SKIP:
2432 			sdcmn_err7(("lookup: node not applicable - "
2433 			    "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
2434 			rw_exit(&ddv->sdev_contents);
2435 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2436 			SDEV_RELE(dv);
2437 			goto lookup_failed;
2438 		default:
2439 			cmn_err(CE_PANIC,
2440 			    "dev fs: validator failed: %s(%p)\n",
2441 			    dv->sdev_name, (void *)dv);
2442 			break;
2443 			/*NOTREACHED*/
2444 		}
2445 	}
2446 
2447 	if ((SDEVTOV(dv)->v_type == VDIR) && SDEV_IS_GLOBAL(dv)) {
2448 		rw_enter(&dv->sdev_contents, RW_READER);
2449 		(void) sdev_get_map(dv, 1);
2450 		rw_exit(&dv->sdev_contents);
2451 	}
2452 	rw_exit(&ddv->sdev_contents);
2453 	rv = sdev_to_vp(dv, vpp);
2454 	sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
2455 	    "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
2456 	    dv->sdev_state, nm, rv));
2457 	return (rv);
2458 
2459 notfound:
2460 	mutex_enter(&dv->sdev_lookup_lock);
2461 	SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2462 	mutex_exit(&dv->sdev_lookup_lock);
2463 nolock_notfound:
2464 	/*
2465 	 * Destroy the node that is created for synchronization purposes.
2466 	 */
2467 	sdcmn_err3(("devname_lookup_func: %s with state %d\n",
2468 	    nm, dv->sdev_state));
2469 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2470 	if (dv->sdev_state == SDEV_INIT) {
2471 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2472 			rw_exit(&ddv->sdev_contents);
2473 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2474 		}
2475 
2476 		/*
2477 		 * Node state may have changed during the lock
2478 		 * changes. Re-check.
2479 		 */
2480 		if (dv->sdev_state == SDEV_INIT) {
2481 			(void) sdev_dirdelete(ddv, dv);
2482 			rw_exit(&ddv->sdev_contents);
2483 			sdev_lookup_failed(ddv, nm, failed_flags);
2484 			*vpp = NULL;
2485 			return (ENOENT);
2486 		}
2487 	}
2488 
2489 	rw_exit(&ddv->sdev_contents);
2490 	SDEV_RELE(dv);
2491 
2492 lookup_failed:
2493 	sdev_lookup_failed(ddv, nm, failed_flags);
2494 	*vpp = NULL;
2495 	return (ENOENT);
2496 }
2497 
2498 /*
2499  * Given a directory node, mark all nodes beneath as
2500  * STALE, i.e. nodes that don't exist as far as new
2501  * consumers are concerned.  Remove them from the
2502  * list of directory entries so that no lookup or
2503  * directory traversal will find them.  The node
2504  * not deallocated so existing holds are not affected.
2505  */
2506 void
2507 sdev_stale(struct sdev_node *ddv)
2508 {
2509 	struct sdev_node *dv;
2510 	struct vnode *vp;
2511 
2512 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2513 
2514 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2515 	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = SDEV_NEXT_ENTRY(ddv, dv)) {
2516 		vp = SDEVTOV(dv);
2517 		if (vp->v_type == VDIR)
2518 			sdev_stale(dv);
2519 
2520 		sdcmn_err9(("sdev_stale: setting stale %s\n",
2521 		    dv->sdev_path));
2522 		dv->sdev_flags |= SDEV_STALE;
2523 		avl_remove(&ddv->sdev_entries, dv);
2524 	}
2525 	ddv->sdev_flags |= SDEV_BUILD;
2526 	rw_exit(&ddv->sdev_contents);
2527 }
2528 
2529 /*
2530  * Given a directory node, clean out all the nodes beneath.
2531  * If expr is specified, clean node with names matching expr.
2532  * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2533  *	so they are excluded from future lookups.
2534  */
2535 int
2536 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2537 {
2538 	int error = 0;
2539 	int busy = 0;
2540 	struct vnode *vp;
2541 	struct sdev_node *dv, *next = NULL;
2542 	int bkstore = 0;
2543 	int len = 0;
2544 	char *bks_name = NULL;
2545 
2546 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2547 
2548 	/*
2549 	 * We try our best to destroy all unused sdev_node's
2550 	 */
2551 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2552 	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) {
2553 		next = SDEV_NEXT_ENTRY(ddv, dv);
2554 		vp = SDEVTOV(dv);
2555 
2556 		if (expr && gmatch(dv->sdev_name, expr) == 0)
2557 			continue;
2558 
2559 		if (vp->v_type == VDIR &&
2560 		    sdev_cleandir(dv, NULL, flags) != 0) {
2561 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2562 			    dv->sdev_name));
2563 			busy++;
2564 			continue;
2565 		}
2566 
2567 		if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2568 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2569 			    dv->sdev_name));
2570 			busy++;
2571 			continue;
2572 		}
2573 
2574 		/*
2575 		 * at this point, either dv is not held or SDEV_ENFORCE
2576 		 * is specified. In either case, dv needs to be deleted
2577 		 */
2578 		SDEV_HOLD(dv);
2579 
2580 		bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2581 		if (bkstore && (vp->v_type == VDIR))
2582 			bkstore += 1;
2583 
2584 		if (bkstore) {
2585 			len = strlen(dv->sdev_name) + 1;
2586 			bks_name = kmem_alloc(len, KM_SLEEP);
2587 			bcopy(dv->sdev_name, bks_name, len);
2588 		}
2589 
2590 		error = sdev_dirdelete(ddv, dv);
2591 
2592 		if (error == EBUSY) {
2593 			sdcmn_err9(("sdev_cleandir: dir busy\n"));
2594 			busy++;
2595 		}
2596 
2597 		/* take care the backing store clean up */
2598 		if (bkstore && (error == 0)) {
2599 			ASSERT(bks_name);
2600 			ASSERT(ddv->sdev_attrvp);
2601 
2602 			if (bkstore == 1) {
2603 				error = VOP_REMOVE(ddv->sdev_attrvp,
2604 				    bks_name, kcred, NULL, 0);
2605 			} else if (bkstore == 2) {
2606 				error = VOP_RMDIR(ddv->sdev_attrvp,
2607 				    bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2608 			}
2609 
2610 			/* do not propagate the backing store errors */
2611 			if (error) {
2612 				sdcmn_err9(("sdev_cleandir: backing store"
2613 				    "not cleaned\n"));
2614 				error = 0;
2615 			}
2616 
2617 			bkstore = 0;
2618 			kmem_free(bks_name, len);
2619 			bks_name = NULL;
2620 			len = 0;
2621 		}
2622 	}
2623 
2624 	ddv->sdev_flags |= SDEV_BUILD;
2625 	rw_exit(&ddv->sdev_contents);
2626 
2627 	if (busy) {
2628 		error = EBUSY;
2629 	}
2630 
2631 	return (error);
2632 }
2633 
2634 /*
2635  * a convenient wrapper for readdir() funcs
2636  */
2637 size_t
2638 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2639 {
2640 	size_t reclen = DIRENT64_RECLEN(strlen(nm));
2641 	if (reclen > size)
2642 		return (0);
2643 
2644 	de->d_ino = (ino64_t)ino;
2645 	de->d_off = (off64_t)off + 1;
2646 	de->d_reclen = (ushort_t)reclen;
2647 	(void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2648 	return (reclen);
2649 }
2650 
2651 /*
2652  * sdev_mount service routines
2653  */
2654 int
2655 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2656 {
2657 	int	error;
2658 
2659 	if (uap->datalen != sizeof (*args))
2660 		return (EINVAL);
2661 
2662 	if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2663 		cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2664 		    "get user data. error %d\n", error);
2665 		return (EFAULT);
2666 	}
2667 
2668 	return (0);
2669 }
2670 
/*
 * nextdp(dp) evaluates to the address d_reclen bytes past dp, typed
 * as struct dirent64 *.  Any earlier definition of nextdp is dropped
 * first so that this one is used in the code below.
 */
#ifdef nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *) \
			    (intptr_t)((char *)(dp) + (dp)->d_reclen))
2677 /*
2678  * readdir helper func
2679  */
2680 int
2681 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
2682     int flags)
2683 {
2684 	struct sdev_node *ddv = VTOSDEV(vp);
2685 	struct sdev_node *dv;
2686 	dirent64_t	*dp;
2687 	ulong_t		outcount = 0;
2688 	size_t		namelen;
2689 	ulong_t		alloc_count;
2690 	void		*outbuf;
2691 	struct iovec	*iovp;
2692 	int		error = 0;
2693 	size_t		reclen;
2694 	offset_t	diroff;
2695 	offset_t	soff;
2696 	int		this_reclen;
2697 	struct devname_nsmap	*map = NULL;
2698 	struct devname_ops	*dirops = NULL;
2699 	int (*fn)(devname_handle_t *, struct cred *) = NULL;
2700 	int (*vtor)(struct sdev_node *) = NULL;
2701 	struct vattr attr;
2702 	timestruc_t now;
2703 
2704 	ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
2705 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2706 
2707 	if (uiop->uio_loffset >= MAXOFF_T) {
2708 		if (eofp)
2709 			*eofp = 1;
2710 		return (0);
2711 	}
2712 
2713 	if (uiop->uio_iovcnt != 1)
2714 		return (EINVAL);
2715 
2716 	if (vp->v_type != VDIR)
2717 		return (ENOTDIR);
2718 
2719 	if (ddv->sdev_flags & SDEV_VTOR) {
2720 		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2721 		ASSERT(vtor);
2722 	}
2723 
2724 	if (eofp != NULL)
2725 		*eofp = 0;
2726 
2727 	soff = uiop->uio_loffset;
2728 	iovp = uiop->uio_iov;
2729 	alloc_count = iovp->iov_len;
2730 	dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
2731 	outcount = 0;
2732 
2733 	if (ddv->sdev_state == SDEV_ZOMBIE)
2734 		goto get_cache;
2735 
2736 	if (SDEV_IS_GLOBAL(ddv)) {
2737 		map = sdev_get_map(ddv, 0);
2738 		dirops = map ? map->dir_ops : NULL;
2739 		fn = dirops ? dirops->devnops_readdir : NULL;
2740 
2741 		if (map && map->dir_map) {
2742 			/*
2743 			 * load the name mapping rule database
2744 			 * through invoking devfsadm and symlink
2745 			 * all the entries in the map
2746 			 */
2747 			devname_rdr_result_t rdr_result;
2748 			int do_thread = 0;
2749 
2750 			rw_enter(&map->dir_lock, RW_READER);
2751 			do_thread = map->dir_maploaded ? 0 : 1;
2752 			rw_exit(&map->dir_lock);
2753 
2754 			if (do_thread) {
2755 				mutex_enter(&ddv->sdev_lookup_lock);
2756 				SDEV_BLOCK_OTHERS(ddv, SDEV_READDIR);
2757 				mutex_exit(&ddv->sdev_lookup_lock);
2758 
2759 				sdev_dispatch_to_nsrdr_thread(ddv,
2760 				    map->dir_map, &rdr_result);
2761 			}
2762 		} else if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
2763 		    !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
2764 		    !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
2765 		    ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
2766 		    !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
2767 		    !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2768 		    !sdev_reconfig_disable) {
2769 			/*
2770 			 * invoking "devfsadm" to do system device reconfig
2771 			 */
2772 			mutex_enter(&ddv->sdev_lookup_lock);
2773 			SDEV_BLOCK_OTHERS(ddv,
2774 			    (SDEV_READDIR|SDEV_LGWAITING));
2775 			mutex_exit(&ddv->sdev_lookup_lock);
2776 
2777 			sdcmn_err8(("readdir of %s by %s: reconfig\n",
2778 			    ddv->sdev_path, curproc->p_user.u_comm));
2779 			if (sdev_reconfig_verbose) {
2780 				cmn_err(CE_CONT,
2781 				    "?readdir of %s by %s: reconfig\n",
2782 				    ddv->sdev_path, curproc->p_user.u_comm);
2783 			}
2784 
2785 			sdev_devfsadmd_thread(ddv, NULL, kcred);
2786 		} else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
2787 			/*
2788 			 * compensate the "ls" started later than "devfsadm"
2789 			 */
2790 			mutex_enter(&ddv->sdev_lookup_lock);
2791 			SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
2792 			mutex_exit(&ddv->sdev_lookup_lock);
2793 		}
2794 
2795 		/*
2796 		 * release the contents lock so that
2797 		 * the cache may be updated by devfsadmd
2798 		 */
2799 		rw_exit(&ddv->sdev_contents);
2800 		mutex_enter(&ddv->sdev_lookup_lock);
2801 		if (SDEV_IS_READDIR(ddv))
2802 			(void) sdev_wait4lookup(ddv, SDEV_READDIR);
2803 		mutex_exit(&ddv->sdev_lookup_lock);
2804 		rw_enter(&ddv->sdev_contents, RW_READER);
2805 
2806 		sdcmn_err4(("readdir of directory %s by %s\n",
2807 		    ddv->sdev_name, curproc->p_user.u_comm));
2808 		if (ddv->sdev_flags & SDEV_BUILD) {
2809 			if (SDEV_IS_PERSIST(ddv)) {
2810 				error = sdev_filldir_from_store(ddv,
2811 				    alloc_count, cred);
2812 			}
2813 			ddv->sdev_flags &= ~SDEV_BUILD;
2814 		}
2815 	}
2816 
2817 get_cache:
2818 	/* handle "." and ".." */
2819 	diroff = 0;
2820 	if (soff == 0) {
2821 		/* first time */
2822 		this_reclen = DIRENT64_RECLEN(1);
2823 		if (alloc_count < this_reclen) {
2824 			error = EINVAL;
2825 			goto done;
2826 		}
2827 
2828 		dp->d_ino = (ino64_t)ddv->sdev_ino;
2829 		dp->d_off = (off64_t)1;
2830 		dp->d_reclen = (ushort_t)this_reclen;
2831 
2832 		(void) strncpy(dp->d_name, ".",
2833 		    DIRENT64_NAMELEN(this_reclen));
2834 		outcount += dp->d_reclen;
2835 		dp = nextdp(dp);
2836 	}
2837 
2838 	diroff++;
2839 	if (soff <= 1) {
2840 		this_reclen = DIRENT64_RECLEN(2);
2841 		if (alloc_count < outcount + this_reclen) {
2842 			error = EINVAL;
2843 			goto done;
2844 		}
2845 
2846 		dp->d_reclen = (ushort_t)this_reclen;
2847 		dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
2848 		dp->d_off = (off64_t)2;
2849 
2850 		(void) strncpy(dp->d_name, "..",
2851 		    DIRENT64_NAMELEN(this_reclen));
2852 		outcount += dp->d_reclen;
2853 
2854 		dp = nextdp(dp);
2855 	}
2856 
2857 
2858 	/* gets the cache */
2859 	diroff++;
2860 	for (dv = SDEV_FIRST_ENTRY(ddv); dv;
2861 	    dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
2862 		sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
2863 		    diroff, soff, dv->sdev_name));
2864 
2865 		/* bypassing pre-matured nodes */
2866 		if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
2867 			sdcmn_err3(("sdev_readdir: pre-mature node  "
2868 			    "%s\n", dv->sdev_name));
2869 			continue;
2870 		}
2871 
2872 		/*
2873 		 * Check validity of node
2874 		 */
2875 		if (vtor) {
2876 			switch (vtor(dv)) {
2877 			case SDEV_VTOR_VALID:
2878 				break;
2879 			case SDEV_VTOR_INVALID:
2880 			case SDEV_VTOR_SKIP:
2881 				continue;
2882 			default:
2883 				cmn_err(CE_PANIC,
2884 				    "dev fs: validator failed: %s(%p)\n",
2885 				    dv->sdev_name, (void *)dv);
2886 				break;
2887 			/*NOTREACHED*/
2888 			}
2889 		}
2890 
2891 		/*
2892 		 * call back into the module for the validity/bookkeeping
2893 		 * of this entry
2894 		 */
2895 		if (fn) {
2896 			error = (*fn)(&(dv->sdev_handle), cred);
2897 			if (error) {
2898 				sdcmn_err4(("sdev_readdir: module did not "
2899 				    "validate %s\n", dv->sdev_name));
2900 				continue;
2901 			}
2902 		}
2903 
2904 		namelen = strlen(dv->sdev_name);
2905 		reclen = DIRENT64_RECLEN(namelen);
2906 		if (outcount + reclen > alloc_count) {
2907 			goto full;
2908 		}
2909 		dp->d_reclen = (ushort_t)reclen;
2910 		dp->d_ino = (ino64_t)dv->sdev_ino;
2911 		dp->d_off = (off64_t)diroff + 1;
2912 		(void) strncpy(dp->d_name, dv->sdev_name,
2913 		    DIRENT64_NAMELEN(reclen));
2914 		outcount += reclen;
2915 		dp = nextdp(dp);
2916 	}
2917 
2918 full:
2919 	sdcmn_err4(("sdev_readdir: moving %lu bytes: "
2920 	    "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
2921 	    (void *)dv));
2922 
2923 	if (outcount)
2924 		error = uiomove(outbuf, outcount, UIO_READ, uiop);
2925 
2926 	if (!error) {
2927 		uiop->uio_loffset = diroff;
2928 		if (eofp)
2929 			*eofp = dv ? 0 : 1;
2930 	}
2931 
2932 
2933 	if (ddv->sdev_attrvp) {
2934 		gethrestime(&now);
2935 		attr.va_ctime = now;
2936 		attr.va_atime = now;
2937 		attr.va_mask = AT_CTIME|AT_ATIME;
2938 
2939 		(void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
2940 	}
2941 done:
2942 	kmem_free(outbuf, alloc_count);
2943 	return (error);
2944 }
2945 
2946 
2947 static int
2948 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2949 {
2950 	vnode_t *vp;
2951 	vnode_t *cvp;
2952 	struct sdev_node *svp;
2953 	char *nm;
2954 	struct pathname pn;
2955 	int error;
2956 	int persisted = 0;
2957 
2958 	if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2959 		return (error);
2960 	nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2961 
2962 	vp = rootdir;
2963 	VN_HOLD(vp);
2964 
2965 	while (pn_pathleft(&pn)) {
2966 		ASSERT(vp->v_type == VDIR);
2967 		(void) pn_getcomponent(&pn, nm);
2968 		error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2969 		    NULL, NULL);
2970 		VN_RELE(vp);
2971 
2972 		if (error)
2973 			break;
2974 
2975 		/* traverse mount points encountered on our journey */
2976 		if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2977 			VN_RELE(cvp);
2978 			break;
2979 		}
2980 
2981 		/*
2982 		 * Direct the operation to the persisting filesystem
2983 		 * underlying /dev.  Bail if we encounter a
2984 		 * non-persistent dev entity here.
2985 		 */
2986 		if (cvp->v_vfsp->vfs_fstype == devtype) {
2987 
2988 			if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2989 				error = ENOENT;
2990 				VN_RELE(cvp);
2991 				break;
2992 			}
2993 
2994 			if (VTOSDEV(cvp) == NULL) {
2995 				error = ENOENT;
2996 				VN_RELE(cvp);
2997 				break;
2998 			}
2999 			svp = VTOSDEV(cvp);
3000 			if ((vp = svp->sdev_attrvp) == NULL) {
3001 				error = ENOENT;
3002 				VN_RELE(cvp);
3003 				break;
3004 			}
3005 			persisted = 1;
3006 			VN_HOLD(vp);
3007 			VN_RELE(cvp);
3008 			cvp = vp;
3009 		}
3010 
3011 		vp = cvp;
3012 		pn_skipslash(&pn);
3013 	}
3014 
3015 	kmem_free(nm, MAXNAMELEN);
3016 	pn_free(&pn);
3017 
3018 	if (error)
3019 		return (error);
3020 
3021 	/*
3022 	 * Only return persisted nodes in the filesystem underlying /dev.
3023 	 */
3024 	if (!persisted) {
3025 		VN_RELE(vp);
3026 		return (ENOENT);
3027 	}
3028 
3029 	*r_vp = vp;
3030 	return (0);
3031 }
3032 
3033 int
3034 sdev_modctl_readdir(const char *dir, char ***dirlistp,
3035 	int *npathsp, int *npathsp_alloc, int checking_empty)
3036 {
3037 	char	**pathlist = NULL;
3038 	char	**newlist = NULL;
3039 	int	npaths = 0;
3040 	int	npaths_alloc = 0;
3041 	dirent64_t *dbuf = NULL;
3042 	int	n;
3043 	char	*s;
3044 	int error;
3045 	vnode_t *vp;
3046 	int eof;
3047 	struct iovec iov;
3048 	struct uio uio;
3049 	struct dirent64 *dp;
3050 	size_t dlen;
3051 	size_t dbuflen;
3052 	int ndirents = 64;
3053 	char *nm;
3054 
3055 	error = sdev_modctl_lookup(dir, &vp);
3056 	sdcmn_err11(("modctl readdir: %s by %s: %s\n",
3057 	    dir, curproc->p_user.u_comm,
3058 	    (error == 0) ? "ok" : "failed"));
3059 	if (error)
3060 		return (error);
3061 
3062 	dlen = ndirents * (sizeof (*dbuf));
3063 	dbuf = kmem_alloc(dlen, KM_SLEEP);
3064 
3065 	uio.uio_iov = &iov;
3066 	uio.uio_iovcnt = 1;
3067 	uio.uio_segflg = UIO_SYSSPACE;
3068 	uio.uio_fmode = 0;
3069 	uio.uio_extflg = UIO_COPY_CACHED;
3070 	uio.uio_loffset = 0;
3071 	uio.uio_llimit = MAXOFFSET_T;
3072 
3073 	eof = 0;
3074 	error = 0;
3075 	while (!error && !eof) {
3076 		uio.uio_resid = dlen;
3077 		iov.iov_base = (char *)dbuf;
3078 		iov.iov_len = dlen;
3079 
3080 		(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3081 		error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
3082 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3083 
3084 		dbuflen = dlen - uio.uio_resid;
3085 
3086 		if (error || dbuflen == 0)
3087 			break;
3088 
3089 		for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
3090 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
3091 
3092 			nm = dp->d_name;
3093 
3094 			if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
3095 				continue;
3096 			if (npaths == npaths_alloc) {
3097 				npaths_alloc += 64;
3098 				newlist = (char **)
3099 				    kmem_zalloc((npaths_alloc + 1) *
3100 				    sizeof (char *), KM_SLEEP);
3101 				if (pathlist) {
3102 					bcopy(pathlist, newlist,
3103 					    npaths * sizeof (char *));
3104 					kmem_free(pathlist,
3105 					    (npaths + 1) * sizeof (char *));
3106 				}
3107 				pathlist = newlist;
3108 			}
3109 			n = strlen(nm) + 1;
3110 			s = kmem_alloc(n, KM_SLEEP);
3111 			bcopy(nm, s, n);
3112 			pathlist[npaths++] = s;
3113 			sdcmn_err11(("  %s/%s\n", dir, s));
3114 
3115 			/* if checking empty, one entry is as good as many */
3116 			if (checking_empty) {
3117 				eof = 1;
3118 				break;
3119 			}
3120 		}
3121 	}
3122 
3123 exit:
3124 	VN_RELE(vp);
3125 
3126 	if (dbuf)
3127 		kmem_free(dbuf, dlen);
3128 
3129 	if (error)
3130 		return (error);
3131 
3132 	*dirlistp = pathlist;
3133 	*npathsp = npaths;
3134 	*npathsp_alloc = npaths_alloc;
3135 
3136 	return (0);
3137 }
3138 
/*
 * Free a name list produced by sdev_modctl_readdir().
 *
 *	pathlist	- the array of names; may be NULL
 *	npaths		- number of names stored in the array
 *	npaths_alloc	- slot count reported by sdev_modctl_readdir();
 *			  the array was allocated with one extra slot
 */
void
sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
{
	int	i, n;

	/*
	 * sdev_modctl_readdir() hands back a NULL list when it found
	 * no names; there is nothing to release in that case.
	 */
	if (pathlist == NULL)
		return;

	for (i = 0; i < npaths; i++) {
		n = strlen(pathlist[i]) + 1;
		kmem_free(pathlist[i], n);
	}

	kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
}
3151 
3152 int
3153 sdev_modctl_devexists(const char *path)
3154 {
3155 	vnode_t *vp;
3156 	int error;
3157 
3158 	error = sdev_modctl_lookup(path, &vp);
3159 	sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
3160 	    path, curproc->p_user.u_comm,
3161 	    (error == 0) ? "ok" : "failed"));
3162 	if (error == 0)
3163 		VN_RELE(vp);
3164 
3165 	return (error);
3166 }
3167 
3168 void
3169 sdev_update_newnsmap(struct devname_nsmap *map, char *module, char *mapname)
3170 {
3171 	rw_enter(&map->dir_lock, RW_WRITER);
3172 	if (module) {
3173 		ASSERT(map->dir_newmodule == NULL);
3174 		map->dir_newmodule = i_ddi_strdup(module, KM_SLEEP);
3175 	}
3176 	if (mapname) {
3177 		ASSERT(map->dir_newmap == NULL);
3178 		map->dir_newmap = i_ddi_strdup(mapname, KM_SLEEP);
3179 	}
3180 
3181 	map->dir_invalid = 1;
3182 	rw_exit(&map->dir_lock);
3183 }
3184 
3185 void
3186 sdev_replace_nsmap(struct devname_nsmap *map, char *module, char *mapname)
3187 {
3188 	char *old_module = NULL;
3189 	char *old_map = NULL;
3190 
3191 	ASSERT(RW_LOCK_HELD(&map->dir_lock));
3192 	if (!rw_tryupgrade(&map->dir_lock)) {
3193 		rw_exit(&map->dir_lock);
3194 		rw_enter(&map->dir_lock, RW_WRITER);
3195 	}
3196 
3197 	old_module = map->dir_module;
3198 	if (module) {
3199 		if (old_module && strcmp(old_module, module) != 0) {
3200 			kmem_free(old_module, strlen(old_module) + 1);
3201 		}
3202 		map->dir_module = module;
3203 		map->dir_newmodule = NULL;
3204 	}
3205 
3206 	old_map = map->dir_map;
3207 	if (mapname) {
3208 		if (old_map && strcmp(old_map, mapname) != 0) {
3209 			kmem_free(old_map, strlen(old_map) + 1);
3210 		}
3211 
3212 		map->dir_map = mapname;
3213 		map->dir_newmap = NULL;
3214 	}
3215 	map->dir_maploaded = 0;
3216 	map->dir_invalid = 0;
3217 	rw_downgrade(&map->dir_lock);
3218 }
3219 
3220 /*
3221  * dir_name should have at least one attribute,
3222  *	dir_module
3223  *	or dir_map
3224  *	or both
3225  * caller holds the devname_nsmaps_lock
3226  */
3227 void
3228 sdev_insert_nsmap(char *dir_name, char *dir_module, char *dir_map)
3229 {
3230 	struct devname_nsmap *map;
3231 	int len = 0;
3232 
3233 	ASSERT(dir_name);
3234 	ASSERT(dir_module || dir_map);
3235 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3236 
3237 	if (map = sdev_get_nsmap_by_dir(dir_name, 1)) {
3238 		sdev_update_newnsmap(map, dir_module, dir_map);
3239 		return;
3240 	}
3241 
3242 	map = (struct devname_nsmap *)kmem_zalloc(sizeof (*map), KM_SLEEP);
3243 	map->dir_name = i_ddi_strdup(dir_name, KM_SLEEP);
3244 	if (dir_module) {
3245 		map->dir_module = i_ddi_strdup(dir_module, KM_SLEEP);
3246 	}
3247 
3248 	if (dir_map) {
3249 		if (dir_map[0] != '/') {
3250 			len = strlen(ETC_DEV_DIR) + strlen(dir_map) + 2;
3251 			map->dir_map = kmem_zalloc(len, KM_SLEEP);
3252 			(void) snprintf(map->dir_map, len, "%s/%s", ETC_DEV_DIR,
3253 			    dir_map);
3254 		} else {
3255 			map->dir_map = i_ddi_strdup(dir_map, KM_SLEEP);
3256 		}
3257 	}
3258 
3259 	map->dir_ops = NULL;
3260 	map->dir_maploaded = 0;
3261 	map->dir_invalid = 0;
3262 	rw_init(&map->dir_lock, NULL, RW_DEFAULT, NULL);
3263 
3264 	map->next = devname_nsmaps;
3265 	map->prev = NULL;
3266 	if (devname_nsmaps) {
3267 		devname_nsmaps->prev = map;
3268 	}
3269 	devname_nsmaps = map;
3270 }
3271 
3272 struct devname_nsmap *
3273 sdev_get_nsmap_by_dir(char *dir_path, int locked)
3274 {
3275 	struct devname_nsmap *map = NULL;
3276 
3277 	if (!locked)
3278 		mutex_enter(&devname_nsmaps_lock);
3279 	for (map = devname_nsmaps; map; map = map->next) {
3280 		sdcmn_err6(("sdev_get_nsmap_by_dir: dir %s\n", map->dir_name));
3281 		if (strcmp(map->dir_name, dir_path) == 0) {
3282 			if (!locked)
3283 				mutex_exit(&devname_nsmaps_lock);
3284 			return (map);
3285 		}
3286 	}
3287 	if (!locked)
3288 		mutex_exit(&devname_nsmaps_lock);
3289 	return (NULL);
3290 }
3291 
3292 struct devname_nsmap *
3293 sdev_get_nsmap_by_module(char *mod_name)
3294 {
3295 	struct devname_nsmap *map = NULL;
3296 
3297 	mutex_enter(&devname_nsmaps_lock);
3298 	for (map = devname_nsmaps; map; map = map->next) {
3299 		sdcmn_err7(("sdev_get_nsmap_by_module: module %s\n",
3300 		    map->dir_module));
3301 		if (map->dir_module && strcmp(map->dir_module, mod_name) == 0) {
3302 			mutex_exit(&devname_nsmaps_lock);
3303 			return (map);
3304 		}
3305 	}
3306 	mutex_exit(&devname_nsmaps_lock);
3307 	return (NULL);
3308 }
3309 
3310 void
3311 sdev_invalidate_nsmaps()
3312 {
3313 	struct devname_nsmap *map = NULL;
3314 
3315 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3316 
3317 	if (devname_nsmaps == NULL)
3318 		return;
3319 
3320 	for (map = devname_nsmaps; map; map = map->next) {
3321 		rw_enter(&map->dir_lock, RW_WRITER);
3322 		map->dir_invalid = 1;
3323 		rw_exit(&map->dir_lock);
3324 	}
3325 	devname_nsmaps_invalidated = 1;
3326 }
3327 
3328 
3329 int
3330 sdev_nsmaps_loaded()
3331 {
3332 	int ret = 0;
3333 
3334 	mutex_enter(&devname_nsmaps_lock);
3335 	if (devname_nsmaps_loaded)
3336 		ret = 1;
3337 
3338 	mutex_exit(&devname_nsmaps_lock);
3339 	return (ret);
3340 }
3341 
3342 int
3343 sdev_nsmaps_reloaded()
3344 {
3345 	int ret = 0;
3346 
3347 	mutex_enter(&devname_nsmaps_lock);
3348 	if (devname_nsmaps_invalidated)
3349 		ret = 1;
3350 
3351 	mutex_exit(&devname_nsmaps_lock);
3352 	return (ret);
3353 }
3354 
3355 static void
3356 sdev_free_nsmap(struct devname_nsmap *map)
3357 {
3358 	ASSERT(map);
3359 	if (map->dir_name)
3360 		kmem_free(map->dir_name, strlen(map->dir_name) + 1);
3361 	if (map->dir_module)
3362 		kmem_free(map->dir_module, strlen(map->dir_module) + 1);
3363 	if (map->dir_map)
3364 		kmem_free(map->dir_map, strlen(map->dir_map) + 1);
3365 	rw_destroy(&map->dir_lock);
3366 	kmem_free(map, sizeof (*map));
3367 }
3368 
3369 void
3370 sdev_validate_nsmaps()
3371 {
3372 	struct devname_nsmap *map = NULL;
3373 	struct devname_nsmap *oldmap = NULL;
3374 
3375 	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3376 	map = devname_nsmaps;
3377 	while (map) {
3378 		rw_enter(&map->dir_lock, RW_READER);
3379 		if ((map->dir_invalid == 1) && (map->dir_newmodule == NULL) &&
3380 		    (map->dir_newmap == NULL)) {
3381 			oldmap = map;
3382 			rw_exit(&map->dir_lock);
3383 			if (map->prev)
3384 				map->prev->next = oldmap->next;
3385 			if (map == devname_nsmaps)
3386 				devname_nsmaps = oldmap->next;
3387 
3388 			map = oldmap->next;
3389 			if (map)
3390 				map->prev = oldmap->prev;
3391 			sdev_free_nsmap(oldmap);
3392 			oldmap = NULL;
3393 		} else {
3394 			rw_exit(&map->dir_lock);
3395 			map = map->next;
3396 		}
3397 	}
3398 	devname_nsmaps_invalidated = 0;
3399 }
3400 
3401 static int
3402 sdev_map_is_invalid(struct devname_nsmap *map)
3403 {
3404 	int ret = 0;
3405 
3406 	ASSERT(map);
3407 	rw_enter(&map->dir_lock, RW_READER);
3408 	if (map->dir_invalid)
3409 		ret = 1;
3410 	rw_exit(&map->dir_lock);
3411 	return (ret);
3412 }
3413 
3414 static int
3415 sdev_check_map(struct devname_nsmap *map)
3416 {
3417 	struct devname_nsmap *mapp;
3418 
3419 	mutex_enter(&devname_nsmaps_lock);
3420 	if (devname_nsmaps == NULL) {
3421 		mutex_exit(&devname_nsmaps_lock);
3422 		return (1);
3423 	}
3424 
3425 	for (mapp = devname_nsmaps; mapp; mapp = mapp->next) {
3426 		if (mapp == map) {
3427 			mutex_exit(&devname_nsmaps_lock);
3428 			return (0);
3429 		}
3430 	}
3431 
3432 	mutex_exit(&devname_nsmaps_lock);
3433 	return (1);
3434 
3435 }
3436 
3437 struct devname_nsmap *
3438 sdev_get_map(struct sdev_node *dv, int validate)
3439 {
3440 	struct devname_nsmap *map;
3441 	int error;
3442 
3443 	ASSERT(RW_READ_HELD(&dv->sdev_contents));
3444 	map = dv->sdev_mapinfo;
3445 	if (map && sdev_check_map(map)) {
3446 		if (!rw_tryupgrade(&dv->sdev_contents)) {
3447 			rw_exit(&dv->sdev_contents);
3448 			rw_enter(&dv->sdev_contents, RW_WRITER);
3449 		}
3450 		dv->sdev_mapinfo = NULL;
3451 		rw_downgrade(&dv->sdev_contents);
3452 		return (NULL);
3453 	}
3454 
3455 	if (validate && (!map || (map && sdev_map_is_invalid(map)))) {
3456 		if (!rw_tryupgrade(&dv->sdev_contents)) {
3457 			rw_exit(&dv->sdev_contents);
3458 			rw_enter(&dv->sdev_contents, RW_WRITER);
3459 		}
3460 		error = sdev_get_moduleops(dv);
3461 		if (!error)
3462 			map = dv->sdev_mapinfo;
3463 		rw_downgrade(&dv->sdev_contents);
3464 	}
3465 	return (map);
3466 }
3467 
3468 extern int sdev_vnodeops_tbl_size;
3469 
3470 /*
3471  * construct a new template with overrides from vtab
3472  */
3473 static fs_operation_def_t *
3474 sdev_merge_vtab(const fs_operation_def_t tab[])
3475 {
3476 	fs_operation_def_t *new;
3477 	const fs_operation_def_t *tab_entry;
3478 
3479 	/* make a copy of standard vnode ops table */
3480 	new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
3481 	bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
3482 
3483 	/* replace the overrides from tab */
3484 	for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
3485 		fs_operation_def_t *std_entry = new;
3486 		while (std_entry->name) {
3487 			if (strcmp(tab_entry->name, std_entry->name) == 0) {
3488 				std_entry->func = tab_entry->func;
3489 				break;
3490 			}
3491 			std_entry++;
3492 		}
3493 		if (std_entry->name == NULL)
3494 			cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
3495 			    tab_entry->name);
3496 	}
3497 
3498 	return (new);
3499 }
3500 
3501 /* free memory allocated by sdev_merge_vtab */
3502 static void
3503 sdev_free_vtab(fs_operation_def_t *new)
3504 {
3505 	kmem_free(new, sdev_vnodeops_tbl_size);
3506 }
3507 
3508 void
3509 devname_get_vnode(devname_handle_t *hdl, vnode_t **vpp)
3510 {
3511 	struct sdev_node *dv = hdl->dh_data;
3512 
3513 	ASSERT(dv);
3514 
3515 	rw_enter(&dv->sdev_contents, RW_READER);
3516 	*vpp = SDEVTOV(dv);
3517 	rw_exit(&dv->sdev_contents);
3518 }
3519 
3520 int
3521 devname_get_path(devname_handle_t *hdl, char **path)
3522 {
3523 	struct sdev_node *dv = hdl->dh_data;
3524 
3525 	ASSERT(dv);
3526 
3527 	rw_enter(&dv->sdev_contents, RW_READER);
3528 	*path = dv->sdev_path;
3529 	rw_exit(&dv->sdev_contents);
3530 	return (0);
3531 }
3532 
3533 int
3534 devname_get_name(devname_handle_t *hdl, char **entry)
3535 {
3536 	struct sdev_node *dv = hdl->dh_data;
3537 
3538 	ASSERT(dv);
3539 	rw_enter(&dv->sdev_contents, RW_READER);
3540 	*entry = dv->sdev_name;
3541 	rw_exit(&dv->sdev_contents);
3542 	return (0);
3543 }
3544 
3545 void
3546 devname_get_dir_vnode(devname_handle_t *hdl, vnode_t **vpp)
3547 {
3548 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3549 
3550 	ASSERT(dv);
3551 
3552 	rw_enter(&dv->sdev_contents, RW_READER);
3553 	*vpp = SDEVTOV(dv);
3554 	rw_exit(&dv->sdev_contents);
3555 }
3556 
3557 int
3558 devname_get_dir_path(devname_handle_t *hdl, char **path)
3559 {
3560 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3561 
3562 	ASSERT(dv);
3563 	rw_enter(&dv->sdev_contents, RW_READER);
3564 	*path = dv->sdev_path;
3565 	rw_exit(&dv->sdev_contents);
3566 	return (0);
3567 }
3568 
3569 int
3570 devname_get_dir_name(devname_handle_t *hdl, char **entry)
3571 {
3572 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3573 
3574 	ASSERT(dv);
3575 	rw_enter(&dv->sdev_contents, RW_READER);
3576 	*entry = dv->sdev_name;
3577 	rw_exit(&dv->sdev_contents);
3578 	return (0);
3579 }
3580 
3581 int
3582 devname_get_dir_nsmap(devname_handle_t *hdl, struct devname_nsmap **map)
3583 {
3584 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3585 
3586 	ASSERT(dv);
3587 	rw_enter(&dv->sdev_contents, RW_READER);
3588 	*map = dv->sdev_mapinfo;
3589 	rw_exit(&dv->sdev_contents);
3590 	return (0);
3591 }
3592 
3593 int
3594 devname_get_dir_handle(devname_handle_t *hdl, devname_handle_t **dir_hdl)
3595 {
3596 	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3597 
3598 	ASSERT(dv);
3599 	rw_enter(&dv->sdev_contents, RW_READER);
3600 	*dir_hdl = &(dv->sdev_handle);
3601 	rw_exit(&dv->sdev_contents);
3602 	return (0);
3603 }
3604 
3605 void
3606 devname_set_nodetype(devname_handle_t *hdl, void *args, int spec)
3607 {
3608 	struct sdev_node *dv = hdl->dh_data;
3609 
3610 	ASSERT(dv);
3611 	rw_enter(&dv->sdev_contents, RW_WRITER);
3612 	hdl->dh_spec = (devname_spec_t)spec;
3613 	hdl->dh_args = (void *)i_ddi_strdup((char *)args, KM_SLEEP);
3614 	rw_exit(&dv->sdev_contents);
3615 }
3616 
3617 /*
3618  * a generic setattr() function
3619  *
3620  * note: flags only supports AT_UID and AT_GID.
3621  *	 Future enhancements can be done for other types, e.g. AT_MODE
3622  */
3623 int
3624 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
3625     struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
3626     int), int protocol)
3627 {
3628 	struct sdev_node	*dv = VTOSDEV(vp);
3629 	struct sdev_node	*parent = dv->sdev_dotdot;
3630 	struct vattr		*get;
3631 	uint_t			mask = vap->va_mask;
3632 	int 			error;
3633 
3634 	/* some sanity checks */
3635 	if (vap->va_mask & AT_NOSET)
3636 		return (EINVAL);
3637 
3638 	if (vap->va_mask & AT_SIZE) {
3639 		if (vp->v_type == VDIR) {
3640 			return (EISDIR);
3641 		}
3642 	}
3643 
3644 	/* no need to set attribute, but do not fail either */
3645 	ASSERT(parent);
3646 	rw_enter(&parent->sdev_contents, RW_READER);
3647 	if (dv->sdev_state == SDEV_ZOMBIE) {
3648 		rw_exit(&parent->sdev_contents);
3649 		return (0);
3650 	}
3651 
3652 	/* If backing store exists, just set it. */
3653 	if (dv->sdev_attrvp) {
3654 		rw_exit(&parent->sdev_contents);
3655 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3656 	}
3657 
3658 	/*
3659 	 * Otherwise, for nodes with the persistence attribute, create it.
3660 	 */
3661 	ASSERT(dv->sdev_attr);
3662 	if (SDEV_IS_PERSIST(dv) ||
3663 	    ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
3664 		sdev_vattr_merge(dv, vap);
3665 		rw_enter(&dv->sdev_contents, RW_WRITER);
3666 		error = sdev_shadow_node(dv, cred);
3667 		rw_exit(&dv->sdev_contents);
3668 		rw_exit(&parent->sdev_contents);
3669 
3670 		if (error)
3671 			return (error);
3672 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3673 	}
3674 
3675 
3676 	/*
3677 	 * sdev_attr was allocated in sdev_mknode
3678 	 */
3679 	rw_enter(&dv->sdev_contents, RW_WRITER);
3680 	error = secpolicy_vnode_setattr(cred, vp, vap,
3681 	    dv->sdev_attr, flags, sdev_unlocked_access, dv);
3682 	if (error) {
3683 		rw_exit(&dv->sdev_contents);
3684 		rw_exit(&parent->sdev_contents);
3685 		return (error);
3686 	}
3687 
3688 	get = dv->sdev_attr;
3689 	if (mask & AT_MODE) {
3690 		get->va_mode &= S_IFMT;
3691 		get->va_mode |= vap->va_mode & ~S_IFMT;
3692 	}
3693 
3694 	if ((mask & AT_UID) || (mask & AT_GID)) {
3695 		if (mask & AT_UID)
3696 			get->va_uid = vap->va_uid;
3697 		if (mask & AT_GID)
3698 			get->va_gid = vap->va_gid;
3699 		/*
3700 		 * a callback must be provided if the protocol is set
3701 		 */
3702 		if ((protocol & AT_UID) || (protocol & AT_GID)) {
3703 			ASSERT(callback);
3704 			error = callback(dv, get, protocol);
3705 			if (error) {
3706 				rw_exit(&dv->sdev_contents);
3707 				rw_exit(&parent->sdev_contents);
3708 				return (error);
3709 			}
3710 		}
3711 	}
3712 
3713 	if (mask & AT_ATIME)
3714 		get->va_atime = vap->va_atime;
3715 	if (mask & AT_MTIME)
3716 		get->va_mtime = vap->va_mtime;
3717 	if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
3718 		gethrestime(&get->va_ctime);
3719 	}
3720 
3721 	sdev_vattr_merge(dv, get);
3722 	rw_exit(&dv->sdev_contents);
3723 	rw_exit(&parent->sdev_contents);
3724 	return (0);
3725 }
3726 
3727 /*
3728  * a generic inactive() function
3729  */
3730 void
3731 devname_inactive_func(struct vnode *vp, struct cred *cred,
3732     void (*callback)(struct vnode *))
3733 {
3734 	int clean;
3735 	struct sdev_node *dv = VTOSDEV(vp);
3736 	struct sdev_node *ddv = dv->sdev_dotdot;
3737 	int state;
3738 	struct devname_nsmap *map = NULL;
3739 	struct devname_ops *dirops = NULL;
3740 	void (*fn)(devname_handle_t *, struct cred *) = NULL;
3741 
3742 	rw_enter(&ddv->sdev_contents, RW_WRITER);
3743 	state = dv->sdev_state;
3744 
3745 	mutex_enter(&vp->v_lock);
3746 	ASSERT(vp->v_count >= 1);
3747 
3748 	if (vp->v_count == 1 && callback != NULL)
3749 		callback(vp);
3750 
3751 	clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
3752 
3753 	/*
3754 	 * last ref count on the ZOMBIE node is released.
3755 	 * clean up the sdev_node, and
3756 	 * release the hold on the backing store node so that
3757 	 * the ZOMBIE backing stores also cleaned out.
3758 	 */
3759 	if (clean) {
3760 		ASSERT(ddv);
3761 		if (SDEV_IS_GLOBAL(dv)) {
3762 			map = ddv->sdev_mapinfo;
3763 			dirops = map ? map->dir_ops : NULL;
3764 			if (dirops && (fn = dirops->devnops_inactive))
3765 				(*fn)(&(dv->sdev_handle), cred);
3766 		}
3767 
3768 		ddv->sdev_nlink--;
3769 		if (vp->v_type == VDIR) {
3770 			dv->sdev_nlink--;
3771 		}
3772 		if ((dv->sdev_flags & SDEV_STALE) == 0)
3773 			avl_remove(&ddv->sdev_entries, dv);
3774 		dv->sdev_nlink--;
3775 		--vp->v_count;
3776 		mutex_exit(&vp->v_lock);
3777 		sdev_nodedestroy(dv, 0);
3778 	} else {
3779 		--vp->v_count;
3780 		mutex_exit(&vp->v_lock);
3781 	}
3782 	rw_exit(&ddv->sdev_contents);
3783 }
3784