xref: /illumos-gate/usr/src/uts/common/fs/dev/sdev_subr.c (revision 33efde4275d24731ef87927237b0ffb0630b6b2d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2013, 2016 Joyent, Inc. All rights reserved.
24  * Copyright (c) 2017 by Delphix. All rights reserved.
25  */
26 
27 /*
28  * utility routines for the /dev fs
29  */
30 
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/t_lock.h>
34 #include <sys/systm.h>
35 #include <sys/sysmacros.h>
36 #include <sys/user.h>
37 #include <sys/time.h>
38 #include <sys/vfs.h>
39 #include <sys/vnode.h>
40 #include <sys/file.h>
41 #include <sys/fcntl.h>
42 #include <sys/flock.h>
43 #include <sys/kmem.h>
44 #include <sys/uio.h>
45 #include <sys/errno.h>
46 #include <sys/stat.h>
47 #include <sys/cred.h>
48 #include <sys/dirent.h>
49 #include <sys/pathname.h>
50 #include <sys/cmn_err.h>
51 #include <sys/debug.h>
52 #include <sys/mode.h>
53 #include <sys/policy.h>
54 #include <fs/fs_subr.h>
55 #include <sys/mount.h>
56 #include <sys/fs/snode.h>
57 #include <sys/fs/dv_node.h>
58 #include <sys/fs/sdev_impl.h>
59 #include <sys/sunndi.h>
60 #include <sys/sunmdi.h>
61 #include <sys/conf.h>
62 #include <sys/proc.h>
63 #include <sys/user.h>
64 #include <sys/modctl.h>
65 
#ifdef DEBUG
/*
 * Debug-only tunables; these exist only in DEBUG builds.
 */
/* NOTE(review): presumably the message mask consulted by sdcmn_err*() */
int sdev_debug = 0x00000001;
/* used as the kmem cache flags in sdev_node_cache_init() */
int sdev_debug_cache_flags = 0;
#endif
70 
71 /*
72  * globals
73  */
74 /* prototype memory vattrs */
75 vattr_t sdev_vattr_dir = {
76 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
77 	VDIR,					/* va_type */
78 	SDEV_DIRMODE_DEFAULT,			/* va_mode */
79 	SDEV_UID_DEFAULT,			/* va_uid */
80 	SDEV_GID_DEFAULT,			/* va_gid */
81 	0,					/* va_fsid */
82 	0,					/* va_nodeid */
83 	0,					/* va_nlink */
84 	0,					/* va_size */
85 	0,					/* va_atime */
86 	0,					/* va_mtime */
87 	0,					/* va_ctime */
88 	0,					/* va_rdev */
89 	0,					/* va_blksize */
90 	0,					/* va_nblocks */
91 	0					/* va_vcode */
92 };
93 
94 vattr_t sdev_vattr_lnk = {
95 	AT_TYPE|AT_MODE,			/* va_mask */
96 	VLNK,					/* va_type */
97 	SDEV_LNKMODE_DEFAULT,			/* va_mode */
98 	SDEV_UID_DEFAULT,			/* va_uid */
99 	SDEV_GID_DEFAULT,			/* va_gid */
100 	0,					/* va_fsid */
101 	0,					/* va_nodeid */
102 	0,					/* va_nlink */
103 	0,					/* va_size */
104 	0,					/* va_atime */
105 	0,					/* va_mtime */
106 	0,					/* va_ctime */
107 	0,					/* va_rdev */
108 	0,					/* va_blksize */
109 	0,					/* va_nblocks */
110 	0					/* va_vcode */
111 };
112 
113 vattr_t sdev_vattr_blk = {
114 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
115 	VBLK,					/* va_type */
116 	S_IFBLK | SDEV_DEVMODE_DEFAULT,		/* va_mode */
117 	SDEV_UID_DEFAULT,			/* va_uid */
118 	SDEV_GID_DEFAULT,			/* va_gid */
119 	0,					/* va_fsid */
120 	0,					/* va_nodeid */
121 	0,					/* va_nlink */
122 	0,					/* va_size */
123 	0,					/* va_atime */
124 	0,					/* va_mtime */
125 	0,					/* va_ctime */
126 	0,					/* va_rdev */
127 	0,					/* va_blksize */
128 	0,					/* va_nblocks */
129 	0					/* va_vcode */
130 };
131 
132 vattr_t sdev_vattr_chr = {
133 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
134 	VCHR,					/* va_type */
135 	S_IFCHR | SDEV_DEVMODE_DEFAULT,		/* va_mode */
136 	SDEV_UID_DEFAULT,			/* va_uid */
137 	SDEV_GID_DEFAULT,			/* va_gid */
138 	0,					/* va_fsid */
139 	0,					/* va_nodeid */
140 	0,					/* va_nlink */
141 	0,					/* va_size */
142 	0,					/* va_atime */
143 	0,					/* va_mtime */
144 	0,					/* va_ctime */
145 	0,					/* va_rdev */
146 	0,					/* va_blksize */
147 	0,					/* va_nblocks */
148 	0					/* va_vcode */
149 };
150 
151 kmem_cache_t	*sdev_node_cache;	/* sdev_node cache */
152 int		devtype;		/* fstype */
153 
154 static void
sdev_prof_free(struct sdev_node * dv)155 sdev_prof_free(struct sdev_node *dv)
156 {
157 	ASSERT(!SDEV_IS_GLOBAL(dv));
158 	nvlist_free(dv->sdev_prof.dev_name);
159 	nvlist_free(dv->sdev_prof.dev_map);
160 	nvlist_free(dv->sdev_prof.dev_symlink);
161 	nvlist_free(dv->sdev_prof.dev_glob_incdir);
162 	nvlist_free(dv->sdev_prof.dev_glob_excdir);
163 	bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
164 }
165 
166 /* sdev_node cache constructor */
167 /*ARGSUSED1*/
168 static int
i_sdev_node_ctor(void * buf,void * cfarg,int flag)169 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
170 {
171 	struct sdev_node *dv = (struct sdev_node *)buf;
172 	struct vnode *vp;
173 
174 	bzero(buf, sizeof (struct sdev_node));
175 	vp = dv->sdev_vnode = vn_alloc(flag);
176 	if (vp == NULL) {
177 		return (-1);
178 	}
179 	vp->v_data = dv;
180 	rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
181 	return (0);
182 }
183 
184 /* sdev_node cache destructor */
185 /*ARGSUSED1*/
186 static void
i_sdev_node_dtor(void * buf,void * arg)187 i_sdev_node_dtor(void *buf, void *arg)
188 {
189 	struct sdev_node *dv = (struct sdev_node *)buf;
190 	struct vnode *vp = SDEVTOV(dv);
191 
192 	rw_destroy(&dv->sdev_contents);
193 	vn_free(vp);
194 }
195 
196 /* initialize sdev_node cache */
197 void
sdev_node_cache_init()198 sdev_node_cache_init()
199 {
200 	int flags = 0;
201 
202 #ifdef	DEBUG
203 	flags = sdev_debug_cache_flags;
204 	if (flags)
205 		sdcmn_err(("cache debug flags 0x%x\n", flags));
206 #endif	/* DEBUG */
207 
208 	ASSERT(sdev_node_cache == NULL);
209 	sdev_node_cache = kmem_cache_create("sdev_node_cache",
210 	    sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
211 	    NULL, NULL, NULL, flags);
212 }
213 
214 /* destroy sdev_node cache */
215 void
sdev_node_cache_fini()216 sdev_node_cache_fini()
217 {
218 	ASSERT(sdev_node_cache != NULL);
219 	kmem_cache_destroy(sdev_node_cache);
220 	sdev_node_cache = NULL;
221 }
222 
223 /*
224  * Compare two nodes lexographically to balance avl tree
225  */
226 static int
sdev_compare_nodes(const struct sdev_node * dv1,const struct sdev_node * dv2)227 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
228 {
229 	int rv;
230 	if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
231 		return (0);
232 	return ((rv < 0) ? -1 : 1);
233 }
234 
235 void
sdev_set_nodestate(struct sdev_node * dv,sdev_node_state_t state)236 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
237 {
238 	ASSERT(dv);
239 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
240 	dv->sdev_state = state;
241 }
242 
243 static void
sdev_attr_update(struct sdev_node * dv,vattr_t * vap)244 sdev_attr_update(struct sdev_node *dv, vattr_t *vap)
245 {
246 	timestruc_t	now;
247 	struct vattr	*attrp;
248 	uint_t		mask;
249 
250 	ASSERT(dv->sdev_attr);
251 	ASSERT(vap);
252 
253 	attrp = dv->sdev_attr;
254 	mask = vap->va_mask;
255 	if (mask & AT_TYPE)
256 		attrp->va_type = vap->va_type;
257 	if (mask & AT_MODE)
258 		attrp->va_mode = vap->va_mode;
259 	if (mask & AT_UID)
260 		attrp->va_uid = vap->va_uid;
261 	if (mask & AT_GID)
262 		attrp->va_gid = vap->va_gid;
263 	if (mask & AT_RDEV)
264 		attrp->va_rdev = vap->va_rdev;
265 
266 	gethrestime(&now);
267 	attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now;
268 	attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now;
269 	attrp->va_ctime = (mask & AT_CTIME) ? vap->va_ctime : now;
270 }
271 
272 static void
sdev_attr_alloc(struct sdev_node * dv,vattr_t * vap)273 sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap)
274 {
275 	ASSERT(dv->sdev_attr == NULL);
276 	ASSERT(vap->va_mask & AT_TYPE);
277 	ASSERT(vap->va_mask & AT_MODE);
278 
279 	dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
280 	sdev_attr_update(dv, vap);
281 }
282 
283 /* alloc and initialize a sdev_node */
284 int
sdev_nodeinit(struct sdev_node * ddv,char * nm,struct sdev_node ** newdv,vattr_t * vap)285 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
286     vattr_t *vap)
287 {
288 	struct sdev_node *dv = NULL;
289 	struct vnode *vp;
290 	size_t nmlen, len;
291 	devname_handle_t  *dhl;
292 
293 	nmlen = strlen(nm) + 1;
294 	if (nmlen > MAXNAMELEN) {
295 		sdcmn_err9(("sdev_nodeinit: node name %s"
296 		    " too long\n", nm));
297 		*newdv = NULL;
298 		return (ENAMETOOLONG);
299 	}
300 
301 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
302 
303 	dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
304 	bcopy(nm, dv->sdev_name, nmlen);
305 	dv->sdev_namelen = nmlen - 1;	/* '\0' not included */
306 	len = strlen(ddv->sdev_path) + strlen(nm) + 2;
307 	dv->sdev_path = kmem_alloc(len, KM_SLEEP);
308 	(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
309 	/* overwritten for VLNK nodes */
310 	dv->sdev_symlink = NULL;
311 	list_link_init(&dv->sdev_plist);
312 
313 	vp = SDEVTOV(dv);
314 	vn_reinit(vp);
315 	vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
316 	if (vap)
317 		vp->v_type = vap->va_type;
318 
319 	/*
320 	 * initialized to the parent's vnodeops.
321 	 * maybe overwriten for a VDIR
322 	 */
323 	vn_setops(vp, vn_getops(SDEVTOV(ddv)));
324 	vn_exists(vp);
325 
326 	dv->sdev_dotdot = NULL;
327 	dv->sdev_attrvp = NULL;
328 	if (vap) {
329 		sdev_attr_alloc(dv, vap);
330 	} else {
331 		dv->sdev_attr = NULL;
332 	}
333 
334 	dv->sdev_ino = sdev_mkino(dv);
335 	dv->sdev_nlink = 0;		/* updated on insert */
336 	dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
337 	dv->sdev_flags |= SDEV_BUILD;
338 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
339 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
340 	if (SDEV_IS_GLOBAL(ddv)) {
341 		dv->sdev_flags |= SDEV_GLOBAL;
342 		dhl = &(dv->sdev_handle);
343 		dhl->dh_data = dv;
344 		dhl->dh_args = NULL;
345 		sdev_set_no_negcache(dv);
346 		dv->sdev_gdir_gen = 0;
347 	} else {
348 		dv->sdev_flags &= ~SDEV_GLOBAL;
349 		dv->sdev_origin = NULL; /* set later */
350 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
351 		dv->sdev_ldir_gen = 0;
352 		dv->sdev_devtree_gen = 0;
353 	}
354 
355 	rw_enter(&dv->sdev_contents, RW_WRITER);
356 	sdev_set_nodestate(dv, SDEV_INIT);
357 	rw_exit(&dv->sdev_contents);
358 	*newdv = dv;
359 
360 	return (0);
361 }
362 
363 /*
364  * Transition a sdev_node into SDEV_READY state. If this fails, it is up to the
365  * caller to transition the node to the SDEV_ZOMBIE state.
366  */
367 int
sdev_nodeready(struct sdev_node * dv,struct vattr * vap,struct vnode * avp,void * args,struct cred * cred)368 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
369     void *args, struct cred *cred)
370 {
371 	int error = 0;
372 	struct vnode *vp = SDEVTOV(dv);
373 	vtype_t type;
374 
375 	ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
376 
377 	type = vap->va_type;
378 	vp->v_type = type;
379 	vp->v_rdev = vap->va_rdev;
380 	rw_enter(&dv->sdev_contents, RW_WRITER);
381 	if (type == VDIR) {
382 		dv->sdev_nlink = 2;
383 		dv->sdev_flags &= ~SDEV_PERSIST;
384 		dv->sdev_flags &= ~SDEV_DYNAMIC;
385 		vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
386 		ASSERT(dv->sdev_dotdot);
387 		ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
388 		vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
389 		avl_create(&dv->sdev_entries,
390 		    (int (*)(const void *, const void *))sdev_compare_nodes,
391 		    sizeof (struct sdev_node),
392 		    offsetof(struct sdev_node, sdev_avllink));
393 	} else if (type == VLNK) {
394 		ASSERT(args);
395 		dv->sdev_nlink = 1;
396 		dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
397 	} else {
398 		dv->sdev_nlink = 1;
399 	}
400 	sdev_plugin_nodeready(dv);
401 
402 	if (!(SDEV_IS_GLOBAL(dv))) {
403 		dv->sdev_origin = (struct sdev_node *)args;
404 		dv->sdev_flags &= ~SDEV_PERSIST;
405 	}
406 
407 	/*
408 	 * shadow node is created here OR
409 	 * if failed (indicated by dv->sdev_attrvp == NULL),
410 	 * created later in sdev_setattr
411 	 */
412 	if (avp) {
413 		dv->sdev_attrvp = avp;
414 	} else {
415 		if (dv->sdev_attr == NULL) {
416 			sdev_attr_alloc(dv, vap);
417 		} else {
418 			sdev_attr_update(dv, vap);
419 		}
420 
421 		if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv))
422 			error = sdev_shadow_node(dv, cred);
423 	}
424 
425 	if (error == 0) {
426 		/* transition to READY state */
427 		sdev_set_nodestate(dv, SDEV_READY);
428 		sdev_nc_node_exists(dv);
429 	}
430 	rw_exit(&dv->sdev_contents);
431 	return (error);
432 }
433 
434 /*
435  * Build the VROOT sdev_node.
436  */
437 /*ARGSUSED*/
438 struct sdev_node *
sdev_mkroot(struct vfs * vfsp,dev_t devdev,struct vnode * mvp,struct vnode * avp,struct cred * cred)439 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
440     struct vnode *avp, struct cred *cred)
441 {
442 	struct sdev_node *dv;
443 	struct vnode *vp;
444 	char devdir[] = "/dev";
445 
446 	ASSERT(sdev_node_cache != NULL);
447 	ASSERT(avp);
448 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
449 	vp = SDEVTOV(dv);
450 	vn_reinit(vp);
451 	vp->v_flag |= VROOT;
452 	vp->v_vfsp = vfsp;
453 	vp->v_type = VDIR;
454 	vp->v_rdev = devdev;
455 	vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
456 	vn_exists(vp);
457 
458 	if (vfsp->vfs_mntpt)
459 		dv->sdev_name = i_ddi_strdup(
460 		    (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
461 	else
462 		/* vfs_mountdev1 set mount point later */
463 		dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
464 	dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
465 	dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
466 	dv->sdev_ino = SDEV_ROOTINO;
467 	dv->sdev_nlink = 2;		/* name + . (no sdev_insert) */
468 	dv->sdev_dotdot = dv;		/* .. == self */
469 	dv->sdev_attrvp = avp;
470 	dv->sdev_attr = NULL;
471 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
472 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
473 	if (strcmp(dv->sdev_name, "/dev") == 0) {
474 		dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
475 		bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
476 		dv->sdev_gdir_gen = 0;
477 	} else {
478 		dv->sdev_flags = SDEV_BUILD;
479 		dv->sdev_flags &= ~SDEV_PERSIST;
480 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
481 		dv->sdev_ldir_gen = 0;
482 		dv->sdev_devtree_gen = 0;
483 	}
484 
485 	avl_create(&dv->sdev_entries,
486 	    (int (*)(const void *, const void *))sdev_compare_nodes,
487 	    sizeof (struct sdev_node),
488 	    offsetof(struct sdev_node, sdev_avllink));
489 
490 	rw_enter(&dv->sdev_contents, RW_WRITER);
491 	sdev_set_nodestate(dv, SDEV_READY);
492 	rw_exit(&dv->sdev_contents);
493 	sdev_nc_node_exists(dv);
494 	return (dv);
495 }
496 
497 struct sdev_vop_table vtab[] = {
498 	{ "pts", devpts_vnodeops_tbl, &devpts_vnodeops, devpts_validate,
499 	SDEV_DYNAMIC | SDEV_VTOR },
500 
501 	{ "vt", devvt_vnodeops_tbl, &devvt_vnodeops, devvt_validate,
502 	SDEV_DYNAMIC | SDEV_VTOR },
503 
504 	{ "zvol", devzvol_vnodeops_tbl, &devzvol_vnodeops,
505 	devzvol_validate, SDEV_ZONED | SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },
506 
507 	{ "zcons", NULL, NULL, NULL, SDEV_NO_NCACHE },
508 
509 	{ "net", devnet_vnodeops_tbl, &devnet_vnodeops, devnet_validate,
510 	SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },
511 
512 	{ "ipnet", devipnet_vnodeops_tbl, &devipnet_vnodeops,
513 	devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE },
514 
515 	/*
516 	 * SDEV_DYNAMIC: prevent calling out to devfsadm, since only the
517 	 * lofi driver controls child nodes.
518 	 *
519 	 * SDEV_PERSIST: ensure devfsadm knows to clean up any persisted
520 	 * stale nodes (e.g. from devfsadm -R).
521 	 *
522 	 * In addition, devfsadm knows not to attempt a rmdir: a zone
523 	 * may hold a reference, which would zombify the node,
524 	 * preventing a mkdir.
525 	 */
526 
527 	{ "lofi", NULL, NULL, NULL,
528 	    SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
529 	{ "rlofi", NULL, NULL, NULL,
530 	    SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
531 
532 	{ NULL, NULL, NULL, NULL, 0}
533 };
534 
535 
536 /*
537  * Build the base root inode
538  */
539 ino_t
sdev_mkino(struct sdev_node * dv)540 sdev_mkino(struct sdev_node *dv)
541 {
542 	ino_t	ino;
543 
544 	/*
545 	 * for now, follow the lead of tmpfs here
546 	 * need to someday understand the requirements here
547 	 */
548 	ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
549 	ino += SDEV_ROOTINO + 1;
550 
551 	return (ino);
552 }
553 
554 int
sdev_getlink(struct vnode * linkvp,char ** link)555 sdev_getlink(struct vnode *linkvp, char **link)
556 {
557 	int err;
558 	char *buf;
559 	struct uio uio = {0};
560 	struct iovec iov = {0};
561 
562 	if (linkvp == NULL)
563 		return (ENOENT);
564 	ASSERT(linkvp->v_type == VLNK);
565 
566 	buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
567 	iov.iov_base = buf;
568 	iov.iov_len = MAXPATHLEN;
569 	uio.uio_iov = &iov;
570 	uio.uio_iovcnt = 1;
571 	uio.uio_resid = MAXPATHLEN;
572 	uio.uio_segflg = UIO_SYSSPACE;
573 	uio.uio_llimit = MAXOFFSET_T;
574 
575 	err = VOP_READLINK(linkvp, &uio, kcred, NULL);
576 	if (err) {
577 		cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
578 		kmem_free(buf, MAXPATHLEN);
579 		return (ENOENT);
580 	}
581 
582 	/* mission complete */
583 	*link = i_ddi_strdup(buf, KM_SLEEP);
584 	kmem_free(buf, MAXPATHLEN);
585 	return (0);
586 }
587 
588 /*
589  * A convenient wrapper to get the devfs node vnode for a device
590  * minor functionality: readlink() of a /dev symlink
591  * Place the link into dv->sdev_symlink
592  */
593 static int
sdev_follow_link(struct sdev_node * dv)594 sdev_follow_link(struct sdev_node *dv)
595 {
596 	int err;
597 	struct vnode *linkvp;
598 	char *link = NULL;
599 
600 	linkvp = SDEVTOV(dv);
601 	if (linkvp == NULL)
602 		return (ENOENT);
603 	ASSERT(linkvp->v_type == VLNK);
604 	err = sdev_getlink(linkvp, &link);
605 	if (err) {
606 		dv->sdev_symlink = NULL;
607 		return (ENOENT);
608 	}
609 
610 	ASSERT(link != NULL);
611 	dv->sdev_symlink = link;
612 	return (0);
613 }
614 
615 static int
sdev_node_check(struct sdev_node * dv,struct vattr * nvap,void * nargs)616 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
617 {
618 	vtype_t otype = SDEVTOV(dv)->v_type;
619 
620 	/*
621 	 * existing sdev_node has a different type.
622 	 */
623 	if (otype != nvap->va_type) {
624 		sdcmn_err9(("sdev_node_check: existing node "
625 		    "  %s type %d does not match new node type %d\n",
626 		    dv->sdev_name, otype, nvap->va_type));
627 		return (EEXIST);
628 	}
629 
630 	/*
631 	 * For a symlink, the target should be the same.
632 	 */
633 	if (otype == VLNK) {
634 		ASSERT(nargs != NULL);
635 		ASSERT(dv->sdev_symlink != NULL);
636 		if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
637 			sdcmn_err9(("sdev_node_check: existing node "
638 			    " %s has different symlink %s as new node "
639 			    " %s\n", dv->sdev_name, dv->sdev_symlink,
640 			    (char *)nargs));
641 			return (EEXIST);
642 		}
643 	}
644 
645 	return (0);
646 }
647 
648 /*
649  * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
650  *
651  * arguments:
652  *	- ddv (parent)
653  *	- nm (child name)
654  *	- newdv (sdev_node for nm is returned here)
655  *	- vap (vattr for the node to be created, va_type should be set.
656  *	- avp (attribute vnode)
657  *	  the defaults should be used if unknown)
658  *	- cred
659  *	- args
660  *	    . tnm (for VLNK)
661  *	    . global sdev_node (for !SDEV_GLOBAL)
662  * 	- state: SDEV_INIT, SDEV_READY
663  *
664  * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
665  *
666  * NOTE:  directory contents writers lock needs to be held before
667  *	  calling this routine.
668  */
669 int
sdev_mknode(struct sdev_node * ddv,char * nm,struct sdev_node ** newdv,struct vattr * vap,struct vnode * avp,void * args,struct cred * cred,sdev_node_state_t state)670 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
671     struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
672     sdev_node_state_t state)
673 {
674 	int error = 0;
675 	sdev_node_state_t node_state;
676 	struct sdev_node *dv = NULL;
677 
678 	ASSERT(state != SDEV_ZOMBIE);
679 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
680 
681 	if (*newdv) {
682 		dv = *newdv;
683 	} else {
684 		/* allocate and initialize a sdev_node */
685 		if (ddv->sdev_state == SDEV_ZOMBIE) {
686 			sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
687 			    ddv->sdev_path));
688 			return (ENOENT);
689 		}
690 
691 		error = sdev_nodeinit(ddv, nm, &dv, vap);
692 		if (error != 0) {
693 			sdcmn_err9(("sdev_mknode: error %d,"
694 			    " name %s can not be initialized\n",
695 			    error, nm));
696 			return (error);
697 		}
698 		ASSERT(dv);
699 
700 		/* insert into the directory cache */
701 		sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
702 	}
703 
704 	ASSERT(dv);
705 	node_state = dv->sdev_state;
706 	ASSERT(node_state != SDEV_ZOMBIE);
707 
708 	if (state == SDEV_READY) {
709 		switch (node_state) {
710 		case SDEV_INIT:
711 			error = sdev_nodeready(dv, vap, avp, args, cred);
712 			if (error) {
713 				sdcmn_err9(("sdev_mknode: node %s can NOT"
714 				    " be transitioned into READY state, "
715 				    "error %d\n", nm, error));
716 			}
717 			break;
718 		case SDEV_READY:
719 			/*
720 			 * Do some sanity checking to make sure
721 			 * the existing sdev_node is what has been
722 			 * asked for.
723 			 */
724 			error = sdev_node_check(dv, vap, args);
725 			break;
726 		default:
727 			break;
728 		}
729 	}
730 
731 	if (!error) {
732 		*newdv = dv;
733 		ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
734 	} else {
735 		sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);
736 		/*
737 		 * We created this node, it wasn't passed into us. Therefore it
738 		 * is up to us to delete it.
739 		 */
740 		if (*newdv == NULL)
741 			SDEV_SIMPLE_RELE(dv);
742 		*newdv = NULL;
743 	}
744 
745 	return (error);
746 }
747 
748 /*
749  * convenient wrapper to change vp's ATIME, CTIME and MTIME
750  */
751 void
sdev_update_timestamps(struct vnode * vp,cred_t * cred,uint_t mask)752 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
753 {
754 	struct vattr attr;
755 	timestruc_t now;
756 	int err;
757 
758 	ASSERT(vp);
759 	gethrestime(&now);
760 	if (mask & AT_CTIME)
761 		attr.va_ctime = now;
762 	if (mask & AT_MTIME)
763 		attr.va_mtime = now;
764 	if (mask & AT_ATIME)
765 		attr.va_atime = now;
766 
767 	attr.va_mask = (mask & AT_TIMES);
768 	err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
769 	if (err && (err != EROFS)) {
770 		sdcmn_err(("update timestamps error %d\n", err));
771 	}
772 }
773 
774 /*
775  * the backing store vnode is released here
776  */
777 /*ARGSUSED1*/
778 void
sdev_nodedestroy(struct sdev_node * dv,uint_t flags)779 sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
780 {
781 	/* no references */
782 	ASSERT(dv->sdev_nlink == 0);
783 
784 	if (dv->sdev_attrvp != NULLVP) {
785 		VN_RELE(dv->sdev_attrvp);
786 		/*
787 		 * reset the attrvp so that no more
788 		 * references can be made on this already
789 		 * vn_rele() vnode
790 		 */
791 		dv->sdev_attrvp = NULLVP;
792 	}
793 
794 	if (dv->sdev_attr != NULL) {
795 		kmem_free(dv->sdev_attr, sizeof (struct vattr));
796 		dv->sdev_attr = NULL;
797 	}
798 
799 	if (dv->sdev_name != NULL) {
800 		kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
801 		dv->sdev_name = NULL;
802 	}
803 
804 	if (dv->sdev_symlink != NULL) {
805 		kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
806 		dv->sdev_symlink = NULL;
807 	}
808 
809 	if (dv->sdev_path) {
810 		kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
811 		dv->sdev_path = NULL;
812 	}
813 
814 	if (!SDEV_IS_GLOBAL(dv)) {
815 		sdev_prof_free(dv);
816 		if (dv->sdev_vnode->v_type != VLNK && dv->sdev_origin != NULL)
817 			SDEV_RELE(dv->sdev_origin);
818 	}
819 
820 	if (SDEVTOV(dv)->v_type == VDIR) {
821 		ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
822 		avl_destroy(&dv->sdev_entries);
823 	}
824 
825 	mutex_destroy(&dv->sdev_lookup_lock);
826 	cv_destroy(&dv->sdev_lookup_cv);
827 
828 	/* return node to initial state as per constructor */
829 	(void) memset((void *)&dv->sdev_instance_data, 0,
830 	    sizeof (dv->sdev_instance_data));
831 	vn_invalid(SDEVTOV(dv));
832 	dv->sdev_private = NULL;
833 	kmem_cache_free(sdev_node_cache, dv);
834 }
835 
836 /*
837  * DIRECTORY CACHE lookup
838  */
839 struct sdev_node *
sdev_findbyname(struct sdev_node * ddv,char * nm)840 sdev_findbyname(struct sdev_node *ddv, char *nm)
841 {
842 	struct sdev_node *dv;
843 	struct sdev_node dvtmp;
844 	avl_index_t	where;
845 
846 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
847 
848 	dvtmp.sdev_name = nm;
849 	dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
850 	if (dv) {
851 		ASSERT(dv->sdev_dotdot == ddv);
852 		ASSERT(strcmp(dv->sdev_name, nm) == 0);
853 		ASSERT(dv->sdev_state != SDEV_ZOMBIE);
854 		SDEV_HOLD(dv);
855 		return (dv);
856 	}
857 	return (NULL);
858 }
859 
860 /*
861  * Inserts a new sdev_node in a parent directory
862  */
863 void
sdev_direnter(struct sdev_node * ddv,struct sdev_node * dv)864 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
865 {
866 	avl_index_t where;
867 
868 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
869 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
870 	ASSERT(ddv->sdev_nlink >= 2);
871 	ASSERT(dv->sdev_nlink == 0);
872 	ASSERT(dv->sdev_state != SDEV_ZOMBIE);
873 
874 	dv->sdev_dotdot = ddv;
875 	VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
876 	avl_insert(&ddv->sdev_entries, dv, where);
877 	ddv->sdev_nlink++;
878 }
879 
880 /*
881  * The following check is needed because while sdev_nodes are linked
882  * in SDEV_INIT state, they have their link counts incremented only
883  * in SDEV_READY state.
884  */
885 static void
decr_link(struct sdev_node * dv)886 decr_link(struct sdev_node *dv)
887 {
888 	VERIFY(RW_WRITE_HELD(&dv->sdev_contents));
889 	if (dv->sdev_state != SDEV_INIT) {
890 		VERIFY(dv->sdev_nlink >= 1);
891 		dv->sdev_nlink--;
892 	} else {
893 		VERIFY(dv->sdev_nlink == 0);
894 	}
895 }
896 
897 /*
898  * Delete an existing dv from directory cache
899  *
900  * In the case of a node is still held by non-zero reference count, the node is
901  * put into ZOMBIE state. The node is always unlinked from its parent, but it is
902  * not destroyed via sdev_inactive until its reference count reaches "0".
903  */
904 static void
sdev_dirdelete(struct sdev_node * ddv,struct sdev_node * dv)905 sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
906 {
907 	struct vnode *vp;
908 	sdev_node_state_t os;
909 
910 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
911 
912 	vp = SDEVTOV(dv);
913 	mutex_enter(&vp->v_lock);
914 	rw_enter(&dv->sdev_contents, RW_WRITER);
915 	os = dv->sdev_state;
916 	ASSERT(os != SDEV_ZOMBIE);
917 	dv->sdev_state = SDEV_ZOMBIE;
918 
919 	/*
920 	 * unlink ourselves from the parent directory now to take care of the ..
921 	 * link. However, if we're a directory, we don't remove our reference to
922 	 * ourself eg. '.' until we are torn down in the inactive callback.
923 	 */
924 	decr_link(ddv);
925 	avl_remove(&ddv->sdev_entries, dv);
926 	/*
927 	 * sdev_inactive expects nodes to have a link to themselves when we're
928 	 * tearing them down. If we're transitioning from the initial state to
929 	 * zombie and not via ready, then we're not going to have this link that
930 	 * comes from the node being ready. As a result, we need to increment
931 	 * our link count by one to account for this.
932 	 */
933 	if (os == SDEV_INIT && dv->sdev_nlink == 0)
934 		dv->sdev_nlink++;
935 	rw_exit(&dv->sdev_contents);
936 	mutex_exit(&vp->v_lock);
937 }
938 
939 /*
940  * check if the source is in the path of the target
941  *
942  * source and target are different
943  */
944 /*ARGSUSED2*/
945 static int
sdev_checkpath(struct sdev_node * sdv,struct sdev_node * tdv,struct cred * cred)946 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
947 {
948 	int error = 0;
949 	struct sdev_node *dotdot, *dir;
950 
951 	dotdot = tdv->sdev_dotdot;
952 	ASSERT(dotdot);
953 
954 	/* fs root */
955 	if (dotdot == tdv) {
956 		return (0);
957 	}
958 
959 	for (;;) {
960 		/*
961 		 * avoid error cases like
962 		 *	mv a a/b
963 		 *	mv a a/b/c
964 		 *	etc.
965 		 */
966 		if (dotdot == sdv) {
967 			error = EINVAL;
968 			break;
969 		}
970 
971 		dir = dotdot;
972 		dotdot = dir->sdev_dotdot;
973 
974 		/* done checking because root is reached */
975 		if (dir == dotdot) {
976 			break;
977 		}
978 	}
979 	return (error);
980 }
981 
982 int
sdev_rnmnode(struct sdev_node * oddv,struct sdev_node * odv,struct sdev_node * nddv,struct sdev_node ** ndvp,char * nnm,struct cred * cred)983 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
984     struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
985     struct cred *cred)
986 {
987 	int error = 0;
988 	struct vnode *ovp = SDEVTOV(odv);
989 	struct vnode *nvp;
990 	struct vattr vattr;
991 	int doingdir = (ovp->v_type == VDIR);
992 	char *link = NULL;
993 	int samedir = (oddv == nddv) ? 1 : 0;
994 	int bkstore = 0;
995 	struct sdev_node *idv = NULL;
996 	struct sdev_node *ndv = NULL;
997 	timestruc_t now;
998 
999 	vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1000 	error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
1001 	if (error)
1002 		return (error);
1003 
1004 	if (!samedir)
1005 		rw_enter(&oddv->sdev_contents, RW_WRITER);
1006 	rw_enter(&nddv->sdev_contents, RW_WRITER);
1007 
1008 	/*
1009 	 * the source may have been deleted by another thread before
1010 	 * we gets here.
1011 	 */
1012 	if (odv->sdev_state != SDEV_READY) {
1013 		error = ENOENT;
1014 		goto err_out;
1015 	}
1016 
1017 	if (doingdir && (odv == nddv)) {
1018 		error = EINVAL;
1019 		goto err_out;
1020 	}
1021 
1022 	/*
1023 	 * If renaming a directory, and the parents are different (".." must be
1024 	 * changed) then the source dir must not be in the dir hierarchy above
1025 	 * the target since it would orphan everything below the source dir.
1026 	 */
1027 	if (doingdir && (oddv != nddv)) {
1028 		error = sdev_checkpath(odv, nddv, cred);
1029 		if (error)
1030 			goto err_out;
1031 	}
1032 
1033 	/* fix the source for a symlink */
1034 	if (vattr.va_type == VLNK) {
1035 		if (odv->sdev_symlink == NULL) {
1036 			error = sdev_follow_link(odv);
1037 			if (error) {
1038 				/*
1039 				 * The underlying symlink doesn't exist. This
1040 				 * node probably shouldn't even exist. While
1041 				 * it's a bit jarring to consumers, we're going
1042 				 * to remove the node from /dev.
1043 				 */
1044 				if (SDEV_IS_PERSIST((*ndvp)))
1045 					bkstore = 1;
1046 				sdev_dirdelete(oddv, odv);
1047 				if (bkstore) {
1048 					ASSERT(nddv->sdev_attrvp);
1049 					error = VOP_REMOVE(nddv->sdev_attrvp,
1050 					    nnm, cred, NULL, 0);
1051 					if (error)
1052 						goto err_out;
1053 				}
1054 				error = ENOENT;
1055 				goto err_out;
1056 			}
1057 		}
1058 		ASSERT(odv->sdev_symlink);
1059 		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1060 	}
1061 
1062 	/* destination existing */
1063 	if (*ndvp) {
1064 		nvp = SDEVTOV(*ndvp);
1065 		ASSERT(nvp);
1066 
1067 		/* handling renaming to itself */
1068 		if (odv == *ndvp) {
1069 			error = 0;
1070 			goto err_out;
1071 		}
1072 
1073 		if (nvp->v_type == VDIR) {
1074 			if (!doingdir) {
1075 				error = EISDIR;
1076 				goto err_out;
1077 			}
1078 
1079 			if (vn_vfswlock(nvp)) {
1080 				error = EBUSY;
1081 				goto err_out;
1082 			}
1083 
1084 			if (vn_mountedvfs(nvp) != NULL) {
1085 				vn_vfsunlock(nvp);
1086 				error = EBUSY;
1087 				goto err_out;
1088 			}
1089 
1090 			/* in case dir1 exists in dir2 and "mv dir1 dir2" */
1091 			if ((*ndvp)->sdev_nlink > 2) {
1092 				vn_vfsunlock(nvp);
1093 				error = EEXIST;
1094 				goto err_out;
1095 			}
1096 			vn_vfsunlock(nvp);
1097 
1098 			/*
1099 			 * We did not place the hold on *ndvp, so even though
1100 			 * we're deleting the node, we should not get rid of our
1101 			 * reference.
1102 			 */
1103 			sdev_dirdelete(nddv, *ndvp);
1104 			*ndvp = NULL;
1105 			ASSERT(nddv->sdev_attrvp);
1106 			error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1107 			    nddv->sdev_attrvp, cred, NULL, 0);
1108 			if (error)
1109 				goto err_out;
1110 		} else {
1111 			if (doingdir) {
1112 				error = ENOTDIR;
1113 				goto err_out;
1114 			}
1115 
1116 			if (SDEV_IS_PERSIST((*ndvp))) {
1117 				bkstore = 1;
1118 			}
1119 
1120 			/*
1121 			 * Get rid of the node from the directory cache note.
1122 			 * Don't forget that it's not up to us to remove the vn
1123 			 * ref on the sdev node, as we did not place it.
1124 			 */
1125 			sdev_dirdelete(nddv, *ndvp);
1126 			*ndvp = NULL;
1127 			if (bkstore) {
1128 				ASSERT(nddv->sdev_attrvp);
1129 				error = VOP_REMOVE(nddv->sdev_attrvp,
1130 				    nnm, cred, NULL, 0);
1131 				if (error)
1132 					goto err_out;
1133 			}
1134 		}
1135 	}
1136 
1137 	/*
1138 	 * make a fresh node from the source attrs
1139 	 */
1140 	ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1141 	error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1142 	    NULL, (void *)link, cred, SDEV_READY);
1143 
1144 	if (link != NULL) {
1145 		kmem_free(link, strlen(link) + 1);
1146 		link = NULL;
1147 	}
1148 
1149 	if (error)
1150 		goto err_out;
1151 	ASSERT(*ndvp);
1152 	ASSERT((*ndvp)->sdev_state == SDEV_READY);
1153 
1154 	/* move dir contents */
1155 	if (doingdir) {
1156 		for (idv = SDEV_FIRST_ENTRY(odv); idv;
1157 		    idv = SDEV_NEXT_ENTRY(odv, idv)) {
1158 			SDEV_HOLD(idv);
1159 			error = sdev_rnmnode(odv, idv,
1160 			    (struct sdev_node *)(*ndvp), &ndv,
1161 			    idv->sdev_name, cred);
1162 			SDEV_RELE(idv);
1163 			if (error)
1164 				goto err_out;
1165 			ndv = NULL;
1166 		}
1167 	}
1168 
1169 	if ((*ndvp)->sdev_attrvp) {
1170 		sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1171 		    AT_CTIME|AT_ATIME);
1172 	} else {
1173 		ASSERT((*ndvp)->sdev_attr);
1174 		gethrestime(&now);
1175 		(*ndvp)->sdev_attr->va_ctime = now;
1176 		(*ndvp)->sdev_attr->va_atime = now;
1177 	}
1178 
1179 	if (nddv->sdev_attrvp) {
1180 		sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1181 		    AT_MTIME|AT_ATIME);
1182 	} else {
1183 		ASSERT(nddv->sdev_attr);
1184 		gethrestime(&now);
1185 		nddv->sdev_attr->va_mtime = now;
1186 		nddv->sdev_attr->va_atime = now;
1187 	}
1188 	rw_exit(&nddv->sdev_contents);
1189 	if (!samedir)
1190 		rw_exit(&oddv->sdev_contents);
1191 
1192 	SDEV_RELE(*ndvp);
1193 	return (error);
1194 
1195 err_out:
1196 	if (link != NULL) {
1197 		kmem_free(link, strlen(link) + 1);
1198 		link = NULL;
1199 	}
1200 
1201 	rw_exit(&nddv->sdev_contents);
1202 	if (!samedir)
1203 		rw_exit(&oddv->sdev_contents);
1204 	return (error);
1205 }
1206 
1207 /*
1208  * Merge sdev_node specific information into an attribute structure.
1209  *
1210  * note: sdev_node is not locked here
1211  */
1212 void
sdev_vattr_merge(struct sdev_node * dv,struct vattr * vap)1213 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1214 {
1215 	struct vnode *vp = SDEVTOV(dv);
1216 
1217 	vap->va_nlink = dv->sdev_nlink;
1218 	vap->va_nodeid = dv->sdev_ino;
1219 	vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1220 	vap->va_type = vp->v_type;
1221 
1222 	if (vp->v_type == VDIR) {
1223 		vap->va_rdev = 0;
1224 		vap->va_fsid = vp->v_rdev;
1225 	} else if (vp->v_type == VLNK) {
1226 		vap->va_rdev = 0;
1227 		vap->va_mode  &= ~S_IFMT;
1228 		vap->va_mode |= S_IFLNK;
1229 	} else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1230 		vap->va_rdev = vp->v_rdev;
1231 		vap->va_mode &= ~S_IFMT;
1232 		if (vap->va_type == VCHR)
1233 			vap->va_mode |= S_IFCHR;
1234 		else
1235 			vap->va_mode |= S_IFBLK;
1236 	} else {
1237 		vap->va_rdev = 0;
1238 	}
1239 }
1240 
1241 struct vattr *
sdev_getdefault_attr(enum vtype type)1242 sdev_getdefault_attr(enum vtype type)
1243 {
1244 	if (type == VDIR)
1245 		return (&sdev_vattr_dir);
1246 	else if (type == VCHR)
1247 		return (&sdev_vattr_chr);
1248 	else if (type == VBLK)
1249 		return (&sdev_vattr_blk);
1250 	else if (type == VLNK)
1251 		return (&sdev_vattr_lnk);
1252 	else
1253 		return (NULL);
1254 }
1255 int
sdev_to_vp(struct sdev_node * dv,struct vnode ** vpp)1256 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1257 {
1258 	int rv = 0;
1259 	struct vnode *vp = SDEVTOV(dv);
1260 
1261 	switch (vp->v_type) {
1262 	case VCHR:
1263 	case VBLK:
1264 		/*
1265 		 * If vnode is a device, return special vnode instead
1266 		 * (though it knows all about -us- via sp->s_realvp)
1267 		 */
1268 		*vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1269 		VN_RELE(vp);
1270 		if (*vpp == NULLVP)
1271 			rv = ENOSYS;
1272 		break;
1273 	default:	/* most types are returned as is */
1274 		*vpp = vp;
1275 		break;
1276 	}
1277 	return (rv);
1278 }
1279 
1280 /*
1281  * junction between devname and root file system, e.g. ufs
1282  */
1283 int
devname_backstore_lookup(struct sdev_node * ddv,char * nm,struct vnode ** rvp)1284 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1285 {
1286 	struct vnode *rdvp = ddv->sdev_attrvp;
1287 	int rval = 0;
1288 
1289 	ASSERT(rdvp);
1290 
1291 	rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1292 	    NULL);
1293 	return (rval);
1294 }
1295 
1296 static int
sdev_filldir_from_store(struct sdev_node * ddv,int dlen,struct cred * cred)1297 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1298 {
1299 	struct sdev_node *dv = NULL;
1300 	char	*nm;
1301 	struct vnode *dirvp;
1302 	int	error;
1303 	vnode_t	*vp;
1304 	int eof;
1305 	struct iovec iov;
1306 	struct uio uio;
1307 	struct dirent64 *dp;
1308 	dirent64_t *dbuf;
1309 	size_t dbuflen;
1310 	struct vattr vattr;
1311 	char *link = NULL;
1312 
1313 	if (ddv->sdev_attrvp == NULL)
1314 		return (0);
1315 	if (!(ddv->sdev_flags & SDEV_BUILD))
1316 		return (0);
1317 
1318 	dirvp = ddv->sdev_attrvp;
1319 	VN_HOLD(dirvp);
1320 	dbuf = kmem_zalloc(dlen, KM_SLEEP);
1321 
1322 	uio.uio_iov = &iov;
1323 	uio.uio_iovcnt = 1;
1324 	uio.uio_segflg = UIO_SYSSPACE;
1325 	uio.uio_fmode = 0;
1326 	uio.uio_extflg = UIO_COPY_CACHED;
1327 	uio.uio_loffset = 0;
1328 	uio.uio_llimit = MAXOFFSET_T;
1329 
1330 	eof = 0;
1331 	error = 0;
1332 	while (!error && !eof) {
1333 		uio.uio_resid = dlen;
1334 		iov.iov_base = (char *)dbuf;
1335 		iov.iov_len = dlen;
1336 		(void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1337 		error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1338 		VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1339 
1340 		dbuflen = dlen - uio.uio_resid;
1341 		if (error || dbuflen == 0)
1342 			break;
1343 
1344 		if (!(ddv->sdev_flags & SDEV_BUILD))
1345 			break;
1346 
1347 		for (dp = dbuf; ((intptr_t)dp <
1348 		    (intptr_t)dbuf + dbuflen);
1349 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1350 			nm = dp->d_name;
1351 
1352 			if (strcmp(nm, ".") == 0 ||
1353 			    strcmp(nm, "..") == 0)
1354 				continue;
1355 
1356 			vp = NULLVP;
1357 			dv = sdev_cache_lookup(ddv, nm);
1358 			if (dv) {
1359 				VERIFY(dv->sdev_state != SDEV_ZOMBIE);
1360 				SDEV_SIMPLE_RELE(dv);
1361 				continue;
1362 			}
1363 
1364 			/* refill the cache if not already */
1365 			error = devname_backstore_lookup(ddv, nm, &vp);
1366 			if (error)
1367 				continue;
1368 
1369 			vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1370 			error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1371 			if (error)
1372 				continue;
1373 
1374 			if (vattr.va_type == VLNK) {
1375 				error = sdev_getlink(vp, &link);
1376 				if (error) {
1377 					continue;
1378 				}
1379 				ASSERT(link != NULL);
1380 			}
1381 
1382 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1383 				rw_exit(&ddv->sdev_contents);
1384 				rw_enter(&ddv->sdev_contents, RW_WRITER);
1385 			}
1386 			error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1387 			    cred, SDEV_READY);
1388 			rw_downgrade(&ddv->sdev_contents);
1389 
1390 			if (link != NULL) {
1391 				kmem_free(link, strlen(link) + 1);
1392 				link = NULL;
1393 			}
1394 
1395 			if (!error) {
1396 				ASSERT(dv);
1397 				ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1398 				SDEV_SIMPLE_RELE(dv);
1399 			}
1400 			vp = NULL;
1401 			dv = NULL;
1402 		}
1403 	}
1404 
1405 	VN_RELE(dirvp);
1406 	kmem_free(dbuf, dlen);
1407 
1408 	return (error);
1409 }
1410 
1411 void
sdev_filldir_dynamic(struct sdev_node * ddv)1412 sdev_filldir_dynamic(struct sdev_node *ddv)
1413 {
1414 	int error;
1415 	int i;
1416 	struct vattr vattr;
1417 	struct vattr *vap = &vattr;
1418 	char *nm = NULL;
1419 	struct sdev_node *dv = NULL;
1420 
1421 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1422 	ASSERT((ddv->sdev_flags & SDEV_BUILD));
1423 
1424 	*vap = *sdev_getdefault_attr(VDIR);	/* note structure copy here */
1425 	gethrestime(&vap->va_atime);
1426 	vap->va_mtime = vap->va_atime;
1427 	vap->va_ctime = vap->va_atime;
1428 	for (i = 0; vtab[i].vt_name != NULL; i++) {
1429 		/*
1430 		 * This early, we may be in a read-only /dev environment: leave
1431 		 * the creation of any nodes we'd attempt to persist to
1432 		 * devfsadm. Because /dev itself is normally persistent, any
1433 		 * node which is not marked dynamic will end up being marked
1434 		 * persistent. However, some nodes are both dynamic and
1435 		 * persistent, mostly lofi and rlofi, so we need to be careful
1436 		 * in our check.
1437 		 */
1438 		if ((vtab[i].vt_flags & SDEV_PERSIST) ||
1439 		    !(vtab[i].vt_flags & SDEV_DYNAMIC))
1440 			continue;
1441 		nm = vtab[i].vt_name;
1442 		ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1443 		dv = NULL;
1444 		error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1445 		    NULL, kcred, SDEV_READY);
1446 		if (error) {
1447 			cmn_err(CE_WARN, "%s/%s: error %d\n",
1448 			    ddv->sdev_name, nm, error);
1449 		} else {
1450 			ASSERT(dv);
1451 			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1452 			SDEV_SIMPLE_RELE(dv);
1453 		}
1454 	}
1455 }
1456 
1457 /*
1458  * Creating a backing store entry based on sdev_attr.
1459  * This is called either as part of node creation in a persistent directory
1460  * or from setattr/setsecattr to persist access attributes across reboot.
1461  */
1462 int
sdev_shadow_node(struct sdev_node * dv,struct cred * cred)1463 sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1464 {
1465 	int error = 0;
1466 	struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1467 	struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1468 	struct vattr *vap = dv->sdev_attr;
1469 	char *nm = dv->sdev_name;
1470 	struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1471 
1472 	ASSERT(dv && dv->sdev_name && rdvp);
1473 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1474 
1475 lookup:
1476 	/* try to find it in the backing store */
1477 	error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1478 	    NULL);
1479 	if (error == 0) {
1480 		if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1481 			VN_HOLD(rrvp);
1482 			VN_RELE(*rvp);
1483 			*rvp = rrvp;
1484 		}
1485 
1486 		kmem_free(dv->sdev_attr, sizeof (vattr_t));
1487 		dv->sdev_attr = NULL;
1488 		dv->sdev_attrvp = *rvp;
1489 		return (0);
1490 	}
1491 
1492 	/* let's try to persist the node */
1493 	gethrestime(&vap->va_atime);
1494 	vap->va_mtime = vap->va_atime;
1495 	vap->va_ctime = vap->va_atime;
1496 	vap->va_mask |= AT_TYPE|AT_MODE;
1497 	switch (vap->va_type) {
1498 	case VDIR:
1499 		error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1500 		sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1501 		    (void *)(*rvp), error));
1502 		if (!error)
1503 			VN_RELE(*rvp);
1504 		break;
1505 	case VCHR:
1506 	case VBLK:
1507 	case VREG:
1508 	case VDOOR:
1509 		error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1510 		    rvp, cred, 0, NULL, NULL);
1511 		sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1512 		    (void *)(*rvp), error));
1513 		if (!error)
1514 			VN_RELE(*rvp);
1515 		break;
1516 	case VLNK:
1517 		ASSERT(dv->sdev_symlink);
1518 		error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1519 		    NULL, 0);
1520 		sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1521 		    error));
1522 		break;
1523 	default:
1524 		cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1525 		    "create\n", nm);
1526 		/*NOTREACHED*/
1527 	}
1528 
1529 	/* go back to lookup to factor out spec node and set attrvp */
1530 	if (error == 0)
1531 		goto lookup;
1532 
1533 	sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error));
1534 	return (error);
1535 }
1536 
1537 static void
sdev_cache_add(struct sdev_node * ddv,struct sdev_node ** dv,char * nm)1538 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1539 {
1540 	struct sdev_node *dup = NULL;
1541 
1542 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1543 	if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1544 		sdev_direnter(ddv, *dv);
1545 	} else {
1546 		VERIFY(dup->sdev_state != SDEV_ZOMBIE);
1547 		SDEV_SIMPLE_RELE(*dv);
1548 		sdev_nodedestroy(*dv, 0);
1549 		*dv = dup;
1550 	}
1551 }
1552 
1553 static void
sdev_cache_delete(struct sdev_node * ddv,struct sdev_node ** dv)1554 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1555 {
1556 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1557 	sdev_dirdelete(ddv, *dv);
1558 }
1559 
1560 /*
1561  * update the in-core directory cache
1562  */
1563 void
sdev_cache_update(struct sdev_node * ddv,struct sdev_node ** dv,char * nm,sdev_cache_ops_t ops)1564 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1565     sdev_cache_ops_t ops)
1566 {
1567 	ASSERT((SDEV_HELD(*dv)));
1568 
1569 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1570 	switch (ops) {
1571 	case SDEV_CACHE_ADD:
1572 		sdev_cache_add(ddv, dv, nm);
1573 		break;
1574 	case SDEV_CACHE_DELETE:
1575 		sdev_cache_delete(ddv, dv);
1576 		break;
1577 	default:
1578 		break;
1579 	}
1580 }
1581 
1582 /*
1583  * retrieve the named entry from the directory cache
1584  */
1585 struct sdev_node *
sdev_cache_lookup(struct sdev_node * ddv,char * nm)1586 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1587 {
1588 	struct sdev_node *dv = NULL;
1589 
1590 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1591 	dv = sdev_findbyname(ddv, nm);
1592 
1593 	return (dv);
1594 }
1595 
1596 /*
1597  * Implicit reconfig for nodes constructed by a link generator
1598  * Start devfsadm if needed, or if devfsadm is in progress,
1599  * prepare to block on devfsadm either completing or
1600  * constructing the desired node.  As devfsadmd is global
1601  * in scope, constructing all necessary nodes, we only
1602  * need to initiate it once.
1603  */
1604 static int
sdev_call_devfsadmd(struct sdev_node * ddv,struct sdev_node * dv,char * nm)1605 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1606 {
1607 	int error = 0;
1608 
1609 	if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1610 		sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1611 		    ddv->sdev_name, nm, devfsadm_state));
1612 		mutex_enter(&dv->sdev_lookup_lock);
1613 		SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1614 		mutex_exit(&dv->sdev_lookup_lock);
1615 		error = 0;
1616 	} else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1617 		sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1618 		    ddv->sdev_name, nm, devfsadm_state));
1619 
1620 		sdev_devfsadmd_thread(ddv, dv, kcred);
1621 		mutex_enter(&dv->sdev_lookup_lock);
1622 		SDEV_BLOCK_OTHERS(dv,
1623 		    (SDEV_LOOKUP | SDEV_LGWAITING));
1624 		mutex_exit(&dv->sdev_lookup_lock);
1625 		error = 0;
1626 	} else {
1627 		error = -1;
1628 	}
1629 
1630 	return (error);
1631 }
1632 
1633 /*
1634  *  Support for specialized device naming construction mechanisms
1635  */
1636 static int
sdev_call_dircallback(struct sdev_node * ddv,struct sdev_node ** dvp,char * nm,int (* callback)(struct sdev_node *,char *,void **,struct cred *,void *,char *),int flags,struct cred * cred)1637 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1638     int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1639     void *, char *), int flags, struct cred *cred)
1640 {
1641 	int rv = 0;
1642 	char *physpath = NULL;
1643 	struct vattr vattr;
1644 	struct vattr *vap = &vattr;
1645 	struct sdev_node *dv = NULL;
1646 
1647 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1648 	if (flags & SDEV_VLINK) {
1649 		physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1650 		rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1651 		    NULL);
1652 		if (rv) {
1653 			kmem_free(physpath, MAXPATHLEN);
1654 			return (-1);
1655 		}
1656 
1657 		*vap = *sdev_getdefault_attr(VLNK);	/* structure copy */
1658 		vap->va_size = strlen(physpath);
1659 		gethrestime(&vap->va_atime);
1660 		vap->va_mtime = vap->va_atime;
1661 		vap->va_ctime = vap->va_atime;
1662 
1663 		rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1664 		    (void *)physpath, cred, SDEV_READY);
1665 		kmem_free(physpath, MAXPATHLEN);
1666 		if (rv)
1667 			return (rv);
1668 	} else if (flags & SDEV_VATTR) {
1669 		/*
1670 		 * /dev/pts
1671 		 *
1672 		 * callback is responsible to set the basic attributes,
1673 		 * e.g. va_type/va_uid/va_gid/
1674 		 *    dev_t if VCHR or VBLK/
1675 		 */
1676 		ASSERT(callback);
1677 		rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
1678 		if (rv) {
1679 			sdcmn_err3(("devname_lookup_func: SDEV_NONE "
1680 			    "callback failed \n"));
1681 			return (-1);
1682 		}
1683 
1684 		rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
1685 		    cred, SDEV_READY);
1686 
1687 		if (rv)
1688 			return (rv);
1689 
1690 	} else {
1691 		impossible(("lookup: %s/%s by %s not supported (%d)\n",
1692 		    SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
1693 		    __LINE__));
1694 		rv = -1;
1695 	}
1696 
1697 	*dvp = dv;
1698 	return (rv);
1699 }
1700 
/*
 * Return 1 when exec_name is exactly "devfsadm", 0 otherwise.
 *
 * note: because devfsadmd -> /usr/sbin/devfsadm
 * it is safe to use "devfsadm" to capture the lookups
 * from devfsadm and its daemon version.
 */
static int
is_devfsadm_thread(char *exec_name)
{
	return ((strcmp(exec_name, "devfsadm") == 0) ? 1 : 0);
}
1713 
1714 /*
1715  * Lookup Order:
1716  *	sdev_node cache;
1717  *	backing store (SDEV_PERSIST);
1718  *	DBNR: a. dir_ops implemented in the loadable modules;
1719  *	      b. vnode ops in vtab.
1720  */
1721 int
devname_lookup_func(struct sdev_node * ddv,char * nm,struct vnode ** vpp,struct cred * cred,int (* callback)(struct sdev_node *,char *,void **,struct cred *,void *,char *),int flags)1722 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
1723     struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
1724     struct cred *, void *, char *), int flags)
1725 {
1726 	int rv = 0, nmlen;
1727 	struct vnode *rvp = NULL;
1728 	struct sdev_node *dv = NULL;
1729 	int	retried = 0;
1730 	int	error = 0;
1731 	struct vattr vattr;
1732 	char *lookup_thread = curproc->p_user.u_comm;
1733 	int failed_flags = 0;
1734 	int (*vtor)(struct sdev_node *) = NULL;
1735 	int state;
1736 	int parent_state;
1737 	char *link = NULL;
1738 
1739 	if (SDEVTOV(ddv)->v_type != VDIR)
1740 		return (ENOTDIR);
1741 
1742 	/*
1743 	 * Empty name or ., return node itself.
1744 	 */
1745 	nmlen = strlen(nm);
1746 	if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
1747 		*vpp = SDEVTOV(ddv);
1748 		VN_HOLD(*vpp);
1749 		return (0);
1750 	}
1751 
1752 	/*
1753 	 * .., return the parent directory
1754 	 */
1755 	if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
1756 		*vpp = SDEVTOV(ddv->sdev_dotdot);
1757 		VN_HOLD(*vpp);
1758 		return (0);
1759 	}
1760 
1761 	rw_enter(&ddv->sdev_contents, RW_READER);
1762 	if (ddv->sdev_flags & SDEV_VTOR) {
1763 		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
1764 		ASSERT(vtor);
1765 	}
1766 
1767 tryagain:
1768 	/*
1769 	 * (a) directory cache lookup:
1770 	 */
1771 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1772 	parent_state = ddv->sdev_state;
1773 	dv = sdev_cache_lookup(ddv, nm);
1774 	if (dv) {
1775 		state = dv->sdev_state;
1776 		switch (state) {
1777 		case SDEV_INIT:
1778 			if (is_devfsadm_thread(lookup_thread))
1779 				break;
1780 
1781 			/* ZOMBIED parent won't allow node creation */
1782 			if (parent_state == SDEV_ZOMBIE) {
1783 				SD_TRACE_FAILED_LOOKUP(ddv, nm,
1784 				    retried);
1785 				goto nolock_notfound;
1786 			}
1787 
1788 			mutex_enter(&dv->sdev_lookup_lock);
1789 			/* compensate the threads started after devfsadm */
1790 			if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
1791 			    !(SDEV_IS_LOOKUP(dv)))
1792 				SDEV_BLOCK_OTHERS(dv,
1793 				    (SDEV_LOOKUP | SDEV_LGWAITING));
1794 
1795 			if (SDEV_IS_LOOKUP(dv)) {
1796 				failed_flags |= SLF_REBUILT;
1797 				rw_exit(&ddv->sdev_contents);
1798 				error = sdev_wait4lookup(dv, SDEV_LOOKUP);
1799 				mutex_exit(&dv->sdev_lookup_lock);
1800 				rw_enter(&ddv->sdev_contents, RW_READER);
1801 
1802 				if (error != 0) {
1803 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
1804 					    retried);
1805 					goto nolock_notfound;
1806 				}
1807 
1808 				state = dv->sdev_state;
1809 				if (state == SDEV_INIT) {
1810 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
1811 					    retried);
1812 					goto nolock_notfound;
1813 				} else if (state == SDEV_READY) {
1814 					goto found;
1815 				} else if (state == SDEV_ZOMBIE) {
1816 					rw_exit(&ddv->sdev_contents);
1817 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
1818 					    retried);
1819 					SDEV_RELE(dv);
1820 					goto lookup_failed;
1821 				}
1822 			} else {
1823 				mutex_exit(&dv->sdev_lookup_lock);
1824 			}
1825 			break;
1826 		case SDEV_READY:
1827 			goto found;
1828 		case SDEV_ZOMBIE:
1829 			rw_exit(&ddv->sdev_contents);
1830 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1831 			SDEV_RELE(dv);
1832 			goto lookup_failed;
1833 		default:
1834 			rw_exit(&ddv->sdev_contents);
1835 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1836 			sdev_lookup_failed(ddv, nm, failed_flags);
1837 			*vpp = NULLVP;
1838 			return (ENOENT);
1839 		}
1840 	}
1841 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1842 
1843 	/*
1844 	 * ZOMBIED parent does not allow new node creation.
1845 	 * bail out early
1846 	 */
1847 	if (parent_state == SDEV_ZOMBIE) {
1848 		rw_exit(&ddv->sdev_contents);
1849 		*vpp = NULLVP;
1850 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1851 		return (ENOENT);
1852 	}
1853 
1854 	/*
1855 	 * (b0): backing store lookup
1856 	 *	SDEV_PERSIST is default except:
1857 	 *		1) pts nodes
1858 	 *		2) non-chmod'ed local nodes
1859 	 *		3) zvol nodes
1860 	 */
1861 	if (SDEV_IS_PERSIST(ddv)) {
1862 		error = devname_backstore_lookup(ddv, nm, &rvp);
1863 
1864 		if (!error) {
1865 
1866 			vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1867 			error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
1868 			if (error) {
1869 				rw_exit(&ddv->sdev_contents);
1870 				if (dv)
1871 					SDEV_RELE(dv);
1872 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1873 				sdev_lookup_failed(ddv, nm, failed_flags);
1874 				*vpp = NULLVP;
1875 				return (ENOENT);
1876 			}
1877 
1878 			if (vattr.va_type == VLNK) {
1879 				error = sdev_getlink(rvp, &link);
1880 				if (error) {
1881 					rw_exit(&ddv->sdev_contents);
1882 					if (dv)
1883 						SDEV_RELE(dv);
1884 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
1885 					    retried);
1886 					sdev_lookup_failed(ddv, nm,
1887 					    failed_flags);
1888 					*vpp = NULLVP;
1889 					return (ENOENT);
1890 				}
1891 				ASSERT(link != NULL);
1892 			}
1893 
1894 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1895 				rw_exit(&ddv->sdev_contents);
1896 				rw_enter(&ddv->sdev_contents, RW_WRITER);
1897 			}
1898 			error = sdev_mknode(ddv, nm, &dv, &vattr,
1899 			    rvp, link, cred, SDEV_READY);
1900 			rw_downgrade(&ddv->sdev_contents);
1901 
1902 			if (link != NULL) {
1903 				kmem_free(link, strlen(link) + 1);
1904 				link = NULL;
1905 			}
1906 
1907 			if (error) {
1908 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1909 				rw_exit(&ddv->sdev_contents);
1910 				if (dv)
1911 					SDEV_RELE(dv);
1912 				goto lookup_failed;
1913 			} else {
1914 				goto found;
1915 			}
1916 		} else if (retried) {
1917 			rw_exit(&ddv->sdev_contents);
1918 			sdcmn_err3(("retry of lookup of %s/%s: failed\n",
1919 			    ddv->sdev_name, nm));
1920 			if (dv)
1921 				SDEV_RELE(dv);
1922 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1923 			sdev_lookup_failed(ddv, nm, failed_flags);
1924 			*vpp = NULLVP;
1925 			return (ENOENT);
1926 		}
1927 	}
1928 
1929 lookup_create_node:
1930 	/* first thread that is doing the lookup on this node */
1931 	if (callback) {
1932 		ASSERT(dv == NULL);
1933 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
1934 			rw_exit(&ddv->sdev_contents);
1935 			rw_enter(&ddv->sdev_contents, RW_WRITER);
1936 		}
1937 		error = sdev_call_dircallback(ddv, &dv, nm, callback,
1938 		    flags, cred);
1939 		rw_downgrade(&ddv->sdev_contents);
1940 		if (error == 0) {
1941 			goto found;
1942 		} else {
1943 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1944 			rw_exit(&ddv->sdev_contents);
1945 			goto lookup_failed;
1946 		}
1947 	}
1948 	if (!dv) {
1949 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
1950 			rw_exit(&ddv->sdev_contents);
1951 			rw_enter(&ddv->sdev_contents, RW_WRITER);
1952 		}
1953 		error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
1954 		    cred, SDEV_INIT);
1955 		if (!dv) {
1956 			rw_exit(&ddv->sdev_contents);
1957 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1958 			sdev_lookup_failed(ddv, nm, failed_flags);
1959 			*vpp = NULLVP;
1960 			return (ENOENT);
1961 		}
1962 		rw_downgrade(&ddv->sdev_contents);
1963 	}
1964 
1965 	/*
1966 	 * (b1) invoking devfsadm once per life time for devfsadm nodes
1967 	 */
1968 	ASSERT(SDEV_HELD(dv));
1969 
1970 	if (SDEV_IS_NO_NCACHE(dv))
1971 		failed_flags |= SLF_NO_NCACHE;
1972 	if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
1973 	    SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
1974 	    ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
1975 		ASSERT(SDEV_HELD(dv));
1976 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1977 		goto nolock_notfound;
1978 	}
1979 
1980 	/*
1981 	 * filter out known non-existent devices recorded
1982 	 * during initial reconfiguration boot for which
1983 	 * reconfig should not be done and lookup may
1984 	 * be short-circuited now.
1985 	 */
1986 	if (sdev_lookup_filter(ddv, nm)) {
1987 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1988 		goto nolock_notfound;
1989 	}
1990 
1991 	/* bypassing devfsadm internal nodes */
1992 	if (is_devfsadm_thread(lookup_thread)) {
1993 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1994 		goto nolock_notfound;
1995 	}
1996 
1997 	if (sdev_reconfig_disable) {
1998 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1999 		goto nolock_notfound;
2000 	}
2001 
2002 	error = sdev_call_devfsadmd(ddv, dv, nm);
2003 	if (error == 0) {
2004 		sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
2005 		    ddv->sdev_name, nm, curproc->p_user.u_comm));
2006 		if (sdev_reconfig_verbose) {
2007 			cmn_err(CE_CONT,
2008 			    "?lookup of %s/%s by %s: reconfig\n",
2009 			    ddv->sdev_name, nm, curproc->p_user.u_comm);
2010 		}
2011 		retried = 1;
2012 		failed_flags |= SLF_REBUILT;
2013 		ASSERT(dv->sdev_state != SDEV_ZOMBIE);
2014 		SDEV_SIMPLE_RELE(dv);
2015 		goto tryagain;
2016 	} else {
2017 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2018 		goto nolock_notfound;
2019 	}
2020 
2021 found:
2022 	ASSERT(dv->sdev_state == SDEV_READY);
2023 	if (vtor) {
2024 		/*
2025 		 * Check validity of returned node
2026 		 */
2027 		switch (vtor(dv)) {
2028 		case SDEV_VTOR_VALID:
2029 			break;
2030 		case SDEV_VTOR_STALE:
2031 			/*
2032 			 * The name exists, but the cache entry is
2033 			 * stale and needs to be re-created.
2034 			 */
2035 			ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2036 			if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
2037 				rw_exit(&ddv->sdev_contents);
2038 				rw_enter(&ddv->sdev_contents, RW_WRITER);
2039 			}
2040 			sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);
2041 			rw_downgrade(&ddv->sdev_contents);
2042 			SDEV_RELE(dv);
2043 			dv = NULL;
2044 			goto lookup_create_node;
2045 			/* FALLTHRU */
2046 		case SDEV_VTOR_INVALID:
2047 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2048 			sdcmn_err7(("lookup: destroy invalid "
2049 			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
2050 			goto nolock_notfound;
2051 		case SDEV_VTOR_SKIP:
2052 			sdcmn_err7(("lookup: node not applicable - "
2053 			    "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
2054 			rw_exit(&ddv->sdev_contents);
2055 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2056 			SDEV_RELE(dv);
2057 			goto lookup_failed;
2058 		default:
2059 			cmn_err(CE_PANIC,
2060 			    "dev fs: validator failed: %s(%p)\n",
2061 			    dv->sdev_name, (void *)dv);
2062 			break;
2063 		}
2064 	}
2065 
2066 	rw_exit(&ddv->sdev_contents);
2067 	rv = sdev_to_vp(dv, vpp);
2068 	sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
2069 	    "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
2070 	    dv->sdev_state, nm, rv));
2071 	return (rv);
2072 
2073 nolock_notfound:
2074 	/*
2075 	 * Destroy the node that is created for synchronization purposes.
2076 	 */
2077 	sdcmn_err3(("devname_lookup_func: %s with state %d\n",
2078 	    nm, dv->sdev_state));
2079 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2080 	if (dv->sdev_state == SDEV_INIT) {
2081 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2082 			rw_exit(&ddv->sdev_contents);
2083 			rw_enter(&ddv->sdev_contents, RW_WRITER);
2084 		}
2085 
2086 		/*
2087 		 * Node state may have changed during the lock
2088 		 * changes. Re-check.
2089 		 */
2090 		if (dv->sdev_state == SDEV_INIT) {
2091 			sdev_dirdelete(ddv, dv);
2092 			rw_exit(&ddv->sdev_contents);
2093 			sdev_lookup_failed(ddv, nm, failed_flags);
2094 			SDEV_RELE(dv);
2095 			*vpp = NULL;
2096 			return (ENOENT);
2097 		}
2098 	}
2099 
2100 	rw_exit(&ddv->sdev_contents);
2101 	SDEV_RELE(dv);
2102 
2103 lookup_failed:
2104 	sdev_lookup_failed(ddv, nm, failed_flags);
2105 	*vpp = NULL;
2106 	return (ENOENT);
2107 }
2108 
2109 /*
2110  * Given a directory node, mark all nodes beneath as
2111  * STALE, i.e. nodes that don't exist as far as new
2112  * consumers are concerned.  Remove them from the
2113  * list of directory entries so that no lookup or
2114  * directory traversal will find them.  The node
2115  * not deallocated so existing holds are not affected.
2116  */
2117 void
sdev_stale(struct sdev_node * ddv)2118 sdev_stale(struct sdev_node *ddv)
2119 {
2120 	struct sdev_node *dv;
2121 	struct vnode *vp;
2122 
2123 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2124 
2125 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2126 	while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) {
2127 		vp = SDEVTOV(dv);
2128 		SDEV_HOLD(dv);
2129 		if (vp->v_type == VDIR)
2130 			sdev_stale(dv);
2131 
2132 		sdev_dirdelete(ddv, dv);
2133 		SDEV_RELE(dv);
2134 	}
2135 	ddv->sdev_flags |= SDEV_BUILD;
2136 	rw_exit(&ddv->sdev_contents);
2137 }
2138 
2139 /*
2140  * Given a directory node, clean out all the nodes beneath.
2141  * If expr is specified, clean node with names matching expr.
2142  * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2143  *	so they are excluded from future lookups.
2144  */
2145 int
sdev_cleandir(struct sdev_node * ddv,char * expr,uint_t flags)2146 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2147 {
2148 	int error = 0;
2149 	int busy = 0;
2150 	struct vnode *vp;
2151 	struct sdev_node *dv, *next;
2152 	int bkstore = 0;
2153 	int len = 0;
2154 	char *bks_name = NULL;
2155 
2156 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2157 
2158 	/*
2159 	 * We try our best to destroy all unused sdev_node's
2160 	 */
2161 	rw_enter(&ddv->sdev_contents, RW_WRITER);
2162 	for (dv = SDEV_FIRST_ENTRY(ddv); dv != NULL; dv = next) {
2163 		next = SDEV_NEXT_ENTRY(ddv, dv);
2164 		vp = SDEVTOV(dv);
2165 
2166 		if (expr && gmatch(dv->sdev_name, expr) == 0)
2167 			continue;
2168 
2169 		if (vp->v_type == VDIR &&
2170 		    sdev_cleandir(dv, NULL, flags) != 0) {
2171 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2172 			    dv->sdev_name));
2173 			busy++;
2174 			continue;
2175 		}
2176 
2177 		if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2178 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2179 			    dv->sdev_name));
2180 			busy++;
2181 			continue;
2182 		}
2183 
2184 		/*
2185 		 * at this point, either dv is not held or SDEV_ENFORCE
2186 		 * is specified. In either case, dv needs to be deleted
2187 		 */
2188 		SDEV_HOLD(dv);
2189 
2190 		bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2191 		if (bkstore && (vp->v_type == VDIR))
2192 			bkstore += 1;
2193 
2194 		if (bkstore) {
2195 			len = strlen(dv->sdev_name) + 1;
2196 			bks_name = kmem_alloc(len, KM_SLEEP);
2197 			bcopy(dv->sdev_name, bks_name, len);
2198 		}
2199 
2200 		sdev_dirdelete(ddv, dv);
2201 
2202 		/* take care the backing store clean up */
2203 		if (bkstore) {
2204 			ASSERT(bks_name);
2205 			ASSERT(ddv->sdev_attrvp);
2206 
2207 			if (bkstore == 1) {
2208 				error = VOP_REMOVE(ddv->sdev_attrvp,
2209 				    bks_name, kcred, NULL, 0);
2210 			} else if (bkstore == 2) {
2211 				error = VOP_RMDIR(ddv->sdev_attrvp,
2212 				    bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2213 			}
2214 
2215 			/* do not propagate the backing store errors */
2216 			if (error) {
2217 				sdcmn_err9(("sdev_cleandir: backing store"
2218 				    "not cleaned\n"));
2219 				error = 0;
2220 			}
2221 
2222 			bkstore = 0;
2223 			kmem_free(bks_name, len);
2224 			bks_name = NULL;
2225 			len = 0;
2226 		}
2227 
2228 		ddv->sdev_flags |= SDEV_BUILD;
2229 		SDEV_RELE(dv);
2230 	}
2231 
2232 	ddv->sdev_flags |= SDEV_BUILD;
2233 	rw_exit(&ddv->sdev_contents);
2234 
2235 	if (busy) {
2236 		error = EBUSY;
2237 	}
2238 
2239 	return (error);
2240 }
2241 
2242 /*
2243  * a convenient wrapper for readdir() funcs
2244  */
2245 size_t
add_dir_entry(dirent64_t * de,char * nm,size_t size,ino_t ino,offset_t off)2246 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2247 {
2248 	size_t reclen = DIRENT64_RECLEN(strlen(nm));
2249 	if (reclen > size)
2250 		return (0);
2251 
2252 	de->d_ino = (ino64_t)ino;
2253 	de->d_off = (off64_t)off + 1;
2254 	de->d_reclen = (ushort_t)reclen;
2255 	(void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2256 	return (reclen);
2257 }
2258 
2259 /*
2260  * sdev_mount service routines
2261  */
2262 int
sdev_copyin_mountargs(struct mounta * uap,struct sdev_mountargs * args)2263 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2264 {
2265 	int	error;
2266 
2267 	if (uap->datalen != sizeof (*args))
2268 		return (EINVAL);
2269 
2270 	if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2271 		cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2272 		    "get user data. error %d\n", error);
2273 		return (EFAULT);
2274 	}
2275 
2276 	return (0);
2277 }
2278 
/*
 * Step from one packed directory record to the one that follows it:
 * the address of dp advanced by dp->d_reclen bytes, as a
 * struct dirent64 pointer.  Any earlier definition of nextdp is
 * discarded first.  Note that dp is evaluated twice.
 */
#undef	nextdp
#define	nextdp(dp)	\
	((struct dirent64 *)(intptr_t)((char *)(dp) + (dp)->d_reclen))
2284 
2285 /*
2286  * readdir helper func
2287  */
2288 int
devname_readdir_func(vnode_t * vp,uio_t * uiop,cred_t * cred,int * eofp,int flags)2289 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
2290     int flags)
2291 {
2292 	struct sdev_node *ddv = VTOSDEV(vp);
2293 	struct sdev_node *dv;
2294 	dirent64_t	*dp;
2295 	ulong_t		outcount = 0;
2296 	size_t		namelen;
2297 	ulong_t		alloc_count;
2298 	void		*outbuf;
2299 	struct iovec	*iovp;
2300 	int		error = 0;
2301 	size_t		reclen;
2302 	offset_t	diroff;
2303 	offset_t	soff;
2304 	int		this_reclen;
2305 	int (*vtor)(struct sdev_node *) = NULL;
2306 	struct vattr attr;
2307 	timestruc_t now;
2308 
2309 	ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
2310 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2311 
2312 	if (uiop->uio_loffset >= MAXOFF_T) {
2313 		if (eofp)
2314 			*eofp = 1;
2315 		return (0);
2316 	}
2317 
2318 	if (uiop->uio_iovcnt != 1)
2319 		return (EINVAL);
2320 
2321 	if (vp->v_type != VDIR)
2322 		return (ENOTDIR);
2323 
2324 	if (ddv->sdev_flags & SDEV_VTOR) {
2325 		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2326 		ASSERT(vtor);
2327 	}
2328 
2329 	if (eofp != NULL)
2330 		*eofp = 0;
2331 
2332 	soff = uiop->uio_loffset;
2333 	iovp = uiop->uio_iov;
2334 	alloc_count = iovp->iov_len;
2335 	dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
2336 	outcount = 0;
2337 
2338 	if (ddv->sdev_state == SDEV_ZOMBIE)
2339 		goto get_cache;
2340 
2341 	if (SDEV_IS_GLOBAL(ddv)) {
2342 
2343 		if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
2344 		    !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
2345 		    !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
2346 		    ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
2347 		    !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
2348 		    !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2349 		    !sdev_reconfig_disable) {
2350 			/*
2351 			 * invoking "devfsadm" to do system device reconfig
2352 			 */
2353 			mutex_enter(&ddv->sdev_lookup_lock);
2354 			SDEV_BLOCK_OTHERS(ddv,
2355 			    (SDEV_READDIR|SDEV_LGWAITING));
2356 			mutex_exit(&ddv->sdev_lookup_lock);
2357 
2358 			sdcmn_err8(("readdir of %s by %s: reconfig\n",
2359 			    ddv->sdev_path, curproc->p_user.u_comm));
2360 			if (sdev_reconfig_verbose) {
2361 				cmn_err(CE_CONT,
2362 				    "?readdir of %s by %s: reconfig\n",
2363 				    ddv->sdev_path, curproc->p_user.u_comm);
2364 			}
2365 
2366 			sdev_devfsadmd_thread(ddv, NULL, kcred);
2367 		} else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
2368 			/*
2369 			 * compensate the "ls" started later than "devfsadm"
2370 			 */
2371 			mutex_enter(&ddv->sdev_lookup_lock);
2372 			SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
2373 			mutex_exit(&ddv->sdev_lookup_lock);
2374 		}
2375 
2376 		/*
2377 		 * release the contents lock so that
2378 		 * the cache may be updated by devfsadmd
2379 		 */
2380 		rw_exit(&ddv->sdev_contents);
2381 		mutex_enter(&ddv->sdev_lookup_lock);
2382 		if (SDEV_IS_READDIR(ddv))
2383 			(void) sdev_wait4lookup(ddv, SDEV_READDIR);
2384 		mutex_exit(&ddv->sdev_lookup_lock);
2385 		rw_enter(&ddv->sdev_contents, RW_READER);
2386 
2387 		sdcmn_err4(("readdir of directory %s by %s\n",
2388 		    ddv->sdev_name, curproc->p_user.u_comm));
2389 		if (ddv->sdev_flags & SDEV_BUILD) {
2390 			if (SDEV_IS_PERSIST(ddv)) {
2391 				error = sdev_filldir_from_store(ddv,
2392 				    alloc_count, cred);
2393 			}
2394 			ddv->sdev_flags &= ~SDEV_BUILD;
2395 		}
2396 	}
2397 
2398 get_cache:
2399 	/* handle "." and ".." */
2400 	diroff = 0;
2401 	if (soff == 0) {
2402 		/* first time */
2403 		this_reclen = DIRENT64_RECLEN(1);
2404 		if (alloc_count < this_reclen) {
2405 			error = EINVAL;
2406 			goto done;
2407 		}
2408 
2409 		dp->d_ino = (ino64_t)ddv->sdev_ino;
2410 		dp->d_off = (off64_t)1;
2411 		dp->d_reclen = (ushort_t)this_reclen;
2412 
2413 		(void) strncpy(dp->d_name, ".",
2414 		    DIRENT64_NAMELEN(this_reclen));
2415 		outcount += dp->d_reclen;
2416 		dp = nextdp(dp);
2417 	}
2418 
2419 	diroff++;
2420 	if (soff <= 1) {
2421 		this_reclen = DIRENT64_RECLEN(2);
2422 		if (alloc_count < outcount + this_reclen) {
2423 			error = EINVAL;
2424 			goto done;
2425 		}
2426 
2427 		dp->d_reclen = (ushort_t)this_reclen;
2428 		dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
2429 		dp->d_off = (off64_t)2;
2430 
2431 		(void) strncpy(dp->d_name, "..",
2432 		    DIRENT64_NAMELEN(this_reclen));
2433 		outcount += dp->d_reclen;
2434 
2435 		dp = nextdp(dp);
2436 	}
2437 
2438 
2439 	/* gets the cache */
2440 	diroff++;
2441 	for (dv = SDEV_FIRST_ENTRY(ddv); dv;
2442 	    dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
2443 		sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
2444 		    diroff, soff, dv->sdev_name));
2445 
2446 		/* bypassing pre-matured nodes */
2447 		if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
2448 			sdcmn_err3(("sdev_readdir: pre-mature node  "
2449 			    "%s %d\n", dv->sdev_name, dv->sdev_state));
2450 			continue;
2451 		}
2452 
2453 		/*
2454 		 * Check validity of node
2455 		 * Drop invalid and nodes to be skipped.
2456 		 * A node the validator indicates as stale needs
2457 		 * to be returned as presumably the node name itself
2458 		 * is valid and the node data itself will be refreshed
2459 		 * on lookup.  An application performing a readdir then
2460 		 * stat on each entry should thus always see consistent
2461 		 * data.  In any case, it is not possible to synchronize
2462 		 * with dynamic kernel state, and any view we return can
2463 		 * never be anything more than a snapshot at a point in time.
2464 		 */
2465 		if (vtor) {
2466 			switch (vtor(dv)) {
2467 			case SDEV_VTOR_VALID:
2468 				break;
2469 			case SDEV_VTOR_INVALID:
2470 			case SDEV_VTOR_SKIP:
2471 				continue;
2472 			case SDEV_VTOR_STALE:
2473 				sdcmn_err3(("sdev_readir: %s stale\n",
2474 				    dv->sdev_name));
2475 				break;
2476 			default:
2477 				cmn_err(CE_PANIC,
2478 				    "dev fs: validator failed: %s(%p)\n",
2479 				    dv->sdev_name, (void *)dv);
2480 				break;
2481 			/*NOTREACHED*/
2482 			}
2483 		}
2484 
2485 		namelen = strlen(dv->sdev_name);
2486 		reclen = DIRENT64_RECLEN(namelen);
2487 		if (outcount + reclen > alloc_count) {
2488 			goto full;
2489 		}
2490 		dp->d_reclen = (ushort_t)reclen;
2491 		dp->d_ino = (ino64_t)dv->sdev_ino;
2492 		dp->d_off = (off64_t)diroff + 1;
2493 		(void) strncpy(dp->d_name, dv->sdev_name,
2494 		    DIRENT64_NAMELEN(reclen));
2495 		outcount += reclen;
2496 		dp = nextdp(dp);
2497 	}
2498 
2499 full:
2500 	sdcmn_err4(("sdev_readdir: moving %lu bytes: "
2501 	    "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
2502 	    (void *)dv));
2503 
2504 	if (outcount)
2505 		error = uiomove(outbuf, outcount, UIO_READ, uiop);
2506 
2507 	if (!error) {
2508 		uiop->uio_loffset = diroff;
2509 		if (eofp)
2510 			*eofp = dv ? 0 : 1;
2511 	}
2512 
2513 
2514 	if (ddv->sdev_attrvp) {
2515 		gethrestime(&now);
2516 		attr.va_ctime = now;
2517 		attr.va_atime = now;
2518 		attr.va_mask = AT_CTIME|AT_ATIME;
2519 
2520 		(void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
2521 	}
2522 done:
2523 	kmem_free(outbuf, alloc_count);
2524 	return (error);
2525 }
2526 
2527 static int
sdev_modctl_lookup(const char * path,vnode_t ** r_vp)2528 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2529 {
2530 	vnode_t *vp;
2531 	vnode_t *cvp;
2532 	struct sdev_node *svp;
2533 	char *nm;
2534 	struct pathname pn;
2535 	int error;
2536 	int persisted = 0;
2537 
2538 	ASSERT(INGLOBALZONE(curproc));
2539 
2540 	if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2541 		return (error);
2542 	nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2543 
2544 	vp = rootdir;
2545 	VN_HOLD(vp);
2546 
2547 	while (pn_pathleft(&pn)) {
2548 		ASSERT(vp->v_type == VDIR || vp->v_type == VLNK);
2549 		(void) pn_getcomponent(&pn, nm);
2550 
2551 		/*
2552 		 * Deal with the .. special case where we may be
2553 		 * traversing up across a mount point, to the
2554 		 * root of this filesystem or global root.
2555 		 */
2556 		if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) {
2557 checkforroot:
2558 			if (VN_CMP(vp, rootdir)) {
2559 				nm[1] = 0;
2560 			} else if (vp->v_flag & VROOT) {
2561 				vfs_t *vfsp;
2562 				cvp = vp;
2563 				vfsp = cvp->v_vfsp;
2564 				vfs_rlock_wait(vfsp);
2565 				vp = cvp->v_vfsp->vfs_vnodecovered;
2566 				if (vp == NULL ||
2567 				    (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
2568 					vfs_unlock(vfsp);
2569 					VN_RELE(cvp);
2570 					error = EIO;
2571 					break;
2572 				}
2573 				VN_HOLD(vp);
2574 				vfs_unlock(vfsp);
2575 				VN_RELE(cvp);
2576 				cvp = NULL;
2577 				goto checkforroot;
2578 			}
2579 		}
2580 
2581 		error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2582 		    NULL, NULL);
2583 		if (error) {
2584 			VN_RELE(vp);
2585 			break;
2586 		}
2587 
2588 		/* traverse mount points encountered on our journey */
2589 		if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2590 			VN_RELE(vp);
2591 			VN_RELE(cvp);
2592 			break;
2593 		}
2594 
2595 		/*
2596 		 * symbolic link, can be either relative and absolute
2597 		 */
2598 		if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) {
2599 			struct pathname linkpath;
2600 			pn_alloc(&linkpath);
2601 			if (error = pn_getsymlink(cvp, &linkpath, kcred)) {
2602 				pn_free(&linkpath);
2603 				break;
2604 			}
2605 			if (pn_pathleft(&linkpath) == 0)
2606 				(void) pn_set(&linkpath, ".");
2607 			error = pn_insert(&pn, &linkpath, strlen(nm));
2608 			pn_free(&linkpath);
2609 			if (pn.pn_pathlen == 0) {
2610 				VN_RELE(vp);
2611 				return (ENOENT);
2612 			}
2613 			if (pn.pn_path[0] == '/') {
2614 				pn_skipslash(&pn);
2615 				VN_RELE(vp);
2616 				VN_RELE(cvp);
2617 				vp = rootdir;
2618 				VN_HOLD(vp);
2619 			} else {
2620 				VN_RELE(cvp);
2621 			}
2622 			continue;
2623 		}
2624 
2625 		VN_RELE(vp);
2626 
2627 		/*
2628 		 * Direct the operation to the persisting filesystem
2629 		 * underlying /dev.  Bail if we encounter a
2630 		 * non-persistent dev entity here.
2631 		 */
2632 		if (cvp->v_vfsp->vfs_fstype == devtype) {
2633 
2634 			if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2635 				error = ENOENT;
2636 				VN_RELE(cvp);
2637 				break;
2638 			}
2639 
2640 			if (VTOSDEV(cvp) == NULL) {
2641 				error = ENOENT;
2642 				VN_RELE(cvp);
2643 				break;
2644 			}
2645 			svp = VTOSDEV(cvp);
2646 			if ((vp = svp->sdev_attrvp) == NULL) {
2647 				error = ENOENT;
2648 				VN_RELE(cvp);
2649 				break;
2650 			}
2651 			persisted = 1;
2652 			VN_HOLD(vp);
2653 			VN_RELE(cvp);
2654 			cvp = vp;
2655 		}
2656 
2657 		vp = cvp;
2658 		pn_skipslash(&pn);
2659 	}
2660 
2661 	kmem_free(nm, MAXNAMELEN);
2662 	pn_free(&pn);
2663 
2664 	if (error)
2665 		return (error);
2666 
2667 	/*
2668 	 * Only return persisted nodes in the filesystem underlying /dev.
2669 	 */
2670 	if (!persisted) {
2671 		VN_RELE(vp);
2672 		return (ENOENT);
2673 	}
2674 
2675 	*r_vp = vp;
2676 	return (0);
2677 }
2678 
2679 int
sdev_modctl_readdir(const char * dir,char *** dirlistp,int * npathsp,int * npathsp_alloc,int checking_empty)2680 sdev_modctl_readdir(const char *dir, char ***dirlistp, int *npathsp,
2681     int *npathsp_alloc, int checking_empty)
2682 {
2683 	char	**pathlist = NULL;
2684 	char	**newlist = NULL;
2685 	int	npaths = 0;
2686 	int	npaths_alloc = 0;
2687 	dirent64_t *dbuf = NULL;
2688 	int	n;
2689 	char	*s;
2690 	int error;
2691 	vnode_t *vp;
2692 	int eof;
2693 	struct iovec iov;
2694 	struct uio uio;
2695 	struct dirent64 *dp;
2696 	size_t dlen;
2697 	size_t dbuflen;
2698 	int ndirents = 64;
2699 	char *nm;
2700 
2701 	error = sdev_modctl_lookup(dir, &vp);
2702 	sdcmn_err11(("modctl readdir: %s by %s: %s\n",
2703 	    dir, curproc->p_user.u_comm,
2704 	    (error == 0) ? "ok" : "failed"));
2705 	if (error)
2706 		return (error);
2707 
2708 	dlen = ndirents * (sizeof (*dbuf));
2709 	dbuf = kmem_alloc(dlen, KM_SLEEP);
2710 
2711 	uio.uio_iov = &iov;
2712 	uio.uio_iovcnt = 1;
2713 	uio.uio_segflg = UIO_SYSSPACE;
2714 	uio.uio_fmode = 0;
2715 	uio.uio_extflg = UIO_COPY_CACHED;
2716 	uio.uio_loffset = 0;
2717 	uio.uio_llimit = MAXOFFSET_T;
2718 
2719 	eof = 0;
2720 	error = 0;
2721 	while (!error && !eof) {
2722 		uio.uio_resid = dlen;
2723 		iov.iov_base = (char *)dbuf;
2724 		iov.iov_len = dlen;
2725 
2726 		(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2727 		error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
2728 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2729 
2730 		dbuflen = dlen - uio.uio_resid;
2731 
2732 		if (error || dbuflen == 0)
2733 			break;
2734 
2735 		for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
2736 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
2737 
2738 			nm = dp->d_name;
2739 
2740 			if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
2741 				continue;
2742 			if (npaths == npaths_alloc) {
2743 				npaths_alloc += 64;
2744 				newlist = (char **)
2745 				    kmem_zalloc((npaths_alloc + 1) *
2746 				    sizeof (char *), KM_SLEEP);
2747 				if (pathlist) {
2748 					bcopy(pathlist, newlist,
2749 					    npaths * sizeof (char *));
2750 					kmem_free(pathlist,
2751 					    (npaths + 1) * sizeof (char *));
2752 				}
2753 				pathlist = newlist;
2754 			}
2755 			n = strlen(nm) + 1;
2756 			s = kmem_alloc(n, KM_SLEEP);
2757 			bcopy(nm, s, n);
2758 			pathlist[npaths++] = s;
2759 			sdcmn_err11(("  %s/%s\n", dir, s));
2760 
2761 			/* if checking empty, one entry is as good as many */
2762 			if (checking_empty) {
2763 				eof = 1;
2764 				break;
2765 			}
2766 		}
2767 	}
2768 
2769 	VN_RELE(vp);
2770 
2771 	if (dbuf)
2772 		kmem_free(dbuf, dlen);
2773 
2774 	if (error)
2775 		return (error);
2776 
2777 	*dirlistp = pathlist;
2778 	*npathsp = npaths;
2779 	*npathsp_alloc = npaths_alloc;
2780 
2781 	return (0);
2782 }
2783 
2784 void
sdev_modctl_readdir_free(char ** pathlist,int npaths,int npaths_alloc)2785 sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
2786 {
2787 	int	i, n;
2788 
2789 	for (i = 0; i < npaths; i++) {
2790 		n = strlen(pathlist[i]) + 1;
2791 		kmem_free(pathlist[i], n);
2792 	}
2793 
2794 	kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
2795 }
2796 
2797 int
sdev_modctl_devexists(const char * path)2798 sdev_modctl_devexists(const char *path)
2799 {
2800 	vnode_t *vp;
2801 	int error;
2802 
2803 	error = sdev_modctl_lookup(path, &vp);
2804 	sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
2805 	    path, curproc->p_user.u_comm,
2806 	    (error == 0) ? "ok" : "failed"));
2807 	if (error == 0)
2808 		VN_RELE(vp);
2809 
2810 	return (error);
2811 }
2812 
2813 /*
2814  * a generic setattr() function
2815  *
2816  * note: flags only supports AT_UID and AT_GID.
2817  *	 Future enhancements can be done for other types, e.g. AT_MODE
2818  */
2819 int
devname_setattr_func(struct vnode * vp,struct vattr * vap,int flags,struct cred * cred,int (* callback)(struct sdev_node *,struct vattr *,int),int protocol)2820 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
2821     struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
2822     int), int protocol)
2823 {
2824 	struct sdev_node	*dv = VTOSDEV(vp);
2825 	struct sdev_node	*parent = dv->sdev_dotdot;
2826 	struct vattr		*get;
2827 	uint_t			mask = vap->va_mask;
2828 	int 			error;
2829 
2830 	/* some sanity checks */
2831 	if (vap->va_mask & AT_NOSET)
2832 		return (EINVAL);
2833 
2834 	if (vap->va_mask & AT_SIZE) {
2835 		if (vp->v_type == VDIR) {
2836 			return (EISDIR);
2837 		}
2838 	}
2839 
2840 	/* no need to set attribute, but do not fail either */
2841 	ASSERT(parent);
2842 	rw_enter(&parent->sdev_contents, RW_READER);
2843 	if (dv->sdev_state == SDEV_ZOMBIE) {
2844 		rw_exit(&parent->sdev_contents);
2845 		return (0);
2846 	}
2847 
2848 	/* If backing store exists, just set it. */
2849 	if (dv->sdev_attrvp) {
2850 		rw_exit(&parent->sdev_contents);
2851 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
2852 	}
2853 
2854 	/*
2855 	 * Otherwise, for nodes with the persistence attribute, create it.
2856 	 */
2857 	ASSERT(dv->sdev_attr);
2858 	if (SDEV_IS_PERSIST(dv) ||
2859 	    ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
2860 		sdev_vattr_merge(dv, vap);
2861 		rw_enter(&dv->sdev_contents, RW_WRITER);
2862 		error = sdev_shadow_node(dv, cred);
2863 		rw_exit(&dv->sdev_contents);
2864 		rw_exit(&parent->sdev_contents);
2865 
2866 		if (error)
2867 			return (error);
2868 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
2869 	}
2870 
2871 
2872 	/*
2873 	 * sdev_attr was allocated in sdev_mknode
2874 	 */
2875 	rw_enter(&dv->sdev_contents, RW_WRITER);
2876 	error = secpolicy_vnode_setattr(cred, vp, vap,
2877 	    dv->sdev_attr, flags, sdev_unlocked_access, dv);
2878 	if (error) {
2879 		rw_exit(&dv->sdev_contents);
2880 		rw_exit(&parent->sdev_contents);
2881 		return (error);
2882 	}
2883 
2884 	get = dv->sdev_attr;
2885 	if (mask & AT_MODE) {
2886 		get->va_mode &= S_IFMT;
2887 		get->va_mode |= vap->va_mode & ~S_IFMT;
2888 	}
2889 
2890 	if ((mask & AT_UID) || (mask & AT_GID)) {
2891 		if (mask & AT_UID)
2892 			get->va_uid = vap->va_uid;
2893 		if (mask & AT_GID)
2894 			get->va_gid = vap->va_gid;
2895 		/*
2896 		 * a callback must be provided if the protocol is set
2897 		 */
2898 		if ((protocol & AT_UID) || (protocol & AT_GID)) {
2899 			ASSERT(callback);
2900 			error = callback(dv, get, protocol);
2901 			if (error) {
2902 				rw_exit(&dv->sdev_contents);
2903 				rw_exit(&parent->sdev_contents);
2904 				return (error);
2905 			}
2906 		}
2907 	}
2908 
2909 	if (mask & AT_ATIME)
2910 		get->va_atime = vap->va_atime;
2911 	if (mask & AT_MTIME)
2912 		get->va_mtime = vap->va_mtime;
2913 	if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
2914 		gethrestime(&get->va_ctime);
2915 	}
2916 
2917 	sdev_vattr_merge(dv, get);
2918 	rw_exit(&dv->sdev_contents);
2919 	rw_exit(&parent->sdev_contents);
2920 	return (0);
2921 }
2922 
2923 /*
2924  * a generic inactive() function
2925  */
2926 /*ARGSUSED*/
2927 void
devname_inactive_func(struct vnode * vp,struct cred * cred,void (* callback)(struct vnode *))2928 devname_inactive_func(struct vnode *vp, struct cred *cred,
2929     void (*callback)(struct vnode *))
2930 {
2931 	int clean;
2932 	struct sdev_node *dv = VTOSDEV(vp);
2933 	int state;
2934 
2935 	mutex_enter(&vp->v_lock);
2936 	ASSERT(vp->v_count >= 1);
2937 
2938 
2939 	if (vp->v_count == 1 && callback != NULL)
2940 		callback(vp);
2941 
2942 	rw_enter(&dv->sdev_contents, RW_WRITER);
2943 	state = dv->sdev_state;
2944 
2945 	clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
2946 
2947 	/*
2948 	 * sdev is a rather bad public citizen. It violates the general
2949 	 * agreement that in memory nodes should always have a valid reference
2950 	 * count on their vnode. But that's not the case here. This means that
2951 	 * we do actually have to distinguish between getting inactive callbacks
2952 	 * for zombies and otherwise. This should probably be fixed.
2953 	 */
2954 	if (clean) {
2955 		/* Remove the . entry to ourselves */
2956 		if (vp->v_type == VDIR) {
2957 			decr_link(dv);
2958 		}
2959 		VERIFY(dv->sdev_nlink == 1);
2960 		decr_link(dv);
2961 		VN_RELE_LOCKED(vp);
2962 		rw_exit(&dv->sdev_contents);
2963 		mutex_exit(&vp->v_lock);
2964 		sdev_nodedestroy(dv, 0);
2965 	} else {
2966 		VN_RELE_LOCKED(vp);
2967 		rw_exit(&dv->sdev_contents);
2968 		mutex_exit(&vp->v_lock);
2969 	}
2970 }
2971