1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2013, 2016 Joyent, Inc. All rights reserved.
24 * Copyright (c) 2017 by Delphix. All rights reserved.
25 */
26
27 /*
28 * utility routines for the /dev fs
29 */
30
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/t_lock.h>
34 #include <sys/systm.h>
35 #include <sys/sysmacros.h>
36 #include <sys/user.h>
37 #include <sys/time.h>
38 #include <sys/vfs.h>
39 #include <sys/vnode.h>
40 #include <sys/file.h>
41 #include <sys/fcntl.h>
42 #include <sys/flock.h>
43 #include <sys/kmem.h>
44 #include <sys/uio.h>
45 #include <sys/errno.h>
46 #include <sys/stat.h>
47 #include <sys/cred.h>
48 #include <sys/dirent.h>
49 #include <sys/pathname.h>
50 #include <sys/cmn_err.h>
51 #include <sys/debug.h>
52 #include <sys/mode.h>
53 #include <sys/policy.h>
54 #include <fs/fs_subr.h>
55 #include <sys/mount.h>
56 #include <sys/fs/snode.h>
57 #include <sys/fs/dv_node.h>
58 #include <sys/fs/sdev_impl.h>
59 #include <sys/sunndi.h>
60 #include <sys/sunmdi.h>
61 #include <sys/conf.h>
62 #include <sys/proc.h>
63 #include <sys/user.h>
64 #include <sys/modctl.h>
65
#ifdef DEBUG
/*
 * NOTE(review): presumably the mask/level consulted by the sdcmn_err*()
 * debug macros -- confirm against their definition.
 */
int sdev_debug = 0x00000001;
/* flags handed to kmem_cache_create() by sdev_node_cache_init(); 0 = none */
int sdev_debug_cache_flags = 0;
#endif
70
71 /*
72 * globals
73 */
/* prototype memory vattrs */
/*
 * Template attributes for a directory node: type VDIR with the default
 * directory mode, owner and group.  va_mask selects type, mode, uid and
 * gid; every remaining initializer is zero.
 */
vattr_t sdev_vattr_dir = {
	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
	VDIR,					/* va_type */
	SDEV_DIRMODE_DEFAULT,			/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
93
/*
 * Template attributes for a symbolic link node: type VLNK with the default
 * link mode.  Note that va_mask selects only type and mode here; the uid
 * and gid initializers are present but not selected by the mask.
 */
vattr_t sdev_vattr_lnk = {
	AT_TYPE|AT_MODE,			/* va_mask */
	VLNK,					/* va_type */
	SDEV_LNKMODE_DEFAULT,			/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
112
/*
 * Template attributes for a block device node: type VBLK, mode S_IFBLK
 * combined with the default device permissions, default owner and group.
 * va_rdev is left zero in the template.
 */
vattr_t sdev_vattr_blk = {
	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
	VBLK,					/* va_type */
	S_IFBLK | SDEV_DEVMODE_DEFAULT,		/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
131
/*
 * Template attributes for a character device node: type VCHR, mode S_IFCHR
 * combined with the default device permissions, default owner and group.
 * va_rdev is left zero in the template.
 */
vattr_t sdev_vattr_chr = {
	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
	VCHR,					/* va_type */
	S_IFCHR | SDEV_DEVMODE_DEFAULT,		/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
150
/*
 * Object cache from which every sdev_node is allocated.  Created by
 * sdev_node_cache_init() and torn down (and reset to NULL) by
 * sdev_node_cache_fini().
 */
kmem_cache_t *sdev_node_cache;	/* sdev_node cache */
/* NOTE(review): presumably the fs type index for the dev fs; set elsewhere */
int devtype;			/* fstype */
153
154 static void
sdev_prof_free(struct sdev_node * dv)155 sdev_prof_free(struct sdev_node *dv)
156 {
157 ASSERT(!SDEV_IS_GLOBAL(dv));
158 nvlist_free(dv->sdev_prof.dev_name);
159 nvlist_free(dv->sdev_prof.dev_map);
160 nvlist_free(dv->sdev_prof.dev_symlink);
161 nvlist_free(dv->sdev_prof.dev_glob_incdir);
162 nvlist_free(dv->sdev_prof.dev_glob_excdir);
163 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
164 }
165
166 /* sdev_node cache constructor */
167 /*ARGSUSED1*/
168 static int
i_sdev_node_ctor(void * buf,void * cfarg,int flag)169 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
170 {
171 struct sdev_node *dv = (struct sdev_node *)buf;
172 struct vnode *vp;
173
174 bzero(buf, sizeof (struct sdev_node));
175 vp = dv->sdev_vnode = vn_alloc(flag);
176 if (vp == NULL) {
177 return (-1);
178 }
179 vp->v_data = dv;
180 rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
181 return (0);
182 }
183
184 /* sdev_node cache destructor */
185 /*ARGSUSED1*/
186 static void
i_sdev_node_dtor(void * buf,void * arg)187 i_sdev_node_dtor(void *buf, void *arg)
188 {
189 struct sdev_node *dv = (struct sdev_node *)buf;
190 struct vnode *vp = SDEVTOV(dv);
191
192 rw_destroy(&dv->sdev_contents);
193 vn_free(vp);
194 }
195
196 /* initialize sdev_node cache */
197 void
sdev_node_cache_init()198 sdev_node_cache_init()
199 {
200 int flags = 0;
201
202 #ifdef DEBUG
203 flags = sdev_debug_cache_flags;
204 if (flags)
205 sdcmn_err(("cache debug flags 0x%x\n", flags));
206 #endif /* DEBUG */
207
208 ASSERT(sdev_node_cache == NULL);
209 sdev_node_cache = kmem_cache_create("sdev_node_cache",
210 sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
211 NULL, NULL, NULL, flags);
212 }
213
214 /* destroy sdev_node cache */
215 void
sdev_node_cache_fini()216 sdev_node_cache_fini()
217 {
218 ASSERT(sdev_node_cache != NULL);
219 kmem_cache_destroy(sdev_node_cache);
220 sdev_node_cache = NULL;
221 }
222
223 /*
224 * Compare two nodes lexographically to balance avl tree
225 */
226 static int
sdev_compare_nodes(const struct sdev_node * dv1,const struct sdev_node * dv2)227 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
228 {
229 int rv;
230 if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
231 return (0);
232 return ((rv < 0) ? -1 : 1);
233 }
234
235 void
sdev_set_nodestate(struct sdev_node * dv,sdev_node_state_t state)236 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
237 {
238 ASSERT(dv);
239 ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
240 dv->sdev_state = state;
241 }
242
243 static void
sdev_attr_update(struct sdev_node * dv,vattr_t * vap)244 sdev_attr_update(struct sdev_node *dv, vattr_t *vap)
245 {
246 timestruc_t now;
247 struct vattr *attrp;
248 uint_t mask;
249
250 ASSERT(dv->sdev_attr);
251 ASSERT(vap);
252
253 attrp = dv->sdev_attr;
254 mask = vap->va_mask;
255 if (mask & AT_TYPE)
256 attrp->va_type = vap->va_type;
257 if (mask & AT_MODE)
258 attrp->va_mode = vap->va_mode;
259 if (mask & AT_UID)
260 attrp->va_uid = vap->va_uid;
261 if (mask & AT_GID)
262 attrp->va_gid = vap->va_gid;
263 if (mask & AT_RDEV)
264 attrp->va_rdev = vap->va_rdev;
265
266 gethrestime(&now);
267 attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now;
268 attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now;
269 attrp->va_ctime = (mask & AT_CTIME) ? vap->va_ctime : now;
270 }
271
272 static void
sdev_attr_alloc(struct sdev_node * dv,vattr_t * vap)273 sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap)
274 {
275 ASSERT(dv->sdev_attr == NULL);
276 ASSERT(vap->va_mask & AT_TYPE);
277 ASSERT(vap->va_mask & AT_MODE);
278
279 dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
280 sdev_attr_update(dv, vap);
281 }
282
283 /* alloc and initialize a sdev_node */
284 int
sdev_nodeinit(struct sdev_node * ddv,char * nm,struct sdev_node ** newdv,vattr_t * vap)285 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
286 vattr_t *vap)
287 {
288 struct sdev_node *dv = NULL;
289 struct vnode *vp;
290 size_t nmlen, len;
291 devname_handle_t *dhl;
292
293 nmlen = strlen(nm) + 1;
294 if (nmlen > MAXNAMELEN) {
295 sdcmn_err9(("sdev_nodeinit: node name %s"
296 " too long\n", nm));
297 *newdv = NULL;
298 return (ENAMETOOLONG);
299 }
300
301 dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
302
303 dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
304 bcopy(nm, dv->sdev_name, nmlen);
305 dv->sdev_namelen = nmlen - 1; /* '\0' not included */
306 len = strlen(ddv->sdev_path) + strlen(nm) + 2;
307 dv->sdev_path = kmem_alloc(len, KM_SLEEP);
308 (void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
309 /* overwritten for VLNK nodes */
310 dv->sdev_symlink = NULL;
311 list_link_init(&dv->sdev_plist);
312
313 vp = SDEVTOV(dv);
314 vn_reinit(vp);
315 vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
316 if (vap)
317 vp->v_type = vap->va_type;
318
319 /*
320 * initialized to the parent's vnodeops.
321 * maybe overwriten for a VDIR
322 */
323 vn_setops(vp, vn_getops(SDEVTOV(ddv)));
324 vn_exists(vp);
325
326 dv->sdev_dotdot = NULL;
327 dv->sdev_attrvp = NULL;
328 if (vap) {
329 sdev_attr_alloc(dv, vap);
330 } else {
331 dv->sdev_attr = NULL;
332 }
333
334 dv->sdev_ino = sdev_mkino(dv);
335 dv->sdev_nlink = 0; /* updated on insert */
336 dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
337 dv->sdev_flags |= SDEV_BUILD;
338 mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
339 cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
340 if (SDEV_IS_GLOBAL(ddv)) {
341 dv->sdev_flags |= SDEV_GLOBAL;
342 dhl = &(dv->sdev_handle);
343 dhl->dh_data = dv;
344 dhl->dh_args = NULL;
345 sdev_set_no_negcache(dv);
346 dv->sdev_gdir_gen = 0;
347 } else {
348 dv->sdev_flags &= ~SDEV_GLOBAL;
349 dv->sdev_origin = NULL; /* set later */
350 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
351 dv->sdev_ldir_gen = 0;
352 dv->sdev_devtree_gen = 0;
353 }
354
355 rw_enter(&dv->sdev_contents, RW_WRITER);
356 sdev_set_nodestate(dv, SDEV_INIT);
357 rw_exit(&dv->sdev_contents);
358 *newdv = dv;
359
360 return (0);
361 }
362
363 /*
364 * Transition a sdev_node into SDEV_READY state. If this fails, it is up to the
365 * caller to transition the node to the SDEV_ZOMBIE state.
366 */
367 int
sdev_nodeready(struct sdev_node * dv,struct vattr * vap,struct vnode * avp,void * args,struct cred * cred)368 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
369 void *args, struct cred *cred)
370 {
371 int error = 0;
372 struct vnode *vp = SDEVTOV(dv);
373 vtype_t type;
374
375 ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
376
377 type = vap->va_type;
378 vp->v_type = type;
379 vp->v_rdev = vap->va_rdev;
380 rw_enter(&dv->sdev_contents, RW_WRITER);
381 if (type == VDIR) {
382 dv->sdev_nlink = 2;
383 dv->sdev_flags &= ~SDEV_PERSIST;
384 dv->sdev_flags &= ~SDEV_DYNAMIC;
385 vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
386 ASSERT(dv->sdev_dotdot);
387 ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
388 vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
389 avl_create(&dv->sdev_entries,
390 (int (*)(const void *, const void *))sdev_compare_nodes,
391 sizeof (struct sdev_node),
392 offsetof(struct sdev_node, sdev_avllink));
393 } else if (type == VLNK) {
394 ASSERT(args);
395 dv->sdev_nlink = 1;
396 dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
397 } else {
398 dv->sdev_nlink = 1;
399 }
400 sdev_plugin_nodeready(dv);
401
402 if (!(SDEV_IS_GLOBAL(dv))) {
403 dv->sdev_origin = (struct sdev_node *)args;
404 dv->sdev_flags &= ~SDEV_PERSIST;
405 }
406
407 /*
408 * shadow node is created here OR
409 * if failed (indicated by dv->sdev_attrvp == NULL),
410 * created later in sdev_setattr
411 */
412 if (avp) {
413 dv->sdev_attrvp = avp;
414 } else {
415 if (dv->sdev_attr == NULL) {
416 sdev_attr_alloc(dv, vap);
417 } else {
418 sdev_attr_update(dv, vap);
419 }
420
421 if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv))
422 error = sdev_shadow_node(dv, cred);
423 }
424
425 if (error == 0) {
426 /* transition to READY state */
427 sdev_set_nodestate(dv, SDEV_READY);
428 sdev_nc_node_exists(dv);
429 }
430 rw_exit(&dv->sdev_contents);
431 return (error);
432 }
433
/*
 * Build the VROOT sdev_node.
 *
 * Allocates a node from sdev_node_cache, sets its vnode up as a VROOT
 * directory on vfsp with v_rdev devdev, and returns it in the SDEV_READY
 * state.  avp becomes the node's attribute vnode and must be non-NULL.
 * mvp and cred are not used here.
 *
 * The node name is the vfs mount point when one is set, otherwise "/dev";
 * the node path is always "/dev".  Only a root actually named "/dev" is
 * flagged SDEV_GLOBAL|SDEV_PERSIST.
 */
/*ARGSUSED*/
struct sdev_node *
sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
    struct vnode *avp, struct cred *cred)
{
	struct sdev_node *dv;
	struct vnode *vp;
	char devdir[] = "/dev";

	ASSERT(sdev_node_cache != NULL);
	ASSERT(avp);
	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
	vp = SDEVTOV(dv);
	vn_reinit(vp);
	vp->v_flag |= VROOT;
	vp->v_vfsp = vfsp;
	vp->v_type = VDIR;
	vp->v_rdev = devdev;
	vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
	vn_exists(vp);

	if (vfsp->vfs_mntpt)
		dv->sdev_name = i_ddi_strdup(
		    (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
	else
		/* vfs_mountdev1 set mount point later */
		dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
	dv->sdev_namelen = strlen(dv->sdev_name);  /* '\0' not included */
	dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
	dv->sdev_ino = SDEV_ROOTINO;
	dv->sdev_nlink = 2;		/* name + . (no sdev_insert) */
	dv->sdev_dotdot = dv;		/* .. == self */
	dv->sdev_attrvp = avp;
	dv->sdev_attr = NULL;
	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
	/* the name, not the path, decides whether this is the global /dev */
	if (strcmp(dv->sdev_name, "/dev") == 0) {
		dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
		bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
		dv->sdev_gdir_gen = 0;
	} else {
		dv->sdev_flags = SDEV_BUILD;
		dv->sdev_flags &= ~SDEV_PERSIST;
		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
		dv->sdev_ldir_gen = 0;
		dv->sdev_devtree_gen = 0;
	}

	/* directory cache: children ordered by name */
	avl_create(&dv->sdev_entries,
	    (int (*)(const void *, const void *))sdev_compare_nodes,
	    sizeof (struct sdev_node),
	    offsetof(struct sdev_node, sdev_avllink));

	rw_enter(&dv->sdev_contents, RW_WRITER);
	sdev_set_nodestate(dv, SDEV_READY);
	rw_exit(&dv->sdev_contents);
	sdev_nc_node_exists(dv);
	return (dv);
}
496
/*
 * Table of directory names that receive special treatment.  Each entry
 * carries, in order: the directory name, a vnodeops template, a pointer
 * to the vnodeops built from it, a validation routine, and sdev flags.
 * Entries with NULL ops and validator contribute flags only.  The table
 * is terminated by an all-NULL entry.
 */
struct sdev_vop_table vtab[] = {
	{ "pts", devpts_vnodeops_tbl, &devpts_vnodeops, devpts_validate,
	SDEV_DYNAMIC | SDEV_VTOR },

	{ "vt", devvt_vnodeops_tbl, &devvt_vnodeops, devvt_validate,
	SDEV_DYNAMIC | SDEV_VTOR },

	{ "zvol", devzvol_vnodeops_tbl, &devzvol_vnodeops,
	devzvol_validate, SDEV_ZONED | SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },

	{ "zcons", NULL, NULL, NULL, SDEV_NO_NCACHE },

	{ "net", devnet_vnodeops_tbl, &devnet_vnodeops, devnet_validate,
	SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },

	{ "ipnet", devipnet_vnodeops_tbl, &devipnet_vnodeops,
	devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE },

	/*
	 * SDEV_DYNAMIC: prevent calling out to devfsadm, since only the
	 * lofi driver controls child nodes.
	 *
	 * SDEV_PERSIST: ensure devfsadm knows to clean up any persisted
	 * stale nodes (e.g. from devfsadm -R).
	 *
	 * In addition, devfsadm knows not to attempt a rmdir: a zone
	 * may hold a reference, which would zombify the node,
	 * preventing a mkdir.
	 */

	{ "lofi", NULL, NULL, NULL,
	    SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
	{ "rlofi", NULL, NULL, NULL,
	    SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },

	{ NULL, NULL, NULL, NULL, 0}
};
534
535
536 /*
537 * Build the base root inode
538 */
539 ino_t
sdev_mkino(struct sdev_node * dv)540 sdev_mkino(struct sdev_node *dv)
541 {
542 ino_t ino;
543
544 /*
545 * for now, follow the lead of tmpfs here
546 * need to someday understand the requirements here
547 */
548 ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
549 ino += SDEV_ROOTINO + 1;
550
551 return (ino);
552 }
553
554 int
sdev_getlink(struct vnode * linkvp,char ** link)555 sdev_getlink(struct vnode *linkvp, char **link)
556 {
557 int err;
558 char *buf;
559 struct uio uio = {0};
560 struct iovec iov = {0};
561
562 if (linkvp == NULL)
563 return (ENOENT);
564 ASSERT(linkvp->v_type == VLNK);
565
566 buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
567 iov.iov_base = buf;
568 iov.iov_len = MAXPATHLEN;
569 uio.uio_iov = &iov;
570 uio.uio_iovcnt = 1;
571 uio.uio_resid = MAXPATHLEN;
572 uio.uio_segflg = UIO_SYSSPACE;
573 uio.uio_llimit = MAXOFFSET_T;
574
575 err = VOP_READLINK(linkvp, &uio, kcred, NULL);
576 if (err) {
577 cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
578 kmem_free(buf, MAXPATHLEN);
579 return (ENOENT);
580 }
581
582 /* mission complete */
583 *link = i_ddi_strdup(buf, KM_SLEEP);
584 kmem_free(buf, MAXPATHLEN);
585 return (0);
586 }
587
588 /*
589 * A convenient wrapper to get the devfs node vnode for a device
590 * minor functionality: readlink() of a /dev symlink
591 * Place the link into dv->sdev_symlink
592 */
593 static int
sdev_follow_link(struct sdev_node * dv)594 sdev_follow_link(struct sdev_node *dv)
595 {
596 int err;
597 struct vnode *linkvp;
598 char *link = NULL;
599
600 linkvp = SDEVTOV(dv);
601 if (linkvp == NULL)
602 return (ENOENT);
603 ASSERT(linkvp->v_type == VLNK);
604 err = sdev_getlink(linkvp, &link);
605 if (err) {
606 dv->sdev_symlink = NULL;
607 return (ENOENT);
608 }
609
610 ASSERT(link != NULL);
611 dv->sdev_symlink = link;
612 return (0);
613 }
614
615 static int
sdev_node_check(struct sdev_node * dv,struct vattr * nvap,void * nargs)616 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
617 {
618 vtype_t otype = SDEVTOV(dv)->v_type;
619
620 /*
621 * existing sdev_node has a different type.
622 */
623 if (otype != nvap->va_type) {
624 sdcmn_err9(("sdev_node_check: existing node "
625 " %s type %d does not match new node type %d\n",
626 dv->sdev_name, otype, nvap->va_type));
627 return (EEXIST);
628 }
629
630 /*
631 * For a symlink, the target should be the same.
632 */
633 if (otype == VLNK) {
634 ASSERT(nargs != NULL);
635 ASSERT(dv->sdev_symlink != NULL);
636 if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
637 sdcmn_err9(("sdev_node_check: existing node "
638 " %s has different symlink %s as new node "
639 " %s\n", dv->sdev_name, dv->sdev_symlink,
640 (char *)nargs));
641 return (EEXIST);
642 }
643 }
644
645 return (0);
646 }
647
/*
 * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
 *
 * arguments:
 *	- ddv (parent)
 *	- nm (child name)
 *	- newdv (in/out: when *newdv is non-NULL on entry that existing
 *	    node is used; otherwise a node is allocated and entered into
 *	    the directory cache.  The resulting sdev_node for nm is
 *	    returned here; it is set to NULL on failure after a node has
 *	    been obtained)
 *	- vap (vattr for the node to be created, va_type should be set.
 *	    the defaults should be used if unknown)
 *	- avp (attribute vnode)
 *	- cred
 *	- args
 *	    . tnm (for VLNK)
 *	    . global sdev_node (for !SDEV_GLOBAL)
 *	- state: SDEV_INIT, SDEV_READY
 *
 * only ddv, nm, newdv, vap, cred are required for sdev_mknode(SDEV_INIT)
 *
 * Returns 0 on success.  Returns ENOENT when a node must be allocated
 * but the parent is a ZOMBIE, the error from sdev_nodeinit(), or the
 * error from sdev_nodeready()/sdev_node_check() when SDEV_READY was
 * requested.
 *
 * NOTE: directory contents writers lock needs to be held before
 * calling this routine.
 */
int
sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
    struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
    sdev_node_state_t state)
{
	int error = 0;
	sdev_node_state_t node_state;
	struct sdev_node *dv = NULL;

	ASSERT(state != SDEV_ZOMBIE);
	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));

	if (*newdv) {
		/* the caller supplied an existing node */
		dv = *newdv;
	} else {
		/* allocate and initialize a sdev_node */
		if (ddv->sdev_state == SDEV_ZOMBIE) {
			sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
			    ddv->sdev_path));
			return (ENOENT);
		}

		error = sdev_nodeinit(ddv, nm, &dv, vap);
		if (error != 0) {
			sdcmn_err9(("sdev_mknode: error %d,"
			    " name %s can not be initialized\n",
			    error, nm));
			return (error);
		}
		ASSERT(dv);

		/* insert into the directory cache */
		sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
	}

	ASSERT(dv);
	node_state = dv->sdev_state;
	ASSERT(node_state != SDEV_ZOMBIE);

	/* nothing further to do when only SDEV_INIT was requested */
	if (state == SDEV_READY) {
		switch (node_state) {
		case SDEV_INIT:
			error = sdev_nodeready(dv, vap, avp, args, cred);
			if (error) {
				sdcmn_err9(("sdev_mknode: node %s can NOT"
				    " be transitioned into READY state, "
				    "error %d\n", nm, error));
			}
			break;
		case SDEV_READY:
			/*
			 * Do some sanity checking to make sure
			 * the existing sdev_node is what has been
			 * asked for.
			 */
			error = sdev_node_check(dv, vap, args);
			break;
		default:
			break;
		}
	}

	if (!error) {
		*newdv = dv;
		ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
	} else {
		/* on any failure the node is dropped from the directory cache */
		sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);
		/*
		 * We created this node, it wasn't passed into us. Therefore it
		 * is up to us to delete it.
		 */
		if (*newdv == NULL)
			SDEV_SIMPLE_RELE(dv);
		*newdv = NULL;
	}

	return (error);
}
747
748 /*
749 * convenient wrapper to change vp's ATIME, CTIME and MTIME
750 */
751 void
sdev_update_timestamps(struct vnode * vp,cred_t * cred,uint_t mask)752 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
753 {
754 struct vattr attr;
755 timestruc_t now;
756 int err;
757
758 ASSERT(vp);
759 gethrestime(&now);
760 if (mask & AT_CTIME)
761 attr.va_ctime = now;
762 if (mask & AT_MTIME)
763 attr.va_mtime = now;
764 if (mask & AT_ATIME)
765 attr.va_atime = now;
766
767 attr.va_mask = (mask & AT_TIMES);
768 err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
769 if (err && (err != EROFS)) {
770 sdcmn_err(("update timestamps error %d\n", err));
771 }
772 }
773
/*
 * Tear down a sdev_node whose link count has reached zero and return it
 * to sdev_node_cache.
 *
 * the backing store vnode is released here
 *
 * Also freed: the in-memory attributes, the name, the cached symlink
 * target and the path.  For a non-global node the profile is freed and,
 * unless the node is a symlink, the hold on its origin is dropped.  For
 * a directory the (empty) entries tree is destroyed.  flags is unused.
 */
/*ARGSUSED1*/
void
sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
{
	/* no references */
	ASSERT(dv->sdev_nlink == 0);

	if (dv->sdev_attrvp != NULLVP) {
		VN_RELE(dv->sdev_attrvp);
		/*
		 * reset the attrvp so that no more
		 * references can be made on this already
		 * vn_rele() vnode
		 */
		dv->sdev_attrvp = NULLVP;
	}

	if (dv->sdev_attr != NULL) {
		kmem_free(dv->sdev_attr, sizeof (struct vattr));
		dv->sdev_attr = NULL;
	}

	if (dv->sdev_name != NULL) {
		/* the name was allocated with room for its terminator */
		kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
		dv->sdev_name = NULL;
	}

	if (dv->sdev_symlink != NULL) {
		kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
		dv->sdev_symlink = NULL;
	}

	if (dv->sdev_path) {
		kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
		dv->sdev_path = NULL;
	}

	if (!SDEV_IS_GLOBAL(dv)) {
		sdev_prof_free(dv);
		if (dv->sdev_vnode->v_type != VLNK && dv->sdev_origin != NULL)
			SDEV_RELE(dv->sdev_origin);
	}

	if (SDEVTOV(dv)->v_type == VDIR) {
		ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
		avl_destroy(&dv->sdev_entries);
	}

	mutex_destroy(&dv->sdev_lookup_lock);
	cv_destroy(&dv->sdev_lookup_cv);

	/* return node to initial state as per constructor */
	(void) memset((void *)&dv->sdev_instance_data, 0,
	    sizeof (dv->sdev_instance_data));
	vn_invalid(SDEVTOV(dv));
	dv->sdev_private = NULL;
	kmem_cache_free(sdev_node_cache, dv);
}
835
836 /*
837 * DIRECTORY CACHE lookup
838 */
839 struct sdev_node *
sdev_findbyname(struct sdev_node * ddv,char * nm)840 sdev_findbyname(struct sdev_node *ddv, char *nm)
841 {
842 struct sdev_node *dv;
843 struct sdev_node dvtmp;
844 avl_index_t where;
845
846 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
847
848 dvtmp.sdev_name = nm;
849 dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
850 if (dv) {
851 ASSERT(dv->sdev_dotdot == ddv);
852 ASSERT(strcmp(dv->sdev_name, nm) == 0);
853 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
854 SDEV_HOLD(dv);
855 return (dv);
856 }
857 return (NULL);
858 }
859
860 /*
861 * Inserts a new sdev_node in a parent directory
862 */
863 void
sdev_direnter(struct sdev_node * ddv,struct sdev_node * dv)864 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
865 {
866 avl_index_t where;
867
868 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
869 ASSERT(SDEVTOV(ddv)->v_type == VDIR);
870 ASSERT(ddv->sdev_nlink >= 2);
871 ASSERT(dv->sdev_nlink == 0);
872 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
873
874 dv->sdev_dotdot = ddv;
875 VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
876 avl_insert(&ddv->sdev_entries, dv, where);
877 ddv->sdev_nlink++;
878 }
879
880 /*
881 * The following check is needed because while sdev_nodes are linked
882 * in SDEV_INIT state, they have their link counts incremented only
883 * in SDEV_READY state.
884 */
885 static void
decr_link(struct sdev_node * dv)886 decr_link(struct sdev_node *dv)
887 {
888 VERIFY(RW_WRITE_HELD(&dv->sdev_contents));
889 if (dv->sdev_state != SDEV_INIT) {
890 VERIFY(dv->sdev_nlink >= 1);
891 dv->sdev_nlink--;
892 } else {
893 VERIFY(dv->sdev_nlink == 0);
894 }
895 }
896
/*
 * Delete an existing dv from directory cache
 *
 * In the case of a node is still held by non-zero reference count, the node is
 * put into ZOMBIE state. The node is always unlinked from its parent, but it is
 * not destroyed via sdev_inactive until its reference count reaches "0".
 *
 * The caller holds ddv->sdev_contents as writer.  This routine takes the
 * vnode's v_lock and then dv->sdev_contents (writer) for the duration of
 * the state change; it does not release any hold on dv itself.
 */
static void
sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
{
	struct vnode *vp;
	sdev_node_state_t os;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));

	vp = SDEVTOV(dv);
	mutex_enter(&vp->v_lock);
	rw_enter(&dv->sdev_contents, RW_WRITER);
	/* remember the previous state; it decides the link fix-up below */
	os = dv->sdev_state;
	ASSERT(os != SDEV_ZOMBIE);
	dv->sdev_state = SDEV_ZOMBIE;

	/*
	 * unlink ourselves from the parent directory now to take care of the ..
	 * link. However, if we're a directory, we don't remove our reference to
	 * ourself eg. '.' until we are torn down in the inactive callback.
	 */
	decr_link(ddv);
	avl_remove(&ddv->sdev_entries, dv);
	/*
	 * sdev_inactive expects nodes to have a link to themselves when we're
	 * tearing them down. If we're transitioning from the initial state to
	 * zombie and not via ready, then we're not going to have this link that
	 * comes from the node being ready. As a result, we need to increment
	 * our link count by one to account for this.
	 */
	if (os == SDEV_INIT && dv->sdev_nlink == 0)
		dv->sdev_nlink++;
	rw_exit(&dv->sdev_contents);
	mutex_exit(&vp->v_lock);
}
938
939 /*
940 * check if the source is in the path of the target
941 *
942 * source and target are different
943 */
944 /*ARGSUSED2*/
945 static int
sdev_checkpath(struct sdev_node * sdv,struct sdev_node * tdv,struct cred * cred)946 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
947 {
948 int error = 0;
949 struct sdev_node *dotdot, *dir;
950
951 dotdot = tdv->sdev_dotdot;
952 ASSERT(dotdot);
953
954 /* fs root */
955 if (dotdot == tdv) {
956 return (0);
957 }
958
959 for (;;) {
960 /*
961 * avoid error cases like
962 * mv a a/b
963 * mv a a/b/c
964 * etc.
965 */
966 if (dotdot == sdv) {
967 error = EINVAL;
968 break;
969 }
970
971 dir = dotdot;
972 dotdot = dir->sdev_dotdot;
973
974 /* done checking because root is reached */
975 if (dir == dotdot) {
976 break;
977 }
978 }
979 return (error);
980 }
981
982 int
sdev_rnmnode(struct sdev_node * oddv,struct sdev_node * odv,struct sdev_node * nddv,struct sdev_node ** ndvp,char * nnm,struct cred * cred)983 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
984 struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
985 struct cred *cred)
986 {
987 int error = 0;
988 struct vnode *ovp = SDEVTOV(odv);
989 struct vnode *nvp;
990 struct vattr vattr;
991 int doingdir = (ovp->v_type == VDIR);
992 char *link = NULL;
993 int samedir = (oddv == nddv) ? 1 : 0;
994 int bkstore = 0;
995 struct sdev_node *idv = NULL;
996 struct sdev_node *ndv = NULL;
997 timestruc_t now;
998
999 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1000 error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
1001 if (error)
1002 return (error);
1003
1004 if (!samedir)
1005 rw_enter(&oddv->sdev_contents, RW_WRITER);
1006 rw_enter(&nddv->sdev_contents, RW_WRITER);
1007
1008 /*
1009 * the source may have been deleted by another thread before
1010 * we gets here.
1011 */
1012 if (odv->sdev_state != SDEV_READY) {
1013 error = ENOENT;
1014 goto err_out;
1015 }
1016
1017 if (doingdir && (odv == nddv)) {
1018 error = EINVAL;
1019 goto err_out;
1020 }
1021
1022 /*
1023 * If renaming a directory, and the parents are different (".." must be
1024 * changed) then the source dir must not be in the dir hierarchy above
1025 * the target since it would orphan everything below the source dir.
1026 */
1027 if (doingdir && (oddv != nddv)) {
1028 error = sdev_checkpath(odv, nddv, cred);
1029 if (error)
1030 goto err_out;
1031 }
1032
1033 /* fix the source for a symlink */
1034 if (vattr.va_type == VLNK) {
1035 if (odv->sdev_symlink == NULL) {
1036 error = sdev_follow_link(odv);
1037 if (error) {
1038 /*
1039 * The underlying symlink doesn't exist. This
1040 * node probably shouldn't even exist. While
1041 * it's a bit jarring to consumers, we're going
1042 * to remove the node from /dev.
1043 */
1044 if (SDEV_IS_PERSIST((*ndvp)))
1045 bkstore = 1;
1046 sdev_dirdelete(oddv, odv);
1047 if (bkstore) {
1048 ASSERT(nddv->sdev_attrvp);
1049 error = VOP_REMOVE(nddv->sdev_attrvp,
1050 nnm, cred, NULL, 0);
1051 if (error)
1052 goto err_out;
1053 }
1054 error = ENOENT;
1055 goto err_out;
1056 }
1057 }
1058 ASSERT(odv->sdev_symlink);
1059 link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1060 }
1061
1062 /* destination existing */
1063 if (*ndvp) {
1064 nvp = SDEVTOV(*ndvp);
1065 ASSERT(nvp);
1066
1067 /* handling renaming to itself */
1068 if (odv == *ndvp) {
1069 error = 0;
1070 goto err_out;
1071 }
1072
1073 if (nvp->v_type == VDIR) {
1074 if (!doingdir) {
1075 error = EISDIR;
1076 goto err_out;
1077 }
1078
1079 if (vn_vfswlock(nvp)) {
1080 error = EBUSY;
1081 goto err_out;
1082 }
1083
1084 if (vn_mountedvfs(nvp) != NULL) {
1085 vn_vfsunlock(nvp);
1086 error = EBUSY;
1087 goto err_out;
1088 }
1089
1090 /* in case dir1 exists in dir2 and "mv dir1 dir2" */
1091 if ((*ndvp)->sdev_nlink > 2) {
1092 vn_vfsunlock(nvp);
1093 error = EEXIST;
1094 goto err_out;
1095 }
1096 vn_vfsunlock(nvp);
1097
1098 /*
1099 * We did not place the hold on *ndvp, so even though
1100 * we're deleting the node, we should not get rid of our
1101 * reference.
1102 */
1103 sdev_dirdelete(nddv, *ndvp);
1104 *ndvp = NULL;
1105 ASSERT(nddv->sdev_attrvp);
1106 error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1107 nddv->sdev_attrvp, cred, NULL, 0);
1108 if (error)
1109 goto err_out;
1110 } else {
1111 if (doingdir) {
1112 error = ENOTDIR;
1113 goto err_out;
1114 }
1115
1116 if (SDEV_IS_PERSIST((*ndvp))) {
1117 bkstore = 1;
1118 }
1119
1120 /*
1121 * Get rid of the node from the directory cache note.
1122 * Don't forget that it's not up to us to remove the vn
1123 * ref on the sdev node, as we did not place it.
1124 */
1125 sdev_dirdelete(nddv, *ndvp);
1126 *ndvp = NULL;
1127 if (bkstore) {
1128 ASSERT(nddv->sdev_attrvp);
1129 error = VOP_REMOVE(nddv->sdev_attrvp,
1130 nnm, cred, NULL, 0);
1131 if (error)
1132 goto err_out;
1133 }
1134 }
1135 }
1136
1137 /*
1138 * make a fresh node from the source attrs
1139 */
1140 ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1141 error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1142 NULL, (void *)link, cred, SDEV_READY);
1143
1144 if (link != NULL) {
1145 kmem_free(link, strlen(link) + 1);
1146 link = NULL;
1147 }
1148
1149 if (error)
1150 goto err_out;
1151 ASSERT(*ndvp);
1152 ASSERT((*ndvp)->sdev_state == SDEV_READY);
1153
1154 /* move dir contents */
1155 if (doingdir) {
1156 for (idv = SDEV_FIRST_ENTRY(odv); idv;
1157 idv = SDEV_NEXT_ENTRY(odv, idv)) {
1158 SDEV_HOLD(idv);
1159 error = sdev_rnmnode(odv, idv,
1160 (struct sdev_node *)(*ndvp), &ndv,
1161 idv->sdev_name, cred);
1162 SDEV_RELE(idv);
1163 if (error)
1164 goto err_out;
1165 ndv = NULL;
1166 }
1167 }
1168
1169 if ((*ndvp)->sdev_attrvp) {
1170 sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1171 AT_CTIME|AT_ATIME);
1172 } else {
1173 ASSERT((*ndvp)->sdev_attr);
1174 gethrestime(&now);
1175 (*ndvp)->sdev_attr->va_ctime = now;
1176 (*ndvp)->sdev_attr->va_atime = now;
1177 }
1178
1179 if (nddv->sdev_attrvp) {
1180 sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1181 AT_MTIME|AT_ATIME);
1182 } else {
1183 ASSERT(nddv->sdev_attr);
1184 gethrestime(&now);
1185 nddv->sdev_attr->va_mtime = now;
1186 nddv->sdev_attr->va_atime = now;
1187 }
1188 rw_exit(&nddv->sdev_contents);
1189 if (!samedir)
1190 rw_exit(&oddv->sdev_contents);
1191
1192 SDEV_RELE(*ndvp);
1193 return (error);
1194
1195 err_out:
1196 if (link != NULL) {
1197 kmem_free(link, strlen(link) + 1);
1198 link = NULL;
1199 }
1200
1201 rw_exit(&nddv->sdev_contents);
1202 if (!samedir)
1203 rw_exit(&oddv->sdev_contents);
1204 return (error);
1205 }
1206
1207 /*
1208 * Merge sdev_node specific information into an attribute structure.
1209 *
1210 * note: sdev_node is not locked here
1211 */
1212 void
sdev_vattr_merge(struct sdev_node * dv,struct vattr * vap)1213 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1214 {
1215 struct vnode *vp = SDEVTOV(dv);
1216
1217 vap->va_nlink = dv->sdev_nlink;
1218 vap->va_nodeid = dv->sdev_ino;
1219 vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1220 vap->va_type = vp->v_type;
1221
1222 if (vp->v_type == VDIR) {
1223 vap->va_rdev = 0;
1224 vap->va_fsid = vp->v_rdev;
1225 } else if (vp->v_type == VLNK) {
1226 vap->va_rdev = 0;
1227 vap->va_mode &= ~S_IFMT;
1228 vap->va_mode |= S_IFLNK;
1229 } else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1230 vap->va_rdev = vp->v_rdev;
1231 vap->va_mode &= ~S_IFMT;
1232 if (vap->va_type == VCHR)
1233 vap->va_mode |= S_IFCHR;
1234 else
1235 vap->va_mode |= S_IFBLK;
1236 } else {
1237 vap->va_rdev = 0;
1238 }
1239 }
1240
1241 struct vattr *
sdev_getdefault_attr(enum vtype type)1242 sdev_getdefault_attr(enum vtype type)
1243 {
1244 if (type == VDIR)
1245 return (&sdev_vattr_dir);
1246 else if (type == VCHR)
1247 return (&sdev_vattr_chr);
1248 else if (type == VBLK)
1249 return (&sdev_vattr_blk);
1250 else if (type == VLNK)
1251 return (&sdev_vattr_lnk);
1252 else
1253 return (NULL);
1254 }
1255 int
sdev_to_vp(struct sdev_node * dv,struct vnode ** vpp)1256 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1257 {
1258 int rv = 0;
1259 struct vnode *vp = SDEVTOV(dv);
1260
1261 switch (vp->v_type) {
1262 case VCHR:
1263 case VBLK:
1264 /*
1265 * If vnode is a device, return special vnode instead
1266 * (though it knows all about -us- via sp->s_realvp)
1267 */
1268 *vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1269 VN_RELE(vp);
1270 if (*vpp == NULLVP)
1271 rv = ENOSYS;
1272 break;
1273 default: /* most types are returned as is */
1274 *vpp = vp;
1275 break;
1276 }
1277 return (rv);
1278 }
1279
1280 /*
1281 * junction between devname and root file system, e.g. ufs
1282 */
1283 int
devname_backstore_lookup(struct sdev_node * ddv,char * nm,struct vnode ** rvp)1284 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1285 {
1286 struct vnode *rdvp = ddv->sdev_attrvp;
1287 int rval = 0;
1288
1289 ASSERT(rdvp);
1290
1291 rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1292 NULL);
1293 return (rval);
1294 }
1295
1296 static int
sdev_filldir_from_store(struct sdev_node * ddv,int dlen,struct cred * cred)1297 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1298 {
1299 struct sdev_node *dv = NULL;
1300 char *nm;
1301 struct vnode *dirvp;
1302 int error;
1303 vnode_t *vp;
1304 int eof;
1305 struct iovec iov;
1306 struct uio uio;
1307 struct dirent64 *dp;
1308 dirent64_t *dbuf;
1309 size_t dbuflen;
1310 struct vattr vattr;
1311 char *link = NULL;
1312
1313 if (ddv->sdev_attrvp == NULL)
1314 return (0);
1315 if (!(ddv->sdev_flags & SDEV_BUILD))
1316 return (0);
1317
1318 dirvp = ddv->sdev_attrvp;
1319 VN_HOLD(dirvp);
1320 dbuf = kmem_zalloc(dlen, KM_SLEEP);
1321
1322 uio.uio_iov = &iov;
1323 uio.uio_iovcnt = 1;
1324 uio.uio_segflg = UIO_SYSSPACE;
1325 uio.uio_fmode = 0;
1326 uio.uio_extflg = UIO_COPY_CACHED;
1327 uio.uio_loffset = 0;
1328 uio.uio_llimit = MAXOFFSET_T;
1329
1330 eof = 0;
1331 error = 0;
1332 while (!error && !eof) {
1333 uio.uio_resid = dlen;
1334 iov.iov_base = (char *)dbuf;
1335 iov.iov_len = dlen;
1336 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1337 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1338 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1339
1340 dbuflen = dlen - uio.uio_resid;
1341 if (error || dbuflen == 0)
1342 break;
1343
1344 if (!(ddv->sdev_flags & SDEV_BUILD))
1345 break;
1346
1347 for (dp = dbuf; ((intptr_t)dp <
1348 (intptr_t)dbuf + dbuflen);
1349 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1350 nm = dp->d_name;
1351
1352 if (strcmp(nm, ".") == 0 ||
1353 strcmp(nm, "..") == 0)
1354 continue;
1355
1356 vp = NULLVP;
1357 dv = sdev_cache_lookup(ddv, nm);
1358 if (dv) {
1359 VERIFY(dv->sdev_state != SDEV_ZOMBIE);
1360 SDEV_SIMPLE_RELE(dv);
1361 continue;
1362 }
1363
1364 /* refill the cache if not already */
1365 error = devname_backstore_lookup(ddv, nm, &vp);
1366 if (error)
1367 continue;
1368
1369 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1370 error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1371 if (error)
1372 continue;
1373
1374 if (vattr.va_type == VLNK) {
1375 error = sdev_getlink(vp, &link);
1376 if (error) {
1377 continue;
1378 }
1379 ASSERT(link != NULL);
1380 }
1381
1382 if (!rw_tryupgrade(&ddv->sdev_contents)) {
1383 rw_exit(&ddv->sdev_contents);
1384 rw_enter(&ddv->sdev_contents, RW_WRITER);
1385 }
1386 error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1387 cred, SDEV_READY);
1388 rw_downgrade(&ddv->sdev_contents);
1389
1390 if (link != NULL) {
1391 kmem_free(link, strlen(link) + 1);
1392 link = NULL;
1393 }
1394
1395 if (!error) {
1396 ASSERT(dv);
1397 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1398 SDEV_SIMPLE_RELE(dv);
1399 }
1400 vp = NULL;
1401 dv = NULL;
1402 }
1403 }
1404
1405 done:
1406 VN_RELE(dirvp);
1407 kmem_free(dbuf, dlen);
1408
1409 return (error);
1410 }
1411
1412 void
sdev_filldir_dynamic(struct sdev_node * ddv)1413 sdev_filldir_dynamic(struct sdev_node *ddv)
1414 {
1415 int error;
1416 int i;
1417 struct vattr vattr;
1418 struct vattr *vap = &vattr;
1419 char *nm = NULL;
1420 struct sdev_node *dv = NULL;
1421
1422 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1423 ASSERT((ddv->sdev_flags & SDEV_BUILD));
1424
1425 *vap = *sdev_getdefault_attr(VDIR); /* note structure copy here */
1426 gethrestime(&vap->va_atime);
1427 vap->va_mtime = vap->va_atime;
1428 vap->va_ctime = vap->va_atime;
1429 for (i = 0; vtab[i].vt_name != NULL; i++) {
1430 /*
1431 * This early, we may be in a read-only /dev environment: leave
1432 * the creation of any nodes we'd attempt to persist to
1433 * devfsadm. Because /dev itself is normally persistent, any
1434 * node which is not marked dynamic will end up being marked
1435 * persistent. However, some nodes are both dynamic and
1436 * persistent, mostly lofi and rlofi, so we need to be careful
1437 * in our check.
1438 */
1439 if ((vtab[i].vt_flags & SDEV_PERSIST) ||
1440 !(vtab[i].vt_flags & SDEV_DYNAMIC))
1441 continue;
1442 nm = vtab[i].vt_name;
1443 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1444 dv = NULL;
1445 error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1446 NULL, kcred, SDEV_READY);
1447 if (error) {
1448 cmn_err(CE_WARN, "%s/%s: error %d\n",
1449 ddv->sdev_name, nm, error);
1450 } else {
1451 ASSERT(dv);
1452 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1453 SDEV_SIMPLE_RELE(dv);
1454 }
1455 }
1456 }
1457
1458 /*
1459 * Creating a backing store entry based on sdev_attr.
1460 * This is called either as part of node creation in a persistent directory
1461 * or from setattr/setsecattr to persist access attributes across reboot.
1462 */
1463 int
sdev_shadow_node(struct sdev_node * dv,struct cred * cred)1464 sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1465 {
1466 int error = 0;
1467 struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1468 struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1469 struct vattr *vap = dv->sdev_attr;
1470 char *nm = dv->sdev_name;
1471 struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1472
1473 ASSERT(dv && dv->sdev_name && rdvp);
1474 ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1475
1476 lookup:
1477 /* try to find it in the backing store */
1478 error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1479 NULL);
1480 if (error == 0) {
1481 if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1482 VN_HOLD(rrvp);
1483 VN_RELE(*rvp);
1484 *rvp = rrvp;
1485 }
1486
1487 kmem_free(dv->sdev_attr, sizeof (vattr_t));
1488 dv->sdev_attr = NULL;
1489 dv->sdev_attrvp = *rvp;
1490 return (0);
1491 }
1492
1493 /* let's try to persist the node */
1494 gethrestime(&vap->va_atime);
1495 vap->va_mtime = vap->va_atime;
1496 vap->va_ctime = vap->va_atime;
1497 vap->va_mask |= AT_TYPE|AT_MODE;
1498 switch (vap->va_type) {
1499 case VDIR:
1500 error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1501 sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1502 (void *)(*rvp), error));
1503 if (!error)
1504 VN_RELE(*rvp);
1505 break;
1506 case VCHR:
1507 case VBLK:
1508 case VREG:
1509 case VDOOR:
1510 error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1511 rvp, cred, 0, NULL, NULL);
1512 sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1513 (void *)(*rvp), error));
1514 if (!error)
1515 VN_RELE(*rvp);
1516 break;
1517 case VLNK:
1518 ASSERT(dv->sdev_symlink);
1519 error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1520 NULL, 0);
1521 sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1522 error));
1523 break;
1524 default:
1525 cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1526 "create\n", nm);
1527 /*NOTREACHED*/
1528 }
1529
1530 /* go back to lookup to factor out spec node and set attrvp */
1531 if (error == 0)
1532 goto lookup;
1533
1534 sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error));
1535 return (error);
1536 }
1537
1538 static void
sdev_cache_add(struct sdev_node * ddv,struct sdev_node ** dv,char * nm)1539 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1540 {
1541 struct sdev_node *dup = NULL;
1542
1543 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1544 if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1545 sdev_direnter(ddv, *dv);
1546 } else {
1547 VERIFY(dup->sdev_state != SDEV_ZOMBIE);
1548 SDEV_SIMPLE_RELE(*dv);
1549 sdev_nodedestroy(*dv, 0);
1550 *dv = dup;
1551 }
1552 }
1553
1554 static void
sdev_cache_delete(struct sdev_node * ddv,struct sdev_node ** dv)1555 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1556 {
1557 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1558 sdev_dirdelete(ddv, *dv);
1559 }
1560
1561 /*
1562 * update the in-core directory cache
1563 */
1564 void
sdev_cache_update(struct sdev_node * ddv,struct sdev_node ** dv,char * nm,sdev_cache_ops_t ops)1565 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1566 sdev_cache_ops_t ops)
1567 {
1568 ASSERT((SDEV_HELD(*dv)));
1569
1570 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1571 switch (ops) {
1572 case SDEV_CACHE_ADD:
1573 sdev_cache_add(ddv, dv, nm);
1574 break;
1575 case SDEV_CACHE_DELETE:
1576 sdev_cache_delete(ddv, dv);
1577 break;
1578 default:
1579 break;
1580 }
1581 }
1582
1583 /*
1584 * retrieve the named entry from the directory cache
1585 */
1586 struct sdev_node *
sdev_cache_lookup(struct sdev_node * ddv,char * nm)1587 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1588 {
1589 struct sdev_node *dv = NULL;
1590
1591 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1592 dv = sdev_findbyname(ddv, nm);
1593
1594 return (dv);
1595 }
1596
1597 /*
1598 * Implicit reconfig for nodes constructed by a link generator
1599 * Start devfsadm if needed, or if devfsadm is in progress,
1600 * prepare to block on devfsadm either completing or
1601 * constructing the desired node. As devfsadmd is global
1602 * in scope, constructing all necessary nodes, we only
1603 * need to initiate it once.
1604 */
1605 static int
sdev_call_devfsadmd(struct sdev_node * ddv,struct sdev_node * dv,char * nm)1606 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1607 {
1608 int error = 0;
1609
1610 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1611 sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1612 ddv->sdev_name, nm, devfsadm_state));
1613 mutex_enter(&dv->sdev_lookup_lock);
1614 SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1615 mutex_exit(&dv->sdev_lookup_lock);
1616 error = 0;
1617 } else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1618 sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1619 ddv->sdev_name, nm, devfsadm_state));
1620
1621 sdev_devfsadmd_thread(ddv, dv, kcred);
1622 mutex_enter(&dv->sdev_lookup_lock);
1623 SDEV_BLOCK_OTHERS(dv,
1624 (SDEV_LOOKUP | SDEV_LGWAITING));
1625 mutex_exit(&dv->sdev_lookup_lock);
1626 error = 0;
1627 } else {
1628 error = -1;
1629 }
1630
1631 return (error);
1632 }
1633
1634 /*
1635 * Support for specialized device naming construction mechanisms
1636 */
1637 static int
sdev_call_dircallback(struct sdev_node * ddv,struct sdev_node ** dvp,char * nm,int (* callback)(struct sdev_node *,char *,void **,struct cred *,void *,char *),int flags,struct cred * cred)1638 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1639 int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1640 void *, char *), int flags, struct cred *cred)
1641 {
1642 int rv = 0;
1643 char *physpath = NULL;
1644 struct vattr vattr;
1645 struct vattr *vap = &vattr;
1646 struct sdev_node *dv = NULL;
1647
1648 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1649 if (flags & SDEV_VLINK) {
1650 physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1651 rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1652 NULL);
1653 if (rv) {
1654 kmem_free(physpath, MAXPATHLEN);
1655 return (-1);
1656 }
1657
1658 *vap = *sdev_getdefault_attr(VLNK); /* structure copy */
1659 vap->va_size = strlen(physpath);
1660 gethrestime(&vap->va_atime);
1661 vap->va_mtime = vap->va_atime;
1662 vap->va_ctime = vap->va_atime;
1663
1664 rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1665 (void *)physpath, cred, SDEV_READY);
1666 kmem_free(physpath, MAXPATHLEN);
1667 if (rv)
1668 return (rv);
1669 } else if (flags & SDEV_VATTR) {
1670 /*
1671 * /dev/pts
1672 *
1673 * callback is responsible to set the basic attributes,
1674 * e.g. va_type/va_uid/va_gid/
1675 * dev_t if VCHR or VBLK/
1676 */
1677 ASSERT(callback);
1678 rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
1679 if (rv) {
1680 sdcmn_err3(("devname_lookup_func: SDEV_NONE "
1681 "callback failed \n"));
1682 return (-1);
1683 }
1684
1685 rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
1686 cred, SDEV_READY);
1687
1688 if (rv)
1689 return (rv);
1690
1691 } else {
1692 impossible(("lookup: %s/%s by %s not supported (%d)\n",
1693 SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
1694 __LINE__));
1695 rv = -1;
1696 }
1697
1698 *dvp = dv;
1699 return (rv);
1700 }
1701
/*
 * Return 1 if exec_name is exactly "devfsadm", 0 otherwise.
 *
 * note: because devfsadmd -> /usr/sbin/devfsadm
 * it is safe to use "devfsadm" to capture the lookups
 * from devfsadm and its daemon version.
 */
static int
is_devfsadm_thread(char *exec_name)
{
	return (strcmp(exec_name, "devfsadm") == 0);
}
1714
1715 /*
1716 * Lookup Order:
1717 * sdev_node cache;
1718 * backing store (SDEV_PERSIST);
1719 * DBNR: a. dir_ops implemented in the loadable modules;
1720 * b. vnode ops in vtab.
1721 */
1722 int
devname_lookup_func(struct sdev_node * ddv,char * nm,struct vnode ** vpp,struct cred * cred,int (* callback)(struct sdev_node *,char *,void **,struct cred *,void *,char *),int flags)1723 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
1724 struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
1725 struct cred *, void *, char *), int flags)
1726 {
1727 int rv = 0, nmlen;
1728 struct vnode *rvp = NULL;
1729 struct sdev_node *dv = NULL;
1730 int retried = 0;
1731 int error = 0;
1732 struct vattr vattr;
1733 char *lookup_thread = curproc->p_user.u_comm;
1734 int failed_flags = 0;
1735 int (*vtor)(struct sdev_node *) = NULL;
1736 int state;
1737 int parent_state;
1738 char *link = NULL;
1739
1740 if (SDEVTOV(ddv)->v_type != VDIR)
1741 return (ENOTDIR);
1742
1743 /*
1744 * Empty name or ., return node itself.
1745 */
1746 nmlen = strlen(nm);
1747 if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
1748 *vpp = SDEVTOV(ddv);
1749 VN_HOLD(*vpp);
1750 return (0);
1751 }
1752
1753 /*
1754 * .., return the parent directory
1755 */
1756 if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
1757 *vpp = SDEVTOV(ddv->sdev_dotdot);
1758 VN_HOLD(*vpp);
1759 return (0);
1760 }
1761
1762 rw_enter(&ddv->sdev_contents, RW_READER);
1763 if (ddv->sdev_flags & SDEV_VTOR) {
1764 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
1765 ASSERT(vtor);
1766 }
1767
1768 tryagain:
1769 /*
1770 * (a) directory cache lookup:
1771 */
1772 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1773 parent_state = ddv->sdev_state;
1774 dv = sdev_cache_lookup(ddv, nm);
1775 if (dv) {
1776 state = dv->sdev_state;
1777 switch (state) {
1778 case SDEV_INIT:
1779 if (is_devfsadm_thread(lookup_thread))
1780 break;
1781
1782 /* ZOMBIED parent won't allow node creation */
1783 if (parent_state == SDEV_ZOMBIE) {
1784 SD_TRACE_FAILED_LOOKUP(ddv, nm,
1785 retried);
1786 goto nolock_notfound;
1787 }
1788
1789 mutex_enter(&dv->sdev_lookup_lock);
1790 /* compensate the threads started after devfsadm */
1791 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
1792 !(SDEV_IS_LOOKUP(dv)))
1793 SDEV_BLOCK_OTHERS(dv,
1794 (SDEV_LOOKUP | SDEV_LGWAITING));
1795
1796 if (SDEV_IS_LOOKUP(dv)) {
1797 failed_flags |= SLF_REBUILT;
1798 rw_exit(&ddv->sdev_contents);
1799 error = sdev_wait4lookup(dv, SDEV_LOOKUP);
1800 mutex_exit(&dv->sdev_lookup_lock);
1801 rw_enter(&ddv->sdev_contents, RW_READER);
1802
1803 if (error != 0) {
1804 SD_TRACE_FAILED_LOOKUP(ddv, nm,
1805 retried);
1806 goto nolock_notfound;
1807 }
1808
1809 state = dv->sdev_state;
1810 if (state == SDEV_INIT) {
1811 SD_TRACE_FAILED_LOOKUP(ddv, nm,
1812 retried);
1813 goto nolock_notfound;
1814 } else if (state == SDEV_READY) {
1815 goto found;
1816 } else if (state == SDEV_ZOMBIE) {
1817 rw_exit(&ddv->sdev_contents);
1818 SD_TRACE_FAILED_LOOKUP(ddv, nm,
1819 retried);
1820 SDEV_RELE(dv);
1821 goto lookup_failed;
1822 }
1823 } else {
1824 mutex_exit(&dv->sdev_lookup_lock);
1825 }
1826 break;
1827 case SDEV_READY:
1828 goto found;
1829 case SDEV_ZOMBIE:
1830 rw_exit(&ddv->sdev_contents);
1831 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1832 SDEV_RELE(dv);
1833 goto lookup_failed;
1834 default:
1835 rw_exit(&ddv->sdev_contents);
1836 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1837 sdev_lookup_failed(ddv, nm, failed_flags);
1838 *vpp = NULLVP;
1839 return (ENOENT);
1840 }
1841 }
1842 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1843
1844 /*
1845 * ZOMBIED parent does not allow new node creation.
1846 * bail out early
1847 */
1848 if (parent_state == SDEV_ZOMBIE) {
1849 rw_exit(&ddv->sdev_contents);
1850 *vpp = NULLVP;
1851 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1852 return (ENOENT);
1853 }
1854
1855 /*
1856 * (b0): backing store lookup
1857 * SDEV_PERSIST is default except:
1858 * 1) pts nodes
1859 * 2) non-chmod'ed local nodes
1860 * 3) zvol nodes
1861 */
1862 if (SDEV_IS_PERSIST(ddv)) {
1863 error = devname_backstore_lookup(ddv, nm, &rvp);
1864
1865 if (!error) {
1866
1867 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1868 error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
1869 if (error) {
1870 rw_exit(&ddv->sdev_contents);
1871 if (dv)
1872 SDEV_RELE(dv);
1873 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1874 sdev_lookup_failed(ddv, nm, failed_flags);
1875 *vpp = NULLVP;
1876 return (ENOENT);
1877 }
1878
1879 if (vattr.va_type == VLNK) {
1880 error = sdev_getlink(rvp, &link);
1881 if (error) {
1882 rw_exit(&ddv->sdev_contents);
1883 if (dv)
1884 SDEV_RELE(dv);
1885 SD_TRACE_FAILED_LOOKUP(ddv, nm,
1886 retried);
1887 sdev_lookup_failed(ddv, nm,
1888 failed_flags);
1889 *vpp = NULLVP;
1890 return (ENOENT);
1891 }
1892 ASSERT(link != NULL);
1893 }
1894
1895 if (!rw_tryupgrade(&ddv->sdev_contents)) {
1896 rw_exit(&ddv->sdev_contents);
1897 rw_enter(&ddv->sdev_contents, RW_WRITER);
1898 }
1899 error = sdev_mknode(ddv, nm, &dv, &vattr,
1900 rvp, link, cred, SDEV_READY);
1901 rw_downgrade(&ddv->sdev_contents);
1902
1903 if (link != NULL) {
1904 kmem_free(link, strlen(link) + 1);
1905 link = NULL;
1906 }
1907
1908 if (error) {
1909 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1910 rw_exit(&ddv->sdev_contents);
1911 if (dv)
1912 SDEV_RELE(dv);
1913 goto lookup_failed;
1914 } else {
1915 goto found;
1916 }
1917 } else if (retried) {
1918 rw_exit(&ddv->sdev_contents);
1919 sdcmn_err3(("retry of lookup of %s/%s: failed\n",
1920 ddv->sdev_name, nm));
1921 if (dv)
1922 SDEV_RELE(dv);
1923 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1924 sdev_lookup_failed(ddv, nm, failed_flags);
1925 *vpp = NULLVP;
1926 return (ENOENT);
1927 }
1928 }
1929
1930 lookup_create_node:
1931 /* first thread that is doing the lookup on this node */
1932 if (callback) {
1933 ASSERT(dv == NULL);
1934 if (!rw_tryupgrade(&ddv->sdev_contents)) {
1935 rw_exit(&ddv->sdev_contents);
1936 rw_enter(&ddv->sdev_contents, RW_WRITER);
1937 }
1938 error = sdev_call_dircallback(ddv, &dv, nm, callback,
1939 flags, cred);
1940 rw_downgrade(&ddv->sdev_contents);
1941 if (error == 0) {
1942 goto found;
1943 } else {
1944 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1945 rw_exit(&ddv->sdev_contents);
1946 goto lookup_failed;
1947 }
1948 }
1949 if (!dv) {
1950 if (!rw_tryupgrade(&ddv->sdev_contents)) {
1951 rw_exit(&ddv->sdev_contents);
1952 rw_enter(&ddv->sdev_contents, RW_WRITER);
1953 }
1954 error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
1955 cred, SDEV_INIT);
1956 if (!dv) {
1957 rw_exit(&ddv->sdev_contents);
1958 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1959 sdev_lookup_failed(ddv, nm, failed_flags);
1960 *vpp = NULLVP;
1961 return (ENOENT);
1962 }
1963 rw_downgrade(&ddv->sdev_contents);
1964 }
1965
1966 /*
1967 * (b1) invoking devfsadm once per life time for devfsadm nodes
1968 */
1969 ASSERT(SDEV_HELD(dv));
1970
1971 if (SDEV_IS_NO_NCACHE(dv))
1972 failed_flags |= SLF_NO_NCACHE;
1973 if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
1974 SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
1975 ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
1976 ASSERT(SDEV_HELD(dv));
1977 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1978 goto nolock_notfound;
1979 }
1980
1981 /*
1982 * filter out known non-existent devices recorded
1983 * during initial reconfiguration boot for which
1984 * reconfig should not be done and lookup may
1985 * be short-circuited now.
1986 */
1987 if (sdev_lookup_filter(ddv, nm)) {
1988 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1989 goto nolock_notfound;
1990 }
1991
1992 /* bypassing devfsadm internal nodes */
1993 if (is_devfsadm_thread(lookup_thread)) {
1994 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1995 goto nolock_notfound;
1996 }
1997
1998 if (sdev_reconfig_disable) {
1999 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2000 goto nolock_notfound;
2001 }
2002
2003 error = sdev_call_devfsadmd(ddv, dv, nm);
2004 if (error == 0) {
2005 sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
2006 ddv->sdev_name, nm, curproc->p_user.u_comm));
2007 if (sdev_reconfig_verbose) {
2008 cmn_err(CE_CONT,
2009 "?lookup of %s/%s by %s: reconfig\n",
2010 ddv->sdev_name, nm, curproc->p_user.u_comm);
2011 }
2012 retried = 1;
2013 failed_flags |= SLF_REBUILT;
2014 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
2015 SDEV_SIMPLE_RELE(dv);
2016 goto tryagain;
2017 } else {
2018 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2019 goto nolock_notfound;
2020 }
2021
2022 found:
2023 ASSERT(dv->sdev_state == SDEV_READY);
2024 if (vtor) {
2025 /*
2026 * Check validity of returned node
2027 */
2028 switch (vtor(dv)) {
2029 case SDEV_VTOR_VALID:
2030 break;
2031 case SDEV_VTOR_STALE:
2032 /*
2033 * The name exists, but the cache entry is
2034 * stale and needs to be re-created.
2035 */
2036 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2037 if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
2038 rw_exit(&ddv->sdev_contents);
2039 rw_enter(&ddv->sdev_contents, RW_WRITER);
2040 }
2041 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);
2042 rw_downgrade(&ddv->sdev_contents);
2043 SDEV_RELE(dv);
2044 dv = NULL;
2045 goto lookup_create_node;
2046 /* FALLTHRU */
2047 case SDEV_VTOR_INVALID:
2048 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2049 sdcmn_err7(("lookup: destroy invalid "
2050 "node: %s(%p)\n", dv->sdev_name, (void *)dv));
2051 goto nolock_notfound;
2052 case SDEV_VTOR_SKIP:
2053 sdcmn_err7(("lookup: node not applicable - "
2054 "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
2055 rw_exit(&ddv->sdev_contents);
2056 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2057 SDEV_RELE(dv);
2058 goto lookup_failed;
2059 default:
2060 cmn_err(CE_PANIC,
2061 "dev fs: validator failed: %s(%p)\n",
2062 dv->sdev_name, (void *)dv);
2063 break;
2064 }
2065 }
2066
2067 rw_exit(&ddv->sdev_contents);
2068 rv = sdev_to_vp(dv, vpp);
2069 sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
2070 "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
2071 dv->sdev_state, nm, rv));
2072 return (rv);
2073
2074 nolock_notfound:
2075 /*
2076 * Destroy the node that is created for synchronization purposes.
2077 */
2078 sdcmn_err3(("devname_lookup_func: %s with state %d\n",
2079 nm, dv->sdev_state));
2080 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2081 if (dv->sdev_state == SDEV_INIT) {
2082 if (!rw_tryupgrade(&ddv->sdev_contents)) {
2083 rw_exit(&ddv->sdev_contents);
2084 rw_enter(&ddv->sdev_contents, RW_WRITER);
2085 }
2086
2087 /*
2088 * Node state may have changed during the lock
2089 * changes. Re-check.
2090 */
2091 if (dv->sdev_state == SDEV_INIT) {
2092 sdev_dirdelete(ddv, dv);
2093 rw_exit(&ddv->sdev_contents);
2094 sdev_lookup_failed(ddv, nm, failed_flags);
2095 SDEV_RELE(dv);
2096 *vpp = NULL;
2097 return (ENOENT);
2098 }
2099 }
2100
2101 rw_exit(&ddv->sdev_contents);
2102 SDEV_RELE(dv);
2103
2104 lookup_failed:
2105 sdev_lookup_failed(ddv, nm, failed_flags);
2106 *vpp = NULL;
2107 return (ENOENT);
2108 }
2109
2110 /*
2111 * Given a directory node, mark all nodes beneath as
2112 * STALE, i.e. nodes that don't exist as far as new
2113 * consumers are concerned. Remove them from the
2114 * list of directory entries so that no lookup or
2115 * directory traversal will find them. The node
2116 * not deallocated so existing holds are not affected.
2117 */
2118 void
sdev_stale(struct sdev_node * ddv)2119 sdev_stale(struct sdev_node *ddv)
2120 {
2121 struct sdev_node *dv;
2122 struct vnode *vp;
2123
2124 ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2125
2126 rw_enter(&ddv->sdev_contents, RW_WRITER);
2127 while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) {
2128 vp = SDEVTOV(dv);
2129 SDEV_HOLD(dv);
2130 if (vp->v_type == VDIR)
2131 sdev_stale(dv);
2132
2133 sdev_dirdelete(ddv, dv);
2134 SDEV_RELE(dv);
2135 }
2136 ddv->sdev_flags |= SDEV_BUILD;
2137 rw_exit(&ddv->sdev_contents);
2138 }
2139
2140 /*
2141 * Given a directory node, clean out all the nodes beneath.
2142 * If expr is specified, clean node with names matching expr.
2143 * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2144 * so they are excluded from future lookups.
2145 */
2146 int
sdev_cleandir(struct sdev_node * ddv,char * expr,uint_t flags)2147 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2148 {
2149 int error = 0;
2150 int busy = 0;
2151 struct vnode *vp;
2152 struct sdev_node *dv, *next;
2153 int bkstore = 0;
2154 int len = 0;
2155 char *bks_name = NULL;
2156
2157 ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2158
2159 /*
2160 * We try our best to destroy all unused sdev_node's
2161 */
2162 rw_enter(&ddv->sdev_contents, RW_WRITER);
2163 for (dv = SDEV_FIRST_ENTRY(ddv); dv != NULL; dv = next) {
2164 next = SDEV_NEXT_ENTRY(ddv, dv);
2165 vp = SDEVTOV(dv);
2166
2167 if (expr && gmatch(dv->sdev_name, expr) == 0)
2168 continue;
2169
2170 if (vp->v_type == VDIR &&
2171 sdev_cleandir(dv, NULL, flags) != 0) {
2172 sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2173 dv->sdev_name));
2174 busy++;
2175 continue;
2176 }
2177
2178 if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2179 sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2180 dv->sdev_name));
2181 busy++;
2182 continue;
2183 }
2184
2185 /*
2186 * at this point, either dv is not held or SDEV_ENFORCE
2187 * is specified. In either case, dv needs to be deleted
2188 */
2189 SDEV_HOLD(dv);
2190
2191 bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2192 if (bkstore && (vp->v_type == VDIR))
2193 bkstore += 1;
2194
2195 if (bkstore) {
2196 len = strlen(dv->sdev_name) + 1;
2197 bks_name = kmem_alloc(len, KM_SLEEP);
2198 bcopy(dv->sdev_name, bks_name, len);
2199 }
2200
2201 sdev_dirdelete(ddv, dv);
2202
2203 /* take care the backing store clean up */
2204 if (bkstore) {
2205 ASSERT(bks_name);
2206 ASSERT(ddv->sdev_attrvp);
2207
2208 if (bkstore == 1) {
2209 error = VOP_REMOVE(ddv->sdev_attrvp,
2210 bks_name, kcred, NULL, 0);
2211 } else if (bkstore == 2) {
2212 error = VOP_RMDIR(ddv->sdev_attrvp,
2213 bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2214 }
2215
2216 /* do not propagate the backing store errors */
2217 if (error) {
2218 sdcmn_err9(("sdev_cleandir: backing store"
2219 "not cleaned\n"));
2220 error = 0;
2221 }
2222
2223 bkstore = 0;
2224 kmem_free(bks_name, len);
2225 bks_name = NULL;
2226 len = 0;
2227 }
2228
2229 ddv->sdev_flags |= SDEV_BUILD;
2230 SDEV_RELE(dv);
2231 }
2232
2233 ddv->sdev_flags |= SDEV_BUILD;
2234 rw_exit(&ddv->sdev_contents);
2235
2236 if (busy) {
2237 error = EBUSY;
2238 }
2239
2240 return (error);
2241 }
2242
2243 /*
2244 * a convenient wrapper for readdir() funcs
2245 */
2246 size_t
add_dir_entry(dirent64_t * de,char * nm,size_t size,ino_t ino,offset_t off)2247 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2248 {
2249 size_t reclen = DIRENT64_RECLEN(strlen(nm));
2250 if (reclen > size)
2251 return (0);
2252
2253 de->d_ino = (ino64_t)ino;
2254 de->d_off = (off64_t)off + 1;
2255 de->d_reclen = (ushort_t)reclen;
2256 (void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2257 return (reclen);
2258 }
2259
2260 /*
2261 * sdev_mount service routines
2262 */
2263 int
sdev_copyin_mountargs(struct mounta * uap,struct sdev_mountargs * args)2264 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2265 {
2266 int error;
2267
2268 if (uap->datalen != sizeof (*args))
2269 return (EINVAL);
2270
2271 if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2272 cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2273 "get user data. error %d\n", error);
2274 return (EFAULT);
2275 }
2276
2277 return (0);
2278 }
2279
/*
 * Step from one dirent64 record to the next by advancing d_reclen bytes.
 * Any earlier definition of nextdp is discarded first.
 */
#undef	nextdp
#define	nextdp(dp)	\
	((struct dirent64 *)(intptr_t)((char *)(dp) + (dp)->d_reclen))
2285
2286 /*
2287 * readdir helper func
2288 */
2289 int
devname_readdir_func(vnode_t * vp,uio_t * uiop,cred_t * cred,int * eofp,int flags)2290 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
2291 int flags)
2292 {
2293 struct sdev_node *ddv = VTOSDEV(vp);
2294 struct sdev_node *dv;
2295 dirent64_t *dp;
2296 ulong_t outcount = 0;
2297 size_t namelen;
2298 ulong_t alloc_count;
2299 void *outbuf;
2300 struct iovec *iovp;
2301 int error = 0;
2302 size_t reclen;
2303 offset_t diroff;
2304 offset_t soff;
2305 int this_reclen;
2306 int (*vtor)(struct sdev_node *) = NULL;
2307 struct vattr attr;
2308 timestruc_t now;
2309
2310 ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
2311 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2312
2313 if (uiop->uio_loffset >= MAXOFF_T) {
2314 if (eofp)
2315 *eofp = 1;
2316 return (0);
2317 }
2318
2319 if (uiop->uio_iovcnt != 1)
2320 return (EINVAL);
2321
2322 if (vp->v_type != VDIR)
2323 return (ENOTDIR);
2324
2325 if (ddv->sdev_flags & SDEV_VTOR) {
2326 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2327 ASSERT(vtor);
2328 }
2329
2330 if (eofp != NULL)
2331 *eofp = 0;
2332
2333 soff = uiop->uio_loffset;
2334 iovp = uiop->uio_iov;
2335 alloc_count = iovp->iov_len;
2336 dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
2337 outcount = 0;
2338
2339 if (ddv->sdev_state == SDEV_ZOMBIE)
2340 goto get_cache;
2341
2342 if (SDEV_IS_GLOBAL(ddv)) {
2343
2344 if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
2345 !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
2346 !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
2347 ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
2348 !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
2349 !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2350 !sdev_reconfig_disable) {
2351 /*
2352 * invoking "devfsadm" to do system device reconfig
2353 */
2354 mutex_enter(&ddv->sdev_lookup_lock);
2355 SDEV_BLOCK_OTHERS(ddv,
2356 (SDEV_READDIR|SDEV_LGWAITING));
2357 mutex_exit(&ddv->sdev_lookup_lock);
2358
2359 sdcmn_err8(("readdir of %s by %s: reconfig\n",
2360 ddv->sdev_path, curproc->p_user.u_comm));
2361 if (sdev_reconfig_verbose) {
2362 cmn_err(CE_CONT,
2363 "?readdir of %s by %s: reconfig\n",
2364 ddv->sdev_path, curproc->p_user.u_comm);
2365 }
2366
2367 sdev_devfsadmd_thread(ddv, NULL, kcred);
2368 } else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
2369 /*
2370 * compensate the "ls" started later than "devfsadm"
2371 */
2372 mutex_enter(&ddv->sdev_lookup_lock);
2373 SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
2374 mutex_exit(&ddv->sdev_lookup_lock);
2375 }
2376
2377 /*
2378 * release the contents lock so that
2379 * the cache may be updated by devfsadmd
2380 */
2381 rw_exit(&ddv->sdev_contents);
2382 mutex_enter(&ddv->sdev_lookup_lock);
2383 if (SDEV_IS_READDIR(ddv))
2384 (void) sdev_wait4lookup(ddv, SDEV_READDIR);
2385 mutex_exit(&ddv->sdev_lookup_lock);
2386 rw_enter(&ddv->sdev_contents, RW_READER);
2387
2388 sdcmn_err4(("readdir of directory %s by %s\n",
2389 ddv->sdev_name, curproc->p_user.u_comm));
2390 if (ddv->sdev_flags & SDEV_BUILD) {
2391 if (SDEV_IS_PERSIST(ddv)) {
2392 error = sdev_filldir_from_store(ddv,
2393 alloc_count, cred);
2394 }
2395 ddv->sdev_flags &= ~SDEV_BUILD;
2396 }
2397 }
2398
2399 get_cache:
2400 /* handle "." and ".." */
2401 diroff = 0;
2402 if (soff == 0) {
2403 /* first time */
2404 this_reclen = DIRENT64_RECLEN(1);
2405 if (alloc_count < this_reclen) {
2406 error = EINVAL;
2407 goto done;
2408 }
2409
2410 dp->d_ino = (ino64_t)ddv->sdev_ino;
2411 dp->d_off = (off64_t)1;
2412 dp->d_reclen = (ushort_t)this_reclen;
2413
2414 (void) strncpy(dp->d_name, ".",
2415 DIRENT64_NAMELEN(this_reclen));
2416 outcount += dp->d_reclen;
2417 dp = nextdp(dp);
2418 }
2419
2420 diroff++;
2421 if (soff <= 1) {
2422 this_reclen = DIRENT64_RECLEN(2);
2423 if (alloc_count < outcount + this_reclen) {
2424 error = EINVAL;
2425 goto done;
2426 }
2427
2428 dp->d_reclen = (ushort_t)this_reclen;
2429 dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
2430 dp->d_off = (off64_t)2;
2431
2432 (void) strncpy(dp->d_name, "..",
2433 DIRENT64_NAMELEN(this_reclen));
2434 outcount += dp->d_reclen;
2435
2436 dp = nextdp(dp);
2437 }
2438
2439
2440 /* gets the cache */
2441 diroff++;
2442 for (dv = SDEV_FIRST_ENTRY(ddv); dv;
2443 dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
2444 sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
2445 diroff, soff, dv->sdev_name));
2446
2447 /* bypassing pre-matured nodes */
2448 if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
2449 sdcmn_err3(("sdev_readdir: pre-mature node "
2450 "%s %d\n", dv->sdev_name, dv->sdev_state));
2451 continue;
2452 }
2453
2454 /*
2455 * Check validity of node
2456 * Drop invalid and nodes to be skipped.
2457 * A node the validator indicates as stale needs
2458 * to be returned as presumably the node name itself
2459 * is valid and the node data itself will be refreshed
2460 * on lookup. An application performing a readdir then
2461 * stat on each entry should thus always see consistent
2462 * data. In any case, it is not possible to synchronize
2463 * with dynamic kernel state, and any view we return can
2464 * never be anything more than a snapshot at a point in time.
2465 */
2466 if (vtor) {
2467 switch (vtor(dv)) {
2468 case SDEV_VTOR_VALID:
2469 break;
2470 case SDEV_VTOR_INVALID:
2471 case SDEV_VTOR_SKIP:
2472 continue;
2473 case SDEV_VTOR_STALE:
2474 sdcmn_err3(("sdev_readir: %s stale\n",
2475 dv->sdev_name));
2476 break;
2477 default:
2478 cmn_err(CE_PANIC,
2479 "dev fs: validator failed: %s(%p)\n",
2480 dv->sdev_name, (void *)dv);
2481 break;
2482 /*NOTREACHED*/
2483 }
2484 }
2485
2486 namelen = strlen(dv->sdev_name);
2487 reclen = DIRENT64_RECLEN(namelen);
2488 if (outcount + reclen > alloc_count) {
2489 goto full;
2490 }
2491 dp->d_reclen = (ushort_t)reclen;
2492 dp->d_ino = (ino64_t)dv->sdev_ino;
2493 dp->d_off = (off64_t)diroff + 1;
2494 (void) strncpy(dp->d_name, dv->sdev_name,
2495 DIRENT64_NAMELEN(reclen));
2496 outcount += reclen;
2497 dp = nextdp(dp);
2498 }
2499
2500 full:
2501 sdcmn_err4(("sdev_readdir: moving %lu bytes: "
2502 "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
2503 (void *)dv));
2504
2505 if (outcount)
2506 error = uiomove(outbuf, outcount, UIO_READ, uiop);
2507
2508 if (!error) {
2509 uiop->uio_loffset = diroff;
2510 if (eofp)
2511 *eofp = dv ? 0 : 1;
2512 }
2513
2514
2515 if (ddv->sdev_attrvp) {
2516 gethrestime(&now);
2517 attr.va_ctime = now;
2518 attr.va_atime = now;
2519 attr.va_mask = AT_CTIME|AT_ATIME;
2520
2521 (void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
2522 }
2523 done:
2524 kmem_free(outbuf, alloc_count);
2525 return (error);
2526 }
2527
2528 static int
sdev_modctl_lookup(const char * path,vnode_t ** r_vp)2529 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2530 {
2531 vnode_t *vp;
2532 vnode_t *cvp;
2533 struct sdev_node *svp;
2534 char *nm;
2535 struct pathname pn;
2536 int error;
2537 int persisted = 0;
2538
2539 ASSERT(INGLOBALZONE(curproc));
2540
2541 if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2542 return (error);
2543 nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2544
2545 vp = rootdir;
2546 VN_HOLD(vp);
2547
2548 while (pn_pathleft(&pn)) {
2549 ASSERT(vp->v_type == VDIR || vp->v_type == VLNK);
2550 (void) pn_getcomponent(&pn, nm);
2551
2552 /*
2553 * Deal with the .. special case where we may be
2554 * traversing up across a mount point, to the
2555 * root of this filesystem or global root.
2556 */
2557 if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) {
2558 checkforroot:
2559 if (VN_CMP(vp, rootdir)) {
2560 nm[1] = 0;
2561 } else if (vp->v_flag & VROOT) {
2562 vfs_t *vfsp;
2563 cvp = vp;
2564 vfsp = cvp->v_vfsp;
2565 vfs_rlock_wait(vfsp);
2566 vp = cvp->v_vfsp->vfs_vnodecovered;
2567 if (vp == NULL ||
2568 (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
2569 vfs_unlock(vfsp);
2570 VN_RELE(cvp);
2571 error = EIO;
2572 break;
2573 }
2574 VN_HOLD(vp);
2575 vfs_unlock(vfsp);
2576 VN_RELE(cvp);
2577 cvp = NULL;
2578 goto checkforroot;
2579 }
2580 }
2581
2582 error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2583 NULL, NULL);
2584 if (error) {
2585 VN_RELE(vp);
2586 break;
2587 }
2588
2589 /* traverse mount points encountered on our journey */
2590 if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2591 VN_RELE(vp);
2592 VN_RELE(cvp);
2593 break;
2594 }
2595
2596 /*
2597 * symbolic link, can be either relative and absolute
2598 */
2599 if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) {
2600 struct pathname linkpath;
2601 pn_alloc(&linkpath);
2602 if (error = pn_getsymlink(cvp, &linkpath, kcred)) {
2603 pn_free(&linkpath);
2604 break;
2605 }
2606 if (pn_pathleft(&linkpath) == 0)
2607 (void) pn_set(&linkpath, ".");
2608 error = pn_insert(&pn, &linkpath, strlen(nm));
2609 pn_free(&linkpath);
2610 if (pn.pn_pathlen == 0) {
2611 VN_RELE(vp);
2612 return (ENOENT);
2613 }
2614 if (pn.pn_path[0] == '/') {
2615 pn_skipslash(&pn);
2616 VN_RELE(vp);
2617 VN_RELE(cvp);
2618 vp = rootdir;
2619 VN_HOLD(vp);
2620 } else {
2621 VN_RELE(cvp);
2622 }
2623 continue;
2624 }
2625
2626 VN_RELE(vp);
2627
2628 /*
2629 * Direct the operation to the persisting filesystem
2630 * underlying /dev. Bail if we encounter a
2631 * non-persistent dev entity here.
2632 */
2633 if (cvp->v_vfsp->vfs_fstype == devtype) {
2634
2635 if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2636 error = ENOENT;
2637 VN_RELE(cvp);
2638 break;
2639 }
2640
2641 if (VTOSDEV(cvp) == NULL) {
2642 error = ENOENT;
2643 VN_RELE(cvp);
2644 break;
2645 }
2646 svp = VTOSDEV(cvp);
2647 if ((vp = svp->sdev_attrvp) == NULL) {
2648 error = ENOENT;
2649 VN_RELE(cvp);
2650 break;
2651 }
2652 persisted = 1;
2653 VN_HOLD(vp);
2654 VN_RELE(cvp);
2655 cvp = vp;
2656 }
2657
2658 vp = cvp;
2659 pn_skipslash(&pn);
2660 }
2661
2662 kmem_free(nm, MAXNAMELEN);
2663 pn_free(&pn);
2664
2665 if (error)
2666 return (error);
2667
2668 /*
2669 * Only return persisted nodes in the filesystem underlying /dev.
2670 */
2671 if (!persisted) {
2672 VN_RELE(vp);
2673 return (ENOENT);
2674 }
2675
2676 *r_vp = vp;
2677 return (0);
2678 }
2679
2680 int
sdev_modctl_readdir(const char * dir,char *** dirlistp,int * npathsp,int * npathsp_alloc,int checking_empty)2681 sdev_modctl_readdir(const char *dir, char ***dirlistp, int *npathsp,
2682 int *npathsp_alloc, int checking_empty)
2683 {
2684 char **pathlist = NULL;
2685 char **newlist = NULL;
2686 int npaths = 0;
2687 int npaths_alloc = 0;
2688 dirent64_t *dbuf = NULL;
2689 int n;
2690 char *s;
2691 int error;
2692 vnode_t *vp;
2693 int eof;
2694 struct iovec iov;
2695 struct uio uio;
2696 struct dirent64 *dp;
2697 size_t dlen;
2698 size_t dbuflen;
2699 int ndirents = 64;
2700 char *nm;
2701
2702 error = sdev_modctl_lookup(dir, &vp);
2703 sdcmn_err11(("modctl readdir: %s by %s: %s\n",
2704 dir, curproc->p_user.u_comm,
2705 (error == 0) ? "ok" : "failed"));
2706 if (error)
2707 return (error);
2708
2709 dlen = ndirents * (sizeof (*dbuf));
2710 dbuf = kmem_alloc(dlen, KM_SLEEP);
2711
2712 uio.uio_iov = &iov;
2713 uio.uio_iovcnt = 1;
2714 uio.uio_segflg = UIO_SYSSPACE;
2715 uio.uio_fmode = 0;
2716 uio.uio_extflg = UIO_COPY_CACHED;
2717 uio.uio_loffset = 0;
2718 uio.uio_llimit = MAXOFFSET_T;
2719
2720 eof = 0;
2721 error = 0;
2722 while (!error && !eof) {
2723 uio.uio_resid = dlen;
2724 iov.iov_base = (char *)dbuf;
2725 iov.iov_len = dlen;
2726
2727 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2728 error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
2729 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2730
2731 dbuflen = dlen - uio.uio_resid;
2732
2733 if (error || dbuflen == 0)
2734 break;
2735
2736 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
2737 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
2738
2739 nm = dp->d_name;
2740
2741 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
2742 continue;
2743 if (npaths == npaths_alloc) {
2744 npaths_alloc += 64;
2745 newlist = (char **)
2746 kmem_zalloc((npaths_alloc + 1) *
2747 sizeof (char *), KM_SLEEP);
2748 if (pathlist) {
2749 bcopy(pathlist, newlist,
2750 npaths * sizeof (char *));
2751 kmem_free(pathlist,
2752 (npaths + 1) * sizeof (char *));
2753 }
2754 pathlist = newlist;
2755 }
2756 n = strlen(nm) + 1;
2757 s = kmem_alloc(n, KM_SLEEP);
2758 bcopy(nm, s, n);
2759 pathlist[npaths++] = s;
2760 sdcmn_err11((" %s/%s\n", dir, s));
2761
2762 /* if checking empty, one entry is as good as many */
2763 if (checking_empty) {
2764 eof = 1;
2765 break;
2766 }
2767 }
2768 }
2769
2770 exit:
2771 VN_RELE(vp);
2772
2773 if (dbuf)
2774 kmem_free(dbuf, dlen);
2775
2776 if (error)
2777 return (error);
2778
2779 *dirlistp = pathlist;
2780 *npathsp = npaths;
2781 *npathsp_alloc = npaths_alloc;
2782
2783 return (0);
2784 }
2785
2786 void
sdev_modctl_readdir_free(char ** pathlist,int npaths,int npaths_alloc)2787 sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
2788 {
2789 int i, n;
2790
2791 for (i = 0; i < npaths; i++) {
2792 n = strlen(pathlist[i]) + 1;
2793 kmem_free(pathlist[i], n);
2794 }
2795
2796 kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
2797 }
2798
2799 int
sdev_modctl_devexists(const char * path)2800 sdev_modctl_devexists(const char *path)
2801 {
2802 vnode_t *vp;
2803 int error;
2804
2805 error = sdev_modctl_lookup(path, &vp);
2806 sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
2807 path, curproc->p_user.u_comm,
2808 (error == 0) ? "ok" : "failed"));
2809 if (error == 0)
2810 VN_RELE(vp);
2811
2812 return (error);
2813 }
2814
2815 /*
2816 * a generic setattr() function
2817 *
2818 * note: flags only supports AT_UID and AT_GID.
2819 * Future enhancements can be done for other types, e.g. AT_MODE
2820 */
2821 int
devname_setattr_func(struct vnode * vp,struct vattr * vap,int flags,struct cred * cred,int (* callback)(struct sdev_node *,struct vattr *,int),int protocol)2822 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
2823 struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
2824 int), int protocol)
2825 {
2826 struct sdev_node *dv = VTOSDEV(vp);
2827 struct sdev_node *parent = dv->sdev_dotdot;
2828 struct vattr *get;
2829 uint_t mask = vap->va_mask;
2830 int error;
2831
2832 /* some sanity checks */
2833 if (vap->va_mask & AT_NOSET)
2834 return (EINVAL);
2835
2836 if (vap->va_mask & AT_SIZE) {
2837 if (vp->v_type == VDIR) {
2838 return (EISDIR);
2839 }
2840 }
2841
2842 /* no need to set attribute, but do not fail either */
2843 ASSERT(parent);
2844 rw_enter(&parent->sdev_contents, RW_READER);
2845 if (dv->sdev_state == SDEV_ZOMBIE) {
2846 rw_exit(&parent->sdev_contents);
2847 return (0);
2848 }
2849
2850 /* If backing store exists, just set it. */
2851 if (dv->sdev_attrvp) {
2852 rw_exit(&parent->sdev_contents);
2853 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
2854 }
2855
2856 /*
2857 * Otherwise, for nodes with the persistence attribute, create it.
2858 */
2859 ASSERT(dv->sdev_attr);
2860 if (SDEV_IS_PERSIST(dv) ||
2861 ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
2862 sdev_vattr_merge(dv, vap);
2863 rw_enter(&dv->sdev_contents, RW_WRITER);
2864 error = sdev_shadow_node(dv, cred);
2865 rw_exit(&dv->sdev_contents);
2866 rw_exit(&parent->sdev_contents);
2867
2868 if (error)
2869 return (error);
2870 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
2871 }
2872
2873
2874 /*
2875 * sdev_attr was allocated in sdev_mknode
2876 */
2877 rw_enter(&dv->sdev_contents, RW_WRITER);
2878 error = secpolicy_vnode_setattr(cred, vp, vap,
2879 dv->sdev_attr, flags, sdev_unlocked_access, dv);
2880 if (error) {
2881 rw_exit(&dv->sdev_contents);
2882 rw_exit(&parent->sdev_contents);
2883 return (error);
2884 }
2885
2886 get = dv->sdev_attr;
2887 if (mask & AT_MODE) {
2888 get->va_mode &= S_IFMT;
2889 get->va_mode |= vap->va_mode & ~S_IFMT;
2890 }
2891
2892 if ((mask & AT_UID) || (mask & AT_GID)) {
2893 if (mask & AT_UID)
2894 get->va_uid = vap->va_uid;
2895 if (mask & AT_GID)
2896 get->va_gid = vap->va_gid;
2897 /*
2898 * a callback must be provided if the protocol is set
2899 */
2900 if ((protocol & AT_UID) || (protocol & AT_GID)) {
2901 ASSERT(callback);
2902 error = callback(dv, get, protocol);
2903 if (error) {
2904 rw_exit(&dv->sdev_contents);
2905 rw_exit(&parent->sdev_contents);
2906 return (error);
2907 }
2908 }
2909 }
2910
2911 if (mask & AT_ATIME)
2912 get->va_atime = vap->va_atime;
2913 if (mask & AT_MTIME)
2914 get->va_mtime = vap->va_mtime;
2915 if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
2916 gethrestime(&get->va_ctime);
2917 }
2918
2919 sdev_vattr_merge(dv, get);
2920 rw_exit(&dv->sdev_contents);
2921 rw_exit(&parent->sdev_contents);
2922 return (0);
2923 }
2924
2925 /*
2926 * a generic inactive() function
2927 */
2928 /*ARGSUSED*/
2929 void
devname_inactive_func(struct vnode * vp,struct cred * cred,void (* callback)(struct vnode *))2930 devname_inactive_func(struct vnode *vp, struct cred *cred,
2931 void (*callback)(struct vnode *))
2932 {
2933 int clean;
2934 struct sdev_node *dv = VTOSDEV(vp);
2935 int state;
2936
2937 mutex_enter(&vp->v_lock);
2938 ASSERT(vp->v_count >= 1);
2939
2940
2941 if (vp->v_count == 1 && callback != NULL)
2942 callback(vp);
2943
2944 rw_enter(&dv->sdev_contents, RW_WRITER);
2945 state = dv->sdev_state;
2946
2947 clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
2948
2949 /*
2950 * sdev is a rather bad public citizen. It violates the general
2951 * agreement that in memory nodes should always have a valid reference
2952 * count on their vnode. But that's not the case here. This means that
2953 * we do actually have to distinguish between getting inactive callbacks
2954 * for zombies and otherwise. This should probably be fixed.
2955 */
2956 if (clean) {
2957 /* Remove the . entry to ourselves */
2958 if (vp->v_type == VDIR) {
2959 decr_link(dv);
2960 }
2961 VERIFY(dv->sdev_nlink == 1);
2962 decr_link(dv);
2963 VN_RELE_LOCKED(vp);
2964 rw_exit(&dv->sdev_contents);
2965 mutex_exit(&vp->v_lock);
2966 sdev_nodedestroy(dv, 0);
2967 } else {
2968 VN_RELE_LOCKED(vp);
2969 rw_exit(&dv->sdev_contents);
2970 mutex_exit(&vp->v_lock);
2971 }
2972 }
2973