1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
24 */
25
26 /*
27 * utility routines for the /dev fs
28 */
29
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/t_lock.h>
33 #include <sys/systm.h>
34 #include <sys/sysmacros.h>
35 #include <sys/user.h>
36 #include <sys/time.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/file.h>
40 #include <sys/fcntl.h>
41 #include <sys/flock.h>
42 #include <sys/kmem.h>
43 #include <sys/uio.h>
44 #include <sys/errno.h>
45 #include <sys/stat.h>
46 #include <sys/cred.h>
47 #include <sys/dirent.h>
48 #include <sys/pathname.h>
49 #include <sys/cmn_err.h>
50 #include <sys/debug.h>
51 #include <sys/mode.h>
52 #include <sys/policy.h>
53 #include <fs/fs_subr.h>
54 #include <sys/mount.h>
55 #include <sys/fs/snode.h>
56 #include <sys/fs/dv_node.h>
57 #include <sys/fs/sdev_impl.h>
58 #include <sys/sunndi.h>
59 #include <sys/sunmdi.h>
60 #include <sys/conf.h>
61 #include <sys/proc.h>
62 #include <sys/user.h>
63 #include <sys/modctl.h>
64
#ifdef	DEBUG
/* DEBUG-only tunable; presumably the sdcmn_err*() message mask -- confirm */
int sdev_debug = 0x00000001;
/* DEBUG-only flags handed to kmem_cache_create() by sdev_node_cache_init() */
int sdev_debug_cache_flags = 0;
#endif	/* DEBUG */
69
70 /*
71 * globals
72 */
73 /* prototype memory vattrs */
74 vattr_t sdev_vattr_dir = {
75 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */
76 VDIR, /* va_type */
77 SDEV_DIRMODE_DEFAULT, /* va_mode */
78 SDEV_UID_DEFAULT, /* va_uid */
79 SDEV_GID_DEFAULT, /* va_gid */
80 0, /* va_fsid */
81 0, /* va_nodeid */
82 0, /* va_nlink */
83 0, /* va_size */
84 0, /* va_atime */
85 0, /* va_mtime */
86 0, /* va_ctime */
87 0, /* va_rdev */
88 0, /* va_blksize */
89 0, /* va_nblocks */
90 0 /* va_vcode */
91 };
92
93 vattr_t sdev_vattr_lnk = {
94 AT_TYPE|AT_MODE, /* va_mask */
95 VLNK, /* va_type */
96 SDEV_LNKMODE_DEFAULT, /* va_mode */
97 SDEV_UID_DEFAULT, /* va_uid */
98 SDEV_GID_DEFAULT, /* va_gid */
99 0, /* va_fsid */
100 0, /* va_nodeid */
101 0, /* va_nlink */
102 0, /* va_size */
103 0, /* va_atime */
104 0, /* va_mtime */
105 0, /* va_ctime */
106 0, /* va_rdev */
107 0, /* va_blksize */
108 0, /* va_nblocks */
109 0 /* va_vcode */
110 };
111
112 vattr_t sdev_vattr_blk = {
113 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */
114 VBLK, /* va_type */
115 S_IFBLK | SDEV_DEVMODE_DEFAULT, /* va_mode */
116 SDEV_UID_DEFAULT, /* va_uid */
117 SDEV_GID_DEFAULT, /* va_gid */
118 0, /* va_fsid */
119 0, /* va_nodeid */
120 0, /* va_nlink */
121 0, /* va_size */
122 0, /* va_atime */
123 0, /* va_mtime */
124 0, /* va_ctime */
125 0, /* va_rdev */
126 0, /* va_blksize */
127 0, /* va_nblocks */
128 0 /* va_vcode */
129 };
130
131 vattr_t sdev_vattr_chr = {
132 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */
133 VCHR, /* va_type */
134 S_IFCHR | SDEV_DEVMODE_DEFAULT, /* va_mode */
135 SDEV_UID_DEFAULT, /* va_uid */
136 SDEV_GID_DEFAULT, /* va_gid */
137 0, /* va_fsid */
138 0, /* va_nodeid */
139 0, /* va_nlink */
140 0, /* va_size */
141 0, /* va_atime */
142 0, /* va_mtime */
143 0, /* va_ctime */
144 0, /* va_rdev */
145 0, /* va_blksize */
146 0, /* va_nblocks */
147 0 /* va_vcode */
148 };
149
150 kmem_cache_t *sdev_node_cache; /* sdev_node cache */
151 int devtype; /* fstype */
152
153 /* static */
154 static struct vnodeops *sdev_get_vop(struct sdev_node *);
155 static void sdev_set_no_negcache(struct sdev_node *);
156 static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
157 static void sdev_free_vtab(fs_operation_def_t *);
158
159 static void
sdev_prof_free(struct sdev_node * dv)160 sdev_prof_free(struct sdev_node *dv)
161 {
162 ASSERT(!SDEV_IS_GLOBAL(dv));
163 if (dv->sdev_prof.dev_name)
164 nvlist_free(dv->sdev_prof.dev_name);
165 if (dv->sdev_prof.dev_map)
166 nvlist_free(dv->sdev_prof.dev_map);
167 if (dv->sdev_prof.dev_symlink)
168 nvlist_free(dv->sdev_prof.dev_symlink);
169 if (dv->sdev_prof.dev_glob_incdir)
170 nvlist_free(dv->sdev_prof.dev_glob_incdir);
171 if (dv->sdev_prof.dev_glob_excdir)
172 nvlist_free(dv->sdev_prof.dev_glob_excdir);
173 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
174 }
175
176 /* sdev_node cache constructor */
177 /*ARGSUSED1*/
178 static int
i_sdev_node_ctor(void * buf,void * cfarg,int flag)179 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
180 {
181 struct sdev_node *dv = (struct sdev_node *)buf;
182 struct vnode *vp;
183
184 bzero(buf, sizeof (struct sdev_node));
185 vp = dv->sdev_vnode = vn_alloc(flag);
186 if (vp == NULL) {
187 return (-1);
188 }
189 vp->v_data = dv;
190 rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
191 return (0);
192 }
193
194 /* sdev_node cache destructor */
195 /*ARGSUSED1*/
196 static void
i_sdev_node_dtor(void * buf,void * arg)197 i_sdev_node_dtor(void *buf, void *arg)
198 {
199 struct sdev_node *dv = (struct sdev_node *)buf;
200 struct vnode *vp = SDEVTOV(dv);
201
202 rw_destroy(&dv->sdev_contents);
203 vn_free(vp);
204 }
205
206 /* initialize sdev_node cache */
207 void
sdev_node_cache_init()208 sdev_node_cache_init()
209 {
210 int flags = 0;
211
212 #ifdef DEBUG
213 flags = sdev_debug_cache_flags;
214 if (flags)
215 sdcmn_err(("cache debug flags 0x%x\n", flags));
216 #endif /* DEBUG */
217
218 ASSERT(sdev_node_cache == NULL);
219 sdev_node_cache = kmem_cache_create("sdev_node_cache",
220 sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
221 NULL, NULL, NULL, flags);
222 }
223
224 /* destroy sdev_node cache */
225 void
sdev_node_cache_fini()226 sdev_node_cache_fini()
227 {
228 ASSERT(sdev_node_cache != NULL);
229 kmem_cache_destroy(sdev_node_cache);
230 sdev_node_cache = NULL;
231 }
232
233 /*
234 * Compare two nodes lexographically to balance avl tree
235 */
236 static int
sdev_compare_nodes(const struct sdev_node * dv1,const struct sdev_node * dv2)237 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
238 {
239 int rv;
240 if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
241 return (0);
242 return ((rv < 0) ? -1 : 1);
243 }
244
245 void
sdev_set_nodestate(struct sdev_node * dv,sdev_node_state_t state)246 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
247 {
248 ASSERT(dv);
249 ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
250 dv->sdev_state = state;
251 }
252
253 static void
sdev_attr_update(struct sdev_node * dv,vattr_t * vap)254 sdev_attr_update(struct sdev_node *dv, vattr_t *vap)
255 {
256 timestruc_t now;
257 struct vattr *attrp;
258 uint_t mask;
259
260 ASSERT(dv->sdev_attr);
261 ASSERT(vap);
262
263 attrp = dv->sdev_attr;
264 mask = vap->va_mask;
265 if (mask & AT_TYPE)
266 attrp->va_type = vap->va_type;
267 if (mask & AT_MODE)
268 attrp->va_mode = vap->va_mode;
269 if (mask & AT_UID)
270 attrp->va_uid = vap->va_uid;
271 if (mask & AT_GID)
272 attrp->va_gid = vap->va_gid;
273 if (mask & AT_RDEV)
274 attrp->va_rdev = vap->va_rdev;
275
276 gethrestime(&now);
277 attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now;
278 attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now;
279 attrp->va_ctime = (mask & AT_CTIME) ? vap->va_ctime : now;
280 }
281
282 static void
sdev_attr_alloc(struct sdev_node * dv,vattr_t * vap)283 sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap)
284 {
285 ASSERT(dv->sdev_attr == NULL);
286 ASSERT(vap->va_mask & AT_TYPE);
287 ASSERT(vap->va_mask & AT_MODE);
288
289 dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
290 sdev_attr_update(dv, vap);
291 }
292
293 /* alloc and initialize a sdev_node */
294 int
sdev_nodeinit(struct sdev_node * ddv,char * nm,struct sdev_node ** newdv,vattr_t * vap)295 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
296 vattr_t *vap)
297 {
298 struct sdev_node *dv = NULL;
299 struct vnode *vp;
300 size_t nmlen, len;
301 devname_handle_t *dhl;
302
303 nmlen = strlen(nm) + 1;
304 if (nmlen > MAXNAMELEN) {
305 sdcmn_err9(("sdev_nodeinit: node name %s"
306 " too long\n", nm));
307 *newdv = NULL;
308 return (ENAMETOOLONG);
309 }
310
311 dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
312
313 dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
314 bcopy(nm, dv->sdev_name, nmlen);
315 dv->sdev_namelen = nmlen - 1; /* '\0' not included */
316 len = strlen(ddv->sdev_path) + strlen(nm) + 2;
317 dv->sdev_path = kmem_alloc(len, KM_SLEEP);
318 (void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
319 /* overwritten for VLNK nodes */
320 dv->sdev_symlink = NULL;
321
322 vp = SDEVTOV(dv);
323 vn_reinit(vp);
324 vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
325 if (vap)
326 vp->v_type = vap->va_type;
327
328 /*
329 * initialized to the parent's vnodeops.
330 * maybe overwriten for a VDIR
331 */
332 vn_setops(vp, vn_getops(SDEVTOV(ddv)));
333 vn_exists(vp);
334
335 dv->sdev_dotdot = NULL;
336 dv->sdev_attrvp = NULL;
337 if (vap) {
338 sdev_attr_alloc(dv, vap);
339 } else {
340 dv->sdev_attr = NULL;
341 }
342
343 dv->sdev_ino = sdev_mkino(dv);
344 dv->sdev_nlink = 0; /* updated on insert */
345 dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
346 dv->sdev_flags |= SDEV_BUILD;
347 mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
348 cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
349 if (SDEV_IS_GLOBAL(ddv)) {
350 dv->sdev_flags |= SDEV_GLOBAL;
351 dhl = &(dv->sdev_handle);
352 dhl->dh_data = dv;
353 dhl->dh_args = NULL;
354 sdev_set_no_negcache(dv);
355 dv->sdev_gdir_gen = 0;
356 } else {
357 dv->sdev_flags &= ~SDEV_GLOBAL;
358 dv->sdev_origin = NULL; /* set later */
359 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
360 dv->sdev_ldir_gen = 0;
361 dv->sdev_devtree_gen = 0;
362 }
363
364 rw_enter(&dv->sdev_contents, RW_WRITER);
365 sdev_set_nodestate(dv, SDEV_INIT);
366 rw_exit(&dv->sdev_contents);
367 *newdv = dv;
368
369 return (0);
370 }
371
372 /*
373 * Transition a sdev_node into SDEV_READY state. If this fails, it is up to the
374 * caller to transition the node to the SDEV_ZOMBIE state.
375 */
376 int
sdev_nodeready(struct sdev_node * dv,struct vattr * vap,struct vnode * avp,void * args,struct cred * cred)377 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
378 void *args, struct cred *cred)
379 {
380 int error = 0;
381 struct vnode *vp = SDEVTOV(dv);
382 vtype_t type;
383
384 ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
385
386 type = vap->va_type;
387 vp->v_type = type;
388 vp->v_rdev = vap->va_rdev;
389 rw_enter(&dv->sdev_contents, RW_WRITER);
390 if (type == VDIR) {
391 dv->sdev_nlink = 2;
392 dv->sdev_flags &= ~SDEV_PERSIST;
393 dv->sdev_flags &= ~SDEV_DYNAMIC;
394 vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
395 ASSERT(dv->sdev_dotdot);
396 ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
397 vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
398 avl_create(&dv->sdev_entries,
399 (int (*)(const void *, const void *))sdev_compare_nodes,
400 sizeof (struct sdev_node),
401 offsetof(struct sdev_node, sdev_avllink));
402 } else if (type == VLNK) {
403 ASSERT(args);
404 dv->sdev_nlink = 1;
405 dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
406 } else {
407 dv->sdev_nlink = 1;
408 }
409
410 if (!(SDEV_IS_GLOBAL(dv))) {
411 dv->sdev_origin = (struct sdev_node *)args;
412 dv->sdev_flags &= ~SDEV_PERSIST;
413 }
414
415 /*
416 * shadow node is created here OR
417 * if failed (indicated by dv->sdev_attrvp == NULL),
418 * created later in sdev_setattr
419 */
420 if (avp) {
421 dv->sdev_attrvp = avp;
422 } else {
423 if (dv->sdev_attr == NULL) {
424 sdev_attr_alloc(dv, vap);
425 } else {
426 sdev_attr_update(dv, vap);
427 }
428
429 if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv))
430 error = sdev_shadow_node(dv, cred);
431 }
432
433 if (error == 0) {
434 /* transition to READY state */
435 sdev_set_nodestate(dv, SDEV_READY);
436 sdev_nc_node_exists(dv);
437 }
438 rw_exit(&dv->sdev_contents);
439 return (error);
440 }
441
442 /*
443 * Build the VROOT sdev_node.
444 */
445 /*ARGSUSED*/
446 struct sdev_node *
sdev_mkroot(struct vfs * vfsp,dev_t devdev,struct vnode * mvp,struct vnode * avp,struct cred * cred)447 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
448 struct vnode *avp, struct cred *cred)
449 {
450 struct sdev_node *dv;
451 struct vnode *vp;
452 char devdir[] = "/dev";
453
454 ASSERT(sdev_node_cache != NULL);
455 ASSERT(avp);
456 dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
457 vp = SDEVTOV(dv);
458 vn_reinit(vp);
459 vp->v_flag |= VROOT;
460 vp->v_vfsp = vfsp;
461 vp->v_type = VDIR;
462 vp->v_rdev = devdev;
463 vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
464 vn_exists(vp);
465
466 if (vfsp->vfs_mntpt)
467 dv->sdev_name = i_ddi_strdup(
468 (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
469 else
470 /* vfs_mountdev1 set mount point later */
471 dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
472 dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
473 dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
474 dv->sdev_ino = SDEV_ROOTINO;
475 dv->sdev_nlink = 2; /* name + . (no sdev_insert) */
476 dv->sdev_dotdot = dv; /* .. == self */
477 dv->sdev_attrvp = avp;
478 dv->sdev_attr = NULL;
479 mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
480 cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
481 if (strcmp(dv->sdev_name, "/dev") == 0) {
482 dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
483 bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
484 dv->sdev_gdir_gen = 0;
485 } else {
486 dv->sdev_flags = SDEV_BUILD;
487 dv->sdev_flags &= ~SDEV_PERSIST;
488 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
489 dv->sdev_ldir_gen = 0;
490 dv->sdev_devtree_gen = 0;
491 }
492
493 avl_create(&dv->sdev_entries,
494 (int (*)(const void *, const void *))sdev_compare_nodes,
495 sizeof (struct sdev_node),
496 offsetof(struct sdev_node, sdev_avllink));
497
498 rw_enter(&dv->sdev_contents, RW_WRITER);
499 sdev_set_nodestate(dv, SDEV_READY);
500 rw_exit(&dv->sdev_contents);
501 sdev_nc_node_exists(dv);
502 return (dv);
503 }
504
505 /* directory dependent vop table */
506 struct sdev_vop_table {
507 char *vt_name; /* subdirectory name */
508 const fs_operation_def_t *vt_service; /* vnodeops table */
509 struct vnodeops *vt_vops; /* constructed vop */
510 struct vnodeops **vt_global_vops; /* global container for vop */
511 int (*vt_vtor)(struct sdev_node *); /* validate sdev_node */
512 int vt_flags;
513 };
514
515 /*
516 * A nice improvement would be to provide a plug-in mechanism
517 * for this table instead of a const table.
518 */
519 static struct sdev_vop_table vtab[] =
520 {
521 { "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
522 SDEV_DYNAMIC | SDEV_VTOR },
523
524 { "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate,
525 SDEV_DYNAMIC | SDEV_VTOR },
526
527 { "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops,
528 devzvol_validate, SDEV_ZONED | SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },
529
530 { "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },
531
532 { "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
533 SDEV_DYNAMIC | SDEV_VTOR },
534
535 { "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops,
536 devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE },
537
538 /*
539 * SDEV_DYNAMIC: prevent calling out to devfsadm, since only the
540 * lofi driver controls child nodes.
541 *
542 * SDEV_PERSIST: ensure devfsadm knows to clean up any persisted
543 * stale nodes (e.g. from devfsadm -R).
544 *
545 * In addition, devfsadm knows not to attempt a rmdir: a zone
546 * may hold a reference, which would zombify the node,
547 * preventing a mkdir.
548 */
549
550 { "lofi", NULL, NULL, NULL, NULL,
551 SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
552 { "rlofi", NULL, NULL, NULL, NULL,
553 SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
554
555 { NULL, NULL, NULL, NULL, NULL, 0}
556 };
557
558 /*
559 * We need to match off of the sdev_path, not the sdev_name. We are only allowed
560 * to exist directly under /dev.
561 */
562 struct sdev_vop_table *
sdev_match(struct sdev_node * dv)563 sdev_match(struct sdev_node *dv)
564 {
565 int vlen;
566 int i;
567 const char *path;
568
569 if (strlen(dv->sdev_path) <= 5)
570 return (NULL);
571
572 if (strncmp(dv->sdev_path, "/dev/", 5) != 0)
573 return (NULL);
574 path = dv->sdev_path + 5;
575
576 for (i = 0; vtab[i].vt_name; i++) {
577 if (strcmp(vtab[i].vt_name, path) == 0)
578 return (&vtab[i]);
579 if (vtab[i].vt_flags & SDEV_SUBDIR) {
580 vlen = strlen(vtab[i].vt_name);
581 if ((strncmp(vtab[i].vt_name, path,
582 vlen - 1) == 0) && path[vlen] == '/')
583 return (&vtab[i]);
584 }
585
586 }
587 return (NULL);
588 }
589
590 /*
591 * sets a directory's vnodeops if the directory is in the vtab;
592 */
593 static struct vnodeops *
sdev_get_vop(struct sdev_node * dv)594 sdev_get_vop(struct sdev_node *dv)
595 {
596 struct sdev_vop_table *vtp;
597 char *path;
598
599 path = dv->sdev_path;
600 ASSERT(path);
601
602 /* gets the relative path to /dev/ */
603 path += 5;
604
605 /* gets the vtab entry it matches */
606 if ((vtp = sdev_match(dv)) != NULL) {
607 dv->sdev_flags |= vtp->vt_flags;
608 if (SDEV_IS_PERSIST(dv->sdev_dotdot) &&
609 (SDEV_IS_PERSIST(dv) || !SDEV_IS_DYNAMIC(dv)))
610 dv->sdev_flags |= SDEV_PERSIST;
611
612 if (vtp->vt_vops) {
613 if (vtp->vt_global_vops)
614 *(vtp->vt_global_vops) = vtp->vt_vops;
615
616 return (vtp->vt_vops);
617 }
618
619 if (vtp->vt_service) {
620 fs_operation_def_t *templ;
621 templ = sdev_merge_vtab(vtp->vt_service);
622 if (vn_make_ops(vtp->vt_name,
623 (const fs_operation_def_t *)templ,
624 &vtp->vt_vops) != 0) {
625 cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
626 vtp->vt_name);
627 /*NOTREACHED*/
628 }
629 if (vtp->vt_global_vops) {
630 *(vtp->vt_global_vops) = vtp->vt_vops;
631 }
632 sdev_free_vtab(templ);
633
634 return (vtp->vt_vops);
635 }
636
637 return (sdev_vnodeops);
638 }
639
640 /* child inherits the persistence of the parent */
641 if (SDEV_IS_PERSIST(dv->sdev_dotdot))
642 dv->sdev_flags |= SDEV_PERSIST;
643
644 return (sdev_vnodeops);
645 }
646
647 static void
sdev_set_no_negcache(struct sdev_node * dv)648 sdev_set_no_negcache(struct sdev_node *dv)
649 {
650 int i;
651 char *path;
652
653 ASSERT(dv->sdev_path);
654 path = dv->sdev_path + strlen("/dev/");
655
656 for (i = 0; vtab[i].vt_name; i++) {
657 if (strcmp(vtab[i].vt_name, path) == 0) {
658 if (vtab[i].vt_flags & SDEV_NO_NCACHE)
659 dv->sdev_flags |= SDEV_NO_NCACHE;
660 break;
661 }
662 }
663 }
664
665 void *
sdev_get_vtor(struct sdev_node * dv)666 sdev_get_vtor(struct sdev_node *dv)
667 {
668 struct sdev_vop_table *vtp;
669
670 vtp = sdev_match(dv);
671 if (vtp)
672 return ((void *)vtp->vt_vtor);
673 else
674 return (NULL);
675 }
676
677 /*
678 * Build the base root inode
679 */
680 ino_t
sdev_mkino(struct sdev_node * dv)681 sdev_mkino(struct sdev_node *dv)
682 {
683 ino_t ino;
684
685 /*
686 * for now, follow the lead of tmpfs here
687 * need to someday understand the requirements here
688 */
689 ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
690 ino += SDEV_ROOTINO + 1;
691
692 return (ino);
693 }
694
695 int
sdev_getlink(struct vnode * linkvp,char ** link)696 sdev_getlink(struct vnode *linkvp, char **link)
697 {
698 int err;
699 char *buf;
700 struct uio uio = {0};
701 struct iovec iov = {0};
702
703 if (linkvp == NULL)
704 return (ENOENT);
705 ASSERT(linkvp->v_type == VLNK);
706
707 buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
708 iov.iov_base = buf;
709 iov.iov_len = MAXPATHLEN;
710 uio.uio_iov = &iov;
711 uio.uio_iovcnt = 1;
712 uio.uio_resid = MAXPATHLEN;
713 uio.uio_segflg = UIO_SYSSPACE;
714 uio.uio_llimit = MAXOFFSET_T;
715
716 err = VOP_READLINK(linkvp, &uio, kcred, NULL);
717 if (err) {
718 cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
719 kmem_free(buf, MAXPATHLEN);
720 return (ENOENT);
721 }
722
723 /* mission complete */
724 *link = i_ddi_strdup(buf, KM_SLEEP);
725 kmem_free(buf, MAXPATHLEN);
726 return (0);
727 }
728
729 /*
730 * A convenient wrapper to get the devfs node vnode for a device
731 * minor functionality: readlink() of a /dev symlink
732 * Place the link into dv->sdev_symlink
733 */
734 static int
sdev_follow_link(struct sdev_node * dv)735 sdev_follow_link(struct sdev_node *dv)
736 {
737 int err;
738 struct vnode *linkvp;
739 char *link = NULL;
740
741 linkvp = SDEVTOV(dv);
742 if (linkvp == NULL)
743 return (ENOENT);
744 ASSERT(linkvp->v_type == VLNK);
745 err = sdev_getlink(linkvp, &link);
746 if (err) {
747 dv->sdev_symlink = NULL;
748 return (ENOENT);
749 }
750
751 ASSERT(link != NULL);
752 dv->sdev_symlink = link;
753 return (0);
754 }
755
756 static int
sdev_node_check(struct sdev_node * dv,struct vattr * nvap,void * nargs)757 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
758 {
759 vtype_t otype = SDEVTOV(dv)->v_type;
760
761 /*
762 * existing sdev_node has a different type.
763 */
764 if (otype != nvap->va_type) {
765 sdcmn_err9(("sdev_node_check: existing node "
766 " %s type %d does not match new node type %d\n",
767 dv->sdev_name, otype, nvap->va_type));
768 return (EEXIST);
769 }
770
771 /*
772 * For a symlink, the target should be the same.
773 */
774 if (otype == VLNK) {
775 ASSERT(nargs != NULL);
776 ASSERT(dv->sdev_symlink != NULL);
777 if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
778 sdcmn_err9(("sdev_node_check: existing node "
779 " %s has different symlink %s as new node "
780 " %s\n", dv->sdev_name, dv->sdev_symlink,
781 (char *)nargs));
782 return (EEXIST);
783 }
784 }
785
786 return (0);
787 }
788
789 /*
790 * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
791 *
792 * arguments:
793 * - ddv (parent)
794 * - nm (child name)
795 * - newdv (sdev_node for nm is returned here)
796 * - vap (vattr for the node to be created, va_type should be set.
797 * - avp (attribute vnode)
798 * the defaults should be used if unknown)
799 * - cred
800 * - args
801 * . tnm (for VLNK)
802 * . global sdev_node (for !SDEV_GLOBAL)
803 * - state: SDEV_INIT, SDEV_READY
804 *
805 * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
806 *
807 * NOTE: directory contents writers lock needs to be held before
808 * calling this routine.
809 */
810 int
sdev_mknode(struct sdev_node * ddv,char * nm,struct sdev_node ** newdv,struct vattr * vap,struct vnode * avp,void * args,struct cred * cred,sdev_node_state_t state)811 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
812 struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
813 sdev_node_state_t state)
814 {
815 int error = 0;
816 sdev_node_state_t node_state;
817 struct sdev_node *dv = NULL;
818
819 ASSERT(state != SDEV_ZOMBIE);
820 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
821
822 if (*newdv) {
823 dv = *newdv;
824 } else {
825 /* allocate and initialize a sdev_node */
826 if (ddv->sdev_state == SDEV_ZOMBIE) {
827 sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
828 ddv->sdev_path));
829 return (ENOENT);
830 }
831
832 error = sdev_nodeinit(ddv, nm, &dv, vap);
833 if (error != 0) {
834 sdcmn_err9(("sdev_mknode: error %d,"
835 " name %s can not be initialized\n",
836 error, nm));
837 return (error);
838 }
839 ASSERT(dv);
840
841 /* insert into the directory cache */
842 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
843 }
844
845 ASSERT(dv);
846 node_state = dv->sdev_state;
847 ASSERT(node_state != SDEV_ZOMBIE);
848
849 if (state == SDEV_READY) {
850 switch (node_state) {
851 case SDEV_INIT:
852 error = sdev_nodeready(dv, vap, avp, args, cred);
853 if (error) {
854 sdcmn_err9(("sdev_mknode: node %s can NOT"
855 " be transitioned into READY state, "
856 "error %d\n", nm, error));
857 }
858 break;
859 case SDEV_READY:
860 /*
861 * Do some sanity checking to make sure
862 * the existing sdev_node is what has been
863 * asked for.
864 */
865 error = sdev_node_check(dv, vap, args);
866 break;
867 default:
868 break;
869 }
870 }
871
872 if (!error) {
873 *newdv = dv;
874 ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
875 } else {
876 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);
877 /*
878 * We created this node, it wasn't passed into us. Therefore it
879 * is up to us to delete it.
880 */
881 if (*newdv == NULL)
882 SDEV_SIMPLE_RELE(dv);
883 *newdv = NULL;
884 }
885
886 return (error);
887 }
888
889 /*
890 * convenient wrapper to change vp's ATIME, CTIME and MTIME
891 */
892 void
sdev_update_timestamps(struct vnode * vp,cred_t * cred,uint_t mask)893 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
894 {
895 struct vattr attr;
896 timestruc_t now;
897 int err;
898
899 ASSERT(vp);
900 gethrestime(&now);
901 if (mask & AT_CTIME)
902 attr.va_ctime = now;
903 if (mask & AT_MTIME)
904 attr.va_mtime = now;
905 if (mask & AT_ATIME)
906 attr.va_atime = now;
907
908 attr.va_mask = (mask & AT_TIMES);
909 err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
910 if (err && (err != EROFS)) {
911 sdcmn_err(("update timestamps error %d\n", err));
912 }
913 }
914
915 /*
916 * the backing store vnode is released here
917 */
918 /*ARGSUSED1*/
919 void
sdev_nodedestroy(struct sdev_node * dv,uint_t flags)920 sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
921 {
922 /* no references */
923 ASSERT(dv->sdev_nlink == 0);
924
925 if (dv->sdev_attrvp != NULLVP) {
926 VN_RELE(dv->sdev_attrvp);
927 /*
928 * reset the attrvp so that no more
929 * references can be made on this already
930 * vn_rele() vnode
931 */
932 dv->sdev_attrvp = NULLVP;
933 }
934
935 if (dv->sdev_attr != NULL) {
936 kmem_free(dv->sdev_attr, sizeof (struct vattr));
937 dv->sdev_attr = NULL;
938 }
939
940 if (dv->sdev_name != NULL) {
941 kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
942 dv->sdev_name = NULL;
943 }
944
945 if (dv->sdev_symlink != NULL) {
946 kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
947 dv->sdev_symlink = NULL;
948 }
949
950 if (dv->sdev_path) {
951 kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
952 dv->sdev_path = NULL;
953 }
954
955 if (!SDEV_IS_GLOBAL(dv))
956 sdev_prof_free(dv);
957
958 if (SDEVTOV(dv)->v_type == VDIR) {
959 ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
960 avl_destroy(&dv->sdev_entries);
961 }
962
963 mutex_destroy(&dv->sdev_lookup_lock);
964 cv_destroy(&dv->sdev_lookup_cv);
965
966 /* return node to initial state as per constructor */
967 (void) memset((void *)&dv->sdev_instance_data, 0,
968 sizeof (dv->sdev_instance_data));
969 vn_invalid(SDEVTOV(dv));
970 kmem_cache_free(sdev_node_cache, dv);
971 }
972
973 /*
974 * DIRECTORY CACHE lookup
975 */
976 struct sdev_node *
sdev_findbyname(struct sdev_node * ddv,char * nm)977 sdev_findbyname(struct sdev_node *ddv, char *nm)
978 {
979 struct sdev_node *dv;
980 struct sdev_node dvtmp;
981 avl_index_t where;
982
983 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
984
985 dvtmp.sdev_name = nm;
986 dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
987 if (dv) {
988 ASSERT(dv->sdev_dotdot == ddv);
989 ASSERT(strcmp(dv->sdev_name, nm) == 0);
990 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
991 SDEV_HOLD(dv);
992 return (dv);
993 }
994 return (NULL);
995 }
996
997 /*
998 * Inserts a new sdev_node in a parent directory
999 */
1000 void
sdev_direnter(struct sdev_node * ddv,struct sdev_node * dv)1001 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
1002 {
1003 avl_index_t where;
1004
1005 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1006 ASSERT(SDEVTOV(ddv)->v_type == VDIR);
1007 ASSERT(ddv->sdev_nlink >= 2);
1008 ASSERT(dv->sdev_nlink == 0);
1009 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1010
1011 dv->sdev_dotdot = ddv;
1012 VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
1013 avl_insert(&ddv->sdev_entries, dv, where);
1014 ddv->sdev_nlink++;
1015 }
1016
1017 /*
1018 * The following check is needed because while sdev_nodes are linked
1019 * in SDEV_INIT state, they have their link counts incremented only
1020 * in SDEV_READY state.
1021 */
1022 static void
decr_link(struct sdev_node * dv)1023 decr_link(struct sdev_node *dv)
1024 {
1025 VERIFY(RW_WRITE_HELD(&dv->sdev_contents));
1026 if (dv->sdev_state != SDEV_INIT) {
1027 VERIFY(dv->sdev_nlink >= 1);
1028 dv->sdev_nlink--;
1029 } else {
1030 VERIFY(dv->sdev_nlink == 0);
1031 }
1032 }
1033
1034 /*
1035 * Delete an existing dv from directory cache
1036 *
1037 * In the case of a node is still held by non-zero reference count, the node is
1038 * put into ZOMBIE state. The node is always unlinked from its parent, but it is
1039 * not destroyed via sdev_inactive until its reference count reaches "0".
1040 */
1041 static void
sdev_dirdelete(struct sdev_node * ddv,struct sdev_node * dv)1042 sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
1043 {
1044 struct vnode *vp;
1045 sdev_node_state_t os;
1046
1047 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1048
1049 vp = SDEVTOV(dv);
1050 mutex_enter(&vp->v_lock);
1051 rw_enter(&dv->sdev_contents, RW_WRITER);
1052 os = dv->sdev_state;
1053 ASSERT(os != SDEV_ZOMBIE);
1054 dv->sdev_state = SDEV_ZOMBIE;
1055
1056 /*
1057 * unlink ourselves from the parent directory now to take care of the ..
1058 * link. However, if we're a directory, we don't remove our reference to
1059 * ourself eg. '.' until we are torn down in the inactive callback.
1060 */
1061 decr_link(ddv);
1062 avl_remove(&ddv->sdev_entries, dv);
1063 /*
1064 * sdev_inactive expects nodes to have a link to themselves when we're
1065 * tearing them down. If we're transitioning from the initial state to
1066 * zombie and not via ready, then we're not going to have this link that
1067 * comes from the node being ready. As a result, we need to increment
1068 * our link count by one to account for this.
1069 */
1070 if (os == SDEV_INIT && dv->sdev_nlink == 0)
1071 dv->sdev_nlink++;
1072 rw_exit(&dv->sdev_contents);
1073 mutex_exit(&vp->v_lock);
1074 }
1075
1076 /*
1077 * check if the source is in the path of the target
1078 *
1079 * source and target are different
1080 */
1081 /*ARGSUSED2*/
1082 static int
sdev_checkpath(struct sdev_node * sdv,struct sdev_node * tdv,struct cred * cred)1083 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
1084 {
1085 int error = 0;
1086 struct sdev_node *dotdot, *dir;
1087
1088 dotdot = tdv->sdev_dotdot;
1089 ASSERT(dotdot);
1090
1091 /* fs root */
1092 if (dotdot == tdv) {
1093 return (0);
1094 }
1095
1096 for (;;) {
1097 /*
1098 * avoid error cases like
1099 * mv a a/b
1100 * mv a a/b/c
1101 * etc.
1102 */
1103 if (dotdot == sdv) {
1104 error = EINVAL;
1105 break;
1106 }
1107
1108 dir = dotdot;
1109 dotdot = dir->sdev_dotdot;
1110
1111 /* done checking because root is reached */
1112 if (dir == dotdot) {
1113 break;
1114 }
1115 }
1116 return (error);
1117 }
1118
1119 int
sdev_rnmnode(struct sdev_node * oddv,struct sdev_node * odv,struct sdev_node * nddv,struct sdev_node ** ndvp,char * nnm,struct cred * cred)1120 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
1121 struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
1122 struct cred *cred)
1123 {
1124 int error = 0;
1125 struct vnode *ovp = SDEVTOV(odv);
1126 struct vnode *nvp;
1127 struct vattr vattr;
1128 int doingdir = (ovp->v_type == VDIR);
1129 char *link = NULL;
1130 int samedir = (oddv == nddv) ? 1 : 0;
1131 int bkstore = 0;
1132 struct sdev_node *idv = NULL;
1133 struct sdev_node *ndv = NULL;
1134 timestruc_t now;
1135
1136 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1137 error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
1138 if (error)
1139 return (error);
1140
1141 if (!samedir)
1142 rw_enter(&oddv->sdev_contents, RW_WRITER);
1143 rw_enter(&nddv->sdev_contents, RW_WRITER);
1144
1145 /*
1146 * the source may have been deleted by another thread before
1147 * we gets here.
1148 */
1149 if (odv->sdev_state != SDEV_READY) {
1150 error = ENOENT;
1151 goto err_out;
1152 }
1153
1154 if (doingdir && (odv == nddv)) {
1155 error = EINVAL;
1156 goto err_out;
1157 }
1158
1159 /*
1160 * If renaming a directory, and the parents are different (".." must be
1161 * changed) then the source dir must not be in the dir hierarchy above
1162 * the target since it would orphan everything below the source dir.
1163 */
1164 if (doingdir && (oddv != nddv)) {
1165 error = sdev_checkpath(odv, nddv, cred);
1166 if (error)
1167 goto err_out;
1168 }
1169
1170 /* fix the source for a symlink */
1171 if (vattr.va_type == VLNK) {
1172 if (odv->sdev_symlink == NULL) {
1173 error = sdev_follow_link(odv);
1174 if (error) {
1175 /*
1176 * The underlying symlink doesn't exist. This
1177 * node probably shouldn't even exist. While
1178 * it's a bit jarring to consumers, we're going
1179 * to remove the node from /dev.
1180 */
1181 if (SDEV_IS_PERSIST((*ndvp)))
1182 bkstore = 1;
1183 sdev_dirdelete(oddv, odv);
1184 if (bkstore) {
1185 ASSERT(nddv->sdev_attrvp);
1186 error = VOP_REMOVE(nddv->sdev_attrvp,
1187 nnm, cred, NULL, 0);
1188 if (error)
1189 goto err_out;
1190 }
1191 error = ENOENT;
1192 goto err_out;
1193 }
1194 }
1195 ASSERT(odv->sdev_symlink);
1196 link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1197 }
1198
1199 /* destination existing */
1200 if (*ndvp) {
1201 nvp = SDEVTOV(*ndvp);
1202 ASSERT(nvp);
1203
1204 /* handling renaming to itself */
1205 if (odv == *ndvp) {
1206 error = 0;
1207 goto err_out;
1208 }
1209
1210 if (nvp->v_type == VDIR) {
1211 if (!doingdir) {
1212 error = EISDIR;
1213 goto err_out;
1214 }
1215
1216 if (vn_vfswlock(nvp)) {
1217 error = EBUSY;
1218 goto err_out;
1219 }
1220
1221 if (vn_mountedvfs(nvp) != NULL) {
1222 vn_vfsunlock(nvp);
1223 error = EBUSY;
1224 goto err_out;
1225 }
1226
1227 /* in case dir1 exists in dir2 and "mv dir1 dir2" */
1228 if ((*ndvp)->sdev_nlink > 2) {
1229 vn_vfsunlock(nvp);
1230 error = EEXIST;
1231 goto err_out;
1232 }
1233 vn_vfsunlock(nvp);
1234
1235 /*
1236 * We did not place the hold on *ndvp, so even though
1237 * we're deleting the node, we should not get rid of our
1238 * reference.
1239 */
1240 sdev_dirdelete(nddv, *ndvp);
1241 *ndvp = NULL;
1242 ASSERT(nddv->sdev_attrvp);
1243 error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1244 nddv->sdev_attrvp, cred, NULL, 0);
1245 if (error)
1246 goto err_out;
1247 } else {
1248 if (doingdir) {
1249 error = ENOTDIR;
1250 goto err_out;
1251 }
1252
1253 if (SDEV_IS_PERSIST((*ndvp))) {
1254 bkstore = 1;
1255 }
1256
1257 /*
1258 * Get rid of the node from the directory cache note.
1259 * Don't forget that it's not up to us to remove the vn
1260 * ref on the sdev node, as we did not place it.
1261 */
1262 sdev_dirdelete(nddv, *ndvp);
1263 *ndvp = NULL;
1264 if (bkstore) {
1265 ASSERT(nddv->sdev_attrvp);
1266 error = VOP_REMOVE(nddv->sdev_attrvp,
1267 nnm, cred, NULL, 0);
1268 if (error)
1269 goto err_out;
1270 }
1271 }
1272 }
1273
1274 /*
1275 * make a fresh node from the source attrs
1276 */
1277 ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1278 error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1279 NULL, (void *)link, cred, SDEV_READY);
1280
1281 if (link != NULL) {
1282 kmem_free(link, strlen(link) + 1);
1283 link = NULL;
1284 }
1285
1286 if (error)
1287 goto err_out;
1288 ASSERT(*ndvp);
1289 ASSERT((*ndvp)->sdev_state == SDEV_READY);
1290
1291 /* move dir contents */
1292 if (doingdir) {
1293 for (idv = SDEV_FIRST_ENTRY(odv); idv;
1294 idv = SDEV_NEXT_ENTRY(odv, idv)) {
1295 SDEV_HOLD(idv);
1296 error = sdev_rnmnode(odv, idv,
1297 (struct sdev_node *)(*ndvp), &ndv,
1298 idv->sdev_name, cred);
1299 SDEV_RELE(idv);
1300 if (error)
1301 goto err_out;
1302 ndv = NULL;
1303 }
1304 }
1305
1306 if ((*ndvp)->sdev_attrvp) {
1307 sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1308 AT_CTIME|AT_ATIME);
1309 } else {
1310 ASSERT((*ndvp)->sdev_attr);
1311 gethrestime(&now);
1312 (*ndvp)->sdev_attr->va_ctime = now;
1313 (*ndvp)->sdev_attr->va_atime = now;
1314 }
1315
1316 if (nddv->sdev_attrvp) {
1317 sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1318 AT_MTIME|AT_ATIME);
1319 } else {
1320 ASSERT(nddv->sdev_attr);
1321 gethrestime(&now);
1322 nddv->sdev_attr->va_mtime = now;
1323 nddv->sdev_attr->va_atime = now;
1324 }
1325 rw_exit(&nddv->sdev_contents);
1326 if (!samedir)
1327 rw_exit(&oddv->sdev_contents);
1328
1329 SDEV_RELE(*ndvp);
1330 return (error);
1331
1332 err_out:
1333 if (link != NULL) {
1334 kmem_free(link, strlen(link) + 1);
1335 link = NULL;
1336 }
1337
1338 rw_exit(&nddv->sdev_contents);
1339 if (!samedir)
1340 rw_exit(&oddv->sdev_contents);
1341 return (error);
1342 }
1343
1344 /*
1345 * Merge sdev_node specific information into an attribute structure.
1346 *
1347 * note: sdev_node is not locked here
1348 */
1349 void
sdev_vattr_merge(struct sdev_node * dv,struct vattr * vap)1350 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1351 {
1352 struct vnode *vp = SDEVTOV(dv);
1353
1354 vap->va_nlink = dv->sdev_nlink;
1355 vap->va_nodeid = dv->sdev_ino;
1356 vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1357 vap->va_type = vp->v_type;
1358
1359 if (vp->v_type == VDIR) {
1360 vap->va_rdev = 0;
1361 vap->va_fsid = vp->v_rdev;
1362 } else if (vp->v_type == VLNK) {
1363 vap->va_rdev = 0;
1364 vap->va_mode &= ~S_IFMT;
1365 vap->va_mode |= S_IFLNK;
1366 } else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1367 vap->va_rdev = vp->v_rdev;
1368 vap->va_mode &= ~S_IFMT;
1369 if (vap->va_type == VCHR)
1370 vap->va_mode |= S_IFCHR;
1371 else
1372 vap->va_mode |= S_IFBLK;
1373 } else {
1374 vap->va_rdev = 0;
1375 }
1376 }
1377
1378 struct vattr *
sdev_getdefault_attr(enum vtype type)1379 sdev_getdefault_attr(enum vtype type)
1380 {
1381 if (type == VDIR)
1382 return (&sdev_vattr_dir);
1383 else if (type == VCHR)
1384 return (&sdev_vattr_chr);
1385 else if (type == VBLK)
1386 return (&sdev_vattr_blk);
1387 else if (type == VLNK)
1388 return (&sdev_vattr_lnk);
1389 else
1390 return (NULL);
1391 }
1392 int
sdev_to_vp(struct sdev_node * dv,struct vnode ** vpp)1393 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1394 {
1395 int rv = 0;
1396 struct vnode *vp = SDEVTOV(dv);
1397
1398 switch (vp->v_type) {
1399 case VCHR:
1400 case VBLK:
1401 /*
1402 * If vnode is a device, return special vnode instead
1403 * (though it knows all about -us- via sp->s_realvp)
1404 */
1405 *vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1406 VN_RELE(vp);
1407 if (*vpp == NULLVP)
1408 rv = ENOSYS;
1409 break;
1410 default: /* most types are returned as is */
1411 *vpp = vp;
1412 break;
1413 }
1414 return (rv);
1415 }
1416
1417 /*
1418 * junction between devname and root file system, e.g. ufs
1419 */
1420 int
devname_backstore_lookup(struct sdev_node * ddv,char * nm,struct vnode ** rvp)1421 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1422 {
1423 struct vnode *rdvp = ddv->sdev_attrvp;
1424 int rval = 0;
1425
1426 ASSERT(rdvp);
1427
1428 rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1429 NULL);
1430 return (rval);
1431 }
1432
1433 static int
sdev_filldir_from_store(struct sdev_node * ddv,int dlen,struct cred * cred)1434 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1435 {
1436 struct sdev_node *dv = NULL;
1437 char *nm;
1438 struct vnode *dirvp;
1439 int error;
1440 vnode_t *vp;
1441 int eof;
1442 struct iovec iov;
1443 struct uio uio;
1444 struct dirent64 *dp;
1445 dirent64_t *dbuf;
1446 size_t dbuflen;
1447 struct vattr vattr;
1448 char *link = NULL;
1449
1450 if (ddv->sdev_attrvp == NULL)
1451 return (0);
1452 if (!(ddv->sdev_flags & SDEV_BUILD))
1453 return (0);
1454
1455 dirvp = ddv->sdev_attrvp;
1456 VN_HOLD(dirvp);
1457 dbuf = kmem_zalloc(dlen, KM_SLEEP);
1458
1459 uio.uio_iov = &iov;
1460 uio.uio_iovcnt = 1;
1461 uio.uio_segflg = UIO_SYSSPACE;
1462 uio.uio_fmode = 0;
1463 uio.uio_extflg = UIO_COPY_CACHED;
1464 uio.uio_loffset = 0;
1465 uio.uio_llimit = MAXOFFSET_T;
1466
1467 eof = 0;
1468 error = 0;
1469 while (!error && !eof) {
1470 uio.uio_resid = dlen;
1471 iov.iov_base = (char *)dbuf;
1472 iov.iov_len = dlen;
1473 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1474 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1475 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1476
1477 dbuflen = dlen - uio.uio_resid;
1478 if (error || dbuflen == 0)
1479 break;
1480
1481 if (!(ddv->sdev_flags & SDEV_BUILD))
1482 break;
1483
1484 for (dp = dbuf; ((intptr_t)dp <
1485 (intptr_t)dbuf + dbuflen);
1486 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1487 nm = dp->d_name;
1488
1489 if (strcmp(nm, ".") == 0 ||
1490 strcmp(nm, "..") == 0)
1491 continue;
1492
1493 vp = NULLVP;
1494 dv = sdev_cache_lookup(ddv, nm);
1495 if (dv) {
1496 VERIFY(dv->sdev_state != SDEV_ZOMBIE);
1497 SDEV_SIMPLE_RELE(dv);
1498 continue;
1499 }
1500
1501 /* refill the cache if not already */
1502 error = devname_backstore_lookup(ddv, nm, &vp);
1503 if (error)
1504 continue;
1505
1506 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1507 error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1508 if (error)
1509 continue;
1510
1511 if (vattr.va_type == VLNK) {
1512 error = sdev_getlink(vp, &link);
1513 if (error) {
1514 continue;
1515 }
1516 ASSERT(link != NULL);
1517 }
1518
1519 if (!rw_tryupgrade(&ddv->sdev_contents)) {
1520 rw_exit(&ddv->sdev_contents);
1521 rw_enter(&ddv->sdev_contents, RW_WRITER);
1522 }
1523 error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1524 cred, SDEV_READY);
1525 rw_downgrade(&ddv->sdev_contents);
1526
1527 if (link != NULL) {
1528 kmem_free(link, strlen(link) + 1);
1529 link = NULL;
1530 }
1531
1532 if (!error) {
1533 ASSERT(dv);
1534 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1535 SDEV_SIMPLE_RELE(dv);
1536 }
1537 vp = NULL;
1538 dv = NULL;
1539 }
1540 }
1541
1542 done:
1543 VN_RELE(dirvp);
1544 kmem_free(dbuf, dlen);
1545
1546 return (error);
1547 }
1548
1549 void
sdev_filldir_dynamic(struct sdev_node * ddv)1550 sdev_filldir_dynamic(struct sdev_node *ddv)
1551 {
1552 int error;
1553 int i;
1554 struct vattr vattr;
1555 struct vattr *vap = &vattr;
1556 char *nm = NULL;
1557 struct sdev_node *dv = NULL;
1558
1559 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1560 ASSERT((ddv->sdev_flags & SDEV_BUILD));
1561
1562 *vap = *sdev_getdefault_attr(VDIR); /* note structure copy here */
1563 gethrestime(&vap->va_atime);
1564 vap->va_mtime = vap->va_atime;
1565 vap->va_ctime = vap->va_atime;
1566 for (i = 0; vtab[i].vt_name != NULL; i++) {
1567 /*
1568 * This early, we may be in a read-only /dev environment: leave
1569 * the creation of any nodes we'd attempt to persist to
1570 * devfsadm. Because /dev itself is normally persistent, any
1571 * node which is not marked dynamic will end up being marked
1572 * persistent. However, some nodes are both dynamic and
1573 * persistent, mostly lofi and rlofi, so we need to be careful
1574 * in our check.
1575 */
1576 if ((vtab[i].vt_flags & SDEV_PERSIST) ||
1577 !(vtab[i].vt_flags & SDEV_DYNAMIC))
1578 continue;
1579 nm = vtab[i].vt_name;
1580 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1581 dv = NULL;
1582 error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1583 NULL, kcred, SDEV_READY);
1584 if (error) {
1585 cmn_err(CE_WARN, "%s/%s: error %d\n",
1586 ddv->sdev_name, nm, error);
1587 } else {
1588 ASSERT(dv);
1589 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1590 SDEV_SIMPLE_RELE(dv);
1591 }
1592 }
1593 }
1594
1595 /*
1596 * Creating a backing store entry based on sdev_attr.
1597 * This is called either as part of node creation in a persistent directory
1598 * or from setattr/setsecattr to persist access attributes across reboot.
1599 */
1600 int
sdev_shadow_node(struct sdev_node * dv,struct cred * cred)1601 sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1602 {
1603 int error = 0;
1604 struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1605 struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1606 struct vattr *vap = dv->sdev_attr;
1607 char *nm = dv->sdev_name;
1608 struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1609
1610 ASSERT(dv && dv->sdev_name && rdvp);
1611 ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1612
1613 lookup:
1614 /* try to find it in the backing store */
1615 error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1616 NULL);
1617 if (error == 0) {
1618 if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1619 VN_HOLD(rrvp);
1620 VN_RELE(*rvp);
1621 *rvp = rrvp;
1622 }
1623
1624 kmem_free(dv->sdev_attr, sizeof (vattr_t));
1625 dv->sdev_attr = NULL;
1626 dv->sdev_attrvp = *rvp;
1627 return (0);
1628 }
1629
1630 /* let's try to persist the node */
1631 gethrestime(&vap->va_atime);
1632 vap->va_mtime = vap->va_atime;
1633 vap->va_ctime = vap->va_atime;
1634 vap->va_mask |= AT_TYPE|AT_MODE;
1635 switch (vap->va_type) {
1636 case VDIR:
1637 error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1638 sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1639 (void *)(*rvp), error));
1640 if (!error)
1641 VN_RELE(*rvp);
1642 break;
1643 case VCHR:
1644 case VBLK:
1645 case VREG:
1646 case VDOOR:
1647 error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1648 rvp, cred, 0, NULL, NULL);
1649 sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1650 (void *)(*rvp), error));
1651 if (!error)
1652 VN_RELE(*rvp);
1653 break;
1654 case VLNK:
1655 ASSERT(dv->sdev_symlink);
1656 error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1657 NULL, 0);
1658 sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1659 error));
1660 break;
1661 default:
1662 cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1663 "create\n", nm);
1664 /*NOTREACHED*/
1665 }
1666
1667 /* go back to lookup to factor out spec node and set attrvp */
1668 if (error == 0)
1669 goto lookup;
1670
1671 sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error));
1672 return (error);
1673 }
1674
1675 static void
sdev_cache_add(struct sdev_node * ddv,struct sdev_node ** dv,char * nm)1676 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1677 {
1678 struct sdev_node *dup = NULL;
1679
1680 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1681 if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1682 sdev_direnter(ddv, *dv);
1683 } else {
1684 VERIFY(dup->sdev_state != SDEV_ZOMBIE);
1685 SDEV_SIMPLE_RELE(*dv);
1686 sdev_nodedestroy(*dv, 0);
1687 *dv = dup;
1688 }
1689 }
1690
1691 static void
sdev_cache_delete(struct sdev_node * ddv,struct sdev_node ** dv)1692 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1693 {
1694 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1695 sdev_dirdelete(ddv, *dv);
1696 }
1697
1698 /*
1699 * update the in-core directory cache
1700 */
1701 void
sdev_cache_update(struct sdev_node * ddv,struct sdev_node ** dv,char * nm,sdev_cache_ops_t ops)1702 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1703 sdev_cache_ops_t ops)
1704 {
1705 ASSERT((SDEV_HELD(*dv)));
1706
1707 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1708 switch (ops) {
1709 case SDEV_CACHE_ADD:
1710 sdev_cache_add(ddv, dv, nm);
1711 break;
1712 case SDEV_CACHE_DELETE:
1713 sdev_cache_delete(ddv, dv);
1714 break;
1715 default:
1716 break;
1717 }
1718 }
1719
1720 /*
1721 * retrieve the named entry from the directory cache
1722 */
1723 struct sdev_node *
sdev_cache_lookup(struct sdev_node * ddv,char * nm)1724 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1725 {
1726 struct sdev_node *dv = NULL;
1727
1728 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1729 dv = sdev_findbyname(ddv, nm);
1730
1731 return (dv);
1732 }
1733
1734 /*
1735 * Implicit reconfig for nodes constructed by a link generator
1736 * Start devfsadm if needed, or if devfsadm is in progress,
1737 * prepare to block on devfsadm either completing or
1738 * constructing the desired node. As devfsadmd is global
1739 * in scope, constructing all necessary nodes, we only
1740 * need to initiate it once.
1741 */
1742 static int
sdev_call_devfsadmd(struct sdev_node * ddv,struct sdev_node * dv,char * nm)1743 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1744 {
1745 int error = 0;
1746
1747 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1748 sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1749 ddv->sdev_name, nm, devfsadm_state));
1750 mutex_enter(&dv->sdev_lookup_lock);
1751 SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1752 mutex_exit(&dv->sdev_lookup_lock);
1753 error = 0;
1754 } else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1755 sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1756 ddv->sdev_name, nm, devfsadm_state));
1757
1758 sdev_devfsadmd_thread(ddv, dv, kcred);
1759 mutex_enter(&dv->sdev_lookup_lock);
1760 SDEV_BLOCK_OTHERS(dv,
1761 (SDEV_LOOKUP | SDEV_LGWAITING));
1762 mutex_exit(&dv->sdev_lookup_lock);
1763 error = 0;
1764 } else {
1765 error = -1;
1766 }
1767
1768 return (error);
1769 }
1770
1771 /*
1772 * Support for specialized device naming construction mechanisms
1773 */
1774 static int
sdev_call_dircallback(struct sdev_node * ddv,struct sdev_node ** dvp,char * nm,int (* callback)(struct sdev_node *,char *,void **,struct cred *,void *,char *),int flags,struct cred * cred)1775 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1776 int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1777 void *, char *), int flags, struct cred *cred)
1778 {
1779 int rv = 0;
1780 char *physpath = NULL;
1781 struct vattr vattr;
1782 struct vattr *vap = &vattr;
1783 struct sdev_node *dv = NULL;
1784
1785 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1786 if (flags & SDEV_VLINK) {
1787 physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1788 rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1789 NULL);
1790 if (rv) {
1791 kmem_free(physpath, MAXPATHLEN);
1792 return (-1);
1793 }
1794
1795 *vap = *sdev_getdefault_attr(VLNK); /* structure copy */
1796 vap->va_size = strlen(physpath);
1797 gethrestime(&vap->va_atime);
1798 vap->va_mtime = vap->va_atime;
1799 vap->va_ctime = vap->va_atime;
1800
1801 rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1802 (void *)physpath, cred, SDEV_READY);
1803 kmem_free(physpath, MAXPATHLEN);
1804 if (rv)
1805 return (rv);
1806 } else if (flags & SDEV_VATTR) {
1807 /*
1808 * /dev/pts
1809 *
1810 * callback is responsible to set the basic attributes,
1811 * e.g. va_type/va_uid/va_gid/
1812 * dev_t if VCHR or VBLK/
1813 */
1814 ASSERT(callback);
1815 rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
1816 if (rv) {
1817 sdcmn_err3(("devname_lookup_func: SDEV_NONE "
1818 "callback failed \n"));
1819 return (-1);
1820 }
1821
1822 rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
1823 cred, SDEV_READY);
1824
1825 if (rv)
1826 return (rv);
1827
1828 } else {
1829 impossible(("lookup: %s/%s by %s not supported (%d)\n",
1830 SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
1831 __LINE__));
1832 rv = -1;
1833 }
1834
1835 *dvp = dv;
1836 return (rv);
1837 }
1838
/*
 * Return 1 when exec_name is exactly "devfsadm", 0 otherwise.
 *
 * note: because devfsadmd -> /usr/sbin/devfsadm
 * it is safe to use "devfsadm" to capture the lookups
 * from devfsadm and its daemon version.
 */
static int
is_devfsadm_thread(char *exec_name)
{
	return (strcmp(exec_name, "devfsadm") == 0);
}
1851
1852 /*
1853 * Lookup Order:
1854 * sdev_node cache;
1855 * backing store (SDEV_PERSIST);
1856 * DBNR: a. dir_ops implemented in the loadable modules;
1857 * b. vnode ops in vtab.
1858 */
1859 int
devname_lookup_func(struct sdev_node * ddv,char * nm,struct vnode ** vpp,struct cred * cred,int (* callback)(struct sdev_node *,char *,void **,struct cred *,void *,char *),int flags)1860 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
1861 struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
1862 struct cred *, void *, char *), int flags)
1863 {
1864 int rv = 0, nmlen;
1865 struct vnode *rvp = NULL;
1866 struct sdev_node *dv = NULL;
1867 int retried = 0;
1868 int error = 0;
1869 struct vattr vattr;
1870 char *lookup_thread = curproc->p_user.u_comm;
1871 int failed_flags = 0;
1872 int (*vtor)(struct sdev_node *) = NULL;
1873 int state;
1874 int parent_state;
1875 char *link = NULL;
1876
1877 if (SDEVTOV(ddv)->v_type != VDIR)
1878 return (ENOTDIR);
1879
1880 /*
1881 * Empty name or ., return node itself.
1882 */
1883 nmlen = strlen(nm);
1884 if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
1885 *vpp = SDEVTOV(ddv);
1886 VN_HOLD(*vpp);
1887 return (0);
1888 }
1889
1890 /*
1891 * .., return the parent directory
1892 */
1893 if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
1894 *vpp = SDEVTOV(ddv->sdev_dotdot);
1895 VN_HOLD(*vpp);
1896 return (0);
1897 }
1898
1899 rw_enter(&ddv->sdev_contents, RW_READER);
1900 if (ddv->sdev_flags & SDEV_VTOR) {
1901 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
1902 ASSERT(vtor);
1903 }
1904
1905 tryagain:
1906 /*
1907 * (a) directory cache lookup:
1908 */
1909 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1910 parent_state = ddv->sdev_state;
1911 dv = sdev_cache_lookup(ddv, nm);
1912 if (dv) {
1913 state = dv->sdev_state;
1914 switch (state) {
1915 case SDEV_INIT:
1916 if (is_devfsadm_thread(lookup_thread))
1917 break;
1918
1919 /* ZOMBIED parent won't allow node creation */
1920 if (parent_state == SDEV_ZOMBIE) {
1921 SD_TRACE_FAILED_LOOKUP(ddv, nm,
1922 retried);
1923 goto nolock_notfound;
1924 }
1925
1926 mutex_enter(&dv->sdev_lookup_lock);
1927 /* compensate the threads started after devfsadm */
1928 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
1929 !(SDEV_IS_LOOKUP(dv)))
1930 SDEV_BLOCK_OTHERS(dv,
1931 (SDEV_LOOKUP | SDEV_LGWAITING));
1932
1933 if (SDEV_IS_LOOKUP(dv)) {
1934 failed_flags |= SLF_REBUILT;
1935 rw_exit(&ddv->sdev_contents);
1936 error = sdev_wait4lookup(dv, SDEV_LOOKUP);
1937 mutex_exit(&dv->sdev_lookup_lock);
1938 rw_enter(&ddv->sdev_contents, RW_READER);
1939
1940 if (error != 0) {
1941 SD_TRACE_FAILED_LOOKUP(ddv, nm,
1942 retried);
1943 goto nolock_notfound;
1944 }
1945
1946 state = dv->sdev_state;
1947 if (state == SDEV_INIT) {
1948 SD_TRACE_FAILED_LOOKUP(ddv, nm,
1949 retried);
1950 goto nolock_notfound;
1951 } else if (state == SDEV_READY) {
1952 goto found;
1953 } else if (state == SDEV_ZOMBIE) {
1954 rw_exit(&ddv->sdev_contents);
1955 SD_TRACE_FAILED_LOOKUP(ddv, nm,
1956 retried);
1957 SDEV_RELE(dv);
1958 goto lookup_failed;
1959 }
1960 } else {
1961 mutex_exit(&dv->sdev_lookup_lock);
1962 }
1963 break;
1964 case SDEV_READY:
1965 goto found;
1966 case SDEV_ZOMBIE:
1967 rw_exit(&ddv->sdev_contents);
1968 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1969 SDEV_RELE(dv);
1970 goto lookup_failed;
1971 default:
1972 rw_exit(&ddv->sdev_contents);
1973 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1974 sdev_lookup_failed(ddv, nm, failed_flags);
1975 *vpp = NULLVP;
1976 return (ENOENT);
1977 }
1978 }
1979 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1980
1981 /*
1982 * ZOMBIED parent does not allow new node creation.
1983 * bail out early
1984 */
1985 if (parent_state == SDEV_ZOMBIE) {
1986 rw_exit(&ddv->sdev_contents);
1987 *vpp = NULLVP;
1988 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1989 return (ENOENT);
1990 }
1991
1992 /*
1993 * (b0): backing store lookup
1994 * SDEV_PERSIST is default except:
1995 * 1) pts nodes
1996 * 2) non-chmod'ed local nodes
1997 * 3) zvol nodes
1998 */
1999 if (SDEV_IS_PERSIST(ddv)) {
2000 error = devname_backstore_lookup(ddv, nm, &rvp);
2001
2002 if (!error) {
2003
2004 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
2005 error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
2006 if (error) {
2007 rw_exit(&ddv->sdev_contents);
2008 if (dv)
2009 SDEV_RELE(dv);
2010 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2011 sdev_lookup_failed(ddv, nm, failed_flags);
2012 *vpp = NULLVP;
2013 return (ENOENT);
2014 }
2015
2016 if (vattr.va_type == VLNK) {
2017 error = sdev_getlink(rvp, &link);
2018 if (error) {
2019 rw_exit(&ddv->sdev_contents);
2020 if (dv)
2021 SDEV_RELE(dv);
2022 SD_TRACE_FAILED_LOOKUP(ddv, nm,
2023 retried);
2024 sdev_lookup_failed(ddv, nm,
2025 failed_flags);
2026 *vpp = NULLVP;
2027 return (ENOENT);
2028 }
2029 ASSERT(link != NULL);
2030 }
2031
2032 if (!rw_tryupgrade(&ddv->sdev_contents)) {
2033 rw_exit(&ddv->sdev_contents);
2034 rw_enter(&ddv->sdev_contents, RW_WRITER);
2035 }
2036 error = sdev_mknode(ddv, nm, &dv, &vattr,
2037 rvp, link, cred, SDEV_READY);
2038 rw_downgrade(&ddv->sdev_contents);
2039
2040 if (link != NULL) {
2041 kmem_free(link, strlen(link) + 1);
2042 link = NULL;
2043 }
2044
2045 if (error) {
2046 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2047 rw_exit(&ddv->sdev_contents);
2048 if (dv)
2049 SDEV_RELE(dv);
2050 goto lookup_failed;
2051 } else {
2052 goto found;
2053 }
2054 } else if (retried) {
2055 rw_exit(&ddv->sdev_contents);
2056 sdcmn_err3(("retry of lookup of %s/%s: failed\n",
2057 ddv->sdev_name, nm));
2058 if (dv)
2059 SDEV_RELE(dv);
2060 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2061 sdev_lookup_failed(ddv, nm, failed_flags);
2062 *vpp = NULLVP;
2063 return (ENOENT);
2064 }
2065 }
2066
2067 lookup_create_node:
2068 /* first thread that is doing the lookup on this node */
2069 if (callback) {
2070 ASSERT(dv == NULL);
2071 if (!rw_tryupgrade(&ddv->sdev_contents)) {
2072 rw_exit(&ddv->sdev_contents);
2073 rw_enter(&ddv->sdev_contents, RW_WRITER);
2074 }
2075 error = sdev_call_dircallback(ddv, &dv, nm, callback,
2076 flags, cred);
2077 rw_downgrade(&ddv->sdev_contents);
2078 if (error == 0) {
2079 goto found;
2080 } else {
2081 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2082 rw_exit(&ddv->sdev_contents);
2083 goto lookup_failed;
2084 }
2085 }
2086 if (!dv) {
2087 if (!rw_tryupgrade(&ddv->sdev_contents)) {
2088 rw_exit(&ddv->sdev_contents);
2089 rw_enter(&ddv->sdev_contents, RW_WRITER);
2090 }
2091 error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
2092 cred, SDEV_INIT);
2093 if (!dv) {
2094 rw_exit(&ddv->sdev_contents);
2095 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2096 sdev_lookup_failed(ddv, nm, failed_flags);
2097 *vpp = NULLVP;
2098 return (ENOENT);
2099 }
2100 rw_downgrade(&ddv->sdev_contents);
2101 }
2102
2103 /*
2104 * (b1) invoking devfsadm once per life time for devfsadm nodes
2105 */
2106 ASSERT(SDEV_HELD(dv));
2107
2108 if (SDEV_IS_NO_NCACHE(dv))
2109 failed_flags |= SLF_NO_NCACHE;
2110 if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
2111 SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
2112 ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
2113 ASSERT(SDEV_HELD(dv));
2114 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2115 goto nolock_notfound;
2116 }
2117
2118 /*
2119 * filter out known non-existent devices recorded
2120 * during initial reconfiguration boot for which
2121 * reconfig should not be done and lookup may
2122 * be short-circuited now.
2123 */
2124 if (sdev_lookup_filter(ddv, nm)) {
2125 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2126 goto nolock_notfound;
2127 }
2128
2129 /* bypassing devfsadm internal nodes */
2130 if (is_devfsadm_thread(lookup_thread)) {
2131 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2132 goto nolock_notfound;
2133 }
2134
2135 if (sdev_reconfig_disable) {
2136 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2137 goto nolock_notfound;
2138 }
2139
2140 error = sdev_call_devfsadmd(ddv, dv, nm);
2141 if (error == 0) {
2142 sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
2143 ddv->sdev_name, nm, curproc->p_user.u_comm));
2144 if (sdev_reconfig_verbose) {
2145 cmn_err(CE_CONT,
2146 "?lookup of %s/%s by %s: reconfig\n",
2147 ddv->sdev_name, nm, curproc->p_user.u_comm);
2148 }
2149 retried = 1;
2150 failed_flags |= SLF_REBUILT;
2151 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
2152 SDEV_SIMPLE_RELE(dv);
2153 goto tryagain;
2154 } else {
2155 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2156 goto nolock_notfound;
2157 }
2158
2159 found:
2160 ASSERT(dv->sdev_state == SDEV_READY);
2161 if (vtor) {
2162 /*
2163 * Check validity of returned node
2164 */
2165 switch (vtor(dv)) {
2166 case SDEV_VTOR_VALID:
2167 break;
2168 case SDEV_VTOR_STALE:
2169 /*
2170 * The name exists, but the cache entry is
2171 * stale and needs to be re-created.
2172 */
2173 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2174 if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
2175 rw_exit(&ddv->sdev_contents);
2176 rw_enter(&ddv->sdev_contents, RW_WRITER);
2177 }
2178 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);
2179 rw_downgrade(&ddv->sdev_contents);
2180 SDEV_RELE(dv);
2181 dv = NULL;
2182 goto lookup_create_node;
2183 /* FALLTHRU */
2184 case SDEV_VTOR_INVALID:
2185 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2186 sdcmn_err7(("lookup: destroy invalid "
2187 "node: %s(%p)\n", dv->sdev_name, (void *)dv));
2188 goto nolock_notfound;
2189 case SDEV_VTOR_SKIP:
2190 sdcmn_err7(("lookup: node not applicable - "
2191 "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
2192 rw_exit(&ddv->sdev_contents);
2193 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2194 SDEV_RELE(dv);
2195 goto lookup_failed;
2196 default:
2197 cmn_err(CE_PANIC,
2198 "dev fs: validator failed: %s(%p)\n",
2199 dv->sdev_name, (void *)dv);
2200 break;
2201 }
2202 }
2203
2204 rw_exit(&ddv->sdev_contents);
2205 rv = sdev_to_vp(dv, vpp);
2206 sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
2207 "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
2208 dv->sdev_state, nm, rv));
2209 return (rv);
2210
2211 nolock_notfound:
2212 /*
2213 * Destroy the node that is created for synchronization purposes.
2214 */
2215 sdcmn_err3(("devname_lookup_func: %s with state %d\n",
2216 nm, dv->sdev_state));
2217 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2218 if (dv->sdev_state == SDEV_INIT) {
2219 if (!rw_tryupgrade(&ddv->sdev_contents)) {
2220 rw_exit(&ddv->sdev_contents);
2221 rw_enter(&ddv->sdev_contents, RW_WRITER);
2222 }
2223
2224 /*
2225 * Node state may have changed during the lock
2226 * changes. Re-check.
2227 */
2228 if (dv->sdev_state == SDEV_INIT) {
2229 sdev_dirdelete(ddv, dv);
2230 rw_exit(&ddv->sdev_contents);
2231 sdev_lookup_failed(ddv, nm, failed_flags);
2232 SDEV_RELE(dv);
2233 *vpp = NULL;
2234 return (ENOENT);
2235 }
2236 }
2237
2238 rw_exit(&ddv->sdev_contents);
2239 SDEV_RELE(dv);
2240
2241 lookup_failed:
2242 sdev_lookup_failed(ddv, nm, failed_flags);
2243 *vpp = NULL;
2244 return (ENOENT);
2245 }
2246
2247 /*
2248 * Given a directory node, mark all nodes beneath as
2249 * STALE, i.e. nodes that don't exist as far as new
2250 * consumers are concerned. Remove them from the
2251 * list of directory entries so that no lookup or
2252 * directory traversal will find them. The node
2253 * not deallocated so existing holds are not affected.
2254 */
2255 void
sdev_stale(struct sdev_node * ddv)2256 sdev_stale(struct sdev_node *ddv)
2257 {
2258 struct sdev_node *dv;
2259 struct vnode *vp;
2260
2261 ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2262
2263 rw_enter(&ddv->sdev_contents, RW_WRITER);
2264 while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) {
2265 vp = SDEVTOV(dv);
2266 SDEV_HOLD(dv);
2267 if (vp->v_type == VDIR)
2268 sdev_stale(dv);
2269
2270 sdev_dirdelete(ddv, dv);
2271 SDEV_RELE(dv);
2272 }
2273 ddv->sdev_flags |= SDEV_BUILD;
2274 rw_exit(&ddv->sdev_contents);
2275 }
2276
2277 /*
2278 * Given a directory node, clean out all the nodes beneath.
2279 * If expr is specified, clean node with names matching expr.
2280 * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2281 * so they are excluded from future lookups.
2282 */
2283 int
sdev_cleandir(struct sdev_node * ddv,char * expr,uint_t flags)2284 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2285 {
2286 int error = 0;
2287 int busy = 0;
2288 struct vnode *vp;
2289 struct sdev_node *dv;
2290 int bkstore = 0;
2291 int len = 0;
2292 char *bks_name = NULL;
2293
2294 ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2295
2296 /*
2297 * We try our best to destroy all unused sdev_node's
2298 */
2299 rw_enter(&ddv->sdev_contents, RW_WRITER);
2300 while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) {
2301 vp = SDEVTOV(dv);
2302
2303 if (expr && gmatch(dv->sdev_name, expr) == 0)
2304 continue;
2305
2306 if (vp->v_type == VDIR &&
2307 sdev_cleandir(dv, NULL, flags) != 0) {
2308 sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2309 dv->sdev_name));
2310 busy++;
2311 continue;
2312 }
2313
2314 if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2315 sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2316 dv->sdev_name));
2317 busy++;
2318 continue;
2319 }
2320
2321 /*
2322 * at this point, either dv is not held or SDEV_ENFORCE
2323 * is specified. In either case, dv needs to be deleted
2324 */
2325 SDEV_HOLD(dv);
2326
2327 bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2328 if (bkstore && (vp->v_type == VDIR))
2329 bkstore += 1;
2330
2331 if (bkstore) {
2332 len = strlen(dv->sdev_name) + 1;
2333 bks_name = kmem_alloc(len, KM_SLEEP);
2334 bcopy(dv->sdev_name, bks_name, len);
2335 }
2336
2337 sdev_dirdelete(ddv, dv);
2338
2339 /* take care the backing store clean up */
2340 if (bkstore) {
2341 ASSERT(bks_name);
2342 ASSERT(ddv->sdev_attrvp);
2343
2344 if (bkstore == 1) {
2345 error = VOP_REMOVE(ddv->sdev_attrvp,
2346 bks_name, kcred, NULL, 0);
2347 } else if (bkstore == 2) {
2348 error = VOP_RMDIR(ddv->sdev_attrvp,
2349 bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2350 }
2351
2352 /* do not propagate the backing store errors */
2353 if (error) {
2354 sdcmn_err9(("sdev_cleandir: backing store"
2355 "not cleaned\n"));
2356 error = 0;
2357 }
2358
2359 bkstore = 0;
2360 kmem_free(bks_name, len);
2361 bks_name = NULL;
2362 len = 0;
2363 }
2364
2365 ddv->sdev_flags |= SDEV_BUILD;
2366 SDEV_RELE(dv);
2367 }
2368
2369 ddv->sdev_flags |= SDEV_BUILD;
2370 rw_exit(&ddv->sdev_contents);
2371
2372 if (busy) {
2373 error = EBUSY;
2374 }
2375
2376 return (error);
2377 }
2378
2379 /*
2380 * a convenient wrapper for readdir() funcs
2381 */
2382 size_t
add_dir_entry(dirent64_t * de,char * nm,size_t size,ino_t ino,offset_t off)2383 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2384 {
2385 size_t reclen = DIRENT64_RECLEN(strlen(nm));
2386 if (reclen > size)
2387 return (0);
2388
2389 de->d_ino = (ino64_t)ino;
2390 de->d_off = (off64_t)off + 1;
2391 de->d_reclen = (ushort_t)reclen;
2392 (void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2393 return (reclen);
2394 }
2395
2396 /*
2397 * sdev_mount service routines
2398 */
2399 int
sdev_copyin_mountargs(struct mounta * uap,struct sdev_mountargs * args)2400 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2401 {
2402 int error;
2403
2404 if (uap->datalen != sizeof (*args))
2405 return (EINVAL);
2406
2407 if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2408 cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2409 "get user data. error %d\n", error);
2410 return (EFAULT);
2411 }
2412
2413 return (0);
2414 }
2415
/*
 * nextdp(dp): given a pointer to a dirent64 record, yield a pointer to
 * the location d_reclen bytes further on, i.e. where the following record
 * in a packed buffer starts.  Any earlier definition of the macro is
 * dropped first.  Note that dp is evaluated twice.
 */
#ifdef nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *) \
			    (intptr_t)((char *)(dp) + (dp)->d_reclen))
2421
/*
 * readdir helper func
 *
 * Copies directory entries for the sdev directory vp into uiop as
 * dirent64 records: ".", "..", and then every SDEV_READY node on the
 * directory's entry list that the directory's validator (if it has one)
 * does not reject.
 *
 * The resume position is read from uio_loffset: offset 0 is ".", offset 1
 * is "..", and offsets 2 and up index the entry list in iteration order.
 * When the copy-out succeeds, uio_loffset is set to the offset at which
 * the walk stopped and, if eofp is not NULL, *eofp tells whether the end
 * of the list was reached.
 *
 * vp    - directory being read
 * uiop  - destination; must describe exactly one iovec
 * cred  - passed on to sdev_filldir_from_store()
 * eofp  - optional end-of-directory indication
 * flags - SDEV_BROWSE is one of the conditions for kicking off a device
 *         reconfiguration from here
 *
 * The caller must hold ddv->sdev_contents as reader.  For global
 * directories that lock is dropped and re-acquired as reader inside this
 * function, so the directory contents may change across the call.
 *
 * Returns 0 on success (including the case where uio_loffset is already
 * at or beyond MAXOFF_T), EINVAL if uiop has more than one iovec or the
 * buffer cannot hold the "." / ".." records, ENOTDIR if vp is not a
 * directory, or the error from uiomove().  If nothing is copied out, an
 * error left over from sdev_filldir_from_store() is returned instead.
 */
int
devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
    int flags)
{
	struct sdev_node *ddv = VTOSDEV(vp);
	struct sdev_node *dv;
	dirent64_t *dp;
	ulong_t outcount = 0;
	size_t namelen;
	ulong_t alloc_count;
	void *outbuf;
	struct iovec *iovp;
	int error = 0;
	size_t reclen;
	offset_t diroff;
	offset_t soff;
	int this_reclen;
	int (*vtor)(struct sdev_node *) = NULL;
	struct vattr attr;
	timestruc_t now;

	ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
	ASSERT(RW_READ_HELD(&ddv->sdev_contents));

	/* nothing can be returned from such an offset: report EOF */
	if (uiop->uio_loffset >= MAXOFF_T) {
		if (eofp)
			*eofp = 1;
		return (0);
	}

	if (uiop->uio_iovcnt != 1)
		return (EINVAL);

	if (vp->v_type != VDIR)
		return (ENOTDIR);

	/* pick up the directory's node validator, if it has one */
	if (ddv->sdev_flags & SDEV_VTOR) {
		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
		ASSERT(vtor);
	}

	if (eofp != NULL)
		*eofp = 0;

	/*
	 * Entries are assembled in a kernel buffer as large as the caller's
	 * iovec and copied out in one go at the end.
	 */
	soff = uiop->uio_loffset;
	iovp = uiop->uio_iov;
	alloc_count = iovp->iov_len;
	dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
	outcount = 0;

	/* a zombie directory is served from the cache only */
	if (ddv->sdev_state == SDEV_ZOMBIE)
		goto get_cache;

	if (SDEV_IS_GLOBAL(ddv)) {

		if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
		    !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
		    !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
		    ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
		    !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
		    !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
		    !sdev_reconfig_disable) {
			/*
			 * invoking "devfsadm" to do system device reconfig
			 */
			mutex_enter(&ddv->sdev_lookup_lock);
			SDEV_BLOCK_OTHERS(ddv,
			    (SDEV_READDIR|SDEV_LGWAITING));
			mutex_exit(&ddv->sdev_lookup_lock);

			sdcmn_err8(("readdir of %s by %s: reconfig\n",
			    ddv->sdev_path, curproc->p_user.u_comm));
			if (sdev_reconfig_verbose) {
				cmn_err(CE_CONT,
				    "?readdir of %s by %s: reconfig\n",
				    ddv->sdev_path, curproc->p_user.u_comm);
			}

			sdev_devfsadmd_thread(ddv, NULL, kcred);
		} else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
			/*
			 * compensate the "ls" started later than "devfsadm"
			 */
			mutex_enter(&ddv->sdev_lookup_lock);
			SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
			mutex_exit(&ddv->sdev_lookup_lock);
		}

		/*
		 * release the contents lock so that
		 * the cache may be updated by devfsadmd
		 */
		rw_exit(&ddv->sdev_contents);
		mutex_enter(&ddv->sdev_lookup_lock);
		if (SDEV_IS_READDIR(ddv))
			(void) sdev_wait4lookup(ddv, SDEV_READDIR);
		mutex_exit(&ddv->sdev_lookup_lock);
		rw_enter(&ddv->sdev_contents, RW_READER);

		sdcmn_err4(("readdir of directory %s by %s\n",
		    ddv->sdev_name, curproc->p_user.u_comm));
		/*
		 * The directory is marked as needing a (re)build: for a
		 * persistent directory populate it from the backing store,
		 * then clear the mark either way.
		 */
		if (ddv->sdev_flags & SDEV_BUILD) {
			if (SDEV_IS_PERSIST(ddv)) {
				error = sdev_filldir_from_store(ddv,
				    alloc_count, cred);
			}
			ddv->sdev_flags &= ~SDEV_BUILD;
		}
	}

get_cache:
	/* handle "." and ".." */
	diroff = 0;
	if (soff == 0) {
		/* first time */
		this_reclen = DIRENT64_RECLEN(1);
		if (alloc_count < this_reclen) {
			error = EINVAL;
			goto done;
		}

		dp->d_ino = (ino64_t)ddv->sdev_ino;
		dp->d_off = (off64_t)1;
		dp->d_reclen = (ushort_t)this_reclen;

		(void) strncpy(dp->d_name, ".",
		    DIRENT64_NAMELEN(this_reclen));
		outcount += dp->d_reclen;
		dp = nextdp(dp);
	}

	diroff++;
	if (soff <= 1) {
		/* ".." reports the inode number of the parent node */
		this_reclen = DIRENT64_RECLEN(2);
		if (alloc_count < outcount + this_reclen) {
			error = EINVAL;
			goto done;
		}

		dp->d_reclen = (ushort_t)this_reclen;
		dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
		dp->d_off = (off64_t)2;

		(void) strncpy(dp->d_name, "..",
		    DIRENT64_NAMELEN(this_reclen));
		outcount += dp->d_reclen;

		dp = nextdp(dp);
	}


	/* gets the cache */
	diroff++;
	for (dv = SDEV_FIRST_ENTRY(ddv); dv;
	    dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
		sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
		    diroff, soff, dv->sdev_name));

		/*
		 * skip entries before the resume offset as well as nodes
		 * that are not (yet) in the SDEV_READY state
		 */
		if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
			sdcmn_err3(("sdev_readdir: pre-mature node "
			    "%s %d\n", dv->sdev_name, dv->sdev_state));
			continue;
		}

		/*
		 * Check validity of node
		 * Drop invalid and nodes to be skipped.
		 * A node the validator indicates as stale needs
		 * to be returned as presumably the node name itself
		 * is valid and the node data itself will be refreshed
		 * on lookup. An application performing a readdir then
		 * stat on each entry should thus always see consistent
		 * data. In any case, it is not possible to synchronize
		 * with dynamic kernel state, and any view we return can
		 * never be anything more than a snapshot at a point in time.
		 */
		if (vtor) {
			switch (vtor(dv)) {
			case SDEV_VTOR_VALID:
				break;
			case SDEV_VTOR_INVALID:
			case SDEV_VTOR_SKIP:
				continue;
			case SDEV_VTOR_STALE:
				sdcmn_err3(("sdev_readir: %s stale\n",
				    dv->sdev_name));
				break;
			default:
				cmn_err(CE_PANIC,
				    "dev fs: validator failed: %s(%p)\n",
				    dv->sdev_name, (void *)dv);
				break;
			/*NOTREACHED*/
			}
		}

		/* stop as soon as the next record would not fit */
		namelen = strlen(dv->sdev_name);
		reclen = DIRENT64_RECLEN(namelen);
		if (outcount + reclen > alloc_count) {
			goto full;
		}
		dp->d_reclen = (ushort_t)reclen;
		dp->d_ino = (ino64_t)dv->sdev_ino;
		dp->d_off = (off64_t)diroff + 1;
		(void) strncpy(dp->d_name, dv->sdev_name,
		    DIRENT64_NAMELEN(reclen));
		outcount += reclen;
		dp = nextdp(dp);
	}

full:
	/*
	 * Reached either because the list is exhausted (dv == NULL) or
	 * because the entry at dv/diroff did not fit in the buffer.
	 */
	sdcmn_err4(("sdev_readdir: moving %lu bytes: "
	    "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
	    (void *)dv));

	if (outcount)
		error = uiomove(outbuf, outcount, UIO_READ, uiop);

	/* resume at diroff next time; EOF only if the walk ran to the end */
	if (!error) {
		uiop->uio_loffset = diroff;
		if (eofp)
			*eofp = dv ? 0 : 1;
	}


	/* stamp the access/change times on the backing store node, if any */
	if (ddv->sdev_attrvp) {
		gethrestime(&now);
		attr.va_ctime = now;
		attr.va_atime = now;
		attr.va_mask = AT_CTIME|AT_ATIME;

		(void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
	}
done:
	kmem_free(outbuf, alloc_count);
	return (error);
}
2663
2664 static int
sdev_modctl_lookup(const char * path,vnode_t ** r_vp)2665 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2666 {
2667 vnode_t *vp;
2668 vnode_t *cvp;
2669 struct sdev_node *svp;
2670 char *nm;
2671 struct pathname pn;
2672 int error;
2673 int persisted = 0;
2674
2675 ASSERT(INGLOBALZONE(curproc));
2676
2677 if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2678 return (error);
2679 nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2680
2681 vp = rootdir;
2682 VN_HOLD(vp);
2683
2684 while (pn_pathleft(&pn)) {
2685 ASSERT(vp->v_type == VDIR || vp->v_type == VLNK);
2686 (void) pn_getcomponent(&pn, nm);
2687
2688 /*
2689 * Deal with the .. special case where we may be
2690 * traversing up across a mount point, to the
2691 * root of this filesystem or global root.
2692 */
2693 if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) {
2694 checkforroot:
2695 if (VN_CMP(vp, rootdir)) {
2696 nm[1] = 0;
2697 } else if (vp->v_flag & VROOT) {
2698 vfs_t *vfsp;
2699 cvp = vp;
2700 vfsp = cvp->v_vfsp;
2701 vfs_rlock_wait(vfsp);
2702 vp = cvp->v_vfsp->vfs_vnodecovered;
2703 if (vp == NULL ||
2704 (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
2705 vfs_unlock(vfsp);
2706 VN_RELE(cvp);
2707 error = EIO;
2708 break;
2709 }
2710 VN_HOLD(vp);
2711 vfs_unlock(vfsp);
2712 VN_RELE(cvp);
2713 cvp = NULL;
2714 goto checkforroot;
2715 }
2716 }
2717
2718 error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2719 NULL, NULL);
2720 if (error) {
2721 VN_RELE(vp);
2722 break;
2723 }
2724
2725 /* traverse mount points encountered on our journey */
2726 if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2727 VN_RELE(vp);
2728 VN_RELE(cvp);
2729 break;
2730 }
2731
2732 /*
2733 * symbolic link, can be either relative and absolute
2734 */
2735 if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) {
2736 struct pathname linkpath;
2737 pn_alloc(&linkpath);
2738 if (error = pn_getsymlink(cvp, &linkpath, kcred)) {
2739 pn_free(&linkpath);
2740 break;
2741 }
2742 if (pn_pathleft(&linkpath) == 0)
2743 (void) pn_set(&linkpath, ".");
2744 error = pn_insert(&pn, &linkpath, strlen(nm));
2745 pn_free(&linkpath);
2746 if (pn.pn_pathlen == 0) {
2747 VN_RELE(vp);
2748 return (ENOENT);
2749 }
2750 if (pn.pn_path[0] == '/') {
2751 pn_skipslash(&pn);
2752 VN_RELE(vp);
2753 VN_RELE(cvp);
2754 vp = rootdir;
2755 VN_HOLD(vp);
2756 } else {
2757 VN_RELE(cvp);
2758 }
2759 continue;
2760 }
2761
2762 VN_RELE(vp);
2763
2764 /*
2765 * Direct the operation to the persisting filesystem
2766 * underlying /dev. Bail if we encounter a
2767 * non-persistent dev entity here.
2768 */
2769 if (cvp->v_vfsp->vfs_fstype == devtype) {
2770
2771 if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2772 error = ENOENT;
2773 VN_RELE(cvp);
2774 break;
2775 }
2776
2777 if (VTOSDEV(cvp) == NULL) {
2778 error = ENOENT;
2779 VN_RELE(cvp);
2780 break;
2781 }
2782 svp = VTOSDEV(cvp);
2783 if ((vp = svp->sdev_attrvp) == NULL) {
2784 error = ENOENT;
2785 VN_RELE(cvp);
2786 break;
2787 }
2788 persisted = 1;
2789 VN_HOLD(vp);
2790 VN_RELE(cvp);
2791 cvp = vp;
2792 }
2793
2794 vp = cvp;
2795 pn_skipslash(&pn);
2796 }
2797
2798 kmem_free(nm, MAXNAMELEN);
2799 pn_free(&pn);
2800
2801 if (error)
2802 return (error);
2803
2804 /*
2805 * Only return persisted nodes in the filesystem underlying /dev.
2806 */
2807 if (!persisted) {
2808 VN_RELE(vp);
2809 return (ENOENT);
2810 }
2811
2812 *r_vp = vp;
2813 return (0);
2814 }
2815
2816 int
sdev_modctl_readdir(const char * dir,char *** dirlistp,int * npathsp,int * npathsp_alloc,int checking_empty)2817 sdev_modctl_readdir(const char *dir, char ***dirlistp,
2818 int *npathsp, int *npathsp_alloc, int checking_empty)
2819 {
2820 char **pathlist = NULL;
2821 char **newlist = NULL;
2822 int npaths = 0;
2823 int npaths_alloc = 0;
2824 dirent64_t *dbuf = NULL;
2825 int n;
2826 char *s;
2827 int error;
2828 vnode_t *vp;
2829 int eof;
2830 struct iovec iov;
2831 struct uio uio;
2832 struct dirent64 *dp;
2833 size_t dlen;
2834 size_t dbuflen;
2835 int ndirents = 64;
2836 char *nm;
2837
2838 error = sdev_modctl_lookup(dir, &vp);
2839 sdcmn_err11(("modctl readdir: %s by %s: %s\n",
2840 dir, curproc->p_user.u_comm,
2841 (error == 0) ? "ok" : "failed"));
2842 if (error)
2843 return (error);
2844
2845 dlen = ndirents * (sizeof (*dbuf));
2846 dbuf = kmem_alloc(dlen, KM_SLEEP);
2847
2848 uio.uio_iov = &iov;
2849 uio.uio_iovcnt = 1;
2850 uio.uio_segflg = UIO_SYSSPACE;
2851 uio.uio_fmode = 0;
2852 uio.uio_extflg = UIO_COPY_CACHED;
2853 uio.uio_loffset = 0;
2854 uio.uio_llimit = MAXOFFSET_T;
2855
2856 eof = 0;
2857 error = 0;
2858 while (!error && !eof) {
2859 uio.uio_resid = dlen;
2860 iov.iov_base = (char *)dbuf;
2861 iov.iov_len = dlen;
2862
2863 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2864 error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
2865 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2866
2867 dbuflen = dlen - uio.uio_resid;
2868
2869 if (error || dbuflen == 0)
2870 break;
2871
2872 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
2873 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
2874
2875 nm = dp->d_name;
2876
2877 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
2878 continue;
2879 if (npaths == npaths_alloc) {
2880 npaths_alloc += 64;
2881 newlist = (char **)
2882 kmem_zalloc((npaths_alloc + 1) *
2883 sizeof (char *), KM_SLEEP);
2884 if (pathlist) {
2885 bcopy(pathlist, newlist,
2886 npaths * sizeof (char *));
2887 kmem_free(pathlist,
2888 (npaths + 1) * sizeof (char *));
2889 }
2890 pathlist = newlist;
2891 }
2892 n = strlen(nm) + 1;
2893 s = kmem_alloc(n, KM_SLEEP);
2894 bcopy(nm, s, n);
2895 pathlist[npaths++] = s;
2896 sdcmn_err11((" %s/%s\n", dir, s));
2897
2898 /* if checking empty, one entry is as good as many */
2899 if (checking_empty) {
2900 eof = 1;
2901 break;
2902 }
2903 }
2904 }
2905
2906 exit:
2907 VN_RELE(vp);
2908
2909 if (dbuf)
2910 kmem_free(dbuf, dlen);
2911
2912 if (error)
2913 return (error);
2914
2915 *dirlistp = pathlist;
2916 *npathsp = npaths;
2917 *npathsp_alloc = npaths_alloc;
2918
2919 return (0);
2920 }
2921
/*
 * Free a name list produced by sdev_modctl_readdir().
 *
 * pathlist     - array of name strings, or NULL
 * npaths       - number of names stored in pathlist
 * npaths_alloc - capacity reported by sdev_modctl_readdir(); the array
 *                itself holds npaths_alloc + 1 pointers
 *
 * sdev_modctl_readdir() hands back a NULL list (with both counts 0) for a
 * directory without entries; that case is accepted and is a no-op.
 */
void
sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
{
	int i, n;

	/* no array was ever allocated: kmem_free() must not see NULL */
	if (pathlist == NULL)
		return;

	for (i = 0; i < npaths; i++) {
		n = strlen(pathlist[i]) + 1;
		kmem_free(pathlist[i], n);
	}

	kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
}
2934
2935 int
sdev_modctl_devexists(const char * path)2936 sdev_modctl_devexists(const char *path)
2937 {
2938 vnode_t *vp;
2939 int error;
2940
2941 error = sdev_modctl_lookup(path, &vp);
2942 sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
2943 path, curproc->p_user.u_comm,
2944 (error == 0) ? "ok" : "failed"));
2945 if (error == 0)
2946 VN_RELE(vp);
2947
2948 return (error);
2949 }
2950
/* size of sdev_vnodeops_tbl, used as the byte count when copying/freeing it */
extern int sdev_vnodeops_tbl_size;
2952
2953 /*
2954 * construct a new template with overrides from vtab
2955 */
2956 static fs_operation_def_t *
sdev_merge_vtab(const fs_operation_def_t tab[])2957 sdev_merge_vtab(const fs_operation_def_t tab[])
2958 {
2959 fs_operation_def_t *new;
2960 const fs_operation_def_t *tab_entry;
2961
2962 /* make a copy of standard vnode ops table */
2963 new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
2964 bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
2965
2966 /* replace the overrides from tab */
2967 for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
2968 fs_operation_def_t *std_entry = new;
2969 while (std_entry->name) {
2970 if (strcmp(tab_entry->name, std_entry->name) == 0) {
2971 std_entry->func = tab_entry->func;
2972 break;
2973 }
2974 std_entry++;
2975 }
2976 if (std_entry->name == NULL)
2977 cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
2978 tab_entry->name);
2979 }
2980
2981 return (new);
2982 }
2983
/*
 * Free memory allocated by sdev_merge_vtab.
 *
 * new must be a table of sdev_vnodeops_tbl_size bytes, which is the size
 * sdev_merge_vtab allocates.
 */
static void
sdev_free_vtab(fs_operation_def_t *new)
{
	kmem_free(new, sdev_vnodeops_tbl_size);
}
2990
2991 /*
2992 * a generic setattr() function
2993 *
2994 * note: flags only supports AT_UID and AT_GID.
2995 * Future enhancements can be done for other types, e.g. AT_MODE
2996 */
2997 int
devname_setattr_func(struct vnode * vp,struct vattr * vap,int flags,struct cred * cred,int (* callback)(struct sdev_node *,struct vattr *,int),int protocol)2998 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
2999 struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
3000 int), int protocol)
3001 {
3002 struct sdev_node *dv = VTOSDEV(vp);
3003 struct sdev_node *parent = dv->sdev_dotdot;
3004 struct vattr *get;
3005 uint_t mask = vap->va_mask;
3006 int error;
3007
3008 /* some sanity checks */
3009 if (vap->va_mask & AT_NOSET)
3010 return (EINVAL);
3011
3012 if (vap->va_mask & AT_SIZE) {
3013 if (vp->v_type == VDIR) {
3014 return (EISDIR);
3015 }
3016 }
3017
3018 /* no need to set attribute, but do not fail either */
3019 ASSERT(parent);
3020 rw_enter(&parent->sdev_contents, RW_READER);
3021 if (dv->sdev_state == SDEV_ZOMBIE) {
3022 rw_exit(&parent->sdev_contents);
3023 return (0);
3024 }
3025
3026 /* If backing store exists, just set it. */
3027 if (dv->sdev_attrvp) {
3028 rw_exit(&parent->sdev_contents);
3029 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3030 }
3031
3032 /*
3033 * Otherwise, for nodes with the persistence attribute, create it.
3034 */
3035 ASSERT(dv->sdev_attr);
3036 if (SDEV_IS_PERSIST(dv) ||
3037 ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
3038 sdev_vattr_merge(dv, vap);
3039 rw_enter(&dv->sdev_contents, RW_WRITER);
3040 error = sdev_shadow_node(dv, cred);
3041 rw_exit(&dv->sdev_contents);
3042 rw_exit(&parent->sdev_contents);
3043
3044 if (error)
3045 return (error);
3046 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3047 }
3048
3049
3050 /*
3051 * sdev_attr was allocated in sdev_mknode
3052 */
3053 rw_enter(&dv->sdev_contents, RW_WRITER);
3054 error = secpolicy_vnode_setattr(cred, vp, vap,
3055 dv->sdev_attr, flags, sdev_unlocked_access, dv);
3056 if (error) {
3057 rw_exit(&dv->sdev_contents);
3058 rw_exit(&parent->sdev_contents);
3059 return (error);
3060 }
3061
3062 get = dv->sdev_attr;
3063 if (mask & AT_MODE) {
3064 get->va_mode &= S_IFMT;
3065 get->va_mode |= vap->va_mode & ~S_IFMT;
3066 }
3067
3068 if ((mask & AT_UID) || (mask & AT_GID)) {
3069 if (mask & AT_UID)
3070 get->va_uid = vap->va_uid;
3071 if (mask & AT_GID)
3072 get->va_gid = vap->va_gid;
3073 /*
3074 * a callback must be provided if the protocol is set
3075 */
3076 if ((protocol & AT_UID) || (protocol & AT_GID)) {
3077 ASSERT(callback);
3078 error = callback(dv, get, protocol);
3079 if (error) {
3080 rw_exit(&dv->sdev_contents);
3081 rw_exit(&parent->sdev_contents);
3082 return (error);
3083 }
3084 }
3085 }
3086
3087 if (mask & AT_ATIME)
3088 get->va_atime = vap->va_atime;
3089 if (mask & AT_MTIME)
3090 get->va_mtime = vap->va_mtime;
3091 if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
3092 gethrestime(&get->va_ctime);
3093 }
3094
3095 sdev_vattr_merge(dv, get);
3096 rw_exit(&dv->sdev_contents);
3097 rw_exit(&parent->sdev_contents);
3098 return (0);
3099 }
3100
3101 /*
3102 * a generic inactive() function
3103 */
3104 /*ARGSUSED*/
3105 void
devname_inactive_func(struct vnode * vp,struct cred * cred,void (* callback)(struct vnode *))3106 devname_inactive_func(struct vnode *vp, struct cred *cred,
3107 void (*callback)(struct vnode *))
3108 {
3109 int clean;
3110 struct sdev_node *dv = VTOSDEV(vp);
3111 int state;
3112
3113 mutex_enter(&vp->v_lock);
3114 ASSERT(vp->v_count >= 1);
3115
3116
3117 if (vp->v_count == 1 && callback != NULL)
3118 callback(vp);
3119
3120 rw_enter(&dv->sdev_contents, RW_WRITER);
3121 state = dv->sdev_state;
3122
3123 clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
3124
3125 /*
3126 * sdev is a rather bad public citizen. It violates the general
3127 * agreement that in memory nodes should always have a valid reference
3128 * count on their vnode. But that's not the case here. This means that
3129 * we do actually have to distinguish between getting inactive callbacks
3130 * for zombies and otherwise. This should probably be fixed.
3131 */
3132 if (clean) {
3133 /* Remove the . entry to ourselves */
3134 if (vp->v_type == VDIR) {
3135 decr_link(dv);
3136 }
3137 VERIFY(dv->sdev_nlink == 1);
3138 decr_link(dv);
3139 --vp->v_count;
3140 rw_exit(&dv->sdev_contents);
3141 mutex_exit(&vp->v_lock);
3142 sdev_nodedestroy(dv, 0);
3143 } else {
3144 --vp->v_count;
3145 rw_exit(&dv->sdev_contents);
3146 mutex_exit(&vp->v_lock);
3147 }
3148 }
3149