1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /*
26 * Copyright 2023 Oxide Computer Company
27 */
28
29 /*
30 * miscellaneous routines for the devfs
31 */
32
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/t_lock.h>
36 #include <sys/systm.h>
37 #include <sys/sysmacros.h>
38 #include <sys/user.h>
39 #include <sys/time.h>
40 #include <sys/vfs.h>
41 #include <sys/vnode.h>
42 #include <sys/file.h>
43 #include <sys/fcntl.h>
44 #include <sys/flock.h>
45 #include <sys/kmem.h>
46 #include <sys/uio.h>
47 #include <sys/errno.h>
48 #include <sys/stat.h>
49 #include <sys/cred.h>
50 #include <sys/dirent.h>
51 #include <sys/pathname.h>
52 #include <sys/cmn_err.h>
53 #include <sys/debug.h>
54 #include <sys/modctl.h>
55 #include <fs/fs_subr.h>
56 #include <sys/fs/dv_node.h>
57 #include <sys/fs/snode.h>
58 #include <sys/sunndi.h>
59 #include <sys/sunmdi.h>
60 #include <sys/conf.h>
61
62 #ifdef DEBUG
63 int devfs_debug = 0x0;
64 #endif
65
66 const char dvnm[] = "devfs";
67 kmem_cache_t *dv_node_cache; /* dv_node cache */
68
69 /*
70 * The devfs_clean_key is taken during a devfs_clean operation: it is used to
71 * prevent unnecessary code execution and for detection of potential deadlocks.
72 */
73 uint_t devfs_clean_key;
74
75 struct dv_node *dvroot;
76
77 /* prototype memory vattrs */
78 vattr_t dv_vattr_dir = {
79 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */
80 VDIR, /* va_type */
81 DV_DIRMODE_DEFAULT, /* va_mode */
82 DV_UID_DEFAULT, /* va_uid */
83 DV_GID_DEFAULT, /* va_gid */
84 0, /* va_fsid; */
85 0, /* va_nodeid; */
86 0, /* va_nlink; */
87 0, /* va_size; */
88 0, /* va_atime; */
89 0, /* va_mtime; */
90 0, /* va_ctime; */
91 0, /* va_rdev; */
92 0, /* va_blksize; */
93 0, /* va_nblocks; */
94 0, /* va_seq; */
95 };
96
97 vattr_t dv_vattr_file = {
98 AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV, /* va_mask */
99 0, /* va_type */
100 DV_DEVMODE_DEFAULT, /* va_mode */
101 DV_UID_DEFAULT, /* va_uid */
102 DV_GID_DEFAULT, /* va_gid */
103 0, /* va_fsid; */
104 0, /* va_nodeid; */
105 0, /* va_nlink; */
106 0, /* va_size; */
107 0, /* va_atime; */
108 0, /* va_mtime; */
109 0, /* va_ctime; */
110 0, /* va_rdev; */
111 0, /* va_blksize; */
112 0, /* va_nblocks; */
113 0, /* va_seq; */
114 };
115
116 vattr_t dv_vattr_priv = {
117 AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV, /* va_mask */
118 0, /* va_type */
119 DV_DEVMODE_PRIV, /* va_mode */
120 DV_UID_DEFAULT, /* va_uid */
121 DV_GID_DEFAULT, /* va_gid */
122 0, /* va_fsid; */
123 0, /* va_nodeid; */
124 0, /* va_nlink; */
125 0, /* va_size; */
126 0, /* va_atime; */
127 0, /* va_mtime; */
128 0, /* va_ctime; */
129 0, /* va_rdev; */
130 0, /* va_blksize; */
131 0, /* va_nblocks; */
132 0, /* va_seq; */
133 };
134
135 extern dev_info_t *clone_dip;
136 extern major_t clone_major;
137 extern struct dev_ops *ddi_hold_driver(major_t);
138
139 /* dv_node node constructor for kmem cache */
140 static int
i_dv_node_ctor(void * buf,void * cfarg,int flag)141 i_dv_node_ctor(void *buf, void *cfarg, int flag)
142 {
143 _NOTE(ARGUNUSED(cfarg, flag))
144 struct dv_node *dv = (struct dv_node *)buf;
145 struct vnode *vp;
146
147 bzero(buf, sizeof (struct dv_node));
148 vp = dv->dv_vnode = vn_alloc(flag);
149 if (vp == NULL) {
150 return (-1);
151 }
152 vp->v_data = dv;
153 rw_init(&dv->dv_contents, NULL, RW_DEFAULT, NULL);
154 return (0);
155 }
156
157 /* dv_node node destructor for kmem cache */
158 static void
i_dv_node_dtor(void * buf,void * arg)159 i_dv_node_dtor(void *buf, void *arg)
160 {
161 _NOTE(ARGUNUSED(arg))
162 struct dv_node *dv = (struct dv_node *)buf;
163 struct vnode *vp = DVTOV(dv);
164
165 rw_destroy(&dv->dv_contents);
166 vn_invalid(vp);
167 vn_free(vp);
168 }
169
170
171 /* initialize dv_node node cache */
172 void
dv_node_cache_init()173 dv_node_cache_init()
174 {
175 ASSERT(dv_node_cache == NULL);
176 dv_node_cache = kmem_cache_create("dv_node_cache",
177 sizeof (struct dv_node), 0, i_dv_node_ctor, i_dv_node_dtor,
178 NULL, NULL, NULL, 0);
179
180 tsd_create(&devfs_clean_key, NULL);
181 }
182
183 /* destroy dv_node node cache */
184 void
dv_node_cache_fini()185 dv_node_cache_fini()
186 {
187 ASSERT(dv_node_cache != NULL);
188 kmem_cache_destroy(dv_node_cache);
189 dv_node_cache = NULL;
190
191 tsd_destroy(&devfs_clean_key);
192 }
193
194 /*
195 * dv_mkino - Generate a unique inode number for devfs nodes.
196 *
197 * Although ino_t is 64 bits, the inode number is truncated to 32 bits for 32
198 * bit non-LARGEFILE applications. This means that there is a requirement to
199 * maintain the inode number as a 32 bit value or applications will have
 * stat(2) calls fail with EOVERFLOW. We form a 32 bit inode number from the
 * dev_t; if the minor number is larger than L_MAXMIN32 the extra minor bits
 * are currently masked off (see the note on 64-bit dev_t support below).
 *
203 * To generate inode numbers for directories, we assume that we will never use
204 * more than half the major space - this allows for ~8190 drivers. We use this
205 * upper major number space to allocate inode numbers for directories by
206 * encoding the major and instance into this space.
207 *
208 * We also skew the result so that inode 2 is reserved for the root of the file
209 * system.
210 *
211 * As part of the future support for 64-bit dev_t APIs, the upper minor bits
212 * should be folded into the high inode bits by adding the following code
213 * after "ino |= 1":
214 *
215 * #if (L_BITSMINOR32 != L_BITSMINOR)
216 * |* fold overflow minor bits into high bits of inode number *|
217 * ino |= ((ino_t)(minor >> L_BITSMINOR32)) << L_BITSMINOR;
218 * #endif |* (L_BITSMINOR32 != L_BITSMINOR) *|
219 *
220 * This way only applications that use devices that overflow their minor
221 * space will have an application level impact.
222 */
223 static ino_t
dv_mkino(dev_info_t * devi,vtype_t typ,dev_t dev)224 dv_mkino(dev_info_t *devi, vtype_t typ, dev_t dev)
225 {
226 major_t major;
227 minor_t minor;
228 ino_t ino;
229 static int warn;
230
231 if (typ == VDIR) {
232 major = ((L_MAXMAJ32 + 1) >> 1) + DEVI(devi)->devi_major;
233 minor = ddi_get_instance(devi);
234
235 /* makedevice32 in high half of major number space */
236 ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32));
237
238 major = DEVI(devi)->devi_major;
239 } else {
240 major = getmajor(dev);
241 minor = getminor(dev);
242
243 /* makedevice32 */
244 ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32));
245
246 /* make ino for VCHR different than VBLK */
247 ino <<= 1;
248 if (typ == VCHR)
249 ino |= 1;
250 }
251
252 ino += DV_ROOTINO + 1; /* skew */
253
254 /*
255 * diagnose things a little early because adding the skew to a large
256 * minor number could roll over the major.
257 */
258 if ((major >= (L_MAXMAJ32 >> 1)) && (warn == 0)) {
259 warn = 1;
260 cmn_err(CE_WARN, "%s: inode numbers are not unique", dvnm);
261 }
262
263 return (ino);
264 }
265
266 /*
267 * Compare two nodes lexographically to balance avl tree
268 */
269 static int
dv_compare_nodes(const struct dv_node * dv1,const struct dv_node * dv2)270 dv_compare_nodes(const struct dv_node *dv1, const struct dv_node *dv2)
271 {
272 int rv;
273
274 if ((rv = strcmp(dv1->dv_name, dv2->dv_name)) == 0)
275 return (0);
276 return ((rv < 0) ? -1 : 1);
277 }
278
279 /*
280 * dv_mkroot
281 *
282 * Build the first VDIR dv_node.
283 */
284 struct dv_node *
dv_mkroot(struct vfs * vfsp,dev_t devfsdev)285 dv_mkroot(struct vfs *vfsp, dev_t devfsdev)
286 {
287 struct dv_node *dv;
288 struct vnode *vp;
289
290 ASSERT(ddi_root_node() != NULL);
291 ASSERT(dv_node_cache != NULL);
292
293 dcmn_err3(("dv_mkroot\n"));
294 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP);
295 vp = DVTOV(dv);
296 vn_reinit(vp);
297 vp->v_flag = VROOT;
298 vp->v_vfsp = vfsp;
299 vp->v_type = VDIR;
300 vp->v_rdev = devfsdev;
301 vn_setops(vp, dv_vnodeops);
302 vn_exists(vp);
303
304 dvroot = dv;
305
306 dv->dv_name = NULL; /* not needed */
307 dv->dv_namelen = 0;
308
309 dv->dv_devi = ddi_root_node();
310
311 dv->dv_ino = DV_ROOTINO;
312 dv->dv_nlink = 2; /* name + . (no dv_insert) */
313 dv->dv_dotdot = dv; /* .. == self */
314 dv->dv_attrvp = NULLVP;
315 dv->dv_attr = NULL;
316 dv->dv_flags = DV_BUILD;
317 dv->dv_priv = NULL;
318 dv->dv_busy = 0;
319 dv->dv_dflt_mode = 0;
320
321 avl_create(&dv->dv_entries,
322 (int (*)(const void *, const void *))dv_compare_nodes,
323 sizeof (struct dv_node), offsetof(struct dv_node, dv_avllink));
324
325 return (dv);
326 }
327
328 /*
329 * dv_mkdir
330 *
331 * Given an probed or attached nexus node, create a VDIR dv_node.
332 * No dv_attrvp is created at this point.
333 */
334 struct dv_node *
dv_mkdir(struct dv_node * ddv,dev_info_t * devi,char * nm)335 dv_mkdir(struct dv_node *ddv, dev_info_t *devi, char *nm)
336 {
337 struct dv_node *dv;
338 struct vnode *vp;
339 size_t nmlen;
340
341 ASSERT((devi));
342 dcmn_err4(("dv_mkdir: %s\n", nm));
343
344 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP);
345 nmlen = strlen(nm) + 1;
346 dv->dv_name = kmem_alloc(nmlen, KM_SLEEP);
347 bcopy(nm, dv->dv_name, nmlen);
348 dv->dv_namelen = nmlen - 1; /* '\0' not included */
349
350 vp = DVTOV(dv);
351 vn_reinit(vp);
352 vp->v_flag = 0;
353 vp->v_vfsp = DVTOV(ddv)->v_vfsp;
354 vp->v_type = VDIR;
355 vp->v_rdev = DVTOV(ddv)->v_rdev;
356 vn_setops(vp, vn_getops(DVTOV(ddv)));
357 vn_exists(vp);
358
359 dv->dv_devi = devi;
360 ndi_hold_devi(devi);
361
362 dv->dv_ino = dv_mkino(devi, VDIR, NODEV);
363 dv->dv_nlink = 0; /* updated on insert */
364 dv->dv_dotdot = ddv;
365 dv->dv_attrvp = NULLVP;
366 dv->dv_attr = NULL;
367 dv->dv_flags = DV_BUILD;
368 dv->dv_priv = NULL;
369 dv->dv_busy = 0;
370 dv->dv_dflt_mode = 0;
371
372 avl_create(&dv->dv_entries,
373 (int (*)(const void *, const void *))dv_compare_nodes,
374 sizeof (struct dv_node), offsetof(struct dv_node, dv_avllink));
375
376 return (dv);
377 }
378
379 /*
380 * dv_mknod
381 *
382 * Given a minor node, create a VCHR or VBLK dv_node.
383 * No dv_attrvp is created at this point.
384 */
385 static struct dv_node *
dv_mknod(struct dv_node * ddv,dev_info_t * devi,char * nm,struct ddi_minor_data * dmd)386 dv_mknod(struct dv_node *ddv, dev_info_t *devi, char *nm,
387 struct ddi_minor_data *dmd)
388 {
389 struct dv_node *dv;
390 struct vnode *vp;
391 size_t nmlen;
392
393 dcmn_err4(("dv_mknod: %s\n", nm));
394
395 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP);
396 nmlen = strlen(nm) + 1;
397 dv->dv_name = kmem_alloc(nmlen, KM_SLEEP);
398 bcopy(nm, dv->dv_name, nmlen);
399 dv->dv_namelen = nmlen - 1; /* no '\0' */
400
401 vp = DVTOV(dv);
402 vn_reinit(vp);
403 vp->v_flag = 0;
404 vp->v_vfsp = DVTOV(ddv)->v_vfsp;
405 vp->v_type = dmd->ddm_spec_type == S_IFCHR ? VCHR : VBLK;
406 vp->v_rdev = dmd->ddm_dev;
407 vn_setops(vp, vn_getops(DVTOV(ddv)));
408 vn_exists(vp);
409
410 /* increment dev_ref with devi_lock held */
411 ASSERT(DEVI_BUSY_OWNED(devi));
412 mutex_enter(&DEVI(devi)->devi_lock);
413 dv->dv_devi = devi;
414 DEVI(devi)->devi_ref++; /* ndi_hold_devi(dip) */
415 mutex_exit(&DEVI(devi)->devi_lock);
416
417 dv->dv_ino = dv_mkino(devi, vp->v_type, vp->v_rdev);
418 dv->dv_nlink = 0; /* updated on insert */
419 dv->dv_dotdot = ddv;
420 dv->dv_attrvp = NULLVP;
421 dv->dv_attr = NULL;
422 dv->dv_flags = 0;
423
424 if (dmd->type == DDM_INTERNAL_PATH)
425 dv->dv_flags |= DV_INTERNAL;
426 if (dmd->ddm_flags & DM_NO_FSPERM)
427 dv->dv_flags |= DV_NO_FSPERM;
428
429 dv->dv_priv = dmd->ddm_node_priv;
430 if (dv->dv_priv)
431 dphold(dv->dv_priv);
432
433 /*
434 * Minors created with ddi_create_priv_minor_node can specify
435 * a default mode permission other than the devfs default.
436 */
437 if (dv->dv_priv || dv->dv_flags & DV_NO_FSPERM) {
438 dcmn_err5(("%s: dv_mknod default priv mode 0%o\n",
439 dv->dv_name, dmd->ddm_priv_mode));
440 dv->dv_flags |= DV_DFLT_MODE;
441 dv->dv_dflt_mode = dmd->ddm_priv_mode & S_IAMB;
442 }
443
444 return (dv);
445 }
446
447 /*
448 * dv_destroy
449 *
450 * Destroy what we created in dv_mkdir or dv_mknod.
451 * In the case of a *referenced* directory, do nothing.
452 */
453 void
dv_destroy(struct dv_node * dv,uint_t flags)454 dv_destroy(struct dv_node *dv, uint_t flags)
455 {
456 vnode_t *vp = DVTOV(dv);
457 ASSERT(dv->dv_nlink == 0); /* no references */
458
459 dcmn_err4(("dv_destroy: %s\n", dv->dv_name));
460
461 /*
462 * We may be asked to unlink referenced directories.
463 * In this case, there is nothing to be done.
464 * The eventual memory free will be done in
465 * devfs_inactive.
466 */
467 if (vp->v_count != 0) {
468 ASSERT(vp->v_type == VDIR);
469 ASSERT(flags & DV_CLEAN_FORCE);
470 ASSERT(DV_STALE(dv));
471 return;
472 }
473
474 if (vp->v_type == VDIR) {
475 ASSERT(DV_FIRST_ENTRY(dv) == NULL);
476 avl_destroy(&dv->dv_entries);
477 }
478
479 if (dv->dv_attrvp != NULLVP)
480 VN_RELE(dv->dv_attrvp);
481 if (dv->dv_attr != NULL)
482 kmem_free(dv->dv_attr, sizeof (struct vattr));
483 if (dv->dv_name != NULL)
484 kmem_free(dv->dv_name, dv->dv_namelen + 1);
485 if (dv->dv_devi != NULL) {
486 ndi_rele_devi(dv->dv_devi);
487 }
488 if (dv->dv_priv != NULL) {
489 dpfree(dv->dv_priv);
490 }
491
492 kmem_cache_free(dv_node_cache, dv);
493 }
494
495 /*
496 * Find and hold dv_node by name
497 */
498 static struct dv_node *
dv_findbyname(struct dv_node * ddv,char * nm)499 dv_findbyname(struct dv_node *ddv, char *nm)
500 {
501 struct dv_node *dv;
502 avl_index_t where;
503 struct dv_node dvtmp;
504
505 ASSERT(RW_LOCK_HELD(&ddv->dv_contents));
506 dcmn_err3(("dv_findbyname: %s\n", nm));
507
508 dvtmp.dv_name = nm;
509 dv = avl_find(&ddv->dv_entries, &dvtmp, &where);
510 if (dv) {
511 ASSERT(dv->dv_dotdot == ddv);
512 ASSERT(strcmp(dv->dv_name, nm) == 0);
513 VN_HOLD(DVTOV(dv));
514 return (dv);
515 }
516 return (NULL);
517 }
518
519 /*
520 * Inserts a new dv_node in a parent directory
521 */
522 void
dv_insert(struct dv_node * ddv,struct dv_node * dv)523 dv_insert(struct dv_node *ddv, struct dv_node *dv)
524 {
525 avl_index_t where;
526
527 ASSERT(RW_WRITE_HELD(&ddv->dv_contents));
528 ASSERT(DVTOV(ddv)->v_type == VDIR);
529 ASSERT(ddv->dv_nlink >= 2);
530 ASSERT(dv->dv_nlink == 0);
531
532 dcmn_err3(("dv_insert: %s\n", dv->dv_name));
533
534 dv->dv_dotdot = ddv;
535 if (DVTOV(dv)->v_type == VDIR) {
536 ddv->dv_nlink++; /* .. to containing directory */
537 dv->dv_nlink = 2; /* name + . */
538 } else {
539 dv->dv_nlink = 1; /* name */
540 }
541
542 /* enter node in the avl tree */
543 VERIFY(avl_find(&ddv->dv_entries, dv, &where) == NULL);
544 avl_insert(&ddv->dv_entries, dv, where);
545 }
546
547 /*
548 * Unlink a dv_node from a perent directory
549 */
550 void
dv_unlink(struct dv_node * ddv,struct dv_node * dv)551 dv_unlink(struct dv_node *ddv, struct dv_node *dv)
552 {
553 /* verify linkage of arguments */
554 ASSERT(ddv && dv);
555 ASSERT(dv->dv_dotdot == ddv);
556 ASSERT(RW_WRITE_HELD(&ddv->dv_contents));
557 ASSERT(DVTOV(ddv)->v_type == VDIR);
558
559 dcmn_err3(("dv_unlink: %s\n", dv->dv_name));
560
561 if (DVTOV(dv)->v_type == VDIR) {
562 ddv->dv_nlink--; /* .. to containing directory */
563 dv->dv_nlink -= 2; /* name + . */
564 } else {
565 dv->dv_nlink -= 1; /* name */
566 }
567 ASSERT(ddv->dv_nlink >= 2);
568 ASSERT(dv->dv_nlink == 0);
569
570 dv->dv_dotdot = NULL;
571
572 /* remove from avl tree */
573 avl_remove(&ddv->dv_entries, dv);
574 }
575
576 /*
577 * Merge devfs node specific information into an attribute structure.
578 *
579 * NOTE: specfs provides ATIME,MTIME,CTIME,SIZE,BLKSIZE,NBLOCKS on leaf node.
580 */
581 void
dv_vattr_merge(struct dv_node * dv,struct vattr * vap)582 dv_vattr_merge(struct dv_node *dv, struct vattr *vap)
583 {
584 struct vnode *vp = DVTOV(dv);
585
586 vap->va_nodeid = dv->dv_ino;
587 vap->va_nlink = dv->dv_nlink;
588
589 if (vp->v_type == VDIR) {
590 vap->va_rdev = 0;
591 vap->va_fsid = vp->v_rdev;
592 } else {
593 vap->va_rdev = vp->v_rdev;
594 vap->va_fsid = DVTOV(dv->dv_dotdot)->v_rdev;
595 vap->va_type = vp->v_type;
596 /* don't trust the shadow file type */
597 vap->va_mode &= ~S_IFMT;
598 if (vap->va_type == VCHR)
599 vap->va_mode |= S_IFCHR;
600 else
601 vap->va_mode |= S_IFBLK;
602 }
603 }
604
605 /*
606 * Get default device permission by consulting rules in
607 * privilege specification in minor node and /etc/minor_perm.
608 *
609 * This function is called from the devname filesystem to get default
610 * permissions for a device exported to a non-global zone.
611 */
612 void
devfs_get_defattr(struct vnode * vp,struct vattr * vap,int * no_fs_perm)613 devfs_get_defattr(struct vnode *vp, struct vattr *vap, int *no_fs_perm)
614 {
615 mperm_t mp;
616 struct dv_node *dv;
617
618 /* If vp isn't a dv_node, return something sensible */
619 if (!vn_matchops(vp, dv_vnodeops)) {
620 if (no_fs_perm)
621 *no_fs_perm = 0;
622 *vap = dv_vattr_file;
623 return;
624 }
625
626 /*
627 * For minors not created by ddi_create_priv_minor_node(),
628 * use devfs defaults.
629 */
630 dv = VTODV(vp);
631 if (vp->v_type == VDIR) {
632 *vap = dv_vattr_dir;
633 } else if (dv->dv_flags & DV_NO_FSPERM) {
634 if (no_fs_perm)
635 *no_fs_perm = 1;
636 *vap = dv_vattr_priv;
637 } else {
638 /*
639 * look up perm bits from minor_perm
640 */
641 *vap = dv_vattr_file;
642 if (dev_minorperm(dv->dv_devi, dv->dv_name, &mp) == 0) {
643 VATTR_MP_MERGE((*vap), mp);
644 dcmn_err5(("%s: minor perm mode 0%o\n",
645 dv->dv_name, vap->va_mode));
646 } else if (dv->dv_flags & DV_DFLT_MODE) {
647 ASSERT((dv->dv_dflt_mode & ~S_IAMB) == 0);
648 vap->va_mode &= ~S_IAMB;
649 vap->va_mode |= dv->dv_dflt_mode;
650 dcmn_err5(("%s: priv mode 0%o\n",
651 dv->dv_name, vap->va_mode));
652 }
653 }
654 }
655
656 /*
657 * dv_shadow_node
658 *
659 * Given a VDIR dv_node, find/create the associated VDIR
660 * node in the shadow attribute filesystem.
661 *
662 * Given a VCHR/VBLK dv_node, find the associated VREG
663 * node in the shadow attribute filesystem. These nodes
664 * are only created to persist non-default attributes.
665 * Lack of such a node implies the default permissions
666 * are sufficient.
667 *
668 * Managing the attribute file entries is slightly tricky (mostly
669 * because we can't intercept VN_HOLD and VN_RELE except on the last
670 * release).
671 *
672 * We assert that if the dv_attrvp pointer is non-NULL, it points
673 * to a singly-held (by us) vnode that represents the shadow entry
674 * in the underlying filesystem. To avoid store-ordering issues,
675 * we assert that the pointer can only be tested under the dv_contents
676 * READERS lock.
677 */
678
679 void
dv_shadow_node(struct vnode * dvp,char * nm,struct vnode * vp,struct pathname * pnp,struct vnode * rdir,struct cred * cred,int flags)680 dv_shadow_node(
681 struct vnode *dvp, /* devfs parent directory vnode */
682 char *nm, /* name component */
683 struct vnode *vp, /* devfs vnode */
684 struct pathname *pnp, /* the path .. */
685 struct vnode *rdir, /* the root .. */
686 struct cred *cred, /* who's asking? */
687 int flags) /* optionally create shadow node */
688 {
689 struct dv_node *dv; /* dv_node of named directory */
690 struct vnode *rdvp; /* shadow parent directory vnode */
691 struct vnode *rvp; /* shadow vnode */
692 struct vnode *rrvp; /* realvp of shadow vnode */
693 struct vattr vattr;
694 int create_tried;
695 int error;
696
697 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK);
698 dv = VTODV(vp);
699 dcmn_err3(("dv_shadow_node: name %s attr %p\n",
700 nm, (void *)dv->dv_attrvp));
701
702 if ((flags & DV_SHADOW_WRITE_HELD) == 0) {
703 ASSERT(RW_READ_HELD(&dv->dv_contents));
704 if (dv->dv_attrvp != NULLVP)
705 return;
706 if (!rw_tryupgrade(&dv->dv_contents)) {
707 rw_exit(&dv->dv_contents);
708 rw_enter(&dv->dv_contents, RW_WRITER);
709 if (dv->dv_attrvp != NULLVP) {
710 rw_downgrade(&dv->dv_contents);
711 return;
712 }
713 }
714 } else {
715 ASSERT(RW_WRITE_HELD(&dv->dv_contents));
716 if (dv->dv_attrvp != NULLVP)
717 return;
718 }
719
720 ASSERT(RW_WRITE_HELD(&dv->dv_contents) && dv->dv_attrvp == NULL);
721
722 rdvp = VTODV(dvp)->dv_attrvp;
723 create_tried = 0;
724 lookup:
725 if (rdvp && (dv->dv_flags & DV_NO_FSPERM) == 0) {
726 error = VOP_LOOKUP(rdvp, nm, &rvp, pnp, LOOKUP_DIR, rdir, cred,
727 NULL, NULL, NULL);
728
729 /* factor out the snode since we only want the attribute node */
730 if ((error == 0) && (VOP_REALVP(rvp, &rrvp, NULL) == 0)) {
731 VN_HOLD(rrvp);
732 VN_RELE(rvp);
733 rvp = rrvp;
734 }
735 } else
736 error = EROFS; /* no parent, no entry */
737
738 /*
739 * All we want is the permissions (and maybe ACLs and
740 * extended attributes), and we want to perform lookups
741 * by name. Drivers occasionally change their minor
742 * number space. If something changes, there's no
743 * much we can do about it here.
744 */
745
746 /* The shadow node checks out. We are done */
747 if (error == 0) {
748 dv->dv_attrvp = rvp; /* with one hold */
749
750 /*
751 * Determine if we have non-trivial ACLs on this node.
752 * It is not necessary to VOP_RWLOCK since fs_acl_nontrivial
753 * only does VOP_GETSECATTR.
754 */
755 dv->dv_flags &= ~DV_ACL;
756
757 if (fs_acl_nontrivial(rvp, cred))
758 dv->dv_flags |= DV_ACL;
759
760 /*
761 * If we have synced out the memory attributes, free
762 * them and switch back to using the persistent store.
763 */
764 if (rvp && dv->dv_attr) {
765 kmem_free(dv->dv_attr, sizeof (struct vattr));
766 dv->dv_attr = NULL;
767 }
768 if ((flags & DV_SHADOW_WRITE_HELD) == 0)
769 rw_downgrade(&dv->dv_contents);
770 ASSERT(RW_LOCK_HELD(&dv->dv_contents));
771 return;
772 }
773
774 /*
775 * Failed to find attribute in persistent backing store,
776 * get default permission bits.
777 */
778 devfs_get_defattr(vp, &vattr, NULL);
779
780 dv_vattr_merge(dv, &vattr);
781 gethrestime(&vattr.va_atime);
782 vattr.va_mtime = vattr.va_atime;
783 vattr.va_ctime = vattr.va_atime;
784
785 /*
786 * Try to create shadow dir. This is necessary in case
787 * we need to create a shadow leaf node later, when user
788 * executes chmod.
789 */
790 if ((error == ENOENT) && !create_tried) {
791 switch (vp->v_type) {
792 case VDIR:
793 error = VOP_MKDIR(rdvp, nm, &vattr, &rvp, kcred,
794 NULL, 0, NULL);
795 dsysdebug(error, ("vop_mkdir %s %s %d\n",
796 VTODV(dvp)->dv_name, nm, error));
797 create_tried = 1;
798 break;
799
800 case VCHR:
801 case VBLK:
802 /*
803 * Shadow nodes are only created on demand
804 */
805 if (flags & DV_SHADOW_CREATE) {
806 error = VOP_CREATE(rdvp, nm, &vattr, NONEXCL,
807 VREAD|VWRITE, &rvp, kcred, 0, NULL, NULL);
808 dsysdebug(error, ("vop_create %s %s %d\n",
809 VTODV(dvp)->dv_name, nm, error));
810 create_tried = 1;
811 }
812 break;
813
814 default:
815 cmn_err(CE_PANIC, "devfs: %s: create", dvnm);
816 /*NOTREACHED*/
817 }
818
819 if (create_tried &&
820 (error == 0) || (error == EEXIST)) {
821 VN_RELE(rvp);
822 goto lookup;
823 }
824 }
825
826 /* Store attribute in memory */
827 if (dv->dv_attr == NULL) {
828 dv->dv_attr = kmem_alloc(sizeof (struct vattr), KM_SLEEP);
829 *(dv->dv_attr) = vattr;
830 }
831
832 if ((flags & DV_SHADOW_WRITE_HELD) == 0)
833 rw_downgrade(&dv->dv_contents);
834 ASSERT(RW_LOCK_HELD(&dv->dv_contents));
835 }
836
837 /*
838 * Given a devinfo node, and a name, returns the appropriate
839 * minor information for that named node, if it exists.
840 */
841 static int
dv_find_leafnode(dev_info_t * devi,char * minor_nm,struct ddi_minor_data * r_mi)842 dv_find_leafnode(dev_info_t *devi, char *minor_nm, struct ddi_minor_data *r_mi)
843 {
844 struct ddi_minor_data *dmd;
845
846 ASSERT(i_ddi_devi_attached(devi));
847
848 dcmn_err3(("dv_find_leafnode: %s\n", minor_nm));
849 ASSERT(DEVI_BUSY_OWNED(devi));
850 for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) {
851
852 /*
853 * Skip alias nodes and nodes without a name.
854 */
855 if ((dmd->type == DDM_ALIAS) || (dmd->ddm_name == NULL))
856 continue;
857
858 dcmn_err4(("dv_find_leafnode: (%s,%s)\n",
859 minor_nm, dmd->ddm_name));
860 if (strcmp(minor_nm, dmd->ddm_name) == 0) {
861 r_mi->ddm_dev = dmd->ddm_dev;
862 r_mi->ddm_spec_type = dmd->ddm_spec_type;
863 r_mi->type = dmd->type;
864 r_mi->ddm_flags = dmd->ddm_flags;
865 r_mi->ddm_node_priv = dmd->ddm_node_priv;
866 r_mi->ddm_priv_mode = dmd->ddm_priv_mode;
867 if (r_mi->ddm_node_priv)
868 dphold(r_mi->ddm_node_priv);
869 return (0);
870 }
871 }
872
873 dcmn_err3(("dv_find_leafnode: %s: ENOENT\n", minor_nm));
874 return (ENOENT);
875 }
876
877 /*
878 * Special handling for clone node:
879 * Clone minor name is a driver name, the minor number will
880 * be the major number of the driver. There is no minor
881 * node under the clone driver, so we'll manufacture the
882 * dev_t.
883 */
884 static struct dv_node *
dv_clone_mknod(struct dv_node * ddv,char * drvname)885 dv_clone_mknod(struct dv_node *ddv, char *drvname)
886 {
887 major_t major;
888 struct dv_node *dvp;
889 char *devnm;
890 struct ddi_minor_data *dmd;
891
892 /*
893 * Make sure drvname is a STREAMS driver. We load the driver,
894 * but don't attach to any instances. This makes stat(2)
895 * relatively cheap.
896 */
897 major = ddi_name_to_major(drvname);
898 if (major == DDI_MAJOR_T_NONE)
899 return (NULL);
900
901 if (ddi_hold_driver(major) == NULL)
902 return (NULL);
903
904 if (STREAMSTAB(major) == NULL) {
905 ddi_rele_driver(major);
906 return (NULL);
907 }
908
909 ddi_rele_driver(major);
910 devnm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
911 (void) snprintf(devnm, MAXNAMELEN, "clone@0:%s", drvname);
912 dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP);
913 dmd->ddm_dev = makedevice(clone_major, (minor_t)major);
914 dmd->ddm_spec_type = S_IFCHR;
915 dvp = dv_mknod(ddv, clone_dip, devnm, dmd);
916 kmem_free(dmd, sizeof (*dmd));
917 kmem_free(devnm, MAXNAMELEN);
918 return (dvp);
919 }
920
921 /*
922 * Given the parent directory node, and a name in it, returns the
923 * named dv_node to the caller (as a vnode).
924 *
925 * (We need pnp and rdir for doing shadow lookups; they can be NULL)
926 */
927 int
dv_find(struct dv_node * ddv,char * nm,struct vnode ** vpp,struct pathname * pnp,struct vnode * rdir,struct cred * cred,uint_t ndi_flags)928 dv_find(struct dv_node *ddv, char *nm, struct vnode **vpp, struct pathname *pnp,
929 struct vnode *rdir, struct cred *cred, uint_t ndi_flags)
930 {
931 extern int isminiroot; /* see modctl.c */
932
933 int rv = 0, was_busy = 0, nmlen, write_held = 0;
934 struct vnode *vp;
935 struct dv_node *dv, *dup;
936 dev_info_t *pdevi, *devi = NULL;
937 char *mnm;
938 struct ddi_minor_data *dmd;
939
940 dcmn_err3(("dv_find %s\n", nm));
941
942 if (!rw_tryenter(&ddv->dv_contents, RW_READER)) {
943 if (tsd_get(devfs_clean_key))
944 return (EBUSY);
945 rw_enter(&ddv->dv_contents, RW_READER);
946 }
947 start:
948 if (DV_STALE(ddv)) {
949 rw_exit(&ddv->dv_contents);
950 return (ESTALE);
951 }
952
953 /*
954 * Empty name or ., return node itself.
955 */
956 nmlen = strlen(nm);
957 if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
958 *vpp = DVTOV(ddv);
959 rw_exit(&ddv->dv_contents);
960 VN_HOLD(*vpp);
961 return (0);
962 }
963
964 /*
965 * .., return the parent directory
966 */
967 if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
968 *vpp = DVTOV(ddv->dv_dotdot);
969 rw_exit(&ddv->dv_contents);
970 VN_HOLD(*vpp);
971 return (0);
972 }
973
974 /*
975 * Fail anything without a valid device name component
976 */
977 if (nm[0] == '@' || nm[0] == ':') {
978 dcmn_err3(("devfs: no driver '%s'\n", nm));
979 rw_exit(&ddv->dv_contents);
980 return (ENOENT);
981 }
982
983 /*
984 * So, now we have to deal with the trickier stuff.
985 *
986 * (a) search the existing list of dv_nodes on this directory
987 */
988 if ((dv = dv_findbyname(ddv, nm)) != NULL) {
989 founddv:
990 ASSERT(RW_LOCK_HELD(&ddv->dv_contents));
991
992 if (!rw_tryenter(&dv->dv_contents, RW_READER)) {
993 if (tsd_get(devfs_clean_key)) {
994 VN_RELE(DVTOV(dv));
995 rw_exit(&ddv->dv_contents);
996 return (EBUSY);
997 }
998 rw_enter(&dv->dv_contents, RW_READER);
999 }
1000
1001 vp = DVTOV(dv);
1002 if ((dv->dv_attrvp != NULLVP) ||
1003 (vp->v_type != VDIR && dv->dv_attr != NULL)) {
1004 /*
1005 * Common case - we already have attributes
1006 */
1007 rw_exit(&dv->dv_contents);
1008 rw_exit(&ddv->dv_contents);
1009 goto found;
1010 }
1011
1012 /*
1013 * No attribute vp, try and build one.
1014 *
1015 * dv_shadow_node() can briefly drop &dv->dv_contents lock
1016 * if it is unable to upgrade it to a write lock. If the
1017 * current thread has come in through the bottom-up device
1018 * configuration devfs_clean() path, we may deadlock against
1019 * a thread performing top-down device configuration if it
1020 * grabs the contents lock. To avoid this, when we are on the
1021 * devfs_clean() path we attempt to upgrade the dv_contents
1022 * lock before we call dv_shadow_node().
1023 */
1024 if (tsd_get(devfs_clean_key)) {
1025 if (!rw_tryupgrade(&dv->dv_contents)) {
1026 VN_RELE(DVTOV(dv));
1027 rw_exit(&dv->dv_contents);
1028 rw_exit(&ddv->dv_contents);
1029 return (EBUSY);
1030 }
1031
1032 write_held = DV_SHADOW_WRITE_HELD;
1033 }
1034
1035 dv_shadow_node(DVTOV(ddv), nm, vp, pnp, rdir, cred,
1036 write_held);
1037
1038 rw_exit(&dv->dv_contents);
1039 rw_exit(&ddv->dv_contents);
1040 goto found;
1041 }
1042
1043 /*
1044 * (b) Search the child devinfo nodes of our parent directory,
1045 * looking for the named node. If we find it, build a new
1046 * node, then grab the writers lock, search the directory
1047 * if it's still not there, then insert it.
1048 *
1049 * We drop the devfs locks before accessing the device tree.
1050 * Take care to mark the node BUSY so that a forced devfs_clean
1051 * doesn't mark the directory node stale.
1052 *
1053 * Also, check if we are called as part of devfs_clean or
1054 * reset_perm. If so, simply return not found because there
1055 * is nothing to clean.
1056 */
1057 if (tsd_get(devfs_clean_key)) {
1058 rw_exit(&ddv->dv_contents);
1059 return (ENOENT);
1060 }
1061
1062 /*
1063 * We could be either READ or WRITE locked at
1064 * this point. Upgrade if we are read locked.
1065 */
1066 ASSERT(RW_LOCK_HELD(&ddv->dv_contents));
1067 if (rw_read_locked(&ddv->dv_contents) &&
1068 !rw_tryupgrade(&ddv->dv_contents)) {
1069 rw_exit(&ddv->dv_contents);
1070 rw_enter(&ddv->dv_contents, RW_WRITER);
1071 /*
1072 * Things may have changed when we dropped
1073 * the contents lock, so start from top again
1074 */
1075 goto start;
1076 }
1077 ddv->dv_busy++; /* mark busy before dropping lock */
1078 was_busy++;
1079 rw_exit(&ddv->dv_contents);
1080
1081 pdevi = ddv->dv_devi;
1082 ASSERT(pdevi != NULL);
1083
1084 mnm = strchr(nm, ':');
1085 if (mnm)
1086 *mnm = (char)0;
1087
1088 /*
1089 * Configure one nexus child, will call nexus's bus_ops
1090 * If successful, devi is held upon returning.
1091 * Note: devfs lookup should not be configuring grandchildren.
1092 */
1093 ASSERT((ndi_flags & NDI_CONFIG) == 0);
1094
1095 rv = ndi_devi_config_one(pdevi, nm, &devi, ndi_flags | NDI_NO_EVENT);
1096 if (mnm)
1097 *mnm = ':';
1098 if (rv != NDI_SUCCESS) {
1099 rv = ENOENT;
1100 goto notfound;
1101 }
1102
1103 ASSERT(devi);
1104
1105 /* Check if this is a path alias */
1106 if (ddi_aliases_present == B_TRUE && ddi_get_parent(devi) != pdevi) {
1107 char *curr = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1108
1109 (void) ddi_pathname(devi, curr);
1110
1111 vp = NULL;
1112 if (devfs_lookupname(curr, NULL, &vp) == 0 && vp) {
1113 dv = VTODV(vp);
1114 kmem_free(curr, MAXPATHLEN);
1115 goto found;
1116 }
1117 kmem_free(curr, MAXPATHLEN);
1118 }
1119
1120 /*
1121 * If we configured a hidden node, consider it notfound.
1122 */
1123 if (ndi_dev_is_hidden_node(devi)) {
1124 ndi_rele_devi(devi);
1125 rv = ENOENT;
1126 goto notfound;
1127 }
1128
1129 /*
1130 * Don't make vhci clients visible under phci, unless we
1131 * are in miniroot.
1132 */
1133 if (isminiroot == 0 && ddi_get_parent(devi) != pdevi) {
1134 ndi_rele_devi(devi);
1135 rv = ENOENT;
1136 goto notfound;
1137 }
1138
1139 ASSERT(devi && i_ddi_devi_attached(devi));
1140
1141 /*
1142 * Invalidate cache to notice newly created minor nodes.
1143 */
1144 rw_enter(&ddv->dv_contents, RW_WRITER);
1145 ddv->dv_flags |= DV_BUILD;
1146 rw_exit(&ddv->dv_contents);
1147
1148 /*
1149 * mkdir for nexus drivers and leaf nodes as well. If we are racing
1150 * and create a duplicate, the duplicate will be destroyed below.
1151 */
1152 if (mnm == NULL) {
1153 dv = dv_mkdir(ddv, devi, nm);
1154 } else {
1155 /*
1156 * Allocate dmd first to avoid KM_SLEEP with active
1157 * ndi_devi_enter.
1158 */
1159 dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP);
1160 ndi_devi_enter(devi);
1161 if (devi == clone_dip) {
1162 /*
1163 * For clone minors, load the driver indicated by
1164 * minor name.
1165 */
1166 dv = dv_clone_mknod(ddv, mnm + 1);
1167 } else {
1168 /*
1169 * Find minor node and make a dv_node
1170 */
1171 if (dv_find_leafnode(devi, mnm + 1, dmd) == 0) {
1172 dv = dv_mknod(ddv, devi, nm, dmd);
1173 if (dmd->ddm_node_priv)
1174 dpfree(dmd->ddm_node_priv);
1175 }
1176 }
1177 ndi_devi_exit(devi);
1178 kmem_free(dmd, sizeof (*dmd));
1179 }
1180 /*
1181 * Release hold from ndi_devi_config_one()
1182 */
1183 ndi_rele_devi(devi);
1184
1185 if (dv == NULL) {
1186 rv = ENOENT;
1187 goto notfound;
1188 }
1189
1190 /*
1191 * We have released the dv_contents lock, need to check
1192 * if another thread already created a duplicate node
1193 */
1194 rw_enter(&ddv->dv_contents, RW_WRITER);
1195 if ((dup = dv_findbyname(ddv, nm)) == NULL) {
1196 dv_insert(ddv, dv);
1197 } else {
1198 /*
1199 * Duplicate found, use the existing node
1200 */
1201 VN_RELE(DVTOV(dv));
1202 dv_destroy(dv, 0);
1203 dv = dup;
1204 }
1205 goto founddv;
1206 /*NOTREACHED*/
1207
1208 found:
1209 /*
1210 * Fail lookup of device that has now become hidden (typically via
1211 * hot removal of open device).
1212 */
1213 if (dv->dv_devi && ndi_dev_is_hidden_node(dv->dv_devi)) {
1214 dcmn_err2(("dv_find: nm %s failed: hidden/removed\n", nm));
1215 VN_RELE(vp);
1216 rv = ENOENT;
1217 goto notfound;
1218 }
1219
1220 /*
1221 * Skip non-kernel lookups of internal nodes.
1222 * This use of kcred to distinguish between user and
1223 * internal kernel lookups is unfortunate. The information
1224 * provided by the seg argument to lookupnameat should
1225 * evolve into a lookup flag for filesystems that need
1226 * this distinction.
1227 */
1228 if ((dv->dv_flags & DV_INTERNAL) && (cred != kcred)) {
1229 dcmn_err2(("dv_find: nm %s failed: internal\n", nm));
1230 VN_RELE(vp);
1231 rv = ENOENT;
1232 goto notfound;
1233 }
1234
1235 dcmn_err2(("dv_find: returning vp for nm %s\n", nm));
1236 if (vp->v_type == VCHR || vp->v_type == VBLK) {
1237 /*
1238 * If vnode is a device, return special vnode instead
1239 * (though it knows all about -us- via sp->s_realvp,
1240 * sp->s_devvp, and sp->s_dip)
1241 */
1242 *vpp = specvp_devfs(vp, vp->v_rdev, vp->v_type, cred,
1243 dv->dv_devi);
1244 VN_RELE(vp);
1245 if (*vpp == NULLVP)
1246 rv = ENOSYS;
1247 } else
1248 *vpp = vp;
1249
1250 notfound:
1251 if (was_busy) {
1252 /*
1253 * Non-zero was_busy tells us that we are not in the
1254 * devfs_clean() path which in turn means that we can afford
1255 * to take the contents lock unconditionally.
1256 */
1257 rw_enter(&ddv->dv_contents, RW_WRITER);
1258 ddv->dv_busy--;
1259 rw_exit(&ddv->dv_contents);
1260 }
1261 return (rv);
1262 }
1263
1264 /*
1265 * The given directory node is out-of-date; that is, it has been
1266 * marked as needing to be rebuilt, possibly because some new devinfo
1267 * node has come into existence, or possibly because this is the first
1268 * time we've been here.
1269 */
1270 void
dv_filldir(struct dv_node * ddv)1271 dv_filldir(struct dv_node *ddv)
1272 {
1273 struct dv_node *dv;
1274 dev_info_t *devi, *pdevi;
1275 struct ddi_minor_data *dmd;
1276 char devnm[MAXNAMELEN];
1277
1278 ASSERT(DVTOV(ddv)->v_type == VDIR);
1279 ASSERT(RW_WRITE_HELD(&ddv->dv_contents));
1280 ASSERT(ddv->dv_flags & DV_BUILD);
1281
1282 dcmn_err3(("dv_filldir: %s\n", ddv->dv_name));
1283 if (DV_STALE(ddv))
1284 return;
1285 pdevi = ddv->dv_devi;
1286
1287 if (ndi_devi_config(pdevi, NDI_NO_EVENT) != NDI_SUCCESS) {
1288 dcmn_err3(("dv_filldir: config error %s\n", ddv->dv_name));
1289 }
1290
1291 ndi_devi_enter(pdevi);
1292 for (devi = ddi_get_child(pdevi); devi;
1293 devi = ddi_get_next_sibling(devi)) {
1294 /*
1295 * While we know enough to create a directory at DS_INITIALIZED,
1296 * the directory will be empty until DS_ATTACHED. The existence
1297 * of an empty directory dv_node will cause a devi_ref, which
1298 * has caused problems for existing code paths doing offline/DR
1299 * type operations - making devfs_clean coordination even more
1300 * sensitive and error prone. Given this, the 'continue' below
1301 * is checking for DS_ATTACHED instead of DS_INITIALIZED.
1302 */
1303 if (i_ddi_node_state(devi) < DS_ATTACHED)
1304 continue;
1305
1306 /* skip hidden nodes */
1307 if (ndi_dev_is_hidden_node(devi))
1308 continue;
1309
1310 dcmn_err3(("dv_filldir: node %s\n", ddi_node_name(devi)));
1311
1312 ndi_devi_enter(devi);
1313 for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) {
1314 char *addr;
1315
1316 /*
1317 * Skip alias nodes, internal nodes, and nodes
1318 * without a name. We allow DDM_DEFAULT nodes
1319 * to appear in readdir.
1320 */
1321 if ((dmd->type == DDM_ALIAS) ||
1322 (dmd->type == DDM_INTERNAL_PATH) ||
1323 (dmd->ddm_name == NULL))
1324 continue;
1325
1326 addr = ddi_get_name_addr(devi);
1327 if (addr && *addr)
1328 (void) sprintf(devnm, "%s@%s:%s",
1329 ddi_node_name(devi), addr, dmd->ddm_name);
1330 else
1331 (void) sprintf(devnm, "%s:%s",
1332 ddi_node_name(devi), dmd->ddm_name);
1333
1334 if ((dv = dv_findbyname(ddv, devnm)) != NULL) {
1335 /* dv_node already exists */
1336 VN_RELE(DVTOV(dv));
1337 continue;
1338 }
1339
1340 dv = dv_mknod(ddv, devi, devnm, dmd);
1341 dv_insert(ddv, dv);
1342 VN_RELE(DVTOV(dv));
1343 }
1344 ndi_devi_exit(devi);
1345
1346 (void) ddi_deviname(devi, devnm);
1347 if ((dv = dv_findbyname(ddv, devnm + 1)) == NULL) {
1348 /* directory doesn't exist */
1349 dv = dv_mkdir(ddv, devi, devnm + 1);
1350 dv_insert(ddv, dv);
1351 }
1352 VN_RELE(DVTOV(dv));
1353 }
1354 ndi_devi_exit(pdevi);
1355
1356 ddv->dv_flags &= ~DV_BUILD;
1357 }
1358
1359 /*
1360 * Given a directory node, clean out all the nodes beneath.
1361 *
1362 * VDIR: Reinvoke to clean them, then delete the directory.
1363 * VCHR, VBLK: Just blow them away.
1364 *
1365 * Mark the directories touched as in need of a rebuild, in case
1366 * we fall over part way through. When DV_CLEAN_FORCE is specified,
1367 * we mark referenced empty directories as stale to facilitate DR.
1368 */
1369 int
dv_cleandir(struct dv_node * ddv,char * devnm,uint_t flags)1370 dv_cleandir(struct dv_node *ddv, char *devnm, uint_t flags)
1371 {
1372 struct dv_node *dv;
1373 struct dv_node *next;
1374 struct vnode *vp;
1375 int busy = 0;
1376
1377 /*
1378 * We should always be holding the tsd_clean_key here: dv_cleandir()
1379 * will be called as a result of a devfs_clean request and the
1380 * tsd_clean_key will be set in either in devfs_clean() itself or in
1381 * devfs_clean_vhci().
1382 *
1383 * Since we are on the devfs_clean path, we return EBUSY if we cannot
1384 * get the contents lock: if we blocked here we might deadlock against
1385 * a thread performing top-down device configuration.
1386 */
1387 ASSERT(tsd_get(devfs_clean_key));
1388
1389 dcmn_err3(("dv_cleandir: %s\n", ddv->dv_name));
1390
1391 if (!(flags & DV_CLEANDIR_LCK) &&
1392 !rw_tryenter(&ddv->dv_contents, RW_WRITER))
1393 return (EBUSY);
1394
1395 for (dv = DV_FIRST_ENTRY(ddv); dv; dv = next) {
1396 next = DV_NEXT_ENTRY(ddv, dv);
1397
1398 /*
1399 * If devnm is specified, the non-minor portion of the
1400 * name must match devnm.
1401 */
1402 if (devnm &&
1403 (strncmp(devnm, dv->dv_name, strlen(devnm)) ||
1404 (dv->dv_name[strlen(devnm)] != ':' &&
1405 dv->dv_name[strlen(devnm)] != '\0')))
1406 continue;
1407
1408 /* check type of what we are cleaning */
1409 vp = DVTOV(dv);
1410 if (vp->v_type == VDIR) {
1411 /* recurse on directories */
1412 rw_enter(&dv->dv_contents, RW_WRITER);
1413 if (dv_cleandir(dv, NULL,
1414 flags | DV_CLEANDIR_LCK) == EBUSY) {
1415 rw_exit(&dv->dv_contents);
1416 goto set_busy;
1417 }
1418
1419 /* A clean directory is an empty directory... */
1420 ASSERT(dv->dv_nlink == 2);
1421 mutex_enter(&vp->v_lock);
1422 if (vp->v_count > 0) {
1423 /*
1424 * ... but an empty directory can still have
1425 * references to it. If we have dv_busy or
1426 * DV_CLEAN_FORCE is *not* specified then a
1427 * referenced directory is considered busy.
1428 */
1429 if (dv->dv_busy || !(flags & DV_CLEAN_FORCE)) {
1430 mutex_exit(&vp->v_lock);
1431 rw_exit(&dv->dv_contents);
1432 goto set_busy;
1433 }
1434
1435 /*
1436 * Mark referenced directory stale so that DR
1437 * will succeed even if a shell has
1438 * /devices/xxx as current directory (causing
1439 * VN_HOLD reference to an empty directory).
1440 */
1441 ASSERT(!DV_STALE(dv));
1442 ndi_rele_devi(dv->dv_devi);
1443 dv->dv_devi = NULL; /* mark DV_STALE */
1444 }
1445 } else {
1446 ASSERT((vp->v_type == VCHR) || (vp->v_type == VBLK));
1447 ASSERT(dv->dv_nlink == 1); /* no hard links */
1448 mutex_enter(&vp->v_lock);
1449 if (vp->v_count > 0) {
1450 mutex_exit(&vp->v_lock);
1451 goto set_busy;
1452 }
1453 }
1454
1455 /* unlink from directory */
1456 dv_unlink(ddv, dv);
1457
1458 /* drop locks */
1459 mutex_exit(&vp->v_lock);
1460 if (vp->v_type == VDIR)
1461 rw_exit(&dv->dv_contents);
1462
1463 /* destroy vnode if ref count is zero */
1464 if (vp->v_count == 0)
1465 dv_destroy(dv, flags);
1466
1467 continue;
1468
1469 /*
1470 * If devnm is not NULL we return immediately on busy,
1471 * otherwise we continue destroying unused dv_node's.
1472 */
1473 set_busy: busy++;
1474 if (devnm)
1475 break;
1476 }
1477
1478 /*
1479 * This code may be invoked to inform devfs that a new node has
1480 * been created in the kernel device tree. So we always set
1481 * the DV_BUILD flag to allow the next dv_filldir() to pick
1482 * the new devinfo nodes.
1483 */
1484 ddv->dv_flags |= DV_BUILD;
1485
1486 if (!(flags & DV_CLEANDIR_LCK))
1487 rw_exit(&ddv->dv_contents);
1488
1489 return (busy ? EBUSY : 0);
1490 }
1491
1492 /*
1493 * Walk through the devfs hierarchy, correcting the permissions of
1494 * devices with default permissions that do not match those specified
1495 * by minor perm. This can only be done for all drivers for now.
1496 */
1497 static int
dv_reset_perm_dir(struct dv_node * ddv,uint_t flags)1498 dv_reset_perm_dir(struct dv_node *ddv, uint_t flags)
1499 {
1500 struct dv_node *dv;
1501 struct vnode *vp;
1502 int retval = 0;
1503 struct vattr *attrp;
1504 mperm_t mp;
1505 char *nm;
1506 uid_t old_uid;
1507 gid_t old_gid;
1508 mode_t old_mode;
1509
1510 rw_enter(&ddv->dv_contents, RW_WRITER);
1511 for (dv = DV_FIRST_ENTRY(ddv); dv; dv = DV_NEXT_ENTRY(ddv, dv)) {
1512 int error = 0;
1513 nm = dv->dv_name;
1514
1515 rw_enter(&dv->dv_contents, RW_READER);
1516 vp = DVTOV(dv);
1517 if (vp->v_type == VDIR) {
1518 rw_exit(&dv->dv_contents);
1519 if (dv_reset_perm_dir(dv, flags) != 0) {
1520 error = EBUSY;
1521 }
1522 } else {
1523 ASSERT(vp->v_type == VCHR || vp->v_type == VBLK);
1524
1525 /*
1526 * Check for permissions from minor_perm
1527 * If there are none, we're done
1528 */
1529 rw_exit(&dv->dv_contents);
1530 if (dev_minorperm(dv->dv_devi, nm, &mp) != 0)
1531 continue;
1532
1533 rw_enter(&dv->dv_contents, RW_READER);
1534
1535 /*
1536 * Allow a node's permissions to be altered
1537 * permanently from the defaults by chmod,
1538 * using the shadow node as backing store.
1539 * Otherwise, update node to minor_perm permissions.
1540 */
1541 if (dv->dv_attrvp == NULLVP) {
1542 /*
1543 * No attribute vp, try to find one.
1544 */
1545 dv_shadow_node(DVTOV(ddv), nm, vp,
1546 NULL, NULLVP, kcred, 0);
1547 }
1548 if (dv->dv_attrvp != NULLVP || dv->dv_attr == NULL) {
1549 rw_exit(&dv->dv_contents);
1550 continue;
1551 }
1552
1553 attrp = dv->dv_attr;
1554
1555 if (VATTRP_MP_CMP(attrp, mp) == 0) {
1556 dcmn_err5(("%s: no perm change: "
1557 "%d %d 0%o\n", nm, attrp->va_uid,
1558 attrp->va_gid, attrp->va_mode));
1559 rw_exit(&dv->dv_contents);
1560 continue;
1561 }
1562
1563 old_uid = attrp->va_uid;
1564 old_gid = attrp->va_gid;
1565 old_mode = attrp->va_mode;
1566
1567 VATTRP_MP_MERGE(attrp, mp);
1568 mutex_enter(&vp->v_lock);
1569 if (vp->v_count > 0) {
1570 error = EBUSY;
1571 }
1572 mutex_exit(&vp->v_lock);
1573
1574 dcmn_err5(("%s: perm %d/%d/0%o -> %d/%d/0%o (%d)\n",
1575 nm, old_uid, old_gid, old_mode, attrp->va_uid,
1576 attrp->va_gid, attrp->va_mode, error));
1577
1578 rw_exit(&dv->dv_contents);
1579 }
1580
1581 if (error != 0) {
1582 retval = error;
1583 }
1584 }
1585
1586 ddv->dv_flags |= DV_BUILD;
1587
1588 rw_exit(&ddv->dv_contents);
1589
1590 return (retval);
1591 }
1592
1593 int
devfs_reset_perm(uint_t flags)1594 devfs_reset_perm(uint_t flags)
1595 {
1596 struct dv_node *dvp;
1597 int rval;
1598
1599 if ((dvp = devfs_dip_to_dvnode(ddi_root_node())) == NULL)
1600 return (0);
1601
1602 VN_HOLD(DVTOV(dvp));
1603 rval = dv_reset_perm_dir(dvp, flags);
1604 VN_RELE(DVTOV(dvp));
1605 return (rval);
1606 }
1607
1608 /*
1609 * Clean up dangling devfs shadow nodes for removed
1610 * drivers so that, in the event the driver is re-added
1611 * to the system, newly created nodes won't incorrectly
1612 * pick up these stale shadow node permissions.
1613 *
1614 * This is accomplished by walking down the pathname
1615 * to the directory, starting at the root's attribute
1616 * node, then removing all minors matching the specified
1617 * node name. Care must be taken to remove all entries
1618 * in a directory before the directory itself, so that
1619 * the clean-up associated with rem_drv'ing a nexus driver
1620 * does not inadvertently result in an inconsistent
1621 * filesystem underlying devfs.
1622 */
1623
1624 static int
devfs_remdrv_rmdir(vnode_t * dirvp,const char * dir,vnode_t * rvp)1625 devfs_remdrv_rmdir(vnode_t *dirvp, const char *dir, vnode_t *rvp)
1626 {
1627 int error;
1628 vnode_t *vp;
1629 int eof;
1630 struct iovec iov;
1631 struct uio uio;
1632 struct dirent64 *dp;
1633 dirent64_t *dbuf;
1634 size_t dlen;
1635 size_t dbuflen;
1636 int ndirents = 64;
1637 char *nm;
1638
1639 VN_HOLD(dirvp);
1640
1641 dlen = ndirents * (sizeof (*dbuf));
1642 dbuf = kmem_alloc(dlen, KM_SLEEP);
1643
1644 uio.uio_iov = &iov;
1645 uio.uio_iovcnt = 1;
1646 uio.uio_segflg = UIO_SYSSPACE;
1647 uio.uio_fmode = 0;
1648 uio.uio_extflg = UIO_COPY_CACHED;
1649 uio.uio_loffset = 0;
1650 uio.uio_llimit = MAXOFFSET_T;
1651
1652 eof = 0;
1653 error = 0;
1654 while (!error && !eof) {
1655 uio.uio_resid = dlen;
1656 iov.iov_base = (char *)dbuf;
1657 iov.iov_len = dlen;
1658
1659 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1660 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1661 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1662
1663 dbuflen = dlen - uio.uio_resid;
1664
1665 if (error || dbuflen == 0)
1666 break;
1667
1668 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
1669 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1670
1671 nm = dp->d_name;
1672
1673 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
1674 continue;
1675
1676 error = VOP_LOOKUP(dirvp, nm,
1677 &vp, NULL, 0, NULL, kcred, NULL, NULL, NULL);
1678
1679 dsysdebug(error,
1680 ("rem_drv %s/%s lookup (%d)\n",
1681 dir, nm, error));
1682
1683 if (error)
1684 continue;
1685
1686 ASSERT(vp->v_type == VDIR ||
1687 vp->v_type == VCHR || vp->v_type == VBLK);
1688
1689 if (vp->v_type == VDIR) {
1690 error = devfs_remdrv_rmdir(vp, nm, rvp);
1691 if (error == 0) {
1692 error = VOP_RMDIR(dirvp,
1693 (char *)nm, rvp, kcred, NULL, 0);
1694 dsysdebug(error,
1695 ("rem_drv %s/%s rmdir (%d)\n",
1696 dir, nm, error));
1697 }
1698 } else {
1699 error = VOP_REMOVE(dirvp, (char *)nm, kcred,
1700 NULL, 0);
1701 dsysdebug(error,
1702 ("rem_drv %s/%s remove (%d)\n",
1703 dir, nm, error));
1704 }
1705
1706 VN_RELE(vp);
1707 if (error) {
1708 goto exit;
1709 }
1710 }
1711 }
1712
1713 exit:
1714 VN_RELE(dirvp);
1715 kmem_free(dbuf, dlen);
1716
1717 return (error);
1718 }
1719
1720 int
devfs_remdrv_cleanup(const char * dir,const char * nodename)1721 devfs_remdrv_cleanup(const char *dir, const char *nodename)
1722 {
1723 int error;
1724 vnode_t *vp;
1725 vnode_t *dirvp;
1726 int eof;
1727 struct iovec iov;
1728 struct uio uio;
1729 struct dirent64 *dp;
1730 dirent64_t *dbuf;
1731 size_t dlen;
1732 size_t dbuflen;
1733 int ndirents = 64;
1734 int nodenamelen = strlen(nodename);
1735 char *nm;
1736 struct pathname pn;
1737 vnode_t *rvp; /* root node of the underlying attribute fs */
1738
1739 dcmn_err5(("devfs_remdrv_cleanup: %s %s\n", dir, nodename));
1740
1741 if (error = pn_get((char *)dir, UIO_SYSSPACE, &pn))
1742 return (0);
1743
1744 rvp = dvroot->dv_attrvp;
1745 ASSERT(rvp != NULL);
1746 VN_HOLD(rvp);
1747
1748 pn_skipslash(&pn);
1749 dirvp = rvp;
1750 VN_HOLD(dirvp);
1751
1752 nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
1753
1754 while (pn_pathleft(&pn)) {
1755 ASSERT(dirvp->v_type == VDIR);
1756 (void) pn_getcomponent(&pn, nm);
1757 ASSERT((strcmp(nm, ".") != 0) && (strcmp(nm, "..") != 0));
1758 error = VOP_LOOKUP(dirvp, nm, &vp, NULL, 0, rvp, kcred,
1759 NULL, NULL, NULL);
1760 if (error) {
1761 dcmn_err5(("remdrv_cleanup %s lookup error %d\n",
1762 nm, error));
1763 VN_RELE(dirvp);
1764 if (dirvp != rvp)
1765 VN_RELE(rvp);
1766 pn_free(&pn);
1767 kmem_free(nm, MAXNAMELEN);
1768 return (0);
1769 }
1770 VN_RELE(dirvp);
1771 dirvp = vp;
1772 pn_skipslash(&pn);
1773 }
1774
1775 ASSERT(dirvp->v_type == VDIR);
1776 if (dirvp != rvp)
1777 VN_RELE(rvp);
1778 pn_free(&pn);
1779 kmem_free(nm, MAXNAMELEN);
1780
1781 dlen = ndirents * (sizeof (*dbuf));
1782 dbuf = kmem_alloc(dlen, KM_SLEEP);
1783
1784 uio.uio_iov = &iov;
1785 uio.uio_iovcnt = 1;
1786 uio.uio_segflg = UIO_SYSSPACE;
1787 uio.uio_fmode = 0;
1788 uio.uio_extflg = UIO_COPY_CACHED;
1789 uio.uio_loffset = 0;
1790 uio.uio_llimit = MAXOFFSET_T;
1791
1792 eof = 0;
1793 error = 0;
1794 while (!error && !eof) {
1795 uio.uio_resid = dlen;
1796 iov.iov_base = (char *)dbuf;
1797 iov.iov_len = dlen;
1798
1799 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1800 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1801 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1802
1803 dbuflen = dlen - uio.uio_resid;
1804
1805 if (error || dbuflen == 0)
1806 break;
1807
1808 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
1809 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1810
1811 nm = dp->d_name;
1812
1813 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
1814 continue;
1815
1816 if (strncmp(nm, nodename, nodenamelen) != 0)
1817 continue;
1818
1819 error = VOP_LOOKUP(dirvp, nm, &vp,
1820 NULL, 0, NULL, kcred, NULL, NULL, NULL);
1821
1822 dsysdebug(error,
1823 ("rem_drv %s/%s lookup (%d)\n",
1824 dir, nm, error));
1825
1826 if (error)
1827 continue;
1828
1829 ASSERT(vp->v_type == VDIR ||
1830 vp->v_type == VCHR || vp->v_type == VBLK);
1831
1832 if (vp->v_type == VDIR) {
1833 error = devfs_remdrv_rmdir(vp, nm, rvp);
1834 if (error == 0) {
1835 error = VOP_RMDIR(dirvp, (char *)nm,
1836 rvp, kcred, NULL, 0);
1837 dsysdebug(error,
1838 ("rem_drv %s/%s rmdir (%d)\n",
1839 dir, nm, error));
1840 }
1841 } else {
1842 error = VOP_REMOVE(dirvp, (char *)nm, kcred,
1843 NULL, 0);
1844 dsysdebug(error,
1845 ("rem_drv %s/%s remove (%d)\n",
1846 dir, nm, error));
1847 }
1848
1849 VN_RELE(vp);
1850 if (error)
1851 goto exit;
1852 }
1853 }
1854
1855 exit:
1856 VN_RELE(dirvp);
1857
1858 kmem_free(dbuf, dlen);
1859
1860 return (0);
1861 }
1862
1863 struct dv_list {
1864 struct dv_node *dv;
1865 struct dv_list *next;
1866 };
1867
1868 void
dv_walk(struct dv_node * ddv,char * devnm,void (* callback)(struct dv_node *,void *),void * arg)1869 dv_walk(
1870 struct dv_node *ddv,
1871 char *devnm,
1872 void (*callback)(struct dv_node *, void *),
1873 void *arg)
1874 {
1875 struct vnode *dvp;
1876 struct dv_node *dv;
1877 struct dv_list *head, *tail, *next;
1878 int len;
1879
1880 dcmn_err3(("dv_walk: ddv = %s, devnm = %s\n",
1881 ddv->dv_name, devnm ? devnm : "<null>"));
1882
1883 dvp = DVTOV(ddv);
1884
1885 ASSERT(dvp->v_type == VDIR);
1886
1887 head = tail = next = NULL;
1888
1889 rw_enter(&ddv->dv_contents, RW_READER);
1890 mutex_enter(&dvp->v_lock);
1891 for (dv = DV_FIRST_ENTRY(ddv); dv; dv = DV_NEXT_ENTRY(ddv, dv)) {
1892 /*
1893 * If devnm is not NULL and is not the empty string,
1894 * select only dv_nodes with matching non-minor name
1895 */
1896 if (devnm && (len = strlen(devnm)) &&
1897 (strncmp(devnm, dv->dv_name, len) ||
1898 (dv->dv_name[len] != ':' && dv->dv_name[len] != '\0')))
1899 continue;
1900
1901 callback(dv, arg);
1902
1903 if (DVTOV(dv)->v_type != VDIR)
1904 continue;
1905
1906 next = kmem_zalloc(sizeof (*next), KM_SLEEP);
1907 next->dv = dv;
1908
1909 if (tail)
1910 tail->next = next;
1911 else
1912 head = next;
1913
1914 tail = next;
1915 }
1916
1917 while (head) {
1918 dv_walk(head->dv, NULL, callback, arg);
1919 next = head->next;
1920 kmem_free(head, sizeof (*head));
1921 head = next;
1922 }
1923 rw_exit(&ddv->dv_contents);
1924 mutex_exit(&dvp->v_lock);
1925 }
1926