1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * Copyright 2023 Oxide Computer Company 27 */ 28 29 /* 30 * miscellaneous routines for the devfs 31 */ 32 33 #include <sys/types.h> 34 #include <sys/param.h> 35 #include <sys/t_lock.h> 36 #include <sys/systm.h> 37 #include <sys/sysmacros.h> 38 #include <sys/user.h> 39 #include <sys/time.h> 40 #include <sys/vfs.h> 41 #include <sys/vnode.h> 42 #include <sys/file.h> 43 #include <sys/fcntl.h> 44 #include <sys/flock.h> 45 #include <sys/kmem.h> 46 #include <sys/uio.h> 47 #include <sys/errno.h> 48 #include <sys/stat.h> 49 #include <sys/cred.h> 50 #include <sys/dirent.h> 51 #include <sys/pathname.h> 52 #include <sys/cmn_err.h> 53 #include <sys/debug.h> 54 #include <sys/modctl.h> 55 #include <fs/fs_subr.h> 56 #include <sys/fs/dv_node.h> 57 #include <sys/fs/snode.h> 58 #include <sys/sunndi.h> 59 #include <sys/sunmdi.h> 60 #include <sys/conf.h> 61 62 #ifdef DEBUG 63 int devfs_debug = 0x0; 64 #endif 65 66 const char dvnm[] = "devfs"; 67 kmem_cache_t *dv_node_cache; /* dv_node cache */ 68 69 /* 70 * The devfs_clean_key is taken during a devfs_clean operation: it is used to 71 * prevent unnecessary code execution and for detection of potential deadlocks. 72 */ 73 uint_t devfs_clean_key; 74 75 struct dv_node *dvroot; 76 77 /* prototype memory vattrs */ 78 vattr_t dv_vattr_dir = { 79 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 80 VDIR, /* va_type */ 81 DV_DIRMODE_DEFAULT, /* va_mode */ 82 DV_UID_DEFAULT, /* va_uid */ 83 DV_GID_DEFAULT, /* va_gid */ 84 0, /* va_fsid; */ 85 0, /* va_nodeid; */ 86 0, /* va_nlink; */ 87 0, /* va_size; */ 88 0, /* va_atime; */ 89 0, /* va_mtime; */ 90 0, /* va_ctime; */ 91 0, /* va_rdev; */ 92 0, /* va_blksize; */ 93 0, /* va_nblocks; */ 94 0, /* va_seq; */ 95 }; 96 97 vattr_t dv_vattr_file = { 98 AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV, /* va_mask */ 99 0, /* va_type */ 100 DV_DEVMODE_DEFAULT, /* va_mode */ 101 DV_UID_DEFAULT, /* va_uid */ 102 DV_GID_DEFAULT, /* va_gid */ 103 0, /* va_fsid; */ 104 0, /* va_nodeid; */ 105 0, /* va_nlink; */ 106 0, /* va_size; */ 107 0, /* va_atime; */ 108 0, /* va_mtime; */ 109 0, /* va_ctime; */ 110 0, /* va_rdev; */ 111 0, /* va_blksize; */ 112 0, /* va_nblocks; */ 113 0, /* va_seq; */ 114 }; 115 116 vattr_t dv_vattr_priv = { 117 AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV, /* va_mask */ 118 0, /* va_type */ 119 DV_DEVMODE_PRIV, /* va_mode */ 120 DV_UID_DEFAULT, /* va_uid */ 121 DV_GID_DEFAULT, /* va_gid */ 122 0, /* va_fsid; */ 123 0, /* va_nodeid; */ 124 0, /* va_nlink; */ 125 0, /* va_size; */ 126 0, /* va_atime; */ 127 0, /* va_mtime; */ 128 0, /* va_ctime; */ 129 0, /* va_rdev; */ 130 0, /* va_blksize; */ 131 0, /* va_nblocks; */ 132 0, /* va_seq; */ 133 }; 134 135 extern dev_info_t *clone_dip; 136 extern major_t clone_major; 137 extern struct dev_ops *ddi_hold_driver(major_t); 138 139 /* dv_node node constructor for kmem cache */ 140 static int 141 i_dv_node_ctor(void *buf, void *cfarg, int flag) 142 { 143 _NOTE(ARGUNUSED(cfarg, flag)) 144 struct dv_node *dv = (struct dv_node *)buf; 145 struct vnode *vp; 146 147 bzero(buf, sizeof (struct dv_node)); 148 vp = dv->dv_vnode = vn_alloc(flag); 149 if (vp == NULL) { 150 return (-1); 151 } 152 vp->v_data = dv; 153 rw_init(&dv->dv_contents, NULL, RW_DEFAULT, NULL); 154 return (0); 155 } 156 157 /* dv_node node destructor for kmem cache */ 158 static void 159 i_dv_node_dtor(void *buf, void *arg) 160 { 161 _NOTE(ARGUNUSED(arg)) 162 struct dv_node *dv = (struct dv_node *)buf; 163 struct vnode *vp = DVTOV(dv); 164 165 rw_destroy(&dv->dv_contents); 166 vn_invalid(vp); 167 vn_free(vp); 168 } 169 170 171 /* initialize dv_node node cache */ 172 void 173 dv_node_cache_init() 174 { 175 ASSERT(dv_node_cache == NULL); 176 dv_node_cache = kmem_cache_create("dv_node_cache", 177 sizeof (struct dv_node), 0, i_dv_node_ctor, i_dv_node_dtor, 178 NULL, NULL, NULL, 0); 179 180 tsd_create(&devfs_clean_key, NULL); 181 } 182 183 /* destroy dv_node node cache */ 184 void 185 dv_node_cache_fini() 186 { 187 ASSERT(dv_node_cache != NULL); 188 kmem_cache_destroy(dv_node_cache); 189 dv_node_cache = NULL; 190 191 tsd_destroy(&devfs_clean_key); 192 } 193 194 /* 195 * dv_mkino - Generate a unique inode number for devfs nodes. 196 * 197 * Although ino_t is 64 bits, the inode number is truncated to 32 bits for 32 198 * bit non-LARGEFILE applications. This means that there is a requirement to 199 * maintain the inode number as a 32 bit value or applications will have 200 * stat(2) calls fail with EOVERFLOW. We form a 32 bit inode number from the 201 * dev_t. but if the minor number is larger than L_MAXMIN32 we fold extra minor 202 * 203 * To generate inode numbers for directories, we assume that we will never use 204 * more than half the major space - this allows for ~8190 drivers. We use this 205 * upper major number space to allocate inode numbers for directories by 206 * encoding the major and instance into this space. 207 * 208 * We also skew the result so that inode 2 is reserved for the root of the file 209 * system. 210 * 211 * As part of the future support for 64-bit dev_t APIs, the upper minor bits 212 * should be folded into the high inode bits by adding the following code 213 * after "ino |= 1": 214 * 215 * #if (L_BITSMINOR32 != L_BITSMINOR) 216 * |* fold overflow minor bits into high bits of inode number *| 217 * ino |= ((ino_t)(minor >> L_BITSMINOR32)) << L_BITSMINOR; 218 * #endif |* (L_BITSMINOR32 != L_BITSMINOR) *| 219 * 220 * This way only applications that use devices that overflow their minor 221 * space will have an application level impact. 222 */ 223 static ino_t 224 dv_mkino(dev_info_t *devi, vtype_t typ, dev_t dev) 225 { 226 major_t major; 227 minor_t minor; 228 ino_t ino; 229 static int warn; 230 231 if (typ == VDIR) { 232 major = ((L_MAXMAJ32 + 1) >> 1) + DEVI(devi)->devi_major; 233 minor = ddi_get_instance(devi); 234 235 /* makedevice32 in high half of major number space */ 236 ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32)); 237 238 major = DEVI(devi)->devi_major; 239 } else { 240 major = getmajor(dev); 241 minor = getminor(dev); 242 243 /* makedevice32 */ 244 ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32)); 245 246 /* make ino for VCHR different than VBLK */ 247 ino <<= 1; 248 if (typ == VCHR) 249 ino |= 1; 250 } 251 252 ino += DV_ROOTINO + 1; /* skew */ 253 254 /* 255 * diagnose things a little early because adding the skew to a large 256 * minor number could roll over the major. 257 */ 258 if ((major >= (L_MAXMAJ32 >> 1)) && (warn == 0)) { 259 warn = 1; 260 cmn_err(CE_WARN, "%s: inode numbers are not unique", dvnm); 261 } 262 263 return (ino); 264 } 265 266 /* 267 * Compare two nodes lexographically to balance avl tree 268 */ 269 static int 270 dv_compare_nodes(const struct dv_node *dv1, const struct dv_node *dv2) 271 { 272 int rv; 273 274 if ((rv = strcmp(dv1->dv_name, dv2->dv_name)) == 0) 275 return (0); 276 return ((rv < 0) ? -1 : 1); 277 } 278 279 /* 280 * dv_mkroot 281 * 282 * Build the first VDIR dv_node. 283 */ 284 struct dv_node * 285 dv_mkroot(struct vfs *vfsp, dev_t devfsdev) 286 { 287 struct dv_node *dv; 288 struct vnode *vp; 289 290 ASSERT(ddi_root_node() != NULL); 291 ASSERT(dv_node_cache != NULL); 292 293 dcmn_err3(("dv_mkroot\n")); 294 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP); 295 vp = DVTOV(dv); 296 vn_reinit(vp); 297 vp->v_flag = VROOT; 298 vp->v_vfsp = vfsp; 299 vp->v_type = VDIR; 300 vp->v_rdev = devfsdev; 301 vn_setops(vp, dv_vnodeops); 302 vn_exists(vp); 303 304 dvroot = dv; 305 306 dv->dv_name = NULL; /* not needed */ 307 dv->dv_namelen = 0; 308 309 dv->dv_devi = ddi_root_node(); 310 311 dv->dv_ino = DV_ROOTINO; 312 dv->dv_nlink = 2; /* name + . (no dv_insert) */ 313 dv->dv_dotdot = dv; /* .. == self */ 314 dv->dv_attrvp = NULLVP; 315 dv->dv_attr = NULL; 316 dv->dv_flags = DV_BUILD; 317 dv->dv_priv = NULL; 318 dv->dv_busy = 0; 319 dv->dv_dflt_mode = 0; 320 321 avl_create(&dv->dv_entries, 322 (int (*)(const void *, const void *))dv_compare_nodes, 323 sizeof (struct dv_node), offsetof(struct dv_node, dv_avllink)); 324 325 return (dv); 326 } 327 328 /* 329 * dv_mkdir 330 * 331 * Given an probed or attached nexus node, create a VDIR dv_node. 332 * No dv_attrvp is created at this point. 333 */ 334 struct dv_node * 335 dv_mkdir(struct dv_node *ddv, dev_info_t *devi, char *nm) 336 { 337 struct dv_node *dv; 338 struct vnode *vp; 339 size_t nmlen; 340 341 ASSERT((devi)); 342 dcmn_err4(("dv_mkdir: %s\n", nm)); 343 344 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP); 345 nmlen = strlen(nm) + 1; 346 dv->dv_name = kmem_alloc(nmlen, KM_SLEEP); 347 bcopy(nm, dv->dv_name, nmlen); 348 dv->dv_namelen = nmlen - 1; /* '\0' not included */ 349 350 vp = DVTOV(dv); 351 vn_reinit(vp); 352 vp->v_flag = 0; 353 vp->v_vfsp = DVTOV(ddv)->v_vfsp; 354 vp->v_type = VDIR; 355 vp->v_rdev = DVTOV(ddv)->v_rdev; 356 vn_setops(vp, vn_getops(DVTOV(ddv))); 357 vn_exists(vp); 358 359 dv->dv_devi = devi; 360 ndi_hold_devi(devi); 361 362 dv->dv_ino = dv_mkino(devi, VDIR, NODEV); 363 dv->dv_nlink = 0; /* updated on insert */ 364 dv->dv_dotdot = ddv; 365 dv->dv_attrvp = NULLVP; 366 dv->dv_attr = NULL; 367 dv->dv_flags = DV_BUILD; 368 dv->dv_priv = NULL; 369 dv->dv_busy = 0; 370 dv->dv_dflt_mode = 0; 371 372 avl_create(&dv->dv_entries, 373 (int (*)(const void *, const void *))dv_compare_nodes, 374 sizeof (struct dv_node), offsetof(struct dv_node, dv_avllink)); 375 376 return (dv); 377 } 378 379 /* 380 * dv_mknod 381 * 382 * Given a minor node, create a VCHR or VBLK dv_node. 383 * No dv_attrvp is created at this point. 384 */ 385 static struct dv_node * 386 dv_mknod(struct dv_node *ddv, dev_info_t *devi, char *nm, 387 struct ddi_minor_data *dmd) 388 { 389 struct dv_node *dv; 390 struct vnode *vp; 391 size_t nmlen; 392 393 dcmn_err4(("dv_mknod: %s\n", nm)); 394 395 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP); 396 nmlen = strlen(nm) + 1; 397 dv->dv_name = kmem_alloc(nmlen, KM_SLEEP); 398 bcopy(nm, dv->dv_name, nmlen); 399 dv->dv_namelen = nmlen - 1; /* no '\0' */ 400 401 vp = DVTOV(dv); 402 vn_reinit(vp); 403 vp->v_flag = 0; 404 vp->v_vfsp = DVTOV(ddv)->v_vfsp; 405 vp->v_type = dmd->ddm_spec_type == S_IFCHR ? VCHR : VBLK; 406 vp->v_rdev = dmd->ddm_dev; 407 vn_setops(vp, vn_getops(DVTOV(ddv))); 408 vn_exists(vp); 409 410 /* increment dev_ref with devi_lock held */ 411 ASSERT(DEVI_BUSY_OWNED(devi)); 412 mutex_enter(&DEVI(devi)->devi_lock); 413 dv->dv_devi = devi; 414 DEVI(devi)->devi_ref++; /* ndi_hold_devi(dip) */ 415 mutex_exit(&DEVI(devi)->devi_lock); 416 417 dv->dv_ino = dv_mkino(devi, vp->v_type, vp->v_rdev); 418 dv->dv_nlink = 0; /* updated on insert */ 419 dv->dv_dotdot = ddv; 420 dv->dv_attrvp = NULLVP; 421 dv->dv_attr = NULL; 422 dv->dv_flags = 0; 423 424 if (dmd->type == DDM_INTERNAL_PATH) 425 dv->dv_flags |= DV_INTERNAL; 426 if (dmd->ddm_flags & DM_NO_FSPERM) 427 dv->dv_flags |= DV_NO_FSPERM; 428 429 dv->dv_priv = dmd->ddm_node_priv; 430 if (dv->dv_priv) 431 dphold(dv->dv_priv); 432 433 /* 434 * Minors created with ddi_create_priv_minor_node can specify 435 * a default mode permission other than the devfs default. 436 */ 437 if (dv->dv_priv || dv->dv_flags & DV_NO_FSPERM) { 438 dcmn_err5(("%s: dv_mknod default priv mode 0%o\n", 439 dv->dv_name, dmd->ddm_priv_mode)); 440 dv->dv_flags |= DV_DFLT_MODE; 441 dv->dv_dflt_mode = dmd->ddm_priv_mode & S_IAMB; 442 } 443 444 return (dv); 445 } 446 447 /* 448 * dv_destroy 449 * 450 * Destroy what we created in dv_mkdir or dv_mknod. 451 * In the case of a *referenced* directory, do nothing. 452 */ 453 void 454 dv_destroy(struct dv_node *dv, uint_t flags) 455 { 456 vnode_t *vp = DVTOV(dv); 457 ASSERT(dv->dv_nlink == 0); /* no references */ 458 459 dcmn_err4(("dv_destroy: %s\n", dv->dv_name)); 460 461 /* 462 * We may be asked to unlink referenced directories. 463 * In this case, there is nothing to be done. 464 * The eventual memory free will be done in 465 * devfs_inactive. 466 */ 467 if (vp->v_count != 0) { 468 ASSERT(vp->v_type == VDIR); 469 ASSERT(flags & DV_CLEAN_FORCE); 470 ASSERT(DV_STALE(dv)); 471 return; 472 } 473 474 if (vp->v_type == VDIR) { 475 ASSERT(DV_FIRST_ENTRY(dv) == NULL); 476 avl_destroy(&dv->dv_entries); 477 } 478 479 if (dv->dv_attrvp != NULLVP) 480 VN_RELE(dv->dv_attrvp); 481 if (dv->dv_attr != NULL) 482 kmem_free(dv->dv_attr, sizeof (struct vattr)); 483 if (dv->dv_name != NULL) 484 kmem_free(dv->dv_name, dv->dv_namelen + 1); 485 if (dv->dv_devi != NULL) { 486 ndi_rele_devi(dv->dv_devi); 487 } 488 if (dv->dv_priv != NULL) { 489 dpfree(dv->dv_priv); 490 } 491 492 kmem_cache_free(dv_node_cache, dv); 493 } 494 495 /* 496 * Find and hold dv_node by name 497 */ 498 static struct dv_node * 499 dv_findbyname(struct dv_node *ddv, char *nm) 500 { 501 struct dv_node *dv; 502 avl_index_t where; 503 struct dv_node dvtmp; 504 505 ASSERT(RW_LOCK_HELD(&ddv->dv_contents)); 506 dcmn_err3(("dv_findbyname: %s\n", nm)); 507 508 dvtmp.dv_name = nm; 509 dv = avl_find(&ddv->dv_entries, &dvtmp, &where); 510 if (dv) { 511 ASSERT(dv->dv_dotdot == ddv); 512 ASSERT(strcmp(dv->dv_name, nm) == 0); 513 VN_HOLD(DVTOV(dv)); 514 return (dv); 515 } 516 return (NULL); 517 } 518 519 /* 520 * Inserts a new dv_node in a parent directory 521 */ 522 void 523 dv_insert(struct dv_node *ddv, struct dv_node *dv) 524 { 525 avl_index_t where; 526 527 ASSERT(RW_WRITE_HELD(&ddv->dv_contents)); 528 ASSERT(DVTOV(ddv)->v_type == VDIR); 529 ASSERT(ddv->dv_nlink >= 2); 530 ASSERT(dv->dv_nlink == 0); 531 532 dcmn_err3(("dv_insert: %s\n", dv->dv_name)); 533 534 dv->dv_dotdot = ddv; 535 if (DVTOV(dv)->v_type == VDIR) { 536 ddv->dv_nlink++; /* .. to containing directory */ 537 dv->dv_nlink = 2; /* name + . */ 538 } else { 539 dv->dv_nlink = 1; /* name */ 540 } 541 542 /* enter node in the avl tree */ 543 VERIFY(avl_find(&ddv->dv_entries, dv, &where) == NULL); 544 avl_insert(&ddv->dv_entries, dv, where); 545 } 546 547 /* 548 * Unlink a dv_node from a perent directory 549 */ 550 void 551 dv_unlink(struct dv_node *ddv, struct dv_node *dv) 552 { 553 /* verify linkage of arguments */ 554 ASSERT(ddv && dv); 555 ASSERT(dv->dv_dotdot == ddv); 556 ASSERT(RW_WRITE_HELD(&ddv->dv_contents)); 557 ASSERT(DVTOV(ddv)->v_type == VDIR); 558 559 dcmn_err3(("dv_unlink: %s\n", dv->dv_name)); 560 561 if (DVTOV(dv)->v_type == VDIR) { 562 ddv->dv_nlink--; /* .. to containing directory */ 563 dv->dv_nlink -= 2; /* name + . */ 564 } else { 565 dv->dv_nlink -= 1; /* name */ 566 } 567 ASSERT(ddv->dv_nlink >= 2); 568 ASSERT(dv->dv_nlink == 0); 569 570 dv->dv_dotdot = NULL; 571 572 /* remove from avl tree */ 573 avl_remove(&ddv->dv_entries, dv); 574 } 575 576 /* 577 * Merge devfs node specific information into an attribute structure. 578 * 579 * NOTE: specfs provides ATIME,MTIME,CTIME,SIZE,BLKSIZE,NBLOCKS on leaf node. 580 */ 581 void 582 dv_vattr_merge(struct dv_node *dv, struct vattr *vap) 583 { 584 struct vnode *vp = DVTOV(dv); 585 586 vap->va_nodeid = dv->dv_ino; 587 vap->va_nlink = dv->dv_nlink; 588 589 if (vp->v_type == VDIR) { 590 vap->va_rdev = 0; 591 vap->va_fsid = vp->v_rdev; 592 } else { 593 vap->va_rdev = vp->v_rdev; 594 vap->va_fsid = DVTOV(dv->dv_dotdot)->v_rdev; 595 vap->va_type = vp->v_type; 596 /* don't trust the shadow file type */ 597 vap->va_mode &= ~S_IFMT; 598 if (vap->va_type == VCHR) 599 vap->va_mode |= S_IFCHR; 600 else 601 vap->va_mode |= S_IFBLK; 602 } 603 } 604 605 /* 606 * Get default device permission by consulting rules in 607 * privilege specification in minor node and /etc/minor_perm. 608 * 609 * This function is called from the devname filesystem to get default 610 * permissions for a device exported to a non-global zone. 611 */ 612 void 613 devfs_get_defattr(struct vnode *vp, struct vattr *vap, int *no_fs_perm) 614 { 615 mperm_t mp; 616 struct dv_node *dv; 617 618 /* If vp isn't a dv_node, return something sensible */ 619 if (!vn_matchops(vp, dv_vnodeops)) { 620 if (no_fs_perm) 621 *no_fs_perm = 0; 622 *vap = dv_vattr_file; 623 return; 624 } 625 626 /* 627 * For minors not created by ddi_create_priv_minor_node(), 628 * use devfs defaults. 629 */ 630 dv = VTODV(vp); 631 if (vp->v_type == VDIR) { 632 *vap = dv_vattr_dir; 633 } else if (dv->dv_flags & DV_NO_FSPERM) { 634 if (no_fs_perm) 635 *no_fs_perm = 1; 636 *vap = dv_vattr_priv; 637 } else { 638 /* 639 * look up perm bits from minor_perm 640 */ 641 *vap = dv_vattr_file; 642 if (dev_minorperm(dv->dv_devi, dv->dv_name, &mp) == 0) { 643 VATTR_MP_MERGE((*vap), mp); 644 dcmn_err5(("%s: minor perm mode 0%o\n", 645 dv->dv_name, vap->va_mode)); 646 } else if (dv->dv_flags & DV_DFLT_MODE) { 647 ASSERT((dv->dv_dflt_mode & ~S_IAMB) == 0); 648 vap->va_mode &= ~S_IAMB; 649 vap->va_mode |= dv->dv_dflt_mode; 650 dcmn_err5(("%s: priv mode 0%o\n", 651 dv->dv_name, vap->va_mode)); 652 } 653 } 654 } 655 656 /* 657 * dv_shadow_node 658 * 659 * Given a VDIR dv_node, find/create the associated VDIR 660 * node in the shadow attribute filesystem. 661 * 662 * Given a VCHR/VBLK dv_node, find the associated VREG 663 * node in the shadow attribute filesystem. These nodes 664 * are only created to persist non-default attributes. 665 * Lack of such a node implies the default permissions 666 * are sufficient. 667 * 668 * Managing the attribute file entries is slightly tricky (mostly 669 * because we can't intercept VN_HOLD and VN_RELE except on the last 670 * release). 671 * 672 * We assert that if the dv_attrvp pointer is non-NULL, it points 673 * to a singly-held (by us) vnode that represents the shadow entry 674 * in the underlying filesystem. To avoid store-ordering issues, 675 * we assert that the pointer can only be tested under the dv_contents 676 * READERS lock. 677 */ 678 679 void 680 dv_shadow_node( 681 struct vnode *dvp, /* devfs parent directory vnode */ 682 char *nm, /* name component */ 683 struct vnode *vp, /* devfs vnode */ 684 struct pathname *pnp, /* the path .. */ 685 struct vnode *rdir, /* the root .. */ 686 struct cred *cred, /* who's asking? */ 687 int flags) /* optionally create shadow node */ 688 { 689 struct dv_node *dv; /* dv_node of named directory */ 690 struct vnode *rdvp; /* shadow parent directory vnode */ 691 struct vnode *rvp; /* shadow vnode */ 692 struct vnode *rrvp; /* realvp of shadow vnode */ 693 struct vattr vattr; 694 int create_tried; 695 int error; 696 697 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK); 698 dv = VTODV(vp); 699 dcmn_err3(("dv_shadow_node: name %s attr %p\n", 700 nm, (void *)dv->dv_attrvp)); 701 702 if ((flags & DV_SHADOW_WRITE_HELD) == 0) { 703 ASSERT(RW_READ_HELD(&dv->dv_contents)); 704 if (dv->dv_attrvp != NULLVP) 705 return; 706 if (!rw_tryupgrade(&dv->dv_contents)) { 707 rw_exit(&dv->dv_contents); 708 rw_enter(&dv->dv_contents, RW_WRITER); 709 if (dv->dv_attrvp != NULLVP) { 710 rw_downgrade(&dv->dv_contents); 711 return; 712 } 713 } 714 } else { 715 ASSERT(RW_WRITE_HELD(&dv->dv_contents)); 716 if (dv->dv_attrvp != NULLVP) 717 return; 718 } 719 720 ASSERT(RW_WRITE_HELD(&dv->dv_contents) && dv->dv_attrvp == NULL); 721 722 rdvp = VTODV(dvp)->dv_attrvp; 723 create_tried = 0; 724 lookup: 725 if (rdvp && (dv->dv_flags & DV_NO_FSPERM) == 0) { 726 error = VOP_LOOKUP(rdvp, nm, &rvp, pnp, LOOKUP_DIR, rdir, cred, 727 NULL, NULL, NULL); 728 729 /* factor out the snode since we only want the attribute node */ 730 if ((error == 0) && (VOP_REALVP(rvp, &rrvp, NULL) == 0)) { 731 VN_HOLD(rrvp); 732 VN_RELE(rvp); 733 rvp = rrvp; 734 } 735 } else 736 error = EROFS; /* no parent, no entry */ 737 738 /* 739 * All we want is the permissions (and maybe ACLs and 740 * extended attributes), and we want to perform lookups 741 * by name. Drivers occasionally change their minor 742 * number space. If something changes, there's no 743 * much we can do about it here. 744 */ 745 746 /* The shadow node checks out. We are done */ 747 if (error == 0) { 748 dv->dv_attrvp = rvp; /* with one hold */ 749 750 /* 751 * Determine if we have non-trivial ACLs on this node. 752 * It is not necessary to VOP_RWLOCK since fs_acl_nontrivial 753 * only does VOP_GETSECATTR. 754 */ 755 dv->dv_flags &= ~DV_ACL; 756 757 if (fs_acl_nontrivial(rvp, cred)) 758 dv->dv_flags |= DV_ACL; 759 760 /* 761 * If we have synced out the memory attributes, free 762 * them and switch back to using the persistent store. 763 */ 764 if (rvp && dv->dv_attr) { 765 kmem_free(dv->dv_attr, sizeof (struct vattr)); 766 dv->dv_attr = NULL; 767 } 768 if ((flags & DV_SHADOW_WRITE_HELD) == 0) 769 rw_downgrade(&dv->dv_contents); 770 ASSERT(RW_LOCK_HELD(&dv->dv_contents)); 771 return; 772 } 773 774 /* 775 * Failed to find attribute in persistent backing store, 776 * get default permission bits. 777 */ 778 devfs_get_defattr(vp, &vattr, NULL); 779 780 dv_vattr_merge(dv, &vattr); 781 gethrestime(&vattr.va_atime); 782 vattr.va_mtime = vattr.va_atime; 783 vattr.va_ctime = vattr.va_atime; 784 785 /* 786 * Try to create shadow dir. This is necessary in case 787 * we need to create a shadow leaf node later, when user 788 * executes chmod. 789 */ 790 if ((error == ENOENT) && !create_tried) { 791 switch (vp->v_type) { 792 case VDIR: 793 error = VOP_MKDIR(rdvp, nm, &vattr, &rvp, kcred, 794 NULL, 0, NULL); 795 dsysdebug(error, ("vop_mkdir %s %s %d\n", 796 VTODV(dvp)->dv_name, nm, error)); 797 create_tried = 1; 798 break; 799 800 case VCHR: 801 case VBLK: 802 /* 803 * Shadow nodes are only created on demand 804 */ 805 if (flags & DV_SHADOW_CREATE) { 806 error = VOP_CREATE(rdvp, nm, &vattr, NONEXCL, 807 VREAD|VWRITE, &rvp, kcred, 0, NULL, NULL); 808 dsysdebug(error, ("vop_create %s %s %d\n", 809 VTODV(dvp)->dv_name, nm, error)); 810 create_tried = 1; 811 } 812 break; 813 814 default: 815 cmn_err(CE_PANIC, "devfs: %s: create", dvnm); 816 /*NOTREACHED*/ 817 } 818 819 if (create_tried && 820 (error == 0) || (error == EEXIST)) { 821 VN_RELE(rvp); 822 goto lookup; 823 } 824 } 825 826 /* Store attribute in memory */ 827 if (dv->dv_attr == NULL) { 828 dv->dv_attr = kmem_alloc(sizeof (struct vattr), KM_SLEEP); 829 *(dv->dv_attr) = vattr; 830 } 831 832 if ((flags & DV_SHADOW_WRITE_HELD) == 0) 833 rw_downgrade(&dv->dv_contents); 834 ASSERT(RW_LOCK_HELD(&dv->dv_contents)); 835 } 836 837 /* 838 * Given a devinfo node, and a name, returns the appropriate 839 * minor information for that named node, if it exists. 840 */ 841 static int 842 dv_find_leafnode(dev_info_t *devi, char *minor_nm, struct ddi_minor_data *r_mi) 843 { 844 struct ddi_minor_data *dmd; 845 846 ASSERT(i_ddi_devi_attached(devi)); 847 848 dcmn_err3(("dv_find_leafnode: %s\n", minor_nm)); 849 ASSERT(DEVI_BUSY_OWNED(devi)); 850 for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) { 851 852 /* 853 * Skip alias nodes and nodes without a name. 854 */ 855 if ((dmd->type == DDM_ALIAS) || (dmd->ddm_name == NULL)) 856 continue; 857 858 dcmn_err4(("dv_find_leafnode: (%s,%s)\n", 859 minor_nm, dmd->ddm_name)); 860 if (strcmp(minor_nm, dmd->ddm_name) == 0) { 861 r_mi->ddm_dev = dmd->ddm_dev; 862 r_mi->ddm_spec_type = dmd->ddm_spec_type; 863 r_mi->type = dmd->type; 864 r_mi->ddm_flags = dmd->ddm_flags; 865 r_mi->ddm_node_priv = dmd->ddm_node_priv; 866 r_mi->ddm_priv_mode = dmd->ddm_priv_mode; 867 if (r_mi->ddm_node_priv) 868 dphold(r_mi->ddm_node_priv); 869 return (0); 870 } 871 } 872 873 dcmn_err3(("dv_find_leafnode: %s: ENOENT\n", minor_nm)); 874 return (ENOENT); 875 } 876 877 /* 878 * Special handling for clone node: 879 * Clone minor name is a driver name, the minor number will 880 * be the major number of the driver. There is no minor 881 * node under the clone driver, so we'll manufacture the 882 * dev_t. 883 */ 884 static struct dv_node * 885 dv_clone_mknod(struct dv_node *ddv, char *drvname) 886 { 887 major_t major; 888 struct dv_node *dvp; 889 char *devnm; 890 struct ddi_minor_data *dmd; 891 892 /* 893 * Make sure drvname is a STREAMS driver. We load the driver, 894 * but don't attach to any instances. This makes stat(2) 895 * relatively cheap. 896 */ 897 major = ddi_name_to_major(drvname); 898 if (major == DDI_MAJOR_T_NONE) 899 return (NULL); 900 901 if (ddi_hold_driver(major) == NULL) 902 return (NULL); 903 904 if (STREAMSTAB(major) == NULL) { 905 ddi_rele_driver(major); 906 return (NULL); 907 } 908 909 ddi_rele_driver(major); 910 devnm = kmem_alloc(MAXNAMELEN, KM_SLEEP); 911 (void) snprintf(devnm, MAXNAMELEN, "clone@0:%s", drvname); 912 dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP); 913 dmd->ddm_dev = makedevice(clone_major, (minor_t)major); 914 dmd->ddm_spec_type = S_IFCHR; 915 dvp = dv_mknod(ddv, clone_dip, devnm, dmd); 916 kmem_free(dmd, sizeof (*dmd)); 917 kmem_free(devnm, MAXNAMELEN); 918 return (dvp); 919 } 920 921 /* 922 * Given the parent directory node, and a name in it, returns the 923 * named dv_node to the caller (as a vnode). 924 * 925 * (We need pnp and rdir for doing shadow lookups; they can be NULL) 926 */ 927 int 928 dv_find(struct dv_node *ddv, char *nm, struct vnode **vpp, struct pathname *pnp, 929 struct vnode *rdir, struct cred *cred, uint_t ndi_flags) 930 { 931 extern int isminiroot; /* see modctl.c */ 932 933 int rv = 0, was_busy = 0, nmlen, write_held = 0; 934 struct vnode *vp; 935 struct dv_node *dv, *dup; 936 dev_info_t *pdevi, *devi = NULL; 937 char *mnm; 938 struct ddi_minor_data *dmd; 939 940 dcmn_err3(("dv_find %s\n", nm)); 941 942 if (!rw_tryenter(&ddv->dv_contents, RW_READER)) { 943 if (tsd_get(devfs_clean_key)) 944 return (EBUSY); 945 rw_enter(&ddv->dv_contents, RW_READER); 946 } 947 start: 948 if (DV_STALE(ddv)) { 949 rw_exit(&ddv->dv_contents); 950 return (ESTALE); 951 } 952 953 /* 954 * Empty name or ., return node itself. 955 */ 956 nmlen = strlen(nm); 957 if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) { 958 *vpp = DVTOV(ddv); 959 rw_exit(&ddv->dv_contents); 960 VN_HOLD(*vpp); 961 return (0); 962 } 963 964 /* 965 * .., return the parent directory 966 */ 967 if ((nmlen == 2) && (strcmp(nm, "..") == 0)) { 968 *vpp = DVTOV(ddv->dv_dotdot); 969 rw_exit(&ddv->dv_contents); 970 VN_HOLD(*vpp); 971 return (0); 972 } 973 974 /* 975 * Fail anything without a valid device name component 976 */ 977 if (nm[0] == '@' || nm[0] == ':') { 978 dcmn_err3(("devfs: no driver '%s'\n", nm)); 979 rw_exit(&ddv->dv_contents); 980 return (ENOENT); 981 } 982 983 /* 984 * So, now we have to deal with the trickier stuff. 985 * 986 * (a) search the existing list of dv_nodes on this directory 987 */ 988 if ((dv = dv_findbyname(ddv, nm)) != NULL) { 989 founddv: 990 ASSERT(RW_LOCK_HELD(&ddv->dv_contents)); 991 992 if (!rw_tryenter(&dv->dv_contents, RW_READER)) { 993 if (tsd_get(devfs_clean_key)) { 994 VN_RELE(DVTOV(dv)); 995 rw_exit(&ddv->dv_contents); 996 return (EBUSY); 997 } 998 rw_enter(&dv->dv_contents, RW_READER); 999 } 1000 1001 vp = DVTOV(dv); 1002 if ((dv->dv_attrvp != NULLVP) || 1003 (vp->v_type != VDIR && dv->dv_attr != NULL)) { 1004 /* 1005 * Common case - we already have attributes 1006 */ 1007 rw_exit(&dv->dv_contents); 1008 rw_exit(&ddv->dv_contents); 1009 goto found; 1010 } 1011 1012 /* 1013 * No attribute vp, try and build one. 1014 * 1015 * dv_shadow_node() can briefly drop &dv->dv_contents lock 1016 * if it is unable to upgrade it to a write lock. If the 1017 * current thread has come in through the bottom-up device 1018 * configuration devfs_clean() path, we may deadlock against 1019 * a thread performing top-down device configuration if it 1020 * grabs the contents lock. To avoid this, when we are on the 1021 * devfs_clean() path we attempt to upgrade the dv_contents 1022 * lock before we call dv_shadow_node(). 1023 */ 1024 if (tsd_get(devfs_clean_key)) { 1025 if (!rw_tryupgrade(&dv->dv_contents)) { 1026 VN_RELE(DVTOV(dv)); 1027 rw_exit(&dv->dv_contents); 1028 rw_exit(&ddv->dv_contents); 1029 return (EBUSY); 1030 } 1031 1032 write_held = DV_SHADOW_WRITE_HELD; 1033 } 1034 1035 dv_shadow_node(DVTOV(ddv), nm, vp, pnp, rdir, cred, 1036 write_held); 1037 1038 rw_exit(&dv->dv_contents); 1039 rw_exit(&ddv->dv_contents); 1040 goto found; 1041 } 1042 1043 /* 1044 * (b) Search the child devinfo nodes of our parent directory, 1045 * looking for the named node. If we find it, build a new 1046 * node, then grab the writers lock, search the directory 1047 * if it's still not there, then insert it. 1048 * 1049 * We drop the devfs locks before accessing the device tree. 1050 * Take care to mark the node BUSY so that a forced devfs_clean 1051 * doesn't mark the directory node stale. 1052 * 1053 * Also, check if we are called as part of devfs_clean or 1054 * reset_perm. If so, simply return not found because there 1055 * is nothing to clean. 1056 */ 1057 if (tsd_get(devfs_clean_key)) { 1058 rw_exit(&ddv->dv_contents); 1059 return (ENOENT); 1060 } 1061 1062 /* 1063 * We could be either READ or WRITE locked at 1064 * this point. Upgrade if we are read locked. 1065 */ 1066 ASSERT(RW_LOCK_HELD(&ddv->dv_contents)); 1067 if (rw_read_locked(&ddv->dv_contents) && 1068 !rw_tryupgrade(&ddv->dv_contents)) { 1069 rw_exit(&ddv->dv_contents); 1070 rw_enter(&ddv->dv_contents, RW_WRITER); 1071 /* 1072 * Things may have changed when we dropped 1073 * the contents lock, so start from top again 1074 */ 1075 goto start; 1076 } 1077 ddv->dv_busy++; /* mark busy before dropping lock */ 1078 was_busy++; 1079 rw_exit(&ddv->dv_contents); 1080 1081 pdevi = ddv->dv_devi; 1082 ASSERT(pdevi != NULL); 1083 1084 mnm = strchr(nm, ':'); 1085 if (mnm) 1086 *mnm = (char)0; 1087 1088 /* 1089 * Configure one nexus child, will call nexus's bus_ops 1090 * If successful, devi is held upon returning. 1091 * Note: devfs lookup should not be configuring grandchildren. 1092 */ 1093 ASSERT((ndi_flags & NDI_CONFIG) == 0); 1094 1095 rv = ndi_devi_config_one(pdevi, nm, &devi, ndi_flags | NDI_NO_EVENT); 1096 if (mnm) 1097 *mnm = ':'; 1098 if (rv != NDI_SUCCESS) { 1099 rv = ENOENT; 1100 goto notfound; 1101 } 1102 1103 ASSERT(devi); 1104 1105 /* Check if this is a path alias */ 1106 if (ddi_aliases_present == B_TRUE && ddi_get_parent(devi) != pdevi) { 1107 char *curr = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1108 1109 (void) ddi_pathname(devi, curr); 1110 1111 vp = NULL; 1112 if (devfs_lookupname(curr, NULL, &vp) == 0 && vp) { 1113 dv = VTODV(vp); 1114 kmem_free(curr, MAXPATHLEN); 1115 goto found; 1116 } 1117 kmem_free(curr, MAXPATHLEN); 1118 } 1119 1120 /* 1121 * If we configured a hidden node, consider it notfound. 1122 */ 1123 if (ndi_dev_is_hidden_node(devi)) { 1124 ndi_rele_devi(devi); 1125 rv = ENOENT; 1126 goto notfound; 1127 } 1128 1129 /* 1130 * Don't make vhci clients visible under phci, unless we 1131 * are in miniroot. 1132 */ 1133 if (isminiroot == 0 && ddi_get_parent(devi) != pdevi) { 1134 ndi_rele_devi(devi); 1135 rv = ENOENT; 1136 goto notfound; 1137 } 1138 1139 ASSERT(devi && i_ddi_devi_attached(devi)); 1140 1141 /* 1142 * Invalidate cache to notice newly created minor nodes. 1143 */ 1144 rw_enter(&ddv->dv_contents, RW_WRITER); 1145 ddv->dv_flags |= DV_BUILD; 1146 rw_exit(&ddv->dv_contents); 1147 1148 /* 1149 * mkdir for nexus drivers and leaf nodes as well. If we are racing 1150 * and create a duplicate, the duplicate will be destroyed below. 1151 */ 1152 if (mnm == NULL) { 1153 dv = dv_mkdir(ddv, devi, nm); 1154 } else { 1155 /* 1156 * Allocate dmd first to avoid KM_SLEEP with active 1157 * ndi_devi_enter. 1158 */ 1159 dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP); 1160 ndi_devi_enter(devi); 1161 if (devi == clone_dip) { 1162 /* 1163 * For clone minors, load the driver indicated by 1164 * minor name. 1165 */ 1166 dv = dv_clone_mknod(ddv, mnm + 1); 1167 } else { 1168 /* 1169 * Find minor node and make a dv_node 1170 */ 1171 if (dv_find_leafnode(devi, mnm + 1, dmd) == 0) { 1172 dv = dv_mknod(ddv, devi, nm, dmd); 1173 if (dmd->ddm_node_priv) 1174 dpfree(dmd->ddm_node_priv); 1175 } 1176 } 1177 ndi_devi_exit(devi); 1178 kmem_free(dmd, sizeof (*dmd)); 1179 } 1180 /* 1181 * Release hold from ndi_devi_config_one() 1182 */ 1183 ndi_rele_devi(devi); 1184 1185 if (dv == NULL) { 1186 rv = ENOENT; 1187 goto notfound; 1188 } 1189 1190 /* 1191 * We have released the dv_contents lock, need to check 1192 * if another thread already created a duplicate node 1193 */ 1194 rw_enter(&ddv->dv_contents, RW_WRITER); 1195 if ((dup = dv_findbyname(ddv, nm)) == NULL) { 1196 dv_insert(ddv, dv); 1197 } else { 1198 /* 1199 * Duplicate found, use the existing node 1200 */ 1201 VN_RELE(DVTOV(dv)); 1202 dv_destroy(dv, 0); 1203 dv = dup; 1204 } 1205 goto founddv; 1206 /*NOTREACHED*/ 1207 1208 found: 1209 /* 1210 * Fail lookup of device that has now become hidden (typically via 1211 * hot removal of open device). 1212 */ 1213 if (dv->dv_devi && ndi_dev_is_hidden_node(dv->dv_devi)) { 1214 dcmn_err2(("dv_find: nm %s failed: hidden/removed\n", nm)); 1215 VN_RELE(vp); 1216 rv = ENOENT; 1217 goto notfound; 1218 } 1219 1220 /* 1221 * Skip non-kernel lookups of internal nodes. 1222 * This use of kcred to distinguish between user and 1223 * internal kernel lookups is unfortunate. The information 1224 * provided by the seg argument to lookupnameat should 1225 * evolve into a lookup flag for filesystems that need 1226 * this distinction. 1227 */ 1228 if ((dv->dv_flags & DV_INTERNAL) && (cred != kcred)) { 1229 dcmn_err2(("dv_find: nm %s failed: internal\n", nm)); 1230 VN_RELE(vp); 1231 rv = ENOENT; 1232 goto notfound; 1233 } 1234 1235 dcmn_err2(("dv_find: returning vp for nm %s\n", nm)); 1236 if (vp->v_type == VCHR || vp->v_type == VBLK) { 1237 /* 1238 * If vnode is a device, return special vnode instead 1239 * (though it knows all about -us- via sp->s_realvp, 1240 * sp->s_devvp, and sp->s_dip) 1241 */ 1242 *vpp = specvp_devfs(vp, vp->v_rdev, vp->v_type, cred, 1243 dv->dv_devi); 1244 VN_RELE(vp); 1245 if (*vpp == NULLVP) 1246 rv = ENOSYS; 1247 } else 1248 *vpp = vp; 1249 1250 notfound: 1251 if (was_busy) { 1252 /* 1253 * Non-zero was_busy tells us that we are not in the 1254 * devfs_clean() path which in turn means that we can afford 1255 * to take the contents lock unconditionally. 1256 */ 1257 rw_enter(&ddv->dv_contents, RW_WRITER); 1258 ddv->dv_busy--; 1259 rw_exit(&ddv->dv_contents); 1260 } 1261 return (rv); 1262 } 1263 1264 /* 1265 * The given directory node is out-of-date; that is, it has been 1266 * marked as needing to be rebuilt, possibly because some new devinfo 1267 * node has come into existence, or possibly because this is the first 1268 * time we've been here. 1269 */ 1270 void 1271 dv_filldir(struct dv_node *ddv) 1272 { 1273 struct dv_node *dv; 1274 dev_info_t *devi, *pdevi; 1275 struct ddi_minor_data *dmd; 1276 char devnm[MAXNAMELEN]; 1277 1278 ASSERT(DVTOV(ddv)->v_type == VDIR); 1279 ASSERT(RW_WRITE_HELD(&ddv->dv_contents)); 1280 ASSERT(ddv->dv_flags & DV_BUILD); 1281 1282 dcmn_err3(("dv_filldir: %s\n", ddv->dv_name)); 1283 if (DV_STALE(ddv)) 1284 return; 1285 pdevi = ddv->dv_devi; 1286 1287 if (ndi_devi_config(pdevi, NDI_NO_EVENT) != NDI_SUCCESS) { 1288 dcmn_err3(("dv_filldir: config error %s\n", ddv->dv_name)); 1289 } 1290 1291 ndi_devi_enter(pdevi); 1292 for (devi = ddi_get_child(pdevi); devi; 1293 devi = ddi_get_next_sibling(devi)) { 1294 /* 1295 * While we know enough to create a directory at DS_INITIALIZED, 1296 * the directory will be empty until DS_ATTACHED. The existence 1297 * of an empty directory dv_node will cause a devi_ref, which 1298 * has caused problems for existing code paths doing offline/DR 1299 * type operations - making devfs_clean coordination even more 1300 * sensitive and error prone. Given this, the 'continue' below 1301 * is checking for DS_ATTACHED instead of DS_INITIALIZED. 1302 */ 1303 if (i_ddi_node_state(devi) < DS_ATTACHED) 1304 continue; 1305 1306 /* skip hidden nodes */ 1307 if (ndi_dev_is_hidden_node(devi)) 1308 continue; 1309 1310 dcmn_err3(("dv_filldir: node %s\n", ddi_node_name(devi))); 1311 1312 ndi_devi_enter(devi); 1313 for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) { 1314 char *addr; 1315 1316 /* 1317 * Skip alias nodes, internal nodes, and nodes 1318 * without a name. We allow DDM_DEFAULT nodes 1319 * to appear in readdir. 1320 */ 1321 if ((dmd->type == DDM_ALIAS) || 1322 (dmd->type == DDM_INTERNAL_PATH) || 1323 (dmd->ddm_name == NULL)) 1324 continue; 1325 1326 addr = ddi_get_name_addr(devi); 1327 if (addr && *addr) 1328 (void) sprintf(devnm, "%s@%s:%s", 1329 ddi_node_name(devi), addr, dmd->ddm_name); 1330 else 1331 (void) sprintf(devnm, "%s:%s", 1332 ddi_node_name(devi), dmd->ddm_name); 1333 1334 if ((dv = dv_findbyname(ddv, devnm)) != NULL) { 1335 /* dv_node already exists */ 1336 VN_RELE(DVTOV(dv)); 1337 continue; 1338 } 1339 1340 dv = dv_mknod(ddv, devi, devnm, dmd); 1341 dv_insert(ddv, dv); 1342 VN_RELE(DVTOV(dv)); 1343 } 1344 ndi_devi_exit(devi); 1345 1346 (void) ddi_deviname(devi, devnm); 1347 if ((dv = dv_findbyname(ddv, devnm + 1)) == NULL) { 1348 /* directory doesn't exist */ 1349 dv = dv_mkdir(ddv, devi, devnm + 1); 1350 dv_insert(ddv, dv); 1351 } 1352 VN_RELE(DVTOV(dv)); 1353 } 1354 ndi_devi_exit(pdevi); 1355 1356 ddv->dv_flags &= ~DV_BUILD; 1357 } 1358 1359 /* 1360 * Given a directory node, clean out all the nodes beneath. 1361 * 1362 * VDIR: Reinvoke to clean them, then delete the directory. 1363 * VCHR, VBLK: Just blow them away. 1364 * 1365 * Mark the directories touched as in need of a rebuild, in case 1366 * we fall over part way through. When DV_CLEAN_FORCE is specified, 1367 * we mark referenced empty directories as stale to facilitate DR. 1368 */ 1369 int 1370 dv_cleandir(struct dv_node *ddv, char *devnm, uint_t flags) 1371 { 1372 struct dv_node *dv; 1373 struct dv_node *next; 1374 struct vnode *vp; 1375 int busy = 0; 1376 1377 /* 1378 * We should always be holding the tsd_clean_key here: dv_cleandir() 1379 * will be called as a result of a devfs_clean request and the 1380 * tsd_clean_key will be set in either in devfs_clean() itself or in 1381 * devfs_clean_vhci(). 1382 * 1383 * Since we are on the devfs_clean path, we return EBUSY if we cannot 1384 * get the contents lock: if we blocked here we might deadlock against 1385 * a thread performing top-down device configuration. 1386 */ 1387 ASSERT(tsd_get(devfs_clean_key)); 1388 1389 dcmn_err3(("dv_cleandir: %s\n", ddv->dv_name)); 1390 1391 if (!(flags & DV_CLEANDIR_LCK) && 1392 !rw_tryenter(&ddv->dv_contents, RW_WRITER)) 1393 return (EBUSY); 1394 1395 for (dv = DV_FIRST_ENTRY(ddv); dv; dv = next) { 1396 next = DV_NEXT_ENTRY(ddv, dv); 1397 1398 /* 1399 * If devnm is specified, the non-minor portion of the 1400 * name must match devnm. 1401 */ 1402 if (devnm && 1403 (strncmp(devnm, dv->dv_name, strlen(devnm)) || 1404 (dv->dv_name[strlen(devnm)] != ':' && 1405 dv->dv_name[strlen(devnm)] != '\0'))) 1406 continue; 1407 1408 /* check type of what we are cleaning */ 1409 vp = DVTOV(dv); 1410 if (vp->v_type == VDIR) { 1411 /* recurse on directories */ 1412 rw_enter(&dv->dv_contents, RW_WRITER); 1413 if (dv_cleandir(dv, NULL, 1414 flags | DV_CLEANDIR_LCK) == EBUSY) { 1415 rw_exit(&dv->dv_contents); 1416 goto set_busy; 1417 } 1418 1419 /* A clean directory is an empty directory... */ 1420 ASSERT(dv->dv_nlink == 2); 1421 mutex_enter(&vp->v_lock); 1422 if (vp->v_count > 0) { 1423 /* 1424 * ... but an empty directory can still have 1425 * references to it. If we have dv_busy or 1426 * DV_CLEAN_FORCE is *not* specified then a 1427 * referenced directory is considered busy. 1428 */ 1429 if (dv->dv_busy || !(flags & DV_CLEAN_FORCE)) { 1430 mutex_exit(&vp->v_lock); 1431 rw_exit(&dv->dv_contents); 1432 goto set_busy; 1433 } 1434 1435 /* 1436 * Mark referenced directory stale so that DR 1437 * will succeed even if a shell has 1438 * /devices/xxx as current directory (causing 1439 * VN_HOLD reference to an empty directory). 1440 */ 1441 ASSERT(!DV_STALE(dv)); 1442 ndi_rele_devi(dv->dv_devi); 1443 dv->dv_devi = NULL; /* mark DV_STALE */ 1444 } 1445 } else { 1446 ASSERT((vp->v_type == VCHR) || (vp->v_type == VBLK)); 1447 ASSERT(dv->dv_nlink == 1); /* no hard links */ 1448 mutex_enter(&vp->v_lock); 1449 if (vp->v_count > 0) { 1450 mutex_exit(&vp->v_lock); 1451 goto set_busy; 1452 } 1453 } 1454 1455 /* unlink from directory */ 1456 dv_unlink(ddv, dv); 1457 1458 /* drop locks */ 1459 mutex_exit(&vp->v_lock); 1460 if (vp->v_type == VDIR) 1461 rw_exit(&dv->dv_contents); 1462 1463 /* destroy vnode if ref count is zero */ 1464 if (vp->v_count == 0) 1465 dv_destroy(dv, flags); 1466 1467 continue; 1468 1469 /* 1470 * If devnm is not NULL we return immediately on busy, 1471 * otherwise we continue destroying unused dv_node's. 1472 */ 1473 set_busy: busy++; 1474 if (devnm) 1475 break; 1476 } 1477 1478 /* 1479 * This code may be invoked to inform devfs that a new node has 1480 * been created in the kernel device tree. So we always set 1481 * the DV_BUILD flag to allow the next dv_filldir() to pick 1482 * the new devinfo nodes. 1483 */ 1484 ddv->dv_flags |= DV_BUILD; 1485 1486 if (!(flags & DV_CLEANDIR_LCK)) 1487 rw_exit(&ddv->dv_contents); 1488 1489 return (busy ? EBUSY : 0); 1490 } 1491 1492 /* 1493 * Walk through the devfs hierarchy, correcting the permissions of 1494 * devices with default permissions that do not match those specified 1495 * by minor perm. This can only be done for all drivers for now. 1496 */ 1497 static int 1498 dv_reset_perm_dir(struct dv_node *ddv, uint_t flags) 1499 { 1500 struct dv_node *dv; 1501 struct vnode *vp; 1502 int retval = 0; 1503 struct vattr *attrp; 1504 mperm_t mp; 1505 char *nm; 1506 uid_t old_uid; 1507 gid_t old_gid; 1508 mode_t old_mode; 1509 1510 rw_enter(&ddv->dv_contents, RW_WRITER); 1511 for (dv = DV_FIRST_ENTRY(ddv); dv; dv = DV_NEXT_ENTRY(ddv, dv)) { 1512 int error = 0; 1513 nm = dv->dv_name; 1514 1515 rw_enter(&dv->dv_contents, RW_READER); 1516 vp = DVTOV(dv); 1517 if (vp->v_type == VDIR) { 1518 rw_exit(&dv->dv_contents); 1519 if (dv_reset_perm_dir(dv, flags) != 0) { 1520 error = EBUSY; 1521 } 1522 } else { 1523 ASSERT(vp->v_type == VCHR || vp->v_type == VBLK); 1524 1525 /* 1526 * Check for permissions from minor_perm 1527 * If there are none, we're done 1528 */ 1529 rw_exit(&dv->dv_contents); 1530 if (dev_minorperm(dv->dv_devi, nm, &mp) != 0) 1531 continue; 1532 1533 rw_enter(&dv->dv_contents, RW_READER); 1534 1535 /* 1536 * Allow a node's permissions to be altered 1537 * permanently from the defaults by chmod, 1538 * using the shadow node as backing store. 1539 * Otherwise, update node to minor_perm permissions. 1540 */ 1541 if (dv->dv_attrvp == NULLVP) { 1542 /* 1543 * No attribute vp, try to find one. 1544 */ 1545 dv_shadow_node(DVTOV(ddv), nm, vp, 1546 NULL, NULLVP, kcred, 0); 1547 } 1548 if (dv->dv_attrvp != NULLVP || dv->dv_attr == NULL) { 1549 rw_exit(&dv->dv_contents); 1550 continue; 1551 } 1552 1553 attrp = dv->dv_attr; 1554 1555 if (VATTRP_MP_CMP(attrp, mp) == 0) { 1556 dcmn_err5(("%s: no perm change: " 1557 "%d %d 0%o\n", nm, attrp->va_uid, 1558 attrp->va_gid, attrp->va_mode)); 1559 rw_exit(&dv->dv_contents); 1560 continue; 1561 } 1562 1563 old_uid = attrp->va_uid; 1564 old_gid = attrp->va_gid; 1565 old_mode = attrp->va_mode; 1566 1567 VATTRP_MP_MERGE(attrp, mp); 1568 mutex_enter(&vp->v_lock); 1569 if (vp->v_count > 0) { 1570 error = EBUSY; 1571 } 1572 mutex_exit(&vp->v_lock); 1573 1574 dcmn_err5(("%s: perm %d/%d/0%o -> %d/%d/0%o (%d)\n", 1575 nm, old_uid, old_gid, old_mode, attrp->va_uid, 1576 attrp->va_gid, attrp->va_mode, error)); 1577 1578 rw_exit(&dv->dv_contents); 1579 } 1580 1581 if (error != 0) { 1582 retval = error; 1583 } 1584 } 1585 1586 ddv->dv_flags |= DV_BUILD; 1587 1588 rw_exit(&ddv->dv_contents); 1589 1590 return (retval); 1591 } 1592 1593 int 1594 devfs_reset_perm(uint_t flags) 1595 { 1596 struct dv_node *dvp; 1597 int rval; 1598 1599 if ((dvp = devfs_dip_to_dvnode(ddi_root_node())) == NULL) 1600 return (0); 1601 1602 VN_HOLD(DVTOV(dvp)); 1603 rval = dv_reset_perm_dir(dvp, flags); 1604 VN_RELE(DVTOV(dvp)); 1605 return (rval); 1606 } 1607 1608 /* 1609 * Clean up dangling devfs shadow nodes for removed 1610 * drivers so that, in the event the driver is re-added 1611 * to the system, newly created nodes won't incorrectly 1612 * pick up these stale shadow node permissions. 1613 * 1614 * This is accomplished by walking down the pathname 1615 * to the directory, starting at the root's attribute 1616 * node, then removing all minors matching the specified 1617 * node name. Care must be taken to remove all entries 1618 * in a directory before the directory itself, so that 1619 * the clean-up associated with rem_drv'ing a nexus driver 1620 * does not inadvertently result in an inconsistent 1621 * filesystem underlying devfs. 1622 */ 1623 1624 static int 1625 devfs_remdrv_rmdir(vnode_t *dirvp, const char *dir, vnode_t *rvp) 1626 { 1627 int error; 1628 vnode_t *vp; 1629 int eof; 1630 struct iovec iov; 1631 struct uio uio; 1632 struct dirent64 *dp; 1633 dirent64_t *dbuf; 1634 size_t dlen; 1635 size_t dbuflen; 1636 int ndirents = 64; 1637 char *nm; 1638 1639 VN_HOLD(dirvp); 1640 1641 dlen = ndirents * (sizeof (*dbuf)); 1642 dbuf = kmem_alloc(dlen, KM_SLEEP); 1643 1644 uio.uio_iov = &iov; 1645 uio.uio_iovcnt = 1; 1646 uio.uio_segflg = UIO_SYSSPACE; 1647 uio.uio_fmode = 0; 1648 uio.uio_extflg = UIO_COPY_CACHED; 1649 uio.uio_loffset = 0; 1650 uio.uio_llimit = MAXOFFSET_T; 1651 1652 eof = 0; 1653 error = 0; 1654 while (!error && !eof) { 1655 uio.uio_resid = dlen; 1656 iov.iov_base = (char *)dbuf; 1657 iov.iov_len = dlen; 1658 1659 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1660 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0); 1661 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1662 1663 dbuflen = dlen - uio.uio_resid; 1664 1665 if (error || dbuflen == 0) 1666 break; 1667 1668 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); 1669 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 1670 1671 nm = dp->d_name; 1672 1673 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) 1674 continue; 1675 1676 error = VOP_LOOKUP(dirvp, nm, 1677 &vp, NULL, 0, NULL, kcred, NULL, NULL, NULL); 1678 1679 dsysdebug(error, 1680 ("rem_drv %s/%s lookup (%d)\n", 1681 dir, nm, error)); 1682 1683 if (error) 1684 continue; 1685 1686 ASSERT(vp->v_type == VDIR || 1687 vp->v_type == VCHR || vp->v_type == VBLK); 1688 1689 if (vp->v_type == VDIR) { 1690 error = devfs_remdrv_rmdir(vp, nm, rvp); 1691 if (error == 0) { 1692 error = VOP_RMDIR(dirvp, 1693 (char *)nm, rvp, kcred, NULL, 0); 1694 dsysdebug(error, 1695 ("rem_drv %s/%s rmdir (%d)\n", 1696 dir, nm, error)); 1697 } 1698 } else { 1699 error = VOP_REMOVE(dirvp, (char *)nm, kcred, 1700 NULL, 0); 1701 dsysdebug(error, 1702 ("rem_drv %s/%s remove (%d)\n", 1703 dir, nm, error)); 1704 } 1705 1706 VN_RELE(vp); 1707 if (error) { 1708 goto exit; 1709 } 1710 } 1711 } 1712 1713 exit: 1714 VN_RELE(dirvp); 1715 kmem_free(dbuf, dlen); 1716 1717 return (error); 1718 } 1719 1720 int 1721 devfs_remdrv_cleanup(const char *dir, const char *nodename) 1722 { 1723 int error; 1724 vnode_t *vp; 1725 vnode_t *dirvp; 1726 int eof; 1727 struct iovec iov; 1728 struct uio uio; 1729 struct dirent64 *dp; 1730 dirent64_t *dbuf; 1731 size_t dlen; 1732 size_t dbuflen; 1733 int ndirents = 64; 1734 int nodenamelen = strlen(nodename); 1735 char *nm; 1736 struct pathname pn; 1737 vnode_t *rvp; /* root node of the underlying attribute fs */ 1738 1739 dcmn_err5(("devfs_remdrv_cleanup: %s %s\n", dir, nodename)); 1740 1741 if (error = pn_get((char *)dir, UIO_SYSSPACE, &pn)) 1742 return (0); 1743 1744 rvp = dvroot->dv_attrvp; 1745 ASSERT(rvp != NULL); 1746 VN_HOLD(rvp); 1747 1748 pn_skipslash(&pn); 1749 dirvp = rvp; 1750 VN_HOLD(dirvp); 1751 1752 nm = kmem_alloc(MAXNAMELEN, KM_SLEEP); 1753 1754 while (pn_pathleft(&pn)) { 1755 ASSERT(dirvp->v_type == VDIR); 1756 (void) pn_getcomponent(&pn, nm); 1757 ASSERT((strcmp(nm, ".") != 0) && (strcmp(nm, "..") != 0)); 1758 error = VOP_LOOKUP(dirvp, nm, &vp, NULL, 0, rvp, kcred, 1759 NULL, NULL, NULL); 1760 if (error) { 1761 dcmn_err5(("remdrv_cleanup %s lookup error %d\n", 1762 nm, error)); 1763 VN_RELE(dirvp); 1764 if (dirvp != rvp) 1765 VN_RELE(rvp); 1766 pn_free(&pn); 1767 kmem_free(nm, MAXNAMELEN); 1768 return (0); 1769 } 1770 VN_RELE(dirvp); 1771 dirvp = vp; 1772 pn_skipslash(&pn); 1773 } 1774 1775 ASSERT(dirvp->v_type == VDIR); 1776 if (dirvp != rvp) 1777 VN_RELE(rvp); 1778 pn_free(&pn); 1779 kmem_free(nm, MAXNAMELEN); 1780 1781 dlen = ndirents * (sizeof (*dbuf)); 1782 dbuf = kmem_alloc(dlen, KM_SLEEP); 1783 1784 uio.uio_iov = &iov; 1785 uio.uio_iovcnt = 1; 1786 uio.uio_segflg = UIO_SYSSPACE; 1787 uio.uio_fmode = 0; 1788 uio.uio_extflg = UIO_COPY_CACHED; 1789 uio.uio_loffset = 0; 1790 uio.uio_llimit = MAXOFFSET_T; 1791 1792 eof = 0; 1793 error = 0; 1794 while (!error && !eof) { 1795 uio.uio_resid = dlen; 1796 iov.iov_base = (char *)dbuf; 1797 iov.iov_len = dlen; 1798 1799 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1800 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0); 1801 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1802 1803 dbuflen = dlen - uio.uio_resid; 1804 1805 if (error || dbuflen == 0) 1806 break; 1807 1808 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); 1809 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 1810 1811 nm = dp->d_name; 1812 1813 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) 1814 continue; 1815 1816 if (strncmp(nm, nodename, nodenamelen) != 0) 1817 continue; 1818 1819 error = VOP_LOOKUP(dirvp, nm, &vp, 1820 NULL, 0, NULL, kcred, NULL, NULL, NULL); 1821 1822 dsysdebug(error, 1823 ("rem_drv %s/%s lookup (%d)\n", 1824 dir, nm, error)); 1825 1826 if (error) 1827 continue; 1828 1829 ASSERT(vp->v_type == VDIR || 1830 vp->v_type == VCHR || vp->v_type == VBLK); 1831 1832 if (vp->v_type == VDIR) { 1833 error = devfs_remdrv_rmdir(vp, nm, rvp); 1834 if (error == 0) { 1835 error = VOP_RMDIR(dirvp, (char *)nm, 1836 rvp, kcred, NULL, 0); 1837 dsysdebug(error, 1838 ("rem_drv %s/%s rmdir (%d)\n", 1839 dir, nm, error)); 1840 } 1841 } else { 1842 error = VOP_REMOVE(dirvp, (char *)nm, kcred, 1843 NULL, 0); 1844 dsysdebug(error, 1845 ("rem_drv %s/%s remove (%d)\n", 1846 dir, nm, error)); 1847 } 1848 1849 VN_RELE(vp); 1850 if (error) 1851 goto exit; 1852 } 1853 } 1854 1855 exit: 1856 VN_RELE(dirvp); 1857 1858 kmem_free(dbuf, dlen); 1859 1860 return (0); 1861 } 1862 1863 struct dv_list { 1864 struct dv_node *dv; 1865 struct dv_list *next; 1866 }; 1867 1868 void 1869 dv_walk( 1870 struct dv_node *ddv, 1871 char *devnm, 1872 void (*callback)(struct dv_node *, void *), 1873 void *arg) 1874 { 1875 struct vnode *dvp; 1876 struct dv_node *dv; 1877 struct dv_list *head, *tail, *next; 1878 int len; 1879 1880 dcmn_err3(("dv_walk: ddv = %s, devnm = %s\n", 1881 ddv->dv_name, devnm ? devnm : "<null>")); 1882 1883 dvp = DVTOV(ddv); 1884 1885 ASSERT(dvp->v_type == VDIR); 1886 1887 head = tail = next = NULL; 1888 1889 rw_enter(&ddv->dv_contents, RW_READER); 1890 mutex_enter(&dvp->v_lock); 1891 for (dv = DV_FIRST_ENTRY(ddv); dv; dv = DV_NEXT_ENTRY(ddv, dv)) { 1892 /* 1893 * If devnm is not NULL and is not the empty string, 1894 * select only dv_nodes with matching non-minor name 1895 */ 1896 if (devnm && (len = strlen(devnm)) && 1897 (strncmp(devnm, dv->dv_name, len) || 1898 (dv->dv_name[len] != ':' && dv->dv_name[len] != '\0'))) 1899 continue; 1900 1901 callback(dv, arg); 1902 1903 if (DVTOV(dv)->v_type != VDIR) 1904 continue; 1905 1906 next = kmem_zalloc(sizeof (*next), KM_SLEEP); 1907 next->dv = dv; 1908 1909 if (tail) 1910 tail->next = next; 1911 else 1912 head = next; 1913 1914 tail = next; 1915 } 1916 1917 while (head) { 1918 dv_walk(head->dv, NULL, callback, arg); 1919 next = head->next; 1920 kmem_free(head, sizeof (*head)); 1921 head = next; 1922 } 1923 rw_exit(&ddv->dv_contents); 1924 mutex_exit(&dvp->v_lock); 1925 } 1926