1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * miscellaneous routines for the devfs 28 */ 29 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/t_lock.h> 33 #include <sys/systm.h> 34 #include <sys/sysmacros.h> 35 #include <sys/user.h> 36 #include <sys/time.h> 37 #include <sys/vfs.h> 38 #include <sys/vnode.h> 39 #include <sys/file.h> 40 #include <sys/fcntl.h> 41 #include <sys/flock.h> 42 #include <sys/kmem.h> 43 #include <sys/uio.h> 44 #include <sys/errno.h> 45 #include <sys/stat.h> 46 #include <sys/cred.h> 47 #include <sys/dirent.h> 48 #include <sys/pathname.h> 49 #include <sys/cmn_err.h> 50 #include <sys/debug.h> 51 #include <sys/modctl.h> 52 #include <fs/fs_subr.h> 53 #include <sys/fs/dv_node.h> 54 #include <sys/fs/snode.h> 55 #include <sys/sunndi.h> 56 #include <sys/sunmdi.h> 57 #include <sys/conf.h> 58 59 #ifdef DEBUG 60 int devfs_debug = 0x0; 61 #endif 62 63 const char dvnm[] = "devfs"; 64 kmem_cache_t *dv_node_cache; /* dv_node cache */ 65 66 /* 67 * The devfs_clean_key is taken during a devfs_clean operation: it is used to 68 * prevent unnecessary code execution and for detection of potential deadlocks. 69 */ 70 uint_t devfs_clean_key; 71 72 struct dv_node *dvroot; 73 74 /* prototype memory vattrs */ 75 vattr_t dv_vattr_dir = { 76 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 77 VDIR, /* va_type */ 78 DV_DIRMODE_DEFAULT, /* va_mode */ 79 DV_UID_DEFAULT, /* va_uid */ 80 DV_GID_DEFAULT, /* va_gid */ 81 0, /* va_fsid; */ 82 0, /* va_nodeid; */ 83 0, /* va_nlink; */ 84 0, /* va_size; */ 85 0, /* va_atime; */ 86 0, /* va_mtime; */ 87 0, /* va_ctime; */ 88 0, /* va_rdev; */ 89 0, /* va_blksize; */ 90 0, /* va_nblocks; */ 91 0, /* va_seq; */ 92 }; 93 94 vattr_t dv_vattr_file = { 95 AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV, /* va_mask */ 96 0, /* va_type */ 97 DV_DEVMODE_DEFAULT, /* va_mode */ 98 DV_UID_DEFAULT, /* va_uid */ 99 DV_GID_DEFAULT, /* va_gid */ 100 0, /* va_fsid; */ 101 0, /* va_nodeid; */ 102 0, /* va_nlink; */ 103 0, /* va_size; */ 104 0, /* va_atime; */ 105 0, /* va_mtime; */ 106 0, /* va_ctime; */ 107 0, /* va_rdev; */ 108 0, /* va_blksize; */ 109 0, /* va_nblocks; */ 110 0, /* va_seq; */ 111 }; 112 113 vattr_t dv_vattr_priv = { 114 AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV, /* va_mask */ 115 0, /* va_type */ 116 DV_DEVMODE_PRIV, /* va_mode */ 117 DV_UID_DEFAULT, /* va_uid */ 118 DV_GID_DEFAULT, /* va_gid */ 119 0, /* va_fsid; */ 120 0, /* va_nodeid; */ 121 0, /* va_nlink; */ 122 0, /* va_size; */ 123 0, /* va_atime; */ 124 0, /* va_mtime; */ 125 0, /* va_ctime; */ 126 0, /* va_rdev; */ 127 0, /* va_blksize; */ 128 0, /* va_nblocks; */ 129 0, /* va_seq; */ 130 }; 131 132 extern dev_info_t *clone_dip; 133 extern major_t clone_major; 134 extern struct dev_ops *ddi_hold_driver(major_t); 135 136 /* dv_node node constructor for kmem cache */ 137 static int 138 i_dv_node_ctor(void *buf, void *cfarg, int flag) 139 { 140 _NOTE(ARGUNUSED(cfarg, flag)) 141 struct dv_node *dv = (struct dv_node *)buf; 142 struct vnode *vp; 143 144 bzero(buf, sizeof (struct dv_node)); 145 vp = dv->dv_vnode = vn_alloc(flag); 146 if (vp == NULL) { 147 return (-1); 148 } 149 vp->v_data = dv; 150 rw_init(&dv->dv_contents, NULL, RW_DEFAULT, NULL); 151 return (0); 152 } 153 154 /* dv_node node destructor for kmem cache */ 155 static void 156 i_dv_node_dtor(void *buf, void *arg) 157 { 158 _NOTE(ARGUNUSED(arg)) 159 struct dv_node *dv = (struct dv_node *)buf; 160 struct vnode *vp = DVTOV(dv); 161 162 rw_destroy(&dv->dv_contents); 163 vn_invalid(vp); 164 vn_free(vp); 165 } 166 167 168 /* initialize dv_node node cache */ 169 void 170 dv_node_cache_init() 171 { 172 ASSERT(dv_node_cache == NULL); 173 dv_node_cache = kmem_cache_create("dv_node_cache", 174 sizeof (struct dv_node), 0, i_dv_node_ctor, i_dv_node_dtor, 175 NULL, NULL, NULL, 0); 176 177 tsd_create(&devfs_clean_key, NULL); 178 } 179 180 /* destroy dv_node node cache */ 181 void 182 dv_node_cache_fini() 183 { 184 ASSERT(dv_node_cache != NULL); 185 kmem_cache_destroy(dv_node_cache); 186 dv_node_cache = NULL; 187 188 tsd_destroy(&devfs_clean_key); 189 } 190 191 /* 192 * dv_mkino - Generate a unique inode number for devfs nodes. 193 * 194 * Although ino_t is 64 bits, the inode number is truncated to 32 bits for 32 195 * bit non-LARGEFILE applications. This means that there is a requirement to 196 * maintain the inode number as a 32 bit value or applications will have 197 * stat(2) calls fail with EOVERFLOW. We form a 32 bit inode number from the 198 * dev_t. but if the minor number is larger than L_MAXMIN32 we fold extra minor 199 * 200 * To generate inode numbers for directories, we assume that we will never use 201 * more than half the major space - this allows for ~8190 drivers. We use this 202 * upper major number space to allocate inode numbers for directories by 203 * encoding the major and instance into this space. 204 * 205 * We also skew the result so that inode 2 is reserved for the root of the file 206 * system. 207 * 208 * As part of the future support for 64-bit dev_t APIs, the upper minor bits 209 * should be folded into the high inode bits by adding the following code 210 * after "ino |= 1": 211 * 212 * #if (L_BITSMINOR32 != L_BITSMINOR) 213 * |* fold overflow minor bits into high bits of inode number *| 214 * ino |= ((ino_t)(minor >> L_BITSMINOR32)) << L_BITSMINOR; 215 * #endif |* (L_BITSMINOR32 != L_BITSMINOR) *| 216 * 217 * This way only applications that use devices that overflow their minor 218 * space will have an application level impact. 219 */ 220 static ino_t 221 dv_mkino(dev_info_t *devi, vtype_t typ, dev_t dev) 222 { 223 major_t major; 224 minor_t minor; 225 ino_t ino; 226 static int warn; 227 228 if (typ == VDIR) { 229 major = ((L_MAXMAJ32 + 1) >> 1) + DEVI(devi)->devi_major; 230 minor = ddi_get_instance(devi); 231 232 /* makedevice32 in high half of major number space */ 233 ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32)); 234 235 major = DEVI(devi)->devi_major; 236 } else { 237 major = getmajor(dev); 238 minor = getminor(dev); 239 240 /* makedevice32 */ 241 ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32)); 242 243 /* make ino for VCHR different than VBLK */ 244 ino <<= 1; 245 if (typ == VCHR) 246 ino |= 1; 247 } 248 249 ino += DV_ROOTINO + 1; /* skew */ 250 251 /* 252 * diagnose things a little early because adding the skew to a large 253 * minor number could roll over the major. 254 */ 255 if ((major >= (L_MAXMAJ32 >> 1)) && (warn == 0)) { 256 warn = 1; 257 cmn_err(CE_WARN, "%s: inode numbers are not unique", dvnm); 258 } 259 260 return (ino); 261 } 262 263 /* 264 * Compare two nodes lexographically to balance avl tree 265 */ 266 static int 267 dv_compare_nodes(const struct dv_node *dv1, const struct dv_node *dv2) 268 { 269 int rv; 270 271 if ((rv = strcmp(dv1->dv_name, dv2->dv_name)) == 0) 272 return (0); 273 return ((rv < 0) ? -1 : 1); 274 } 275 276 /* 277 * dv_mkroot 278 * 279 * Build the first VDIR dv_node. 280 */ 281 struct dv_node * 282 dv_mkroot(struct vfs *vfsp, dev_t devfsdev) 283 { 284 struct dv_node *dv; 285 struct vnode *vp; 286 287 ASSERT(ddi_root_node() != NULL); 288 ASSERT(dv_node_cache != NULL); 289 290 dcmn_err3(("dv_mkroot\n")); 291 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP); 292 vp = DVTOV(dv); 293 vn_reinit(vp); 294 vp->v_flag = VROOT; 295 vp->v_vfsp = vfsp; 296 vp->v_type = VDIR; 297 vp->v_rdev = devfsdev; 298 vn_setops(vp, dv_vnodeops); 299 vn_exists(vp); 300 301 dvroot = dv; 302 303 dv->dv_name = NULL; /* not needed */ 304 dv->dv_namelen = 0; 305 306 dv->dv_devi = ddi_root_node(); 307 308 dv->dv_ino = DV_ROOTINO; 309 dv->dv_nlink = 2; /* name + . (no dv_insert) */ 310 dv->dv_dotdot = dv; /* .. == self */ 311 dv->dv_attrvp = NULLVP; 312 dv->dv_attr = NULL; 313 dv->dv_flags = DV_BUILD; 314 dv->dv_priv = NULL; 315 dv->dv_busy = 0; 316 dv->dv_dflt_mode = 0; 317 318 avl_create(&dv->dv_entries, 319 (int (*)(const void *, const void *))dv_compare_nodes, 320 sizeof (struct dv_node), offsetof(struct dv_node, dv_avllink)); 321 322 return (dv); 323 } 324 325 /* 326 * dv_mkdir 327 * 328 * Given an probed or attached nexus node, create a VDIR dv_node. 329 * No dv_attrvp is created at this point. 330 */ 331 struct dv_node * 332 dv_mkdir(struct dv_node *ddv, dev_info_t *devi, char *nm) 333 { 334 struct dv_node *dv; 335 struct vnode *vp; 336 size_t nmlen; 337 338 ASSERT((devi)); 339 dcmn_err4(("dv_mkdir: %s\n", nm)); 340 341 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP); 342 nmlen = strlen(nm) + 1; 343 dv->dv_name = kmem_alloc(nmlen, KM_SLEEP); 344 bcopy(nm, dv->dv_name, nmlen); 345 dv->dv_namelen = nmlen - 1; /* '\0' not included */ 346 347 vp = DVTOV(dv); 348 vn_reinit(vp); 349 vp->v_flag = 0; 350 vp->v_vfsp = DVTOV(ddv)->v_vfsp; 351 vp->v_type = VDIR; 352 vp->v_rdev = DVTOV(ddv)->v_rdev; 353 vn_setops(vp, vn_getops(DVTOV(ddv))); 354 vn_exists(vp); 355 356 dv->dv_devi = devi; 357 ndi_hold_devi(devi); 358 359 dv->dv_ino = dv_mkino(devi, VDIR, NODEV); 360 dv->dv_nlink = 0; /* updated on insert */ 361 dv->dv_dotdot = ddv; 362 dv->dv_attrvp = NULLVP; 363 dv->dv_attr = NULL; 364 dv->dv_flags = DV_BUILD; 365 dv->dv_priv = NULL; 366 dv->dv_busy = 0; 367 dv->dv_dflt_mode = 0; 368 369 avl_create(&dv->dv_entries, 370 (int (*)(const void *, const void *))dv_compare_nodes, 371 sizeof (struct dv_node), offsetof(struct dv_node, dv_avllink)); 372 373 return (dv); 374 } 375 376 /* 377 * dv_mknod 378 * 379 * Given a minor node, create a VCHR or VBLK dv_node. 380 * No dv_attrvp is created at this point. 381 */ 382 static struct dv_node * 383 dv_mknod(struct dv_node *ddv, dev_info_t *devi, char *nm, 384 struct ddi_minor_data *dmd) 385 { 386 struct dv_node *dv; 387 struct vnode *vp; 388 size_t nmlen; 389 390 dcmn_err4(("dv_mknod: %s\n", nm)); 391 392 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP); 393 nmlen = strlen(nm) + 1; 394 dv->dv_name = kmem_alloc(nmlen, KM_SLEEP); 395 bcopy(nm, dv->dv_name, nmlen); 396 dv->dv_namelen = nmlen - 1; /* no '\0' */ 397 398 vp = DVTOV(dv); 399 vn_reinit(vp); 400 vp->v_flag = 0; 401 vp->v_vfsp = DVTOV(ddv)->v_vfsp; 402 vp->v_type = dmd->ddm_spec_type == S_IFCHR ? VCHR : VBLK; 403 vp->v_rdev = dmd->ddm_dev; 404 vn_setops(vp, vn_getops(DVTOV(ddv))); 405 vn_exists(vp); 406 407 /* increment dev_ref with devi_lock held */ 408 ASSERT(DEVI_BUSY_OWNED(devi)); 409 mutex_enter(&DEVI(devi)->devi_lock); 410 dv->dv_devi = devi; 411 DEVI(devi)->devi_ref++; /* ndi_hold_devi(dip) */ 412 mutex_exit(&DEVI(devi)->devi_lock); 413 414 dv->dv_ino = dv_mkino(devi, vp->v_type, vp->v_rdev); 415 dv->dv_nlink = 0; /* updated on insert */ 416 dv->dv_dotdot = ddv; 417 dv->dv_attrvp = NULLVP; 418 dv->dv_attr = NULL; 419 dv->dv_flags = 0; 420 421 if (dmd->type == DDM_INTERNAL_PATH) 422 dv->dv_flags |= DV_INTERNAL; 423 if (dmd->ddm_flags & DM_NO_FSPERM) 424 dv->dv_flags |= DV_NO_FSPERM; 425 426 dv->dv_priv = dmd->ddm_node_priv; 427 if (dv->dv_priv) 428 dphold(dv->dv_priv); 429 430 /* 431 * Minors created with ddi_create_priv_minor_node can specify 432 * a default mode permission other than the devfs default. 433 */ 434 if (dv->dv_priv || dv->dv_flags & DV_NO_FSPERM) { 435 dcmn_err5(("%s: dv_mknod default priv mode 0%o\n", 436 dv->dv_name, dmd->ddm_priv_mode)); 437 dv->dv_flags |= DV_DFLT_MODE; 438 dv->dv_dflt_mode = dmd->ddm_priv_mode & S_IAMB; 439 } 440 441 return (dv); 442 } 443 444 /* 445 * dv_destroy 446 * 447 * Destroy what we created in dv_mkdir or dv_mknod. 448 * In the case of a *referenced* directory, do nothing. 449 */ 450 void 451 dv_destroy(struct dv_node *dv, uint_t flags) 452 { 453 vnode_t *vp = DVTOV(dv); 454 ASSERT(dv->dv_nlink == 0); /* no references */ 455 456 dcmn_err4(("dv_destroy: %s\n", dv->dv_name)); 457 458 /* 459 * We may be asked to unlink referenced directories. 460 * In this case, there is nothing to be done. 461 * The eventual memory free will be done in 462 * devfs_inactive. 463 */ 464 if (vp->v_count != 0) { 465 ASSERT(vp->v_type == VDIR); 466 ASSERT(flags & DV_CLEAN_FORCE); 467 ASSERT(DV_STALE(dv)); 468 return; 469 } 470 471 if (vp->v_type == VDIR) { 472 ASSERT(DV_FIRST_ENTRY(dv) == NULL); 473 avl_destroy(&dv->dv_entries); 474 } 475 476 if (dv->dv_attrvp != NULLVP) 477 VN_RELE(dv->dv_attrvp); 478 if (dv->dv_attr != NULL) 479 kmem_free(dv->dv_attr, sizeof (struct vattr)); 480 if (dv->dv_name != NULL) 481 kmem_free(dv->dv_name, dv->dv_namelen + 1); 482 if (dv->dv_devi != NULL) { 483 ndi_rele_devi(dv->dv_devi); 484 } 485 if (dv->dv_priv != NULL) { 486 dpfree(dv->dv_priv); 487 } 488 489 kmem_cache_free(dv_node_cache, dv); 490 } 491 492 /* 493 * Find and hold dv_node by name 494 */ 495 static struct dv_node * 496 dv_findbyname(struct dv_node *ddv, char *nm) 497 { 498 struct dv_node *dv; 499 avl_index_t where; 500 struct dv_node dvtmp; 501 502 ASSERT(RW_LOCK_HELD(&ddv->dv_contents)); 503 dcmn_err3(("dv_findbyname: %s\n", nm)); 504 505 dvtmp.dv_name = nm; 506 dv = avl_find(&ddv->dv_entries, &dvtmp, &where); 507 if (dv) { 508 ASSERT(dv->dv_dotdot == ddv); 509 ASSERT(strcmp(dv->dv_name, nm) == 0); 510 VN_HOLD(DVTOV(dv)); 511 return (dv); 512 } 513 return (NULL); 514 } 515 516 /* 517 * Inserts a new dv_node in a parent directory 518 */ 519 void 520 dv_insert(struct dv_node *ddv, struct dv_node *dv) 521 { 522 avl_index_t where; 523 524 ASSERT(RW_WRITE_HELD(&ddv->dv_contents)); 525 ASSERT(DVTOV(ddv)->v_type == VDIR); 526 ASSERT(ddv->dv_nlink >= 2); 527 ASSERT(dv->dv_nlink == 0); 528 529 dcmn_err3(("dv_insert: %s\n", dv->dv_name)); 530 531 dv->dv_dotdot = ddv; 532 if (DVTOV(dv)->v_type == VDIR) { 533 ddv->dv_nlink++; /* .. to containing directory */ 534 dv->dv_nlink = 2; /* name + . */ 535 } else { 536 dv->dv_nlink = 1; /* name */ 537 } 538 539 /* enter node in the avl tree */ 540 VERIFY(avl_find(&ddv->dv_entries, dv, &where) == NULL); 541 avl_insert(&ddv->dv_entries, dv, where); 542 } 543 544 /* 545 * Unlink a dv_node from a perent directory 546 */ 547 void 548 dv_unlink(struct dv_node *ddv, struct dv_node *dv) 549 { 550 /* verify linkage of arguments */ 551 ASSERT(ddv && dv); 552 ASSERT(dv->dv_dotdot == ddv); 553 ASSERT(RW_WRITE_HELD(&ddv->dv_contents)); 554 ASSERT(DVTOV(ddv)->v_type == VDIR); 555 556 dcmn_err3(("dv_unlink: %s\n", dv->dv_name)); 557 558 if (DVTOV(dv)->v_type == VDIR) { 559 ddv->dv_nlink--; /* .. to containing directory */ 560 dv->dv_nlink -= 2; /* name + . */ 561 } else { 562 dv->dv_nlink -= 1; /* name */ 563 } 564 ASSERT(ddv->dv_nlink >= 2); 565 ASSERT(dv->dv_nlink == 0); 566 567 dv->dv_dotdot = NULL; 568 569 /* remove from avl tree */ 570 avl_remove(&ddv->dv_entries, dv); 571 } 572 573 /* 574 * Merge devfs node specific information into an attribute structure. 575 * 576 * NOTE: specfs provides ATIME,MTIME,CTIME,SIZE,BLKSIZE,NBLOCKS on leaf node. 577 */ 578 void 579 dv_vattr_merge(struct dv_node *dv, struct vattr *vap) 580 { 581 struct vnode *vp = DVTOV(dv); 582 583 vap->va_nodeid = dv->dv_ino; 584 vap->va_nlink = dv->dv_nlink; 585 586 if (vp->v_type == VDIR) { 587 vap->va_rdev = 0; 588 vap->va_fsid = vp->v_rdev; 589 } else { 590 vap->va_rdev = vp->v_rdev; 591 vap->va_fsid = DVTOV(dv->dv_dotdot)->v_rdev; 592 vap->va_type = vp->v_type; 593 /* don't trust the shadow file type */ 594 vap->va_mode &= ~S_IFMT; 595 if (vap->va_type == VCHR) 596 vap->va_mode |= S_IFCHR; 597 else 598 vap->va_mode |= S_IFBLK; 599 } 600 } 601 602 /* 603 * Get default device permission by consulting rules in 604 * privilege specification in minor node and /etc/minor_perm. 605 * 606 * This function is called from the devname filesystem to get default 607 * permissions for a device exported to a non-global zone. 608 */ 609 void 610 devfs_get_defattr(struct vnode *vp, struct vattr *vap, int *no_fs_perm) 611 { 612 mperm_t mp; 613 struct dv_node *dv; 614 615 /* If vp isn't a dv_node, return something sensible */ 616 if (!vn_matchops(vp, dv_vnodeops)) { 617 if (no_fs_perm) 618 *no_fs_perm = 0; 619 *vap = dv_vattr_file; 620 return; 621 } 622 623 /* 624 * For minors not created by ddi_create_priv_minor_node(), 625 * use devfs defaults. 626 */ 627 dv = VTODV(vp); 628 if (vp->v_type == VDIR) { 629 *vap = dv_vattr_dir; 630 } else if (dv->dv_flags & DV_NO_FSPERM) { 631 if (no_fs_perm) 632 *no_fs_perm = 1; 633 *vap = dv_vattr_priv; 634 } else { 635 /* 636 * look up perm bits from minor_perm 637 */ 638 *vap = dv_vattr_file; 639 if (dev_minorperm(dv->dv_devi, dv->dv_name, &mp) == 0) { 640 VATTR_MP_MERGE((*vap), mp); 641 dcmn_err5(("%s: minor perm mode 0%o\n", 642 dv->dv_name, vap->va_mode)); 643 } else if (dv->dv_flags & DV_DFLT_MODE) { 644 ASSERT((dv->dv_dflt_mode & ~S_IAMB) == 0); 645 vap->va_mode &= ~S_IAMB; 646 vap->va_mode |= dv->dv_dflt_mode; 647 dcmn_err5(("%s: priv mode 0%o\n", 648 dv->dv_name, vap->va_mode)); 649 } 650 } 651 } 652 653 /* 654 * dv_shadow_node 655 * 656 * Given a VDIR dv_node, find/create the associated VDIR 657 * node in the shadow attribute filesystem. 658 * 659 * Given a VCHR/VBLK dv_node, find the associated VREG 660 * node in the shadow attribute filesystem. These nodes 661 * are only created to persist non-default attributes. 662 * Lack of such a node implies the default permissions 663 * are sufficient. 664 * 665 * Managing the attribute file entries is slightly tricky (mostly 666 * because we can't intercept VN_HOLD and VN_RELE except on the last 667 * release). 668 * 669 * We assert that if the dv_attrvp pointer is non-NULL, it points 670 * to a singly-held (by us) vnode that represents the shadow entry 671 * in the underlying filesystem. To avoid store-ordering issues, 672 * we assert that the pointer can only be tested under the dv_contents 673 * READERS lock. 674 */ 675 676 void 677 dv_shadow_node( 678 struct vnode *dvp, /* devfs parent directory vnode */ 679 char *nm, /* name component */ 680 struct vnode *vp, /* devfs vnode */ 681 struct pathname *pnp, /* the path .. */ 682 struct vnode *rdir, /* the root .. */ 683 struct cred *cred, /* who's asking? */ 684 int flags) /* optionally create shadow node */ 685 { 686 struct dv_node *dv; /* dv_node of named directory */ 687 struct vnode *rdvp; /* shadow parent directory vnode */ 688 struct vnode *rvp; /* shadow vnode */ 689 struct vnode *rrvp; /* realvp of shadow vnode */ 690 struct vattr vattr; 691 int create_tried; 692 int error; 693 694 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK); 695 dv = VTODV(vp); 696 dcmn_err3(("dv_shadow_node: name %s attr %p\n", 697 nm, (void *)dv->dv_attrvp)); 698 699 if ((flags & DV_SHADOW_WRITE_HELD) == 0) { 700 ASSERT(RW_READ_HELD(&dv->dv_contents)); 701 if (dv->dv_attrvp != NULLVP) 702 return; 703 if (!rw_tryupgrade(&dv->dv_contents)) { 704 rw_exit(&dv->dv_contents); 705 rw_enter(&dv->dv_contents, RW_WRITER); 706 if (dv->dv_attrvp != NULLVP) { 707 rw_downgrade(&dv->dv_contents); 708 return; 709 } 710 } 711 } else { 712 ASSERT(RW_WRITE_HELD(&dv->dv_contents)); 713 if (dv->dv_attrvp != NULLVP) 714 return; 715 } 716 717 ASSERT(RW_WRITE_HELD(&dv->dv_contents) && dv->dv_attrvp == NULL); 718 719 rdvp = VTODV(dvp)->dv_attrvp; 720 create_tried = 0; 721 lookup: 722 if (rdvp && (dv->dv_flags & DV_NO_FSPERM) == 0) { 723 error = VOP_LOOKUP(rdvp, nm, &rvp, pnp, LOOKUP_DIR, rdir, cred, 724 NULL, NULL, NULL); 725 726 /* factor out the snode since we only want the attribute node */ 727 if ((error == 0) && (VOP_REALVP(rvp, &rrvp, NULL) == 0)) { 728 VN_HOLD(rrvp); 729 VN_RELE(rvp); 730 rvp = rrvp; 731 } 732 } else 733 error = EROFS; /* no parent, no entry */ 734 735 /* 736 * All we want is the permissions (and maybe ACLs and 737 * extended attributes), and we want to perform lookups 738 * by name. Drivers occasionally change their minor 739 * number space. If something changes, there's no 740 * much we can do about it here. 741 */ 742 743 /* The shadow node checks out. We are done */ 744 if (error == 0) { 745 dv->dv_attrvp = rvp; /* with one hold */ 746 747 /* 748 * Determine if we have non-trivial ACLs on this node. 749 * It is not necessary to VOP_RWLOCK since fs_acl_nontrivial 750 * only does VOP_GETSECATTR. 751 */ 752 dv->dv_flags &= ~DV_ACL; 753 754 if (fs_acl_nontrivial(rvp, cred)) 755 dv->dv_flags |= DV_ACL; 756 757 /* 758 * If we have synced out the memory attributes, free 759 * them and switch back to using the persistent store. 760 */ 761 if (rvp && dv->dv_attr) { 762 kmem_free(dv->dv_attr, sizeof (struct vattr)); 763 dv->dv_attr = NULL; 764 } 765 if ((flags & DV_SHADOW_WRITE_HELD) == 0) 766 rw_downgrade(&dv->dv_contents); 767 ASSERT(RW_LOCK_HELD(&dv->dv_contents)); 768 return; 769 } 770 771 /* 772 * Failed to find attribute in persistent backing store, 773 * get default permission bits. 774 */ 775 devfs_get_defattr(vp, &vattr, NULL); 776 777 dv_vattr_merge(dv, &vattr); 778 gethrestime(&vattr.va_atime); 779 vattr.va_mtime = vattr.va_atime; 780 vattr.va_ctime = vattr.va_atime; 781 782 /* 783 * Try to create shadow dir. This is necessary in case 784 * we need to create a shadow leaf node later, when user 785 * executes chmod. 786 */ 787 if ((error == ENOENT) && !create_tried) { 788 switch (vp->v_type) { 789 case VDIR: 790 error = VOP_MKDIR(rdvp, nm, &vattr, &rvp, kcred, 791 NULL, 0, NULL); 792 dsysdebug(error, ("vop_mkdir %s %s %d\n", 793 VTODV(dvp)->dv_name, nm, error)); 794 create_tried = 1; 795 break; 796 797 case VCHR: 798 case VBLK: 799 /* 800 * Shadow nodes are only created on demand 801 */ 802 if (flags & DV_SHADOW_CREATE) { 803 error = VOP_CREATE(rdvp, nm, &vattr, NONEXCL, 804 VREAD|VWRITE, &rvp, kcred, 0, NULL, NULL); 805 dsysdebug(error, ("vop_create %s %s %d\n", 806 VTODV(dvp)->dv_name, nm, error)); 807 create_tried = 1; 808 } 809 break; 810 811 default: 812 cmn_err(CE_PANIC, "devfs: %s: create", dvnm); 813 /*NOTREACHED*/ 814 } 815 816 if (create_tried && 817 (error == 0) || (error == EEXIST)) { 818 VN_RELE(rvp); 819 goto lookup; 820 } 821 } 822 823 /* Store attribute in memory */ 824 if (dv->dv_attr == NULL) { 825 dv->dv_attr = kmem_alloc(sizeof (struct vattr), KM_SLEEP); 826 *(dv->dv_attr) = vattr; 827 } 828 829 if ((flags & DV_SHADOW_WRITE_HELD) == 0) 830 rw_downgrade(&dv->dv_contents); 831 ASSERT(RW_LOCK_HELD(&dv->dv_contents)); 832 } 833 834 /* 835 * Given a devinfo node, and a name, returns the appropriate 836 * minor information for that named node, if it exists. 837 */ 838 static int 839 dv_find_leafnode(dev_info_t *devi, char *minor_nm, struct ddi_minor_data *r_mi) 840 { 841 struct ddi_minor_data *dmd; 842 843 ASSERT(i_ddi_devi_attached(devi)); 844 845 dcmn_err3(("dv_find_leafnode: %s\n", minor_nm)); 846 ASSERT(DEVI_BUSY_OWNED(devi)); 847 for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) { 848 849 /* 850 * Skip alias nodes and nodes without a name. 851 */ 852 if ((dmd->type == DDM_ALIAS) || (dmd->ddm_name == NULL)) 853 continue; 854 855 dcmn_err4(("dv_find_leafnode: (%s,%s)\n", 856 minor_nm, dmd->ddm_name)); 857 if (strcmp(minor_nm, dmd->ddm_name) == 0) { 858 r_mi->ddm_dev = dmd->ddm_dev; 859 r_mi->ddm_spec_type = dmd->ddm_spec_type; 860 r_mi->type = dmd->type; 861 r_mi->ddm_flags = dmd->ddm_flags; 862 r_mi->ddm_node_priv = dmd->ddm_node_priv; 863 r_mi->ddm_priv_mode = dmd->ddm_priv_mode; 864 if (r_mi->ddm_node_priv) 865 dphold(r_mi->ddm_node_priv); 866 return (0); 867 } 868 } 869 870 dcmn_err3(("dv_find_leafnode: %s: ENOENT\n", minor_nm)); 871 return (ENOENT); 872 } 873 874 /* 875 * Special handling for clone node: 876 * Clone minor name is a driver name, the minor number will 877 * be the major number of the driver. There is no minor 878 * node under the clone driver, so we'll manufacture the 879 * dev_t. 880 */ 881 static struct dv_node * 882 dv_clone_mknod(struct dv_node *ddv, char *drvname) 883 { 884 major_t major; 885 struct dv_node *dvp; 886 char *devnm; 887 struct ddi_minor_data *dmd; 888 889 /* 890 * Make sure drvname is a STREAMS driver. We load the driver, 891 * but don't attach to any instances. This makes stat(2) 892 * relatively cheap. 893 */ 894 major = ddi_name_to_major(drvname); 895 if (major == DDI_MAJOR_T_NONE) 896 return (NULL); 897 898 if (ddi_hold_driver(major) == NULL) 899 return (NULL); 900 901 if (STREAMSTAB(major) == NULL) { 902 ddi_rele_driver(major); 903 return (NULL); 904 } 905 906 ddi_rele_driver(major); 907 devnm = kmem_alloc(MAXNAMELEN, KM_SLEEP); 908 (void) snprintf(devnm, MAXNAMELEN, "clone@0:%s", drvname); 909 dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP); 910 dmd->ddm_dev = makedevice(clone_major, (minor_t)major); 911 dmd->ddm_spec_type = S_IFCHR; 912 dvp = dv_mknod(ddv, clone_dip, devnm, dmd); 913 kmem_free(dmd, sizeof (*dmd)); 914 kmem_free(devnm, MAXNAMELEN); 915 return (dvp); 916 } 917 918 /* 919 * Given the parent directory node, and a name in it, returns the 920 * named dv_node to the caller (as a vnode). 921 * 922 * (We need pnp and rdir for doing shadow lookups; they can be NULL) 923 */ 924 int 925 dv_find(struct dv_node *ddv, char *nm, struct vnode **vpp, struct pathname *pnp, 926 struct vnode *rdir, struct cred *cred, uint_t ndi_flags) 927 { 928 extern int isminiroot; /* see modctl.c */ 929 930 int circ; 931 int rv = 0, was_busy = 0, nmlen, write_held = 0; 932 struct vnode *vp; 933 struct dv_node *dv, *dup; 934 dev_info_t *pdevi, *devi = NULL; 935 char *mnm; 936 struct ddi_minor_data *dmd; 937 938 dcmn_err3(("dv_find %s\n", nm)); 939 940 rw_enter(&ddv->dv_contents, RW_READER); 941 start: 942 if (DV_STALE(ddv)) { 943 rw_exit(&ddv->dv_contents); 944 return (ESTALE); 945 } 946 947 /* 948 * Empty name or ., return node itself. 949 */ 950 nmlen = strlen(nm); 951 if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) { 952 *vpp = DVTOV(ddv); 953 rw_exit(&ddv->dv_contents); 954 VN_HOLD(*vpp); 955 return (0); 956 } 957 958 /* 959 * .., return the parent directory 960 */ 961 if ((nmlen == 2) && (strcmp(nm, "..") == 0)) { 962 *vpp = DVTOV(ddv->dv_dotdot); 963 rw_exit(&ddv->dv_contents); 964 VN_HOLD(*vpp); 965 return (0); 966 } 967 968 /* 969 * Fail anything without a valid device name component 970 */ 971 if (nm[0] == '@' || nm[0] == ':') { 972 dcmn_err3(("devfs: no driver '%s'\n", nm)); 973 rw_exit(&ddv->dv_contents); 974 return (ENOENT); 975 } 976 977 /* 978 * So, now we have to deal with the trickier stuff. 979 * 980 * (a) search the existing list of dv_nodes on this directory 981 */ 982 if ((dv = dv_findbyname(ddv, nm)) != NULL) { 983 founddv: 984 ASSERT(RW_LOCK_HELD(&ddv->dv_contents)); 985 986 if (!rw_tryenter(&dv->dv_contents, RW_READER)) { 987 if (tsd_get(devfs_clean_key)) { 988 VN_RELE(DVTOV(dv)); 989 rw_exit(&ddv->dv_contents); 990 return (EBUSY); 991 } 992 rw_enter(&dv->dv_contents, RW_READER); 993 } 994 995 vp = DVTOV(dv); 996 if ((dv->dv_attrvp != NULLVP) || 997 (vp->v_type != VDIR && dv->dv_attr != NULL)) { 998 /* 999 * Common case - we already have attributes 1000 */ 1001 rw_exit(&dv->dv_contents); 1002 rw_exit(&ddv->dv_contents); 1003 goto found; 1004 } 1005 1006 /* 1007 * No attribute vp, try and build one. 1008 * 1009 * dv_shadow_node() can briefly drop &dv->dv_contents lock 1010 * if it is unable to upgrade it to a write lock. If the 1011 * current thread has come in through the bottom-up device 1012 * configuration devfs_clean() path, we may deadlock against 1013 * a thread performing top-down device configuration if it 1014 * grabs the contents lock. To avoid this, when we are on the 1015 * devfs_clean() path we attempt to upgrade the dv_contents 1016 * lock before we call dv_shadow_node(). 1017 */ 1018 if (tsd_get(devfs_clean_key)) { 1019 if (!rw_tryupgrade(&dv->dv_contents)) { 1020 VN_RELE(DVTOV(dv)); 1021 rw_exit(&dv->dv_contents); 1022 rw_exit(&ddv->dv_contents); 1023 return (EBUSY); 1024 } 1025 1026 write_held = DV_SHADOW_WRITE_HELD; 1027 } 1028 1029 dv_shadow_node(DVTOV(ddv), nm, vp, pnp, rdir, cred, 1030 write_held); 1031 1032 rw_exit(&dv->dv_contents); 1033 rw_exit(&ddv->dv_contents); 1034 goto found; 1035 } 1036 1037 /* 1038 * (b) Search the child devinfo nodes of our parent directory, 1039 * looking for the named node. If we find it, build a new 1040 * node, then grab the writers lock, search the directory 1041 * if it's still not there, then insert it. 1042 * 1043 * We drop the devfs locks before accessing the device tree. 1044 * Take care to mark the node BUSY so that a forced devfs_clean 1045 * doesn't mark the directory node stale. 1046 * 1047 * Also, check if we are called as part of devfs_clean or 1048 * reset_perm. If so, simply return not found because there 1049 * is nothing to clean. 1050 */ 1051 if (tsd_get(devfs_clean_key)) { 1052 rw_exit(&ddv->dv_contents); 1053 return (ENOENT); 1054 } 1055 1056 /* 1057 * We could be either READ or WRITE locked at 1058 * this point. Upgrade if we are read locked. 1059 */ 1060 ASSERT(RW_LOCK_HELD(&ddv->dv_contents)); 1061 if (rw_read_locked(&ddv->dv_contents) && 1062 !rw_tryupgrade(&ddv->dv_contents)) { 1063 rw_exit(&ddv->dv_contents); 1064 rw_enter(&ddv->dv_contents, RW_WRITER); 1065 /* 1066 * Things may have changed when we dropped 1067 * the contents lock, so start from top again 1068 */ 1069 goto start; 1070 } 1071 ddv->dv_busy++; /* mark busy before dropping lock */ 1072 was_busy++; 1073 rw_exit(&ddv->dv_contents); 1074 1075 pdevi = ddv->dv_devi; 1076 ASSERT(pdevi != NULL); 1077 1078 mnm = strchr(nm, ':'); 1079 if (mnm) 1080 *mnm = (char)0; 1081 1082 /* 1083 * Configure one nexus child, will call nexus's bus_ops 1084 * If successful, devi is held upon returning. 1085 * Note: devfs lookup should not be configuring grandchildren. 1086 */ 1087 ASSERT((ndi_flags & NDI_CONFIG) == 0); 1088 1089 rv = ndi_devi_config_one(pdevi, nm, &devi, ndi_flags | NDI_NO_EVENT); 1090 if (mnm) 1091 *mnm = ':'; 1092 if (rv != NDI_SUCCESS) { 1093 rv = ENOENT; 1094 goto notfound; 1095 } 1096 1097 /* 1098 * If we configured a hidden node, consider it notfound. 1099 */ 1100 if (ndi_dev_is_hidden_node(devi)) { 1101 ndi_rele_devi(devi); 1102 rv = ENOENT; 1103 goto notfound; 1104 } 1105 1106 /* 1107 * Don't make vhci clients visible under phci, unless we 1108 * are in miniroot. 1109 */ 1110 if (isminiroot == 0 && ddi_get_parent(devi) != pdevi) { 1111 ndi_rele_devi(devi); 1112 rv = ENOENT; 1113 goto notfound; 1114 } 1115 1116 ASSERT(devi && i_ddi_devi_attached(devi)); 1117 1118 /* 1119 * Invalidate cache to notice newly created minor nodes. 1120 */ 1121 rw_enter(&ddv->dv_contents, RW_WRITER); 1122 ddv->dv_flags |= DV_BUILD; 1123 rw_exit(&ddv->dv_contents); 1124 1125 /* 1126 * mkdir for nexus drivers and leaf nodes as well. If we are racing 1127 * and create a duplicate, the duplicate will be destroyed below. 1128 */ 1129 if (mnm == NULL) { 1130 dv = dv_mkdir(ddv, devi, nm); 1131 } else { 1132 /* 1133 * Allocate dmd first to avoid KM_SLEEP with active 1134 * ndi_devi_enter. 1135 */ 1136 dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP); 1137 ndi_devi_enter(devi, &circ); 1138 if (devi == clone_dip) { 1139 /* 1140 * For clone minors, load the driver indicated by 1141 * minor name. 1142 */ 1143 dv = dv_clone_mknod(ddv, mnm + 1); 1144 } else { 1145 /* 1146 * Find minor node and make a dv_node 1147 */ 1148 if (dv_find_leafnode(devi, mnm + 1, dmd) == 0) { 1149 dv = dv_mknod(ddv, devi, nm, dmd); 1150 if (dmd->ddm_node_priv) 1151 dpfree(dmd->ddm_node_priv); 1152 } 1153 } 1154 ndi_devi_exit(devi, circ); 1155 kmem_free(dmd, sizeof (*dmd)); 1156 } 1157 /* 1158 * Release hold from ndi_devi_config_one() 1159 */ 1160 ndi_rele_devi(devi); 1161 1162 if (dv == NULL) { 1163 rv = ENOENT; 1164 goto notfound; 1165 } 1166 1167 /* 1168 * We have released the dv_contents lock, need to check 1169 * if another thread already created a duplicate node 1170 */ 1171 rw_enter(&ddv->dv_contents, RW_WRITER); 1172 if ((dup = dv_findbyname(ddv, nm)) == NULL) { 1173 dv_insert(ddv, dv); 1174 } else { 1175 /* 1176 * Duplicate found, use the existing node 1177 */ 1178 VN_RELE(DVTOV(dv)); 1179 dv_destroy(dv, 0); 1180 dv = dup; 1181 } 1182 goto founddv; 1183 /*NOTREACHED*/ 1184 1185 found: 1186 /* 1187 * Fail lookup of device that has now become hidden (typically via 1188 * hot removal of open device). 1189 */ 1190 if (dv->dv_devi && ndi_dev_is_hidden_node(dv->dv_devi)) { 1191 dcmn_err2(("dv_find: nm %s failed: hidden/removed\n", nm)); 1192 VN_RELE(vp); 1193 rv = ENOENT; 1194 goto notfound; 1195 } 1196 1197 /* 1198 * Skip non-kernel lookups of internal nodes. 1199 * This use of kcred to distinguish between user and 1200 * internal kernel lookups is unfortunate. The information 1201 * provided by the seg argument to lookupnameat should 1202 * evolve into a lookup flag for filesystems that need 1203 * this distinction. 1204 */ 1205 if ((dv->dv_flags & DV_INTERNAL) && (cred != kcred)) { 1206 dcmn_err2(("dv_find: nm %s failed: internal\n", nm)); 1207 VN_RELE(vp); 1208 rv = ENOENT; 1209 goto notfound; 1210 } 1211 1212 dcmn_err2(("dv_find: returning vp for nm %s\n", nm)); 1213 if (vp->v_type == VCHR || vp->v_type == VBLK) { 1214 /* 1215 * If vnode is a device, return special vnode instead 1216 * (though it knows all about -us- via sp->s_realvp, 1217 * sp->s_devvp, and sp->s_dip) 1218 */ 1219 *vpp = specvp_devfs(vp, vp->v_rdev, vp->v_type, cred, 1220 dv->dv_devi); 1221 VN_RELE(vp); 1222 if (*vpp == NULLVP) 1223 rv = ENOSYS; 1224 } else 1225 *vpp = vp; 1226 1227 notfound: 1228 rw_enter(&ddv->dv_contents, RW_WRITER); 1229 if (was_busy) 1230 ddv->dv_busy--; 1231 rw_exit(&ddv->dv_contents); 1232 return (rv); 1233 } 1234 1235 /* 1236 * The given directory node is out-of-date; that is, it has been 1237 * marked as needing to be rebuilt, possibly because some new devinfo 1238 * node has come into existence, or possibly because this is the first 1239 * time we've been here. 1240 */ 1241 void 1242 dv_filldir(struct dv_node *ddv) 1243 { 1244 struct dv_node *dv; 1245 dev_info_t *devi, *pdevi; 1246 struct ddi_minor_data *dmd; 1247 char devnm[MAXNAMELEN]; 1248 int circ, ccirc; 1249 1250 ASSERT(DVTOV(ddv)->v_type == VDIR); 1251 ASSERT(RW_WRITE_HELD(&ddv->dv_contents)); 1252 ASSERT(ddv->dv_flags & DV_BUILD); 1253 1254 dcmn_err3(("dv_filldir: %s\n", ddv->dv_name)); 1255 if (DV_STALE(ddv)) 1256 return; 1257 pdevi = ddv->dv_devi; 1258 1259 if (ndi_devi_config(pdevi, NDI_NO_EVENT) != NDI_SUCCESS) { 1260 dcmn_err3(("dv_filldir: config error %s\n", ddv->dv_name)); 1261 } 1262 1263 ndi_devi_enter(pdevi, &circ); 1264 for (devi = ddi_get_child(pdevi); devi; 1265 devi = ddi_get_next_sibling(devi)) { 1266 if (i_ddi_node_state(devi) < DS_INITIALIZED) 1267 continue; 1268 1269 /* skip hidden nodes */ 1270 if (ndi_dev_is_hidden_node(devi)) 1271 continue; 1272 1273 dcmn_err3(("dv_filldir: node %s\n", ddi_node_name(devi))); 1274 1275 ndi_devi_enter(devi, &ccirc); 1276 for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) { 1277 char *addr; 1278 1279 /* 1280 * Skip alias nodes, internal nodes, and nodes 1281 * without a name. We allow DDM_DEFAULT nodes 1282 * to appear in readdir. 1283 */ 1284 if ((dmd->type == DDM_ALIAS) || 1285 (dmd->type == DDM_INTERNAL_PATH) || 1286 (dmd->ddm_name == NULL)) 1287 continue; 1288 1289 addr = ddi_get_name_addr(devi); 1290 if (addr && *addr) 1291 (void) sprintf(devnm, "%s@%s:%s", 1292 ddi_node_name(devi), addr, dmd->ddm_name); 1293 else 1294 (void) sprintf(devnm, "%s:%s", 1295 ddi_node_name(devi), dmd->ddm_name); 1296 1297 if ((dv = dv_findbyname(ddv, devnm)) != NULL) { 1298 /* dv_node already exists */ 1299 VN_RELE(DVTOV(dv)); 1300 continue; 1301 } 1302 1303 dv = dv_mknod(ddv, devi, devnm, dmd); 1304 dv_insert(ddv, dv); 1305 VN_RELE(DVTOV(dv)); 1306 } 1307 ndi_devi_exit(devi, ccirc); 1308 1309 (void) ddi_deviname(devi, devnm); 1310 if ((dv = dv_findbyname(ddv, devnm + 1)) == NULL) { 1311 /* directory doesn't exist */ 1312 dv = dv_mkdir(ddv, devi, devnm + 1); 1313 dv_insert(ddv, dv); 1314 } 1315 VN_RELE(DVTOV(dv)); 1316 } 1317 ndi_devi_exit(pdevi, circ); 1318 1319 ddv->dv_flags &= ~DV_BUILD; 1320 } 1321 1322 /* 1323 * Given a directory node, clean out all the nodes beneath. 1324 * 1325 * VDIR: Reinvoke to clean them, then delete the directory. 1326 * VCHR, VBLK: Just blow them away. 1327 * 1328 * Mark the directories touched as in need of a rebuild, in case 1329 * we fall over part way through. When DV_CLEAN_FORCE is specified, 1330 * we mark referenced empty directories as stale to facilitate DR. 1331 */ 1332 int 1333 dv_cleandir(struct dv_node *ddv, char *devnm, uint_t flags) 1334 { 1335 struct dv_node *dv; 1336 struct dv_node *next; 1337 struct vnode *vp; 1338 int busy = 0; 1339 1340 /* 1341 * We should always be holding the tsd_clean_key here: dv_cleandir() 1342 * will be called as a result of a devfs_clean request and the 1343 * tsd_clean_key will be set in either in devfs_clean() itself or in 1344 * devfs_clean_vhci(). 1345 * 1346 * Since we are on the devfs_clean path, we return EBUSY if we cannot 1347 * get the contents lock: if we blocked here we might deadlock against 1348 * a thread performing top-down device configuration. 1349 */ 1350 ASSERT(tsd_get(devfs_clean_key)); 1351 1352 dcmn_err3(("dv_cleandir: %s\n", ddv->dv_name)); 1353 1354 if (!(flags & DV_CLEANDIR_LCK) && 1355 !rw_tryenter(&ddv->dv_contents, RW_WRITER)) 1356 return (EBUSY); 1357 1358 for (dv = DV_FIRST_ENTRY(ddv); dv; dv = next) { 1359 next = DV_NEXT_ENTRY(ddv, dv); 1360 1361 /* 1362 * If devnm is specified, the non-minor portion of the 1363 * name must match devnm. 1364 */ 1365 if (devnm && 1366 (strncmp(devnm, dv->dv_name, strlen(devnm)) || 1367 (dv->dv_name[strlen(devnm)] != ':' && 1368 dv->dv_name[strlen(devnm)] != '\0'))) 1369 continue; 1370 1371 /* check type of what we are cleaning */ 1372 vp = DVTOV(dv); 1373 if (vp->v_type == VDIR) { 1374 /* recurse on directories */ 1375 rw_enter(&dv->dv_contents, RW_WRITER); 1376 if (dv_cleandir(dv, NULL, 1377 flags | DV_CLEANDIR_LCK) == EBUSY) { 1378 rw_exit(&dv->dv_contents); 1379 goto set_busy; 1380 } 1381 1382 /* A clean directory is an empty directory... */ 1383 ASSERT(dv->dv_nlink == 2); 1384 mutex_enter(&vp->v_lock); 1385 if (vp->v_count > 0) { 1386 /* 1387 * ... but an empty directory can still have 1388 * references to it. If we have dv_busy or 1389 * DV_CLEAN_FORCE is *not* specified then a 1390 * referenced directory is considered busy. 1391 */ 1392 if (dv->dv_busy || !(flags & DV_CLEAN_FORCE)) { 1393 mutex_exit(&vp->v_lock); 1394 rw_exit(&dv->dv_contents); 1395 goto set_busy; 1396 } 1397 1398 /* 1399 * Mark referenced directory stale so that DR 1400 * will succeed even if a shell has 1401 * /devices/xxx as current directory (causing 1402 * VN_HOLD reference to an empty directory). 1403 */ 1404 ASSERT(!DV_STALE(dv)); 1405 ndi_rele_devi(dv->dv_devi); 1406 dv->dv_devi = NULL; /* mark DV_STALE */ 1407 } 1408 } else { 1409 ASSERT((vp->v_type == VCHR) || (vp->v_type == VBLK)); 1410 ASSERT(dv->dv_nlink == 1); /* no hard links */ 1411 mutex_enter(&vp->v_lock); 1412 if (vp->v_count > 0) { 1413 mutex_exit(&vp->v_lock); 1414 goto set_busy; 1415 } 1416 } 1417 1418 /* unlink from directory */ 1419 dv_unlink(ddv, dv); 1420 1421 /* drop locks */ 1422 mutex_exit(&vp->v_lock); 1423 if (vp->v_type == VDIR) 1424 rw_exit(&dv->dv_contents); 1425 1426 /* destroy vnode if ref count is zero */ 1427 if (vp->v_count == 0) 1428 dv_destroy(dv, flags); 1429 1430 continue; 1431 1432 /* 1433 * If devnm is not NULL we return immediately on busy, 1434 * otherwise we continue destroying unused dv_node's. 1435 */ 1436 set_busy: busy++; 1437 if (devnm) 1438 break; 1439 } 1440 1441 /* 1442 * This code may be invoked to inform devfs that a new node has 1443 * been created in the kernel device tree. So we always set 1444 * the DV_BUILD flag to allow the next dv_filldir() to pick 1445 * the new devinfo nodes. 1446 */ 1447 ddv->dv_flags |= DV_BUILD; 1448 1449 if (!(flags & DV_CLEANDIR_LCK)) 1450 rw_exit(&ddv->dv_contents); 1451 1452 return (busy ? EBUSY : 0); 1453 } 1454 1455 /* 1456 * Walk through the devfs hierarchy, correcting the permissions of 1457 * devices with default permissions that do not match those specified 1458 * by minor perm. This can only be done for all drivers for now. 1459 */ 1460 static int 1461 dv_reset_perm_dir(struct dv_node *ddv, uint_t flags) 1462 { 1463 struct dv_node *dv; 1464 struct vnode *vp; 1465 int retval = 0; 1466 struct vattr *attrp; 1467 mperm_t mp; 1468 char *nm; 1469 uid_t old_uid; 1470 gid_t old_gid; 1471 mode_t old_mode; 1472 1473 rw_enter(&ddv->dv_contents, RW_WRITER); 1474 for (dv = DV_FIRST_ENTRY(ddv); dv; dv = DV_NEXT_ENTRY(ddv, dv)) { 1475 int error = 0; 1476 nm = dv->dv_name; 1477 1478 rw_enter(&dv->dv_contents, RW_READER); 1479 vp = DVTOV(dv); 1480 if (vp->v_type == VDIR) { 1481 rw_exit(&dv->dv_contents); 1482 if (dv_reset_perm_dir(dv, flags) != 0) { 1483 error = EBUSY; 1484 } 1485 } else { 1486 ASSERT(vp->v_type == VCHR || vp->v_type == VBLK); 1487 1488 /* 1489 * Check for permissions from minor_perm 1490 * If there are none, we're done 1491 */ 1492 rw_exit(&dv->dv_contents); 1493 if (dev_minorperm(dv->dv_devi, nm, &mp) != 0) 1494 continue; 1495 1496 rw_enter(&dv->dv_contents, RW_READER); 1497 1498 /* 1499 * Allow a node's permissions to be altered 1500 * permanently from the defaults by chmod, 1501 * using the shadow node as backing store. 1502 * Otherwise, update node to minor_perm permissions. 1503 */ 1504 if (dv->dv_attrvp == NULLVP) { 1505 /* 1506 * No attribute vp, try to find one. 1507 */ 1508 dv_shadow_node(DVTOV(ddv), nm, vp, 1509 NULL, NULLVP, kcred, 0); 1510 } 1511 if (dv->dv_attrvp != NULLVP || dv->dv_attr == NULL) { 1512 rw_exit(&dv->dv_contents); 1513 continue; 1514 } 1515 1516 attrp = dv->dv_attr; 1517 1518 if (VATTRP_MP_CMP(attrp, mp) == 0) { 1519 dcmn_err5(("%s: no perm change: " 1520 "%d %d 0%o\n", nm, attrp->va_uid, 1521 attrp->va_gid, attrp->va_mode)); 1522 rw_exit(&dv->dv_contents); 1523 continue; 1524 } 1525 1526 old_uid = attrp->va_uid; 1527 old_gid = attrp->va_gid; 1528 old_mode = attrp->va_mode; 1529 1530 VATTRP_MP_MERGE(attrp, mp); 1531 mutex_enter(&vp->v_lock); 1532 if (vp->v_count > 0) { 1533 error = EBUSY; 1534 } 1535 mutex_exit(&vp->v_lock); 1536 1537 dcmn_err5(("%s: perm %d/%d/0%o -> %d/%d/0%o (%d)\n", 1538 nm, old_uid, old_gid, old_mode, attrp->va_uid, 1539 attrp->va_gid, attrp->va_mode, error)); 1540 1541 rw_exit(&dv->dv_contents); 1542 } 1543 1544 if (error != 0) { 1545 retval = error; 1546 } 1547 } 1548 1549 ddv->dv_flags |= DV_BUILD; 1550 1551 rw_exit(&ddv->dv_contents); 1552 1553 return (retval); 1554 } 1555 1556 int 1557 devfs_reset_perm(uint_t flags) 1558 { 1559 struct dv_node *dvp; 1560 int rval; 1561 1562 if ((dvp = devfs_dip_to_dvnode(ddi_root_node())) == NULL) 1563 return (0); 1564 1565 VN_HOLD(DVTOV(dvp)); 1566 rval = dv_reset_perm_dir(dvp, flags); 1567 VN_RELE(DVTOV(dvp)); 1568 return (rval); 1569 } 1570 1571 /* 1572 * Clean up dangling devfs shadow nodes for removed 1573 * drivers so that, in the event the driver is re-added 1574 * to the system, newly created nodes won't incorrectly 1575 * pick up these stale shadow node permissions. 1576 * 1577 * This is accomplished by walking down the pathname 1578 * to the directory, starting at the root's attribute 1579 * node, then removing all minors matching the specified 1580 * node name. Care must be taken to remove all entries 1581 * in a directory before the directory itself, so that 1582 * the clean-up associated with rem_drv'ing a nexus driver 1583 * does not inadvertently result in an inconsistent 1584 * filesystem underlying devfs. 1585 */ 1586 1587 static int 1588 devfs_remdrv_rmdir(vnode_t *dirvp, const char *dir, vnode_t *rvp) 1589 { 1590 int error; 1591 vnode_t *vp; 1592 int eof; 1593 struct iovec iov; 1594 struct uio uio; 1595 struct dirent64 *dp; 1596 dirent64_t *dbuf; 1597 size_t dlen; 1598 size_t dbuflen; 1599 int ndirents = 64; 1600 char *nm; 1601 1602 VN_HOLD(dirvp); 1603 1604 dlen = ndirents * (sizeof (*dbuf)); 1605 dbuf = kmem_alloc(dlen, KM_SLEEP); 1606 1607 uio.uio_iov = &iov; 1608 uio.uio_iovcnt = 1; 1609 uio.uio_segflg = UIO_SYSSPACE; 1610 uio.uio_fmode = 0; 1611 uio.uio_extflg = UIO_COPY_CACHED; 1612 uio.uio_loffset = 0; 1613 uio.uio_llimit = MAXOFFSET_T; 1614 1615 eof = 0; 1616 error = 0; 1617 while (!error && !eof) { 1618 uio.uio_resid = dlen; 1619 iov.iov_base = (char *)dbuf; 1620 iov.iov_len = dlen; 1621 1622 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1623 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0); 1624 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1625 1626 dbuflen = dlen - uio.uio_resid; 1627 1628 if (error || dbuflen == 0) 1629 break; 1630 1631 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); 1632 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 1633 1634 nm = dp->d_name; 1635 1636 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) 1637 continue; 1638 1639 error = VOP_LOOKUP(dirvp, nm, 1640 &vp, NULL, 0, NULL, kcred, NULL, NULL, NULL); 1641 1642 dsysdebug(error, 1643 ("rem_drv %s/%s lookup (%d)\n", 1644 dir, nm, error)); 1645 1646 if (error) 1647 continue; 1648 1649 ASSERT(vp->v_type == VDIR || 1650 vp->v_type == VCHR || vp->v_type == VBLK); 1651 1652 if (vp->v_type == VDIR) { 1653 error = devfs_remdrv_rmdir(vp, nm, rvp); 1654 if (error == 0) { 1655 error = VOP_RMDIR(dirvp, 1656 (char *)nm, rvp, kcred, NULL, 0); 1657 dsysdebug(error, 1658 ("rem_drv %s/%s rmdir (%d)\n", 1659 dir, nm, error)); 1660 } 1661 } else { 1662 error = VOP_REMOVE(dirvp, (char *)nm, kcred, 1663 NULL, 0); 1664 dsysdebug(error, 1665 ("rem_drv %s/%s remove (%d)\n", 1666 dir, nm, error)); 1667 } 1668 1669 VN_RELE(vp); 1670 if (error) { 1671 goto exit; 1672 } 1673 } 1674 } 1675 1676 exit: 1677 VN_RELE(dirvp); 1678 kmem_free(dbuf, dlen); 1679 1680 return (error); 1681 } 1682 1683 int 1684 devfs_remdrv_cleanup(const char *dir, const char *nodename) 1685 { 1686 int error; 1687 vnode_t *vp; 1688 vnode_t *dirvp; 1689 int eof; 1690 struct iovec iov; 1691 struct uio uio; 1692 struct dirent64 *dp; 1693 dirent64_t *dbuf; 1694 size_t dlen; 1695 size_t dbuflen; 1696 int ndirents = 64; 1697 int nodenamelen = strlen(nodename); 1698 char *nm; 1699 struct pathname pn; 1700 vnode_t *rvp; /* root node of the underlying attribute fs */ 1701 1702 dcmn_err5(("devfs_remdrv_cleanup: %s %s\n", dir, nodename)); 1703 1704 if (error = pn_get((char *)dir, UIO_SYSSPACE, &pn)) 1705 return (0); 1706 1707 rvp = dvroot->dv_attrvp; 1708 ASSERT(rvp != NULL); 1709 VN_HOLD(rvp); 1710 1711 pn_skipslash(&pn); 1712 dirvp = rvp; 1713 VN_HOLD(dirvp); 1714 1715 nm = kmem_alloc(MAXNAMELEN, KM_SLEEP); 1716 1717 while (pn_pathleft(&pn)) { 1718 ASSERT(dirvp->v_type == VDIR); 1719 (void) pn_getcomponent(&pn, nm); 1720 ASSERT((strcmp(nm, ".") != 0) && (strcmp(nm, "..") != 0)); 1721 error = VOP_LOOKUP(dirvp, nm, &vp, NULL, 0, rvp, kcred, 1722 NULL, NULL, NULL); 1723 if (error) { 1724 dcmn_err5(("remdrv_cleanup %s lookup error %d\n", 1725 nm, error)); 1726 VN_RELE(dirvp); 1727 if (dirvp != rvp) 1728 VN_RELE(rvp); 1729 pn_free(&pn); 1730 kmem_free(nm, MAXNAMELEN); 1731 return (0); 1732 } 1733 VN_RELE(dirvp); 1734 dirvp = vp; 1735 pn_skipslash(&pn); 1736 } 1737 1738 ASSERT(dirvp->v_type == VDIR); 1739 if (dirvp != rvp) 1740 VN_RELE(rvp); 1741 pn_free(&pn); 1742 kmem_free(nm, MAXNAMELEN); 1743 1744 dlen = ndirents * (sizeof (*dbuf)); 1745 dbuf = kmem_alloc(dlen, KM_SLEEP); 1746 1747 uio.uio_iov = &iov; 1748 uio.uio_iovcnt = 1; 1749 uio.uio_segflg = UIO_SYSSPACE; 1750 uio.uio_fmode = 0; 1751 uio.uio_extflg = UIO_COPY_CACHED; 1752 uio.uio_loffset = 0; 1753 uio.uio_llimit = MAXOFFSET_T; 1754 1755 eof = 0; 1756 error = 0; 1757 while (!error && !eof) { 1758 uio.uio_resid = dlen; 1759 iov.iov_base = (char *)dbuf; 1760 iov.iov_len = dlen; 1761 1762 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1763 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0); 1764 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1765 1766 dbuflen = dlen - uio.uio_resid; 1767 1768 if (error || dbuflen == 0) 1769 break; 1770 1771 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); 1772 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 1773 1774 nm = dp->d_name; 1775 1776 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) 1777 continue; 1778 1779 if (strncmp(nm, nodename, nodenamelen) != 0) 1780 continue; 1781 1782 error = VOP_LOOKUP(dirvp, nm, &vp, 1783 NULL, 0, NULL, kcred, NULL, NULL, NULL); 1784 1785 dsysdebug(error, 1786 ("rem_drv %s/%s lookup (%d)\n", 1787 dir, nm, error)); 1788 1789 if (error) 1790 continue; 1791 1792 ASSERT(vp->v_type == VDIR || 1793 vp->v_type == VCHR || vp->v_type == VBLK); 1794 1795 if (vp->v_type == VDIR) { 1796 error = devfs_remdrv_rmdir(vp, nm, rvp); 1797 if (error == 0) { 1798 error = VOP_RMDIR(dirvp, (char *)nm, 1799 rvp, kcred, NULL, 0); 1800 dsysdebug(error, 1801 ("rem_drv %s/%s rmdir (%d)\n", 1802 dir, nm, error)); 1803 } 1804 } else { 1805 error = VOP_REMOVE(dirvp, (char *)nm, kcred, 1806 NULL, 0); 1807 dsysdebug(error, 1808 ("rem_drv %s/%s remove (%d)\n", 1809 dir, nm, error)); 1810 } 1811 1812 VN_RELE(vp); 1813 if (error) 1814 goto exit; 1815 } 1816 } 1817 1818 exit: 1819 VN_RELE(dirvp); 1820 1821 kmem_free(dbuf, dlen); 1822 1823 return (0); 1824 } 1825 1826 struct dv_list { 1827 struct dv_node *dv; 1828 struct dv_list *next; 1829 }; 1830 1831 void 1832 dv_walk( 1833 struct dv_node *ddv, 1834 char *devnm, 1835 void (*callback)(struct dv_node *, void *), 1836 void *arg) 1837 { 1838 struct vnode *dvp; 1839 struct dv_node *dv; 1840 struct dv_list *head, *tail, *next; 1841 int len; 1842 1843 dcmn_err3(("dv_walk: ddv = %s, devnm = %s\n", 1844 ddv->dv_name, devnm ? devnm : "<null>")); 1845 1846 dvp = DVTOV(ddv); 1847 1848 ASSERT(dvp->v_type == VDIR); 1849 1850 head = tail = next = NULL; 1851 1852 rw_enter(&ddv->dv_contents, RW_READER); 1853 mutex_enter(&dvp->v_lock); 1854 for (dv = DV_FIRST_ENTRY(ddv); dv; dv = DV_NEXT_ENTRY(ddv, dv)) { 1855 /* 1856 * If devnm is not NULL and is not the empty string, 1857 * select only dv_nodes with matching non-minor name 1858 */ 1859 if (devnm && (len = strlen(devnm)) && 1860 (strncmp(devnm, dv->dv_name, len) || 1861 (dv->dv_name[len] != ':' && dv->dv_name[len] != '\0'))) 1862 continue; 1863 1864 callback(dv, arg); 1865 1866 if (DVTOV(dv)->v_type != VDIR) 1867 continue; 1868 1869 next = kmem_zalloc(sizeof (*next), KM_SLEEP); 1870 next->dv = dv; 1871 1872 if (tail) 1873 tail->next = next; 1874 else 1875 head = next; 1876 1877 tail = next; 1878 } 1879 1880 while (head) { 1881 dv_walk(head->dv, NULL, callback, arg); 1882 next = head->next; 1883 kmem_free(head, sizeof (*head)); 1884 head = next; 1885 } 1886 rw_exit(&ddv->dv_contents); 1887 mutex_exit(&dvp->v_lock); 1888 } 1889