1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * miscellaneous routines for the devfs 30 */ 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/t_lock.h> 35 #include <sys/systm.h> 36 #include <sys/sysmacros.h> 37 #include <sys/user.h> 38 #include <sys/time.h> 39 #include <sys/vfs.h> 40 #include <sys/vnode.h> 41 #include <sys/file.h> 42 #include <sys/fcntl.h> 43 #include <sys/flock.h> 44 #include <sys/kmem.h> 45 #include <sys/uio.h> 46 #include <sys/errno.h> 47 #include <sys/stat.h> 48 #include <sys/cred.h> 49 #include <sys/dirent.h> 50 #include <sys/pathname.h> 51 #include <sys/cmn_err.h> 52 #include <sys/debug.h> 53 #include <sys/modctl.h> 54 #include <fs/fs_subr.h> 55 #include <sys/fs/dv_node.h> 56 #include <sys/fs/snode.h> 57 #include <sys/sunndi.h> 58 #include <sys/sunmdi.h> 59 #include <sys/conf.h> 60 61 #ifdef DEBUG 62 int devfs_debug = 0x0; 63 #endif 64 65 const char dvnm[] = "devfs"; 66 kmem_cache_t *dv_node_cache; /* dv_node cache */ 67 68 /* 69 * The devfs_clean_key is taken during a devfs_clean operation: it is used to 70 * prevent unnecessary code execution and for detection of potential deadlocks. 71 */ 72 uint_t devfs_clean_key; 73 74 struct dv_node *dvroot; 75 76 /* prototype memory vattrs */ 77 vattr_t dv_vattr_dir = { 78 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 79 VDIR, /* va_type */ 80 DV_DIRMODE_DEFAULT, /* va_mode */ 81 DV_UID_DEFAULT, /* va_uid */ 82 DV_GID_DEFAULT, /* va_gid */ 83 0, /* va_fsid; */ 84 0, /* va_nodeid; */ 85 0, /* va_nlink; */ 86 0, /* va_size; */ 87 0, /* va_atime; */ 88 0, /* va_mtime; */ 89 0, /* va_ctime; */ 90 0, /* va_rdev; */ 91 0, /* va_blksize; */ 92 0, /* va_nblocks; */ 93 0, /* va_seq; */ 94 }; 95 96 vattr_t dv_vattr_file = { 97 AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV, /* va_mask */ 98 0, /* va_type */ 99 DV_DEVMODE_DEFAULT, /* va_mode */ 100 DV_UID_DEFAULT, /* va_uid */ 101 DV_GID_DEFAULT, /* va_gid */ 102 0, /* va_fsid; */ 103 0, /* va_nodeid; */ 104 0, /* va_nlink; */ 105 0, /* va_size; */ 106 0, /* va_atime; */ 107 0, /* va_mtime; */ 108 0, /* va_ctime; */ 109 0, /* va_rdev; */ 110 0, /* va_blksize; */ 111 0, /* va_nblocks; */ 112 0, /* va_seq; */ 113 }; 114 115 vattr_t dv_vattr_priv = { 116 AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV, /* va_mask */ 117 0, /* va_type */ 118 DV_DEVMODE_PRIV, /* va_mode */ 119 DV_UID_DEFAULT, /* va_uid */ 120 DV_GID_DEFAULT, /* va_gid */ 121 0, /* va_fsid; */ 122 0, /* va_nodeid; */ 123 0, /* va_nlink; */ 124 0, /* va_size; */ 125 0, /* va_atime; */ 126 0, /* va_mtime; */ 127 0, /* va_ctime; */ 128 0, /* va_rdev; */ 129 0, /* va_blksize; */ 130 0, /* va_nblocks; */ 131 0, /* va_seq; */ 132 }; 133 134 extern dev_info_t *clone_dip; 135 extern major_t clone_major; 136 extern struct dev_ops *ddi_hold_driver(major_t); 137 138 /* 139 * dv_node cache constructor, destructor, can cache creation 140 */ 141 /*ARGSUSED1*/ 142 static int 143 i_dv_node_ctor(void *buf, void *cfarg, int flag) 144 { 145 struct dv_node *dv = (struct dv_node *)buf; 146 struct vnode *vp; 147 148 bzero(buf, sizeof (struct dv_node)); 149 150 /* initialize persistent parts of dv_node */ 151 rw_init(&dv->dv_contents, NULL, RW_DEFAULT, NULL); 152 153 /* allocate vnode and initialize link back to dv_node */ 154 dv->dv_vnode = vn_alloc(KM_SLEEP); 155 vp = DVTOV(dv); 156 vp->v_data = (caddr_t)dv; 157 return (0); 158 } 159 160 /* dev_info node destructor for kmem cache */ 161 /*ARGSUSED1*/ 162 static void 163 i_dv_node_dtor(void *buf, void *arg) 164 { 165 struct dv_node *dv = (struct dv_node *)buf; 166 struct vnode *vp = DVTOV(dv); 167 168 rw_destroy(&dv->dv_contents); 169 vn_invalid(vp); 170 vn_free(vp); 171 } 172 173 174 /* initialize dev_info node cache */ 175 void 176 dv_node_cache_init() 177 { 178 ASSERT(dv_node_cache == NULL); 179 dv_node_cache = kmem_cache_create("dv_node_cache", 180 sizeof (struct dv_node), 0, i_dv_node_ctor, i_dv_node_dtor, 181 NULL, NULL, NULL, 0); 182 183 tsd_create(&devfs_clean_key, NULL); 184 } 185 186 /* initialize dev_info node cache */ 187 void 188 dv_node_cache_fini() 189 { 190 ASSERT(dv_node_cache != NULL); 191 kmem_cache_destroy(dv_node_cache); 192 dv_node_cache = NULL; 193 194 tsd_destroy(&devfs_clean_key); 195 } 196 197 /* 198 * dv_mkino - Generate a unique inode number for devfs nodes. 199 * 200 * Although ino_t is 64 bits, the inode number is truncated to 32 bits for 32 201 * bit non-LARGEFILE applications. This means that there is a requirement to 202 * maintain the inode number as a 32 bit value or applications will have 203 * stat(2) calls fail with EOVERFLOW. We form a 32 bit inode number from the 204 * dev_t. but if the minor number is larger than L_MAXMIN32 we fold extra minor 205 * 206 * To generate inode numbers for directories, we assume that we will never use 207 * more than half the major space - this allows for ~8190 drivers. We use this 208 * upper major number space to allocate inode numbers for directories by 209 * encoding the major and instance into this space. 210 * 211 * We also skew the result so that inode 2 is reserved for the root of the file 212 * system. 213 * 214 * As part of the future support for 64-bit dev_t APIs, the upper minor bits 215 * should be folded into the high inode bits by adding the following code 216 * after "ino |= 1": 217 * 218 * #if (L_BITSMINOR32 != L_BITSMINOR) 219 * |* fold overflow minor bits into high bits of inode number *| 220 * ino |= ((ino_t)(minor >> L_BITSMINOR32)) << L_BITSMINOR; 221 * #endif |* (L_BITSMINOR32 != L_BITSMINOR) *| 222 * 223 * This way only applications that use devices that overflow their minor 224 * space will have an application level impact. 225 */ 226 static ino_t 227 dv_mkino(dev_info_t *devi, vtype_t typ, dev_t dev) 228 { 229 major_t major; 230 minor_t minor; 231 ino_t ino; 232 static int warn; 233 234 if (typ == VDIR) { 235 major = ((L_MAXMAJ32 + 1) >> 1) + DEVI(devi)->devi_major; 236 minor = ddi_get_instance(devi); 237 238 /* makedevice32 in high half of major number space */ 239 ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32)); 240 241 major = DEVI(devi)->devi_major; 242 } else { 243 major = getmajor(dev); 244 minor = getminor(dev); 245 246 /* makedevice32 */ 247 ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32)); 248 249 /* make ino for VCHR different than VBLK */ 250 ino <<= 1; 251 if (typ == VCHR) 252 ino |= 1; 253 } 254 255 ino += DV_ROOTINO + 1; /* skew */ 256 257 /* 258 * diagnose things a little early because adding the skew to a large 259 * minor number could roll over the major. 260 */ 261 if ((major >= (L_MAXMAJ32 >> 1)) && (warn == 0)) { 262 warn = 1; 263 cmn_err(CE_WARN, "%s: inode numbers are not unique", dvnm); 264 } 265 266 return (ino); 267 } 268 269 /* 270 * dv_mkroot 271 * 272 * Build the first VDIR dv_node. 273 */ 274 struct dv_node * 275 dv_mkroot(struct vfs *vfsp, dev_t devfsdev) 276 { 277 struct dv_node *dv; 278 struct vnode *vp; 279 280 ASSERT(ddi_root_node() != NULL); 281 ASSERT(dv_node_cache != NULL); 282 283 dcmn_err3(("dv_mkroot\n")); 284 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP); 285 vp = DVTOV(dv); 286 vn_reinit(vp); 287 vp->v_flag = VROOT; 288 vp->v_vfsp = vfsp; 289 vp->v_type = VDIR; 290 vp->v_rdev = devfsdev; 291 vn_setops(vp, dv_vnodeops); 292 vn_exists(vp); 293 294 dvroot = dv; 295 296 dv->dv_name = NULL; /* not needed */ 297 dv->dv_namelen = 0; 298 299 dv->dv_devi = ddi_root_node(); 300 301 dv->dv_ino = DV_ROOTINO; 302 dv->dv_nlink = 2; /* name + . (no dv_insert) */ 303 dv->dv_dotdot = dv; /* .. == self */ 304 dv->dv_attrvp = NULLVP; 305 dv->dv_attr = NULL; 306 dv->dv_flags = DV_BUILD; 307 dv->dv_priv = NULL; 308 dv->dv_busy = 0; 309 dv->dv_dflt_mode = 0; 310 311 return (dv); 312 } 313 314 /* 315 * dv_mkdir 316 * 317 * Given an probed or attached nexus node, create a VDIR dv_node. 318 * No dv_attrvp is created at this point. 319 */ 320 struct dv_node * 321 dv_mkdir(struct dv_node *ddv, dev_info_t *devi, char *nm) 322 { 323 struct dv_node *dv; 324 struct vnode *vp; 325 size_t nmlen; 326 327 ASSERT((devi)); 328 dcmn_err4(("dv_mkdir: %s\n", nm)); 329 330 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP); 331 nmlen = strlen(nm) + 1; 332 dv->dv_name = kmem_alloc(nmlen, KM_SLEEP); 333 bcopy(nm, dv->dv_name, nmlen); 334 dv->dv_namelen = nmlen - 1; /* '\0' not included */ 335 vp = DVTOV(dv); 336 vn_reinit(vp); 337 vp->v_flag = 0; 338 vp->v_vfsp = DVTOV(ddv)->v_vfsp; 339 vp->v_type = VDIR; 340 vp->v_rdev = DVTOV(ddv)->v_rdev; 341 vn_setops(vp, vn_getops(DVTOV(ddv))); 342 vn_exists(vp); 343 344 dv->dv_devi = devi; 345 ndi_hold_devi(devi); 346 347 dv->dv_ino = dv_mkino(devi, VDIR, NODEV); 348 dv->dv_nlink = 0; /* updated on insert */ 349 dv->dv_dotdot = ddv; 350 dv->dv_attrvp = NULLVP; 351 dv->dv_attr = NULL; 352 dv->dv_flags = DV_BUILD; 353 dv->dv_priv = NULL; 354 dv->dv_busy = 0; 355 dv->dv_dflt_mode = 0; 356 357 return (dv); 358 } 359 360 /* 361 * dv_mknod 362 * 363 * Given a minor node, create a VCHR or VBLK dv_node. 364 * No dv_attrvp is created at this point. 365 */ 366 static struct dv_node * 367 dv_mknod(struct dv_node *ddv, dev_info_t *devi, char *nm, 368 struct ddi_minor_data *dmd) 369 { 370 struct dv_node *dv; 371 struct vnode *vp; 372 size_t nmlen; 373 374 dcmn_err4(("dv_mknod: %s\n", nm)); 375 376 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP); 377 nmlen = strlen(nm) + 1; 378 dv->dv_name = kmem_alloc(nmlen, KM_SLEEP); 379 bcopy(nm, dv->dv_name, nmlen); 380 dv->dv_namelen = nmlen - 1; /* no '\0' */ 381 vp = DVTOV(dv); 382 vn_reinit(vp); 383 vp->v_flag = 0; 384 vp->v_vfsp = DVTOV(ddv)->v_vfsp; 385 vp->v_type = dmd->ddm_spec_type == S_IFCHR ? VCHR : VBLK; 386 vp->v_rdev = dmd->ddm_dev; 387 vn_setops(vp, vn_getops(DVTOV(ddv))); 388 vn_exists(vp); 389 390 ASSERT(MUTEX_HELD(&DEVI(devi)->devi_lock)); 391 dv->dv_devi = devi; 392 DEVI(devi)->devi_ref++; 393 394 dv->dv_ino = dv_mkino(devi, vp->v_type, vp->v_rdev); 395 dv->dv_nlink = 0; /* updated on insert */ 396 dv->dv_dotdot = ddv; 397 dv->dv_attrvp = NULLVP; 398 dv->dv_attr = NULL; 399 dv->dv_flags = 0; 400 401 if (dmd->type == DDM_INTERNAL_PATH) 402 dv->dv_flags |= DV_INTERNAL; 403 if (dmd->ddm_flags & DM_NO_FSPERM) 404 dv->dv_flags |= DV_NO_FSPERM; 405 406 dv->dv_priv = dmd->ddm_node_priv; 407 if (dv->dv_priv) 408 dphold(dv->dv_priv); 409 410 /* 411 * Minors created with ddi_create_priv_minor_node can specify 412 * a default mode permission other than the devfs default. 413 */ 414 if (dv->dv_priv || dv->dv_flags & DV_NO_FSPERM) { 415 dcmn_err5(("%s: dv_mknod default priv mode 0%o\n", 416 dv->dv_name, dmd->ddm_priv_mode)); 417 dv->dv_flags |= DV_DFLT_MODE; 418 dv->dv_dflt_mode = dmd->ddm_priv_mode & S_IAMB; 419 } 420 421 return (dv); 422 } 423 424 /* 425 * dv_destroy 426 * 427 * Destroy what we created in dv_mkdir or dv_mknod. 428 * In the case of a *referenced* directory, do nothing. 429 */ 430 /*ARGSUSED1*/ 431 void 432 dv_destroy(struct dv_node *dv, uint_t flags) 433 { 434 vnode_t *vp = DVTOV(dv); 435 ASSERT(dv->dv_nlink == 0); /* no references */ 436 ASSERT(dv->dv_next == NULL); /* unlinked from directory */ 437 438 dcmn_err4(("dv_destroy: %s\n", dv->dv_name)); 439 440 /* 441 * We may be asked to unlink referenced directories. 442 * In this case, there is nothing to be done. 443 * The eventual memory free will be done in 444 * devfs_inactive. 445 */ 446 if (vp->v_count != 0) { 447 ASSERT(vp->v_type == VDIR); 448 ASSERT(flags & DV_CLEAN_FORCE); 449 ASSERT(DV_STALE(dv)); 450 return; 451 } 452 453 if (dv->dv_attrvp != NULLVP) 454 VN_RELE(dv->dv_attrvp); 455 if (dv->dv_attr != NULL) 456 kmem_free(dv->dv_attr, sizeof (struct vattr)); 457 if (dv->dv_name != NULL) 458 kmem_free(dv->dv_name, dv->dv_namelen + 1); 459 if (dv->dv_devi != NULL) { 460 ndi_rele_devi(dv->dv_devi); 461 } 462 if (dv->dv_priv != NULL) { 463 dpfree(dv->dv_priv); 464 } 465 466 kmem_cache_free(dv_node_cache, dv); 467 } 468 469 /* 470 * Find and hold dv_node by name 471 */ 472 struct dv_node * 473 dv_findbyname(struct dv_node *ddv, char *nm) 474 { 475 struct dv_node *dv; 476 size_t nmlen = strlen(nm); 477 478 ASSERT(RW_LOCK_HELD(&ddv->dv_contents)); 479 dcmn_err3(("dv_findbyname: %s\n", nm)); 480 for (dv = ddv->dv_dot; dv; dv = dv->dv_next) { 481 if (dv->dv_namelen != nmlen) 482 continue; 483 if (strcmp(dv->dv_name, nm) == 0) { 484 VN_HOLD(DVTOV(dv)); 485 return (dv); 486 } 487 } 488 return (NULL); 489 } 490 491 /* 492 * Inserts a new dv_node in a parent directory 493 */ 494 void 495 dv_insert(struct dv_node *ddv, struct dv_node *dv) 496 { 497 ASSERT(RW_WRITE_HELD(&ddv->dv_contents)); 498 ASSERT(DVTOV(ddv)->v_type == VDIR); 499 ASSERT(ddv->dv_nlink >= 2); 500 ASSERT(dv->dv_nlink == 0); 501 502 dcmn_err3(("dv_insert: %s\n", dv->dv_name)); 503 504 dv->dv_dotdot = ddv; 505 dv->dv_next = ddv->dv_dot; 506 ddv->dv_dot = dv; 507 if (DVTOV(dv)->v_type == VDIR) { 508 ddv->dv_nlink++; /* .. to containing directory */ 509 dv->dv_nlink = 2; /* name + . */ 510 } else { 511 dv->dv_nlink = 1; /* name */ 512 } 513 } 514 515 /* 516 * Unlink a dv_node from a perent directory 517 */ 518 void 519 dv_unlink(struct dv_node *ddv, struct dv_node *dv, struct dv_node **dv_pprev) 520 { 521 /* verify linkage of arguments */ 522 ASSERT(ddv && dv && dv_pprev); 523 ASSERT(dv->dv_dotdot == ddv); 524 ASSERT(*dv_pprev == dv); 525 ASSERT(RW_WRITE_HELD(&ddv->dv_contents)); 526 ASSERT(DVTOV(ddv)->v_type == VDIR); 527 528 dcmn_err3(("dv_unlink: %s\n", dv->dv_name)); 529 530 if (DVTOV(dv)->v_type == VDIR) { 531 ddv->dv_nlink--; /* .. to containing directory */ 532 dv->dv_nlink -= 2; /* name + . */ 533 } else { 534 dv->dv_nlink -= 1; /* name */ 535 } 536 ASSERT(ddv->dv_nlink >= 2); 537 ASSERT(dv->dv_nlink == 0); 538 539 /* update ddv->dv_dot/dv_next */ 540 *dv_pprev = dv->dv_next; 541 542 dv->dv_dotdot = NULL; 543 dv->dv_next = NULL; 544 dv->dv_dot = NULL; 545 } 546 547 /* 548 * Merge devfs node specific information into an attribute structure. 549 * 550 * NOTE: specfs provides ATIME,MTIME,CTIME,SIZE,BLKSIZE,NBLOCKS on leaf node. 551 */ 552 void 553 dv_vattr_merge(struct dv_node *dv, struct vattr *vap) 554 { 555 struct vnode *vp = DVTOV(dv); 556 557 vap->va_nodeid = dv->dv_ino; 558 vap->va_nlink = dv->dv_nlink; 559 560 if (vp->v_type == VDIR) { 561 vap->va_rdev = 0; 562 vap->va_fsid = vp->v_rdev; 563 } else { 564 vap->va_rdev = vp->v_rdev; 565 vap->va_fsid = DVTOV(dv->dv_dotdot)->v_rdev; 566 vap->va_type = vp->v_type; 567 /* don't trust the shadow file type */ 568 vap->va_mode &= ~S_IFMT; 569 if (vap->va_type == VCHR) 570 vap->va_mode |= S_IFCHR; 571 else 572 vap->va_mode |= S_IFBLK; 573 } 574 } 575 576 /* 577 * Get default device permission by consulting rules in 578 * privilege specification in minor node and /etc/minor_perm. 579 * 580 * This function is called from the devname filesystem to get default 581 * permissions for a device exported to a non-global zone. 582 */ 583 void 584 devfs_get_defattr(struct vnode *vp, struct vattr *vap, int *no_fs_perm) 585 { 586 mperm_t mp; 587 struct dv_node *dv; 588 589 /* If vp isn't a dv_node, return something sensible */ 590 if (!vn_matchops(vp, dv_vnodeops)) { 591 if (no_fs_perm) 592 *no_fs_perm = 0; 593 *vap = dv_vattr_file; 594 return; 595 } 596 597 /* 598 * For minors not created by ddi_create_priv_minor_node(), 599 * use devfs defaults. 600 */ 601 dv = VTODV(vp); 602 if (vp->v_type == VDIR) { 603 *vap = dv_vattr_dir; 604 } else if (dv->dv_flags & DV_NO_FSPERM) { 605 if (no_fs_perm) 606 *no_fs_perm = 1; 607 *vap = dv_vattr_priv; 608 } else { 609 /* 610 * look up perm bits from minor_perm 611 */ 612 *vap = dv_vattr_file; 613 if (dev_minorperm(dv->dv_devi, dv->dv_name, &mp) == 0) { 614 VATTR_MP_MERGE((*vap), mp); 615 dcmn_err5(("%s: minor perm mode 0%o\n", 616 dv->dv_name, vap->va_mode)); 617 } else if (dv->dv_flags & DV_DFLT_MODE) { 618 ASSERT((dv->dv_dflt_mode & ~S_IAMB) == 0); 619 vap->va_mode &= ~S_IAMB; 620 vap->va_mode |= dv->dv_dflt_mode; 621 dcmn_err5(("%s: priv mode 0%o\n", 622 dv->dv_name, vap->va_mode)); 623 } 624 } 625 } 626 627 /* 628 * dv_shadow_node 629 * 630 * Given a VDIR dv_node, find/create the associated VDIR 631 * node in the shadow attribute filesystem. 632 * 633 * Given a VCHR/VBLK dv_node, find the associated VREG 634 * node in the shadow attribute filesystem. These nodes 635 * are only created to persist non-default attributes. 636 * Lack of such a node implies the default permissions 637 * are sufficient. 638 * 639 * Managing the attribute file entries is slightly tricky (mostly 640 * because we can't intercept VN_HOLD and VN_RELE except on the last 641 * release). 642 * 643 * We assert that if the dv_attrvp pointer is non-NULL, it points 644 * to a singly-held (by us) vnode that represents the shadow entry 645 * in the underlying filesystem. To avoid store-ordering issues, 646 * we assert that the pointer can only be tested under the dv_contents 647 * READERS lock. 648 */ 649 650 void 651 dv_shadow_node( 652 struct vnode *dvp, /* devfs parent directory vnode */ 653 char *nm, /* name component */ 654 struct vnode *vp, /* devfs vnode */ 655 struct pathname *pnp, /* the path .. */ 656 struct vnode *rdir, /* the root .. */ 657 struct cred *cred, /* who's asking? */ 658 int flags) /* optionally create shadow node */ 659 { 660 struct dv_node *dv; /* dv_node of named directory */ 661 struct vnode *rdvp; /* shadow parent directory vnode */ 662 struct vnode *rvp; /* shadow vnode */ 663 struct vnode *rrvp; /* realvp of shadow vnode */ 664 struct vattr vattr; 665 int create_tried; 666 int error; 667 668 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK); 669 dv = VTODV(vp); 670 dcmn_err3(("dv_shadow_node: name %s attr %p\n", 671 nm, (void *)dv->dv_attrvp)); 672 673 if ((flags & DV_SHADOW_WRITE_HELD) == 0) { 674 ASSERT(RW_READ_HELD(&dv->dv_contents)); 675 if (dv->dv_attrvp != NULLVP) 676 return; 677 if (!rw_tryupgrade(&dv->dv_contents)) { 678 rw_exit(&dv->dv_contents); 679 rw_enter(&dv->dv_contents, RW_WRITER); 680 if (dv->dv_attrvp != NULLVP) { 681 rw_downgrade(&dv->dv_contents); 682 return; 683 } 684 } 685 } else { 686 ASSERT(RW_WRITE_HELD(&dv->dv_contents)); 687 if (dv->dv_attrvp != NULLVP) 688 return; 689 } 690 691 ASSERT(RW_WRITE_HELD(&dv->dv_contents) && dv->dv_attrvp == NULL); 692 693 rdvp = VTODV(dvp)->dv_attrvp; 694 create_tried = 0; 695 lookup: 696 if (rdvp && (dv->dv_flags & DV_NO_FSPERM) == 0) { 697 error = VOP_LOOKUP(rdvp, nm, &rvp, pnp, LOOKUP_DIR, rdir, cred, 698 NULL, NULL, NULL); 699 700 /* factor out the snode since we only want the attribute node */ 701 if ((error == 0) && (VOP_REALVP(rvp, &rrvp, NULL) == 0)) { 702 VN_HOLD(rrvp); 703 VN_RELE(rvp); 704 rvp = rrvp; 705 } 706 } else 707 error = EROFS; /* no parent, no entry */ 708 709 /* 710 * All we want is the permissions (and maybe ACLs and 711 * extended attributes), and we want to perform lookups 712 * by name. Drivers occasionally change their minor 713 * number space. If something changes, there's no 714 * much we can do about it here. 715 */ 716 717 /* The shadow node checks out. We are done */ 718 if (error == 0) { 719 dv->dv_attrvp = rvp; /* with one hold */ 720 721 /* 722 * Determine if we have non-trivial ACLs on this node. 723 * It is not necessary to VOP_RWLOCK since fs_acl_nontrivial 724 * only does VOP_GETSECATTR. 725 */ 726 dv->dv_flags &= ~DV_ACL; 727 728 if (fs_acl_nontrivial(rvp, cred)) 729 dv->dv_flags |= DV_ACL; 730 731 /* 732 * If we have synced out the memory attributes, free 733 * them and switch back to using the persistent store. 734 */ 735 if (rvp && dv->dv_attr) { 736 kmem_free(dv->dv_attr, sizeof (struct vattr)); 737 dv->dv_attr = NULL; 738 } 739 if ((flags & DV_SHADOW_WRITE_HELD) == 0) 740 rw_downgrade(&dv->dv_contents); 741 ASSERT(RW_LOCK_HELD(&dv->dv_contents)); 742 return; 743 } 744 745 /* 746 * Failed to find attribute in persistent backing store, 747 * get default permission bits. 748 */ 749 devfs_get_defattr(vp, &vattr, NULL); 750 751 dv_vattr_merge(dv, &vattr); 752 gethrestime(&vattr.va_atime); 753 vattr.va_mtime = vattr.va_atime; 754 vattr.va_ctime = vattr.va_atime; 755 756 /* 757 * Try to create shadow dir. This is necessary in case 758 * we need to create a shadow leaf node later, when user 759 * executes chmod. 760 */ 761 if ((error == ENOENT) && !create_tried) { 762 switch (vp->v_type) { 763 case VDIR: 764 error = VOP_MKDIR(rdvp, nm, &vattr, &rvp, kcred, 765 NULL, 0, NULL); 766 dsysdebug(error, ("vop_mkdir %s %s %d\n", 767 VTODV(dvp)->dv_name, nm, error)); 768 create_tried = 1; 769 break; 770 771 case VCHR: 772 case VBLK: 773 /* 774 * Shadow nodes are only created on demand 775 */ 776 if (flags & DV_SHADOW_CREATE) { 777 error = VOP_CREATE(rdvp, nm, &vattr, NONEXCL, 778 VREAD|VWRITE, &rvp, kcred, 0, NULL, NULL); 779 dsysdebug(error, ("vop_create %s %s %d\n", 780 VTODV(dvp)->dv_name, nm, error)); 781 create_tried = 1; 782 } 783 break; 784 785 default: 786 cmn_err(CE_PANIC, "devfs: %s: create", dvnm); 787 /*NOTREACHED*/ 788 } 789 790 if (create_tried && 791 (error == 0) || (error == EEXIST)) { 792 VN_RELE(rvp); 793 goto lookup; 794 } 795 } 796 797 /* Store attribute in memory */ 798 if (dv->dv_attr == NULL) { 799 dv->dv_attr = kmem_alloc(sizeof (struct vattr), KM_SLEEP); 800 *(dv->dv_attr) = vattr; 801 } 802 803 if ((flags & DV_SHADOW_WRITE_HELD) == 0) 804 rw_downgrade(&dv->dv_contents); 805 ASSERT(RW_LOCK_HELD(&dv->dv_contents)); 806 } 807 808 /* 809 * Given a devinfo node, and a name, returns the appropriate 810 * minor information for that named node, if it exists. 811 */ 812 static int 813 dv_find_leafnode(dev_info_t *devi, char *minor_nm, struct ddi_minor_data *r_mi) 814 { 815 struct ddi_minor_data *dmd; 816 817 ASSERT(i_ddi_devi_attached(devi)); 818 ASSERT(MUTEX_HELD(&DEVI(devi)->devi_lock)); 819 820 dcmn_err3(("dv_find_leafnode: %s\n", minor_nm)); 821 for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) { 822 823 /* 824 * Skip alias nodes and nodes without a name. 825 */ 826 if ((dmd->type == DDM_ALIAS) || (dmd->ddm_name == NULL)) 827 continue; 828 829 dcmn_err4(("dv_find_leafnode: (%s,%s)\n", 830 minor_nm, dmd->ddm_name)); 831 if (strcmp(minor_nm, dmd->ddm_name) == 0) { 832 r_mi->ddm_dev = dmd->ddm_dev; 833 r_mi->ddm_spec_type = dmd->ddm_spec_type; 834 r_mi->type = dmd->type; 835 r_mi->ddm_flags = dmd->ddm_flags; 836 r_mi->ddm_node_priv = dmd->ddm_node_priv; 837 r_mi->ddm_priv_mode = dmd->ddm_priv_mode; 838 if (r_mi->ddm_node_priv) 839 dphold(r_mi->ddm_node_priv); 840 return (0); 841 } 842 } 843 844 dcmn_err3(("dv_find_leafnode: %s: ENOENT\n", minor_nm)); 845 return (ENOENT); 846 } 847 848 /* 849 * Special handling for clone node: 850 * Clone minor name is a driver name, the minor number will 851 * be the major number of the driver. There is no minor 852 * node under the clone driver, so we'll manufacture the 853 * dev_t. 854 */ 855 static struct dv_node * 856 dv_clone_mknod(struct dv_node *ddv, char *drvname) 857 { 858 major_t major; 859 struct dv_node *dvp; 860 char *devnm; 861 struct ddi_minor_data *dmd; 862 863 /* 864 * Make sure drvname is a STREAMS driver. We load the driver, 865 * but don't attach to any instances. This makes stat(2) 866 * relatively cheap. 867 */ 868 major = ddi_name_to_major(drvname); 869 if (major == (major_t)-1) 870 return (NULL); 871 872 if (ddi_hold_driver(major) == NULL) 873 return (NULL); 874 875 if (STREAMSTAB(major) == NULL) { 876 ddi_rele_driver(major); 877 return (NULL); 878 } 879 880 ddi_rele_driver(major); 881 devnm = kmem_alloc(MAXNAMELEN, KM_SLEEP); 882 (void) snprintf(devnm, MAXNAMELEN, "clone@0:%s", drvname); 883 dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP); 884 dmd->ddm_dev = makedevice(clone_major, (minor_t)major); 885 dmd->ddm_spec_type = S_IFCHR; 886 dvp = dv_mknod(ddv, clone_dip, devnm, dmd); 887 kmem_free(dmd, sizeof (*dmd)); 888 kmem_free(devnm, MAXNAMELEN); 889 return (dvp); 890 } 891 892 /* 893 * Given the parent directory node, and a name in it, returns the 894 * named dv_node to the caller (as a vnode). 895 * 896 * (We need pnp and rdir for doing shadow lookups; they can be NULL) 897 */ 898 int 899 dv_find(struct dv_node *ddv, char *nm, struct vnode **vpp, struct pathname *pnp, 900 struct vnode *rdir, struct cred *cred, uint_t ndi_flags) 901 { 902 extern int isminiroot; /* see modctl.c */ 903 904 int rv = 0, was_busy = 0, nmlen, write_held = 0; 905 struct vnode *vp; 906 struct dv_node *dv, *dup; 907 dev_info_t *pdevi, *devi = NULL; 908 char *mnm; 909 struct ddi_minor_data *dmd; 910 911 dcmn_err3(("dv_find %s\n", nm)); 912 913 rw_enter(&ddv->dv_contents, RW_READER); 914 start: 915 if (DV_STALE(ddv)) { 916 rw_exit(&ddv->dv_contents); 917 return (ESTALE); 918 } 919 920 /* 921 * Empty name or ., return node itself. 922 */ 923 nmlen = strlen(nm); 924 if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) { 925 *vpp = DVTOV(ddv); 926 rw_exit(&ddv->dv_contents); 927 VN_HOLD(*vpp); 928 return (0); 929 } 930 931 /* 932 * .., return the parent directory 933 */ 934 if ((nmlen == 2) && (strcmp(nm, "..") == 0)) { 935 *vpp = DVTOV(ddv->dv_dotdot); 936 rw_exit(&ddv->dv_contents); 937 VN_HOLD(*vpp); 938 return (0); 939 } 940 941 /* 942 * Fail anything without a valid device name component 943 */ 944 if (nm[0] == '@' || nm[0] == ':') { 945 dcmn_err3(("devfs: no driver '%s'\n", nm)); 946 rw_exit(&ddv->dv_contents); 947 return (ENOENT); 948 } 949 950 /* 951 * So, now we have to deal with the trickier stuff. 952 * 953 * (a) search the existing list of dv_nodes on this directory 954 */ 955 if ((dv = dv_findbyname(ddv, nm)) != NULL) { 956 founddv: 957 ASSERT(RW_LOCK_HELD(&ddv->dv_contents)); 958 959 if (!rw_tryenter(&dv->dv_contents, RW_READER)) { 960 if (tsd_get(devfs_clean_key)) { 961 VN_RELE(DVTOV(dv)); 962 rw_exit(&ddv->dv_contents); 963 return (EBUSY); 964 } 965 rw_enter(&dv->dv_contents, RW_READER); 966 } 967 968 vp = DVTOV(dv); 969 if ((dv->dv_attrvp != NULLVP) || 970 (vp->v_type != VDIR && dv->dv_attr != NULL)) { 971 /* 972 * Common case - we already have attributes 973 */ 974 rw_exit(&dv->dv_contents); 975 rw_exit(&ddv->dv_contents); 976 goto found; 977 } 978 979 /* 980 * No attribute vp, try and build one. 981 * 982 * dv_shadow_node() can briefly drop &dv->dv_contents lock 983 * if it is unable to upgrade it to a write lock. If the 984 * current thread has come in through the bottom-up device 985 * configuration devfs_clean() path, we may deadlock against 986 * a thread performing top-down device configuration if it 987 * grabs the contents lock. To avoid this, when we are on the 988 * devfs_clean() path we attempt to upgrade the dv_contents 989 * lock before we call dv_shadow_node(). 990 */ 991 if (tsd_get(devfs_clean_key)) { 992 if (!rw_tryupgrade(&dv->dv_contents)) { 993 VN_RELE(DVTOV(dv)); 994 rw_exit(&dv->dv_contents); 995 rw_exit(&ddv->dv_contents); 996 return (EBUSY); 997 } 998 999 write_held = DV_SHADOW_WRITE_HELD; 1000 } 1001 1002 dv_shadow_node(DVTOV(ddv), nm, vp, pnp, rdir, cred, 1003 write_held); 1004 1005 rw_exit(&dv->dv_contents); 1006 rw_exit(&ddv->dv_contents); 1007 goto found; 1008 } 1009 1010 /* 1011 * (b) Search the child devinfo nodes of our parent directory, 1012 * looking for the named node. If we find it, build a new 1013 * node, then grab the writers lock, search the directory 1014 * if it's still not there, then insert it. 1015 * 1016 * We drop the devfs locks before accessing the device tree. 1017 * Take care to mark the node BUSY so that a forced devfs_clean 1018 * doesn't mark the directory node stale. 1019 * 1020 * Also, check if we are called as part of devfs_clean or 1021 * reset_perm. If so, simply return not found because there 1022 * is nothing to clean. 1023 */ 1024 if (tsd_get(devfs_clean_key)) { 1025 rw_exit(&ddv->dv_contents); 1026 return (ENOENT); 1027 } 1028 1029 /* 1030 * We could be either READ or WRITE locked at 1031 * this point. Upgrade if we are read locked. 1032 */ 1033 ASSERT(RW_LOCK_HELD(&ddv->dv_contents)); 1034 if (rw_read_locked(&ddv->dv_contents) && 1035 !rw_tryupgrade(&ddv->dv_contents)) { 1036 rw_exit(&ddv->dv_contents); 1037 rw_enter(&ddv->dv_contents, RW_WRITER); 1038 /* 1039 * Things may have changed when we dropped 1040 * the contents lock, so start from top again 1041 */ 1042 goto start; 1043 } 1044 ddv->dv_busy++; /* mark busy before dropping lock */ 1045 was_busy++; 1046 rw_exit(&ddv->dv_contents); 1047 1048 pdevi = ddv->dv_devi; 1049 ASSERT(pdevi != NULL); 1050 1051 mnm = strchr(nm, ':'); 1052 if (mnm) 1053 *mnm = (char)0; 1054 1055 /* 1056 * Configure one nexus child, will call nexus's bus_ops 1057 * If successful, devi is held upon returning. 1058 * Note: devfs lookup should not be configuring grandchildren. 1059 */ 1060 ASSERT((ndi_flags & NDI_CONFIG) == 0); 1061 1062 rv = ndi_devi_config_one(pdevi, nm, &devi, ndi_flags | NDI_NO_EVENT); 1063 if (mnm) 1064 *mnm = ':'; 1065 if (rv != NDI_SUCCESS) { 1066 rv = ENOENT; 1067 goto notfound; 1068 } 1069 1070 /* 1071 * Don't make vhci clients visible under phci, unless we 1072 * are in miniroot. 1073 */ 1074 if (isminiroot == 0 && ddi_get_parent(devi) != pdevi) { 1075 ndi_rele_devi(devi); 1076 rv = ENOENT; 1077 goto notfound; 1078 } 1079 1080 ASSERT(devi && i_ddi_devi_attached(devi)); 1081 1082 /* 1083 * Invalidate cache to notice newly created minor nodes. 1084 */ 1085 rw_enter(&ddv->dv_contents, RW_WRITER); 1086 ddv->dv_flags |= DV_BUILD; 1087 rw_exit(&ddv->dv_contents); 1088 1089 /* 1090 * mkdir for nexus drivers and leaf nodes as well. If we are racing 1091 * and create a duplicate, the duplicate will be destroyed below. 1092 */ 1093 if (mnm == NULL) { 1094 dv = dv_mkdir(ddv, devi, nm); 1095 } else { 1096 /* 1097 * For clone minors, load the driver indicated by minor name. 1098 */ 1099 mutex_enter(&DEVI(devi)->devi_lock); 1100 if (devi == clone_dip) { 1101 dv = dv_clone_mknod(ddv, mnm + 1); 1102 } else { 1103 /* 1104 * Find minor node and make a dv_node 1105 */ 1106 dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP); 1107 if (dv_find_leafnode(devi, mnm + 1, dmd) == 0) { 1108 dv = dv_mknod(ddv, devi, nm, dmd); 1109 if (dmd->ddm_node_priv) 1110 dpfree(dmd->ddm_node_priv); 1111 } 1112 kmem_free(dmd, sizeof (*dmd)); 1113 } 1114 mutex_exit(&DEVI(devi)->devi_lock); 1115 } 1116 /* 1117 * Release hold from ndi_devi_config_one() 1118 */ 1119 ndi_rele_devi(devi); 1120 1121 if (dv == NULL) { 1122 rv = ENOENT; 1123 goto notfound; 1124 } 1125 1126 /* 1127 * We have released the dv_contents lock, need to check 1128 * if another thread already created a duplicate node 1129 */ 1130 rw_enter(&ddv->dv_contents, RW_WRITER); 1131 if ((dup = dv_findbyname(ddv, nm)) == NULL) { 1132 dv_insert(ddv, dv); 1133 } else { 1134 /* 1135 * Duplicate found, use the existing node 1136 */ 1137 VN_RELE(DVTOV(dv)); 1138 dv_destroy(dv, 0); 1139 dv = dup; 1140 } 1141 goto founddv; 1142 /*NOTREACHED*/ 1143 1144 found: 1145 /* 1146 * Skip non-kernel lookups of internal nodes. 1147 * This use of kcred to distinguish between user and 1148 * internal kernel lookups is unfortunate. The information 1149 * provided by the seg argument to lookupnameat should 1150 * evolve into a lookup flag for filesystems that need 1151 * this distinction. 1152 */ 1153 if ((dv->dv_flags & DV_INTERNAL) && (cred != kcred)) { 1154 VN_RELE(vp); 1155 rv = ENOENT; 1156 goto notfound; 1157 } 1158 1159 dcmn_err2(("dv_find: returning vp for nm %s\n", nm)); 1160 if (vp->v_type == VCHR || vp->v_type == VBLK) { 1161 /* 1162 * If vnode is a device, return special vnode instead 1163 * (though it knows all about -us- via sp->s_realvp, 1164 * sp->s_devvp, and sp->s_dip) 1165 */ 1166 *vpp = specvp_devfs(vp, vp->v_rdev, vp->v_type, cred, 1167 dv->dv_devi); 1168 VN_RELE(vp); 1169 if (*vpp == NULLVP) 1170 rv = ENOSYS; 1171 } else 1172 *vpp = vp; 1173 1174 notfound: 1175 rw_enter(&ddv->dv_contents, RW_WRITER); 1176 if (was_busy) 1177 ddv->dv_busy--; 1178 rw_exit(&ddv->dv_contents); 1179 return (rv); 1180 } 1181 1182 /* 1183 * The given directory node is out-of-date; that is, it has been 1184 * marked as needing to be rebuilt, possibly because some new devinfo 1185 * node has come into existence, or possibly because this is the first 1186 * time we've been here. 1187 */ 1188 void 1189 dv_filldir(struct dv_node *ddv) 1190 { 1191 struct dv_node *dv; 1192 dev_info_t *devi, *pdevi; 1193 struct ddi_minor_data *dmd; 1194 char devnm[MAXNAMELEN]; 1195 int circ; 1196 1197 ASSERT(DVTOV(ddv)->v_type == VDIR); 1198 ASSERT(RW_WRITE_HELD(&ddv->dv_contents)); 1199 ASSERT(ddv->dv_flags & DV_BUILD); 1200 1201 dcmn_err3(("dv_filldir: %s\n", ddv->dv_name)); 1202 if (DV_STALE(ddv)) 1203 return; 1204 pdevi = ddv->dv_devi; 1205 1206 if (ndi_devi_config(pdevi, NDI_NO_EVENT) != NDI_SUCCESS) { 1207 dcmn_err3(("dv_filldir: config error %s\n", 1208 ddv->dv_name)); 1209 } 1210 1211 ndi_devi_enter(pdevi, &circ); 1212 for (devi = ddi_get_child(pdevi); devi; 1213 devi = ddi_get_next_sibling(devi)) { 1214 if (i_ddi_node_state(devi) < DS_PROBED) 1215 continue; 1216 1217 dcmn_err3(("dv_filldir: node %s\n", ddi_node_name(devi))); 1218 1219 mutex_enter(&DEVI(devi)->devi_lock); 1220 for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) { 1221 char *addr; 1222 1223 /* 1224 * Skip alias nodes, internal nodes, and nodes 1225 * without a name. We allow DDM_DEFAULT nodes 1226 * to appear in readdir. 1227 */ 1228 if ((dmd->type == DDM_ALIAS) || 1229 (dmd->type == DDM_INTERNAL_PATH) || 1230 (dmd->ddm_name == NULL)) 1231 continue; 1232 1233 addr = ddi_get_name_addr(devi); 1234 if (addr && *addr) 1235 (void) sprintf(devnm, "%s@%s:%s", 1236 ddi_node_name(devi), addr, dmd->ddm_name); 1237 else 1238 (void) sprintf(devnm, "%s:%s", 1239 ddi_node_name(devi), dmd->ddm_name); 1240 1241 if ((dv = dv_findbyname(ddv, devnm)) != NULL) { 1242 /* dv_node already exists */ 1243 VN_RELE(DVTOV(dv)); 1244 continue; 1245 } 1246 1247 dv = dv_mknod(ddv, devi, devnm, dmd); 1248 dv_insert(ddv, dv); 1249 VN_RELE(DVTOV(dv)); 1250 } 1251 mutex_exit(&DEVI(devi)->devi_lock); 1252 1253 (void) ddi_deviname(devi, devnm); 1254 if ((dv = dv_findbyname(ddv, devnm + 1)) == NULL) { 1255 /* directory doesn't exist */ 1256 dv = dv_mkdir(ddv, devi, devnm + 1); 1257 dv_insert(ddv, dv); 1258 } 1259 VN_RELE(DVTOV(dv)); 1260 } 1261 ndi_devi_exit(pdevi, circ); 1262 1263 ddv->dv_flags &= ~DV_BUILD; 1264 } 1265 1266 /* 1267 * Given a directory node, clean out all the nodes beneath. 1268 * 1269 * VDIR: Reinvoke to clean them, then delete the directory. 1270 * VCHR, VBLK: Just blow them away. 1271 * 1272 * Mark the directories touched as in need of a rebuild, in case 1273 * we fall over part way through. When DV_CLEAN_FORCE is specified, 1274 * we mark referenced empty directories as stale to facilitate DR. 1275 */ 1276 int 1277 dv_cleandir(struct dv_node *ddv, char *devnm, uint_t flags) 1278 { 1279 struct dv_node *dv; 1280 struct dv_node **pprev, **npprev; 1281 struct vnode *vp; 1282 int busy = 0; 1283 1284 /* 1285 * We should always be holding the tsd_clean_key here: dv_cleandir() 1286 * will be called as a result of a devfs_clean request and the 1287 * tsd_clean_key will be set in either in devfs_clean() itself or in 1288 * devfs_clean_vhci(). 1289 * 1290 * Since we are on the devfs_clean path, we return EBUSY if we cannot 1291 * get the contents lock: if we blocked here we might deadlock against 1292 * a thread performing top-down device configuration. 1293 */ 1294 ASSERT(tsd_get(devfs_clean_key)); 1295 1296 dcmn_err3(("dv_cleandir: %s\n", ddv->dv_name)); 1297 1298 if (!(flags & DV_CLEANDIR_LCK) && 1299 !rw_tryenter(&ddv->dv_contents, RW_WRITER)) 1300 return (EBUSY); 1301 1302 for (pprev = &ddv->dv_dot, dv = *pprev; dv; 1303 pprev = npprev, dv = *pprev) { 1304 npprev = &dv->dv_next; 1305 1306 /* 1307 * If devnm is specified, the non-minor portion of the 1308 * name must match devnm. 1309 */ 1310 if (devnm && 1311 (strncmp(devnm, dv->dv_name, strlen(devnm)) || 1312 (dv->dv_name[strlen(devnm)] != ':' && 1313 dv->dv_name[strlen(devnm)] != '\0'))) 1314 continue; 1315 1316 /* check type of what we are cleaning */ 1317 vp = DVTOV(dv); 1318 if (vp->v_type == VDIR) { 1319 /* recurse on directories */ 1320 rw_enter(&dv->dv_contents, RW_WRITER); 1321 if (dv_cleandir(dv, NULL, 1322 flags | DV_CLEANDIR_LCK) == EBUSY) { 1323 rw_exit(&dv->dv_contents); 1324 goto set_busy; 1325 } 1326 1327 /* A clean directory is an empty directory... */ 1328 ASSERT(dv->dv_nlink == 2); 1329 mutex_enter(&vp->v_lock); 1330 if (vp->v_count > 0) { 1331 /* 1332 * ... but an empty directory can still have 1333 * references to it. If we have dv_busy or 1334 * DV_CLEAN_FORCE is *not* specified then a 1335 * referenced directory is considered busy. 1336 */ 1337 if (dv->dv_busy || !(flags & DV_CLEAN_FORCE)) { 1338 mutex_exit(&vp->v_lock); 1339 rw_exit(&dv->dv_contents); 1340 goto set_busy; 1341 } 1342 1343 /* 1344 * Mark referenced directory stale so that DR 1345 * will succeed even if a shell has 1346 * /devices/xxx as current directory (causing 1347 * VN_HOLD reference to an empty directory). 1348 */ 1349 ASSERT(!DV_STALE(dv)); 1350 ndi_rele_devi(dv->dv_devi); 1351 dv->dv_devi = NULL; /* mark DV_STALE */ 1352 } 1353 } else { 1354 ASSERT((vp->v_type == VCHR) || (vp->v_type == VBLK)); 1355 ASSERT(dv->dv_nlink == 1); /* no hard links */ 1356 mutex_enter(&vp->v_lock); 1357 if (vp->v_count > 0) { 1358 mutex_exit(&vp->v_lock); 1359 goto set_busy; 1360 } 1361 } 1362 1363 /* unlink from directory */ 1364 dv_unlink(ddv, dv, pprev); 1365 1366 /* drop locks */ 1367 mutex_exit(&vp->v_lock); 1368 if (vp->v_type == VDIR) 1369 rw_exit(&dv->dv_contents); 1370 1371 /* destroy vnode if ref count is zero */ 1372 if (vp->v_count == 0) 1373 dv_destroy(dv, flags); 1374 1375 /* pointer to previous stays unchanged */ 1376 npprev = pprev; 1377 continue; 1378 1379 /* 1380 * If devnm is not NULL we return immediately on busy, 1381 * otherwise we continue destroying unused dv_node's. 1382 */ 1383 set_busy: busy++; 1384 if (devnm) 1385 break; 1386 } 1387 1388 /* 1389 * This code may be invoked to inform devfs that a new node has 1390 * been created in the kernel device tree. So we always set 1391 * the DV_BUILD flag to allow the next dv_filldir() to pick 1392 * the new devinfo nodes. 1393 */ 1394 ddv->dv_flags |= DV_BUILD; 1395 1396 if (!(flags & DV_CLEANDIR_LCK)) 1397 rw_exit(&ddv->dv_contents); 1398 1399 return (busy ? EBUSY : 0); 1400 } 1401 1402 /* 1403 * Walk through the devfs hierarchy, correcting the permissions of 1404 * devices with default permissions that do not match those specified 1405 * by minor perm. This can only be done for all drivers for now. 1406 */ 1407 static int 1408 dv_reset_perm_dir(struct dv_node *ddv, uint_t flags) 1409 { 1410 struct dv_node *dv, *next = NULL; 1411 struct vnode *vp; 1412 int retval = 0; 1413 struct vattr *attrp; 1414 mperm_t mp; 1415 char *nm; 1416 uid_t old_uid; 1417 gid_t old_gid; 1418 mode_t old_mode; 1419 1420 rw_enter(&ddv->dv_contents, RW_WRITER); 1421 for (dv = ddv->dv_dot; dv; dv = next) { 1422 int error = 0; 1423 next = dv->dv_next; 1424 nm = dv->dv_name; 1425 1426 rw_enter(&dv->dv_contents, RW_READER); 1427 vp = DVTOV(dv); 1428 if (vp->v_type == VDIR) { 1429 rw_exit(&dv->dv_contents); 1430 if (dv_reset_perm_dir(dv, flags) != 0) { 1431 error = EBUSY; 1432 } 1433 } else { 1434 ASSERT(vp->v_type == VCHR || vp->v_type == VBLK); 1435 1436 /* 1437 * Check for permissions from minor_perm 1438 * If there are none, we're done 1439 */ 1440 rw_exit(&dv->dv_contents); 1441 if (dev_minorperm(dv->dv_devi, nm, &mp) != 0) 1442 continue; 1443 1444 rw_enter(&dv->dv_contents, RW_READER); 1445 1446 /* 1447 * Allow a node's permissions to be altered 1448 * permanently from the defaults by chmod, 1449 * using the shadow node as backing store. 1450 * Otherwise, update node to minor_perm permissions. 1451 */ 1452 if (dv->dv_attrvp == NULLVP) { 1453 /* 1454 * No attribute vp, try to find one. 1455 */ 1456 dv_shadow_node(DVTOV(ddv), nm, vp, 1457 NULL, NULLVP, kcred, 0); 1458 } 1459 if (dv->dv_attrvp != NULLVP || dv->dv_attr == NULL) { 1460 rw_exit(&dv->dv_contents); 1461 continue; 1462 } 1463 1464 attrp = dv->dv_attr; 1465 1466 if (VATTRP_MP_CMP(attrp, mp) == 0) { 1467 dcmn_err5(("%s: no perm change: " 1468 "%d %d 0%o\n", nm, attrp->va_uid, 1469 attrp->va_gid, attrp->va_mode)); 1470 rw_exit(&dv->dv_contents); 1471 continue; 1472 } 1473 1474 old_uid = attrp->va_uid; 1475 old_gid = attrp->va_gid; 1476 old_mode = attrp->va_mode; 1477 1478 VATTRP_MP_MERGE(attrp, mp); 1479 mutex_enter(&vp->v_lock); 1480 if (vp->v_count > 0) { 1481 error = EBUSY; 1482 } 1483 mutex_exit(&vp->v_lock); 1484 1485 dcmn_err5(("%s: perm %d/%d/0%o -> %d/%d/0%o (%d)\n", 1486 nm, old_uid, old_gid, old_mode, attrp->va_uid, 1487 attrp->va_gid, attrp->va_mode, error)); 1488 1489 rw_exit(&dv->dv_contents); 1490 } 1491 1492 if (error != 0) { 1493 retval = error; 1494 } 1495 } 1496 1497 ddv->dv_flags |= DV_BUILD; 1498 1499 rw_exit(&ddv->dv_contents); 1500 1501 return (retval); 1502 } 1503 1504 int 1505 devfs_reset_perm(uint_t flags) 1506 { 1507 struct dv_node *dvp; 1508 int rval; 1509 1510 if ((dvp = devfs_dip_to_dvnode(ddi_root_node())) == NULL) 1511 return (0); 1512 1513 VN_HOLD(DVTOV(dvp)); 1514 rval = dv_reset_perm_dir(dvp, flags); 1515 VN_RELE(DVTOV(dvp)); 1516 return (rval); 1517 } 1518 1519 /* 1520 * Clean up dangling devfs shadow nodes for removed 1521 * drivers so that, in the event the driver is re-added 1522 * to the system, newly created nodes won't incorrectly 1523 * pick up these stale shadow node permissions. 1524 * 1525 * This is accomplished by walking down the pathname 1526 * to the directory, starting at the root's attribute 1527 * node, then removing all minors matching the specified 1528 * node name. Care must be taken to remove all entries 1529 * in a directory before the directory itself, so that 1530 * the clean-up associated with rem_drv'ing a nexus driver 1531 * does not inadvertently result in an inconsistent 1532 * filesystem underlying devfs. 1533 */ 1534 1535 static int 1536 devfs_remdrv_rmdir(vnode_t *dirvp, const char *dir, vnode_t *rvp) 1537 { 1538 int error; 1539 vnode_t *vp; 1540 int eof; 1541 struct iovec iov; 1542 struct uio uio; 1543 struct dirent64 *dp; 1544 dirent64_t *dbuf; 1545 size_t dlen; 1546 size_t dbuflen; 1547 int ndirents = 64; 1548 char *nm; 1549 1550 VN_HOLD(dirvp); 1551 1552 dlen = ndirents * (sizeof (*dbuf)); 1553 dbuf = kmem_alloc(dlen, KM_SLEEP); 1554 1555 uio.uio_iov = &iov; 1556 uio.uio_iovcnt = 1; 1557 uio.uio_segflg = UIO_SYSSPACE; 1558 uio.uio_fmode = 0; 1559 uio.uio_extflg = UIO_COPY_CACHED; 1560 uio.uio_loffset = 0; 1561 uio.uio_llimit = MAXOFFSET_T; 1562 1563 eof = 0; 1564 error = 0; 1565 while (!error && !eof) { 1566 uio.uio_resid = dlen; 1567 iov.iov_base = (char *)dbuf; 1568 iov.iov_len = dlen; 1569 1570 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1571 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0); 1572 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1573 1574 dbuflen = dlen - uio.uio_resid; 1575 1576 if (error || dbuflen == 0) 1577 break; 1578 1579 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); 1580 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 1581 1582 nm = dp->d_name; 1583 1584 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) 1585 continue; 1586 1587 error = VOP_LOOKUP(dirvp, nm, 1588 &vp, NULL, 0, NULL, kcred, NULL, NULL, NULL); 1589 1590 dsysdebug(error, 1591 ("rem_drv %s/%s lookup (%d)\n", 1592 dir, nm, error)); 1593 1594 if (error) 1595 continue; 1596 1597 ASSERT(vp->v_type == VDIR || 1598 vp->v_type == VCHR || vp->v_type == VBLK); 1599 1600 if (vp->v_type == VDIR) { 1601 error = devfs_remdrv_rmdir(vp, nm, rvp); 1602 if (error == 0) { 1603 error = VOP_RMDIR(dirvp, 1604 (char *)nm, rvp, kcred, NULL, 0); 1605 dsysdebug(error, 1606 ("rem_drv %s/%s rmdir (%d)\n", 1607 dir, nm, error)); 1608 } 1609 } else { 1610 error = VOP_REMOVE(dirvp, (char *)nm, kcred, 1611 NULL, 0); 1612 dsysdebug(error, 1613 ("rem_drv %s/%s remove (%d)\n", 1614 dir, nm, error)); 1615 } 1616 1617 VN_RELE(vp); 1618 if (error) { 1619 goto exit; 1620 } 1621 } 1622 } 1623 1624 exit: 1625 VN_RELE(dirvp); 1626 kmem_free(dbuf, dlen); 1627 1628 return (error); 1629 } 1630 1631 int 1632 devfs_remdrv_cleanup(const char *dir, const char *nodename) 1633 { 1634 int error; 1635 vnode_t *vp; 1636 vnode_t *dirvp; 1637 int eof; 1638 struct iovec iov; 1639 struct uio uio; 1640 struct dirent64 *dp; 1641 dirent64_t *dbuf; 1642 size_t dlen; 1643 size_t dbuflen; 1644 int ndirents = 64; 1645 int nodenamelen = strlen(nodename); 1646 char *nm; 1647 struct pathname pn; 1648 vnode_t *rvp; /* root node of the underlying attribute fs */ 1649 1650 dcmn_err5(("devfs_remdrv_cleanup: %s %s\n", dir, nodename)); 1651 1652 if (error = pn_get((char *)dir, UIO_SYSSPACE, &pn)) 1653 return (0); 1654 1655 rvp = dvroot->dv_attrvp; 1656 ASSERT(rvp != NULL); 1657 VN_HOLD(rvp); 1658 1659 pn_skipslash(&pn); 1660 dirvp = rvp; 1661 VN_HOLD(dirvp); 1662 1663 nm = kmem_alloc(MAXNAMELEN, KM_SLEEP); 1664 1665 while (pn_pathleft(&pn)) { 1666 ASSERT(dirvp->v_type == VDIR); 1667 (void) pn_getcomponent(&pn, nm); 1668 ASSERT((strcmp(nm, ".") != 0) && (strcmp(nm, "..") != 0)); 1669 error = VOP_LOOKUP(dirvp, nm, &vp, NULL, 0, rvp, kcred, 1670 NULL, NULL, NULL); 1671 if (error) { 1672 dcmn_err5(("remdrv_cleanup %s lookup error %d\n", 1673 nm, error)); 1674 VN_RELE(dirvp); 1675 if (dirvp != rvp) 1676 VN_RELE(rvp); 1677 pn_free(&pn); 1678 kmem_free(nm, MAXNAMELEN); 1679 return (0); 1680 } 1681 VN_RELE(dirvp); 1682 dirvp = vp; 1683 pn_skipslash(&pn); 1684 } 1685 1686 ASSERT(dirvp->v_type == VDIR); 1687 if (dirvp != rvp) 1688 VN_RELE(rvp); 1689 pn_free(&pn); 1690 kmem_free(nm, MAXNAMELEN); 1691 1692 dlen = ndirents * (sizeof (*dbuf)); 1693 dbuf = kmem_alloc(dlen, KM_SLEEP); 1694 1695 uio.uio_iov = &iov; 1696 uio.uio_iovcnt = 1; 1697 uio.uio_segflg = UIO_SYSSPACE; 1698 uio.uio_fmode = 0; 1699 uio.uio_extflg = UIO_COPY_CACHED; 1700 uio.uio_loffset = 0; 1701 uio.uio_llimit = MAXOFFSET_T; 1702 1703 eof = 0; 1704 error = 0; 1705 while (!error && !eof) { 1706 uio.uio_resid = dlen; 1707 iov.iov_base = (char *)dbuf; 1708 iov.iov_len = dlen; 1709 1710 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1711 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0); 1712 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1713 1714 dbuflen = dlen - uio.uio_resid; 1715 1716 if (error || dbuflen == 0) 1717 break; 1718 1719 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); 1720 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 1721 1722 nm = dp->d_name; 1723 1724 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) 1725 continue; 1726 1727 if (strncmp(nm, nodename, nodenamelen) != 0) 1728 continue; 1729 1730 error = VOP_LOOKUP(dirvp, nm, &vp, 1731 NULL, 0, NULL, kcred, NULL, NULL, NULL); 1732 1733 dsysdebug(error, 1734 ("rem_drv %s/%s lookup (%d)\n", 1735 dir, nm, error)); 1736 1737 if (error) 1738 continue; 1739 1740 ASSERT(vp->v_type == VDIR || 1741 vp->v_type == VCHR || vp->v_type == VBLK); 1742 1743 if (vp->v_type == VDIR) { 1744 error = devfs_remdrv_rmdir(vp, nm, rvp); 1745 if (error == 0) { 1746 error = VOP_RMDIR(dirvp, (char *)nm, 1747 rvp, kcred, NULL, 0); 1748 dsysdebug(error, 1749 ("rem_drv %s/%s rmdir (%d)\n", 1750 dir, nm, error)); 1751 } 1752 } else { 1753 error = VOP_REMOVE(dirvp, (char *)nm, kcred, 1754 NULL, 0); 1755 dsysdebug(error, 1756 ("rem_drv %s/%s remove (%d)\n", 1757 dir, nm, error)); 1758 } 1759 1760 VN_RELE(vp); 1761 if (error) 1762 goto exit; 1763 } 1764 } 1765 1766 exit: 1767 VN_RELE(dirvp); 1768 1769 kmem_free(dbuf, dlen); 1770 1771 return (0); 1772 } 1773 1774 struct dv_list { 1775 struct dv_node *dv; 1776 struct dv_list *next; 1777 }; 1778 1779 void 1780 dv_walk( 1781 struct dv_node *ddv, 1782 char *devnm, 1783 void (*callback)(struct dv_node *, void *), 1784 void *arg) 1785 { 1786 struct vnode *dvp; 1787 struct dv_node *dv; 1788 struct dv_list *head, *tail, *next; 1789 int len; 1790 1791 dcmn_err3(("dv_walk: ddv = %s, devnm = %s\n", 1792 ddv->dv_name, devnm ? devnm : "<null>")); 1793 1794 dvp = DVTOV(ddv); 1795 1796 ASSERT(dvp->v_type == VDIR); 1797 1798 head = tail = next = NULL; 1799 1800 rw_enter(&ddv->dv_contents, RW_READER); 1801 mutex_enter(&dvp->v_lock); 1802 for (dv = ddv->dv_dot; dv; dv = dv->dv_next) { 1803 /* 1804 * If devnm is not NULL and is not the empty string, 1805 * select only dv_nodes with matching non-minor name 1806 */ 1807 if (devnm && (len = strlen(devnm)) && 1808 (strncmp(devnm, dv->dv_name, len) || 1809 (dv->dv_name[len] != ':' && dv->dv_name[len] != '\0'))) 1810 continue; 1811 1812 callback(dv, arg); 1813 1814 if (DVTOV(dv)->v_type != VDIR) 1815 continue; 1816 1817 next = kmem_zalloc(sizeof (*next), KM_SLEEP); 1818 next->dv = dv; 1819 1820 if (tail) 1821 tail->next = next; 1822 else 1823 head = next; 1824 1825 tail = next; 1826 } 1827 1828 while (head) { 1829 dv_walk(head->dv, NULL, callback, arg); 1830 next = head->next; 1831 kmem_free(head, sizeof (*head)); 1832 head = next; 1833 } 1834 rw_exit(&ddv->dv_contents); 1835 mutex_exit(&dvp->v_lock); 1836 } 1837