1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * miscellaneous routines for the devfs 30 */ 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/t_lock.h> 35 #include <sys/systm.h> 36 #include <sys/sysmacros.h> 37 #include <sys/user.h> 38 #include <sys/time.h> 39 #include <sys/vfs.h> 40 #include <sys/vnode.h> 41 #include <sys/file.h> 42 #include <sys/fcntl.h> 43 #include <sys/flock.h> 44 #include <sys/kmem.h> 45 #include <sys/uio.h> 46 #include <sys/errno.h> 47 #include <sys/stat.h> 48 #include <sys/cred.h> 49 #include <sys/dirent.h> 50 #include <sys/pathname.h> 51 #include <sys/cmn_err.h> 52 #include <sys/debug.h> 53 #include <sys/modctl.h> 54 #include <fs/fs_subr.h> 55 #include <sys/fs/dv_node.h> 56 #include <sys/fs/snode.h> 57 #include <sys/sunndi.h> 58 #include <sys/sunmdi.h> 59 #include <sys/conf.h> 60 61 #ifdef DEBUG 62 int devfs_debug = 0x0; 63 #endif 64 65 const char dvnm[] = "devfs"; 66 kmem_cache_t *dv_node_cache; /* dv_node cache */ 67 uint_t devfs_clean_key; 68 struct dv_node *dvroot; 69 70 /* prototype memory vattrs */ 71 vattr_t dv_vattr_dir = { 72 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 73 VDIR, /* va_type */ 74 DV_DIRMODE_DEFAULT, /* va_mode */ 75 DV_UID_DEFAULT, /* va_uid */ 76 DV_GID_DEFAULT, /* va_gid */ 77 0, /* va_fsid; */ 78 0, /* va_nodeid; */ 79 0, /* va_nlink; */ 80 0, /* va_size; */ 81 0, /* va_atime; */ 82 0, /* va_mtime; */ 83 0, /* va_ctime; */ 84 0, /* va_rdev; */ 85 0, /* va_blksize; */ 86 0, /* va_nblocks; */ 87 0, /* va_seq; */ 88 }; 89 90 vattr_t dv_vattr_file = { 91 AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV, /* va_mask */ 92 0, /* va_type */ 93 DV_DEVMODE_DEFAULT, /* va_mode */ 94 DV_UID_DEFAULT, /* va_uid */ 95 DV_GID_DEFAULT, /* va_gid */ 96 0, /* va_fsid; */ 97 0, /* va_nodeid; */ 98 0, /* va_nlink; */ 99 0, /* va_size; */ 100 0, /* va_atime; */ 101 0, /* va_mtime; */ 102 0, /* va_ctime; */ 103 0, /* va_rdev; */ 104 0, /* va_blksize; */ 105 0, /* va_nblocks; */ 106 0, /* va_seq; */ 107 }; 108 109 vattr_t dv_vattr_priv = { 110 AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV, /* va_mask */ 111 0, /* va_type */ 112 DV_DEVMODE_PRIV, /* va_mode */ 113 DV_UID_DEFAULT, /* va_uid */ 114 DV_GID_DEFAULT, /* va_gid */ 115 0, /* va_fsid; */ 116 0, /* va_nodeid; */ 117 0, /* va_nlink; */ 118 0, /* va_size; */ 119 0, /* va_atime; */ 120 0, /* va_mtime; */ 121 0, /* va_ctime; */ 122 0, /* va_rdev; */ 123 0, /* va_blksize; */ 124 0, /* va_nblocks; */ 125 0, /* va_seq; */ 126 }; 127 128 extern dev_info_t *clone_dip; 129 extern major_t clone_major; 130 extern struct dev_ops *ddi_hold_driver(major_t); 131 132 /* 133 * dv_node cache constructor, destructor, can cache creation 134 */ 135 /*ARGSUSED1*/ 136 static int 137 i_dv_node_ctor(void *buf, void *cfarg, int flag) 138 { 139 struct dv_node *dv = (struct dv_node *)buf; 140 struct vnode *vp; 141 142 bzero(buf, sizeof (struct dv_node)); 143 144 /* initialize persistent parts of dv_node */ 145 rw_init(&dv->dv_contents, NULL, RW_DEFAULT, NULL); 146 147 /* allocate vnode and initialize link back to dv_node */ 148 dv->dv_vnode = vn_alloc(KM_SLEEP); 149 vp = DVTOV(dv); 150 vp->v_data = (caddr_t)dv; 151 return (0); 152 } 153 154 /* dev_info node destructor for kmem cache */ 155 /*ARGSUSED1*/ 156 static void 157 i_dv_node_dtor(void *buf, void *arg) 158 { 159 struct dv_node *dv = (struct dv_node *)buf; 160 struct vnode *vp = DVTOV(dv); 161 162 rw_destroy(&dv->dv_contents); 163 vn_invalid(vp); 164 vn_free(vp); 165 } 166 167 168 /* initialize dev_info node cache */ 169 void 170 dv_node_cache_init() 171 { 172 ASSERT(dv_node_cache == NULL); 173 dv_node_cache = kmem_cache_create("dv_node_cache", 174 sizeof (struct dv_node), 0, i_dv_node_ctor, i_dv_node_dtor, 175 NULL, NULL, NULL, 0); 176 177 tsd_create(&devfs_clean_key, NULL); 178 } 179 180 /* initialize dev_info node cache */ 181 void 182 dv_node_cache_fini() 183 { 184 ASSERT(dv_node_cache != NULL); 185 kmem_cache_destroy(dv_node_cache); 186 dv_node_cache = NULL; 187 188 tsd_destroy(&devfs_clean_key); 189 } 190 191 /* 192 * dv_mkino - Generate a unique inode number for devfs nodes. 193 * 194 * Although ino_t is 64 bits, the inode number is truncated to 32 bits for 32 195 * bit non-LARGEFILE applications. This means that there is a requirement to 196 * maintain the inode number as a 32 bit value or applications will have 197 * stat(2) calls fail with EOVERFLOW. We form a 32 bit inode number from the 198 * dev_t. but if the minor number is larger than L_MAXMIN32 we fold extra minor 199 * 200 * To generate inode numbers for directories, we assume that we will never use 201 * more than half the major space - this allows for ~8190 drivers. We use this 202 * upper major number space to allocate inode numbers for directories by 203 * encoding the major and instance into this space. 204 * 205 * We also skew the result so that inode 2 is reserved for the root of the file 206 * system. 207 * 208 * As part of the future support for 64-bit dev_t APIs, the upper minor bits 209 * should be folded into the high inode bits by adding the following code 210 * after "ino |= 1": 211 * 212 * #if (L_BITSMINOR32 != L_BITSMINOR) 213 * |* fold overflow minor bits into high bits of inode number *| 214 * ino |= ((ino_t)(minor >> L_BITSMINOR32)) << L_BITSMINOR; 215 * #endif |* (L_BITSMINOR32 != L_BITSMINOR) *| 216 * 217 * This way only applications that use devices that overflow their minor 218 * space will have an application level impact. 219 */ 220 static ino_t 221 dv_mkino(dev_info_t *devi, vtype_t typ, dev_t dev) 222 { 223 major_t major; 224 minor_t minor; 225 ino_t ino; 226 static int warn; 227 228 if (typ == VDIR) { 229 major = ((L_MAXMAJ32 + 1) >> 1) + DEVI(devi)->devi_major; 230 minor = ddi_get_instance(devi); 231 232 /* makedevice32 in high half of major number space */ 233 ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32)); 234 235 major = DEVI(devi)->devi_major; 236 } else { 237 major = getmajor(dev); 238 minor = getminor(dev); 239 240 /* makedevice32 */ 241 ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32)); 242 243 /* make ino for VCHR different than VBLK */ 244 ino <<= 1; 245 if (typ == VCHR) 246 ino |= 1; 247 } 248 249 ino += DV_ROOTINO + 1; /* skew */ 250 251 /* 252 * diagnose things a little early because adding the skew to a large 253 * minor number could roll over the major. 254 */ 255 if ((major >= (L_MAXMAJ32 >> 1)) && (warn == 0)) { 256 warn = 1; 257 cmn_err(CE_WARN, "%s: inode numbers are not unique", dvnm); 258 } 259 260 return (ino); 261 } 262 263 /* 264 * dv_mkroot 265 * 266 * Build the first VDIR dv_node. 267 */ 268 struct dv_node * 269 dv_mkroot(struct vfs *vfsp, dev_t devfsdev) 270 { 271 struct dv_node *dv; 272 struct vnode *vp; 273 274 ASSERT(ddi_root_node() != NULL); 275 ASSERT(dv_node_cache != NULL); 276 277 dcmn_err3(("dv_mkroot\n")); 278 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP); 279 vp = DVTOV(dv); 280 vn_reinit(vp); 281 vp->v_flag = VROOT; 282 vp->v_vfsp = vfsp; 283 vp->v_type = VDIR; 284 vp->v_rdev = devfsdev; 285 vn_setops(vp, dv_vnodeops); 286 vn_exists(vp); 287 288 dvroot = dv; 289 290 dv->dv_name = NULL; /* not needed */ 291 dv->dv_namelen = 0; 292 293 dv->dv_devi = ddi_root_node(); 294 295 dv->dv_ino = DV_ROOTINO; 296 dv->dv_nlink = 2; /* name + . (no dv_insert) */ 297 dv->dv_dotdot = dv; /* .. == self */ 298 dv->dv_attrvp = NULLVP; 299 dv->dv_attr = NULL; 300 dv->dv_flags = DV_BUILD; 301 dv->dv_priv = NULL; 302 dv->dv_busy = 0; 303 dv->dv_dflt_mode = 0; 304 305 return (dv); 306 } 307 308 /* 309 * dv_mkdir 310 * 311 * Given an probed or attached nexus node, create a VDIR dv_node. 312 * No dv_attrvp is created at this point. 313 */ 314 struct dv_node * 315 dv_mkdir(struct dv_node *ddv, dev_info_t *devi, char *nm) 316 { 317 struct dv_node *dv; 318 struct vnode *vp; 319 size_t nmlen; 320 321 ASSERT((devi)); 322 dcmn_err4(("dv_mkdir: %s\n", nm)); 323 324 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP); 325 nmlen = strlen(nm) + 1; 326 dv->dv_name = kmem_alloc(nmlen, KM_SLEEP); 327 bcopy(nm, dv->dv_name, nmlen); 328 dv->dv_namelen = nmlen - 1; /* '\0' not included */ 329 vp = DVTOV(dv); 330 vn_reinit(vp); 331 vp->v_flag = 0; 332 vp->v_vfsp = DVTOV(ddv)->v_vfsp; 333 vp->v_type = VDIR; 334 vp->v_rdev = DVTOV(ddv)->v_rdev; 335 vn_setops(vp, vn_getops(DVTOV(ddv))); 336 vn_exists(vp); 337 338 dv->dv_devi = devi; 339 ndi_hold_devi(devi); 340 341 dv->dv_ino = dv_mkino(devi, VDIR, NODEV); 342 dv->dv_nlink = 0; /* updated on insert */ 343 dv->dv_dotdot = ddv; 344 dv->dv_attrvp = NULLVP; 345 dv->dv_attr = NULL; 346 dv->dv_flags = DV_BUILD; 347 dv->dv_priv = NULL; 348 dv->dv_busy = 0; 349 dv->dv_dflt_mode = 0; 350 351 return (dv); 352 } 353 354 /* 355 * dv_mknod 356 * 357 * Given a minor node, create a VCHR or VBLK dv_node. 358 * No dv_attrvp is created at this point. 359 */ 360 static struct dv_node * 361 dv_mknod(struct dv_node *ddv, dev_info_t *devi, char *nm, 362 struct ddi_minor_data *dmd) 363 { 364 struct dv_node *dv; 365 struct vnode *vp; 366 size_t nmlen; 367 368 dcmn_err4(("dv_mknod: %s\n", nm)); 369 370 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP); 371 nmlen = strlen(nm) + 1; 372 dv->dv_name = kmem_alloc(nmlen, KM_SLEEP); 373 bcopy(nm, dv->dv_name, nmlen); 374 dv->dv_namelen = nmlen - 1; /* no '\0' */ 375 vp = DVTOV(dv); 376 vn_reinit(vp); 377 vp->v_flag = 0; 378 vp->v_vfsp = DVTOV(ddv)->v_vfsp; 379 vp->v_type = dmd->ddm_spec_type == S_IFCHR ? VCHR : VBLK; 380 vp->v_rdev = dmd->ddm_dev; 381 vn_setops(vp, vn_getops(DVTOV(ddv))); 382 vn_exists(vp); 383 384 ASSERT(MUTEX_HELD(&DEVI(devi)->devi_lock)); 385 dv->dv_devi = devi; 386 DEVI(devi)->devi_ref++; 387 388 dv->dv_ino = dv_mkino(devi, vp->v_type, vp->v_rdev); 389 dv->dv_nlink = 0; /* updated on insert */ 390 dv->dv_dotdot = ddv; 391 dv->dv_attrvp = NULLVP; 392 dv->dv_attr = NULL; 393 dv->dv_flags = 0; 394 395 if (dmd->type == DDM_INTERNAL_PATH) 396 dv->dv_flags |= DV_INTERNAL; 397 if (dmd->ddm_flags & DM_NO_FSPERM) 398 dv->dv_flags |= DV_NO_FSPERM; 399 400 dv->dv_priv = dmd->ddm_node_priv; 401 if (dv->dv_priv) 402 dphold(dv->dv_priv); 403 404 /* 405 * Minors created with ddi_create_priv_minor_node can specify 406 * a default mode permission other than the devfs default. 407 */ 408 if (dv->dv_priv || dv->dv_flags & DV_NO_FSPERM) { 409 dcmn_err5(("%s: dv_mknod default priv mode 0%o\n", 410 dv->dv_name, dmd->ddm_priv_mode)); 411 dv->dv_flags |= DV_DFLT_MODE; 412 dv->dv_dflt_mode = dmd->ddm_priv_mode & S_IAMB; 413 } 414 415 return (dv); 416 } 417 418 /* 419 * dv_destroy 420 * 421 * Destroy what we created in dv_mkdir or dv_mknod. 422 * In the case of a *referenced* directory, do nothing. 423 */ 424 /*ARGSUSED1*/ 425 void 426 dv_destroy(struct dv_node *dv, uint_t flags) 427 { 428 vnode_t *vp = DVTOV(dv); 429 ASSERT(dv->dv_nlink == 0); /* no references */ 430 ASSERT(dv->dv_next == NULL); /* unlinked from directory */ 431 432 dcmn_err4(("dv_destroy: %s\n", dv->dv_name)); 433 434 /* 435 * We may be asked to unlink referenced directories. 436 * In this case, there is nothing to be done. 437 * The eventual memory free will be done in 438 * devfs_inactive. 439 */ 440 if (vp->v_count != 0) { 441 ASSERT(vp->v_type == VDIR); 442 ASSERT(flags & DV_CLEAN_FORCE); 443 ASSERT(DV_STALE(dv)); 444 return; 445 } 446 447 if (dv->dv_attrvp != NULLVP) 448 VN_RELE(dv->dv_attrvp); 449 if (dv->dv_attr != NULL) 450 kmem_free(dv->dv_attr, sizeof (struct vattr)); 451 if (dv->dv_name != NULL) 452 kmem_free(dv->dv_name, dv->dv_namelen + 1); 453 if (dv->dv_devi != NULL) { 454 ndi_rele_devi(dv->dv_devi); 455 } 456 if (dv->dv_priv != NULL) { 457 dpfree(dv->dv_priv); 458 } 459 460 kmem_cache_free(dv_node_cache, dv); 461 } 462 463 /* 464 * Find and hold dv_node by name 465 */ 466 struct dv_node * 467 dv_findbyname(struct dv_node *ddv, char *nm) 468 { 469 struct dv_node *dv; 470 size_t nmlen = strlen(nm); 471 472 ASSERT(RW_LOCK_HELD(&ddv->dv_contents)); 473 dcmn_err3(("dv_findbyname: %s\n", nm)); 474 for (dv = ddv->dv_dot; dv; dv = dv->dv_next) { 475 if (dv->dv_namelen != nmlen) 476 continue; 477 if (strcmp(dv->dv_name, nm) == 0) { 478 VN_HOLD(DVTOV(dv)); 479 return (dv); 480 } 481 } 482 return (NULL); 483 } 484 485 /* 486 * Inserts a new dv_node in a parent directory 487 */ 488 void 489 dv_insert(struct dv_node *ddv, struct dv_node *dv) 490 { 491 ASSERT(RW_WRITE_HELD(&ddv->dv_contents)); 492 ASSERT(DVTOV(ddv)->v_type == VDIR); 493 ASSERT(ddv->dv_nlink >= 2); 494 ASSERT(dv->dv_nlink == 0); 495 496 dcmn_err3(("dv_insert: %s\n", dv->dv_name)); 497 498 dv->dv_dotdot = ddv; 499 dv->dv_next = ddv->dv_dot; 500 ddv->dv_dot = dv; 501 if (DVTOV(dv)->v_type == VDIR) { 502 ddv->dv_nlink++; /* .. to containing directory */ 503 dv->dv_nlink = 2; /* name + . */ 504 } else { 505 dv->dv_nlink = 1; /* name */ 506 } 507 } 508 509 /* 510 * Unlink a dv_node from a perent directory 511 */ 512 void 513 dv_unlink(struct dv_node *ddv, struct dv_node *dv, struct dv_node **dv_pprev) 514 { 515 /* verify linkage of arguments */ 516 ASSERT(ddv && dv && dv_pprev); 517 ASSERT(dv->dv_dotdot == ddv); 518 ASSERT(*dv_pprev == dv); 519 ASSERT(RW_WRITE_HELD(&ddv->dv_contents)); 520 ASSERT(DVTOV(ddv)->v_type == VDIR); 521 522 dcmn_err3(("dv_unlink: %s\n", dv->dv_name)); 523 524 if (DVTOV(dv)->v_type == VDIR) { 525 ddv->dv_nlink--; /* .. to containing directory */ 526 dv->dv_nlink -= 2; /* name + . */ 527 } else { 528 dv->dv_nlink -= 1; /* name */ 529 } 530 ASSERT(ddv->dv_nlink >= 2); 531 ASSERT(dv->dv_nlink == 0); 532 533 /* update ddv->dv_dot/dv_next */ 534 *dv_pprev = dv->dv_next; 535 536 dv->dv_dotdot = NULL; 537 dv->dv_next = NULL; 538 dv->dv_dot = NULL; 539 } 540 541 /* 542 * Merge devfs node specific information into an attribute structure. 543 * 544 * NOTE: specfs provides ATIME,MTIME,CTIME,SIZE,BLKSIZE,NBLOCKS on leaf node. 545 */ 546 void 547 dv_vattr_merge(struct dv_node *dv, struct vattr *vap) 548 { 549 struct vnode *vp = DVTOV(dv); 550 551 vap->va_nodeid = dv->dv_ino; 552 vap->va_nlink = dv->dv_nlink; 553 554 if (vp->v_type == VDIR) { 555 vap->va_rdev = 0; 556 vap->va_fsid = vp->v_rdev; 557 } else { 558 vap->va_rdev = vp->v_rdev; 559 vap->va_fsid = DVTOV(dv->dv_dotdot)->v_rdev; 560 vap->va_type = vp->v_type; 561 /* don't trust the shadow file type */ 562 vap->va_mode &= ~S_IFMT; 563 if (vap->va_type == VCHR) 564 vap->va_mode |= S_IFCHR; 565 else 566 vap->va_mode |= S_IFBLK; 567 } 568 } 569 570 /* 571 * Get default device permission by consulting rules in 572 * privilege specification in minor node and /etc/minor_perm. 573 * 574 * This function is called from the devname filesystem to get default 575 * permissions for a device exported to a non-global zone. 576 */ 577 void 578 devfs_get_defattr(struct vnode *vp, struct vattr *vap, int *no_fs_perm) 579 { 580 mperm_t mp; 581 struct dv_node *dv; 582 583 /* If vp isn't a dv_node, return something sensible */ 584 if (!vn_matchops(vp, dv_vnodeops)) { 585 if (no_fs_perm) 586 *no_fs_perm = 0; 587 *vap = dv_vattr_file; 588 return; 589 } 590 591 /* 592 * For minors not created by ddi_create_priv_minor_node(), 593 * use devfs defaults. 594 */ 595 dv = VTODV(vp); 596 if (vp->v_type == VDIR) { 597 *vap = dv_vattr_dir; 598 } else if (dv->dv_flags & DV_NO_FSPERM) { 599 if (no_fs_perm) 600 *no_fs_perm = 1; 601 *vap = dv_vattr_priv; 602 } else { 603 /* 604 * look up perm bits from minor_perm 605 */ 606 *vap = dv_vattr_file; 607 if (dev_minorperm(dv->dv_devi, dv->dv_name, &mp) == 0) { 608 VATTR_MP_MERGE((*vap), mp); 609 dcmn_err5(("%s: minor perm mode 0%o\n", 610 dv->dv_name, vap->va_mode)); 611 } else if (dv->dv_flags & DV_DFLT_MODE) { 612 ASSERT((dv->dv_dflt_mode & ~S_IAMB) == 0); 613 vap->va_mode &= ~S_IAMB; 614 vap->va_mode |= dv->dv_dflt_mode; 615 dcmn_err5(("%s: priv mode 0%o\n", 616 dv->dv_name, vap->va_mode)); 617 } 618 } 619 } 620 621 /* 622 * dv_shadow_node 623 * 624 * Given a VDIR dv_node, find/create the associated VDIR 625 * node in the shadow attribute filesystem. 626 * 627 * Given a VCHR/VBLK dv_node, find the associated VREG 628 * node in the shadow attribute filesystem. These nodes 629 * are only created to persist non-default attributes. 630 * Lack of such a node implies the default permissions 631 * are sufficient. 632 * 633 * Managing the attribute file entries is slightly tricky (mostly 634 * because we can't intercept VN_HOLD and VN_RELE except on the last 635 * release). 636 * 637 * We assert that if the dv_attrvp pointer is non-NULL, it points 638 * to a singly-held (by us) vnode that represents the shadow entry 639 * in the underlying filesystem. To avoid store-ordering issues, 640 * we assert that the pointer can only be tested under the dv_contents 641 * READERS lock. 642 */ 643 644 void 645 dv_shadow_node( 646 struct vnode *dvp, /* devfs parent directory vnode */ 647 char *nm, /* name component */ 648 struct vnode *vp, /* devfs vnode */ 649 struct pathname *pnp, /* the path .. */ 650 struct vnode *rdir, /* the root .. */ 651 struct cred *cred, /* who's asking? */ 652 int flags) /* optionally create shadow node */ 653 { 654 struct dv_node *dv; /* dv_node of named directory */ 655 struct vnode *rdvp; /* shadow parent directory vnode */ 656 struct vnode *rvp; /* shadow vnode */ 657 struct vnode *rrvp; /* realvp of shadow vnode */ 658 struct vattr vattr; 659 int create_tried; 660 int error; 661 662 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK); 663 dv = VTODV(vp); 664 dcmn_err3(("dv_shadow_node: name %s attr %p\n", 665 nm, (void *)dv->dv_attrvp)); 666 667 if ((flags & DV_SHADOW_WRITE_HELD) == 0) { 668 ASSERT(RW_READ_HELD(&dv->dv_contents)); 669 if (dv->dv_attrvp != NULLVP) 670 return; 671 if (!rw_tryupgrade(&dv->dv_contents)) { 672 rw_exit(&dv->dv_contents); 673 rw_enter(&dv->dv_contents, RW_WRITER); 674 if (dv->dv_attrvp != NULLVP) { 675 rw_downgrade(&dv->dv_contents); 676 return; 677 } 678 } 679 } else { 680 ASSERT(RW_WRITE_HELD(&dv->dv_contents)); 681 if (dv->dv_attrvp != NULLVP) 682 return; 683 } 684 685 ASSERT(RW_WRITE_HELD(&dv->dv_contents) && dv->dv_attrvp == NULL); 686 687 rdvp = VTODV(dvp)->dv_attrvp; 688 create_tried = 0; 689 lookup: 690 if (rdvp && (dv->dv_flags & DV_NO_FSPERM) == 0) { 691 error = VOP_LOOKUP(rdvp, nm, &rvp, pnp, LOOKUP_DIR, rdir, cred); 692 693 /* factor out the snode since we only want the attribute node */ 694 if ((error == 0) && (VOP_REALVP(rvp, &rrvp) == 0)) { 695 VN_HOLD(rrvp); 696 VN_RELE(rvp); 697 rvp = rrvp; 698 } 699 } else 700 error = EROFS; /* no parent, no entry */ 701 702 /* 703 * All we want is the permissions (and maybe ACLs and 704 * extended attributes), and we want to perform lookups 705 * by name. Drivers occasionally change their minor 706 * number space. If something changes, there's no 707 * much we can do about it here. 708 */ 709 710 /* The shadow node checks out. We are done */ 711 if (error == 0) { 712 dv->dv_attrvp = rvp; /* with one hold */ 713 714 /* 715 * Determine if we have non-trivial ACLs on this node. 716 * It is not necessary to VOP_RWLOCK since fs_acl_nontrivial 717 * only does VOP_GETSECATTR. 718 */ 719 dv->dv_flags &= ~DV_ACL; 720 721 if (fs_acl_nontrivial(rvp, cred)) 722 dv->dv_flags |= DV_ACL; 723 724 /* 725 * If we have synced out the memory attributes, free 726 * them and switch back to using the persistent store. 727 */ 728 if (rvp && dv->dv_attr) { 729 kmem_free(dv->dv_attr, sizeof (struct vattr)); 730 dv->dv_attr = NULL; 731 } 732 if ((flags & DV_SHADOW_WRITE_HELD) == 0) 733 rw_downgrade(&dv->dv_contents); 734 ASSERT(RW_LOCK_HELD(&dv->dv_contents)); 735 return; 736 } 737 738 /* 739 * Failed to find attribute in persistent backing store, 740 * get default permission bits. 741 */ 742 devfs_get_defattr(vp, &vattr, NULL); 743 744 dv_vattr_merge(dv, &vattr); 745 gethrestime(&vattr.va_atime); 746 vattr.va_mtime = vattr.va_atime; 747 vattr.va_ctime = vattr.va_atime; 748 749 /* 750 * Try to create shadow dir. This is necessary in case 751 * we need to create a shadow leaf node later, when user 752 * executes chmod. 753 */ 754 if ((error == ENOENT) && !create_tried) { 755 switch (vp->v_type) { 756 case VDIR: 757 error = VOP_MKDIR(rdvp, nm, &vattr, &rvp, kcred); 758 dsysdebug(error, ("vop_mkdir %s %s %d\n", 759 VTODV(dvp)->dv_name, nm, error)); 760 create_tried = 1; 761 break; 762 763 case VCHR: 764 case VBLK: 765 /* 766 * Shadow nodes are only created on demand 767 */ 768 if (flags & DV_SHADOW_CREATE) { 769 error = VOP_CREATE(rdvp, nm, &vattr, NONEXCL, 770 VREAD|VWRITE, &rvp, kcred, 0); 771 dsysdebug(error, ("vop_create %s %s %d\n", 772 VTODV(dvp)->dv_name, nm, error)); 773 create_tried = 1; 774 } 775 break; 776 777 default: 778 cmn_err(CE_PANIC, "devfs: %s: create", dvnm); 779 /*NOTREACHED*/ 780 } 781 782 if (create_tried && 783 (error == 0) || (error == EEXIST)) { 784 VN_RELE(rvp); 785 goto lookup; 786 } 787 } 788 789 /* Store attribute in memory */ 790 if (dv->dv_attr == NULL) { 791 dv->dv_attr = kmem_alloc(sizeof (struct vattr), KM_SLEEP); 792 *(dv->dv_attr) = vattr; 793 } 794 795 if ((flags & DV_SHADOW_WRITE_HELD) == 0) 796 rw_downgrade(&dv->dv_contents); 797 ASSERT(RW_LOCK_HELD(&dv->dv_contents)); 798 } 799 800 /* 801 * Given a devinfo node, and a name, returns the appropriate 802 * minor information for that named node, if it exists. 803 */ 804 static int 805 dv_find_leafnode(dev_info_t *devi, char *minor_nm, struct ddi_minor_data *r_mi) 806 { 807 struct ddi_minor_data *dmd; 808 809 ASSERT(i_ddi_devi_attached(devi)); 810 ASSERT(MUTEX_HELD(&DEVI(devi)->devi_lock)); 811 812 dcmn_err3(("dv_find_leafnode: %s\n", minor_nm)); 813 for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) { 814 815 /* 816 * Skip alias nodes and nodes without a name. 817 */ 818 if ((dmd->type == DDM_ALIAS) || (dmd->ddm_name == NULL)) 819 continue; 820 821 dcmn_err4(("dv_find_leafnode: (%s,%s)\n", 822 minor_nm, dmd->ddm_name)); 823 if (strcmp(minor_nm, dmd->ddm_name) == 0) { 824 r_mi->ddm_dev = dmd->ddm_dev; 825 r_mi->ddm_spec_type = dmd->ddm_spec_type; 826 r_mi->type = dmd->type; 827 r_mi->ddm_flags = dmd->ddm_flags; 828 r_mi->ddm_node_priv = dmd->ddm_node_priv; 829 r_mi->ddm_priv_mode = dmd->ddm_priv_mode; 830 if (r_mi->ddm_node_priv) 831 dphold(r_mi->ddm_node_priv); 832 return (0); 833 } 834 } 835 836 dcmn_err3(("dv_find_leafnode: %s: ENOENT\n", minor_nm)); 837 return (ENOENT); 838 } 839 840 /* 841 * Special handling for clone node: 842 * Clone minor name is a driver name, the minor number will 843 * be the major number of the driver. There is no minor 844 * node under the clone driver, so we'll manufacture the 845 * dev_t. 846 */ 847 static struct dv_node * 848 dv_clone_mknod(struct dv_node *ddv, char *drvname) 849 { 850 major_t major; 851 struct dv_node *dvp; 852 char *devnm; 853 struct ddi_minor_data *dmd; 854 855 /* 856 * Make sure drvname is a STREAMS driver. We load the driver, 857 * but don't attach to any instances. This makes stat(2) 858 * relatively cheap. 859 */ 860 major = ddi_name_to_major(drvname); 861 if (major == (major_t)-1) 862 return (NULL); 863 864 if (ddi_hold_driver(major) == NULL) 865 return (NULL); 866 867 if (STREAMSTAB(major) == NULL) { 868 ddi_rele_driver(major); 869 return (NULL); 870 } 871 872 ddi_rele_driver(major); 873 devnm = kmem_alloc(MAXNAMELEN, KM_SLEEP); 874 (void) snprintf(devnm, MAXNAMELEN, "clone@0:%s", drvname); 875 dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP); 876 dmd->ddm_dev = makedevice(clone_major, (minor_t)major); 877 dmd->ddm_spec_type = S_IFCHR; 878 dvp = dv_mknod(ddv, clone_dip, devnm, dmd); 879 kmem_free(dmd, sizeof (*dmd)); 880 kmem_free(devnm, MAXNAMELEN); 881 return (dvp); 882 } 883 884 /* 885 * Given the parent directory node, and a name in it, returns the 886 * named dv_node to the caller (as a vnode). 887 * 888 * (We need pnp and rdir for doing shadow lookups; they can be NULL) 889 */ 890 int 891 dv_find(struct dv_node *ddv, char *nm, struct vnode **vpp, struct pathname *pnp, 892 struct vnode *rdir, struct cred *cred, uint_t ndi_flags) 893 { 894 extern int isminiroot; /* see modctl.c */ 895 896 int rv = 0, was_busy = 0, nmlen; 897 struct vnode *vp; 898 struct dv_node *dv, *dup; 899 dev_info_t *pdevi, *devi = NULL; 900 char *mnm; 901 struct ddi_minor_data *dmd; 902 903 dcmn_err3(("dv_find %s\n", nm)); 904 905 rw_enter(&ddv->dv_contents, RW_READER); 906 start: 907 if (DV_STALE(ddv)) { 908 rw_exit(&ddv->dv_contents); 909 return (ESTALE); 910 } 911 912 /* 913 * Empty name or ., return node itself. 914 */ 915 nmlen = strlen(nm); 916 if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) { 917 *vpp = DVTOV(ddv); 918 rw_exit(&ddv->dv_contents); 919 VN_HOLD(*vpp); 920 return (0); 921 } 922 923 /* 924 * .., return the parent directory 925 */ 926 if ((nmlen == 2) && (strcmp(nm, "..") == 0)) { 927 *vpp = DVTOV(ddv->dv_dotdot); 928 rw_exit(&ddv->dv_contents); 929 VN_HOLD(*vpp); 930 return (0); 931 } 932 933 /* 934 * Fail anything without a valid device name component 935 */ 936 if (nm[0] == '@' || nm[0] == ':') { 937 dcmn_err3(("devfs: no driver '%s'\n", nm)); 938 rw_exit(&ddv->dv_contents); 939 return (ENOENT); 940 } 941 942 /* 943 * So, now we have to deal with the trickier stuff. 944 * 945 * (a) search the existing list of dv_nodes on this directory 946 */ 947 if ((dv = dv_findbyname(ddv, nm)) != NULL) { 948 founddv: 949 ASSERT(RW_LOCK_HELD(&ddv->dv_contents)); 950 rw_enter(&dv->dv_contents, RW_READER); 951 vp = DVTOV(dv); 952 if ((dv->dv_attrvp != NULLVP) || 953 (vp->v_type != VDIR && dv->dv_attr != NULL)) { 954 /* 955 * Common case - we already have attributes 956 */ 957 rw_exit(&dv->dv_contents); 958 rw_exit(&ddv->dv_contents); 959 goto found; 960 } 961 962 /* 963 * No attribute vp, try and build one. 964 */ 965 dv_shadow_node(DVTOV(ddv), nm, vp, pnp, rdir, cred, 0); 966 rw_exit(&dv->dv_contents); 967 rw_exit(&ddv->dv_contents); 968 goto found; 969 } 970 971 /* 972 * (b) Search the child devinfo nodes of our parent directory, 973 * looking for the named node. If we find it, build a new 974 * node, then grab the writers lock, search the directory 975 * if it's still not there, then insert it. 976 * 977 * We drop the devfs locks before accessing the device tree. 978 * Take care to mark the node BUSY so that a forced devfs_clean 979 * doesn't mark the directory node stale. 980 * 981 * Also, check if we are called as part of devfs_clean or 982 * reset_perm. If so, simply return not found because there 983 * is nothing to clean. 984 */ 985 if (tsd_get(devfs_clean_key)) { 986 rw_exit(&ddv->dv_contents); 987 return (ENOENT); 988 } 989 990 /* 991 * We could be either READ or WRITE locked at 992 * this point. Upgrade if we are read locked. 993 */ 994 ASSERT(RW_LOCK_HELD(&ddv->dv_contents)); 995 if (rw_read_locked(&ddv->dv_contents) && 996 !rw_tryupgrade(&ddv->dv_contents)) { 997 rw_exit(&ddv->dv_contents); 998 rw_enter(&ddv->dv_contents, RW_WRITER); 999 /* 1000 * Things may have changed when we dropped 1001 * the contents lock, so start from top again 1002 */ 1003 goto start; 1004 } 1005 ddv->dv_busy++; /* mark busy before dropping lock */ 1006 was_busy++; 1007 rw_exit(&ddv->dv_contents); 1008 1009 pdevi = ddv->dv_devi; 1010 ASSERT(pdevi != NULL); 1011 1012 mnm = strchr(nm, ':'); 1013 if (mnm) 1014 *mnm = (char)0; 1015 1016 /* 1017 * Configure one nexus child, will call nexus's bus_ops 1018 * If successful, devi is held upon returning. 1019 * Note: devfs lookup should not be configuring grandchildren. 1020 */ 1021 ASSERT((ndi_flags & NDI_CONFIG) == 0); 1022 1023 rv = ndi_devi_config_one(pdevi, nm, &devi, ndi_flags | NDI_NO_EVENT); 1024 if (mnm) 1025 *mnm = ':'; 1026 if (rv != NDI_SUCCESS) { 1027 rv = ENOENT; 1028 goto notfound; 1029 } 1030 1031 /* 1032 * Don't make vhci clients visible under phci, unless we 1033 * are in miniroot. 1034 */ 1035 if (isminiroot == 0 && ddi_get_parent(devi) != pdevi) { 1036 ndi_rele_devi(devi); 1037 rv = ENOENT; 1038 goto notfound; 1039 } 1040 1041 ASSERT(devi && i_ddi_devi_attached(devi)); 1042 1043 /* 1044 * Invalidate cache to notice newly created minor nodes. 1045 */ 1046 rw_enter(&ddv->dv_contents, RW_WRITER); 1047 ddv->dv_flags |= DV_BUILD; 1048 rw_exit(&ddv->dv_contents); 1049 1050 /* 1051 * mkdir for nexus drivers and leaf nodes as well. If we are racing 1052 * and create a duplicate, the duplicate will be destroyed below. 1053 */ 1054 if (mnm == NULL) { 1055 dv = dv_mkdir(ddv, devi, nm); 1056 } else { 1057 /* 1058 * For clone minors, load the driver indicated by minor name. 1059 */ 1060 mutex_enter(&DEVI(devi)->devi_lock); 1061 if (devi == clone_dip) { 1062 dv = dv_clone_mknod(ddv, mnm + 1); 1063 } else { 1064 /* 1065 * Find minor node and make a dv_node 1066 */ 1067 dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP); 1068 if (dv_find_leafnode(devi, mnm + 1, dmd) == 0) { 1069 dv = dv_mknod(ddv, devi, nm, dmd); 1070 if (dmd->ddm_node_priv) 1071 dpfree(dmd->ddm_node_priv); 1072 } 1073 kmem_free(dmd, sizeof (*dmd)); 1074 } 1075 mutex_exit(&DEVI(devi)->devi_lock); 1076 } 1077 /* 1078 * Release hold from ndi_devi_config_one() 1079 */ 1080 ndi_rele_devi(devi); 1081 1082 if (dv == NULL) { 1083 rv = ENOENT; 1084 goto notfound; 1085 } 1086 1087 /* 1088 * We have released the dv_contents lock, need to check 1089 * if another thread already created a duplicate node 1090 */ 1091 rw_enter(&ddv->dv_contents, RW_WRITER); 1092 if ((dup = dv_findbyname(ddv, nm)) == NULL) { 1093 dv_insert(ddv, dv); 1094 } else { 1095 /* 1096 * Duplicate found, use the existing node 1097 */ 1098 VN_RELE(DVTOV(dv)); 1099 dv_destroy(dv, 0); 1100 dv = dup; 1101 } 1102 goto founddv; 1103 /*NOTREACHED*/ 1104 1105 found: 1106 /* 1107 * Skip non-kernel lookups of internal nodes. 1108 * This use of kcred to distinguish between user and 1109 * internal kernel lookups is unfortunate. The information 1110 * provided by the seg argument to lookupnameat should 1111 * evolve into a lookup flag for filesystems that need 1112 * this distinction. 1113 */ 1114 if ((dv->dv_flags & DV_INTERNAL) && (cred != kcred)) { 1115 VN_RELE(vp); 1116 rv = ENOENT; 1117 goto notfound; 1118 } 1119 1120 dcmn_err2(("dv_find: returning vp for nm %s\n", nm)); 1121 if (vp->v_type == VCHR || vp->v_type == VBLK) { 1122 /* 1123 * If vnode is a device, return special vnode instead 1124 * (though it knows all about -us- via sp->s_realvp, 1125 * sp->s_devvp, and sp->s_dip) 1126 */ 1127 *vpp = specvp_devfs(vp, vp->v_rdev, vp->v_type, cred, 1128 dv->dv_devi); 1129 VN_RELE(vp); 1130 if (*vpp == NULLVP) 1131 rv = ENOSYS; 1132 } else 1133 *vpp = vp; 1134 1135 notfound: 1136 rw_enter(&ddv->dv_contents, RW_WRITER); 1137 if (was_busy) 1138 ddv->dv_busy--; 1139 rw_exit(&ddv->dv_contents); 1140 return (rv); 1141 } 1142 1143 /* 1144 * The given directory node is out-of-date; that is, it has been 1145 * marked as needing to be rebuilt, possibly because some new devinfo 1146 * node has come into existence, or possibly because this is the first 1147 * time we've been here. 1148 */ 1149 void 1150 dv_filldir(struct dv_node *ddv) 1151 { 1152 struct dv_node *dv; 1153 dev_info_t *devi, *pdevi; 1154 struct ddi_minor_data *dmd; 1155 char devnm[MAXNAMELEN]; 1156 int circ; 1157 1158 ASSERT(DVTOV(ddv)->v_type == VDIR); 1159 ASSERT(RW_WRITE_HELD(&ddv->dv_contents)); 1160 ASSERT(ddv->dv_flags & DV_BUILD); 1161 1162 dcmn_err3(("dv_filldir: %s\n", ddv->dv_name)); 1163 if (DV_STALE(ddv)) 1164 return; 1165 pdevi = ddv->dv_devi; 1166 1167 if (ndi_devi_config(pdevi, NDI_NO_EVENT) != NDI_SUCCESS) { 1168 dcmn_err3(("dv_filldir: config error %s\n", 1169 ddv->dv_name)); 1170 } 1171 1172 ndi_devi_enter(pdevi, &circ); 1173 for (devi = ddi_get_child(pdevi); devi; 1174 devi = ddi_get_next_sibling(devi)) { 1175 if (i_ddi_node_state(devi) < DS_PROBED) 1176 continue; 1177 1178 dcmn_err3(("dv_filldir: node %s\n", ddi_node_name(devi))); 1179 1180 mutex_enter(&DEVI(devi)->devi_lock); 1181 for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) { 1182 char *addr; 1183 1184 /* 1185 * Skip alias nodes, internal nodes, and nodes 1186 * without a name. We allow DDM_DEFAULT nodes 1187 * to appear in readdir. 1188 */ 1189 if ((dmd->type == DDM_ALIAS) || 1190 (dmd->type == DDM_INTERNAL_PATH) || 1191 (dmd->ddm_name == NULL)) 1192 continue; 1193 1194 addr = ddi_get_name_addr(devi); 1195 if (addr && *addr) 1196 (void) sprintf(devnm, "%s@%s:%s", 1197 ddi_node_name(devi), addr, dmd->ddm_name); 1198 else 1199 (void) sprintf(devnm, "%s:%s", 1200 ddi_node_name(devi), dmd->ddm_name); 1201 1202 if ((dv = dv_findbyname(ddv, devnm)) != NULL) { 1203 /* dv_node already exists */ 1204 VN_RELE(DVTOV(dv)); 1205 continue; 1206 } 1207 1208 dv = dv_mknod(ddv, devi, devnm, dmd); 1209 dv_insert(ddv, dv); 1210 VN_RELE(DVTOV(dv)); 1211 } 1212 mutex_exit(&DEVI(devi)->devi_lock); 1213 1214 (void) ddi_deviname(devi, devnm); 1215 if ((dv = dv_findbyname(ddv, devnm + 1)) == NULL) { 1216 /* directory doesn't exist */ 1217 dv = dv_mkdir(ddv, devi, devnm + 1); 1218 dv_insert(ddv, dv); 1219 } 1220 VN_RELE(DVTOV(dv)); 1221 } 1222 ndi_devi_exit(pdevi, circ); 1223 1224 ddv->dv_flags &= ~DV_BUILD; 1225 } 1226 1227 /* 1228 * Given a directory node, clean out all the nodes beneath. 1229 * 1230 * VDIR: Reinvoke to clean them, then delete the directory. 1231 * VCHR, VBLK: Just blow them away. 1232 * 1233 * Mark the directories touched as in need of a rebuild, in case 1234 * we fall over part way through. When DV_CLEAN_FORCE is specified, 1235 * we mark referenced empty directories as stale to facilitate DR. 1236 */ 1237 int 1238 dv_cleandir(struct dv_node *ddv, char *devnm, uint_t flags) 1239 { 1240 struct dv_node *dv; 1241 struct dv_node **pprev, **npprev; 1242 struct vnode *vp; 1243 int busy = 0; 1244 1245 dcmn_err3(("dv_cleandir: %s\n", ddv->dv_name)); 1246 1247 if (!(flags & DV_CLEANDIR_LCK)) 1248 rw_enter(&ddv->dv_contents, RW_WRITER); 1249 for (pprev = &ddv->dv_dot, dv = *pprev; dv; 1250 pprev = npprev, dv = *pprev) { 1251 npprev = &dv->dv_next; 1252 1253 /* 1254 * If devnm is specified, the non-minor portion of the 1255 * name must match devnm. 1256 */ 1257 if (devnm && 1258 (strncmp(devnm, dv->dv_name, strlen(devnm)) || 1259 (dv->dv_name[strlen(devnm)] != ':' && 1260 dv->dv_name[strlen(devnm)] != '\0'))) 1261 continue; 1262 1263 /* check type of what we are cleaning */ 1264 vp = DVTOV(dv); 1265 if (vp->v_type == VDIR) { 1266 /* recurse on directories */ 1267 rw_enter(&dv->dv_contents, RW_WRITER); 1268 if (dv_cleandir(dv, NULL, 1269 flags | DV_CLEANDIR_LCK) == EBUSY) { 1270 rw_exit(&dv->dv_contents); 1271 goto set_busy; 1272 } 1273 1274 /* A clean directory is an empty directory... */ 1275 ASSERT(dv->dv_nlink == 2); 1276 mutex_enter(&vp->v_lock); 1277 if (vp->v_count > 0) { 1278 /* 1279 * ... but an empty directory can still have 1280 * references to it. If we have dv_busy or 1281 * DV_CLEAN_FORCE is *not* specified then a 1282 * referenced directory is considered busy. 1283 */ 1284 if (dv->dv_busy || !(flags & DV_CLEAN_FORCE)) { 1285 mutex_exit(&vp->v_lock); 1286 rw_exit(&dv->dv_contents); 1287 goto set_busy; 1288 } 1289 1290 /* 1291 * Mark referenced directory stale so that DR 1292 * will succeed even if a shell has 1293 * /devices/xxx as current directory (causing 1294 * VN_HOLD reference to an empty directory). 1295 */ 1296 ASSERT(!DV_STALE(dv)); 1297 ndi_rele_devi(dv->dv_devi); 1298 dv->dv_devi = NULL; /* mark DV_STALE */ 1299 } 1300 } else { 1301 ASSERT((vp->v_type == VCHR) || (vp->v_type == VBLK)); 1302 ASSERT(dv->dv_nlink == 1); /* no hard links */ 1303 mutex_enter(&vp->v_lock); 1304 if (vp->v_count > 0) { 1305 mutex_exit(&vp->v_lock); 1306 goto set_busy; 1307 } 1308 } 1309 1310 /* unlink from directory */ 1311 dv_unlink(ddv, dv, pprev); 1312 1313 /* drop locks */ 1314 mutex_exit(&vp->v_lock); 1315 if (vp->v_type == VDIR) 1316 rw_exit(&dv->dv_contents); 1317 1318 /* destroy vnode if ref count is zero */ 1319 if (vp->v_count == 0) 1320 dv_destroy(dv, flags); 1321 1322 /* pointer to previous stays unchanged */ 1323 npprev = pprev; 1324 continue; 1325 1326 /* 1327 * If devnm is not NULL we return immediately on busy, 1328 * otherwise we continue destroying unused dv_node's. 1329 */ 1330 set_busy: busy++; 1331 if (devnm) 1332 break; 1333 } 1334 1335 /* 1336 * This code may be invoked to inform devfs that a new node has 1337 * been created in the kernel device tree. So we always set 1338 * the DV_BUILD flag to allow the next dv_filldir() to pick 1339 * the new devinfo nodes. 1340 */ 1341 ddv->dv_flags |= DV_BUILD; 1342 1343 if (!(flags & DV_CLEANDIR_LCK)) 1344 rw_exit(&ddv->dv_contents); 1345 1346 return (busy ? EBUSY : 0); 1347 } 1348 1349 /* 1350 * Walk through the devfs hierarchy, correcting the permissions of 1351 * devices with default permissions that do not match those specified 1352 * by minor perm. This can only be done for all drivers for now. 1353 */ 1354 static int 1355 dv_reset_perm_dir(struct dv_node *ddv, uint_t flags) 1356 { 1357 struct dv_node *dv, *next = NULL; 1358 struct vnode *vp; 1359 int retval = 0; 1360 struct vattr *attrp; 1361 mperm_t mp; 1362 char *nm; 1363 uid_t old_uid; 1364 gid_t old_gid; 1365 mode_t old_mode; 1366 1367 rw_enter(&ddv->dv_contents, RW_WRITER); 1368 for (dv = ddv->dv_dot; dv; dv = next) { 1369 int error = 0; 1370 next = dv->dv_next; 1371 nm = dv->dv_name; 1372 1373 rw_enter(&dv->dv_contents, RW_READER); 1374 vp = DVTOV(dv); 1375 if (vp->v_type == VDIR) { 1376 rw_exit(&dv->dv_contents); 1377 if (dv_reset_perm_dir(dv, flags) != 0) { 1378 error = EBUSY; 1379 } 1380 } else { 1381 ASSERT(vp->v_type == VCHR || vp->v_type == VBLK); 1382 1383 /* 1384 * Check for permissions from minor_perm 1385 * If there are none, we're done 1386 */ 1387 rw_exit(&dv->dv_contents); 1388 if (dev_minorperm(dv->dv_devi, nm, &mp) != 0) 1389 continue; 1390 1391 rw_enter(&dv->dv_contents, RW_READER); 1392 1393 /* 1394 * Allow a node's permissions to be altered 1395 * permanently from the defaults by chmod, 1396 * using the shadow node as backing store. 1397 * Otherwise, update node to minor_perm permissions. 1398 */ 1399 if (dv->dv_attrvp == NULLVP) { 1400 /* 1401 * No attribute vp, try to find one. 1402 */ 1403 dv_shadow_node(DVTOV(ddv), nm, vp, 1404 NULL, NULLVP, kcred, 0); 1405 } 1406 if (dv->dv_attrvp != NULLVP || dv->dv_attr == NULL) { 1407 rw_exit(&dv->dv_contents); 1408 continue; 1409 } 1410 1411 attrp = dv->dv_attr; 1412 1413 if (VATTRP_MP_CMP(attrp, mp) == 0) { 1414 dcmn_err5(("%s: no perm change: " 1415 "%d %d 0%o\n", nm, attrp->va_uid, 1416 attrp->va_gid, attrp->va_mode)); 1417 rw_exit(&dv->dv_contents); 1418 continue; 1419 } 1420 1421 old_uid = attrp->va_uid; 1422 old_gid = attrp->va_gid; 1423 old_mode = attrp->va_mode; 1424 1425 VATTRP_MP_MERGE(attrp, mp); 1426 mutex_enter(&vp->v_lock); 1427 if (vp->v_count > 0) { 1428 error = EBUSY; 1429 } 1430 mutex_exit(&vp->v_lock); 1431 1432 dcmn_err5(("%s: perm %d/%d/0%o -> %d/%d/0%o (%d)\n", 1433 nm, old_uid, old_gid, old_mode, attrp->va_uid, 1434 attrp->va_gid, attrp->va_mode, error)); 1435 1436 rw_exit(&dv->dv_contents); 1437 } 1438 1439 if (error != 0) { 1440 retval = error; 1441 } 1442 } 1443 1444 ddv->dv_flags |= DV_BUILD; 1445 1446 rw_exit(&ddv->dv_contents); 1447 1448 return (retval); 1449 } 1450 1451 int 1452 devfs_reset_perm(uint_t flags) 1453 { 1454 struct dv_node *dvp; 1455 int rval; 1456 1457 if ((dvp = devfs_dip_to_dvnode(ddi_root_node())) == NULL) 1458 return (0); 1459 1460 VN_HOLD(DVTOV(dvp)); 1461 rval = dv_reset_perm_dir(dvp, flags); 1462 VN_RELE(DVTOV(dvp)); 1463 return (rval); 1464 } 1465 1466 /* 1467 * Clean up dangling devfs shadow nodes for removed 1468 * drivers so that, in the event the driver is re-added 1469 * to the system, newly created nodes won't incorrectly 1470 * pick up these stale shadow node permissions. 1471 * 1472 * This is accomplished by walking down the pathname 1473 * to the directory, starting at the root's attribute 1474 * node, then removing all minors matching the specified 1475 * node name. Care must be taken to remove all entries 1476 * in a directory before the directory itself, so that 1477 * the clean-up associated with rem_drv'ing a nexus driver 1478 * does not inadvertently result in an inconsistent 1479 * filesystem underlying devfs. 1480 */ 1481 1482 static int 1483 devfs_remdrv_rmdir(vnode_t *dirvp, const char *dir, vnode_t *rvp) 1484 { 1485 int error; 1486 vnode_t *vp; 1487 int eof; 1488 struct iovec iov; 1489 struct uio uio; 1490 struct dirent64 *dp; 1491 dirent64_t *dbuf; 1492 size_t dlen; 1493 size_t dbuflen; 1494 int ndirents = 64; 1495 char *nm; 1496 1497 VN_HOLD(dirvp); 1498 1499 dlen = ndirents * (sizeof (*dbuf)); 1500 dbuf = kmem_alloc(dlen, KM_SLEEP); 1501 1502 uio.uio_iov = &iov; 1503 uio.uio_iovcnt = 1; 1504 uio.uio_segflg = UIO_SYSSPACE; 1505 uio.uio_fmode = 0; 1506 uio.uio_extflg = UIO_COPY_CACHED; 1507 uio.uio_loffset = 0; 1508 uio.uio_llimit = MAXOFFSET_T; 1509 1510 eof = 0; 1511 error = 0; 1512 while (!error && !eof) { 1513 uio.uio_resid = dlen; 1514 iov.iov_base = (char *)dbuf; 1515 iov.iov_len = dlen; 1516 1517 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1518 error = VOP_READDIR(dirvp, &uio, kcred, &eof); 1519 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1520 1521 dbuflen = dlen - uio.uio_resid; 1522 1523 if (error || dbuflen == 0) 1524 break; 1525 1526 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); 1527 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 1528 1529 nm = dp->d_name; 1530 1531 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) 1532 continue; 1533 1534 error = VOP_LOOKUP(dirvp, 1535 nm, &vp, NULL, 0, NULL, kcred); 1536 1537 dsysdebug(error, 1538 ("rem_drv %s/%s lookup (%d)\n", 1539 dir, nm, error)); 1540 1541 if (error) 1542 continue; 1543 1544 ASSERT(vp->v_type == VDIR || 1545 vp->v_type == VCHR || vp->v_type == VBLK); 1546 1547 if (vp->v_type == VDIR) { 1548 error = devfs_remdrv_rmdir(vp, nm, rvp); 1549 if (error == 0) { 1550 error = VOP_RMDIR(dirvp, 1551 (char *)nm, rvp, kcred); 1552 dsysdebug(error, 1553 ("rem_drv %s/%s rmdir (%d)\n", 1554 dir, nm, error)); 1555 } 1556 } else { 1557 error = VOP_REMOVE(dirvp, (char *)nm, kcred); 1558 dsysdebug(error, 1559 ("rem_drv %s/%s remove (%d)\n", 1560 dir, nm, error)); 1561 } 1562 1563 VN_RELE(vp); 1564 if (error) { 1565 goto exit; 1566 } 1567 } 1568 } 1569 1570 exit: 1571 VN_RELE(dirvp); 1572 kmem_free(dbuf, dlen); 1573 1574 return (error); 1575 } 1576 1577 int 1578 devfs_remdrv_cleanup(const char *dir, const char *nodename) 1579 { 1580 int error; 1581 vnode_t *vp; 1582 vnode_t *dirvp; 1583 int eof; 1584 struct iovec iov; 1585 struct uio uio; 1586 struct dirent64 *dp; 1587 dirent64_t *dbuf; 1588 size_t dlen; 1589 size_t dbuflen; 1590 int ndirents = 64; 1591 int nodenamelen = strlen(nodename); 1592 char *nm; 1593 struct pathname pn; 1594 vnode_t *rvp; /* root node of the underlying attribute fs */ 1595 1596 dcmn_err5(("devfs_remdrv_cleanup: %s %s\n", dir, nodename)); 1597 1598 if (error = pn_get((char *)dir, UIO_SYSSPACE, &pn)) 1599 return (0); 1600 1601 rvp = dvroot->dv_attrvp; 1602 ASSERT(rvp != NULL); 1603 VN_HOLD(rvp); 1604 1605 pn_skipslash(&pn); 1606 dirvp = rvp; 1607 VN_HOLD(dirvp); 1608 1609 nm = kmem_alloc(MAXNAMELEN, KM_SLEEP); 1610 1611 while (pn_pathleft(&pn)) { 1612 ASSERT(dirvp->v_type == VDIR); 1613 (void) pn_getcomponent(&pn, nm); 1614 ASSERT((strcmp(nm, ".") != 0) && (strcmp(nm, "..") != 0)); 1615 error = VOP_LOOKUP(dirvp, nm, &vp, NULL, 0, rvp, kcred); 1616 if (error) { 1617 dcmn_err5(("remdrv_cleanup %s lookup error %d\n", 1618 nm, error)); 1619 VN_RELE(dirvp); 1620 if (dirvp != rvp) 1621 VN_RELE(rvp); 1622 pn_free(&pn); 1623 kmem_free(nm, MAXNAMELEN); 1624 return (0); 1625 } 1626 VN_RELE(dirvp); 1627 dirvp = vp; 1628 pn_skipslash(&pn); 1629 } 1630 1631 ASSERT(dirvp->v_type == VDIR); 1632 if (dirvp != rvp) 1633 VN_RELE(rvp); 1634 pn_free(&pn); 1635 kmem_free(nm, MAXNAMELEN); 1636 1637 dlen = ndirents * (sizeof (*dbuf)); 1638 dbuf = kmem_alloc(dlen, KM_SLEEP); 1639 1640 uio.uio_iov = &iov; 1641 uio.uio_iovcnt = 1; 1642 uio.uio_segflg = UIO_SYSSPACE; 1643 uio.uio_fmode = 0; 1644 uio.uio_extflg = UIO_COPY_CACHED; 1645 uio.uio_loffset = 0; 1646 uio.uio_llimit = MAXOFFSET_T; 1647 1648 eof = 0; 1649 error = 0; 1650 while (!error && !eof) { 1651 uio.uio_resid = dlen; 1652 iov.iov_base = (char *)dbuf; 1653 iov.iov_len = dlen; 1654 1655 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1656 error = VOP_READDIR(dirvp, &uio, kcred, &eof); 1657 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1658 1659 dbuflen = dlen - uio.uio_resid; 1660 1661 if (error || dbuflen == 0) 1662 break; 1663 1664 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); 1665 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 1666 1667 nm = dp->d_name; 1668 1669 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) 1670 continue; 1671 1672 if (strncmp(nm, nodename, nodenamelen) != 0) 1673 continue; 1674 1675 error = VOP_LOOKUP(dirvp, nm, &vp, 1676 NULL, 0, NULL, kcred); 1677 1678 dsysdebug(error, 1679 ("rem_drv %s/%s lookup (%d)\n", 1680 dir, nm, error)); 1681 1682 if (error) 1683 continue; 1684 1685 ASSERT(vp->v_type == VDIR || 1686 vp->v_type == VCHR || vp->v_type == VBLK); 1687 1688 if (vp->v_type == VDIR) { 1689 error = devfs_remdrv_rmdir(vp, nm, rvp); 1690 if (error == 0) { 1691 error = VOP_RMDIR(dirvp, 1692 (char *)nm, rvp, kcred); 1693 dsysdebug(error, 1694 ("rem_drv %s/%s rmdir (%d)\n", 1695 dir, nm, error)); 1696 } 1697 } else { 1698 error = VOP_REMOVE(dirvp, (char *)nm, kcred); 1699 dsysdebug(error, 1700 ("rem_drv %s/%s remove (%d)\n", 1701 dir, nm, error)); 1702 } 1703 1704 VN_RELE(vp); 1705 if (error) 1706 goto exit; 1707 } 1708 } 1709 1710 exit: 1711 VN_RELE(dirvp); 1712 1713 kmem_free(dbuf, dlen); 1714 1715 return (0); 1716 } 1717 1718 struct dv_list { 1719 struct dv_node *dv; 1720 struct dv_list *next; 1721 }; 1722 1723 void 1724 dv_walk( 1725 struct dv_node *ddv, 1726 char *devnm, 1727 void (*callback)(struct dv_node *, void *), 1728 void *arg) 1729 { 1730 struct vnode *dvp; 1731 struct dv_node *dv; 1732 struct dv_list *head, *tail, *next; 1733 int len; 1734 1735 dcmn_err3(("dv_walk: ddv = %s, devnm = %s\n", 1736 ddv->dv_name, devnm ? devnm : "<null>")); 1737 1738 dvp = DVTOV(ddv); 1739 1740 ASSERT(dvp->v_type == VDIR); 1741 1742 head = tail = next = NULL; 1743 1744 rw_enter(&ddv->dv_contents, RW_READER); 1745 mutex_enter(&dvp->v_lock); 1746 for (dv = ddv->dv_dot; dv; dv = dv->dv_next) { 1747 /* 1748 * If devnm is not NULL and is not the empty string, 1749 * select only dv_nodes with matching non-minor name 1750 */ 1751 if (devnm && (len = strlen(devnm)) && 1752 (strncmp(devnm, dv->dv_name, len) || 1753 (dv->dv_name[len] != ':' && dv->dv_name[len] != '\0'))) 1754 continue; 1755 1756 callback(dv, arg); 1757 1758 if (DVTOV(dv)->v_type != VDIR) 1759 continue; 1760 1761 next = kmem_zalloc(sizeof (*next), KM_SLEEP); 1762 next->dv = dv; 1763 1764 if (tail) 1765 tail->next = next; 1766 else 1767 head = next; 1768 1769 tail = next; 1770 } 1771 1772 while (head) { 1773 dv_walk(head->dv, NULL, callback, arg); 1774 next = head->next; 1775 kmem_free(head, sizeof (*head)); 1776 head = next; 1777 } 1778 rw_exit(&ddv->dv_contents); 1779 mutex_exit(&dvp->v_lock); 1780 } 1781