1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * miscellaneous routines for the devfs 31 */ 32 33 #include <sys/types.h> 34 #include <sys/param.h> 35 #include <sys/t_lock.h> 36 #include <sys/systm.h> 37 #include <sys/sysmacros.h> 38 #include <sys/user.h> 39 #include <sys/time.h> 40 #include <sys/vfs.h> 41 #include <sys/vnode.h> 42 #include <sys/file.h> 43 #include <sys/fcntl.h> 44 #include <sys/flock.h> 45 #include <sys/kmem.h> 46 #include <sys/uio.h> 47 #include <sys/errno.h> 48 #include <sys/stat.h> 49 #include <sys/cred.h> 50 #include <sys/dirent.h> 51 #include <sys/pathname.h> 52 #include <sys/cmn_err.h> 53 #include <sys/debug.h> 54 #include <sys/modctl.h> 55 #include <fs/fs_subr.h> 56 #include <sys/fs/dv_node.h> 57 #include <sys/fs/snode.h> 58 #include <sys/sunndi.h> 59 #include <sys/sunmdi.h> 60 #include <sys/conf.h> 61 62 #ifdef DEBUG 63 int devfs_debug = 0x0; 64 #endif 65 66 const char dvnm[] = "devfs"; 67 kmem_cache_t *dv_node_cache; /* dv_node cache */ 68 uint_t devfs_clean_key; 69 struct dv_node *dvroot; 70 71 /* prototype memory vattrs */ 72 vattr_t dv_vattr_dir = { 73 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 74 VDIR, /* va_type */ 75 DV_DIRMODE_DEFAULT, /* va_mode */ 76 DV_UID_DEFAULT, /* va_uid */ 77 DV_GID_DEFAULT, /* va_gid */ 78 0, /* va_fsid; */ 79 0, /* va_nodeid; */ 80 0, /* va_nlink; */ 81 0, /* va_size; */ 82 0, /* va_atime; */ 83 0, /* va_mtime; */ 84 0, /* va_ctime; */ 85 0, /* va_rdev; */ 86 0, /* va_blksize; */ 87 0, /* va_nblocks; */ 88 0, /* va_seq; */ 89 }; 90 91 vattr_t dv_vattr_file = { 92 AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV, /* va_mask */ 93 0, /* va_type */ 94 DV_DEVMODE_DEFAULT, /* va_mode */ 95 DV_UID_DEFAULT, /* va_uid */ 96 DV_GID_DEFAULT, /* va_gid */ 97 0, /* va_fsid; */ 98 0, /* va_nodeid; */ 99 0, /* va_nlink; */ 100 0, /* va_size; */ 101 0, /* va_atime; */ 102 0, /* va_mtime; */ 103 0, /* va_ctime; */ 104 0, /* va_rdev; */ 105 0, /* va_blksize; */ 106 0, /* va_nblocks; */ 107 0, /* va_seq; */ 108 }; 109 110 vattr_t dv_vattr_priv = { 111 AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV, /* va_mask */ 112 0, /* va_type */ 113 DV_DEVMODE_PRIV, /* va_mode */ 114 DV_UID_DEFAULT, /* va_uid */ 115 DV_GID_DEFAULT, /* va_gid */ 116 0, /* va_fsid; */ 117 0, /* va_nodeid; */ 118 0, /* va_nlink; */ 119 0, /* va_size; */ 120 0, /* va_atime; */ 121 0, /* va_mtime; */ 122 0, /* va_ctime; */ 123 0, /* va_rdev; */ 124 0, /* va_blksize; */ 125 0, /* va_nblocks; */ 126 0, /* va_seq; */ 127 }; 128 129 extern dev_info_t *clone_dip; 130 extern major_t clone_major; 131 extern struct dev_ops *ddi_hold_driver(major_t); 132 133 /* 134 * dv_node cache constructor, destructor, can cache creation 135 */ 136 /*ARGSUSED1*/ 137 static int 138 i_dv_node_ctor(void *buf, void *cfarg, int flag) 139 { 140 struct dv_node *dv = (struct dv_node *)buf; 141 struct vnode *vp; 142 143 bzero(buf, sizeof (struct dv_node)); 144 145 /* initialize persistent parts of dv_node */ 146 rw_init(&dv->dv_contents, NULL, RW_DEFAULT, NULL); 147 148 /* allocate vnode and initialize link back to dv_node */ 149 dv->dv_vnode = vn_alloc(KM_SLEEP); 150 vp = DVTOV(dv); 151 vp->v_data = (caddr_t)dv; 152 return (0); 153 } 154 155 /* dev_info node destructor for kmem cache */ 156 /*ARGSUSED1*/ 157 static void 158 i_dv_node_dtor(void *buf, void *arg) 159 { 160 struct dv_node *dv = (struct dv_node *)buf; 161 struct vnode *vp = DVTOV(dv); 162 163 rw_destroy(&dv->dv_contents); 164 vn_invalid(vp); 165 vn_free(vp); 166 } 167 168 169 /* initialize dev_info node cache */ 170 void 171 dv_node_cache_init() 172 { 173 ASSERT(dv_node_cache == NULL); 174 dv_node_cache = kmem_cache_create("dv_node_cache", 175 sizeof (struct dv_node), 0, i_dv_node_ctor, i_dv_node_dtor, 176 NULL, NULL, NULL, 0); 177 178 tsd_create(&devfs_clean_key, NULL); 179 } 180 181 /* initialize dev_info node cache */ 182 void 183 dv_node_cache_fini() 184 { 185 ASSERT(dv_node_cache != NULL); 186 kmem_cache_destroy(dv_node_cache); 187 dv_node_cache = NULL; 188 189 tsd_destroy(&devfs_clean_key); 190 } 191 192 /* 193 * dv_mkino - Generate a unique inode number for devfs nodes. 194 * 195 * Although ino_t is 64 bits, the inode number is truncated to 32 bits for 32 196 * bit non-LARGEFILE applications. This means that there is a requirement to 197 * maintain the inode number as a 32 bit value or applications will have 198 * stat(2) calls fail with EOVERFLOW. We form a 32 bit inode number from the 199 * dev_t. but if the minor number is larger than L_MAXMIN32 we fold extra minor 200 * 201 * To generate inode numbers for directories, we assume that we will never use 202 * more than half the major space - this allows for ~8190 drivers. We use this 203 * upper major number space to allocate inode numbers for directories by 204 * encoding the major and instance into this space. 205 * 206 * We also skew the result so that inode 2 is reserved for the root of the file 207 * system. 208 * 209 * As part of the future support for 64-bit dev_t APIs, the upper minor bits 210 * should be folded into the high inode bits by adding the following code 211 * after "ino |= 1": 212 * 213 * #if (L_BITSMINOR32 != L_BITSMINOR) 214 * |* fold overflow minor bits into high bits of inode number *| 215 * ino |= ((ino_t)(minor >> L_BITSMINOR32)) << L_BITSMINOR; 216 * #endif |* (L_BITSMINOR32 != L_BITSMINOR) *| 217 * 218 * This way only applications that use devices that overflow their minor 219 * space will have an application level impact. 220 */ 221 static ino_t 222 dv_mkino(dev_info_t *devi, vtype_t typ, dev_t dev) 223 { 224 major_t major; 225 minor_t minor; 226 ino_t ino; 227 static int warn; 228 229 if (typ == VDIR) { 230 major = ((L_MAXMAJ32 + 1) >> 1) + DEVI(devi)->devi_major; 231 minor = ddi_get_instance(devi); 232 233 /* makedevice32 in high half of major number space */ 234 ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32)); 235 236 major = DEVI(devi)->devi_major; 237 } else { 238 major = getmajor(dev); 239 minor = getminor(dev); 240 241 /* makedevice32 */ 242 ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32)); 243 244 /* make ino for VCHR different than VBLK */ 245 ino <<= 1; 246 if (typ == VCHR) 247 ino |= 1; 248 } 249 250 ino += DV_ROOTINO + 1; /* skew */ 251 252 /* 253 * diagnose things a little early because adding the skew to a large 254 * minor number could roll over the major. 255 */ 256 if ((major >= (L_MAXMAJ32 >> 1)) && (warn == 0)) { 257 warn = 1; 258 cmn_err(CE_WARN, "%s: inode numbers are not unique", dvnm); 259 } 260 261 return (ino); 262 } 263 264 /* 265 * dv_mkroot 266 * 267 * Build the first VDIR dv_node. 268 */ 269 struct dv_node * 270 dv_mkroot(struct vfs *vfsp, dev_t devfsdev) 271 { 272 struct dv_node *dv; 273 struct vnode *vp; 274 275 ASSERT(ddi_root_node() != NULL); 276 ASSERT(dv_node_cache != NULL); 277 278 dcmn_err3(("dv_mkroot\n")); 279 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP); 280 vp = DVTOV(dv); 281 vn_reinit(vp); 282 vp->v_flag = VROOT; 283 vp->v_vfsp = vfsp; 284 vp->v_type = VDIR; 285 vp->v_rdev = devfsdev; 286 vn_setops(vp, dv_vnodeops); 287 vn_exists(vp); 288 289 dvroot = dv; 290 291 dv->dv_name = NULL; /* not needed */ 292 dv->dv_namelen = 0; 293 294 dv->dv_devi = ddi_root_node(); 295 296 dv->dv_ino = DV_ROOTINO; 297 dv->dv_nlink = 2; /* name + . (no dv_insert) */ 298 dv->dv_dotdot = dv; /* .. == self */ 299 dv->dv_attrvp = NULLVP; 300 dv->dv_attr = NULL; 301 dv->dv_flags = DV_BUILD; 302 dv->dv_priv = NULL; 303 dv->dv_busy = 0; 304 dv->dv_dflt_mode = 0; 305 306 return (dv); 307 } 308 309 /* 310 * dv_mkdir 311 * 312 * Given an probed or attached nexus node, create a VDIR dv_node. 313 * No dv_attrvp is created at this point. 314 */ 315 struct dv_node * 316 dv_mkdir(struct dv_node *ddv, dev_info_t *devi, char *nm) 317 { 318 struct dv_node *dv; 319 struct vnode *vp; 320 size_t nmlen; 321 322 ASSERT((devi)); 323 dcmn_err4(("dv_mkdir: %s\n", nm)); 324 325 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP); 326 nmlen = strlen(nm) + 1; 327 dv->dv_name = kmem_alloc(nmlen, KM_SLEEP); 328 bcopy(nm, dv->dv_name, nmlen); 329 dv->dv_namelen = nmlen - 1; /* '\0' not included */ 330 vp = DVTOV(dv); 331 vn_reinit(vp); 332 vp->v_flag = 0; 333 vp->v_vfsp = DVTOV(ddv)->v_vfsp; 334 vp->v_type = VDIR; 335 vp->v_rdev = DVTOV(ddv)->v_rdev; 336 vn_setops(vp, vn_getops(DVTOV(ddv))); 337 vn_exists(vp); 338 339 dv->dv_devi = devi; 340 ndi_hold_devi(devi); 341 342 dv->dv_ino = dv_mkino(devi, VDIR, NODEV); 343 dv->dv_nlink = 0; /* updated on insert */ 344 dv->dv_dotdot = ddv; 345 dv->dv_attrvp = NULLVP; 346 dv->dv_attr = NULL; 347 dv->dv_flags = DV_BUILD; 348 dv->dv_priv = NULL; 349 dv->dv_busy = 0; 350 dv->dv_dflt_mode = 0; 351 352 return (dv); 353 } 354 355 /* 356 * dv_mknod 357 * 358 * Given a minor node, create a VCHR or VBLK dv_node. 359 * No dv_attrvp is created at this point. 360 */ 361 static struct dv_node * 362 dv_mknod(struct dv_node *ddv, dev_info_t *devi, char *nm, 363 struct ddi_minor_data *dmd) 364 { 365 struct dv_node *dv; 366 struct vnode *vp; 367 size_t nmlen; 368 369 dcmn_err4(("dv_mknod: %s\n", nm)); 370 371 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP); 372 nmlen = strlen(nm) + 1; 373 dv->dv_name = kmem_alloc(nmlen, KM_SLEEP); 374 bcopy(nm, dv->dv_name, nmlen); 375 dv->dv_namelen = nmlen - 1; /* no '\0' */ 376 vp = DVTOV(dv); 377 vn_reinit(vp); 378 vp->v_flag = 0; 379 vp->v_vfsp = DVTOV(ddv)->v_vfsp; 380 vp->v_type = dmd->ddm_spec_type == S_IFCHR ? VCHR : VBLK; 381 vp->v_rdev = dmd->ddm_dev; 382 vn_setops(vp, vn_getops(DVTOV(ddv))); 383 vn_exists(vp); 384 385 ASSERT(MUTEX_HELD(&DEVI(devi)->devi_lock)); 386 dv->dv_devi = devi; 387 DEVI(devi)->devi_ref++; 388 389 dv->dv_ino = dv_mkino(devi, vp->v_type, vp->v_rdev); 390 dv->dv_nlink = 0; /* updated on insert */ 391 dv->dv_dotdot = ddv; 392 dv->dv_attrvp = NULLVP; 393 dv->dv_attr = NULL; 394 dv->dv_flags = 0; 395 396 if (dmd->type == DDM_INTERNAL_PATH) 397 dv->dv_flags |= DV_INTERNAL; 398 if (dmd->ddm_flags & DM_NO_FSPERM) 399 dv->dv_flags |= DV_NO_FSPERM; 400 401 dv->dv_priv = dmd->ddm_node_priv; 402 if (dv->dv_priv) 403 dphold(dv->dv_priv); 404 405 /* 406 * Minors created with ddi_create_priv_minor_node can specify 407 * a default mode permission other than the devfs default. 408 */ 409 if (dv->dv_priv || dv->dv_flags & DV_NO_FSPERM) { 410 dcmn_err5(("%s: dv_mknod default priv mode 0%o\n", 411 dv->dv_name, dmd->ddm_priv_mode)); 412 dv->dv_flags |= DV_DFLT_MODE; 413 dv->dv_dflt_mode = dmd->ddm_priv_mode & S_IAMB; 414 } 415 416 return (dv); 417 } 418 419 /* 420 * dv_destroy 421 * 422 * Destroy what we created in dv_mkdir or dv_mknod. 423 * In the case of a *referenced* directory, do nothing. 424 */ 425 /*ARGSUSED1*/ 426 void 427 dv_destroy(struct dv_node *dv, uint_t flags) 428 { 429 vnode_t *vp = DVTOV(dv); 430 ASSERT(dv->dv_nlink == 0); /* no references */ 431 ASSERT(dv->dv_next == NULL); /* unlinked from directory */ 432 433 dcmn_err4(("dv_destroy: %s\n", dv->dv_name)); 434 435 /* 436 * We may be asked to unlink referenced directories. 437 * In this case, there is nothing to be done. 438 * The eventual memory free will be done in 439 * devfs_inactive. 440 */ 441 if (vp->v_count != 0) { 442 ASSERT(vp->v_type == VDIR); 443 ASSERT(flags & DV_CLEAN_FORCE); 444 ASSERT(DV_STALE(dv)); 445 return; 446 } 447 448 if (dv->dv_attrvp != NULLVP) 449 VN_RELE(dv->dv_attrvp); 450 if (dv->dv_attr != NULL) 451 kmem_free(dv->dv_attr, sizeof (struct vattr)); 452 if (dv->dv_name != NULL) 453 kmem_free(dv->dv_name, dv->dv_namelen + 1); 454 if (dv->dv_devi != NULL) { 455 ndi_rele_devi(dv->dv_devi); 456 } 457 if (dv->dv_priv != NULL) { 458 dpfree(dv->dv_priv); 459 } 460 461 kmem_cache_free(dv_node_cache, dv); 462 } 463 464 /* 465 * Find and hold dv_node by name 466 */ 467 struct dv_node * 468 dv_findbyname(struct dv_node *ddv, char *nm) 469 { 470 struct dv_node *dv; 471 size_t nmlen = strlen(nm); 472 473 ASSERT(RW_LOCK_HELD(&ddv->dv_contents)); 474 dcmn_err3(("dv_findbyname: %s\n", nm)); 475 for (dv = ddv->dv_dot; dv; dv = dv->dv_next) { 476 if (dv->dv_namelen != nmlen) 477 continue; 478 if (strcmp(dv->dv_name, nm) == 0) { 479 VN_HOLD(DVTOV(dv)); 480 return (dv); 481 } 482 } 483 return (NULL); 484 } 485 486 /* 487 * Inserts a new dv_node in a parent directory 488 */ 489 void 490 dv_insert(struct dv_node *ddv, struct dv_node *dv) 491 { 492 ASSERT(RW_WRITE_HELD(&ddv->dv_contents)); 493 ASSERT(DVTOV(ddv)->v_type == VDIR); 494 ASSERT(ddv->dv_nlink >= 2); 495 ASSERT(dv->dv_nlink == 0); 496 497 dcmn_err3(("dv_insert: %s\n", dv->dv_name)); 498 499 dv->dv_dotdot = ddv; 500 dv->dv_next = ddv->dv_dot; 501 ddv->dv_dot = dv; 502 if (DVTOV(dv)->v_type == VDIR) { 503 ddv->dv_nlink++; /* .. to containing directory */ 504 dv->dv_nlink = 2; /* name + . */ 505 } else { 506 dv->dv_nlink = 1; /* name */ 507 } 508 } 509 510 /* 511 * Unlink a dv_node from a perent directory 512 */ 513 void 514 dv_unlink(struct dv_node *ddv, struct dv_node *dv, struct dv_node **dv_pprev) 515 { 516 /* verify linkage of arguments */ 517 ASSERT(ddv && dv && dv_pprev); 518 ASSERT(dv->dv_dotdot == ddv); 519 ASSERT(*dv_pprev == dv); 520 ASSERT(RW_WRITE_HELD(&ddv->dv_contents)); 521 ASSERT(DVTOV(ddv)->v_type == VDIR); 522 523 dcmn_err3(("dv_unlink: %s\n", dv->dv_name)); 524 525 if (DVTOV(dv)->v_type == VDIR) { 526 ddv->dv_nlink--; /* .. to containing directory */ 527 dv->dv_nlink -= 2; /* name + . */ 528 } else { 529 dv->dv_nlink -= 1; /* name */ 530 } 531 ASSERT(ddv->dv_nlink >= 2); 532 ASSERT(dv->dv_nlink == 0); 533 534 /* update ddv->dv_dot/dv_next */ 535 *dv_pprev = dv->dv_next; 536 537 dv->dv_dotdot = NULL; 538 dv->dv_next = NULL; 539 dv->dv_dot = NULL; 540 } 541 542 /* 543 * Merge devfs node specific information into an attribute structure. 544 * 545 * NOTE: specfs provides ATIME,MTIME,CTIME,SIZE,BLKSIZE,NBLOCKS on leaf node. 546 */ 547 void 548 dv_vattr_merge(struct dv_node *dv, struct vattr *vap) 549 { 550 struct vnode *vp = DVTOV(dv); 551 552 vap->va_nodeid = dv->dv_ino; 553 vap->va_nlink = dv->dv_nlink; 554 555 if (vp->v_type == VDIR) { 556 vap->va_rdev = 0; 557 vap->va_fsid = vp->v_rdev; 558 } else { 559 vap->va_rdev = vp->v_rdev; 560 vap->va_fsid = DVTOV(dv->dv_dotdot)->v_rdev; 561 vap->va_type = vp->v_type; 562 /* don't trust the shadow file type */ 563 vap->va_mode &= ~S_IFMT; 564 if (vap->va_type == VCHR) 565 vap->va_mode |= S_IFCHR; 566 else 567 vap->va_mode |= S_IFBLK; 568 } 569 } 570 571 /* 572 * Free a vsecattr 573 */ 574 static void 575 dv_free_vsa(struct vsecattr *vsap) 576 { 577 if (vsap->vsa_aclcnt > 0 && vsap->vsa_aclentp) 578 kmem_free(vsap->vsa_aclentp, 579 vsap->vsa_aclcnt * sizeof (aclent_t)); 580 if (vsap->vsa_dfaclcnt > 0 && vsap->vsa_dfaclentp) 581 kmem_free(vsap->vsa_dfaclentp, 582 vsap->vsa_dfaclcnt * sizeof (aclent_t)); 583 } 584 585 /* 586 * dv_shadow_node 587 * 588 * Given a VDIR dv_node, find/create the associated VDIR 589 * node in the shadow attribute filesystem. 590 * 591 * Given a VCHR/VBLK dv_node, find the associated VREG 592 * node in the shadow attribute filesystem. These nodes 593 * are only created to persist non-default attributes. 594 * Lack of such a node implies the default permissions 595 * are sufficient. 596 * 597 * Managing the attribute file entries is slightly tricky (mostly 598 * because we can't intercept VN_HOLD and VN_RELE except on the last 599 * release). 600 * 601 * We assert that if the dv_attrvp pointer is non-NULL, it points 602 * to a singly-held (by us) vnode that represents the shadow entry 603 * in the underlying filesystem. To avoid store-ordering issues, 604 * we assert that the pointer can only be tested under the dv_contents 605 * READERS lock. 606 */ 607 608 void 609 dv_shadow_node( 610 struct vnode *dvp, /* devfs parent directory vnode */ 611 char *nm, /* name component */ 612 struct vnode *vp, /* devfs vnode */ 613 struct pathname *pnp, /* the path .. */ 614 struct vnode *rdir, /* the root .. */ 615 struct cred *cred, /* who's asking? */ 616 int flags) /* optionally create shadow node */ 617 { 618 struct dv_node *dv; /* dv_node of named directory */ 619 struct vnode *rdvp; /* shadow parent directory vnode */ 620 struct vnode *rvp; /* shadow vnode */ 621 struct vnode *rrvp; /* realvp of shadow vnode */ 622 struct vattr vattr; 623 int create_tried; 624 int error; 625 mperm_t mp; 626 struct vsecattr vsa; 627 628 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK); 629 dv = VTODV(vp); 630 dcmn_err3(("dv_shadow_node: name %s attr %p\n", 631 nm, (void *)dv->dv_attrvp)); 632 633 if ((flags & DV_SHADOW_WRITE_HELD) == 0) { 634 ASSERT(RW_READ_HELD(&dv->dv_contents)); 635 if (dv->dv_attrvp != NULLVP) 636 return; 637 if (!rw_tryupgrade(&dv->dv_contents)) { 638 rw_exit(&dv->dv_contents); 639 rw_enter(&dv->dv_contents, RW_WRITER); 640 if (dv->dv_attrvp != NULLVP) { 641 rw_downgrade(&dv->dv_contents); 642 return; 643 } 644 } 645 } else { 646 ASSERT(RW_WRITE_HELD(&dv->dv_contents)); 647 if (dv->dv_attrvp != NULLVP) 648 return; 649 } 650 651 ASSERT(RW_WRITE_HELD(&dv->dv_contents) && dv->dv_attrvp == NULL); 652 653 rdvp = VTODV(dvp)->dv_attrvp; 654 create_tried = 0; 655 lookup: 656 if (rdvp && (dv->dv_flags & DV_NO_FSPERM) == 0) { 657 error = VOP_LOOKUP(rdvp, nm, &rvp, pnp, LOOKUP_DIR, rdir, cred); 658 659 /* factor out the snode since we only want the attribute node */ 660 if ((error == 0) && (VOP_REALVP(rvp, &rrvp) == 0)) { 661 VN_HOLD(rrvp); 662 VN_RELE(rvp); 663 rvp = rrvp; 664 } 665 } else 666 error = EROFS; /* no parent, no entry */ 667 668 /* 669 * All we want is the permissions (and maybe ACLs and 670 * extended attributes), and we want to perform lookups 671 * by name. Drivers occasionally change their minor 672 * number space. If something changes, there's no 673 * much we can do about it here. 674 */ 675 676 /* The shadow node checks out. We are done */ 677 if (error == 0) { 678 dv->dv_attrvp = rvp; /* with one hold */ 679 680 /* 681 * Determine if we have (non-trivial) ACLs on this node. 682 * NB: This should be changed call fs_acl_nontrivial for 683 * new ACE flavor ACLs. 684 */ 685 vsa.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT; 686 error = VOP_GETSECATTR(rvp, &vsa, 0, cred); 687 dv->dv_flags &= ~DV_ACL; 688 if (error == 0) { 689 if (vsa.vsa_aclcnt > MIN_ACL_ENTRIES) { 690 dv->dv_flags |= DV_ACL; /* non-trivial ACL */ 691 } 692 dv_free_vsa(&vsa); 693 } 694 695 /* 696 * If we have synced out the memory attributes, free 697 * them and switch back to using the persistent store. 698 */ 699 if (rvp && dv->dv_attr) { 700 kmem_free(dv->dv_attr, sizeof (struct vattr)); 701 dv->dv_attr = NULL; 702 } 703 if ((flags & DV_SHADOW_WRITE_HELD) == 0) 704 rw_downgrade(&dv->dv_contents); 705 ASSERT(RW_LOCK_HELD(&dv->dv_contents)); 706 return; 707 } 708 709 /* 710 * Failed to find attribute in persistent backing store, 711 * get default permission bits. For minors not created by 712 * ddi_create_priv_minor_node(), use devfs defaults. 713 */ 714 if (vp->v_type == VDIR) { 715 vattr = dv_vattr_dir; 716 } else if (dv->dv_flags & DV_NO_FSPERM) { 717 vattr = dv_vattr_priv; 718 } else { 719 /* 720 * look up perm bits from minor_perm 721 */ 722 vattr = dv_vattr_file; 723 if (dev_minorperm(dv->dv_devi, dv->dv_name, &mp) == 0) { 724 VATTR_MP_MERGE(vattr, mp); 725 dcmn_err5(("%s: minor perm mode 0%o\n", 726 dv->dv_name, vattr.va_mode)); 727 } else if (dv->dv_flags & DV_DFLT_MODE) { 728 ASSERT((dv->dv_dflt_mode & ~S_IAMB) == 0); 729 vattr.va_mode &= ~S_IAMB; 730 vattr.va_mode |= dv->dv_dflt_mode; 731 dcmn_err5(("%s: priv mode 0%o\n", 732 dv->dv_name, vattr.va_mode)); 733 } 734 } 735 736 dv_vattr_merge(dv, &vattr); 737 gethrestime(&vattr.va_atime); 738 vattr.va_mtime = vattr.va_atime; 739 vattr.va_ctime = vattr.va_atime; 740 741 /* 742 * Try to create shadow dir. This is necessary in case 743 * we need to create a shadow leaf node later, when user 744 * executes chmod. 745 */ 746 if ((error == ENOENT) && !create_tried) { 747 switch (vp->v_type) { 748 case VDIR: 749 error = VOP_MKDIR(rdvp, nm, &vattr, &rvp, kcred); 750 dsysdebug(error, ("vop_mkdir %s %s %d\n", 751 VTODV(dvp)->dv_name, nm, error)); 752 create_tried = 1; 753 break; 754 755 case VCHR: 756 case VBLK: 757 /* 758 * Shadow nodes are only created on demand 759 */ 760 if (flags & DV_SHADOW_CREATE) { 761 error = VOP_CREATE(rdvp, nm, &vattr, NONEXCL, 762 VREAD|VWRITE, &rvp, kcred, 0); 763 dsysdebug(error, ("vop_create %s %s %d\n", 764 VTODV(dvp)->dv_name, nm, error)); 765 create_tried = 1; 766 } 767 break; 768 769 default: 770 cmn_err(CE_PANIC, "devfs: %s: create", dvnm); 771 /*NOTREACHED*/ 772 } 773 774 if (create_tried && 775 (error == 0) || (error == EEXIST)) { 776 VN_RELE(rvp); 777 goto lookup; 778 } 779 } 780 781 /* Store attribute in memory */ 782 if (dv->dv_attr == NULL) { 783 dv->dv_attr = kmem_alloc(sizeof (struct vattr), KM_SLEEP); 784 *(dv->dv_attr) = vattr; 785 } 786 787 if ((flags & DV_SHADOW_WRITE_HELD) == 0) 788 rw_downgrade(&dv->dv_contents); 789 ASSERT(RW_LOCK_HELD(&dv->dv_contents)); 790 } 791 792 /* 793 * Given a devinfo node, and a name, returns the appropriate 794 * minor information for that named node, if it exists. 795 */ 796 static int 797 dv_find_leafnode(dev_info_t *devi, char *minor_nm, struct ddi_minor_data *r_mi) 798 { 799 struct ddi_minor_data *dmd; 800 801 ASSERT(i_ddi_node_state(devi) >= DS_ATTACHED); 802 ASSERT(MUTEX_HELD(&DEVI(devi)->devi_lock)); 803 804 dcmn_err3(("dv_find_leafnode: %s\n", minor_nm)); 805 for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) { 806 807 /* 808 * Skip alias nodes and nodes without a name. 809 */ 810 if ((dmd->type == DDM_ALIAS) || (dmd->ddm_name == NULL)) 811 continue; 812 813 dcmn_err4(("dv_find_leafnode: (%s,%s)\n", 814 minor_nm, dmd->ddm_name)); 815 if (strcmp(minor_nm, dmd->ddm_name) == 0) { 816 r_mi->ddm_dev = dmd->ddm_dev; 817 r_mi->ddm_spec_type = dmd->ddm_spec_type; 818 r_mi->type = dmd->type; 819 r_mi->ddm_flags = dmd->ddm_flags; 820 r_mi->ddm_node_priv = dmd->ddm_node_priv; 821 r_mi->ddm_priv_mode = dmd->ddm_priv_mode; 822 if (r_mi->ddm_node_priv) 823 dphold(r_mi->ddm_node_priv); 824 return (0); 825 } 826 } 827 828 dcmn_err3(("dv_find_leafnode: %s: ENOENT\n", minor_nm)); 829 return (ENOENT); 830 } 831 832 /* 833 * Special handling for clone node: 834 * Clone minor name is a driver name, the minor number will 835 * be the major number of the driver. There is no minor 836 * node under the clone driver, so we'll manufacture the 837 * dev_t. 838 */ 839 static struct dv_node * 840 dv_clone_mknod(struct dv_node *ddv, char *drvname) 841 { 842 major_t major; 843 struct dv_node *dvp; 844 char *devnm; 845 struct ddi_minor_data *dmd; 846 847 /* 848 * Make sure drvname is a STREAMS driver. We load the driver, 849 * but don't attach to any instances. This makes stat(2) 850 * relatively cheap. 851 */ 852 major = ddi_name_to_major(drvname); 853 if (major == (major_t)-1) 854 return (NULL); 855 856 if (ddi_hold_driver(major) == NULL) 857 return (NULL); 858 859 if (STREAMSTAB(major) == NULL) { 860 ddi_rele_driver(major); 861 return (NULL); 862 } 863 864 ddi_rele_driver(major); 865 devnm = kmem_alloc(MAXNAMELEN, KM_SLEEP); 866 (void) snprintf(devnm, MAXNAMELEN, "clone@0:%s", drvname); 867 dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP); 868 dmd->ddm_dev = makedevice(clone_major, (minor_t)major); 869 dmd->ddm_spec_type = S_IFCHR; 870 dvp = dv_mknod(ddv, clone_dip, devnm, dmd); 871 kmem_free(dmd, sizeof (*dmd)); 872 kmem_free(devnm, MAXNAMELEN); 873 return (dvp); 874 } 875 876 /* 877 * Given the parent directory node, and a name in it, returns the 878 * named dv_node to the caller (as a vnode). 879 * 880 * (We need pnp and rdir for doing shadow lookups; they can be NULL) 881 */ 882 int 883 dv_find(struct dv_node *ddv, char *nm, struct vnode **vpp, struct pathname *pnp, 884 struct vnode *rdir, struct cred *cred, uint_t ndi_flags) 885 { 886 extern int isminiroot; /* see modctl.c */ 887 888 int rv = 0, was_busy = 0, nmlen; 889 struct vnode *vp; 890 struct dv_node *dv, *dup; 891 dev_info_t *pdevi, *devi = NULL; 892 char *mnm; 893 struct ddi_minor_data *dmd; 894 895 dcmn_err3(("dv_find %s\n", nm)); 896 897 rw_enter(&ddv->dv_contents, RW_READER); 898 start: 899 if (DV_STALE(ddv)) { 900 rw_exit(&ddv->dv_contents); 901 return (ESTALE); 902 } 903 904 /* 905 * Empty name or ., return node itself. 906 */ 907 nmlen = strlen(nm); 908 if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) { 909 *vpp = DVTOV(ddv); 910 rw_exit(&ddv->dv_contents); 911 VN_HOLD(*vpp); 912 return (0); 913 } 914 915 /* 916 * .., return the parent directory 917 */ 918 if ((nmlen == 2) && (strcmp(nm, "..") == 0)) { 919 *vpp = DVTOV(ddv->dv_dotdot); 920 rw_exit(&ddv->dv_contents); 921 VN_HOLD(*vpp); 922 return (0); 923 } 924 925 /* 926 * Fail anything without a valid device name component 927 */ 928 if (nm[0] == '@' || nm[0] == ':') { 929 dcmn_err3(("devfs: no driver '%s'\n", nm)); 930 rw_exit(&ddv->dv_contents); 931 return (ENOENT); 932 } 933 934 /* 935 * So, now we have to deal with the trickier stuff. 936 * 937 * (a) search the existing list of dv_nodes on this directory 938 */ 939 if ((dv = dv_findbyname(ddv, nm)) != NULL) { 940 founddv: 941 ASSERT(RW_LOCK_HELD(&ddv->dv_contents)); 942 rw_enter(&dv->dv_contents, RW_READER); 943 vp = DVTOV(dv); 944 if ((dv->dv_attrvp != NULLVP) || 945 (vp->v_type != VDIR && dv->dv_attr != NULL)) { 946 /* 947 * Common case - we already have attributes 948 */ 949 rw_exit(&dv->dv_contents); 950 rw_exit(&ddv->dv_contents); 951 goto found; 952 } 953 954 /* 955 * No attribute vp, try and build one. 956 */ 957 dv_shadow_node(DVTOV(ddv), nm, vp, pnp, rdir, cred, 0); 958 rw_exit(&dv->dv_contents); 959 rw_exit(&ddv->dv_contents); 960 goto found; 961 } 962 963 /* 964 * (b) Search the child devinfo nodes of our parent directory, 965 * looking for the named node. If we find it, build a new 966 * node, then grab the writers lock, search the directory 967 * if it's still not there, then insert it. 968 * 969 * We drop the devfs locks before accessing the device tree. 970 * Take care to mark the node BUSY so that a forced devfs_clean 971 * doesn't mark the directory node stale. 972 * 973 * Also, check if we are called as part of devfs_clean or 974 * reset_perm. If so, simply return not found because there 975 * is nothing to clean. 976 */ 977 if (tsd_get(devfs_clean_key)) { 978 rw_exit(&ddv->dv_contents); 979 return (ENOENT); 980 } 981 982 /* 983 * We could be either READ or WRITE locked at 984 * this point. Upgrade if we are read locked. 985 */ 986 ASSERT(RW_LOCK_HELD(&ddv->dv_contents)); 987 if (rw_read_locked(&ddv->dv_contents) && 988 !rw_tryupgrade(&ddv->dv_contents)) { 989 rw_exit(&ddv->dv_contents); 990 rw_enter(&ddv->dv_contents, RW_WRITER); 991 /* 992 * Things may have changed when we dropped 993 * the contents lock, so start from top again 994 */ 995 goto start; 996 } 997 ddv->dv_busy++; /* mark busy before dropping lock */ 998 was_busy++; 999 rw_exit(&ddv->dv_contents); 1000 1001 pdevi = ddv->dv_devi; 1002 ASSERT(pdevi != NULL); 1003 1004 mnm = strchr(nm, ':'); 1005 if (mnm) 1006 *mnm = (char)0; 1007 1008 /* 1009 * Configure one nexus child, will call nexus's bus_ops 1010 * If successful, devi is held upon returning. 1011 * Note: devfs lookup should not be configuring grandchildren. 1012 */ 1013 ASSERT((ndi_flags & NDI_CONFIG) == 0); 1014 1015 rv = ndi_devi_config_one(pdevi, nm, &devi, ndi_flags | NDI_NO_EVENT); 1016 if (mnm) 1017 *mnm = ':'; 1018 if (rv != NDI_SUCCESS) { 1019 rv = ENOENT; 1020 goto notfound; 1021 } 1022 1023 /* 1024 * Don't make vhci clients visible under phci, unless we 1025 * are in miniroot. 1026 */ 1027 if (isminiroot == 0 && ddi_get_parent(devi) != pdevi) { 1028 ndi_rele_devi(devi); 1029 rv = ENOENT; 1030 goto notfound; 1031 } 1032 1033 ASSERT(devi && (i_ddi_node_state(devi) >= DS_ATTACHED)); 1034 1035 /* 1036 * Invalidate cache to notice newly created minor nodes. 1037 */ 1038 rw_enter(&ddv->dv_contents, RW_WRITER); 1039 ddv->dv_flags |= DV_BUILD; 1040 rw_exit(&ddv->dv_contents); 1041 1042 /* 1043 * mkdir for nexus drivers and leaf nodes as well. If we are racing 1044 * and create a duplicate, the duplicate will be destroyed below. 1045 */ 1046 if (mnm == NULL) { 1047 dv = dv_mkdir(ddv, devi, nm); 1048 } else { 1049 /* 1050 * For clone minors, load the driver indicated by minor name. 1051 */ 1052 mutex_enter(&DEVI(devi)->devi_lock); 1053 if (devi == clone_dip) { 1054 dv = dv_clone_mknod(ddv, mnm + 1); 1055 } else { 1056 /* 1057 * Find minor node and make a dv_node 1058 */ 1059 dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP); 1060 if (dv_find_leafnode(devi, mnm + 1, dmd) == 0) { 1061 dv = dv_mknod(ddv, devi, nm, dmd); 1062 if (dmd->ddm_node_priv) 1063 dpfree(dmd->ddm_node_priv); 1064 } 1065 kmem_free(dmd, sizeof (*dmd)); 1066 } 1067 mutex_exit(&DEVI(devi)->devi_lock); 1068 } 1069 /* 1070 * Release hold from ndi_devi_config_one() 1071 */ 1072 ndi_rele_devi(devi); 1073 1074 if (dv == NULL) { 1075 rv = ENOENT; 1076 goto notfound; 1077 } 1078 1079 /* 1080 * We have released the dv_contents lock, need to check 1081 * if another thread already created a duplicate node 1082 */ 1083 rw_enter(&ddv->dv_contents, RW_WRITER); 1084 if ((dup = dv_findbyname(ddv, nm)) == NULL) { 1085 dv_insert(ddv, dv); 1086 } else { 1087 /* 1088 * Duplicate found, use the existing node 1089 */ 1090 VN_RELE(DVTOV(dv)); 1091 dv_destroy(dv, 0); 1092 dv = dup; 1093 } 1094 goto founddv; 1095 /*NOTREACHED*/ 1096 1097 found: 1098 /* 1099 * Skip non-kernel lookups of internal nodes. 1100 * This use of kcred to distinguish between user and 1101 * internal kernel lookups is unfortunate. The information 1102 * provided by the seg argument to lookupnameat should 1103 * evolve into a lookup flag for filesystems that need 1104 * this distinction. 1105 */ 1106 if ((dv->dv_flags & DV_INTERNAL) && (cred != kcred)) { 1107 VN_RELE(vp); 1108 rv = ENOENT; 1109 goto notfound; 1110 } 1111 1112 dcmn_err2(("dv_find: returning vp for nm %s\n", nm)); 1113 if (vp->v_type == VCHR || vp->v_type == VBLK) { 1114 /* 1115 * If vnode is a device, return special vnode instead 1116 * (though it knows all about -us- via sp->s_realvp, 1117 * sp->s_devvp, and sp->s_dip) 1118 */ 1119 *vpp = specvp_devfs(vp, vp->v_rdev, vp->v_type, cred, 1120 dv->dv_devi); 1121 VN_RELE(vp); 1122 if (*vpp == NULLVP) 1123 rv = ENOSYS; 1124 } else 1125 *vpp = vp; 1126 1127 notfound: 1128 rw_enter(&ddv->dv_contents, RW_WRITER); 1129 if (was_busy) 1130 ddv->dv_busy--; 1131 rw_exit(&ddv->dv_contents); 1132 return (rv); 1133 } 1134 1135 /* 1136 * The given directory node is out-of-date; that is, it has been 1137 * marked as needing to be rebuilt, possibly because some new devinfo 1138 * node has come into existence, or possibly because this is the first 1139 * time we've been here. 1140 */ 1141 void 1142 dv_filldir(struct dv_node *ddv) 1143 { 1144 struct dv_node *dv; 1145 dev_info_t *devi, *pdevi; 1146 struct ddi_minor_data *dmd; 1147 char devnm[MAXNAMELEN]; 1148 int circ; 1149 1150 ASSERT(DVTOV(ddv)->v_type == VDIR); 1151 ASSERT(RW_WRITE_HELD(&ddv->dv_contents)); 1152 ASSERT(ddv->dv_flags & DV_BUILD); 1153 1154 dcmn_err3(("dv_filldir: %s\n", ddv->dv_name)); 1155 if (DV_STALE(ddv)) 1156 return; 1157 pdevi = ddv->dv_devi; 1158 1159 if (ndi_devi_config(pdevi, NDI_NO_EVENT) != NDI_SUCCESS) { 1160 dcmn_err3(("dv_filldir: config error %s\n", 1161 ddv->dv_name)); 1162 } 1163 1164 ndi_devi_enter(pdevi, &circ); 1165 for (devi = ddi_get_child(pdevi); devi; 1166 devi = ddi_get_next_sibling(devi)) { 1167 if (i_ddi_node_state(devi) < DS_PROBED) 1168 continue; 1169 1170 dcmn_err3(("dv_filldir: node %s\n", ddi_node_name(devi))); 1171 1172 mutex_enter(&DEVI(devi)->devi_lock); 1173 for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) { 1174 char *addr; 1175 1176 /* 1177 * Skip alias nodes, internal nodes, and nodes 1178 * without a name. We allow DDM_DEFAULT nodes 1179 * to appear in readdir. 1180 */ 1181 if ((dmd->type == DDM_ALIAS) || 1182 (dmd->type == DDM_INTERNAL_PATH) || 1183 (dmd->ddm_name == NULL)) 1184 continue; 1185 1186 addr = ddi_get_name_addr(devi); 1187 if (addr && *addr) 1188 (void) sprintf(devnm, "%s@%s:%s", 1189 ddi_node_name(devi), addr, dmd->ddm_name); 1190 else 1191 (void) sprintf(devnm, "%s:%s", 1192 ddi_node_name(devi), dmd->ddm_name); 1193 1194 if ((dv = dv_findbyname(ddv, devnm)) != NULL) { 1195 /* dv_node already exists */ 1196 VN_RELE(DVTOV(dv)); 1197 continue; 1198 } 1199 1200 dv = dv_mknod(ddv, devi, devnm, dmd); 1201 dv_insert(ddv, dv); 1202 VN_RELE(DVTOV(dv)); 1203 } 1204 mutex_exit(&DEVI(devi)->devi_lock); 1205 1206 (void) ddi_deviname(devi, devnm); 1207 if ((dv = dv_findbyname(ddv, devnm + 1)) == NULL) { 1208 /* directory doesn't exist */ 1209 dv = dv_mkdir(ddv, devi, devnm + 1); 1210 dv_insert(ddv, dv); 1211 } 1212 VN_RELE(DVTOV(dv)); 1213 } 1214 ndi_devi_exit(pdevi, circ); 1215 1216 ddv->dv_flags &= ~DV_BUILD; 1217 } 1218 1219 /* 1220 * Given a directory node, clean out all the nodes beneath. 1221 * 1222 * VDIR: Reinvoke to clean them, then delete the directory. 1223 * VCHR, VBLK: Just blow them away. 1224 * 1225 * Mark the directories touched as in need of a rebuild, in case 1226 * we fall over part way through. When DV_CLEAN_FORCE is specified, 1227 * we mark referenced empty directories as stale to facilitate DR. 1228 */ 1229 int 1230 dv_cleandir(struct dv_node *ddv, char *devnm, uint_t flags) 1231 { 1232 struct dv_node *dv; 1233 struct dv_node **pprev, **npprev; 1234 struct vnode *vp; 1235 int busy = 0; 1236 1237 dcmn_err3(("dv_cleandir: %s\n", ddv->dv_name)); 1238 1239 if (!(flags & DV_CLEANDIR_LCK)) 1240 rw_enter(&ddv->dv_contents, RW_WRITER); 1241 for (pprev = &ddv->dv_dot, dv = *pprev; dv; 1242 pprev = npprev, dv = *pprev) { 1243 npprev = &dv->dv_next; 1244 1245 /* 1246 * If devnm is specified, the non-minor portion of the 1247 * name must match devnm. 1248 */ 1249 if (devnm && 1250 (strncmp(devnm, dv->dv_name, strlen(devnm)) || 1251 (dv->dv_name[strlen(devnm)] != ':' && 1252 dv->dv_name[strlen(devnm)] != '\0'))) 1253 continue; 1254 1255 /* check type of what we are cleaning */ 1256 vp = DVTOV(dv); 1257 if (vp->v_type == VDIR) { 1258 /* recurse on directories */ 1259 rw_enter(&dv->dv_contents, RW_WRITER); 1260 if (dv_cleandir(dv, NULL, 1261 flags | DV_CLEANDIR_LCK) == EBUSY) { 1262 rw_exit(&dv->dv_contents); 1263 goto set_busy; 1264 } 1265 1266 /* A clean directory is an empty directory... */ 1267 ASSERT(dv->dv_nlink == 2); 1268 mutex_enter(&vp->v_lock); 1269 if (vp->v_count > 0) { 1270 /* 1271 * ... but an empty directory can still have 1272 * references to it. If we have dv_busy or 1273 * DV_CLEAN_FORCE is *not* specified then a 1274 * referenced directory is considered busy. 1275 */ 1276 if (dv->dv_busy || !(flags & DV_CLEAN_FORCE)) { 1277 mutex_exit(&vp->v_lock); 1278 rw_exit(&dv->dv_contents); 1279 goto set_busy; 1280 } 1281 1282 /* 1283 * Mark referenced directory stale so that DR 1284 * will succeed even if a shell has 1285 * /devices/xxx as current directory (causing 1286 * VN_HOLD reference to an empty directory). 1287 */ 1288 ASSERT(!DV_STALE(dv)); 1289 ndi_rele_devi(dv->dv_devi); 1290 dv->dv_devi = NULL; /* mark DV_STALE */ 1291 } 1292 } else { 1293 ASSERT((vp->v_type == VCHR) || (vp->v_type == VBLK)); 1294 ASSERT(dv->dv_nlink == 1); /* no hard links */ 1295 mutex_enter(&vp->v_lock); 1296 if (vp->v_count > 0) { 1297 mutex_exit(&vp->v_lock); 1298 goto set_busy; 1299 } 1300 } 1301 1302 /* unlink from directory */ 1303 dv_unlink(ddv, dv, pprev); 1304 1305 /* drop locks */ 1306 mutex_exit(&vp->v_lock); 1307 if (vp->v_type == VDIR) 1308 rw_exit(&dv->dv_contents); 1309 1310 /* destroy vnode if ref count is zero */ 1311 if (vp->v_count == 0) 1312 dv_destroy(dv, flags); 1313 1314 /* pointer to previous stays unchanged */ 1315 npprev = pprev; 1316 continue; 1317 1318 /* 1319 * If devnm is not NULL we return immediately on busy, 1320 * otherwise we continue destroying unused dv_node's. 1321 */ 1322 set_busy: busy++; 1323 if (devnm) 1324 break; 1325 } 1326 1327 /* 1328 * This code may be invoked to inform devfs that a new node has 1329 * been created in the kernel device tree. So we always set 1330 * the DV_BUILD flag to allow the next dv_filldir() to pick 1331 * the new devinfo nodes. 1332 */ 1333 ddv->dv_flags |= DV_BUILD; 1334 1335 if (!(flags & DV_CLEANDIR_LCK)) 1336 rw_exit(&ddv->dv_contents); 1337 1338 return (busy ? EBUSY : 0); 1339 } 1340 1341 /* 1342 * Walk through the devfs hierarchy, correcting the permissions of 1343 * devices with default permissions that do not match those specified 1344 * by minor perm. This can only be done for all drivers for now. 1345 */ 1346 static int 1347 dv_reset_perm_dir(struct dv_node *ddv, uint_t flags) 1348 { 1349 struct dv_node *dv, *next = NULL; 1350 struct vnode *vp; 1351 int retval = 0; 1352 struct vattr *attrp; 1353 mperm_t mp; 1354 char *nm; 1355 uid_t old_uid; 1356 gid_t old_gid; 1357 mode_t old_mode; 1358 1359 rw_enter(&ddv->dv_contents, RW_WRITER); 1360 for (dv = ddv->dv_dot; dv; dv = next) { 1361 int error = 0; 1362 next = dv->dv_next; 1363 nm = dv->dv_name; 1364 1365 rw_enter(&dv->dv_contents, RW_READER); 1366 vp = DVTOV(dv); 1367 if (vp->v_type == VDIR) { 1368 rw_exit(&dv->dv_contents); 1369 if (dv_reset_perm_dir(dv, flags) != 0) { 1370 error = EBUSY; 1371 } 1372 } else { 1373 ASSERT(vp->v_type == VCHR || vp->v_type == VBLK); 1374 1375 /* 1376 * Check for permissions from minor_perm 1377 * If there are none, we're done 1378 */ 1379 rw_exit(&dv->dv_contents); 1380 if (dev_minorperm(dv->dv_devi, nm, &mp) != 0) 1381 continue; 1382 1383 rw_enter(&dv->dv_contents, RW_READER); 1384 1385 /* 1386 * Allow a node's permissions to be altered 1387 * permanently from the defaults by chmod, 1388 * using the shadow node as backing store. 1389 * Otherwise, update node to minor_perm permissions. 1390 */ 1391 if (dv->dv_attrvp == NULLVP) { 1392 /* 1393 * No attribute vp, try to find one. 1394 */ 1395 dv_shadow_node(DVTOV(ddv), nm, vp, 1396 NULL, NULLVP, kcred, 0); 1397 } 1398 if (dv->dv_attrvp != NULLVP || dv->dv_attr == NULL) { 1399 rw_exit(&dv->dv_contents); 1400 continue; 1401 } 1402 1403 attrp = dv->dv_attr; 1404 1405 if (VATTRP_MP_CMP(attrp, mp) == 0) { 1406 dcmn_err5(("%s: no perm change: " 1407 "%d %d 0%o\n", nm, attrp->va_uid, 1408 attrp->va_gid, attrp->va_mode)); 1409 rw_exit(&dv->dv_contents); 1410 continue; 1411 } 1412 1413 old_uid = attrp->va_uid; 1414 old_gid = attrp->va_gid; 1415 old_mode = attrp->va_mode; 1416 1417 VATTRP_MP_MERGE(attrp, mp); 1418 mutex_enter(&vp->v_lock); 1419 if (vp->v_count > 0) { 1420 error = EBUSY; 1421 } 1422 mutex_exit(&vp->v_lock); 1423 1424 dcmn_err5(("%s: perm %d/%d/0%o -> %d/%d/0%o (%d)\n", 1425 nm, old_uid, old_gid, old_mode, attrp->va_uid, 1426 attrp->va_gid, attrp->va_mode, error)); 1427 1428 rw_exit(&dv->dv_contents); 1429 } 1430 1431 if (error != 0) { 1432 retval = error; 1433 } 1434 } 1435 1436 ddv->dv_flags |= DV_BUILD; 1437 1438 rw_exit(&ddv->dv_contents); 1439 1440 return (retval); 1441 } 1442 1443 int 1444 devfs_reset_perm(uint_t flags) 1445 { 1446 struct dv_node *dvp; 1447 int rval; 1448 1449 if ((dvp = devfs_dip_to_dvnode(ddi_root_node())) == NULL) 1450 return (0); 1451 1452 VN_HOLD(DVTOV(dvp)); 1453 rval = dv_reset_perm_dir(dvp, flags); 1454 VN_RELE(DVTOV(dvp)); 1455 return (rval); 1456 } 1457 1458 /* 1459 * Clean up dangling devfs shadow nodes for removed 1460 * drivers so that, in the event the driver is re-added 1461 * to the system, newly created nodes won't incorrectly 1462 * pick up these stale shadow node permissions. 1463 * 1464 * This is accomplished by walking down the pathname 1465 * to the directory, starting at the root's attribute 1466 * node, then removing all minors matching the specified 1467 * node name. Care must be taken to remove all entries 1468 * in a directory before the directory itself, so that 1469 * the clean-up associated with rem_drv'ing a nexus driver 1470 * does not inadvertently result in an inconsistent 1471 * filesystem underlying devfs. 1472 */ 1473 1474 static int 1475 devfs_remdrv_rmdir(vnode_t *dirvp, const char *dir, vnode_t *rvp) 1476 { 1477 int error; 1478 vnode_t *vp; 1479 int eof; 1480 struct iovec iov; 1481 struct uio uio; 1482 struct dirent64 *dp; 1483 dirent64_t *dbuf; 1484 size_t dlen; 1485 size_t dbuflen; 1486 int ndirents = 64; 1487 char *nm; 1488 1489 VN_HOLD(dirvp); 1490 1491 dlen = ndirents * (sizeof (*dbuf)); 1492 dbuf = kmem_alloc(dlen, KM_SLEEP); 1493 1494 uio.uio_iov = &iov; 1495 uio.uio_iovcnt = 1; 1496 uio.uio_segflg = UIO_SYSSPACE; 1497 uio.uio_fmode = 0; 1498 uio.uio_extflg = UIO_COPY_CACHED; 1499 uio.uio_loffset = 0; 1500 uio.uio_llimit = MAXOFFSET_T; 1501 1502 eof = 0; 1503 error = 0; 1504 while (!error && !eof) { 1505 uio.uio_resid = dlen; 1506 iov.iov_base = (char *)dbuf; 1507 iov.iov_len = dlen; 1508 1509 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1510 error = VOP_READDIR(dirvp, &uio, kcred, &eof); 1511 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1512 1513 dbuflen = dlen - uio.uio_resid; 1514 1515 if (error || dbuflen == 0) 1516 break; 1517 1518 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); 1519 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 1520 1521 nm = dp->d_name; 1522 1523 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) 1524 continue; 1525 1526 error = VOP_LOOKUP(dirvp, 1527 nm, &vp, NULL, 0, NULL, kcred); 1528 1529 dsysdebug(error, 1530 ("rem_drv %s/%s lookup (%d)\n", 1531 dir, nm, error)); 1532 1533 if (error) 1534 continue; 1535 1536 ASSERT(vp->v_type == VDIR || 1537 vp->v_type == VCHR || vp->v_type == VBLK); 1538 1539 if (vp->v_type == VDIR) { 1540 error = devfs_remdrv_rmdir(vp, nm, rvp); 1541 if (error == 0) { 1542 error = VOP_RMDIR(dirvp, 1543 (char *)nm, rvp, kcred); 1544 dsysdebug(error, 1545 ("rem_drv %s/%s rmdir (%d)\n", 1546 dir, nm, error)); 1547 } 1548 } else { 1549 error = VOP_REMOVE(dirvp, (char *)nm, kcred); 1550 dsysdebug(error, 1551 ("rem_drv %s/%s remove (%d)\n", 1552 dir, nm, error)); 1553 } 1554 1555 VN_RELE(vp); 1556 if (error) { 1557 goto exit; 1558 } 1559 } 1560 } 1561 1562 exit: 1563 VN_RELE(dirvp); 1564 kmem_free(dbuf, dlen); 1565 1566 return (error); 1567 } 1568 1569 int 1570 devfs_remdrv_cleanup(const char *dir, const char *nodename) 1571 { 1572 int error; 1573 vnode_t *vp; 1574 vnode_t *dirvp; 1575 int eof; 1576 struct iovec iov; 1577 struct uio uio; 1578 struct dirent64 *dp; 1579 dirent64_t *dbuf; 1580 size_t dlen; 1581 size_t dbuflen; 1582 int ndirents = 64; 1583 int nodenamelen = strlen(nodename); 1584 char *nm; 1585 struct pathname pn; 1586 vnode_t *rvp; /* root node of the underlying attribute fs */ 1587 1588 dcmn_err5(("devfs_remdrv_cleanup: %s %s\n", dir, nodename)); 1589 1590 if (error = pn_get((char *)dir, UIO_SYSSPACE, &pn)) 1591 return (0); 1592 1593 rvp = dvroot->dv_attrvp; 1594 ASSERT(rvp != NULL); 1595 VN_HOLD(rvp); 1596 1597 pn_skipslash(&pn); 1598 dirvp = rvp; 1599 VN_HOLD(dirvp); 1600 1601 nm = kmem_alloc(MAXNAMELEN, KM_SLEEP); 1602 1603 while (pn_pathleft(&pn)) { 1604 ASSERT(dirvp->v_type == VDIR); 1605 (void) pn_getcomponent(&pn, nm); 1606 ASSERT((strcmp(nm, ".") != 0) && (strcmp(nm, "..") != 0)); 1607 error = VOP_LOOKUP(dirvp, nm, &vp, NULL, 0, rvp, kcred); 1608 if (error) { 1609 dcmn_err5(("remdrv_cleanup %s lookup error %d\n", 1610 nm, error)); 1611 VN_RELE(dirvp); 1612 if (dirvp != rvp) 1613 VN_RELE(rvp); 1614 pn_free(&pn); 1615 kmem_free(nm, MAXNAMELEN); 1616 return (0); 1617 } 1618 VN_RELE(dirvp); 1619 dirvp = vp; 1620 pn_skipslash(&pn); 1621 } 1622 1623 ASSERT(dirvp->v_type == VDIR); 1624 if (dirvp != rvp) 1625 VN_RELE(rvp); 1626 pn_free(&pn); 1627 kmem_free(nm, MAXNAMELEN); 1628 1629 dlen = ndirents * (sizeof (*dbuf)); 1630 dbuf = kmem_alloc(dlen, KM_SLEEP); 1631 1632 uio.uio_iov = &iov; 1633 uio.uio_iovcnt = 1; 1634 uio.uio_segflg = UIO_SYSSPACE; 1635 uio.uio_fmode = 0; 1636 uio.uio_extflg = UIO_COPY_CACHED; 1637 uio.uio_loffset = 0; 1638 uio.uio_llimit = MAXOFFSET_T; 1639 1640 eof = 0; 1641 error = 0; 1642 while (!error && !eof) { 1643 uio.uio_resid = dlen; 1644 iov.iov_base = (char *)dbuf; 1645 iov.iov_len = dlen; 1646 1647 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1648 error = VOP_READDIR(dirvp, &uio, kcred, &eof); 1649 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1650 1651 dbuflen = dlen - uio.uio_resid; 1652 1653 if (error || dbuflen == 0) 1654 break; 1655 1656 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); 1657 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 1658 1659 nm = dp->d_name; 1660 1661 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) 1662 continue; 1663 1664 if (strncmp(nm, nodename, nodenamelen) != 0) 1665 continue; 1666 1667 error = VOP_LOOKUP(dirvp, nm, &vp, 1668 NULL, 0, NULL, kcred); 1669 1670 dsysdebug(error, 1671 ("rem_drv %s/%s lookup (%d)\n", 1672 dir, nm, error)); 1673 1674 if (error) 1675 continue; 1676 1677 ASSERT(vp->v_type == VDIR || 1678 vp->v_type == VCHR || vp->v_type == VBLK); 1679 1680 if (vp->v_type == VDIR) { 1681 error = devfs_remdrv_rmdir(vp, nm, rvp); 1682 if (error == 0) { 1683 error = VOP_RMDIR(dirvp, 1684 (char *)nm, rvp, kcred); 1685 dsysdebug(error, 1686 ("rem_drv %s/%s rmdir (%d)\n", 1687 dir, nm, error)); 1688 } 1689 } else { 1690 error = VOP_REMOVE(dirvp, (char *)nm, kcred); 1691 dsysdebug(error, 1692 ("rem_drv %s/%s remove (%d)\n", 1693 dir, nm, error)); 1694 } 1695 1696 VN_RELE(vp); 1697 if (error) 1698 goto exit; 1699 } 1700 } 1701 1702 exit: 1703 VN_RELE(dirvp); 1704 1705 kmem_free(dbuf, dlen); 1706 1707 return (0); 1708 } 1709 1710 struct dv_list { 1711 struct dv_node *dv; 1712 struct dv_list *next; 1713 }; 1714 1715 void 1716 dv_walk( 1717 struct dv_node *ddv, 1718 char *devnm, 1719 void (*callback)(struct dv_node *, void *), 1720 void *arg) 1721 { 1722 struct vnode *dvp; 1723 struct dv_node *dv; 1724 struct dv_list *head, *tail, *next; 1725 int len; 1726 1727 dcmn_err3(("dv_walk: ddv = %s, devnm = %s\n", 1728 ddv->dv_name, devnm ? devnm : "<null>")); 1729 1730 dvp = DVTOV(ddv); 1731 1732 ASSERT(dvp->v_type == VDIR); 1733 1734 head = tail = next = NULL; 1735 1736 rw_enter(&ddv->dv_contents, RW_READER); 1737 mutex_enter(&dvp->v_lock); 1738 for (dv = ddv->dv_dot; dv; dv = dv->dv_next) { 1739 /* 1740 * If devnm is not NULL and is not the empty string, 1741 * select only dv_nodes with matching non-minor name 1742 */ 1743 if (devnm && (len = strlen(devnm)) && 1744 (strncmp(devnm, dv->dv_name, len) || 1745 (dv->dv_name[len] != ':' && dv->dv_name[len] != '\0'))) 1746 continue; 1747 1748 callback(dv, arg); 1749 1750 if (DVTOV(dv)->v_type != VDIR) 1751 continue; 1752 1753 next = kmem_zalloc(sizeof (*next), KM_SLEEP); 1754 next->dv = dv; 1755 1756 if (tail) 1757 tail->next = next; 1758 else 1759 head = next; 1760 1761 tail = next; 1762 } 1763 1764 while (head) { 1765 dv_walk(head->dv, NULL, callback, arg); 1766 next = head->next; 1767 kmem_free(head, sizeof (*head)); 1768 head = next; 1769 } 1770 rw_exit(&ddv->dv_contents); 1771 mutex_exit(&dvp->v_lock); 1772 } 1773