1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * vnode ops for the devfs 30 * 31 * For leaf vnode special files (VCHR|VBLK) specfs will always see the VOP 32 * first because dv_find always performs leaf vnode substitution, returning 33 * a specfs vnode with an s_realvp pointing to the devfs leaf vnode. This 34 * means that the only leaf special file VOP operations that devfs will see 35 * after VOP_LOOKUP are the ones that specfs forwards. 36 */ 37 38 #include <sys/types.h> 39 #include <sys/param.h> 40 #include <sys/t_lock.h> 41 #include <sys/systm.h> 42 #include <sys/sysmacros.h> 43 #include <sys/user.h> 44 #include <sys/time.h> 45 #include <sys/vfs.h> 46 #include <sys/vnode.h> 47 #include <sys/file.h> 48 #include <sys/fcntl.h> 49 #include <sys/flock.h> 50 #include <sys/kmem.h> 51 #include <sys/uio.h> 52 #include <sys/errno.h> 53 #include <sys/stat.h> 54 #include <sys/cred.h> 55 #include <sys/dirent.h> 56 #include <sys/pathname.h> 57 #include <sys/cmn_err.h> 58 #include <sys/debug.h> 59 #include <sys/policy.h> 60 #include <sys/modctl.h> 61 62 #include <fs/fs_subr.h> 63 #include <sys/fs/dv_node.h> 64 65 extern struct vattr dv_vattr_dir, dv_vattr_file; 66 extern dev_t rconsdev; 67 68 /* 69 * Open of devices (leaf nodes) is handled by specfs. 70 * There is nothing to do to open a directory 71 */ 72 /*ARGSUSED*/ 73 static int 74 devfs_open(struct vnode **vpp, int flag, struct cred *cred) 75 { 76 struct dv_node *dv = VTODV(*vpp); 77 78 dcmn_err2(("devfs_open %s\n", dv->dv_name)); 79 ASSERT((*vpp)->v_type == VDIR); 80 return (0); 81 } 82 83 /* 84 * Close of devices (leaf nodes) is handled by specfs. 85 * There is nothing much to do inorder to close a directory. 86 */ 87 /*ARGSUSED1*/ 88 static int 89 devfs_close(struct vnode *vp, int flag, int count, 90 offset_t offset, struct cred *cred) 91 { 92 struct dv_node *dv = VTODV(vp); 93 94 dcmn_err2(("devfs_close %s\n", dv->dv_name)); 95 ASSERT(vp->v_type == VDIR); 96 97 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 98 cleanshares(vp, ttoproc(curthread)->p_pid); 99 return (0); 100 } 101 102 /* 103 * Read of devices (leaf nodes) is handled by specfs. 104 * Read of directories is not supported. 105 */ 106 /*ARGSUSED*/ 107 static int 108 devfs_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred, 109 struct caller_context *ct) 110 { 111 dcmn_err2(("devfs_read %s\n", VTODV(vp)->dv_name)); 112 ASSERT(vp->v_type == VDIR); 113 ASSERT(RW_READ_HELD(&VTODV(vp)->dv_contents)); 114 return (EISDIR); 115 } 116 117 /* 118 * Write of devices (leaf nodes) is handled by specfs. 119 * Write of directories is not supported. 120 */ 121 /*ARGSUSED*/ 122 static int 123 devfs_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred, 124 struct caller_context *ct) 125 { 126 dcmn_err2(("devfs_write %s\n", VTODV(vp)->dv_name)); 127 ASSERT(vp->v_type == VDIR); 128 ASSERT(RW_WRITE_HELD(&VTODV(vp)->dv_contents)); 129 return (EISDIR); 130 } 131 132 /* 133 * Ioctls to device (leaf nodes) is handled by specfs. 134 * Ioctl to directories is not supported. 135 */ 136 /*ARGSUSED*/ 137 static int 138 devfs_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, 139 struct cred *cred, int *rvalp) 140 { 141 dcmn_err2(("devfs_ioctl %s\n", VTODV(vp)->dv_name)); 142 ASSERT(vp->v_type == VDIR); 143 144 return (ENOTTY); /* no ioctls supported */ 145 } 146 147 /* 148 * We can be asked directly about the attributes of directories, or 149 * (via sp->s_realvp) about the filesystem attributes of special files. 150 * 151 * For directories, we just believe the attribute store 152 * though we mangle the nodeid, fsid, and rdev to convince userland we 153 * really are a different filesystem. 154 * 155 * For special files, a little more fakery is required. 156 * 157 * If the attribute store is not there (read only root), we believe our 158 * memory based attributes. 159 */ 160 static int 161 devfs_getattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cr) 162 { 163 struct dv_node *dv = VTODV(vp); 164 int error = 0; 165 uint_t mask; 166 167 /* 168 * Message goes to console only. Otherwise, the message 169 * causes devfs_getattr to be invoked again... infinite loop 170 */ 171 dcmn_err2(("?devfs_getattr %s\n", dv->dv_name)); 172 ASSERT(dv->dv_attr || dv->dv_attrvp); 173 174 if (!(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK)) { 175 cmn_err(CE_WARN, /* panic ? */ 176 "?%s: getattr on vnode type %d", dvnm, vp->v_type); 177 return (ENOENT); 178 } 179 180 if (dv->dv_attr) { 181 /* 182 * obtain from the memory version of attribute. 183 * preserve mask for those that optimize. 184 * devfs specific fields are already merged on creation. 185 */ 186 mask = vap->va_mask; 187 *vap = *dv->dv_attr; 188 vap->va_mask = mask; 189 } else { 190 /* obtain from attribute store and merge */ 191 error = VOP_GETATTR(dv->dv_attrvp, vap, flags, cr); 192 dsysdebug(error, ("vop_getattr %s %d\n", dv->dv_name, error)); 193 dv_vattr_merge(dv, vap); 194 } 195 196 /* 197 * Restrict the permissions of the node fronting the console 198 * to 0600 with root as the owner. This prevents a non-root 199 * user from gaining access to a serial terminal (like /dev/term/a) 200 * which is in reality serving as the console device (/dev/console). 201 */ 202 if (vp->v_rdev == rconsdev) { 203 mode_t rconsmask = S_IXUSR|S_IRWXG|S_IRWXO; 204 vap->va_mode &= (~rconsmask); 205 vap->va_uid = 0; 206 } 207 208 return (error); 209 } 210 211 static int devfs_unlocked_access(void *, int, struct cred *); 212 213 /*ARGSUSED4*/ 214 static int 215 devfs_setattr_dir( 216 struct dv_node *dv, 217 struct vnode *vp, 218 struct vattr *vap, 219 int flags, 220 struct cred *cr) 221 { 222 struct vattr *map; 223 long int mask; 224 int error = 0; 225 struct vattr vattr; 226 227 ASSERT(dv->dv_attr || dv->dv_attrvp); 228 229 ASSERT(vp->v_type == VDIR); 230 ASSERT((dv->dv_flags & DV_NO_FSPERM) == 0); 231 232 if (vap->va_mask & AT_NOSET) 233 return (EINVAL); 234 235 /* to ensure consistency, single thread setting of attributes */ 236 rw_enter(&dv->dv_contents, RW_WRITER); 237 238 again: if (dv->dv_attr) { 239 240 error = secpolicy_vnode_setattr(cr, vp, vap, dv->dv_attr, 241 flags, devfs_unlocked_access, dv); 242 243 if (error) 244 goto out; 245 246 /* 247 * Apply changes to the memory based attribute. This code 248 * is modeled after the tmpfs implementation of memory 249 * based vnodes 250 */ 251 map = dv->dv_attr; 252 mask = vap->va_mask; 253 254 /* Change file access modes. */ 255 if (mask & AT_MODE) { 256 map->va_mode &= S_IFMT; 257 map->va_mode |= vap->va_mode & ~S_IFMT; 258 } 259 if (mask & AT_UID) 260 map->va_uid = vap->va_uid; 261 if (mask & AT_GID) 262 map->va_gid = vap->va_gid; 263 if (mask & AT_ATIME) 264 map->va_atime = vap->va_atime; 265 if (mask & AT_MTIME) 266 map->va_mtime = vap->va_mtime; 267 268 if (mask & (AT_MODE | AT_UID | AT_GID | AT_MTIME)) 269 gethrestime(&map->va_ctime); 270 } else { 271 /* use the backing attribute store */ 272 ASSERT(dv->dv_attrvp); 273 274 /* 275 * See if we are changing something we care about 276 * the persistence of - return success if we don't care. 277 */ 278 if (vap->va_mask & (AT_MODE|AT_UID|AT_GID|AT_ATIME|AT_MTIME)) { 279 /* Set the attributes */ 280 error = VOP_SETATTR(dv->dv_attrvp, 281 vap, flags, cr, NULL); 282 dsysdebug(error, 283 ("vop_setattr %s %d\n", dv->dv_name, error)); 284 285 /* 286 * Some file systems may return EROFS for a setattr 287 * on a readonly file system. In this case we create 288 * our own memory based attribute. 289 */ 290 if (error == EROFS) { 291 /* 292 * obtain attributes from existing file 293 * that we will modify and switch to memory 294 * based attribute until attribute store is 295 * read/write. 296 */ 297 vattr = dv_vattr_dir; 298 if (VOP_GETATTR(dv->dv_attrvp, &vattr, 299 flags, cr) == 0) { 300 dv->dv_attr = kmem_alloc( 301 sizeof (struct vattr), KM_SLEEP); 302 *dv->dv_attr = vattr; 303 dv_vattr_merge(dv, dv->dv_attr); 304 goto again; 305 } 306 } 307 } 308 } 309 out: 310 rw_exit(&dv->dv_contents); 311 return (error); 312 } 313 314 315 /* 316 * Compare the uid/gid/mode changes requested for a setattr 317 * operation with the same details of a node's default minor 318 * perm information. Return 0 if identical. 319 */ 320 static int 321 dv_setattr_cmp(struct vattr *map, mperm_t *mp) 322 { 323 if ((map->va_mode & S_IAMB) != (mp->mp_mode & S_IAMB)) 324 return (1); 325 if (map->va_uid != mp->mp_uid) 326 return (1); 327 if (map->va_gid != mp->mp_gid) 328 return (1); 329 return (0); 330 } 331 332 333 /*ARGSUSED4*/ 334 static int 335 devfs_setattr( 336 struct vnode *vp, 337 struct vattr *vap, 338 int flags, 339 struct cred *cr, 340 caller_context_t *ct) 341 { 342 struct dv_node *dv = VTODV(vp); 343 struct dv_node *ddv; 344 struct vnode *dvp; 345 struct vattr *map; 346 long int mask; 347 int error = 0; 348 struct vattr *free_vattr = NULL; 349 struct vattr *vattrp = NULL; 350 mperm_t mp; 351 int persist; 352 353 /* 354 * Message goes to console only. Otherwise, the message 355 * causes devfs_getattr to be invoked again... infinite loop 356 */ 357 dcmn_err2(("?devfs_setattr %s\n", dv->dv_name)); 358 ASSERT(dv->dv_attr || dv->dv_attrvp); 359 360 if (!(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK)) { 361 cmn_err(CE_WARN, /* panic ? */ 362 "?%s: getattr on vnode type %d", dvnm, vp->v_type); 363 return (ENOENT); 364 } 365 366 if (vap->va_mask & AT_NOSET) 367 return (EINVAL); 368 369 /* 370 * If we are changing something we don't care about 371 * the persistence of, return success. 372 */ 373 if ((vap->va_mask & 374 (AT_MODE|AT_UID|AT_GID|AT_ATIME|AT_MTIME)) == 0) 375 return (0); 376 377 /* 378 * If driver overrides fs perm, disallow chmod 379 * and do not create attribute nodes. 380 */ 381 if (dv->dv_flags & DV_NO_FSPERM) { 382 ASSERT(dv->dv_attr); 383 if (vap->va_mask & (AT_MODE | AT_UID | AT_GID)) 384 return (EPERM); 385 if ((vap->va_mask & (AT_ATIME|AT_MTIME)) == 0) 386 return (0); 387 rw_enter(&dv->dv_contents, RW_WRITER); 388 if (vap->va_mask & AT_ATIME) 389 dv->dv_attr->va_atime = vap->va_atime; 390 if (vap->va_mask & AT_MTIME) 391 dv->dv_attr->va_mtime = vap->va_mtime; 392 rw_exit(&dv->dv_contents); 393 return (0); 394 } 395 396 /* 397 * Directories are always created but device nodes are 398 * only used to persist non-default permissions. 399 */ 400 if (vp->v_type == VDIR) { 401 ASSERT(dv->dv_attr || dv->dv_attrvp); 402 return (devfs_setattr_dir(dv, vp, vap, flags, cr)); 403 } 404 405 /* 406 * Allocate now before we take any locks 407 */ 408 vattrp = kmem_zalloc(sizeof (*vattrp), KM_SLEEP); 409 410 /* to ensure consistency, single thread setting of attributes */ 411 rw_enter(&dv->dv_contents, RW_WRITER); 412 413 /* 414 * We don't need to create an attribute node 415 * to persist access or modification times. 416 */ 417 persist = (vap->va_mask & (AT_MODE | AT_UID | AT_GID)); 418 419 /* 420 * If persisting something, get the default permissions 421 * for this minor to compare against what the attributes 422 * are now being set to. Default ordering is: 423 * - minor_perm match for this minor 424 * - mode supplied by ddi_create_priv_minor_node 425 * - devfs defaults 426 */ 427 if (persist) { 428 if (dev_minorperm(dv->dv_devi, dv->dv_name, &mp) != 0) { 429 mp.mp_uid = dv_vattr_file.va_uid; 430 mp.mp_gid = dv_vattr_file.va_gid; 431 mp.mp_mode = dv_vattr_file.va_mode; 432 if (dv->dv_flags & DV_DFLT_MODE) { 433 ASSERT((dv->dv_dflt_mode & ~S_IAMB) == 0); 434 mp.mp_mode &= ~S_IAMB; 435 mp.mp_mode |= dv->dv_dflt_mode; 436 dcmn_err5(("%s: setattr priv default 0%o\n", 437 dv->dv_name, mp.mp_mode)); 438 } else { 439 dcmn_err5(("%s: setattr devfs default 0%o\n", 440 dv->dv_name, mp.mp_mode)); 441 } 442 } else { 443 dcmn_err5(("%s: setattr minor perm default 0%o\n", 444 dv->dv_name, mp.mp_mode)); 445 } 446 } 447 448 /* 449 * If we don't have a vattr for this node, construct one. 450 */ 451 if (dv->dv_attr) { 452 free_vattr = vattrp; 453 vattrp = NULL; 454 } else { 455 ASSERT(dv->dv_attrvp); 456 ASSERT(vp->v_type != VDIR); 457 *vattrp = dv_vattr_file; 458 error = VOP_GETATTR(dv->dv_attrvp, vattrp, 0, cr); 459 dsysdebug(error, ("vop_getattr %s %d\n", 460 dv->dv_name, error)); 461 if (error) 462 goto out; 463 dv->dv_attr = vattrp; 464 dv_vattr_merge(dv, dv->dv_attr); 465 vattrp = NULL; 466 } 467 468 error = secpolicy_vnode_setattr(cr, vp, vap, dv->dv_attr, 469 flags, devfs_unlocked_access, dv); 470 if (error) { 471 dsysdebug(error, ("devfs_setattr %s secpolicy error %d\n", 472 dv->dv_name, error)); 473 goto out; 474 } 475 476 /* 477 * Apply changes to the memory based attribute. This code 478 * is modeled after the tmpfs implementation of memory 479 * based vnodes 480 */ 481 map = dv->dv_attr; 482 mask = vap->va_mask; 483 484 /* Change file access modes. */ 485 if (mask & AT_MODE) { 486 map->va_mode &= S_IFMT; 487 map->va_mode |= vap->va_mode & ~S_IFMT; 488 } 489 if (mask & AT_UID) 490 map->va_uid = vap->va_uid; 491 if (mask & AT_GID) 492 map->va_gid = vap->va_gid; 493 if (mask & AT_ATIME) 494 map->va_atime = vap->va_atime; 495 if (mask & AT_MTIME) 496 map->va_mtime = vap->va_mtime; 497 498 if (mask & (AT_MODE | AT_UID | AT_GID | AT_MTIME)) { 499 gethrestime(&map->va_ctime); 500 } 501 502 /* 503 * A setattr to defaults means we no longer need the 504 * shadow node as a persistent store, unless there 505 * are ACLs. Otherwise create a shadow node if one 506 * doesn't exist yet. 507 */ 508 if (persist) { 509 if ((dv_setattr_cmp(map, &mp) == 0) && 510 ((dv->dv_flags & DV_ACL) == 0)) { 511 512 if (dv->dv_attrvp) { 513 ddv = dv->dv_dotdot; 514 ASSERT(ddv->dv_attrvp); 515 error = VOP_REMOVE(ddv->dv_attrvp, 516 dv->dv_name, cr); 517 dsysdebug(error, 518 ("vop_remove %s %s %d\n", 519 ddv->dv_name, dv->dv_name, error)); 520 521 if (error == EROFS) 522 error = 0; 523 VN_RELE(dv->dv_attrvp); 524 dv->dv_attrvp = NULL; 525 } 526 ASSERT(dv->dv_attr); 527 } else { 528 if (mask & AT_MODE) 529 dcmn_err5(("%s persisting mode 0%o\n", 530 dv->dv_name, vap->va_mode)); 531 if (mask & AT_UID) 532 dcmn_err5(("%s persisting uid %d\n", 533 dv->dv_name, vap->va_uid)); 534 if (mask & AT_GID) 535 dcmn_err5(("%s persisting gid %d\n", 536 dv->dv_name, vap->va_gid)); 537 538 if (dv->dv_attrvp == NULL) { 539 dvp = DVTOV(dv->dv_dotdot); 540 dv_shadow_node(dvp, dv->dv_name, vp, 541 NULL, NULLVP, cr, 542 DV_SHADOW_CREATE | DV_SHADOW_WRITE_HELD); 543 } 544 if (dv->dv_attrvp) { 545 error = VOP_SETATTR(dv->dv_attrvp, 546 vap, flags, cr, NULL); 547 dsysdebug(error, ("vop_setattr %s %d\n", 548 dv->dv_name, error)); 549 } 550 /* 551 * Some file systems may return EROFS for a setattr 552 * on a readonly file system. In this case save 553 * as our own memory based attribute. 554 * NOTE: ufs is NOT one of these (see ufs_iupdat). 555 */ 556 if (dv->dv_attr && dv->dv_attrvp && error == 0) { 557 vattrp = dv->dv_attr; 558 dv->dv_attr = NULL; 559 } else if (error == EROFS) 560 error = 0; 561 } 562 } 563 564 out: 565 rw_exit(&dv->dv_contents); 566 567 if (vattrp) 568 kmem_free(vattrp, sizeof (*vattrp)); 569 if (free_vattr) 570 kmem_free(free_vattr, sizeof (*free_vattr)); 571 return (error); 572 } 573 574 static int 575 devfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr) 576 { 577 switch (cmd) { 578 case _PC_ACL_ENABLED: 579 /* 580 * We rely on the underlying filesystem for ACLs, 581 * so direct the query for ACL support there. 582 * ACL support isn't relative to the file 583 * and we can't guarantee that the dv node 584 * has an attribute node, so any valid 585 * attribute node will suffice. 586 */ 587 ASSERT(dvroot); 588 ASSERT(dvroot->dv_attrvp); 589 return (VOP_PATHCONF(dvroot->dv_attrvp, cmd, valp, cr)); 590 /*NOTREACHED*/ 591 } 592 593 return (fs_pathconf(vp, cmd, valp, cr)); 594 } 595 596 /* 597 * Let avp handle security attributes (acl's). 598 */ 599 static int 600 devfs_getsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 601 struct cred *cr) 602 { 603 dvnode_t *dv = VTODV(vp); 604 struct vnode *avp; 605 int error; 606 607 dcmn_err2(("devfs_getsecattr %s\n", dv->dv_name)); 608 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK); 609 610 rw_enter(&dv->dv_contents, RW_READER); 611 612 avp = dv->dv_attrvp; 613 614 /* fabricate the acl */ 615 if (avp == NULL) { 616 error = fs_fab_acl(vp, vsap, flags, cr); 617 rw_exit(&dv->dv_contents); 618 return (error); 619 } 620 621 error = VOP_GETSECATTR(avp, vsap, flags, cr); 622 dsysdebug(error, ("vop_getsecattr %s %d\n", VTODV(vp)->dv_name, error)); 623 rw_exit(&dv->dv_contents); 624 return (error); 625 } 626 627 /* 628 * Set security attributes (acl's) 629 * 630 * Note that the dv_contents lock has already been acquired 631 * by the caller's VOP_RWLOCK. 632 */ 633 static int 634 devfs_setsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 635 struct cred *cr) 636 { 637 dvnode_t *dv = VTODV(vp); 638 struct vnode *avp; 639 int error; 640 641 dcmn_err2(("devfs_setsecattr %s\n", dv->dv_name)); 642 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK); 643 ASSERT(RW_LOCK_HELD(&dv->dv_contents)); 644 645 /* 646 * Not a supported operation on drivers not providing 647 * file system based permissions. 648 */ 649 if (dv->dv_flags & DV_NO_FSPERM) 650 return (ENOTSUP); 651 652 /* 653 * To complete, the setsecattr requires an underlying attribute node. 654 */ 655 if (dv->dv_attrvp == NULL) { 656 ASSERT(vp->v_type == VCHR || vp->v_type == VBLK); 657 dv_shadow_node(DVTOV(dv->dv_dotdot), dv->dv_name, vp, 658 NULL, NULLVP, cr, DV_SHADOW_CREATE | DV_SHADOW_WRITE_HELD); 659 } 660 661 if ((avp = dv->dv_attrvp) == NULL) { 662 dcmn_err2(("devfs_setsecattr %s: " 663 "cannot construct attribute node\n", dv->dv_name)); 664 return (fs_nosys()); 665 } 666 667 /* 668 * The acl(2) system call issues a VOP_RWLOCK before setting an ACL. 669 * Since backing file systems expect the lock to be held before seeing 670 * a VOP_SETSECATTR ACL, we need to issue the VOP_RWLOCK to the backing 671 * store before forwarding the ACL. 672 */ 673 (void) VOP_RWLOCK(avp, V_WRITELOCK_TRUE, NULL); 674 error = VOP_SETSECATTR(avp, vsap, flags, cr); 675 dsysdebug(error, ("vop_setsecattr %s %d\n", VTODV(vp)->dv_name, error)); 676 VOP_RWUNLOCK(avp, V_WRITELOCK_TRUE, NULL); 677 678 /* 679 * Set DV_ACL if we have a non-trivial set of ACLs. It is not 680 * necessary to hold VOP_RWLOCK since fs_acl_nontrivial only does 681 * VOP_GETSECATTR calls. 682 */ 683 if (fs_acl_nontrivial(avp, cr)) 684 dv->dv_flags |= DV_ACL; 685 return (error); 686 } 687 688 /* 689 * This function is used for secpolicy_setattr(). It must call an 690 * access() like function while it is already holding the 691 * dv_contents lock. We only care about this when dv_attr != NULL; 692 * so the unlocked access call only concerns itself with that 693 * particular branch of devfs_access(). 694 */ 695 static int 696 devfs_unlocked_access(void *vdv, int mode, struct cred *cr) 697 { 698 struct dv_node *dv = vdv; 699 int shift = 0; 700 uid_t owner = dv->dv_attr->va_uid; 701 702 /* Check access based on owner, group and public permissions. */ 703 if (crgetuid(cr) != owner) { 704 shift += 3; 705 if (groupmember(dv->dv_attr->va_gid, cr) == 0) 706 shift += 3; 707 } 708 709 /* compute missing mode bits */ 710 mode &= ~(dv->dv_attr->va_mode << shift); 711 712 if (mode == 0) 713 return (0); 714 715 return (secpolicy_vnode_access(cr, DVTOV(dv), owner, mode)); 716 } 717 718 static int 719 devfs_access(struct vnode *vp, int mode, int flags, struct cred *cr) 720 { 721 struct dv_node *dv = VTODV(vp); 722 int res; 723 724 dcmn_err2(("devfs_access %s\n", dv->dv_name)); 725 ASSERT(dv->dv_attr || dv->dv_attrvp); 726 727 /* restrict console access to privileged processes */ 728 if ((vp->v_rdev == rconsdev) && secpolicy_console(cr) != 0) { 729 return (EACCES); 730 } 731 732 if (dv->dv_attr && ((dv->dv_flags & DV_ACL) == 0)) { 733 rw_enter(&dv->dv_contents, RW_READER); 734 if (dv->dv_attr) { 735 res = devfs_unlocked_access(dv, mode, cr); 736 rw_exit(&dv->dv_contents); 737 return (res); 738 } 739 rw_exit(&dv->dv_contents); 740 } 741 return (VOP_ACCESS(dv->dv_attrvp, mode, flags, cr)); 742 } 743 744 /* 745 * Lookup 746 * 747 * Given the directory vnode and the name of the component, return 748 * the corresponding held vnode for that component. 749 * 750 * Of course in these fictional filesystems, nothing's ever quite 751 * -that- simple. 752 * 753 * devfs name type shadow (fs attributes) type comments 754 * ------------------------------------------------------------------------- 755 * drv[@addr] VDIR drv[@addr] VDIR nexus driver 756 * drv[@addr]:m VCHR/VBLK drv[@addr]:m VREG leaf driver 757 * drv[@addr] VCHR/VBLK drv[@addr]:.default VREG leaf driver 758 * ------------------------------------------------------------------------- 759 * 760 * The following names are reserved for the attribute filesystem (which 761 * could easily be another layer on top of this one - we simply need to 762 * hold the vnode of the thing we're looking at) 763 * 764 * attr name type shadow (fs attributes) type comments 765 * ------------------------------------------------------------------------- 766 * drv[@addr] VDIR - - attribute dir 767 * minorname VDIR - - minorname 768 * attribute VREG - - attribute 769 * ------------------------------------------------------------------------- 770 * 771 * Examples: 772 * 773 * devfs:/devices/.../mm@0:zero VCHR 774 * shadow:/.devices/.../mm@0:zero VREG, fs attrs 775 * devfs:/devices/.../mm@0:/zero/attr VREG, driver attribute 776 * 777 * devfs:/devices/.../sd@0,0:a VBLK 778 * shadow:/.devices/.../sd@0,0:a VREG, fs attrs 779 * devfs:/devices/.../sd@0,0:/a/.type VREG, "ddi_block:chan" 780 * 781 * devfs:/devices/.../mm@0 VCHR 782 * shadow:/.devices/.../mm@0:.default VREG, fs attrs 783 * devfs:/devices/.../mm@0:/.default/attr VREG, driver attribute 784 * devfs:/devices/.../mm@0:/.default/.type VREG, "ddi_pseudo" 785 * 786 * devfs:/devices/.../obio VDIR 787 * shadow:/devices/.../obio VDIR, needed for fs attrs. 788 * devfs:/devices/.../obio:/.default/attr VDIR, driver attribute 789 * 790 * We also need to be able deal with "old" devices that have gone away, 791 * though I think that provided we return them with readdir, they can 792 * be removed (i.e. they don't have to respond to lookup, though it might 793 * be weird if they didn't ;-) 794 * 795 * Lookup has side-effects. 796 * 797 * - It will create directories and fs attribute files in the shadow hierarchy. 798 * - It should cause non-SID devices to be probed (ask the parent nexi). 799 */ 800 /*ARGSUSED3*/ 801 static int 802 devfs_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, 803 struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred) 804 { 805 ASSERT(dvp->v_type == VDIR); 806 dcmn_err2(("devfs_lookup: %s\n", nm)); 807 return (dv_find(VTODV(dvp), nm, vpp, pnp, rdir, cred, 0)); 808 } 809 810 /* 811 * devfs nodes can't really be created directly by userland - however, 812 * we do allow creates to find existing nodes: 813 * 814 * - any create fails if the node doesn't exist - EROFS. 815 * - creating an existing directory read-only succeeds, otherwise EISDIR. 816 * - exclusive creates fail if the node already exists - EEXIST. 817 * - failure to create the snode for an existing device - ENOSYS. 818 */ 819 /*ARGSUSED2*/ 820 static int 821 devfs_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl, 822 int mode, struct vnode **vpp, struct cred *cred, int flag) 823 { 824 int error; 825 struct vnode *vp; 826 827 dcmn_err2(("devfs_create %s\n", nm)); 828 error = dv_find(VTODV(dvp), nm, &vp, NULL, NULLVP, cred, 0); 829 if (error == 0) { 830 if (excl == EXCL) 831 error = EEXIST; 832 else if (vp->v_type == VDIR && (mode & VWRITE)) 833 error = EISDIR; 834 else 835 error = VOP_ACCESS(vp, mode, 0, cred); 836 837 if (error) { 838 VN_RELE(vp); 839 } else 840 *vpp = vp; 841 } else if (error == ENOENT) 842 error = EROFS; 843 844 return (error); 845 } 846 847 /* 848 * If DV_BUILD is set, we call into nexus driver to do a BUS_CONFIG_ALL. 849 * Otherwise, simply return cached dv_node's. Hotplug code always call 850 * devfs_clean() to invalid the dv_node cache. 851 */ 852 static int 853 devfs_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred, int *eofp) 854 { 855 struct dv_node *ddv, *dv; 856 struct dirent64 *de, *bufp; 857 offset_t diroff; 858 offset_t soff; 859 size_t reclen, movesz; 860 int error; 861 struct vattr va; 862 size_t bufsz; 863 864 ddv = VTODV(dvp); 865 dcmn_err2(("devfs_readdir %s: offset %lld len %ld\n", 866 ddv->dv_name, uiop->uio_loffset, uiop->uio_iov->iov_len)); 867 ASSERT(ddv->dv_attr || ddv->dv_attrvp); 868 ASSERT(RW_READ_HELD(&ddv->dv_contents)); 869 870 if (uiop->uio_loffset >= MAXOFF_T) { 871 if (eofp) 872 *eofp = 1; 873 return (0); 874 } 875 876 if (uiop->uio_iovcnt != 1) 877 return (EINVAL); 878 879 if (dvp->v_type != VDIR) 880 return (ENOTDIR); 881 882 /* Load the initial contents */ 883 if (ddv->dv_flags & DV_BUILD) { 884 if (!rw_tryupgrade(&ddv->dv_contents)) { 885 rw_exit(&ddv->dv_contents); 886 rw_enter(&ddv->dv_contents, RW_WRITER); 887 } 888 889 /* recheck and fill */ 890 if (ddv->dv_flags & DV_BUILD) 891 dv_filldir(ddv); 892 893 rw_downgrade(&ddv->dv_contents); 894 } 895 896 soff = uiop->uio_loffset; 897 bufsz = uiop->uio_iov->iov_len; 898 de = bufp = kmem_alloc(bufsz, KM_SLEEP); 899 movesz = 0; 900 dv = (struct dv_node *)-1; 901 902 /* 903 * Move as many entries into the uio structure as it will take. 904 * Special case "." and "..". 905 */ 906 diroff = 0; 907 if (soff == 0) { /* . */ 908 reclen = DIRENT64_RECLEN(strlen(".")); 909 if ((movesz + reclen) > bufsz) 910 goto full; 911 de->d_ino = (ino64_t)ddv->dv_ino; 912 de->d_off = (off64_t)diroff + 1; 913 de->d_reclen = (ushort_t)reclen; 914 915 /* use strncpy(9f) to zero out uninitialized bytes */ 916 917 (void) strncpy(de->d_name, ".", DIRENT64_NAMELEN(reclen)); 918 movesz += reclen; 919 de = (dirent64_t *)(intptr_t)((char *)de + reclen); 920 dcmn_err3(("devfs_readdir: A: diroff %lld, soff %lld: '%s' " 921 "reclen %lu\n", diroff, soff, ".", reclen)); 922 } 923 924 diroff++; 925 if (soff <= 1) { /* .. */ 926 reclen = DIRENT64_RECLEN(strlen("..")); 927 if ((movesz + reclen) > bufsz) 928 goto full; 929 de->d_ino = (ino64_t)ddv->dv_dotdot->dv_ino; 930 de->d_off = (off64_t)diroff + 1; 931 de->d_reclen = (ushort_t)reclen; 932 933 /* use strncpy(9f) to zero out uninitialized bytes */ 934 935 (void) strncpy(de->d_name, "..", DIRENT64_NAMELEN(reclen)); 936 movesz += reclen; 937 de = (dirent64_t *)(intptr_t)((char *)de + reclen); 938 dcmn_err3(("devfs_readdir: B: diroff %lld, soff %lld: '%s' " 939 "reclen %lu\n", diroff, soff, "..", reclen)); 940 } 941 942 diroff++; 943 for (dv = ddv->dv_dot; dv; dv = dv->dv_next, diroff++) { 944 /* 945 * although DDM_INTERNAL_PATH minor nodes are skipped for 946 * readdirs outside the kernel, they still occupy directory 947 * offsets 948 */ 949 if (diroff < soff || 950 ((dv->dv_flags & DV_INTERNAL) && (cred != kcred))) 951 continue; 952 953 reclen = DIRENT64_RECLEN(strlen(dv->dv_name)); 954 if ((movesz + reclen) > bufsz) { 955 dcmn_err3(("devfs_readdir: C: diroff " 956 "%lld, soff %lld: '%s' reclen %lu\n", 957 diroff, soff, dv->dv_name, reclen)); 958 goto full; 959 } 960 de->d_ino = (ino64_t)dv->dv_ino; 961 de->d_off = (off64_t)diroff + 1; 962 de->d_reclen = (ushort_t)reclen; 963 964 /* use strncpy(9f) to zero out uninitialized bytes */ 965 966 ASSERT(strlen(dv->dv_name) + 1 <= 967 DIRENT64_NAMELEN(reclen)); 968 (void) strncpy(de->d_name, dv->dv_name, 969 DIRENT64_NAMELEN(reclen)); 970 971 movesz += reclen; 972 de = (dirent64_t *)(intptr_t)((char *)de + reclen); 973 dcmn_err4(("devfs_readdir: D: diroff " 974 "%lld, soff %lld: '%s' reclen %lu\n", diroff, soff, 975 dv->dv_name, reclen)); 976 } 977 978 /* the buffer is full, or we exhausted everything */ 979 full: dcmn_err3(("devfs_readdir: moving %lu bytes: " 980 "diroff %lld, soff %lld, dv %p\n", 981 movesz, diroff, soff, (void *)dv)); 982 983 if ((movesz == 0) && dv) 984 error = EINVAL; /* cannot be represented */ 985 else { 986 error = uiomove(bufp, movesz, UIO_READ, uiop); 987 if (error == 0) { 988 if (eofp) 989 *eofp = dv ? 0 : 1; 990 uiop->uio_loffset = diroff; 991 } 992 993 va.va_mask = AT_ATIME; 994 gethrestime(&va.va_atime); 995 rw_exit(&ddv->dv_contents); 996 (void) devfs_setattr(dvp, &va, 0, cred, NULL); 997 rw_enter(&ddv->dv_contents, RW_READER); 998 } 999 1000 kmem_free(bufp, bufsz); 1001 return (error); 1002 } 1003 1004 /*ARGSUSED*/ 1005 static int 1006 devfs_fsync(struct vnode *vp, int syncflag, struct cred *cred) 1007 { 1008 /* 1009 * Message goes to console only. Otherwise, the message 1010 * causes devfs_fsync to be invoked again... infinite loop 1011 */ 1012 dcmn_err2(("devfs_fsync %s\n", VTODV(vp)->dv_name)); 1013 return (0); 1014 } 1015 1016 /* 1017 * Normally, we leave the dv_node here at count of 0. 1018 * The node will be destroyed when dv_cleandir() is called. 1019 * 1020 * Stale dv_node's are already unlinked from the fs tree, 1021 * so dv_cleandir() won't find them. We destroy such nodes 1022 * immediately. 1023 */ 1024 /*ARGSUSED1*/ 1025 static void 1026 devfs_inactive(struct vnode *vp, struct cred *cred) 1027 { 1028 int destroy; 1029 struct dv_node *dv = VTODV(vp); 1030 1031 dcmn_err2(("devfs_inactive: %s\n", dv->dv_name)); 1032 mutex_enter(&vp->v_lock); 1033 ASSERT(vp->v_count >= 1); 1034 --vp->v_count; 1035 destroy = (DV_STALE(dv) && vp->v_count == 0); 1036 mutex_exit(&vp->v_lock); 1037 1038 /* stale nodes cannot be rediscovered, destroy it here */ 1039 if (destroy) 1040 dv_destroy(dv, 0); 1041 } 1042 1043 /* 1044 * XXX Why do we need this? NFS mounted /dev directories? 1045 * XXX Talk to peter staubach about this. 1046 */ 1047 static int 1048 devfs_fid(struct vnode *vp, struct fid *fidp) 1049 { 1050 struct dv_node *dv = VTODV(vp); 1051 struct dv_fid *dv_fid; 1052 1053 if (fidp->fid_len < (sizeof (struct dv_fid) - sizeof (ushort_t))) { 1054 fidp->fid_len = sizeof (struct dv_fid) - sizeof (ushort_t); 1055 return (ENOSPC); 1056 } 1057 1058 dv_fid = (struct dv_fid *)fidp; 1059 bzero(dv_fid, sizeof (struct dv_fid)); 1060 dv_fid->dvfid_len = (int)sizeof (struct dv_fid) - sizeof (ushort_t); 1061 dv_fid->dvfid_ino = dv->dv_ino; 1062 /* dv_fid->dvfid_gen = dv->tn_gen; XXX ? */ 1063 1064 return (0); 1065 } 1066 1067 /* 1068 * This pair of routines bracket all VOP_READ, VOP_WRITE 1069 * and VOP_READDIR requests. The contents lock stops things 1070 * moving around while we're looking at them. 1071 * 1072 * Also used by file and record locking. 1073 */ 1074 /*ARGSUSED2*/ 1075 static int 1076 devfs_rwlock(struct vnode *vp, int write_flag, caller_context_t *ct) 1077 { 1078 dcmn_err2(("devfs_rwlock %s\n", VTODV(vp)->dv_name)); 1079 rw_enter(&VTODV(vp)->dv_contents, write_flag ? RW_WRITER : RW_READER); 1080 return (write_flag); 1081 } 1082 1083 /*ARGSUSED1*/ 1084 static void 1085 devfs_rwunlock(struct vnode *vp, int write_flag, caller_context_t *ct) 1086 { 1087 dcmn_err2(("devfs_rwunlock %s\n", VTODV(vp)->dv_name)); 1088 rw_exit(&VTODV(vp)->dv_contents); 1089 } 1090 1091 /* 1092 * XXX Should probably do a better job of computing the maximum 1093 * offset available in the directory. 1094 */ 1095 /*ARGSUSED1*/ 1096 static int 1097 devfs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp) 1098 { 1099 ASSERT(vp->v_type == VDIR); 1100 dcmn_err2(("devfs_seek %s\n", VTODV(vp)->dv_name)); 1101 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 1102 } 1103 1104 vnodeops_t *dv_vnodeops; 1105 1106 const fs_operation_def_t dv_vnodeops_template[] = { 1107 VOPNAME_OPEN, devfs_open, 1108 VOPNAME_CLOSE, devfs_close, 1109 VOPNAME_READ, devfs_read, 1110 VOPNAME_WRITE, devfs_write, 1111 VOPNAME_IOCTL, devfs_ioctl, 1112 VOPNAME_GETATTR, devfs_getattr, 1113 VOPNAME_SETATTR, devfs_setattr, 1114 VOPNAME_ACCESS, devfs_access, 1115 VOPNAME_LOOKUP, devfs_lookup, 1116 VOPNAME_CREATE, devfs_create, 1117 VOPNAME_READDIR, devfs_readdir, 1118 VOPNAME_FSYNC, devfs_fsync, 1119 VOPNAME_INACTIVE, (fs_generic_func_p) devfs_inactive, 1120 VOPNAME_FID, devfs_fid, 1121 VOPNAME_RWLOCK, devfs_rwlock, 1122 VOPNAME_RWUNLOCK, (fs_generic_func_p) devfs_rwunlock, 1123 VOPNAME_SEEK, devfs_seek, 1124 VOPNAME_PATHCONF, devfs_pathconf, 1125 VOPNAME_DISPOSE, fs_error, 1126 VOPNAME_SETSECATTR, devfs_setsecattr, 1127 VOPNAME_GETSECATTR, devfs_getsecattr, 1128 NULL, NULL 1129 }; 1130