1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * vnode ops for the devfs 31 * 32 * For leaf vnode special files (VCHR|VBLK) specfs will always see the VOP 33 * first because dv_find always performs leaf vnode substitution, returning 34 * a specfs vnode with an s_realvp pointing to the devfs leaf vnode. This 35 * means that the only leaf special file VOP operations that devfs will see 36 * after VOP_LOOKUP are the ones that specfs forwards. 
37 */ 38 39 #include <sys/types.h> 40 #include <sys/param.h> 41 #include <sys/t_lock.h> 42 #include <sys/systm.h> 43 #include <sys/sysmacros.h> 44 #include <sys/user.h> 45 #include <sys/time.h> 46 #include <sys/vfs.h> 47 #include <sys/vnode.h> 48 #include <sys/file.h> 49 #include <sys/fcntl.h> 50 #include <sys/flock.h> 51 #include <sys/kmem.h> 52 #include <sys/uio.h> 53 #include <sys/errno.h> 54 #include <sys/stat.h> 55 #include <sys/cred.h> 56 #include <sys/dirent.h> 57 #include <sys/pathname.h> 58 #include <sys/cmn_err.h> 59 #include <sys/debug.h> 60 #include <sys/policy.h> 61 #include <sys/modctl.h> 62 63 #include <fs/fs_subr.h> 64 #include <sys/fs/dv_node.h> 65 66 extern struct vattr dv_vattr_dir, dv_vattr_file; 67 extern dev_t rconsdev; 68 69 /* 70 * Open of devices (leaf nodes) is handled by specfs. 71 * There is nothing to do to open a directory 72 */ 73 /*ARGSUSED*/ 74 static int 75 devfs_open(struct vnode **vpp, int flag, struct cred *cred) 76 { 77 struct dv_node *dv = VTODV(*vpp); 78 79 dcmn_err2(("devfs_open %s\n", dv->dv_name)); 80 ASSERT((*vpp)->v_type == VDIR); 81 return (0); 82 } 83 84 /* 85 * Close of devices (leaf nodes) is handled by specfs. 86 * There is nothing much to do inorder to close a directory. 87 */ 88 /*ARGSUSED1*/ 89 static int 90 devfs_close(struct vnode *vp, int flag, int count, 91 offset_t offset, struct cred *cred) 92 { 93 struct dv_node *dv = VTODV(vp); 94 95 dcmn_err2(("devfs_close %s\n", dv->dv_name)); 96 ASSERT(vp->v_type == VDIR); 97 98 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 99 cleanshares(vp, ttoproc(curthread)->p_pid); 100 return (0); 101 } 102 103 /* 104 * Read of devices (leaf nodes) is handled by specfs. 105 * Read of directories is not supported. 
106 */ 107 /*ARGSUSED*/ 108 static int 109 devfs_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred, 110 struct caller_context *ct) 111 { 112 dcmn_err2(("devfs_read %s\n", VTODV(vp)->dv_name)); 113 ASSERT(vp->v_type == VDIR); 114 ASSERT(RW_READ_HELD(&VTODV(vp)->dv_contents)); 115 return (EISDIR); 116 } 117 118 /* 119 * Write of devices (leaf nodes) is handled by specfs. 120 * Write of directories is not supported. 121 */ 122 /*ARGSUSED*/ 123 static int 124 devfs_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred, 125 struct caller_context *ct) 126 { 127 dcmn_err2(("devfs_write %s\n", VTODV(vp)->dv_name)); 128 ASSERT(vp->v_type == VDIR); 129 ASSERT(RW_WRITE_HELD(&VTODV(vp)->dv_contents)); 130 return (EISDIR); 131 } 132 133 /* 134 * Ioctls to device (leaf nodes) is handled by specfs. 135 * Ioctl to directories is not supported. 136 */ 137 /*ARGSUSED*/ 138 static int 139 devfs_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, 140 struct cred *cred, int *rvalp) 141 { 142 dcmn_err2(("devfs_ioctl %s\n", VTODV(vp)->dv_name)); 143 ASSERT(vp->v_type == VDIR); 144 145 return (ENOTTY); /* no ioctls supported */ 146 } 147 148 /* 149 * We can be asked directly about the attributes of directories, or 150 * (via sp->s_realvp) about the filesystem attributes of special files. 151 * 152 * For directories, we just believe the attribute store 153 * though we mangle the nodeid, fsid, and rdev to convince userland we 154 * really are a different filesystem. 155 * 156 * For special files, a little more fakery is required. 157 * 158 * If the attribute store is not there (read only root), we believe our 159 * memory based attributes. 160 */ 161 static int 162 devfs_getattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cr) 163 { 164 struct dv_node *dv = VTODV(vp); 165 int error = 0; 166 uint_t mask; 167 168 /* 169 * Message goes to console only. Otherwise, the message 170 * causes devfs_getattr to be invoked again... 
infinite loop 171 */ 172 dcmn_err2(("?devfs_getattr %s\n", dv->dv_name)); 173 ASSERT(dv->dv_attr || dv->dv_attrvp); 174 175 if (!(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK)) { 176 cmn_err(CE_WARN, /* panic ? */ 177 "?%s: getattr on vnode type %d", dvnm, vp->v_type); 178 return (ENOENT); 179 } 180 181 if (dv->dv_attr) { 182 /* 183 * obtain from the memory version of attribute. 184 * preserve mask for those that optimize. 185 * devfs specific fields are already merged on creation. 186 */ 187 mask = vap->va_mask; 188 *vap = *dv->dv_attr; 189 vap->va_mask = mask; 190 } else { 191 /* obtain from attribute store and merge */ 192 error = VOP_GETATTR(dv->dv_attrvp, vap, flags, cr); 193 dsysdebug(error, ("vop_getattr %s %d\n", dv->dv_name, error)); 194 dv_vattr_merge(dv, vap); 195 } 196 197 /* 198 * Restrict the permissions of the node fronting the console 199 * to 0600 with root as the owner. This prevents a non-root 200 * user from gaining access to a serial terminal (like /dev/term/a) 201 * which is in reality serving as the console device (/dev/console). 
202 */ 203 if (vp->v_rdev == rconsdev) { 204 mode_t rconsmask = S_IXUSR|S_IRWXG|S_IRWXO; 205 vap->va_mode &= (~rconsmask); 206 vap->va_uid = 0; 207 } 208 209 return (error); 210 } 211 212 static int devfs_unlocked_access(void *, int, struct cred *); 213 214 /*ARGSUSED4*/ 215 static int 216 devfs_setattr_dir( 217 struct dv_node *dv, 218 struct vnode *vp, 219 struct vattr *vap, 220 int flags, 221 struct cred *cr) 222 { 223 struct vattr *map; 224 long int mask; 225 int error = 0; 226 struct vattr vattr; 227 228 ASSERT(dv->dv_attr || dv->dv_attrvp); 229 230 ASSERT(vp->v_type == VDIR); 231 ASSERT((dv->dv_flags & DV_NO_FSPERM) == 0); 232 233 if (vap->va_mask & AT_NOSET) 234 return (EINVAL); 235 236 /* to ensure consistency, single thread setting of attributes */ 237 rw_enter(&dv->dv_contents, RW_WRITER); 238 239 again: if (dv->dv_attr) { 240 241 error = secpolicy_vnode_setattr(cr, vp, vap, dv->dv_attr, 242 flags, devfs_unlocked_access, dv); 243 244 if (error) 245 goto out; 246 247 /* 248 * Apply changes to the memory based attribute. This code 249 * is modeled after the tmpfs implementation of memory 250 * based vnodes 251 */ 252 map = dv->dv_attr; 253 mask = vap->va_mask; 254 255 /* Change file access modes. */ 256 if (mask & AT_MODE) { 257 map->va_mode &= S_IFMT; 258 map->va_mode |= vap->va_mode & ~S_IFMT; 259 } 260 if (mask & AT_UID) 261 map->va_uid = vap->va_uid; 262 if (mask & AT_GID) 263 map->va_gid = vap->va_gid; 264 if (mask & AT_ATIME) 265 map->va_atime = vap->va_atime; 266 if (mask & AT_MTIME) 267 map->va_mtime = vap->va_mtime; 268 269 if (mask & (AT_MODE | AT_UID | AT_GID | AT_MTIME)) 270 gethrestime(&map->va_ctime); 271 } else { 272 /* use the backing attribute store */ 273 ASSERT(dv->dv_attrvp); 274 275 /* 276 * See if we are changing something we care about 277 * the persistence of - return success if we don't care. 
278 */ 279 if (vap->va_mask & (AT_MODE|AT_UID|AT_GID|AT_ATIME|AT_MTIME)) { 280 /* Set the attributes */ 281 error = VOP_SETATTR(dv->dv_attrvp, 282 vap, flags, cr, NULL); 283 dsysdebug(error, 284 ("vop_setattr %s %d\n", dv->dv_name, error)); 285 286 /* 287 * Some file systems may return EROFS for a setattr 288 * on a readonly file system. In this case we create 289 * our own memory based attribute. 290 */ 291 if (error == EROFS) { 292 /* 293 * obtain attributes from existing file 294 * that we will modify and switch to memory 295 * based attribute until attribute store is 296 * read/write. 297 */ 298 vattr = dv_vattr_dir; 299 if (VOP_GETATTR(dv->dv_attrvp, &vattr, 300 flags, cr) == 0) { 301 dv->dv_attr = kmem_alloc( 302 sizeof (struct vattr), KM_SLEEP); 303 *dv->dv_attr = vattr; 304 dv_vattr_merge(dv, dv->dv_attr); 305 goto again; 306 } 307 } 308 } 309 } 310 out: 311 rw_exit(&dv->dv_contents); 312 return (error); 313 } 314 315 316 /* 317 * Compare the uid/gid/mode changes requested for a setattr 318 * operation with the same details of a node's default minor 319 * perm information. Return 0 if identical. 320 */ 321 static int 322 dv_setattr_cmp(struct vattr *map, mperm_t *mp) 323 { 324 if ((map->va_mode & S_IAMB) != (mp->mp_mode & S_IAMB)) 325 return (1); 326 if (map->va_uid != mp->mp_uid) 327 return (1); 328 if (map->va_gid != mp->mp_gid) 329 return (1); 330 return (0); 331 } 332 333 334 /*ARGSUSED4*/ 335 static int 336 devfs_setattr( 337 struct vnode *vp, 338 struct vattr *vap, 339 int flags, 340 struct cred *cr, 341 caller_context_t *ct) 342 { 343 struct dv_node *dv = VTODV(vp); 344 struct dv_node *ddv; 345 struct vnode *dvp; 346 struct vattr *map; 347 long int mask; 348 int error = 0; 349 struct vattr *free_vattr = NULL; 350 struct vattr *vattrp = NULL; 351 mperm_t mp; 352 int persist; 353 354 /* 355 * Message goes to console only. Otherwise, the message 356 * causes devfs_getattr to be invoked again... 
infinite loop 357 */ 358 dcmn_err2(("?devfs_setattr %s\n", dv->dv_name)); 359 ASSERT(dv->dv_attr || dv->dv_attrvp); 360 361 if (!(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK)) { 362 cmn_err(CE_WARN, /* panic ? */ 363 "?%s: getattr on vnode type %d", dvnm, vp->v_type); 364 return (ENOENT); 365 } 366 367 if (vap->va_mask & AT_NOSET) 368 return (EINVAL); 369 370 /* 371 * If we are changing something we don't care about 372 * the persistence of, return success. 373 */ 374 if ((vap->va_mask & 375 (AT_MODE|AT_UID|AT_GID|AT_ATIME|AT_MTIME)) == 0) 376 return (0); 377 378 /* 379 * If driver overrides fs perm, disallow chmod 380 * and do not create attribute nodes. 381 */ 382 if (dv->dv_flags & DV_NO_FSPERM) { 383 ASSERT(dv->dv_attr); 384 if (vap->va_mask & (AT_MODE | AT_UID | AT_GID)) 385 return (EPERM); 386 if ((vap->va_mask & (AT_ATIME|AT_MTIME)) == 0) 387 return (0); 388 rw_enter(&dv->dv_contents, RW_WRITER); 389 if (vap->va_mask & AT_ATIME) 390 dv->dv_attr->va_atime = vap->va_atime; 391 if (vap->va_mask & AT_MTIME) 392 dv->dv_attr->va_mtime = vap->va_mtime; 393 rw_exit(&dv->dv_contents); 394 return (0); 395 } 396 397 /* 398 * Directories are always created but device nodes are 399 * only used to persist non-default permissions. 400 */ 401 if (vp->v_type == VDIR) { 402 ASSERT(dv->dv_attr || dv->dv_attrvp); 403 return (devfs_setattr_dir(dv, vp, vap, flags, cr)); 404 } 405 406 /* 407 * Allocate now before we take any locks 408 */ 409 vattrp = kmem_zalloc(sizeof (*vattrp), KM_SLEEP); 410 411 /* to ensure consistency, single thread setting of attributes */ 412 rw_enter(&dv->dv_contents, RW_WRITER); 413 414 /* 415 * We don't need to create an attribute node 416 * to persist access or modification times. 417 */ 418 persist = (vap->va_mask & (AT_MODE | AT_UID | AT_GID)); 419 420 /* 421 * If persisting something, get the default permissions 422 * for this minor to compare against what the attributes 423 * are now being set to. 
Default ordering is: 424 * - minor_perm match for this minor 425 * - mode supplied by ddi_create_priv_minor_node 426 * - devfs defaults 427 */ 428 if (persist) { 429 if (dev_minorperm(dv->dv_devi, dv->dv_name, &mp) != 0) { 430 mp.mp_uid = dv_vattr_file.va_uid; 431 mp.mp_gid = dv_vattr_file.va_gid; 432 mp.mp_mode = dv_vattr_file.va_mode; 433 if (dv->dv_flags & DV_DFLT_MODE) { 434 ASSERT((dv->dv_dflt_mode & ~S_IAMB) == 0); 435 mp.mp_mode &= ~S_IAMB; 436 mp.mp_mode |= dv->dv_dflt_mode; 437 dcmn_err5(("%s: setattr priv default 0%o\n", 438 dv->dv_name, mp.mp_mode)); 439 } else { 440 dcmn_err5(("%s: setattr devfs default 0%o\n", 441 dv->dv_name, mp.mp_mode)); 442 } 443 } else { 444 dcmn_err5(("%s: setattr minor perm default 0%o\n", 445 dv->dv_name, mp.mp_mode)); 446 } 447 } 448 449 /* 450 * If we don't have a vattr for this node, construct one. 451 */ 452 if (dv->dv_attr) { 453 free_vattr = vattrp; 454 vattrp = NULL; 455 } else { 456 ASSERT(dv->dv_attrvp); 457 ASSERT(vp->v_type != VDIR); 458 *vattrp = dv_vattr_file; 459 error = VOP_GETATTR(dv->dv_attrvp, vattrp, 0, cr); 460 dsysdebug(error, ("vop_getattr %s %d\n", 461 dv->dv_name, error)); 462 if (error) 463 goto out; 464 dv->dv_attr = vattrp; 465 dv_vattr_merge(dv, dv->dv_attr); 466 vattrp = NULL; 467 } 468 469 error = secpolicy_vnode_setattr(cr, vp, vap, dv->dv_attr, 470 flags, devfs_unlocked_access, dv); 471 if (error) { 472 dsysdebug(error, ("devfs_setattr %s secpolicy error %d\n", 473 dv->dv_name, error)); 474 goto out; 475 } 476 477 /* 478 * Apply changes to the memory based attribute. This code 479 * is modeled after the tmpfs implementation of memory 480 * based vnodes 481 */ 482 map = dv->dv_attr; 483 mask = vap->va_mask; 484 485 /* Change file access modes. 
*/ 486 if (mask & AT_MODE) { 487 map->va_mode &= S_IFMT; 488 map->va_mode |= vap->va_mode & ~S_IFMT; 489 } 490 if (mask & AT_UID) 491 map->va_uid = vap->va_uid; 492 if (mask & AT_GID) 493 map->va_gid = vap->va_gid; 494 if (mask & AT_ATIME) 495 map->va_atime = vap->va_atime; 496 if (mask & AT_MTIME) 497 map->va_mtime = vap->va_mtime; 498 499 if (mask & (AT_MODE | AT_UID | AT_GID | AT_MTIME)) { 500 gethrestime(&map->va_ctime); 501 } 502 503 /* 504 * A setattr to defaults means we no longer need the 505 * shadow node as a persistent store, unless there 506 * are ACLs. Otherwise create a shadow node if one 507 * doesn't exist yet. 508 */ 509 if (persist) { 510 if ((dv_setattr_cmp(map, &mp) == 0) && 511 ((dv->dv_flags & DV_ACL) == 0)) { 512 513 if (dv->dv_attrvp) { 514 ddv = dv->dv_dotdot; 515 ASSERT(ddv->dv_attrvp); 516 error = VOP_REMOVE(ddv->dv_attrvp, 517 dv->dv_name, cr); 518 dsysdebug(error, 519 ("vop_remove %s %s %d\n", 520 ddv->dv_name, dv->dv_name, error)); 521 522 if (error == EROFS) 523 error = 0; 524 VN_RELE(dv->dv_attrvp); 525 dv->dv_attrvp = NULL; 526 } 527 ASSERT(dv->dv_attr); 528 } else { 529 if (mask & AT_MODE) 530 dcmn_err5(("%s persisting mode 0%o\n", 531 dv->dv_name, vap->va_mode)); 532 if (mask & AT_UID) 533 dcmn_err5(("%s persisting uid %d\n", 534 dv->dv_name, vap->va_uid)); 535 if (mask & AT_GID) 536 dcmn_err5(("%s persisting gid %d\n", 537 dv->dv_name, vap->va_gid)); 538 539 if (dv->dv_attrvp == NULL) { 540 dvp = DVTOV(dv->dv_dotdot); 541 dv_shadow_node(dvp, dv->dv_name, vp, 542 NULL, NULLVP, cr, 543 DV_SHADOW_CREATE | DV_SHADOW_WRITE_HELD); 544 } 545 if (dv->dv_attrvp) { 546 error = VOP_SETATTR(dv->dv_attrvp, 547 vap, flags, cr, NULL); 548 dsysdebug(error, ("vop_setattr %s %d\n", 549 dv->dv_name, error)); 550 } 551 /* 552 * Some file systems may return EROFS for a setattr 553 * on a readonly file system. In this case save 554 * as our own memory based attribute. 555 * NOTE: ufs is NOT one of these (see ufs_iupdat). 
556 */ 557 if (dv->dv_attr && dv->dv_attrvp && error == 0) { 558 vattrp = dv->dv_attr; 559 dv->dv_attr = NULL; 560 } else if (error == EROFS) 561 error = 0; 562 } 563 } 564 565 out: 566 rw_exit(&dv->dv_contents); 567 568 if (vattrp) 569 kmem_free(vattrp, sizeof (*vattrp)); 570 if (free_vattr) 571 kmem_free(free_vattr, sizeof (*free_vattr)); 572 return (error); 573 } 574 575 static int 576 devfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr) 577 { 578 switch (cmd) { 579 case _PC_ACL_ENABLED: 580 /* 581 * We rely on the underlying filesystem for ACLs, 582 * so direct the query for ACL support there. 583 * ACL support isn't relative to the file 584 * and we can't guarantee that the dv node 585 * has an attribute node, so any valid 586 * attribute node will suffice. 587 */ 588 ASSERT(dvroot); 589 ASSERT(dvroot->dv_attrvp); 590 return (VOP_PATHCONF(dvroot->dv_attrvp, cmd, valp, cr)); 591 /*NOTREACHED*/ 592 } 593 594 return (fs_pathconf(vp, cmd, valp, cr)); 595 } 596 597 /* 598 * Let avp handle security attributes (acl's). 599 */ 600 static int 601 devfs_getsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 602 struct cred *cr) 603 { 604 dvnode_t *dv = VTODV(vp); 605 struct vnode *avp; 606 int error; 607 608 dcmn_err2(("devfs_getsecattr %s\n", dv->dv_name)); 609 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK); 610 611 rw_enter(&dv->dv_contents, RW_READER); 612 613 avp = dv->dv_attrvp; 614 615 /* fabricate the acl */ 616 if (avp == NULL) { 617 error = fs_fab_acl(vp, vsap, flags, cr); 618 rw_exit(&dv->dv_contents); 619 return (error); 620 } 621 622 error = VOP_GETSECATTR(avp, vsap, flags, cr); 623 dsysdebug(error, ("vop_getsecattr %s %d\n", VTODV(vp)->dv_name, error)); 624 rw_exit(&dv->dv_contents); 625 return (error); 626 } 627 628 /* 629 * Set security attributes (acl's) 630 * 631 * Note that the dv_contents lock has already been acquired 632 * by the caller's VOP_RWLOCK. 
633 */ 634 static int 635 devfs_setsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 636 struct cred *cr) 637 { 638 dvnode_t *dv = VTODV(vp); 639 struct vnode *avp; 640 int error; 641 642 dcmn_err2(("devfs_setsecattr %s\n", dv->dv_name)); 643 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK); 644 ASSERT(RW_LOCK_HELD(&dv->dv_contents)); 645 646 /* 647 * Not a supported operation on drivers not providing 648 * file system based permissions. 649 */ 650 if (dv->dv_flags & DV_NO_FSPERM) 651 return (ENOTSUP); 652 653 /* 654 * To complete, the setsecattr requires an underlying attribute node. 655 */ 656 if (dv->dv_attrvp == NULL) { 657 ASSERT(vp->v_type == VCHR || vp->v_type == VBLK); 658 dv_shadow_node(DVTOV(dv->dv_dotdot), dv->dv_name, vp, 659 NULL, NULLVP, cr, DV_SHADOW_CREATE | DV_SHADOW_WRITE_HELD); 660 } 661 662 if ((avp = dv->dv_attrvp) == NULL) { 663 dcmn_err2(("devfs_setsecattr %s: " 664 "cannot construct attribute node\n", dv->dv_name)); 665 return (fs_nosys()); 666 } 667 668 /* 669 * The acl(2) system call issues a VOP_RWLOCK before setting an ACL. 670 * Since backing file systems expect the lock to be held before seeing 671 * a VOP_SETSECATTR ACL, we need to issue the VOP_RWLOCK to the backing 672 * store before forwarding the ACL. 673 */ 674 (void) VOP_RWLOCK(avp, V_WRITELOCK_TRUE, NULL); 675 error = VOP_SETSECATTR(avp, vsap, flags, cr); 676 dsysdebug(error, ("vop_setsecattr %s %d\n", VTODV(vp)->dv_name, error)); 677 VOP_RWUNLOCK(avp, V_WRITELOCK_TRUE, NULL); 678 679 /* 680 * Set DV_ACL if we have a non-trivial set of ACLs. It is not 681 * necessary to hold VOP_RWLOCK since fs_acl_nontrivial only does 682 * VOP_GETSECATTR calls. 683 */ 684 if (fs_acl_nontrivial(avp, cr)) 685 dv->dv_flags |= DV_ACL; 686 return (error); 687 } 688 689 /* 690 * This function is used for secpolicy_setattr(). It must call an 691 * access() like function while it is already holding the 692 * dv_contents lock. 
We only care about this when dv_attr != NULL; 693 * so the unlocked access call only concerns itself with that 694 * particular branch of devfs_access(). 695 */ 696 static int 697 devfs_unlocked_access(void *vdv, int mode, struct cred *cr) 698 { 699 struct dv_node *dv = vdv; 700 int shift = 0; 701 uid_t owner = dv->dv_attr->va_uid; 702 703 /* Check access based on owner, group and public permissions. */ 704 if (crgetuid(cr) != owner) { 705 shift += 3; 706 if (groupmember(dv->dv_attr->va_gid, cr) == 0) 707 shift += 3; 708 } 709 710 /* compute missing mode bits */ 711 mode &= ~(dv->dv_attr->va_mode << shift); 712 713 if (mode == 0) 714 return (0); 715 716 return (secpolicy_vnode_access(cr, DVTOV(dv), owner, mode)); 717 } 718 719 static int 720 devfs_access(struct vnode *vp, int mode, int flags, struct cred *cr) 721 { 722 struct dv_node *dv = VTODV(vp); 723 int res; 724 725 dcmn_err2(("devfs_access %s\n", dv->dv_name)); 726 ASSERT(dv->dv_attr || dv->dv_attrvp); 727 728 /* restrict console access to privileged processes */ 729 if ((vp->v_rdev == rconsdev) && secpolicy_console(cr) != 0) { 730 return (EACCES); 731 } 732 733 if (dv->dv_attr && ((dv->dv_flags & DV_ACL) == 0)) { 734 rw_enter(&dv->dv_contents, RW_READER); 735 if (dv->dv_attr) { 736 res = devfs_unlocked_access(dv, mode, cr); 737 rw_exit(&dv->dv_contents); 738 return (res); 739 } 740 rw_exit(&dv->dv_contents); 741 } 742 return (VOP_ACCESS(dv->dv_attrvp, mode, flags, cr)); 743 } 744 745 /* 746 * Lookup 747 * 748 * Given the directory vnode and the name of the component, return 749 * the corresponding held vnode for that component. 750 * 751 * Of course in these fictional filesystems, nothing's ever quite 752 * -that- simple. 
753 * 754 * devfs name type shadow (fs attributes) type comments 755 * ------------------------------------------------------------------------- 756 * drv[@addr] VDIR drv[@addr] VDIR nexus driver 757 * drv[@addr]:m VCHR/VBLK drv[@addr]:m VREG leaf driver 758 * drv[@addr] VCHR/VBLK drv[@addr]:.default VREG leaf driver 759 * ------------------------------------------------------------------------- 760 * 761 * The following names are reserved for the attribute filesystem (which 762 * could easily be another layer on top of this one - we simply need to 763 * hold the vnode of the thing we're looking at) 764 * 765 * attr name type shadow (fs attributes) type comments 766 * ------------------------------------------------------------------------- 767 * drv[@addr] VDIR - - attribute dir 768 * minorname VDIR - - minorname 769 * attribute VREG - - attribute 770 * ------------------------------------------------------------------------- 771 * 772 * Examples: 773 * 774 * devfs:/devices/.../mm@0:zero VCHR 775 * shadow:/.devices/.../mm@0:zero VREG, fs attrs 776 * devfs:/devices/.../mm@0:/zero/attr VREG, driver attribute 777 * 778 * devfs:/devices/.../sd@0,0:a VBLK 779 * shadow:/.devices/.../sd@0,0:a VREG, fs attrs 780 * devfs:/devices/.../sd@0,0:/a/.type VREG, "ddi_block:chan" 781 * 782 * devfs:/devices/.../mm@0 VCHR 783 * shadow:/.devices/.../mm@0:.default VREG, fs attrs 784 * devfs:/devices/.../mm@0:/.default/attr VREG, driver attribute 785 * devfs:/devices/.../mm@0:/.default/.type VREG, "ddi_pseudo" 786 * 787 * devfs:/devices/.../obio VDIR 788 * shadow:/devices/.../obio VDIR, needed for fs attrs. 789 * devfs:/devices/.../obio:/.default/attr VDIR, driver attribute 790 * 791 * We also need to be able deal with "old" devices that have gone away, 792 * though I think that provided we return them with readdir, they can 793 * be removed (i.e. they don't have to respond to lookup, though it might 794 * be weird if they didn't ;-) 795 * 796 * Lookup has side-effects. 
797 * 798 * - It will create directories and fs attribute files in the shadow hierarchy. 799 * - It should cause non-SID devices to be probed (ask the parent nexi). 800 */ 801 /*ARGSUSED3*/ 802 static int 803 devfs_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, 804 struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred) 805 { 806 ASSERT(dvp->v_type == VDIR); 807 dcmn_err2(("devfs_lookup: %s\n", nm)); 808 return (dv_find(VTODV(dvp), nm, vpp, pnp, rdir, cred, 0)); 809 } 810 811 /* 812 * devfs nodes can't really be created directly by userland - however, 813 * we do allow creates to find existing nodes: 814 * 815 * - any create fails if the node doesn't exist - EROFS. 816 * - creating an existing directory read-only succeeds, otherwise EISDIR. 817 * - exclusive creates fail if the node already exists - EEXIST. 818 * - failure to create the snode for an existing device - ENOSYS. 819 */ 820 /*ARGSUSED2*/ 821 static int 822 devfs_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl, 823 int mode, struct vnode **vpp, struct cred *cred, int flag) 824 { 825 int error; 826 struct vnode *vp; 827 828 dcmn_err2(("devfs_create %s\n", nm)); 829 error = dv_find(VTODV(dvp), nm, &vp, NULL, NULLVP, cred, 0); 830 if (error == 0) { 831 if (excl == EXCL) 832 error = EEXIST; 833 else if (vp->v_type == VDIR && (mode & VWRITE)) 834 error = EISDIR; 835 else 836 error = VOP_ACCESS(vp, mode, 0, cred); 837 838 if (error) { 839 VN_RELE(vp); 840 } else 841 *vpp = vp; 842 } else if (error == ENOENT) 843 error = EROFS; 844 845 return (error); 846 } 847 848 /* 849 * If DV_BUILD is set, we call into nexus driver to do a BUS_CONFIG_ALL. 850 * Otherwise, simply return cached dv_node's. Hotplug code always call 851 * devfs_clean() to invalid the dv_node cache. 
*/
/*
 * Read directory entries from a devfs directory into uiop.
 *
 * Entered with dv_contents held as reader (asserted below).  Directory
 * offsets are entry indices: 0 is ".", 1 is "..", and the children on
 * the dv_dot list follow from 2.  Entries are staged in a kernel buffer
 * the size of the caller's single iovec and copied out in one uiomove.
 *
 * Returns 0 on success (with *eofp, if supplied, telling whether the
 * list was exhausted), EINVAL if more than one iovec is supplied or if
 * not even one entry fits in the buffer, ENOTDIR for a non-directory,
 * or the error from uiomove.
 */
static int
devfs_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred, int *eofp)
{
	struct dv_node *ddv, *dv;
	struct dirent64 *de, *bufp;
	offset_t diroff;
	offset_t soff;
	size_t reclen, movesz;
	int error;
	struct vattr va;
	size_t bufsz;

	ddv = VTODV(dvp);
	dcmn_err2(("devfs_readdir %s: offset %lld len %ld\n",
	    ddv->dv_name, uiop->uio_loffset, uiop->uio_iov->iov_len));
	ASSERT(ddv->dv_attr || ddv->dv_attrvp);
	ASSERT(RW_READ_HELD(&ddv->dv_contents));

	/* an offset at or beyond MAXOFF_T is treated as end of directory */
	if (uiop->uio_loffset >= MAXOFF_T) {
		if (eofp)
			*eofp = 1;
		return (0);
	}

	if (uiop->uio_iovcnt != 1)
		return (EINVAL);

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	/* Load the initial contents */
	if (ddv->dv_flags & DV_BUILD) {
		/*
		 * dv_filldir() is called with the lock held as writer.
		 * If the in-place upgrade fails the lock is dropped and
		 * re-taken, which is why DV_BUILD is tested again below.
		 */
		if (!rw_tryupgrade(&ddv->dv_contents)) {
			rw_exit(&ddv->dv_contents);
			rw_enter(&ddv->dv_contents, RW_WRITER);
		}

		/* recheck and fill */
		if (ddv->dv_flags & DV_BUILD)
			dv_filldir(ddv);

		rw_downgrade(&ddv->dv_contents);
	}

	soff = uiop->uio_offset;
	bufsz = uiop->uio_iov->iov_len;
	de = bufp = kmem_alloc(bufsz, KM_SLEEP);
	movesz = 0;
	/*
	 * Non-NULL sentinel: if we jump to "full" before the child loop
	 * starts, dv must still read as "entries remain".
	 */
	dv = (struct dv_node *)-1;

	/*
	 * Move as many entries into the uio structure as it will take.
	 * Special case "." and "..".
	 */
	diroff = 0;
	if (soff == 0) {				/* . */
		reclen = DIRENT64_RECLEN(strlen("."));
		if ((movesz + reclen) > bufsz)
			goto full;
		de->d_ino = (ino64_t)ddv->dv_ino;
		de->d_off = (off64_t)diroff + 1;
		de->d_reclen = (ushort_t)reclen;

		/* use strncpy(9f) to zero out uninitialized bytes */

		(void) strncpy(de->d_name, ".", DIRENT64_NAMELEN(reclen));
		movesz += reclen;
		de = (dirent64_t *)((char *)de + reclen);
		dcmn_err3(("devfs_readdir: A: diroff %lld, soff %lld: '%s' "
		    "reclen %lu\n", diroff, soff, ".", reclen));
	}

	diroff++;
	if (soff <= 1) {				/* .. */
		reclen = DIRENT64_RECLEN(strlen(".."));
		if ((movesz + reclen) > bufsz)
			goto full;
		de->d_ino = (ino64_t)ddv->dv_dotdot->dv_ino;
		de->d_off = (off64_t)diroff + 1;
		de->d_reclen = (ushort_t)reclen;

		/* use strncpy(9f) to zero out uninitialized bytes */

		(void) strncpy(de->d_name, "..", DIRENT64_NAMELEN(reclen));
		movesz += reclen;
		de = (dirent64_t *)((char *)de + reclen);
		dcmn_err3(("devfs_readdir: B: diroff %lld, soff %lld: '%s' "
		    "reclen %lu\n", diroff, soff, "..", reclen));
	}

	diroff++;
	/* children: diroff advances for every node, emitted or skipped */
	for (dv = ddv->dv_dot; dv; dv = dv->dv_next, diroff++) {
		/*
		 * although DDM_INTERNAL_PATH minor nodes are skipped for
		 * readdirs outside the kernel, they still occupy directory
		 * offsets
		 */
		if (diroff < soff ||
		    ((dv->dv_flags & DV_INTERNAL) && (cred != kcred)))
			continue;

		reclen = DIRENT64_RECLEN(strlen(dv->dv_name));
		if ((movesz + reclen) > bufsz) {
			dcmn_err3(("devfs_readdir: C: diroff "
			    "%lld, soff %lld: '%s' reclen %lu\n",
			    diroff, soff, dv->dv_name, reclen));
			goto full;
		}
		de->d_ino = (ino64_t)dv->dv_ino;
		de->d_off = (off64_t)diroff + 1;
		de->d_reclen = (ushort_t)reclen;

		/* use strncpy(9f) to zero out uninitialized bytes */

		ASSERT(strlen(dv->dv_name) + 1 <=
		    DIRENT64_NAMELEN(reclen));
		(void) strncpy(de->d_name, dv->dv_name,
		    DIRENT64_NAMELEN(reclen));

		movesz += reclen;
		de = (dirent64_t *)((char *)de + reclen);
		dcmn_err4(("devfs_readdir: D: diroff "
		    "%lld, soff %lld: '%s' reclen %lu\n", diroff, soff,
		    dv->dv_name, reclen));
	}

	/* the buffer is full, or we exhausted everything */
full:	dcmn_err3(("devfs_readdir: moving %lu bytes: "
	    "diroff %lld, soff %lld, dv %p\n",
	    movesz, diroff, soff, (void *)dv));

	/* nothing staged although entries remain: buffer too small */
	if ((movesz == 0) && dv)
		error = EINVAL;		/* cannot be represented */
	else {
		error = uiomove(bufp, movesz, UIO_READ, uiop);
		if (error == 0) {
			/* dv is NULL only when the child list ran out */
			if (eofp)
				*eofp = dv ? 0 : 1;
			/* resume at the first entry not yet returned */
			uiop->uio_offset = diroff;
		}

		/*
		 * Update the directory's access time.  devfs_setattr()
		 * takes dv_contents as writer itself, so the reader hold
		 * is dropped around the call and re-acquired afterwards
		 * for the caller.
		 */
		va.va_mask = AT_ATIME;
		gethrestime(&va.va_atime);
		rw_exit(&ddv->dv_contents);
		(void) devfs_setattr(dvp, &va, 0, cred, NULL);
		rw_enter(&ddv->dv_contents, RW_READER);
	}

	kmem_free(bufp, bufsz);
	return (error);
}

/*
 * fsync is a no-op for devfs: nothing is flushed here and 0 is
 * always returned.
 */
/*ARGSUSED*/
static int
devfs_fsync(struct vnode *vp, int syncflag, struct cred *cred)
{
	/*
	 * Message goes to console only. Otherwise, the message
	 * causes devfs_fsync to be invoked again... infinite loop
	 */
	dcmn_err2(("devfs_fsync %s\n", VTODV(vp)->dv_name));
	return (0);
}

/*
 * Normally, we leave the dv_node here at count of 0.
 * The node will be destroyed when dv_cleandir() is called.
 *
 * Stale dv_node's are already unlinked from the fs tree,
 * so dv_cleandir() won't find them. We destroy such nodes
 * immediately.
 */
/*ARGSUSED1*/
static void
devfs_inactive(struct vnode *vp, struct cred *cred)
{
	int destroy;
	struct dv_node *dv = VTODV(vp);

	dcmn_err2(("devfs_inactive: %s\n", dv->dv_name));
	/* drop the hold and sample the stale state under v_lock */
	mutex_enter(&vp->v_lock);
	ASSERT(vp->v_count >= 1);
	--vp->v_count;
	destroy = (DV_STALE(dv) && vp->v_count == 0);
	mutex_exit(&vp->v_lock);

	/* stale nodes cannot be rediscovered, destroy it here */
	if (destroy)
		dv_destroy(dv, 0);
}

/*
 * XXX Why do we need this?  NFS mounted /dev directories?
 * XXX Talk to peter staubach about this.
1047 */ 1048 static int 1049 devfs_fid(struct vnode *vp, struct fid *fidp) 1050 { 1051 struct dv_node *dv = VTODV(vp); 1052 struct dv_fid *dv_fid; 1053 1054 if (fidp->fid_len < (sizeof (struct dv_fid) - sizeof (ushort_t))) { 1055 fidp->fid_len = sizeof (struct dv_fid) - sizeof (ushort_t); 1056 return (ENOSPC); 1057 } 1058 1059 dv_fid = (struct dv_fid *)fidp; 1060 bzero(dv_fid, sizeof (struct dv_fid)); 1061 dv_fid->dvfid_len = (int)sizeof (struct dv_fid) - sizeof (ushort_t); 1062 dv_fid->dvfid_ino = dv->dv_ino; 1063 /* dv_fid->dvfid_gen = dv->tn_gen; XXX ? */ 1064 1065 return (0); 1066 } 1067 1068 /* 1069 * This pair of routines bracket all VOP_READ, VOP_WRITE 1070 * and VOP_READDIR requests. The contents lock stops things 1071 * moving around while we're looking at them. 1072 * 1073 * Also used by file and record locking. 1074 */ 1075 /*ARGSUSED2*/ 1076 static int 1077 devfs_rwlock(struct vnode *vp, int write_flag, caller_context_t *ct) 1078 { 1079 dcmn_err2(("devfs_rwlock %s\n", VTODV(vp)->dv_name)); 1080 rw_enter(&VTODV(vp)->dv_contents, write_flag ? RW_WRITER : RW_READER); 1081 return (write_flag); 1082 } 1083 1084 /*ARGSUSED1*/ 1085 static void 1086 devfs_rwunlock(struct vnode *vp, int write_flag, caller_context_t *ct) 1087 { 1088 dcmn_err2(("devfs_rwunlock %s\n", VTODV(vp)->dv_name)); 1089 rw_exit(&VTODV(vp)->dv_contents); 1090 } 1091 1092 /* 1093 * XXX Should probably do a better job of computing the maximum 1094 * offset available in the directory. 1095 */ 1096 /*ARGSUSED1*/ 1097 static int 1098 devfs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp) 1099 { 1100 ASSERT(vp->v_type == VDIR); 1101 dcmn_err2(("devfs_seek %s\n", VTODV(vp)->dv_name)); 1102 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? 
EINVAL : 0); 1103 } 1104 1105 vnodeops_t *dv_vnodeops; 1106 1107 const fs_operation_def_t dv_vnodeops_template[] = { 1108 VOPNAME_OPEN, devfs_open, 1109 VOPNAME_CLOSE, devfs_close, 1110 VOPNAME_READ, devfs_read, 1111 VOPNAME_WRITE, devfs_write, 1112 VOPNAME_IOCTL, devfs_ioctl, 1113 VOPNAME_GETATTR, devfs_getattr, 1114 VOPNAME_SETATTR, devfs_setattr, 1115 VOPNAME_ACCESS, devfs_access, 1116 VOPNAME_LOOKUP, devfs_lookup, 1117 VOPNAME_CREATE, devfs_create, 1118 VOPNAME_READDIR, devfs_readdir, 1119 VOPNAME_FSYNC, devfs_fsync, 1120 VOPNAME_INACTIVE, (fs_generic_func_p) devfs_inactive, 1121 VOPNAME_FID, devfs_fid, 1122 VOPNAME_RWLOCK, devfs_rwlock, 1123 VOPNAME_RWUNLOCK, (fs_generic_func_p) devfs_rwunlock, 1124 VOPNAME_SEEK, devfs_seek, 1125 VOPNAME_PATHCONF, devfs_pathconf, 1126 VOPNAME_DISPOSE, fs_error, 1127 VOPNAME_SETSECATTR, devfs_setsecattr, 1128 VOPNAME_GETSECATTR, devfs_getsecattr, 1129 NULL, NULL 1130 }; 1131