1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * vnode ops for the devfs 30 * 31 * For leaf vnode special files (VCHR|VBLK) specfs will always see the VOP 32 * first because dv_find always performs leaf vnode substitution, returning 33 * a specfs vnode with an s_realvp pointing to the devfs leaf vnode. This 34 * means that the only leaf special file VOP operations that devfs will see 35 * after VOP_LOOKUP are the ones that specfs forwards. 36 */ 37 38 #include <sys/types.h> 39 #include <sys/param.h> 40 #include <sys/t_lock.h> 41 #include <sys/systm.h> 42 #include <sys/sysmacros.h> 43 #include <sys/user.h> 44 #include <sys/time.h> 45 #include <sys/vfs.h> 46 #include <sys/vnode.h> 47 #include <sys/vfs_opreg.h> 48 #include <sys/file.h> 49 #include <sys/fcntl.h> 50 #include <sys/flock.h> 51 #include <sys/kmem.h> 52 #include <sys/uio.h> 53 #include <sys/errno.h> 54 #include <sys/stat.h> 55 #include <sys/cred.h> 56 #include <sys/dirent.h> 57 #include <sys/pathname.h> 58 #include <sys/cmn_err.h> 59 #include <sys/debug.h> 60 #include <sys/policy.h> 61 #include <sys/modctl.h> 62 63 #include <fs/fs_subr.h> 64 #include <sys/fs/dv_node.h> 65 66 extern struct vattr dv_vattr_dir, dv_vattr_file; 67 extern dev_t rconsdev; 68 69 /* 70 * Open of devices (leaf nodes) is handled by specfs. 71 * There is nothing to do to open a directory 72 */ 73 /*ARGSUSED*/ 74 static int 75 devfs_open(struct vnode **vpp, int flag, struct cred *cred, 76 caller_context_t *ct) 77 { 78 struct dv_node *dv = VTODV(*vpp); 79 80 dcmn_err2(("devfs_open %s\n", dv->dv_name)); 81 ASSERT((*vpp)->v_type == VDIR); 82 return (0); 83 } 84 85 /* 86 * Close of devices (leaf nodes) is handled by specfs. 87 * There is nothing much to do inorder to close a directory. 88 */ 89 /*ARGSUSED1*/ 90 static int 91 devfs_close(struct vnode *vp, int flag, int count, 92 offset_t offset, struct cred *cred, caller_context_t *ct) 93 { 94 struct dv_node *dv = VTODV(vp); 95 96 dcmn_err2(("devfs_close %s\n", dv->dv_name)); 97 ASSERT(vp->v_type == VDIR); 98 99 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 100 cleanshares(vp, ttoproc(curthread)->p_pid); 101 return (0); 102 } 103 104 /* 105 * Read of devices (leaf nodes) is handled by specfs. 106 * Read of directories is not supported. 107 */ 108 /*ARGSUSED*/ 109 static int 110 devfs_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred, 111 struct caller_context *ct) 112 { 113 dcmn_err2(("devfs_read %s\n", VTODV(vp)->dv_name)); 114 ASSERT(vp->v_type == VDIR); 115 ASSERT(RW_READ_HELD(&VTODV(vp)->dv_contents)); 116 return (EISDIR); 117 } 118 119 /* 120 * Write of devices (leaf nodes) is handled by specfs. 121 * Write of directories is not supported. 122 */ 123 /*ARGSUSED*/ 124 static int 125 devfs_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred, 126 struct caller_context *ct) 127 { 128 dcmn_err2(("devfs_write %s\n", VTODV(vp)->dv_name)); 129 ASSERT(vp->v_type == VDIR); 130 ASSERT(RW_WRITE_HELD(&VTODV(vp)->dv_contents)); 131 return (EISDIR); 132 } 133 134 /* 135 * Ioctls to device (leaf nodes) is handled by specfs. 136 * Ioctl to directories is not supported. 137 */ 138 /*ARGSUSED*/ 139 static int 140 devfs_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, 141 struct cred *cred, int *rvalp, caller_context_t *ct) 142 { 143 dcmn_err2(("devfs_ioctl %s\n", VTODV(vp)->dv_name)); 144 ASSERT(vp->v_type == VDIR); 145 146 return (ENOTTY); /* no ioctls supported */ 147 } 148 149 /* 150 * We can be asked directly about the attributes of directories, or 151 * (via sp->s_realvp) about the filesystem attributes of special files. 152 * 153 * For directories, we just believe the attribute store 154 * though we mangle the nodeid, fsid, and rdev to convince userland we 155 * really are a different filesystem. 156 * 157 * For special files, a little more fakery is required. 158 * 159 * If the attribute store is not there (read only root), we believe our 160 * memory based attributes. 161 */ 162 static int 163 devfs_getattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cr, 164 caller_context_t *ct) 165 { 166 struct dv_node *dv = VTODV(vp); 167 int error = 0; 168 uint_t mask; 169 170 /* 171 * Message goes to console only. Otherwise, the message 172 * causes devfs_getattr to be invoked again... infinite loop 173 */ 174 dcmn_err2(("?devfs_getattr %s\n", dv->dv_name)); 175 ASSERT(dv->dv_attr || dv->dv_attrvp); 176 177 if (!(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK)) { 178 cmn_err(CE_WARN, /* panic ? */ 179 "?%s: getattr on vnode type %d", dvnm, vp->v_type); 180 return (ENOENT); 181 } 182 183 rw_enter(&dv->dv_contents, RW_READER); 184 if (dv->dv_attr) { 185 /* 186 * obtain from the memory version of attribute. 187 * preserve mask for those that optimize. 188 * devfs specific fields are already merged on creation. 189 */ 190 mask = vap->va_mask; 191 *vap = *dv->dv_attr; 192 vap->va_mask = mask; 193 } else { 194 /* obtain from attribute store and merge */ 195 error = VOP_GETATTR(dv->dv_attrvp, vap, flags, cr, ct); 196 dsysdebug(error, ("vop_getattr %s %d\n", dv->dv_name, error)); 197 dv_vattr_merge(dv, vap); 198 } 199 rw_exit(&dv->dv_contents); 200 201 /* 202 * Restrict the permissions of the node fronting the console 203 * to 0600 with root as the owner. This prevents a non-root 204 * user from gaining access to a serial terminal (like /dev/term/a) 205 * which is in reality serving as the console device (/dev/console). 206 */ 207 if (vp->v_rdev == rconsdev) { 208 mode_t rconsmask = S_IXUSR|S_IRWXG|S_IRWXO; 209 vap->va_mode &= (~rconsmask); 210 vap->va_uid = 0; 211 } 212 213 return (error); 214 } 215 216 static int devfs_unlocked_access(void *, int, struct cred *); 217 218 /*ARGSUSED4*/ 219 static int 220 devfs_setattr_dir( 221 struct dv_node *dv, 222 struct vnode *vp, 223 struct vattr *vap, 224 int flags, 225 struct cred *cr) 226 { 227 struct vattr *map; 228 uint_t mask; 229 int error = 0; 230 struct vattr vattr; 231 232 ASSERT(dv->dv_attr || dv->dv_attrvp); 233 234 ASSERT(vp->v_type == VDIR); 235 ASSERT((dv->dv_flags & DV_NO_FSPERM) == 0); 236 237 if (vap->va_mask & AT_NOSET) 238 return (EINVAL); 239 240 /* to ensure consistency, single thread setting of attributes */ 241 rw_enter(&dv->dv_contents, RW_WRITER); 242 243 again: if (dv->dv_attr) { 244 245 error = secpolicy_vnode_setattr(cr, vp, vap, 246 dv->dv_attr, flags, devfs_unlocked_access, dv); 247 248 if (error) 249 goto out; 250 251 /* 252 * Apply changes to the memory based attribute. This code 253 * is modeled after the tmpfs implementation of memory 254 * based vnodes 255 */ 256 map = dv->dv_attr; 257 mask = vap->va_mask; 258 259 /* Change file access modes. */ 260 if (mask & AT_MODE) { 261 map->va_mode &= S_IFMT; 262 map->va_mode |= vap->va_mode & ~S_IFMT; 263 } 264 if (mask & AT_UID) 265 map->va_uid = vap->va_uid; 266 if (mask & AT_GID) 267 map->va_gid = vap->va_gid; 268 if (mask & AT_ATIME) 269 map->va_atime = vap->va_atime; 270 if (mask & AT_MTIME) 271 map->va_mtime = vap->va_mtime; 272 273 if (mask & (AT_MODE | AT_UID | AT_GID | AT_MTIME)) 274 gethrestime(&map->va_ctime); 275 } else { 276 /* use the backing attribute store */ 277 ASSERT(dv->dv_attrvp); 278 279 /* 280 * See if we are changing something we care about 281 * the persistence of - return success if we don't care. 282 */ 283 if (vap->va_mask & (AT_MODE|AT_UID|AT_GID|AT_ATIME|AT_MTIME)) { 284 /* Set the attributes */ 285 error = VOP_SETATTR(dv->dv_attrvp, 286 vap, flags, cr, NULL); 287 dsysdebug(error, 288 ("vop_setattr %s %d\n", dv->dv_name, error)); 289 290 /* 291 * Some file systems may return EROFS for a setattr 292 * on a readonly file system. In this case we create 293 * our own memory based attribute. 294 */ 295 if (error == EROFS) { 296 /* 297 * obtain attributes from existing file 298 * that we will modify and switch to memory 299 * based attribute until attribute store is 300 * read/write. 301 */ 302 vattr = dv_vattr_dir; 303 if (VOP_GETATTR(dv->dv_attrvp, 304 &vattr, flags, cr, NULL) == 0) { 305 dv->dv_attr = kmem_alloc( 306 sizeof (struct vattr), KM_SLEEP); 307 *dv->dv_attr = vattr; 308 dv_vattr_merge(dv, dv->dv_attr); 309 goto again; 310 } 311 } 312 } 313 } 314 out: 315 rw_exit(&dv->dv_contents); 316 return (error); 317 } 318 319 320 /* 321 * Compare the uid/gid/mode changes requested for a setattr 322 * operation with the same details of a node's default minor 323 * perm information. Return 0 if identical. 324 */ 325 static int 326 dv_setattr_cmp(struct vattr *map, mperm_t *mp) 327 { 328 if ((map->va_mode & S_IAMB) != (mp->mp_mode & S_IAMB)) 329 return (1); 330 if (map->va_uid != mp->mp_uid) 331 return (1); 332 if (map->va_gid != mp->mp_gid) 333 return (1); 334 return (0); 335 } 336 337 338 /*ARGSUSED4*/ 339 static int 340 devfs_setattr( 341 struct vnode *vp, 342 struct vattr *vap, 343 int flags, 344 struct cred *cr, 345 caller_context_t *ct) 346 { 347 struct dv_node *dv = VTODV(vp); 348 struct dv_node *ddv; 349 struct vnode *dvp; 350 struct vattr *map; 351 uint_t mask; 352 int error = 0; 353 struct vattr *free_vattr = NULL; 354 struct vattr *vattrp = NULL; 355 mperm_t mp; 356 int persist; 357 358 /* 359 * Message goes to console only. Otherwise, the message 360 * causes devfs_getattr to be invoked again... infinite loop 361 */ 362 dcmn_err2(("?devfs_setattr %s\n", dv->dv_name)); 363 ASSERT(dv->dv_attr || dv->dv_attrvp); 364 365 if (!(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK)) { 366 cmn_err(CE_WARN, /* panic ? */ 367 "?%s: getattr on vnode type %d", dvnm, vp->v_type); 368 return (ENOENT); 369 } 370 371 if (vap->va_mask & AT_NOSET) 372 return (EINVAL); 373 374 /* 375 * If we are changing something we don't care about 376 * the persistence of, return success. 377 */ 378 if ((vap->va_mask & 379 (AT_MODE|AT_UID|AT_GID|AT_ATIME|AT_MTIME)) == 0) 380 return (0); 381 382 /* 383 * If driver overrides fs perm, disallow chmod 384 * and do not create attribute nodes. 385 */ 386 if (dv->dv_flags & DV_NO_FSPERM) { 387 ASSERT(dv->dv_attr); 388 if (vap->va_mask & (AT_MODE | AT_UID | AT_GID)) 389 return (EPERM); 390 if ((vap->va_mask & (AT_ATIME|AT_MTIME)) == 0) 391 return (0); 392 rw_enter(&dv->dv_contents, RW_WRITER); 393 if (vap->va_mask & AT_ATIME) 394 dv->dv_attr->va_atime = vap->va_atime; 395 if (vap->va_mask & AT_MTIME) 396 dv->dv_attr->va_mtime = vap->va_mtime; 397 rw_exit(&dv->dv_contents); 398 return (0); 399 } 400 401 /* 402 * Directories are always created but device nodes are 403 * only used to persist non-default permissions. 404 */ 405 if (vp->v_type == VDIR) { 406 ASSERT(dv->dv_attr || dv->dv_attrvp); 407 return (devfs_setattr_dir(dv, vp, vap, flags, cr)); 408 } 409 410 /* 411 * Allocate now before we take any locks 412 */ 413 vattrp = kmem_zalloc(sizeof (*vattrp), KM_SLEEP); 414 415 /* to ensure consistency, single thread setting of attributes */ 416 rw_enter(&dv->dv_contents, RW_WRITER); 417 418 /* 419 * We don't need to create an attribute node 420 * to persist access or modification times. 421 */ 422 persist = (vap->va_mask & (AT_MODE | AT_UID | AT_GID)); 423 424 /* 425 * If persisting something, get the default permissions 426 * for this minor to compare against what the attributes 427 * are now being set to. Default ordering is: 428 * - minor_perm match for this minor 429 * - mode supplied by ddi_create_priv_minor_node 430 * - devfs defaults 431 */ 432 if (persist) { 433 if (dev_minorperm(dv->dv_devi, dv->dv_name, &mp) != 0) { 434 mp.mp_uid = dv_vattr_file.va_uid; 435 mp.mp_gid = dv_vattr_file.va_gid; 436 mp.mp_mode = dv_vattr_file.va_mode; 437 if (dv->dv_flags & DV_DFLT_MODE) { 438 ASSERT((dv->dv_dflt_mode & ~S_IAMB) == 0); 439 mp.mp_mode &= ~S_IAMB; 440 mp.mp_mode |= dv->dv_dflt_mode; 441 dcmn_err5(("%s: setattr priv default 0%o\n", 442 dv->dv_name, mp.mp_mode)); 443 } else { 444 dcmn_err5(("%s: setattr devfs default 0%o\n", 445 dv->dv_name, mp.mp_mode)); 446 } 447 } else { 448 dcmn_err5(("%s: setattr minor perm default 0%o\n", 449 dv->dv_name, mp.mp_mode)); 450 } 451 } 452 453 /* 454 * If we don't have a vattr for this node, construct one. 455 */ 456 if (dv->dv_attr) { 457 free_vattr = vattrp; 458 vattrp = NULL; 459 } else { 460 ASSERT(dv->dv_attrvp); 461 ASSERT(vp->v_type != VDIR); 462 *vattrp = dv_vattr_file; 463 error = VOP_GETATTR(dv->dv_attrvp, vattrp, 0, cr, ct); 464 dsysdebug(error, ("vop_getattr %s %d\n", dv->dv_name, error)); 465 if (error) 466 goto out; 467 dv->dv_attr = vattrp; 468 dv_vattr_merge(dv, dv->dv_attr); 469 vattrp = NULL; 470 } 471 472 error = secpolicy_vnode_setattr(cr, vp, vap, dv->dv_attr, 473 flags, devfs_unlocked_access, dv); 474 if (error) { 475 dsysdebug(error, ("devfs_setattr %s secpolicy error %d\n", 476 dv->dv_name, error)); 477 goto out; 478 } 479 480 /* 481 * Apply changes to the memory based attribute. This code 482 * is modeled after the tmpfs implementation of memory 483 * based vnodes 484 */ 485 map = dv->dv_attr; 486 mask = vap->va_mask; 487 488 /* Change file access modes. */ 489 if (mask & AT_MODE) { 490 map->va_mode &= S_IFMT; 491 map->va_mode |= vap->va_mode & ~S_IFMT; 492 } 493 if (mask & AT_UID) 494 map->va_uid = vap->va_uid; 495 if (mask & AT_GID) 496 map->va_gid = vap->va_gid; 497 if (mask & AT_ATIME) 498 map->va_atime = vap->va_atime; 499 if (mask & AT_MTIME) 500 map->va_mtime = vap->va_mtime; 501 502 if (mask & (AT_MODE | AT_UID | AT_GID | AT_MTIME)) { 503 gethrestime(&map->va_ctime); 504 } 505 506 /* 507 * A setattr to defaults means we no longer need the 508 * shadow node as a persistent store, unless there 509 * are ACLs. Otherwise create a shadow node if one 510 * doesn't exist yet. 511 */ 512 if (persist) { 513 if ((dv_setattr_cmp(map, &mp) == 0) && 514 ((dv->dv_flags & DV_ACL) == 0)) { 515 516 if (dv->dv_attrvp) { 517 ddv = dv->dv_dotdot; 518 ASSERT(ddv->dv_attrvp); 519 error = VOP_REMOVE(ddv->dv_attrvp, 520 dv->dv_name, cr, ct, 0); 521 dsysdebug(error, 522 ("vop_remove %s %s %d\n", 523 ddv->dv_name, dv->dv_name, error)); 524 525 if (error == EROFS) 526 error = 0; 527 VN_RELE(dv->dv_attrvp); 528 dv->dv_attrvp = NULL; 529 } 530 ASSERT(dv->dv_attr); 531 } else { 532 if (mask & AT_MODE) 533 dcmn_err5(("%s persisting mode 0%o\n", 534 dv->dv_name, vap->va_mode)); 535 if (mask & AT_UID) 536 dcmn_err5(("%s persisting uid %d\n", 537 dv->dv_name, vap->va_uid)); 538 if (mask & AT_GID) 539 dcmn_err5(("%s persisting gid %d\n", 540 dv->dv_name, vap->va_gid)); 541 542 if (dv->dv_attrvp == NULL) { 543 dvp = DVTOV(dv->dv_dotdot); 544 dv_shadow_node(dvp, dv->dv_name, vp, 545 NULL, NULLVP, cr, 546 DV_SHADOW_CREATE | DV_SHADOW_WRITE_HELD); 547 } 548 if (dv->dv_attrvp) { 549 /* If map still valid do TIME for free. */ 550 if (dv->dv_attr == map) { 551 mask = map->va_mask; 552 map->va_mask = 553 vap->va_mask | AT_ATIME | AT_MTIME; 554 error = VOP_SETATTR(dv->dv_attrvp, map, 555 flags, cr, NULL); 556 map->va_mask = mask; 557 } else { 558 error = VOP_SETATTR(dv->dv_attrvp, 559 vap, flags, cr, NULL); 560 } 561 dsysdebug(error, ("vop_setattr %s %d\n", 562 dv->dv_name, error)); 563 } 564 /* 565 * Some file systems may return EROFS for a setattr 566 * on a readonly file system. In this case save 567 * as our own memory based attribute. 568 * NOTE: ufs is NOT one of these (see ufs_iupdat). 569 */ 570 if (dv->dv_attr && dv->dv_attrvp && error == 0) { 571 vattrp = dv->dv_attr; 572 dv->dv_attr = NULL; 573 } else if (error == EROFS) 574 error = 0; 575 } 576 } 577 578 out: 579 rw_exit(&dv->dv_contents); 580 581 if (vattrp) 582 kmem_free(vattrp, sizeof (*vattrp)); 583 if (free_vattr) 584 kmem_free(free_vattr, sizeof (*free_vattr)); 585 return (error); 586 } 587 588 static int 589 devfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 590 caller_context_t *ct) 591 { 592 switch (cmd) { 593 case _PC_ACL_ENABLED: 594 /* 595 * We rely on the underlying filesystem for ACLs, 596 * so direct the query for ACL support there. 597 * ACL support isn't relative to the file 598 * and we can't guarantee that the dv node 599 * has an attribute node, so any valid 600 * attribute node will suffice. 601 */ 602 ASSERT(dvroot); 603 ASSERT(dvroot->dv_attrvp); 604 return (VOP_PATHCONF(dvroot->dv_attrvp, cmd, valp, cr, ct)); 605 /*NOTREACHED*/ 606 } 607 608 return (fs_pathconf(vp, cmd, valp, cr, ct)); 609 } 610 611 /* 612 * Let avp handle security attributes (acl's). 613 */ 614 static int 615 devfs_getsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 616 struct cred *cr, caller_context_t *ct) 617 { 618 dvnode_t *dv = VTODV(vp); 619 struct vnode *avp; 620 int error; 621 622 dcmn_err2(("devfs_getsecattr %s\n", dv->dv_name)); 623 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK); 624 625 rw_enter(&dv->dv_contents, RW_READER); 626 627 avp = dv->dv_attrvp; 628 629 /* fabricate the acl */ 630 if (avp == NULL) { 631 error = fs_fab_acl(vp, vsap, flags, cr, ct); 632 rw_exit(&dv->dv_contents); 633 return (error); 634 } 635 636 error = VOP_GETSECATTR(avp, vsap, flags, cr, ct); 637 dsysdebug(error, ("vop_getsecattr %s %d\n", VTODV(vp)->dv_name, error)); 638 rw_exit(&dv->dv_contents); 639 return (error); 640 } 641 642 /* 643 * Set security attributes (acl's) 644 * 645 * Note that the dv_contents lock has already been acquired 646 * by the caller's VOP_RWLOCK. 647 */ 648 static int 649 devfs_setsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 650 struct cred *cr, caller_context_t *ct) 651 { 652 dvnode_t *dv = VTODV(vp); 653 struct vnode *avp; 654 int error; 655 656 dcmn_err2(("devfs_setsecattr %s\n", dv->dv_name)); 657 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK); 658 ASSERT(RW_LOCK_HELD(&dv->dv_contents)); 659 660 /* 661 * Not a supported operation on drivers not providing 662 * file system based permissions. 663 */ 664 if (dv->dv_flags & DV_NO_FSPERM) 665 return (ENOTSUP); 666 667 /* 668 * To complete, the setsecattr requires an underlying attribute node. 669 */ 670 if (dv->dv_attrvp == NULL) { 671 ASSERT(vp->v_type == VCHR || vp->v_type == VBLK); 672 dv_shadow_node(DVTOV(dv->dv_dotdot), dv->dv_name, vp, 673 NULL, NULLVP, cr, DV_SHADOW_CREATE | DV_SHADOW_WRITE_HELD); 674 } 675 676 if ((avp = dv->dv_attrvp) == NULL) { 677 dcmn_err2(("devfs_setsecattr %s: " 678 "cannot construct attribute node\n", dv->dv_name)); 679 return (fs_nosys()); 680 } 681 682 /* 683 * The acl(2) system call issues a VOP_RWLOCK before setting an ACL. 684 * Since backing file systems expect the lock to be held before seeing 685 * a VOP_SETSECATTR ACL, we need to issue the VOP_RWLOCK to the backing 686 * store before forwarding the ACL. 687 */ 688 (void) VOP_RWLOCK(avp, V_WRITELOCK_TRUE, NULL); 689 error = VOP_SETSECATTR(avp, vsap, flags, cr, ct); 690 dsysdebug(error, ("vop_setsecattr %s %d\n", VTODV(vp)->dv_name, error)); 691 VOP_RWUNLOCK(avp, V_WRITELOCK_TRUE, NULL); 692 693 /* 694 * Set DV_ACL if we have a non-trivial set of ACLs. It is not 695 * necessary to hold VOP_RWLOCK since fs_acl_nontrivial only does 696 * VOP_GETSECATTR calls. 697 */ 698 if (fs_acl_nontrivial(avp, cr)) 699 dv->dv_flags |= DV_ACL; 700 return (error); 701 } 702 703 /* 704 * This function is used for secpolicy_setattr(). It must call an 705 * access() like function while it is already holding the 706 * dv_contents lock. We only care about this when dv_attr != NULL; 707 * so the unlocked access call only concerns itself with that 708 * particular branch of devfs_access(). 709 */ 710 static int 711 devfs_unlocked_access(void *vdv, int mode, struct cred *cr) 712 { 713 struct dv_node *dv = vdv; 714 int shift = 0; 715 uid_t owner = dv->dv_attr->va_uid; 716 717 /* Check access based on owner, group and public permissions. */ 718 if (crgetuid(cr) != owner) { 719 shift += 3; 720 if (groupmember(dv->dv_attr->va_gid, cr) == 0) 721 shift += 3; 722 } 723 724 /* compute missing mode bits */ 725 mode &= ~(dv->dv_attr->va_mode << shift); 726 727 if (mode == 0) 728 return (0); 729 730 return (secpolicy_vnode_access(cr, DVTOV(dv), owner, mode)); 731 } 732 733 static int 734 devfs_access(struct vnode *vp, int mode, int flags, struct cred *cr, 735 caller_context_t *ct) 736 { 737 struct dv_node *dv = VTODV(vp); 738 int res; 739 740 dcmn_err2(("devfs_access %s\n", dv->dv_name)); 741 ASSERT(dv->dv_attr || dv->dv_attrvp); 742 743 /* restrict console access to privileged processes */ 744 if ((vp->v_rdev == rconsdev) && secpolicy_console(cr) != 0) { 745 return (EACCES); 746 } 747 748 rw_enter(&dv->dv_contents, RW_READER); 749 if (dv->dv_attr && ((dv->dv_flags & DV_ACL) == 0)) { 750 res = devfs_unlocked_access(dv, mode, cr); 751 } else { 752 res = VOP_ACCESS(dv->dv_attrvp, mode, flags, cr, ct); 753 } 754 rw_exit(&dv->dv_contents); 755 return (res); 756 } 757 758 /* 759 * Lookup 760 * 761 * Given the directory vnode and the name of the component, return 762 * the corresponding held vnode for that component. 763 * 764 * Of course in these fictional filesystems, nothing's ever quite 765 * -that- simple. 766 * 767 * devfs name type shadow (fs attributes) type comments 768 * ------------------------------------------------------------------------- 769 * drv[@addr] VDIR drv[@addr] VDIR nexus driver 770 * drv[@addr]:m VCHR/VBLK drv[@addr]:m VREG leaf driver 771 * drv[@addr] VCHR/VBLK drv[@addr]:.default VREG leaf driver 772 * ------------------------------------------------------------------------- 773 * 774 * The following names are reserved for the attribute filesystem (which 775 * could easily be another layer on top of this one - we simply need to 776 * hold the vnode of the thing we're looking at) 777 * 778 * attr name type shadow (fs attributes) type comments 779 * ------------------------------------------------------------------------- 780 * drv[@addr] VDIR - - attribute dir 781 * minorname VDIR - - minorname 782 * attribute VREG - - attribute 783 * ------------------------------------------------------------------------- 784 * 785 * Examples: 786 * 787 * devfs:/devices/.../mm@0:zero VCHR 788 * shadow:/.devices/.../mm@0:zero VREG, fs attrs 789 * devfs:/devices/.../mm@0:/zero/attr VREG, driver attribute 790 * 791 * devfs:/devices/.../sd@0,0:a VBLK 792 * shadow:/.devices/.../sd@0,0:a VREG, fs attrs 793 * devfs:/devices/.../sd@0,0:/a/.type VREG, "ddi_block:chan" 794 * 795 * devfs:/devices/.../mm@0 VCHR 796 * shadow:/.devices/.../mm@0:.default VREG, fs attrs 797 * devfs:/devices/.../mm@0:/.default/attr VREG, driver attribute 798 * devfs:/devices/.../mm@0:/.default/.type VREG, "ddi_pseudo" 799 * 800 * devfs:/devices/.../obio VDIR 801 * shadow:/devices/.../obio VDIR, needed for fs attrs. 802 * devfs:/devices/.../obio:/.default/attr VDIR, driver attribute 803 * 804 * We also need to be able deal with "old" devices that have gone away, 805 * though I think that provided we return them with readdir, they can 806 * be removed (i.e. they don't have to respond to lookup, though it might 807 * be weird if they didn't ;-) 808 * 809 * Lookup has side-effects. 810 * 811 * - It will create directories and fs attribute files in the shadow hierarchy. 812 * - It should cause non-SID devices to be probed (ask the parent nexi). 813 */ 814 /*ARGSUSED3*/ 815 static int 816 devfs_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, 817 struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred, 818 caller_context_t *ct, int *direntflags, pathname_t *realpnp) 819 { 820 ASSERT(dvp->v_type == VDIR); 821 dcmn_err2(("devfs_lookup: %s\n", nm)); 822 return (dv_find(VTODV(dvp), nm, vpp, pnp, rdir, cred, 0)); 823 } 824 825 /* 826 * devfs nodes can't really be created directly by userland - however, 827 * we do allow creates to find existing nodes: 828 * 829 * - any create fails if the node doesn't exist - EROFS. 830 * - creating an existing directory read-only succeeds, otherwise EISDIR. 831 * - exclusive creates fail if the node already exists - EEXIST. 832 * - failure to create the snode for an existing device - ENOSYS. 833 */ 834 /*ARGSUSED2*/ 835 static int 836 devfs_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl, 837 int mode, struct vnode **vpp, struct cred *cred, int flag, 838 caller_context_t *ct, vsecattr_t *vsecp) 839 { 840 int error; 841 struct vnode *vp; 842 843 dcmn_err2(("devfs_create %s\n", nm)); 844 error = dv_find(VTODV(dvp), nm, &vp, NULL, NULLVP, cred, 0); 845 if (error == 0) { 846 if (excl == EXCL) 847 error = EEXIST; 848 else if (vp->v_type == VDIR && (mode & VWRITE)) 849 error = EISDIR; 850 else 851 error = VOP_ACCESS(vp, mode, 0, cred, ct); 852 853 if (error) { 854 VN_RELE(vp); 855 } else 856 *vpp = vp; 857 } else if (error == ENOENT) 858 error = EROFS; 859 860 return (error); 861 } 862 863 /* 864 * If DV_BUILD is set, we call into nexus driver to do a BUS_CONFIG_ALL. 865 * Otherwise, simply return cached dv_node's. Hotplug code always call 866 * devfs_clean() to invalid the dv_node cache. 867 */ 868 /*ARGSUSED5*/ 869 static int 870 devfs_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred, int *eofp, 871 caller_context_t *ct, int flags) 872 { 873 struct dv_node *ddv, *dv; 874 struct dirent64 *de, *bufp; 875 offset_t diroff; 876 offset_t soff; 877 size_t reclen, movesz; 878 int error; 879 struct vattr va; 880 size_t bufsz; 881 882 ddv = VTODV(dvp); 883 dcmn_err2(("devfs_readdir %s: offset %lld len %ld\n", 884 ddv->dv_name, uiop->uio_loffset, uiop->uio_iov->iov_len)); 885 ASSERT(ddv->dv_attr || ddv->dv_attrvp); 886 ASSERT(RW_READ_HELD(&ddv->dv_contents)); 887 888 if (uiop->uio_loffset >= MAXOFF_T) { 889 if (eofp) 890 *eofp = 1; 891 return (0); 892 } 893 894 if (uiop->uio_iovcnt != 1) 895 return (EINVAL); 896 897 if (dvp->v_type != VDIR) 898 return (ENOTDIR); 899 900 /* Load the initial contents */ 901 if (ddv->dv_flags & DV_BUILD) { 902 if (!rw_tryupgrade(&ddv->dv_contents)) { 903 rw_exit(&ddv->dv_contents); 904 rw_enter(&ddv->dv_contents, RW_WRITER); 905 } 906 907 /* recheck and fill */ 908 if (ddv->dv_flags & DV_BUILD) 909 dv_filldir(ddv); 910 911 rw_downgrade(&ddv->dv_contents); 912 } 913 914 soff = uiop->uio_loffset; 915 bufsz = uiop->uio_iov->iov_len; 916 de = bufp = kmem_alloc(bufsz, KM_SLEEP); 917 movesz = 0; 918 dv = (struct dv_node *)-1; 919 920 /* 921 * Move as many entries into the uio structure as it will take. 922 * Special case "." and "..". 923 */ 924 diroff = 0; 925 if (soff == 0) { /* . */ 926 reclen = DIRENT64_RECLEN(strlen(".")); 927 if ((movesz + reclen) > bufsz) 928 goto full; 929 de->d_ino = (ino64_t)ddv->dv_ino; 930 de->d_off = (off64_t)diroff + 1; 931 de->d_reclen = (ushort_t)reclen; 932 933 /* use strncpy(9f) to zero out uninitialized bytes */ 934 935 (void) strncpy(de->d_name, ".", DIRENT64_NAMELEN(reclen)); 936 movesz += reclen; 937 de = (dirent64_t *)(intptr_t)((char *)de + reclen); 938 dcmn_err3(("devfs_readdir: A: diroff %lld, soff %lld: '%s' " 939 "reclen %lu\n", diroff, soff, ".", reclen)); 940 } 941 942 diroff++; 943 if (soff <= 1) { /* .. */ 944 reclen = DIRENT64_RECLEN(strlen("..")); 945 if ((movesz + reclen) > bufsz) 946 goto full; 947 de->d_ino = (ino64_t)ddv->dv_dotdot->dv_ino; 948 de->d_off = (off64_t)diroff + 1; 949 de->d_reclen = (ushort_t)reclen; 950 951 /* use strncpy(9f) to zero out uninitialized bytes */ 952 953 (void) strncpy(de->d_name, "..", DIRENT64_NAMELEN(reclen)); 954 movesz += reclen; 955 de = (dirent64_t *)(intptr_t)((char *)de + reclen); 956 dcmn_err3(("devfs_readdir: B: diroff %lld, soff %lld: '%s' " 957 "reclen %lu\n", diroff, soff, "..", reclen)); 958 } 959 960 diroff++; 961 for (dv = DV_FIRST_ENTRY(ddv); dv; 962 dv = DV_NEXT_ENTRY(ddv, dv), diroff++) { 963 /* 964 * although DDM_INTERNAL_PATH minor nodes are skipped for 965 * readdirs outside the kernel, they still occupy directory 966 * offsets 967 */ 968 if (diroff < soff || 969 ((dv->dv_flags & DV_INTERNAL) && (cred != kcred))) 970 continue; 971 972 reclen = DIRENT64_RECLEN(strlen(dv->dv_name)); 973 if ((movesz + reclen) > bufsz) { 974 dcmn_err3(("devfs_readdir: C: diroff " 975 "%lld, soff %lld: '%s' reclen %lu\n", 976 diroff, soff, dv->dv_name, reclen)); 977 goto full; 978 } 979 de->d_ino = (ino64_t)dv->dv_ino; 980 de->d_off = (off64_t)diroff + 1; 981 de->d_reclen = (ushort_t)reclen; 982 983 /* use strncpy(9f) to zero out uninitialized bytes */ 984 985 ASSERT(strlen(dv->dv_name) + 1 <= 986 DIRENT64_NAMELEN(reclen)); 987 (void) strncpy(de->d_name, dv->dv_name, 988 DIRENT64_NAMELEN(reclen)); 989 990 movesz += reclen; 991 de = (dirent64_t *)(intptr_t)((char *)de + reclen); 992 dcmn_err4(("devfs_readdir: D: diroff " 993 "%lld, soff %lld: '%s' reclen %lu\n", diroff, soff, 994 dv->dv_name, reclen)); 995 } 996 997 /* the buffer is full, or we exhausted everything */ 998 full: dcmn_err3(("devfs_readdir: moving %lu bytes: " 999 "diroff %lld, soff %lld, dv %p\n", 1000 movesz, diroff, soff, (void *)dv)); 1001 1002 if ((movesz == 0) && dv) 1003 error = EINVAL; /* cannot be represented */ 1004 else { 1005 error = uiomove(bufp, movesz, UIO_READ, uiop); 1006 if (error == 0) { 1007 if (eofp) 1008 *eofp = dv ? 0 : 1; 1009 uiop->uio_loffset = diroff; 1010 } 1011 1012 va.va_mask = AT_ATIME; 1013 gethrestime(&va.va_atime); 1014 rw_exit(&ddv->dv_contents); 1015 (void) devfs_setattr(dvp, &va, 0, cred, ct); 1016 rw_enter(&ddv->dv_contents, RW_READER); 1017 } 1018 1019 kmem_free(bufp, bufsz); 1020 return (error); 1021 } 1022 1023 /*ARGSUSED*/ 1024 static int 1025 devfs_fsync(struct vnode *vp, int syncflag, struct cred *cred, 1026 caller_context_t *ct) 1027 { 1028 /* 1029 * Message goes to console only. Otherwise, the message 1030 * causes devfs_fsync to be invoked again... infinite loop 1031 */ 1032 dcmn_err2(("devfs_fsync %s\n", VTODV(vp)->dv_name)); 1033 return (0); 1034 } 1035 1036 /* 1037 * Normally, we leave the dv_node here at count of 0. 1038 * The node will be destroyed when dv_cleandir() is called. 1039 * 1040 * Stale dv_node's are already unlinked from the fs tree, 1041 * so dv_cleandir() won't find them. We destroy such nodes 1042 * immediately. 1043 */ 1044 /*ARGSUSED1*/ 1045 static void 1046 devfs_inactive(struct vnode *vp, struct cred *cred, caller_context_t *ct) 1047 { 1048 int destroy; 1049 struct dv_node *dv = VTODV(vp); 1050 1051 dcmn_err2(("devfs_inactive: %s\n", dv->dv_name)); 1052 mutex_enter(&vp->v_lock); 1053 ASSERT(vp->v_count >= 1); 1054 --vp->v_count; 1055 destroy = (DV_STALE(dv) && vp->v_count == 0); 1056 mutex_exit(&vp->v_lock); 1057 1058 /* stale nodes cannot be rediscovered, destroy it here */ 1059 if (destroy) 1060 dv_destroy(dv, 0); 1061 } 1062 1063 /* 1064 * XXX Why do we need this? NFS mounted /dev directories? 1065 * XXX Talk to peter staubach about this. 1066 */ 1067 /*ARGSUSED2*/ 1068 static int 1069 devfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct) 1070 { 1071 struct dv_node *dv = VTODV(vp); 1072 struct dv_fid *dv_fid; 1073 1074 if (fidp->fid_len < (sizeof (struct dv_fid) - sizeof (ushort_t))) { 1075 fidp->fid_len = sizeof (struct dv_fid) - sizeof (ushort_t); 1076 return (ENOSPC); 1077 } 1078 1079 dv_fid = (struct dv_fid *)fidp; 1080 bzero(dv_fid, sizeof (struct dv_fid)); 1081 dv_fid->dvfid_len = (int)sizeof (struct dv_fid) - sizeof (ushort_t); 1082 dv_fid->dvfid_ino = dv->dv_ino; 1083 /* dv_fid->dvfid_gen = dv->tn_gen; XXX ? */ 1084 1085 return (0); 1086 } 1087 1088 /* 1089 * This pair of routines bracket all VOP_READ, VOP_WRITE 1090 * and VOP_READDIR requests. The contents lock stops things 1091 * moving around while we're looking at them. 1092 * 1093 * Also used by file and record locking. 1094 */ 1095 /*ARGSUSED2*/ 1096 static int 1097 devfs_rwlock(struct vnode *vp, int write_flag, caller_context_t *ct) 1098 { 1099 dcmn_err2(("devfs_rwlock %s\n", VTODV(vp)->dv_name)); 1100 rw_enter(&VTODV(vp)->dv_contents, write_flag ? RW_WRITER : RW_READER); 1101 return (write_flag); 1102 } 1103 1104 /*ARGSUSED1*/ 1105 static void 1106 devfs_rwunlock(struct vnode *vp, int write_flag, caller_context_t *ct) 1107 { 1108 dcmn_err2(("devfs_rwunlock %s\n", VTODV(vp)->dv_name)); 1109 rw_exit(&VTODV(vp)->dv_contents); 1110 } 1111 1112 /* 1113 * XXX Should probably do a better job of computing the maximum 1114 * offset available in the directory. 1115 */ 1116 /*ARGSUSED1*/ 1117 static int 1118 devfs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, 1119 caller_context_t *ct) 1120 { 1121 ASSERT(vp->v_type == VDIR); 1122 dcmn_err2(("devfs_seek %s\n", VTODV(vp)->dv_name)); 1123 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 1124 } 1125 1126 vnodeops_t *dv_vnodeops; 1127 1128 const fs_operation_def_t dv_vnodeops_template[] = { 1129 VOPNAME_OPEN, { .vop_open = devfs_open }, 1130 VOPNAME_CLOSE, { .vop_close = devfs_close }, 1131 VOPNAME_READ, { .vop_read = devfs_read }, 1132 VOPNAME_WRITE, { .vop_write = devfs_write }, 1133 VOPNAME_IOCTL, { .vop_ioctl = devfs_ioctl }, 1134 VOPNAME_GETATTR, { .vop_getattr = devfs_getattr }, 1135 VOPNAME_SETATTR, { .vop_setattr = devfs_setattr }, 1136 VOPNAME_ACCESS, { .vop_access = devfs_access }, 1137 VOPNAME_LOOKUP, { .vop_lookup = devfs_lookup }, 1138 VOPNAME_CREATE, { .vop_create = devfs_create }, 1139 VOPNAME_READDIR, { .vop_readdir = devfs_readdir }, 1140 VOPNAME_FSYNC, { .vop_fsync = devfs_fsync }, 1141 VOPNAME_INACTIVE, { .vop_inactive = devfs_inactive }, 1142 VOPNAME_FID, { .vop_fid = devfs_fid }, 1143 VOPNAME_RWLOCK, { .vop_rwlock = devfs_rwlock }, 1144 VOPNAME_RWUNLOCK, { .vop_rwunlock = devfs_rwunlock }, 1145 VOPNAME_SEEK, { .vop_seek = devfs_seek }, 1146 VOPNAME_PATHCONF, { .vop_pathconf = devfs_pathconf }, 1147 VOPNAME_DISPOSE, { .error = fs_error }, 1148 VOPNAME_SETSECATTR, { .vop_setsecattr = devfs_setsecattr }, 1149 VOPNAME_GETSECATTR, { .vop_getsecattr = devfs_getsecattr }, 1150 NULL, NULL 1151 }; 1152