1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2017 by Delphix. All rights reserved. 24 * Copyright 2023 Oxide Computer Company 25 */ 26 27 /* 28 * vnode ops for the devfs 29 * 30 * For leaf vnode special files (VCHR|VBLK) specfs will always see the VOP 31 * first because dv_find always performs leaf vnode substitution, returning 32 * a specfs vnode with an s_realvp pointing to the devfs leaf vnode. This 33 * means that the only leaf special file VOP operations that devfs will see 34 * after VOP_LOOKUP are the ones that specfs forwards. 
35 */ 36 37 #include <sys/types.h> 38 #include <sys/param.h> 39 #include <sys/t_lock.h> 40 #include <sys/systm.h> 41 #include <sys/sysmacros.h> 42 #include <sys/user.h> 43 #include <sys/time.h> 44 #include <sys/vfs.h> 45 #include <sys/vnode.h> 46 #include <sys/vfs_opreg.h> 47 #include <sys/file.h> 48 #include <sys/fcntl.h> 49 #include <sys/flock.h> 50 #include <sys/kmem.h> 51 #include <sys/uio.h> 52 #include <sys/errno.h> 53 #include <sys/stat.h> 54 #include <sys/cred.h> 55 #include <sys/dirent.h> 56 #include <sys/pathname.h> 57 #include <sys/cmn_err.h> 58 #include <sys/debug.h> 59 #include <sys/policy.h> 60 #include <sys/modctl.h> 61 #include <sys/sunndi.h> 62 #include <fs/fs_subr.h> 63 #include <sys/fs/dv_node.h> 64 65 extern struct vattr dv_vattr_dir, dv_vattr_file; 66 extern dev_t rconsdev; 67 68 /* 69 * Open of devices (leaf nodes) is handled by specfs. 70 * There is nothing to do to open a directory 71 */ 72 /*ARGSUSED*/ 73 static int 74 devfs_open(struct vnode **vpp, int flag, struct cred *cred, 75 caller_context_t *ct) 76 { 77 struct dv_node *dv = VTODV(*vpp); 78 79 dcmn_err2(("devfs_open %s\n", dv->dv_name)); 80 ASSERT((*vpp)->v_type == VDIR); 81 return (0); 82 } 83 84 /* 85 * Close of devices (leaf nodes) is handled by specfs. 86 * There is nothing much to do inorder to close a directory. 87 */ 88 /*ARGSUSED1*/ 89 static int 90 devfs_close(struct vnode *vp, int flag, int count, 91 offset_t offset, struct cred *cred, caller_context_t *ct) 92 { 93 struct dv_node *dv = VTODV(vp); 94 95 dcmn_err2(("devfs_close %s\n", dv->dv_name)); 96 ASSERT(vp->v_type == VDIR); 97 98 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 99 cleanshares(vp, ttoproc(curthread)->p_pid); 100 return (0); 101 } 102 103 /* 104 * Read of devices (leaf nodes) is handled by specfs. 105 * Read of directories is not supported. 
106 */ 107 /*ARGSUSED*/ 108 static int 109 devfs_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred, 110 struct caller_context *ct) 111 { 112 dcmn_err2(("devfs_read %s\n", VTODV(vp)->dv_name)); 113 ASSERT(vp->v_type == VDIR); 114 ASSERT(RW_READ_HELD(&VTODV(vp)->dv_contents)); 115 return (EISDIR); 116 } 117 118 /* 119 * Write of devices (leaf nodes) is handled by specfs. 120 * Write of directories is not supported. 121 */ 122 /*ARGSUSED*/ 123 static int 124 devfs_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred, 125 struct caller_context *ct) 126 { 127 dcmn_err2(("devfs_write %s\n", VTODV(vp)->dv_name)); 128 ASSERT(vp->v_type == VDIR); 129 ASSERT(RW_WRITE_HELD(&VTODV(vp)->dv_contents)); 130 return (EISDIR); 131 } 132 133 /* 134 * Ioctls to device (leaf nodes) is handled by specfs. 135 * Ioctl to directories is not supported. 136 */ 137 /*ARGSUSED*/ 138 static int 139 devfs_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, 140 struct cred *cred, int *rvalp, caller_context_t *ct) 141 { 142 dcmn_err2(("devfs_ioctl %s\n", VTODV(vp)->dv_name)); 143 ASSERT(vp->v_type == VDIR); 144 145 return (ENOTTY); /* no ioctls supported */ 146 } 147 148 /* 149 * We can be asked directly about the attributes of directories, or 150 * (via sp->s_realvp) about the filesystem attributes of special files. 151 * 152 * For directories, we just believe the attribute store 153 * though we mangle the nodeid, fsid, and rdev to convince userland we 154 * really are a different filesystem. 155 * 156 * For special files, a little more fakery is required. 157 * 158 * If the attribute store is not there (read only root), we believe our 159 * memory based attributes. 160 */ 161 static int 162 devfs_getattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cr, 163 caller_context_t *ct) 164 { 165 struct dv_node *dv = VTODV(vp); 166 int error = 0; 167 uint_t mask; 168 169 /* 170 * Message goes to console only. 
Otherwise, the message 171 * causes devfs_getattr to be invoked again... infinite loop 172 */ 173 dcmn_err2(("?devfs_getattr %s\n", dv->dv_name)); 174 ASSERT(dv->dv_attr || dv->dv_attrvp); 175 176 if (!(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK)) { 177 cmn_err(CE_WARN, /* panic ? */ 178 "?%s: getattr on vnode type %d", dvnm, vp->v_type); 179 return (ENOENT); 180 } 181 182 rw_enter(&dv->dv_contents, RW_READER); 183 if (dv->dv_attr) { 184 /* 185 * obtain from the memory version of attribute. 186 * preserve mask for those that optimize. 187 * devfs specific fields are already merged on creation. 188 */ 189 mask = vap->va_mask; 190 *vap = *dv->dv_attr; 191 vap->va_mask = mask; 192 } else { 193 /* obtain from attribute store and merge */ 194 error = VOP_GETATTR(dv->dv_attrvp, vap, flags, cr, ct); 195 dsysdebug(error, ("vop_getattr %s %d\n", dv->dv_name, error)); 196 dv_vattr_merge(dv, vap); 197 } 198 rw_exit(&dv->dv_contents); 199 200 /* 201 * Restrict the permissions of the node fronting the console 202 * to 0600 with root as the owner. This prevents a non-root 203 * user from gaining access to a serial terminal (like /dev/term/a) 204 * which is in reality serving as the console device (/dev/console). 
205 */ 206 if (vp->v_rdev == rconsdev) { 207 mode_t rconsmask = S_IXUSR|S_IRWXG|S_IRWXO; 208 vap->va_mode &= (~rconsmask); 209 vap->va_uid = 0; 210 } 211 212 return (error); 213 } 214 215 static int devfs_unlocked_access(void *, int, struct cred *); 216 217 /*ARGSUSED4*/ 218 static int 219 devfs_setattr_dir( 220 struct dv_node *dv, 221 struct vnode *vp, 222 struct vattr *vap, 223 int flags, 224 struct cred *cr) 225 { 226 struct vattr *map; 227 uint_t mask; 228 int error = 0; 229 struct vattr vattr; 230 231 ASSERT(dv->dv_attr || dv->dv_attrvp); 232 233 ASSERT(vp->v_type == VDIR); 234 ASSERT((dv->dv_flags & DV_NO_FSPERM) == 0); 235 236 if (vap->va_mask & AT_NOSET) 237 return (EINVAL); 238 239 /* to ensure consistency, single thread setting of attributes */ 240 rw_enter(&dv->dv_contents, RW_WRITER); 241 242 again: if (dv->dv_attr) { 243 244 error = secpolicy_vnode_setattr(cr, vp, vap, 245 dv->dv_attr, flags, devfs_unlocked_access, dv); 246 247 if (error) 248 goto out; 249 250 /* 251 * Apply changes to the memory based attribute. This code 252 * is modeled after the tmpfs implementation of memory 253 * based vnodes 254 */ 255 map = dv->dv_attr; 256 mask = vap->va_mask; 257 258 /* Change file access modes. */ 259 if (mask & AT_MODE) { 260 map->va_mode &= S_IFMT; 261 map->va_mode |= vap->va_mode & ~S_IFMT; 262 } 263 if (mask & AT_UID) 264 map->va_uid = vap->va_uid; 265 if (mask & AT_GID) 266 map->va_gid = vap->va_gid; 267 if (mask & AT_ATIME) 268 map->va_atime = vap->va_atime; 269 if (mask & AT_MTIME) 270 map->va_mtime = vap->va_mtime; 271 272 if (mask & (AT_MODE | AT_UID | AT_GID | AT_MTIME)) 273 gethrestime(&map->va_ctime); 274 } else { 275 /* use the backing attribute store */ 276 ASSERT(dv->dv_attrvp); 277 278 /* 279 * See if we are changing something we care about 280 * the persistence of - return success if we don't care. 
281 */ 282 if (vap->va_mask & (AT_MODE|AT_UID|AT_GID|AT_ATIME|AT_MTIME)) { 283 /* Set the attributes */ 284 error = VOP_SETATTR(dv->dv_attrvp, 285 vap, flags, cr, NULL); 286 dsysdebug(error, 287 ("vop_setattr %s %d\n", dv->dv_name, error)); 288 289 /* 290 * Some file systems may return EROFS for a setattr 291 * on a readonly file system. In this case we create 292 * our own memory based attribute. 293 */ 294 if (error == EROFS) { 295 /* 296 * obtain attributes from existing file 297 * that we will modify and switch to memory 298 * based attribute until attribute store is 299 * read/write. 300 */ 301 vattr = dv_vattr_dir; 302 if (VOP_GETATTR(dv->dv_attrvp, 303 &vattr, flags, cr, NULL) == 0) { 304 dv->dv_attr = kmem_alloc( 305 sizeof (struct vattr), KM_SLEEP); 306 *dv->dv_attr = vattr; 307 dv_vattr_merge(dv, dv->dv_attr); 308 goto again; 309 } 310 } 311 } 312 } 313 out: 314 rw_exit(&dv->dv_contents); 315 return (error); 316 } 317 318 319 /* 320 * Compare the uid/gid/mode changes requested for a setattr 321 * operation with the same details of a node's default minor 322 * perm information. Return 0 if identical. 323 */ 324 static int 325 dv_setattr_cmp(struct vattr *map, mperm_t *mp) 326 { 327 if ((map->va_mode & S_IAMB) != (mp->mp_mode & S_IAMB)) 328 return (1); 329 if (map->va_uid != mp->mp_uid) 330 return (1); 331 if (map->va_gid != mp->mp_gid) 332 return (1); 333 return (0); 334 } 335 336 337 /*ARGSUSED4*/ 338 static int 339 devfs_setattr( 340 struct vnode *vp, 341 struct vattr *vap, 342 int flags, 343 struct cred *cr, 344 caller_context_t *ct) 345 { 346 struct dv_node *dv = VTODV(vp); 347 struct dv_node *ddv; 348 struct vnode *dvp; 349 struct vattr *map; 350 uint_t mask; 351 int error = 0; 352 struct vattr *free_vattr = NULL; 353 struct vattr *vattrp = NULL; 354 mperm_t mp; 355 int persist; 356 357 /* 358 * Message goes to console only. Otherwise, the message 359 * causes devfs_getattr to be invoked again... 
infinite loop 360 */ 361 dcmn_err2(("?devfs_setattr %s\n", dv->dv_name)); 362 ASSERT(dv->dv_attr || dv->dv_attrvp); 363 364 if (!(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK)) { 365 cmn_err(CE_WARN, /* panic ? */ 366 "?%s: getattr on vnode type %d", dvnm, vp->v_type); 367 return (ENOENT); 368 } 369 370 if (vap->va_mask & AT_NOSET) 371 return (EINVAL); 372 373 /* 374 * If we are changing something we don't care about 375 * the persistence of, return success. 376 */ 377 if ((vap->va_mask & 378 (AT_MODE|AT_UID|AT_GID|AT_ATIME|AT_MTIME)) == 0) 379 return (0); 380 381 /* 382 * If driver overrides fs perm, disallow chmod 383 * and do not create attribute nodes. 384 */ 385 if (dv->dv_flags & DV_NO_FSPERM) { 386 ASSERT(dv->dv_attr); 387 if (vap->va_mask & (AT_MODE | AT_UID | AT_GID)) 388 return (EPERM); 389 if ((vap->va_mask & (AT_ATIME|AT_MTIME)) == 0) 390 return (0); 391 rw_enter(&dv->dv_contents, RW_WRITER); 392 if (vap->va_mask & AT_ATIME) 393 dv->dv_attr->va_atime = vap->va_atime; 394 if (vap->va_mask & AT_MTIME) 395 dv->dv_attr->va_mtime = vap->va_mtime; 396 rw_exit(&dv->dv_contents); 397 return (0); 398 } 399 400 /* 401 * Directories are always created but device nodes are 402 * only used to persist non-default permissions. 403 */ 404 if (vp->v_type == VDIR) { 405 ASSERT(dv->dv_attr || dv->dv_attrvp); 406 return (devfs_setattr_dir(dv, vp, vap, flags, cr)); 407 } 408 409 /* 410 * Allocate now before we take any locks 411 */ 412 vattrp = kmem_zalloc(sizeof (*vattrp), KM_SLEEP); 413 414 /* to ensure consistency, single thread setting of attributes */ 415 rw_enter(&dv->dv_contents, RW_WRITER); 416 417 /* 418 * We don't need to create an attribute node 419 * to persist access or modification times. 420 */ 421 persist = (vap->va_mask & (AT_MODE | AT_UID | AT_GID)); 422 423 /* 424 * If persisting something, get the default permissions 425 * for this minor to compare against what the attributes 426 * are now being set to. 
Default ordering is: 427 * - minor_perm match for this minor 428 * - mode supplied by ddi_create_priv_minor_node 429 * - devfs defaults 430 */ 431 if (persist) { 432 if (dev_minorperm(dv->dv_devi, dv->dv_name, &mp) != 0) { 433 mp.mp_uid = dv_vattr_file.va_uid; 434 mp.mp_gid = dv_vattr_file.va_gid; 435 mp.mp_mode = dv_vattr_file.va_mode; 436 if (dv->dv_flags & DV_DFLT_MODE) { 437 ASSERT((dv->dv_dflt_mode & ~S_IAMB) == 0); 438 mp.mp_mode &= ~S_IAMB; 439 mp.mp_mode |= dv->dv_dflt_mode; 440 dcmn_err5(("%s: setattr priv default 0%o\n", 441 dv->dv_name, mp.mp_mode)); 442 } else { 443 dcmn_err5(("%s: setattr devfs default 0%o\n", 444 dv->dv_name, mp.mp_mode)); 445 } 446 } else { 447 dcmn_err5(("%s: setattr minor perm default 0%o\n", 448 dv->dv_name, mp.mp_mode)); 449 } 450 } 451 452 /* 453 * If we don't have a vattr for this node, construct one. 454 */ 455 if (dv->dv_attr) { 456 free_vattr = vattrp; 457 vattrp = NULL; 458 } else { 459 ASSERT(dv->dv_attrvp); 460 ASSERT(vp->v_type != VDIR); 461 *vattrp = dv_vattr_file; 462 error = VOP_GETATTR(dv->dv_attrvp, vattrp, 0, cr, ct); 463 dsysdebug(error, ("vop_getattr %s %d\n", dv->dv_name, error)); 464 if (error) 465 goto out; 466 dv->dv_attr = vattrp; 467 dv_vattr_merge(dv, dv->dv_attr); 468 vattrp = NULL; 469 } 470 471 error = secpolicy_vnode_setattr(cr, vp, vap, dv->dv_attr, 472 flags, devfs_unlocked_access, dv); 473 if (error) { 474 dsysdebug(error, ("devfs_setattr %s secpolicy error %d\n", 475 dv->dv_name, error)); 476 goto out; 477 } 478 479 /* 480 * Apply changes to the memory based attribute. This code 481 * is modeled after the tmpfs implementation of memory 482 * based vnodes 483 */ 484 map = dv->dv_attr; 485 mask = vap->va_mask; 486 487 /* Change file access modes. 
*/ 488 if (mask & AT_MODE) { 489 map->va_mode &= S_IFMT; 490 map->va_mode |= vap->va_mode & ~S_IFMT; 491 } 492 if (mask & AT_UID) 493 map->va_uid = vap->va_uid; 494 if (mask & AT_GID) 495 map->va_gid = vap->va_gid; 496 if (mask & AT_ATIME) 497 map->va_atime = vap->va_atime; 498 if (mask & AT_MTIME) 499 map->va_mtime = vap->va_mtime; 500 501 if (mask & (AT_MODE | AT_UID | AT_GID | AT_MTIME)) { 502 gethrestime(&map->va_ctime); 503 } 504 505 /* 506 * A setattr to defaults means we no longer need the 507 * shadow node as a persistent store, unless there 508 * are ACLs. Otherwise create a shadow node if one 509 * doesn't exist yet. 510 */ 511 if (persist) { 512 if ((dv_setattr_cmp(map, &mp) == 0) && 513 ((dv->dv_flags & DV_ACL) == 0)) { 514 515 if (dv->dv_attrvp) { 516 ddv = dv->dv_dotdot; 517 ASSERT(ddv->dv_attrvp); 518 error = VOP_REMOVE(ddv->dv_attrvp, 519 dv->dv_name, cr, ct, 0); 520 dsysdebug(error, 521 ("vop_remove %s %s %d\n", 522 ddv->dv_name, dv->dv_name, error)); 523 524 if (error == EROFS) 525 error = 0; 526 VN_RELE(dv->dv_attrvp); 527 dv->dv_attrvp = NULL; 528 } 529 ASSERT(dv->dv_attr); 530 } else { 531 if (mask & AT_MODE) 532 dcmn_err5(("%s persisting mode 0%o\n", 533 dv->dv_name, vap->va_mode)); 534 if (mask & AT_UID) 535 dcmn_err5(("%s persisting uid %d\n", 536 dv->dv_name, vap->va_uid)); 537 if (mask & AT_GID) 538 dcmn_err5(("%s persisting gid %d\n", 539 dv->dv_name, vap->va_gid)); 540 541 if (dv->dv_attrvp == NULL) { 542 dvp = DVTOV(dv->dv_dotdot); 543 dv_shadow_node(dvp, dv->dv_name, vp, 544 NULL, NULLVP, cr, 545 DV_SHADOW_CREATE | DV_SHADOW_WRITE_HELD); 546 } 547 if (dv->dv_attrvp) { 548 /* If map still valid do TIME for free. 
*/ 549 if (dv->dv_attr == map) { 550 mask = map->va_mask; 551 map->va_mask = 552 vap->va_mask | AT_ATIME | AT_MTIME; 553 error = VOP_SETATTR(dv->dv_attrvp, map, 554 flags, cr, NULL); 555 map->va_mask = mask; 556 } else { 557 error = VOP_SETATTR(dv->dv_attrvp, 558 vap, flags, cr, NULL); 559 } 560 dsysdebug(error, ("vop_setattr %s %d\n", 561 dv->dv_name, error)); 562 } 563 /* 564 * Some file systems may return EROFS for a setattr 565 * on a readonly file system. In this case save 566 * as our own memory based attribute. 567 * NOTE: ufs is NOT one of these (see ufs_iupdat). 568 */ 569 if (dv->dv_attr && dv->dv_attrvp && error == 0) { 570 vattrp = dv->dv_attr; 571 dv->dv_attr = NULL; 572 } else if (error == EROFS) 573 error = 0; 574 } 575 } 576 577 out: 578 rw_exit(&dv->dv_contents); 579 580 if (vattrp) 581 kmem_free(vattrp, sizeof (*vattrp)); 582 if (free_vattr) 583 kmem_free(free_vattr, sizeof (*free_vattr)); 584 return (error); 585 } 586 587 static int 588 devfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 589 caller_context_t *ct) 590 { 591 switch (cmd) { 592 case _PC_ACL_ENABLED: 593 /* 594 * We rely on the underlying filesystem for ACLs, 595 * so direct the query for ACL support there. 596 * ACL support isn't relative to the file 597 * and we can't guarantee that the dv node 598 * has an attribute node, so any valid 599 * attribute node will suffice. 600 */ 601 ASSERT(dvroot); 602 ASSERT(dvroot->dv_attrvp); 603 return (VOP_PATHCONF(dvroot->dv_attrvp, cmd, valp, cr, ct)); 604 /*NOTREACHED*/ 605 } 606 607 return (fs_pathconf(vp, cmd, valp, cr, ct)); 608 } 609 610 /* 611 * Let avp handle security attributes (acl's). 
612 */ 613 static int 614 devfs_getsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 615 struct cred *cr, caller_context_t *ct) 616 { 617 dvnode_t *dv = VTODV(vp); 618 struct vnode *avp; 619 int error; 620 621 dcmn_err2(("devfs_getsecattr %s\n", dv->dv_name)); 622 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK); 623 624 rw_enter(&dv->dv_contents, RW_READER); 625 626 avp = dv->dv_attrvp; 627 628 /* fabricate the acl */ 629 if (avp == NULL) { 630 error = fs_fab_acl(vp, vsap, flags, cr, ct); 631 rw_exit(&dv->dv_contents); 632 return (error); 633 } 634 635 error = VOP_GETSECATTR(avp, vsap, flags, cr, ct); 636 dsysdebug(error, ("vop_getsecattr %s %d\n", VTODV(vp)->dv_name, error)); 637 rw_exit(&dv->dv_contents); 638 return (error); 639 } 640 641 /* 642 * Set security attributes (acl's) 643 * 644 * Note that the dv_contents lock has already been acquired 645 * by the caller's VOP_RWLOCK. 646 */ 647 static int 648 devfs_setsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 649 struct cred *cr, caller_context_t *ct) 650 { 651 dvnode_t *dv = VTODV(vp); 652 struct vnode *avp; 653 int error; 654 655 dcmn_err2(("devfs_setsecattr %s\n", dv->dv_name)); 656 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK); 657 ASSERT(RW_LOCK_HELD(&dv->dv_contents)); 658 659 /* 660 * Not a supported operation on drivers not providing 661 * file system based permissions. 662 */ 663 if (dv->dv_flags & DV_NO_FSPERM) 664 return (ENOTSUP); 665 666 /* 667 * To complete, the setsecattr requires an underlying attribute node. 
668 */ 669 if (dv->dv_attrvp == NULL) { 670 ASSERT(vp->v_type == VCHR || vp->v_type == VBLK); 671 dv_shadow_node(DVTOV(dv->dv_dotdot), dv->dv_name, vp, 672 NULL, NULLVP, cr, DV_SHADOW_CREATE | DV_SHADOW_WRITE_HELD); 673 } 674 675 if ((avp = dv->dv_attrvp) == NULL) { 676 dcmn_err2(("devfs_setsecattr %s: " 677 "cannot construct attribute node\n", dv->dv_name)); 678 return (fs_nosys()); 679 } 680 681 /* 682 * The acl(2) system call issues a VOP_RWLOCK before setting an ACL. 683 * Since backing file systems expect the lock to be held before seeing 684 * a VOP_SETSECATTR ACL, we need to issue the VOP_RWLOCK to the backing 685 * store before forwarding the ACL. 686 */ 687 (void) VOP_RWLOCK(avp, V_WRITELOCK_TRUE, NULL); 688 error = VOP_SETSECATTR(avp, vsap, flags, cr, ct); 689 dsysdebug(error, ("vop_setsecattr %s %d\n", VTODV(vp)->dv_name, error)); 690 VOP_RWUNLOCK(avp, V_WRITELOCK_TRUE, NULL); 691 692 /* 693 * Set DV_ACL if we have a non-trivial set of ACLs. It is not 694 * necessary to hold VOP_RWLOCK since fs_acl_nontrivial only does 695 * VOP_GETSECATTR calls. 696 */ 697 if (fs_acl_nontrivial(avp, cr)) 698 dv->dv_flags |= DV_ACL; 699 return (error); 700 } 701 702 /* 703 * This function is used for secpolicy_setattr(). It must call an 704 * access() like function while it is already holding the 705 * dv_contents lock. We only care about this when dv_attr != NULL; 706 * so the unlocked access call only concerns itself with that 707 * particular branch of devfs_access(). 708 */ 709 static int 710 devfs_unlocked_access(void *vdv, int mode, struct cred *cr) 711 { 712 struct dv_node *dv = vdv; 713 int shift = 0; 714 uid_t owner = dv->dv_attr->va_uid; 715 716 /* Check access based on owner, group and public permissions. 
*/ 717 if (crgetuid(cr) != owner) { 718 shift += 3; 719 if (groupmember(dv->dv_attr->va_gid, cr) == 0) 720 shift += 3; 721 } 722 723 return (secpolicy_vnode_access2(cr, DVTOV(dv), owner, 724 dv->dv_attr->va_mode << shift, mode)); 725 } 726 727 static int 728 devfs_access(struct vnode *vp, int mode, int flags, struct cred *cr, 729 caller_context_t *ct) 730 { 731 struct dv_node *dv = VTODV(vp); 732 int res; 733 734 dcmn_err2(("devfs_access %s\n", dv->dv_name)); 735 ASSERT(dv->dv_attr || dv->dv_attrvp); 736 737 /* restrict console access to privileged processes */ 738 if ((vp->v_rdev == rconsdev) && secpolicy_console(cr) != 0) { 739 return (EACCES); 740 } 741 742 rw_enter(&dv->dv_contents, RW_READER); 743 if (dv->dv_attr && ((dv->dv_flags & DV_ACL) == 0)) { 744 res = devfs_unlocked_access(dv, mode, cr); 745 } else { 746 res = VOP_ACCESS(dv->dv_attrvp, mode, flags, cr, ct); 747 } 748 rw_exit(&dv->dv_contents); 749 return (res); 750 } 751 752 /* 753 * Lookup 754 * 755 * Given the directory vnode and the name of the component, return 756 * the corresponding held vnode for that component. 757 * 758 * Of course in these fictional filesystems, nothing's ever quite 759 * -that- simple. 
760 * 761 * devfs name type shadow (fs attributes) type comments 762 * ------------------------------------------------------------------------- 763 * drv[@addr] VDIR drv[@addr] VDIR nexus driver 764 * drv[@addr]:m VCHR/VBLK drv[@addr]:m VREG leaf driver 765 * drv[@addr] VCHR/VBLK drv[@addr]:.default VREG leaf driver 766 * ------------------------------------------------------------------------- 767 * 768 * The following names are reserved for the attribute filesystem (which 769 * could easily be another layer on top of this one - we simply need to 770 * hold the vnode of the thing we're looking at) 771 * 772 * attr name type shadow (fs attributes) type comments 773 * ------------------------------------------------------------------------- 774 * drv[@addr] VDIR - - attribute dir 775 * minorname VDIR - - minorname 776 * attribute VREG - - attribute 777 * ------------------------------------------------------------------------- 778 * 779 * Examples: 780 * 781 * devfs:/devices/.../mm@0:zero VCHR 782 * shadow:/.devices/.../mm@0:zero VREG, fs attrs 783 * devfs:/devices/.../mm@0:/zero/attr VREG, driver attribute 784 * 785 * devfs:/devices/.../sd@0,0:a VBLK 786 * shadow:/.devices/.../sd@0,0:a VREG, fs attrs 787 * devfs:/devices/.../sd@0,0:/a/.type VREG, "ddi_block:chan" 788 * 789 * devfs:/devices/.../mm@0 VCHR 790 * shadow:/.devices/.../mm@0:.default VREG, fs attrs 791 * devfs:/devices/.../mm@0:/.default/attr VREG, driver attribute 792 * devfs:/devices/.../mm@0:/.default/.type VREG, "ddi_pseudo" 793 * 794 * devfs:/devices/.../obio VDIR 795 * shadow:/devices/.../obio VDIR, needed for fs attrs. 796 * devfs:/devices/.../obio:/.default/attr VDIR, driver attribute 797 * 798 * We also need to be able deal with "old" devices that have gone away, 799 * though I think that provided we return them with readdir, they can 800 * be removed (i.e. they don't have to respond to lookup, though it might 801 * be weird if they didn't ;-) 802 * 803 * Lookup has side-effects. 
804 * 805 * - It will create directories and fs attribute files in the shadow hierarchy. 806 * - It should cause non-SID devices to be probed (ask the parent nexi). 807 */ 808 /*ARGSUSED3*/ 809 static int 810 devfs_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, 811 struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred, 812 caller_context_t *ct, int *direntflags, pathname_t *realpnp) 813 { 814 ASSERT(dvp->v_type == VDIR); 815 dcmn_err2(("devfs_lookup: %s\n", nm)); 816 return (dv_find(VTODV(dvp), nm, vpp, pnp, rdir, cred, 0)); 817 } 818 819 /* 820 * devfs nodes can't really be created directly by userland - however, 821 * we do allow creates to find existing nodes: 822 * 823 * - any create fails if the node doesn't exist - EROFS. 824 * - creating an existing directory read-only succeeds, otherwise EISDIR. 825 * - exclusive creates fail if the node already exists - EEXIST. 826 * - failure to create the snode for an existing device - ENOSYS. 827 */ 828 /*ARGSUSED2*/ 829 static int 830 devfs_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl, 831 int mode, struct vnode **vpp, struct cred *cred, int flag, 832 caller_context_t *ct, vsecattr_t *vsecp) 833 { 834 int error; 835 struct vnode *vp; 836 837 dcmn_err2(("devfs_create %s\n", nm)); 838 error = dv_find(VTODV(dvp), nm, &vp, NULL, NULLVP, cred, 0); 839 if (error == 0) { 840 if (excl == EXCL) 841 error = EEXIST; 842 else if (vp->v_type == VDIR && (mode & VWRITE)) 843 error = EISDIR; 844 else 845 error = VOP_ACCESS(vp, mode, 0, cred, ct); 846 847 if (error) { 848 VN_RELE(vp); 849 } else 850 *vpp = vp; 851 } else if (error == ENOENT) 852 error = EROFS; 853 854 return (error); 855 } 856 857 /* 858 * If DV_BUILD is set, we call into nexus driver to do a BUS_CONFIG_ALL. 859 * Otherwise, simply return cached dv_node's. Hotplug code always call 860 * devfs_clean() to invalid the dv_node cache. 
*/
/*
 * Read directory entries starting at uiop->uio_loffset.
 *
 * Directory offsets are entry indices: 0 is ".", 1 is "..", and the
 * children of the directory follow from 2 in list order.  Entries that
 * are skipped (hidden or internal nodes) still consume an offset.
 *
 * Entered with dv_contents held as reader (asserted below) and returns
 * with it held as reader, although the lock may be dropped and
 * reacquired along the way.
 *
 * Returns EINVAL if the uio does not have exactly one iovec or if not
 * even a single entry fits in the buffer, ENOTDIR if dvp is not a
 * directory, ESTALE if the directory node has gone stale, otherwise the
 * result of uiomove().  *eofp (if supplied) is set when the end of the
 * directory has been reached.
 */
/*ARGSUSED5*/
static int
devfs_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred, int *eofp,
    caller_context_t *ct, int flags)
{
	struct dv_node *ddv, *dv;
	struct dirent64 *de, *bufp;
	offset_t diroff;
	offset_t soff;
	size_t reclen, movesz;
	int error;
	struct vattr va;
	size_t bufsz;

	ddv = VTODV(dvp);
	dcmn_err2(("devfs_readdir %s: offset %lld len %ld\n",
	    ddv->dv_name, uiop->uio_loffset, uiop->uio_iov->iov_len));
	ASSERT(ddv->dv_attr || ddv->dv_attrvp);
	ASSERT(RW_READ_HELD(&ddv->dv_contents));

	/* an offset at or beyond the maximum is reported as end of directory */
	if (uiop->uio_loffset >= MAXOFF_T) {
		if (eofp)
			*eofp = 1;
		return (0);
	}

	if (uiop->uio_iovcnt != 1)
		return (EINVAL);

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	/* Load the initial contents */
	if (ddv->dv_flags & DV_BUILD) {
		/*
		 * Filling the directory needs the writer lock.  If the
		 * in-place upgrade fails, drop the lock and take it again
		 * as writer; other threads may get in during that window.
		 */
		if (!rw_tryupgrade(&ddv->dv_contents)) {
			rw_exit(&ddv->dv_contents);
			rw_enter(&ddv->dv_contents, RW_WRITER);
		}

		/* recheck and fill */
		if (ddv->dv_flags & DV_BUILD)
			dv_filldir(ddv);

		rw_downgrade(&ddv->dv_contents);
	}

	/*
	 * Even if the dv node was not stale at entry to this function, it may
	 * be stale now if another process got in between the rw_exit/rw_enter
	 * calls above and unlinked it.
	 */
	if (DV_STALE(ddv))
		return (ESTALE);

	soff = uiop->uio_loffset;
	bufsz = uiop->uio_iov->iov_len;
	de = bufp = kmem_alloc(bufsz, KM_SLEEP);
	movesz = 0;
	/*
	 * Non-NULL sentinel: if we jump to 'full' before the child loop
	 * starts, dv must not look like "ran off the end of the list".
	 */
	dv = (struct dv_node *)-1;

	/*
	 * Move as many entries into the uio structure as it will take.
	 * Special case "." and "..".
	 */
	diroff = 0;
	if (soff == 0) {				/* . */
		reclen = DIRENT64_RECLEN(strlen("."));
		if ((movesz + reclen) > bufsz)
			goto full;
		de->d_ino = (ino64_t)ddv->dv_ino;
		de->d_off = (off64_t)diroff + 1;
		de->d_reclen = (ushort_t)reclen;

		/* use strncpy(9f) to zero out uninitialized bytes */

		(void) strncpy(de->d_name, ".", DIRENT64_NAMELEN(reclen));
		movesz += reclen;
		de = (dirent64_t *)(intptr_t)((char *)de + reclen);
		dcmn_err3(("devfs_readdir: A: diroff %lld, soff %lld: '%s' "
		    "reclen %lu\n", diroff, soff, ".", reclen));
	}

	diroff++;
	if (soff <= 1) {				/* .. */
		reclen = DIRENT64_RECLEN(strlen(".."));
		if ((movesz + reclen) > bufsz)
			goto full;
		de->d_ino = (ino64_t)ddv->dv_dotdot->dv_ino;
		de->d_off = (off64_t)diroff + 1;
		de->d_reclen = (ushort_t)reclen;

		/* use strncpy(9f) to zero out uninitialized bytes */

		(void) strncpy(de->d_name, "..", DIRENT64_NAMELEN(reclen));
		movesz += reclen;
		de = (dirent64_t *)(intptr_t)((char *)de + reclen);
		dcmn_err3(("devfs_readdir: B: diroff %lld, soff %lld: '%s' "
		    "reclen %lu\n", diroff, soff, "..", reclen));
	}

	diroff++;
	for (dv = DV_FIRST_ENTRY(ddv); dv;
	    dv = DV_NEXT_ENTRY(ddv, dv), diroff++) {
		/* skip entries until at correct directory offset */
		if (diroff < soff)
			continue;

		/*
		 * hidden nodes are skipped (but they still occupy a
		 * directory offset).
		 */
		if (dv->dv_devi && ndi_dev_is_hidden_node(dv->dv_devi))
			continue;

		/*
		 * DDM_INTERNAL_PATH minor nodes are skipped for readdirs
		 * outside the kernel (but they still occupy a directory
		 * offset).
		 */
		if ((dv->dv_flags & DV_INTERNAL) && (cred != kcred))
			continue;

		reclen = DIRENT64_RECLEN(strlen(dv->dv_name));
		if ((movesz + reclen) > bufsz) {
			dcmn_err3(("devfs_readdir: C: diroff "
			    "%lld, soff %lld: '%s' reclen %lu\n",
			    diroff, soff, dv->dv_name, reclen));
			goto full;
		}
		de->d_ino = (ino64_t)dv->dv_ino;
		de->d_off = (off64_t)diroff + 1;
		de->d_reclen = (ushort_t)reclen;

		/* use strncpy(9f) to zero out uninitialized bytes */

		ASSERT(strlen(dv->dv_name) + 1 <=
		    DIRENT64_NAMELEN(reclen));
		(void) strncpy(de->d_name, dv->dv_name,
		    DIRENT64_NAMELEN(reclen));

		movesz += reclen;
		de = (dirent64_t *)(intptr_t)((char *)de + reclen);
		dcmn_err4(("devfs_readdir: D: diroff "
		    "%lld, soff %lld: '%s' reclen %lu\n", diroff, soff,
		    dv->dv_name, reclen));
	}

	/* the buffer is full, or we exhausted everything */
full:	dcmn_err3(("devfs_readdir: moving %lu bytes: "
	    "diroff %lld, soff %lld, dv %p\n",
	    movesz, diroff, soff, (void *)dv));

	/* dv is non-NULL here unless the child list was fully walked */
	if ((movesz == 0) && dv)
		error = EINVAL;		/* cannot be represented */
	else {
		error = uiomove(bufp, movesz, UIO_READ, uiop);
		if (error == 0) {
			if (eofp)
				*eofp = dv ? 0 : 1;
			/* the next read resumes at the entry we stopped on */
			uiop->uio_loffset = diroff;
		}

		/*
		 * Record the access time.  devfs_setattr() takes
		 * dv_contents as writer itself, so the reader hold is
		 * dropped around the call and retaken before returning.
		 */
		va.va_mask = AT_ATIME;
		gethrestime(&va.va_atime);
		rw_exit(&ddv->dv_contents);
		(void) devfs_setattr(dvp, &va, 0, cred, ct);
		rw_enter(&ddv->dv_contents, RW_READER);
	}

	kmem_free(bufp, bufsz);
	return (error);
}

/*
 * Nothing to synchronize: always returns 0.
 */
/*ARGSUSED*/
static int
devfs_fsync(struct vnode *vp, int syncflag, struct cred *cred,
    caller_context_t *ct)
{
	/*
	 * Message goes to console only. Otherwise, the message
	 * causes devfs_fsync to be invoked again... infinite loop
	 */
	dcmn_err2(("devfs_fsync %s\n", VTODV(vp)->dv_name));
	return (0);
}

/*
 * Normally, we leave the dv_node here at count of 0.
 * The node will be destroyed when dv_cleandir() is called.
 *
 * Stale dv_node's are already unlinked from the fs tree,
 * so dv_cleandir() won't find them. We destroy such nodes
 * immediately.
 */
/*ARGSUSED1*/
static void
devfs_inactive(struct vnode *vp, struct cred *cred, caller_context_t *ct)
{
	int destroy;
	struct dv_node *dv = VTODV(vp);

	dcmn_err2(("devfs_inactive: %s\n", dv->dv_name));
	/* drop the reference and sample the count under v_lock */
	mutex_enter(&vp->v_lock);
	ASSERT(vp->v_count >= 1);
	VN_RELE_LOCKED(vp);
	destroy = (DV_STALE(dv) && vp->v_count == 0);
	mutex_exit(&vp->v_lock);

	/* stale nodes cannot be rediscovered, destroy it here */
	if (destroy)
		dv_destroy(dv, 0);
}

/*
 * XXX Why do we need this?  NFS mounted /dev directories?
 * XXX Talk to peter staubach about this.
 *
 * Fill in a file identifier for the node: the fid carries the node's
 * inode number.  If the caller's fid is too small, the required length
 * is stored in fid_len and ENOSPC is returned.
 */
/*ARGSUSED2*/
static int
devfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
{
	struct dv_node	*dv = VTODV(vp);
	struct dv_fid	*dv_fid;

	/* the length excludes the ushort_t length field itself */
	if (fidp->fid_len < (sizeof (struct dv_fid) - sizeof (ushort_t))) {
		fidp->fid_len = sizeof (struct dv_fid) - sizeof (ushort_t);
		return (ENOSPC);
	}

	dv_fid = (struct dv_fid *)fidp;
	bzero(dv_fid, sizeof (struct dv_fid));
	dv_fid->dvfid_len = (int)sizeof (struct dv_fid) - sizeof (ushort_t);
	dv_fid->dvfid_ino = dv->dv_ino;
	/* dv_fid->dvfid_gen = dv->tn_gen; XXX ? */

	return (0);
}

/*
 * This pair of routines bracket all VOP_READ, VOP_WRITE
 * and VOP_READDIR requests.  The contents lock stops things
 * moving around while we're looking at them.
 *
 * Also used by file and record locking.
1106 */ 1107 /*ARGSUSED2*/ 1108 static int 1109 devfs_rwlock(struct vnode *vp, int write_flag, caller_context_t *ct) 1110 { 1111 dcmn_err2(("devfs_rwlock %s\n", VTODV(vp)->dv_name)); 1112 rw_enter(&VTODV(vp)->dv_contents, write_flag ? RW_WRITER : RW_READER); 1113 return (write_flag); 1114 } 1115 1116 /*ARGSUSED1*/ 1117 static void 1118 devfs_rwunlock(struct vnode *vp, int write_flag, caller_context_t *ct) 1119 { 1120 dcmn_err2(("devfs_rwunlock %s\n", VTODV(vp)->dv_name)); 1121 rw_exit(&VTODV(vp)->dv_contents); 1122 } 1123 1124 /* 1125 * XXX Should probably do a better job of computing the maximum 1126 * offset available in the directory. 1127 */ 1128 /*ARGSUSED1*/ 1129 static int 1130 devfs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, 1131 caller_context_t *ct) 1132 { 1133 ASSERT(vp->v_type == VDIR); 1134 dcmn_err2(("devfs_seek %s\n", VTODV(vp)->dv_name)); 1135 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 1136 } 1137 1138 vnodeops_t *dv_vnodeops; 1139 1140 const fs_operation_def_t dv_vnodeops_template[] = { 1141 VOPNAME_OPEN, { .vop_open = devfs_open }, 1142 VOPNAME_CLOSE, { .vop_close = devfs_close }, 1143 VOPNAME_READ, { .vop_read = devfs_read }, 1144 VOPNAME_WRITE, { .vop_write = devfs_write }, 1145 VOPNAME_IOCTL, { .vop_ioctl = devfs_ioctl }, 1146 VOPNAME_GETATTR, { .vop_getattr = devfs_getattr }, 1147 VOPNAME_SETATTR, { .vop_setattr = devfs_setattr }, 1148 VOPNAME_ACCESS, { .vop_access = devfs_access }, 1149 VOPNAME_LOOKUP, { .vop_lookup = devfs_lookup }, 1150 VOPNAME_CREATE, { .vop_create = devfs_create }, 1151 VOPNAME_READDIR, { .vop_readdir = devfs_readdir }, 1152 VOPNAME_FSYNC, { .vop_fsync = devfs_fsync }, 1153 VOPNAME_INACTIVE, { .vop_inactive = devfs_inactive }, 1154 VOPNAME_FID, { .vop_fid = devfs_fid }, 1155 VOPNAME_RWLOCK, { .vop_rwlock = devfs_rwlock }, 1156 VOPNAME_RWUNLOCK, { .vop_rwunlock = devfs_rwunlock }, 1157 VOPNAME_SEEK, { .vop_seek = devfs_seek }, 1158 VOPNAME_PATHCONF, { .vop_pathconf = devfs_pathconf }, 1159 
VOPNAME_DISPOSE, { .error = fs_error }, 1160 VOPNAME_SETSECATTR, { .vop_setsecattr = devfs_setsecattr }, 1161 VOPNAME_GETSECATTR, { .vop_getsecattr = devfs_getsecattr }, 1162 NULL, NULL 1163 }; 1164