1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * vnode ops for the devfs 28 * 29 * For leaf vnode special files (VCHR|VBLK) specfs will always see the VOP 30 * first because dv_find always performs leaf vnode substitution, returning 31 * a specfs vnode with an s_realvp pointing to the devfs leaf vnode. This 32 * means that the only leaf special file VOP operations that devfs will see 33 * after VOP_LOOKUP are the ones that specfs forwards. 
34 */ 35 36 #include <sys/types.h> 37 #include <sys/param.h> 38 #include <sys/t_lock.h> 39 #include <sys/systm.h> 40 #include <sys/sysmacros.h> 41 #include <sys/user.h> 42 #include <sys/time.h> 43 #include <sys/vfs.h> 44 #include <sys/vnode.h> 45 #include <sys/vfs_opreg.h> 46 #include <sys/file.h> 47 #include <sys/fcntl.h> 48 #include <sys/flock.h> 49 #include <sys/kmem.h> 50 #include <sys/uio.h> 51 #include <sys/errno.h> 52 #include <sys/stat.h> 53 #include <sys/cred.h> 54 #include <sys/dirent.h> 55 #include <sys/pathname.h> 56 #include <sys/cmn_err.h> 57 #include <sys/debug.h> 58 #include <sys/policy.h> 59 #include <sys/modctl.h> 60 #include <sys/sunndi.h> 61 #include <fs/fs_subr.h> 62 #include <sys/fs/dv_node.h> 63 64 extern struct vattr dv_vattr_dir, dv_vattr_file; 65 extern dev_t rconsdev; 66 67 /* 68 * Open of devices (leaf nodes) is handled by specfs. 69 * There is nothing to do to open a directory 70 */ 71 /*ARGSUSED*/ 72 static int 73 devfs_open(struct vnode **vpp, int flag, struct cred *cred, 74 caller_context_t *ct) 75 { 76 struct dv_node *dv = VTODV(*vpp); 77 78 dcmn_err2(("devfs_open %s\n", dv->dv_name)); 79 ASSERT((*vpp)->v_type == VDIR); 80 return (0); 81 } 82 83 /* 84 * Close of devices (leaf nodes) is handled by specfs. 85 * There is nothing much to do inorder to close a directory. 86 */ 87 /*ARGSUSED1*/ 88 static int 89 devfs_close(struct vnode *vp, int flag, int count, 90 offset_t offset, struct cred *cred, caller_context_t *ct) 91 { 92 struct dv_node *dv = VTODV(vp); 93 94 dcmn_err2(("devfs_close %s\n", dv->dv_name)); 95 ASSERT(vp->v_type == VDIR); 96 97 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 98 cleanshares(vp, ttoproc(curthread)->p_pid); 99 return (0); 100 } 101 102 /* 103 * Read of devices (leaf nodes) is handled by specfs. 104 * Read of directories is not supported. 
105 */ 106 /*ARGSUSED*/ 107 static int 108 devfs_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred, 109 struct caller_context *ct) 110 { 111 dcmn_err2(("devfs_read %s\n", VTODV(vp)->dv_name)); 112 ASSERT(vp->v_type == VDIR); 113 ASSERT(RW_READ_HELD(&VTODV(vp)->dv_contents)); 114 return (EISDIR); 115 } 116 117 /* 118 * Write of devices (leaf nodes) is handled by specfs. 119 * Write of directories is not supported. 120 */ 121 /*ARGSUSED*/ 122 static int 123 devfs_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred, 124 struct caller_context *ct) 125 { 126 dcmn_err2(("devfs_write %s\n", VTODV(vp)->dv_name)); 127 ASSERT(vp->v_type == VDIR); 128 ASSERT(RW_WRITE_HELD(&VTODV(vp)->dv_contents)); 129 return (EISDIR); 130 } 131 132 /* 133 * Ioctls to device (leaf nodes) is handled by specfs. 134 * Ioctl to directories is not supported. 135 */ 136 /*ARGSUSED*/ 137 static int 138 devfs_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, 139 struct cred *cred, int *rvalp, caller_context_t *ct) 140 { 141 dcmn_err2(("devfs_ioctl %s\n", VTODV(vp)->dv_name)); 142 ASSERT(vp->v_type == VDIR); 143 144 return (ENOTTY); /* no ioctls supported */ 145 } 146 147 /* 148 * We can be asked directly about the attributes of directories, or 149 * (via sp->s_realvp) about the filesystem attributes of special files. 150 * 151 * For directories, we just believe the attribute store 152 * though we mangle the nodeid, fsid, and rdev to convince userland we 153 * really are a different filesystem. 154 * 155 * For special files, a little more fakery is required. 156 * 157 * If the attribute store is not there (read only root), we believe our 158 * memory based attributes. 159 */ 160 static int 161 devfs_getattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cr, 162 caller_context_t *ct) 163 { 164 struct dv_node *dv = VTODV(vp); 165 int error = 0; 166 uint_t mask; 167 168 /* 169 * Message goes to console only. 
Otherwise, the message 170 * causes devfs_getattr to be invoked again... infinite loop 171 */ 172 dcmn_err2(("?devfs_getattr %s\n", dv->dv_name)); 173 ASSERT(dv->dv_attr || dv->dv_attrvp); 174 175 if (!(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK)) { 176 cmn_err(CE_WARN, /* panic ? */ 177 "?%s: getattr on vnode type %d", dvnm, vp->v_type); 178 return (ENOENT); 179 } 180 181 rw_enter(&dv->dv_contents, RW_READER); 182 if (dv->dv_attr) { 183 /* 184 * obtain from the memory version of attribute. 185 * preserve mask for those that optimize. 186 * devfs specific fields are already merged on creation. 187 */ 188 mask = vap->va_mask; 189 *vap = *dv->dv_attr; 190 vap->va_mask = mask; 191 } else { 192 /* obtain from attribute store and merge */ 193 error = VOP_GETATTR(dv->dv_attrvp, vap, flags, cr, ct); 194 dsysdebug(error, ("vop_getattr %s %d\n", dv->dv_name, error)); 195 dv_vattr_merge(dv, vap); 196 } 197 rw_exit(&dv->dv_contents); 198 199 /* 200 * Restrict the permissions of the node fronting the console 201 * to 0600 with root as the owner. This prevents a non-root 202 * user from gaining access to a serial terminal (like /dev/term/a) 203 * which is in reality serving as the console device (/dev/console). 
204 */ 205 if (vp->v_rdev == rconsdev) { 206 mode_t rconsmask = S_IXUSR|S_IRWXG|S_IRWXO; 207 vap->va_mode &= (~rconsmask); 208 vap->va_uid = 0; 209 } 210 211 return (error); 212 } 213 214 static int devfs_unlocked_access(void *, int, struct cred *); 215 216 /*ARGSUSED4*/ 217 static int 218 devfs_setattr_dir( 219 struct dv_node *dv, 220 struct vnode *vp, 221 struct vattr *vap, 222 int flags, 223 struct cred *cr) 224 { 225 struct vattr *map; 226 uint_t mask; 227 int error = 0; 228 struct vattr vattr; 229 230 ASSERT(dv->dv_attr || dv->dv_attrvp); 231 232 ASSERT(vp->v_type == VDIR); 233 ASSERT((dv->dv_flags & DV_NO_FSPERM) == 0); 234 235 if (vap->va_mask & AT_NOSET) 236 return (EINVAL); 237 238 /* to ensure consistency, single thread setting of attributes */ 239 rw_enter(&dv->dv_contents, RW_WRITER); 240 241 again: if (dv->dv_attr) { 242 243 error = secpolicy_vnode_setattr(cr, vp, vap, 244 dv->dv_attr, flags, devfs_unlocked_access, dv); 245 246 if (error) 247 goto out; 248 249 /* 250 * Apply changes to the memory based attribute. This code 251 * is modeled after the tmpfs implementation of memory 252 * based vnodes 253 */ 254 map = dv->dv_attr; 255 mask = vap->va_mask; 256 257 /* Change file access modes. */ 258 if (mask & AT_MODE) { 259 map->va_mode &= S_IFMT; 260 map->va_mode |= vap->va_mode & ~S_IFMT; 261 } 262 if (mask & AT_UID) 263 map->va_uid = vap->va_uid; 264 if (mask & AT_GID) 265 map->va_gid = vap->va_gid; 266 if (mask & AT_ATIME) 267 map->va_atime = vap->va_atime; 268 if (mask & AT_MTIME) 269 map->va_mtime = vap->va_mtime; 270 271 if (mask & (AT_MODE | AT_UID | AT_GID | AT_MTIME)) 272 gethrestime(&map->va_ctime); 273 } else { 274 /* use the backing attribute store */ 275 ASSERT(dv->dv_attrvp); 276 277 /* 278 * See if we are changing something we care about 279 * the persistence of - return success if we don't care. 
280 */ 281 if (vap->va_mask & (AT_MODE|AT_UID|AT_GID|AT_ATIME|AT_MTIME)) { 282 /* Set the attributes */ 283 error = VOP_SETATTR(dv->dv_attrvp, 284 vap, flags, cr, NULL); 285 dsysdebug(error, 286 ("vop_setattr %s %d\n", dv->dv_name, error)); 287 288 /* 289 * Some file systems may return EROFS for a setattr 290 * on a readonly file system. In this case we create 291 * our own memory based attribute. 292 */ 293 if (error == EROFS) { 294 /* 295 * obtain attributes from existing file 296 * that we will modify and switch to memory 297 * based attribute until attribute store is 298 * read/write. 299 */ 300 vattr = dv_vattr_dir; 301 if (VOP_GETATTR(dv->dv_attrvp, 302 &vattr, flags, cr, NULL) == 0) { 303 dv->dv_attr = kmem_alloc( 304 sizeof (struct vattr), KM_SLEEP); 305 *dv->dv_attr = vattr; 306 dv_vattr_merge(dv, dv->dv_attr); 307 goto again; 308 } 309 } 310 } 311 } 312 out: 313 rw_exit(&dv->dv_contents); 314 return (error); 315 } 316 317 318 /* 319 * Compare the uid/gid/mode changes requested for a setattr 320 * operation with the same details of a node's default minor 321 * perm information. Return 0 if identical. 322 */ 323 static int 324 dv_setattr_cmp(struct vattr *map, mperm_t *mp) 325 { 326 if ((map->va_mode & S_IAMB) != (mp->mp_mode & S_IAMB)) 327 return (1); 328 if (map->va_uid != mp->mp_uid) 329 return (1); 330 if (map->va_gid != mp->mp_gid) 331 return (1); 332 return (0); 333 } 334 335 336 /*ARGSUSED4*/ 337 static int 338 devfs_setattr( 339 struct vnode *vp, 340 struct vattr *vap, 341 int flags, 342 struct cred *cr, 343 caller_context_t *ct) 344 { 345 struct dv_node *dv = VTODV(vp); 346 struct dv_node *ddv; 347 struct vnode *dvp; 348 struct vattr *map; 349 uint_t mask; 350 int error = 0; 351 struct vattr *free_vattr = NULL; 352 struct vattr *vattrp = NULL; 353 mperm_t mp; 354 int persist; 355 356 /* 357 * Message goes to console only. Otherwise, the message 358 * causes devfs_getattr to be invoked again... 
infinite loop 359 */ 360 dcmn_err2(("?devfs_setattr %s\n", dv->dv_name)); 361 ASSERT(dv->dv_attr || dv->dv_attrvp); 362 363 if (!(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK)) { 364 cmn_err(CE_WARN, /* panic ? */ 365 "?%s: getattr on vnode type %d", dvnm, vp->v_type); 366 return (ENOENT); 367 } 368 369 if (vap->va_mask & AT_NOSET) 370 return (EINVAL); 371 372 /* 373 * If we are changing something we don't care about 374 * the persistence of, return success. 375 */ 376 if ((vap->va_mask & 377 (AT_MODE|AT_UID|AT_GID|AT_ATIME|AT_MTIME)) == 0) 378 return (0); 379 380 /* 381 * If driver overrides fs perm, disallow chmod 382 * and do not create attribute nodes. 383 */ 384 if (dv->dv_flags & DV_NO_FSPERM) { 385 ASSERT(dv->dv_attr); 386 if (vap->va_mask & (AT_MODE | AT_UID | AT_GID)) 387 return (EPERM); 388 if ((vap->va_mask & (AT_ATIME|AT_MTIME)) == 0) 389 return (0); 390 rw_enter(&dv->dv_contents, RW_WRITER); 391 if (vap->va_mask & AT_ATIME) 392 dv->dv_attr->va_atime = vap->va_atime; 393 if (vap->va_mask & AT_MTIME) 394 dv->dv_attr->va_mtime = vap->va_mtime; 395 rw_exit(&dv->dv_contents); 396 return (0); 397 } 398 399 /* 400 * Directories are always created but device nodes are 401 * only used to persist non-default permissions. 402 */ 403 if (vp->v_type == VDIR) { 404 ASSERT(dv->dv_attr || dv->dv_attrvp); 405 return (devfs_setattr_dir(dv, vp, vap, flags, cr)); 406 } 407 408 /* 409 * Allocate now before we take any locks 410 */ 411 vattrp = kmem_zalloc(sizeof (*vattrp), KM_SLEEP); 412 413 /* to ensure consistency, single thread setting of attributes */ 414 rw_enter(&dv->dv_contents, RW_WRITER); 415 416 /* 417 * We don't need to create an attribute node 418 * to persist access or modification times. 419 */ 420 persist = (vap->va_mask & (AT_MODE | AT_UID | AT_GID)); 421 422 /* 423 * If persisting something, get the default permissions 424 * for this minor to compare against what the attributes 425 * are now being set to. 
Default ordering is: 426 * - minor_perm match for this minor 427 * - mode supplied by ddi_create_priv_minor_node 428 * - devfs defaults 429 */ 430 if (persist) { 431 if (dev_minorperm(dv->dv_devi, dv->dv_name, &mp) != 0) { 432 mp.mp_uid = dv_vattr_file.va_uid; 433 mp.mp_gid = dv_vattr_file.va_gid; 434 mp.mp_mode = dv_vattr_file.va_mode; 435 if (dv->dv_flags & DV_DFLT_MODE) { 436 ASSERT((dv->dv_dflt_mode & ~S_IAMB) == 0); 437 mp.mp_mode &= ~S_IAMB; 438 mp.mp_mode |= dv->dv_dflt_mode; 439 dcmn_err5(("%s: setattr priv default 0%o\n", 440 dv->dv_name, mp.mp_mode)); 441 } else { 442 dcmn_err5(("%s: setattr devfs default 0%o\n", 443 dv->dv_name, mp.mp_mode)); 444 } 445 } else { 446 dcmn_err5(("%s: setattr minor perm default 0%o\n", 447 dv->dv_name, mp.mp_mode)); 448 } 449 } 450 451 /* 452 * If we don't have a vattr for this node, construct one. 453 */ 454 if (dv->dv_attr) { 455 free_vattr = vattrp; 456 vattrp = NULL; 457 } else { 458 ASSERT(dv->dv_attrvp); 459 ASSERT(vp->v_type != VDIR); 460 *vattrp = dv_vattr_file; 461 error = VOP_GETATTR(dv->dv_attrvp, vattrp, 0, cr, ct); 462 dsysdebug(error, ("vop_getattr %s %d\n", dv->dv_name, error)); 463 if (error) 464 goto out; 465 dv->dv_attr = vattrp; 466 dv_vattr_merge(dv, dv->dv_attr); 467 vattrp = NULL; 468 } 469 470 error = secpolicy_vnode_setattr(cr, vp, vap, dv->dv_attr, 471 flags, devfs_unlocked_access, dv); 472 if (error) { 473 dsysdebug(error, ("devfs_setattr %s secpolicy error %d\n", 474 dv->dv_name, error)); 475 goto out; 476 } 477 478 /* 479 * Apply changes to the memory based attribute. This code 480 * is modeled after the tmpfs implementation of memory 481 * based vnodes 482 */ 483 map = dv->dv_attr; 484 mask = vap->va_mask; 485 486 /* Change file access modes. 
*/ 487 if (mask & AT_MODE) { 488 map->va_mode &= S_IFMT; 489 map->va_mode |= vap->va_mode & ~S_IFMT; 490 } 491 if (mask & AT_UID) 492 map->va_uid = vap->va_uid; 493 if (mask & AT_GID) 494 map->va_gid = vap->va_gid; 495 if (mask & AT_ATIME) 496 map->va_atime = vap->va_atime; 497 if (mask & AT_MTIME) 498 map->va_mtime = vap->va_mtime; 499 500 if (mask & (AT_MODE | AT_UID | AT_GID | AT_MTIME)) { 501 gethrestime(&map->va_ctime); 502 } 503 504 /* 505 * A setattr to defaults means we no longer need the 506 * shadow node as a persistent store, unless there 507 * are ACLs. Otherwise create a shadow node if one 508 * doesn't exist yet. 509 */ 510 if (persist) { 511 if ((dv_setattr_cmp(map, &mp) == 0) && 512 ((dv->dv_flags & DV_ACL) == 0)) { 513 514 if (dv->dv_attrvp) { 515 ddv = dv->dv_dotdot; 516 ASSERT(ddv->dv_attrvp); 517 error = VOP_REMOVE(ddv->dv_attrvp, 518 dv->dv_name, cr, ct, 0); 519 dsysdebug(error, 520 ("vop_remove %s %s %d\n", 521 ddv->dv_name, dv->dv_name, error)); 522 523 if (error == EROFS) 524 error = 0; 525 VN_RELE(dv->dv_attrvp); 526 dv->dv_attrvp = NULL; 527 } 528 ASSERT(dv->dv_attr); 529 } else { 530 if (mask & AT_MODE) 531 dcmn_err5(("%s persisting mode 0%o\n", 532 dv->dv_name, vap->va_mode)); 533 if (mask & AT_UID) 534 dcmn_err5(("%s persisting uid %d\n", 535 dv->dv_name, vap->va_uid)); 536 if (mask & AT_GID) 537 dcmn_err5(("%s persisting gid %d\n", 538 dv->dv_name, vap->va_gid)); 539 540 if (dv->dv_attrvp == NULL) { 541 dvp = DVTOV(dv->dv_dotdot); 542 dv_shadow_node(dvp, dv->dv_name, vp, 543 NULL, NULLVP, cr, 544 DV_SHADOW_CREATE | DV_SHADOW_WRITE_HELD); 545 } 546 if (dv->dv_attrvp) { 547 /* If map still valid do TIME for free. 
*/ 548 if (dv->dv_attr == map) { 549 mask = map->va_mask; 550 map->va_mask = 551 vap->va_mask | AT_ATIME | AT_MTIME; 552 error = VOP_SETATTR(dv->dv_attrvp, map, 553 flags, cr, NULL); 554 map->va_mask = mask; 555 } else { 556 error = VOP_SETATTR(dv->dv_attrvp, 557 vap, flags, cr, NULL); 558 } 559 dsysdebug(error, ("vop_setattr %s %d\n", 560 dv->dv_name, error)); 561 } 562 /* 563 * Some file systems may return EROFS for a setattr 564 * on a readonly file system. In this case save 565 * as our own memory based attribute. 566 * NOTE: ufs is NOT one of these (see ufs_iupdat). 567 */ 568 if (dv->dv_attr && dv->dv_attrvp && error == 0) { 569 vattrp = dv->dv_attr; 570 dv->dv_attr = NULL; 571 } else if (error == EROFS) 572 error = 0; 573 } 574 } 575 576 out: 577 rw_exit(&dv->dv_contents); 578 579 if (vattrp) 580 kmem_free(vattrp, sizeof (*vattrp)); 581 if (free_vattr) 582 kmem_free(free_vattr, sizeof (*free_vattr)); 583 return (error); 584 } 585 586 static int 587 devfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 588 caller_context_t *ct) 589 { 590 switch (cmd) { 591 case _PC_ACL_ENABLED: 592 /* 593 * We rely on the underlying filesystem for ACLs, 594 * so direct the query for ACL support there. 595 * ACL support isn't relative to the file 596 * and we can't guarantee that the dv node 597 * has an attribute node, so any valid 598 * attribute node will suffice. 599 */ 600 ASSERT(dvroot); 601 ASSERT(dvroot->dv_attrvp); 602 return (VOP_PATHCONF(dvroot->dv_attrvp, cmd, valp, cr, ct)); 603 /*NOTREACHED*/ 604 } 605 606 return (fs_pathconf(vp, cmd, valp, cr, ct)); 607 } 608 609 /* 610 * Let avp handle security attributes (acl's). 
611 */ 612 static int 613 devfs_getsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 614 struct cred *cr, caller_context_t *ct) 615 { 616 dvnode_t *dv = VTODV(vp); 617 struct vnode *avp; 618 int error; 619 620 dcmn_err2(("devfs_getsecattr %s\n", dv->dv_name)); 621 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK); 622 623 rw_enter(&dv->dv_contents, RW_READER); 624 625 avp = dv->dv_attrvp; 626 627 /* fabricate the acl */ 628 if (avp == NULL) { 629 error = fs_fab_acl(vp, vsap, flags, cr, ct); 630 rw_exit(&dv->dv_contents); 631 return (error); 632 } 633 634 error = VOP_GETSECATTR(avp, vsap, flags, cr, ct); 635 dsysdebug(error, ("vop_getsecattr %s %d\n", VTODV(vp)->dv_name, error)); 636 rw_exit(&dv->dv_contents); 637 return (error); 638 } 639 640 /* 641 * Set security attributes (acl's) 642 * 643 * Note that the dv_contents lock has already been acquired 644 * by the caller's VOP_RWLOCK. 645 */ 646 static int 647 devfs_setsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 648 struct cred *cr, caller_context_t *ct) 649 { 650 dvnode_t *dv = VTODV(vp); 651 struct vnode *avp; 652 int error; 653 654 dcmn_err2(("devfs_setsecattr %s\n", dv->dv_name)); 655 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK); 656 ASSERT(RW_LOCK_HELD(&dv->dv_contents)); 657 658 /* 659 * Not a supported operation on drivers not providing 660 * file system based permissions. 661 */ 662 if (dv->dv_flags & DV_NO_FSPERM) 663 return (ENOTSUP); 664 665 /* 666 * To complete, the setsecattr requires an underlying attribute node. 
667 */ 668 if (dv->dv_attrvp == NULL) { 669 ASSERT(vp->v_type == VCHR || vp->v_type == VBLK); 670 dv_shadow_node(DVTOV(dv->dv_dotdot), dv->dv_name, vp, 671 NULL, NULLVP, cr, DV_SHADOW_CREATE | DV_SHADOW_WRITE_HELD); 672 } 673 674 if ((avp = dv->dv_attrvp) == NULL) { 675 dcmn_err2(("devfs_setsecattr %s: " 676 "cannot construct attribute node\n", dv->dv_name)); 677 return (fs_nosys()); 678 } 679 680 /* 681 * The acl(2) system call issues a VOP_RWLOCK before setting an ACL. 682 * Since backing file systems expect the lock to be held before seeing 683 * a VOP_SETSECATTR ACL, we need to issue the VOP_RWLOCK to the backing 684 * store before forwarding the ACL. 685 */ 686 (void) VOP_RWLOCK(avp, V_WRITELOCK_TRUE, NULL); 687 error = VOP_SETSECATTR(avp, vsap, flags, cr, ct); 688 dsysdebug(error, ("vop_setsecattr %s %d\n", VTODV(vp)->dv_name, error)); 689 VOP_RWUNLOCK(avp, V_WRITELOCK_TRUE, NULL); 690 691 /* 692 * Set DV_ACL if we have a non-trivial set of ACLs. It is not 693 * necessary to hold VOP_RWLOCK since fs_acl_nontrivial only does 694 * VOP_GETSECATTR calls. 695 */ 696 if (fs_acl_nontrivial(avp, cr)) 697 dv->dv_flags |= DV_ACL; 698 return (error); 699 } 700 701 /* 702 * This function is used for secpolicy_setattr(). It must call an 703 * access() like function while it is already holding the 704 * dv_contents lock. We only care about this when dv_attr != NULL; 705 * so the unlocked access call only concerns itself with that 706 * particular branch of devfs_access(). 707 */ 708 static int 709 devfs_unlocked_access(void *vdv, int mode, struct cred *cr) 710 { 711 struct dv_node *dv = vdv; 712 int shift = 0; 713 uid_t owner = dv->dv_attr->va_uid; 714 715 /* Check access based on owner, group and public permissions. 
*/ 716 if (crgetuid(cr) != owner) { 717 shift += 3; 718 if (groupmember(dv->dv_attr->va_gid, cr) == 0) 719 shift += 3; 720 } 721 722 /* compute missing mode bits */ 723 mode &= ~(dv->dv_attr->va_mode << shift); 724 725 if (mode == 0) 726 return (0); 727 728 return (secpolicy_vnode_access(cr, DVTOV(dv), owner, mode)); 729 } 730 731 static int 732 devfs_access(struct vnode *vp, int mode, int flags, struct cred *cr, 733 caller_context_t *ct) 734 { 735 struct dv_node *dv = VTODV(vp); 736 int res; 737 738 dcmn_err2(("devfs_access %s\n", dv->dv_name)); 739 ASSERT(dv->dv_attr || dv->dv_attrvp); 740 741 /* restrict console access to privileged processes */ 742 if ((vp->v_rdev == rconsdev) && secpolicy_console(cr) != 0) { 743 return (EACCES); 744 } 745 746 rw_enter(&dv->dv_contents, RW_READER); 747 if (dv->dv_attr && ((dv->dv_flags & DV_ACL) == 0)) { 748 res = devfs_unlocked_access(dv, mode, cr); 749 } else { 750 res = VOP_ACCESS(dv->dv_attrvp, mode, flags, cr, ct); 751 } 752 rw_exit(&dv->dv_contents); 753 return (res); 754 } 755 756 /* 757 * Lookup 758 * 759 * Given the directory vnode and the name of the component, return 760 * the corresponding held vnode for that component. 761 * 762 * Of course in these fictional filesystems, nothing's ever quite 763 * -that- simple. 
 *
 *	devfs name	type		shadow (fs attributes)	type	comments
 *	-------------------------------------------------------------------------
 *	drv[@addr]	VDIR		drv[@addr]		VDIR	nexus driver
 *	drv[@addr]:m	VCHR/VBLK	drv[@addr]:m		VREG	leaf driver
 *	drv[@addr]	VCHR/VBLK	drv[@addr]:.default	VREG	leaf driver
 *	-------------------------------------------------------------------------
 *
 * The following names are reserved for the attribute filesystem (which
 * could easily be another layer on top of this one - we simply need to
 * hold the vnode of the thing we're looking at)
 *
 *	attr name	type		shadow (fs attributes)	type	comments
 *	-------------------------------------------------------------------------
 *	drv[@addr]	VDIR		-			-	attribute dir
 *	minorname	VDIR		-			-	minorname
 *	attribute	VREG		-			-	attribute
 *	-------------------------------------------------------------------------
 *
 * Examples:
 *
 *	devfs:/devices/.../mm@0:zero		VCHR
 *	shadow:/.devices/.../mm@0:zero		VREG, fs attrs
 *	devfs:/devices/.../mm@0:/zero/attr	VREG, driver attribute
 *
 *	devfs:/devices/.../sd@0,0:a		VBLK
 *	shadow:/.devices/.../sd@0,0:a		VREG, fs attrs
 *	devfs:/devices/.../sd@0,0:/a/.type	VREG, "ddi_block:chan"
 *
 *	devfs:/devices/.../mm@0			VCHR
 *	shadow:/.devices/.../mm@0:.default	VREG, fs attrs
 *	devfs:/devices/.../mm@0:/.default/attr	VREG, driver attribute
 *	devfs:/devices/.../mm@0:/.default/.type	VREG, "ddi_pseudo"
 *
 *	devfs:/devices/.../obio			VDIR
 *	shadow:/devices/.../obio		VDIR, needed for fs attrs.
 *	devfs:/devices/.../obio:/.default/attr	VDIR, driver attribute
 *
 * We also need to be able to deal with "old" devices that have gone away,
 * though I think that provided we return them with readdir, they can
 * be removed (i.e. they don't have to respond to lookup, though it might
 * be weird if they didn't ;-)
 *
 * Lookup has side-effects.
808 * 809 * - It will create directories and fs attribute files in the shadow hierarchy. 810 * - It should cause non-SID devices to be probed (ask the parent nexi). 811 */ 812 /*ARGSUSED3*/ 813 static int 814 devfs_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, 815 struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred, 816 caller_context_t *ct, int *direntflags, pathname_t *realpnp) 817 { 818 ASSERT(dvp->v_type == VDIR); 819 dcmn_err2(("devfs_lookup: %s\n", nm)); 820 return (dv_find(VTODV(dvp), nm, vpp, pnp, rdir, cred, 0)); 821 } 822 823 /* 824 * devfs nodes can't really be created directly by userland - however, 825 * we do allow creates to find existing nodes: 826 * 827 * - any create fails if the node doesn't exist - EROFS. 828 * - creating an existing directory read-only succeeds, otherwise EISDIR. 829 * - exclusive creates fail if the node already exists - EEXIST. 830 * - failure to create the snode for an existing device - ENOSYS. 831 */ 832 /*ARGSUSED2*/ 833 static int 834 devfs_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl, 835 int mode, struct vnode **vpp, struct cred *cred, int flag, 836 caller_context_t *ct, vsecattr_t *vsecp) 837 { 838 int error; 839 struct vnode *vp; 840 841 dcmn_err2(("devfs_create %s\n", nm)); 842 error = dv_find(VTODV(dvp), nm, &vp, NULL, NULLVP, cred, 0); 843 if (error == 0) { 844 if (excl == EXCL) 845 error = EEXIST; 846 else if (vp->v_type == VDIR && (mode & VWRITE)) 847 error = EISDIR; 848 else 849 error = VOP_ACCESS(vp, mode, 0, cred, ct); 850 851 if (error) { 852 VN_RELE(vp); 853 } else 854 *vpp = vp; 855 } else if (error == ENOENT) 856 error = EROFS; 857 858 return (error); 859 } 860 861 /* 862 * If DV_BUILD is set, we call into nexus driver to do a BUS_CONFIG_ALL. 863 * Otherwise, simply return cached dv_node's. Hotplug code always call 864 * devfs_clean() to invalid the dv_node cache. 
*/
/*ARGSUSED5*/
static int
devfs_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred, int *eofp,
    caller_context_t *ct, int flags)
{
	struct dv_node *ddv, *dv;
	struct dirent64 *de, *bufp;
	offset_t diroff;
	offset_t	soff;
	size_t reclen, movesz;
	int error;
	struct vattr va;
	size_t bufsz;

	ddv = VTODV(dvp);
	dcmn_err2(("devfs_readdir %s: offset %lld len %ld\n",
	    ddv->dv_name, uiop->uio_loffset, uiop->uio_iov->iov_len));
	ASSERT(ddv->dv_attr || ddv->dv_attrvp);
	/* entered with dv_contents held as reader; it is held again on return */
	ASSERT(RW_READ_HELD(&ddv->dv_contents));

	/* offsets at or beyond MAXOFF_T are reported as end of directory */
	if (uiop->uio_loffset >= MAXOFF_T) {
		if (eofp)
			*eofp = 1;
		return (0);
	}

	/* only a single iovec is supported; bufsz below is taken from it */
	if (uiop->uio_iovcnt != 1)
		return (EINVAL);

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	/* Load the initial contents */
	if (ddv->dv_flags & DV_BUILD) {
		/*
		 * dv_filldir() is called with the lock held as writer.  If
		 * the upgrade fails the lock is dropped and retaken, so
		 * DV_BUILD is rechecked before filling.
		 */
		if (!rw_tryupgrade(&ddv->dv_contents)) {
			rw_exit(&ddv->dv_contents);
			rw_enter(&ddv->dv_contents, RW_WRITER);
		}

		/* recheck and fill */
		if (ddv->dv_flags & DV_BUILD)
			dv_filldir(ddv);

		rw_downgrade(&ddv->dv_contents);
	}

	soff = uiop->uio_loffset;
	bufsz = uiop->uio_iov->iov_len;
	de = bufp = kmem_alloc(bufsz, KM_SLEEP);
	movesz = 0;
	/*
	 * Non-NULL sentinel: at "full" a NULL dv means the entry list was
	 * walked to its end, anything else means we stopped early.
	 */
	dv = (struct dv_node *)-1;

	/*
	 * Move as many entries into the uio structure as it will take.
	 * Special case "." and "..".
	 */
	diroff = 0;
	if (soff == 0) {				/* . */
		reclen = DIRENT64_RECLEN(strlen("."));
		if ((movesz + reclen) > bufsz)
			goto full;
		de->d_ino = (ino64_t)ddv->dv_ino;
		/* d_off is the offset of the entry that follows this one */
		de->d_off = (off64_t)diroff + 1;
		de->d_reclen = (ushort_t)reclen;

		/* use strncpy(9f) to zero out uninitialized bytes */

		(void) strncpy(de->d_name, ".", DIRENT64_NAMELEN(reclen));
		movesz += reclen;
		de = (dirent64_t *)(intptr_t)((char *)de + reclen);
		dcmn_err3(("devfs_readdir: A: diroff %lld, soff %lld: '%s' "
		    "reclen %lu\n", diroff, soff, ".", reclen));
	}

	diroff++;
	if (soff <= 1) {				/* .. */
		reclen = DIRENT64_RECLEN(strlen(".."));
		if ((movesz + reclen) > bufsz)
			goto full;
		de->d_ino = (ino64_t)ddv->dv_dotdot->dv_ino;
		de->d_off = (off64_t)diroff + 1;
		de->d_reclen = (ushort_t)reclen;

		/* use strncpy(9f) to zero out uninitialized bytes */

		(void) strncpy(de->d_name, "..", DIRENT64_NAMELEN(reclen));
		movesz += reclen;
		de = (dirent64_t *)(intptr_t)((char *)de + reclen);
		dcmn_err3(("devfs_readdir: B: diroff %lld, soff %lld: '%s' "
		    "reclen %lu\n", diroff, soff, "..", reclen));
	}

	/* real entries start at directory offset 2 */
	diroff++;
	for (dv = DV_FIRST_ENTRY(ddv); dv;
	    dv = DV_NEXT_ENTRY(ddv, dv), diroff++) {
		/* skip entries until at correct directory offset */
		if (diroff < soff)
			continue;

		/*
		 * hidden nodes are skipped (but they still occupy a
		 * directory offset).
		 */
		if (dv->dv_devi && ndi_dev_is_hidden_node(dv->dv_devi))
			continue;

		/*
		 * DDM_INTERNAL_PATH minor nodes are skipped for readdirs
		 * outside the kernel (but they still occupy a directory
		 * offset).
		 */
		if ((dv->dv_flags & DV_INTERNAL) && (cred != kcred))
			continue;

		reclen = DIRENT64_RECLEN(strlen(dv->dv_name));
		if ((movesz + reclen) > bufsz) {
			dcmn_err3(("devfs_readdir: C: diroff "
			    "%lld, soff %lld: '%s' reclen %lu\n",
			    diroff, soff, dv->dv_name, reclen));
			goto full;
		}
		de->d_ino = (ino64_t)dv->dv_ino;
		de->d_off = (off64_t)diroff + 1;
		de->d_reclen = (ushort_t)reclen;

		/* use strncpy(9f) to zero out uninitialized bytes */

		ASSERT(strlen(dv->dv_name) + 1 <=
		    DIRENT64_NAMELEN(reclen));
		(void) strncpy(de->d_name, dv->dv_name,
		    DIRENT64_NAMELEN(reclen));

		movesz += reclen;
		de = (dirent64_t *)(intptr_t)((char *)de + reclen);
		dcmn_err4(("devfs_readdir: D: diroff "
		    "%lld, soff %lld: '%s' reclen %lu\n", diroff, soff,
		    dv->dv_name, reclen));
	}

	/* the buffer is full, or we exhausted everything */
full:	dcmn_err3(("devfs_readdir: moving %lu bytes: "
	    "diroff %lld, soff %lld, dv %p\n",
	    movesz, diroff, soff, (void *)dv));

	/* nothing fit in the buffer although entries remain */
	if ((movesz == 0) && dv)
		error = EINVAL;		/* cannot be represented */
	else {
		error = uiomove(bufp, movesz, UIO_READ, uiop);
		if (error == 0) {
			if (eofp)
				*eofp = dv ? 0 : 1;
			/* next read resumes at the first entry not copied */
			uiop->uio_loffset = diroff;
		}

		/*
		 * Record the access time.  The reader lock is dropped around
		 * the call because for a directory devfs_setattr() ends up in
		 * devfs_setattr_dir(), which takes dv_contents as writer.
		 * The result of the update is deliberately ignored.
		 */
		va.va_mask = AT_ATIME;
		gethrestime(&va.va_atime);
		rw_exit(&ddv->dv_contents);
		(void) devfs_setattr(dvp, &va, 0, cred, ct);
		rw_enter(&ddv->dv_contents, RW_READER);
	}

	kmem_free(bufp, bufsz);
	return (error);
}

/*
 * fsync has nothing to do in this implementation: the call is traced
 * and reported as successful.
 */
/*ARGSUSED*/
static int
devfs_fsync(struct vnode *vp, int syncflag, struct cred *cred,
    caller_context_t *ct)
{
	/*
	 * Message goes to console only. Otherwise, the message
	 * causes devfs_fsync to be invoked again... infinite loop
	 */
	dcmn_err2(("devfs_fsync %s\n", VTODV(vp)->dv_name));
	return (0);
}

/*
 * Normally, we leave the dv_node here at count of 0.
 * The node will be destroyed when dv_cleandir() is called.
 *
 * Stale dv_node's are already unlinked from the fs tree,
 * so dv_cleandir() won't find them. We destroy such nodes
 * immediately.
 */
/*ARGSUSED1*/
static void
devfs_inactive(struct vnode *vp, struct cred *cred, caller_context_t *ct)
{
	int destroy;
	struct dv_node *dv = VTODV(vp);

	dcmn_err2(("devfs_inactive: %s\n", dv->dv_name));
	/* drop the reference and sample the stale state under v_lock */
	mutex_enter(&vp->v_lock);
	ASSERT(vp->v_count >= 1);
	--vp->v_count;
	destroy = (DV_STALE(dv) && vp->v_count == 0);
	mutex_exit(&vp->v_lock);

	/* stale nodes cannot be rediscovered, destroy it here */
	if (destroy)
		dv_destroy(dv, 0);
}

/*
 * XXX Why do we need this?  NFS mounted /dev directories?
 * XXX Talk to peter staubach about this.
 *
 * Fill in a file identifier built from the node's inode number.
 * If the caller's fid is too small, the required length is stored in
 * fid_len and ENOSPC is returned.
 */
/*ARGSUSED2*/
static int
devfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
{
	struct dv_node	*dv = VTODV(vp);
	struct dv_fid	*dv_fid;

	/* the length counts the dv_fid payload, excluding one ushort_t */
	if (fidp->fid_len < (sizeof (struct dv_fid) - sizeof (ushort_t))) {
		fidp->fid_len = sizeof (struct dv_fid) - sizeof (ushort_t);
		return (ENOSPC);
	}

	dv_fid = (struct dv_fid *)fidp;
	bzero(dv_fid, sizeof (struct dv_fid));
	dv_fid->dvfid_len = (int)sizeof (struct dv_fid) - sizeof (ushort_t);
	dv_fid->dvfid_ino = dv->dv_ino;
	/* dv_fid->dvfid_gen = dv->tn_gen; XXX ? */

	return (0);
}

/*
 * This pair of routines bracket all VOP_READ, VOP_WRITE
 * and VOP_READDIR requests.  The contents lock stops things
 * moving around while we're looking at them.
 *
 * Also used by file and record locking.
1102 */ 1103 /*ARGSUSED2*/ 1104 static int 1105 devfs_rwlock(struct vnode *vp, int write_flag, caller_context_t *ct) 1106 { 1107 dcmn_err2(("devfs_rwlock %s\n", VTODV(vp)->dv_name)); 1108 rw_enter(&VTODV(vp)->dv_contents, write_flag ? RW_WRITER : RW_READER); 1109 return (write_flag); 1110 } 1111 1112 /*ARGSUSED1*/ 1113 static void 1114 devfs_rwunlock(struct vnode *vp, int write_flag, caller_context_t *ct) 1115 { 1116 dcmn_err2(("devfs_rwunlock %s\n", VTODV(vp)->dv_name)); 1117 rw_exit(&VTODV(vp)->dv_contents); 1118 } 1119 1120 /* 1121 * XXX Should probably do a better job of computing the maximum 1122 * offset available in the directory. 1123 */ 1124 /*ARGSUSED1*/ 1125 static int 1126 devfs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, 1127 caller_context_t *ct) 1128 { 1129 ASSERT(vp->v_type == VDIR); 1130 dcmn_err2(("devfs_seek %s\n", VTODV(vp)->dv_name)); 1131 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 1132 } 1133 1134 vnodeops_t *dv_vnodeops; 1135 1136 const fs_operation_def_t dv_vnodeops_template[] = { 1137 VOPNAME_OPEN, { .vop_open = devfs_open }, 1138 VOPNAME_CLOSE, { .vop_close = devfs_close }, 1139 VOPNAME_READ, { .vop_read = devfs_read }, 1140 VOPNAME_WRITE, { .vop_write = devfs_write }, 1141 VOPNAME_IOCTL, { .vop_ioctl = devfs_ioctl }, 1142 VOPNAME_GETATTR, { .vop_getattr = devfs_getattr }, 1143 VOPNAME_SETATTR, { .vop_setattr = devfs_setattr }, 1144 VOPNAME_ACCESS, { .vop_access = devfs_access }, 1145 VOPNAME_LOOKUP, { .vop_lookup = devfs_lookup }, 1146 VOPNAME_CREATE, { .vop_create = devfs_create }, 1147 VOPNAME_READDIR, { .vop_readdir = devfs_readdir }, 1148 VOPNAME_FSYNC, { .vop_fsync = devfs_fsync }, 1149 VOPNAME_INACTIVE, { .vop_inactive = devfs_inactive }, 1150 VOPNAME_FID, { .vop_fid = devfs_fid }, 1151 VOPNAME_RWLOCK, { .vop_rwlock = devfs_rwlock }, 1152 VOPNAME_RWUNLOCK, { .vop_rwunlock = devfs_rwunlock }, 1153 VOPNAME_SEEK, { .vop_seek = devfs_seek }, 1154 VOPNAME_PATHCONF, { .vop_pathconf = devfs_pathconf }, 1155 
VOPNAME_DISPOSE, { .error = fs_error }, 1156 VOPNAME_SETSECATTR, { .vop_setsecattr = devfs_setsecattr }, 1157 VOPNAME_GETSECATTR, { .vop_getsecattr = devfs_getsecattr }, 1158 NULL, NULL 1159 }; 1160