/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * vnode ops for the devfs
 *
 * For leaf vnode special files (VCHR|VBLK) specfs will always see the VOP
 * first because dv_find always performs leaf vnode substitution, returning
 * a specfs vnode with an s_realvp pointing to the devfs leaf vnode. This
 * means that the only leaf special file VOP operations that devfs will see
 * after VOP_LOOKUP are the ones that specfs forwards.
33 */ 34 35 #include <sys/types.h> 36 #include <sys/param.h> 37 #include <sys/t_lock.h> 38 #include <sys/systm.h> 39 #include <sys/sysmacros.h> 40 #include <sys/user.h> 41 #include <sys/time.h> 42 #include <sys/vfs.h> 43 #include <sys/vnode.h> 44 #include <sys/vfs_opreg.h> 45 #include <sys/file.h> 46 #include <sys/fcntl.h> 47 #include <sys/flock.h> 48 #include <sys/kmem.h> 49 #include <sys/uio.h> 50 #include <sys/errno.h> 51 #include <sys/stat.h> 52 #include <sys/cred.h> 53 #include <sys/dirent.h> 54 #include <sys/pathname.h> 55 #include <sys/cmn_err.h> 56 #include <sys/debug.h> 57 #include <sys/policy.h> 58 #include <sys/modctl.h> 59 #include <sys/sunndi.h> 60 #include <fs/fs_subr.h> 61 #include <sys/fs/dv_node.h> 62 63 extern struct vattr dv_vattr_dir, dv_vattr_file; 64 extern dev_t rconsdev; 65 66 /* 67 * Open of devices (leaf nodes) is handled by specfs. 68 * There is nothing to do to open a directory 69 */ 70 /*ARGSUSED*/ 71 static int 72 devfs_open(struct vnode **vpp, int flag, struct cred *cred, 73 caller_context_t *ct) 74 { 75 struct dv_node *dv = VTODV(*vpp); 76 77 dcmn_err2(("devfs_open %s\n", dv->dv_name)); 78 ASSERT((*vpp)->v_type == VDIR); 79 return (0); 80 } 81 82 /* 83 * Close of devices (leaf nodes) is handled by specfs. 84 * There is nothing much to do inorder to close a directory. 85 */ 86 /*ARGSUSED1*/ 87 static int 88 devfs_close(struct vnode *vp, int flag, int count, 89 offset_t offset, struct cred *cred, caller_context_t *ct) 90 { 91 struct dv_node *dv = VTODV(vp); 92 93 dcmn_err2(("devfs_close %s\n", dv->dv_name)); 94 ASSERT(vp->v_type == VDIR); 95 96 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 97 cleanshares(vp, ttoproc(curthread)->p_pid); 98 return (0); 99 } 100 101 /* 102 * Read of devices (leaf nodes) is handled by specfs. 103 * Read of directories is not supported. 
104 */ 105 /*ARGSUSED*/ 106 static int 107 devfs_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred, 108 struct caller_context *ct) 109 { 110 dcmn_err2(("devfs_read %s\n", VTODV(vp)->dv_name)); 111 ASSERT(vp->v_type == VDIR); 112 ASSERT(RW_READ_HELD(&VTODV(vp)->dv_contents)); 113 return (EISDIR); 114 } 115 116 /* 117 * Write of devices (leaf nodes) is handled by specfs. 118 * Write of directories is not supported. 119 */ 120 /*ARGSUSED*/ 121 static int 122 devfs_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred, 123 struct caller_context *ct) 124 { 125 dcmn_err2(("devfs_write %s\n", VTODV(vp)->dv_name)); 126 ASSERT(vp->v_type == VDIR); 127 ASSERT(RW_WRITE_HELD(&VTODV(vp)->dv_contents)); 128 return (EISDIR); 129 } 130 131 /* 132 * Ioctls to device (leaf nodes) is handled by specfs. 133 * Ioctl to directories is not supported. 134 */ 135 /*ARGSUSED*/ 136 static int 137 devfs_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, 138 struct cred *cred, int *rvalp, caller_context_t *ct) 139 { 140 dcmn_err2(("devfs_ioctl %s\n", VTODV(vp)->dv_name)); 141 ASSERT(vp->v_type == VDIR); 142 143 return (ENOTTY); /* no ioctls supported */ 144 } 145 146 /* 147 * We can be asked directly about the attributes of directories, or 148 * (via sp->s_realvp) about the filesystem attributes of special files. 149 * 150 * For directories, we just believe the attribute store 151 * though we mangle the nodeid, fsid, and rdev to convince userland we 152 * really are a different filesystem. 153 * 154 * For special files, a little more fakery is required. 155 * 156 * If the attribute store is not there (read only root), we believe our 157 * memory based attributes. 158 */ 159 static int 160 devfs_getattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cr, 161 caller_context_t *ct) 162 { 163 struct dv_node *dv = VTODV(vp); 164 int error = 0; 165 uint_t mask; 166 167 /* 168 * Message goes to console only. 
Otherwise, the message 169 * causes devfs_getattr to be invoked again... infinite loop 170 */ 171 dcmn_err2(("?devfs_getattr %s\n", dv->dv_name)); 172 ASSERT(dv->dv_attr || dv->dv_attrvp); 173 174 if (!(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK)) { 175 cmn_err(CE_WARN, /* panic ? */ 176 "?%s: getattr on vnode type %d", dvnm, vp->v_type); 177 return (ENOENT); 178 } 179 180 rw_enter(&dv->dv_contents, RW_READER); 181 if (dv->dv_attr) { 182 /* 183 * obtain from the memory version of attribute. 184 * preserve mask for those that optimize. 185 * devfs specific fields are already merged on creation. 186 */ 187 mask = vap->va_mask; 188 *vap = *dv->dv_attr; 189 vap->va_mask = mask; 190 } else { 191 /* obtain from attribute store and merge */ 192 error = VOP_GETATTR(dv->dv_attrvp, vap, flags, cr, ct); 193 dsysdebug(error, ("vop_getattr %s %d\n", dv->dv_name, error)); 194 dv_vattr_merge(dv, vap); 195 } 196 rw_exit(&dv->dv_contents); 197 198 /* 199 * Restrict the permissions of the node fronting the console 200 * to 0600 with root as the owner. This prevents a non-root 201 * user from gaining access to a serial terminal (like /dev/term/a) 202 * which is in reality serving as the console device (/dev/console). 
203 */ 204 if (vp->v_rdev == rconsdev) { 205 mode_t rconsmask = S_IXUSR|S_IRWXG|S_IRWXO; 206 vap->va_mode &= (~rconsmask); 207 vap->va_uid = 0; 208 } 209 210 return (error); 211 } 212 213 static int devfs_unlocked_access(void *, int, struct cred *); 214 215 /*ARGSUSED4*/ 216 static int 217 devfs_setattr_dir( 218 struct dv_node *dv, 219 struct vnode *vp, 220 struct vattr *vap, 221 int flags, 222 struct cred *cr) 223 { 224 struct vattr *map; 225 uint_t mask; 226 int error = 0; 227 struct vattr vattr; 228 229 ASSERT(dv->dv_attr || dv->dv_attrvp); 230 231 ASSERT(vp->v_type == VDIR); 232 ASSERT((dv->dv_flags & DV_NO_FSPERM) == 0); 233 234 if (vap->va_mask & AT_NOSET) 235 return (EINVAL); 236 237 /* to ensure consistency, single thread setting of attributes */ 238 rw_enter(&dv->dv_contents, RW_WRITER); 239 240 again: if (dv->dv_attr) { 241 242 error = secpolicy_vnode_setattr(cr, vp, vap, 243 dv->dv_attr, flags, devfs_unlocked_access, dv); 244 245 if (error) 246 goto out; 247 248 /* 249 * Apply changes to the memory based attribute. This code 250 * is modeled after the tmpfs implementation of memory 251 * based vnodes 252 */ 253 map = dv->dv_attr; 254 mask = vap->va_mask; 255 256 /* Change file access modes. */ 257 if (mask & AT_MODE) { 258 map->va_mode &= S_IFMT; 259 map->va_mode |= vap->va_mode & ~S_IFMT; 260 } 261 if (mask & AT_UID) 262 map->va_uid = vap->va_uid; 263 if (mask & AT_GID) 264 map->va_gid = vap->va_gid; 265 if (mask & AT_ATIME) 266 map->va_atime = vap->va_atime; 267 if (mask & AT_MTIME) 268 map->va_mtime = vap->va_mtime; 269 270 if (mask & (AT_MODE | AT_UID | AT_GID | AT_MTIME)) 271 gethrestime(&map->va_ctime); 272 } else { 273 /* use the backing attribute store */ 274 ASSERT(dv->dv_attrvp); 275 276 /* 277 * See if we are changing something we care about 278 * the persistence of - return success if we don't care. 
279 */ 280 if (vap->va_mask & (AT_MODE|AT_UID|AT_GID|AT_ATIME|AT_MTIME)) { 281 /* Set the attributes */ 282 error = VOP_SETATTR(dv->dv_attrvp, 283 vap, flags, cr, NULL); 284 dsysdebug(error, 285 ("vop_setattr %s %d\n", dv->dv_name, error)); 286 287 /* 288 * Some file systems may return EROFS for a setattr 289 * on a readonly file system. In this case we create 290 * our own memory based attribute. 291 */ 292 if (error == EROFS) { 293 /* 294 * obtain attributes from existing file 295 * that we will modify and switch to memory 296 * based attribute until attribute store is 297 * read/write. 298 */ 299 vattr = dv_vattr_dir; 300 if (VOP_GETATTR(dv->dv_attrvp, 301 &vattr, flags, cr, NULL) == 0) { 302 dv->dv_attr = kmem_alloc( 303 sizeof (struct vattr), KM_SLEEP); 304 *dv->dv_attr = vattr; 305 dv_vattr_merge(dv, dv->dv_attr); 306 goto again; 307 } 308 } 309 } 310 } 311 out: 312 rw_exit(&dv->dv_contents); 313 return (error); 314 } 315 316 317 /* 318 * Compare the uid/gid/mode changes requested for a setattr 319 * operation with the same details of a node's default minor 320 * perm information. Return 0 if identical. 321 */ 322 static int 323 dv_setattr_cmp(struct vattr *map, mperm_t *mp) 324 { 325 if ((map->va_mode & S_IAMB) != (mp->mp_mode & S_IAMB)) 326 return (1); 327 if (map->va_uid != mp->mp_uid) 328 return (1); 329 if (map->va_gid != mp->mp_gid) 330 return (1); 331 return (0); 332 } 333 334 335 /*ARGSUSED4*/ 336 static int 337 devfs_setattr( 338 struct vnode *vp, 339 struct vattr *vap, 340 int flags, 341 struct cred *cr, 342 caller_context_t *ct) 343 { 344 struct dv_node *dv = VTODV(vp); 345 struct dv_node *ddv; 346 struct vnode *dvp; 347 struct vattr *map; 348 uint_t mask; 349 int error = 0; 350 struct vattr *free_vattr = NULL; 351 struct vattr *vattrp = NULL; 352 mperm_t mp; 353 int persist; 354 355 /* 356 * Message goes to console only. Otherwise, the message 357 * causes devfs_getattr to be invoked again... 
infinite loop 358 */ 359 dcmn_err2(("?devfs_setattr %s\n", dv->dv_name)); 360 ASSERT(dv->dv_attr || dv->dv_attrvp); 361 362 if (!(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK)) { 363 cmn_err(CE_WARN, /* panic ? */ 364 "?%s: getattr on vnode type %d", dvnm, vp->v_type); 365 return (ENOENT); 366 } 367 368 if (vap->va_mask & AT_NOSET) 369 return (EINVAL); 370 371 /* 372 * If we are changing something we don't care about 373 * the persistence of, return success. 374 */ 375 if ((vap->va_mask & 376 (AT_MODE|AT_UID|AT_GID|AT_ATIME|AT_MTIME)) == 0) 377 return (0); 378 379 /* 380 * If driver overrides fs perm, disallow chmod 381 * and do not create attribute nodes. 382 */ 383 if (dv->dv_flags & DV_NO_FSPERM) { 384 ASSERT(dv->dv_attr); 385 if (vap->va_mask & (AT_MODE | AT_UID | AT_GID)) 386 return (EPERM); 387 if ((vap->va_mask & (AT_ATIME|AT_MTIME)) == 0) 388 return (0); 389 rw_enter(&dv->dv_contents, RW_WRITER); 390 if (vap->va_mask & AT_ATIME) 391 dv->dv_attr->va_atime = vap->va_atime; 392 if (vap->va_mask & AT_MTIME) 393 dv->dv_attr->va_mtime = vap->va_mtime; 394 rw_exit(&dv->dv_contents); 395 return (0); 396 } 397 398 /* 399 * Directories are always created but device nodes are 400 * only used to persist non-default permissions. 401 */ 402 if (vp->v_type == VDIR) { 403 ASSERT(dv->dv_attr || dv->dv_attrvp); 404 return (devfs_setattr_dir(dv, vp, vap, flags, cr)); 405 } 406 407 /* 408 * Allocate now before we take any locks 409 */ 410 vattrp = kmem_zalloc(sizeof (*vattrp), KM_SLEEP); 411 412 /* to ensure consistency, single thread setting of attributes */ 413 rw_enter(&dv->dv_contents, RW_WRITER); 414 415 /* 416 * We don't need to create an attribute node 417 * to persist access or modification times. 418 */ 419 persist = (vap->va_mask & (AT_MODE | AT_UID | AT_GID)); 420 421 /* 422 * If persisting something, get the default permissions 423 * for this minor to compare against what the attributes 424 * are now being set to. 
Default ordering is: 425 * - minor_perm match for this minor 426 * - mode supplied by ddi_create_priv_minor_node 427 * - devfs defaults 428 */ 429 if (persist) { 430 if (dev_minorperm(dv->dv_devi, dv->dv_name, &mp) != 0) { 431 mp.mp_uid = dv_vattr_file.va_uid; 432 mp.mp_gid = dv_vattr_file.va_gid; 433 mp.mp_mode = dv_vattr_file.va_mode; 434 if (dv->dv_flags & DV_DFLT_MODE) { 435 ASSERT((dv->dv_dflt_mode & ~S_IAMB) == 0); 436 mp.mp_mode &= ~S_IAMB; 437 mp.mp_mode |= dv->dv_dflt_mode; 438 dcmn_err5(("%s: setattr priv default 0%o\n", 439 dv->dv_name, mp.mp_mode)); 440 } else { 441 dcmn_err5(("%s: setattr devfs default 0%o\n", 442 dv->dv_name, mp.mp_mode)); 443 } 444 } else { 445 dcmn_err5(("%s: setattr minor perm default 0%o\n", 446 dv->dv_name, mp.mp_mode)); 447 } 448 } 449 450 /* 451 * If we don't have a vattr for this node, construct one. 452 */ 453 if (dv->dv_attr) { 454 free_vattr = vattrp; 455 vattrp = NULL; 456 } else { 457 ASSERT(dv->dv_attrvp); 458 ASSERT(vp->v_type != VDIR); 459 *vattrp = dv_vattr_file; 460 error = VOP_GETATTR(dv->dv_attrvp, vattrp, 0, cr, ct); 461 dsysdebug(error, ("vop_getattr %s %d\n", dv->dv_name, error)); 462 if (error) 463 goto out; 464 dv->dv_attr = vattrp; 465 dv_vattr_merge(dv, dv->dv_attr); 466 vattrp = NULL; 467 } 468 469 error = secpolicy_vnode_setattr(cr, vp, vap, dv->dv_attr, 470 flags, devfs_unlocked_access, dv); 471 if (error) { 472 dsysdebug(error, ("devfs_setattr %s secpolicy error %d\n", 473 dv->dv_name, error)); 474 goto out; 475 } 476 477 /* 478 * Apply changes to the memory based attribute. This code 479 * is modeled after the tmpfs implementation of memory 480 * based vnodes 481 */ 482 map = dv->dv_attr; 483 mask = vap->va_mask; 484 485 /* Change file access modes. 
*/ 486 if (mask & AT_MODE) { 487 map->va_mode &= S_IFMT; 488 map->va_mode |= vap->va_mode & ~S_IFMT; 489 } 490 if (mask & AT_UID) 491 map->va_uid = vap->va_uid; 492 if (mask & AT_GID) 493 map->va_gid = vap->va_gid; 494 if (mask & AT_ATIME) 495 map->va_atime = vap->va_atime; 496 if (mask & AT_MTIME) 497 map->va_mtime = vap->va_mtime; 498 499 if (mask & (AT_MODE | AT_UID | AT_GID | AT_MTIME)) { 500 gethrestime(&map->va_ctime); 501 } 502 503 /* 504 * A setattr to defaults means we no longer need the 505 * shadow node as a persistent store, unless there 506 * are ACLs. Otherwise create a shadow node if one 507 * doesn't exist yet. 508 */ 509 if (persist) { 510 if ((dv_setattr_cmp(map, &mp) == 0) && 511 ((dv->dv_flags & DV_ACL) == 0)) { 512 513 if (dv->dv_attrvp) { 514 ddv = dv->dv_dotdot; 515 ASSERT(ddv->dv_attrvp); 516 error = VOP_REMOVE(ddv->dv_attrvp, 517 dv->dv_name, cr, ct, 0); 518 dsysdebug(error, 519 ("vop_remove %s %s %d\n", 520 ddv->dv_name, dv->dv_name, error)); 521 522 if (error == EROFS) 523 error = 0; 524 VN_RELE(dv->dv_attrvp); 525 dv->dv_attrvp = NULL; 526 } 527 ASSERT(dv->dv_attr); 528 } else { 529 if (mask & AT_MODE) 530 dcmn_err5(("%s persisting mode 0%o\n", 531 dv->dv_name, vap->va_mode)); 532 if (mask & AT_UID) 533 dcmn_err5(("%s persisting uid %d\n", 534 dv->dv_name, vap->va_uid)); 535 if (mask & AT_GID) 536 dcmn_err5(("%s persisting gid %d\n", 537 dv->dv_name, vap->va_gid)); 538 539 if (dv->dv_attrvp == NULL) { 540 dvp = DVTOV(dv->dv_dotdot); 541 dv_shadow_node(dvp, dv->dv_name, vp, 542 NULL, NULLVP, cr, 543 DV_SHADOW_CREATE | DV_SHADOW_WRITE_HELD); 544 } 545 if (dv->dv_attrvp) { 546 /* If map still valid do TIME for free. 
*/ 547 if (dv->dv_attr == map) { 548 mask = map->va_mask; 549 map->va_mask = 550 vap->va_mask | AT_ATIME | AT_MTIME; 551 error = VOP_SETATTR(dv->dv_attrvp, map, 552 flags, cr, NULL); 553 map->va_mask = mask; 554 } else { 555 error = VOP_SETATTR(dv->dv_attrvp, 556 vap, flags, cr, NULL); 557 } 558 dsysdebug(error, ("vop_setattr %s %d\n", 559 dv->dv_name, error)); 560 } 561 /* 562 * Some file systems may return EROFS for a setattr 563 * on a readonly file system. In this case save 564 * as our own memory based attribute. 565 * NOTE: ufs is NOT one of these (see ufs_iupdat). 566 */ 567 if (dv->dv_attr && dv->dv_attrvp && error == 0) { 568 vattrp = dv->dv_attr; 569 dv->dv_attr = NULL; 570 } else if (error == EROFS) 571 error = 0; 572 } 573 } 574 575 out: 576 rw_exit(&dv->dv_contents); 577 578 if (vattrp) 579 kmem_free(vattrp, sizeof (*vattrp)); 580 if (free_vattr) 581 kmem_free(free_vattr, sizeof (*free_vattr)); 582 return (error); 583 } 584 585 static int 586 devfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 587 caller_context_t *ct) 588 { 589 switch (cmd) { 590 case _PC_ACL_ENABLED: 591 /* 592 * We rely on the underlying filesystem for ACLs, 593 * so direct the query for ACL support there. 594 * ACL support isn't relative to the file 595 * and we can't guarantee that the dv node 596 * has an attribute node, so any valid 597 * attribute node will suffice. 598 */ 599 ASSERT(dvroot); 600 ASSERT(dvroot->dv_attrvp); 601 return (VOP_PATHCONF(dvroot->dv_attrvp, cmd, valp, cr, ct)); 602 /*NOTREACHED*/ 603 } 604 605 return (fs_pathconf(vp, cmd, valp, cr, ct)); 606 } 607 608 /* 609 * Let avp handle security attributes (acl's). 
610 */ 611 static int 612 devfs_getsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 613 struct cred *cr, caller_context_t *ct) 614 { 615 dvnode_t *dv = VTODV(vp); 616 struct vnode *avp; 617 int error; 618 619 dcmn_err2(("devfs_getsecattr %s\n", dv->dv_name)); 620 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK); 621 622 rw_enter(&dv->dv_contents, RW_READER); 623 624 avp = dv->dv_attrvp; 625 626 /* fabricate the acl */ 627 if (avp == NULL) { 628 error = fs_fab_acl(vp, vsap, flags, cr, ct); 629 rw_exit(&dv->dv_contents); 630 return (error); 631 } 632 633 error = VOP_GETSECATTR(avp, vsap, flags, cr, ct); 634 dsysdebug(error, ("vop_getsecattr %s %d\n", VTODV(vp)->dv_name, error)); 635 rw_exit(&dv->dv_contents); 636 return (error); 637 } 638 639 /* 640 * Set security attributes (acl's) 641 * 642 * Note that the dv_contents lock has already been acquired 643 * by the caller's VOP_RWLOCK. 644 */ 645 static int 646 devfs_setsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 647 struct cred *cr, caller_context_t *ct) 648 { 649 dvnode_t *dv = VTODV(vp); 650 struct vnode *avp; 651 int error; 652 653 dcmn_err2(("devfs_setsecattr %s\n", dv->dv_name)); 654 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK); 655 ASSERT(RW_LOCK_HELD(&dv->dv_contents)); 656 657 /* 658 * Not a supported operation on drivers not providing 659 * file system based permissions. 660 */ 661 if (dv->dv_flags & DV_NO_FSPERM) 662 return (ENOTSUP); 663 664 /* 665 * To complete, the setsecattr requires an underlying attribute node. 
666 */ 667 if (dv->dv_attrvp == NULL) { 668 ASSERT(vp->v_type == VCHR || vp->v_type == VBLK); 669 dv_shadow_node(DVTOV(dv->dv_dotdot), dv->dv_name, vp, 670 NULL, NULLVP, cr, DV_SHADOW_CREATE | DV_SHADOW_WRITE_HELD); 671 } 672 673 if ((avp = dv->dv_attrvp) == NULL) { 674 dcmn_err2(("devfs_setsecattr %s: " 675 "cannot construct attribute node\n", dv->dv_name)); 676 return (fs_nosys()); 677 } 678 679 /* 680 * The acl(2) system call issues a VOP_RWLOCK before setting an ACL. 681 * Since backing file systems expect the lock to be held before seeing 682 * a VOP_SETSECATTR ACL, we need to issue the VOP_RWLOCK to the backing 683 * store before forwarding the ACL. 684 */ 685 (void) VOP_RWLOCK(avp, V_WRITELOCK_TRUE, NULL); 686 error = VOP_SETSECATTR(avp, vsap, flags, cr, ct); 687 dsysdebug(error, ("vop_setsecattr %s %d\n", VTODV(vp)->dv_name, error)); 688 VOP_RWUNLOCK(avp, V_WRITELOCK_TRUE, NULL); 689 690 /* 691 * Set DV_ACL if we have a non-trivial set of ACLs. It is not 692 * necessary to hold VOP_RWLOCK since fs_acl_nontrivial only does 693 * VOP_GETSECATTR calls. 694 */ 695 if (fs_acl_nontrivial(avp, cr)) 696 dv->dv_flags |= DV_ACL; 697 return (error); 698 } 699 700 /* 701 * This function is used for secpolicy_setattr(). It must call an 702 * access() like function while it is already holding the 703 * dv_contents lock. We only care about this when dv_attr != NULL; 704 * so the unlocked access call only concerns itself with that 705 * particular branch of devfs_access(). 706 */ 707 static int 708 devfs_unlocked_access(void *vdv, int mode, struct cred *cr) 709 { 710 struct dv_node *dv = vdv; 711 int shift = 0; 712 uid_t owner = dv->dv_attr->va_uid; 713 714 /* Check access based on owner, group and public permissions. 
*/ 715 if (crgetuid(cr) != owner) { 716 shift += 3; 717 if (groupmember(dv->dv_attr->va_gid, cr) == 0) 718 shift += 3; 719 } 720 721 return (secpolicy_vnode_access2(cr, DVTOV(dv), owner, 722 dv->dv_attr->va_mode << shift, mode)); 723 } 724 725 static int 726 devfs_access(struct vnode *vp, int mode, int flags, struct cred *cr, 727 caller_context_t *ct) 728 { 729 struct dv_node *dv = VTODV(vp); 730 int res; 731 732 dcmn_err2(("devfs_access %s\n", dv->dv_name)); 733 ASSERT(dv->dv_attr || dv->dv_attrvp); 734 735 /* restrict console access to privileged processes */ 736 if ((vp->v_rdev == rconsdev) && secpolicy_console(cr) != 0) { 737 return (EACCES); 738 } 739 740 rw_enter(&dv->dv_contents, RW_READER); 741 if (dv->dv_attr && ((dv->dv_flags & DV_ACL) == 0)) { 742 res = devfs_unlocked_access(dv, mode, cr); 743 } else { 744 res = VOP_ACCESS(dv->dv_attrvp, mode, flags, cr, ct); 745 } 746 rw_exit(&dv->dv_contents); 747 return (res); 748 } 749 750 /* 751 * Lookup 752 * 753 * Given the directory vnode and the name of the component, return 754 * the corresponding held vnode for that component. 755 * 756 * Of course in these fictional filesystems, nothing's ever quite 757 * -that- simple. 
 *
 *	devfs name	type		shadow (fs attributes)	type	comments
 * -------------------------------------------------------------------------
 *	drv[@addr]	VDIR		drv[@addr]		VDIR	nexus driver
 *	drv[@addr]:m	VCHR/VBLK	drv[@addr]:m		VREG	leaf driver
 *	drv[@addr]	VCHR/VBLK	drv[@addr]:.default	VREG	leaf driver
 * -------------------------------------------------------------------------
 *
 * The following names are reserved for the attribute filesystem (which
 * could easily be another layer on top of this one - we simply need to
 * hold the vnode of the thing we're looking at)
 *
 *	attr name	type		shadow (fs attributes)	type	comments
 * -------------------------------------------------------------------------
 *	drv[@addr]	VDIR		-			-	attribute dir
 *	minorname	VDIR		-			-	minorname
 *	attribute	VREG		-			-	attribute
 * -------------------------------------------------------------------------
 *
 * Examples:
 *
 *	devfs:/devices/.../mm@0:zero		VCHR
 *	shadow:/.devices/.../mm@0:zero		VREG, fs attrs
 *	devfs:/devices/.../mm@0:/zero/attr	VREG, driver attribute
 *
 *	devfs:/devices/.../sd@0,0:a		VBLK
 *	shadow:/.devices/.../sd@0,0:a		VREG, fs attrs
 *	devfs:/devices/.../sd@0,0:/a/.type	VREG, "ddi_block:chan"
 *
 *	devfs:/devices/.../mm@0			VCHR
 *	shadow:/.devices/.../mm@0:.default	VREG, fs attrs
 *	devfs:/devices/.../mm@0:/.default/attr	VREG, driver attribute
 *	devfs:/devices/.../mm@0:/.default/.type	VREG, "ddi_pseudo"
 *
 *	devfs:/devices/.../obio			VDIR
 *	shadow:/devices/.../obio		VDIR, needed for fs attrs.
 *	devfs:/devices/.../obio:/.default/attr	VDIR, driver attribute
 *
 * We also need to be able to deal with "old" devices that have gone away,
 * though I think that provided we return them with readdir, they can
 * be removed (i.e. they don't have to respond to lookup, though it might
 * be weird if they didn't ;-)
 *
 * Lookup has side-effects.
802 * 803 * - It will create directories and fs attribute files in the shadow hierarchy. 804 * - It should cause non-SID devices to be probed (ask the parent nexi). 805 */ 806 /*ARGSUSED3*/ 807 static int 808 devfs_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, 809 struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred, 810 caller_context_t *ct, int *direntflags, pathname_t *realpnp) 811 { 812 ASSERT(dvp->v_type == VDIR); 813 dcmn_err2(("devfs_lookup: %s\n", nm)); 814 return (dv_find(VTODV(dvp), nm, vpp, pnp, rdir, cred, 0)); 815 } 816 817 /* 818 * devfs nodes can't really be created directly by userland - however, 819 * we do allow creates to find existing nodes: 820 * 821 * - any create fails if the node doesn't exist - EROFS. 822 * - creating an existing directory read-only succeeds, otherwise EISDIR. 823 * - exclusive creates fail if the node already exists - EEXIST. 824 * - failure to create the snode for an existing device - ENOSYS. 825 */ 826 /*ARGSUSED2*/ 827 static int 828 devfs_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl, 829 int mode, struct vnode **vpp, struct cred *cred, int flag, 830 caller_context_t *ct, vsecattr_t *vsecp) 831 { 832 int error; 833 struct vnode *vp; 834 835 dcmn_err2(("devfs_create %s\n", nm)); 836 error = dv_find(VTODV(dvp), nm, &vp, NULL, NULLVP, cred, 0); 837 if (error == 0) { 838 if (excl == EXCL) 839 error = EEXIST; 840 else if (vp->v_type == VDIR && (mode & VWRITE)) 841 error = EISDIR; 842 else 843 error = VOP_ACCESS(vp, mode, 0, cred, ct); 844 845 if (error) { 846 VN_RELE(vp); 847 } else 848 *vpp = vp; 849 } else if (error == ENOENT) 850 error = EROFS; 851 852 return (error); 853 } 854 855 /* 856 * If DV_BUILD is set, we call into nexus driver to do a BUS_CONFIG_ALL. 857 * Otherwise, simply return cached dv_node's. Hotplug code always call 858 * devfs_clean() to invalid the dv_node cache. 
859 */ 860 /*ARGSUSED5*/ 861 static int 862 devfs_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred, int *eofp, 863 caller_context_t *ct, int flags) 864 { 865 struct dv_node *ddv, *dv; 866 struct dirent64 *de, *bufp; 867 offset_t diroff; 868 offset_t soff; 869 size_t reclen, movesz; 870 int error; 871 struct vattr va; 872 size_t bufsz; 873 874 ddv = VTODV(dvp); 875 dcmn_err2(("devfs_readdir %s: offset %lld len %ld\n", 876 ddv->dv_name, uiop->uio_loffset, uiop->uio_iov->iov_len)); 877 ASSERT(ddv->dv_attr || ddv->dv_attrvp); 878 ASSERT(RW_READ_HELD(&ddv->dv_contents)); 879 880 if (uiop->uio_loffset >= MAXOFF_T) { 881 if (eofp) 882 *eofp = 1; 883 return (0); 884 } 885 886 if (uiop->uio_iovcnt != 1) 887 return (EINVAL); 888 889 if (dvp->v_type != VDIR) 890 return (ENOTDIR); 891 892 /* Load the initial contents */ 893 if (ddv->dv_flags & DV_BUILD) { 894 if (!rw_tryupgrade(&ddv->dv_contents)) { 895 rw_exit(&ddv->dv_contents); 896 rw_enter(&ddv->dv_contents, RW_WRITER); 897 } 898 899 /* recheck and fill */ 900 if (ddv->dv_flags & DV_BUILD) 901 dv_filldir(ddv); 902 903 rw_downgrade(&ddv->dv_contents); 904 } 905 906 soff = uiop->uio_loffset; 907 bufsz = uiop->uio_iov->iov_len; 908 de = bufp = kmem_alloc(bufsz, KM_SLEEP); 909 movesz = 0; 910 dv = (struct dv_node *)-1; 911 912 /* 913 * Move as many entries into the uio structure as it will take. 914 * Special case "." and "..". 915 */ 916 diroff = 0; 917 if (soff == 0) { /* . 
*/ 918 reclen = DIRENT64_RECLEN(strlen(".")); 919 if ((movesz + reclen) > bufsz) 920 goto full; 921 de->d_ino = (ino64_t)ddv->dv_ino; 922 de->d_off = (off64_t)diroff + 1; 923 de->d_reclen = (ushort_t)reclen; 924 925 /* use strncpy(9f) to zero out uninitialized bytes */ 926 927 (void) strncpy(de->d_name, ".", DIRENT64_NAMELEN(reclen)); 928 movesz += reclen; 929 de = (dirent64_t *)(intptr_t)((char *)de + reclen); 930 dcmn_err3(("devfs_readdir: A: diroff %lld, soff %lld: '%s' " 931 "reclen %lu\n", diroff, soff, ".", reclen)); 932 } 933 934 diroff++; 935 if (soff <= 1) { /* .. */ 936 reclen = DIRENT64_RECLEN(strlen("..")); 937 if ((movesz + reclen) > bufsz) 938 goto full; 939 de->d_ino = (ino64_t)ddv->dv_dotdot->dv_ino; 940 de->d_off = (off64_t)diroff + 1; 941 de->d_reclen = (ushort_t)reclen; 942 943 /* use strncpy(9f) to zero out uninitialized bytes */ 944 945 (void) strncpy(de->d_name, "..", DIRENT64_NAMELEN(reclen)); 946 movesz += reclen; 947 de = (dirent64_t *)(intptr_t)((char *)de + reclen); 948 dcmn_err3(("devfs_readdir: B: diroff %lld, soff %lld: '%s' " 949 "reclen %lu\n", diroff, soff, "..", reclen)); 950 } 951 952 diroff++; 953 for (dv = DV_FIRST_ENTRY(ddv); dv; 954 dv = DV_NEXT_ENTRY(ddv, dv), diroff++) { 955 /* skip entries until at correct directory offset */ 956 if (diroff < soff) 957 continue; 958 959 /* 960 * hidden nodes are skipped (but they still occupy a 961 * directory offset). 962 */ 963 if (dv->dv_devi && ndi_dev_is_hidden_node(dv->dv_devi)) 964 continue; 965 966 /* 967 * DDM_INTERNAL_PATH minor nodes are skipped for readdirs 968 * outside the kernel (but they still occupy a directory 969 * offset). 
970 */ 971 if ((dv->dv_flags & DV_INTERNAL) && (cred != kcred)) 972 continue; 973 974 reclen = DIRENT64_RECLEN(strlen(dv->dv_name)); 975 if ((movesz + reclen) > bufsz) { 976 dcmn_err3(("devfs_readdir: C: diroff " 977 "%lld, soff %lld: '%s' reclen %lu\n", 978 diroff, soff, dv->dv_name, reclen)); 979 goto full; 980 } 981 de->d_ino = (ino64_t)dv->dv_ino; 982 de->d_off = (off64_t)diroff + 1; 983 de->d_reclen = (ushort_t)reclen; 984 985 /* use strncpy(9f) to zero out uninitialized bytes */ 986 987 ASSERT(strlen(dv->dv_name) + 1 <= 988 DIRENT64_NAMELEN(reclen)); 989 (void) strncpy(de->d_name, dv->dv_name, 990 DIRENT64_NAMELEN(reclen)); 991 992 movesz += reclen; 993 de = (dirent64_t *)(intptr_t)((char *)de + reclen); 994 dcmn_err4(("devfs_readdir: D: diroff " 995 "%lld, soff %lld: '%s' reclen %lu\n", diroff, soff, 996 dv->dv_name, reclen)); 997 } 998 999 /* the buffer is full, or we exhausted everything */ 1000 full: dcmn_err3(("devfs_readdir: moving %lu bytes: " 1001 "diroff %lld, soff %lld, dv %p\n", 1002 movesz, diroff, soff, (void *)dv)); 1003 1004 if ((movesz == 0) && dv) 1005 error = EINVAL; /* cannot be represented */ 1006 else { 1007 error = uiomove(bufp, movesz, UIO_READ, uiop); 1008 if (error == 0) { 1009 if (eofp) 1010 *eofp = dv ? 0 : 1; 1011 uiop->uio_loffset = diroff; 1012 } 1013 1014 va.va_mask = AT_ATIME; 1015 gethrestime(&va.va_atime); 1016 rw_exit(&ddv->dv_contents); 1017 (void) devfs_setattr(dvp, &va, 0, cred, ct); 1018 rw_enter(&ddv->dv_contents, RW_READER); 1019 } 1020 1021 kmem_free(bufp, bufsz); 1022 return (error); 1023 } 1024 1025 /*ARGSUSED*/ 1026 static int 1027 devfs_fsync(struct vnode *vp, int syncflag, struct cred *cred, 1028 caller_context_t *ct) 1029 { 1030 /* 1031 * Message goes to console only. Otherwise, the message 1032 * causes devfs_fsync to be invoked again... infinite loop 1033 */ 1034 dcmn_err2(("devfs_fsync %s\n", VTODV(vp)->dv_name)); 1035 return (0); 1036 } 1037 1038 /* 1039 * Normally, we leave the dv_node here at count of 0. 
1040 * The node will be destroyed when dv_cleandir() is called. 1041 * 1042 * Stale dv_node's are already unlinked from the fs tree, 1043 * so dv_cleandir() won't find them. We destroy such nodes 1044 * immediately. 1045 */ 1046 /*ARGSUSED1*/ 1047 static void 1048 devfs_inactive(struct vnode *vp, struct cred *cred, caller_context_t *ct) 1049 { 1050 int destroy; 1051 struct dv_node *dv = VTODV(vp); 1052 1053 dcmn_err2(("devfs_inactive: %s\n", dv->dv_name)); 1054 mutex_enter(&vp->v_lock); 1055 ASSERT(vp->v_count >= 1); 1056 --vp->v_count; 1057 destroy = (DV_STALE(dv) && vp->v_count == 0); 1058 mutex_exit(&vp->v_lock); 1059 1060 /* stale nodes cannot be rediscovered, destroy it here */ 1061 if (destroy) 1062 dv_destroy(dv, 0); 1063 } 1064 1065 /* 1066 * XXX Why do we need this? NFS mounted /dev directories? 1067 * XXX Talk to peter staubach about this. 1068 */ 1069 /*ARGSUSED2*/ 1070 static int 1071 devfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct) 1072 { 1073 struct dv_node *dv = VTODV(vp); 1074 struct dv_fid *dv_fid; 1075 1076 if (fidp->fid_len < (sizeof (struct dv_fid) - sizeof (ushort_t))) { 1077 fidp->fid_len = sizeof (struct dv_fid) - sizeof (ushort_t); 1078 return (ENOSPC); 1079 } 1080 1081 dv_fid = (struct dv_fid *)fidp; 1082 bzero(dv_fid, sizeof (struct dv_fid)); 1083 dv_fid->dvfid_len = (int)sizeof (struct dv_fid) - sizeof (ushort_t); 1084 dv_fid->dvfid_ino = dv->dv_ino; 1085 /* dv_fid->dvfid_gen = dv->tn_gen; XXX ? */ 1086 1087 return (0); 1088 } 1089 1090 /* 1091 * This pair of routines bracket all VOP_READ, VOP_WRITE 1092 * and VOP_READDIR requests. The contents lock stops things 1093 * moving around while we're looking at them. 1094 * 1095 * Also used by file and record locking. 1096 */ 1097 /*ARGSUSED2*/ 1098 static int 1099 devfs_rwlock(struct vnode *vp, int write_flag, caller_context_t *ct) 1100 { 1101 dcmn_err2(("devfs_rwlock %s\n", VTODV(vp)->dv_name)); 1102 rw_enter(&VTODV(vp)->dv_contents, write_flag ? 
RW_WRITER : RW_READER); 1103 return (write_flag); 1104 } 1105 1106 /*ARGSUSED1*/ 1107 static void 1108 devfs_rwunlock(struct vnode *vp, int write_flag, caller_context_t *ct) 1109 { 1110 dcmn_err2(("devfs_rwunlock %s\n", VTODV(vp)->dv_name)); 1111 rw_exit(&VTODV(vp)->dv_contents); 1112 } 1113 1114 /* 1115 * XXX Should probably do a better job of computing the maximum 1116 * offset available in the directory. 1117 */ 1118 /*ARGSUSED1*/ 1119 static int 1120 devfs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, 1121 caller_context_t *ct) 1122 { 1123 ASSERT(vp->v_type == VDIR); 1124 dcmn_err2(("devfs_seek %s\n", VTODV(vp)->dv_name)); 1125 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 1126 } 1127 1128 vnodeops_t *dv_vnodeops; 1129 1130 const fs_operation_def_t dv_vnodeops_template[] = { 1131 VOPNAME_OPEN, { .vop_open = devfs_open }, 1132 VOPNAME_CLOSE, { .vop_close = devfs_close }, 1133 VOPNAME_READ, { .vop_read = devfs_read }, 1134 VOPNAME_WRITE, { .vop_write = devfs_write }, 1135 VOPNAME_IOCTL, { .vop_ioctl = devfs_ioctl }, 1136 VOPNAME_GETATTR, { .vop_getattr = devfs_getattr }, 1137 VOPNAME_SETATTR, { .vop_setattr = devfs_setattr }, 1138 VOPNAME_ACCESS, { .vop_access = devfs_access }, 1139 VOPNAME_LOOKUP, { .vop_lookup = devfs_lookup }, 1140 VOPNAME_CREATE, { .vop_create = devfs_create }, 1141 VOPNAME_READDIR, { .vop_readdir = devfs_readdir }, 1142 VOPNAME_FSYNC, { .vop_fsync = devfs_fsync }, 1143 VOPNAME_INACTIVE, { .vop_inactive = devfs_inactive }, 1144 VOPNAME_FID, { .vop_fid = devfs_fid }, 1145 VOPNAME_RWLOCK, { .vop_rwlock = devfs_rwlock }, 1146 VOPNAME_RWUNLOCK, { .vop_rwunlock = devfs_rwunlock }, 1147 VOPNAME_SEEK, { .vop_seek = devfs_seek }, 1148 VOPNAME_PATHCONF, { .vop_pathconf = devfs_pathconf }, 1149 VOPNAME_DISPOSE, { .error = fs_error }, 1150 VOPNAME_SETSECATTR, { .vop_setsecattr = devfs_setsecattr }, 1151 VOPNAME_GETSECATTR, { .vop_getsecattr = devfs_getsecattr }, 1152 NULL, NULL 1153 }; 1154