1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 22 /* All Rights Reserved */ 23 24 25 /* 26 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. 27 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 28 * Copyright 2017 Joyent, Inc. 29 * Copyright 2024 Oxide Computer Company 30 */ 31 32 /* 33 * Generic vnode operations. 
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/statvfs.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/unistd.h>
#include <sys/cred.h>
#include <sys/poll.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/stream.h>
#include <fs/fs_subr.h>
#include <fs/fs_reparse.h>
#include <sys/door.h>
#include <sys/acl.h>
#include <sys/share.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/file.h>	/* NOTE(review): duplicate of the include above */
#include <sys/nbmlock.h>
#include <acl/acl_common.h>
#include <sys/pathname.h>

/* required for fs_reject_epoll */
#include <sys/poll_impl.h>

/*
 * Forward declaration: callback used by fs_frlock() to leave/reenter the
 * nbmand critical region around a blocking lock request.
 */
static callb_cpr_t *frlock_serialize_blocked(flk_cb_when_t, void *);

/*
 * Tunable to limit the number of retries to recover from a STALE error.
 */
int fs_estale_retry = 5;

/*
 * Support for the reparse point door upcall.  The cached door handle is
 * protected by reparsed_door_lock.
 */
static door_handle_t reparsed_door;
static kmutex_t reparsed_door_lock;

/*
 * The associated operation is not supported by the file system.
 */
int
fs_nosys()
{
	return (ENOSYS);
}

/*
 * This is the version of syncfs that a file system gets by default if it
 * doesn't support the entry point and has a valid sync routine.
 */
int
fs_nosys_syncfs(vfs_t *vfsp, uint64_t flags, cred_t *cr)
{
	return (ENOSYS);
}

/*
 * The associated operation is invalid (on this vnode).
 */
int
fs_inval()
{
	return (EINVAL);
}

/*
 * The associated operation is valid only for directories.
 */
int
fs_notdir()
{
	return (ENOTDIR);
}

/*
 * Free the file system specific resources. For the file systems that
 * do not support the forced unmount, it will be a nop function.
 */

/*ARGSUSED*/
void
fs_freevfs(vfs_t *vfsp)
{
}

/* Default VOP_MAP entry point: memory mapping is not supported. */
/* ARGSUSED */
int
fs_nosys_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
    size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
    caller_context_t *ct)
{
	return (ENOSYS);
}

/* Default VOP_ADDMAP entry point: memory mapping is not supported. */
/* ARGSUSED */
int
fs_nosys_addmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
    size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
    caller_context_t *ct)
{
	return (ENOSYS);
}

/* Default VOP_POLL entry point for file systems that do not support poll. */
/* ARGSUSED */
int
fs_nosys_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
    struct pollhead **phpp, caller_context_t *ct)
{
	return (ENOSYS);
}


/*
 * The file system has nothing to sync to disk. However, the
 * VFS_SYNC operation must not fail.
 */
/* ARGSUSED */
int
fs_sync(struct vfs *vfspp, short flag, cred_t *cr)
{
	return (0);
}

/*
 * This should be used for file systems which do not need to support any kind of
 * sync(2) style operation.
 */
int
fs_syncfs_nop(vfs_t *vfspp, uint64_t flag, cred_t *cr)
{
	return (0);
}

/*
 * Does nothing but VOP_FSYNC must not fail.
 */
/* ARGSUSED */
int
fs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
{
	return (0);
}

/*
 * Does nothing but VOP_PUTPAGE must not fail.
 */
/* ARGSUSED */
int
fs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
    caller_context_t *ctp)
{
	return (0);
}

/*
 * Does nothing but VOP_IOCTL must not fail.
 */
/* ARGSUSED */
int
fs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred,
    int *rvalp)
{
	return (0);
}

/*
 * Read/write lock/unlock. Does nothing.
 */
/* ARGSUSED */
int
fs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
{
	return (-1);
}

/* ARGSUSED */
void
fs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
{
}

/*
 * Compare two vnodes.
 */
/*ARGSUSED2*/
int
fs_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
{
	return (vp1 == vp2);
}

/*
 * No-op seek operation.  Only validates that the new offset is within
 * the representable range.
 */
/* ARGSUSED */
int
fs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
{
	return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
}

/*
 * File and record locking.
 */
/* ARGSUSED */
int
fs_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag, offset_t offset,
    flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct)
{
	int frcmd;
	int nlmid;
	int error = 0;
	boolean_t skip_lock = B_FALSE;
	flk_callback_t serialize_callback;
	int serialize = 0;
	v_mode_t mode;

	switch (cmd) {

	case F_GETLK:
	case F_O_GETLK:
		if (flag & F_REMOTELOCK) {
			frcmd = RCMDLCK;
		} else if (flag & F_PXFSLOCK) {
			frcmd = PCMDLCK;
		} else {
			/* local request: identify the caller in the flock */
			frcmd = 0;
			bfp->l_pid = ttoproc(curthread)->p_pid;
			bfp->l_sysid = 0;
		}
		break;

	case F_OFD_GETLK:
		/*
		 * TBD we do not support remote OFD locks at this time.
		 */
		if (flag & (F_REMOTELOCK | F_PXFSLOCK)) {
			error = EOPNOTSUPP;
			goto done;
		}
		skip_lock = B_TRUE;
		break;

	case F_SETLK_NBMAND:
		/*
		 * Are NBMAND locks allowed on this file?
		 */
		if (!vp->v_vfsp ||
		    !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
			error = EINVAL;
			goto done;
		}
		if (vp->v_type != VREG) {
			error = EINVAL;
			goto done;
		}
		/*FALLTHROUGH*/

	case F_SETLK:
		if (flag & F_REMOTELOCK) {
			frcmd = SETFLCK|RCMDLCK;
		} else if (flag & F_PXFSLOCK) {
			frcmd = SETFLCK|PCMDLCK;
		} else {
			frcmd = SETFLCK;
			bfp->l_pid = ttoproc(curthread)->p_pid;
			bfp->l_sysid = 0;
		}
		if (cmd == F_SETLK_NBMAND &&
		    (bfp->l_type == F_RDLCK || bfp->l_type == F_WRLCK)) {
			frcmd |= NBMLCK;
		}

		/*
		 * For nbmand-checked vnodes, enter the critical region
		 * before checking for conflicting mappings; "done" will
		 * drop it again.
		 */
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_WRITER);
			serialize = 1;
			if (frcmd & NBMLCK) {
				mode = (bfp->l_type == F_RDLCK) ?
				    V_READ : V_RDANDWR;
				if (vn_is_mapped(vp, mode)) {
					error = EAGAIN;
					goto done;
				}
			}
		}
		break;

	case F_SETLKW:
		if (flag & F_REMOTELOCK) {
			frcmd = SETFLCK|SLPFLCK|RCMDLCK;
		} else if (flag & F_PXFSLOCK) {
			frcmd = SETFLCK|SLPFLCK|PCMDLCK;
		} else {
			frcmd = SETFLCK|SLPFLCK;
			bfp->l_pid = ttoproc(curthread)->p_pid;
			bfp->l_sysid = 0;
		}

		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_WRITER);
			serialize = 1;
		}
		break;

	case F_OFD_SETLK:
	case F_OFD_SETLKW:
	case F_FLOCK:
	case F_FLOCKW:
		/*
		 * TBD we do not support remote OFD locks at this time.
		 */
		if (flag & (F_REMOTELOCK | F_PXFSLOCK)) {
			error = EOPNOTSUPP;
			goto done;
		}
		skip_lock = B_TRUE;
		break;

	case F_HASREMOTELOCKS:
		nlmid = GETNLMID(bfp->l_sysid);
		if (nlmid != 0) {	/* booted as a cluster */
			l_has_rmt(bfp) =
			    cl_flk_has_remote_locks_for_nlmid(vp, nlmid);
		} else {		/* not booted as a cluster */
			l_has_rmt(bfp) = flk_has_remote_locks(vp);
		}

		goto done;

	default:
		error = EINVAL;
		goto done;
	}

	/*
	 * If this is a blocking lock request and we're serializing lock
	 * requests, modify the callback list to leave the critical region
	 * while we're waiting for the lock.
	 */

	if (serialize && (frcmd & SLPFLCK) != 0) {
		flk_add_callback(&serialize_callback,
		    frlock_serialize_blocked, vp, flk_cbp);
		flk_cbp = &serialize_callback;
	}

	if (!skip_lock)
		error = reclock(vp, bfp, frcmd, flag, offset, flk_cbp);

	if (serialize && (frcmd & SLPFLCK) != 0)
		flk_del_callback(&serialize_callback);

done:
	/* leave the nbmand critical region if we entered it above */
	if (serialize)
		nbl_end_crit(vp);

	return (error);
}

/*
 * Callback when a lock request blocks and we are serializing requests.  If
 * before sleeping, leave the critical region.  If after wakeup, reenter
 * the critical region.
 */

static callb_cpr_t *
frlock_serialize_blocked(flk_cb_when_t when, void *infop)
{
	vnode_t *vp = (vnode_t *)infop;

	if (when == FLK_BEFORE_SLEEP)
		nbl_end_crit(vp);
	else {
		nbl_start_crit(vp, RW_WRITER);
	}

	return (NULL);
}

/*
 * Allow any flags.
 */
/* ARGSUSED */
int
fs_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, caller_context_t *ct)
{
	return (0);
}

/*
 * Unlike poll(2), epoll should reject attempts to add normal files or
 * directories to a given handle. Most non-pseudo filesystems rely on
 * fs_poll() as their implementation of polling behavior.
Exceptions to that 437 * rule (ufs) can use fs_reject_epoll(), so they don't require access to the 438 * inner details of poll. Potential race conditions related to the poll module 439 * being loaded are avoided by implementing the check here in genunix. 440 */ 441 boolean_t 442 fs_reject_epoll() 443 { 444 /* Check if the currently-active pollcache is epoll-enabled. */ 445 return (curthread->t_pollcache != NULL && 446 (curthread->t_pollcache->pc_flag & PC_EPOLL) != 0); 447 } 448 449 /* ARGSUSED */ 450 int 451 fs_poll(vnode_t *vp, short events, int anyyet, short *reventsp, 452 struct pollhead **phpp, caller_context_t *ct) 453 { 454 /* 455 * Regular filesystems should reject epollers. On the off chance that 456 * a non-epoll consumer expresses the desire for edge-triggered 457 * polling, we reject them too. Yes, the expected error for this 458 * really is EPERM. 459 */ 460 if (fs_reject_epoll() || (events & POLLET) != 0) { 461 return (EPERM); 462 } 463 464 *reventsp = 0; 465 if (events & POLLIN) 466 *reventsp |= POLLIN; 467 if (events & POLLRDNORM) 468 *reventsp |= POLLRDNORM; 469 if (events & POLLRDBAND) 470 *reventsp |= POLLRDBAND; 471 if (events & POLLOUT) 472 *reventsp |= POLLOUT; 473 if (events & POLLWRBAND) 474 *reventsp |= POLLWRBAND; 475 476 return (0); 477 } 478 479 /* 480 * POSIX pathconf() support. 
 */
/* ARGSUSED */
int
fs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
    caller_context_t *ct)
{
	ulong_t val;
	int error = 0;
	struct statvfs64 vfsbuf;

	switch (cmd) {

	case _PC_LINK_MAX:
		val = MAXLINK;
		break;

	case _PC_MAX_CANON:
		val = MAX_CANON;
		break;

	case _PC_MAX_INPUT:
		val = MAX_INPUT;
		break;

	case _PC_NAME_MAX:
		/* name limit comes from the underlying file system */
		bzero(&vfsbuf, sizeof (vfsbuf));
		if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
			break;
		val = vfsbuf.f_namemax;
		break;

	case _PC_PATH_MAX:
	case _PC_SYMLINK_MAX:
		val = MAXPATHLEN;
		break;

	case _PC_PIPE_BUF:
		val = PIPE_BUF;
		break;

	case _PC_NO_TRUNC:
		if (vp->v_vfsp->vfs_flag & VFS_NOTRUNC)
			val = 1;	/* NOTRUNC is enabled for vp */
		else
			val = (ulong_t)-1;
		break;

	case _PC_VDISABLE:
		val = _POSIX_VDISABLE;
		break;

	case _PC_CHOWN_RESTRICTED:
		if (rstchown)
			val = rstchown;	/* chown restricted enabled */
		else
			val = (ulong_t)-1;
		break;

	case _PC_FILESIZEBITS:

		/*
		 * If ever we come here it means that underlying file system
		 * does not recognise the command and therefore this
		 * configurable limit cannot be determined. We return -1
		 * and don't change errno.
		 */

		val = (ulong_t)-1;	/* large file support */
		break;

	case _PC_ACL_ENABLED:
		val = 0;
		break;

	case _PC_CASE_BEHAVIOR:
		val = _CASE_SENSITIVE;
		if (vfs_has_feature(vp->v_vfsp, VFSFT_CASEINSENSITIVE) == 1)
			val |= _CASE_INSENSITIVE;
		if (vfs_has_feature(vp->v_vfsp, VFSFT_NOCASESENSITIVE) == 1)
			val &= ~_CASE_SENSITIVE;
		break;

	case _PC_SATTR_ENABLED:
	case _PC_SATTR_EXISTS:
		val = 0;
		break;

	case _PC_ACCESS_FILTERING:
		val = 0;
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error == 0)
		*valp = val;
	return (error);
}

/*
 * Dispose of a page.
 */
/* ARGSUSED */
void
fs_dispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr,
    caller_context_t *ct)
{

	ASSERT(fl == B_FREE || fl == B_INVAL);

	if (fl == B_FREE)
		page_free(pp, dn);
	else
		page_destroy(pp, dn);
}

/*
 * Page-dispose entry point for file systems that must never be asked to
 * dispose of a page; reaching here indicates a kernel bug, so panic.
 */
/* ARGSUSED */
void
fs_nodispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr,
    caller_context_t *ct)
{
	cmn_err(CE_PANIC, "fs_nodispose invoked");
}

/*
 * fabricate acls for file systems that do not support acls.
 */
/* ARGSUSED */
int
fs_fab_acl(vnode_t *vp, vsecattr_t *vsecattr, int flag, cred_t *cr,
    caller_context_t *ct)
{
	aclent_t *aclentp;
	struct vattr vattr;
	int error;
	size_t aclsize;

	vsecattr->vsa_aclcnt = 0;
	vsecattr->vsa_aclentsz = 0;
	vsecattr->vsa_aclentp = NULL;
	vsecattr->vsa_dfaclcnt = 0;	/* Default ACLs are not fabricated */
	vsecattr->vsa_dfaclentp = NULL;

	/* derive the fabricated ACL from the file's mode and ownership */
	vattr.va_mask = AT_MODE | AT_UID | AT_GID;
	if (error = VOP_GETATTR(vp, &vattr, 0, cr, ct))
		return (error);

	if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL)) {
		aclsize = 4 * sizeof (aclent_t);
		vsecattr->vsa_aclcnt = 4; /* USER, GROUP, OTHER, and CLASS */
		vsecattr->vsa_aclentp = kmem_zalloc(aclsize, KM_SLEEP);
		aclentp = vsecattr->vsa_aclentp;

		aclentp->a_type = USER_OBJ;	/* Owner */
		aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0700)) >> 6;
		aclentp->a_id = vattr.va_uid;	/* Really undefined */
		aclentp++;

		aclentp->a_type = GROUP_OBJ;	/* Group */
		aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0070)) >> 3;
		aclentp->a_id = vattr.va_gid;	/* Really undefined */
		aclentp++;

		aclentp->a_type = OTHER_OBJ;	/* Other */
		aclentp->a_perm = vattr.va_mode & 0007;
		aclentp->a_id = (gid_t)-1;	/* Really undefined */
		aclentp++;

		aclentp->a_type = CLASS_OBJ;	/* Class */
		aclentp->a_perm = (ushort_t)(0007);
		aclentp->a_id = (gid_t)-1;	/* Really undefined */
	} else if (vsecattr->vsa_mask & (VSA_ACECNT | VSA_ACE)) {
		/* caller frees the allocated trivial ACE list */
		VERIFY(0 == acl_trivial_create(vattr.va_mode,
		    (vp->v_type == VDIR), (ace_t **)&vsecattr->vsa_aclentp,
		    &vsecattr->vsa_aclcnt));
		vsecattr->vsa_aclentsz = vsecattr->vsa_aclcnt * sizeof (ace_t);
	}

	return (error);
}

/*
 * Common code for implementing DOS share reservations
 */
/* ARGSUSED4 */
int
fs_shrlock(struct vnode *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
    caller_context_t *ct)
{
	int error;

	/*
	 * Make sure that the file was opened with permissions appropriate
	 * for the request, and make sure the caller isn't trying to sneak
	 * in an NBMAND request.
	 */
	if (cmd == F_SHARE) {
		if (((shr->s_access & F_RDACC) && (flag & FREAD) == 0) ||
		    ((shr->s_access & F_WRACC) && (flag & FWRITE) == 0))
			return (EBADF);
		if (shr->s_access & (F_RMACC | F_MDACC))
			return (EINVAL);
		if (shr->s_deny & (F_MANDDNY | F_RMDNY))
			return (EINVAL);
	}
	if (cmd == F_SHARE_NBMAND) {
		/* make sure nbmand is allowed on the file */
		if (!vp->v_vfsp ||
		    !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
			return (EINVAL);
		}
		if (vp->v_type != VREG) {
			return (EINVAL);
		}
	}

	nbl_start_crit(vp, RW_WRITER);

	switch (cmd) {

	case F_SHARE_NBMAND:
		shr->s_deny |= F_MANDDNY;
		/*FALLTHROUGH*/
	case F_SHARE:
		error = add_share(vp, shr);
		break;

	case F_UNSHARE:
		error = del_share(vp, shr);
		break;

	case F_HASREMOTELOCKS:
		/*
		 * We are overloading this command to refer to remote
		 * shares as well as remote locks, despite its name.
		 */
		shr->s_access = shr_has_remote_shares(vp, shr->s_sysid);
		error = 0;
		break;

	default:
		error = EINVAL;
		break;
	}

	nbl_end_crit(vp);
	return (error);
}

/* Default vnevent entry point: vnode events are not supported. */
/*ARGSUSED1*/
int
fs_vnevent_nosupport(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
    caller_context_t *ct)
{
	ASSERT(vp != NULL);
	return (ENOTSUP);
}

/* Default vnevent entry point: vnode events are accepted and ignored. */
/*ARGSUSED1*/
int
fs_vnevent_support(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
    caller_context_t *ct)
{
	ASSERT(vp != NULL);
	return (0);
}

/*
 * return 1 for non-trivial ACL.
 *
 * NB: It is not necessary for the caller to VOP_RWLOCK since
 * we only issue VOP_GETSECATTR.
 *
 * Returns 0 == trivial
 *         1 == NOT Trivial
 *         <0 could not determine.
 */
int
fs_acl_nontrivial(vnode_t *vp, cred_t *cr)
{
	ulong_t acl_styles;
	ulong_t acl_flavor;
	vsecattr_t vsecattr;
	int error;
	int isnontrivial;

	/* determine the forms of ACLs maintained */
	error = VOP_PATHCONF(vp, _PC_ACL_ENABLED, &acl_styles, cr, NULL);

	/* clear bits we don't understand and establish default acl_style */
	acl_styles &= (_ACL_ACLENT_ENABLED | _ACL_ACE_ENABLED);
	if (error || (acl_styles == 0))
		acl_styles = _ACL_ACLENT_ENABLED;

	vsecattr.vsa_aclentp = NULL;
	vsecattr.vsa_dfaclentp = NULL;
	vsecattr.vsa_aclcnt = 0;
	vsecattr.vsa_dfaclcnt = 0;

	/* try each supported flavor in turn until one succeeds */
	while (acl_styles) {
		/* select one of the styles as current flavor */
		acl_flavor = 0;
		if (acl_styles & _ACL_ACLENT_ENABLED) {
			acl_flavor = _ACL_ACLENT_ENABLED;
			vsecattr.vsa_mask = VSA_ACLCNT | VSA_DFACLCNT;
		} else if (acl_styles & _ACL_ACE_ENABLED) {
			acl_flavor = _ACL_ACE_ENABLED;
			vsecattr.vsa_mask = VSA_ACECNT | VSA_ACE;
		}

		ASSERT(vsecattr.vsa_mask && acl_flavor);
		error = VOP_GETSECATTR(vp, &vsecattr, 0, cr, NULL);
		if (error == 0)
			break;

		/* that flavor failed */
		acl_styles &= ~acl_flavor;
	}

	/* if all styles fail then assume trivial */
	if (acl_styles == 0)
		return (0);

	/* process the flavor that worked */
	isnontrivial = 0;
	if (acl_flavor & _ACL_ACLENT_ENABLED) {
		/* more than the minimum entries means a real ACL was set */
		if (vsecattr.vsa_aclcnt > MIN_ACL_ENTRIES)
			isnontrivial = 1;
		if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
			kmem_free(vsecattr.vsa_aclentp,
			    vsecattr.vsa_aclcnt * sizeof (aclent_t));
		if (vsecattr.vsa_dfaclcnt && vsecattr.vsa_dfaclentp != NULL)
			kmem_free(vsecattr.vsa_dfaclentp,
			    vsecattr.vsa_dfaclcnt * sizeof (aclent_t));
	}
	if (acl_flavor & _ACL_ACE_ENABLED) {
		isnontrivial = ace_trivial(vsecattr.vsa_aclentp,
		    vsecattr.vsa_aclcnt);

		if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
			kmem_free(vsecattr.vsa_aclentp,
			    vsecattr.vsa_aclcnt * sizeof (ace_t));
		/* ACE has no vsecattr.vsa_dfaclcnt */
	}
	return (isnontrivial);
}

/*
 * Check whether we need a retry to recover from STALE error.
 */
int
fs_need_estale_retry(int retry_count)
{
	if (retry_count < fs_estale_retry)
		return (1);
	else
		return (0);
}


/* Registered anti-virus scan routine; NULL until a scanner registers one. */
static int (*fs_av_scan)(vnode_t *, cred_t *, int) = NULL;

/*
 * Routine for anti-virus scanner to call to register its scanning routine.
 */
void
fs_vscan_register(int (*av_scan)(vnode_t *, cred_t *, int))
{
	fs_av_scan = av_scan;
}

/*
 * Routine for file systems to call to initiate anti-virus scanning.
 * Scanning will only be done on REGular files (currently).
 */
int
fs_vscan(vnode_t *vp, cred_t *cr, int async)
{
	int ret = 0;

	if (fs_av_scan && vp->v_type == VREG)
		ret = (*fs_av_scan)(vp, cr, async);

	return (ret);
}

/*
 * support functions for reparse point
 */

/*
 * reparse_vnode_parse
 *
 * Read the symlink data of a reparse point specified by the vnode
 * and return the reparse data as name-value pair in the nvlist.
 */
int
reparse_vnode_parse(vnode_t *vp, nvlist_t *nvl)
{
	int err;
	char *lkdata;
	struct uio uio;
	struct iovec iov;

	if (vp == NULL || nvl == NULL)
		return (EINVAL);

	lkdata = kmem_alloc(MAXREPARSELEN, KM_SLEEP);

	/*
	 * Set up io vector to read sym link data
	 */
	iov.iov_base = lkdata;
	iov.iov_len = MAXREPARSELEN;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)0;
	uio.uio_resid = MAXREPARSELEN;

	if ((err = VOP_READLINK(vp, &uio, kcred, NULL)) == 0) {
		/* NUL-terminate at the number of bytes actually read */
		*(lkdata + MAXREPARSELEN - uio.uio_resid) = '\0';
		err = reparse_parse(lkdata, nvl);
	}
	kmem_free(lkdata, MAXREPARSELEN);	/* done with lkdata */

	return (err);
}

/* One-time initialization of the lock protecting the cached door handle. */
void
reparse_point_init()
{
	mutex_init(&reparsed_door_lock, NULL, MUTEX_DEFAULT, NULL);
}

/*
 * Return the cached door handle to the reparse daemon, opening and caching
 * it on first use.  Returns NULL if the door cannot be opened.
 */
static door_handle_t
reparse_door_get_handle()
{
	door_handle_t dh;

	mutex_enter(&reparsed_door_lock);
	if ((dh = reparsed_door) == NULL) {
		if (door_ki_open(REPARSED_DOOR, &reparsed_door) != 0) {
			reparsed_door = NULL;
			dh = NULL;
		} else
			dh = reparsed_door;
	}
	mutex_exit(&reparsed_door_lock);
	return (dh);
}

/* Invalidate the cached door handle so the next caller re-opens the door. */
static void
reparse_door_reset_handle()
{
	mutex_enter(&reparsed_door_lock);
	reparsed_door = NULL;
	mutex_exit(&reparsed_door_lock);
}

/*
 * reparse_kderef
 *
 * Accepts the service-specific item from the reparse point and returns
 * the service-specific data requested.  The caller specifies the size of
 * the buffer provided via *bufsz; the routine will fail with EOVERFLOW
 * if the results will not fit in the buffer, in which case, *bufsz will
 * contain the number of bytes needed to hold the results.
 *
 * if ok return 0 and update *bufsize with length of actual result
 * else return error code.
 */
int
reparse_kderef(const char *svc_type, const char *svc_data, char *buf,
    size_t *bufsize)
{
	int err, retries, need_free, retried_doorhd;
	size_t dlen, res_len;
	char *darg;
	door_arg_t door_args;
	reparsed_door_res_t *resp;
	door_handle_t rp_door;

	if (svc_type == NULL || svc_data == NULL || buf == NULL ||
	    bufsize == NULL)
		return (EINVAL);

	/* get reparsed's door handle */
	if ((rp_door = reparse_door_get_handle()) == NULL)
		return (EBADF);

	/* setup buffer for door_call args and results */
	dlen = strlen(svc_type) + strlen(svc_data) + 2;
	if (*bufsize < dlen) {
		darg = kmem_alloc(dlen, KM_SLEEP);
		need_free = 1;
	} else {
		darg = buf;	/* use same buffer for door's args & results */
		need_free = 0;
	}

	/* build argument string of door call */
	(void) snprintf(darg, dlen, "%s:%s", svc_type, svc_data);

	/* setup args for door call */
	door_args.data_ptr = darg;
	door_args.data_size = dlen;
	door_args.desc_ptr = NULL;
	door_args.desc_num = 0;
	door_args.rbuf = buf;
	door_args.rsize = *bufsize;

	/*
	 * do the door_call; transient failures (EAGAIN/EINTR) are retried
	 * with a delay, and a dead door server (EBADF) triggers exactly one
	 * attempt to re-open the door.
	 */
	retried_doorhd = 0;
	retries = 0;
	door_ki_hold(rp_door);
	while ((err = door_ki_upcall_limited(rp_door, &door_args,
	    NULL, SIZE_MAX, 0)) != 0) {
		if (err == EAGAIN || err == EINTR) {
			if (++retries < REPARSED_DOORCALL_MAX_RETRY) {
				delay(SEC_TO_TICK(1));
				continue;
			}
		} else if (err == EBADF) {
			/* door server goes away... */
			reparse_door_reset_handle();

			if (retried_doorhd == 0) {
				door_ki_rele(rp_door);
				retried_doorhd++;
				rp_door = reparse_door_get_handle();
				if (rp_door != NULL) {
					door_ki_hold(rp_door);
					continue;
				}
			}
		}
		break;
	}

	if (rp_door)
		door_ki_rele(rp_door);

	if (need_free)
		kmem_free(darg, dlen);	/* done with args buffer */

	if (err != 0)
		return (err);

	resp = (reparsed_door_res_t *)door_args.rbuf;
	if ((err = resp->res_status) == 0) {
		/*
		 * have to save the length of the results before the
		 * bcopy below since it can be an overlap copy that
		 * overwrites the reparsed_door_res_t structure at
		 * the beginning of the buffer.
		 */
		res_len = (size_t)resp->res_len;

		/* deref call is ok */
		if (res_len > *bufsize)
			err = EOVERFLOW;
		else
			bcopy(resp->res_data, buf, res_len);
		*bufsize = res_len;
	}
	/* the door layer may have allocated a larger result buffer */
	if (door_args.rbuf != buf)
		kmem_free(door_args.rbuf, door_args.rsize);

	return (err);
}

/*
 * This routine is used to create a single vfs_t that is used globally in the
 * system for a pseudo-file system that does not actually ever "mount", like
 * sockfs or fifofs. This constructs a single vfs_t that will not be
 * accidentally freed nor will it end up on a zone's list of file systems.
 * Please do not add new file systems that need to use this.
The kmem_zalloc 1061 * explicitly takes care of ensuring the following (amongst others): 1062 * 1063 * - This vfs_t is explicitly not linked on any list (vfs_next/prev are NULL) 1064 * - The vnode is not covered and has no flags 1065 * - There is no mount point, resource, or options 1066 * - There is no zone that nominally owns this 1067 * - There is no file system specific data 1068 */ 1069 vfs_t * 1070 fs_vfsp_global(struct vfsops *ops, dev_t dev, int fstype, uint_t bsize) 1071 { 1072 vfs_t *vfsp = kmem_zalloc(sizeof (struct vfs), KM_SLEEP); 1073 1074 vfs_setops(vfsp, ops); 1075 vfsp->vfs_bsize = bsize; 1076 vfsp->vfs_fstype = fstype; 1077 vfs_make_fsid(&vfsp->vfs_fsid, dev, fstype); 1078 vfsp->vfs_dev = dev; 1079 1080 /* 1081 * We purposefully bump the reference on this vfs_t to one. This vfs_t 1082 * is intended to always exist regardless of surrounding activity. 1083 * Importantly this ensures that something that incidentally performs a 1084 * VFS_HOLD followed by a VFS_RELE on the vfs_t doesn't end up freeing 1085 * this. 1086 */ 1087 vfsp->vfs_count = 1; 1088 1089 return (vfsp); 1090 } 1091