/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/


/*
 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2017 Joyent, Inc.
 * Copyright 2022 Oxide Computer Company
 */

/*
 * Generic vnode operations.
 */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/statvfs.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/unistd.h>
#include <sys/cred.h>
#include <sys/poll.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/stream.h>
#include <fs/fs_subr.h>
#include <fs/fs_reparse.h>
#include <sys/door.h>
#include <sys/acl.h>
#include <sys/share.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/nbmlock.h>
#include <acl/acl_common.h>
#include <sys/pathname.h>

/* required for fs_reject_epoll */
#include <sys/poll_impl.h>

static callb_cpr_t *frlock_serialize_blocked(flk_cb_when_t, void *);

/*
 * Tunable to limit the number of retries used to recover from ESTALE errors.
 */
int fs_estale_retry = 5;

/*
 * Support for the reparse point door upcall.
 */
static door_handle_t reparsed_door;
static kmutex_t reparsed_door_lock;

/*
 * The associated operation is not supported by the file system.
 */
int
fs_nosys()
{
        return (ENOSYS);
}

/*
 * The associated operation is invalid (on this vnode).
 */
int
fs_inval()
{
        return (EINVAL);
}

/*
 * The associated operation is valid only for directories.
 */
int
fs_notdir()
{
        return (ENOTDIR);
}
/*
 * Free file system specific resources.  For file systems that do not
 * support forced unmount, this is a no-op.
 */

/*ARGSUSED*/
void
fs_freevfs(vfs_t *vfsp)
{
}

/* ARGSUSED */
int
fs_nosys_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
    size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
    caller_context_t *ct)
{
        return (ENOSYS);
}

/* ARGSUSED */
int
fs_nosys_addmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
    size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
    caller_context_t *ct)
{
        return (ENOSYS);
}

/* ARGSUSED */
int
fs_nosys_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
    struct pollhead **phpp, caller_context_t *ct)
{
        return (ENOSYS);
}


/*
 * The file system has nothing to sync to disk.  However, the
 * VFS_SYNC operation must not fail.
 */
/* ARGSUSED */
int
fs_sync(struct vfs *vfspp, short flag, cred_t *cr)
{
        return (0);
}

/*
 * Does nothing but VOP_FSYNC must not fail.
 */
/* ARGSUSED */
int
fs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
{
        return (0);
}

/*
 * Does nothing but VOP_PUTPAGE must not fail.
 */
/* ARGSUSED */
int
fs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
    caller_context_t *ctp)
{
        return (0);
}

/*
 * Does nothing but VOP_IOCTL must not fail.
 */
/* ARGSUSED */
int
fs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred,
    int *rvalp)
{
        return (0);
}

/*
 * Read/write lock/unlock.  Does nothing.
 */
/* ARGSUSED */
int
fs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
{
        return (-1);
}

/* ARGSUSED */
void
fs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
{
}

/*
 * Compare two vnodes.
 */
/*ARGSUSED2*/
int
fs_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
{
        return (vp1 == vp2);
}

/*
 * No-op seek operation.
 */
/* ARGSUSED */
int
fs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
{
        return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
}
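/*
 * The stubs above are normally consumed through a file system's vnodeops
 * template rather than called directly.  The sketch below is hypothetical:
 * "myfs" and its entry points are made-up names, only a few operations are
 * shown, and the exact template initializer form is defined by
 * fs_operation_def_t in sys/vfs.h and consumed by vn_make_ops():
 *
 *	static const fs_operation_def_t myfs_vnodeops_template[] = {
 *		{ VOPNAME_OPEN,		{ .vop_open = myfs_open } },
 *		{ VOPNAME_IOCTL,	{ .vop_ioctl = fs_ioctl } },
 *		{ VOPNAME_FSYNC,	{ .vop_fsync = fs_fsync } },
 *		{ VOPNAME_SEEK,		{ .vop_seek = fs_seek } },
 *		{ VOPNAME_MAP,		{ .vop_map = fs_nosys_map } },
 *		{ NULL,			{ NULL } }
 *	};
 *
 *	(void) vn_make_ops("myfs", myfs_vnodeops_template, &myfs_vnodeops);
 *
 * File systems that have nothing special to do for a given operation simply
 * point the template entry at one of the generic fs_* routines in this file.
 */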
/*
 * File and record locking.
 */
/* ARGSUSED */
int
fs_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag, offset_t offset,
    flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct)
{
        int frcmd;
        int nlmid;
        int error = 0;
        boolean_t skip_lock = B_FALSE;
        flk_callback_t serialize_callback;
        int serialize = 0;
        v_mode_t mode;

        switch (cmd) {

        case F_GETLK:
        case F_O_GETLK:
                if (flag & F_REMOTELOCK) {
                        frcmd = RCMDLCK;
                } else if (flag & F_PXFSLOCK) {
                        frcmd = PCMDLCK;
                } else {
                        frcmd = 0;
                        bfp->l_pid = ttoproc(curthread)->p_pid;
                        bfp->l_sysid = 0;
                }
                break;

        case F_OFD_GETLK:
                /*
                 * TBD we do not support remote OFD locks at this time.
                 */
                if (flag & (F_REMOTELOCK | F_PXFSLOCK)) {
                        error = EOPNOTSUPP;
                        goto done;
                }
                skip_lock = B_TRUE;
                break;

        case F_SETLK_NBMAND:
                /*
                 * Are NBMAND locks allowed on this file?
                 */
                if (!vp->v_vfsp ||
                    !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
                        error = EINVAL;
                        goto done;
                }
                if (vp->v_type != VREG) {
                        error = EINVAL;
                        goto done;
                }
                /*FALLTHROUGH*/

        case F_SETLK:
                if (flag & F_REMOTELOCK) {
                        frcmd = SETFLCK|RCMDLCK;
                } else if (flag & F_PXFSLOCK) {
                        frcmd = SETFLCK|PCMDLCK;
                } else {
                        frcmd = SETFLCK;
                        bfp->l_pid = ttoproc(curthread)->p_pid;
                        bfp->l_sysid = 0;
                }
                if (cmd == F_SETLK_NBMAND &&
                    (bfp->l_type == F_RDLCK || bfp->l_type == F_WRLCK)) {
                        frcmd |= NBMLCK;
                }

                if (nbl_need_check(vp)) {
                        nbl_start_crit(vp, RW_WRITER);
                        serialize = 1;
                        if (frcmd & NBMLCK) {
                                mode = (bfp->l_type == F_RDLCK) ?
                                    V_READ : V_RDANDWR;
                                if (vn_is_mapped(vp, mode)) {
                                        error = EAGAIN;
                                        goto done;
                                }
                        }
                }
                break;

        case F_SETLKW:
                if (flag & F_REMOTELOCK) {
                        frcmd = SETFLCK|SLPFLCK|RCMDLCK;
                } else if (flag & F_PXFSLOCK) {
                        frcmd = SETFLCK|SLPFLCK|PCMDLCK;
                } else {
                        frcmd = SETFLCK|SLPFLCK;
                        bfp->l_pid = ttoproc(curthread)->p_pid;
                        bfp->l_sysid = 0;
                }

                if (nbl_need_check(vp)) {
                        nbl_start_crit(vp, RW_WRITER);
                        serialize = 1;
                }
                break;

        case F_OFD_SETLK:
        case F_OFD_SETLKW:
        case F_FLOCK:
        case F_FLOCKW:
                /*
                 * TBD we do not support remote OFD locks at this time.
                 */
                if (flag & (F_REMOTELOCK | F_PXFSLOCK)) {
                        error = EOPNOTSUPP;
                        goto done;
                }
                skip_lock = B_TRUE;
                break;

        case F_HASREMOTELOCKS:
                nlmid = GETNLMID(bfp->l_sysid);
                if (nlmid != 0) {       /* booted as a cluster */
                        l_has_rmt(bfp) =
                            cl_flk_has_remote_locks_for_nlmid(vp, nlmid);
                } else {                /* not booted as a cluster */
                        l_has_rmt(bfp) = flk_has_remote_locks(vp);
                }

                goto done;

        default:
                error = EINVAL;
                goto done;
        }

        /*
         * If this is a blocking lock request and we're serializing lock
         * requests, modify the callback list to leave the critical region
         * while we're waiting for the lock.
         */

        if (serialize && (frcmd & SLPFLCK) != 0) {
                flk_add_callback(&serialize_callback,
                    frlock_serialize_blocked, vp, flk_cbp);
                flk_cbp = &serialize_callback;
        }

        if (!skip_lock)
                error = reclock(vp, bfp, frcmd, flag, offset, flk_cbp);

        if (serialize && (frcmd & SLPFLCK) != 0)
                flk_del_callback(&serialize_callback);

done:
        if (serialize)
                nbl_end_crit(vp);

        return (error);
}

/*
 * Callback for a lock request that blocks while we are serializing requests:
 * before sleeping, leave the critical region; after wakeup, reenter it.
 */

static callb_cpr_t *
frlock_serialize_blocked(flk_cb_when_t when, void *infop)
{
        vnode_t *vp = (vnode_t *)infop;

        if (when == FLK_BEFORE_SLEEP)
                nbl_end_crit(vp);
        else
                nbl_start_crit(vp, RW_WRITER);

        return (NULL);
}
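/*
 * A file system typically provides its own VOP_FRLOCK entry point only to
 * veto requests that conflict with file-system specific state, and then
 * delegates the actual lock bookkeeping to fs_frlock().  A hypothetical
 * sketch follows; the "myfs" helpers are invented names, while MANDLOCK()
 * is the existing mandatory-locking check from sys/vnode.h:
 *
 *	static int
 *	myfs_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
 *	    offset_t offset, flk_callback_t *flk_cbp, cred_t *cr,
 *	    caller_context_t *ct)
 *	{
 *		if (myfs_is_mapped(vp) && MANDLOCK(vp, myfs_get_mode(vp)))
 *			return (EAGAIN);
 *		return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp,
 *		    cr, ct));
 *	}
 */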
/*
 * Allow any flags.
 */
/* ARGSUSED */
int
fs_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, caller_context_t *ct)
{
        return (0);
}

/*
 * Unlike poll(2), epoll should reject attempts to add normal files or
 * directories to a given handle.  Most non-pseudo filesystems rely on
 * fs_poll() as their implementation of polling behavior.  Exceptions to that
 * rule (ufs) can use fs_reject_epoll(), so they don't require access to the
 * inner details of poll.  Potential race conditions related to the poll
 * module being loaded are avoided by implementing the check here in genunix.
 */
boolean_t
fs_reject_epoll()
{
        /* Check if the currently-active pollcache is epoll-enabled. */
        return (curthread->t_pollcache != NULL &&
            (curthread->t_pollcache->pc_flag & PC_EPOLL) != 0);
}

/* ARGSUSED */
int
fs_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
    struct pollhead **phpp, caller_context_t *ct)
{
        /*
         * Regular filesystems should reject epollers.  On the off chance that
         * a non-epoll consumer expresses the desire for edge-triggered
         * polling, we reject them too.  Yes, the expected error for this
         * really is EPERM.
         */
        if (fs_reject_epoll() || (events & POLLET) != 0) {
                return (EPERM);
        }

        *reventsp = 0;
        if (events & POLLIN)
                *reventsp |= POLLIN;
        if (events & POLLRDNORM)
                *reventsp |= POLLRDNORM;
        if (events & POLLRDBAND)
                *reventsp |= POLLRDBAND;
        if (events & POLLOUT)
                *reventsp |= POLLOUT;
        if (events & POLLWRBAND)
                *reventsp |= POLLWRBAND;

        return (0);
}

/*
 * POSIX pathconf() support.
 */
/* ARGSUSED */
int
fs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
    caller_context_t *ct)
{
        ulong_t val;
        int error = 0;
        struct statvfs64 vfsbuf;

        switch (cmd) {

        case _PC_LINK_MAX:
                val = MAXLINK;
                break;

        case _PC_MAX_CANON:
                val = MAX_CANON;
                break;

        case _PC_MAX_INPUT:
                val = MAX_INPUT;
                break;

        case _PC_NAME_MAX:
                bzero(&vfsbuf, sizeof (vfsbuf));
                if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
                        break;
                val = vfsbuf.f_namemax;
                break;

        case _PC_PATH_MAX:
        case _PC_SYMLINK_MAX:
                val = MAXPATHLEN;
                break;

        case _PC_PIPE_BUF:
                val = PIPE_BUF;
                break;

        case _PC_NO_TRUNC:
                if (vp->v_vfsp->vfs_flag & VFS_NOTRUNC)
                        val = 1;        /* NOTRUNC is enabled for vp */
                else
                        val = (ulong_t)-1;
                break;

        case _PC_VDISABLE:
                val = _POSIX_VDISABLE;
                break;

        case _PC_CHOWN_RESTRICTED:
                if (rstchown)
                        val = rstchown; /* chown restricted enabled */
                else
                        val = (ulong_t)-1;
                break;

        case _PC_FILESIZEBITS:

                /*
                 * If we get here, the underlying file system does not
                 * recognize the command, so this configurable limit cannot
                 * be determined.  Return -1 and do not change errno.
                 */

                val = (ulong_t)-1;      /* large file support */
                break;

        case _PC_ACL_ENABLED:
                val = 0;
                break;

        case _PC_CASE_BEHAVIOR:
                val = _CASE_SENSITIVE;
                if (vfs_has_feature(vp->v_vfsp, VFSFT_CASEINSENSITIVE) == 1)
                        val |= _CASE_INSENSITIVE;
                if (vfs_has_feature(vp->v_vfsp, VFSFT_NOCASESENSITIVE) == 1)
                        val &= ~_CASE_SENSITIVE;
                break;

        case _PC_SATTR_ENABLED:
        case _PC_SATTR_EXISTS:
                val = 0;
                break;

        case _PC_ACCESS_FILTERING:
                val = 0;
                break;

        default:
                error = EINVAL;
                break;
        }

        if (error == 0)
                *valp = val;
        return (error);
}
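/*
 * A file system usually answers only the pathconf queries it has better
 * information for and hands everything else to fs_pathconf().  A
 * hypothetical sketch (the "myfs" names and the specific values returned
 * are invented for illustration):
 *
 *	static int
 *	myfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
 *	    caller_context_t *ct)
 *	{
 *		switch (cmd) {
 *		case _PC_FILESIZEBITS:
 *			*valp = 64;
 *			return (0);
 *		case _PC_ACL_ENABLED:
 *			*valp = _ACL_ACE_ENABLED;
 *			return (0);
 *		default:
 *			return (fs_pathconf(vp, cmd, valp, cr, ct));
 *		}
 *	}
 */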
/*
 * Dispose of a page.
 */
/* ARGSUSED */
void
fs_dispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr,
    caller_context_t *ct)
{

        ASSERT(fl == B_FREE || fl == B_INVAL);

        if (fl == B_FREE)
                page_free(pp, dn);
        else
                page_destroy(pp, dn);
}

/* ARGSUSED */
void
fs_nodispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr,
    caller_context_t *ct)
{
        cmn_err(CE_PANIC, "fs_nodispose invoked");
}

/*
 * fabricate acls for file systems that do not support acls.
 */
/* ARGSUSED */
int
fs_fab_acl(vnode_t *vp, vsecattr_t *vsecattr, int flag, cred_t *cr,
    caller_context_t *ct)
{
        aclent_t        *aclentp;
        struct vattr    vattr;
        int             error;
        size_t          aclsize;

        vsecattr->vsa_aclcnt    = 0;
        vsecattr->vsa_aclentsz  = 0;
        vsecattr->vsa_aclentp   = NULL;
        vsecattr->vsa_dfaclcnt  = 0;    /* Default ACLs are not fabricated */
        vsecattr->vsa_dfaclentp = NULL;

        vattr.va_mask = AT_MODE | AT_UID | AT_GID;
        if (error = VOP_GETATTR(vp, &vattr, 0, cr, ct))
                return (error);

        if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL)) {
                aclsize = 4 * sizeof (aclent_t);
                vsecattr->vsa_aclcnt = 4; /* USER, GROUP, OTHER, and CLASS */
                vsecattr->vsa_aclentp = kmem_zalloc(aclsize, KM_SLEEP);
                aclentp = vsecattr->vsa_aclentp;

                aclentp->a_type = USER_OBJ;     /* Owner */
                aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0700)) >> 6;
                aclentp->a_id = vattr.va_uid;   /* Really undefined */
                aclentp++;

                aclentp->a_type = GROUP_OBJ;    /* Group */
                aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0070)) >> 3;
                aclentp->a_id = vattr.va_gid;   /* Really undefined */
                aclentp++;

                aclentp->a_type = OTHER_OBJ;    /* Other */
                aclentp->a_perm = vattr.va_mode & 0007;
                aclentp->a_id = (gid_t)-1;      /* Really undefined */
                aclentp++;

                aclentp->a_type = CLASS_OBJ;    /* Class */
                aclentp->a_perm = (ushort_t)(0007);
                aclentp->a_id = (gid_t)-1;      /* Really undefined */
        } else if (vsecattr->vsa_mask & (VSA_ACECNT | VSA_ACE)) {
                VERIFY(0 == acl_trivial_create(vattr.va_mode,
                    (vp->v_type == VDIR), (ace_t **)&vsecattr->vsa_aclentp,
                    &vsecattr->vsa_aclcnt));
                vsecattr->vsa_aclentsz = vsecattr->vsa_aclcnt * sizeof (ace_t);
        }

        return (error);
}
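/*
 * A file system with no native ACL storage can point its getsecattr entry
 * at fs_fab_acl() so callers still see a trivial ACL synthesized from the
 * mode bits, e.g. with a hypothetical template fragment like:
 *
 *	{ VOPNAME_GETSECATTR,	{ .vop_getsecattr = fs_fab_acl } },
 *
 * For a mode of 0644 the aclent-style result above is four entries:
 * USER_OBJ rw-, GROUP_OBJ r--, OTHER_OBJ r--, and CLASS_OBJ rwx (0007).
 */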
/*
 * Common code for implementing DOS share reservations
 */
/* ARGSUSED4 */
int
fs_shrlock(struct vnode *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
    caller_context_t *ct)
{
        int error;

        /*
         * Make sure that the file was opened with permissions appropriate
         * for the request, and make sure the caller isn't trying to sneak
         * in an NBMAND request.
         */
        if (cmd == F_SHARE) {
                if (((shr->s_access & F_RDACC) && (flag & FREAD) == 0) ||
                    ((shr->s_access & F_WRACC) && (flag & FWRITE) == 0))
                        return (EBADF);
                if (shr->s_access & (F_RMACC | F_MDACC))
                        return (EINVAL);
                if (shr->s_deny & (F_MANDDNY | F_RMDNY))
                        return (EINVAL);
        }
        if (cmd == F_SHARE_NBMAND) {
                /* make sure nbmand is allowed on the file */
                if (!vp->v_vfsp ||
                    !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
                        return (EINVAL);
                }
                if (vp->v_type != VREG) {
                        return (EINVAL);
                }
        }

        nbl_start_crit(vp, RW_WRITER);

        switch (cmd) {

        case F_SHARE_NBMAND:
                shr->s_deny |= F_MANDDNY;
                /*FALLTHROUGH*/
        case F_SHARE:
                error = add_share(vp, shr);
                break;

        case F_UNSHARE:
                error = del_share(vp, shr);
                break;

        case F_HASREMOTELOCKS:
                /*
                 * We are overloading this command to refer to remote
                 * shares as well as remote locks, despite its name.
                 */
                shr->s_access = shr_has_remote_shares(vp, shr->s_sysid);
                error = 0;
                break;

        default:
                error = EINVAL;
                break;
        }

        nbl_end_crit(vp);
        return (error);
}

/*ARGSUSED1*/
int
fs_vnevent_nosupport(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
    caller_context_t *ct)
{
        ASSERT(vp != NULL);
        return (ENOTSUP);
}

/*ARGSUSED1*/
int
fs_vnevent_support(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
    caller_context_t *ct)
{
        ASSERT(vp != NULL);
        return (0);
}
/*
 * Return 1 for a non-trivial ACL.
 *
 * NB: It is not necessary for the caller to VOP_RWLOCK since
 *     we only issue VOP_GETSECATTR.
 *
 * Returns	 0 == trivial
 *		 1 == NOT trivial
 *		<0 == could not determine
 */
int
fs_acl_nontrivial(vnode_t *vp, cred_t *cr)
{
        ulong_t acl_styles;
        ulong_t acl_flavor;
        vsecattr_t vsecattr;
        int error;
        int isnontrivial;

        /* determine the forms of ACLs maintained */
        error = VOP_PATHCONF(vp, _PC_ACL_ENABLED, &acl_styles, cr, NULL);

        /* clear bits we don't understand and establish default acl_style */
        acl_styles &= (_ACL_ACLENT_ENABLED | _ACL_ACE_ENABLED);
        if (error || (acl_styles == 0))
                acl_styles = _ACL_ACLENT_ENABLED;

        vsecattr.vsa_aclentp = NULL;
        vsecattr.vsa_dfaclentp = NULL;
        vsecattr.vsa_aclcnt = 0;
        vsecattr.vsa_dfaclcnt = 0;

        while (acl_styles) {
                /* select one of the styles as current flavor */
                acl_flavor = 0;
                if (acl_styles & _ACL_ACLENT_ENABLED) {
                        acl_flavor = _ACL_ACLENT_ENABLED;
                        vsecattr.vsa_mask = VSA_ACLCNT | VSA_DFACLCNT;
                } else if (acl_styles & _ACL_ACE_ENABLED) {
                        acl_flavor = _ACL_ACE_ENABLED;
                        vsecattr.vsa_mask = VSA_ACECNT | VSA_ACE;
                }

                ASSERT(vsecattr.vsa_mask && acl_flavor);
                error = VOP_GETSECATTR(vp, &vsecattr, 0, cr, NULL);
                if (error == 0)
                        break;

                /* that flavor failed */
                acl_styles &= ~acl_flavor;
        }

        /* if all styles fail then assume trivial */
        if (acl_styles == 0)
                return (0);

        /* process the flavor that worked */
        isnontrivial = 0;
        if (acl_flavor & _ACL_ACLENT_ENABLED) {
                if (vsecattr.vsa_aclcnt > MIN_ACL_ENTRIES)
                        isnontrivial = 1;
                if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
                        kmem_free(vsecattr.vsa_aclentp,
                            vsecattr.vsa_aclcnt * sizeof (aclent_t));
                if (vsecattr.vsa_dfaclcnt && vsecattr.vsa_dfaclentp != NULL)
                        kmem_free(vsecattr.vsa_dfaclentp,
                            vsecattr.vsa_dfaclcnt * sizeof (aclent_t));
        }
        if (acl_flavor & _ACL_ACE_ENABLED) {
                isnontrivial = ace_trivial(vsecattr.vsa_aclentp,
                    vsecattr.vsa_aclcnt);

                if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
                        kmem_free(vsecattr.vsa_aclentp,
                            vsecattr.vsa_aclcnt * sizeof (ace_t));
                /* ACE has no vsecattr.vsa_dfaclcnt */
        }
        return (isnontrivial);
}

/*
 * Check whether we need a retry to recover from an ESTALE error.
 */
int
fs_need_estale_retry(int retry_count)
{
        if (retry_count < fs_estale_retry)
                return (1);
        else
                return (0);
}


static int (*fs_av_scan)(vnode_t *, cred_t *, int) = NULL;

/*
 * Routine for the anti-virus scanner to call to register its scanning routine.
 */
void
fs_vscan_register(int (*av_scan)(vnode_t *, cred_t *, int))
{
        fs_av_scan = av_scan;
}

/*
 * Routine for file systems to call to initiate anti-virus scanning.
 * Scanning will only be done on REGular files (currently).
 */
int
fs_vscan(vnode_t *vp, cred_t *cr, int async)
{
        int ret = 0;

        if (fs_av_scan && vp->v_type == VREG)
                ret = (*fs_av_scan)(vp, cr, async);

        return (ret);
}
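/*
 * The scanner registers its entry point once via fs_vscan_register(), and a
 * participating file system calls fs_vscan() from its open and/or close
 * paths.  A hedged sketch of the file-system side is shown below; "myfs" is
 * a made-up name and the surrounding logic is illustrative only:
 *
 *	static int
 *	myfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
 *	{
 *		int error = 0;
 *
 *		(file-system specific open processing here)
 *
 *		if (error == 0 && (flag & (FREAD | FWRITE)) != 0)
 *			error = fs_vscan(*vpp, cr, 0);
 *		return (error);
 *	}
 *
 * fs_vscan() is a no-op unless a scanner has registered and the vnode is a
 * regular file.
 */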
/*
 * Support functions for reparse points.
 */

/*
 * reparse_vnode_parse
 *
 * Read the symlink data of a reparse point specified by the vnode
 * and return the reparse data as name-value pairs in the nvlist.
 */
int
reparse_vnode_parse(vnode_t *vp, nvlist_t *nvl)
{
        int err;
        char *lkdata;
        struct uio uio;
        struct iovec iov;

        if (vp == NULL || nvl == NULL)
                return (EINVAL);

        lkdata = kmem_alloc(MAXREPARSELEN, KM_SLEEP);

        /*
         * Set up io vector to read sym link data
         */
        iov.iov_base = lkdata;
        iov.iov_len = MAXREPARSELEN;
        uio.uio_iov = &iov;
        uio.uio_iovcnt = 1;
        uio.uio_segflg = UIO_SYSSPACE;
        uio.uio_extflg = UIO_COPY_CACHED;
        uio.uio_loffset = (offset_t)0;
        uio.uio_resid = MAXREPARSELEN;

        if ((err = VOP_READLINK(vp, &uio, kcred, NULL)) == 0) {
                *(lkdata + MAXREPARSELEN - uio.uio_resid) = '\0';
                err = reparse_parse(lkdata, nvl);
        }
        kmem_free(lkdata, MAXREPARSELEN);       /* done with lkdata */

        return (err);
}

void
reparse_point_init()
{
        mutex_init(&reparsed_door_lock, NULL, MUTEX_DEFAULT, NULL);
}

static door_handle_t
reparse_door_get_handle()
{
        door_handle_t dh;

        mutex_enter(&reparsed_door_lock);
        if ((dh = reparsed_door) == NULL) {
                if (door_ki_open(REPARSED_DOOR, &reparsed_door) != 0) {
                        reparsed_door = NULL;
                        dh = NULL;
                } else
                        dh = reparsed_door;
        }
        mutex_exit(&reparsed_door_lock);
        return (dh);
}

static void
reparse_door_reset_handle()
{
        mutex_enter(&reparsed_door_lock);
        reparsed_door = NULL;
        mutex_exit(&reparsed_door_lock);
}

/*
 * reparse_kderef
 *
 * Accepts the service-specific item from the reparse point and returns
 * the service-specific data requested.  The caller specifies the size of
 * the buffer provided via *bufsize; the routine will fail with EOVERFLOW
 * if the results will not fit in the buffer, in which case, *bufsize will
 * contain the number of bytes needed to hold the results.
 *
 * On success, returns 0 and updates *bufsize with the length of the actual
 * result; otherwise returns an error code.
 */
int
reparse_kderef(const char *svc_type, const char *svc_data, char *buf,
    size_t *bufsize)
{
        int err, retries, need_free, retried_doorhd;
        size_t dlen, res_len;
        char *darg;
        door_arg_t door_args;
        reparsed_door_res_t *resp;
        door_handle_t rp_door;

        if (svc_type == NULL || svc_data == NULL || buf == NULL ||
            bufsize == NULL)
                return (EINVAL);

        /* get reparsed's door handle */
        if ((rp_door = reparse_door_get_handle()) == NULL)
                return (EBADF);

        /* setup buffer for door_call args and results */
        dlen = strlen(svc_type) + strlen(svc_data) + 2;
        if (*bufsize < dlen) {
                darg = kmem_alloc(dlen, KM_SLEEP);
                need_free = 1;
        } else {
                darg = buf;     /* use same buffer for door's args & results */
                need_free = 0;
        }

        /* build argument string of door call */
        (void) snprintf(darg, dlen, "%s:%s", svc_type, svc_data);

        /* setup args for door call */
        door_args.data_ptr = darg;
        door_args.data_size = dlen;
        door_args.desc_ptr = NULL;
        door_args.desc_num = 0;
        door_args.rbuf = buf;
        door_args.rsize = *bufsize;

        /* do the door_call */
        retried_doorhd = 0;
        retries = 0;
        door_ki_hold(rp_door);
        while ((err = door_ki_upcall_limited(rp_door, &door_args,
            NULL, SIZE_MAX, 0)) != 0) {
                if (err == EAGAIN || err == EINTR) {
                        if (++retries < REPARSED_DOORCALL_MAX_RETRY) {
                                delay(SEC_TO_TICK(1));
                                continue;
                        }
                } else if (err == EBADF) {
                        /* door server goes away... */
                        reparse_door_reset_handle();

                        if (retried_doorhd == 0) {
                                door_ki_rele(rp_door);
                                retried_doorhd++;
                                rp_door = reparse_door_get_handle();
                                if (rp_door != NULL) {
                                        door_ki_hold(rp_door);
                                        continue;
                                }
                        }
                }
                break;
        }

        if (rp_door)
                door_ki_rele(rp_door);

        if (need_free)
                kmem_free(darg, dlen);          /* done with args buffer */

        if (err != 0)
                return (err);

        resp = (reparsed_door_res_t *)door_args.rbuf;
        if ((err = resp->res_status) == 0) {
                /*
                 * Have to save the length of the results before the
                 * bcopy below since it can be an overlap copy that
                 * overwrites the reparsed_door_res_t structure at
                 * the beginning of the buffer.
                 */
                res_len = (size_t)resp->res_len;

                /* deref call is ok */
                if (res_len > *bufsize)
                        err = EOVERFLOW;
                else
                        bcopy(resp->res_data, buf, res_len);
                *bufsize = res_len;
        }
        if (door_args.rbuf != buf)
                kmem_free(door_args.rbuf, door_args.rsize);

        return (err);
}
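/*
 * A hypothetical sketch of a reparse point dereference call.  The svc_type
 * and svc_data values below are placeholders; a real caller (for example an
 * NFS or SMB service) would obtain them by parsing the reparse point's
 * symlink contents, e.g. via reparse_vnode_parse() above:
 *
 *	size_t bufsz = MAXREPARSELEN;
 *	char *buf = kmem_alloc(MAXREPARSELEN, KM_SLEEP);
 *	int err;
 *
 *	err = reparse_kderef(svc_type, svc_data, buf, &bufsz);
 *	if (err == EOVERFLOW) {
 *		(bufsz now holds the number of bytes needed; reallocate
 *		a larger buffer and retry)
 *	} else if (err == 0) {
 *		(buf holds bufsz bytes of service-specific data)
 *	}
 *	kmem_free(buf, MAXREPARSELEN);
 */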