1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2017 Nexenta Systems, Inc. All rights reserved. 14 */ 15 16 #include <sys/types.h> 17 #include <sys/param.h> 18 #include <sys/systm.h> 19 #include <sys/t_lock.h> 20 #include <sys/errno.h> 21 #include <sys/cred.h> 22 #include <sys/user.h> 23 #include <sys/uio.h> 24 #include <sys/file.h> 25 #include <sys/pathname.h> 26 #include <sys/vfs.h> 27 #include <sys/vnode.h> 28 #include <sys/stat.h> 29 #include <sys/mode.h> 30 #include <sys/kmem.h> 31 #include <sys/cmn_err.h> 32 #include <sys/debug.h> 33 #include <sys/atomic.h> 34 #include <sys/acl.h> 35 #include <sys/filio.h> 36 #include <sys/flock.h> 37 #include <sys/nbmlock.h> 38 #include <sys/fcntl.h> 39 #include <sys/poll.h> 40 #include <sys/time.h> 41 #include <sys/mman.h> 42 #include <sys/sysmacros.h> 43 44 #include <errno.h> 45 #include <fcntl.h> 46 #include <unistd.h> 47 48 #include "vncache.h" 49 50 #define O_RWMASK (O_WRONLY | O_RDWR) /* == 3 */ 51 52 int fop_shrlock_enable = 0; 53 54 int stat_to_vattr(const struct stat *, vattr_t *); 55 int fop__getxvattr(vnode_t *, xvattr_t *); 56 int fop__setxvattr(vnode_t *, xvattr_t *); 57 58 static void fake_inactive_xattrdir(vnode_t *); 59 60 typedef struct fake_xuio { 61 off_t map_foff; // file offset at start of mapping 62 char *map_addr; // mapped address 63 size_t map_len; // length of mapping 64 iovec_t iovec[2]; 65 } fake_xuio_t; 66 67 int fake_xuio_blksz = 4096; 68 69 70 /* ARGSUSED */ 71 int 72 fop_open( 73 vnode_t **vpp, 74 int mode, 75 cred_t *cr, 76 caller_context_t *ct) 77 { 78 79 if ((*vpp)->v_type == VREG) { 80 if (mode & FREAD) 81 atomic_add_32(&((*vpp)->v_rdcnt), 1); 82 if (mode & FWRITE) 83 atomic_add_32(&((*vpp)->v_wrcnt), 1); 84 } 85 86 /* call to ->vop_open was here */ 87 88 return (0); 89 } 90 91 /* ARGSUSED */ 92 int 93 fop_close( 94 vnode_t *vp, 95 int flag, 96 int count, 97 offset_t offset, 98 cred_t *cr, 99 caller_context_t *ct) 100 { 101 102 /* call to ->vop_close was here */ 103 104 /* 105 * Check passed in count to handle possible dups. Vnode counts are only 106 * kept on regular files 107 */ 108 if ((vp->v_type == VREG) && (count == 1)) { 109 if (flag & FREAD) { 110 ASSERT(vp->v_rdcnt > 0); 111 atomic_add_32(&(vp->v_rdcnt), -1); 112 } 113 if (flag & FWRITE) { 114 ASSERT(vp->v_wrcnt > 0); 115 atomic_add_32(&(vp->v_wrcnt), -1); 116 } 117 } 118 return (0); 119 } 120 121 /* ARGSUSED */ 122 int 123 fop_read( 124 vnode_t *vp, 125 uio_t *uio, 126 int ioflag, 127 cred_t *cr, 128 caller_context_t *ct) 129 { 130 struct stat st; 131 struct iovec *iov; 132 ssize_t resid; 133 size_t cnt; 134 int n; 135 136 /* 137 * If that caller asks for read beyond end of file, 138 * that causes the pread call to block. (Ugh!) 139 * Get the file size and return what we can. 140 */ 141 (void) fstat(vp->v_fd, &st); 142 resid = uio->uio_resid; 143 if ((uio->uio_loffset + resid) > st.st_size) 144 resid = st.st_size - uio->uio_loffset; 145 if (resid == 0) 146 return (0); 147 148 /* 149 * Simulating zero-copy support with mmap. See: 150 * fop_reqzcbuf(), fop_retzcbuf() 151 */ 152 if ((uio->uio_extflg == UIO_XUIO) && 153 (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 154 xuio_t *xuio = (xuio_t *)uio; 155 int poff; 156 157 fake_xuio_t *priv = XUIO_XUZC_PRIV(xuio); 158 159 /* 160 * Sanity check mapped range overlaps this I/O: 161 * uio_offset >= mapped base 162 * uio_resid <= (mapped length - page offset) 163 */ 164 if (uio->uio_loffset < priv->map_foff) 165 return (EINVAL); 166 poff = uio->uio_loffset - priv->map_foff; 167 if ((uio->uio_resid + poff) > priv->map_len) 168 return (EINVAL); 169 170 /* 171 * Setup the uio with our loaned buffers, 172 * and update offset, resid. 173 */ 174 uio->uio_iovcnt = 1; 175 uio->uio_iov = &priv->iovec[0]; 176 iov = uio->uio_iov; 177 iov->iov_base = priv->map_addr + poff; 178 iov->iov_len = priv->map_len - poff; 179 180 uio->uio_loffset += iov->iov_len; 181 uio->uio_resid -= iov->iov_len; 182 183 return (0); 184 } 185 186 while (resid > 0) { 187 188 ASSERT(uio->uio_iovcnt > 0); 189 iov = uio->uio_iov; 190 191 if (iov->iov_len == 0) { 192 uio->uio_iov++; 193 uio->uio_iovcnt--; 194 continue; 195 } 196 cnt = iov->iov_len; 197 if (cnt > resid) 198 cnt = resid; 199 200 n = pread(vp->v_fd, iov->iov_base, cnt, uio->uio_loffset); 201 if (n < 0) 202 return (errno); 203 204 iov->iov_base += n; 205 iov->iov_len -= n; 206 207 uio->uio_resid -= n; 208 uio->uio_loffset += n; 209 210 resid -= n; 211 } 212 213 return (0); 214 } 215 216 /* ARGSUSED */ 217 int 218 fop_write( 219 vnode_t *vp, 220 uio_t *uio, 221 int ioflag, 222 cred_t *cr, 223 caller_context_t *ct) 224 { 225 struct iovec *iov; 226 size_t cnt; 227 int n; 228 229 while (uio->uio_resid > 0) { 230 231 ASSERT(uio->uio_iovcnt > 0); 232 iov = uio->uio_iov; 233 234 if (iov->iov_len == 0) { 235 uio->uio_iov++; 236 uio->uio_iovcnt--; 237 continue; 238 } 239 cnt = iov->iov_len; 240 if (cnt > uio->uio_resid) 241 cnt = uio->uio_resid; 242 243 n = pwrite(vp->v_fd, iov->iov_base, iov->iov_len, 244 uio->uio_loffset); 245 if (n < 0) 246 return (errno); 247 248 iov->iov_base += n; 249 iov->iov_len -= n; 250 251 uio->uio_resid -= n; 252 uio->uio_loffset += n; 253 } 254 255 if (ioflag == FSYNC) { 256 (void) fsync(vp->v_fd); 257 } 258 259 return (0); 260 } 261 262 /* ARGSUSED */ 263 int 264 fop_ioctl( 265 vnode_t *vp, 266 int cmd, 267 intptr_t arg, 268 int flag, 269 cred_t *cr, 270 int *rvalp, 271 caller_context_t *ct) 272 { 273 off64_t off; 274 int rv, whence; 275 276 switch (cmd) { 277 case _FIO_SEEK_DATA: 278 case _FIO_SEEK_HOLE: 279 whence = (cmd == _FIO_SEEK_DATA) ? SEEK_DATA : SEEK_HOLE; 280 bcopy((void *)arg, &off, sizeof (off)); 281 off = lseek(vp->v_fd, off, whence); 282 if (off == (off64_t)-1) { 283 rv = errno; 284 } else { 285 bcopy(&off, (void *)arg, sizeof (off)); 286 rv = 0; 287 } 288 break; 289 290 default: 291 rv = ENOTTY; 292 break; 293 } 294 295 return (rv); 296 } 297 298 /* ARGSUSED */ 299 int 300 fop_setfl( 301 vnode_t *vp, 302 int oflags, 303 int nflags, 304 cred_t *cr, 305 caller_context_t *ct) 306 { 307 /* allow any flags? See fs_setfl */ 308 return (0); 309 } 310 311 /* ARGSUSED */ 312 int 313 fop_getattr( 314 vnode_t *vp, 315 vattr_t *vap, 316 int flags, 317 cred_t *cr, 318 caller_context_t *ct) 319 { 320 int error; 321 struct stat st; 322 323 if (fstat(vp->v_fd, &st) == -1) 324 return (errno); 325 error = stat_to_vattr(&st, vap); 326 327 if (vap->va_mask & AT_XVATTR) 328 (void) fop__getxvattr(vp, (xvattr_t *)vap); 329 330 return (error); 331 } 332 333 /* ARGSUSED */ 334 int 335 fop_setattr( 336 vnode_t *vp, 337 vattr_t *vap, 338 int flags, 339 cred_t *cr, 340 caller_context_t *ct) 341 { 342 timespec_t times[2]; 343 int err; 344 345 if (vap->va_mask & AT_SIZE) { 346 if (ftruncate(vp->v_fd, vap->va_size) == -1) { 347 err = errno; 348 if (err == EBADF) 349 err = EACCES; 350 return (err); 351 } 352 } 353 354 /* AT_MODE or anything else? */ 355 356 if (vap->va_mask & AT_XVATTR) 357 (void) fop__setxvattr(vp, (xvattr_t *)vap); 358 359 if (vap->va_mask & (AT_ATIME | AT_MTIME)) { 360 if (vap->va_mask & AT_ATIME) { 361 times[0] = vap->va_atime; 362 } else { 363 times[0].tv_sec = 0; 364 times[0].tv_nsec = UTIME_OMIT; 365 } 366 if (vap->va_mask & AT_MTIME) { 367 times[1] = vap->va_mtime; 368 } else { 369 times[1].tv_sec = 0; 370 times[1].tv_nsec = UTIME_OMIT; 371 } 372 373 (void) futimens(vp->v_fd, times); 374 } 375 376 return (0); 377 } 378 379 /* ARGSUSED */ 380 int 381 fop_access( 382 vnode_t *vp, 383 int mode, 384 int flags, 385 cred_t *cr, 386 caller_context_t *ct) 387 { 388 return (0); 389 } 390 391 /* 392 * Conceptually like xattr_dir_lookup() 393 */ 394 static int 395 fake_lookup_xattrdir( 396 vnode_t *dvp, 397 vnode_t **vpp) 398 { 399 int len, fd; 400 int omode = O_RDWR | O_NOFOLLOW; 401 vnode_t *vp; 402 403 *vpp = NULL; 404 405 if (dvp->v_type != VDIR && dvp->v_type != VREG) 406 return (EINVAL); 407 408 /* 409 * If we're already in sysattr space, don't allow creation 410 * of another level of sysattrs. 411 */ 412 if (dvp->v_flag & V_SYSATTR) 413 return (EINVAL); 414 415 mutex_enter(&dvp->v_lock); 416 if (dvp->v_xattrdir != NULL) { 417 *vpp = dvp->v_xattrdir; 418 VN_HOLD(*vpp); 419 mutex_exit(&dvp->v_lock); 420 return (0); 421 } 422 mutex_exit(&dvp->v_lock); 423 424 omode = O_RDONLY|O_XATTR; 425 fd = openat(dvp->v_fd, ".", omode); 426 if (fd < 0) 427 return (errno); 428 429 vp = vn_alloc(KM_SLEEP); 430 vp->v_fd = fd; 431 vp->v_flag = V_XATTRDIR|V_SYSATTR; 432 vp->v_type = VDIR; 433 vp->v_vfsp = dvp->v_vfsp; 434 435 /* Set v_path to parent path + "/@" (like NFS) */ 436 len = strlen(dvp->v_path) + 3; 437 vp->v_path = kmem_alloc(len, KM_SLEEP); 438 (void) snprintf(vp->v_path, len, "%s/@", dvp->v_path); 439 440 /* 441 * Keep a pointer to the parent and a hold on it. 442 * Both are cleaned up in fake_inactive_xattrdir 443 */ 444 vp->v_data = dvp; 445 vn_hold(dvp); 446 447 mutex_enter(&dvp->v_lock); 448 if (dvp->v_xattrdir == NULL) { 449 *vpp = dvp->v_xattrdir = vp; 450 mutex_exit(&dvp->v_lock); 451 } else { 452 *vpp = dvp->v_xattrdir; 453 mutex_exit(&dvp->v_lock); 454 fake_inactive_xattrdir(vp); 455 } 456 457 return (0); 458 } 459 460 /* ARGSUSED */ 461 int 462 fop_lookup( 463 vnode_t *dvp, 464 char *name, 465 vnode_t **vpp, 466 pathname_t *pnp, 467 int flags, 468 vnode_t *rdir, 469 cred_t *cr, 470 caller_context_t *ct, 471 int *deflags, /* Returned per-dirent flags */ 472 pathname_t *ppnp) /* Returned case-preserved name in directory */ 473 { 474 int fd; 475 int omode = O_RDWR | O_NOFOLLOW; 476 vnode_t *vp; 477 struct stat st; 478 479 if (flags & LOOKUP_XATTR) 480 return (fake_lookup_xattrdir(dvp, vpp)); 481 482 /* 483 * If lookup is for "", just return dvp. 484 */ 485 if (name[0] == '\0') { 486 vn_hold(dvp); 487 *vpp = dvp; 488 return (0); 489 } 490 491 if (fstatat(dvp->v_fd, name, &st, AT_SYMLINK_NOFOLLOW) == -1) 492 return (errno); 493 494 vp = vncache_lookup(&st); 495 if (vp != NULL) { 496 /* lookup gave us a hold */ 497 *vpp = vp; 498 return (0); 499 } 500 501 if (S_ISDIR(st.st_mode)) 502 omode = O_RDONLY | O_NOFOLLOW; 503 504 again: 505 fd = openat(dvp->v_fd, name, omode, 0); 506 if (fd < 0) { 507 if ((omode & O_RWMASK) == O_RDWR) { 508 omode &= ~O_RWMASK; 509 omode |= O_RDONLY; 510 goto again; 511 } 512 return (errno); 513 } 514 515 if (fstat(fd, &st) == -1) { 516 (void) close(fd); 517 return (errno); 518 } 519 520 vp = vncache_enter(&st, dvp, name, fd); 521 522 *vpp = vp; 523 return (0); 524 } 525 526 /* ARGSUSED */ 527 int 528 fop_create( 529 vnode_t *dvp, 530 char *name, 531 vattr_t *vap, 532 vcexcl_t excl, 533 int mode, 534 vnode_t **vpp, 535 cred_t *cr, 536 int flags, 537 caller_context_t *ct, 538 vsecattr_t *vsecp) /* ACL to set during create */ 539 { 540 struct stat st; 541 vnode_t *vp; 542 int err, fd, omode; 543 544 /* 545 * If creating "", just return dvp. 546 */ 547 if (name[0] == '\0') { 548 vn_hold(dvp); 549 *vpp = dvp; 550 return (0); 551 } 552 553 err = fstatat(dvp->v_fd, name, &st, AT_SYMLINK_NOFOLLOW); 554 if (err != 0) 555 err = errno; 556 557 vp = NULL; 558 if (err == 0) { 559 /* The file already exists. */ 560 if (excl == EXCL) 561 return (EEXIST); 562 563 vp = vncache_lookup(&st); 564 /* vp gained a hold */ 565 } 566 567 if (vp == NULL) { 568 /* 569 * Open it. (may or may not exist) 570 */ 571 omode = O_RDWR | O_CREAT | O_NOFOLLOW; 572 if (excl == EXCL) 573 omode |= O_EXCL; 574 open_again: 575 fd = openat(dvp->v_fd, name, omode, mode); 576 if (fd < 0) { 577 if ((omode & O_RWMASK) == O_RDWR) { 578 omode &= ~O_RWMASK; 579 omode |= O_RDONLY; 580 goto open_again; 581 } 582 return (errno); 583 } 584 (void) fstat(fd, &st); 585 586 vp = vncache_enter(&st, dvp, name, fd); 587 /* vp has its initial hold */ 588 } 589 590 /* Should have the vp now. */ 591 if (vp == NULL) 592 return (EFAULT); 593 594 if (vp->v_type == VDIR && vap->va_type != VDIR) { 595 vn_rele(vp); 596 return (EISDIR); 597 } 598 if (vp->v_type != VDIR && vap->va_type == VDIR) { 599 vn_rele(vp); 600 return (ENOTDIR); 601 } 602 603 /* 604 * Might need to set attributes. 605 */ 606 (void) fop_setattr(vp, vap, 0, cr, ct); 607 608 *vpp = vp; 609 return (0); 610 } 611 612 /* ARGSUSED */ 613 int 614 fop_remove( 615 vnode_t *dvp, 616 char *name, 617 cred_t *cr, 618 caller_context_t *ct, 619 int flags) 620 { 621 622 if (unlinkat(dvp->v_fd, name, 0)) 623 return (errno); 624 625 return (0); 626 } 627 628 /* ARGSUSED */ 629 int 630 fop_link( 631 vnode_t *to_dvp, 632 vnode_t *fr_vp, 633 char *to_name, 634 cred_t *cr, 635 caller_context_t *ct, 636 int flags) 637 { 638 int err; 639 640 /* 641 * Would prefer to specify "from" as the combination: 642 * (fr_vp->v_fd, NULL) but linkat does not permit it. 643 */ 644 err = linkat(AT_FDCWD, fr_vp->v_path, to_dvp->v_fd, to_name, 645 AT_SYMLINK_FOLLOW); 646 if (err == -1) 647 err = errno; 648 649 return (err); 650 } 651 652 /* ARGSUSED */ 653 int 654 fop_rename( 655 vnode_t *from_dvp, 656 char *from_name, 657 vnode_t *to_dvp, 658 char *to_name, 659 cred_t *cr, 660 caller_context_t *ct, 661 int flags) 662 { 663 struct stat st; 664 vnode_t *vp; 665 int err; 666 667 if (fstatat(from_dvp->v_fd, from_name, &st, 668 AT_SYMLINK_NOFOLLOW) == -1) 669 return (errno); 670 671 vp = vncache_lookup(&st); 672 if (vp == NULL) 673 return (ENOENT); 674 675 err = renameat(from_dvp->v_fd, from_name, to_dvp->v_fd, to_name); 676 if (err == -1) 677 err = errno; 678 else 679 vncache_renamed(vp, to_dvp, to_name); 680 681 vn_rele(vp); 682 683 return (err); 684 } 685 686 /* ARGSUSED */ 687 int 688 fop_mkdir( 689 vnode_t *dvp, 690 char *name, 691 vattr_t *vap, 692 vnode_t **vpp, 693 cred_t *cr, 694 caller_context_t *ct, 695 int flags, 696 vsecattr_t *vsecp) /* ACL to set during create */ 697 { 698 struct stat st; 699 int err, fd; 700 701 mode_t mode = vap->va_mode & 0777; 702 703 if (mkdirat(dvp->v_fd, name, mode) == -1) 704 return (errno); 705 706 if ((fd = openat(dvp->v_fd, name, O_RDONLY)) == -1) 707 return (errno); 708 if (fstat(fd, &st) == -1) { 709 err = errno; 710 (void) close(fd); 711 return (err); 712 } 713 714 *vpp = vncache_enter(&st, dvp, name, fd); 715 716 /* 717 * Might need to set attributes. 718 */ 719 (void) fop_setattr(*vpp, vap, 0, cr, ct); 720 721 return (0); 722 } 723 724 /* ARGSUSED */ 725 int 726 fop_rmdir( 727 vnode_t *dvp, 728 char *name, 729 vnode_t *cdir, 730 cred_t *cr, 731 caller_context_t *ct, 732 int flags) 733 { 734 735 if (unlinkat(dvp->v_fd, name, AT_REMOVEDIR) == -1) 736 return (errno); 737 738 return (0); 739 } 740 741 /* ARGSUSED */ 742 int 743 fop_readdir( 744 vnode_t *vp, 745 uio_t *uiop, 746 cred_t *cr, 747 int *eofp, 748 caller_context_t *ct, 749 int flags) 750 { 751 struct iovec *iov; 752 int cnt; 753 int error = 0; 754 int fd = vp->v_fd; 755 756 if (eofp) { 757 *eofp = 0; 758 } 759 760 error = lseek(fd, uiop->uio_loffset, SEEK_SET); 761 if (error == -1) 762 return (errno); 763 764 ASSERT(uiop->uio_iovcnt > 0); 765 iov = uiop->uio_iov; 766 if (iov->iov_len < sizeof (struct dirent)) 767 return (EINVAL); 768 769 /* LINTED E_BAD_PTR_CAST_ALIGN */ 770 cnt = getdents(fd, (struct dirent *)(uiop->uio_iov->iov_base), 771 uiop->uio_resid); 772 if (cnt == -1) 773 return (errno); 774 if (cnt == 0) { 775 if (eofp) { 776 *eofp = 1; 777 } 778 return (ENOENT); 779 } 780 781 iov->iov_base += cnt; 782 iov->iov_len -= cnt; 783 uiop->uio_resid -= cnt; 784 uiop->uio_loffset = lseek(fd, 0LL, SEEK_CUR); 785 786 return (0); 787 } 788 789 /* ARGSUSED */ 790 int 791 fop_symlink( 792 vnode_t *dvp, 793 char *linkname, 794 vattr_t *vap, 795 char *target, 796 cred_t *cr, 797 caller_context_t *ct, 798 int flags) 799 { 800 return (ENOSYS); 801 } 802 803 /* ARGSUSED */ 804 int 805 fop_readlink( 806 vnode_t *vp, 807 uio_t *uiop, 808 cred_t *cr, 809 caller_context_t *ct) 810 { 811 return (ENOSYS); 812 } 813 814 /* ARGSUSED */ 815 int 816 fop_fsync( 817 vnode_t *vp, 818 int syncflag, 819 cred_t *cr, 820 caller_context_t *ct) 821 { 822 823 if (fsync(vp->v_fd) == -1) 824 return (errno); 825 826 return (0); 827 } 828 829 /* ARGSUSED */ 830 void 831 fop_inactive( 832 vnode_t *vp, 833 cred_t *cr, 834 caller_context_t *ct) 835 { 836 if (vp->v_flag & V_XATTRDIR) { 837 fake_inactive_xattrdir(vp); 838 } else { 839 vncache_inactive(vp); 840 } 841 } 842 843 /* 844 * The special xattr directories are not in the vncache AVL, but 845 * hang off the parent's v_xattrdir field. When vn_rele finds 846 * an xattr dir at v_count == 1 it calls here, but until we 847 * take locks on both the parent and the xattrdir, we don't 848 * know if we're really at the last reference. So in here we 849 * take both locks, re-check the count, and either bail out 850 * or proceed with "inactive" vnode cleanup. Part of that 851 * cleanup includes releasing the hold on the parent and 852 * clearing the parent's v_xattrdir field, which were 853 * setup in fake_lookup_xattrdir() 854 */ 855 static void 856 fake_inactive_xattrdir(vnode_t *vp) 857 { 858 vnode_t *dvp = vp->v_data; /* parent */ 859 mutex_enter(&dvp->v_lock); 860 mutex_enter(&vp->v_lock); 861 if (vp->v_count > 1) { 862 /* new ref. via v_xattrdir */ 863 mutex_exit(&vp->v_lock); 864 mutex_exit(&dvp->v_lock); 865 return; 866 } 867 ASSERT(dvp->v_xattrdir == vp); 868 dvp->v_xattrdir = NULL; 869 mutex_exit(&vp->v_lock); 870 mutex_exit(&dvp->v_lock); 871 vn_rele(dvp); 872 vn_free(vp); 873 } 874 875 /* ARGSUSED */ 876 int 877 fop_fid( 878 vnode_t *vp, 879 fid_t *fidp, 880 caller_context_t *ct) 881 { 882 return (ENOSYS); 883 } 884 885 /* ARGSUSED */ 886 int 887 fop_rwlock( 888 vnode_t *vp, 889 int write_lock, 890 caller_context_t *ct) 891 { 892 /* See: fs_rwlock */ 893 return (-1); 894 } 895 896 /* ARGSUSED */ 897 void 898 fop_rwunlock( 899 vnode_t *vp, 900 int write_lock, 901 caller_context_t *ct) 902 { 903 /* See: fs_rwunlock */ 904 } 905 906 /* ARGSUSED */ 907 int 908 fop_seek( 909 vnode_t *vp, 910 offset_t ooff, 911 offset_t *noffp, 912 caller_context_t *ct) 913 { 914 return (ENOSYS); 915 } 916 917 /* ARGSUSED */ 918 int 919 fop_cmp( 920 vnode_t *vp1, 921 vnode_t *vp2, 922 caller_context_t *ct) 923 { 924 /* See fs_cmp */ 925 return (vncache_cmp(vp1, vp2)); 926 } 927 928 /* ARGSUSED */ 929 int 930 fop_frlock( 931 vnode_t *vp, 932 int cmd, 933 flock64_t *bfp, 934 int flag, 935 offset_t offset, 936 struct flk_callback *flk_cbp, 937 cred_t *cr, 938 caller_context_t *ct) 939 { 940 #if defined(_LP64) 941 offset_t maxoffset = INT64_MAX; 942 #elif defined(_ILP32) 943 /* 944 * Sadly, the fcntl API enforces 32-bit offsets, 945 * even though we have _FILE_OFFSET_BITS=64 946 */ 947 offset_t maxoffset = INT32_MAX; 948 #else 949 #error "unsupported env." 950 #endif 951 952 /* See fs_frlock */ 953 954 switch (cmd) { 955 case F_GETLK: 956 case F_SETLK_NBMAND: 957 case F_SETLK: 958 case F_SETLKW: 959 break; 960 default: 961 return (EINVAL); 962 } 963 964 /* We only get SEEK_SET ranges here. */ 965 if (bfp->l_whence != 0) 966 return (EINVAL); 967 968 /* 969 * One limitation of using fcntl(2) F_SETLK etc is that 970 * the real kernel limits the offsets we can use. 971 * (Maybe the fcntl API should loosen that up?) 972 * See syscall/fcntl.c:flock_check() 973 * 974 * Here in libfksmbsrv we can just ignore such locks, 975 * or ignore the part that extends beyond maxoffset. 976 * The SMB layer still keeps track of such locks for 977 * conflict detection, so not reflecting such locks 978 * into the real FS layer is OK. Note: this may 979 * modify the pased bfp->l_len. 980 */ 981 if (bfp->l_start < 0 || bfp->l_start > maxoffset) 982 return (0); 983 if (bfp->l_len < 0 || bfp->l_len > maxoffset) 984 return (0); 985 if (bfp->l_len > (maxoffset - bfp->l_start + 1)) 986 bfp->l_len = (maxoffset - bfp->l_start + 1); 987 988 if (fcntl(vp->v_fd, cmd, bfp) == -1) 989 return (errno); 990 991 return (0); 992 } 993 994 /* ARGSUSED */ 995 int 996 fop_space( 997 vnode_t *vp, 998 int cmd, 999 flock64_t *bfp, 1000 int flag, 1001 offset_t offset, 1002 cred_t *cr, 1003 caller_context_t *ct) 1004 { 1005 /* See fs_frlock */ 1006 1007 switch (cmd) { 1008 case F_ALLOCSP: 1009 case F_FREESP: 1010 break; 1011 default: 1012 return (EINVAL); 1013 } 1014 1015 if (fcntl(vp->v_fd, cmd, bfp) == -1) 1016 return (errno); 1017 1018 return (0); 1019 } 1020 1021 /* ARGSUSED */ 1022 int 1023 fop_realvp( 1024 vnode_t *vp, 1025 vnode_t **vpp, 1026 caller_context_t *ct) 1027 { 1028 return (ENOSYS); 1029 } 1030 1031 /* ARGSUSED */ 1032 int 1033 fop_getpage( 1034 vnode_t *vp, 1035 offset_t off, 1036 size_t len, 1037 uint_t *protp, 1038 struct page **plarr, 1039 size_t plsz, 1040 struct seg *seg, 1041 caddr_t addr, 1042 enum seg_rw rw, 1043 cred_t *cr, 1044 caller_context_t *ct) 1045 { 1046 return (ENOSYS); 1047 } 1048 1049 /* ARGSUSED */ 1050 int 1051 fop_putpage( 1052 vnode_t *vp, 1053 offset_t off, 1054 size_t len, 1055 int flags, 1056 cred_t *cr, 1057 caller_context_t *ct) 1058 { 1059 return (ENOSYS); 1060 } 1061 1062 /* ARGSUSED */ 1063 int 1064 fop_map( 1065 vnode_t *vp, 1066 offset_t off, 1067 struct as *as, 1068 caddr_t *addrp, 1069 size_t len, 1070 uchar_t prot, 1071 uchar_t maxprot, 1072 uint_t flags, 1073 cred_t *cr, 1074 caller_context_t *ct) 1075 { 1076 return (ENOSYS); 1077 } 1078 1079 /* ARGSUSED */ 1080 int 1081 fop_addmap( 1082 vnode_t *vp, 1083 offset_t off, 1084 struct as *as, 1085 caddr_t addr, 1086 size_t len, 1087 uchar_t prot, 1088 uchar_t maxprot, 1089 uint_t flags, 1090 cred_t *cr, 1091 caller_context_t *ct) 1092 { 1093 return (ENOSYS); 1094 } 1095 1096 /* ARGSUSED */ 1097 int 1098 fop_delmap( 1099 vnode_t *vp, 1100 offset_t off, 1101 struct as *as, 1102 caddr_t addr, 1103 size_t len, 1104 uint_t prot, 1105 uint_t maxprot, 1106 uint_t flags, 1107 cred_t *cr, 1108 caller_context_t *ct) 1109 { 1110 return (ENOSYS); 1111 } 1112 1113 /* ARGSUSED */ 1114 int 1115 fop_poll( 1116 vnode_t *vp, 1117 short events, 1118 int anyyet, 1119 short *reventsp, 1120 struct pollhead **phpp, 1121 caller_context_t *ct) 1122 { 1123 *reventsp = 0; 1124 if (events & POLLIN) 1125 *reventsp |= POLLIN; 1126 if (events & POLLRDNORM) 1127 *reventsp |= POLLRDNORM; 1128 if (events & POLLRDBAND) 1129 *reventsp |= POLLRDBAND; 1130 if (events & POLLOUT) 1131 *reventsp |= POLLOUT; 1132 if (events & POLLWRBAND) 1133 *reventsp |= POLLWRBAND; 1134 *phpp = NULL; /* or fake_pollhead? */ 1135 1136 return (0); 1137 } 1138 1139 /* ARGSUSED */ 1140 int 1141 fop_dump( 1142 vnode_t *vp, 1143 caddr_t addr, 1144 offset_t lbdn, 1145 offset_t dblks, 1146 caller_context_t *ct) 1147 { 1148 return (ENOSYS); 1149 } 1150 1151 /* 1152 * See fs_pathconf 1153 */ 1154 /* ARGSUSED */ 1155 int 1156 fop_pathconf( 1157 vnode_t *vp, 1158 int cmd, 1159 ulong_t *valp, 1160 cred_t *cr, 1161 caller_context_t *ct) 1162 { 1163 register ulong_t val; 1164 register int error = 0; 1165 1166 switch (cmd) { 1167 1168 case _PC_LINK_MAX: 1169 val = MAXLINK; 1170 break; 1171 1172 case _PC_MAX_CANON: 1173 val = MAX_CANON; 1174 break; 1175 1176 case _PC_MAX_INPUT: 1177 val = MAX_INPUT; 1178 break; 1179 1180 case _PC_NAME_MAX: 1181 val = MAXNAMELEN; 1182 break; 1183 1184 case _PC_PATH_MAX: 1185 case _PC_SYMLINK_MAX: 1186 val = MAXPATHLEN; 1187 break; 1188 1189 case _PC_PIPE_BUF: 1190 val = PIPE_BUF; 1191 break; 1192 1193 case _PC_NO_TRUNC: 1194 val = (ulong_t)-1; 1195 break; 1196 1197 case _PC_VDISABLE: 1198 val = _POSIX_VDISABLE; 1199 break; 1200 1201 case _PC_CHOWN_RESTRICTED: 1202 val = 1; /* chown restricted enabled */ 1203 break; 1204 1205 case _PC_FILESIZEBITS: 1206 val = (ulong_t)-1; /* large file support */ 1207 break; 1208 1209 case _PC_ACL_ENABLED: 1210 val = _ACL_ACE_ENABLED; 1211 break; 1212 1213 case _PC_CASE_BEHAVIOR: 1214 val = _CASE_SENSITIVE; 1215 break; 1216 1217 case _PC_SATTR_ENABLED: 1218 case _PC_SATTR_EXISTS: 1219 val = 0; 1220 break; 1221 1222 case _PC_ACCESS_FILTERING: 1223 val = 0; 1224 break; 1225 1226 default: 1227 error = EINVAL; 1228 break; 1229 } 1230 1231 if (error == 0) 1232 *valp = val; 1233 return (error); 1234 } 1235 1236 /* ARGSUSED */ 1237 int 1238 fop_pageio( 1239 vnode_t *vp, 1240 struct page *pp, 1241 u_offset_t io_off, 1242 size_t io_len, 1243 int flags, 1244 cred_t *cr, 1245 caller_context_t *ct) 1246 { 1247 return (ENOSYS); 1248 } 1249 1250 /* ARGSUSED */ 1251 int 1252 fop_dumpctl( 1253 vnode_t *vp, 1254 int action, 1255 offset_t *blkp, 1256 caller_context_t *ct) 1257 { 1258 return (ENOSYS); 1259 } 1260 1261 /* ARGSUSED */ 1262 void 1263 fop_dispose( 1264 vnode_t *vp, 1265 struct page *pp, 1266 int flag, 1267 int dn, 1268 cred_t *cr, 1269 caller_context_t *ct) 1270 { 1271 } 1272 1273 /* ARGSUSED */ 1274 int 1275 fop_setsecattr( 1276 vnode_t *vp, 1277 vsecattr_t *vsap, 1278 int flag, 1279 cred_t *cr, 1280 caller_context_t *ct) 1281 { 1282 return (0); 1283 } 1284 1285 /* 1286 * Fake up just enough of this so we can test get/set SDs. 1287 */ 1288 /* ARGSUSED */ 1289 int 1290 fop_getsecattr( 1291 vnode_t *vp, 1292 vsecattr_t *vsecattr, 1293 int flag, 1294 cred_t *cr, 1295 caller_context_t *ct) 1296 { 1297 1298 vsecattr->vsa_aclcnt = 0; 1299 vsecattr->vsa_aclentsz = 0; 1300 vsecattr->vsa_aclentp = NULL; 1301 vsecattr->vsa_dfaclcnt = 0; /* Default ACLs are not fabricated */ 1302 vsecattr->vsa_dfaclentp = NULL; 1303 1304 if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL)) { 1305 aclent_t *aclentp; 1306 size_t aclsize; 1307 1308 aclsize = sizeof (aclent_t); 1309 vsecattr->vsa_aclcnt = 1; 1310 vsecattr->vsa_aclentp = kmem_zalloc(aclsize, KM_SLEEP); 1311 aclentp = vsecattr->vsa_aclentp; 1312 1313 aclentp->a_type = OTHER_OBJ; 1314 aclentp->a_perm = 0777; 1315 aclentp->a_id = (gid_t)-1; 1316 aclentp++; 1317 } else if (vsecattr->vsa_mask & (VSA_ACECNT | VSA_ACE)) { 1318 ace_t *acl; 1319 1320 acl = kmem_alloc(sizeof (ace_t), KM_SLEEP); 1321 acl->a_who = (uint32_t)-1; 1322 acl->a_type = ACE_ACCESS_ALLOWED_ACE_TYPE; 1323 acl->a_flags = ACE_EVERYONE; 1324 acl->a_access_mask = ACE_MODIFY_PERMS; 1325 1326 vsecattr->vsa_aclentp = (void *)acl; 1327 vsecattr->vsa_aclcnt = 1; 1328 vsecattr->vsa_aclentsz = sizeof (ace_t); 1329 } 1330 1331 return (0); 1332 } 1333 1334 /* ARGSUSED */ 1335 int 1336 fop_shrlock( 1337 vnode_t *vp, 1338 int cmd, 1339 struct shrlock *shr, 1340 int flag, 1341 cred_t *cr, 1342 caller_context_t *ct) 1343 { 1344 1345 switch (cmd) { 1346 case F_SHARE: 1347 case F_SHARE_NBMAND: 1348 case F_UNSHARE: 1349 break; 1350 default: 1351 return (EINVAL); 1352 } 1353 1354 if (!fop_shrlock_enable) 1355 return (0); 1356 1357 if (fcntl(vp->v_fd, cmd, shr) == -1) 1358 return (errno); 1359 1360 return (0); 1361 } 1362 1363 /* ARGSUSED */ 1364 int 1365 fop_vnevent(vnode_t *vp, vnevent_t vnevent, vnode_t *dvp, char *fnm, 1366 caller_context_t *ct) 1367 { 1368 return (ENOSYS); 1369 } 1370 1371 /* ARGSUSED */ 1372 int 1373 fop_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, 1374 caller_context_t *ct) 1375 { 1376 fake_xuio_t *priv; 1377 uio_t *uio = &xuio->xu_uio; 1378 int blksz = fake_xuio_blksz; 1379 off_t foff, moff; 1380 size_t flen, mlen; 1381 int poff; 1382 char *ma; 1383 struct stat st; 1384 1385 if (xuio->xu_type != UIOTYPE_ZEROCOPY) 1386 return (EINVAL); 1387 1388 foff = uio->uio_loffset; 1389 flen = uio->uio_resid; 1390 1391 if (fstat(vp->v_fd, &st) == -1) 1392 return (errno); 1393 1394 if (foff >= st.st_size) 1395 return (EINVAL); 1396 if ((foff + flen) > st.st_size) 1397 flen = st.st_size - foff; 1398 1399 switch (ioflag) { 1400 case UIO_READ: 1401 if (flen < blksz/2) 1402 return (EINVAL); 1403 break; 1404 1405 case UIO_WRITE: 1406 default: 1407 return (EINVAL); 1408 } 1409 1410 /* 1411 * See if we can map the file for read. 1412 * Round down start offset for mmap. 1413 */ 1414 poff = P2PHASE((int)foff, blksz); 1415 moff = foff - poff; 1416 mlen = flen + poff; 1417 1418 ma = mmap(NULL, mlen, PROT_READ, MAP_SHARED, vp->v_fd, moff); 1419 if (ma == MAP_FAILED) { 1420 /* Can't use loaned buffers. */ 1421 return (EINVAL); 1422 } 1423 1424 priv = kmem_zalloc(sizeof (*priv), KM_SLEEP); 1425 priv->map_foff = foff; 1426 priv->map_addr = ma; 1427 priv->map_len = mlen; 1428 1429 XUIO_XUZC_PRIV(xuio) = priv; 1430 XUIO_XUZC_RW(xuio) = ioflag; 1431 uio->uio_extflg = UIO_XUIO; 1432 1433 return (0); 1434 } 1435 1436 /* ARGSUSED */ 1437 int 1438 fop_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct) 1439 { 1440 fake_xuio_t *priv = XUIO_XUZC_PRIV(xuio); 1441 int ioflag = XUIO_XUZC_RW(xuio); 1442 1443 ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); 1444 ASSERT(ioflag == UIO_READ); 1445 1446 munmap(priv->map_addr, priv->map_len); 1447 kmem_free(priv, sizeof (fake_xuio_t)); 1448 XUIO_XUZC_PRIV(xuio) = NULL; 1449 1450 return (0); 1451 } 1452 1453 1454 /* 1455 * *************************************************************** 1456 * other VOP support 1457 */ 1458 1459 /* 1460 * Convert stat(2) formats to vnode types and vice versa. (Knows about 1461 * numerical order of S_IFMT and vnode types.) 1462 */ 1463 enum vtype iftovt_tab[] = { 1464 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 1465 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON 1466 }; 1467 1468 ushort_t vttoif_tab[] = { 1469 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, 1470 S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0 1471 }; 1472 1473 /* 1474 * stat_to_vattr() 1475 * 1476 * Convert from a stat structure to an vattr structure 1477 * Note: only set fields according to va_mask 1478 */ 1479 1480 int 1481 stat_to_vattr(const struct stat *st, vattr_t *vap) 1482 { 1483 1484 if (vap->va_mask & AT_TYPE) 1485 vap->va_type = IFTOVT(st->st_mode); 1486 1487 if (vap->va_mask & AT_MODE) 1488 vap->va_mode = st->st_mode; 1489 1490 if (vap->va_mask & AT_UID) 1491 vap->va_uid = st->st_uid; 1492 1493 if (vap->va_mask & AT_GID) 1494 vap->va_gid = st->st_gid; 1495 1496 if (vap->va_mask & AT_FSID) 1497 vap->va_fsid = st->st_dev; 1498 1499 if (vap->va_mask & AT_NODEID) 1500 vap->va_nodeid = st->st_ino; 1501 1502 if (vap->va_mask & AT_NLINK) 1503 vap->va_nlink = st->st_nlink; 1504 1505 if (vap->va_mask & AT_SIZE) 1506 vap->va_size = (u_offset_t)st->st_size; 1507 1508 if (vap->va_mask & AT_ATIME) { 1509 vap->va_atime.tv_sec = st->st_atim.tv_sec; 1510 vap->va_atime.tv_nsec = st->st_atim.tv_nsec; 1511 } 1512 1513 if (vap->va_mask & AT_MTIME) { 1514 vap->va_mtime.tv_sec = st->st_mtim.tv_sec; 1515 vap->va_mtime.tv_nsec = st->st_mtim.tv_nsec; 1516 } 1517 1518 if (vap->va_mask & AT_CTIME) { 1519 vap->va_ctime.tv_sec = st->st_ctim.tv_sec; 1520 vap->va_ctime.tv_nsec = st->st_ctim.tv_nsec; 1521 } 1522 1523 if (vap->va_mask & AT_RDEV) 1524 vap->va_rdev = st->st_rdev; 1525 1526 if (vap->va_mask & AT_BLKSIZE) 1527 vap->va_blksize = (uint_t)st->st_blksize; 1528 1529 1530 if (vap->va_mask & AT_NBLOCKS) 1531 vap->va_nblocks = (u_longlong_t)st->st_blocks; 1532 1533 if (vap->va_mask & AT_SEQ) 1534 vap->va_seq = 0; 1535 1536 return (0); 1537 } 1538 1539 /* ARGSUSED */ 1540 void 1541 flk_init_callback(flk_callback_t *flk_cb, 1542 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata) 1543 { 1544 } 1545 1546 void 1547 vn_hold(vnode_t *vp) 1548 { 1549 mutex_enter(&vp->v_lock); 1550 vp->v_count++; 1551 mutex_exit(&vp->v_lock); 1552 } 1553 1554 void 1555 vn_rele(vnode_t *vp) 1556 { 1557 VERIFY3U(vp->v_count, !=, 0); 1558 mutex_enter(&vp->v_lock); 1559 if (vp->v_count == 1) { 1560 mutex_exit(&vp->v_lock); 1561 fop_inactive(vp, NULL, NULL); 1562 } else { 1563 vp->v_count--; 1564 mutex_exit(&vp->v_lock); 1565 } 1566 } 1567 1568 int 1569 vn_has_other_opens( 1570 vnode_t *vp, 1571 v_mode_t mode) 1572 { 1573 1574 switch (mode) { 1575 case V_WRITE: 1576 if (vp->v_wrcnt > 1) 1577 return (V_TRUE); 1578 break; 1579 case V_RDORWR: 1580 if ((vp->v_rdcnt > 1) || (vp->v_wrcnt > 1)) 1581 return (V_TRUE); 1582 break; 1583 case V_RDANDWR: 1584 if ((vp->v_rdcnt > 1) && (vp->v_wrcnt > 1)) 1585 return (V_TRUE); 1586 break; 1587 case V_READ: 1588 if (vp->v_rdcnt > 1) 1589 return (V_TRUE); 1590 break; 1591 } 1592 1593 return (V_FALSE); 1594 } 1595 1596 /* 1597 * vn_is_opened() checks whether a particular file is opened and 1598 * whether the open is for read and/or write. 1599 * 1600 * Vnode counts are only kept on regular files (v_type=VREG). 1601 */ 1602 int 1603 vn_is_opened( 1604 vnode_t *vp, 1605 v_mode_t mode) 1606 { 1607 1608 ASSERT(vp != NULL); 1609 1610 switch (mode) { 1611 case V_WRITE: 1612 if (vp->v_wrcnt) 1613 return (V_TRUE); 1614 break; 1615 case V_RDANDWR: 1616 if (vp->v_rdcnt && vp->v_wrcnt) 1617 return (V_TRUE); 1618 break; 1619 case V_RDORWR: 1620 if (vp->v_rdcnt || vp->v_wrcnt) 1621 return (V_TRUE); 1622 break; 1623 case V_READ: 1624 if (vp->v_rdcnt) 1625 return (V_TRUE); 1626 break; 1627 } 1628 1629 return (V_FALSE); 1630 } 1631 1632 /* 1633 * vn_is_mapped() checks whether a particular file is mapped and whether 1634 * the file is mapped read and/or write. 1635 */ 1636 /* ARGSUSED */ 1637 int 1638 vn_is_mapped( 1639 vnode_t *vp, 1640 v_mode_t mode) 1641 { 1642 return (V_FALSE); 1643 } 1644