1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2017 Nexenta Systems, Inc. All rights reserved. 14 * Copyright 2024 RackTop Systems, Inc. 15 */ 16 17 #include <sys/types.h> 18 #include <sys/param.h> 19 #include <sys/systm.h> 20 #include <sys/t_lock.h> 21 #include <sys/errno.h> 22 #include <sys/cred.h> 23 #include <sys/user.h> 24 #include <sys/uio.h> 25 #include <sys/file.h> 26 #include <sys/pathname.h> 27 #include <sys/vfs.h> 28 #include <sys/vnode.h> 29 #include <sys/stat.h> 30 #include <sys/mode.h> 31 #include <sys/kmem.h> 32 #include <sys/cmn_err.h> 33 #include <sys/debug.h> 34 #include <sys/atomic.h> 35 #include <sys/acl.h> 36 #include <sys/filio.h> 37 #include <sys/flock.h> 38 #include <sys/nbmlock.h> 39 #include <sys/fcntl.h> 40 #include <sys/poll.h> 41 #include <sys/time.h> 42 #include <sys/mman.h> 43 #include <sys/sysmacros.h> 44 45 #include <errno.h> 46 #include <fcntl.h> 47 #include <unistd.h> 48 49 #include "vncache.h" 50 51 #define O_RWMASK (O_WRONLY | O_RDWR) /* == 3 */ 52 53 int fop_shrlock_enable = 0; 54 55 int stat_to_vattr(const struct stat *, vattr_t *); 56 int fop__getxvattr(vnode_t *, xvattr_t *); 57 int fop__setxvattr(vnode_t *, xvattr_t *); 58 59 static void fake_inactive_xattrdir(vnode_t *); 60 61 typedef struct fake_xuio { 62 off_t map_foff; // file offset at start of mapping 63 char *map_addr; // mapped address 64 size_t map_len; // length of mapping 65 iovec_t iovec[2]; 66 } fake_xuio_t; 67 68 int fake_xuio_blksz = 4096; 69 70 /* ARGSUSED */ 71 int 72 fop_open( 73 vnode_t **vpp, 74 int mode, 75 cred_t *cr, 76 caller_context_t *ct) 77 { 78 79 if ((*vpp)->v_type == VREG) { 80 if (mode & FREAD) 81 atomic_add_32(&((*vpp)->v_rdcnt), 1); 82 if (mode & FWRITE) 83 atomic_add_32(&((*vpp)->v_wrcnt), 1); 84 } 85 86 /* call to ->vop_open was here */ 87 88 return (0); 89 } 90 91 /* ARGSUSED */ 92 int 93 fop_close( 94 vnode_t *vp, 95 int flag, 96 int count, 97 offset_t offset, 98 cred_t *cr, 99 caller_context_t *ct) 100 { 101 102 /* call to ->vop_close was here */ 103 104 /* 105 * Check passed in count to handle possible dups. Vnode counts are only 106 * kept on regular files 107 */ 108 if ((vp->v_type == VREG) && (count == 1)) { 109 if (flag & FREAD) { 110 ASSERT(vp->v_rdcnt > 0); 111 atomic_add_32(&(vp->v_rdcnt), -1); 112 } 113 if (flag & FWRITE) { 114 ASSERT(vp->v_wrcnt > 0); 115 atomic_add_32(&(vp->v_wrcnt), -1); 116 } 117 } 118 return (0); 119 } 120 121 /* ARGSUSED */ 122 int 123 fop_read( 124 vnode_t *vp, 125 uio_t *uio, 126 int ioflag, 127 cred_t *cr, 128 caller_context_t *ct) 129 { 130 struct stat st; 131 struct iovec *iov; 132 ssize_t resid; 133 size_t cnt; 134 int n; 135 int fd = vncache_getfd(vp); 136 137 /* 138 * If that caller asks for read beyond end of file, 139 * that causes the pread call to block. (Ugh!) 140 * Get the file size and return what we can. 141 */ 142 (void) fstat(fd, &st); 143 resid = uio->uio_resid; 144 if ((uio->uio_loffset + resid) > st.st_size) 145 resid = st.st_size - uio->uio_loffset; 146 if (resid == 0) 147 return (0); 148 149 /* 150 * Simulating zero-copy support with mmap. See: 151 * fop_reqzcbuf(), fop_retzcbuf() 152 */ 153 if ((uio->uio_extflg == UIO_XUIO) && 154 (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 155 xuio_t *xuio = (xuio_t *)uio; 156 int poff; 157 158 fake_xuio_t *priv = XUIO_XUZC_PRIV(xuio); 159 160 /* 161 * Sanity check mapped range overlaps this I/O: 162 * uio_offset >= mapped base 163 * uio_resid <= (mapped length - page offset) 164 */ 165 if (uio->uio_loffset < priv->map_foff) 166 return (EINVAL); 167 poff = uio->uio_loffset - priv->map_foff; 168 if ((uio->uio_resid + poff) > priv->map_len) 169 return (EINVAL); 170 171 /* 172 * Setup the uio with our loaned buffers, 173 * and update offset, resid. 174 */ 175 uio->uio_iovcnt = 1; 176 uio->uio_iov = &priv->iovec[0]; 177 iov = uio->uio_iov; 178 iov->iov_base = priv->map_addr + poff; 179 iov->iov_len = priv->map_len - poff; 180 181 uio->uio_loffset += iov->iov_len; 182 uio->uio_resid -= iov->iov_len; 183 184 return (0); 185 } 186 187 while (resid > 0) { 188 189 ASSERT(uio->uio_iovcnt > 0); 190 iov = uio->uio_iov; 191 192 if (iov->iov_len == 0) { 193 uio->uio_iov++; 194 uio->uio_iovcnt--; 195 continue; 196 } 197 cnt = iov->iov_len; 198 if (cnt > resid) 199 cnt = resid; 200 201 n = pread(fd, iov->iov_base, cnt, uio->uio_loffset); 202 if (n < 0) 203 return (errno); 204 205 iov->iov_base += n; 206 iov->iov_len -= n; 207 208 uio->uio_resid -= n; 209 uio->uio_loffset += n; 210 211 resid -= n; 212 } 213 214 return (0); 215 } 216 217 /* ARGSUSED */ 218 int 219 fop_write( 220 vnode_t *vp, 221 uio_t *uio, 222 int ioflag, 223 cred_t *cr, 224 caller_context_t *ct) 225 { 226 struct iovec *iov; 227 size_t cnt; 228 int n; 229 int fd = vncache_getfd(vp); 230 231 while (uio->uio_resid > 0) { 232 233 ASSERT(uio->uio_iovcnt > 0); 234 iov = uio->uio_iov; 235 236 if (iov->iov_len == 0) { 237 uio->uio_iov++; 238 uio->uio_iovcnt--; 239 continue; 240 } 241 cnt = iov->iov_len; 242 if (cnt > uio->uio_resid) 243 cnt = uio->uio_resid; 244 245 n = pwrite(fd, iov->iov_base, iov->iov_len, 246 uio->uio_loffset); 247 if (n < 0) 248 return (errno); 249 250 iov->iov_base += n; 251 iov->iov_len -= n; 252 253 uio->uio_resid -= n; 254 uio->uio_loffset += n; 255 } 256 257 if (ioflag == FSYNC) { 258 (void) fsync(fd); 259 } 260 261 return (0); 262 } 263 264 /* ARGSUSED */ 265 int 266 fop_ioctl( 267 vnode_t *vp, 268 int cmd, 269 intptr_t arg, 270 int flag, 271 cred_t *cr, 272 int *rvalp, 273 caller_context_t *ct) 274 { 275 off64_t off; 276 int rv, whence; 277 int fd = vncache_getfd(vp); 278 279 switch (cmd) { 280 case _FIO_SEEK_DATA: 281 case _FIO_SEEK_HOLE: 282 whence = (cmd == _FIO_SEEK_DATA) ? SEEK_DATA : SEEK_HOLE; 283 bcopy((void *)arg, &off, sizeof (off)); 284 off = lseek(fd, off, whence); 285 if (off == (off64_t)-1) { 286 rv = errno; 287 } else { 288 bcopy(&off, (void *)arg, sizeof (off)); 289 rv = 0; 290 } 291 break; 292 293 default: 294 rv = ENOTTY; 295 break; 296 } 297 298 return (rv); 299 } 300 301 /* ARGSUSED */ 302 int 303 fop_setfl( 304 vnode_t *vp, 305 int oflags, 306 int nflags, 307 cred_t *cr, 308 caller_context_t *ct) 309 { 310 /* allow any flags? See fs_setfl */ 311 return (0); 312 } 313 314 /* ARGSUSED */ 315 int 316 fop_getattr( 317 vnode_t *vp, 318 vattr_t *vap, 319 int flags, 320 cred_t *cr, 321 caller_context_t *ct) 322 { 323 struct stat st; 324 int error; 325 int fd = vncache_getfd(vp); 326 327 if (fstat(fd, &st) == -1) 328 return (errno); 329 error = stat_to_vattr(&st, vap); 330 331 if (vap->va_mask & AT_XVATTR) 332 (void) fop__getxvattr(vp, (xvattr_t *)vap); 333 334 return (error); 335 } 336 337 /* ARGSUSED */ 338 int 339 fop_setattr( 340 vnode_t *vp, 341 vattr_t *vap, 342 int flags, 343 cred_t *cr, 344 caller_context_t *ct) 345 { 346 timespec_t times[2]; 347 int err; 348 int fd = vncache_getfd(vp); 349 350 if (vap->va_mask & AT_SIZE) { 351 if (ftruncate(fd, vap->va_size) == -1) { 352 err = errno; 353 if (err == EBADF) 354 err = EACCES; 355 return (err); 356 } 357 } 358 359 /* AT_MODE or anything else? */ 360 361 if (vap->va_mask & AT_XVATTR) 362 (void) fop__setxvattr(vp, (xvattr_t *)vap); 363 364 if (vap->va_mask & (AT_ATIME | AT_MTIME)) { 365 if (vap->va_mask & AT_ATIME) { 366 times[0] = vap->va_atime; 367 } else { 368 times[0].tv_sec = 0; 369 times[0].tv_nsec = UTIME_OMIT; 370 } 371 if (vap->va_mask & AT_MTIME) { 372 times[1] = vap->va_mtime; 373 } else { 374 times[1].tv_sec = 0; 375 times[1].tv_nsec = UTIME_OMIT; 376 } 377 378 (void) futimens(fd, times); 379 } 380 381 return (0); 382 } 383 384 /* ARGSUSED */ 385 int 386 fop_access( 387 vnode_t *vp, 388 int mode, 389 int flags, 390 cred_t *cr, 391 caller_context_t *ct) 392 { 393 return (0); 394 } 395 396 /* 397 * Conceptually like xattr_dir_lookup() 398 * 399 * Once we've looked up the XATTRDIR for some vp, we keep it in 400 * v_xattrdir until this vp goes inactive. See: vncache_inactive() 401 */ 402 static int 403 fake_lookup_xattrdir( 404 vnode_t *dvp, 405 vnode_t **vpp) 406 { 407 int len, fd; 408 int omode = O_RDWR | O_NOFOLLOW; 409 vnode_t *vp; 410 int dfd = vncache_getfd(dvp); 411 412 *vpp = NULL; 413 414 if (dvp->v_type != VDIR && dvp->v_type != VREG) 415 return (EINVAL); 416 417 /* 418 * If we're already in sysattr space, don't allow creation 419 * of another level of sysattrs. 420 */ 421 if (dvp->v_flag & V_SYSATTR) 422 return (EINVAL); 423 424 /* 425 * We may already have the XATTR dir. 426 */ 427 mutex_enter(&dvp->v_lock); 428 if (dvp->v_xattrdir != NULL) { 429 *vpp = dvp->v_xattrdir; 430 VN_HOLD(*vpp); 431 mutex_exit(&dvp->v_lock); 432 return (0); 433 } 434 mutex_exit(&dvp->v_lock); 435 436 /* 437 * Need to "create" the XATTR dir vnode. 438 */ 439 omode = O_RDONLY|O_XATTR; 440 fd = openat(dfd, ".", omode); 441 if (fd < 0) 442 return (errno); 443 444 /* 445 * Normally vn_alloc() is called by vncache_enter(), but 446 * we don't enter the special xattr dir into the cache. 447 * These are only found via the parent's v_xattrdir field. 448 */ 449 vp = vn_alloc(KM_SLEEP); 450 vncache_setfd(vp, fd); 451 vp->v_flag = V_XATTRDIR|V_SYSATTR; 452 vp->v_type = VDIR; 453 vp->v_vfsp = dvp->v_vfsp; 454 455 /* Set v_path to parent path + "/@" (like NFS) */ 456 len = strlen(dvp->v_path) + 3; 457 vp->v_path = kmem_alloc(len, KM_SLEEP); 458 (void) snprintf(vp->v_path, len, "%s/@", dvp->v_path); 459 460 mutex_enter(&dvp->v_lock); 461 if (dvp->v_xattrdir == NULL) { 462 dvp->v_xattrdir = vp; 463 vp = NULL; 464 } 465 *vpp = dvp->v_xattrdir; 466 VN_HOLD(*vpp); 467 mutex_exit(&dvp->v_lock); 468 469 if (vp != NULL) { 470 /* Lost race filling in v_xattrdir */ 471 fake_inactive_xattrdir(vp); 472 } 473 474 return (0); 475 } 476 477 /* ARGSUSED */ 478 int 479 fop_lookup( 480 vnode_t *dvp, 481 char *name, 482 vnode_t **vpp, 483 pathname_t *pnp, 484 int flags, 485 vnode_t *rdir, 486 cred_t *cr, 487 caller_context_t *ct, 488 int *deflags, /* Returned per-dirent flags */ 489 pathname_t *ppnp) /* Returned case-preserved name in directory */ 490 { 491 int err, fd; 492 int omode = O_RDWR | O_NOFOLLOW; 493 vnode_t *vp; 494 struct stat st; 495 int dfd = vncache_getfd(dvp); 496 497 if (flags & LOOKUP_XATTR) 498 return (fake_lookup_xattrdir(dvp, vpp)); 499 500 /* 501 * If lookup is for "", just return dvp. 502 */ 503 if (name[0] == '\0') { 504 VN_HOLD(dvp); 505 *vpp = dvp; 506 return (0); 507 } 508 509 if (fstatat(dfd, name, &st, AT_SYMLINK_NOFOLLOW) == -1) 510 return (errno); 511 512 vp = vncache_lookup(&st); 513 if (vp != NULL) { 514 /* lookup gave us a hold */ 515 *vpp = vp; 516 return (0); 517 } 518 519 if (S_ISDIR(st.st_mode)) 520 omode = O_RDONLY | O_NOFOLLOW; 521 522 again: 523 err = 0; 524 fd = openat(dfd, name, omode, 0); 525 if (fd < 0) 526 err = errno; 527 DTRACE_PROBE3(openat, int, dfd, char *, name, int, err); 528 if (err != 0) { 529 if ((omode & O_RWMASK) == O_RDWR) { 530 omode &= ~O_RWMASK; 531 omode |= O_RDONLY; 532 goto again; 533 } 534 return (err); 535 } 536 537 if (fstat(fd, &st) == -1) { 538 (void) close(fd); 539 return (errno); 540 } 541 542 vp = vncache_enter(&st, dvp, name, fd); 543 544 *vpp = vp; 545 return (0); 546 } 547 548 /* ARGSUSED */ 549 int 550 fop_create( 551 vnode_t *dvp, 552 char *name, 553 vattr_t *vap, 554 vcexcl_t excl, 555 int mode, 556 vnode_t **vpp, 557 cred_t *cr, 558 int flags, 559 caller_context_t *ct, 560 vsecattr_t *vsecp) /* ACL to set during create */ 561 { 562 struct stat st; 563 vnode_t *vp; 564 int err, fd, omode; 565 int dfd = vncache_getfd(dvp); 566 567 /* 568 * If creating "", just return dvp. 569 */ 570 if (name[0] == '\0') { 571 VN_HOLD(dvp); 572 *vpp = dvp; 573 return (0); 574 } 575 576 err = fstatat(dfd, name, &st, AT_SYMLINK_NOFOLLOW); 577 if (err != 0) 578 err = errno; 579 580 vp = NULL; 581 if (err == 0) { 582 /* The file already exists. */ 583 if (excl == EXCL) 584 return (EEXIST); 585 586 vp = vncache_lookup(&st); 587 /* vp gained a hold */ 588 } 589 590 if (vp == NULL) { 591 /* 592 * Open it. (may or may not exist) 593 */ 594 omode = O_RDWR | O_CREAT | O_NOFOLLOW; 595 if (excl == EXCL) 596 omode |= O_EXCL; 597 open_again: 598 err = 0; 599 fd = openat(dfd, name, omode, mode); 600 if (fd < 0) 601 err = errno; 602 DTRACE_PROBE3(openat, int, dfd, char *, name, int, err); 603 if (err != 0) { 604 if ((omode & O_RWMASK) == O_RDWR) { 605 omode &= ~O_RWMASK; 606 omode |= O_RDONLY; 607 goto open_again; 608 } 609 return (err); 610 } 611 (void) fstat(fd, &st); 612 613 vp = vncache_enter(&st, dvp, name, fd); 614 /* vp has its initial hold */ 615 } 616 617 /* Should have the vp now. */ 618 if (vp == NULL) 619 return (EFAULT); 620 621 if (vp->v_type == VDIR && vap->va_type != VDIR) { 622 vn_rele(vp); 623 return (EISDIR); 624 } 625 if (vp->v_type != VDIR && vap->va_type == VDIR) { 626 vn_rele(vp); 627 return (ENOTDIR); 628 } 629 630 /* 631 * Might need to set attributes. 632 */ 633 (void) fop_setattr(vp, vap, 0, cr, ct); 634 635 *vpp = vp; 636 return (0); 637 } 638 639 /* ARGSUSED */ 640 int 641 fop_remove( 642 vnode_t *dvp, 643 char *name, 644 cred_t *cr, 645 caller_context_t *ct, 646 int flags) 647 { 648 int dfd = vncache_getfd(dvp); 649 650 if (unlinkat(dfd, name, 0)) 651 return (errno); 652 653 return (0); 654 } 655 656 /* ARGSUSED */ 657 int 658 fop_link( 659 vnode_t *to_dvp, 660 vnode_t *fr_vp, 661 char *to_name, 662 cred_t *cr, 663 caller_context_t *ct, 664 int flags) 665 { 666 int to_dfd = vncache_getfd(to_dvp); 667 int err; 668 669 /* 670 * Would prefer to specify "from" as the combination: 671 * (fr_vp, NULL) but linkat does not permit it. 672 */ 673 err = linkat(AT_FDCWD, fr_vp->v_path, to_dfd, to_name, 674 AT_SYMLINK_FOLLOW); 675 if (err == -1) 676 err = errno; 677 678 return (err); 679 } 680 681 /* ARGSUSED */ 682 int 683 fop_rename( 684 vnode_t *from_dvp, 685 char *from_name, 686 vnode_t *to_dvp, 687 char *to_name, 688 cred_t *cr, 689 caller_context_t *ct, 690 int flags) 691 { 692 struct stat st; 693 vnode_t *vp; 694 int err; 695 int from_dfd = vncache_getfd(from_dvp); 696 int to_dfd = vncache_getfd(to_dvp); 697 698 if (fstatat(from_dfd, from_name, &st, 699 AT_SYMLINK_NOFOLLOW) == -1) 700 return (errno); 701 702 vp = vncache_lookup(&st); 703 if (vp == NULL) 704 return (ENOENT); 705 706 err = renameat(from_dfd, from_name, to_dfd, to_name); 707 if (err == -1) 708 err = errno; 709 else 710 vncache_renamed(vp, to_dvp, to_name); 711 712 vn_rele(vp); 713 714 return (err); 715 } 716 717 /* ARGSUSED */ 718 int 719 fop_mkdir( 720 vnode_t *dvp, 721 char *name, 722 vattr_t *vap, 723 vnode_t **vpp, 724 cred_t *cr, 725 caller_context_t *ct, 726 int flags, 727 vsecattr_t *vsecp) /* ACL to set during create */ 728 { 729 struct stat st; 730 int err, fd; 731 int dfd = vncache_getfd(dvp); 732 733 mode_t mode = vap->va_mode & 0777; 734 735 if (mkdirat(dfd, name, mode) == -1) 736 return (errno); 737 738 err = 0; 739 fd = openat(dfd, name, O_RDONLY); 740 if (fd < 0) 741 err = errno; 742 DTRACE_PROBE3(openat, int, dfd, char *, name, int, err); 743 if (err != 0) 744 return (err); 745 746 if (fstat(fd, &st) == -1) { 747 err = errno; 748 (void) close(fd); 749 return (err); 750 } 751 752 *vpp = vncache_enter(&st, dvp, name, fd); 753 754 /* 755 * Might need to set attributes. 756 */ 757 (void) fop_setattr(*vpp, vap, 0, cr, ct); 758 759 return (0); 760 } 761 762 /* ARGSUSED */ 763 int 764 fop_rmdir( 765 vnode_t *dvp, 766 char *name, 767 vnode_t *cdir, 768 cred_t *cr, 769 caller_context_t *ct, 770 int flags) 771 { 772 int dfd = vncache_getfd(dvp); 773 774 if (unlinkat(dfd, name, AT_REMOVEDIR) == -1) 775 return (errno); 776 777 return (0); 778 } 779 780 /* ARGSUSED */ 781 int 782 fop_readdir( 783 vnode_t *vp, 784 uio_t *uiop, 785 cred_t *cr, 786 int *eofp, 787 caller_context_t *ct, 788 int flags) 789 { 790 struct iovec *iov; 791 int cnt; 792 int error = 0; 793 int fd = vncache_getfd(vp); 794 795 if (eofp) { 796 *eofp = 0; 797 } 798 799 error = lseek(fd, uiop->uio_loffset, SEEK_SET); 800 if (error == -1) 801 return (errno); 802 803 ASSERT(uiop->uio_iovcnt > 0); 804 iov = uiop->uio_iov; 805 if (iov->iov_len < sizeof (struct dirent)) 806 return (EINVAL); 807 808 /* LINTED E_BAD_PTR_CAST_ALIGN */ 809 cnt = getdents(fd, (struct dirent *)(uiop->uio_iov->iov_base), 810 uiop->uio_resid); 811 if (cnt == -1) 812 return (errno); 813 if (cnt == 0) { 814 if (eofp) { 815 *eofp = 1; 816 } 817 return (ENOENT); 818 } 819 820 iov->iov_base += cnt; 821 iov->iov_len -= cnt; 822 uiop->uio_resid -= cnt; 823 uiop->uio_loffset = lseek(fd, 0LL, SEEK_CUR); 824 825 return (0); 826 } 827 828 /* ARGSUSED */ 829 int 830 fop_symlink( 831 vnode_t *dvp, 832 char *linkname, 833 vattr_t *vap, 834 char *target, 835 cred_t *cr, 836 caller_context_t *ct, 837 int flags) 838 { 839 return (ENOSYS); 840 } 841 842 /* ARGSUSED */ 843 int 844 fop_readlink( 845 vnode_t *vp, 846 uio_t *uiop, 847 cred_t *cr, 848 caller_context_t *ct) 849 { 850 return (ENOSYS); 851 } 852 853 /* ARGSUSED */ 854 int 855 fop_fsync( 856 vnode_t *vp, 857 int syncflag, 858 cred_t *cr, 859 caller_context_t *ct) 860 { 861 int fd = vncache_getfd(vp); 862 863 if (fsync(fd) == -1) 864 return (errno); 865 866 return (0); 867 } 868 869 /* ARGSUSED */ 870 void 871 fop_inactive( 872 vnode_t *vp, 873 cred_t *cr, 874 caller_context_t *ct) 875 { 876 if (vp->v_flag & V_XATTRDIR) { 877 fake_inactive_xattrdir(vp); 878 } else { 879 vncache_inactive(vp); 880 } 881 } 882 883 /* 884 * The special xattr directories are not in the vncache AVL, but 885 * hang off the parent's v_xattrdir field. When vn_rele finds 886 * an xattr dir at v_count == 1 it calls here via fop_inactive(). 887 */ 888 static void 889 fake_inactive_xattrdir(vnode_t *vp) 890 { 891 mutex_enter(&vp->v_lock); 892 if (vp->v_count > 1) { 893 /* new ref. via v_xattrdir */ 894 mutex_exit(&vp->v_lock); 895 return; 896 } 897 mutex_exit(&vp->v_lock); 898 vn_free(vp); 899 } 900 901 /* ARGSUSED */ 902 int 903 fop_fid( 904 vnode_t *vp, 905 fid_t *fidp, 906 caller_context_t *ct) 907 { 908 return (ENOSYS); 909 } 910 911 /* ARGSUSED */ 912 int 913 fop_rwlock( 914 vnode_t *vp, 915 int write_lock, 916 caller_context_t *ct) 917 { 918 /* See: fs_rwlock */ 919 return (-1); 920 } 921 922 /* ARGSUSED */ 923 void 924 fop_rwunlock( 925 vnode_t *vp, 926 int write_lock, 927 caller_context_t *ct) 928 { 929 /* See: fs_rwunlock */ 930 } 931 932 /* ARGSUSED */ 933 int 934 fop_seek( 935 vnode_t *vp, 936 offset_t ooff, 937 offset_t *noffp, 938 caller_context_t *ct) 939 { 940 return (ENOSYS); 941 } 942 943 /* ARGSUSED */ 944 int 945 fop_cmp( 946 vnode_t *vp1, 947 vnode_t *vp2, 948 caller_context_t *ct) 949 { 950 /* See fs_cmp */ 951 return (vncache_cmp(vp1, vp2)); 952 } 953 954 /* ARGSUSED */ 955 int 956 fop_frlock( 957 vnode_t *vp, 958 int cmd, 959 flock64_t *bfp, 960 int flag, 961 offset_t offset, 962 struct flk_callback *flk_cbp, 963 cred_t *cr, 964 caller_context_t *ct) 965 { 966 #if defined(_LP64) 967 offset_t maxoffset = INT64_MAX; 968 #elif defined(_ILP32) 969 /* 970 * Sadly, the fcntl API enforces 32-bit offsets, 971 * even though we have _FILE_OFFSET_BITS=64 972 */ 973 offset_t maxoffset = INT32_MAX; 974 #else 975 #error "unsupported env." 976 #endif 977 int fd = vncache_getfd(vp); 978 979 /* See fs_frlock */ 980 981 switch (cmd) { 982 case F_GETLK: 983 case F_SETLK_NBMAND: 984 case F_SETLK: 985 case F_SETLKW: 986 break; 987 default: 988 return (EINVAL); 989 } 990 991 /* We only get SEEK_SET ranges here. */ 992 if (bfp->l_whence != 0) 993 return (EINVAL); 994 995 /* 996 * One limitation of using fcntl(2) F_SETLK etc is that 997 * the real kernel limits the offsets we can use. 998 * (Maybe the fcntl API should loosen that up?) 999 * See syscall/fcntl.c:flock_check() 1000 * 1001 * Here in libfksmbsrv we can just ignore such locks, 1002 * or ignore the part that extends beyond maxoffset. 1003 * The SMB layer still keeps track of such locks for 1004 * conflict detection, so not reflecting such locks 1005 * into the real FS layer is OK. Note: this may 1006 * modify the pased bfp->l_len. 1007 */ 1008 if (bfp->l_start < 0 || bfp->l_start > maxoffset) 1009 return (0); 1010 if (bfp->l_len < 0 || bfp->l_len > maxoffset) 1011 return (0); 1012 if (bfp->l_len > (maxoffset - bfp->l_start + 1)) 1013 bfp->l_len = (maxoffset - bfp->l_start + 1); 1014 1015 if (fcntl(fd, cmd, bfp) == -1) 1016 return (errno); 1017 1018 return (0); 1019 } 1020 1021 /* ARGSUSED */ 1022 int 1023 fop_space( 1024 vnode_t *vp, 1025 int cmd, 1026 flock64_t *bfp, 1027 int flag, 1028 offset_t offset, 1029 cred_t *cr, 1030 caller_context_t *ct) 1031 { 1032 int fd = vncache_getfd(vp); 1033 1034 /* See fs_frlock */ 1035 1036 switch (cmd) { 1037 case F_ALLOCSP: 1038 case F_FREESP: 1039 break; 1040 default: 1041 return (EINVAL); 1042 } 1043 1044 if (fcntl(fd, cmd, bfp) == -1) 1045 return (errno); 1046 1047 return (0); 1048 } 1049 1050 /* ARGSUSED */ 1051 int 1052 fop_realvp( 1053 vnode_t *vp, 1054 vnode_t **vpp, 1055 caller_context_t *ct) 1056 { 1057 return (ENOSYS); 1058 } 1059 1060 /* ARGSUSED */ 1061 int 1062 fop_getpage( 1063 vnode_t *vp, 1064 offset_t off, 1065 size_t len, 1066 uint_t *protp, 1067 struct page **plarr, 1068 size_t plsz, 1069 struct seg *seg, 1070 caddr_t addr, 1071 enum seg_rw rw, 1072 cred_t *cr, 1073 caller_context_t *ct) 1074 { 1075 return (ENOSYS); 1076 } 1077 1078 /* ARGSUSED */ 1079 int 1080 fop_putpage( 1081 vnode_t *vp, 1082 offset_t off, 1083 size_t len, 1084 int flags, 1085 cred_t *cr, 1086 caller_context_t *ct) 1087 { 1088 return (ENOSYS); 1089 } 1090 1091 /* ARGSUSED */ 1092 int 1093 fop_map( 1094 vnode_t *vp, 1095 offset_t off, 1096 struct as *as, 1097 caddr_t *addrp, 1098 size_t len, 1099 uchar_t prot, 1100 uchar_t maxprot, 1101 uint_t flags, 1102 cred_t *cr, 1103 caller_context_t *ct) 1104 { 1105 return (ENOSYS); 1106 } 1107 1108 /* ARGSUSED */ 1109 int 1110 fop_addmap( 1111 vnode_t *vp, 1112 offset_t off, 1113 struct as *as, 1114 caddr_t addr, 1115 size_t len, 1116 uchar_t prot, 1117 uchar_t maxprot, 1118 uint_t flags, 1119 cred_t *cr, 1120 caller_context_t *ct) 1121 { 1122 return (ENOSYS); 1123 } 1124 1125 /* ARGSUSED */ 1126 int 1127 fop_delmap( 1128 vnode_t *vp, 1129 offset_t off, 1130 struct as *as, 1131 caddr_t addr, 1132 size_t len, 1133 uint_t prot, 1134 uint_t maxprot, 1135 uint_t flags, 1136 cred_t *cr, 1137 caller_context_t *ct) 1138 { 1139 return (ENOSYS); 1140 } 1141 1142 /* ARGSUSED */ 1143 int 1144 fop_poll( 1145 vnode_t *vp, 1146 short events, 1147 int anyyet, 1148 short *reventsp, 1149 struct pollhead **phpp, 1150 caller_context_t *ct) 1151 { 1152 *reventsp = 0; 1153 if (events & POLLIN) 1154 *reventsp |= POLLIN; 1155 if (events & POLLRDNORM) 1156 *reventsp |= POLLRDNORM; 1157 if (events & POLLRDBAND) 1158 *reventsp |= POLLRDBAND; 1159 if (events & POLLOUT) 1160 *reventsp |= POLLOUT; 1161 if (events & POLLWRBAND) 1162 *reventsp |= POLLWRBAND; 1163 *phpp = NULL; /* or fake_pollhead? */ 1164 1165 return (0); 1166 } 1167 1168 /* ARGSUSED */ 1169 int 1170 fop_dump( 1171 vnode_t *vp, 1172 caddr_t addr, 1173 offset_t lbdn, 1174 offset_t dblks, 1175 caller_context_t *ct) 1176 { 1177 return (ENOSYS); 1178 } 1179 1180 /* 1181 * See fs_pathconf 1182 */ 1183 /* ARGSUSED */ 1184 int 1185 fop_pathconf( 1186 vnode_t *vp, 1187 int cmd, 1188 ulong_t *valp, 1189 cred_t *cr, 1190 caller_context_t *ct) 1191 { 1192 register ulong_t val; 1193 register int error = 0; 1194 1195 switch (cmd) { 1196 1197 case _PC_LINK_MAX: 1198 val = MAXLINK; 1199 break; 1200 1201 case _PC_MAX_CANON: 1202 val = MAX_CANON; 1203 break; 1204 1205 case _PC_MAX_INPUT: 1206 val = MAX_INPUT; 1207 break; 1208 1209 case _PC_NAME_MAX: 1210 val = MAXNAMELEN; 1211 break; 1212 1213 case _PC_PATH_MAX: 1214 case _PC_SYMLINK_MAX: 1215 val = MAXPATHLEN; 1216 break; 1217 1218 case _PC_PIPE_BUF: 1219 val = PIPE_BUF; 1220 break; 1221 1222 case _PC_NO_TRUNC: 1223 val = (ulong_t)-1; 1224 break; 1225 1226 case _PC_VDISABLE: 1227 val = _POSIX_VDISABLE; 1228 break; 1229 1230 case _PC_CHOWN_RESTRICTED: 1231 val = 1; /* chown restricted enabled */ 1232 break; 1233 1234 case _PC_FILESIZEBITS: 1235 val = (ulong_t)-1; /* large file support */ 1236 break; 1237 1238 case _PC_ACL_ENABLED: 1239 val = _ACL_ACE_ENABLED; 1240 break; 1241 1242 case _PC_CASE_BEHAVIOR: 1243 val = _CASE_SENSITIVE; 1244 break; 1245 1246 case _PC_SATTR_ENABLED: 1247 case _PC_SATTR_EXISTS: 1248 val = 0; 1249 break; 1250 1251 case _PC_ACCESS_FILTERING: 1252 val = 0; 1253 break; 1254 1255 default: 1256 error = EINVAL; 1257 break; 1258 } 1259 1260 if (error == 0) 1261 *valp = val; 1262 return (error); 1263 } 1264 1265 /* ARGSUSED */ 1266 int 1267 fop_pageio( 1268 vnode_t *vp, 1269 struct page *pp, 1270 u_offset_t io_off, 1271 size_t io_len, 1272 int flags, 1273 cred_t *cr, 1274 caller_context_t *ct) 1275 { 1276 return (ENOSYS); 1277 } 1278 1279 /* ARGSUSED */ 1280 int 1281 fop_dumpctl( 1282 vnode_t *vp, 1283 int action, 1284 offset_t *blkp, 1285 caller_context_t *ct) 1286 { 1287 return (ENOSYS); 1288 } 1289 1290 /* ARGSUSED */ 1291 void 1292 fop_dispose( 1293 vnode_t *vp, 1294 struct page *pp, 1295 int flag, 1296 int dn, 1297 cred_t *cr, 1298 caller_context_t *ct) 1299 { 1300 } 1301 1302 /* ARGSUSED */ 1303 int 1304 fop_setsecattr( 1305 vnode_t *vp, 1306 vsecattr_t *vsap, 1307 int flag, 1308 cred_t *cr, 1309 caller_context_t *ct) 1310 { 1311 return (0); 1312 } 1313 1314 /* 1315 * Fake up just enough of this so we can test get/set SDs. 1316 */ 1317 /* ARGSUSED */ 1318 int 1319 fop_getsecattr( 1320 vnode_t *vp, 1321 vsecattr_t *vsecattr, 1322 int flag, 1323 cred_t *cr, 1324 caller_context_t *ct) 1325 { 1326 1327 vsecattr->vsa_aclcnt = 0; 1328 vsecattr->vsa_aclentsz = 0; 1329 vsecattr->vsa_aclentp = NULL; 1330 vsecattr->vsa_dfaclcnt = 0; /* Default ACLs are not fabricated */ 1331 vsecattr->vsa_dfaclentp = NULL; 1332 1333 if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL)) { 1334 aclent_t *aclentp; 1335 size_t aclsize; 1336 1337 aclsize = sizeof (aclent_t); 1338 vsecattr->vsa_aclcnt = 1; 1339 vsecattr->vsa_aclentp = kmem_zalloc(aclsize, KM_SLEEP); 1340 aclentp = vsecattr->vsa_aclentp; 1341 1342 aclentp->a_type = OTHER_OBJ; 1343 aclentp->a_perm = 0777; 1344 aclentp->a_id = (gid_t)-1; 1345 aclentp++; 1346 } else if (vsecattr->vsa_mask & (VSA_ACECNT | VSA_ACE)) { 1347 ace_t *acl; 1348 1349 acl = kmem_alloc(sizeof (ace_t), KM_SLEEP); 1350 acl->a_who = (uint32_t)-1; 1351 acl->a_type = ACE_ACCESS_ALLOWED_ACE_TYPE; 1352 acl->a_flags = ACE_EVERYONE; 1353 acl->a_access_mask = ACE_MODIFY_PERMS; 1354 1355 vsecattr->vsa_aclentp = (void *)acl; 1356 vsecattr->vsa_aclcnt = 1; 1357 vsecattr->vsa_aclentsz = sizeof (ace_t); 1358 } 1359 1360 return (0); 1361 } 1362 1363 /* ARGSUSED */ 1364 int 1365 fop_shrlock( 1366 vnode_t *vp, 1367 int cmd, 1368 struct shrlock *shr, 1369 int flag, 1370 cred_t *cr, 1371 caller_context_t *ct) 1372 { 1373 int fd = vncache_getfd(vp); 1374 1375 switch (cmd) { 1376 case F_SHARE: 1377 case F_SHARE_NBMAND: 1378 case F_UNSHARE: 1379 break; 1380 default: 1381 return (EINVAL); 1382 } 1383 1384 if (!fop_shrlock_enable) 1385 return (0); 1386 1387 if (fcntl(fd, cmd, shr) == -1) 1388 return (errno); 1389 1390 return (0); 1391 } 1392 1393 /* ARGSUSED */ 1394 int 1395 fop_vnevent(vnode_t *vp, vnevent_t vnevent, vnode_t *dvp, char *fnm, 1396 caller_context_t *ct) 1397 { 1398 return (ENOSYS); 1399 } 1400 1401 /* ARGSUSED */ 1402 int 1403 fop_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, 1404 caller_context_t *ct) 1405 { 1406 fake_xuio_t *priv; 1407 uio_t *uio = &xuio->xu_uio; 1408 int blksz = fake_xuio_blksz; 1409 off_t foff, moff; 1410 size_t flen, mlen; 1411 int poff; 1412 char *ma; 1413 struct stat st; 1414 int fd = vncache_getfd(vp); 1415 1416 if (xuio->xu_type != UIOTYPE_ZEROCOPY) 1417 return (EINVAL); 1418 1419 foff = uio->uio_loffset; 1420 flen = uio->uio_resid; 1421 1422 if (fstat(fd, &st) == -1) 1423 return (errno); 1424 1425 if (foff >= st.st_size) 1426 return (EINVAL); 1427 if ((foff + flen) > st.st_size) 1428 flen = st.st_size - foff; 1429 1430 switch (ioflag) { 1431 case UIO_READ: 1432 if (flen < blksz/2) 1433 return (EINVAL); 1434 break; 1435 1436 case UIO_WRITE: 1437 default: 1438 return (EINVAL); 1439 } 1440 1441 /* 1442 * See if we can map the file for read. 1443 * Round down start offset for mmap. 1444 */ 1445 poff = P2PHASE((int)foff, blksz); 1446 moff = foff - poff; 1447 mlen = flen + poff; 1448 1449 ma = mmap(NULL, mlen, PROT_READ, MAP_SHARED, fd, moff); 1450 if (ma == MAP_FAILED) { 1451 /* Can't use loaned buffers. */ 1452 return (EINVAL); 1453 } 1454 1455 priv = kmem_zalloc(sizeof (*priv), KM_SLEEP); 1456 priv->map_foff = foff; 1457 priv->map_addr = ma; 1458 priv->map_len = mlen; 1459 1460 XUIO_XUZC_PRIV(xuio) = priv; 1461 XUIO_XUZC_RW(xuio) = ioflag; 1462 uio->uio_extflg = UIO_XUIO; 1463 1464 return (0); 1465 } 1466 1467 /* ARGSUSED */ 1468 int 1469 fop_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct) 1470 { 1471 fake_xuio_t *priv = XUIO_XUZC_PRIV(xuio); 1472 int ioflag = XUIO_XUZC_RW(xuio); 1473 1474 ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); 1475 ASSERT(ioflag == UIO_READ); 1476 1477 munmap(priv->map_addr, priv->map_len); 1478 kmem_free(priv, sizeof (fake_xuio_t)); 1479 XUIO_XUZC_PRIV(xuio) = NULL; 1480 1481 return (0); 1482 } 1483 1484 1485 /* 1486 * *************************************************************** 1487 * other VOP support 1488 */ 1489 1490 /* 1491 * Convert stat(2) formats to vnode types and vice versa. (Knows about 1492 * numerical order of S_IFMT and vnode types.) 1493 */ 1494 enum vtype iftovt_tab[] = { 1495 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 1496 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON 1497 }; 1498 1499 ushort_t vttoif_tab[] = { 1500 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, 1501 S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0 1502 }; 1503 1504 /* 1505 * stat_to_vattr() 1506 * 1507 * Convert from a stat structure to an vattr structure 1508 * Note: only set fields according to va_mask 1509 */ 1510 1511 int 1512 stat_to_vattr(const struct stat *st, vattr_t *vap) 1513 { 1514 1515 if (vap->va_mask & AT_TYPE) 1516 vap->va_type = IFTOVT(st->st_mode); 1517 1518 if (vap->va_mask & AT_MODE) 1519 vap->va_mode = st->st_mode; 1520 1521 if (vap->va_mask & AT_UID) 1522 vap->va_uid = st->st_uid; 1523 1524 if (vap->va_mask & AT_GID) 1525 vap->va_gid = st->st_gid; 1526 1527 if (vap->va_mask & AT_FSID) 1528 vap->va_fsid = st->st_dev; 1529 1530 if (vap->va_mask & AT_NODEID) 1531 vap->va_nodeid = st->st_ino; 1532 1533 if (vap->va_mask & AT_NLINK) 1534 vap->va_nlink = st->st_nlink; 1535 1536 if (vap->va_mask & AT_SIZE) 1537 vap->va_size = (u_offset_t)st->st_size; 1538 1539 if (vap->va_mask & AT_ATIME) { 1540 vap->va_atime.tv_sec = st->st_atim.tv_sec; 1541 vap->va_atime.tv_nsec = st->st_atim.tv_nsec; 1542 } 1543 1544 if (vap->va_mask & AT_MTIME) { 1545 vap->va_mtime.tv_sec = st->st_mtim.tv_sec; 1546 vap->va_mtime.tv_nsec = st->st_mtim.tv_nsec; 1547 } 1548 1549 if (vap->va_mask & AT_CTIME) { 1550 vap->va_ctime.tv_sec = st->st_ctim.tv_sec; 1551 vap->va_ctime.tv_nsec = st->st_ctim.tv_nsec; 1552 } 1553 1554 if (vap->va_mask & AT_RDEV) 1555 vap->va_rdev = st->st_rdev; 1556 1557 if (vap->va_mask & AT_BLKSIZE) 1558 vap->va_blksize = (uint_t)st->st_blksize; 1559 1560 1561 if (vap->va_mask & AT_NBLOCKS) 1562 vap->va_nblocks = (u_longlong_t)st->st_blocks; 1563 1564 if (vap->va_mask & AT_SEQ) 1565 vap->va_seq = 0; 1566 1567 return (0); 1568 } 1569 1570 /* ARGSUSED */ 1571 void 1572 flk_init_callback(flk_callback_t *flk_cb, 1573 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata) 1574 { 1575 } 1576 1577 /* See: VN_HOLD / VN_RELE */ 1578 1579 void 1580 vn_rele(vnode_t *vp) 1581 { 1582 VERIFY3U(vp->v_count, !=, 0); 1583 mutex_enter(&vp->v_lock); 1584 if (vp->v_count == 1) { 1585 mutex_exit(&vp->v_lock); 1586 fop_inactive(vp, NULL, NULL); 1587 } else { 1588 vp->v_count--; 1589 mutex_exit(&vp->v_lock); 1590 } 1591 } 1592 1593 int 1594 vn_has_other_opens( 1595 vnode_t *vp, 1596 v_mode_t mode) 1597 { 1598 1599 switch (mode) { 1600 case V_WRITE: 1601 if (vp->v_wrcnt > 1) 1602 return (V_TRUE); 1603 break; 1604 case V_RDORWR: 1605 if ((vp->v_rdcnt > 1) || (vp->v_wrcnt > 1)) 1606 return (V_TRUE); 1607 break; 1608 case V_RDANDWR: 1609 if ((vp->v_rdcnt > 1) && (vp->v_wrcnt > 1)) 1610 return (V_TRUE); 1611 break; 1612 case V_READ: 1613 if (vp->v_rdcnt > 1) 1614 return (V_TRUE); 1615 break; 1616 } 1617 1618 return (V_FALSE); 1619 } 1620 1621 /* 1622 * vn_is_opened() checks whether a particular file is opened and 1623 * whether the open is for read and/or write. 1624 * 1625 * Vnode counts are only kept on regular files (v_type=VREG). 1626 */ 1627 int 1628 vn_is_opened( 1629 vnode_t *vp, 1630 v_mode_t mode) 1631 { 1632 1633 ASSERT(vp != NULL); 1634 1635 switch (mode) { 1636 case V_WRITE: 1637 if (vp->v_wrcnt) 1638 return (V_TRUE); 1639 break; 1640 case V_RDANDWR: 1641 if (vp->v_rdcnt && vp->v_wrcnt) 1642 return (V_TRUE); 1643 break; 1644 case V_RDORWR: 1645 if (vp->v_rdcnt || vp->v_wrcnt) 1646 return (V_TRUE); 1647 break; 1648 case V_READ: 1649 if (vp->v_rdcnt) 1650 return (V_TRUE); 1651 break; 1652 } 1653 1654 return (V_FALSE); 1655 } 1656 1657 /* 1658 * vn_is_mapped() checks whether a particular file is mapped and whether 1659 * the file is mapped read and/or write. 1660 */ 1661 /* ARGSUSED */ 1662 int 1663 vn_is_mapped( 1664 vnode_t *vp, 1665 v_mode_t mode) 1666 { 1667 return (V_FALSE); 1668 } 1669