1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed 8 * to Berkeley by John Heidemann of the UCLA Ficus project. 9 * 10 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/bio.h> 43 #include <sys/buf.h> 44 #include <sys/conf.h> 45 #include <sys/event.h> 46 #include <sys/filio.h> 47 #include <sys/kernel.h> 48 #include <sys/limits.h> 49 #include <sys/lock.h> 50 #include <sys/lockf.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/namei.h> 54 #include <sys/rwlock.h> 55 #include <sys/fcntl.h> 56 #include <sys/unistd.h> 57 #include <sys/vnode.h> 58 #include <sys/dirent.h> 59 #include <sys/poll.h> 60 61 #include <security/mac/mac_framework.h> 62 63 #include <vm/vm.h> 64 #include <vm/vm_object.h> 65 #include <vm/vm_extern.h> 66 #include <vm/pmap.h> 67 #include <vm/vm_map.h> 68 #include <vm/vm_page.h> 69 #include <vm/vm_pager.h> 70 #include <vm/vnode_pager.h> 71 72 static int vop_nolookup(struct vop_lookup_args *); 73 static int vop_norename(struct vop_rename_args *); 74 static int vop_nostrategy(struct vop_strategy_args *); 75 static int get_next_dirent(struct vnode *vp, struct dirent **dpp, 76 char *dirbuf, int dirbuflen, off_t *off, 77 char **cpos, int *len, int *eofflag, 78 struct thread *td); 79 static int dirent_exists(struct vnode *vp, const char *dirname, 80 struct thread *td); 81 82 #define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4) 83 84 static int vop_stdis_text(struct vop_is_text_args *ap); 85 static int vop_stdunset_text(struct vop_unset_text_args *ap); 86 static int vop_stdadd_writecount(struct vop_add_writecount_args *ap); 87 static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap); 88 static int vop_stdfdatasync(struct vop_fdatasync_args *ap); 89 static int vop_stdgetpages_async(struct vop_getpages_async_args *ap); 90 91 /* 92 * This vnode table stores what we want to do if the filesystem doesn't 93 * implement a particular VOP. 94 * 95 * If there is no specific entry here, we will return EOPNOTSUPP. 96 * 97 * Note that every filesystem has to implement either vop_access 98 * or vop_accessx; failing to do so will result in immediate crash 99 * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(), 100 * which calls vop_stdaccess() etc. 101 */ 102 103 struct vop_vector default_vnodeops = { 104 .vop_default = NULL, 105 .vop_bypass = VOP_EOPNOTSUPP, 106 107 .vop_access = vop_stdaccess, 108 .vop_accessx = vop_stdaccessx, 109 .vop_advise = vop_stdadvise, 110 .vop_advlock = vop_stdadvlock, 111 .vop_advlockasync = vop_stdadvlockasync, 112 .vop_advlockpurge = vop_stdadvlockpurge, 113 .vop_allocate = vop_stdallocate, 114 .vop_bmap = vop_stdbmap, 115 .vop_close = VOP_NULL, 116 .vop_fsync = VOP_NULL, 117 .vop_fdatasync = vop_stdfdatasync, 118 .vop_getpages = vop_stdgetpages, 119 .vop_getpages_async = vop_stdgetpages_async, 120 .vop_getwritemount = vop_stdgetwritemount, 121 .vop_inactive = VOP_NULL, 122 .vop_need_inactive = vop_stdneed_inactive, 123 .vop_ioctl = vop_stdioctl, 124 .vop_kqfilter = vop_stdkqfilter, 125 .vop_islocked = vop_stdislocked, 126 .vop_lock1 = vop_stdlock, 127 .vop_lookup = vop_nolookup, 128 .vop_open = VOP_NULL, 129 .vop_pathconf = VOP_EINVAL, 130 .vop_poll = vop_nopoll, 131 .vop_putpages = vop_stdputpages, 132 .vop_readlink = VOP_EINVAL, 133 .vop_rename = vop_norename, 134 .vop_revoke = VOP_PANIC, 135 .vop_strategy = vop_nostrategy, 136 .vop_unlock = vop_stdunlock, 137 .vop_vptocnp = vop_stdvptocnp, 138 .vop_vptofh = vop_stdvptofh, 139 .vop_unp_bind = vop_stdunp_bind, 140 .vop_unp_connect = vop_stdunp_connect, 141 .vop_unp_detach = vop_stdunp_detach, 142 .vop_is_text = vop_stdis_text, 143 .vop_set_text = vop_stdset_text, 144 .vop_unset_text = vop_stdunset_text, 145 .vop_add_writecount = vop_stdadd_writecount, 146 .vop_copy_file_range = vop_stdcopy_file_range, 147 }; 148 149 /* 150 * Series of placeholder functions for various error returns for 151 * VOPs. 152 */ 153 154 int 155 vop_eopnotsupp(struct vop_generic_args *ap) 156 { 157 /* 158 printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name); 159 */ 160 161 return (EOPNOTSUPP); 162 } 163 164 int 165 vop_ebadf(struct vop_generic_args *ap) 166 { 167 168 return (EBADF); 169 } 170 171 int 172 vop_enotty(struct vop_generic_args *ap) 173 { 174 175 return (ENOTTY); 176 } 177 178 int 179 vop_einval(struct vop_generic_args *ap) 180 { 181 182 return (EINVAL); 183 } 184 185 int 186 vop_enoent(struct vop_generic_args *ap) 187 { 188 189 return (ENOENT); 190 } 191 192 int 193 vop_null(struct vop_generic_args *ap) 194 { 195 196 return (0); 197 } 198 199 /* 200 * Helper function to panic on some bad VOPs in some filesystems. 201 */ 202 int 203 vop_panic(struct vop_generic_args *ap) 204 { 205 206 panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name); 207 } 208 209 /* 210 * vop_std<something> and vop_no<something> are default functions for use by 211 * filesystems that need the "default reasonable" implementation for a 212 * particular operation. 213 * 214 * The documentation for the operations they implement exists (if it exists) 215 * in the VOP_<SOMETHING>(9) manpage (all uppercase). 216 */ 217 218 /* 219 * Default vop for filesystems that do not support name lookup 220 */ 221 static int 222 vop_nolookup(ap) 223 struct vop_lookup_args /* { 224 struct vnode *a_dvp; 225 struct vnode **a_vpp; 226 struct componentname *a_cnp; 227 } */ *ap; 228 { 229 230 *ap->a_vpp = NULL; 231 return (ENOTDIR); 232 } 233 234 /* 235 * vop_norename: 236 * 237 * Handle unlock and reference counting for arguments of vop_rename 238 * for filesystems that do not implement rename operation. 239 */ 240 static int 241 vop_norename(struct vop_rename_args *ap) 242 { 243 244 vop_rename_fail(ap); 245 return (EOPNOTSUPP); 246 } 247 248 /* 249 * vop_nostrategy: 250 * 251 * Strategy routine for VFS devices that have none. 252 * 253 * BIO_ERROR and B_INVAL must be cleared prior to calling any strategy 254 * routine. Typically this is done for a BIO_READ strategy call. 255 * Typically B_INVAL is assumed to already be clear prior to a write 256 * and should not be cleared manually unless you just made the buffer 257 * invalid. BIO_ERROR should be cleared either way. 258 */ 259 260 static int 261 vop_nostrategy (struct vop_strategy_args *ap) 262 { 263 printf("No strategy for buffer at %p\n", ap->a_bp); 264 vn_printf(ap->a_vp, "vnode "); 265 ap->a_bp->b_ioflags |= BIO_ERROR; 266 ap->a_bp->b_error = EOPNOTSUPP; 267 bufdone(ap->a_bp); 268 return (EOPNOTSUPP); 269 } 270 271 static int 272 get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf, 273 int dirbuflen, off_t *off, char **cpos, int *len, 274 int *eofflag, struct thread *td) 275 { 276 int error, reclen; 277 struct uio uio; 278 struct iovec iov; 279 struct dirent *dp; 280 281 KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp)); 282 KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp)); 283 284 if (*len == 0) { 285 iov.iov_base = dirbuf; 286 iov.iov_len = dirbuflen; 287 288 uio.uio_iov = &iov; 289 uio.uio_iovcnt = 1; 290 uio.uio_offset = *off; 291 uio.uio_resid = dirbuflen; 292 uio.uio_segflg = UIO_SYSSPACE; 293 uio.uio_rw = UIO_READ; 294 uio.uio_td = td; 295 296 *eofflag = 0; 297 298 #ifdef MAC 299 error = mac_vnode_check_readdir(td->td_ucred, vp); 300 if (error == 0) 301 #endif 302 error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag, 303 NULL, NULL); 304 if (error) 305 return (error); 306 307 *off = uio.uio_offset; 308 309 *cpos = dirbuf; 310 *len = (dirbuflen - uio.uio_resid); 311 312 if (*len == 0) 313 return (ENOENT); 314 } 315 316 dp = (struct dirent *)(*cpos); 317 reclen = dp->d_reclen; 318 *dpp = dp; 319 320 /* check for malformed directory.. */ 321 if (reclen < DIRENT_MINSIZE) 322 return (EINVAL); 323 324 *cpos += reclen; 325 *len -= reclen; 326 327 return (0); 328 } 329 330 /* 331 * Check if a named file exists in a given directory vnode. 332 */ 333 static int 334 dirent_exists(struct vnode *vp, const char *dirname, struct thread *td) 335 { 336 char *dirbuf, *cpos; 337 int error, eofflag, dirbuflen, len, found; 338 off_t off; 339 struct dirent *dp; 340 struct vattr va; 341 342 KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp)); 343 KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp)); 344 345 found = 0; 346 347 error = VOP_GETATTR(vp, &va, td->td_ucred); 348 if (error) 349 return (found); 350 351 dirbuflen = DEV_BSIZE; 352 if (dirbuflen < va.va_blocksize) 353 dirbuflen = va.va_blocksize; 354 dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK); 355 356 off = 0; 357 len = 0; 358 do { 359 error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off, 360 &cpos, &len, &eofflag, td); 361 if (error) 362 goto out; 363 364 if (dp->d_type != DT_WHT && dp->d_fileno != 0 && 365 strcmp(dp->d_name, dirname) == 0) { 366 found = 1; 367 goto out; 368 } 369 } while (len > 0 || !eofflag); 370 371 out: 372 free(dirbuf, M_TEMP); 373 return (found); 374 } 375 376 int 377 vop_stdaccess(struct vop_access_args *ap) 378 { 379 380 KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | 381 VAPPEND)) == 0, ("invalid bit in accmode")); 382 383 return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td)); 384 } 385 386 int 387 vop_stdaccessx(struct vop_accessx_args *ap) 388 { 389 int error; 390 accmode_t accmode = ap->a_accmode; 391 392 error = vfs_unixify_accmode(&accmode); 393 if (error != 0) 394 return (error); 395 396 if (accmode == 0) 397 return (0); 398 399 return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td)); 400 } 401 402 /* 403 * Advisory record locking support 404 */ 405 int 406 vop_stdadvlock(struct vop_advlock_args *ap) 407 { 408 struct vnode *vp; 409 struct vattr vattr; 410 int error; 411 412 vp = ap->a_vp; 413 if (ap->a_fl->l_whence == SEEK_END) { 414 /* 415 * The NFSv4 server must avoid doing a vn_lock() here, since it 416 * can deadlock the nfsd threads, due to a LOR. Fortunately 417 * the NFSv4 server always uses SEEK_SET and this code is 418 * only required for the SEEK_END case. 419 */ 420 vn_lock(vp, LK_SHARED | LK_RETRY); 421 error = VOP_GETATTR(vp, &vattr, curthread->td_ucred); 422 VOP_UNLOCK(vp, 0); 423 if (error) 424 return (error); 425 } else 426 vattr.va_size = 0; 427 428 return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size)); 429 } 430 431 int 432 vop_stdadvlockasync(struct vop_advlockasync_args *ap) 433 { 434 struct vnode *vp; 435 struct vattr vattr; 436 int error; 437 438 vp = ap->a_vp; 439 if (ap->a_fl->l_whence == SEEK_END) { 440 /* The size argument is only needed for SEEK_END. */ 441 vn_lock(vp, LK_SHARED | LK_RETRY); 442 error = VOP_GETATTR(vp, &vattr, curthread->td_ucred); 443 VOP_UNLOCK(vp, 0); 444 if (error) 445 return (error); 446 } else 447 vattr.va_size = 0; 448 449 return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size)); 450 } 451 452 int 453 vop_stdadvlockpurge(struct vop_advlockpurge_args *ap) 454 { 455 struct vnode *vp; 456 457 vp = ap->a_vp; 458 lf_purgelocks(vp, &vp->v_lockf); 459 return (0); 460 } 461 462 /* 463 * vop_stdpathconf: 464 * 465 * Standard implementation of POSIX pathconf, to get information about limits 466 * for a filesystem. 467 * Override per filesystem for the case where the filesystem has smaller 468 * limits. 469 */ 470 int 471 vop_stdpathconf(ap) 472 struct vop_pathconf_args /* { 473 struct vnode *a_vp; 474 int a_name; 475 int *a_retval; 476 } */ *ap; 477 { 478 479 switch (ap->a_name) { 480 case _PC_ASYNC_IO: 481 *ap->a_retval = _POSIX_ASYNCHRONOUS_IO; 482 return (0); 483 case _PC_PATH_MAX: 484 *ap->a_retval = PATH_MAX; 485 return (0); 486 case _PC_ACL_EXTENDED: 487 case _PC_ACL_NFS4: 488 case _PC_CAP_PRESENT: 489 case _PC_INF_PRESENT: 490 case _PC_MAC_PRESENT: 491 *ap->a_retval = 0; 492 return (0); 493 default: 494 return (EINVAL); 495 } 496 /* NOTREACHED */ 497 } 498 499 /* 500 * Standard lock, unlock and islocked functions. 501 */ 502 int 503 vop_stdlock(ap) 504 struct vop_lock1_args /* { 505 struct vnode *a_vp; 506 int a_flags; 507 char *file; 508 int line; 509 } */ *ap; 510 { 511 struct vnode *vp = ap->a_vp; 512 struct mtx *ilk; 513 514 ilk = VI_MTX(vp); 515 return (lockmgr_lock_fast_path(vp->v_vnlock, ap->a_flags, 516 &ilk->lock_object, ap->a_file, ap->a_line)); 517 } 518 519 /* See above. */ 520 int 521 vop_stdunlock(ap) 522 struct vop_unlock_args /* { 523 struct vnode *a_vp; 524 int a_flags; 525 } */ *ap; 526 { 527 struct vnode *vp = ap->a_vp; 528 struct mtx *ilk; 529 530 ilk = VI_MTX(vp); 531 return (lockmgr_unlock_fast_path(vp->v_vnlock, ap->a_flags, 532 &ilk->lock_object)); 533 } 534 535 /* See above. */ 536 int 537 vop_stdislocked(ap) 538 struct vop_islocked_args /* { 539 struct vnode *a_vp; 540 } */ *ap; 541 { 542 543 return (lockstatus(ap->a_vp->v_vnlock)); 544 } 545 546 /* 547 * Variants of the above set. 548 * 549 * Differences are: 550 * - shared locking disablement is not supported 551 * - v_vnlock pointer is not honored 552 */ 553 int 554 vop_lock(ap) 555 struct vop_lock1_args /* { 556 struct vnode *a_vp; 557 int a_flags; 558 char *file; 559 int line; 560 } */ *ap; 561 { 562 struct vnode *vp = ap->a_vp; 563 int flags = ap->a_flags; 564 struct mtx *ilk; 565 566 MPASS(vp->v_vnlock == &vp->v_lock); 567 568 if (__predict_false((flags & ~(LK_TYPE_MASK | LK_NODDLKTREAT | LK_RETRY)) != 0)) 569 goto other; 570 571 switch (flags & LK_TYPE_MASK) { 572 case LK_SHARED: 573 return (lockmgr_slock(&vp->v_lock, flags, ap->a_file, ap->a_line)); 574 case LK_EXCLUSIVE: 575 return (lockmgr_xlock(&vp->v_lock, flags, ap->a_file, ap->a_line)); 576 } 577 other: 578 ilk = VI_MTX(vp); 579 return (lockmgr_lock_fast_path(&vp->v_lock, flags, 580 &ilk->lock_object, ap->a_file, ap->a_line)); 581 } 582 583 int 584 vop_unlock(ap) 585 struct vop_unlock_args /* { 586 struct vnode *a_vp; 587 int a_flags; 588 } */ *ap; 589 { 590 struct vnode *vp = ap->a_vp; 591 592 MPASS(vp->v_vnlock == &vp->v_lock); 593 MPASS(ap->a_flags == 0); 594 595 return (lockmgr_unlock(&vp->v_lock)); 596 } 597 598 int 599 vop_islocked(ap) 600 struct vop_islocked_args /* { 601 struct vnode *a_vp; 602 } */ *ap; 603 { 604 struct vnode *vp = ap->a_vp; 605 606 MPASS(vp->v_vnlock == &vp->v_lock); 607 608 return (lockstatus(&vp->v_lock)); 609 } 610 611 /* 612 * Return true for select/poll. 613 */ 614 int 615 vop_nopoll(ap) 616 struct vop_poll_args /* { 617 struct vnode *a_vp; 618 int a_events; 619 struct ucred *a_cred; 620 struct thread *a_td; 621 } */ *ap; 622 { 623 624 return (poll_no_poll(ap->a_events)); 625 } 626 627 /* 628 * Implement poll for local filesystems that support it. 629 */ 630 int 631 vop_stdpoll(ap) 632 struct vop_poll_args /* { 633 struct vnode *a_vp; 634 int a_events; 635 struct ucred *a_cred; 636 struct thread *a_td; 637 } */ *ap; 638 { 639 if (ap->a_events & ~POLLSTANDARD) 640 return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events)); 641 return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 642 } 643 644 /* 645 * Return our mount point, as we will take charge of the writes. 646 */ 647 int 648 vop_stdgetwritemount(ap) 649 struct vop_getwritemount_args /* { 650 struct vnode *a_vp; 651 struct mount **a_mpp; 652 } */ *ap; 653 { 654 struct mount *mp; 655 struct vnode *vp; 656 657 /* 658 * Note that having a reference does not prevent forced unmount from 659 * setting ->v_mount to NULL after the lock gets released. This is of 660 * no consequence for typical consumers (most notably vn_start_write) 661 * since in this case the vnode is VIRF_DOOMED. Unmount might have 662 * progressed far enough that its completion is only delayed by the 663 * reference obtained here. The consumer only needs to concern itself 664 * with releasing it. 665 */ 666 vp = ap->a_vp; 667 mp = vp->v_mount; 668 if (mp == NULL) { 669 *(ap->a_mpp) = NULL; 670 return (0); 671 } 672 if (vfs_op_thread_enter(mp)) { 673 if (mp == vp->v_mount) { 674 vfs_mp_count_add_pcpu(mp, ref, 1); 675 vfs_op_thread_exit(mp); 676 } else { 677 vfs_op_thread_exit(mp); 678 mp = NULL; 679 } 680 } else { 681 MNT_ILOCK(mp); 682 if (mp == vp->v_mount) { 683 MNT_REF(mp); 684 MNT_IUNLOCK(mp); 685 } else { 686 MNT_IUNLOCK(mp); 687 mp = NULL; 688 } 689 } 690 *(ap->a_mpp) = mp; 691 return (0); 692 } 693 694 /* 695 * If the file system doesn't implement VOP_BMAP, then return sensible defaults: 696 * - Return the vnode's bufobj instead of any underlying device's bufobj 697 * - Calculate the physical block number as if there were equal size 698 * consecutive blocks, but 699 * - Report no contiguous runs of blocks. 700 */ 701 int 702 vop_stdbmap(ap) 703 struct vop_bmap_args /* { 704 struct vnode *a_vp; 705 daddr_t a_bn; 706 struct bufobj **a_bop; 707 daddr_t *a_bnp; 708 int *a_runp; 709 int *a_runb; 710 } */ *ap; 711 { 712 713 if (ap->a_bop != NULL) 714 *ap->a_bop = &ap->a_vp->v_bufobj; 715 if (ap->a_bnp != NULL) 716 *ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize); 717 if (ap->a_runp != NULL) 718 *ap->a_runp = 0; 719 if (ap->a_runb != NULL) 720 *ap->a_runb = 0; 721 return (0); 722 } 723 724 int 725 vop_stdfsync(ap) 726 struct vop_fsync_args /* { 727 struct vnode *a_vp; 728 int a_waitfor; 729 struct thread *a_td; 730 } */ *ap; 731 { 732 733 return (vn_fsync_buf(ap->a_vp, ap->a_waitfor)); 734 } 735 736 static int 737 vop_stdfdatasync(struct vop_fdatasync_args *ap) 738 { 739 740 return (VOP_FSYNC(ap->a_vp, MNT_WAIT, ap->a_td)); 741 } 742 743 int 744 vop_stdfdatasync_buf(struct vop_fdatasync_args *ap) 745 { 746 747 return (vn_fsync_buf(ap->a_vp, MNT_WAIT)); 748 } 749 750 /* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */ 751 int 752 vop_stdgetpages(ap) 753 struct vop_getpages_args /* { 754 struct vnode *a_vp; 755 vm_page_t *a_m; 756 int a_count; 757 int *a_rbehind; 758 int *a_rahead; 759 } */ *ap; 760 { 761 762 return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, 763 ap->a_count, ap->a_rbehind, ap->a_rahead, NULL, NULL); 764 } 765 766 static int 767 vop_stdgetpages_async(struct vop_getpages_async_args *ap) 768 { 769 int error; 770 771 error = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, 772 ap->a_rahead); 773 ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error); 774 return (error); 775 } 776 777 int 778 vop_stdkqfilter(struct vop_kqfilter_args *ap) 779 { 780 return vfs_kqfilter(ap); 781 } 782 783 /* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */ 784 int 785 vop_stdputpages(ap) 786 struct vop_putpages_args /* { 787 struct vnode *a_vp; 788 vm_page_t *a_m; 789 int a_count; 790 int a_sync; 791 int *a_rtvals; 792 } */ *ap; 793 { 794 795 return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, 796 ap->a_sync, ap->a_rtvals); 797 } 798 799 int 800 vop_stdvptofh(struct vop_vptofh_args *ap) 801 { 802 return (EOPNOTSUPP); 803 } 804 805 int 806 vop_stdvptocnp(struct vop_vptocnp_args *ap) 807 { 808 struct vnode *vp = ap->a_vp; 809 struct vnode **dvp = ap->a_vpp; 810 struct ucred *cred = ap->a_cred; 811 char *buf = ap->a_buf; 812 int *buflen = ap->a_buflen; 813 char *dirbuf, *cpos; 814 int i, error, eofflag, dirbuflen, flags, locked, len, covered; 815 off_t off; 816 ino_t fileno; 817 struct vattr va; 818 struct nameidata nd; 819 struct thread *td; 820 struct dirent *dp; 821 struct vnode *mvp; 822 823 i = *buflen; 824 error = 0; 825 covered = 0; 826 td = curthread; 827 828 if (vp->v_type != VDIR) 829 return (ENOENT); 830 831 error = VOP_GETATTR(vp, &va, cred); 832 if (error) 833 return (error); 834 835 VREF(vp); 836 locked = VOP_ISLOCKED(vp); 837 VOP_UNLOCK(vp, 0); 838 NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, 839 "..", vp, td); 840 flags = FREAD; 841 error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL); 842 if (error) { 843 vn_lock(vp, locked | LK_RETRY); 844 return (error); 845 } 846 NDFREE(&nd, NDF_ONLY_PNBUF); 847 848 mvp = *dvp = nd.ni_vp; 849 850 if (vp->v_mount != (*dvp)->v_mount && 851 ((*dvp)->v_vflag & VV_ROOT) && 852 ((*dvp)->v_mount->mnt_flag & MNT_UNION)) { 853 *dvp = (*dvp)->v_mount->mnt_vnodecovered; 854 VREF(mvp); 855 VOP_UNLOCK(mvp, 0); 856 vn_close(mvp, FREAD, cred, td); 857 VREF(*dvp); 858 vn_lock(*dvp, LK_SHARED | LK_RETRY); 859 covered = 1; 860 } 861 862 fileno = va.va_fileid; 863 864 dirbuflen = DEV_BSIZE; 865 if (dirbuflen < va.va_blocksize) 866 dirbuflen = va.va_blocksize; 867 dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK); 868 869 if ((*dvp)->v_type != VDIR) { 870 error = ENOENT; 871 goto out; 872 } 873 874 off = 0; 875 len = 0; 876 do { 877 /* call VOP_READDIR of parent */ 878 error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off, 879 &cpos, &len, &eofflag, td); 880 if (error) 881 goto out; 882 883 if ((dp->d_type != DT_WHT) && 884 (dp->d_fileno == fileno)) { 885 if (covered) { 886 VOP_UNLOCK(*dvp, 0); 887 vn_lock(mvp, LK_SHARED | LK_RETRY); 888 if (dirent_exists(mvp, dp->d_name, td)) { 889 error = ENOENT; 890 VOP_UNLOCK(mvp, 0); 891 vn_lock(*dvp, LK_SHARED | LK_RETRY); 892 goto out; 893 } 894 VOP_UNLOCK(mvp, 0); 895 vn_lock(*dvp, LK_SHARED | LK_RETRY); 896 } 897 i -= dp->d_namlen; 898 899 if (i < 0) { 900 error = ENOMEM; 901 goto out; 902 } 903 if (dp->d_namlen == 1 && dp->d_name[0] == '.') { 904 error = ENOENT; 905 } else { 906 bcopy(dp->d_name, buf + i, dp->d_namlen); 907 error = 0; 908 } 909 goto out; 910 } 911 } while (len > 0 || !eofflag); 912 error = ENOENT; 913 914 out: 915 free(dirbuf, M_TEMP); 916 if (!error) { 917 *buflen = i; 918 vref(*dvp); 919 } 920 if (covered) { 921 vput(*dvp); 922 vrele(mvp); 923 } else { 924 VOP_UNLOCK(mvp, 0); 925 vn_close(mvp, FREAD, cred, td); 926 } 927 vn_lock(vp, locked | LK_RETRY); 928 return (error); 929 } 930 931 int 932 vop_stdallocate(struct vop_allocate_args *ap) 933 { 934 #ifdef __notyet__ 935 struct statfs *sfs; 936 off_t maxfilesize = 0; 937 #endif 938 struct iovec aiov; 939 struct vattr vattr, *vap; 940 struct uio auio; 941 off_t fsize, len, cur, offset; 942 uint8_t *buf; 943 struct thread *td; 944 struct vnode *vp; 945 size_t iosize; 946 int error; 947 948 buf = NULL; 949 error = 0; 950 td = curthread; 951 vap = &vattr; 952 vp = ap->a_vp; 953 len = *ap->a_len; 954 offset = *ap->a_offset; 955 956 error = VOP_GETATTR(vp, vap, td->td_ucred); 957 if (error != 0) 958 goto out; 959 fsize = vap->va_size; 960 iosize = vap->va_blocksize; 961 if (iosize == 0) 962 iosize = BLKDEV_IOSIZE; 963 if (iosize > MAXPHYS) 964 iosize = MAXPHYS; 965 buf = malloc(iosize, M_TEMP, M_WAITOK); 966 967 #ifdef __notyet__ 968 /* 969 * Check if the filesystem sets f_maxfilesize; if not use 970 * VOP_SETATTR to perform the check. 971 */ 972 sfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 973 error = VFS_STATFS(vp->v_mount, sfs, td); 974 if (error == 0) 975 maxfilesize = sfs->f_maxfilesize; 976 free(sfs, M_STATFS); 977 if (error != 0) 978 goto out; 979 if (maxfilesize) { 980 if (offset > maxfilesize || len > maxfilesize || 981 offset + len > maxfilesize) { 982 error = EFBIG; 983 goto out; 984 } 985 } else 986 #endif 987 if (offset + len > vap->va_size) { 988 /* 989 * Test offset + len against the filesystem's maxfilesize. 990 */ 991 VATTR_NULL(vap); 992 vap->va_size = offset + len; 993 error = VOP_SETATTR(vp, vap, td->td_ucred); 994 if (error != 0) 995 goto out; 996 VATTR_NULL(vap); 997 vap->va_size = fsize; 998 error = VOP_SETATTR(vp, vap, td->td_ucred); 999 if (error != 0) 1000 goto out; 1001 } 1002 1003 for (;;) { 1004 /* 1005 * Read and write back anything below the nominal file 1006 * size. There's currently no way outside the filesystem 1007 * to know whether this area is sparse or not. 1008 */ 1009 cur = iosize; 1010 if ((offset % iosize) != 0) 1011 cur -= (offset % iosize); 1012 if (cur > len) 1013 cur = len; 1014 if (offset < fsize) { 1015 aiov.iov_base = buf; 1016 aiov.iov_len = cur; 1017 auio.uio_iov = &aiov; 1018 auio.uio_iovcnt = 1; 1019 auio.uio_offset = offset; 1020 auio.uio_resid = cur; 1021 auio.uio_segflg = UIO_SYSSPACE; 1022 auio.uio_rw = UIO_READ; 1023 auio.uio_td = td; 1024 error = VOP_READ(vp, &auio, 0, td->td_ucred); 1025 if (error != 0) 1026 break; 1027 if (auio.uio_resid > 0) { 1028 bzero(buf + cur - auio.uio_resid, 1029 auio.uio_resid); 1030 } 1031 } else { 1032 bzero(buf, cur); 1033 } 1034 1035 aiov.iov_base = buf; 1036 aiov.iov_len = cur; 1037 auio.uio_iov = &aiov; 1038 auio.uio_iovcnt = 1; 1039 auio.uio_offset = offset; 1040 auio.uio_resid = cur; 1041 auio.uio_segflg = UIO_SYSSPACE; 1042 auio.uio_rw = UIO_WRITE; 1043 auio.uio_td = td; 1044 1045 error = VOP_WRITE(vp, &auio, 0, td->td_ucred); 1046 if (error != 0) 1047 break; 1048 1049 len -= cur; 1050 offset += cur; 1051 if (len == 0) 1052 break; 1053 if (should_yield()) 1054 break; 1055 } 1056 1057 out: 1058 *ap->a_len = len; 1059 *ap->a_offset = offset; 1060 free(buf, M_TEMP); 1061 return (error); 1062 } 1063 1064 int 1065 vop_stdadvise(struct vop_advise_args *ap) 1066 { 1067 struct vnode *vp; 1068 struct bufobj *bo; 1069 daddr_t startn, endn; 1070 off_t bstart, bend, start, end; 1071 int bsize, error; 1072 1073 vp = ap->a_vp; 1074 switch (ap->a_advice) { 1075 case POSIX_FADV_WILLNEED: 1076 /* 1077 * Do nothing for now. Filesystems should provide a 1078 * custom method which starts an asynchronous read of 1079 * the requested region. 1080 */ 1081 error = 0; 1082 break; 1083 case POSIX_FADV_DONTNEED: 1084 error = 0; 1085 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1086 if (VN_IS_DOOMED(vp)) { 1087 VOP_UNLOCK(vp, 0); 1088 break; 1089 } 1090 1091 /* 1092 * Round to block boundaries (and later possibly further to 1093 * page boundaries). Applications cannot reasonably be aware 1094 * of the boundaries, and the rounding must be to expand at 1095 * both extremities to cover enough. It still doesn't cover 1096 * read-ahead. For partial blocks, this gives unnecessary 1097 * discarding of buffers but is efficient enough since the 1098 * pages usually remain in VMIO for some time. 1099 */ 1100 bsize = vp->v_bufobj.bo_bsize; 1101 bstart = rounddown(ap->a_start, bsize); 1102 bend = roundup(ap->a_end, bsize); 1103 1104 /* 1105 * Deactivate pages in the specified range from the backing VM 1106 * object. Pages that are resident in the buffer cache will 1107 * remain wired until their corresponding buffers are released 1108 * below. 1109 */ 1110 if (vp->v_object != NULL) { 1111 start = trunc_page(bstart); 1112 end = round_page(bend); 1113 VM_OBJECT_RLOCK(vp->v_object); 1114 vm_object_page_noreuse(vp->v_object, OFF_TO_IDX(start), 1115 OFF_TO_IDX(end)); 1116 VM_OBJECT_RUNLOCK(vp->v_object); 1117 } 1118 1119 bo = &vp->v_bufobj; 1120 BO_RLOCK(bo); 1121 startn = bstart / bsize; 1122 endn = bend / bsize; 1123 error = bnoreuselist(&bo->bo_clean, bo, startn, endn); 1124 if (error == 0) 1125 error = bnoreuselist(&bo->bo_dirty, bo, startn, endn); 1126 BO_RUNLOCK(bo); 1127 VOP_UNLOCK(vp, 0); 1128 break; 1129 default: 1130 error = EINVAL; 1131 break; 1132 } 1133 return (error); 1134 } 1135 1136 int 1137 vop_stdunp_bind(struct vop_unp_bind_args *ap) 1138 { 1139 1140 ap->a_vp->v_unpcb = ap->a_unpcb; 1141 return (0); 1142 } 1143 1144 int 1145 vop_stdunp_connect(struct vop_unp_connect_args *ap) 1146 { 1147 1148 *ap->a_unpcb = ap->a_vp->v_unpcb; 1149 return (0); 1150 } 1151 1152 int 1153 vop_stdunp_detach(struct vop_unp_detach_args *ap) 1154 { 1155 1156 ap->a_vp->v_unpcb = NULL; 1157 return (0); 1158 } 1159 1160 static int 1161 vop_stdis_text(struct vop_is_text_args *ap) 1162 { 1163 1164 return (ap->a_vp->v_writecount < 0); 1165 } 1166 1167 int 1168 vop_stdset_text(struct vop_set_text_args *ap) 1169 { 1170 struct vnode *vp; 1171 struct mount *mp; 1172 int error; 1173 1174 vp = ap->a_vp; 1175 VI_LOCK(vp); 1176 if (vp->v_writecount > 0) { 1177 error = ETXTBSY; 1178 } else { 1179 /* 1180 * If requested by fs, keep a use reference to the 1181 * vnode until the last text reference is released. 1182 */ 1183 mp = vp->v_mount; 1184 if (mp != NULL && (mp->mnt_kern_flag & MNTK_TEXT_REFS) != 0 && 1185 vp->v_writecount == 0) { 1186 vp->v_iflag |= VI_TEXT_REF; 1187 vrefl(vp); 1188 } 1189 1190 vp->v_writecount--; 1191 error = 0; 1192 } 1193 VI_UNLOCK(vp); 1194 return (error); 1195 } 1196 1197 static int 1198 vop_stdunset_text(struct vop_unset_text_args *ap) 1199 { 1200 struct vnode *vp; 1201 int error; 1202 bool last; 1203 1204 vp = ap->a_vp; 1205 last = false; 1206 VI_LOCK(vp); 1207 if (vp->v_writecount < 0) { 1208 if ((vp->v_iflag & VI_TEXT_REF) != 0 && 1209 vp->v_writecount == -1) { 1210 last = true; 1211 vp->v_iflag &= ~VI_TEXT_REF; 1212 } 1213 vp->v_writecount++; 1214 error = 0; 1215 } else { 1216 error = EINVAL; 1217 } 1218 VI_UNLOCK(vp); 1219 if (last) 1220 vunref(vp); 1221 return (error); 1222 } 1223 1224 static int 1225 vop_stdadd_writecount(struct vop_add_writecount_args *ap) 1226 { 1227 struct vnode *vp; 1228 int error; 1229 1230 vp = ap->a_vp; 1231 VI_LOCK_FLAGS(vp, MTX_DUPOK); 1232 if (vp->v_writecount < 0) { 1233 error = ETXTBSY; 1234 } else { 1235 VNASSERT(vp->v_writecount + ap->a_inc >= 0, vp, 1236 ("neg writecount increment %d", ap->a_inc)); 1237 vp->v_writecount += ap->a_inc; 1238 error = 0; 1239 } 1240 VI_UNLOCK(vp); 1241 return (error); 1242 } 1243 1244 int 1245 vop_stdneed_inactive(struct vop_need_inactive_args *ap) 1246 { 1247 1248 return (1); 1249 } 1250 1251 int 1252 vop_stdioctl(struct vop_ioctl_args *ap) 1253 { 1254 struct vnode *vp; 1255 struct vattr va; 1256 off_t *offp; 1257 int error; 1258 1259 switch (ap->a_command) { 1260 case FIOSEEKDATA: 1261 case FIOSEEKHOLE: 1262 vp = ap->a_vp; 1263 error = vn_lock(vp, LK_SHARED); 1264 if (error != 0) 1265 return (EBADF); 1266 if (vp->v_type == VREG) 1267 error = VOP_GETATTR(vp, &va, ap->a_cred); 1268 else 1269 error = ENOTTY; 1270 if (error == 0) { 1271 offp = ap->a_data; 1272 if (*offp < 0 || *offp >= va.va_size) 1273 error = ENXIO; 1274 else if (ap->a_command == FIOSEEKHOLE) 1275 *offp = va.va_size; 1276 } 1277 VOP_UNLOCK(vp, 0); 1278 break; 1279 default: 1280 error = ENOTTY; 1281 break; 1282 } 1283 return (error); 1284 } 1285 1286 /* 1287 * vfs default ops 1288 * used to fill the vfs function table to get reasonable default return values. 1289 */ 1290 int 1291 vfs_stdroot (mp, flags, vpp) 1292 struct mount *mp; 1293 int flags; 1294 struct vnode **vpp; 1295 { 1296 1297 return (EOPNOTSUPP); 1298 } 1299 1300 int 1301 vfs_stdstatfs (mp, sbp) 1302 struct mount *mp; 1303 struct statfs *sbp; 1304 { 1305 1306 return (EOPNOTSUPP); 1307 } 1308 1309 int 1310 vfs_stdquotactl (mp, cmds, uid, arg) 1311 struct mount *mp; 1312 int cmds; 1313 uid_t uid; 1314 void *arg; 1315 { 1316 1317 return (EOPNOTSUPP); 1318 } 1319 1320 int 1321 vfs_stdsync(mp, waitfor) 1322 struct mount *mp; 1323 int waitfor; 1324 { 1325 struct vnode *vp, *mvp; 1326 struct thread *td; 1327 int error, lockreq, allerror = 0; 1328 1329 td = curthread; 1330 lockreq = LK_EXCLUSIVE | LK_INTERLOCK; 1331 if (waitfor != MNT_WAIT) 1332 lockreq |= LK_NOWAIT; 1333 /* 1334 * Force stale buffer cache information to be flushed. 1335 */ 1336 loop: 1337 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { 1338 if (vp->v_bufobj.bo_dirty.bv_cnt == 0) { 1339 VI_UNLOCK(vp); 1340 continue; 1341 } 1342 if ((error = vget(vp, lockreq, td)) != 0) { 1343 if (error == ENOENT) { 1344 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 1345 goto loop; 1346 } 1347 continue; 1348 } 1349 error = VOP_FSYNC(vp, waitfor, td); 1350 if (error) 1351 allerror = error; 1352 vput(vp); 1353 } 1354 return (allerror); 1355 } 1356 1357 int 1358 vfs_stdnosync (mp, waitfor) 1359 struct mount *mp; 1360 int waitfor; 1361 { 1362 1363 return (0); 1364 } 1365 1366 static int 1367 vop_stdcopy_file_range(struct vop_copy_file_range_args *ap) 1368 { 1369 int error; 1370 1371 error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, 1372 ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, ap->a_incred, 1373 ap->a_outcred, ap->a_fsizetd); 1374 return (error); 1375 } 1376 1377 int 1378 vfs_stdvget (mp, ino, flags, vpp) 1379 struct mount *mp; 1380 ino_t ino; 1381 int flags; 1382 struct vnode **vpp; 1383 { 1384 1385 return (EOPNOTSUPP); 1386 } 1387 1388 int 1389 vfs_stdfhtovp (mp, fhp, flags, vpp) 1390 struct mount *mp; 1391 struct fid *fhp; 1392 int flags; 1393 struct vnode **vpp; 1394 { 1395 1396 return (EOPNOTSUPP); 1397 } 1398 1399 int 1400 vfs_stdinit (vfsp) 1401 struct vfsconf *vfsp; 1402 { 1403 1404 return (0); 1405 } 1406 1407 int 1408 vfs_stduninit (vfsp) 1409 struct vfsconf *vfsp; 1410 { 1411 1412 return(0); 1413 } 1414 1415 int 1416 vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname) 1417 struct mount *mp; 1418 int cmd; 1419 struct vnode *filename_vp; 1420 int attrnamespace; 1421 const char *attrname; 1422 { 1423 1424 if (filename_vp != NULL) 1425 VOP_UNLOCK(filename_vp, 0); 1426 return (EOPNOTSUPP); 1427 } 1428 1429 int 1430 vfs_stdsysctl(mp, op, req) 1431 struct mount *mp; 1432 fsctlop_t op; 1433 struct sysctl_req *req; 1434 { 1435 1436 return (EOPNOTSUPP); 1437 } 1438 1439 static vop_bypass_t * 1440 bp_by_off(struct vop_vector *vop, struct vop_generic_args *a) 1441 { 1442 1443 return (*(vop_bypass_t **)((char *)vop + a->a_desc->vdesc_vop_offset)); 1444 } 1445 1446 int 1447 vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a) 1448 { 1449 vop_bypass_t *bp; 1450 int prev_stops, rc; 1451 1452 for (; vop != NULL; vop = vop->vop_default) { 1453 bp = bp_by_off(vop, a); 1454 if (bp != NULL) 1455 break; 1456 1457 /* 1458 * Bypass is not really supported. It is done for 1459 * fallback to unimplemented vops in the default 1460 * vector. 1461 */ 1462 bp = vop->vop_bypass; 1463 if (bp != NULL) 1464 break; 1465 } 1466 MPASS(bp != NULL); 1467 1468 prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); 1469 rc = bp(a); 1470 sigallowstop(prev_stops); 1471 return (rc); 1472 } 1473