/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed
 * to Berkeley by John Heidemann of the UCLA Ficus project.
 *
 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/filio.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/rwlock.h>
#include <sys/fcntl.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/poll.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

static int	vop_nolookup(struct vop_lookup_args *);
static int	vop_norename(struct vop_rename_args *);
static int	vop_nostrategy(struct vop_strategy_args *);
static int	get_next_dirent(struct vnode *vp, struct dirent **dpp,
				char *dirbuf, int dirbuflen, off_t *off,
				char **cpos, int *len, int *eofflag,
				struct thread *td);
static int	dirent_exists(struct vnode *vp, const char *dirname,
			      struct thread *td);

#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)

static int vop_stdis_text(struct vop_is_text_args *ap);
static int vop_stdunset_text(struct vop_unset_text_args *ap);
static int vop_stdadd_writecount(struct vop_add_writecount_args *ap);
static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap);
static int vop_stdfdatasync(struct vop_fdatasync_args *ap);
static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);

/*
 * This vnode table stores what we want to do if the filesystem doesn't
 * implement a particular VOP.
 *
 * If there is no specific entry here, we will return EOPNOTSUPP.
 *
 * Note that every filesystem has to implement either vop_access
 * or vop_accessx; failing to do so will result in an immediate crash
 * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(),
 * which calls vop_stdaccess(), etc.
 */

struct vop_vector default_vnodeops = {
	.vop_default =		NULL,
	.vop_bypass =		VOP_EOPNOTSUPP,

	.vop_access =		vop_stdaccess,
	.vop_accessx =		vop_stdaccessx,
	.vop_advise =		vop_stdadvise,
	.vop_advlock =		vop_stdadvlock,
	.vop_advlockasync =	vop_stdadvlockasync,
	.vop_advlockpurge =	vop_stdadvlockpurge,
	.vop_allocate =		vop_stdallocate,
	.vop_bmap =		vop_stdbmap,
	.vop_close =		VOP_NULL,
	.vop_fsync =		VOP_NULL,
	.vop_fdatasync =	vop_stdfdatasync,
	.vop_getpages =		vop_stdgetpages,
	.vop_getpages_async =	vop_stdgetpages_async,
	.vop_getwritemount =	vop_stdgetwritemount,
	.vop_inactive =		VOP_NULL,
	.vop_need_inactive =	vop_stdneed_inactive,
	.vop_ioctl =		vop_stdioctl,
	.vop_kqfilter =		vop_stdkqfilter,
	.vop_islocked =		vop_stdislocked,
	.vop_lock1 =		vop_stdlock,
	.vop_lookup =		vop_nolookup,
	.vop_open =		VOP_NULL,
	.vop_pathconf =		VOP_EINVAL,
	.vop_poll =		vop_nopoll,
	.vop_putpages =		vop_stdputpages,
	.vop_readlink =		VOP_EINVAL,
	.vop_rename =		vop_norename,
	.vop_revoke =		VOP_PANIC,
	.vop_strategy =		vop_nostrategy,
	.vop_unlock =		vop_stdunlock,
	.vop_vptocnp =		vop_stdvptocnp,
	.vop_vptofh =		vop_stdvptofh,
	.vop_unp_bind =		vop_stdunp_bind,
	.vop_unp_connect =	vop_stdunp_connect,
	.vop_unp_detach =	vop_stdunp_detach,
	.vop_is_text =		vop_stdis_text,
	.vop_set_text =		vop_stdset_text,
	.vop_unset_text =	vop_stdunset_text,
	.vop_add_writecount =	vop_stdadd_writecount,
	.vop_copy_file_range =	vop_stdcopy_file_range,
};
VFS_VOP_VECTOR_REGISTER(default_vnodeops);
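
/*
 * Illustrative sketch (not part of this file; "myfs" and its methods are
 * hypothetical): a filesystem normally lists only the operations it
 * implements and chains to the table above through vop_default, so any
 * VOP it omits falls back to these defaults:
 *
 *	static struct vop_vector myfs_vnodeops = {
 *		.vop_default =	&default_vnodeops,
 *		.vop_lookup =	myfs_lookup,
 *		.vop_access =	myfs_access,
 *	};
 *	VFS_VOP_VECTOR_REGISTER(myfs_vnodeops);
 */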

/*
 * Series of placeholder functions for various error returns for
 * VOPs.
 */

int
vop_eopnotsupp(struct vop_generic_args *ap)
{
	/*
	printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
	*/

	return (EOPNOTSUPP);
}

int
vop_ebadf(struct vop_generic_args *ap)
{

	return (EBADF);
}

int
vop_enotty(struct vop_generic_args *ap)
{

	return (ENOTTY);
}

int
vop_einval(struct vop_generic_args *ap)
{

	return (EINVAL);
}

int
vop_enoent(struct vop_generic_args *ap)
{

	return (ENOENT);
}

int
vop_null(struct vop_generic_args *ap)
{

	return (0);
}

/*
 * Helper function to panic on some bad VOPs in some filesystems.
 */
int
vop_panic(struct vop_generic_args *ap)
{

	panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
}

/*
 * vop_std<something> and vop_no<something> are default functions for use by
 * filesystems that need the "default reasonable" implementation for a
 * particular operation.
 *
 * The documentation for these operations, where it exists, is found in
 * the corresponding VOP_<SOMETHING>(9) manpage (all uppercase).
 */

/*
 * Default vop for filesystems that do not support name lookup
 */
static int
vop_nolookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 * vop_norename:
 *
 * Handle unlock and reference counting for arguments of vop_rename
 * for filesystems that do not implement the rename operation.
 */
static int
vop_norename(struct vop_rename_args *ap)
{

	vop_rename_fail(ap);
	return (EOPNOTSUPP);
}

/*
 * vop_nostrategy:
 *
 * Strategy routine for VFS devices that have none.
 *
 * BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
 * routine.  Typically this is done for a BIO_READ strategy call.
 * B_INVAL is usually assumed to already be clear prior to a write
 * and should not be cleared manually unless you just made the buffer
 * invalid.  BIO_ERROR should be cleared either way.
 */
static int
vop_nostrategy(struct vop_strategy_args *ap)
{
	printf("No strategy for buffer at %p\n", ap->a_bp);
	vn_printf(ap->a_vp, "vnode ");
	ap->a_bp->b_ioflags |= BIO_ERROR;
	ap->a_bp->b_error = EOPNOTSUPP;
	bufdone(ap->a_bp);
	return (EOPNOTSUPP);
}

static int
get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
    int dirbuflen, off_t *off, char **cpos, int *len,
    int *eofflag, struct thread *td)
{
	int error, reclen;
	struct uio uio;
	struct iovec iov;
	struct dirent *dp;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	if (*len == 0) {
		iov.iov_base = dirbuf;
		iov.iov_len = dirbuflen;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = *off;
		uio.uio_resid = dirbuflen;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_rw = UIO_READ;
		uio.uio_td = td;

		*eofflag = 0;

#ifdef MAC
		error = mac_vnode_check_readdir(td->td_ucred, vp);
		if (error == 0)
#endif
			error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag,
			    NULL, NULL);
		if (error)
			return (error);

		*off = uio.uio_offset;

		*cpos = dirbuf;
		*len = (dirbuflen - uio.uio_resid);

		if (*len == 0)
			return (ENOENT);
	}

	dp = (struct dirent *)(*cpos);
	reclen = dp->d_reclen;
	*dpp = dp;

	/* Check for a malformed directory entry. */
	if (reclen < DIRENT_MINSIZE)
		return (EINVAL);

	*cpos += reclen;
	*len -= reclen;

	return (0);
}

/*
 * Check if a named file exists in a given directory vnode.
 */
static int
dirent_exists(struct vnode *vp, const char *dirname, struct thread *td)
{
	char *dirbuf, *cpos;
	int error, eofflag, dirbuflen, len, found;
	off_t off;
	struct dirent *dp;
	struct vattr va;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	found = 0;

	error = VOP_GETATTR(vp, &va, td->td_ucred);
	if (error)
		return (found);

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	off = 0;
	len = 0;
	do {
		error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off,
		    &cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if (dp->d_type != DT_WHT && dp->d_fileno != 0 &&
		    strcmp(dp->d_name, dirname) == 0) {
			found = 1;
			goto out;
		}
	} while (len > 0 || !eofflag);

out:
	free(dirbuf, M_TEMP);
	return (found);
}

int
vop_stdaccess(struct vop_access_args *ap)
{

	KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
	    VAPPEND)) == 0, ("invalid bit in accmode"));

	return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td));
}

int
vop_stdaccessx(struct vop_accessx_args *ap)
{
	int error;
	accmode_t accmode = ap->a_accmode;

	error = vfs_unixify_accmode(&accmode);
	if (error != 0)
		return (error);

	if (accmode == 0)
		return (0);

	return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td));
}
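
/*
 * Note (illustrative): the two defaults above are mutually defined, so a
 * filesystem needs to supply only one of the pair.  A filesystem that
 * implements just vop_access still answers VOP_ACCESSX() calls:
 * vop_stdaccessx() folds the extended accmode_t bits down to the classic
 * VREAD/VWRITE/VEXEC set with vfs_unixify_accmode() and then dispatches
 * to the filesystem's vop_access.  Supplying neither recurses until the
 * stack overflows, as warned at the top of this file.
 */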

/*
 * Advisory record locking support
 */
int
vop_stdadvlock(struct vop_advlock_args *ap)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	if (ap->a_fl->l_whence == SEEK_END) {
		/*
		 * The NFSv4 server must avoid doing a vn_lock() here, since it
		 * can deadlock the nfsd threads, due to a LOR.  Fortunately
		 * the NFSv4 server always uses SEEK_SET and this code is
		 * only required for the SEEK_END case.
		 */
		vn_lock(vp, LK_SHARED | LK_RETRY);
		error = VOP_GETATTR(vp, &vattr, curthread->td_ucred);
		VOP_UNLOCK(vp, 0);
		if (error)
			return (error);
	} else
		vattr.va_size = 0;

	return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockasync(struct vop_advlockasync_args *ap)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	if (ap->a_fl->l_whence == SEEK_END) {
		/* The size argument is only needed for SEEK_END. */
		vn_lock(vp, LK_SHARED | LK_RETRY);
		error = VOP_GETATTR(vp, &vattr, curthread->td_ucred);
		VOP_UNLOCK(vp, 0);
		if (error)
			return (error);
	} else
		vattr.va_size = 0;

	return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockpurge(struct vop_advlockpurge_args *ap)
{
	struct vnode *vp;

	vp = ap->a_vp;
	lf_purgelocks(vp, &vp->v_lockf);
	return (0);
}

/*
 * vop_stdpathconf:
 *
 * Standard implementation of POSIX pathconf, to get information about limits
 * for a filesystem.
 * Override it per filesystem when the filesystem has smaller limits.
 */
int
vop_stdpathconf(ap)
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		int *a_retval;
	} */ *ap;
{

	switch (ap->a_name) {
	case _PC_ASYNC_IO:
		*ap->a_retval = _POSIX_ASYNCHRONOUS_IO;
		return (0);
	case _PC_PATH_MAX:
		*ap->a_retval = PATH_MAX;
		return (0);
	case _PC_ACL_EXTENDED:
	case _PC_ACL_NFS4:
	case _PC_CAP_PRESENT:
	case _PC_INF_PRESENT:
	case _PC_MAC_PRESENT:
		*ap->a_retval = 0;
		return (0);
	default:
		return (EINVAL);
	}
	/* NOTREACHED */
}

/*
 * Standard lock, unlock and islocked functions.
 */
int
vop_stdlock(ap)
	struct vop_lock1_args /* {
		struct vnode *a_vp;
		int a_flags;
		char *file;
		int line;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct mtx *ilk;

	ilk = VI_MTX(vp);
	return (lockmgr_lock_fast_path(vp->v_vnlock, ap->a_flags,
	    &ilk->lock_object, ap->a_file, ap->a_line));
}

/* See above. */
int
vop_stdunlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct mtx *ilk;

	ilk = VI_MTX(vp);
	return (lockmgr_unlock_fast_path(vp->v_vnlock, ap->a_flags,
	    &ilk->lock_object));
}

/* See above. */
int
vop_stdislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (lockstatus(ap->a_vp->v_vnlock));
}

/*
 * Variants of the above set.
 *
 * Differences are:
 * - shared locking disablement is not supported
 * - the v_vnlock pointer is not honored
 */
int
vop_lock(ap)
	struct vop_lock1_args /* {
		struct vnode *a_vp;
		int a_flags;
		char *file;
		int line;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	int flags = ap->a_flags;
	struct mtx *ilk;

	MPASS(vp->v_vnlock == &vp->v_lock);

	if (__predict_false((flags & ~(LK_TYPE_MASK | LK_NODDLKTREAT | LK_RETRY)) != 0))
		goto other;

	switch (flags & LK_TYPE_MASK) {
	case LK_SHARED:
		return (lockmgr_slock(&vp->v_lock, flags, ap->a_file, ap->a_line));
	case LK_EXCLUSIVE:
		return (lockmgr_xlock(&vp->v_lock, flags, ap->a_file, ap->a_line));
	}
other:
	ilk = VI_MTX(vp);
	return (lockmgr_lock_fast_path(&vp->v_lock, flags,
	    &ilk->lock_object, ap->a_file, ap->a_line));
}

int
vop_unlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	MPASS(vp->v_vnlock == &vp->v_lock);
	MPASS(ap->a_flags == 0);

	return (lockmgr_unlock(&vp->v_lock));
}

int
vop_islocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	MPASS(vp->v_vnlock == &vp->v_lock);

	return (lockstatus(&vp->v_lock));
}
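
/*
 * Illustrative note: the vop_lock/vop_unlock/vop_islocked variants above
 * are only usable by filesystems whose vnodes never repoint v_vnlock
 * (as, e.g., nullfs does to share the lock of the lower vnode); the
 * MPASS assertions enforce this.  Such a filesystem opts in with:
 *
 *	.vop_lock1 =	vop_lock,
 *	.vop_unlock =	vop_unlock,
 *	.vop_islocked =	vop_islocked,
 *
 * Filesystems that redirect v_vnlock must keep vop_stdlock and friends,
 * which honor the pointer.
 */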

/*
 * Return true for select/poll.
 */
int
vop_nopoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{

	return (poll_no_poll(ap->a_events));
}

/*
 * Implement poll for local filesystems that support it.
 */
int
vop_stdpoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	if (ap->a_events & ~POLLSTANDARD)
		return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Return our mount point, as we will take charge of the writes.
 */
int
vop_stdgetwritemount(ap)
	struct vop_getwritemount_args /* {
		struct vnode *a_vp;
		struct mount **a_mpp;
	} */ *ap;
{
	struct mount *mp;
	struct vnode *vp;

	/*
	 * Note that having a reference does not prevent forced unmount from
	 * setting ->v_mount to NULL after the lock gets released.  This is of
	 * no consequence for typical consumers (most notably vn_start_write)
	 * since in this case the vnode is VIRF_DOOMED.  Unmount might have
	 * progressed far enough that its completion is only delayed by the
	 * reference obtained here.  The consumer only needs to concern itself
	 * with releasing it.
	 */
	vp = ap->a_vp;
	mp = vp->v_mount;
	if (mp == NULL) {
		*(ap->a_mpp) = NULL;
		return (0);
	}
	if (vfs_op_thread_enter(mp)) {
		if (mp == vp->v_mount) {
			vfs_mp_count_add_pcpu(mp, ref, 1);
			vfs_op_thread_exit(mp);
		} else {
			vfs_op_thread_exit(mp);
			mp = NULL;
		}
	} else {
		MNT_ILOCK(mp);
		if (mp == vp->v_mount) {
			MNT_REF(mp);
			MNT_IUNLOCK(mp);
		} else {
			MNT_IUNLOCK(mp);
			mp = NULL;
		}
	}
	*(ap->a_mpp) = mp;
	return (0);
}

/*
 * If the file system doesn't implement VOP_BMAP, then return sensible defaults:
 * - Return the vnode's bufobj instead of any underlying device's bufobj
 * - Calculate the physical block number as if there were equal size
 *   consecutive blocks, but
 * - Report no contiguous runs of blocks.
 */
int
vop_stdbmap(ap)
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t a_bn;
		struct bufobj **a_bop;
		daddr_t *a_bnp;
		int *a_runp;
		int *a_runb;
	} */ *ap;
{

	if (ap->a_bop != NULL)
		*ap->a_bop = &ap->a_vp->v_bufobj;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;
	return (0);
}
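
/*
 * Worked example (illustrative numbers): with f_iosize = 16384 and
 * DEV_BSIZE = 512, btodb(16384) = 32, so logical block 5 is reported as
 * "physical" block 5 * 32 = 160 on the vnode's own bufobj.  The mapping
 * assumes equal-sized consecutive blocks and deliberately reports no
 * contiguous runs (*a_runp = *a_runb = 0).
 */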

int
vop_stdfsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{

	return (vn_fsync_buf(ap->a_vp, ap->a_waitfor));
}

static int
vop_stdfdatasync(struct vop_fdatasync_args *ap)
{

	return (VOP_FSYNC(ap->a_vp, MNT_WAIT, ap->a_td));
}

int
vop_stdfdatasync_buf(struct vop_fdatasync_args *ap)
{

	return (vn_fsync_buf(ap->a_vp, MNT_WAIT));
}

/* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */
int
vop_stdgetpages(ap)
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int *a_rbehind;
		int *a_rahead;
	} */ *ap;
{

	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count, ap->a_rbehind, ap->a_rahead, NULL, NULL);
}

static int
vop_stdgetpages_async(struct vop_getpages_async_args *ap)
{
	int error;

	error = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
	    ap->a_rahead);
	ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error);
	return (error);
}

int
vop_stdkqfilter(struct vop_kqfilter_args *ap)
{
	return vfs_kqfilter(ap);
}

/* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */
int
vop_stdputpages(ap)
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_sync;
		int *a_rtvals;
	} */ *ap;
{

	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
	    ap->a_sync, ap->a_rtvals);
}

int
vop_stdvptofh(struct vop_vptofh_args *ap)
{
	return (EOPNOTSUPP);
}

int
vop_stdvptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode **dvp = ap->a_vpp;
	struct ucred *cred = ap->a_cred;
	char *buf = ap->a_buf;
	int *buflen = ap->a_buflen;
	char *dirbuf, *cpos;
	int i, error, eofflag, dirbuflen, flags, locked, len, covered;
	off_t off;
	ino_t fileno;
	struct vattr va;
	struct nameidata nd;
	struct thread *td;
	struct dirent *dp;
	struct vnode *mvp;

	i = *buflen;
	error = 0;
	covered = 0;
	td = curthread;

	if (vp->v_type != VDIR)
		return (ENOENT);

	error = VOP_GETATTR(vp, &va, cred);
	if (error)
		return (error);

	VREF(vp);
	locked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp, 0);
	NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
	    "..", vp, td);
	flags = FREAD;
	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL);
	if (error) {
		vn_lock(vp, locked | LK_RETRY);
		return (error);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);

	mvp = *dvp = nd.ni_vp;

	if (vp->v_mount != (*dvp)->v_mount &&
	    ((*dvp)->v_vflag & VV_ROOT) &&
	    ((*dvp)->v_mount->mnt_flag & MNT_UNION)) {
		*dvp = (*dvp)->v_mount->mnt_vnodecovered;
		VREF(mvp);
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
		VREF(*dvp);
		vn_lock(*dvp, LK_SHARED | LK_RETRY);
		covered = 1;
	}

	fileno = va.va_fileid;

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	if ((*dvp)->v_type != VDIR) {
		error = ENOENT;
		goto out;
	}

	off = 0;
	len = 0;
	do {
		/* call VOP_READDIR of parent */
		error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off,
		    &cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if ((dp->d_type != DT_WHT) &&
		    (dp->d_fileno == fileno)) {
			if (covered) {
				VOP_UNLOCK(*dvp, 0);
				vn_lock(mvp, LK_SHARED | LK_RETRY);
				if (dirent_exists(mvp, dp->d_name, td)) {
					error = ENOENT;
					VOP_UNLOCK(mvp, 0);
					vn_lock(*dvp, LK_SHARED | LK_RETRY);
					goto out;
				}
				VOP_UNLOCK(mvp, 0);
				vn_lock(*dvp, LK_SHARED | LK_RETRY);
			}
			i -= dp->d_namlen;

			if (i < 0) {
				error = ENOMEM;
				goto out;
			}
			if (dp->d_namlen == 1 && dp->d_name[0] == '.') {
				error = ENOENT;
			} else {
				bcopy(dp->d_name, buf + i, dp->d_namlen);
				error = 0;
			}
			goto out;
		}
	} while (len > 0 || !eofflag);
	error = ENOENT;

out:
	free(dirbuf, M_TEMP);
	if (!error) {
		*buflen = i;
		vref(*dvp);
	}
	if (covered) {
		vput(*dvp);
		vrele(mvp);
	} else {
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
	}
	vn_lock(vp, locked | LK_RETRY);
	return (error);
}
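
/*
 * Note (illustrative): the default above implements VOP_VPTOCNP() "in
 * reverse": it opens ".." and scans the parent directory for an entry
 * whose d_fileno matches the child's va_fileid.  The vn_fullpath()
 * machinery falls back on VOP_VPTOCNP() only when the name cache has no
 * entry for the vnode, so this linear scan is paid on cache misses only.
 */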

int
vop_stdallocate(struct vop_allocate_args *ap)
{
#ifdef __notyet__
	struct statfs *sfs;
	off_t maxfilesize = 0;
#endif
	struct iovec aiov;
	struct vattr vattr, *vap;
	struct uio auio;
	off_t fsize, len, cur, offset;
	uint8_t *buf;
	struct thread *td;
	struct vnode *vp;
	size_t iosize;
	int error;

	buf = NULL;
	error = 0;
	td = curthread;
	vap = &vattr;
	vp = ap->a_vp;
	len = *ap->a_len;
	offset = *ap->a_offset;

	error = VOP_GETATTR(vp, vap, td->td_ucred);
	if (error != 0)
		goto out;
	fsize = vap->va_size;
	iosize = vap->va_blocksize;
	if (iosize == 0)
		iosize = BLKDEV_IOSIZE;
	if (iosize > MAXPHYS)
		iosize = MAXPHYS;
	buf = malloc(iosize, M_TEMP, M_WAITOK);

#ifdef __notyet__
	/*
	 * Check if the filesystem sets f_maxfilesize; if not use
	 * VOP_SETATTR to perform the check.
	 */
	sfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
	error = VFS_STATFS(vp->v_mount, sfs, td);
	if (error == 0)
		maxfilesize = sfs->f_maxfilesize;
	free(sfs, M_STATFS);
	if (error != 0)
		goto out;
	if (maxfilesize) {
		if (offset > maxfilesize || len > maxfilesize ||
		    offset + len > maxfilesize) {
			error = EFBIG;
			goto out;
		}
	} else
#endif
	if (offset + len > vap->va_size) {
		/*
		 * Test offset + len against the filesystem's maxfilesize.
		 */
		VATTR_NULL(vap);
		vap->va_size = offset + len;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
		VATTR_NULL(vap);
		vap->va_size = fsize;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
	}

	for (;;) {
		/*
		 * Read and write back anything below the nominal file
		 * size.  There's currently no way outside the filesystem
		 * to know whether this area is sparse or not.
		 */
		cur = iosize;
		if ((offset % iosize) != 0)
			cur -= (offset % iosize);
		if (cur > len)
			cur = len;
		if (offset < fsize) {
			aiov.iov_base = buf;
			aiov.iov_len = cur;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = offset;
			auio.uio_resid = cur;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_td = td;
			error = VOP_READ(vp, &auio, 0, td->td_ucred);
			if (error != 0)
				break;
			if (auio.uio_resid > 0) {
				bzero(buf + cur - auio.uio_resid,
				    auio.uio_resid);
			}
		} else {
			bzero(buf, cur);
		}

		aiov.iov_base = buf;
		aiov.iov_len = cur;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = offset;
		auio.uio_resid = cur;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_td = td;

		error = VOP_WRITE(vp, &auio, 0, td->td_ucred);
		if (error != 0)
			break;

		len -= cur;
		offset += cur;
		if (len == 0)
			break;
		if (should_yield())
			break;
	}

out:
	*ap->a_len = len;
	*ap->a_offset = offset;
	free(buf, M_TEMP);
	return (error);
}
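
/*
 * Worked example (illustrative numbers) for the loop above: with
 * iosize = 4096, offset = 6144 and len = 10000, the first pass uses
 * cur = 4096 - (6144 % 4096) = 2048 to realign to a block boundary;
 * later passes advance in full blocks (4096, then the final 3856),
 * reading back and rewriting existing data and writing zeros beyond the
 * old end of file.
 */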

int
vop_stdadvise(struct vop_advise_args *ap)
{
	struct vnode *vp;
	struct bufobj *bo;
	daddr_t startn, endn;
	off_t bstart, bend, start, end;
	int bsize, error;

	vp = ap->a_vp;
	switch (ap->a_advice) {
	case POSIX_FADV_WILLNEED:
		/*
		 * Do nothing for now.  Filesystems should provide a
		 * custom method which starts an asynchronous read of
		 * the requested region.
		 */
		error = 0;
		break;
	case POSIX_FADV_DONTNEED:
		error = 0;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if (VN_IS_DOOMED(vp)) {
			VOP_UNLOCK(vp, 0);
			break;
		}

		/*
		 * Round to block boundaries (and later possibly further to
		 * page boundaries).  Applications cannot reasonably be aware
		 * of the boundaries, and the rounding must be to expand at
		 * both extremities to cover enough.  It still doesn't cover
		 * read-ahead.  For partial blocks, this gives unnecessary
		 * discarding of buffers but is efficient enough since the
		 * pages usually remain in VMIO for some time.
		 */
		bsize = vp->v_bufobj.bo_bsize;
		bstart = rounddown(ap->a_start, bsize);
		bend = roundup(ap->a_end, bsize);

		/*
		 * Deactivate pages in the specified range from the backing VM
		 * object.  Pages that are resident in the buffer cache will
		 * remain wired until their corresponding buffers are released
		 * below.
		 */
		if (vp->v_object != NULL) {
			start = trunc_page(bstart);
			end = round_page(bend);
			VM_OBJECT_RLOCK(vp->v_object);
			vm_object_page_noreuse(vp->v_object, OFF_TO_IDX(start),
			    OFF_TO_IDX(end));
			VM_OBJECT_RUNLOCK(vp->v_object);
		}

		bo = &vp->v_bufobj;
		BO_RLOCK(bo);
		startn = bstart / bsize;
		endn = bend / bsize;
		error = bnoreuselist(&bo->bo_clean, bo, startn, endn);
		if (error == 0)
			error = bnoreuselist(&bo->bo_dirty, bo, startn, endn);
		BO_RUNLOCK(bo);
		VOP_UNLOCK(vp, 0);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

int
vop_stdunp_bind(struct vop_unp_bind_args *ap)
{

	ap->a_vp->v_unpcb = ap->a_unpcb;
	return (0);
}

int
vop_stdunp_connect(struct vop_unp_connect_args *ap)
{

	*ap->a_unpcb = ap->a_vp->v_unpcb;
	return (0);
}

int
vop_stdunp_detach(struct vop_unp_detach_args *ap)
{

	ap->a_vp->v_unpcb = NULL;
	return (0);
}

static int
vop_stdis_text(struct vop_is_text_args *ap)
{

	return (ap->a_vp->v_writecount < 0);
}

int
vop_stdset_text(struct vop_set_text_args *ap)
{
	struct vnode *vp;
	struct mount *mp;
	int error;

	vp = ap->a_vp;
	VI_LOCK(vp);
	if (vp->v_writecount > 0) {
		error = ETXTBSY;
	} else {
		/*
		 * If requested by fs, keep a use reference to the
		 * vnode until the last text reference is released.
		 */
		mp = vp->v_mount;
		if (mp != NULL && (mp->mnt_kern_flag & MNTK_TEXT_REFS) != 0 &&
		    vp->v_writecount == 0) {
			vp->v_iflag |= VI_TEXT_REF;
			vrefl(vp);
		}

		vp->v_writecount--;
		error = 0;
	}
	VI_UNLOCK(vp);
	return (error);
}

static int
vop_stdunset_text(struct vop_unset_text_args *ap)
{
	struct vnode *vp;
	int error;
	bool last;

	vp = ap->a_vp;
	last = false;
	VI_LOCK(vp);
	if (vp->v_writecount < 0) {
		if ((vp->v_iflag & VI_TEXT_REF) != 0 &&
		    vp->v_writecount == -1) {
			last = true;
			vp->v_iflag &= ~VI_TEXT_REF;
		}
		vp->v_writecount++;
		error = 0;
	} else {
		error = EINVAL;
	}
	VI_UNLOCK(vp);
	if (last)
		vunref(vp);
	return (error);
}

static int
vop_stdadd_writecount(struct vop_add_writecount_args *ap)
{
	struct vnode *vp;
	int error;

	vp = ap->a_vp;
	VI_LOCK_FLAGS(vp, MTX_DUPOK);
	if (vp->v_writecount < 0) {
		error = ETXTBSY;
	} else {
		VNASSERT(vp->v_writecount + ap->a_inc >= 0, vp,
		    ("neg writecount increment %d", ap->a_inc));
		vp->v_writecount += ap->a_inc;
		error = 0;
	}
	VI_UNLOCK(vp);
	return (error);
}
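
/*
 * Note (illustrative): v_writecount doubles as the text-reference count
 * by sign convention.  Positive values count writers; negative values
 * count executable mappings.  For example, two VOP_SET_TEXT() calls
 * leave v_writecount == -2, after which VOP_ADD_WRITECOUNT() fails with
 * ETXTBSY; this is also why vop_stdis_text() is simply
 * "v_writecount < 0".
 */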

int
vop_stdneed_inactive(struct vop_need_inactive_args *ap)
{

	return (1);
}

int
vop_stdioctl(struct vop_ioctl_args *ap)
{
	struct vnode *vp;
	struct vattr va;
	off_t *offp;
	int error;

	switch (ap->a_command) {
	case FIOSEEKDATA:
	case FIOSEEKHOLE:
		vp = ap->a_vp;
		error = vn_lock(vp, LK_SHARED);
		if (error != 0)
			return (EBADF);
		if (vp->v_type == VREG)
			error = VOP_GETATTR(vp, &va, ap->a_cred);
		else
			error = ENOTTY;
		if (error == 0) {
			offp = ap->a_data;
			if (*offp < 0 || *offp >= va.va_size)
				error = ENXIO;
			else if (ap->a_command == FIOSEEKHOLE)
				*offp = va.va_size;
		}
		VOP_UNLOCK(vp, 0);
		break;
	default:
		error = ENOTTY;
		break;
	}
	return (error);
}

/*
 * VFS default ops, used to fill the VFS function table with reasonable
 * default return values.
 */
int
vfs_stdroot(mp, flags, vpp)
	struct mount *mp;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdstatfs(mp, sbp)
	struct mount *mp;
	struct statfs *sbp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdquotactl(mp, cmds, uid, arg)
	struct mount *mp;
	int cmds;
	uid_t uid;
	void *arg;
{

	return (EOPNOTSUPP);
}

int
vfs_stdsync(mp, waitfor)
	struct mount *mp;
	int waitfor;
{
	struct vnode *vp, *mvp;
	struct thread *td;
	int error, lockreq, allerror = 0;

	td = curthread;
	lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
	if (waitfor != MNT_WAIT)
		lockreq |= LK_NOWAIT;
	/*
	 * Force stale buffer cache information to be flushed.
	 */
loop:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		if (vp->v_bufobj.bo_dirty.bv_cnt == 0) {
			VI_UNLOCK(vp);
			continue;
		}
		if ((error = vget(vp, lockreq, td)) != 0) {
			if (error == ENOENT) {
				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
				goto loop;
			}
			continue;
		}
		error = VOP_FSYNC(vp, waitfor, td);
		if (error)
			allerror = error;
		vput(vp);
	}
	return (allerror);
}

int
vfs_stdnosync(mp, waitfor)
	struct mount *mp;
	int waitfor;
{

	return (0);
}

static int
vop_stdcopy_file_range(struct vop_copy_file_range_args *ap)
{
	int error;

	error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp,
	    ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, ap->a_incred,
	    ap->a_outcred, ap->a_fsizetd);
	return (error);
}

int
vfs_stdvget(mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdfhtovp(mp, fhp, flags, vpp)
	struct mount *mp;
	struct fid *fhp;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdinit(vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stduninit(vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname)
	struct mount *mp;
	int cmd;
	struct vnode *filename_vp;
	int attrnamespace;
	const char *attrname;
{

	if (filename_vp != NULL)
		VOP_UNLOCK(filename_vp, 0);
	return (EOPNOTSUPP);
}

int
vfs_stdsysctl(mp, op, req)
	struct mount *mp;
	fsctlop_t op;
	struct sysctl_req *req;
{

	return (EOPNOTSUPP);
}
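
/*
 * Illustrative sketch ("myfs" is hypothetical): a filesystem can point
 * the vfsops it does not support at the stubs above, e.g.:
 *
 *	static struct vfsops myfs_vfsops = {
 *		.vfs_mount =	myfs_mount,
 *		.vfs_unmount =	myfs_unmount,
 *		.vfs_root =	vfs_stdroot,
 *		.vfs_statfs =	vfs_stdstatfs,
 *		.vfs_sync =	vfs_stdnosync,
 *	};
 *
 * (vfs_register() also substitutes these defaults for entries left NULL,
 * so the explicit assignments are optional.)
 */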

static vop_bypass_t *
bp_by_off(struct vop_vector *vop, struct vop_generic_args *a)
{

	return (*(vop_bypass_t **)((char *)vop + a->a_desc->vdesc_vop_offset));
}

int
vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a)
{
	vop_bypass_t *bp;
	int prev_stops, rc;

	for (; vop != NULL; vop = vop->vop_default) {
		bp = bp_by_off(vop, a);
		if (bp != NULL)
			break;

		/*
		 * Bypass is not really supported.  It is done for
		 * fallback to unimplemented vops in the default
		 * vector.
		 */
		bp = vop->vop_bypass;
		if (bp != NULL)
			break;
	}
	MPASS(bp != NULL);

	prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
	rc = bp(a);
	sigallowstop(prev_stops);
	return (rc);
}