1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed 8 * to Berkeley by John Heidemann of the UCLA Ficus project. 9 * 10 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/bio.h> 43 #include <sys/buf.h> 44 #include <sys/conf.h> 45 #include <sys/event.h> 46 #include <sys/kernel.h> 47 #include <sys/limits.h> 48 #include <sys/lock.h> 49 #include <sys/lockf.h> 50 #include <sys/malloc.h> 51 #include <sys/mount.h> 52 #include <sys/namei.h> 53 #include <sys/rwlock.h> 54 #include <sys/fcntl.h> 55 #include <sys/unistd.h> 56 #include <sys/vnode.h> 57 #include <sys/dirent.h> 58 #include <sys/poll.h> 59 60 #include <security/mac/mac_framework.h> 61 62 #include <vm/vm.h> 63 #include <vm/vm_object.h> 64 #include <vm/vm_extern.h> 65 #include <vm/pmap.h> 66 #include <vm/vm_map.h> 67 #include <vm/vm_page.h> 68 #include <vm/vm_pager.h> 69 #include <vm/vnode_pager.h> 70 71 static int vop_nolookup(struct vop_lookup_args *); 72 static int vop_norename(struct vop_rename_args *); 73 static int vop_nostrategy(struct vop_strategy_args *); 74 static int get_next_dirent(struct vnode *vp, struct dirent **dpp, 75 char *dirbuf, int dirbuflen, off_t *off, 76 char **cpos, int *len, int *eofflag, 77 struct thread *td); 78 static int dirent_exists(struct vnode *vp, const char *dirname, 79 struct thread *td); 80 81 #define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4) 82 83 static int vop_stdis_text(struct vop_is_text_args *ap); 84 static int vop_stdset_text(struct vop_set_text_args *ap); 85 static int vop_stdunset_text(struct vop_unset_text_args *ap); 86 static int vop_stdget_writecount(struct vop_get_writecount_args *ap); 87 static int vop_stdadd_writecount(struct vop_add_writecount_args *ap); 88 static int vop_stdfdatasync(struct vop_fdatasync_args *ap); 89 static int vop_stdgetpages_async(struct vop_getpages_async_args *ap); 90 91 /* 92 * This vnode table stores what we want to do if the filesystem doesn't 93 * implement a particular VOP. 94 * 95 * If there is no specific entry here, we will return EOPNOTSUPP. 96 * 97 * Note that every filesystem has to implement either vop_access 98 * or vop_accessx; failing to do so will result in immediate crash 99 * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(), 100 * which calls vop_stdaccess() etc. 101 */ 102 103 struct vop_vector default_vnodeops = { 104 .vop_default = NULL, 105 .vop_bypass = VOP_EOPNOTSUPP, 106 107 .vop_access = vop_stdaccess, 108 .vop_accessx = vop_stdaccessx, 109 .vop_advise = vop_stdadvise, 110 .vop_advlock = vop_stdadvlock, 111 .vop_advlockasync = vop_stdadvlockasync, 112 .vop_advlockpurge = vop_stdadvlockpurge, 113 .vop_allocate = vop_stdallocate, 114 .vop_bmap = vop_stdbmap, 115 .vop_close = VOP_NULL, 116 .vop_fsync = VOP_NULL, 117 .vop_fdatasync = vop_stdfdatasync, 118 .vop_getpages = vop_stdgetpages, 119 .vop_getpages_async = vop_stdgetpages_async, 120 .vop_getwritemount = vop_stdgetwritemount, 121 .vop_inactive = VOP_NULL, 122 .vop_ioctl = VOP_ENOTTY, 123 .vop_kqfilter = vop_stdkqfilter, 124 .vop_islocked = vop_stdislocked, 125 .vop_lock1 = vop_stdlock, 126 .vop_lookup = vop_nolookup, 127 .vop_open = VOP_NULL, 128 .vop_pathconf = VOP_EINVAL, 129 .vop_poll = vop_nopoll, 130 .vop_putpages = vop_stdputpages, 131 .vop_readlink = VOP_EINVAL, 132 .vop_rename = vop_norename, 133 .vop_revoke = VOP_PANIC, 134 .vop_strategy = vop_nostrategy, 135 .vop_unlock = vop_stdunlock, 136 .vop_vptocnp = vop_stdvptocnp, 137 .vop_vptofh = vop_stdvptofh, 138 .vop_unp_bind = vop_stdunp_bind, 139 .vop_unp_connect = vop_stdunp_connect, 140 .vop_unp_detach = vop_stdunp_detach, 141 .vop_is_text = vop_stdis_text, 142 .vop_set_text = vop_stdset_text, 143 .vop_unset_text = vop_stdunset_text, 144 .vop_get_writecount = vop_stdget_writecount, 145 .vop_add_writecount = vop_stdadd_writecount, 146 }; 147 148 /* 149 * Series of placeholder functions for various error returns for 150 * VOPs. 151 */ 152 153 int 154 vop_eopnotsupp(struct vop_generic_args *ap) 155 { 156 /* 157 printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name); 158 */ 159 160 return (EOPNOTSUPP); 161 } 162 163 int 164 vop_ebadf(struct vop_generic_args *ap) 165 { 166 167 return (EBADF); 168 } 169 170 int 171 vop_enotty(struct vop_generic_args *ap) 172 { 173 174 return (ENOTTY); 175 } 176 177 int 178 vop_einval(struct vop_generic_args *ap) 179 { 180 181 return (EINVAL); 182 } 183 184 int 185 vop_enoent(struct vop_generic_args *ap) 186 { 187 188 return (ENOENT); 189 } 190 191 int 192 vop_null(struct vop_generic_args *ap) 193 { 194 195 return (0); 196 } 197 198 /* 199 * Helper function to panic on some bad VOPs in some filesystems. 200 */ 201 int 202 vop_panic(struct vop_generic_args *ap) 203 { 204 205 panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name); 206 } 207 208 /* 209 * vop_std<something> and vop_no<something> are default functions for use by 210 * filesystems that need the "default reasonable" implementation for a 211 * particular operation. 212 * 213 * The documentation for the operations they implement exists (if it exists) 214 * in the VOP_<SOMETHING>(9) manpage (all uppercase). 215 */ 216 217 /* 218 * Default vop for filesystems that do not support name lookup 219 */ 220 static int 221 vop_nolookup(ap) 222 struct vop_lookup_args /* { 223 struct vnode *a_dvp; 224 struct vnode **a_vpp; 225 struct componentname *a_cnp; 226 } */ *ap; 227 { 228 229 *ap->a_vpp = NULL; 230 return (ENOTDIR); 231 } 232 233 /* 234 * vop_norename: 235 * 236 * Handle unlock and reference counting for arguments of vop_rename 237 * for filesystems that do not implement rename operation. 238 */ 239 static int 240 vop_norename(struct vop_rename_args *ap) 241 { 242 243 vop_rename_fail(ap); 244 return (EOPNOTSUPP); 245 } 246 247 /* 248 * vop_nostrategy: 249 * 250 * Strategy routine for VFS devices that have none. 251 * 252 * BIO_ERROR and B_INVAL must be cleared prior to calling any strategy 253 * routine. Typically this is done for a BIO_READ strategy call. 254 * Typically B_INVAL is assumed to already be clear prior to a write 255 * and should not be cleared manually unless you just made the buffer 256 * invalid. BIO_ERROR should be cleared either way. 257 */ 258 259 static int 260 vop_nostrategy (struct vop_strategy_args *ap) 261 { 262 printf("No strategy for buffer at %p\n", ap->a_bp); 263 vn_printf(ap->a_vp, "vnode "); 264 ap->a_bp->b_ioflags |= BIO_ERROR; 265 ap->a_bp->b_error = EOPNOTSUPP; 266 bufdone(ap->a_bp); 267 return (EOPNOTSUPP); 268 } 269 270 static int 271 get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf, 272 int dirbuflen, off_t *off, char **cpos, int *len, 273 int *eofflag, struct thread *td) 274 { 275 int error, reclen; 276 struct uio uio; 277 struct iovec iov; 278 struct dirent *dp; 279 280 KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp)); 281 KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp)); 282 283 if (*len == 0) { 284 iov.iov_base = dirbuf; 285 iov.iov_len = dirbuflen; 286 287 uio.uio_iov = &iov; 288 uio.uio_iovcnt = 1; 289 uio.uio_offset = *off; 290 uio.uio_resid = dirbuflen; 291 uio.uio_segflg = UIO_SYSSPACE; 292 uio.uio_rw = UIO_READ; 293 uio.uio_td = td; 294 295 *eofflag = 0; 296 297 #ifdef MAC 298 error = mac_vnode_check_readdir(td->td_ucred, vp); 299 if (error == 0) 300 #endif 301 error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag, 302 NULL, NULL); 303 if (error) 304 return (error); 305 306 *off = uio.uio_offset; 307 308 *cpos = dirbuf; 309 *len = (dirbuflen - uio.uio_resid); 310 311 if (*len == 0) 312 return (ENOENT); 313 } 314 315 dp = (struct dirent *)(*cpos); 316 reclen = dp->d_reclen; 317 *dpp = dp; 318 319 /* check for malformed directory.. */ 320 if (reclen < DIRENT_MINSIZE) 321 return (EINVAL); 322 323 *cpos += reclen; 324 *len -= reclen; 325 326 return (0); 327 } 328 329 /* 330 * Check if a named file exists in a given directory vnode. 331 */ 332 static int 333 dirent_exists(struct vnode *vp, const char *dirname, struct thread *td) 334 { 335 char *dirbuf, *cpos; 336 int error, eofflag, dirbuflen, len, found; 337 off_t off; 338 struct dirent *dp; 339 struct vattr va; 340 341 KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp)); 342 KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp)); 343 344 found = 0; 345 346 error = VOP_GETATTR(vp, &va, td->td_ucred); 347 if (error) 348 return (found); 349 350 dirbuflen = DEV_BSIZE; 351 if (dirbuflen < va.va_blocksize) 352 dirbuflen = va.va_blocksize; 353 dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK); 354 355 off = 0; 356 len = 0; 357 do { 358 error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off, 359 &cpos, &len, &eofflag, td); 360 if (error) 361 goto out; 362 363 if (dp->d_type != DT_WHT && dp->d_fileno != 0 && 364 strcmp(dp->d_name, dirname) == 0) { 365 found = 1; 366 goto out; 367 } 368 } while (len > 0 || !eofflag); 369 370 out: 371 free(dirbuf, M_TEMP); 372 return (found); 373 } 374 375 int 376 vop_stdaccess(struct vop_access_args *ap) 377 { 378 379 KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | 380 VAPPEND)) == 0, ("invalid bit in accmode")); 381 382 return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td)); 383 } 384 385 int 386 vop_stdaccessx(struct vop_accessx_args *ap) 387 { 388 int error; 389 accmode_t accmode = ap->a_accmode; 390 391 error = vfs_unixify_accmode(&accmode); 392 if (error != 0) 393 return (error); 394 395 if (accmode == 0) 396 return (0); 397 398 return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td)); 399 } 400 401 /* 402 * Advisory record locking support 403 */ 404 int 405 vop_stdadvlock(struct vop_advlock_args *ap) 406 { 407 struct vnode *vp; 408 struct vattr vattr; 409 int error; 410 411 vp = ap->a_vp; 412 if (ap->a_fl->l_whence == SEEK_END) { 413 /* 414 * The NFSv4 server must avoid doing a vn_lock() here, since it 415 * can deadlock the nfsd threads, due to a LOR. Fortunately 416 * the NFSv4 server always uses SEEK_SET and this code is 417 * only required for the SEEK_END case. 418 */ 419 vn_lock(vp, LK_SHARED | LK_RETRY); 420 error = VOP_GETATTR(vp, &vattr, curthread->td_ucred); 421 VOP_UNLOCK(vp, 0); 422 if (error) 423 return (error); 424 } else 425 vattr.va_size = 0; 426 427 return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size)); 428 } 429 430 int 431 vop_stdadvlockasync(struct vop_advlockasync_args *ap) 432 { 433 struct vnode *vp; 434 struct vattr vattr; 435 int error; 436 437 vp = ap->a_vp; 438 if (ap->a_fl->l_whence == SEEK_END) { 439 /* The size argument is only needed for SEEK_END. */ 440 vn_lock(vp, LK_SHARED | LK_RETRY); 441 error = VOP_GETATTR(vp, &vattr, curthread->td_ucred); 442 VOP_UNLOCK(vp, 0); 443 if (error) 444 return (error); 445 } else 446 vattr.va_size = 0; 447 448 return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size)); 449 } 450 451 int 452 vop_stdadvlockpurge(struct vop_advlockpurge_args *ap) 453 { 454 struct vnode *vp; 455 456 vp = ap->a_vp; 457 lf_purgelocks(vp, &vp->v_lockf); 458 return (0); 459 } 460 461 /* 462 * vop_stdpathconf: 463 * 464 * Standard implementation of POSIX pathconf, to get information about limits 465 * for a filesystem. 466 * Override per filesystem for the case where the filesystem has smaller 467 * limits. 468 */ 469 int 470 vop_stdpathconf(ap) 471 struct vop_pathconf_args /* { 472 struct vnode *a_vp; 473 int a_name; 474 int *a_retval; 475 } */ *ap; 476 { 477 478 switch (ap->a_name) { 479 case _PC_ASYNC_IO: 480 *ap->a_retval = _POSIX_ASYNCHRONOUS_IO; 481 return (0); 482 case _PC_PATH_MAX: 483 *ap->a_retval = PATH_MAX; 484 return (0); 485 default: 486 return (EINVAL); 487 } 488 /* NOTREACHED */ 489 } 490 491 /* 492 * Standard lock, unlock and islocked functions. 493 */ 494 int 495 vop_stdlock(ap) 496 struct vop_lock1_args /* { 497 struct vnode *a_vp; 498 int a_flags; 499 char *file; 500 int line; 501 } */ *ap; 502 { 503 struct vnode *vp = ap->a_vp; 504 struct mtx *ilk; 505 506 ilk = VI_MTX(vp); 507 return (lockmgr_lock_fast_path(vp->v_vnlock, ap->a_flags, 508 (ilk != NULL) ? &ilk->lock_object : NULL, ap->a_file, ap->a_line)); 509 } 510 511 /* See above. */ 512 int 513 vop_stdunlock(ap) 514 struct vop_unlock_args /* { 515 struct vnode *a_vp; 516 int a_flags; 517 } */ *ap; 518 { 519 struct vnode *vp = ap->a_vp; 520 struct mtx *ilk; 521 522 ilk = VI_MTX(vp); 523 return (lockmgr_unlock_fast_path(vp->v_vnlock, ap->a_flags, 524 (ilk != NULL) ? &ilk->lock_object : NULL)); 525 } 526 527 /* See above. */ 528 int 529 vop_stdislocked(ap) 530 struct vop_islocked_args /* { 531 struct vnode *a_vp; 532 } */ *ap; 533 { 534 535 return (lockstatus(ap->a_vp->v_vnlock)); 536 } 537 538 /* 539 * Return true for select/poll. 540 */ 541 int 542 vop_nopoll(ap) 543 struct vop_poll_args /* { 544 struct vnode *a_vp; 545 int a_events; 546 struct ucred *a_cred; 547 struct thread *a_td; 548 } */ *ap; 549 { 550 551 return (poll_no_poll(ap->a_events)); 552 } 553 554 /* 555 * Implement poll for local filesystems that support it. 556 */ 557 int 558 vop_stdpoll(ap) 559 struct vop_poll_args /* { 560 struct vnode *a_vp; 561 int a_events; 562 struct ucred *a_cred; 563 struct thread *a_td; 564 } */ *ap; 565 { 566 if (ap->a_events & ~POLLSTANDARD) 567 return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events)); 568 return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 569 } 570 571 /* 572 * Return our mount point, as we will take charge of the writes. 573 */ 574 int 575 vop_stdgetwritemount(ap) 576 struct vop_getwritemount_args /* { 577 struct vnode *a_vp; 578 struct mount **a_mpp; 579 } */ *ap; 580 { 581 struct mount *mp; 582 583 /* 584 * XXX Since this is called unlocked we may be recycled while 585 * attempting to ref the mount. If this is the case or mountpoint 586 * will be set to NULL. We only have to prevent this call from 587 * returning with a ref to an incorrect mountpoint. It is not 588 * harmful to return with a ref to our previous mountpoint. 589 */ 590 mp = ap->a_vp->v_mount; 591 if (mp != NULL) { 592 vfs_ref(mp); 593 if (mp != ap->a_vp->v_mount) { 594 vfs_rel(mp); 595 mp = NULL; 596 } 597 } 598 *(ap->a_mpp) = mp; 599 return (0); 600 } 601 602 /* XXX Needs good comment and VOP_BMAP(9) manpage */ 603 int 604 vop_stdbmap(ap) 605 struct vop_bmap_args /* { 606 struct vnode *a_vp; 607 daddr_t a_bn; 608 struct bufobj **a_bop; 609 daddr_t *a_bnp; 610 int *a_runp; 611 int *a_runb; 612 } */ *ap; 613 { 614 615 if (ap->a_bop != NULL) 616 *ap->a_bop = &ap->a_vp->v_bufobj; 617 if (ap->a_bnp != NULL) 618 *ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize); 619 if (ap->a_runp != NULL) 620 *ap->a_runp = 0; 621 if (ap->a_runb != NULL) 622 *ap->a_runb = 0; 623 return (0); 624 } 625 626 int 627 vop_stdfsync(ap) 628 struct vop_fsync_args /* { 629 struct vnode *a_vp; 630 int a_waitfor; 631 struct thread *a_td; 632 } */ *ap; 633 { 634 struct vnode *vp; 635 struct buf *bp, *nbp; 636 struct bufobj *bo; 637 struct mount *mp; 638 int error, maxretry; 639 640 error = 0; 641 maxretry = 10000; /* large, arbitrarily chosen */ 642 vp = ap->a_vp; 643 mp = NULL; 644 if (vp->v_type == VCHR) { 645 VI_LOCK(vp); 646 mp = vp->v_rdev->si_mountpt; 647 VI_UNLOCK(vp); 648 } 649 bo = &vp->v_bufobj; 650 BO_LOCK(bo); 651 loop1: 652 /* 653 * MARK/SCAN initialization to avoid infinite loops. 654 */ 655 TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) { 656 bp->b_vflags &= ~BV_SCANNED; 657 bp->b_error = 0; 658 } 659 660 /* 661 * Flush all dirty buffers associated with a vnode. 662 */ 663 loop2: 664 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 665 if ((bp->b_vflags & BV_SCANNED) != 0) 666 continue; 667 bp->b_vflags |= BV_SCANNED; 668 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { 669 if (ap->a_waitfor != MNT_WAIT) 670 continue; 671 if (BUF_LOCK(bp, 672 LK_EXCLUSIVE | LK_INTERLOCK | LK_SLEEPFAIL, 673 BO_LOCKPTR(bo)) != 0) { 674 BO_LOCK(bo); 675 goto loop1; 676 } 677 BO_LOCK(bo); 678 } 679 BO_UNLOCK(bo); 680 KASSERT(bp->b_bufobj == bo, 681 ("bp %p wrong b_bufobj %p should be %p", 682 bp, bp->b_bufobj, bo)); 683 if ((bp->b_flags & B_DELWRI) == 0) 684 panic("fsync: not dirty"); 685 if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) { 686 vfs_bio_awrite(bp); 687 } else { 688 bremfree(bp); 689 bawrite(bp); 690 } 691 if (maxretry < 1000) 692 pause("dirty", hz < 1000 ? 1 : hz / 1000); 693 BO_LOCK(bo); 694 goto loop2; 695 } 696 697 /* 698 * If synchronous the caller expects us to completely resolve all 699 * dirty buffers in the system. Wait for in-progress I/O to 700 * complete (which could include background bitmap writes), then 701 * retry if dirty blocks still exist. 702 */ 703 if (ap->a_waitfor == MNT_WAIT) { 704 bufobj_wwait(bo, 0, 0); 705 if (bo->bo_dirty.bv_cnt > 0) { 706 /* 707 * If we are unable to write any of these buffers 708 * then we fail now rather than trying endlessly 709 * to write them out. 710 */ 711 TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) 712 if ((error = bp->b_error) != 0) 713 break; 714 if ((mp != NULL && mp->mnt_secondary_writes > 0) || 715 (error == 0 && --maxretry >= 0)) 716 goto loop1; 717 error = EAGAIN; 718 } 719 } 720 BO_UNLOCK(bo); 721 if (error == EAGAIN) 722 vn_printf(vp, "fsync: giving up on dirty "); 723 724 return (error); 725 } 726 727 static int 728 vop_stdfdatasync(struct vop_fdatasync_args *ap) 729 { 730 731 return (VOP_FSYNC(ap->a_vp, MNT_WAIT, ap->a_td)); 732 } 733 734 int 735 vop_stdfdatasync_buf(struct vop_fdatasync_args *ap) 736 { 737 struct vop_fsync_args apf; 738 739 apf.a_vp = ap->a_vp; 740 apf.a_waitfor = MNT_WAIT; 741 apf.a_td = ap->a_td; 742 return (vop_stdfsync(&apf)); 743 } 744 745 /* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */ 746 int 747 vop_stdgetpages(ap) 748 struct vop_getpages_args /* { 749 struct vnode *a_vp; 750 vm_page_t *a_m; 751 int a_count; 752 int *a_rbehind; 753 int *a_rahead; 754 } */ *ap; 755 { 756 757 return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, 758 ap->a_count, ap->a_rbehind, ap->a_rahead, NULL, NULL); 759 } 760 761 static int 762 vop_stdgetpages_async(struct vop_getpages_async_args *ap) 763 { 764 int error; 765 766 error = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, 767 ap->a_rahead); 768 ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error); 769 return (error); 770 } 771 772 int 773 vop_stdkqfilter(struct vop_kqfilter_args *ap) 774 { 775 return vfs_kqfilter(ap); 776 } 777 778 /* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */ 779 int 780 vop_stdputpages(ap) 781 struct vop_putpages_args /* { 782 struct vnode *a_vp; 783 vm_page_t *a_m; 784 int a_count; 785 int a_sync; 786 int *a_rtvals; 787 } */ *ap; 788 { 789 790 return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, 791 ap->a_sync, ap->a_rtvals); 792 } 793 794 int 795 vop_stdvptofh(struct vop_vptofh_args *ap) 796 { 797 return (EOPNOTSUPP); 798 } 799 800 int 801 vop_stdvptocnp(struct vop_vptocnp_args *ap) 802 { 803 struct vnode *vp = ap->a_vp; 804 struct vnode **dvp = ap->a_vpp; 805 struct ucred *cred = ap->a_cred; 806 char *buf = ap->a_buf; 807 int *buflen = ap->a_buflen; 808 char *dirbuf, *cpos; 809 int i, error, eofflag, dirbuflen, flags, locked, len, covered; 810 off_t off; 811 ino_t fileno; 812 struct vattr va; 813 struct nameidata nd; 814 struct thread *td; 815 struct dirent *dp; 816 struct vnode *mvp; 817 818 i = *buflen; 819 error = 0; 820 covered = 0; 821 td = curthread; 822 823 if (vp->v_type != VDIR) 824 return (ENOENT); 825 826 error = VOP_GETATTR(vp, &va, cred); 827 if (error) 828 return (error); 829 830 VREF(vp); 831 locked = VOP_ISLOCKED(vp); 832 VOP_UNLOCK(vp, 0); 833 NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, 834 "..", vp, td); 835 flags = FREAD; 836 error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL); 837 if (error) { 838 vn_lock(vp, locked | LK_RETRY); 839 return (error); 840 } 841 NDFREE(&nd, NDF_ONLY_PNBUF); 842 843 mvp = *dvp = nd.ni_vp; 844 845 if (vp->v_mount != (*dvp)->v_mount && 846 ((*dvp)->v_vflag & VV_ROOT) && 847 ((*dvp)->v_mount->mnt_flag & MNT_UNION)) { 848 *dvp = (*dvp)->v_mount->mnt_vnodecovered; 849 VREF(mvp); 850 VOP_UNLOCK(mvp, 0); 851 vn_close(mvp, FREAD, cred, td); 852 VREF(*dvp); 853 vn_lock(*dvp, LK_SHARED | LK_RETRY); 854 covered = 1; 855 } 856 857 fileno = va.va_fileid; 858 859 dirbuflen = DEV_BSIZE; 860 if (dirbuflen < va.va_blocksize) 861 dirbuflen = va.va_blocksize; 862 dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK); 863 864 if ((*dvp)->v_type != VDIR) { 865 error = ENOENT; 866 goto out; 867 } 868 869 off = 0; 870 len = 0; 871 do { 872 /* call VOP_READDIR of parent */ 873 error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off, 874 &cpos, &len, &eofflag, td); 875 if (error) 876 goto out; 877 878 if ((dp->d_type != DT_WHT) && 879 (dp->d_fileno == fileno)) { 880 if (covered) { 881 VOP_UNLOCK(*dvp, 0); 882 vn_lock(mvp, LK_SHARED | LK_RETRY); 883 if (dirent_exists(mvp, dp->d_name, td)) { 884 error = ENOENT; 885 VOP_UNLOCK(mvp, 0); 886 vn_lock(*dvp, LK_SHARED | LK_RETRY); 887 goto out; 888 } 889 VOP_UNLOCK(mvp, 0); 890 vn_lock(*dvp, LK_SHARED | LK_RETRY); 891 } 892 i -= dp->d_namlen; 893 894 if (i < 0) { 895 error = ENOMEM; 896 goto out; 897 } 898 if (dp->d_namlen == 1 && dp->d_name[0] == '.') { 899 error = ENOENT; 900 } else { 901 bcopy(dp->d_name, buf + i, dp->d_namlen); 902 error = 0; 903 } 904 goto out; 905 } 906 } while (len > 0 || !eofflag); 907 error = ENOENT; 908 909 out: 910 free(dirbuf, M_TEMP); 911 if (!error) { 912 *buflen = i; 913 vref(*dvp); 914 } 915 if (covered) { 916 vput(*dvp); 917 vrele(mvp); 918 } else { 919 VOP_UNLOCK(mvp, 0); 920 vn_close(mvp, FREAD, cred, td); 921 } 922 vn_lock(vp, locked | LK_RETRY); 923 return (error); 924 } 925 926 int 927 vop_stdallocate(struct vop_allocate_args *ap) 928 { 929 #ifdef __notyet__ 930 struct statfs *sfs; 931 off_t maxfilesize = 0; 932 #endif 933 struct iovec aiov; 934 struct vattr vattr, *vap; 935 struct uio auio; 936 off_t fsize, len, cur, offset; 937 uint8_t *buf; 938 struct thread *td; 939 struct vnode *vp; 940 size_t iosize; 941 int error; 942 943 buf = NULL; 944 error = 0; 945 td = curthread; 946 vap = &vattr; 947 vp = ap->a_vp; 948 len = *ap->a_len; 949 offset = *ap->a_offset; 950 951 error = VOP_GETATTR(vp, vap, td->td_ucred); 952 if (error != 0) 953 goto out; 954 fsize = vap->va_size; 955 iosize = vap->va_blocksize; 956 if (iosize == 0) 957 iosize = BLKDEV_IOSIZE; 958 if (iosize > MAXPHYS) 959 iosize = MAXPHYS; 960 buf = malloc(iosize, M_TEMP, M_WAITOK); 961 962 #ifdef __notyet__ 963 /* 964 * Check if the filesystem sets f_maxfilesize; if not use 965 * VOP_SETATTR to perform the check. 966 */ 967 sfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 968 error = VFS_STATFS(vp->v_mount, sfs, td); 969 if (error == 0) 970 maxfilesize = sfs->f_maxfilesize; 971 free(sfs, M_STATFS); 972 if (error != 0) 973 goto out; 974 if (maxfilesize) { 975 if (offset > maxfilesize || len > maxfilesize || 976 offset + len > maxfilesize) { 977 error = EFBIG; 978 goto out; 979 } 980 } else 981 #endif 982 if (offset + len > vap->va_size) { 983 /* 984 * Test offset + len against the filesystem's maxfilesize. 985 */ 986 VATTR_NULL(vap); 987 vap->va_size = offset + len; 988 error = VOP_SETATTR(vp, vap, td->td_ucred); 989 if (error != 0) 990 goto out; 991 VATTR_NULL(vap); 992 vap->va_size = fsize; 993 error = VOP_SETATTR(vp, vap, td->td_ucred); 994 if (error != 0) 995 goto out; 996 } 997 998 for (;;) { 999 /* 1000 * Read and write back anything below the nominal file 1001 * size. There's currently no way outside the filesystem 1002 * to know whether this area is sparse or not. 1003 */ 1004 cur = iosize; 1005 if ((offset % iosize) != 0) 1006 cur -= (offset % iosize); 1007 if (cur > len) 1008 cur = len; 1009 if (offset < fsize) { 1010 aiov.iov_base = buf; 1011 aiov.iov_len = cur; 1012 auio.uio_iov = &aiov; 1013 auio.uio_iovcnt = 1; 1014 auio.uio_offset = offset; 1015 auio.uio_resid = cur; 1016 auio.uio_segflg = UIO_SYSSPACE; 1017 auio.uio_rw = UIO_READ; 1018 auio.uio_td = td; 1019 error = VOP_READ(vp, &auio, 0, td->td_ucred); 1020 if (error != 0) 1021 break; 1022 if (auio.uio_resid > 0) { 1023 bzero(buf + cur - auio.uio_resid, 1024 auio.uio_resid); 1025 } 1026 } else { 1027 bzero(buf, cur); 1028 } 1029 1030 aiov.iov_base = buf; 1031 aiov.iov_len = cur; 1032 auio.uio_iov = &aiov; 1033 auio.uio_iovcnt = 1; 1034 auio.uio_offset = offset; 1035 auio.uio_resid = cur; 1036 auio.uio_segflg = UIO_SYSSPACE; 1037 auio.uio_rw = UIO_WRITE; 1038 auio.uio_td = td; 1039 1040 error = VOP_WRITE(vp, &auio, 0, td->td_ucred); 1041 if (error != 0) 1042 break; 1043 1044 len -= cur; 1045 offset += cur; 1046 if (len == 0) 1047 break; 1048 if (should_yield()) 1049 break; 1050 } 1051 1052 out: 1053 *ap->a_len = len; 1054 *ap->a_offset = offset; 1055 free(buf, M_TEMP); 1056 return (error); 1057 } 1058 1059 int 1060 vop_stdadvise(struct vop_advise_args *ap) 1061 { 1062 struct vnode *vp; 1063 struct bufobj *bo; 1064 daddr_t startn, endn; 1065 off_t start, end; 1066 int bsize, error; 1067 1068 vp = ap->a_vp; 1069 switch (ap->a_advice) { 1070 case POSIX_FADV_WILLNEED: 1071 /* 1072 * Do nothing for now. Filesystems should provide a 1073 * custom method which starts an asynchronous read of 1074 * the requested region. 1075 */ 1076 error = 0; 1077 break; 1078 case POSIX_FADV_DONTNEED: 1079 error = 0; 1080 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1081 if (vp->v_iflag & VI_DOOMED) { 1082 VOP_UNLOCK(vp, 0); 1083 break; 1084 } 1085 1086 /* 1087 * Deactivate pages in the specified range from the backing VM 1088 * object. Pages that are resident in the buffer cache will 1089 * remain wired until their corresponding buffers are released 1090 * below. 1091 */ 1092 if (vp->v_object != NULL) { 1093 start = trunc_page(ap->a_start); 1094 end = round_page(ap->a_end); 1095 VM_OBJECT_RLOCK(vp->v_object); 1096 vm_object_page_noreuse(vp->v_object, OFF_TO_IDX(start), 1097 OFF_TO_IDX(end)); 1098 VM_OBJECT_RUNLOCK(vp->v_object); 1099 } 1100 1101 bo = &vp->v_bufobj; 1102 BO_RLOCK(bo); 1103 bsize = vp->v_bufobj.bo_bsize; 1104 startn = ap->a_start / bsize; 1105 endn = ap->a_end / bsize; 1106 error = bnoreuselist(&bo->bo_clean, bo, startn, endn); 1107 if (error == 0) 1108 error = bnoreuselist(&bo->bo_dirty, bo, startn, endn); 1109 BO_RUNLOCK(bo); 1110 VOP_UNLOCK(vp, 0); 1111 break; 1112 default: 1113 error = EINVAL; 1114 break; 1115 } 1116 return (error); 1117 } 1118 1119 int 1120 vop_stdunp_bind(struct vop_unp_bind_args *ap) 1121 { 1122 1123 ap->a_vp->v_unpcb = ap->a_unpcb; 1124 return (0); 1125 } 1126 1127 int 1128 vop_stdunp_connect(struct vop_unp_connect_args *ap) 1129 { 1130 1131 *ap->a_unpcb = ap->a_vp->v_unpcb; 1132 return (0); 1133 } 1134 1135 int 1136 vop_stdunp_detach(struct vop_unp_detach_args *ap) 1137 { 1138 1139 ap->a_vp->v_unpcb = NULL; 1140 return (0); 1141 } 1142 1143 static int 1144 vop_stdis_text(struct vop_is_text_args *ap) 1145 { 1146 1147 return ((ap->a_vp->v_vflag & VV_TEXT) != 0); 1148 } 1149 1150 static int 1151 vop_stdset_text(struct vop_set_text_args *ap) 1152 { 1153 1154 ap->a_vp->v_vflag |= VV_TEXT; 1155 return (0); 1156 } 1157 1158 static int 1159 vop_stdunset_text(struct vop_unset_text_args *ap) 1160 { 1161 1162 ap->a_vp->v_vflag &= ~VV_TEXT; 1163 return (0); 1164 } 1165 1166 static int 1167 vop_stdget_writecount(struct vop_get_writecount_args *ap) 1168 { 1169 1170 *ap->a_writecount = ap->a_vp->v_writecount; 1171 return (0); 1172 } 1173 1174 static int 1175 vop_stdadd_writecount(struct vop_add_writecount_args *ap) 1176 { 1177 1178 ap->a_vp->v_writecount += ap->a_inc; 1179 return (0); 1180 } 1181 1182 /* 1183 * vfs default ops 1184 * used to fill the vfs function table to get reasonable default return values. 1185 */ 1186 int 1187 vfs_stdroot (mp, flags, vpp) 1188 struct mount *mp; 1189 int flags; 1190 struct vnode **vpp; 1191 { 1192 1193 return (EOPNOTSUPP); 1194 } 1195 1196 int 1197 vfs_stdstatfs (mp, sbp) 1198 struct mount *mp; 1199 struct statfs *sbp; 1200 { 1201 1202 return (EOPNOTSUPP); 1203 } 1204 1205 int 1206 vfs_stdquotactl (mp, cmds, uid, arg) 1207 struct mount *mp; 1208 int cmds; 1209 uid_t uid; 1210 void *arg; 1211 { 1212 1213 return (EOPNOTSUPP); 1214 } 1215 1216 int 1217 vfs_stdsync(mp, waitfor) 1218 struct mount *mp; 1219 int waitfor; 1220 { 1221 struct vnode *vp, *mvp; 1222 struct thread *td; 1223 int error, lockreq, allerror = 0; 1224 1225 td = curthread; 1226 lockreq = LK_EXCLUSIVE | LK_INTERLOCK; 1227 if (waitfor != MNT_WAIT) 1228 lockreq |= LK_NOWAIT; 1229 /* 1230 * Force stale buffer cache information to be flushed. 1231 */ 1232 loop: 1233 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { 1234 if (vp->v_bufobj.bo_dirty.bv_cnt == 0) { 1235 VI_UNLOCK(vp); 1236 continue; 1237 } 1238 if ((error = vget(vp, lockreq, td)) != 0) { 1239 if (error == ENOENT) { 1240 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 1241 goto loop; 1242 } 1243 continue; 1244 } 1245 error = VOP_FSYNC(vp, waitfor, td); 1246 if (error) 1247 allerror = error; 1248 vput(vp); 1249 } 1250 return (allerror); 1251 } 1252 1253 int 1254 vfs_stdnosync (mp, waitfor) 1255 struct mount *mp; 1256 int waitfor; 1257 { 1258 1259 return (0); 1260 } 1261 1262 int 1263 vfs_stdvget (mp, ino, flags, vpp) 1264 struct mount *mp; 1265 ino_t ino; 1266 int flags; 1267 struct vnode **vpp; 1268 { 1269 1270 return (EOPNOTSUPP); 1271 } 1272 1273 int 1274 vfs_stdfhtovp (mp, fhp, flags, vpp) 1275 struct mount *mp; 1276 struct fid *fhp; 1277 int flags; 1278 struct vnode **vpp; 1279 { 1280 1281 return (EOPNOTSUPP); 1282 } 1283 1284 int 1285 vfs_stdinit (vfsp) 1286 struct vfsconf *vfsp; 1287 { 1288 1289 return (0); 1290 } 1291 1292 int 1293 vfs_stduninit (vfsp) 1294 struct vfsconf *vfsp; 1295 { 1296 1297 return(0); 1298 } 1299 1300 int 1301 vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname) 1302 struct mount *mp; 1303 int cmd; 1304 struct vnode *filename_vp; 1305 int attrnamespace; 1306 const char *attrname; 1307 { 1308 1309 if (filename_vp != NULL) 1310 VOP_UNLOCK(filename_vp, 0); 1311 return (EOPNOTSUPP); 1312 } 1313 1314 int 1315 vfs_stdsysctl(mp, op, req) 1316 struct mount *mp; 1317 fsctlop_t op; 1318 struct sysctl_req *req; 1319 { 1320 1321 return (EOPNOTSUPP); 1322 } 1323 1324 /* end of vfs default ops */ 1325