/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed
 * to Berkeley by John Heidemann of the UCLA Ficus project.
 *
 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/rwlock.h>
#include <sys/fcntl.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/poll.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

static int	vop_nolookup(struct vop_lookup_args *);
static int	vop_norename(struct vop_rename_args *);
static int	vop_nostrategy(struct vop_strategy_args *);
static int	get_next_dirent(struct vnode *vp, struct dirent **dpp,
		    char *dirbuf, int dirbuflen, off_t *off,
		    char **cpos, int *len, int *eofflag,
		    struct thread *td);
static int	dirent_exists(struct vnode *vp, const char *dirname,
		    struct thread *td);

#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)

static int vop_stdis_text(struct vop_is_text_args *ap);
static int vop_stdset_text(struct vop_set_text_args *ap);
static int vop_stdunset_text(struct vop_unset_text_args *ap);
static int vop_stdget_writecount(struct vop_get_writecount_args *ap);
static int vop_stdadd_writecount(struct vop_add_writecount_args *ap);
static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);

/*
 * This vnode table stores what we want to do if the filesystem doesn't
 * implement a particular VOP.
 *
 * If there is no specific entry here, we will return EOPNOTSUPP.
 *
 * Note that every filesystem has to implement either vop_access
 * or vop_accessx; failing to do so will result in an immediate crash
 * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(),
 * which calls vop_stdaccess() etc.
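 *
 * Filesystems typically hook into this table by pointing .vop_default at
 * it from their own vop_vector, so any VOP they leave unimplemented falls
 * back to the defaults below.  A minimal illustrative sketch (the myfs_*
 * names are hypothetical, not part of this file):
 *
 *	struct vop_vector myfs_vnodeops = {
 *		.vop_default =	&default_vnodeops,
 *		.vop_lookup =	myfs_lookup,
 *		.vop_read =	myfs_read,
 *	};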
 */

struct vop_vector default_vnodeops = {
	.vop_default =		NULL,
	.vop_bypass =		VOP_EOPNOTSUPP,

	.vop_access =		vop_stdaccess,
	.vop_accessx =		vop_stdaccessx,
	.vop_advise =		vop_stdadvise,
	.vop_advlock =		vop_stdadvlock,
	.vop_advlockasync =	vop_stdadvlockasync,
	.vop_advlockpurge =	vop_stdadvlockpurge,
	.vop_allocate =		vop_stdallocate,
	.vop_bmap =		vop_stdbmap,
	.vop_close =		VOP_NULL,
	.vop_fsync =		VOP_NULL,
	.vop_getpages =		vop_stdgetpages,
	.vop_getpages_async =	vop_stdgetpages_async,
	.vop_getwritemount =	vop_stdgetwritemount,
	.vop_inactive =		VOP_NULL,
	.vop_ioctl =		VOP_ENOTTY,
	.vop_kqfilter =		vop_stdkqfilter,
	.vop_islocked =		vop_stdislocked,
	.vop_lock1 =		vop_stdlock,
	.vop_lookup =		vop_nolookup,
	.vop_open =		VOP_NULL,
	.vop_pathconf =		VOP_EINVAL,
	.vop_poll =		vop_nopoll,
	.vop_putpages =		vop_stdputpages,
	.vop_readlink =		VOP_EINVAL,
	.vop_rename =		vop_norename,
	.vop_revoke =		VOP_PANIC,
	.vop_strategy =		vop_nostrategy,
	.vop_unlock =		vop_stdunlock,
	.vop_vptocnp =		vop_stdvptocnp,
	.vop_vptofh =		vop_stdvptofh,
	.vop_unp_bind =		vop_stdunp_bind,
	.vop_unp_connect =	vop_stdunp_connect,
	.vop_unp_detach =	vop_stdunp_detach,
	.vop_is_text =		vop_stdis_text,
	.vop_set_text =		vop_stdset_text,
	.vop_unset_text =	vop_stdunset_text,
	.vop_get_writecount =	vop_stdget_writecount,
	.vop_add_writecount =	vop_stdadd_writecount,
};

/*
 * Series of placeholder functions for various error returns for
 * VOPs.
 */

int
vop_eopnotsupp(struct vop_generic_args *ap)
{
	/*
	printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
	*/

	return (EOPNOTSUPP);
}

int
vop_ebadf(struct vop_generic_args *ap)
{

	return (EBADF);
}

int
vop_enotty(struct vop_generic_args *ap)
{

	return (ENOTTY);
}

int
vop_einval(struct vop_generic_args *ap)
{

	return (EINVAL);
}

int
vop_enoent(struct vop_generic_args *ap)
{

	return (ENOENT);
}

int
vop_null(struct vop_generic_args *ap)
{

	return (0);
}

/*
 * Helper function to panic on some bad VOPs in some filesystems.
 */
int
vop_panic(struct vop_generic_args *ap)
{

	panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
}

/*
 * vop_std<something> and vop_no<something> are default functions for use by
 * filesystems that need the "default reasonable" implementation for a
 * particular operation.
 *
 * The documentation for the operations they implement exists (if it exists)
 * in the VOP_<SOMETHING>(9) manpage (all uppercase).
 */

/*
 * Default vop for filesystems that do not support name lookup
 */
static int
vop_nolookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 * vop_norename:
 *
 * Handle unlock and reference counting for arguments of vop_rename
 * for filesystems that do not implement a rename operation.
 */
static int
vop_norename(struct vop_rename_args *ap)
{

	vop_rename_fail(ap);
	return (EOPNOTSUPP);
}

/*
 * vop_nostrategy:
 *
 * Strategy routine for VFS devices that have none.
 *
 * BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
 * routine.  Typically this is done for a BIO_READ strategy call.
 * Typically B_INVAL is assumed to already be clear prior to a write
 * and should not be cleared manually unless you just made the buffer
 * invalid.  BIO_ERROR should be cleared either way.
 */

static int
vop_nostrategy (struct vop_strategy_args *ap)
{
	printf("No strategy for buffer at %p\n", ap->a_bp);
	vprint("vnode", ap->a_vp);
	ap->a_bp->b_ioflags |= BIO_ERROR;
	ap->a_bp->b_error = EOPNOTSUPP;
	bufdone(ap->a_bp);
	return (EOPNOTSUPP);
}

static int
get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
    int dirbuflen, off_t *off, char **cpos, int *len,
    int *eofflag, struct thread *td)
{
	int error, reclen;
	struct uio uio;
	struct iovec iov;
	struct dirent *dp;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	if (*len == 0) {
		iov.iov_base = dirbuf;
		iov.iov_len = dirbuflen;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = *off;
		uio.uio_resid = dirbuflen;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_rw = UIO_READ;
		uio.uio_td = td;

		*eofflag = 0;

#ifdef MAC
		error = mac_vnode_check_readdir(td->td_ucred, vp);
		if (error == 0)
#endif
			error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag,
			    NULL, NULL);
		if (error)
			return (error);

		*off = uio.uio_offset;

		*cpos = dirbuf;
		*len = (dirbuflen - uio.uio_resid);

		if (*len == 0)
			return (ENOENT);
	}

	dp = (struct dirent *)(*cpos);
	reclen = dp->d_reclen;
	*dpp = dp;

	/* check for malformed directory.. */
	if (reclen < DIRENT_MINSIZE)
		return (EINVAL);

	*cpos += reclen;
	*len -= reclen;

	return (0);
}

/*
 * Check if a named file exists in a given directory vnode.
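 *
 * Returns 1 if an entry with the given name is found and 0 otherwise;
 * note that lookup errors are simply reported as "not found".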
 */
static int
dirent_exists(struct vnode *vp, const char *dirname, struct thread *td)
{
	char *dirbuf, *cpos;
	int error, eofflag, dirbuflen, len, found;
	off_t off;
	struct dirent *dp;
	struct vattr va;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	found = 0;

	error = VOP_GETATTR(vp, &va, td->td_ucred);
	if (error)
		return (found);

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	off = 0;
	len = 0;
	do {
		error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off,
		    &cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if (dp->d_type != DT_WHT && dp->d_fileno != 0 &&
		    strcmp(dp->d_name, dirname) == 0) {
			found = 1;
			goto out;
		}
	} while (len > 0 || !eofflag);

out:
	free(dirbuf, M_TEMP);
	return (found);
}

int
vop_stdaccess(struct vop_access_args *ap)
{

	KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
	    VAPPEND)) == 0, ("invalid bit in accmode"));

	return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td));
}

int
vop_stdaccessx(struct vop_accessx_args *ap)
{
	int error;
	accmode_t accmode = ap->a_accmode;

	error = vfs_unixify_accmode(&accmode);
	if (error != 0)
		return (error);

	if (accmode == 0)
		return (0);

	return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td));
}

/*
 * Advisory record locking support
 */
int
vop_stdadvlock(struct vop_advlock_args *ap)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	if (ap->a_fl->l_whence == SEEK_END) {
		/*
		 * The NFSv4 server must avoid doing a vn_lock() here, since it
		 * can deadlock the nfsd threads, due to a LOR.  Fortunately
		 * the NFSv4 server always uses SEEK_SET and this code is
		 * only required for the SEEK_END case.
		 */
		vn_lock(vp, LK_SHARED | LK_RETRY);
		error = VOP_GETATTR(vp, &vattr, curthread->td_ucred);
		VOP_UNLOCK(vp, 0);
		if (error)
			return (error);
	} else
		vattr.va_size = 0;

	return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockasync(struct vop_advlockasync_args *ap)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	if (ap->a_fl->l_whence == SEEK_END) {
		/* The size argument is only needed for SEEK_END. */
		vn_lock(vp, LK_SHARED | LK_RETRY);
		error = VOP_GETATTR(vp, &vattr, curthread->td_ucred);
		VOP_UNLOCK(vp, 0);
		if (error)
			return (error);
	} else
		vattr.va_size = 0;

	return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockpurge(struct vop_advlockpurge_args *ap)
{
	struct vnode *vp;

	vp = ap->a_vp;
	lf_purgelocks(vp, &vp->v_lockf);
	return (0);
}

/*
 * vop_stdpathconf:
 *
 * Standard implementation of POSIX pathconf, to get information about limits
 * for a filesystem.
 * Override per filesystem for the case where the filesystem has smaller
 * limits.
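 *
 * A filesystem with its own limits usually handles the names it cares
 * about and falls back to this routine for everything else, e.g.
 * (illustrative sketch only; MYFS_MAXNAMLEN is a hypothetical constant):
 *
 *	case _PC_NAME_MAX:
 *		*ap->a_retval = MYFS_MAXNAMLEN;
 *		return (0);
 *	default:
 *		return (vop_stdpathconf(ap));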
 */
int
vop_stdpathconf(ap)
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		int *a_retval;
	} */ *ap;
{

	switch (ap->a_name) {
		case _PC_NAME_MAX:
			*ap->a_retval = NAME_MAX;
			return (0);
		case _PC_PATH_MAX:
			*ap->a_retval = PATH_MAX;
			return (0);
		case _PC_LINK_MAX:
			*ap->a_retval = LINK_MAX;
			return (0);
		case _PC_MAX_CANON:
			*ap->a_retval = MAX_CANON;
			return (0);
		case _PC_MAX_INPUT:
			*ap->a_retval = MAX_INPUT;
			return (0);
		case _PC_PIPE_BUF:
			*ap->a_retval = PIPE_BUF;
			return (0);
		case _PC_CHOWN_RESTRICTED:
			*ap->a_retval = 1;
			return (0);
		case _PC_VDISABLE:
			*ap->a_retval = _POSIX_VDISABLE;
			return (0);
		default:
			return (EINVAL);
	}
	/* NOTREACHED */
}

/*
 * Standard lock, unlock and islocked functions.
 */
int
vop_stdlock(ap)
	struct vop_lock1_args /* {
		struct vnode *a_vp;
		int a_flags;
		char *file;
		int line;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
	    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file,
	    ap->a_line));
}

/* See above. */
int
vop_stdunlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp)));
}

/* See above. */
int
vop_stdislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (lockstatus(ap->a_vp->v_vnlock));
}

/*
 * Return true for select/poll.
 */
int
vop_nopoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{

	return (poll_no_poll(ap->a_events));
}

/*
 * Implement poll for local filesystems that support it.
 */
int
vop_stdpoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	if (ap->a_events & ~POLLSTANDARD)
		return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Return our mount point, as we will take charge of the writes.
 */
int
vop_stdgetwritemount(ap)
	struct vop_getwritemount_args /* {
		struct vnode *a_vp;
		struct mount **a_mpp;
	} */ *ap;
{
	struct mount *mp;

	/*
	 * XXX Since this is called unlocked we may be recycled while
	 * attempting to ref the mount.  If this is the case our mountpoint
	 * will be set to NULL.  We only have to prevent this call from
	 * returning with a ref to an incorrect mountpoint.  It is not
	 * harmful to return with a ref to our previous mountpoint.
	 */
	mp = ap->a_vp->v_mount;
	if (mp != NULL) {
		vfs_ref(mp);
		if (mp != ap->a_vp->v_mount) {
			vfs_rel(mp);
			mp = NULL;
		}
	}
	*(ap->a_mpp) = mp;
	return (0);
}

/* XXX Needs good comment and VOP_BMAP(9) manpage */
int
vop_stdbmap(ap)
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t a_bn;
		struct bufobj **a_bop;
		daddr_t *a_bnp;
		int *a_runp;
		int *a_runb;
	} */ *ap;
{

	if (ap->a_bop != NULL)
		*ap->a_bop = &ap->a_vp->v_bufobj;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;
	return (0);
}

int
vop_stdfsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct buf *bp;
	struct bufobj *bo;
	struct buf *nbp;
	int error = 0;
	int maxretry = 1000;	/* large, arbitrarily chosen */

	bo = &vp->v_bufobj;
	BO_LOCK(bo);
loop1:
	/*
	 * MARK/SCAN initialization to avoid infinite loops.
	 */
	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
		bp->b_vflags &= ~BV_SCANNED;
		bp->b_error = 0;
	}

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
loop2:
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
			if (ap->a_waitfor != MNT_WAIT)
				continue;
			if (BUF_LOCK(bp,
			    LK_EXCLUSIVE | LK_INTERLOCK | LK_SLEEPFAIL,
			    BO_LOCKPTR(bo)) != 0) {
				BO_LOCK(bo);
				goto loop1;
			}
			BO_LOCK(bo);
		}
		BO_UNLOCK(bo);
		KASSERT(bp->b_bufobj == bo,
		    ("bp %p wrong b_bufobj %p should be %p",
		    bp, bp->b_bufobj, bo));
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("fsync: not dirty");
		if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) {
			vfs_bio_awrite(bp);
		} else {
			bremfree(bp);
			bawrite(bp);
		}
		BO_LOCK(bo);
		goto loop2;
	}

	/*
	 * If synchronous the caller expects us to completely resolve all
	 * dirty buffers in the system.  Wait for in-progress I/O to
	 * complete (which could include background bitmap writes), then
	 * retry if dirty blocks still exist.
	 */
	if (ap->a_waitfor == MNT_WAIT) {
		bufobj_wwait(bo, 0, 0);
		if (bo->bo_dirty.bv_cnt > 0) {
			/*
			 * If we are unable to write any of these buffers
			 * then we fail now rather than trying endlessly
			 * to write them out.
			 */
			TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
				if ((error = bp->b_error) == 0)
					continue;
			if (error == 0 && --maxretry >= 0)
				goto loop1;
			error = EAGAIN;
		}
	}
	BO_UNLOCK(bo);
	if (error == EAGAIN)
		vprint("fsync: giving up on dirty", vp);

	return (error);
}

/* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)).
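 * The default implementation simply hands the request to the generic
 * vnode pager, passing NULL for the optional I/O-done callback and its
 * argument.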
 */
int
vop_stdgetpages(ap)
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_reqpage;
	} */ *ap;
{

	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count, ap->a_reqpage, NULL, NULL);
}

static int
vop_stdgetpages_async(struct vop_getpages_async_args *ap)
{
	int error;

	error = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage);
	ap->a_iodone(ap->a_arg, ap->a_m, ap->a_reqpage, error);
	return (error);
}

int
vop_stdkqfilter(struct vop_kqfilter_args *ap)
{
	return vfs_kqfilter(ap);
}

/* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */
int
vop_stdputpages(ap)
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_sync;
		int *a_rtvals;
	} */ *ap;
{

	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
	    ap->a_sync, ap->a_rtvals);
}

int
vop_stdvptofh(struct vop_vptofh_args *ap)
{
	return (EOPNOTSUPP);
}

int
vop_stdvptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode **dvp = ap->a_vpp;
	struct ucred *cred = ap->a_cred;
	char *buf = ap->a_buf;
	int *buflen = ap->a_buflen;
	char *dirbuf, *cpos;
	int i, error, eofflag, dirbuflen, flags, locked, len, covered;
	off_t off;
	ino_t fileno;
	struct vattr va;
	struct nameidata nd;
	struct thread *td;
	struct dirent *dp;
	struct vnode *mvp;

	i = *buflen;
	error = 0;
	covered = 0;
	td = curthread;

	if (vp->v_type != VDIR)
		return (ENOENT);

	error = VOP_GETATTR(vp, &va, cred);
	if (error)
		return (error);

	VREF(vp);
	locked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp, 0);
	NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
	    "..", vp, td);
	flags = FREAD;
	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL);
	if (error) {
		vn_lock(vp, locked | LK_RETRY);
		return (error);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);

	mvp = *dvp = nd.ni_vp;

	if (vp->v_mount != (*dvp)->v_mount &&
	    ((*dvp)->v_vflag & VV_ROOT) &&
	    ((*dvp)->v_mount->mnt_flag & MNT_UNION)) {
		*dvp = (*dvp)->v_mount->mnt_vnodecovered;
		VREF(mvp);
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
		VREF(*dvp);
		vn_lock(*dvp, LK_SHARED | LK_RETRY);
		covered = 1;
	}

	fileno = va.va_fileid;

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	if ((*dvp)->v_type != VDIR) {
		error = ENOENT;
		goto out;
	}

	off = 0;
	len = 0;
	do {
		/* call VOP_READDIR of parent */
		error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off,
		    &cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if ((dp->d_type != DT_WHT) &&
		    (dp->d_fileno == fileno)) {
			if (covered) {
				VOP_UNLOCK(*dvp, 0);
				vn_lock(mvp, LK_SHARED | LK_RETRY);
				if (dirent_exists(mvp, dp->d_name, td)) {
					error = ENOENT;
					VOP_UNLOCK(mvp, 0);
					vn_lock(*dvp, LK_SHARED | LK_RETRY);
					goto out;
				}
				VOP_UNLOCK(mvp, 0);
				vn_lock(*dvp, LK_SHARED | LK_RETRY);
			}
			i -= dp->d_namlen;

			if (i < 0) {
				error = ENOMEM;
				goto out;
			}
			if (dp->d_namlen == 1 && dp->d_name[0] == '.') {
				error = ENOENT;
			} else {
				bcopy(dp->d_name, buf + i, dp->d_namlen);
				error = 0;
			}
			goto out;
		}
	} while (len > 0 || !eofflag);
	error = ENOENT;

out:
	free(dirbuf, M_TEMP);
	if (!error) {
		*buflen = i;
		vref(*dvp);
	}
	if (covered) {
		vput(*dvp);
		vrele(mvp);
	} else {
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
	}
	vn_lock(vp, locked | LK_RETRY);
	return (error);
}

int
vop_stdallocate(struct vop_allocate_args *ap)
{
#ifdef __notyet__
	struct statfs sfs;
#endif
	struct iovec aiov;
	struct vattr vattr, *vap;
	struct uio auio;
	off_t fsize, len, cur, offset;
	uint8_t *buf;
	struct thread *td;
	struct vnode *vp;
	size_t iosize;
	int error;

	buf = NULL;
	error = 0;
	td = curthread;
	vap = &vattr;
	vp = ap->a_vp;
	len = *ap->a_len;
	offset = *ap->a_offset;

	error = VOP_GETATTR(vp, vap, td->td_ucred);
	if (error != 0)
		goto out;
	fsize = vap->va_size;
	iosize = vap->va_blocksize;
	if (iosize == 0)
		iosize = BLKDEV_IOSIZE;
	if (iosize > MAXPHYS)
		iosize = MAXPHYS;
	buf = malloc(iosize, M_TEMP, M_WAITOK);

#ifdef __notyet__
	/*
	 * Check if the filesystem sets f_maxfilesize; if not use
	 * VOP_SETATTR to perform the check.
	 */
	error = VFS_STATFS(vp->v_mount, &sfs, td);
	if (error != 0)
		goto out;
	if (sfs.f_maxfilesize) {
		if (offset > sfs.f_maxfilesize || len > sfs.f_maxfilesize ||
		    offset + len > sfs.f_maxfilesize) {
			error = EFBIG;
			goto out;
		}
	} else
#endif
	if (offset + len > vap->va_size) {
		/*
		 * Test offset + len against the filesystem's maxfilesize.
		 */
		VATTR_NULL(vap);
		vap->va_size = offset + len;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
		VATTR_NULL(vap);
		vap->va_size = fsize;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
	}

	for (;;) {
		/*
		 * Read and write back anything below the nominal file
		 * size.  There's currently no way outside the filesystem
		 * to know whether this area is sparse or not.
		 */
		cur = iosize;
		if ((offset % iosize) != 0)
			cur -= (offset % iosize);
		if (cur > len)
			cur = len;
		if (offset < fsize) {
			aiov.iov_base = buf;
			aiov.iov_len = cur;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = offset;
			auio.uio_resid = cur;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_td = td;
			error = VOP_READ(vp, &auio, 0, td->td_ucred);
			if (error != 0)
				break;
			if (auio.uio_resid > 0) {
				bzero(buf + cur - auio.uio_resid,
				    auio.uio_resid);
			}
		} else {
			bzero(buf, cur);
		}

		aiov.iov_base = buf;
		aiov.iov_len = cur;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = offset;
		auio.uio_resid = cur;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_td = td;

		error = VOP_WRITE(vp, &auio, 0, td->td_ucred);
		if (error != 0)
			break;

		len -= cur;
		offset += cur;
		if (len == 0)
			break;
		if (should_yield())
			break;
	}

out:
	*ap->a_len = len;
	*ap->a_offset = offset;
	free(buf, M_TEMP);
	return (error);
}

int
vop_stdadvise(struct vop_advise_args *ap)
{
	struct buf *bp;
	struct buflists *bl;
	struct vnode *vp;
	daddr_t bn, startn, endn;
	off_t start, end;
	int bsize, error;

	vp = ap->a_vp;
	switch (ap->a_advice) {
	case POSIX_FADV_WILLNEED:
		/*
		 * Do nothing for now.  Filesystems should provide a
		 * custom method which starts an asynchronous read of
		 * the requested region.
		 */
		error = 0;
		break;
	case POSIX_FADV_DONTNEED:
		error = 0;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if (vp->v_iflag & VI_DOOMED) {
			VOP_UNLOCK(vp, 0);
			break;
		}

		/*
		 * Deactivate pages in the specified range from the backing VM
		 * object.  Pages that are resident in the buffer cache will
		 * remain wired until their corresponding buffers are released
		 * below.
		 */
		if (vp->v_object != NULL) {
			start = trunc_page(ap->a_start);
			end = round_page(ap->a_end);
			VM_OBJECT_WLOCK(vp->v_object);
			vm_object_page_noreuse(vp->v_object, OFF_TO_IDX(start),
			    OFF_TO_IDX(end));
			VM_OBJECT_WUNLOCK(vp->v_object);
		}

		BO_RLOCK(&vp->v_bufobj);
		bsize = vp->v_bufobj.bo_bsize;
		startn = ap->a_start / bsize;
		endn = -1;
		bl = &vp->v_bufobj.bo_clean.bv_hd;
		if (!TAILQ_EMPTY(bl))
			endn = TAILQ_LAST(bl, buflists)->b_lblkno;
		bl = &vp->v_bufobj.bo_dirty.bv_hd;
		if (!TAILQ_EMPTY(bl) &&
		    endn < TAILQ_LAST(bl, buflists)->b_lblkno)
			endn = TAILQ_LAST(bl, buflists)->b_lblkno;
		if (ap->a_end != OFF_MAX && endn != -1)
			endn = ap->a_end / bsize;
		BO_RUNLOCK(&vp->v_bufobj);
		/*
		 * In the VMIO case, use the B_NOREUSE flag to hint that the
		 * pages backing each buffer in the range are unlikely to be
		 * reused.  Dirty buffers will have the hint applied once
		 * they've been written.
		 */
		for (bn = startn; bn <= endn; bn++) {
			bp = getblk(vp, bn, bsize, 0, 0, GB_NOCREAT |
			    GB_UNMAPPED);
			if (bp == NULL)
				continue;
			bp->b_flags |= B_RELBUF;
			if (vp->v_object != NULL)
				bp->b_flags |= B_NOREUSE;
			brelse(bp);
		}
		VOP_UNLOCK(vp, 0);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

int
vop_stdunp_bind(struct vop_unp_bind_args *ap)
{

	ap->a_vp->v_socket = ap->a_socket;
	return (0);
}

int
vop_stdunp_connect(struct vop_unp_connect_args *ap)
{

	*ap->a_socket = ap->a_vp->v_socket;
	return (0);
}

int
vop_stdunp_detach(struct vop_unp_detach_args *ap)
{

	ap->a_vp->v_socket = NULL;
	return (0);
}

static int
vop_stdis_text(struct vop_is_text_args *ap)
{

	return ((ap->a_vp->v_vflag & VV_TEXT) != 0);
}

static int
vop_stdset_text(struct vop_set_text_args *ap)
{

	ap->a_vp->v_vflag |= VV_TEXT;
	return (0);
}

static int
vop_stdunset_text(struct vop_unset_text_args *ap)
{

	ap->a_vp->v_vflag &= ~VV_TEXT;
	return (0);
}

static int
vop_stdget_writecount(struct vop_get_writecount_args *ap)
{

	*ap->a_writecount = ap->a_vp->v_writecount;
	return (0);
}

static int
vop_stdadd_writecount(struct vop_add_writecount_args *ap)
{

	ap->a_vp->v_writecount += ap->a_inc;
	return (0);
}

/*
 * vfs default ops
 * used to fill the vfs function table to get reasonable default return values.
 */
int
vfs_stdroot (mp, flags, vpp)
	struct mount *mp;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdstatfs (mp, sbp)
	struct mount *mp;
	struct statfs *sbp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdquotactl (mp, cmds, uid, arg)
	struct mount *mp;
	int cmds;
	uid_t uid;
	void *arg;
{

	return (EOPNOTSUPP);
}

int
vfs_stdsync(mp, waitfor)
	struct mount *mp;
	int waitfor;
{
	struct vnode *vp, *mvp;
	struct thread *td;
	int error, lockreq, allerror = 0;

	td = curthread;
	lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
	if (waitfor != MNT_WAIT)
		lockreq |= LK_NOWAIT;
	/*
	 * Force stale buffer cache information to be flushed.
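	 *
	 * If a vnode is recycled while we iterate (vget() returns ENOENT),
	 * the pass is restarted from the beginning of the mount's vnode list.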
	 */
loop:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		if (vp->v_bufobj.bo_dirty.bv_cnt == 0) {
			VI_UNLOCK(vp);
			continue;
		}
		if ((error = vget(vp, lockreq, td)) != 0) {
			if (error == ENOENT) {
				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
				goto loop;
			}
			continue;
		}
		error = VOP_FSYNC(vp, waitfor, td);
		if (error)
			allerror = error;
		vput(vp);
	}
	return (allerror);
}

int
vfs_stdnosync (mp, waitfor)
	struct mount *mp;
	int waitfor;
{

	return (0);
}

int
vfs_stdvget (mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdfhtovp (mp, fhp, flags, vpp)
	struct mount *mp;
	struct fid *fhp;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdinit (vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stduninit (vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname)
	struct mount *mp;
	int cmd;
	struct vnode *filename_vp;
	int attrnamespace;
	const char *attrname;
{

	if (filename_vp != NULL)
		VOP_UNLOCK(filename_vp, 0);
	return (EOPNOTSUPP);
}

int
vfs_stdsysctl(mp, op, req)
	struct mount *mp;
	fsctlop_t op;
	struct sysctl_req *req;
{

	return (EOPNOTSUPP);
}

/* end of vfs default ops */