/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed
 * to Berkeley by John Heidemann of the UCLA Ficus project.
 *
 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/fcntl.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/poll.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

static int	vop_nolookup(struct vop_lookup_args *);
static int	vop_norename(struct vop_rename_args *);
static int	vop_nostrategy(struct vop_strategy_args *);
static int	get_next_dirent(struct vnode *vp, struct dirent **dpp,
		    char *dirbuf, int dirbuflen, off_t *off,
		    char **cpos, int *len, int *eofflag,
		    struct thread *td);
static int	dirent_exists(struct vnode *vp, const char *dirname,
		    struct thread *td);

#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)

/*
 * This vnode table stores what we want to do if the filesystem doesn't
 * implement a particular VOP.
 *
 * If there is no specific entry here, we will return EOPNOTSUPP.
 *
 * Note that every filesystem has to implement either vop_access
 * or vop_accessx; failing to do so will result in an immediate crash
 * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(),
 * which calls vop_stdaccess(), etc.
 */

struct vop_vector default_vnodeops = {
	.vop_default =		NULL,
	.vop_bypass =		VOP_EOPNOTSUPP,

	.vop_access =		vop_stdaccess,
	.vop_accessx =		vop_stdaccessx,
	.vop_advise =		vop_stdadvise,
	.vop_advlock =		vop_stdadvlock,
	.vop_advlockasync =	vop_stdadvlockasync,
	.vop_advlockpurge =	vop_stdadvlockpurge,
	.vop_allocate =		vop_stdallocate,
	.vop_bmap =		vop_stdbmap,
	.vop_close =		VOP_NULL,
	.vop_fsync =		VOP_NULL,
	.vop_getpages =		vop_stdgetpages,
	.vop_getwritemount =	vop_stdgetwritemount,
	.vop_inactive =		VOP_NULL,
	.vop_ioctl =		VOP_ENOTTY,
	.vop_kqfilter =		vop_stdkqfilter,
	.vop_islocked =		vop_stdislocked,
	.vop_lock1 =		vop_stdlock,
	.vop_lookup =		vop_nolookup,
	.vop_open =		VOP_NULL,
	.vop_pathconf =		VOP_EINVAL,
	.vop_poll =		vop_nopoll,
	.vop_putpages =		vop_stdputpages,
	.vop_readlink =		VOP_EINVAL,
	.vop_rename =		vop_norename,
	.vop_revoke =		VOP_PANIC,
	.vop_strategy =		vop_nostrategy,
	.vop_unlock =		vop_stdunlock,
	.vop_vptocnp =		vop_stdvptocnp,
	.vop_vptofh =		vop_stdvptofh,
};
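
/*
 * A filesystem typically reaches this table through the vop_default
 * field of its own vop_vector, so any VOP it leaves unset falls
 * through to the defaults above.  A minimal sketch, for a hypothetical
 * "myfs":
 *
 *	struct vop_vector myfs_vnodeops = {
 *		.vop_default =	&default_vnodeops,
 *		.vop_lookup =	myfs_lookup,
 *		.vop_readdir =	myfs_readdir,
 *	};
 */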

/*
 * Series of placeholder functions for various error returns for
 * VOPs.
 */

int
vop_eopnotsupp(struct vop_generic_args *ap)
{
	/*
	printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
	*/

	return (EOPNOTSUPP);
}

int
vop_ebadf(struct vop_generic_args *ap)
{

	return (EBADF);
}

int
vop_enotty(struct vop_generic_args *ap)
{

	return (ENOTTY);
}

int
vop_einval(struct vop_generic_args *ap)
{

	return (EINVAL);
}

int
vop_enoent(struct vop_generic_args *ap)
{

	return (ENOENT);
}

int
vop_null(struct vop_generic_args *ap)
{

	return (0);
}

/*
 * Helper function to panic on some bad VOPs in some filesystems.
 */
int
vop_panic(struct vop_generic_args *ap)
{

	panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
}

/*
 * vop_std<something> and vop_no<something> are default functions for use by
 * filesystems that need the "default reasonable" implementation for a
 * particular operation.
 *
 * Documentation for the operations they implement, where it exists, is
 * in the VOP_<SOMETHING>(9) manpage (all uppercase).
 */

/*
 * Default vop for filesystems that do not support name lookup
 */
static int
vop_nolookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 * vop_norename:
 *
 * Handle unlock and reference counting for arguments of vop_rename
 * for filesystems that do not implement the rename operation.
 */
static int
vop_norename(struct vop_rename_args *ap)
{

	vop_rename_fail(ap);
	return (EOPNOTSUPP);
}

/*
 * vop_nostrategy:
 *
 * Strategy routine for VFS devices that have none.
 *
 * BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
 * routine.  Typically this is done for a BIO_READ strategy call.
 * Typically B_INVAL is assumed to already be clear prior to a write
 * and should not be cleared manually unless you just made the buffer
 * invalid.  BIO_ERROR should be cleared either way.
 */

static int
vop_nostrategy (struct vop_strategy_args *ap)
{
	printf("No strategy for buffer at %p\n", ap->a_bp);
	vprint("vnode", ap->a_vp);
	ap->a_bp->b_ioflags |= BIO_ERROR;
	ap->a_bp->b_error = EOPNOTSUPP;
	bufdone(ap->a_bp);
	return (EOPNOTSUPP);
}
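
/*
 * Fetch the next directory entry from the locked directory vnode vp.
 * When the caller's buffer is exhausted (*len == 0), refill it with a
 * VOP_READDIR() starting at *off; then return the entry at *cpos in
 * *dpp and advance the cursor past it.  Fails with EINVAL on a
 * malformed (undersized) record and with ENOENT once the directory is
 * exhausted.
 */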

static int
get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
    int dirbuflen, off_t *off, char **cpos, int *len,
    int *eofflag, struct thread *td)
{
	int error, reclen;
	struct uio uio;
	struct iovec iov;
	struct dirent *dp;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	if (*len == 0) {
		iov.iov_base = dirbuf;
		iov.iov_len = dirbuflen;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = *off;
		uio.uio_resid = dirbuflen;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_rw = UIO_READ;
		uio.uio_td = td;

		*eofflag = 0;

#ifdef MAC
		error = mac_vnode_check_readdir(td->td_ucred, vp);
		if (error == 0)
#endif
			error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag,
			    NULL, NULL);
		if (error)
			return (error);

		*off = uio.uio_offset;

		*cpos = dirbuf;
		*len = (dirbuflen - uio.uio_resid);

		if (*len == 0)
			return (ENOENT);
	}

	dp = (struct dirent *)(*cpos);
	reclen = dp->d_reclen;
	*dpp = dp;

	/* Check for a malformed directory entry. */
	if (reclen < DIRENT_MINSIZE)
		return (EINVAL);

	*cpos += reclen;
	*len -= reclen;

	return (0);
}

/*
 * Check if a named file exists in a given directory vnode.
 */
static int
dirent_exists(struct vnode *vp, const char *dirname, struct thread *td)
{
	char *dirbuf, *cpos;
	int error, eofflag, dirbuflen, len, found;
	off_t off;
	struct dirent *dp;
	struct vattr va;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	found = 0;

	error = VOP_GETATTR(vp, &va, td->td_ucred);
	if (error)
		return (found);

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	off = 0;
	len = 0;
	do {
		error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off,
					&cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if ((dp->d_type != DT_WHT) &&
		    !strcmp(dp->d_name, dirname)) {
			found = 1;
			goto out;
		}
	} while (len > 0 || !eofflag);

out:
	free(dirbuf, M_TEMP);
	return (found);
}

int
vop_stdaccess(struct vop_access_args *ap)
{

	KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
	    VAPPEND)) == 0, ("invalid bit in accmode"));

	return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td));
}

int
vop_stdaccessx(struct vop_accessx_args *ap)
{
	int error;
	accmode_t accmode = ap->a_accmode;

	error = vfs_unixify_accmode(&accmode);
	if (error != 0)
		return (error);

	if (accmode == 0)
		return (0);

	return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td));
}

/*
 * Advisory record locking support
 */
int
vop_stdadvlock(struct vop_advlock_args *ap)
{
	struct vnode *vp;
	struct ucred *cred;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	cred = curthread->td_ucred;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vp, &vattr, cred);
	VOP_UNLOCK(vp, 0);
	if (error)
		return (error);

	return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockasync(struct vop_advlockasync_args *ap)
{
	struct vnode *vp;
	struct ucred *cred;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	cred = curthread->td_ucred;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vp, &vattr, cred);
	VOP_UNLOCK(vp, 0);
	if (error)
		return (error);

	return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockpurge(struct vop_advlockpurge_args *ap)
{
	struct vnode *vp;

	vp = ap->a_vp;
	lf_purgelocks(vp, &vp->v_lockf);
	return (0);
}

/*
 * vop_stdpathconf:
 *
 * Standard implementation of POSIX pathconf, to get information about limits
 * for a filesystem.
 * Override per filesystem for the case where the filesystem has smaller
 * limits.
 */
int
vop_stdpathconf(ap)
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		int *a_retval;
	} */ *ap;
{

	switch (ap->a_name) {
		case _PC_NAME_MAX:
			*ap->a_retval = NAME_MAX;
			return (0);
		case _PC_PATH_MAX:
			*ap->a_retval = PATH_MAX;
			return (0);
		case _PC_LINK_MAX:
			*ap->a_retval = LINK_MAX;
			return (0);
		case _PC_MAX_CANON:
			*ap->a_retval = MAX_CANON;
			return (0);
		case _PC_MAX_INPUT:
			*ap->a_retval = MAX_INPUT;
			return (0);
		case _PC_PIPE_BUF:
			*ap->a_retval = PIPE_BUF;
			return (0);
		case _PC_CHOWN_RESTRICTED:
			*ap->a_retval = 1;
			return (0);
		case _PC_VDISABLE:
			*ap->a_retval = _POSIX_VDISABLE;
			return (0);
		default:
			return (EINVAL);
	}
	/* NOTREACHED */
}
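
/*
 * A filesystem with no limits tighter than these can simply install
 * the default in its vop_vector, e.g. (hypothetical entry):
 *
 *	.vop_pathconf =	vop_stdpathconf,
 *
 * and the values above are what pathconf(2) will then report for it.
 */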

/*
 * Standard lock, unlock and islocked functions.
 */
int
vop_stdlock(ap)
	struct vop_lock1_args /* {
		struct vnode *a_vp;
		int a_flags;
		char *file;
		int line;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
	    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file,
	    ap->a_line));
}

/* See above. */
int
vop_stdunlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp)));
}

/* See above. */
int
vop_stdislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (lockstatus(ap->a_vp->v_vnlock));
}

/*
 * Return true for select/poll.
 */
int
vop_nopoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{

	return (poll_no_poll(ap->a_events));
}

/*
 * Implement poll for local filesystems that support it.
 */
int
vop_stdpoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	if (ap->a_events & ~POLLSTANDARD)
		return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Return our mount point, as we will take charge of the writes.
 */
int
vop_stdgetwritemount(ap)
	struct vop_getwritemount_args /* {
		struct vnode *a_vp;
		struct mount **a_mpp;
	} */ *ap;
{
	struct mount *mp;

	/*
	 * XXX Since this is called unlocked we may be recycled while
	 * attempting to ref the mount.  If this is the case our mountpoint
	 * will be set to NULL.  We only have to prevent this call from
	 * returning with a ref to an incorrect mountpoint.  It is not
	 * harmful to return with a ref to our previous mountpoint.
	 */
	mp = ap->a_vp->v_mount;
	if (mp != NULL) {
		vfs_ref(mp);
		if (mp != ap->a_vp->v_mount) {
			vfs_rel(mp);
			mp = NULL;
		}
	}
	*(ap->a_mpp) = mp;
	return (0);
}

/* XXX Needs good comment and VOP_BMAP(9) manpage */
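
/*
 * Roughly what the default does: map the logical block number a_bn
 * (in units of the mount's f_iosize) onto the equivalent DEV_BSIZE
 * block address of the vnode's own buffer object, and report no
 * read-ahead or read-behind (*a_runp and *a_runb of 0).
 */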

int
vop_stdbmap(ap)
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t a_bn;
		struct bufobj **a_bop;
		daddr_t *a_bnp;
		int *a_runp;
		int *a_runb;
	} */ *ap;
{

	if (ap->a_bop != NULL)
		*ap->a_bop = &ap->a_vp->v_bufobj;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;
	return (0);
}

int
vop_stdfsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct buf *bp;
	struct bufobj *bo;
	struct buf *nbp;
	int error = 0;
	int maxretry = 1000;	/* large, arbitrarily chosen */

	bo = &vp->v_bufobj;
	BO_LOCK(bo);
loop1:
	/*
	 * MARK/SCAN initialization to avoid infinite loops.
	 */
	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
		bp->b_vflags &= ~BV_SCANNED;
		bp->b_error = 0;
	}

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
loop2:
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		BO_UNLOCK(bo);
		KASSERT(bp->b_bufobj == bo,
		    ("bp %p wrong b_bufobj %p should be %p",
		    bp, bp->b_bufobj, bo));
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("fsync: not dirty");
		if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) {
			vfs_bio_awrite(bp);
		} else {
			bremfree(bp);
			bawrite(bp);
		}
		BO_LOCK(bo);
		goto loop2;
	}

	/*
	 * If synchronous the caller expects us to completely resolve all
	 * dirty buffers in the system.  Wait for in-progress I/O to
	 * complete (which could include background bitmap writes), then
	 * retry if dirty blocks still exist.
	 */
	if (ap->a_waitfor == MNT_WAIT) {
		bufobj_wwait(bo, 0, 0);
		if (bo->bo_dirty.bv_cnt > 0) {
			/*
			 * If we are unable to write any of these buffers
			 * then we fail now rather than trying endlessly
			 * to write them out.
			 */
			TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
				if ((error = bp->b_error) == 0)
					continue;
			if (error == 0 && --maxretry >= 0)
				goto loop1;
			error = EAGAIN;
		}
	}
	BO_UNLOCK(bo);
	if (error == EAGAIN)
		vprint("fsync: giving up on dirty", vp);

	return (error);
}

/* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */
int
vop_stdgetpages(ap)
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_reqpage;
		vm_ooffset_t a_offset;
	} */ *ap;
{

	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count, ap->a_reqpage);
}
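
/*
 * Like vop_stdputpages() below, vop_stdgetpages() simply hands the
 * work to the generic vnode pager, which (roughly) maps the request
 * onto the buffer cache via VOP_BMAP() and performs the I/O itself.
 */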

int
vop_stdkqfilter(struct vop_kqfilter_args *ap)
{
	return vfs_kqfilter(ap);
}

/* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */
int
vop_stdputpages(ap)
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_sync;
		int *a_rtvals;
		vm_ooffset_t a_offset;
	} */ *ap;
{

	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
	     ap->a_sync, ap->a_rtvals);
}

int
vop_stdvptofh(struct vop_vptofh_args *ap)
{
	return (EOPNOTSUPP);
}

int
vop_stdvptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode **dvp = ap->a_vpp;
	struct ucred *cred = ap->a_cred;
	char *buf = ap->a_buf;
	int *buflen = ap->a_buflen;
	char *dirbuf, *cpos;
	int i, error, eofflag, dirbuflen, flags, locked, len, covered;
	off_t off;
	ino_t fileno;
	struct vattr va;
	struct nameidata nd;
	struct thread *td;
	struct dirent *dp;
	struct vnode *mvp;

	i = *buflen;
	error = 0;
	covered = 0;
	td = curthread;

	if (vp->v_type != VDIR)
		return (ENOENT);

	error = VOP_GETATTR(vp, &va, cred);
	if (error)
		return (error);

	VREF(vp);
	locked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp, 0);
	NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    "..", vp, td);
	flags = FREAD;
	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL);
	if (error) {
		vn_lock(vp, locked | LK_RETRY);
		return (error);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);

	mvp = *dvp = nd.ni_vp;

	if (vp->v_mount != (*dvp)->v_mount &&
	    ((*dvp)->v_vflag & VV_ROOT) &&
	    ((*dvp)->v_mount->mnt_flag & MNT_UNION)) {
		*dvp = (*dvp)->v_mount->mnt_vnodecovered;
		VREF(mvp);
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
		VREF(*dvp);
		vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
		covered = 1;
	}

	fileno = va.va_fileid;

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	if ((*dvp)->v_type != VDIR) {
		error = ENOENT;
		goto out;
	}

	off = 0;
	len = 0;
	do {
		/* call VOP_READDIR of parent */
		error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off,
					&cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if ((dp->d_type != DT_WHT) &&
		    (dp->d_fileno == fileno)) {
			if (covered) {
				VOP_UNLOCK(*dvp, 0);
				vn_lock(mvp, LK_EXCLUSIVE | LK_RETRY);
				if (dirent_exists(mvp, dp->d_name, td)) {
					error = ENOENT;
					VOP_UNLOCK(mvp, 0);
					vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
					goto out;
				}
				VOP_UNLOCK(mvp, 0);
				vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
			}
			i -= dp->d_namlen;

			if (i < 0) {
				error = ENOMEM;
				goto out;
			}
			bcopy(dp->d_name, buf + i, dp->d_namlen);
			error = 0;
			goto out;
		}
	} while (len > 0 || !eofflag);
	error = ENOENT;

out:
	free(dirbuf, M_TEMP);
	if (!error) {
		*buflen = i;
		vref(*dvp);
	}
	if (covered) {
		vput(*dvp);
		vrele(mvp);
	} else {
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
	}
	vn_lock(vp, locked | LK_RETRY);
	return (error);
}
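
/*
 * Default backend for VOP_ALLOCATE() (posix_fallocate(2)): lacking a
 * filesystem-specific way to reserve blocks, force them to be
 * allocated by reading each block in the range (zeroes past EOF) and
 * writing it back, after probing the filesystem's maximum file size by
 * briefly extending the file with VOP_SETATTR().
 */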

int
vop_stdallocate(struct vop_allocate_args *ap)
{
#ifdef __notyet__
	struct statfs sfs;
#endif
	struct iovec aiov;
	struct vattr vattr, *vap;
	struct uio auio;
	off_t fsize, len, cur, offset;
	uint8_t *buf;
	struct thread *td;
	struct vnode *vp;
	size_t iosize;
	int error;

	buf = NULL;
	error = 0;
	td = curthread;
	vap = &vattr;
	vp = ap->a_vp;
	len = *ap->a_len;
	offset = *ap->a_offset;

	error = VOP_GETATTR(vp, vap, td->td_ucred);
	if (error != 0)
		goto out;
	fsize = vap->va_size;
	iosize = vap->va_blocksize;
	if (iosize == 0)
		iosize = BLKDEV_IOSIZE;
	if (iosize > MAXPHYS)
		iosize = MAXPHYS;
	buf = malloc(iosize, M_TEMP, M_WAITOK);

#ifdef __notyet__
	/*
	 * Check if the filesystem sets f_maxfilesize; if not use
	 * VOP_SETATTR to perform the check.
	 */
	error = VFS_STATFS(vp->v_mount, &sfs, td);
	if (error != 0)
		goto out;
	if (sfs.f_maxfilesize) {
		if (offset > sfs.f_maxfilesize || len > sfs.f_maxfilesize ||
		    offset + len > sfs.f_maxfilesize) {
			error = EFBIG;
			goto out;
		}
	} else
#endif
	if (offset + len > vap->va_size) {
		/*
		 * Test offset + len against the filesystem's maxfilesize.
		 */
		VATTR_NULL(vap);
		vap->va_size = offset + len;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
		VATTR_NULL(vap);
		vap->va_size = fsize;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
	}

	for (;;) {
		/*
		 * Read and write back anything below the nominal file
		 * size.  There's currently no way outside the filesystem
		 * to know whether this area is sparse or not.
		 */
		cur = iosize;
		if ((offset % iosize) != 0)
			cur -= (offset % iosize);
		if (cur > len)
			cur = len;
		if (offset < fsize) {
			aiov.iov_base = buf;
			aiov.iov_len = cur;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = offset;
			auio.uio_resid = cur;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_td = td;
			error = VOP_READ(vp, &auio, 0, td->td_ucred);
			if (error != 0)
				break;
			if (auio.uio_resid > 0) {
				bzero(buf + cur - auio.uio_resid,
				    auio.uio_resid);
			}
		} else {
			bzero(buf, cur);
		}

		aiov.iov_base = buf;
		aiov.iov_len = cur;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = offset;
		auio.uio_resid = cur;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_td = td;

		error = VOP_WRITE(vp, &auio, 0, td->td_ucred);
		if (error != 0)
			break;

		len -= cur;
		offset += cur;
		if (len == 0)
			break;
		if (should_yield())
			break;
	}

out:
	*ap->a_len = len;
	*ap->a_offset = offset;
	free(buf, M_TEMP);
	return (error);
}
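
/*
 * Default backend for VOP_ADVISE() (posix_fadvise(2)).  Only
 * POSIX_FADV_DONTNEED does real work here; POSIX_FADV_WILLNEED is
 * accepted but currently a no-op, and any other advice is rejected
 * with EINVAL.
 */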

int
vop_stdadvise(struct vop_advise_args *ap)
{
	struct vnode *vp;
	off_t start, end;
	int error, vfslocked;

	vp = ap->a_vp;
	switch (ap->a_advice) {
	case POSIX_FADV_WILLNEED:
		/*
		 * Do nothing for now.  Filesystems should provide a
		 * custom method which starts an asynchronous read of
		 * the requested region.
		 */
		error = 0;
		break;
	case POSIX_FADV_DONTNEED:
		/*
		 * Flush any open FS buffers and then remove pages
		 * from the backing VM object.  Using vinvalbuf() here
		 * is a bit heavy-handed as it flushes all buffers for
		 * the given vnode, not just the buffers covering the
		 * requested range.
		 */
		error = 0;
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if (vp->v_iflag & VI_DOOMED) {
			VOP_UNLOCK(vp, 0);
			VFS_UNLOCK_GIANT(vfslocked);
			break;
		}
		vinvalbuf(vp, V_CLEANONLY, 0, 0);
		if (vp->v_object != NULL) {
			start = trunc_page(ap->a_start);
			end = round_page(ap->a_end);
			VM_OBJECT_LOCK(vp->v_object);
			vm_object_page_cache(vp->v_object, OFF_TO_IDX(start),
			    OFF_TO_IDX(end));
			VM_OBJECT_UNLOCK(vp->v_object);
		}
		VOP_UNLOCK(vp, 0);
		VFS_UNLOCK_GIANT(vfslocked);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

/*
 * vfs default ops
 * Used to fill the vfs function table to get reasonable default return values.
 */
int
vfs_stdroot (mp, flags, vpp)
	struct mount *mp;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdstatfs (mp, sbp)
	struct mount *mp;
	struct statfs *sbp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdquotactl (mp, cmds, uid, arg)
	struct mount *mp;
	int cmds;
	uid_t uid;
	void *arg;
{

	return (EOPNOTSUPP);
}

int
vfs_stdsync(mp, waitfor)
	struct mount *mp;
	int waitfor;
{
	struct vnode *vp, *mvp;
	struct thread *td;
	int error, lockreq, allerror = 0;

	td = curthread;
	lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
	if (waitfor != MNT_WAIT)
		lockreq |= LK_NOWAIT;
	/*
	 * Force stale buffer cache information to be flushed.
	 */
	MNT_ILOCK(mp);
loop:
	MNT_VNODE_FOREACH(vp, mp, mvp) {
		/* bv_cnt is an acceptable race here. */
		if (vp->v_bufobj.bo_dirty.bv_cnt == 0)
			continue;
		VI_LOCK(vp);
		MNT_IUNLOCK(mp);
		if ((error = vget(vp, lockreq, td)) != 0) {
			MNT_ILOCK(mp);
			if (error == ENOENT) {
				MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
				goto loop;
			}
			continue;
		}
		error = VOP_FSYNC(vp, waitfor, td);
		if (error)
			allerror = error;
		vput(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	return (allerror);
}

int
vfs_stdnosync (mp, waitfor)
	struct mount *mp;
	int waitfor;
{

	return (0);
}

int
vfs_stdvget (mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdfhtovp (mp, fhp, flags, vpp)
	struct mount *mp;
	struct fid *fhp;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdinit (vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stduninit (vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname)
	struct mount *mp;
	int cmd;
	struct vnode *filename_vp;
	int attrnamespace;
	const char *attrname;
{

	if (filename_vp != NULL)
		VOP_UNLOCK(filename_vp, 0);
	return (EOPNOTSUPP);
}

int
vfs_stdsysctl(mp, op, req)
	struct mount *mp;
	fsctlop_t op;
	struct sysctl_req *req;
{

	return (EOPNOTSUPP);
}

/* end of vfs default ops */