/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed
 * to Berkeley by John Heidemann of the UCLA Ficus project.
 *
 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/poll.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

static int	vop_nolookup(struct vop_lookup_args *);
static int	vop_nostrategy(struct vop_strategy_args *);

/*
 * This vnode table stores what we want to do if the filesystem doesn't
 * implement a particular VOP.
 *
 * If there is no specific entry here, we will return EOPNOTSUPP.
 */

struct vop_vector default_vnodeops = {
	.vop_default =		NULL,
	.vop_bypass =		VOP_EOPNOTSUPP,

	.vop_advlock =		vop_stdadvlock,
	.vop_advlockasync =	vop_stdadvlockasync,
	.vop_bmap =		vop_stdbmap,
	.vop_close =		VOP_NULL,
	.vop_fsync =		VOP_NULL,
	.vop_getpages =		vop_stdgetpages,
	.vop_getwritemount =	vop_stdgetwritemount,
	.vop_inactive =		VOP_NULL,
	.vop_ioctl =		VOP_ENOTTY,
	.vop_kqfilter =		vop_stdkqfilter,
	.vop_islocked =		vop_stdislocked,
	.vop_lease =		VOP_NULL,
	.vop_lock1 =		vop_stdlock,
	.vop_lookup =		vop_nolookup,
	.vop_open =		VOP_NULL,
	.vop_pathconf =		VOP_EINVAL,
	.vop_poll =		vop_nopoll,
	.vop_putpages =		vop_stdputpages,
	.vop_readlink =		VOP_EINVAL,
	.vop_revoke =		VOP_PANIC,
	.vop_strategy =		vop_nostrategy,
	.vop_unlock =		vop_stdunlock,
	.vop_vptofh =		vop_stdvptofh,
};

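/*
 * As a rough illustration of how this table is used (hypothetical myfs_*
 * names, not part of this file): a filesystem normally lists only the
 * operations it implements and points .vop_default here, so anything it
 * leaves out falls through to the defaults above, e.g.:
 *
 *	static struct vop_vector myfs_vnodeops = {
 *		.vop_default =	&default_vnodeops,
 *		.vop_lookup =	myfs_lookup,
 *		.vop_read =	myfs_read,
 *		.vop_reclaim =	myfs_reclaim,
 *	};
 */
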
/*
 * Series of placeholder functions for various error returns for
 * VOPs.
 */

int
vop_eopnotsupp(struct vop_generic_args *ap)
{
	/*
	printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
	*/

	return (EOPNOTSUPP);
}

int
vop_ebadf(struct vop_generic_args *ap)
{

	return (EBADF);
}

int
vop_enotty(struct vop_generic_args *ap)
{

	return (ENOTTY);
}

int
vop_einval(struct vop_generic_args *ap)
{

	return (EINVAL);
}

int
vop_null(struct vop_generic_args *ap)
{

	return (0);
}

/*
 * Helper function to panic on some bad VOPs in some filesystems.
 */
int
vop_panic(struct vop_generic_args *ap)
{

	panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
}

/*
 * vop_std<something> and vop_no<something> are default functions for use by
 * filesystems that need the "default reasonable" implementation for a
 * particular operation.
 *
 * Documentation for the operations they implement, where it exists, lives
 * in the corresponding VOP_<SOMETHING>(9) manpage (all uppercase).
 */

/*
 * Default vop for filesystems that do not support name lookup
 */
static int
vop_nolookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 *	vop_nostrategy:
 *
 *	Strategy routine for VFS devices that have none.
 *
 *	BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
 *	routine.  Typically this is done for a BIO_READ strategy call.
 *	Typically B_INVAL is assumed to already be clear prior to a write
 *	and should not be cleared manually unless you just made the buffer
 *	invalid.  BIO_ERROR should be cleared either way.
 */

static int
vop_nostrategy (struct vop_strategy_args *ap)
{
	printf("No strategy for buffer at %p\n", ap->a_bp);
	vprint("vnode", ap->a_vp);
	ap->a_bp->b_ioflags |= BIO_ERROR;
	ap->a_bp->b_error = EOPNOTSUPP;
	bufdone(ap->a_bp);
	return (EOPNOTSUPP);
}

/*
 * Advisory record locking support
 */
int
vop_stdadvlock(struct vop_advlock_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct thread *td = curthread;
	struct vattr vattr;
	int error;

	/* lf_advlock() needs the file size to resolve SEEK_END-relative ranges. */
	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
	VOP_UNLOCK(vp, 0);
	if (error)
		return (error);

	return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockasync(struct vop_advlockasync_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct thread *td = curthread;
	struct vattr vattr;
	int error;

	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
	VOP_UNLOCK(vp, 0);
	if (error)
		return (error);

	return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size));
}

/*
 * vop_stdpathconf:
 *
 * Standard implementation of POSIX pathconf, to get information about limits
 * for a filesystem.
 * Override per filesystem for the case where the filesystem has smaller
 * limits.
 */
int
vop_stdpathconf(ap)
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		int *a_retval;
	} */ *ap;
{

	switch (ap->a_name) {
	case _PC_NAME_MAX:
		*ap->a_retval = NAME_MAX;
		return (0);
	case _PC_PATH_MAX:
		*ap->a_retval = PATH_MAX;
		return (0);
	case _PC_LINK_MAX:
		*ap->a_retval = LINK_MAX;
		return (0);
	case _PC_MAX_CANON:
		*ap->a_retval = MAX_CANON;
		return (0);
	case _PC_MAX_INPUT:
		*ap->a_retval = MAX_INPUT;
		return (0);
	case _PC_PIPE_BUF:
		*ap->a_retval = PIPE_BUF;
		return (0);
	case _PC_CHOWN_RESTRICTED:
		*ap->a_retval = 1;
		return (0);
	case _PC_VDISABLE:
		*ap->a_retval = _POSIX_VDISABLE;
		return (0);
	default:
		return (EINVAL);
	}
	/* NOTREACHED */
}

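/*
 * A filesystem with tighter limits can supply its own pathconf and fall
 * back to the default above for everything else.  A minimal sketch, with a
 * hypothetical myfs_pathconf and a made-up 255-character name limit
 * (neither is part of this file):
 *
 *	static int
 *	myfs_pathconf(struct vop_pathconf_args *ap)
 *	{
 *
 *		switch (ap->a_name) {
 *		case _PC_NAME_MAX:
 *			*ap->a_retval = 255;
 *			return (0);
 *		default:
 *			return (vop_stdpathconf(ap));
 *		}
 *	}
 */
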
/*
 * Standard lock, unlock and islocked functions.
 */
int
vop_stdlock(ap)
	struct vop_lock1_args /* {
		struct vnode *a_vp;
		int a_flags;
		char *file;
		int line;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
	    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file,
	    ap->a_line));
}

/* See above. */
int
vop_stdunlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp)));
}

/* See above. */
int
vop_stdislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (lockstatus(ap->a_vp->v_vnlock));
}

/*
 * Return true for select/poll.
 */
int
vop_nopoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	/*
	 * Return true for read/write.  If the user asked for something
	 * special, return POLLNVAL, so that clients have a way of
	 * determining reliably whether or not the extended
	 * functionality is present without hard-coding knowledge
	 * of specific filesystem implementations.
	 * Stay in sync with kern_conf.c::no_poll().
	 */
	if (ap->a_events & ~POLLSTANDARD)
		return (POLLNVAL);

	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Implement poll for local filesystems that support it.
 */
int
vop_stdpoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	if (ap->a_events & ~POLLSTANDARD)
		return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Return our mount point, as we will take charge of the writes.
 */
int
vop_stdgetwritemount(ap)
	struct vop_getwritemount_args /* {
		struct vnode *a_vp;
		struct mount **a_mpp;
	} */ *ap;
{
	struct mount *mp;

	/*
	 * XXX Since this is called unlocked we may be recycled while
	 * attempting to ref the mount.  If this is the case, the mountpoint
	 * will be set to NULL.  We only have to prevent this call from
	 * returning with a ref to an incorrect mountpoint.  It is not
	 * harmful to return with a ref to our previous mountpoint.
	 */
	mp = ap->a_vp->v_mount;
	if (mp != NULL) {
		vfs_ref(mp);
		if (mp != ap->a_vp->v_mount) {
			vfs_rel(mp);
			mp = NULL;
		}
	}
	*(ap->a_mpp) = mp;
	return (0);
}

/*
 * Default bmap: the data for a logical block lives in the vnode's own
 * buffer object, at the logical block number converted to DEV_BSIZE
 * units; no read-ahead or read-behind is reported.
 * (XXX still needs a proper VOP_BMAP(9) manpage.)
 */
int
vop_stdbmap(ap)
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t a_bn;
		struct bufobj **a_bop;
		daddr_t *a_bnp;
		int *a_runp;
		int *a_runb;
	} */ *ap;
{

	if (ap->a_bop != NULL)
		*ap->a_bop = &ap->a_vp->v_bufobj;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;
	return (0);
}

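/*
 * For example (illustrative numbers only): with an f_iosize of 16384 bytes,
 * btodb(16384) is 32 DEV_BSIZE (512-byte) blocks per filesystem block, so
 * logical block 5 maps to device block 5 * 32 = 160 within the vnode's
 * buffer object.
 */
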
int
vop_stdfsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct buf *bp;
	struct bufobj *bo;
	struct buf *nbp;
	int error = 0;
	int maxretry = 1000;	/* large, arbitrarily chosen */

	bo = &vp->v_bufobj;
	BO_LOCK(bo);
loop1:
	/*
	 * MARK/SCAN initialization to avoid infinite loops.
	 */
	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
		bp->b_vflags &= ~BV_SCANNED;
		bp->b_error = 0;
	}

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
loop2:
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		BO_UNLOCK(bo);
		KASSERT(bp->b_bufobj == bo,
		    ("bp %p wrong b_bufobj %p should be %p",
		    bp, bp->b_bufobj, bo));
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("fsync: not dirty");
		if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) {
			vfs_bio_awrite(bp);
		} else {
			bremfree(bp);
			bawrite(bp);
		}
		BO_LOCK(bo);
		goto loop2;
	}

	/*
	 * If synchronous the caller expects us to completely resolve all
	 * dirty buffers in the system.  Wait for in-progress I/O to
	 * complete (which could include background bitmap writes), then
	 * retry if dirty blocks still exist.
	 */
	if (ap->a_waitfor == MNT_WAIT) {
		bufobj_wwait(bo, 0, 0);
		if (bo->bo_dirty.bv_cnt > 0) {
			/*
			 * If we are unable to write any of these buffers
			 * then we fail now rather than trying endlessly
			 * to write them out.
			 */
			TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
				if ((error = bp->b_error) == 0)
					continue;
			if (error == 0 && --maxretry >= 0)
				goto loop1;
			error = EAGAIN;
		}
	}
	BO_UNLOCK(bo);
	if (error == EAGAIN)
		vprint("fsync: giving up on dirty", vp);

	return (error);
}

/* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */
int
vop_stdgetpages(ap)
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_reqpage;
		vm_ooffset_t a_offset;
	} */ *ap;
{

	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count, ap->a_reqpage);
}

int
vop_stdkqfilter(struct vop_kqfilter_args *ap)
{
	return vfs_kqfilter(ap);
}

/* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */
int
vop_stdputpages(ap)
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_sync;
		int *a_rtvals;
		vm_ooffset_t a_offset;
	} */ *ap;
{

	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
	    ap->a_sync, ap->a_rtvals);
}

int
vop_stdvptofh(struct vop_vptofh_args *ap)
{
	return (EOPNOTSUPP);
}

/*
 * vfs default ops
 * used to fill the vfs function table to get reasonable default return values.
 */
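/*
 * Roughly how these tend to be used (illustrative only, hypothetical myfs_*
 * names): a filesystem's vfsops table can point entries it does not care
 * about at the vfs_std* routines below, e.g.:
 *
 *	static struct vfsops myfs_vfsops = {
 *		.vfs_mount =	myfs_mount,
 *		.vfs_unmount =	myfs_unmount,
 *		.vfs_root =	myfs_root,
 *		.vfs_statfs =	myfs_statfs,
 *		.vfs_sync =	vfs_stdsync,
 *		.vfs_vget =	vfs_stdvget,
 *	};
 */
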
int
vfs_stdroot (mp, flags, vpp, td)
	struct mount *mp;
	int flags;
	struct vnode **vpp;
	struct thread *td;
{

	return (EOPNOTSUPP);
}

int
vfs_stdstatfs (mp, sbp, td)
	struct mount *mp;
	struct statfs *sbp;
	struct thread *td;
{

	return (EOPNOTSUPP);
}

int
vfs_stdquotactl (mp, cmds, uid, arg, td)
	struct mount *mp;
	int cmds;
	uid_t uid;
	void *arg;
	struct thread *td;
{

	return (EOPNOTSUPP);
}

int
vfs_stdsync(mp, waitfor, td)
	struct mount *mp;
	int waitfor;
	struct thread *td;
{
	struct vnode *vp, *mvp;
	int error, lockreq, allerror = 0;

	lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
	if (waitfor != MNT_WAIT)
		lockreq |= LK_NOWAIT;
	/*
	 * Force stale buffer cache information to be flushed.
	 */
	MNT_ILOCK(mp);
loop:
	MNT_VNODE_FOREACH(vp, mp, mvp) {
		/* bv_cnt is an acceptable race here. */
		if (vp->v_bufobj.bo_dirty.bv_cnt == 0)
			continue;
		VI_LOCK(vp);
		MNT_IUNLOCK(mp);
		if ((error = vget(vp, lockreq, td)) != 0) {
			MNT_ILOCK(mp);
			if (error == ENOENT) {
				MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
				goto loop;
			}
			continue;
		}
		error = VOP_FSYNC(vp, waitfor, td);
		if (error)
			allerror = error;

		/* Do not turn this into vput.  td is not always curthread. */
		VOP_UNLOCK(vp, 0);
		vrele(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	return (allerror);
}

int
vfs_stdnosync (mp, waitfor, td)
	struct mount *mp;
	int waitfor;
	struct thread *td;
{

	return (0);
}

int
vfs_stdvget (mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdfhtovp (mp, fhp, vpp)
	struct mount *mp;
	struct fid *fhp;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdinit (vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stduninit (vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname, td)
	struct mount *mp;
	int cmd;
	struct vnode *filename_vp;
	int attrnamespace;
	const char *attrname;
	struct thread *td;
{

	if (filename_vp != NULL)
		VOP_UNLOCK(filename_vp, 0);
	return (EOPNOTSUPP);
}

int
vfs_stdsysctl(mp, op, req)
	struct mount *mp;
	fsctlop_t op;
	struct sysctl_req *req;
{

	return (EOPNOTSUPP);
}

/* end of vfs default ops */