1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed 6 * to Berkeley by John Heidemann of the UCLA Ficus project. 7 * 8 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/bio.h> 41 #include <sys/buf.h> 42 #include <sys/conf.h> 43 #include <sys/event.h> 44 #include <sys/kernel.h> 45 #include <sys/limits.h> 46 #include <sys/lock.h> 47 #include <sys/malloc.h> 48 #include <sys/mount.h> 49 #include <sys/mutex.h> 50 #include <sys/unistd.h> 51 #include <sys/vnode.h> 52 #include <sys/poll.h> 53 54 #include <vm/vm.h> 55 #include <vm/vm_object.h> 56 #include <vm/vm_extern.h> 57 #include <vm/pmap.h> 58 #include <vm/vm_map.h> 59 #include <vm/vm_page.h> 60 #include <vm/vm_pager.h> 61 #include <vm/vnode_pager.h> 62 63 static int vop_nolookup(struct vop_lookup_args *); 64 static int vop_nostrategy(struct vop_strategy_args *); 65 66 /* 67 * This vnode table stores what we want to do if the filesystem doesn't 68 * implement a particular VOP. 69 * 70 * If there is no specific entry here, we will return EOPNOTSUPP. 71 * 72 */ 73 74 struct vop_vector default_vnodeops = { 75 .vop_default = NULL, 76 .vop_bypass = VOP_EOPNOTSUPP, 77 78 .vop_advlock = VOP_EINVAL, 79 .vop_bmap = vop_stdbmap, 80 .vop_close = VOP_NULL, 81 .vop_fsync = VOP_NULL, 82 .vop_getpages = vop_stdgetpages, 83 .vop_getwritemount = vop_stdgetwritemount, 84 .vop_inactive = VOP_NULL, 85 .vop_ioctl = VOP_ENOTTY, 86 .vop_kqfilter = vop_stdkqfilter, 87 .vop_islocked = vop_stdislocked, 88 .vop_lease = VOP_NULL, 89 .vop_lock = vop_stdlock, 90 .vop_lookup = vop_nolookup, 91 .vop_open = VOP_NULL, 92 .vop_pathconf = VOP_EINVAL, 93 .vop_poll = vop_nopoll, 94 .vop_putpages = vop_stdputpages, 95 .vop_readlink = VOP_EINVAL, 96 .vop_revoke = VOP_PANIC, 97 .vop_strategy = vop_nostrategy, 98 .vop_unlock = vop_stdunlock, 99 }; 100 101 /* 102 * Series of placeholder functions for various error returns for 103 * VOPs. 104 */ 105 106 int 107 vop_eopnotsupp(struct vop_generic_args *ap) 108 { 109 /* 110 printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name); 111 */ 112 113 return (EOPNOTSUPP); 114 } 115 116 int 117 vop_ebadf(struct vop_generic_args *ap) 118 { 119 120 return (EBADF); 121 } 122 123 int 124 vop_enotty(struct vop_generic_args *ap) 125 { 126 127 return (ENOTTY); 128 } 129 130 int 131 vop_einval(struct vop_generic_args *ap) 132 { 133 134 return (EINVAL); 135 } 136 137 int 138 vop_null(struct vop_generic_args *ap) 139 { 140 141 return (0); 142 } 143 144 /* 145 * Helper function to panic on some bad VOPs in some filesystems. 146 */ 147 int 148 vop_panic(struct vop_generic_args *ap) 149 { 150 151 panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name); 152 } 153 154 /* 155 * vop_std<something> and vop_no<something> are default functions for use by 156 * filesystems that need the "default reasonable" implementation for a 157 * particular operation. 158 * 159 * The documentation for the operations they implement exists (if it exists) 160 * in the VOP_<SOMETHING>(9) manpage (all uppercase). 161 */ 162 163 /* 164 * Default vop for filesystems that do not support name lookup 165 */ 166 static int 167 vop_nolookup(ap) 168 struct vop_lookup_args /* { 169 struct vnode *a_dvp; 170 struct vnode **a_vpp; 171 struct componentname *a_cnp; 172 } */ *ap; 173 { 174 175 *ap->a_vpp = NULL; 176 return (ENOTDIR); 177 } 178 179 /* 180 * vop_nostrategy: 181 * 182 * Strategy routine for VFS devices that have none. 183 * 184 * BIO_ERROR and B_INVAL must be cleared prior to calling any strategy 185 * routine. Typically this is done for a BIO_READ strategy call. 186 * Typically B_INVAL is assumed to already be clear prior to a write 187 * and should not be cleared manually unless you just made the buffer 188 * invalid. BIO_ERROR should be cleared either way. 189 */ 190 191 static int 192 vop_nostrategy (struct vop_strategy_args *ap) 193 { 194 printf("No strategy for buffer at %p\n", ap->a_bp); 195 vprint("vnode", ap->a_vp); 196 ap->a_bp->b_ioflags |= BIO_ERROR; 197 ap->a_bp->b_error = EOPNOTSUPP; 198 bufdone(ap->a_bp); 199 return (EOPNOTSUPP); 200 } 201 202 /* 203 * vop_stdpathconf: 204 * 205 * Standard implementation of POSIX pathconf, to get information about limits 206 * for a filesystem. 207 * Override per filesystem for the case where the filesystem has smaller 208 * limits. 209 */ 210 int 211 vop_stdpathconf(ap) 212 struct vop_pathconf_args /* { 213 struct vnode *a_vp; 214 int a_name; 215 int *a_retval; 216 } */ *ap; 217 { 218 219 switch (ap->a_name) { 220 case _PC_NAME_MAX: 221 *ap->a_retval = NAME_MAX; 222 return (0); 223 case _PC_PATH_MAX: 224 *ap->a_retval = PATH_MAX; 225 return (0); 226 case _PC_LINK_MAX: 227 *ap->a_retval = LINK_MAX; 228 return (0); 229 case _PC_MAX_CANON: 230 *ap->a_retval = MAX_CANON; 231 return (0); 232 case _PC_MAX_INPUT: 233 *ap->a_retval = MAX_INPUT; 234 return (0); 235 case _PC_PIPE_BUF: 236 *ap->a_retval = PIPE_BUF; 237 return (0); 238 case _PC_CHOWN_RESTRICTED: 239 *ap->a_retval = 1; 240 return (0); 241 case _PC_VDISABLE: 242 *ap->a_retval = _POSIX_VDISABLE; 243 return (0); 244 default: 245 return (EINVAL); 246 } 247 /* NOTREACHED */ 248 } 249 250 /* 251 * Standard lock, unlock and islocked functions. 252 */ 253 int 254 vop_stdlock(ap) 255 struct vop_lock_args /* { 256 struct vnode *a_vp; 257 int a_flags; 258 struct thread *a_td; 259 } */ *ap; 260 { 261 struct vnode *vp = ap->a_vp; 262 263 return (lockmgr(vp->v_vnlock, ap->a_flags, VI_MTX(vp), ap->a_td)); 264 } 265 266 /* See above. */ 267 int 268 vop_stdunlock(ap) 269 struct vop_unlock_args /* { 270 struct vnode *a_vp; 271 int a_flags; 272 struct thread *a_td; 273 } */ *ap; 274 { 275 struct vnode *vp = ap->a_vp; 276 277 return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp), 278 ap->a_td)); 279 } 280 281 /* See above. */ 282 int 283 vop_stdislocked(ap) 284 struct vop_islocked_args /* { 285 struct vnode *a_vp; 286 struct thread *a_td; 287 } */ *ap; 288 { 289 290 return (lockstatus(ap->a_vp->v_vnlock, ap->a_td)); 291 } 292 293 /* 294 * Return true for select/poll. 295 */ 296 int 297 vop_nopoll(ap) 298 struct vop_poll_args /* { 299 struct vnode *a_vp; 300 int a_events; 301 struct ucred *a_cred; 302 struct thread *a_td; 303 } */ *ap; 304 { 305 /* 306 * Return true for read/write. If the user asked for something 307 * special, return POLLNVAL, so that clients have a way of 308 * determining reliably whether or not the extended 309 * functionality is present without hard-coding knowledge 310 * of specific filesystem implementations. 311 * Stay in sync with kern_conf.c::no_poll(). 312 */ 313 if (ap->a_events & ~POLLSTANDARD) 314 return (POLLNVAL); 315 316 return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 317 } 318 319 /* 320 * Implement poll for local filesystems that support it. 321 */ 322 int 323 vop_stdpoll(ap) 324 struct vop_poll_args /* { 325 struct vnode *a_vp; 326 int a_events; 327 struct ucred *a_cred; 328 struct thread *a_td; 329 } */ *ap; 330 { 331 if (ap->a_events & ~POLLSTANDARD) 332 return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events)); 333 return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 334 } 335 336 /* 337 * Return our mount point, as we will take charge of the writes. 338 */ 339 int 340 vop_stdgetwritemount(ap) 341 struct vop_getwritemount_args /* { 342 struct vnode *a_vp; 343 struct mount **a_mpp; 344 } */ *ap; 345 { 346 347 *(ap->a_mpp) = ap->a_vp->v_mount; 348 return (0); 349 } 350 351 /* XXX Needs good comment and VOP_BMAP(9) manpage */ 352 int 353 vop_stdbmap(ap) 354 struct vop_bmap_args /* { 355 struct vnode *a_vp; 356 daddr_t a_bn; 357 struct bufobj **a_bop; 358 daddr_t *a_bnp; 359 int *a_runp; 360 int *a_runb; 361 } */ *ap; 362 { 363 364 if (ap->a_bop != NULL) 365 *ap->a_bop = &ap->a_vp->v_bufobj; 366 if (ap->a_bnp != NULL) 367 *ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize); 368 if (ap->a_runp != NULL) 369 *ap->a_runp = 0; 370 if (ap->a_runb != NULL) 371 *ap->a_runb = 0; 372 return (0); 373 } 374 375 int 376 vop_stdfsync(ap) 377 struct vop_fsync_args /* { 378 struct vnode *a_vp; 379 struct ucred *a_cred; 380 int a_waitfor; 381 struct thread *a_td; 382 } */ *ap; 383 { 384 struct vnode *vp = ap->a_vp; 385 struct buf *bp; 386 struct bufobj *bo; 387 struct buf *nbp; 388 int error = 0; 389 int maxretry = 1000; /* large, arbitrarily chosen */ 390 391 VI_LOCK(vp); 392 loop1: 393 /* 394 * MARK/SCAN initialization to avoid infinite loops. 395 */ 396 TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs) { 397 bp->b_vflags &= ~BV_SCANNED; 398 bp->b_error = 0; 399 } 400 401 /* 402 * Flush all dirty buffers associated with a vnode. 403 */ 404 loop2: 405 TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) { 406 if ((bp->b_vflags & BV_SCANNED) != 0) 407 continue; 408 bp->b_vflags |= BV_SCANNED; 409 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) 410 continue; 411 VI_UNLOCK(vp); 412 KASSERT(bp->b_bufobj == &vp->v_bufobj, 413 ("bp %p wrong b_bufobj %p should be %p", 414 bp, bp->b_bufobj, &vp->v_bufobj)); 415 if ((bp->b_flags & B_DELWRI) == 0) 416 panic("fsync: not dirty"); 417 if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) { 418 vfs_bio_awrite(bp); 419 } else { 420 bremfree(bp); 421 bawrite(bp); 422 } 423 VI_LOCK(vp); 424 goto loop2; 425 } 426 427 /* 428 * If synchronous the caller expects us to completely resolve all 429 * dirty buffers in the system. Wait for in-progress I/O to 430 * complete (which could include background bitmap writes), then 431 * retry if dirty blocks still exist. 432 */ 433 if (ap->a_waitfor == MNT_WAIT) { 434 bo = &vp->v_bufobj; 435 bufobj_wwait(bo, 0, 0); 436 if (bo->bo_dirty.bv_cnt > 0) { 437 /* 438 * If we are unable to write any of these buffers 439 * then we fail now rather than trying endlessly 440 * to write them out. 441 */ 442 TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) 443 if ((error = bp->b_error) == 0) 444 continue; 445 if (error == 0 && --maxretry >= 0) 446 goto loop1; 447 error = EAGAIN; 448 } 449 } 450 VI_UNLOCK(vp); 451 if (error == EAGAIN) 452 vprint("fsync: giving up on dirty", vp); 453 454 return (error); 455 } 456 457 /* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */ 458 int 459 vop_stdgetpages(ap) 460 struct vop_getpages_args /* { 461 struct vnode *a_vp; 462 vm_page_t *a_m; 463 int a_count; 464 int a_reqpage; 465 vm_ooffset_t a_offset; 466 } */ *ap; 467 { 468 469 return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, 470 ap->a_count, ap->a_reqpage); 471 } 472 473 int 474 vop_stdkqfilter(struct vop_kqfilter_args *ap) 475 { 476 return vfs_kqfilter(ap); 477 } 478 479 /* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */ 480 int 481 vop_stdputpages(ap) 482 struct vop_putpages_args /* { 483 struct vnode *a_vp; 484 vm_page_t *a_m; 485 int a_count; 486 int a_sync; 487 int *a_rtvals; 488 vm_ooffset_t a_offset; 489 } */ *ap; 490 { 491 492 return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, 493 ap->a_sync, ap->a_rtvals); 494 } 495 496 /* 497 * vfs default ops 498 * used to fill the vfs function table to get reasonable default return values. 499 */ 500 int 501 vfs_stdroot (mp, flags, vpp, td) 502 struct mount *mp; 503 int flags; 504 struct vnode **vpp; 505 struct thread *td; 506 { 507 508 return (EOPNOTSUPP); 509 } 510 511 int 512 vfs_stdstatfs (mp, sbp, td) 513 struct mount *mp; 514 struct statfs *sbp; 515 struct thread *td; 516 { 517 518 return (EOPNOTSUPP); 519 } 520 521 int 522 vfs_stdvptofh (vp, fhp) 523 struct vnode *vp; 524 struct fid *fhp; 525 { 526 527 return (EOPNOTSUPP); 528 } 529 530 int 531 vfs_stdquotactl (mp, cmds, uid, arg, td) 532 struct mount *mp; 533 int cmds; 534 uid_t uid; 535 void *arg; 536 struct thread *td; 537 { 538 539 return (EOPNOTSUPP); 540 } 541 542 int 543 vfs_stdsync(mp, waitfor, td) 544 struct mount *mp; 545 int waitfor; 546 struct thread *td; 547 { 548 struct vnode *vp, *mvp; 549 int error, lockreq, allerror = 0; 550 551 lockreq = LK_EXCLUSIVE | LK_INTERLOCK; 552 if (waitfor != MNT_WAIT) 553 lockreq |= LK_NOWAIT; 554 /* 555 * Force stale buffer cache information to be flushed. 556 */ 557 MNT_ILOCK(mp); 558 loop: 559 MNT_VNODE_FOREACH(vp, mp, mvp) { 560 561 VI_LOCK(vp); 562 if (vp->v_bufobj.bo_dirty.bv_cnt == 0) { 563 VI_UNLOCK(vp); 564 continue; 565 } 566 MNT_IUNLOCK(mp); 567 568 if ((error = vget(vp, lockreq, td)) != 0) { 569 MNT_ILOCK(mp); 570 if (error == ENOENT) { 571 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp); 572 goto loop; 573 } 574 continue; 575 } 576 error = VOP_FSYNC(vp, waitfor, td); 577 if (error) 578 allerror = error; 579 580 /* Do not turn this into vput. td is not always curthread. */ 581 VOP_UNLOCK(vp, 0, td); 582 vrele(vp); 583 MNT_ILOCK(mp); 584 } 585 MNT_IUNLOCK(mp); 586 return (allerror); 587 } 588 589 int 590 vfs_stdnosync (mp, waitfor, td) 591 struct mount *mp; 592 int waitfor; 593 struct thread *td; 594 { 595 596 return (0); 597 } 598 599 int 600 vfs_stdvget (mp, ino, flags, vpp) 601 struct mount *mp; 602 ino_t ino; 603 int flags; 604 struct vnode **vpp; 605 { 606 607 return (EOPNOTSUPP); 608 } 609 610 int 611 vfs_stdfhtovp (mp, fhp, vpp) 612 struct mount *mp; 613 struct fid *fhp; 614 struct vnode **vpp; 615 { 616 617 return (EOPNOTSUPP); 618 } 619 620 int 621 vfs_stdinit (vfsp) 622 struct vfsconf *vfsp; 623 { 624 625 return (0); 626 } 627 628 int 629 vfs_stduninit (vfsp) 630 struct vfsconf *vfsp; 631 { 632 633 return(0); 634 } 635 636 int 637 vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname, td) 638 struct mount *mp; 639 int cmd; 640 struct vnode *filename_vp; 641 int attrnamespace; 642 const char *attrname; 643 struct thread *td; 644 { 645 646 if (filename_vp != NULL) 647 VOP_UNLOCK(filename_vp, 0, td); 648 return (EOPNOTSUPP); 649 } 650 651 int 652 vfs_stdsysctl(mp, op, req) 653 struct mount *mp; 654 fsctlop_t op; 655 struct sysctl_req *req; 656 { 657 658 return (EOPNOTSUPP); 659 } 660 661 /* end of vfs default ops */ 662