/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
 * $Id: vfs_subr.c,v 1.180 1999/01/05 18:49:53 eivind Exp $
 */

/*
 * External virtual filesystem routines
 */
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/socket.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/domain.h>
#include <sys/dirent.h>
#include <sys/vmmeter.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <vm/vm_zone.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");

static void	insmntque __P((struct vnode *vp, struct mount *mp));
static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
static void	vfree __P((struct vnode *));
static void	vgonel __P((struct vnode *vp, struct proc *p));
static unsigned long	numvnodes;
SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

static TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct tobefreelist vnode_tobefree_list;	/* vnode free list */

static u_long wantfreevnodes = 25;
SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
static u_long freevnodes = 0;
SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");

int vfs_ioopt = 0;
#ifdef ENABLE_VFS_IOOPT
SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
#endif

struct mntlist mountlist;	/* mounted filesystem list */
struct simplelock mountlist_slock;
struct simplelock mntvnode_slock;
int	nfs_mount_type = -1;
#ifndef NULL_SIMPLELOCKS
static struct simplelock mntid_slock;
static struct simplelock vnode_free_list_slock;
static struct simplelock spechash_slock;
#endif
struct nfs_public nfs_pub;	/* publicly exported FS */
static vm_zone_t vnode_zone;

/*
 * The workitem queue.
 */
#define SYNCER_MAXDELAY		32
static int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
time_t syncdelay = 30;
int rushjob;			/* number of slots to run ASAP */

static int syncer_delayno = 0;
static long syncer_mask;
LIST_HEAD(synclist, vnode);
static struct synclist *syncer_workitem_pending;

int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");

static void	vfs_free_addrlist __P((struct netexport *nep));
static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
				       struct export_args *argp));

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	desiredvnodes = maxproc + cnt.v_page_count / 4;
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&vnode_tobefree_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
	vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5);
	/*
	 * Initialize the filesystem syncer.
	 */
	syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
	    &syncer_mask);
	syncer_maxdelay = syncer_mask + 1;
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_kern_flag |= MNTK_MWAIT;
		if (interlkp) {
			simple_unlock(interlkp);
		}
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		if (interlkp) {
			simple_lock(interlkp);
		}
		return (ENOENT);
	}
	lkflags = LK_SHARED | LK_NOPAUSE;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char	*fstypename;
	char	*devname;
	struct mount **mpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	if (fstypename == NULL)
		return (ENODEV);
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	mp->mnt_stat.f_mntonname[1] = 0;
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

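/*
 * Illustrative sketch (not part of the original file): the usual way a
 * caller walks the mount list is to take mountlist_slock and then bracket
 * any per-mount work with vfs_busy()/vfs_unbusy() so that an unmount
 * cannot sneak in.  The same pattern appears verbatim in the DDB
 * "lockedvnodes" command and in sysctl_vnode() later in this file; the
 * loop body here is only a placeholder.
 */
#if 0
static void
example_foreach_mount(p)
	struct proc *p;
{
	struct mount *mp, *nmp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			/* Being unmounted; skip it. */
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		/* ... per-mount work goes here ... */
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif
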
/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
#ifdef notdef	/* XXX JH */
int
lite2_vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*lite2_mountroot) __P((void));
	int error;

	if (lite2_mountroot != NULL)
		return ((*lite2_mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}
#endif

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *) 0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_atime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec = VNOVAL;
	vap->va_mtime.tv_sec = VNOVAL;
	vap->va_mtime.tv_nsec = VNOVAL;
	vap->va_ctime.tv_sec = VNOVAL;
	vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

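/*
 * Illustrative sketch (not part of the original file): vattr_null() is
 * what VOP_SETATTR() callers use to mark every attribute as "do not
 * change" before filling in the one or two fields they actually want
 * updated.  The helper name below is hypothetical; only the
 * vattr_null()/VOP_SETATTR() pattern itself is the point.
 */
#if 0
static int
example_set_size(vp, length, cred, p)
	struct vnode *vp;
	off_t length;
	struct ucred *cred;
	struct proc *p;
{
	struct vattr va;

	vattr_null(&va);		/* every field starts as VNOVAL */
	va.va_size = length;		/* only the size is to be changed */
	return (VOP_SETATTR(vp, &va, cred, p));
}
#endif
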
/*
 * Routines having to do with the management of the vnode table.
 */
extern vop_t **dead_vnodeop_p;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	vop_t **vops;
	struct vnode **vpp;
{
	int s;
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *tvp, *nvp;
	vm_object_t object;
	TAILQ_HEAD(freelst, vnode) vnode_tmp_list;

	/*
	 * We take the least recently used vnode from the freelist
	 * if we can get it and it has no cached pages, and no
	 * namecache entries are relative to it.
	 * Otherwise we allocate a new vnode
	 */

	s = splbio();
	simple_lock(&vnode_free_list_slock);
	TAILQ_INIT(&vnode_tmp_list);

	for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) {
		nvp = TAILQ_NEXT(vp, v_freelist);
		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
		if (vp->v_flag & VAGE) {
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		} else {
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		}
		vp->v_flag &= ~(VTBFREE|VAGE);
		vp->v_flag |= VFREE;
		if (vp->v_usecount)
			panic("tobe free vnode isn't");
		freevnodes++;
	}

	if (wantfreevnodes && freevnodes < wantfreevnodes) {
		vp = NULL;
	} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
		/*
		 * XXX: this is only here to be backwards compatible
		 */
		vp = NULL;
	} else {
		for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) {
			nvp = TAILQ_NEXT(vp, v_freelist);
			if (!simple_lock_try(&vp->v_interlock))
				continue;
			if (vp->v_usecount)
				panic("free vnode isn't");

			object = vp->v_object;
			if (object && (object->resident_page_count || object->ref_count)) {
				printf("object inconsistent state: RPC: %d, RC: %d\n",
				    object->resident_page_count, object->ref_count);
				/* Don't recycle if it's caching some pages */
				TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
				TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist);
				continue;
			} else if (LIST_FIRST(&vp->v_cache_src)) {
				/* Don't recycle if active in the namecache */
				simple_unlock(&vp->v_interlock);
				continue;
			} else {
				break;
			}
		}
	}

	for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) {
		nvp = TAILQ_NEXT(tvp, v_freelist);
		TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist);
		simple_unlock(&tvp->v_interlock);
	}

	if (vp) {
		vp->v_flag |= VDOOMED;
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;
		simple_unlock(&vnode_free_list_slock);
		cache_purge(vp);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD) {
			vgonel(vp, p);
		} else {
			simple_unlock(&vp->v_interlock);
		}

#ifdef INVARIANTS
		{
			int s;

			if (vp->v_data)
				panic("cleaned vnode isn't");
			s = splbio();
			if (vp->v_numoutput)
				panic("Clean vnode has pending I/O's");
			splx(s);
		}
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
		vp->v_writecount = 0;	/* XXX */
		vp->v_maxio = 0;
	} else {
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *) zalloc(vnode_zone);
		bzero((char *) vp, sizeof *vp);
		simple_lock_init(&vp->v_interlock);
		vp->v_dd = vp;
		cache_purge(vp);
		LIST_INIT(&vp->v_cache_src);
		TAILQ_INIT(&vp->v_cache_dst);
		numvnodes++;
	}

	TAILQ_INIT(&vp->v_cleanblkhd);
	TAILQ_INIT(&vp->v_dirtyblkhd);
	vp->v_type = VNON;
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	splx(s);

	vfs_object_create(vp, p, p->p_ucred);
	return (0);
}

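/*
 * Illustrative sketch (not part of the original file): a filesystem's
 * own vget routine typically obtains a fresh vnode from getnewvnode(),
 * points v_data at its private per-file structure, and only then makes
 * the vnode visible.  The vnodeop vector and "example_node" structure
 * below are hypothetical stand-ins for a real filesystem's.
 */
#if 0
static int
example_fs_newvnode(mp, xp, vpp)
	struct mount *mp;
	struct example_node *xp;	/* hypothetical per-file data */
	struct vnode **vpp;
{
	struct vnode *vp;
	int error;

	error = getnewvnode(VT_NON, mp, example_vnodeop_p, &vp);
	if (error)
		return (error);
	vp->v_data = xp;		/* hang private data off the vnode */
	xp->x_vnode = vp;
	*vpp = vp;
	return (0);
}
#endif
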
/*
 * Move a vnode from one mount queue to another.
 */
static void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL) {
		simple_unlock(&mntvnode_slock);
		return;
	}
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp)) {
		vp->v_numoutput--;
		if (vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t) &vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;
	vm_object_t object;

	if (flags & V_SAVE) {
		s = splbio();
		while (vp->v_numoutput) {
			vp->v_flag |= VBWAIT;
			error = tsleep((caddr_t)&vp->v_numoutput,
			    slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
		}
		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
			splx(s);
			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
				return (error);
			s = splbio();
			if (vp->v_numoutput > 0 ||
			    !TAILQ_EMPTY(&vp->v_dirtyblkhd))
				panic("vinvalbuf: dirty bufs");
		}
		splx(s);
	}
	s = splbio();
	for (;;) {
		blist = TAILQ_FIRST(&vp->v_cleanblkhd);
		if (!blist)
			blist = TAILQ_FIRST(&vp->v_dirtyblkhd);
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t) bp,
				    slpflag | (PRIBIO + 4), "vinvalbuf",
				    slptimeo);
				if (error) {
					splx(s);
					return (error);
				}
				break;
			}
			/*
			 * XXX Since there are no node locks for NFS, I
			 * believe there is a slight chance that a delayed
			 * write will occur while sleeping just above, so
			 * check for it.  Note that vfs_bio_awrite expects
			 * buffers to reside on a queue, while VOP_BWRITE and
			 * brelse do not.
			 */
			if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
			    (flags & V_SAVE)) {

				if (bp->b_vp == vp) {
					if (bp->b_flags & B_CLUSTEROK) {
						vfs_bio_awrite(bp);
					} else {
						bremfree(bp);
						bp->b_flags |= (B_BUSY | B_ASYNC);
						VOP_BWRITE(bp);
					}
				} else {
					bremfree(bp);
					bp->b_flags |= B_BUSY;
					(void) VOP_BWRITE(bp);
				}
				break;
			}
			bremfree(bp);
			bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF | B_BUSY);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
	}

	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
	}

	splx(s);

	/*
	 * Destroy the copy in the VM cache, too.
	 */
	simple_lock(&vp->v_interlock);
	object = vp->v_object;
	if (object != NULL) {
		vm_object_page_remove(object, 0, 0,
		    (flags & V_SAVE) ? TRUE : FALSE);
	}
	simple_unlock(&vp->v_interlock);

	if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Truncate a file's buffer and pages to a specified length.  This
 * is in lieu of the old vinvalbuf mechanism, which performed unneeded
 * sync activity.
 */
int
vtruncbuf(vp, cred, p, length, blksize)
	register struct vnode *vp;
	struct ucred *cred;
	struct proc *p;
	off_t length;
	int blksize;
{
	register struct buf *bp;
	struct buf *nbp;
	int s, anyfreed;
	int trunclbn;

	/*
	 * Round up to the *next* lbn.
	 */
	trunclbn = (length + blksize - 1) / blksize;

	s = splbio();
restart:
	anyfreed = 1;
	for (;anyfreed;) {
		anyfreed = 0;
		for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (bp->b_lblkno >= trunclbn) {
				if (bp->b_flags & B_BUSY) {
					bp->b_flags |= B_WANTED;
					tsleep(bp, PRIBIO + 4, "vtrb1", 0);
					goto restart;
				} else {
					bremfree(bp);
					bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF);
					bp->b_flags &= ~B_ASYNC;
					brelse(bp);
					anyfreed = 1;
				}
				if (nbp && (((nbp->b_xflags & B_VNCLEAN) == 0) ||
				    (nbp->b_vp != vp) ||
				    (nbp->b_flags & B_DELWRI))) {
					goto restart;
				}
			}
		}

		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (bp->b_lblkno >= trunclbn) {
				if (bp->b_flags & B_BUSY) {
					bp->b_flags |= B_WANTED;
					tsleep(bp, PRIBIO + 4, "vtrb2", 0);
					goto restart;
				} else {
					bremfree(bp);
					bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF);
					bp->b_flags &= ~B_ASYNC;
					brelse(bp);
					anyfreed = 1;
				}
				if (nbp && (((nbp->b_xflags & B_VNDIRTY) == 0) ||
				    (nbp->b_vp != vp) ||
				    (nbp->b_flags & B_DELWRI) == 0)) {
					goto restart;
				}
			}
		}
	}

	if (length > 0) {
restartsync:
		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) {
				if (bp->b_flags & B_BUSY) {
					bp->b_flags |= B_WANTED;
					tsleep(bp, PRIBIO, "vtrb3", 0);
				} else {
					bremfree(bp);
					bp->b_flags |= B_BUSY;
					if (bp->b_vp == vp) {
						bp->b_flags |= B_ASYNC;
					} else {
						bp->b_flags &= ~B_ASYNC;
					}
					VOP_BWRITE(bp);
				}
				goto restartsync;
			}

		}
	}

	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0);
	}

	splx(s);

	vnode_pager_setsize(vp, length);

	return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	int s;

	KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));
	vhold(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	s = splbio();
	bp->b_xflags |= B_VNCLEAN;
	bp->b_xflags &= ~B_VNDIRTY;
	TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	struct buflists *listheadp;
	int s;

	KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));

	/*
	 * Delete from old vnode list, if on one.
	 */
	vp = bp->b_vp;
	s = splbio();
	if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) {
		if (bp->b_xflags & B_VNDIRTY)
			listheadp = &vp->v_dirtyblkhd;
		else
			listheadp = &vp->v_cleanblkhd;
		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
		bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN);
	}
	if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	splx(s);
	bp->b_vp = (struct vnode *) 0;
	vdrop(vp);
}

/*
 * The workitem queue.
 *
 * It is useful to delay writes of file data and filesystem metadata
 * for tens of seconds so that quickly created and deleted files need
 * not waste disk bandwidth being created and removed. To realize this,
 * we append vnodes to a "workitem" queue. When running with a soft
 * updates implementation, most pending metadata dependencies should
 * not wait for more than a few seconds. Thus, filesystems mounted on
 * block devices are delayed only about half the time that file data
 * is delayed. Similarly, directory updates are more critical, so they
 * are only delayed about a third of the time that file data is delayed.
 * Thus, there are SYNCER_MAXDELAY queues that are processed round-robin
 * at a rate of one each second (driven off the filesystem syncer
 * process). The syncer_delayno variable indicates the next queue that
 * is to be processed. Items that need to be processed soon are placed
 * in this queue:
 *
 *	syncer_workitem_pending[syncer_delayno]
 *
 * A delay of fifteen seconds is done by placing the request fifteen
 * entries later in the queue:
 *
 *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
 *
 */

/*
 * Add an item to the syncer work queue.
 */
void
vn_syncer_add_to_worklist(vp, delay)
	struct vnode *vp;
	int delay;
{
	int s, slot;

	s = splbio();

	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
	}

	if (delay > syncer_maxdelay - 2)
		delay = syncer_maxdelay - 2;
	slot = (syncer_delayno + delay) & syncer_mask;

	LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
	vp->v_flag |= VONWORKLST;
	splx(s);
}

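/*
 * Illustrative sketch (not part of the original file): a caller that has
 * just dirtied a vnode asks to have it flushed roughly "delay" seconds
 * from now; reassignbuf() below does exactly this.  With the defaults
 * (syncdelay = 30, SYNCER_MAXDELAY = 32) a file-data vnode lands
 * (syncer_delayno + 30) & syncer_mask slots ahead of the slot the syncer
 * is currently draining, so it is written back about thirty seconds later.
 */
#if 0
	vn_syncer_add_to_worklist(vp, syncdelay);	/* ordinary file data */
	vn_syncer_add_to_worklist(vp, syncdelay / 2);	/* mounted block device */
	vn_syncer_add_to_worklist(vp, syncdelay / 3);	/* directory */
#endif
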
static void sched_sync __P((void));
static struct proc *updateproc;
static struct kproc_desc up_kp = {
	"syncer",
	sched_sync,
	&updateproc
};
SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)

/*
 * System filesystem synchronizer daemon.
 */
void
sched_sync(void)
{
	struct synclist *slp;
	struct vnode *vp;
	long starttime;
	int s;
	struct proc *p = updateproc;

	for (;;) {
		starttime = time_second;

		/*
		 * Push files whose dirty time has expired.
		 */
		s = splbio();
		slp = &syncer_workitem_pending[syncer_delayno];
		syncer_delayno += 1;
		if (syncer_delayno == syncer_maxdelay)
			syncer_delayno = 0;
		splx(s);

		while ((vp = LIST_FIRST(slp)) != NULL) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
			(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
			VOP_UNLOCK(vp, 0, p);
			if (LIST_FIRST(slp) == vp) {
				if (TAILQ_EMPTY(&vp->v_dirtyblkhd) &&
				    vp->v_type != VBLK)
					panic("sched_sync: fsync failed");
				/*
				 * Move ourselves to the back of the sync list.
				 */
				LIST_REMOVE(vp, v_synclist);
				vn_syncer_add_to_worklist(vp, syncdelay);
			}
		}

		/*
		 * Do soft update processing.
		 */
		if (bioops.io_sync)
			(*bioops.io_sync)(NULL);

		/*
		 * The variable rushjob allows the kernel to speed up the
		 * processing of the filesystem syncer process. A rushjob
		 * value of N tells the filesystem syncer to process the next
		 * N seconds worth of work on its queue ASAP. Currently rushjob
		 * is used by the soft update code to speed up the filesystem
		 * syncer process when the incore state is getting so far
		 * ahead of the disk that the kernel memory pool is being
		 * threatened with exhaustion.
		 */
		if (rushjob > 0) {
			rushjob -= 1;
			continue;
		}
		/*
		 * If it has taken us less than a second to process the
		 * current work, then wait. Otherwise start right over
		 * again. We can still lose time if any single round
		 * takes more than two seconds, but it does not really
		 * matter as we are just trying to generally pace the
		 * filesystem activity.
		 */
		if (time_second == starttime)
			tsleep(&lbolt, PPAUSE, "syncer", 0);
	}
}

/*
 * Associate a p-buffer with a vnode.
 */
void
pbgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	KASSERT(bp->b_vp == NULL, ("pbgetvp: not free"));

	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
}

/*
 * Disassociate a p-buffer from a vnode.
 */
void
pbrelvp(bp)
	register struct buf *bp;
{

	KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));

	bp->b_vp = (struct vnode *) 0;
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	struct buflists *listheadp;
	struct vnode *oldvp;
	int delay;
	int s;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}

	s = splbio();
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) {
		oldvp = bp->b_vp;
		if (bp->b_xflags & B_VNDIRTY)
			listheadp = &oldvp->v_dirtyblkhd;
		else
			listheadp = &oldvp->v_cleanblkhd;
		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
		bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN);
		vdrop(oldvp);
	}
	/*
	 * If dirty, put on list of dirty buffers; otherwise insert onto list
	 * of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI) {
		struct buf *tbp;

		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			switch (newvp->v_type) {
			case VDIR:
				delay = syncdelay / 3;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = syncdelay / 2;
					break;
				}
				/* fall through */
			default:
				delay = syncdelay;
			}
			vn_syncer_add_to_worklist(newvp, delay);
		}
		bp->b_xflags |= B_VNDIRTY;
		tbp = TAILQ_FIRST(listheadp);
		if (tbp == NULL ||
		    (bp->b_lblkno >= 0 && tbp->b_lblkno > bp->b_lblkno)) {
			TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
		} else {
			if (bp->b_lblkno >= 0) {
				struct buf *ttbp;
				while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) &&
				    (ttbp->b_lblkno < bp->b_lblkno)) {
					tbp = ttbp;
				}
				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
			} else {
				TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs);
			}
		}
	} else {
		bp->b_xflags |= B_VNCLEAN;
		TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs);
		if ((newvp->v_flag & VONWORKLST) &&
		    TAILQ_EMPTY(&newvp->v_dirtyblkhd)) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	}
	bp->b_vp = newvp;
	vhold(bp->b_vp);
	splx(s);
}

/*
 * Create a vnode for a block device.
 * Used for mounting the root file system.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	/* XXX 255 is for mfs. */
	if (dev == NODEV || (major(dev) != 255 && (major(dev) >= nblkdev ||
	    bdevsw[major(dev)] == NULL))) {
		*vpp = NULLVP;
		return (ENXIO);
	}
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if ((nvp = checkalias(vp, dev, (struct mount *)0)) != NULL) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 * Only alias active device nodes.
		 * Not sure why we don't re-use this like we do below.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			/*
			 * It disappeared, and we may have slept.
			 * Restart from the beginning.
			 */
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	/*
	 * It would be a lot clearer what is going on here if
	 * this had been expressed as:
	 *	if (vp && (vp->v_tag == VT_NON))
	 * and the clauses had been swapped.
	 */
	if (vp == NULL || vp->v_tag != VT_NON) {
		/*
		 * Put the new vnode into the hash chain,
		 * and if there was an alias, connect them.
		 */
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/*
	 * if (vp && (vp->v_tag == VT_NON))
	 * We have a vnode alias, but it is trashed.
	 * Make it look like it's newly allocated (by getnewvnode()).
	 * The caller should use this instead.
	 */
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0) {
		simple_lock(&vp->v_interlock);
	}
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}

	vp->v_usecount++;

	if (VSHOULDBUSY(vp))
		vbusy(vp);
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
			/*
			 * Must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active. We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (VSHOULDFREE(vp))
				vfree(vp);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

void
vref(struct vnode *vp)
{
	simple_lock(&vp->v_interlock);
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}

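/*
 * Illustrative sketch (not part of the original file): the usual pattern
 * for a caller that holds a pointer to a vnode it does not yet have a
 * reference on is vget() with a lock type, use the vnode, then vput()
 * (which unlocks and releases in one step).  An ENOENT return means the
 * vnode was being recycled out from under us and must not be touched.
 */
#if 0
static int
example_use_vnode(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int error;

	if ((error = vget(vp, LK_EXCLUSIVE, p)) != 0)
		return (error);		/* doomed vnode; look it up again */
	/* ... operate on the locked, referenced vnode ... */
	vput(vp);			/* unlock + vrele */
	return (0);
}
#endif
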
/*
 * Vnode put/release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	KASSERT(vp, ("vrele: null vp"));

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount > 1) {

		vp->v_usecount--;
		simple_unlock(&vp->v_interlock);

		return;
	}

	if (vp->v_usecount == 1) {

		vp->v_usecount--;
		if (VSHOULDFREE(vp))
			vfree(vp);
		/*
		 * If we are doing a vput, the node is already locked, and we must
		 * call VOP_INACTIVE with the node locked.  So, in the case of
		 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
		 */
		if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
			VOP_INACTIVE(vp, p);
		}

	} else {
#ifdef DIAGNOSTIC
		vprint("vrele: negative ref count", vp);
		simple_unlock(&vp->v_interlock);
#endif
		panic("vrele: negative ref cnt");
	}
}

void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	KASSERT(vp != NULL, ("vput: null vp"));

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount > 1) {

		vp->v_usecount--;
		VOP_UNLOCK(vp, LK_INTERLOCK, p);
		return;

	}

	if (vp->v_usecount == 1) {

		vp->v_usecount--;
		if (VSHOULDFREE(vp))
			vfree(vp);
		/*
		 * If we are doing a vput, the node is already locked, and we must
		 * call VOP_INACTIVE with the node locked.  So, in the case of
		 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
		 */
		simple_unlock(&vp->v_interlock);
		VOP_INACTIVE(vp, p);

	} else {
#ifdef DIAGNOSTIC
		vprint("vput: negative ref count", vp);
#endif
		panic("vput: negative ref cnt");
	}
}

/*
 * Somebody doesn't want the vnode recycled.
 */
void
vhold(vp)
	register struct vnode *vp;
{
	int s;

	s = splbio();
	vp->v_holdcnt++;
	if (VSHOULDBUSY(vp))
		vbusy(vp);
	splx(s);
}

/*
 * One less who cares about this vnode.
 */
void
vdrop(vp)
	register struct vnode *vp;
{
	int s;

	s = splbio();
	if (vp->v_holdcnt <= 0)
		panic("vdrop: holdcnt");
	vp->v_holdcnt--;
	if (VSHOULDFREE(vp))
		vfree(vp);
	splx(s);
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
static int busyprt = 0;		/* print out busy vnodes */
SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/*
		 * Make sure this vnode wasn't reclaimed in getnewvnode().
		 * Start over if it has (it won't be on the list anymore).
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file vnodes
		 * open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}

		/*
		 * With v_usecount == 0, all we need to do is clear out the
		 * vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}

		/*
		 * If FORCECLOSE is set, forcibly close the vnode. For block
		 * or character devices, revert to an anonymous device. For
		 * all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *) 0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;
	vm_object_t obj;

	/*
	 * Check to see if the vnode is in use. If so we have to reference it
	 * before we clean it out so that its count cannot fall to zero and
	 * generate a race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount))
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or brought into use while we
	 * clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * Clean out any buffers associated with the vnode.
	 */
	vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	if ((obj = vp->v_object) != NULL) {
		if (obj->ref_count == 0) {
			/*
			 * This is a normal way of shutting down the object/vnode
			 * association.
			 */
			vm_object_terminate(obj);
		} else {
			/*
			 * Woe to the process that tries to page now :-).
			 */
			vm_pager_deallocate(obj);
		}
	}

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");

	if (active)
		vrele(vp);

	cache_purge(vp);
	if (vp->v_vnlock) {
#if 0	/* This is the only place we have LK_DRAINED in the entire kernel ??? */
#ifdef DIAGNOSTIC
		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
			vprint("vclean: lock not drained", vp);
#endif
#endif
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	if (VSHOULDFREE(vp))
		vfree(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vn_pollgone(vp);
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t) vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

	KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke"));

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP) {
				simple_unlock(&spechash_slock);
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
		if (vp->v_flag & VXWANT) {
			vp->v_flag &= ~VXWANT;
			wakeup(vp);
		}
	}
	vgonel(vp, p);
	return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp) {
			simple_unlock(inter_lkp);
		}
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

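/*
 * Illustrative sketch (not part of the original file): vrecycle() is
 * normally called from a filesystem's VOP_INACTIVE routine when it can
 * tell the underlying object is dead (for example an unlinked file whose
 * last reference just went away), so the vnode is reclaimed immediately
 * instead of lingering on the free list.  The "is dead" predicate below
 * is a hypothetical placeholder.
 */
#if 0
static int
example_inactive(vp, p)
	struct vnode *vp;
	struct proc *p;
{

	VOP_UNLOCK(vp, 0, p);
	if (example_node_is_dead(vp))		/* hypothetical predicate */
		vrecycle(vp, (struct simplelock *)0, p);
	return (0);
}
#endif
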
/*
 * vgone, with the vp interlock held.
 */
static void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int s;
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	simple_lock(&vp->v_interlock);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}

	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 */
	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
		s = splbio();
		simple_lock(&vnode_free_list_slock);
		if (vp->v_flag & VFREE) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		} else if (vp->v_flag & VTBFREE) {
			TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
			vp->v_flag &= ~VTBFREE;
			freevnodes++;
		} else
			freevnodes++;
		vp->v_flag |= VFREE;
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		splx(s);
	}

	vp->v_type = VBAD;
	simple_unlock(&vp->v_interlock);
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[96];

	if (label != NULL)
		printf("%s: %p: ", label, (void *)vp);
	else
		printf("%p: ", (void *)vp);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (vp->v_flag & VDOOMED)
		strcat(buf, "|VDOOMED");
	if (vp->v_flag & VFREE)
		strcat(buf, "|VFREE");
	if (vp->v_flag & VOBJBUF)
		strcat(buf, "|VOBJBUF");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DDB
#include <ddb/ddb.h>
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
static int	sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);

static int
vfs_sysctl SYSCTL_HANDLER_ARGS
{
	int *name = (int *)arg1 - 1;	/* XXX */
	u_int namelen = arg2 + 1;	/* XXX */
	struct vfsconf *vfsp;

#if 1 || defined(COMPAT_PRELITE2)
	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
	if (namelen == 1)
		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
#endif

#ifdef notyet
	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
#endif
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		if (namelen != 2)
			return (ENOTDIR);
		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
	case VFS_CONF:
		if (namelen != 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
	}
	return (EOPNOTSUPP);
}

SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
	"Generic filesystem");

#if 1 || defined(COMPAT_PRELITE2)

static int
sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
{
	int error;
	struct vfsconf *vfsp;
	struct ovfsconf ovfs;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
		strcpy(ovfs.vfc_name, vfsp->vfc_name);
		ovfs.vfc_index = vfsp->vfc_typenum;
		ovfs.vfc_refcount = vfsp->vfc_refcount;
		ovfs.vfc_flags = vfsp->vfc_flags;
		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
		if (error)
			return error;
	}
	return 0;
}

#endif /* 1 || COMPAT_PRELITE2 */

#if 0
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
static int
sysctl_vnode SYSCTL_HANDLER_ARGS
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)

	req->lock = 0;
	if (!req->oldptr)	/* Make an estimate */
		return (SYSCTL_OUT(req, 0,
		    (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			simple_unlock(&mntvnode_slock);
			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
				return (error);
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);

	return (0);
}
#endif

/*
 * XXX
 * Exporting the vnode list on large systems causes them to crash.
 * Exporting the vnode list on medium systems causes sysctl to coredump.
 */
#if 0
SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
	0, 0, sysctl_vnode, "S,vnode", "");
#endif

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Unmount all filesystems. The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp;
	struct proc *p;
	int error;

	if (curproc != NULL)
		p = curproc;
	else
		p = initproc;	/* XXX XXX should this be proc0? */
	/*
	 * Since this only runs when rebooting, it is not interlocked.
	 */
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
		error = dounmount(mp, MNT_FORCE, p);
		if (error) {
			printf("unmount of %s failed (",
			    mp->mnt_stat.f_mntonname);
			if (error == EBUSY)
				printf("BUSY)\n");
			else
				printf("%d)\n", error);
		}
	}
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
2176 */ 2177 static int 2178 vfs_hang_addrlist(mp, nep, argp) 2179 struct mount *mp; 2180 struct netexport *nep; 2181 struct export_args *argp; 2182 { 2183 register struct netcred *np; 2184 register struct radix_node_head *rnh; 2185 register int i; 2186 struct radix_node *rn; 2187 struct sockaddr *saddr, *smask = 0; 2188 struct domain *dom; 2189 int error; 2190 2191 if (argp->ex_addrlen == 0) { 2192 if (mp->mnt_flag & MNT_DEFEXPORTED) 2193 return (EPERM); 2194 np = &nep->ne_defexported; 2195 np->netc_exflags = argp->ex_flags; 2196 np->netc_anon = argp->ex_anon; 2197 np->netc_anon.cr_ref = 1; 2198 mp->mnt_flag |= MNT_DEFEXPORTED; 2199 return (0); 2200 } 2201 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 2202 np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); 2203 bzero((caddr_t) np, i); 2204 saddr = (struct sockaddr *) (np + 1); 2205 if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) 2206 goto out; 2207 if (saddr->sa_len > argp->ex_addrlen) 2208 saddr->sa_len = argp->ex_addrlen; 2209 if (argp->ex_masklen) { 2210 smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); 2211 error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); 2212 if (error) 2213 goto out; 2214 if (smask->sa_len > argp->ex_masklen) 2215 smask->sa_len = argp->ex_masklen; 2216 } 2217 i = saddr->sa_family; 2218 if ((rnh = nep->ne_rtable[i]) == 0) { 2219 /* 2220 * Seems silly to initialize every AF when most are not used, 2221 * do so on demand here 2222 */ 2223 for (dom = domains; dom; dom = dom->dom_next) 2224 if (dom->dom_family == i && dom->dom_rtattach) { 2225 dom->dom_rtattach((void **) &nep->ne_rtable[i], 2226 dom->dom_rtoffset); 2227 break; 2228 } 2229 if ((rnh = nep->ne_rtable[i]) == 0) { 2230 error = ENOBUFS; 2231 goto out; 2232 } 2233 } 2234 rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, 2235 np->netc_rnodes); 2236 if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ 2237 error = EPERM; 2238 goto out; 2239 } 2240 np->netc_exflags = argp->ex_flags; 2241 np->netc_anon = argp->ex_anon; 2242 np->netc_anon.cr_ref = 1; 2243 return (0); 2244 out: 2245 free(np, M_NETADDR); 2246 return (error); 2247 } 2248 2249 /* ARGSUSED */ 2250 static int 2251 vfs_free_netcred(rn, w) 2252 struct radix_node *rn; 2253 void *w; 2254 { 2255 register struct radix_node_head *rnh = (struct radix_node_head *) w; 2256 2257 (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); 2258 free((caddr_t) rn, M_NETADDR); 2259 return (0); 2260 } 2261 2262 /* 2263 * Free the net address hash lists that are hanging off the mount points. 
2264 */ 2265 static void 2266 vfs_free_addrlist(nep) 2267 struct netexport *nep; 2268 { 2269 register int i; 2270 register struct radix_node_head *rnh; 2271 2272 for (i = 0; i <= AF_MAX; i++) 2273 if ((rnh = nep->ne_rtable[i])) { 2274 (*rnh->rnh_walktree) (rnh, vfs_free_netcred, 2275 (caddr_t) rnh); 2276 free((caddr_t) rnh, M_RTABLE); 2277 nep->ne_rtable[i] = 0; 2278 } 2279 } 2280 2281 int 2282 vfs_export(mp, nep, argp) 2283 struct mount *mp; 2284 struct netexport *nep; 2285 struct export_args *argp; 2286 { 2287 int error; 2288 2289 if (argp->ex_flags & MNT_DELEXPORT) { 2290 if (mp->mnt_flag & MNT_EXPUBLIC) { 2291 vfs_setpublicfs(NULL, NULL, NULL); 2292 mp->mnt_flag &= ~MNT_EXPUBLIC; 2293 } 2294 vfs_free_addrlist(nep); 2295 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 2296 } 2297 if (argp->ex_flags & MNT_EXPORTED) { 2298 if (argp->ex_flags & MNT_EXPUBLIC) { 2299 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 2300 return (error); 2301 mp->mnt_flag |= MNT_EXPUBLIC; 2302 } 2303 if ((error = vfs_hang_addrlist(mp, nep, argp))) 2304 return (error); 2305 mp->mnt_flag |= MNT_EXPORTED; 2306 } 2307 return (0); 2308 } 2309 2310 2311 /* 2312 * Set the publicly exported filesystem (WebNFS). Currently, only 2313 * one public filesystem is possible in the spec (RFC 2054 and 2055) 2314 */ 2315 int 2316 vfs_setpublicfs(mp, nep, argp) 2317 struct mount *mp; 2318 struct netexport *nep; 2319 struct export_args *argp; 2320 { 2321 int error; 2322 struct vnode *rvp; 2323 char *cp; 2324 2325 /* 2326 * mp == NULL -> invalidate the current info, the FS is 2327 * no longer exported. May be called from either vfs_export 2328 * or unmount, so check if it hasn't already been done. 2329 */ 2330 if (mp == NULL) { 2331 if (nfs_pub.np_valid) { 2332 nfs_pub.np_valid = 0; 2333 if (nfs_pub.np_index != NULL) { 2334 FREE(nfs_pub.np_index, M_TEMP); 2335 nfs_pub.np_index = NULL; 2336 } 2337 } 2338 return (0); 2339 } 2340 2341 /* 2342 * Only one allowed at a time. 2343 */ 2344 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 2345 return (EBUSY); 2346 2347 /* 2348 * Get real filehandle for root of exported FS. 2349 */ 2350 bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); 2351 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 2352 2353 if ((error = VFS_ROOT(mp, &rvp))) 2354 return (error); 2355 2356 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 2357 return (error); 2358 2359 vput(rvp); 2360 2361 /* 2362 * If an indexfile was specified, pull it in. 2363 */ 2364 if (argp->ex_indexfile != NULL) { 2365 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 2366 M_WAITOK); 2367 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 2368 MAXNAMLEN, (size_t *)0); 2369 if (!error) { 2370 /* 2371 * Check for illegal filenames. 2372 */ 2373 for (cp = nfs_pub.np_index; *cp; cp++) { 2374 if (*cp == '/') { 2375 error = EINVAL; 2376 break; 2377 } 2378 } 2379 } 2380 if (error) { 2381 FREE(nfs_pub.np_index, M_TEMP); 2382 return (error); 2383 } 2384 } 2385 2386 nfs_pub.np_mount = mp; 2387 nfs_pub.np_valid = 1; 2388 return (0); 2389 } 2390 2391 struct netcred * 2392 vfs_export_lookup(mp, nep, nam) 2393 register struct mount *mp; 2394 struct netexport *nep; 2395 struct sockaddr *nam; 2396 { 2397 register struct netcred *np; 2398 register struct radix_node_head *rnh; 2399 struct sockaddr *saddr; 2400 2401 np = NULL; 2402 if (mp->mnt_flag & MNT_EXPORTED) { 2403 /* 2404 * Lookup in the export list first. 
2405 */ 2406 if (nam != NULL) { 2407 saddr = nam; 2408 rnh = nep->ne_rtable[saddr->sa_family]; 2409 if (rnh != NULL) { 2410 np = (struct netcred *) 2411 (*rnh->rnh_matchaddr)((caddr_t)saddr, 2412 rnh); 2413 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 2414 np = NULL; 2415 } 2416 } 2417 /* 2418 * If no address match, use the default if it exists. 2419 */ 2420 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 2421 np = &nep->ne_defexported; 2422 } 2423 return (np); 2424 } 2425 2426 /* 2427 * perform msync on all vnodes under a mount point 2428 * the mount point must be locked. 2429 */ 2430 void 2431 vfs_msync(struct mount *mp, int flags) { 2432 struct vnode *vp, *nvp; 2433 struct vm_object *obj; 2434 int anyio, tries; 2435 2436 tries = 5; 2437 loop: 2438 anyio = 0; 2439 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 2440 2441 nvp = vp->v_mntvnodes.le_next; 2442 2443 if (vp->v_mount != mp) { 2444 goto loop; 2445 } 2446 2447 if (vp->v_flag & VXLOCK) /* XXX: what if MNT_WAIT? */ 2448 continue; 2449 2450 if (flags != MNT_WAIT) { 2451 obj = vp->v_object; 2452 if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0) 2453 continue; 2454 if (VOP_ISLOCKED(vp)) 2455 continue; 2456 } 2457 2458 simple_lock(&vp->v_interlock); 2459 if (vp->v_object && 2460 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 2461 if (!vget(vp, 2462 LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { 2463 if (vp->v_object) { 2464 vm_object_page_clean(vp->v_object, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : 0); 2465 anyio = 1; 2466 } 2467 vput(vp); 2468 } 2469 } else { 2470 simple_unlock(&vp->v_interlock); 2471 } 2472 } 2473 if (anyio && (--tries > 0)) 2474 goto loop; 2475 } 2476 2477 /* 2478 * Create the VM object needed for VMIO and mmap support. This 2479 * is done for all VREG files in the system. Some filesystems might 2480 * afford the additional metadata buffering capability of the 2481 * VMIO code by making the device node be VMIO mode also. 2482 * 2483 * vp must be locked when vfs_object_create is called. 2484 */ 2485 int 2486 vfs_object_create(vp, p, cred) 2487 struct vnode *vp; 2488 struct proc *p; 2489 struct ucred *cred; 2490 { 2491 struct vattr vat; 2492 vm_object_t object; 2493 int error = 0; 2494 2495 if ((vp->v_type != VREG) && (vp->v_type != VBLK)) 2496 return 0; 2497 2498 retry: 2499 if ((object = vp->v_object) == NULL) { 2500 if (vp->v_type == VREG) { 2501 if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) 2502 goto retn; 2503 object = vnode_pager_alloc(vp, vat.va_size, 0, 0); 2504 } else if (major(vp->v_rdev) < nblkdev && 2505 bdevsw[major(vp->v_rdev)] != NULL) { 2506 /* 2507 * This simply allocates the biggest object possible 2508 * for a VBLK vnode. This should be fixed, but doesn't 2509 * cause any problems (yet). 
2510 */ 2511 object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0); 2512 } 2513 object->ref_count--; 2514 vp->v_usecount--; 2515 } else { 2516 if (object->flags & OBJ_DEAD) { 2517 VOP_UNLOCK(vp, 0, p); 2518 tsleep(object, PVM, "vodead", 0); 2519 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 2520 goto retry; 2521 } 2522 } 2523 2524 if (vp->v_object) 2525 vp->v_flag |= VOBJBUF; 2526 2527 retn: 2528 return error; 2529 } 2530 2531 static void 2532 vfree(vp) 2533 struct vnode *vp; 2534 { 2535 int s; 2536 2537 s = splbio(); 2538 simple_lock(&vnode_free_list_slock); 2539 if (vp->v_flag & VTBFREE) { 2540 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 2541 vp->v_flag &= ~VTBFREE; 2542 } 2543 if (vp->v_flag & VAGE) { 2544 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 2545 } else { 2546 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 2547 } 2548 freevnodes++; 2549 simple_unlock(&vnode_free_list_slock); 2550 vp->v_flag &= ~VAGE; 2551 vp->v_flag |= VFREE; 2552 splx(s); 2553 } 2554 2555 void 2556 vbusy(vp) 2557 struct vnode *vp; 2558 { 2559 int s; 2560 2561 s = splbio(); 2562 simple_lock(&vnode_free_list_slock); 2563 if (vp->v_flag & VTBFREE) { 2564 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 2565 vp->v_flag &= ~VTBFREE; 2566 } else { 2567 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 2568 freevnodes--; 2569 } 2570 simple_unlock(&vnode_free_list_slock); 2571 vp->v_flag &= ~(VFREE|VAGE); 2572 splx(s); 2573 } 2574 2575 /* 2576 * Record a process's interest in events which might happen to 2577 * a vnode. Because poll uses the historic select-style interface 2578 * internally, this routine serves as both the ``check for any 2579 * pending events'' and the ``record my interest in future events'' 2580 * functions. (These are done together, while the lock is held, 2581 * to avoid race conditions.) 2582 */ 2583 int 2584 vn_pollrecord(vp, p, events) 2585 struct vnode *vp; 2586 struct proc *p; 2587 short events; 2588 { 2589 simple_lock(&vp->v_pollinfo.vpi_lock); 2590 if (vp->v_pollinfo.vpi_revents & events) { 2591 /* 2592 * This leaves events we are not interested 2593 * in available for the other process which 2594 * which presumably had requested them 2595 * (otherwise they would never have been 2596 * recorded). 2597 */ 2598 events &= vp->v_pollinfo.vpi_revents; 2599 vp->v_pollinfo.vpi_revents &= ~events; 2600 2601 simple_unlock(&vp->v_pollinfo.vpi_lock); 2602 return events; 2603 } 2604 vp->v_pollinfo.vpi_events |= events; 2605 selrecord(p, &vp->v_pollinfo.vpi_selinfo); 2606 simple_unlock(&vp->v_pollinfo.vpi_lock); 2607 return 0; 2608 } 2609 2610 /* 2611 * Note the occurrence of an event. If the VN_POLLEVENT macro is used, 2612 * it is possible for us to miss an event due to race conditions, but 2613 * that condition is expected to be rare, so for the moment it is the 2614 * preferred interface. 2615 */ 2616 void 2617 vn_pollevent(vp, events) 2618 struct vnode *vp; 2619 short events; 2620 { 2621 simple_lock(&vp->v_pollinfo.vpi_lock); 2622 if (vp->v_pollinfo.vpi_events & events) { 2623 /* 2624 * We clear vpi_events so that we don't 2625 * call selwakeup() twice if two events are 2626 * posted before the polling process(es) is 2627 * awakened. This also ensures that we take at 2628 * most one selwakeup() if the polling process 2629 * is no longer interested. However, it does 2630 * mean that only one event can be noticed at 2631 * a time. (Perhaps we should only clear those 2632 * event bits which we note?) XXX 2633 */ 2634 vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? 
*/ 2635 vp->v_pollinfo.vpi_revents |= events; 2636 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2637 } 2638 simple_unlock(&vp->v_pollinfo.vpi_lock); 2639 } 2640 2641 /* 2642 * Wake up anyone polling on vp because it is being revoked. 2643 * This depends on dead_poll() returning POLLHUP for correct 2644 * behavior. 2645 */ 2646 void 2647 vn_pollgone(vp) 2648 struct vnode *vp; 2649 { 2650 simple_lock(&vp->v_pollinfo.vpi_lock); 2651 if (vp->v_pollinfo.vpi_events) { 2652 vp->v_pollinfo.vpi_events = 0; 2653 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2654 } 2655 simple_unlock(&vp->v_pollinfo.vpi_lock); 2656 } 2657 2658 2659 2660 /* 2661 * Routine to create and manage a filesystem syncer vnode. 2662 */ 2663 #define sync_close ((int (*) __P((struct vop_close_args *)))nullop) 2664 static int sync_fsync __P((struct vop_fsync_args *)); 2665 static int sync_inactive __P((struct vop_inactive_args *)); 2666 static int sync_reclaim __P((struct vop_reclaim_args *)); 2667 #define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) 2668 #define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) 2669 static int sync_print __P((struct vop_print_args *)); 2670 #define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked) 2671 2672 static vop_t **sync_vnodeop_p; 2673 static struct vnodeopv_entry_desc sync_vnodeop_entries[] = { 2674 { &vop_default_desc, (vop_t *) vop_eopnotsupp }, 2675 { &vop_close_desc, (vop_t *) sync_close }, /* close */ 2676 { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */ 2677 { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */ 2678 { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */ 2679 { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */ 2680 { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */ 2681 { &vop_print_desc, (vop_t *) sync_print }, /* print */ 2682 { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */ 2683 { NULL, NULL } 2684 }; 2685 static struct vnodeopv_desc sync_vnodeop_opv_desc = 2686 { &sync_vnodeop_p, sync_vnodeop_entries }; 2687 2688 VNODEOP_SET(sync_vnodeop_opv_desc); 2689 2690 /* 2691 * Create a new filesystem syncer vnode for the specified mount point. 2692 */ 2693 int 2694 vfs_allocate_syncvnode(mp) 2695 struct mount *mp; 2696 { 2697 struct vnode *vp; 2698 static long start, incr, next; 2699 int error; 2700 2701 /* Allocate a new vnode */ 2702 if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { 2703 mp->mnt_syncer = NULL; 2704 return (error); 2705 } 2706 vp->v_type = VNON; 2707 /* 2708 * Place the vnode onto the syncer worklist. We attempt to 2709 * scatter them about on the list so that they will go off 2710 * at evenly distributed times even if all the filesystems 2711 * are mounted at once. 2712 */ 2713 next += incr; 2714 if (next == 0 || next > syncer_maxdelay) { 2715 start /= 2; 2716 incr /= 2; 2717 if (start == 0) { 2718 start = syncer_maxdelay / 2; 2719 incr = syncer_maxdelay; 2720 } 2721 next = start; 2722 } 2723 vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); 2724 mp->mnt_syncer = vp; 2725 return (0); 2726 } 2727 2728 /* 2729 * Do a lazy sync of the filesystem. 
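 *
 * Illustrative only: the syncer daemon is expected to land here by
 * fsyncing the mount's syncer vnode with MNT_LAZY once its slot in the
 * workitem queue comes up, along the lines of
 *
 *	vn_lock(syncvp, LK_EXCLUSIVE | LK_RETRY, p);
 *	(void) VOP_FSYNC(syncvp, p->p_ucred, MNT_LAZY, p);
 *	VOP_UNLOCK(syncvp, 0, p);
 *
 * Everything below only does work for that MNT_LAZY case.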
2730 */ 2731 static int 2732 sync_fsync(ap) 2733 struct vop_fsync_args /* { 2734 struct vnode *a_vp; 2735 struct ucred *a_cred; 2736 int a_waitfor; 2737 struct proc *a_p; 2738 } */ *ap; 2739 { 2740 struct vnode *syncvp = ap->a_vp; 2741 struct mount *mp = syncvp->v_mount; 2742 struct proc *p = ap->a_p; 2743 int asyncflag; 2744 2745 /* 2746 * We only need to do something if this is a lazy evaluation. 2747 */ 2748 if (ap->a_waitfor != MNT_LAZY) 2749 return (0); 2750 2751 /* 2752 * Move ourselves to the back of the sync list. 2753 */ 2754 vn_syncer_add_to_worklist(syncvp, syncdelay); 2755 2756 /* 2757 * Walk the list of vnodes pushing all that are dirty and 2758 * not already on the sync list. 2759 */ 2760 simple_lock(&mountlist_slock); 2761 if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) { 2762 simple_unlock(&mountlist_slock); 2763 return (0); 2764 } 2765 asyncflag = mp->mnt_flag & MNT_ASYNC; 2766 mp->mnt_flag &= ~MNT_ASYNC; 2767 vfs_msync(mp, MNT_NOWAIT); 2768 VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); 2769 if (asyncflag) 2770 mp->mnt_flag |= MNT_ASYNC; 2771 vfs_unbusy(mp, p); 2772 return (0); 2773 } 2774 2775 /* 2776 * The syncer vnode is no referenced. 2777 */ 2778 static int 2779 sync_inactive(ap) 2780 struct vop_inactive_args /* { 2781 struct vnode *a_vp; 2782 struct proc *a_p; 2783 } */ *ap; 2784 { 2785 2786 vgone(ap->a_vp); 2787 return (0); 2788 } 2789 2790 /* 2791 * The syncer vnode is no longer needed and is being decommissioned. 2792 */ 2793 static int 2794 sync_reclaim(ap) 2795 struct vop_reclaim_args /* { 2796 struct vnode *a_vp; 2797 } */ *ap; 2798 { 2799 struct vnode *vp = ap->a_vp; 2800 2801 vp->v_mount->mnt_syncer = NULL; 2802 if (vp->v_flag & VONWORKLST) { 2803 LIST_REMOVE(vp, v_synclist); 2804 vp->v_flag &= ~VONWORKLST; 2805 } 2806 2807 return (0); 2808 } 2809 2810 /* 2811 * Print out a syncer vnode. 2812 */ 2813 static int 2814 sync_print(ap) 2815 struct vop_print_args /* { 2816 struct vnode *a_vp; 2817 } */ *ap; 2818 { 2819 struct vnode *vp = ap->a_vp; 2820 2821 printf("syncer vnode"); 2822 if (vp->v_vnlock != NULL) 2823 lockmgr_printinfo(vp->v_vnlock); 2824 printf("\n"); 2825 return (0); 2826 } 2827