1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 
37 * 38 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 39 * $Id: vfs_subr.c,v 1.158 1998/07/11 07:45:43 bde Exp $ 40 */ 41 42 /* 43 * External virtual filesystem routines 44 */ 45 #include "opt_ddb.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/kernel.h> 50 #include <sys/proc.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/socket.h> 54 #include <sys/vnode.h> 55 #include <sys/stat.h> 56 #include <sys/buf.h> 57 #include <sys/domain.h> 58 #include <sys/dirent.h> 59 #include <sys/vmmeter.h> 60 61 #include <machine/limits.h> 62 63 #include <vm/vm.h> 64 #include <vm/vm_object.h> 65 #include <vm/vm_extern.h> 66 #include <vm/pmap.h> 67 #include <vm/vm_map.h> 68 #include <vm/vm_pager.h> 69 #include <vm/vnode_pager.h> 70 #include <vm/vm_zone.h> 71 #include <sys/sysctl.h> 72 73 #include <miscfs/specfs/specdev.h> 74 75 static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); 76 77 static void insmntque __P((struct vnode *vp, struct mount *mp)); 78 #ifdef DDB 79 static void printlockedvnodes __P((void)); 80 #endif 81 static void vclean __P((struct vnode *vp, int flags, struct proc *p)); 82 static void vfree __P((struct vnode *)); 83 static void vgonel __P((struct vnode *vp, struct proc *p)); 84 static unsigned long numvnodes; 85 SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); 86 87 enum vtype iftovt_tab[16] = { 88 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 89 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 90 }; 91 int vttoif_tab[9] = { 92 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 93 S_IFSOCK, S_IFIFO, S_IFMT, 94 }; 95 96 /* 97 * Insq/Remq for the vnode usage lists. 98 */ 99 #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) 100 #define bufremvn(bp) { \ 101 LIST_REMOVE(bp, b_vnbufs); \ 102 (bp)->b_vnbufs.le_next = NOLIST; \ 103 } 104 105 static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ 106 struct tobefreelist vnode_tobefree_list; /* vnode free list */ 107 108 static u_long wantfreevnodes = 25; 109 SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); 110 static u_long freevnodes = 0; 111 SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); 112 113 int vfs_ioopt = 0; 114 #ifdef ENABLE_VFS_IOOPT 115 SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); 116 #endif 117 118 struct mntlist mountlist; /* mounted filesystem list */ 119 struct simplelock mountlist_slock; 120 static struct simplelock mntid_slock; 121 struct simplelock mntvnode_slock; 122 static struct simplelock vnode_free_list_slock; 123 static struct simplelock spechash_slock; 124 struct nfs_public nfs_pub; /* publicly exported FS */ 125 static vm_zone_t vnode_zone; 126 127 /* 128 * The workitem queue. 
129 */ 130 #define SYNCER_MAXDELAY 32 131 int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ 132 time_t syncdelay = 30; 133 int rushjob; /* number of slots to run ASAP */ 134 135 static int syncer_delayno = 0; 136 static long syncer_mask; 137 LIST_HEAD(synclist, vnode); 138 static struct synclist *syncer_workitem_pending; 139 140 int desiredvnodes; 141 SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, ""); 142 143 static void vfs_free_addrlist __P((struct netexport *nep)); 144 static int vfs_free_netcred __P((struct radix_node *rn, void *w)); 145 static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, 146 struct export_args *argp)); 147 148 /* 149 * Initialize the vnode management data structures. 150 */ 151 void 152 vntblinit() 153 { 154 155 desiredvnodes = maxproc + cnt.v_page_count / 4; 156 simple_lock_init(&mntvnode_slock); 157 simple_lock_init(&mntid_slock); 158 simple_lock_init(&spechash_slock); 159 TAILQ_INIT(&vnode_free_list); 160 TAILQ_INIT(&vnode_tobefree_list); 161 simple_lock_init(&vnode_free_list_slock); 162 CIRCLEQ_INIT(&mountlist); 163 vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); 164 /* 165 * Initialize the filesystem syncer. 166 */ 167 syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, 168 &syncer_mask); 169 syncer_maxdelay = syncer_mask + 1; 170 } 171 172 /* 173 * Mark a mount point as busy. Used to synchronize access and to delay 174 * unmounting. Interlock is not released on failure. 175 */ 176 int 177 vfs_busy(mp, flags, interlkp, p) 178 struct mount *mp; 179 int flags; 180 struct simplelock *interlkp; 181 struct proc *p; 182 { 183 int lkflags; 184 185 if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 186 if (flags & LK_NOWAIT) 187 return (ENOENT); 188 mp->mnt_kern_flag |= MNTK_MWAIT; 189 if (interlkp) { 190 simple_unlock(interlkp); 191 } 192 /* 193 * Since all busy locks are shared except the exclusive 194 * lock granted when unmounting, the only place that a 195 * wakeup needs to be done is at the release of the 196 * exclusive lock at the end of dounmount. 197 */ 198 tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); 199 if (interlkp) { 200 simple_lock(interlkp); 201 } 202 return (ENOENT); 203 } 204 lkflags = LK_SHARED | LK_NOPAUSE; 205 if (interlkp) 206 lkflags |= LK_INTERLOCK; 207 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) 208 panic("vfs_busy: unexpected lock failure"); 209 return (0); 210 } 211 212 /* 213 * Free a busy filesystem. 214 */ 215 void 216 vfs_unbusy(mp, p) 217 struct mount *mp; 218 struct proc *p; 219 { 220 221 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); 222 } 223 224 /* 225 * Lookup a filesystem type, and if found allocate and initialize 226 * a mount structure for it. 227 * 228 * Devname is usually updated by mount(8) after booting. 
229 */ 230 int 231 vfs_rootmountalloc(fstypename, devname, mpp) 232 char *fstypename; 233 char *devname; 234 struct mount **mpp; 235 { 236 struct proc *p = curproc; /* XXX */ 237 struct vfsconf *vfsp; 238 struct mount *mp; 239 240 if (fstypename == NULL) 241 return (ENODEV); 242 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 243 if (!strcmp(vfsp->vfc_name, fstypename)) 244 break; 245 if (vfsp == NULL) 246 return (ENODEV); 247 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 248 bzero((char *)mp, (u_long)sizeof(struct mount)); 249 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); 250 (void)vfs_busy(mp, LK_NOWAIT, 0, p); 251 LIST_INIT(&mp->mnt_vnodelist); 252 mp->mnt_vfc = vfsp; 253 mp->mnt_op = vfsp->vfc_vfsops; 254 mp->mnt_flag = MNT_RDONLY; 255 mp->mnt_vnodecovered = NULLVP; 256 vfsp->vfc_refcount++; 257 mp->mnt_stat.f_type = vfsp->vfc_typenum; 258 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 259 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 260 mp->mnt_stat.f_mntonname[0] = '/'; 261 mp->mnt_stat.f_mntonname[1] = 0; 262 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 263 *mpp = mp; 264 return (0); 265 } 266 267 /* 268 * Find an appropriate filesystem to use for the root. If a filesystem 269 * has not been preselected, walk through the list of known filesystems 270 * trying those that have mountroot routines, and try them until one 271 * works or we have tried them all. 272 */ 273 #ifdef notdef /* XXX JH */ 274 int 275 lite2_vfs_mountroot() 276 { 277 struct vfsconf *vfsp; 278 extern int (*lite2_mountroot) __P((void)); 279 int error; 280 281 if (lite2_mountroot != NULL) 282 return ((*lite2_mountroot)()); 283 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 284 if (vfsp->vfc_mountroot == NULL) 285 continue; 286 if ((error = (*vfsp->vfc_mountroot)()) == 0) 287 return (0); 288 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); 289 } 290 return (ENODEV); 291 } 292 #endif 293 294 /* 295 * Lookup a mount point by filesystem identifier. 
296 */ 297 struct mount * 298 vfs_getvfs(fsid) 299 fsid_t *fsid; 300 { 301 register struct mount *mp; 302 303 simple_lock(&mountlist_slock); 304 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 305 mp = mp->mnt_list.cqe_next) { 306 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 307 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { 308 simple_unlock(&mountlist_slock); 309 return (mp); 310 } 311 } 312 simple_unlock(&mountlist_slock); 313 return ((struct mount *) 0); 314 } 315 316 /* 317 * Get a new unique fsid 318 */ 319 void 320 vfs_getnewfsid(mp) 321 struct mount *mp; 322 { 323 static u_short xxxfs_mntid; 324 325 fsid_t tfsid; 326 int mtype; 327 328 simple_lock(&mntid_slock); 329 mtype = mp->mnt_vfc->vfc_typenum; 330 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); 331 mp->mnt_stat.f_fsid.val[1] = mtype; 332 if (xxxfs_mntid == 0) 333 ++xxxfs_mntid; 334 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); 335 tfsid.val[1] = mtype; 336 if (mountlist.cqh_first != (void *)&mountlist) { 337 while (vfs_getvfs(&tfsid)) { 338 tfsid.val[0]++; 339 xxxfs_mntid++; 340 } 341 } 342 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 343 simple_unlock(&mntid_slock); 344 } 345 346 /* 347 * Set vnode attributes to VNOVAL 348 */ 349 void 350 vattr_null(vap) 351 register struct vattr *vap; 352 { 353 354 vap->va_type = VNON; 355 vap->va_size = VNOVAL; 356 vap->va_bytes = VNOVAL; 357 vap->va_mode = VNOVAL; 358 vap->va_nlink = VNOVAL; 359 vap->va_uid = VNOVAL; 360 vap->va_gid = VNOVAL; 361 vap->va_fsid = VNOVAL; 362 vap->va_fileid = VNOVAL; 363 vap->va_blocksize = VNOVAL; 364 vap->va_rdev = VNOVAL; 365 vap->va_atime.tv_sec = VNOVAL; 366 vap->va_atime.tv_nsec = VNOVAL; 367 vap->va_mtime.tv_sec = VNOVAL; 368 vap->va_mtime.tv_nsec = VNOVAL; 369 vap->va_ctime.tv_sec = VNOVAL; 370 vap->va_ctime.tv_nsec = VNOVAL; 371 vap->va_flags = VNOVAL; 372 vap->va_gen = VNOVAL; 373 vap->va_vaflags = 0; 374 } 375 376 /* 377 * Routines having to do with the management of the vnode table. 378 */ 379 extern vop_t **dead_vnodeop_p; 380 381 /* 382 * Return the next vnode from the free list. 383 */ 384 int 385 getnewvnode(tag, mp, vops, vpp) 386 enum vtagtype tag; 387 struct mount *mp; 388 vop_t **vops; 389 struct vnode **vpp; 390 { 391 int s; 392 struct proc *p = curproc; /* XXX */ 393 struct vnode *vp, *tvp, *nvp; 394 vm_object_t object; 395 TAILQ_HEAD(freelst, vnode) vnode_tmp_list; 396 397 /* 398 * We take the least recently used vnode from the freelist 399 * if we can get it and it has no cached pages, and no 400 * namecache entries are relative to it. 
401 * Otherwise we allocate a new vnode 402 */ 403 404 s = splbio(); 405 simple_lock(&vnode_free_list_slock); 406 TAILQ_INIT(&vnode_tmp_list); 407 408 for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) { 409 nvp = TAILQ_NEXT(vp, v_freelist); 410 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 411 if (vp->v_flag & VAGE) { 412 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 413 } else { 414 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 415 } 416 vp->v_flag &= ~(VTBFREE|VAGE); 417 vp->v_flag |= VFREE; 418 if (vp->v_usecount) 419 panic("tobe free vnode isn't"); 420 freevnodes++; 421 } 422 423 if (wantfreevnodes && freevnodes < wantfreevnodes) { 424 vp = NULL; 425 } else if (!wantfreevnodes && freevnodes <= desiredvnodes) { 426 /* 427 * XXX: this is only here to be backwards compatible 428 */ 429 vp = NULL; 430 } else { 431 for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) { 432 433 nvp = TAILQ_NEXT(vp, v_freelist); 434 435 if (!simple_lock_try(&vp->v_interlock)) 436 continue; 437 if (vp->v_usecount) 438 panic("free vnode isn't"); 439 440 object = vp->v_object; 441 if (object && (object->resident_page_count || object->ref_count)) { 442 printf("object inconsistant state: RPC: %d, RC: %d\n", 443 object->resident_page_count, object->ref_count); 444 /* Don't recycle if it's caching some pages */ 445 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 446 TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist); 447 continue; 448 } else if (LIST_FIRST(&vp->v_cache_src)) { 449 /* Don't recycle if active in the namecache */ 450 simple_unlock(&vp->v_interlock); 451 continue; 452 } else { 453 break; 454 } 455 } 456 } 457 458 for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) { 459 nvp = TAILQ_NEXT(tvp, v_freelist); 460 TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist); 461 TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist); 462 simple_unlock(&tvp->v_interlock); 463 } 464 465 if (vp) { 466 vp->v_flag |= VDOOMED; 467 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 468 freevnodes--; 469 simple_unlock(&vnode_free_list_slock); 470 cache_purge(vp); 471 vp->v_lease = NULL; 472 if (vp->v_type != VBAD) { 473 vgonel(vp, p); 474 } else { 475 simple_unlock(&vp->v_interlock); 476 } 477 478 #ifdef DIAGNOSTIC 479 { 480 int s; 481 482 if (vp->v_data) 483 panic("cleaned vnode isn't"); 484 s = splbio(); 485 if (vp->v_numoutput) 486 panic("Clean vnode has pending I/O's"); 487 splx(s); 488 } 489 #endif 490 vp->v_flag = 0; 491 vp->v_lastr = 0; 492 vp->v_lastw = 0; 493 vp->v_lasta = 0; 494 vp->v_cstart = 0; 495 vp->v_clen = 0; 496 vp->v_socket = 0; 497 vp->v_writecount = 0; /* XXX */ 498 vp->v_maxio = 0; 499 } else { 500 simple_unlock(&vnode_free_list_slock); 501 vp = (struct vnode *) zalloc(vnode_zone); 502 bzero((char *) vp, sizeof *vp); 503 simple_lock_init(&vp->v_interlock); 504 vp->v_dd = vp; 505 cache_purge(vp); 506 LIST_INIT(&vp->v_cache_src); 507 TAILQ_INIT(&vp->v_cache_dst); 508 numvnodes++; 509 } 510 511 vp->v_type = VNON; 512 vp->v_tag = tag; 513 vp->v_op = vops; 514 insmntque(vp, mp); 515 *vpp = vp; 516 vp->v_usecount = 1; 517 vp->v_data = 0; 518 splx(s); 519 520 vfs_object_create(vp, p, p->p_ucred, TRUE); 521 return (0); 522 } 523 524 /* 525 * Move a vnode from one mount queue to another. 526 */ 527 static void 528 insmntque(vp, mp) 529 register struct vnode *vp; 530 register struct mount *mp; 531 { 532 533 simple_lock(&mntvnode_slock); 534 /* 535 * Delete from old mount point vnode list, if on one. 
536 */ 537 if (vp->v_mount != NULL) 538 LIST_REMOVE(vp, v_mntvnodes); 539 /* 540 * Insert into list of vnodes for the new mount point, if available. 541 */ 542 if ((vp->v_mount = mp) == NULL) { 543 simple_unlock(&mntvnode_slock); 544 return; 545 } 546 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 547 simple_unlock(&mntvnode_slock); 548 } 549 550 /* 551 * Update outstanding I/O count and do wakeup if requested. 552 */ 553 void 554 vwakeup(bp) 555 register struct buf *bp; 556 { 557 register struct vnode *vp; 558 559 bp->b_flags &= ~B_WRITEINPROG; 560 if ((vp = bp->b_vp)) { 561 vp->v_numoutput--; 562 if (vp->v_numoutput < 0) 563 panic("vwakeup: neg numoutput"); 564 if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { 565 vp->v_flag &= ~VBWAIT; 566 wakeup((caddr_t) &vp->v_numoutput); 567 } 568 } 569 } 570 571 /* 572 * Flush out and invalidate all buffers associated with a vnode. 573 * Called with the underlying object locked. 574 */ 575 int 576 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) 577 register struct vnode *vp; 578 int flags; 579 struct ucred *cred; 580 struct proc *p; 581 int slpflag, slptimeo; 582 { 583 register struct buf *bp; 584 struct buf *nbp, *blist; 585 int s, error; 586 vm_object_t object; 587 588 if (flags & V_SAVE) { 589 s = splbio(); 590 while (vp->v_numoutput) { 591 vp->v_flag |= VBWAIT; 592 tsleep((caddr_t)&vp->v_numoutput, 593 slpflag | (PRIBIO + 1), 594 "vinvlbuf", slptimeo); 595 } 596 if (vp->v_dirtyblkhd.lh_first != NULL) { 597 splx(s); 598 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0) 599 return (error); 600 s = splbio(); 601 if (vp->v_numoutput > 0 || 602 vp->v_dirtyblkhd.lh_first != NULL) 603 panic("vinvalbuf: dirty bufs"); 604 } 605 splx(s); 606 } 607 s = splbio(); 608 for (;;) { 609 if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA)) 610 while (blist && blist->b_lblkno < 0) 611 blist = blist->b_vnbufs.le_next; 612 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && 613 (flags & V_SAVEMETA)) 614 while (blist && blist->b_lblkno < 0) 615 blist = blist->b_vnbufs.le_next; 616 if (!blist) 617 break; 618 619 for (bp = blist; bp; bp = nbp) { 620 nbp = bp->b_vnbufs.le_next; 621 if ((flags & V_SAVEMETA) && bp->b_lblkno < 0) 622 continue; 623 if (bp->b_flags & B_BUSY) { 624 bp->b_flags |= B_WANTED; 625 error = tsleep((caddr_t) bp, 626 slpflag | (PRIBIO + 4), "vinvalbuf", 627 slptimeo); 628 if (error) { 629 splx(s); 630 return (error); 631 } 632 break; 633 } 634 /* 635 * XXX Since there are no node locks for NFS, I 636 * believe there is a slight chance that a delayed 637 * write will occur while sleeping just above, so 638 * check for it. Note that vfs_bio_awrite expects 639 * buffers to reside on a queue, while VOP_BWRITE and 640 * brelse do not. 641 */ 642 if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && 643 (flags & V_SAVE)) { 644 645 if (bp->b_vp == vp) { 646 if (bp->b_flags & B_CLUSTEROK) { 647 vfs_bio_awrite(bp); 648 } else { 649 bremfree(bp); 650 bp->b_flags |= (B_BUSY | B_ASYNC); 651 VOP_BWRITE(bp); 652 } 653 } else { 654 bremfree(bp); 655 bp->b_flags |= B_BUSY; 656 (void) VOP_BWRITE(bp); 657 } 658 break; 659 } 660 bremfree(bp); 661 bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF | B_BUSY); 662 bp->b_flags &= ~B_ASYNC; 663 brelse(bp); 664 } 665 } 666 667 while (vp->v_numoutput > 0) { 668 vp->v_flag |= VBWAIT; 669 tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); 670 } 671 672 splx(s); 673 674 /* 675 * Destroy the copy in the VM cache, too. 
676 */ 677 simple_lock(&vp->v_interlock); 678 object = vp->v_object; 679 if (object != NULL) { 680 if (flags & V_SAVEMETA) 681 vm_object_page_remove(object, 0, object->size, 682 (flags & V_SAVE) ? TRUE : FALSE); 683 else 684 vm_object_page_remove(object, 0, 0, 685 (flags & V_SAVE) ? TRUE : FALSE); 686 } 687 simple_unlock(&vp->v_interlock); 688 689 if (!(flags & V_SAVEMETA) && 690 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) 691 panic("vinvalbuf: flush failed"); 692 return (0); 693 } 694 695 /* 696 * Truncate a file's buffer and pages to a specified length. This 697 * is in lieu of the old vinvalbuf mechanism, which performed unneeded 698 * sync activity. 699 */ 700 int 701 vtruncbuf(vp, cred, p, length, blksize) 702 register struct vnode *vp; 703 struct ucred *cred; 704 struct proc *p; 705 off_t length; 706 int blksize; 707 { 708 register struct buf *bp; 709 struct buf *nbp, *blist; 710 int s, error, anyfreed; 711 vm_object_t object; 712 int trunclbn; 713 714 /* 715 * Round up to the *next* lbn. 716 */ 717 trunclbn = (length + blksize - 1) / blksize; 718 719 s = splbio(); 720 restart: 721 anyfreed = 1; 722 for (;anyfreed;) { 723 anyfreed = 0; 724 for ( bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 725 726 nbp = LIST_NEXT(bp, b_vnbufs); 727 728 if (bp->b_lblkno >= trunclbn) { 729 if (bp->b_flags & B_BUSY) { 730 bp->b_flags |= B_WANTED; 731 tsleep(bp, PRIBIO + 4, "vtrb1", 0); 732 goto restart; 733 } else { 734 bremfree(bp); 735 bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF); 736 bp->b_flags &= ~B_ASYNC; 737 brelse(bp); 738 anyfreed = 1; 739 } 740 if (nbp && 741 ((LIST_NEXT(nbp, b_vnbufs) == NOLIST) || 742 (nbp->b_vp != vp) || 743 (nbp->b_flags & B_DELWRI))) { 744 goto restart; 745 } 746 } 747 } 748 749 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 750 751 nbp = LIST_NEXT(bp, b_vnbufs); 752 753 if (bp->b_lblkno >= trunclbn) { 754 if (bp->b_flags & B_BUSY) { 755 bp->b_flags |= B_WANTED; 756 tsleep(bp, PRIBIO + 4, "vtrb2", 0); 757 goto restart; 758 } else { 759 bremfree(bp); 760 bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF); 761 bp->b_flags &= ~B_ASYNC; 762 brelse(bp); 763 anyfreed = 1; 764 } 765 if (nbp && 766 ((LIST_NEXT(nbp, b_vnbufs) == NOLIST) || 767 (nbp->b_vp != vp) || 768 (nbp->b_flags & B_DELWRI) == 0)) { 769 goto restart; 770 } 771 } 772 } 773 } 774 775 if (length > 0) { 776 restartsync: 777 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 778 779 nbp = LIST_NEXT(bp, b_vnbufs); 780 781 if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) { 782 if (bp->b_flags & B_BUSY) { 783 bp->b_flags |= B_WANTED; 784 tsleep(bp, PRIBIO, "vtrb3", 0); 785 } else { 786 bremfree(bp); 787 bp->b_flags |= B_BUSY; 788 if (bp->b_vp == vp) { 789 bp->b_flags |= B_ASYNC; 790 } else { 791 bp->b_flags &= ~B_ASYNC; 792 } 793 VOP_BWRITE(bp); 794 } 795 goto restartsync; 796 } 797 798 } 799 } 800 801 while (vp->v_numoutput > 0) { 802 vp->v_flag |= VBWAIT; 803 tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0); 804 } 805 806 splx(s); 807 808 vnode_pager_setsize(vp, length); 809 810 return (0); 811 } 812 813 /* 814 * Associate a buffer with a vnode. 815 */ 816 void 817 bgetvp(vp, bp) 818 register struct vnode *vp; 819 register struct buf *bp; 820 { 821 int s; 822 823 #if defined(DIAGNOSTIC) 824 if (bp->b_vp) 825 panic("bgetvp: not free"); 826 #endif 827 vhold(vp); 828 bp->b_vp = vp; 829 if (vp->v_type == VBLK || vp->v_type == VCHR) 830 bp->b_dev = vp->v_rdev; 831 else 832 bp->b_dev = NODEV; 833 /* 834 * Insert onto list for new vnode. 
835 */ 836 s = splbio(); 837 bufinsvn(bp, &vp->v_cleanblkhd); 838 splx(s); 839 } 840 841 /* 842 * Disassociate a buffer from a vnode. 843 */ 844 void 845 brelvp(bp) 846 register struct buf *bp; 847 { 848 struct vnode *vp; 849 int s; 850 851 #if defined(DIAGNOSTIC) 852 if (bp->b_vp == (struct vnode *) 0) 853 panic("brelvp: NULL"); 854 #endif 855 856 /* 857 * Delete from old vnode list, if on one. 858 */ 859 vp = bp->b_vp; 860 s = splbio(); 861 if (bp->b_vnbufs.le_next != NOLIST) 862 bufremvn(bp); 863 if ((vp->v_flag & VONWORKLST) && (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)) { 864 vp->v_flag &= ~VONWORKLST; 865 LIST_REMOVE(vp, v_synclist); 866 } 867 splx(s); 868 bp->b_vp = (struct vnode *) 0; 869 vdrop(vp); 870 } 871 872 /* 873 * The workitem queue. 874 * 875 * It is useful to delay writes of file data and filesystem metadata 876 * for tens of seconds so that quickly created and deleted files need 877 * not waste disk bandwidth being created and removed. To realize this, 878 * we append vnodes to a "workitem" queue. When running with a soft 879 * updates implementation, most pending metadata dependencies should 880 * not wait for more than a few seconds. Thus, mounted on block devices 881 * are delayed only about a half the time that file data is delayed. 882 * Similarly, directory updates are more critical, so are only delayed 883 * about a third the time that file data is delayed. Thus, there are 884 * SYNCER_MAXDELAY queues that are processed round-robin at a rate of 885 * one each second (driven off the filesystem syner process). The 886 * syncer_delayno variable indicates the next queue that is to be processed. 887 * Items that need to be processed soon are placed in this queue: 888 * 889 * syncer_workitem_pending[syncer_delayno] 890 * 891 * A delay of fifteen seconds is done by placing the request fifteen 892 * entries later in the queue: 893 * 894 * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] 895 * 896 */ 897 898 /* 899 * Add an item to the syncer work queue. 900 */ 901 void 902 vn_syncer_add_to_worklist(vp, delay) 903 struct vnode *vp; 904 int delay; 905 { 906 int s, slot; 907 908 s = splbio(); 909 910 if (vp->v_flag & VONWORKLST) { 911 LIST_REMOVE(vp, v_synclist); 912 } 913 914 if (delay > syncer_maxdelay - 2) 915 delay = syncer_maxdelay - 2; 916 slot = (syncer_delayno + delay) & syncer_mask; 917 918 LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); 919 vp->v_flag |= VONWORKLST; 920 splx(s); 921 } 922 923 static void sched_sync __P((void)); 924 static struct proc *updateproc; 925 static struct kproc_desc up_kp = { 926 "syncer", 927 sched_sync, 928 &updateproc 929 }; 930 SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) 931 932 /* 933 * System filesystem synchronizer daemon. 934 */ 935 void 936 sched_sync(void) 937 { 938 struct synclist *slp; 939 struct vnode *vp; 940 long starttime; 941 int s; 942 struct proc *p = updateproc; 943 944 for (;;) { 945 starttime = time_second; 946 947 /* 948 * Push files whose dirty time has expired. 
949 */ 950 s = splbio(); 951 slp = &syncer_workitem_pending[syncer_delayno]; 952 syncer_delayno += 1; 953 if (syncer_delayno == syncer_maxdelay) 954 syncer_delayno = 0; 955 splx(s); 956 957 while ((vp = LIST_FIRST(slp)) != NULL) { 958 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 959 (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); 960 VOP_UNLOCK(vp, 0, p); 961 if (LIST_FIRST(slp) == vp) { 962 if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL && 963 vp->v_type != VBLK) 964 panic("sched_sync: fsync failed"); 965 /* 966 * Move ourselves to the back of the sync list. 967 */ 968 LIST_REMOVE(vp, v_synclist); 969 vn_syncer_add_to_worklist(vp, syncdelay); 970 } 971 } 972 973 /* 974 * Do soft update processing. 975 */ 976 if (bioops.io_sync) 977 (*bioops.io_sync)(NULL); 978 979 /* 980 * The variable rushjob allows the kernel to speed up the 981 * processing of the filesystem syncer process. A rushjob 982 * value of N tells the filesystem syncer to process the next 983 * N seconds worth of work on its queue ASAP. Currently rushjob 984 * is used by the soft update code to speed up the filesystem 985 * syncer process when the incore state is getting so far 986 * ahead of the disk that the kernel memory pool is being 987 * threatened with exhaustion. 988 */ 989 if (rushjob > 0) { 990 rushjob -= 1; 991 continue; 992 } 993 /* 994 * If it has taken us less than a second to process the 995 * current work, then wait. Otherwise start right over 996 * again. We can still lose time if any single round 997 * takes more than two seconds, but it does not really 998 * matter as we are just trying to generally pace the 999 * filesystem activity. 1000 */ 1001 if (time_second == starttime) 1002 tsleep(&lbolt, PPAUSE, "syncer", 0); 1003 } 1004 } 1005 1006 /* 1007 * Associate a p-buffer with a vnode. 1008 */ 1009 void 1010 pbgetvp(vp, bp) 1011 register struct vnode *vp; 1012 register struct buf *bp; 1013 { 1014 #if defined(DIAGNOSTIC) 1015 if (bp->b_vp) 1016 panic("pbgetvp: not free"); 1017 #endif 1018 bp->b_vp = vp; 1019 if (vp->v_type == VBLK || vp->v_type == VCHR) 1020 bp->b_dev = vp->v_rdev; 1021 else 1022 bp->b_dev = NODEV; 1023 } 1024 1025 /* 1026 * Disassociate a p-buffer from a vnode. 1027 */ 1028 void 1029 pbrelvp(bp) 1030 register struct buf *bp; 1031 { 1032 1033 #if defined(DIAGNOSTIC) 1034 if (bp->b_vp == (struct vnode *) 0) 1035 panic("pbrelvp: NULL"); 1036 #endif 1037 1038 bp->b_vp = (struct vnode *) 0; 1039 } 1040 1041 /* 1042 * Reassign a buffer from one vnode to another. 1043 * Used to assign file specific control information 1044 * (indirect blocks) to the vnode to which they belong. 1045 */ 1046 void 1047 reassignbuf(bp, newvp) 1048 register struct buf *bp; 1049 register struct vnode *newvp; 1050 { 1051 struct buflists *listheadp; 1052 int delay; 1053 int s; 1054 1055 if (newvp == NULL) { 1056 printf("reassignbuf: NULL"); 1057 return; 1058 } 1059 1060 s = splbio(); 1061 /* 1062 * Delete from old vnode list, if on one. 1063 */ 1064 if (bp->b_vnbufs.le_next != NOLIST) { 1065 bufremvn(bp); 1066 vdrop(bp->b_vp); 1067 } 1068 /* 1069 * If dirty, put on list of dirty buffers; otherwise insert onto list 1070 * of clean buffers. 
1071 */ 1072 if (bp->b_flags & B_DELWRI) { 1073 struct buf *tbp; 1074 1075 listheadp = &newvp->v_dirtyblkhd; 1076 if ((newvp->v_flag & VONWORKLST) == 0) { 1077 switch (newvp->v_type) { 1078 case VDIR: 1079 delay = syncdelay / 3; 1080 break; 1081 case VBLK: 1082 if (newvp->v_specmountpoint != NULL) { 1083 delay = syncdelay / 2; 1084 break; 1085 } 1086 /* fall through */ 1087 default: 1088 delay = syncdelay; 1089 } 1090 vn_syncer_add_to_worklist(newvp, delay); 1091 } 1092 tbp = listheadp->lh_first; 1093 if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) { 1094 bufinsvn(bp, listheadp); 1095 } else { 1096 while (tbp->b_vnbufs.le_next && 1097 (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { 1098 tbp = tbp->b_vnbufs.le_next; 1099 } 1100 LIST_INSERT_AFTER(tbp, bp, b_vnbufs); 1101 } 1102 } else { 1103 bufinsvn(bp, &newvp->v_cleanblkhd); 1104 if ((newvp->v_flag & VONWORKLST) && 1105 LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) { 1106 newvp->v_flag &= ~VONWORKLST; 1107 LIST_REMOVE(newvp, v_synclist); 1108 } 1109 } 1110 bp->b_vp = newvp; 1111 vhold(bp->b_vp); 1112 splx(s); 1113 } 1114 1115 #ifndef SLICE 1116 /* 1117 * Create a vnode for a block device. 1118 * Used for mounting the root file system. 1119 */ 1120 int 1121 bdevvp(dev, vpp) 1122 dev_t dev; 1123 struct vnode **vpp; 1124 { 1125 register struct vnode *vp; 1126 struct vnode *nvp; 1127 int error; 1128 1129 if (dev == NODEV) 1130 return (0); 1131 error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp); 1132 if (error) { 1133 *vpp = 0; 1134 return (error); 1135 } 1136 vp = nvp; 1137 vp->v_type = VBLK; 1138 if ((nvp = checkalias(vp, dev, (struct mount *) 0))) { 1139 vput(vp); 1140 vp = nvp; 1141 } 1142 *vpp = vp; 1143 return (0); 1144 } 1145 #endif /* !SLICE */ 1146 1147 /* 1148 * Check to see if the new vnode represents a special device 1149 * for which we already have a vnode (either because of 1150 * bdevvp() or because of a different vnode representing 1151 * the same block device). If such an alias exists, deallocate 1152 * the existing contents and return the aliased vnode. The 1153 * caller is responsible for filling it with its new contents. 1154 */ 1155 struct vnode * 1156 checkalias(nvp, nvp_rdev, mp) 1157 register struct vnode *nvp; 1158 dev_t nvp_rdev; 1159 struct mount *mp; 1160 { 1161 struct proc *p = curproc; /* XXX */ 1162 struct vnode *vp; 1163 struct vnode **vpp; 1164 1165 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 1166 return (NULLVP); 1167 1168 vpp = &speclisth[SPECHASH(nvp_rdev)]; 1169 loop: 1170 simple_lock(&spechash_slock); 1171 for (vp = *vpp; vp; vp = vp->v_specnext) { 1172 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) 1173 continue; 1174 /* 1175 * Alias, but not in use, so flush it out. 1176 * Only alias active device nodes. 1177 * Not sure why we don't re-use this like we do below. 1178 */ 1179 simple_lock(&vp->v_interlock); 1180 if (vp->v_usecount == 0) { 1181 simple_unlock(&spechash_slock); 1182 vgonel(vp, p); 1183 goto loop; 1184 } 1185 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { 1186 /* 1187 * It dissappeared, and we may have slept. 1188 * Restart from the beginning 1189 */ 1190 simple_unlock(&spechash_slock); 1191 goto loop; 1192 } 1193 break; 1194 } 1195 /* 1196 * It would be a lot clearer what is going on here if 1197 * this had been expressed as: 1198 * if ( vp && (vp->v_tag == VT_NULL)) 1199 * and the clauses had been swapped. 1200 */ 1201 if (vp == NULL || vp->v_tag != VT_NON) { 1202 /* 1203 * Put the new vnode into the hash chain. 1204 * and if there was an alias, connect them. 
1205 */ 1206 MALLOC(nvp->v_specinfo, struct specinfo *, 1207 sizeof(struct specinfo), M_VNODE, M_WAITOK); 1208 nvp->v_rdev = nvp_rdev; 1209 nvp->v_hashchain = vpp; 1210 nvp->v_specnext = *vpp; 1211 nvp->v_specmountpoint = NULL; 1212 simple_unlock(&spechash_slock); 1213 *vpp = nvp; 1214 if (vp != NULLVP) { 1215 nvp->v_flag |= VALIASED; 1216 vp->v_flag |= VALIASED; 1217 vput(vp); 1218 } 1219 return (NULLVP); 1220 } 1221 /* 1222 * if ( vp && (vp->v_tag == VT_NULL)) 1223 * We have a vnode alias, but it is a trashed. 1224 * Make it look like it's newley allocated. (by getnewvnode()) 1225 * The caller should use this instead. 1226 */ 1227 simple_unlock(&spechash_slock); 1228 VOP_UNLOCK(vp, 0, p); 1229 simple_lock(&vp->v_interlock); 1230 vclean(vp, 0, p); 1231 vp->v_op = nvp->v_op; 1232 vp->v_tag = nvp->v_tag; 1233 nvp->v_type = VNON; 1234 insmntque(vp, mp); 1235 return (vp); 1236 } 1237 1238 /* 1239 * Grab a particular vnode from the free list, increment its 1240 * reference count and lock it. The vnode lock bit is set the 1241 * vnode is being eliminated in vgone. The process is awakened 1242 * when the transition is completed, and an error returned to 1243 * indicate that the vnode is no longer usable (possibly having 1244 * been changed to a new file system type). 1245 */ 1246 int 1247 vget(vp, flags, p) 1248 register struct vnode *vp; 1249 int flags; 1250 struct proc *p; 1251 { 1252 int error; 1253 1254 /* 1255 * If the vnode is in the process of being cleaned out for 1256 * another use, we wait for the cleaning to finish and then 1257 * return failure. Cleaning is determined by checking that 1258 * the VXLOCK flag is set. 1259 */ 1260 if ((flags & LK_INTERLOCK) == 0) { 1261 simple_lock(&vp->v_interlock); 1262 } 1263 if (vp->v_flag & VXLOCK) { 1264 vp->v_flag |= VXWANT; 1265 simple_unlock(&vp->v_interlock); 1266 tsleep((caddr_t)vp, PINOD, "vget", 0); 1267 return (ENOENT); 1268 } 1269 1270 vp->v_usecount++; 1271 1272 if (VSHOULDBUSY(vp)) 1273 vbusy(vp); 1274 if (flags & LK_TYPE_MASK) { 1275 if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { 1276 /* 1277 * must expand vrele here because we do not want 1278 * to call VOP_INACTIVE if the reference count 1279 * drops back to zero since it was never really 1280 * active. We must remove it from the free list 1281 * before sleeping so that multiple processes do 1282 * not try to recycle it. 1283 */ 1284 simple_lock(&vp->v_interlock); 1285 vp->v_usecount--; 1286 if (VSHOULDFREE(vp)) 1287 vfree(vp); 1288 simple_unlock(&vp->v_interlock); 1289 } 1290 return (error); 1291 } 1292 simple_unlock(&vp->v_interlock); 1293 return (0); 1294 } 1295 1296 void 1297 vref(struct vnode *vp) 1298 { 1299 simple_lock(&vp->v_interlock); 1300 vp->v_usecount++; 1301 simple_unlock(&vp->v_interlock); 1302 } 1303 1304 /* 1305 * Vnode put/release. 1306 * If count drops to zero, call inactive routine and return to freelist. 1307 */ 1308 void 1309 vrele(vp) 1310 struct vnode *vp; 1311 { 1312 struct proc *p = curproc; /* XXX */ 1313 1314 #ifdef DIAGNOSTIC 1315 if (vp == NULL) 1316 panic("vrele: null vp"); 1317 #endif 1318 simple_lock(&vp->v_interlock); 1319 1320 if (vp->v_usecount > 1) { 1321 1322 vp->v_usecount--; 1323 simple_unlock(&vp->v_interlock); 1324 1325 return; 1326 } 1327 1328 if (vp->v_usecount == 1) { 1329 1330 vp->v_usecount--; 1331 1332 if (VSHOULDFREE(vp)) 1333 vfree(vp); 1334 /* 1335 * If we are doing a vput, the node is already locked, and we must 1336 * call VOP_INACTIVE with the node locked. 
So, in the case of 1337 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1338 */ 1339 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { 1340 VOP_INACTIVE(vp, p); 1341 } 1342 1343 } else { 1344 #ifdef DIAGNOSTIC 1345 vprint("vrele: negative ref count", vp); 1346 simple_unlock(&vp->v_interlock); 1347 #endif 1348 panic("vrele: negative ref cnt"); 1349 } 1350 } 1351 1352 void 1353 vput(vp) 1354 struct vnode *vp; 1355 { 1356 struct proc *p = curproc; /* XXX */ 1357 1358 #ifdef DIAGNOSTIC 1359 if (vp == NULL) 1360 panic("vput: null vp"); 1361 #endif 1362 1363 simple_lock(&vp->v_interlock); 1364 1365 if (vp->v_usecount > 1) { 1366 1367 vp->v_usecount--; 1368 VOP_UNLOCK(vp, LK_INTERLOCK, p); 1369 return; 1370 1371 } 1372 1373 if (vp->v_usecount == 1) { 1374 1375 vp->v_usecount--; 1376 if (VSHOULDFREE(vp)) 1377 vfree(vp); 1378 /* 1379 * If we are doing a vput, the node is already locked, and we must 1380 * call VOP_INACTIVE with the node locked. So, in the case of 1381 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1382 */ 1383 simple_unlock(&vp->v_interlock); 1384 VOP_INACTIVE(vp, p); 1385 1386 } else { 1387 #ifdef DIAGNOSTIC 1388 vprint("vput: negative ref count", vp); 1389 #endif 1390 panic("vput: negative ref cnt"); 1391 } 1392 } 1393 1394 /* 1395 * Somebody doesn't want the vnode recycled. 1396 */ 1397 void 1398 vhold(vp) 1399 register struct vnode *vp; 1400 { 1401 int s; 1402 1403 s = splbio(); 1404 vp->v_holdcnt++; 1405 if (VSHOULDBUSY(vp)) 1406 vbusy(vp); 1407 splx(s); 1408 } 1409 1410 /* 1411 * One less who cares about this vnode. 1412 */ 1413 void 1414 vdrop(vp) 1415 register struct vnode *vp; 1416 { 1417 int s; 1418 1419 s = splbio(); 1420 if (vp->v_holdcnt <= 0) 1421 panic("vdrop: holdcnt"); 1422 vp->v_holdcnt--; 1423 if (VSHOULDFREE(vp)) 1424 vfree(vp); 1425 splx(s); 1426 } 1427 1428 /* 1429 * Remove any vnodes in the vnode table belonging to mount point mp. 1430 * 1431 * If MNT_NOFORCE is specified, there should not be any active ones, 1432 * return error if any are found (nb: this is a user error, not a 1433 * system error). If MNT_FORCE is specified, detach any active vnodes 1434 * that are found. 1435 */ 1436 #ifdef DIAGNOSTIC 1437 static int busyprt = 0; /* print out busy vnodes */ 1438 SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); 1439 #endif 1440 1441 int 1442 vflush(mp, skipvp, flags) 1443 struct mount *mp; 1444 struct vnode *skipvp; 1445 int flags; 1446 { 1447 struct proc *p = curproc; /* XXX */ 1448 struct vnode *vp, *nvp; 1449 int busy = 0; 1450 1451 simple_lock(&mntvnode_slock); 1452 loop: 1453 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { 1454 /* 1455 * Make sure this vnode wasn't reclaimed in getnewvnode(). 1456 * Start over if it has (it won't be on the list anymore). 1457 */ 1458 if (vp->v_mount != mp) 1459 goto loop; 1460 nvp = vp->v_mntvnodes.le_next; 1461 /* 1462 * Skip over a selected vnode. 1463 */ 1464 if (vp == skipvp) 1465 continue; 1466 1467 simple_lock(&vp->v_interlock); 1468 /* 1469 * Skip over a vnodes marked VSYSTEM. 1470 */ 1471 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { 1472 simple_unlock(&vp->v_interlock); 1473 continue; 1474 } 1475 /* 1476 * If WRITECLOSE is set, only flush out regular file vnodes 1477 * open for writing. 
1478 */ 1479 if ((flags & WRITECLOSE) && 1480 (vp->v_writecount == 0 || vp->v_type != VREG)) { 1481 simple_unlock(&vp->v_interlock); 1482 continue; 1483 } 1484 1485 /* 1486 * With v_usecount == 0, all we need to do is clear out the 1487 * vnode data structures and we are done. 1488 */ 1489 if (vp->v_usecount == 0) { 1490 simple_unlock(&mntvnode_slock); 1491 vgonel(vp, p); 1492 simple_lock(&mntvnode_slock); 1493 continue; 1494 } 1495 1496 /* 1497 * If FORCECLOSE is set, forcibly close the vnode. For block 1498 * or character devices, revert to an anonymous device. For 1499 * all other files, just kill them. 1500 */ 1501 if (flags & FORCECLOSE) { 1502 simple_unlock(&mntvnode_slock); 1503 if (vp->v_type != VBLK && vp->v_type != VCHR) { 1504 vgonel(vp, p); 1505 } else { 1506 vclean(vp, 0, p); 1507 vp->v_op = spec_vnodeop_p; 1508 insmntque(vp, (struct mount *) 0); 1509 } 1510 simple_lock(&mntvnode_slock); 1511 continue; 1512 } 1513 #ifdef DIAGNOSTIC 1514 if (busyprt) 1515 vprint("vflush: busy vnode", vp); 1516 #endif 1517 simple_unlock(&vp->v_interlock); 1518 busy++; 1519 } 1520 simple_unlock(&mntvnode_slock); 1521 if (busy) 1522 return (EBUSY); 1523 return (0); 1524 } 1525 1526 /* 1527 * Disassociate the underlying file system from a vnode. 1528 */ 1529 static void 1530 vclean(vp, flags, p) 1531 struct vnode *vp; 1532 int flags; 1533 struct proc *p; 1534 { 1535 int active; 1536 vm_object_t obj; 1537 1538 /* 1539 * Check to see if the vnode is in use. If so we have to reference it 1540 * before we clean it out so that its count cannot fall to zero and 1541 * generate a race against ourselves to recycle it. 1542 */ 1543 if ((active = vp->v_usecount)) 1544 vp->v_usecount++; 1545 1546 /* 1547 * Prevent the vnode from being recycled or brought into use while we 1548 * clean it out. 1549 */ 1550 if (vp->v_flag & VXLOCK) 1551 panic("vclean: deadlock"); 1552 vp->v_flag |= VXLOCK; 1553 /* 1554 * Even if the count is zero, the VOP_INACTIVE routine may still 1555 * have the object locked while it cleans it out. The VOP_LOCK 1556 * ensures that the VOP_INACTIVE routine is done with its work. 1557 * For active vnodes, it ensures that no other activity can 1558 * occur while the underlying object is being cleaned out. 1559 */ 1560 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); 1561 1562 /* 1563 * Clean out any buffers associated with the vnode. 1564 */ 1565 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); 1566 if (obj = vp->v_object) { 1567 if (obj->ref_count == 0) { 1568 /* 1569 * This is a normal way of shutting down the object/vnode 1570 * association. 1571 */ 1572 vm_object_terminate(obj); 1573 } else { 1574 /* 1575 * Woe to the process that tries to page now :-). 1576 */ 1577 vm_pager_deallocate(obj); 1578 } 1579 } 1580 1581 /* 1582 * If purging an active vnode, it must be closed and 1583 * deactivated before being reclaimed. Note that the 1584 * VOP_INACTIVE will unlock the vnode. 1585 */ 1586 if (active) { 1587 if (flags & DOCLOSE) 1588 VOP_CLOSE(vp, IO_NDELAY, NOCRED, p); 1589 VOP_INACTIVE(vp, p); 1590 } else { 1591 /* 1592 * Any other processes trying to obtain this lock must first 1593 * wait for VXLOCK to clear, then call the new lock operation. 1594 */ 1595 VOP_UNLOCK(vp, 0, p); 1596 } 1597 /* 1598 * Reclaim the vnode. 1599 */ 1600 if (VOP_RECLAIM(vp, p)) 1601 panic("vclean: cannot reclaim"); 1602 1603 if (active) 1604 vrele(vp); 1605 1606 cache_purge(vp); 1607 if (vp->v_vnlock) { 1608 #if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? 
*/ 1609 #ifdef DIAGNOSTIC 1610 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) 1611 vprint("vclean: lock not drained", vp); 1612 #endif 1613 #endif 1614 FREE(vp->v_vnlock, M_VNODE); 1615 vp->v_vnlock = NULL; 1616 } 1617 1618 if (VSHOULDFREE(vp)) 1619 vfree(vp); 1620 1621 /* 1622 * Done with purge, notify sleepers of the grim news. 1623 */ 1624 vp->v_op = dead_vnodeop_p; 1625 vn_pollgone(vp); 1626 vp->v_tag = VT_NON; 1627 vp->v_flag &= ~VXLOCK; 1628 if (vp->v_flag & VXWANT) { 1629 vp->v_flag &= ~VXWANT; 1630 wakeup((caddr_t) vp); 1631 } 1632 } 1633 1634 /* 1635 * Eliminate all activity associated with the requested vnode 1636 * and with all vnodes aliased to the requested vnode. 1637 */ 1638 int 1639 vop_revoke(ap) 1640 struct vop_revoke_args /* { 1641 struct vnode *a_vp; 1642 int a_flags; 1643 } */ *ap; 1644 { 1645 struct vnode *vp, *vq; 1646 struct proc *p = curproc; /* XXX */ 1647 1648 #ifdef DIAGNOSTIC 1649 if ((ap->a_flags & REVOKEALL) == 0) 1650 panic("vop_revoke"); 1651 #endif 1652 1653 vp = ap->a_vp; 1654 simple_lock(&vp->v_interlock); 1655 1656 if (vp->v_flag & VALIASED) { 1657 /* 1658 * If a vgone (or vclean) is already in progress, 1659 * wait until it is done and return. 1660 */ 1661 if (vp->v_flag & VXLOCK) { 1662 vp->v_flag |= VXWANT; 1663 simple_unlock(&vp->v_interlock); 1664 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); 1665 return (0); 1666 } 1667 /* 1668 * Ensure that vp will not be vgone'd while we 1669 * are eliminating its aliases. 1670 */ 1671 vp->v_flag |= VXLOCK; 1672 simple_unlock(&vp->v_interlock); 1673 while (vp->v_flag & VALIASED) { 1674 simple_lock(&spechash_slock); 1675 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1676 if (vq->v_rdev != vp->v_rdev || 1677 vq->v_type != vp->v_type || vp == vq) 1678 continue; 1679 simple_unlock(&spechash_slock); 1680 vgone(vq); 1681 break; 1682 } 1683 if (vq == NULLVP) { 1684 simple_unlock(&spechash_slock); 1685 } 1686 } 1687 /* 1688 * Remove the lock so that vgone below will 1689 * really eliminate the vnode after which time 1690 * vgone will awaken any sleepers. 1691 */ 1692 simple_lock(&vp->v_interlock); 1693 vp->v_flag &= ~VXLOCK; 1694 if (vp->v_flag & VXWANT) { 1695 vp->v_flag &= ~VXWANT; 1696 wakeup(vp); 1697 } 1698 } 1699 vgonel(vp, p); 1700 return (0); 1701 } 1702 1703 /* 1704 * Recycle an unused vnode to the front of the free list. 1705 * Release the passed interlock if the vnode will be recycled. 1706 */ 1707 int 1708 vrecycle(vp, inter_lkp, p) 1709 struct vnode *vp; 1710 struct simplelock *inter_lkp; 1711 struct proc *p; 1712 { 1713 1714 simple_lock(&vp->v_interlock); 1715 if (vp->v_usecount == 0) { 1716 if (inter_lkp) { 1717 simple_unlock(inter_lkp); 1718 } 1719 vgonel(vp, p); 1720 return (1); 1721 } 1722 simple_unlock(&vp->v_interlock); 1723 return (0); 1724 } 1725 1726 /* 1727 * Eliminate all activity associated with a vnode 1728 * in preparation for reuse. 1729 */ 1730 void 1731 vgone(vp) 1732 register struct vnode *vp; 1733 { 1734 struct proc *p = curproc; /* XXX */ 1735 1736 simple_lock(&vp->v_interlock); 1737 vgonel(vp, p); 1738 } 1739 1740 /* 1741 * vgone, with the vp interlock held. 1742 */ 1743 static void 1744 vgonel(vp, p) 1745 struct vnode *vp; 1746 struct proc *p; 1747 { 1748 int s; 1749 struct vnode *vq; 1750 struct vnode *vx; 1751 1752 /* 1753 * If a vgone (or vclean) is already in progress, 1754 * wait until it is done and return. 
1755 */ 1756 if (vp->v_flag & VXLOCK) { 1757 vp->v_flag |= VXWANT; 1758 simple_unlock(&vp->v_interlock); 1759 tsleep((caddr_t)vp, PINOD, "vgone", 0); 1760 return; 1761 } 1762 1763 /* 1764 * Clean out the filesystem specific data. 1765 */ 1766 vclean(vp, DOCLOSE, p); 1767 simple_lock(&vp->v_interlock); 1768 1769 /* 1770 * Delete from old mount point vnode list, if on one. 1771 */ 1772 if (vp->v_mount != NULL) 1773 insmntque(vp, (struct mount *)0); 1774 /* 1775 * If special device, remove it from special device alias list 1776 * if it is on one. 1777 */ 1778 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { 1779 simple_lock(&spechash_slock); 1780 if (*vp->v_hashchain == vp) { 1781 *vp->v_hashchain = vp->v_specnext; 1782 } else { 1783 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1784 if (vq->v_specnext != vp) 1785 continue; 1786 vq->v_specnext = vp->v_specnext; 1787 break; 1788 } 1789 if (vq == NULL) 1790 panic("missing bdev"); 1791 } 1792 if (vp->v_flag & VALIASED) { 1793 vx = NULL; 1794 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1795 if (vq->v_rdev != vp->v_rdev || 1796 vq->v_type != vp->v_type) 1797 continue; 1798 if (vx) 1799 break; 1800 vx = vq; 1801 } 1802 if (vx == NULL) 1803 panic("missing alias"); 1804 if (vq == NULL) 1805 vx->v_flag &= ~VALIASED; 1806 vp->v_flag &= ~VALIASED; 1807 } 1808 simple_unlock(&spechash_slock); 1809 FREE(vp->v_specinfo, M_VNODE); 1810 vp->v_specinfo = NULL; 1811 } 1812 1813 /* 1814 * If it is on the freelist and not already at the head, 1815 * move it to the head of the list. The test of the back 1816 * pointer and the reference count of zero is because 1817 * it will be removed from the free list by getnewvnode, 1818 * but will not have its reference count incremented until 1819 * after calling vgone. If the reference count were 1820 * incremented first, vgone would (incorrectly) try to 1821 * close the previous instance of the underlying object. 1822 */ 1823 if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) { 1824 s = splbio(); 1825 simple_lock(&vnode_free_list_slock); 1826 if (vp->v_flag & VFREE) { 1827 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1828 } else if (vp->v_flag & VTBFREE) { 1829 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 1830 vp->v_flag &= ~VTBFREE; 1831 freevnodes++; 1832 } else 1833 freevnodes++; 1834 vp->v_flag |= VFREE; 1835 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 1836 simple_unlock(&vnode_free_list_slock); 1837 splx(s); 1838 } 1839 1840 vp->v_type = VBAD; 1841 simple_unlock(&vp->v_interlock); 1842 } 1843 1844 /* 1845 * Lookup a vnode by device number. 1846 */ 1847 int 1848 vfinddev(dev, type, vpp) 1849 dev_t dev; 1850 enum vtype type; 1851 struct vnode **vpp; 1852 { 1853 register struct vnode *vp; 1854 int rc = 0; 1855 1856 simple_lock(&spechash_slock); 1857 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 1858 if (dev != vp->v_rdev || type != vp->v_type) 1859 continue; 1860 *vpp = vp; 1861 rc = 1; 1862 break; 1863 } 1864 simple_unlock(&spechash_slock); 1865 return (rc); 1866 } 1867 1868 /* 1869 * Calculate the total number of references to a special device. 
1870 */ 1871 int 1872 vcount(vp) 1873 register struct vnode *vp; 1874 { 1875 struct vnode *vq, *vnext; 1876 int count; 1877 1878 loop: 1879 if ((vp->v_flag & VALIASED) == 0) 1880 return (vp->v_usecount); 1881 simple_lock(&spechash_slock); 1882 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { 1883 vnext = vq->v_specnext; 1884 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) 1885 continue; 1886 /* 1887 * Alias, but not in use, so flush it out. 1888 */ 1889 if (vq->v_usecount == 0 && vq != vp) { 1890 simple_unlock(&spechash_slock); 1891 vgone(vq); 1892 goto loop; 1893 } 1894 count += vq->v_usecount; 1895 } 1896 simple_unlock(&spechash_slock); 1897 return (count); 1898 } 1899 /* 1900 * Print out a description of a vnode. 1901 */ 1902 static char *typename[] = 1903 {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; 1904 1905 void 1906 vprint(label, vp) 1907 char *label; 1908 register struct vnode *vp; 1909 { 1910 char buf[64]; 1911 1912 if (label != NULL) 1913 printf("%s: %p: ", label, (void *)vp); 1914 else 1915 printf("%p: ", (void *)vp); 1916 printf("type %s, usecount %d, writecount %d, refcount %d,", 1917 typename[vp->v_type], vp->v_usecount, vp->v_writecount, 1918 vp->v_holdcnt); 1919 buf[0] = '\0'; 1920 if (vp->v_flag & VROOT) 1921 strcat(buf, "|VROOT"); 1922 if (vp->v_flag & VTEXT) 1923 strcat(buf, "|VTEXT"); 1924 if (vp->v_flag & VSYSTEM) 1925 strcat(buf, "|VSYSTEM"); 1926 if (vp->v_flag & VXLOCK) 1927 strcat(buf, "|VXLOCK"); 1928 if (vp->v_flag & VXWANT) 1929 strcat(buf, "|VXWANT"); 1930 if (vp->v_flag & VBWAIT) 1931 strcat(buf, "|VBWAIT"); 1932 if (vp->v_flag & VALIASED) 1933 strcat(buf, "|VALIASED"); 1934 if (vp->v_flag & VDOOMED) 1935 strcat(buf, "|VDOOMED"); 1936 if (vp->v_flag & VFREE) 1937 strcat(buf, "|VFREE"); 1938 if (vp->v_flag & VOBJBUF) 1939 strcat(buf, "|VOBJBUF"); 1940 if (buf[0] != '\0') 1941 printf(" flags (%s)", &buf[1]); 1942 if (vp->v_data == NULL) { 1943 printf("\n"); 1944 } else { 1945 printf("\n\t"); 1946 VOP_PRINT(vp); 1947 } 1948 } 1949 1950 #ifdef DDB 1951 /* 1952 * List all of the locked vnodes in the system. 1953 * Called when debugging the kernel. 1954 */ 1955 static void 1956 printlockedvnodes() 1957 { 1958 struct proc *p = curproc; /* XXX */ 1959 struct mount *mp, *nmp; 1960 struct vnode *vp; 1961 1962 printf("Locked vnodes\n"); 1963 simple_lock(&mountlist_slock); 1964 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1965 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 1966 nmp = mp->mnt_list.cqe_next; 1967 continue; 1968 } 1969 for (vp = mp->mnt_vnodelist.lh_first; 1970 vp != NULL; 1971 vp = vp->v_mntvnodes.le_next) { 1972 if (VOP_ISLOCKED(vp)) 1973 vprint((char *)0, vp); 1974 } 1975 simple_lock(&mountlist_slock); 1976 nmp = mp->mnt_list.cqe_next; 1977 vfs_unbusy(mp, p); 1978 } 1979 simple_unlock(&mountlist_slock); 1980 } 1981 #endif 1982 1983 /* 1984 * Top level filesystem related information gathering. 1985 */ 1986 static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS); 1987 1988 static int 1989 vfs_sysctl SYSCTL_HANDLER_ARGS 1990 { 1991 int *name = (int *)arg1 - 1; /* XXX */ 1992 u_int namelen = arg2 + 1; /* XXX */ 1993 struct vfsconf *vfsp; 1994 1995 #ifndef NO_COMPAT_PRELITE2 1996 /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. 
*/ 1997 if (namelen == 1) 1998 return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); 1999 #endif 2000 2001 #ifdef notyet 2002 /* all sysctl names at this level are at least name and field */ 2003 if (namelen < 2) 2004 return (ENOTDIR); /* overloaded */ 2005 if (name[0] != VFS_GENERIC) { 2006 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 2007 if (vfsp->vfc_typenum == name[0]) 2008 break; 2009 if (vfsp == NULL) 2010 return (EOPNOTSUPP); 2011 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, 2012 oldp, oldlenp, newp, newlen, p)); 2013 } 2014 #endif 2015 switch (name[1]) { 2016 case VFS_MAXTYPENUM: 2017 if (namelen != 2) 2018 return (ENOTDIR); 2019 return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); 2020 case VFS_CONF: 2021 if (namelen != 3) 2022 return (ENOTDIR); /* overloaded */ 2023 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 2024 if (vfsp->vfc_typenum == name[2]) 2025 break; 2026 if (vfsp == NULL) 2027 return (EOPNOTSUPP); 2028 return (SYSCTL_OUT(req, vfsp, sizeof *vfsp)); 2029 } 2030 return (EOPNOTSUPP); 2031 } 2032 2033 SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, 2034 "Generic filesystem"); 2035 2036 #ifndef NO_COMPAT_PRELITE2 2037 2038 static int 2039 sysctl_ovfs_conf SYSCTL_HANDLER_ARGS 2040 { 2041 int error; 2042 struct vfsconf *vfsp; 2043 struct ovfsconf ovfs; 2044 2045 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 2046 ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ 2047 strcpy(ovfs.vfc_name, vfsp->vfc_name); 2048 ovfs.vfc_index = vfsp->vfc_typenum; 2049 ovfs.vfc_refcount = vfsp->vfc_refcount; 2050 ovfs.vfc_flags = vfsp->vfc_flags; 2051 error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); 2052 if (error) 2053 return error; 2054 } 2055 return 0; 2056 } 2057 2058 #endif /* !NO_COMPAT_PRELITE2 */ 2059 2060 static volatile int kinfo_vdebug = 1; 2061 2062 #if 0 2063 #define KINFO_VNODESLOP 10 2064 /* 2065 * Dump vnode list (via sysctl). 2066 * Copyout address of vnode followed by vnode. 2067 */ 2068 /* ARGSUSED */ 2069 static int 2070 sysctl_vnode SYSCTL_HANDLER_ARGS 2071 { 2072 struct proc *p = curproc; /* XXX */ 2073 struct mount *mp, *nmp; 2074 struct vnode *nvp, *vp; 2075 int error; 2076 2077 #define VPTRSZ sizeof (struct vnode *) 2078 #define VNODESZ sizeof (struct vnode) 2079 2080 req->lock = 0; 2081 if (!req->oldptr) /* Make an estimate */ 2082 return (SYSCTL_OUT(req, 0, 2083 (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); 2084 2085 simple_lock(&mountlist_slock); 2086 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 2087 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 2088 nmp = mp->mnt_list.cqe_next; 2089 continue; 2090 } 2091 again: 2092 simple_lock(&mntvnode_slock); 2093 for (vp = mp->mnt_vnodelist.lh_first; 2094 vp != NULL; 2095 vp = nvp) { 2096 /* 2097 * Check that the vp is still associated with 2098 * this filesystem. RACE: could have been 2099 * recycled onto the same filesystem. 
2100 */ 2101 if (vp->v_mount != mp) { 2102 simple_unlock(&mntvnode_slock); 2103 if (kinfo_vdebug) 2104 printf("kinfo: vp changed\n"); 2105 goto again; 2106 } 2107 nvp = vp->v_mntvnodes.le_next; 2108 simple_unlock(&mntvnode_slock); 2109 if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || 2110 (error = SYSCTL_OUT(req, vp, VNODESZ))) 2111 return (error); 2112 simple_lock(&mntvnode_slock); 2113 } 2114 simple_unlock(&mntvnode_slock); 2115 simple_lock(&mountlist_slock); 2116 nmp = mp->mnt_list.cqe_next; 2117 vfs_unbusy(mp, p); 2118 } 2119 simple_unlock(&mountlist_slock); 2120 2121 return (0); 2122 } 2123 #endif 2124 2125 /* 2126 * XXX 2127 * Exporting the vnode list on large systems causes them to crash. 2128 * Exporting the vnode list on medium systems causes sysctl to coredump. 2129 */ 2130 #if 0 2131 SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 2132 0, 0, sysctl_vnode, "S,vnode", ""); 2133 #endif 2134 2135 /* 2136 * Check to see if a filesystem is mounted on a block device. 2137 */ 2138 int 2139 vfs_mountedon(vp) 2140 struct vnode *vp; 2141 { 2142 struct vnode *vq; 2143 int error = 0; 2144 2145 if (vp->v_specmountpoint != NULL) 2146 return (EBUSY); 2147 if (vp->v_flag & VALIASED) { 2148 simple_lock(&spechash_slock); 2149 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 2150 if (vq->v_rdev != vp->v_rdev || 2151 vq->v_type != vp->v_type) 2152 continue; 2153 if (vq->v_specmountpoint != NULL) { 2154 error = EBUSY; 2155 break; 2156 } 2157 } 2158 simple_unlock(&spechash_slock); 2159 } 2160 return (error); 2161 } 2162 2163 /* 2164 * Unmount all filesystems. The list is traversed in reverse order 2165 * of mounting to avoid dependencies. 2166 */ 2167 void 2168 vfs_unmountall() 2169 { 2170 struct mount *mp, *nmp; 2171 struct proc *p; 2172 int error; 2173 2174 if (curproc != NULL) 2175 p = curproc; 2176 else 2177 p = initproc; /* XXX XXX should this be proc0? */ 2178 /* 2179 * Since this only runs when rebooting, it is not interlocked. 2180 */ 2181 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 2182 nmp = mp->mnt_list.cqe_prev; 2183 error = dounmount(mp, MNT_FORCE, p); 2184 if (error) { 2185 printf("unmount of %s failed (", 2186 mp->mnt_stat.f_mntonname); 2187 if (error == EBUSY) 2188 printf("BUSY)\n"); 2189 else 2190 printf("%d)\n", error); 2191 } 2192 } 2193 } 2194 2195 /* 2196 * Build hash lists of net addresses and hang them off the mount point. 2197 * Called by ufs_mount() to set up the lists of export addresses. 
2198 */ 2199 static int 2200 vfs_hang_addrlist(mp, nep, argp) 2201 struct mount *mp; 2202 struct netexport *nep; 2203 struct export_args *argp; 2204 { 2205 register struct netcred *np; 2206 register struct radix_node_head *rnh; 2207 register int i; 2208 struct radix_node *rn; 2209 struct sockaddr *saddr, *smask = 0; 2210 struct domain *dom; 2211 int error; 2212 2213 if (argp->ex_addrlen == 0) { 2214 if (mp->mnt_flag & MNT_DEFEXPORTED) 2215 return (EPERM); 2216 np = &nep->ne_defexported; 2217 np->netc_exflags = argp->ex_flags; 2218 np->netc_anon = argp->ex_anon; 2219 np->netc_anon.cr_ref = 1; 2220 mp->mnt_flag |= MNT_DEFEXPORTED; 2221 return (0); 2222 } 2223 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 2224 np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); 2225 bzero((caddr_t) np, i); 2226 saddr = (struct sockaddr *) (np + 1); 2227 if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) 2228 goto out; 2229 if (saddr->sa_len > argp->ex_addrlen) 2230 saddr->sa_len = argp->ex_addrlen; 2231 if (argp->ex_masklen) { 2232 smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); 2233 error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); 2234 if (error) 2235 goto out; 2236 if (smask->sa_len > argp->ex_masklen) 2237 smask->sa_len = argp->ex_masklen; 2238 } 2239 i = saddr->sa_family; 2240 if ((rnh = nep->ne_rtable[i]) == 0) { 2241 /* 2242 * Seems silly to initialize every AF when most are not used, 2243 * do so on demand here 2244 */ 2245 for (dom = domains; dom; dom = dom->dom_next) 2246 if (dom->dom_family == i && dom->dom_rtattach) { 2247 dom->dom_rtattach((void **) &nep->ne_rtable[i], 2248 dom->dom_rtoffset); 2249 break; 2250 } 2251 if ((rnh = nep->ne_rtable[i]) == 0) { 2252 error = ENOBUFS; 2253 goto out; 2254 } 2255 } 2256 rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, 2257 np->netc_rnodes); 2258 if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ 2259 error = EPERM; 2260 goto out; 2261 } 2262 np->netc_exflags = argp->ex_flags; 2263 np->netc_anon = argp->ex_anon; 2264 np->netc_anon.cr_ref = 1; 2265 return (0); 2266 out: 2267 free(np, M_NETADDR); 2268 return (error); 2269 } 2270 2271 /* ARGSUSED */ 2272 static int 2273 vfs_free_netcred(rn, w) 2274 struct radix_node *rn; 2275 void *w; 2276 { 2277 register struct radix_node_head *rnh = (struct radix_node_head *) w; 2278 2279 (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); 2280 free((caddr_t) rn, M_NETADDR); 2281 return (0); 2282 } 2283 2284 /* 2285 * Free the net address hash lists that are hanging off the mount points. 
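 *
 * This is the inverse of vfs_hang_addrlist(): rnh_walktree() applies
 * vfs_free_netcred() to delete and free every individual entry, and
 * each per-family radix table head is then freed and its pointer
 * cleared.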
2286 */ 2287 static void 2288 vfs_free_addrlist(nep) 2289 struct netexport *nep; 2290 { 2291 register int i; 2292 register struct radix_node_head *rnh; 2293 2294 for (i = 0; i <= AF_MAX; i++) 2295 if ((rnh = nep->ne_rtable[i])) { 2296 (*rnh->rnh_walktree) (rnh, vfs_free_netcred, 2297 (caddr_t) rnh); 2298 free((caddr_t) rnh, M_RTABLE); 2299 nep->ne_rtable[i] = 0; 2300 } 2301 } 2302 2303 int 2304 vfs_export(mp, nep, argp) 2305 struct mount *mp; 2306 struct netexport *nep; 2307 struct export_args *argp; 2308 { 2309 int error; 2310 2311 if (argp->ex_flags & MNT_DELEXPORT) { 2312 if (mp->mnt_flag & MNT_EXPUBLIC) { 2313 vfs_setpublicfs(NULL, NULL, NULL); 2314 mp->mnt_flag &= ~MNT_EXPUBLIC; 2315 } 2316 vfs_free_addrlist(nep); 2317 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 2318 } 2319 if (argp->ex_flags & MNT_EXPORTED) { 2320 if (argp->ex_flags & MNT_EXPUBLIC) { 2321 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 2322 return (error); 2323 mp->mnt_flag |= MNT_EXPUBLIC; 2324 } 2325 if ((error = vfs_hang_addrlist(mp, nep, argp))) 2326 return (error); 2327 mp->mnt_flag |= MNT_EXPORTED; 2328 } 2329 return (0); 2330 } 2331 2332 2333 /* 2334 * Set the publicly exported filesystem (WebNFS). Currently, only 2335 * one public filesystem is possible in the spec (RFC 2054 and 2055) 2336 */ 2337 int 2338 vfs_setpublicfs(mp, nep, argp) 2339 struct mount *mp; 2340 struct netexport *nep; 2341 struct export_args *argp; 2342 { 2343 int error; 2344 struct vnode *rvp; 2345 char *cp; 2346 2347 /* 2348 * mp == NULL -> invalidate the current info, the FS is 2349 * no longer exported. May be called from either vfs_export 2350 * or unmount, so check if it hasn't already been done. 2351 */ 2352 if (mp == NULL) { 2353 if (nfs_pub.np_valid) { 2354 nfs_pub.np_valid = 0; 2355 if (nfs_pub.np_index != NULL) { 2356 FREE(nfs_pub.np_index, M_TEMP); 2357 nfs_pub.np_index = NULL; 2358 } 2359 } 2360 return (0); 2361 } 2362 2363 /* 2364 * Only one allowed at a time. 2365 */ 2366 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 2367 return (EBUSY); 2368 2369 /* 2370 * Get real filehandle for root of exported FS. 2371 */ 2372 bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); 2373 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 2374 2375 if ((error = VFS_ROOT(mp, &rvp))) 2376 return (error); 2377 2378 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 2379 return (error); 2380 2381 vput(rvp); 2382 2383 /* 2384 * If an indexfile was specified, pull it in. 2385 */ 2386 if (argp->ex_indexfile != NULL) { 2387 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 2388 M_WAITOK); 2389 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 2390 MAXNAMLEN, (size_t *)0); 2391 if (!error) { 2392 /* 2393 * Check for illegal filenames. 2394 */ 2395 for (cp = nfs_pub.np_index; *cp; cp++) { 2396 if (*cp == '/') { 2397 error = EINVAL; 2398 break; 2399 } 2400 } 2401 } 2402 if (error) { 2403 FREE(nfs_pub.np_index, M_TEMP); 2404 return (error); 2405 } 2406 } 2407 2408 nfs_pub.np_mount = mp; 2409 nfs_pub.np_valid = 1; 2410 return (0); 2411 } 2412 2413 struct netcred * 2414 vfs_export_lookup(mp, nep, nam) 2415 register struct mount *mp; 2416 struct netexport *nep; 2417 struct sockaddr *nam; 2418 { 2419 register struct netcred *np; 2420 register struct radix_node_head *rnh; 2421 struct sockaddr *saddr; 2422 2423 np = NULL; 2424 if (mp->mnt_flag & MNT_EXPORTED) { 2425 /* 2426 * Lookup in the export list first. 
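 * The per-address-family radix tree built by vfs_hang_addrlist() is
 * consulted through rnh_matchaddr(); a hit on the tree's root node
 * counts as no match, and when nothing specific matches the default
 * entry is used below if MNT_DEFEXPORTED is set.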
2427 */ 2428 if (nam != NULL) { 2429 saddr = nam; 2430 rnh = nep->ne_rtable[saddr->sa_family]; 2431 if (rnh != NULL) { 2432 np = (struct netcred *) 2433 (*rnh->rnh_matchaddr)((caddr_t)saddr, 2434 rnh); 2435 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 2436 np = NULL; 2437 } 2438 } 2439 /* 2440 * If no address match, use the default if it exists. 2441 */ 2442 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 2443 np = &nep->ne_defexported; 2444 } 2445 return (np); 2446 } 2447 2448 /* 2449 * perform msync on all vnodes under a mount point 2450 * the mount point must be locked. 2451 */ 2452 void 2453 vfs_msync(struct mount *mp, int flags) { 2454 struct vnode *vp, *nvp; 2455 struct vm_object *obj; 2456 int anyio, tries; 2457 2458 tries = 5; 2459 loop: 2460 anyio = 0; 2461 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 2462 2463 nvp = vp->v_mntvnodes.le_next; 2464 2465 if (vp->v_mount != mp) { 2466 goto loop; 2467 } 2468 2469 if (vp->v_flag & VXLOCK) /* XXX: what if MNT_WAIT? */ 2470 continue; 2471 2472 if (flags != MNT_WAIT) { 2473 obj = vp->v_object; 2474 if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0) 2475 continue; 2476 if (VOP_ISLOCKED(vp)) 2477 continue; 2478 } 2479 2480 simple_lock(&vp->v_interlock); 2481 if (vp->v_object && 2482 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 2483 if (!vget(vp, 2484 LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { 2485 if (vp->v_object) { 2486 vm_object_page_clean(vp->v_object, 0, 0, TRUE); 2487 anyio = 1; 2488 } 2489 vput(vp); 2490 } 2491 } else { 2492 simple_unlock(&vp->v_interlock); 2493 } 2494 } 2495 if (anyio && (--tries > 0)) 2496 goto loop; 2497 } 2498 2499 /* 2500 * Create the VM object needed for VMIO and mmap support. This 2501 * is done for all VREG files in the system. Some filesystems might 2502 * afford the additional metadata buffering capability of the 2503 * VMIO code by making the device node be VMIO mode also. 2504 * 2505 * If !waslocked, must be called with interlock. 2506 */ 2507 int 2508 vfs_object_create(vp, p, cred, waslocked) 2509 struct vnode *vp; 2510 struct proc *p; 2511 struct ucred *cred; 2512 int waslocked; 2513 { 2514 struct vattr vat; 2515 vm_object_t object; 2516 int error = 0; 2517 2518 if ((vp->v_type != VREG) && (vp->v_type != VBLK)) { 2519 if (!waslocked) 2520 simple_unlock(&vp->v_interlock); 2521 return 0; 2522 } 2523 2524 if (!waslocked) 2525 vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, p); 2526 2527 retry: 2528 if ((object = vp->v_object) == NULL) { 2529 if (vp->v_type == VREG) { 2530 if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) 2531 goto retn; 2532 object = vnode_pager_alloc(vp, 2533 OFF_TO_IDX(round_page(vat.va_size)), 0, 0); 2534 } else if (major(vp->v_rdev) < nblkdev) { 2535 /* 2536 * This simply allocates the biggest object possible 2537 * for a VBLK vnode. This should be fixed, but doesn't 2538 * cause any problems (yet). 
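 *
 * (The object->ref_count-- and vp->v_usecount-- below evidently
 * compensate for the reference and use count taken when the pager
 * object is allocated; the caller is expected to hold its own
 * reference on the vnode.)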
2539 			 */
2540 			object = vnode_pager_alloc(vp, INT_MAX, 0, 0);
2541 		}
2542 		object->ref_count--;
2543 		vp->v_usecount--;
2544 	} else {
2545 		if (object->flags & OBJ_DEAD) {
2546 			VOP_UNLOCK(vp, 0, p);
2547 			tsleep(object, PVM, "vodead", 0);
2548 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2549 			goto retry;
2550 		}
2551 	}
2552
2553 	if (vp->v_object) {
2554 		vp->v_flag |= VOBJBUF;
2555 	}
2556
2557 retn:
2558 	if (!waslocked) {
2559 		simple_lock(&vp->v_interlock);
2560 		VOP_UNLOCK(vp, LK_INTERLOCK, p);
2561 	}
2562
2563 	return error;
2564 }
2565
2566 static void
2567 vfree(vp)
2568 	struct vnode *vp;
2569 {
2570 	int s;
2571
2572 	s = splbio();
2573 	simple_lock(&vnode_free_list_slock);
2574 	if (vp->v_flag & VTBFREE) {
2575 		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
2576 		vp->v_flag &= ~VTBFREE;
2577 	}
2578 	if (vp->v_flag & VAGE) {
2579 		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
2580 	} else {
2581 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
2582 	}
2583 	freevnodes++;
2584 	simple_unlock(&vnode_free_list_slock);
2585 	vp->v_flag &= ~VAGE;
2586 	vp->v_flag |= VFREE;
2587 	splx(s);
2588 }
2589
2590 void
2591 vbusy(vp)
2592 	struct vnode *vp;
2593 {
2594 	int s;
2595
2596 	s = splbio();
2597 	simple_lock(&vnode_free_list_slock);
2598 	if (vp->v_flag & VTBFREE) {
2599 		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
2600 		vp->v_flag &= ~VTBFREE;
2601 	} else {
2602 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
2603 		freevnodes--;
2604 	}
2605 	simple_unlock(&vnode_free_list_slock);
2606 	vp->v_flag &= ~(VFREE|VAGE);
2607 	splx(s);
2608 }
2609
2610 /*
2611  * Record a process's interest in events which might happen to
2612  * a vnode. Because poll uses the historic select-style interface
2613  * internally, this routine serves as both the ``check for any
2614  * pending events'' and the ``record my interest in future events''
2615  * functions. (These are done together, while the lock is held,
2616  * to avoid race conditions.)
2617  */
2618 int
2619 vn_pollrecord(vp, p, events)
2620 	struct vnode *vp;
2621 	struct proc *p;
2622 	short events;
2623 {
2624 	simple_lock(&vp->v_pollinfo.vpi_lock);
2625 	if (vp->v_pollinfo.vpi_revents & events) {
2626 		/*
2627 		 * This leaves events we are not interested
2628 		 * in available for the other process which
2629 		 * presumably had requested them
2630 		 * (otherwise they would never have been
2631 		 * recorded).
2632 		 */
2633 		events &= vp->v_pollinfo.vpi_revents;
2634 		vp->v_pollinfo.vpi_revents &= ~events;
2635
2636 		simple_unlock(&vp->v_pollinfo.vpi_lock);
2637 		return events;
2638 	}
2639 	vp->v_pollinfo.vpi_events |= events;
2640 	selrecord(p, &vp->v_pollinfo.vpi_selinfo);
2641 	simple_unlock(&vp->v_pollinfo.vpi_lock);
2642 	return 0;
2643 }
2644
2645 /*
2646  * Note the occurrence of an event. If the VN_POLLEVENT macro is used,
2647  * it is possible for us to miss an event due to race conditions, but
2648  * that condition is expected to be rare, so for the moment it is the
2649  * preferred interface.
2650  */
2651 void
2652 vn_pollevent(vp, events)
2653 	struct vnode *vp;
2654 	short events;
2655 {
2656 	simple_lock(&vp->v_pollinfo.vpi_lock);
2657 	if (vp->v_pollinfo.vpi_events & events) {
2658 		/*
2659 		 * We clear vpi_events so that we don't
2660 		 * call selwakeup() twice if two events are
2661 		 * posted before the polling process(es) is
2662 		 * awakened. This also ensures that we take at
2663 		 * most one selwakeup() if the polling process
2664 		 * is no longer interested. However, it does
2665 		 * mean that only one event can be noticed at
2666 		 * a time.
(Perhaps we should only clear those 2667 * event bits which we note?) XXX 2668 */ 2669 vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? */ 2670 vp->v_pollinfo.vpi_revents |= events; 2671 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2672 } 2673 simple_unlock(&vp->v_pollinfo.vpi_lock); 2674 } 2675 2676 /* 2677 * Wake up anyone polling on vp because it is being revoked. 2678 * This depends on dead_poll() returning POLLHUP for correct 2679 * behavior. 2680 */ 2681 void 2682 vn_pollgone(vp) 2683 struct vnode *vp; 2684 { 2685 simple_lock(&vp->v_pollinfo.vpi_lock); 2686 if (vp->v_pollinfo.vpi_events) { 2687 vp->v_pollinfo.vpi_events = 0; 2688 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2689 } 2690 simple_unlock(&vp->v_pollinfo.vpi_lock); 2691 } 2692 2693 2694 2695 /* 2696 * Routine to create and manage a filesystem syncer vnode. 2697 */ 2698 #define sync_close ((int (*) __P((struct vop_close_args *)))nullop) 2699 int sync_fsync __P((struct vop_fsync_args *)); 2700 int sync_inactive __P((struct vop_inactive_args *)); 2701 int sync_reclaim __P((struct vop_reclaim_args *)); 2702 #define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) 2703 #define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) 2704 int sync_print __P((struct vop_print_args *)); 2705 #define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked) 2706 2707 vop_t **sync_vnodeop_p; 2708 struct vnodeopv_entry_desc sync_vnodeop_entries[] = { 2709 { &vop_default_desc, (vop_t *) vop_eopnotsupp }, 2710 { &vop_close_desc, (vop_t *) sync_close }, /* close */ 2711 { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */ 2712 { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */ 2713 { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */ 2714 { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */ 2715 { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */ 2716 { &vop_print_desc, (vop_t *) sync_print }, /* print */ 2717 { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */ 2718 { NULL, NULL } 2719 }; 2720 struct vnodeopv_desc sync_vnodeop_opv_desc = 2721 { &sync_vnodeop_p, sync_vnodeop_entries }; 2722 2723 VNODEOP_SET(sync_vnodeop_opv_desc); 2724 2725 /* 2726 * Create a new filesystem syncer vnode for the specified mount point. 2727 */ 2728 int 2729 vfs_allocate_syncvnode(mp) 2730 struct mount *mp; 2731 { 2732 struct vnode *vp; 2733 static long start, incr, next; 2734 int error; 2735 2736 /* Allocate a new vnode */ 2737 if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { 2738 mp->mnt_syncer = NULL; 2739 return (error); 2740 } 2741 vp->v_type = VNON; 2742 /* 2743 * Place the vnode onto the syncer worklist. We attempt to 2744 * scatter them about on the list so that they will go off 2745 * at evenly distributed times even if all the filesystems 2746 * are mounted at once. 2747 */ 2748 next += incr; 2749 if (next == 0 || next > syncer_maxdelay) { 2750 start /= 2; 2751 incr /= 2; 2752 if (start == 0) { 2753 start = syncer_maxdelay / 2; 2754 incr = syncer_maxdelay; 2755 } 2756 next = start; 2757 } 2758 vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); 2759 mp->mnt_syncer = vp; 2760 return (0); 2761 } 2762 2763 /* 2764 * Do a lazy sync of the filesystem. 
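 *
 * This is the fsync routine for the syncer vnode set up by
 * vfs_allocate_syncvnode() above. When the syncer daemon pulls that
 * vnode off the workitem queue and fsyncs it with MNT_LAZY, the code
 * below requeues the vnode syncdelay seconds into the future and then
 * pushes the mount's dirty pages and buffers out via vfs_msync() and
 * VFS_SYNC().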
2765  */
2766 int
2767 sync_fsync(ap)
2768 	struct vop_fsync_args /* {
2769 		struct vnode *a_vp;
2770 		struct ucred *a_cred;
2771 		int a_waitfor;
2772 		struct proc *a_p;
2773 	} */ *ap;
2774 {
2775 	struct vnode *syncvp = ap->a_vp;
2776 	struct mount *mp = syncvp->v_mount;
2777 	struct proc *p = ap->a_p;
2778 	int asyncflag;
2779
2780 	/*
2781 	 * We only need to do something if this is a lazy evaluation.
2782 	 */
2783 	if (ap->a_waitfor != MNT_LAZY)
2784 		return (0);
2785
2786 	/*
2787 	 * Move ourselves to the back of the sync list.
2788 	 */
2789 	vn_syncer_add_to_worklist(syncvp, syncdelay);
2790
2791 	/*
2792 	 * Walk the list of vnodes pushing all that are dirty and
2793 	 * not already on the sync list.
2794 	 */
2795 	simple_lock(&mountlist_slock);
2796 	if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) {
2797 		simple_unlock(&mountlist_slock);
2798 		return (0);
2799 	}
2800 	asyncflag = mp->mnt_flag & MNT_ASYNC;
2801 	mp->mnt_flag &= ~MNT_ASYNC;
2802 	vfs_msync(mp, MNT_NOWAIT);
2803 	VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
2804 	if (asyncflag)
2805 		mp->mnt_flag |= MNT_ASYNC;
2806 	vfs_unbusy(mp, p);
2807 	return (0);
2808 }
2809
2810 /*
2811  * The syncer vnode is no longer referenced.
2812  */
2813 int
2814 sync_inactive(ap)
2815 	struct vop_inactive_args /* {
2816 		struct vnode *a_vp;
2817 		struct proc *a_p;
2818 	} */ *ap;
2819 {
2820
2821 	vgone(ap->a_vp);
2822 	return (0);
2823 }
2824
2825 /*
2826  * The syncer vnode is no longer needed and is being decommissioned.
2827  */
2828 int
2829 sync_reclaim(ap)
2830 	struct vop_reclaim_args /* {
2831 		struct vnode *a_vp;
2832 	} */ *ap;
2833 {
2834 	struct vnode *vp = ap->a_vp;
2835
2836 	vp->v_mount->mnt_syncer = NULL;
2837 	if (vp->v_flag & VONWORKLST) {
2838 		LIST_REMOVE(vp, v_synclist);
2839 		vp->v_flag &= ~VONWORKLST;
2840 	}
2841
2842 	return (0);
2843 }
2844
2845 /*
2846  * Print out a syncer vnode.
2847  */
2848 int
2849 sync_print(ap)
2850 	struct vop_print_args /* {
2851 		struct vnode *a_vp;
2852 	} */ *ap;
2853 {
2854 	struct vnode *vp = ap->a_vp;
2855
2856 	printf("syncer vnode");
2857 	if (vp->v_vnlock != NULL)
2858 		lockmgr_printinfo(vp->v_vnlock);
2859 	printf("\n");
2860 	return (0);
2861 }
2862
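/*
 * Illustrative sketch (not part of this file): a writable mount would
 * typically be given its syncer vnode from the mount path with
 * something along the lines of
 *
 *	if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
 *		error = vfs_allocate_syncvnode(mp);
 *
 * after which sync_fsync() above is driven periodically through the
 * syncer worklist; the exact condition tested by the mount code may
 * differ from the one shown here.
 */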