1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 
37 * 38 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 39 * $Id: vfs_subr.c,v 1.162 1998/09/05 15:17:33 bde Exp $ 40 */ 41 42 /* 43 * External virtual filesystem routines 44 */ 45 #include "opt_ddb.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/kernel.h> 50 #include <sys/proc.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/socket.h> 54 #include <sys/vnode.h> 55 #include <sys/stat.h> 56 #include <sys/buf.h> 57 #include <sys/domain.h> 58 #include <sys/dirent.h> 59 #include <sys/vmmeter.h> 60 61 #include <machine/limits.h> 62 63 #include <vm/vm.h> 64 #include <vm/vm_object.h> 65 #include <vm/vm_extern.h> 66 #include <vm/pmap.h> 67 #include <vm/vm_map.h> 68 #include <vm/vm_pager.h> 69 #include <vm/vnode_pager.h> 70 #include <vm/vm_zone.h> 71 #include <sys/sysctl.h> 72 73 #include <miscfs/specfs/specdev.h> 74 75 static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); 76 77 static void insmntque __P((struct vnode *vp, struct mount *mp)); 78 #ifdef DDB 79 static void printlockedvnodes __P((void)); 80 #endif 81 static void vclean __P((struct vnode *vp, int flags, struct proc *p)); 82 static void vfree __P((struct vnode *)); 83 static void vgonel __P((struct vnode *vp, struct proc *p)); 84 static unsigned long numvnodes; 85 SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); 86 87 enum vtype iftovt_tab[16] = { 88 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 89 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 90 }; 91 int vttoif_tab[9] = { 92 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 93 S_IFSOCK, S_IFIFO, S_IFMT, 94 }; 95 96 /* 97 * Insq/Remq for the vnode usage lists. 98 */ 99 #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) 100 #define bufremvn(bp) { \ 101 LIST_REMOVE(bp, b_vnbufs); \ 102 (bp)->b_vnbufs.le_next = NOLIST; \ 103 } 104 105 static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ 106 struct tobefreelist vnode_tobefree_list; /* vnode free list */ 107 108 static u_long wantfreevnodes = 25; 109 SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); 110 static u_long freevnodes = 0; 111 SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); 112 113 int vfs_ioopt = 0; 114 #ifdef ENABLE_VFS_IOOPT 115 SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); 116 #endif 117 118 struct mntlist mountlist; /* mounted filesystem list */ 119 struct simplelock mountlist_slock; 120 static struct simplelock mntid_slock; 121 struct simplelock mntvnode_slock; 122 int nfs_mount_type = -1; 123 static struct simplelock vnode_free_list_slock; 124 static struct simplelock spechash_slock; 125 struct nfs_public nfs_pub; /* publicly exported FS */ 126 static vm_zone_t vnode_zone; 127 128 /* 129 * The workitem queue. 
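 * (A fuller description of how delayed writes are scheduled through this
 * queue appears in the comment above vn_syncer_add_to_worklist() further
 * down in this file.)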
130 */ 131 #define SYNCER_MAXDELAY 32 132 int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ 133 time_t syncdelay = 30; 134 int rushjob; /* number of slots to run ASAP */ 135 136 static int syncer_delayno = 0; 137 static long syncer_mask; 138 LIST_HEAD(synclist, vnode); 139 static struct synclist *syncer_workitem_pending; 140 141 int desiredvnodes; 142 SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, ""); 143 144 static void vfs_free_addrlist __P((struct netexport *nep)); 145 static int vfs_free_netcred __P((struct radix_node *rn, void *w)); 146 static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, 147 struct export_args *argp)); 148 149 /* 150 * Initialize the vnode management data structures. 151 */ 152 void 153 vntblinit() 154 { 155 156 desiredvnodes = maxproc + cnt.v_page_count / 4; 157 simple_lock_init(&mntvnode_slock); 158 simple_lock_init(&mntid_slock); 159 simple_lock_init(&spechash_slock); 160 TAILQ_INIT(&vnode_free_list); 161 TAILQ_INIT(&vnode_tobefree_list); 162 simple_lock_init(&vnode_free_list_slock); 163 CIRCLEQ_INIT(&mountlist); 164 vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); 165 /* 166 * Initialize the filesystem syncer. 167 */ 168 syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, 169 &syncer_mask); 170 syncer_maxdelay = syncer_mask + 1; 171 } 172 173 /* 174 * Mark a mount point as busy. Used to synchronize access and to delay 175 * unmounting. Interlock is not released on failure. 176 */ 177 int 178 vfs_busy(mp, flags, interlkp, p) 179 struct mount *mp; 180 int flags; 181 struct simplelock *interlkp; 182 struct proc *p; 183 { 184 int lkflags; 185 186 if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 187 if (flags & LK_NOWAIT) 188 return (ENOENT); 189 mp->mnt_kern_flag |= MNTK_MWAIT; 190 if (interlkp) { 191 simple_unlock(interlkp); 192 } 193 /* 194 * Since all busy locks are shared except the exclusive 195 * lock granted when unmounting, the only place that a 196 * wakeup needs to be done is at the release of the 197 * exclusive lock at the end of dounmount. 198 */ 199 tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); 200 if (interlkp) { 201 simple_lock(interlkp); 202 } 203 return (ENOENT); 204 } 205 lkflags = LK_SHARED | LK_NOPAUSE; 206 if (interlkp) 207 lkflags |= LK_INTERLOCK; 208 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) 209 panic("vfs_busy: unexpected lock failure"); 210 return (0); 211 } 212 213 /* 214 * Free a busy filesystem. 215 */ 216 void 217 vfs_unbusy(mp, p) 218 struct mount *mp; 219 struct proc *p; 220 { 221 222 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); 223 } 224 225 /* 226 * Lookup a filesystem type, and if found allocate and initialize 227 * a mount structure for it. 228 * 229 * Devname is usually updated by mount(8) after booting. 
230 */ 231 int 232 vfs_rootmountalloc(fstypename, devname, mpp) 233 char *fstypename; 234 char *devname; 235 struct mount **mpp; 236 { 237 struct proc *p = curproc; /* XXX */ 238 struct vfsconf *vfsp; 239 struct mount *mp; 240 241 if (fstypename == NULL) 242 return (ENODEV); 243 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 244 if (!strcmp(vfsp->vfc_name, fstypename)) 245 break; 246 if (vfsp == NULL) 247 return (ENODEV); 248 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 249 bzero((char *)mp, (u_long)sizeof(struct mount)); 250 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); 251 (void)vfs_busy(mp, LK_NOWAIT, 0, p); 252 LIST_INIT(&mp->mnt_vnodelist); 253 mp->mnt_vfc = vfsp; 254 mp->mnt_op = vfsp->vfc_vfsops; 255 mp->mnt_flag = MNT_RDONLY; 256 mp->mnt_vnodecovered = NULLVP; 257 vfsp->vfc_refcount++; 258 mp->mnt_stat.f_type = vfsp->vfc_typenum; 259 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 260 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 261 mp->mnt_stat.f_mntonname[0] = '/'; 262 mp->mnt_stat.f_mntonname[1] = 0; 263 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 264 *mpp = mp; 265 return (0); 266 } 267 268 /* 269 * Find an appropriate filesystem to use for the root. If a filesystem 270 * has not been preselected, walk through the list of known filesystems 271 * trying those that have mountroot routines, and try them until one 272 * works or we have tried them all. 273 */ 274 #ifdef notdef /* XXX JH */ 275 int 276 lite2_vfs_mountroot() 277 { 278 struct vfsconf *vfsp; 279 extern int (*lite2_mountroot) __P((void)); 280 int error; 281 282 if (lite2_mountroot != NULL) 283 return ((*lite2_mountroot)()); 284 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 285 if (vfsp->vfc_mountroot == NULL) 286 continue; 287 if ((error = (*vfsp->vfc_mountroot)()) == 0) 288 return (0); 289 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); 290 } 291 return (ENODEV); 292 } 293 #endif 294 295 /* 296 * Lookup a mount point by filesystem identifier. 
297 */ 298 struct mount * 299 vfs_getvfs(fsid) 300 fsid_t *fsid; 301 { 302 register struct mount *mp; 303 304 simple_lock(&mountlist_slock); 305 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 306 mp = mp->mnt_list.cqe_next) { 307 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 308 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { 309 simple_unlock(&mountlist_slock); 310 return (mp); 311 } 312 } 313 simple_unlock(&mountlist_slock); 314 return ((struct mount *) 0); 315 } 316 317 /* 318 * Get a new unique fsid 319 */ 320 void 321 vfs_getnewfsid(mp) 322 struct mount *mp; 323 { 324 static u_short xxxfs_mntid; 325 326 fsid_t tfsid; 327 int mtype; 328 329 simple_lock(&mntid_slock); 330 mtype = mp->mnt_vfc->vfc_typenum; 331 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); 332 mp->mnt_stat.f_fsid.val[1] = mtype; 333 if (xxxfs_mntid == 0) 334 ++xxxfs_mntid; 335 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); 336 tfsid.val[1] = mtype; 337 if (mountlist.cqh_first != (void *)&mountlist) { 338 while (vfs_getvfs(&tfsid)) { 339 tfsid.val[0]++; 340 xxxfs_mntid++; 341 } 342 } 343 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 344 simple_unlock(&mntid_slock); 345 } 346 347 /* 348 * Set vnode attributes to VNOVAL 349 */ 350 void 351 vattr_null(vap) 352 register struct vattr *vap; 353 { 354 355 vap->va_type = VNON; 356 vap->va_size = VNOVAL; 357 vap->va_bytes = VNOVAL; 358 vap->va_mode = VNOVAL; 359 vap->va_nlink = VNOVAL; 360 vap->va_uid = VNOVAL; 361 vap->va_gid = VNOVAL; 362 vap->va_fsid = VNOVAL; 363 vap->va_fileid = VNOVAL; 364 vap->va_blocksize = VNOVAL; 365 vap->va_rdev = VNOVAL; 366 vap->va_atime.tv_sec = VNOVAL; 367 vap->va_atime.tv_nsec = VNOVAL; 368 vap->va_mtime.tv_sec = VNOVAL; 369 vap->va_mtime.tv_nsec = VNOVAL; 370 vap->va_ctime.tv_sec = VNOVAL; 371 vap->va_ctime.tv_nsec = VNOVAL; 372 vap->va_flags = VNOVAL; 373 vap->va_gen = VNOVAL; 374 vap->va_vaflags = 0; 375 } 376 377 /* 378 * Routines having to do with the management of the vnode table. 379 */ 380 extern vop_t **dead_vnodeop_p; 381 382 /* 383 * Return the next vnode from the free list. 384 */ 385 int 386 getnewvnode(tag, mp, vops, vpp) 387 enum vtagtype tag; 388 struct mount *mp; 389 vop_t **vops; 390 struct vnode **vpp; 391 { 392 int s; 393 struct proc *p = curproc; /* XXX */ 394 struct vnode *vp, *tvp, *nvp; 395 vm_object_t object; 396 TAILQ_HEAD(freelst, vnode) vnode_tmp_list; 397 398 /* 399 * We take the least recently used vnode from the freelist 400 * if we can get it and it has no cached pages, and no 401 * namecache entries are relative to it. 
402 * Otherwise we allocate a new vnode 403 */ 404 405 s = splbio(); 406 simple_lock(&vnode_free_list_slock); 407 TAILQ_INIT(&vnode_tmp_list); 408 409 for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) { 410 nvp = TAILQ_NEXT(vp, v_freelist); 411 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 412 if (vp->v_flag & VAGE) { 413 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 414 } else { 415 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 416 } 417 vp->v_flag &= ~(VTBFREE|VAGE); 418 vp->v_flag |= VFREE; 419 if (vp->v_usecount) 420 panic("tobe free vnode isn't"); 421 freevnodes++; 422 } 423 424 if (wantfreevnodes && freevnodes < wantfreevnodes) { 425 vp = NULL; 426 } else if (!wantfreevnodes && freevnodes <= desiredvnodes) { 427 /* 428 * XXX: this is only here to be backwards compatible 429 */ 430 vp = NULL; 431 } else { 432 for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) { 433 434 nvp = TAILQ_NEXT(vp, v_freelist); 435 436 if (!simple_lock_try(&vp->v_interlock)) 437 continue; 438 if (vp->v_usecount) 439 panic("free vnode isn't"); 440 441 object = vp->v_object; 442 if (object && (object->resident_page_count || object->ref_count)) { 443 printf("object inconsistant state: RPC: %d, RC: %d\n", 444 object->resident_page_count, object->ref_count); 445 /* Don't recycle if it's caching some pages */ 446 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 447 TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist); 448 continue; 449 } else if (LIST_FIRST(&vp->v_cache_src)) { 450 /* Don't recycle if active in the namecache */ 451 simple_unlock(&vp->v_interlock); 452 continue; 453 } else { 454 break; 455 } 456 } 457 } 458 459 for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) { 460 nvp = TAILQ_NEXT(tvp, v_freelist); 461 TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist); 462 TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist); 463 simple_unlock(&tvp->v_interlock); 464 } 465 466 if (vp) { 467 vp->v_flag |= VDOOMED; 468 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 469 freevnodes--; 470 simple_unlock(&vnode_free_list_slock); 471 cache_purge(vp); 472 vp->v_lease = NULL; 473 if (vp->v_type != VBAD) { 474 vgonel(vp, p); 475 } else { 476 simple_unlock(&vp->v_interlock); 477 } 478 479 #ifdef DIAGNOSTIC 480 { 481 int s; 482 483 if (vp->v_data) 484 panic("cleaned vnode isn't"); 485 s = splbio(); 486 if (vp->v_numoutput) 487 panic("Clean vnode has pending I/O's"); 488 splx(s); 489 } 490 #endif 491 vp->v_flag = 0; 492 vp->v_lastr = 0; 493 vp->v_lastw = 0; 494 vp->v_lasta = 0; 495 vp->v_cstart = 0; 496 vp->v_clen = 0; 497 vp->v_socket = 0; 498 vp->v_writecount = 0; /* XXX */ 499 vp->v_maxio = 0; 500 } else { 501 simple_unlock(&vnode_free_list_slock); 502 vp = (struct vnode *) zalloc(vnode_zone); 503 bzero((char *) vp, sizeof *vp); 504 simple_lock_init(&vp->v_interlock); 505 vp->v_dd = vp; 506 cache_purge(vp); 507 LIST_INIT(&vp->v_cache_src); 508 TAILQ_INIT(&vp->v_cache_dst); 509 numvnodes++; 510 } 511 512 vp->v_type = VNON; 513 vp->v_tag = tag; 514 vp->v_op = vops; 515 insmntque(vp, mp); 516 *vpp = vp; 517 vp->v_usecount = 1; 518 vp->v_data = 0; 519 splx(s); 520 521 vfs_object_create(vp, p, p->p_ucred, TRUE); 522 return (0); 523 } 524 525 /* 526 * Move a vnode from one mount queue to another. 527 */ 528 static void 529 insmntque(vp, mp) 530 register struct vnode *vp; 531 register struct mount *mp; 532 { 533 534 simple_lock(&mntvnode_slock); 535 /* 536 * Delete from old mount point vnode list, if on one. 
537 */ 538 if (vp->v_mount != NULL) 539 LIST_REMOVE(vp, v_mntvnodes); 540 /* 541 * Insert into list of vnodes for the new mount point, if available. 542 */ 543 if ((vp->v_mount = mp) == NULL) { 544 simple_unlock(&mntvnode_slock); 545 return; 546 } 547 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 548 simple_unlock(&mntvnode_slock); 549 } 550 551 /* 552 * Update outstanding I/O count and do wakeup if requested. 553 */ 554 void 555 vwakeup(bp) 556 register struct buf *bp; 557 { 558 register struct vnode *vp; 559 560 bp->b_flags &= ~B_WRITEINPROG; 561 if ((vp = bp->b_vp)) { 562 vp->v_numoutput--; 563 if (vp->v_numoutput < 0) 564 panic("vwakeup: neg numoutput"); 565 if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { 566 vp->v_flag &= ~VBWAIT; 567 wakeup((caddr_t) &vp->v_numoutput); 568 } 569 } 570 } 571 572 /* 573 * Flush out and invalidate all buffers associated with a vnode. 574 * Called with the underlying object locked. 575 */ 576 int 577 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) 578 register struct vnode *vp; 579 int flags; 580 struct ucred *cred; 581 struct proc *p; 582 int slpflag, slptimeo; 583 { 584 register struct buf *bp; 585 struct buf *nbp, *blist; 586 int s, error; 587 vm_object_t object; 588 589 if (flags & V_SAVE) { 590 s = splbio(); 591 while (vp->v_numoutput) { 592 vp->v_flag |= VBWAIT; 593 tsleep((caddr_t)&vp->v_numoutput, 594 slpflag | (PRIBIO + 1), 595 "vinvlbuf", slptimeo); 596 } 597 if (vp->v_dirtyblkhd.lh_first != NULL) { 598 splx(s); 599 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0) 600 return (error); 601 s = splbio(); 602 if (vp->v_numoutput > 0 || 603 vp->v_dirtyblkhd.lh_first != NULL) 604 panic("vinvalbuf: dirty bufs"); 605 } 606 splx(s); 607 } 608 s = splbio(); 609 for (;;) { 610 if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA)) 611 while (blist && blist->b_lblkno < 0) 612 blist = blist->b_vnbufs.le_next; 613 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && 614 (flags & V_SAVEMETA)) 615 while (blist && blist->b_lblkno < 0) 616 blist = blist->b_vnbufs.le_next; 617 if (!blist) 618 break; 619 620 for (bp = blist; bp; bp = nbp) { 621 nbp = bp->b_vnbufs.le_next; 622 if ((flags & V_SAVEMETA) && bp->b_lblkno < 0) 623 continue; 624 if (bp->b_flags & B_BUSY) { 625 bp->b_flags |= B_WANTED; 626 error = tsleep((caddr_t) bp, 627 slpflag | (PRIBIO + 4), "vinvalbuf", 628 slptimeo); 629 if (error) { 630 splx(s); 631 return (error); 632 } 633 break; 634 } 635 /* 636 * XXX Since there are no node locks for NFS, I 637 * believe there is a slight chance that a delayed 638 * write will occur while sleeping just above, so 639 * check for it. Note that vfs_bio_awrite expects 640 * buffers to reside on a queue, while VOP_BWRITE and 641 * brelse do not. 642 */ 643 if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && 644 (flags & V_SAVE)) { 645 646 if (bp->b_vp == vp) { 647 if (bp->b_flags & B_CLUSTEROK) { 648 vfs_bio_awrite(bp); 649 } else { 650 bremfree(bp); 651 bp->b_flags |= (B_BUSY | B_ASYNC); 652 VOP_BWRITE(bp); 653 } 654 } else { 655 bremfree(bp); 656 bp->b_flags |= B_BUSY; 657 (void) VOP_BWRITE(bp); 658 } 659 break; 660 } 661 bremfree(bp); 662 bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF | B_BUSY); 663 bp->b_flags &= ~B_ASYNC; 664 brelse(bp); 665 } 666 } 667 668 while (vp->v_numoutput > 0) { 669 vp->v_flag |= VBWAIT; 670 tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); 671 } 672 673 splx(s); 674 675 /* 676 * Destroy the copy in the VM cache, too. 
677 */ 678 simple_lock(&vp->v_interlock); 679 object = vp->v_object; 680 if (object != NULL) { 681 if (flags & V_SAVEMETA) 682 vm_object_page_remove(object, 0, object->size, 683 (flags & V_SAVE) ? TRUE : FALSE); 684 else 685 vm_object_page_remove(object, 0, 0, 686 (flags & V_SAVE) ? TRUE : FALSE); 687 } 688 simple_unlock(&vp->v_interlock); 689 690 if (!(flags & V_SAVEMETA) && 691 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) 692 panic("vinvalbuf: flush failed"); 693 return (0); 694 } 695 696 /* 697 * Truncate a file's buffer and pages to a specified length. This 698 * is in lieu of the old vinvalbuf mechanism, which performed unneeded 699 * sync activity. 700 */ 701 int 702 vtruncbuf(vp, cred, p, length, blksize) 703 register struct vnode *vp; 704 struct ucred *cred; 705 struct proc *p; 706 off_t length; 707 int blksize; 708 { 709 register struct buf *bp; 710 struct buf *nbp, *blist; 711 int s, error, anyfreed; 712 vm_object_t object; 713 int trunclbn; 714 715 /* 716 * Round up to the *next* lbn. 717 */ 718 trunclbn = (length + blksize - 1) / blksize; 719 720 s = splbio(); 721 restart: 722 anyfreed = 1; 723 for (;anyfreed;) { 724 anyfreed = 0; 725 for ( bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 726 727 nbp = LIST_NEXT(bp, b_vnbufs); 728 729 if (bp->b_lblkno >= trunclbn) { 730 if (bp->b_flags & B_BUSY) { 731 bp->b_flags |= B_WANTED; 732 tsleep(bp, PRIBIO + 4, "vtrb1", 0); 733 goto restart; 734 } else { 735 bremfree(bp); 736 bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF); 737 bp->b_flags &= ~B_ASYNC; 738 brelse(bp); 739 anyfreed = 1; 740 } 741 if (nbp && 742 ((LIST_NEXT(nbp, b_vnbufs) == NOLIST) || 743 (nbp->b_vp != vp) || 744 (nbp->b_flags & B_DELWRI))) { 745 goto restart; 746 } 747 } 748 } 749 750 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 751 752 nbp = LIST_NEXT(bp, b_vnbufs); 753 754 if (bp->b_lblkno >= trunclbn) { 755 if (bp->b_flags & B_BUSY) { 756 bp->b_flags |= B_WANTED; 757 tsleep(bp, PRIBIO + 4, "vtrb2", 0); 758 goto restart; 759 } else { 760 bremfree(bp); 761 bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF); 762 bp->b_flags &= ~B_ASYNC; 763 brelse(bp); 764 anyfreed = 1; 765 } 766 if (nbp && 767 ((LIST_NEXT(nbp, b_vnbufs) == NOLIST) || 768 (nbp->b_vp != vp) || 769 (nbp->b_flags & B_DELWRI) == 0)) { 770 goto restart; 771 } 772 } 773 } 774 } 775 776 if (length > 0) { 777 restartsync: 778 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 779 780 nbp = LIST_NEXT(bp, b_vnbufs); 781 782 if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) { 783 if (bp->b_flags & B_BUSY) { 784 bp->b_flags |= B_WANTED; 785 tsleep(bp, PRIBIO, "vtrb3", 0); 786 } else { 787 bremfree(bp); 788 bp->b_flags |= B_BUSY; 789 if (bp->b_vp == vp) { 790 bp->b_flags |= B_ASYNC; 791 } else { 792 bp->b_flags &= ~B_ASYNC; 793 } 794 VOP_BWRITE(bp); 795 } 796 goto restartsync; 797 } 798 799 } 800 } 801 802 while (vp->v_numoutput > 0) { 803 vp->v_flag |= VBWAIT; 804 tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0); 805 } 806 807 splx(s); 808 809 vnode_pager_setsize(vp, length); 810 811 return (0); 812 } 813 814 /* 815 * Associate a buffer with a vnode. 816 */ 817 void 818 bgetvp(vp, bp) 819 register struct vnode *vp; 820 register struct buf *bp; 821 { 822 int s; 823 824 #if defined(DIAGNOSTIC) 825 if (bp->b_vp) 826 panic("bgetvp: not free"); 827 #endif 828 vhold(vp); 829 bp->b_vp = vp; 830 if (vp->v_type == VBLK || vp->v_type == VCHR) 831 bp->b_dev = vp->v_rdev; 832 else 833 bp->b_dev = NODEV; 834 /* 835 * Insert onto list for new vnode. 
836 */ 837 s = splbio(); 838 bufinsvn(bp, &vp->v_cleanblkhd); 839 splx(s); 840 } 841 842 /* 843 * Disassociate a buffer from a vnode. 844 */ 845 void 846 brelvp(bp) 847 register struct buf *bp; 848 { 849 struct vnode *vp; 850 int s; 851 852 #if defined(DIAGNOSTIC) 853 if (bp->b_vp == (struct vnode *) 0) 854 panic("brelvp: NULL"); 855 #endif 856 857 /* 858 * Delete from old vnode list, if on one. 859 */ 860 vp = bp->b_vp; 861 s = splbio(); 862 if (bp->b_vnbufs.le_next != NOLIST) 863 bufremvn(bp); 864 if ((vp->v_flag & VONWORKLST) && (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)) { 865 vp->v_flag &= ~VONWORKLST; 866 LIST_REMOVE(vp, v_synclist); 867 } 868 splx(s); 869 bp->b_vp = (struct vnode *) 0; 870 vdrop(vp); 871 } 872 873 /* 874 * The workitem queue. 875 * 876 * It is useful to delay writes of file data and filesystem metadata 877 * for tens of seconds so that quickly created and deleted files need 878 * not waste disk bandwidth being created and removed. To realize this, 879 * we append vnodes to a "workitem" queue. When running with a soft 880 * updates implementation, most pending metadata dependencies should 881 * not wait for more than a few seconds. Thus, buffers for block devices 882 * holding mounted filesystems are delayed only about half the time that file data is delayed. 883 * Similarly, directory updates are more critical, so are only delayed 884 * about a third of the time that file data is delayed. Thus, there are 885 * SYNCER_MAXDELAY queues that are processed round-robin at a rate of 886 * one each second (driven off the filesystem syncer process). The 887 * syncer_delayno variable indicates the next queue that is to be processed. 888 * Items that need to be processed soon are placed in this queue: 889 * 890 * syncer_workitem_pending[syncer_delayno] 891 * 892 * A delay of fifteen seconds is done by placing the request fifteen 893 * entries later in the queue: 894 * 895 * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] 896 * 897 */ 898 899 /* 900 * Add an item to the syncer work queue. 901 */ 902 void 903 vn_syncer_add_to_worklist(vp, delay) 904 struct vnode *vp; 905 int delay; 906 { 907 int s, slot; 908 909 s = splbio(); 910 911 if (vp->v_flag & VONWORKLST) { 912 LIST_REMOVE(vp, v_synclist); 913 } 914 915 if (delay > syncer_maxdelay - 2) 916 delay = syncer_maxdelay - 2; 917 slot = (syncer_delayno + delay) & syncer_mask; 918 919 LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); 920 vp->v_flag |= VONWORKLST; 921 splx(s); 922 } 923 924 static void sched_sync __P((void)); 925 static struct proc *updateproc; 926 static struct kproc_desc up_kp = { 927 "syncer", 928 sched_sync, 929 &updateproc 930 }; 931 SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) 932 933 /* 934 * System filesystem synchronizer daemon. 935 */ 936 void 937 sched_sync(void) 938 { 939 struct synclist *slp; 940 struct vnode *vp; 941 long starttime; 942 int s; 943 struct proc *p = updateproc; 944 945 for (;;) { 946 starttime = time_second; 947 948 /* 949 * Push files whose dirty time has expired.
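 * The syncer advances syncer_delayno once per second and flushes every
 * vnode found in that bucket of syncer_workitem_pending, so a vnode queued
 * N slots ahead is normally written out roughly N seconds later.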
950 */ 951 s = splbio(); 952 slp = &syncer_workitem_pending[syncer_delayno]; 953 syncer_delayno += 1; 954 if (syncer_delayno == syncer_maxdelay) 955 syncer_delayno = 0; 956 splx(s); 957 958 while ((vp = LIST_FIRST(slp)) != NULL) { 959 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 960 (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); 961 VOP_UNLOCK(vp, 0, p); 962 if (LIST_FIRST(slp) == vp) { 963 if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL && 964 vp->v_type != VBLK) 965 panic("sched_sync: fsync failed"); 966 /* 967 * Move ourselves to the back of the sync list. 968 */ 969 LIST_REMOVE(vp, v_synclist); 970 vn_syncer_add_to_worklist(vp, syncdelay); 971 } 972 } 973 974 /* 975 * Do soft update processing. 976 */ 977 if (bioops.io_sync) 978 (*bioops.io_sync)(NULL); 979 980 /* 981 * The variable rushjob allows the kernel to speed up the 982 * processing of the filesystem syncer process. A rushjob 983 * value of N tells the filesystem syncer to process the next 984 * N seconds worth of work on its queue ASAP. Currently rushjob 985 * is used by the soft update code to speed up the filesystem 986 * syncer process when the incore state is getting so far 987 * ahead of the disk that the kernel memory pool is being 988 * threatened with exhaustion. 989 */ 990 if (rushjob > 0) { 991 rushjob -= 1; 992 continue; 993 } 994 /* 995 * If it has taken us less than a second to process the 996 * current work, then wait. Otherwise start right over 997 * again. We can still lose time if any single round 998 * takes more than two seconds, but it does not really 999 * matter as we are just trying to generally pace the 1000 * filesystem activity. 1001 */ 1002 if (time_second == starttime) 1003 tsleep(&lbolt, PPAUSE, "syncer", 0); 1004 } 1005 } 1006 1007 /* 1008 * Associate a p-buffer with a vnode. 1009 */ 1010 void 1011 pbgetvp(vp, bp) 1012 register struct vnode *vp; 1013 register struct buf *bp; 1014 { 1015 #if defined(DIAGNOSTIC) 1016 if (bp->b_vp) 1017 panic("pbgetvp: not free"); 1018 #endif 1019 bp->b_vp = vp; 1020 if (vp->v_type == VBLK || vp->v_type == VCHR) 1021 bp->b_dev = vp->v_rdev; 1022 else 1023 bp->b_dev = NODEV; 1024 } 1025 1026 /* 1027 * Disassociate a p-buffer from a vnode. 1028 */ 1029 void 1030 pbrelvp(bp) 1031 register struct buf *bp; 1032 { 1033 1034 #if defined(DIAGNOSTIC) 1035 if (bp->b_vp == (struct vnode *) 0) 1036 panic("pbrelvp: NULL"); 1037 #endif 1038 1039 bp->b_vp = (struct vnode *) 0; 1040 } 1041 1042 /* 1043 * Reassign a buffer from one vnode to another. 1044 * Used to assign file specific control information 1045 * (indirect blocks) to the vnode to which they belong. 1046 */ 1047 void 1048 reassignbuf(bp, newvp) 1049 register struct buf *bp; 1050 register struct vnode *newvp; 1051 { 1052 struct buflists *listheadp; 1053 int delay; 1054 int s; 1055 1056 if (newvp == NULL) { 1057 printf("reassignbuf: NULL"); 1058 return; 1059 } 1060 1061 s = splbio(); 1062 /* 1063 * Delete from old vnode list, if on one. 1064 */ 1065 if (bp->b_vnbufs.le_next != NOLIST) { 1066 bufremvn(bp); 1067 vdrop(bp->b_vp); 1068 } 1069 /* 1070 * If dirty, put on list of dirty buffers; otherwise insert onto list 1071 * of clean buffers. 
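 * A dirty buffer also puts the vnode on the syncer worklist if it is not
 * already there; the delay depends on the vnode type (directories use
 * syncdelay / 3, block devices holding mounted filesystems syncdelay / 2,
 * everything else the full syncdelay), and the dirty list is kept sorted
 * by logical block number.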
1072 */ 1073 if (bp->b_flags & B_DELWRI) { 1074 struct buf *tbp; 1075 1076 listheadp = &newvp->v_dirtyblkhd; 1077 if ((newvp->v_flag & VONWORKLST) == 0) { 1078 switch (newvp->v_type) { 1079 case VDIR: 1080 delay = syncdelay / 3; 1081 break; 1082 case VBLK: 1083 if (newvp->v_specmountpoint != NULL) { 1084 delay = syncdelay / 2; 1085 break; 1086 } 1087 /* fall through */ 1088 default: 1089 delay = syncdelay; 1090 } 1091 vn_syncer_add_to_worklist(newvp, delay); 1092 } 1093 tbp = listheadp->lh_first; 1094 if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) { 1095 bufinsvn(bp, listheadp); 1096 } else { 1097 while (tbp->b_vnbufs.le_next && 1098 (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { 1099 tbp = tbp->b_vnbufs.le_next; 1100 } 1101 LIST_INSERT_AFTER(tbp, bp, b_vnbufs); 1102 } 1103 } else { 1104 bufinsvn(bp, &newvp->v_cleanblkhd); 1105 if ((newvp->v_flag & VONWORKLST) && 1106 LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) { 1107 newvp->v_flag &= ~VONWORKLST; 1108 LIST_REMOVE(newvp, v_synclist); 1109 } 1110 } 1111 bp->b_vp = newvp; 1112 vhold(bp->b_vp); 1113 splx(s); 1114 } 1115 1116 /* 1117 * Create a vnode for a block device. 1118 * Used for mounting the root file system. 1119 */ 1120 int 1121 bdevvp(dev, vpp) 1122 dev_t dev; 1123 struct vnode **vpp; 1124 { 1125 register struct vnode *vp; 1126 struct vnode *nvp; 1127 int error; 1128 1129 if (dev == NODEV) 1130 return (0); 1131 error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp); 1132 if (error) { 1133 *vpp = 0; 1134 return (error); 1135 } 1136 vp = nvp; 1137 vp->v_type = VBLK; 1138 if ((nvp = checkalias(vp, dev, (struct mount *) 0))) { 1139 vput(vp); 1140 vp = nvp; 1141 } 1142 *vpp = vp; 1143 return (0); 1144 } 1145 1146 /* 1147 * Check to see if the new vnode represents a special device 1148 * for which we already have a vnode (either because of 1149 * bdevvp() or because of a different vnode representing 1150 * the same block device). If such an alias exists, deallocate 1151 * the existing contents and return the aliased vnode. The 1152 * caller is responsible for filling it with its new contents. 1153 */ 1154 struct vnode * 1155 checkalias(nvp, nvp_rdev, mp) 1156 register struct vnode *nvp; 1157 dev_t nvp_rdev; 1158 struct mount *mp; 1159 { 1160 struct proc *p = curproc; /* XXX */ 1161 struct vnode *vp; 1162 struct vnode **vpp; 1163 1164 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 1165 return (NULLVP); 1166 1167 vpp = &speclisth[SPECHASH(nvp_rdev)]; 1168 loop: 1169 simple_lock(&spechash_slock); 1170 for (vp = *vpp; vp; vp = vp->v_specnext) { 1171 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) 1172 continue; 1173 /* 1174 * Alias, but not in use, so flush it out. 1175 * Only alias active device nodes. 1176 * Not sure why we don't re-use this like we do below. 1177 */ 1178 simple_lock(&vp->v_interlock); 1179 if (vp->v_usecount == 0) { 1180 simple_unlock(&spechash_slock); 1181 vgonel(vp, p); 1182 goto loop; 1183 } 1184 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { 1185 /* 1186 * It dissappeared, and we may have slept. 1187 * Restart from the beginning 1188 */ 1189 simple_unlock(&spechash_slock); 1190 goto loop; 1191 } 1192 break; 1193 } 1194 /* 1195 * It would be a lot clearer what is going on here if 1196 * this had been expressed as: 1197 * if ( vp && (vp->v_tag == VT_NULL)) 1198 * and the clauses had been swapped. 1199 */ 1200 if (vp == NULL || vp->v_tag != VT_NON) { 1201 /* 1202 * Put the new vnode into the hash chain. 1203 * and if there was an alias, connect them. 
1204 */ 1205 MALLOC(nvp->v_specinfo, struct specinfo *, 1206 sizeof(struct specinfo), M_VNODE, M_WAITOK); 1207 nvp->v_rdev = nvp_rdev; 1208 nvp->v_hashchain = vpp; 1209 nvp->v_specnext = *vpp; 1210 nvp->v_specmountpoint = NULL; 1211 simple_unlock(&spechash_slock); 1212 *vpp = nvp; 1213 if (vp != NULLVP) { 1214 nvp->v_flag |= VALIASED; 1215 vp->v_flag |= VALIASED; 1216 vput(vp); 1217 } 1218 return (NULLVP); 1219 } 1220 /* 1221 * if ( vp && (vp->v_tag == VT_NULL)) 1222 * We have a vnode alias, but it is trashed. 1223 * Make it look like it's newly allocated (by getnewvnode()). 1224 * The caller should use this instead. 1225 */ 1226 simple_unlock(&spechash_slock); 1227 VOP_UNLOCK(vp, 0, p); 1228 simple_lock(&vp->v_interlock); 1229 vclean(vp, 0, p); 1230 vp->v_op = nvp->v_op; 1231 vp->v_tag = nvp->v_tag; 1232 nvp->v_type = VNON; 1233 insmntque(vp, mp); 1234 return (vp); 1235 } 1236 1237 /* 1238 * Grab a particular vnode from the free list, increment its 1239 * reference count and lock it. The vnode lock bit is set if the 1240 * vnode is being eliminated in vgone. The process is awakened 1241 * when the transition is completed, and an error returned to 1242 * indicate that the vnode is no longer usable (possibly having 1243 * been changed to a new file system type). 1244 */ 1245 int 1246 vget(vp, flags, p) 1247 register struct vnode *vp; 1248 int flags; 1249 struct proc *p; 1250 { 1251 int error; 1252 1253 /* 1254 * If the vnode is in the process of being cleaned out for 1255 * another use, we wait for the cleaning to finish and then 1256 * return failure. Cleaning is determined by checking that 1257 * the VXLOCK flag is set. 1258 */ 1259 if ((flags & LK_INTERLOCK) == 0) { 1260 simple_lock(&vp->v_interlock); 1261 } 1262 if (vp->v_flag & VXLOCK) { 1263 vp->v_flag |= VXWANT; 1264 simple_unlock(&vp->v_interlock); 1265 tsleep((caddr_t)vp, PINOD, "vget", 0); 1266 return (ENOENT); 1267 } 1268 1269 vp->v_usecount++; 1270 1271 if (VSHOULDBUSY(vp)) 1272 vbusy(vp); 1273 if (flags & LK_TYPE_MASK) { 1274 if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { 1275 /* 1276 * must expand vrele here because we do not want 1277 * to call VOP_INACTIVE if the reference count 1278 * drops back to zero since it was never really 1279 * active. We must remove it from the free list 1280 * before sleeping so that multiple processes do 1281 * not try to recycle it. 1282 */ 1283 simple_lock(&vp->v_interlock); 1284 vp->v_usecount--; 1285 if (VSHOULDFREE(vp)) 1286 vfree(vp); 1287 simple_unlock(&vp->v_interlock); 1288 } 1289 return (error); 1290 } 1291 simple_unlock(&vp->v_interlock); 1292 return (0); 1293 } 1294 1295 void 1296 vref(struct vnode *vp) 1297 { 1298 simple_lock(&vp->v_interlock); 1299 vp->v_usecount++; 1300 simple_unlock(&vp->v_interlock); 1301 } 1302 1303 /* 1304 * Vnode put/release. 1305 * If count drops to zero, call inactive routine and return to freelist. 1306 */ 1307 void 1308 vrele(vp) 1309 struct vnode *vp; 1310 { 1311 struct proc *p = curproc; /* XXX */ 1312 1313 #ifdef DIAGNOSTIC 1314 if (vp == NULL) 1315 panic("vrele: null vp"); 1316 #endif 1317 simple_lock(&vp->v_interlock); 1318 1319 if (vp->v_usecount > 1) { 1320 1321 vp->v_usecount--; 1322 simple_unlock(&vp->v_interlock); 1323 1324 return; 1325 } 1326 1327 if (vp->v_usecount == 1) { 1328 1329 vp->v_usecount--; 1330 1331 if (VSHOULDFREE(vp)) 1332 vfree(vp); 1333 /* 1334 * If we are doing a vput, the node is already locked, and we must 1335 * call VOP_INACTIVE with the node locked.
So, in the case of 1336 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1337 */ 1338 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { 1339 VOP_INACTIVE(vp, p); 1340 } 1341 1342 } else { 1343 #ifdef DIAGNOSTIC 1344 vprint("vrele: negative ref count", vp); 1345 simple_unlock(&vp->v_interlock); 1346 #endif 1347 panic("vrele: negative ref cnt"); 1348 } 1349 } 1350 1351 void 1352 vput(vp) 1353 struct vnode *vp; 1354 { 1355 struct proc *p = curproc; /* XXX */ 1356 1357 #ifdef DIAGNOSTIC 1358 if (vp == NULL) 1359 panic("vput: null vp"); 1360 #endif 1361 1362 simple_lock(&vp->v_interlock); 1363 1364 if (vp->v_usecount > 1) { 1365 1366 vp->v_usecount--; 1367 VOP_UNLOCK(vp, LK_INTERLOCK, p); 1368 return; 1369 1370 } 1371 1372 if (vp->v_usecount == 1) { 1373 1374 vp->v_usecount--; 1375 if (VSHOULDFREE(vp)) 1376 vfree(vp); 1377 /* 1378 * If we are doing a vput, the node is already locked, and we must 1379 * call VOP_INACTIVE with the node locked. So, in the case of 1380 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1381 */ 1382 simple_unlock(&vp->v_interlock); 1383 VOP_INACTIVE(vp, p); 1384 1385 } else { 1386 #ifdef DIAGNOSTIC 1387 vprint("vput: negative ref count", vp); 1388 #endif 1389 panic("vput: negative ref cnt"); 1390 } 1391 } 1392 1393 /* 1394 * Somebody doesn't want the vnode recycled. 1395 */ 1396 void 1397 vhold(vp) 1398 register struct vnode *vp; 1399 { 1400 int s; 1401 1402 s = splbio(); 1403 vp->v_holdcnt++; 1404 if (VSHOULDBUSY(vp)) 1405 vbusy(vp); 1406 splx(s); 1407 } 1408 1409 /* 1410 * One less who cares about this vnode. 1411 */ 1412 void 1413 vdrop(vp) 1414 register struct vnode *vp; 1415 { 1416 int s; 1417 1418 s = splbio(); 1419 if (vp->v_holdcnt <= 0) 1420 panic("vdrop: holdcnt"); 1421 vp->v_holdcnt--; 1422 if (VSHOULDFREE(vp)) 1423 vfree(vp); 1424 splx(s); 1425 } 1426 1427 /* 1428 * Remove any vnodes in the vnode table belonging to mount point mp. 1429 * 1430 * If MNT_NOFORCE is specified, there should not be any active ones, 1431 * return error if any are found (nb: this is a user error, not a 1432 * system error). If MNT_FORCE is specified, detach any active vnodes 1433 * that are found. 1434 */ 1435 #ifdef DIAGNOSTIC 1436 static int busyprt = 0; /* print out busy vnodes */ 1437 SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); 1438 #endif 1439 1440 int 1441 vflush(mp, skipvp, flags) 1442 struct mount *mp; 1443 struct vnode *skipvp; 1444 int flags; 1445 { 1446 struct proc *p = curproc; /* XXX */ 1447 struct vnode *vp, *nvp; 1448 int busy = 0; 1449 1450 simple_lock(&mntvnode_slock); 1451 loop: 1452 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { 1453 /* 1454 * Make sure this vnode wasn't reclaimed in getnewvnode(). 1455 * Start over if it has (it won't be on the list anymore). 1456 */ 1457 if (vp->v_mount != mp) 1458 goto loop; 1459 nvp = vp->v_mntvnodes.le_next; 1460 /* 1461 * Skip over a selected vnode. 1462 */ 1463 if (vp == skipvp) 1464 continue; 1465 1466 simple_lock(&vp->v_interlock); 1467 /* 1468 * Skip over a vnodes marked VSYSTEM. 1469 */ 1470 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { 1471 simple_unlock(&vp->v_interlock); 1472 continue; 1473 } 1474 /* 1475 * If WRITECLOSE is set, only flush out regular file vnodes 1476 * open for writing. 
1477 */ 1478 if ((flags & WRITECLOSE) && 1479 (vp->v_writecount == 0 || vp->v_type != VREG)) { 1480 simple_unlock(&vp->v_interlock); 1481 continue; 1482 } 1483 1484 /* 1485 * With v_usecount == 0, all we need to do is clear out the 1486 * vnode data structures and we are done. 1487 */ 1488 if (vp->v_usecount == 0) { 1489 simple_unlock(&mntvnode_slock); 1490 vgonel(vp, p); 1491 simple_lock(&mntvnode_slock); 1492 continue; 1493 } 1494 1495 /* 1496 * If FORCECLOSE is set, forcibly close the vnode. For block 1497 * or character devices, revert to an anonymous device. For 1498 * all other files, just kill them. 1499 */ 1500 if (flags & FORCECLOSE) { 1501 simple_unlock(&mntvnode_slock); 1502 if (vp->v_type != VBLK && vp->v_type != VCHR) { 1503 vgonel(vp, p); 1504 } else { 1505 vclean(vp, 0, p); 1506 vp->v_op = spec_vnodeop_p; 1507 insmntque(vp, (struct mount *) 0); 1508 } 1509 simple_lock(&mntvnode_slock); 1510 continue; 1511 } 1512 #ifdef DIAGNOSTIC 1513 if (busyprt) 1514 vprint("vflush: busy vnode", vp); 1515 #endif 1516 simple_unlock(&vp->v_interlock); 1517 busy++; 1518 } 1519 simple_unlock(&mntvnode_slock); 1520 if (busy) 1521 return (EBUSY); 1522 return (0); 1523 } 1524 1525 /* 1526 * Disassociate the underlying file system from a vnode. 1527 */ 1528 static void 1529 vclean(vp, flags, p) 1530 struct vnode *vp; 1531 int flags; 1532 struct proc *p; 1533 { 1534 int active; 1535 vm_object_t obj; 1536 1537 /* 1538 * Check to see if the vnode is in use. If so we have to reference it 1539 * before we clean it out so that its count cannot fall to zero and 1540 * generate a race against ourselves to recycle it. 1541 */ 1542 if ((active = vp->v_usecount)) 1543 vp->v_usecount++; 1544 1545 /* 1546 * Prevent the vnode from being recycled or brought into use while we 1547 * clean it out. 1548 */ 1549 if (vp->v_flag & VXLOCK) 1550 panic("vclean: deadlock"); 1551 vp->v_flag |= VXLOCK; 1552 /* 1553 * Even if the count is zero, the VOP_INACTIVE routine may still 1554 * have the object locked while it cleans it out. The VOP_LOCK 1555 * ensures that the VOP_INACTIVE routine is done with its work. 1556 * For active vnodes, it ensures that no other activity can 1557 * occur while the underlying object is being cleaned out. 1558 */ 1559 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); 1560 1561 /* 1562 * Clean out any buffers associated with the vnode. 1563 */ 1564 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); 1565 if (obj = vp->v_object) { 1566 if (obj->ref_count == 0) { 1567 /* 1568 * This is a normal way of shutting down the object/vnode 1569 * association. 1570 */ 1571 vm_object_terminate(obj); 1572 } else { 1573 /* 1574 * Woe to the process that tries to page now :-). 1575 */ 1576 vm_pager_deallocate(obj); 1577 } 1578 } 1579 1580 /* 1581 * If purging an active vnode, it must be closed and 1582 * deactivated before being reclaimed. Note that the 1583 * VOP_INACTIVE will unlock the vnode. 1584 */ 1585 if (active) { 1586 if (flags & DOCLOSE) 1587 VOP_CLOSE(vp, IO_NDELAY, NOCRED, p); 1588 VOP_INACTIVE(vp, p); 1589 } else { 1590 /* 1591 * Any other processes trying to obtain this lock must first 1592 * wait for VXLOCK to clear, then call the new lock operation. 1593 */ 1594 VOP_UNLOCK(vp, 0, p); 1595 } 1596 /* 1597 * Reclaim the vnode. 1598 */ 1599 if (VOP_RECLAIM(vp, p)) 1600 panic("vclean: cannot reclaim"); 1601 1602 if (active) 1603 vrele(vp); 1604 1605 cache_purge(vp); 1606 if (vp->v_vnlock) { 1607 #if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? 
*/ 1608 #ifdef DIAGNOSTIC 1609 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) 1610 vprint("vclean: lock not drained", vp); 1611 #endif 1612 #endif 1613 FREE(vp->v_vnlock, M_VNODE); 1614 vp->v_vnlock = NULL; 1615 } 1616 1617 if (VSHOULDFREE(vp)) 1618 vfree(vp); 1619 1620 /* 1621 * Done with purge, notify sleepers of the grim news. 1622 */ 1623 vp->v_op = dead_vnodeop_p; 1624 vn_pollgone(vp); 1625 vp->v_tag = VT_NON; 1626 vp->v_flag &= ~VXLOCK; 1627 if (vp->v_flag & VXWANT) { 1628 vp->v_flag &= ~VXWANT; 1629 wakeup((caddr_t) vp); 1630 } 1631 } 1632 1633 /* 1634 * Eliminate all activity associated with the requested vnode 1635 * and with all vnodes aliased to the requested vnode. 1636 */ 1637 int 1638 vop_revoke(ap) 1639 struct vop_revoke_args /* { 1640 struct vnode *a_vp; 1641 int a_flags; 1642 } */ *ap; 1643 { 1644 struct vnode *vp, *vq; 1645 struct proc *p = curproc; /* XXX */ 1646 1647 #ifdef DIAGNOSTIC 1648 if ((ap->a_flags & REVOKEALL) == 0) 1649 panic("vop_revoke"); 1650 #endif 1651 1652 vp = ap->a_vp; 1653 simple_lock(&vp->v_interlock); 1654 1655 if (vp->v_flag & VALIASED) { 1656 /* 1657 * If a vgone (or vclean) is already in progress, 1658 * wait until it is done and return. 1659 */ 1660 if (vp->v_flag & VXLOCK) { 1661 vp->v_flag |= VXWANT; 1662 simple_unlock(&vp->v_interlock); 1663 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); 1664 return (0); 1665 } 1666 /* 1667 * Ensure that vp will not be vgone'd while we 1668 * are eliminating its aliases. 1669 */ 1670 vp->v_flag |= VXLOCK; 1671 simple_unlock(&vp->v_interlock); 1672 while (vp->v_flag & VALIASED) { 1673 simple_lock(&spechash_slock); 1674 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1675 if (vq->v_rdev != vp->v_rdev || 1676 vq->v_type != vp->v_type || vp == vq) 1677 continue; 1678 simple_unlock(&spechash_slock); 1679 vgone(vq); 1680 break; 1681 } 1682 if (vq == NULLVP) { 1683 simple_unlock(&spechash_slock); 1684 } 1685 } 1686 /* 1687 * Remove the lock so that vgone below will 1688 * really eliminate the vnode after which time 1689 * vgone will awaken any sleepers. 1690 */ 1691 simple_lock(&vp->v_interlock); 1692 vp->v_flag &= ~VXLOCK; 1693 if (vp->v_flag & VXWANT) { 1694 vp->v_flag &= ~VXWANT; 1695 wakeup(vp); 1696 } 1697 } 1698 vgonel(vp, p); 1699 return (0); 1700 } 1701 1702 /* 1703 * Recycle an unused vnode to the front of the free list. 1704 * Release the passed interlock if the vnode will be recycled. 1705 */ 1706 int 1707 vrecycle(vp, inter_lkp, p) 1708 struct vnode *vp; 1709 struct simplelock *inter_lkp; 1710 struct proc *p; 1711 { 1712 1713 simple_lock(&vp->v_interlock); 1714 if (vp->v_usecount == 0) { 1715 if (inter_lkp) { 1716 simple_unlock(inter_lkp); 1717 } 1718 vgonel(vp, p); 1719 return (1); 1720 } 1721 simple_unlock(&vp->v_interlock); 1722 return (0); 1723 } 1724 1725 /* 1726 * Eliminate all activity associated with a vnode 1727 * in preparation for reuse. 1728 */ 1729 void 1730 vgone(vp) 1731 register struct vnode *vp; 1732 { 1733 struct proc *p = curproc; /* XXX */ 1734 1735 simple_lock(&vp->v_interlock); 1736 vgonel(vp, p); 1737 } 1738 1739 /* 1740 * vgone, with the vp interlock held. 1741 */ 1742 static void 1743 vgonel(vp, p) 1744 struct vnode *vp; 1745 struct proc *p; 1746 { 1747 int s; 1748 struct vnode *vq; 1749 struct vnode *vx; 1750 1751 /* 1752 * If a vgone (or vclean) is already in progress, 1753 * wait until it is done and return. 
1754 */ 1755 if (vp->v_flag & VXLOCK) { 1756 vp->v_flag |= VXWANT; 1757 simple_unlock(&vp->v_interlock); 1758 tsleep((caddr_t)vp, PINOD, "vgone", 0); 1759 return; 1760 } 1761 1762 /* 1763 * Clean out the filesystem specific data. 1764 */ 1765 vclean(vp, DOCLOSE, p); 1766 simple_lock(&vp->v_interlock); 1767 1768 /* 1769 * Delete from old mount point vnode list, if on one. 1770 */ 1771 if (vp->v_mount != NULL) 1772 insmntque(vp, (struct mount *)0); 1773 /* 1774 * If special device, remove it from special device alias list 1775 * if it is on one. 1776 */ 1777 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { 1778 simple_lock(&spechash_slock); 1779 if (*vp->v_hashchain == vp) { 1780 *vp->v_hashchain = vp->v_specnext; 1781 } else { 1782 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1783 if (vq->v_specnext != vp) 1784 continue; 1785 vq->v_specnext = vp->v_specnext; 1786 break; 1787 } 1788 if (vq == NULL) 1789 panic("missing bdev"); 1790 } 1791 if (vp->v_flag & VALIASED) { 1792 vx = NULL; 1793 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1794 if (vq->v_rdev != vp->v_rdev || 1795 vq->v_type != vp->v_type) 1796 continue; 1797 if (vx) 1798 break; 1799 vx = vq; 1800 } 1801 if (vx == NULL) 1802 panic("missing alias"); 1803 if (vq == NULL) 1804 vx->v_flag &= ~VALIASED; 1805 vp->v_flag &= ~VALIASED; 1806 } 1807 simple_unlock(&spechash_slock); 1808 FREE(vp->v_specinfo, M_VNODE); 1809 vp->v_specinfo = NULL; 1810 } 1811 1812 /* 1813 * If it is on the freelist and not already at the head, 1814 * move it to the head of the list. The test of the back 1815 * pointer and the reference count of zero is because 1816 * it will be removed from the free list by getnewvnode, 1817 * but will not have its reference count incremented until 1818 * after calling vgone. If the reference count were 1819 * incremented first, vgone would (incorrectly) try to 1820 * close the previous instance of the underlying object. 1821 */ 1822 if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) { 1823 s = splbio(); 1824 simple_lock(&vnode_free_list_slock); 1825 if (vp->v_flag & VFREE) { 1826 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1827 } else if (vp->v_flag & VTBFREE) { 1828 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 1829 vp->v_flag &= ~VTBFREE; 1830 freevnodes++; 1831 } else 1832 freevnodes++; 1833 vp->v_flag |= VFREE; 1834 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 1835 simple_unlock(&vnode_free_list_slock); 1836 splx(s); 1837 } 1838 1839 vp->v_type = VBAD; 1840 simple_unlock(&vp->v_interlock); 1841 } 1842 1843 /* 1844 * Lookup a vnode by device number. 1845 */ 1846 int 1847 vfinddev(dev, type, vpp) 1848 dev_t dev; 1849 enum vtype type; 1850 struct vnode **vpp; 1851 { 1852 register struct vnode *vp; 1853 int rc = 0; 1854 1855 simple_lock(&spechash_slock); 1856 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 1857 if (dev != vp->v_rdev || type != vp->v_type) 1858 continue; 1859 *vpp = vp; 1860 rc = 1; 1861 break; 1862 } 1863 simple_unlock(&spechash_slock); 1864 return (rc); 1865 } 1866 1867 /* 1868 * Calculate the total number of references to a special device. 
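 * The count is summed over every alias on the spec hash chain; unreferenced
 * aliases found along the way are flushed with vgone().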
1869 */ 1870 int 1871 vcount(vp) 1872 register struct vnode *vp; 1873 { 1874 struct vnode *vq, *vnext; 1875 int count; 1876 1877 loop: 1878 if ((vp->v_flag & VALIASED) == 0) 1879 return (vp->v_usecount); 1880 simple_lock(&spechash_slock); 1881 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { 1882 vnext = vq->v_specnext; 1883 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) 1884 continue; 1885 /* 1886 * Alias, but not in use, so flush it out. 1887 */ 1888 if (vq->v_usecount == 0 && vq != vp) { 1889 simple_unlock(&spechash_slock); 1890 vgone(vq); 1891 goto loop; 1892 } 1893 count += vq->v_usecount; 1894 } 1895 simple_unlock(&spechash_slock); 1896 return (count); 1897 } 1898 /* 1899 * Print out a description of a vnode. 1900 */ 1901 static char *typename[] = 1902 {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; 1903 1904 void 1905 vprint(label, vp) 1906 char *label; 1907 register struct vnode *vp; 1908 { 1909 char buf[64]; 1910 1911 if (label != NULL) 1912 printf("%s: %p: ", label, (void *)vp); 1913 else 1914 printf("%p: ", (void *)vp); 1915 printf("type %s, usecount %d, writecount %d, refcount %d,", 1916 typename[vp->v_type], vp->v_usecount, vp->v_writecount, 1917 vp->v_holdcnt); 1918 buf[0] = '\0'; 1919 if (vp->v_flag & VROOT) 1920 strcat(buf, "|VROOT"); 1921 if (vp->v_flag & VTEXT) 1922 strcat(buf, "|VTEXT"); 1923 if (vp->v_flag & VSYSTEM) 1924 strcat(buf, "|VSYSTEM"); 1925 if (vp->v_flag & VXLOCK) 1926 strcat(buf, "|VXLOCK"); 1927 if (vp->v_flag & VXWANT) 1928 strcat(buf, "|VXWANT"); 1929 if (vp->v_flag & VBWAIT) 1930 strcat(buf, "|VBWAIT"); 1931 if (vp->v_flag & VALIASED) 1932 strcat(buf, "|VALIASED"); 1933 if (vp->v_flag & VDOOMED) 1934 strcat(buf, "|VDOOMED"); 1935 if (vp->v_flag & VFREE) 1936 strcat(buf, "|VFREE"); 1937 if (vp->v_flag & VOBJBUF) 1938 strcat(buf, "|VOBJBUF"); 1939 if (buf[0] != '\0') 1940 printf(" flags (%s)", &buf[1]); 1941 if (vp->v_data == NULL) { 1942 printf("\n"); 1943 } else { 1944 printf("\n\t"); 1945 VOP_PRINT(vp); 1946 } 1947 } 1948 1949 #ifdef DDB 1950 /* 1951 * List all of the locked vnodes in the system. 1952 * Called when debugging the kernel. 1953 */ 1954 static void 1955 printlockedvnodes() 1956 { 1957 struct proc *p = curproc; /* XXX */ 1958 struct mount *mp, *nmp; 1959 struct vnode *vp; 1960 1961 printf("Locked vnodes\n"); 1962 simple_lock(&mountlist_slock); 1963 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1964 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 1965 nmp = mp->mnt_list.cqe_next; 1966 continue; 1967 } 1968 for (vp = mp->mnt_vnodelist.lh_first; 1969 vp != NULL; 1970 vp = vp->v_mntvnodes.le_next) { 1971 if (VOP_ISLOCKED(vp)) 1972 vprint((char *)0, vp); 1973 } 1974 simple_lock(&mountlist_slock); 1975 nmp = mp->mnt_list.cqe_next; 1976 vfs_unbusy(mp, p); 1977 } 1978 simple_unlock(&mountlist_slock); 1979 } 1980 #endif 1981 1982 /* 1983 * Top level filesystem related information gathering. 1984 */ 1985 static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS); 1986 1987 static int 1988 vfs_sysctl SYSCTL_HANDLER_ARGS 1989 { 1990 int *name = (int *)arg1 - 1; /* XXX */ 1991 u_int namelen = arg2 + 1; /* XXX */ 1992 struct vfsconf *vfsp; 1993 1994 #if 1 || defined(COMPAT_PRELITE2) 1995 /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. 
*/ 1996 if (namelen == 1) 1997 return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); 1998 #endif 1999 2000 #ifdef notyet 2001 /* all sysctl names at this level are at least name and field */ 2002 if (namelen < 2) 2003 return (ENOTDIR); /* overloaded */ 2004 if (name[0] != VFS_GENERIC) { 2005 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 2006 if (vfsp->vfc_typenum == name[0]) 2007 break; 2008 if (vfsp == NULL) 2009 return (EOPNOTSUPP); 2010 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, 2011 oldp, oldlenp, newp, newlen, p)); 2012 } 2013 #endif 2014 switch (name[1]) { 2015 case VFS_MAXTYPENUM: 2016 if (namelen != 2) 2017 return (ENOTDIR); 2018 return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); 2019 case VFS_CONF: 2020 if (namelen != 3) 2021 return (ENOTDIR); /* overloaded */ 2022 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 2023 if (vfsp->vfc_typenum == name[2]) 2024 break; 2025 if (vfsp == NULL) 2026 return (EOPNOTSUPP); 2027 return (SYSCTL_OUT(req, vfsp, sizeof *vfsp)); 2028 } 2029 return (EOPNOTSUPP); 2030 } 2031 2032 SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, 2033 "Generic filesystem"); 2034 2035 #if 1 || defined(COMPAT_PRELITE2) 2036 2037 static int 2038 sysctl_ovfs_conf SYSCTL_HANDLER_ARGS 2039 { 2040 int error; 2041 struct vfsconf *vfsp; 2042 struct ovfsconf ovfs; 2043 2044 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 2045 ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ 2046 strcpy(ovfs.vfc_name, vfsp->vfc_name); 2047 ovfs.vfc_index = vfsp->vfc_typenum; 2048 ovfs.vfc_refcount = vfsp->vfc_refcount; 2049 ovfs.vfc_flags = vfsp->vfc_flags; 2050 error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); 2051 if (error) 2052 return error; 2053 } 2054 return 0; 2055 } 2056 2057 #endif /* 1 || COMPAT_PRELITE2 */ 2058 2059 static volatile int kinfo_vdebug = 1; 2060 2061 #if 0 2062 #define KINFO_VNODESLOP 10 2063 /* 2064 * Dump vnode list (via sysctl). 2065 * Copyout address of vnode followed by vnode. 2066 */ 2067 /* ARGSUSED */ 2068 static int 2069 sysctl_vnode SYSCTL_HANDLER_ARGS 2070 { 2071 struct proc *p = curproc; /* XXX */ 2072 struct mount *mp, *nmp; 2073 struct vnode *nvp, *vp; 2074 int error; 2075 2076 #define VPTRSZ sizeof (struct vnode *) 2077 #define VNODESZ sizeof (struct vnode) 2078 2079 req->lock = 0; 2080 if (!req->oldptr) /* Make an estimate */ 2081 return (SYSCTL_OUT(req, 0, 2082 (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); 2083 2084 simple_lock(&mountlist_slock); 2085 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 2086 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 2087 nmp = mp->mnt_list.cqe_next; 2088 continue; 2089 } 2090 again: 2091 simple_lock(&mntvnode_slock); 2092 for (vp = mp->mnt_vnodelist.lh_first; 2093 vp != NULL; 2094 vp = nvp) { 2095 /* 2096 * Check that the vp is still associated with 2097 * this filesystem. RACE: could have been 2098 * recycled onto the same filesystem. 
2099 */ 2100 if (vp->v_mount != mp) { 2101 simple_unlock(&mntvnode_slock); 2102 if (kinfo_vdebug) 2103 printf("kinfo: vp changed\n"); 2104 goto again; 2105 } 2106 nvp = vp->v_mntvnodes.le_next; 2107 simple_unlock(&mntvnode_slock); 2108 if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || 2109 (error = SYSCTL_OUT(req, vp, VNODESZ))) 2110 return (error); 2111 simple_lock(&mntvnode_slock); 2112 } 2113 simple_unlock(&mntvnode_slock); 2114 simple_lock(&mountlist_slock); 2115 nmp = mp->mnt_list.cqe_next; 2116 vfs_unbusy(mp, p); 2117 } 2118 simple_unlock(&mountlist_slock); 2119 2120 return (0); 2121 } 2122 #endif 2123 2124 /* 2125 * XXX 2126 * Exporting the vnode list on large systems causes them to crash. 2127 * Exporting the vnode list on medium systems causes sysctl to coredump. 2128 */ 2129 #if 0 2130 SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 2131 0, 0, sysctl_vnode, "S,vnode", ""); 2132 #endif 2133 2134 /* 2135 * Check to see if a filesystem is mounted on a block device. 2136 */ 2137 int 2138 vfs_mountedon(vp) 2139 struct vnode *vp; 2140 { 2141 struct vnode *vq; 2142 int error = 0; 2143 2144 if (vp->v_specmountpoint != NULL) 2145 return (EBUSY); 2146 if (vp->v_flag & VALIASED) { 2147 simple_lock(&spechash_slock); 2148 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 2149 if (vq->v_rdev != vp->v_rdev || 2150 vq->v_type != vp->v_type) 2151 continue; 2152 if (vq->v_specmountpoint != NULL) { 2153 error = EBUSY; 2154 break; 2155 } 2156 } 2157 simple_unlock(&spechash_slock); 2158 } 2159 return (error); 2160 } 2161 2162 /* 2163 * Unmount all filesystems. The list is traversed in reverse order 2164 * of mounting to avoid dependencies. 2165 */ 2166 void 2167 vfs_unmountall() 2168 { 2169 struct mount *mp, *nmp; 2170 struct proc *p; 2171 int error; 2172 2173 if (curproc != NULL) 2174 p = curproc; 2175 else 2176 p = initproc; /* XXX XXX should this be proc0? */ 2177 /* 2178 * Since this only runs when rebooting, it is not interlocked. 2179 */ 2180 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 2181 nmp = mp->mnt_list.cqe_prev; 2182 error = dounmount(mp, MNT_FORCE, p); 2183 if (error) { 2184 printf("unmount of %s failed (", 2185 mp->mnt_stat.f_mntonname); 2186 if (error == EBUSY) 2187 printf("BUSY)\n"); 2188 else 2189 printf("%d)\n", error); 2190 } 2191 } 2192 } 2193 2194 /* 2195 * Build hash lists of net addresses and hang them off the mount point. 2196 * Called by ufs_mount() to set up the lists of export addresses. 
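 * The export entries are kept in a per-address-family radix tree
 * (nep->ne_rtable[]), the same lookup structure used by the routing code,
 * attached on demand via dom_rtattach().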
2197 */ 2198 static int 2199 vfs_hang_addrlist(mp, nep, argp) 2200 struct mount *mp; 2201 struct netexport *nep; 2202 struct export_args *argp; 2203 { 2204 register struct netcred *np; 2205 register struct radix_node_head *rnh; 2206 register int i; 2207 struct radix_node *rn; 2208 struct sockaddr *saddr, *smask = 0; 2209 struct domain *dom; 2210 int error; 2211 2212 if (argp->ex_addrlen == 0) { 2213 if (mp->mnt_flag & MNT_DEFEXPORTED) 2214 return (EPERM); 2215 np = &nep->ne_defexported; 2216 np->netc_exflags = argp->ex_flags; 2217 np->netc_anon = argp->ex_anon; 2218 np->netc_anon.cr_ref = 1; 2219 mp->mnt_flag |= MNT_DEFEXPORTED; 2220 return (0); 2221 } 2222 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 2223 np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); 2224 bzero((caddr_t) np, i); 2225 saddr = (struct sockaddr *) (np + 1); 2226 if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) 2227 goto out; 2228 if (saddr->sa_len > argp->ex_addrlen) 2229 saddr->sa_len = argp->ex_addrlen; 2230 if (argp->ex_masklen) { 2231 smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); 2232 error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); 2233 if (error) 2234 goto out; 2235 if (smask->sa_len > argp->ex_masklen) 2236 smask->sa_len = argp->ex_masklen; 2237 } 2238 i = saddr->sa_family; 2239 if ((rnh = nep->ne_rtable[i]) == 0) { 2240 /* 2241 * Seems silly to initialize every AF when most are not used, 2242 * do so on demand here 2243 */ 2244 for (dom = domains; dom; dom = dom->dom_next) 2245 if (dom->dom_family == i && dom->dom_rtattach) { 2246 dom->dom_rtattach((void **) &nep->ne_rtable[i], 2247 dom->dom_rtoffset); 2248 break; 2249 } 2250 if ((rnh = nep->ne_rtable[i]) == 0) { 2251 error = ENOBUFS; 2252 goto out; 2253 } 2254 } 2255 rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, 2256 np->netc_rnodes); 2257 if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ 2258 error = EPERM; 2259 goto out; 2260 } 2261 np->netc_exflags = argp->ex_flags; 2262 np->netc_anon = argp->ex_anon; 2263 np->netc_anon.cr_ref = 1; 2264 return (0); 2265 out: 2266 free(np, M_NETADDR); 2267 return (error); 2268 } 2269 2270 /* ARGSUSED */ 2271 static int 2272 vfs_free_netcred(rn, w) 2273 struct radix_node *rn; 2274 void *w; 2275 { 2276 register struct radix_node_head *rnh = (struct radix_node_head *) w; 2277 2278 (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); 2279 free((caddr_t) rn, M_NETADDR); 2280 return (0); 2281 } 2282 2283 /* 2284 * Free the net address hash lists that are hanging off the mount points. 
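 * Each per-address-family radix tree is walked with vfs_free_netcred()
 * to delete and free the individual entries, and the tree head itself
 * is then freed and cleared.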
2285 */ 2286 static void 2287 vfs_free_addrlist(nep) 2288 struct netexport *nep; 2289 { 2290 register int i; 2291 register struct radix_node_head *rnh; 2292 2293 for (i = 0; i <= AF_MAX; i++) 2294 if ((rnh = nep->ne_rtable[i])) { 2295 (*rnh->rnh_walktree) (rnh, vfs_free_netcred, 2296 (caddr_t) rnh); 2297 free((caddr_t) rnh, M_RTABLE); 2298 nep->ne_rtable[i] = 0; 2299 } 2300 } 2301 2302 int 2303 vfs_export(mp, nep, argp) 2304 struct mount *mp; 2305 struct netexport *nep; 2306 struct export_args *argp; 2307 { 2308 int error; 2309 2310 if (argp->ex_flags & MNT_DELEXPORT) { 2311 if (mp->mnt_flag & MNT_EXPUBLIC) { 2312 vfs_setpublicfs(NULL, NULL, NULL); 2313 mp->mnt_flag &= ~MNT_EXPUBLIC; 2314 } 2315 vfs_free_addrlist(nep); 2316 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 2317 } 2318 if (argp->ex_flags & MNT_EXPORTED) { 2319 if (argp->ex_flags & MNT_EXPUBLIC) { 2320 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 2321 return (error); 2322 mp->mnt_flag |= MNT_EXPUBLIC; 2323 } 2324 if ((error = vfs_hang_addrlist(mp, nep, argp))) 2325 return (error); 2326 mp->mnt_flag |= MNT_EXPORTED; 2327 } 2328 return (0); 2329 } 2330 2331 2332 /* 2333 * Set the publicly exported filesystem (WebNFS). Currently, only 2334 * one public filesystem is possible in the spec (RFC 2054 and 2055) 2335 */ 2336 int 2337 vfs_setpublicfs(mp, nep, argp) 2338 struct mount *mp; 2339 struct netexport *nep; 2340 struct export_args *argp; 2341 { 2342 int error; 2343 struct vnode *rvp; 2344 char *cp; 2345 2346 /* 2347 * mp == NULL -> invalidate the current info, the FS is 2348 * no longer exported. May be called from either vfs_export 2349 * or unmount, so check if it hasn't already been done. 2350 */ 2351 if (mp == NULL) { 2352 if (nfs_pub.np_valid) { 2353 nfs_pub.np_valid = 0; 2354 if (nfs_pub.np_index != NULL) { 2355 FREE(nfs_pub.np_index, M_TEMP); 2356 nfs_pub.np_index = NULL; 2357 } 2358 } 2359 return (0); 2360 } 2361 2362 /* 2363 * Only one allowed at a time. 2364 */ 2365 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 2366 return (EBUSY); 2367 2368 /* 2369 * Get real filehandle for root of exported FS. 2370 */ 2371 bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); 2372 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 2373 2374 if ((error = VFS_ROOT(mp, &rvp))) 2375 return (error); 2376 2377 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 2378 return (error); 2379 2380 vput(rvp); 2381 2382 /* 2383 * If an indexfile was specified, pull it in. 2384 */ 2385 if (argp->ex_indexfile != NULL) { 2386 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 2387 M_WAITOK); 2388 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 2389 MAXNAMLEN, (size_t *)0); 2390 if (!error) { 2391 /* 2392 * Check for illegal filenames. 2393 */ 2394 for (cp = nfs_pub.np_index; *cp; cp++) { 2395 if (*cp == '/') { 2396 error = EINVAL; 2397 break; 2398 } 2399 } 2400 } 2401 if (error) { 2402 FREE(nfs_pub.np_index, M_TEMP); 2403 return (error); 2404 } 2405 } 2406 2407 nfs_pub.np_mount = mp; 2408 nfs_pub.np_valid = 1; 2409 return (0); 2410 } 2411 2412 struct netcred * 2413 vfs_export_lookup(mp, nep, nam) 2414 register struct mount *mp; 2415 struct netexport *nep; 2416 struct sockaddr *nam; 2417 { 2418 register struct netcred *np; 2419 register struct radix_node_head *rnh; 2420 struct sockaddr *saddr; 2421 2422 np = NULL; 2423 if (mp->mnt_flag & MNT_EXPORTED) { 2424 /* 2425 * Lookup in the export list first. 
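 * The lookup consults the per-address-family radix tree built by
 * vfs_hang_addrlist(); a match on the RNF_ROOT node is discarded so
 * that the default entry (MNT_DEFEXPORTED), if any, is used below
 * instead.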
2426 */ 2427 if (nam != NULL) { 2428 saddr = nam; 2429 rnh = nep->ne_rtable[saddr->sa_family]; 2430 if (rnh != NULL) { 2431 np = (struct netcred *) 2432 (*rnh->rnh_matchaddr)((caddr_t)saddr, 2433 rnh); 2434 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 2435 np = NULL; 2436 } 2437 } 2438 /* 2439 * If no address match, use the default if it exists. 2440 */ 2441 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 2442 np = &nep->ne_defexported; 2443 } 2444 return (np); 2445 } 2446 2447 /* 2448 * perform msync on all vnodes under a mount point 2449 * the mount point must be locked. 2450 */ 2451 void 2452 vfs_msync(struct mount *mp, int flags) { 2453 struct vnode *vp, *nvp; 2454 struct vm_object *obj; 2455 int anyio, tries; 2456 2457 tries = 5; 2458 loop: 2459 anyio = 0; 2460 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 2461 2462 nvp = vp->v_mntvnodes.le_next; 2463 2464 if (vp->v_mount != mp) { 2465 goto loop; 2466 } 2467 2468 if (vp->v_flag & VXLOCK) /* XXX: what if MNT_WAIT? */ 2469 continue; 2470 2471 if (flags != MNT_WAIT) { 2472 obj = vp->v_object; 2473 if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0) 2474 continue; 2475 if (VOP_ISLOCKED(vp)) 2476 continue; 2477 } 2478 2479 simple_lock(&vp->v_interlock); 2480 if (vp->v_object && 2481 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 2482 if (!vget(vp, 2483 LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { 2484 if (vp->v_object) { 2485 vm_object_page_clean(vp->v_object, 0, 0, TRUE); 2486 anyio = 1; 2487 } 2488 vput(vp); 2489 } 2490 } else { 2491 simple_unlock(&vp->v_interlock); 2492 } 2493 } 2494 if (anyio && (--tries > 0)) 2495 goto loop; 2496 } 2497 2498 /* 2499 * Create the VM object needed for VMIO and mmap support. This 2500 * is done for all VREG files in the system. Some filesystems might 2501 * afford the additional metadata buffering capability of the 2502 * VMIO code by making the device node be VMIO mode also. 2503 * 2504 * If !waslocked, must be called with interlock. 2505 */ 2506 int 2507 vfs_object_create(vp, p, cred, waslocked) 2508 struct vnode *vp; 2509 struct proc *p; 2510 struct ucred *cred; 2511 int waslocked; 2512 { 2513 struct vattr vat; 2514 vm_object_t object; 2515 int error = 0; 2516 2517 if ((vp->v_type != VREG) && (vp->v_type != VBLK)) { 2518 if (!waslocked) 2519 simple_unlock(&vp->v_interlock); 2520 return 0; 2521 } 2522 2523 if (!waslocked) 2524 vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, p); 2525 2526 retry: 2527 if ((object = vp->v_object) == NULL) { 2528 if (vp->v_type == VREG) { 2529 if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) 2530 goto retn; 2531 object = vnode_pager_alloc(vp, 2532 OFF_TO_IDX(round_page(vat.va_size)), 0, 0); 2533 } else if (major(vp->v_rdev) < nblkdev) { 2534 /* 2535 * This simply allocates the biggest object possible 2536 * for a VBLK vnode. This should be fixed, but doesn't 2537 * cause any problems (yet). 
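 * (The extra object reference and vnode use count taken by
 * vnode_pager_alloc() are dropped again immediately below, since the
 * caller of vfs_object_create() did not ask for them.)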
*/ 2539 object = vnode_pager_alloc(vp, INT_MAX, 0, 0); 2540 } 2541 object->ref_count--; 2542 vp->v_usecount--; 2543 } else { 2544 if (object->flags & OBJ_DEAD) { 2545 VOP_UNLOCK(vp, 0, p); 2546 tsleep(object, PVM, "vodead", 0); 2547 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 2548 goto retry; 2549 } 2550 } 2551 2552 if (vp->v_object) { 2553 vp->v_flag |= VOBJBUF; 2554 } 2555 2556 retn: 2557 if (!waslocked) { 2558 simple_lock(&vp->v_interlock); 2559 VOP_UNLOCK(vp, LK_INTERLOCK, p); 2560 } 2561 2562 return error; 2563 } 2564 2565 static void 2566 vfree(vp) 2567 struct vnode *vp; 2568 { 2569 int s; 2570 2571 s = splbio(); 2572 simple_lock(&vnode_free_list_slock); 2573 if (vp->v_flag & VTBFREE) { 2574 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 2575 vp->v_flag &= ~VTBFREE; 2576 } 2577 if (vp->v_flag & VAGE) { 2578 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 2579 } else { 2580 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 2581 } 2582 freevnodes++; 2583 simple_unlock(&vnode_free_list_slock); 2584 vp->v_flag &= ~VAGE; 2585 vp->v_flag |= VFREE; 2586 splx(s); 2587 } 2588 2589 void 2590 vbusy(vp) 2591 struct vnode *vp; 2592 { 2593 int s; 2594 2595 s = splbio(); 2596 simple_lock(&vnode_free_list_slock); 2597 if (vp->v_flag & VTBFREE) { 2598 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 2599 vp->v_flag &= ~VTBFREE; 2600 } else { 2601 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 2602 freevnodes--; 2603 } 2604 simple_unlock(&vnode_free_list_slock); 2605 vp->v_flag &= ~(VFREE|VAGE); 2606 splx(s); 2607 } 2608 2609 /* 2610 * Record a process's interest in events which might happen to 2611 * a vnode. Because poll uses the historic select-style interface 2612 * internally, this routine serves as both the ``check for any 2613 * pending events'' and the ``record my interest in future events'' 2614 * functions. (These are done together, while the lock is held, 2615 * to avoid race conditions.) 2616 */ 2617 int 2618 vn_pollrecord(vp, p, events) 2619 struct vnode *vp; 2620 struct proc *p; 2621 short events; 2622 { 2623 simple_lock(&vp->v_pollinfo.vpi_lock); 2624 if (vp->v_pollinfo.vpi_revents & events) { 2625 /* 2626 * This leaves events we are not interested 2627 * in available for the other process which 2628 * presumably had requested them 2629 * (otherwise they would never have been 2630 * recorded). 2631 */ 2632 events &= vp->v_pollinfo.vpi_revents; 2633 vp->v_pollinfo.vpi_revents &= ~events; 2634 2635 simple_unlock(&vp->v_pollinfo.vpi_lock); 2636 return events; 2637 } 2638 vp->v_pollinfo.vpi_events |= events; 2639 selrecord(p, &vp->v_pollinfo.vpi_selinfo); 2640 simple_unlock(&vp->v_pollinfo.vpi_lock); 2641 return 0; 2642 } 2643 2644 /* 2645 * Note the occurrence of an event. If the VN_POLLEVENT macro is used, 2646 * it is possible for us to miss an event due to race conditions, but 2647 * that condition is expected to be rare, so for the moment it is the 2648 * preferred interface. 2649 */ 2650 void 2651 vn_pollevent(vp, events) 2652 struct vnode *vp; 2653 short events; 2654 { 2655 simple_lock(&vp->v_pollinfo.vpi_lock); 2656 if (vp->v_pollinfo.vpi_events & events) { 2657 /* 2658 * We clear vpi_events so that we don't 2659 * call selwakeup() twice if two events are 2660 * posted before the polling process(es) is 2661 * awakened. This also ensures that we take at 2662 * most one selwakeup() if the polling process 2663 * is no longer interested. However, it does 2664 * mean that only one event can be noticed at 2665 * a time.
(Perhaps we should only clear those 2666 * event bits which we note?) XXX 2667 */ 2668 vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? */ 2669 vp->v_pollinfo.vpi_revents |= events; 2670 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2671 } 2672 simple_unlock(&vp->v_pollinfo.vpi_lock); 2673 } 2674 2675 /* 2676 * Wake up anyone polling on vp because it is being revoked. 2677 * This depends on dead_poll() returning POLLHUP for correct 2678 * behavior. 2679 */ 2680 void 2681 vn_pollgone(vp) 2682 struct vnode *vp; 2683 { 2684 simple_lock(&vp->v_pollinfo.vpi_lock); 2685 if (vp->v_pollinfo.vpi_events) { 2686 vp->v_pollinfo.vpi_events = 0; 2687 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2688 } 2689 simple_unlock(&vp->v_pollinfo.vpi_lock); 2690 } 2691 2692 2693 2694 /* 2695 * Routine to create and manage a filesystem syncer vnode. 2696 */ 2697 #define sync_close ((int (*) __P((struct vop_close_args *)))nullop) 2698 int sync_fsync __P((struct vop_fsync_args *)); 2699 int sync_inactive __P((struct vop_inactive_args *)); 2700 int sync_reclaim __P((struct vop_reclaim_args *)); 2701 #define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) 2702 #define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) 2703 int sync_print __P((struct vop_print_args *)); 2704 #define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked) 2705 2706 vop_t **sync_vnodeop_p; 2707 struct vnodeopv_entry_desc sync_vnodeop_entries[] = { 2708 { &vop_default_desc, (vop_t *) vop_eopnotsupp }, 2709 { &vop_close_desc, (vop_t *) sync_close }, /* close */ 2710 { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */ 2711 { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */ 2712 { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */ 2713 { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */ 2714 { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */ 2715 { &vop_print_desc, (vop_t *) sync_print }, /* print */ 2716 { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */ 2717 { NULL, NULL } 2718 }; 2719 struct vnodeopv_desc sync_vnodeop_opv_desc = 2720 { &sync_vnodeop_p, sync_vnodeop_entries }; 2721 2722 VNODEOP_SET(sync_vnodeop_opv_desc); 2723 2724 /* 2725 * Create a new filesystem syncer vnode for the specified mount point. 2726 */ 2727 int 2728 vfs_allocate_syncvnode(mp) 2729 struct mount *mp; 2730 { 2731 struct vnode *vp; 2732 static long start, incr, next; 2733 int error; 2734 2735 /* Allocate a new vnode */ 2736 if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { 2737 mp->mnt_syncer = NULL; 2738 return (error); 2739 } 2740 vp->v_type = VNON; 2741 /* 2742 * Place the vnode onto the syncer worklist. We attempt to 2743 * scatter them about on the list so that they will go off 2744 * at evenly distributed times even if all the filesystems 2745 * are mounted at once. 2746 */ 2747 next += incr; 2748 if (next == 0 || next > syncer_maxdelay) { 2749 start /= 2; 2750 incr /= 2; 2751 if (start == 0) { 2752 start = syncer_maxdelay / 2; 2753 incr = syncer_maxdelay; 2754 } 2755 next = start; 2756 } 2757 vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); 2758 mp->mnt_syncer = vp; 2759 return (0); 2760 } 2761 2762 /* 2763 * Do a lazy sync of the filesystem. 
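 * sync_fsync() is invoked on the syncer vnode roughly once per syncdelay
 * interval by way of the syncer worklist.  It re-queues the syncer vnode
 * at the back of the list, temporarily clears MNT_ASYNC on the mount, and
 * then flushes dirty pages and buffers with vfs_msync() and
 * VFS_SYNC(..., MNT_LAZY, ...).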
*/ 2765 int 2766 sync_fsync(ap) 2767 struct vop_fsync_args /* { 2768 struct vnode *a_vp; 2769 struct ucred *a_cred; 2770 int a_waitfor; 2771 struct proc *a_p; 2772 } */ *ap; 2773 { 2774 struct vnode *syncvp = ap->a_vp; 2775 struct mount *mp = syncvp->v_mount; 2776 struct proc *p = ap->a_p; 2777 int asyncflag; 2778 2779 /* 2780 * We only need to do something if this is a lazy evaluation. 2781 */ 2782 if (ap->a_waitfor != MNT_LAZY) 2783 return (0); 2784 2785 /* 2786 * Move ourselves to the back of the sync list. 2787 */ 2788 vn_syncer_add_to_worklist(syncvp, syncdelay); 2789 2790 /* 2791 * Walk the list of vnodes pushing all that are dirty and 2792 * not already on the sync list. 2793 */ 2794 simple_lock(&mountlist_slock); 2795 if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) { 2796 simple_unlock(&mountlist_slock); 2797 return (0); 2798 } 2799 asyncflag = mp->mnt_flag & MNT_ASYNC; 2800 mp->mnt_flag &= ~MNT_ASYNC; 2801 vfs_msync(mp, MNT_NOWAIT); 2802 VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); 2803 if (asyncflag) 2804 mp->mnt_flag |= MNT_ASYNC; 2805 vfs_unbusy(mp, p); 2806 return (0); 2807 } 2808 2809 /* 2810 * The syncer vnode is no longer referenced. 2811 */ 2812 int 2813 sync_inactive(ap) 2814 struct vop_inactive_args /* { 2815 struct vnode *a_vp; 2816 struct proc *a_p; 2817 } */ *ap; 2818 { 2819 2820 vgone(ap->a_vp); 2821 return (0); 2822 } 2823 2824 /* 2825 * The syncer vnode is no longer needed and is being decommissioned. 2826 */ 2827 int 2828 sync_reclaim(ap) 2829 struct vop_reclaim_args /* { 2830 struct vnode *a_vp; 2831 } */ *ap; 2832 { 2833 struct vnode *vp = ap->a_vp; 2834 2835 vp->v_mount->mnt_syncer = NULL; 2836 if (vp->v_flag & VONWORKLST) { 2837 LIST_REMOVE(vp, v_synclist); 2838 vp->v_flag &= ~VONWORKLST; 2839 } 2840 2841 return (0); 2842 } 2843 2844 /* 2845 * Print out a syncer vnode. 2846 */ 2847 int 2848 sync_print(ap) 2849 struct vop_print_args /* { 2850 struct vnode *a_vp; 2851 } */ *ap; 2852 { 2853 struct vnode *vp = ap->a_vp; 2854 2855 printf("syncer vnode"); 2856 if (vp->v_vnlock != NULL) 2857 lockmgr_printinfo(vp->v_vnlock); 2858 printf("\n"); 2859 return (0); 2860 } 2861
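
/*
 * Illustrative sketch (not compiled in): one way a filesystem might use
 * the polling support above.  A VOP_POLL implementation reports pending
 * events and/or records interest through vn_pollrecord(), while code
 * paths that produce data announce it with the VN_POLLEVENT macro, which
 * only calls vn_pollevent() when someone is actually polling the vnode.
 * The ``xfs'' names below are hypothetical.
 */
#if 0
static int
xfs_poll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{

	/* Check for pending events and record interest in future ones. */
	return (vn_pollrecord(ap->a_vp, ap->a_p, ap->a_events));
}

static void
xfs_data_arrived(vp)
	struct vnode *vp;
{

	/* Wake up any process polling for readable data on this vnode. */
	VN_POLLEVENT(vp, POLLIN | POLLRDNORM);
}
#endif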