1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 
37 * 38 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 39 * $Id: vfs_subr.c,v 1.172 1998/10/31 07:42:03 peter Exp $ 40 */ 41 42 /* 43 * External virtual filesystem routines 44 */ 45 #include "opt_ddb.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/conf.h> 50 #include <sys/kernel.h> 51 #include <sys/proc.h> 52 #include <sys/malloc.h> 53 #include <sys/mount.h> 54 #include <sys/socket.h> 55 #include <sys/vnode.h> 56 #include <sys/stat.h> 57 #include <sys/buf.h> 58 #include <sys/domain.h> 59 #include <sys/dirent.h> 60 #include <sys/vmmeter.h> 61 62 #include <machine/limits.h> 63 64 #include <vm/vm.h> 65 #include <vm/vm_object.h> 66 #include <vm/vm_extern.h> 67 #include <vm/pmap.h> 68 #include <vm/vm_map.h> 69 #include <vm/vm_pager.h> 70 #include <vm/vnode_pager.h> 71 #include <vm/vm_zone.h> 72 #include <sys/sysctl.h> 73 74 #include <miscfs/specfs/specdev.h> 75 76 static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); 77 78 static void insmntque __P((struct vnode *vp, struct mount *mp)); 79 static void vclean __P((struct vnode *vp, int flags, struct proc *p)); 80 static void vfree __P((struct vnode *)); 81 static void vgonel __P((struct vnode *vp, struct proc *p)); 82 static unsigned long numvnodes; 83 SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); 84 85 enum vtype iftovt_tab[16] = { 86 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 87 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 88 }; 89 int vttoif_tab[9] = { 90 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 91 S_IFSOCK, S_IFIFO, S_IFMT, 92 }; 93 94 static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ 95 struct tobefreelist vnode_tobefree_list; /* vnode free list */ 96 97 static u_long wantfreevnodes = 25; 98 SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); 99 static u_long freevnodes = 0; 100 SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); 101 102 int vfs_ioopt = 0; 103 #ifdef ENABLE_VFS_IOOPT 104 SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); 105 #endif 106 107 struct mntlist mountlist; /* mounted filesystem list */ 108 struct simplelock mountlist_slock; 109 static struct simplelock mntid_slock; 110 struct simplelock mntvnode_slock; 111 int nfs_mount_type = -1; 112 static struct simplelock vnode_free_list_slock; 113 static struct simplelock spechash_slock; 114 struct nfs_public nfs_pub; /* publicly exported FS */ 115 static vm_zone_t vnode_zone; 116 117 /* 118 * The workitem queue. 119 */ 120 #define SYNCER_MAXDELAY 32 121 int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ 122 time_t syncdelay = 30; 123 int rushjob; /* number of slots to run ASAP */ 124 125 static int syncer_delayno = 0; 126 static long syncer_mask; 127 LIST_HEAD(synclist, vnode); 128 static struct synclist *syncer_workitem_pending; 129 130 int desiredvnodes; 131 SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, ""); 132 133 static void vfs_free_addrlist __P((struct netexport *nep)); 134 static int vfs_free_netcred __P((struct radix_node *rn, void *w)); 135 static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, 136 struct export_args *argp)); 137 138 /* 139 * Initialize the vnode management data structures. 
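 *
 * The initial sizing below ties the vnode table to both the process
 * count and physical memory; e.g. with maxproc = 512 and 16384 pages
 * of physical memory (illustrative numbers, not defaults), vntblinit()
 * computes
 *
 *	desiredvnodes = maxproc + cnt.v_page_count / 4
 *	              = 512 + 16384 / 4 = 4608
 *
 * desiredvnodes can later be tuned through the kern.maxvnodes sysctl.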
140 */ 141 void 142 vntblinit() 143 { 144 145 desiredvnodes = maxproc + cnt.v_page_count / 4; 146 simple_lock_init(&mntvnode_slock); 147 simple_lock_init(&mntid_slock); 148 simple_lock_init(&spechash_slock); 149 TAILQ_INIT(&vnode_free_list); 150 TAILQ_INIT(&vnode_tobefree_list); 151 simple_lock_init(&vnode_free_list_slock); 152 CIRCLEQ_INIT(&mountlist); 153 vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); 154 /* 155 * Initialize the filesystem syncer. 156 */ 157 syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, 158 &syncer_mask); 159 syncer_maxdelay = syncer_mask + 1; 160 } 161 162 /* 163 * Mark a mount point as busy. Used to synchronize access and to delay 164 * unmounting. Interlock is not released on failure. 165 */ 166 int 167 vfs_busy(mp, flags, interlkp, p) 168 struct mount *mp; 169 int flags; 170 struct simplelock *interlkp; 171 struct proc *p; 172 { 173 int lkflags; 174 175 if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 176 if (flags & LK_NOWAIT) 177 return (ENOENT); 178 mp->mnt_kern_flag |= MNTK_MWAIT; 179 if (interlkp) { 180 simple_unlock(interlkp); 181 } 182 /* 183 * Since all busy locks are shared except the exclusive 184 * lock granted when unmounting, the only place that a 185 * wakeup needs to be done is at the release of the 186 * exclusive lock at the end of dounmount. 187 */ 188 tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); 189 if (interlkp) { 190 simple_lock(interlkp); 191 } 192 return (ENOENT); 193 } 194 lkflags = LK_SHARED | LK_NOPAUSE; 195 if (interlkp) 196 lkflags |= LK_INTERLOCK; 197 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) 198 panic("vfs_busy: unexpected lock failure"); 199 return (0); 200 } 201 202 /* 203 * Free a busy filesystem. 204 */ 205 void 206 vfs_unbusy(mp, p) 207 struct mount *mp; 208 struct proc *p; 209 { 210 211 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); 212 } 213 214 /* 215 * Lookup a filesystem type, and if found allocate and initialize 216 * a mount structure for it. 217 * 218 * Devname is usually updated by mount(8) after booting. 219 */ 220 int 221 vfs_rootmountalloc(fstypename, devname, mpp) 222 char *fstypename; 223 char *devname; 224 struct mount **mpp; 225 { 226 struct proc *p = curproc; /* XXX */ 227 struct vfsconf *vfsp; 228 struct mount *mp; 229 230 if (fstypename == NULL) 231 return (ENODEV); 232 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 233 if (!strcmp(vfsp->vfc_name, fstypename)) 234 break; 235 if (vfsp == NULL) 236 return (ENODEV); 237 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 238 bzero((char *)mp, (u_long)sizeof(struct mount)); 239 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); 240 (void)vfs_busy(mp, LK_NOWAIT, 0, p); 241 LIST_INIT(&mp->mnt_vnodelist); 242 mp->mnt_vfc = vfsp; 243 mp->mnt_op = vfsp->vfc_vfsops; 244 mp->mnt_flag = MNT_RDONLY; 245 mp->mnt_vnodecovered = NULLVP; 246 vfsp->vfc_refcount++; 247 mp->mnt_stat.f_type = vfsp->vfc_typenum; 248 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 249 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 250 mp->mnt_stat.f_mntonname[0] = '/'; 251 mp->mnt_stat.f_mntonname[1] = 0; 252 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 253 *mpp = mp; 254 return (0); 255 } 256 257 /* 258 * Find an appropriate filesystem to use for the root. If a filesystem 259 * has not been preselected, walk through the list of known filesystems 260 * trying those that have mountroot routines, and try them until one 261 * works or we have tried them all. 
262 */ 263 #ifdef notdef /* XXX JH */ 264 int 265 lite2_vfs_mountroot() 266 { 267 struct vfsconf *vfsp; 268 extern int (*lite2_mountroot) __P((void)); 269 int error; 270 271 if (lite2_mountroot != NULL) 272 return ((*lite2_mountroot)()); 273 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 274 if (vfsp->vfc_mountroot == NULL) 275 continue; 276 if ((error = (*vfsp->vfc_mountroot)()) == 0) 277 return (0); 278 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); 279 } 280 return (ENODEV); 281 } 282 #endif 283 284 /* 285 * Lookup a mount point by filesystem identifier. 286 */ 287 struct mount * 288 vfs_getvfs(fsid) 289 fsid_t *fsid; 290 { 291 register struct mount *mp; 292 293 simple_lock(&mountlist_slock); 294 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 295 mp = mp->mnt_list.cqe_next) { 296 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 297 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { 298 simple_unlock(&mountlist_slock); 299 return (mp); 300 } 301 } 302 simple_unlock(&mountlist_slock); 303 return ((struct mount *) 0); 304 } 305 306 /* 307 * Get a new unique fsid 308 */ 309 void 310 vfs_getnewfsid(mp) 311 struct mount *mp; 312 { 313 static u_short xxxfs_mntid; 314 315 fsid_t tfsid; 316 int mtype; 317 318 simple_lock(&mntid_slock); 319 mtype = mp->mnt_vfc->vfc_typenum; 320 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); 321 mp->mnt_stat.f_fsid.val[1] = mtype; 322 if (xxxfs_mntid == 0) 323 ++xxxfs_mntid; 324 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); 325 tfsid.val[1] = mtype; 326 if (mountlist.cqh_first != (void *)&mountlist) { 327 while (vfs_getvfs(&tfsid)) { 328 tfsid.val[0]++; 329 xxxfs_mntid++; 330 } 331 } 332 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 333 simple_unlock(&mntid_slock); 334 } 335 336 /* 337 * Set vnode attributes to VNOVAL 338 */ 339 void 340 vattr_null(vap) 341 register struct vattr *vap; 342 { 343 344 vap->va_type = VNON; 345 vap->va_size = VNOVAL; 346 vap->va_bytes = VNOVAL; 347 vap->va_mode = VNOVAL; 348 vap->va_nlink = VNOVAL; 349 vap->va_uid = VNOVAL; 350 vap->va_gid = VNOVAL; 351 vap->va_fsid = VNOVAL; 352 vap->va_fileid = VNOVAL; 353 vap->va_blocksize = VNOVAL; 354 vap->va_rdev = VNOVAL; 355 vap->va_atime.tv_sec = VNOVAL; 356 vap->va_atime.tv_nsec = VNOVAL; 357 vap->va_mtime.tv_sec = VNOVAL; 358 vap->va_mtime.tv_nsec = VNOVAL; 359 vap->va_ctime.tv_sec = VNOVAL; 360 vap->va_ctime.tv_nsec = VNOVAL; 361 vap->va_flags = VNOVAL; 362 vap->va_gen = VNOVAL; 363 vap->va_vaflags = 0; 364 } 365 366 /* 367 * Routines having to do with the management of the vnode table. 368 */ 369 extern vop_t **dead_vnodeop_p; 370 371 /* 372 * Return the next vnode from the free list. 373 */ 374 int 375 getnewvnode(tag, mp, vops, vpp) 376 enum vtagtype tag; 377 struct mount *mp; 378 vop_t **vops; 379 struct vnode **vpp; 380 { 381 int s; 382 struct proc *p = curproc; /* XXX */ 383 struct vnode *vp, *tvp, *nvp; 384 vm_object_t object; 385 TAILQ_HEAD(freelst, vnode) vnode_tmp_list; 386 387 /* 388 * We take the least recently used vnode from the freelist 389 * if we can get it and it has no cached pages, and no 390 * namecache entries are relative to it. 
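	 * (With the default wantfreevnodes of 25, for example, the free
	 * list is left alone while it holds fewer than 25 vnodes.)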
391 * Otherwise we allocate a new vnode 392 */ 393 394 s = splbio(); 395 simple_lock(&vnode_free_list_slock); 396 TAILQ_INIT(&vnode_tmp_list); 397 398 for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) { 399 nvp = TAILQ_NEXT(vp, v_freelist); 400 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 401 if (vp->v_flag & VAGE) { 402 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 403 } else { 404 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 405 } 406 vp->v_flag &= ~(VTBFREE|VAGE); 407 vp->v_flag |= VFREE; 408 if (vp->v_usecount) 409 panic("tobe free vnode isn't"); 410 freevnodes++; 411 } 412 413 if (wantfreevnodes && freevnodes < wantfreevnodes) { 414 vp = NULL; 415 } else if (!wantfreevnodes && freevnodes <= desiredvnodes) { 416 /* 417 * XXX: this is only here to be backwards compatible 418 */ 419 vp = NULL; 420 } else { 421 for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) { 422 nvp = TAILQ_NEXT(vp, v_freelist); 423 if (!simple_lock_try(&vp->v_interlock)) 424 continue; 425 if (vp->v_usecount) 426 panic("free vnode isn't"); 427 428 object = vp->v_object; 429 if (object && (object->resident_page_count || object->ref_count)) { 430 printf("object inconsistant state: RPC: %d, RC: %d\n", 431 object->resident_page_count, object->ref_count); 432 /* Don't recycle if it's caching some pages */ 433 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 434 TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist); 435 continue; 436 } else if (LIST_FIRST(&vp->v_cache_src)) { 437 /* Don't recycle if active in the namecache */ 438 simple_unlock(&vp->v_interlock); 439 continue; 440 } else { 441 break; 442 } 443 } 444 } 445 446 for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) { 447 nvp = TAILQ_NEXT(tvp, v_freelist); 448 TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist); 449 TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist); 450 simple_unlock(&tvp->v_interlock); 451 } 452 453 if (vp) { 454 vp->v_flag |= VDOOMED; 455 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 456 freevnodes--; 457 simple_unlock(&vnode_free_list_slock); 458 cache_purge(vp); 459 vp->v_lease = NULL; 460 if (vp->v_type != VBAD) { 461 vgonel(vp, p); 462 } else { 463 simple_unlock(&vp->v_interlock); 464 } 465 466 #ifdef DIAGNOSTIC 467 { 468 int s; 469 470 if (vp->v_data) 471 panic("cleaned vnode isn't"); 472 s = splbio(); 473 if (vp->v_numoutput) 474 panic("Clean vnode has pending I/O's"); 475 splx(s); 476 } 477 #endif 478 vp->v_flag = 0; 479 vp->v_lastr = 0; 480 vp->v_lastw = 0; 481 vp->v_lasta = 0; 482 vp->v_cstart = 0; 483 vp->v_clen = 0; 484 vp->v_socket = 0; 485 vp->v_writecount = 0; /* XXX */ 486 vp->v_maxio = 0; 487 } else { 488 simple_unlock(&vnode_free_list_slock); 489 vp = (struct vnode *) zalloc(vnode_zone); 490 bzero((char *) vp, sizeof *vp); 491 simple_lock_init(&vp->v_interlock); 492 vp->v_dd = vp; 493 cache_purge(vp); 494 LIST_INIT(&vp->v_cache_src); 495 TAILQ_INIT(&vp->v_cache_dst); 496 numvnodes++; 497 } 498 499 TAILQ_INIT(&vp->v_cleanblkhd); 500 TAILQ_INIT(&vp->v_dirtyblkhd); 501 vp->v_type = VNON; 502 vp->v_tag = tag; 503 vp->v_op = vops; 504 insmntque(vp, mp); 505 *vpp = vp; 506 vp->v_usecount = 1; 507 vp->v_data = 0; 508 splx(s); 509 510 vfs_object_create(vp, p, p->p_ucred, TRUE); 511 return (0); 512 } 513 514 /* 515 * Move a vnode from one mount queue to another. 516 */ 517 static void 518 insmntque(vp, mp) 519 register struct vnode *vp; 520 register struct mount *mp; 521 { 522 523 simple_lock(&mntvnode_slock); 524 /* 525 * Delete from old mount point vnode list, if on one. 
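	 * A NULL mp just performs the removal; vgonel() and vflush() use
	 * insmntque(vp, (struct mount *) 0) for exactly that.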
526 */ 527 if (vp->v_mount != NULL) 528 LIST_REMOVE(vp, v_mntvnodes); 529 /* 530 * Insert into list of vnodes for the new mount point, if available. 531 */ 532 if ((vp->v_mount = mp) == NULL) { 533 simple_unlock(&mntvnode_slock); 534 return; 535 } 536 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 537 simple_unlock(&mntvnode_slock); 538 } 539 540 /* 541 * Update outstanding I/O count and do wakeup if requested. 542 */ 543 void 544 vwakeup(bp) 545 register struct buf *bp; 546 { 547 register struct vnode *vp; 548 549 bp->b_flags &= ~B_WRITEINPROG; 550 if ((vp = bp->b_vp)) { 551 vp->v_numoutput--; 552 if (vp->v_numoutput < 0) 553 panic("vwakeup: neg numoutput"); 554 if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { 555 vp->v_flag &= ~VBWAIT; 556 wakeup((caddr_t) &vp->v_numoutput); 557 } 558 } 559 } 560 561 /* 562 * Flush out and invalidate all buffers associated with a vnode. 563 * Called with the underlying object locked. 564 */ 565 int 566 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) 567 register struct vnode *vp; 568 int flags; 569 struct ucred *cred; 570 struct proc *p; 571 int slpflag, slptimeo; 572 { 573 register struct buf *bp; 574 struct buf *nbp, *blist; 575 int s, error; 576 vm_object_t object; 577 578 if (flags & V_SAVE) { 579 s = splbio(); 580 while (vp->v_numoutput) { 581 vp->v_flag |= VBWAIT; 582 tsleep((caddr_t)&vp->v_numoutput, 583 slpflag | (PRIBIO + 1), 584 "vinvlbuf", slptimeo); 585 } 586 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { 587 splx(s); 588 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0) 589 return (error); 590 s = splbio(); 591 if (vp->v_numoutput > 0 || 592 !TAILQ_EMPTY(&vp->v_dirtyblkhd)) 593 panic("vinvalbuf: dirty bufs"); 594 } 595 splx(s); 596 } 597 s = splbio(); 598 for (;;) { 599 blist = TAILQ_FIRST(&vp->v_cleanblkhd); 600 if (!blist) 601 blist = TAILQ_FIRST(&vp->v_dirtyblkhd); 602 if (!blist) 603 break; 604 605 for (bp = blist; bp; bp = nbp) { 606 nbp = TAILQ_NEXT(bp, b_vnbufs); 607 if (bp->b_flags & B_BUSY) { 608 bp->b_flags |= B_WANTED; 609 error = tsleep((caddr_t) bp, 610 slpflag | (PRIBIO + 4), "vinvalbuf", 611 slptimeo); 612 if (error) { 613 splx(s); 614 return (error); 615 } 616 break; 617 } 618 /* 619 * XXX Since there are no node locks for NFS, I 620 * believe there is a slight chance that a delayed 621 * write will occur while sleeping just above, so 622 * check for it. Note that vfs_bio_awrite expects 623 * buffers to reside on a queue, while VOP_BWRITE and 624 * brelse do not. 625 */ 626 if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && 627 (flags & V_SAVE)) { 628 629 if (bp->b_vp == vp) { 630 if (bp->b_flags & B_CLUSTEROK) { 631 vfs_bio_awrite(bp); 632 } else { 633 bremfree(bp); 634 bp->b_flags |= (B_BUSY | B_ASYNC); 635 VOP_BWRITE(bp); 636 } 637 } else { 638 bremfree(bp); 639 bp->b_flags |= B_BUSY; 640 (void) VOP_BWRITE(bp); 641 } 642 break; 643 } 644 bremfree(bp); 645 bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF | B_BUSY); 646 bp->b_flags &= ~B_ASYNC; 647 brelse(bp); 648 } 649 } 650 651 while (vp->v_numoutput > 0) { 652 vp->v_flag |= VBWAIT; 653 tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); 654 } 655 656 splx(s); 657 658 /* 659 * Destroy the copy in the VM cache, too. 660 */ 661 simple_lock(&vp->v_interlock); 662 object = vp->v_object; 663 if (object != NULL) { 664 vm_object_page_remove(object, 0, 0, 665 (flags & V_SAVE) ? 
TRUE : FALSE); 666 } 667 simple_unlock(&vp->v_interlock); 668 669 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd)) 670 panic("vinvalbuf: flush failed"); 671 return (0); 672 } 673 674 /* 675 * Truncate a file's buffer and pages to a specified length. This 676 * is in lieu of the old vinvalbuf mechanism, which performed unneeded 677 * sync activity. 678 */ 679 int 680 vtruncbuf(vp, cred, p, length, blksize) 681 register struct vnode *vp; 682 struct ucred *cred; 683 struct proc *p; 684 off_t length; 685 int blksize; 686 { 687 register struct buf *bp; 688 struct buf *nbp; 689 int s, anyfreed; 690 int trunclbn; 691 692 /* 693 * Round up to the *next* lbn. 694 */ 695 trunclbn = (length + blksize - 1) / blksize; 696 697 s = splbio(); 698 restart: 699 anyfreed = 1; 700 for (;anyfreed;) { 701 anyfreed = 0; 702 for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 703 nbp = TAILQ_NEXT(bp, b_vnbufs); 704 if (bp->b_lblkno >= trunclbn) { 705 if (bp->b_flags & B_BUSY) { 706 bp->b_flags |= B_WANTED; 707 tsleep(bp, PRIBIO + 4, "vtrb1", 0); 708 goto restart; 709 } else { 710 bremfree(bp); 711 bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF); 712 bp->b_flags &= ~B_ASYNC; 713 brelse(bp); 714 anyfreed = 1; 715 } 716 if (nbp && (((nbp->b_xflags & B_VNCLEAN) == 0)|| 717 (nbp->b_vp != vp) || 718 (nbp->b_flags & B_DELWRI))) { 719 goto restart; 720 } 721 } 722 } 723 724 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 725 nbp = TAILQ_NEXT(bp, b_vnbufs); 726 if (bp->b_lblkno >= trunclbn) { 727 if (bp->b_flags & B_BUSY) { 728 bp->b_flags |= B_WANTED; 729 tsleep(bp, PRIBIO + 4, "vtrb2", 0); 730 goto restart; 731 } else { 732 bremfree(bp); 733 bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF); 734 bp->b_flags &= ~B_ASYNC; 735 brelse(bp); 736 anyfreed = 1; 737 } 738 if (nbp && (((nbp->b_xflags & B_VNDIRTY) == 0)|| 739 (nbp->b_vp != vp) || 740 (nbp->b_flags & B_DELWRI) == 0)) { 741 goto restart; 742 } 743 } 744 } 745 } 746 747 if (length > 0) { 748 restartsync: 749 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 750 nbp = TAILQ_NEXT(bp, b_vnbufs); 751 if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) { 752 if (bp->b_flags & B_BUSY) { 753 bp->b_flags |= B_WANTED; 754 tsleep(bp, PRIBIO, "vtrb3", 0); 755 } else { 756 bremfree(bp); 757 bp->b_flags |= B_BUSY; 758 if (bp->b_vp == vp) { 759 bp->b_flags |= B_ASYNC; 760 } else { 761 bp->b_flags &= ~B_ASYNC; 762 } 763 VOP_BWRITE(bp); 764 } 765 goto restartsync; 766 } 767 768 } 769 } 770 771 while (vp->v_numoutput > 0) { 772 vp->v_flag |= VBWAIT; 773 tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0); 774 } 775 776 splx(s); 777 778 vnode_pager_setsize(vp, length); 779 780 return (0); 781 } 782 783 /* 784 * Associate a buffer with a vnode. 785 */ 786 void 787 bgetvp(vp, bp) 788 register struct vnode *vp; 789 register struct buf *bp; 790 { 791 int s; 792 793 #if defined(DIAGNOSTIC) 794 if (bp->b_vp) 795 panic("bgetvp: not free"); 796 #endif 797 vhold(vp); 798 bp->b_vp = vp; 799 if (vp->v_type == VBLK || vp->v_type == VCHR) 800 bp->b_dev = vp->v_rdev; 801 else 802 bp->b_dev = NODEV; 803 /* 804 * Insert onto list for new vnode. 805 */ 806 s = splbio(); 807 bp->b_xflags |= B_VNCLEAN; 808 bp->b_xflags &= ~B_VNDIRTY; 809 TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs); 810 splx(s); 811 } 812 813 /* 814 * Disassociate a buffer from a vnode. 
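 *
 * This undoes bgetvp(): the buffer leaves the vnode's clean or dirty
 * list and the hold taken by bgetvp() is released with vdrop(), e.g.
 *
 *	bgetvp(vp, bp);
 *	...			(buffer I/O happens)
 *	brelvp(bp);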
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	struct buflists *listheadp;
	int s;

#if defined(DIAGNOSTIC)
	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
#endif

	/*
	 * Delete from old vnode list, if on one.
	 */
	vp = bp->b_vp;
	s = splbio();
	if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) {
		if (bp->b_xflags & B_VNDIRTY)
			listheadp = &vp->v_dirtyblkhd;
		else
			listheadp = &vp->v_cleanblkhd;
		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
		bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN);
	}
	if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	splx(s);
	bp->b_vp = (struct vnode *) 0;
	vdrop(vp);
}

/*
 * The workitem queue.
 *
 * It is useful to delay writes of file data and filesystem metadata
 * for tens of seconds so that quickly created and deleted files need
 * not waste disk bandwidth being created and removed. To realize this,
 * we append vnodes to a "workitem" queue. When running with a soft
 * updates implementation, most pending metadata dependencies should
 * not wait for more than a few seconds. Thus, vnodes for block devices
 * with filesystems mounted on them are delayed only about half the
 * time that file data is delayed.
 * Similarly, directory updates are more critical, so are only delayed
 * about a third the time that file data is delayed. Thus, there are
 * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
 * one each second (driven off the filesystem syncer process). The
 * syncer_delayno variable indicates the next queue that is to be processed.
 * Items that need to be processed soon are placed in this queue:
 *
 *	syncer_workitem_pending[syncer_delayno]
 *
 * A delay of fifteen seconds is done by placing the request fifteen
 * entries later in the queue:
 *
 *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
 *
 */

/*
 * Add an item to the syncer work queue.
 */
void
vn_syncer_add_to_worklist(vp, delay)
	struct vnode *vp;
	int delay;
{
	int s, slot;

	s = splbio();

	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
	}

	if (delay > syncer_maxdelay - 2)
		delay = syncer_maxdelay - 2;
	slot = (syncer_delayno + delay) & syncer_mask;

	LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
	vp->v_flag |= VONWORKLST;
	splx(s);
}

static void sched_sync __P((void));
static struct proc *updateproc;
static struct kproc_desc up_kp = {
	"syncer",
	sched_sync,
	&updateproc
};
SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)

/*
 * System filesystem synchronizer daemon.
 */
void
sched_sync(void)
{
	struct synclist *slp;
	struct vnode *vp;
	long starttime;
	int s;
	struct proc *p = updateproc;

	for (;;) {
		starttime = time_second;

		/*
		 * Push files whose dirty time has expired.
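		 * Each pass handles one slot; a vnode queued with, say,
		 *
		 *	vn_syncer_add_to_worklist(vp, syncdelay);
		 *
		 * landed at (syncer_delayno + syncdelay) & syncer_mask and
		 * so comes up here roughly syncdelay (30) seconds later.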
928 */ 929 s = splbio(); 930 slp = &syncer_workitem_pending[syncer_delayno]; 931 syncer_delayno += 1; 932 if (syncer_delayno == syncer_maxdelay) 933 syncer_delayno = 0; 934 splx(s); 935 936 while ((vp = LIST_FIRST(slp)) != NULL) { 937 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 938 (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); 939 VOP_UNLOCK(vp, 0, p); 940 if (LIST_FIRST(slp) == vp) { 941 if (TAILQ_EMPTY(&vp->v_dirtyblkhd) && 942 vp->v_type != VBLK) 943 panic("sched_sync: fsync failed"); 944 /* 945 * Move ourselves to the back of the sync list. 946 */ 947 LIST_REMOVE(vp, v_synclist); 948 vn_syncer_add_to_worklist(vp, syncdelay); 949 } 950 } 951 952 /* 953 * Do soft update processing. 954 */ 955 if (bioops.io_sync) 956 (*bioops.io_sync)(NULL); 957 958 /* 959 * The variable rushjob allows the kernel to speed up the 960 * processing of the filesystem syncer process. A rushjob 961 * value of N tells the filesystem syncer to process the next 962 * N seconds worth of work on its queue ASAP. Currently rushjob 963 * is used by the soft update code to speed up the filesystem 964 * syncer process when the incore state is getting so far 965 * ahead of the disk that the kernel memory pool is being 966 * threatened with exhaustion. 967 */ 968 if (rushjob > 0) { 969 rushjob -= 1; 970 continue; 971 } 972 /* 973 * If it has taken us less than a second to process the 974 * current work, then wait. Otherwise start right over 975 * again. We can still lose time if any single round 976 * takes more than two seconds, but it does not really 977 * matter as we are just trying to generally pace the 978 * filesystem activity. 979 */ 980 if (time_second == starttime) 981 tsleep(&lbolt, PPAUSE, "syncer", 0); 982 } 983 } 984 985 /* 986 * Associate a p-buffer with a vnode. 987 */ 988 void 989 pbgetvp(vp, bp) 990 register struct vnode *vp; 991 register struct buf *bp; 992 { 993 #if defined(DIAGNOSTIC) 994 if (bp->b_vp) 995 panic("pbgetvp: not free"); 996 #endif 997 bp->b_vp = vp; 998 if (vp->v_type == VBLK || vp->v_type == VCHR) 999 bp->b_dev = vp->v_rdev; 1000 else 1001 bp->b_dev = NODEV; 1002 } 1003 1004 /* 1005 * Disassociate a p-buffer from a vnode. 1006 */ 1007 void 1008 pbrelvp(bp) 1009 register struct buf *bp; 1010 { 1011 1012 #if defined(DIAGNOSTIC) 1013 if (bp->b_vp == (struct vnode *) 0) 1014 panic("pbrelvp: NULL"); 1015 #endif 1016 1017 bp->b_vp = (struct vnode *) 0; 1018 } 1019 1020 /* 1021 * Reassign a buffer from one vnode to another. 1022 * Used to assign file specific control information 1023 * (indirect blocks) to the vnode to which they belong. 1024 */ 1025 void 1026 reassignbuf(bp, newvp) 1027 register struct buf *bp; 1028 register struct vnode *newvp; 1029 { 1030 struct buflists *listheadp; 1031 struct vnode *oldvp; 1032 int delay; 1033 int s; 1034 1035 if (newvp == NULL) { 1036 printf("reassignbuf: NULL"); 1037 return; 1038 } 1039 1040 s = splbio(); 1041 /* 1042 * Delete from old vnode list, if on one. 1043 */ 1044 if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) { 1045 oldvp = bp->b_vp; 1046 if (bp->b_xflags & B_VNDIRTY) 1047 listheadp = &oldvp->v_dirtyblkhd; 1048 else 1049 listheadp = &oldvp->v_cleanblkhd; 1050 TAILQ_REMOVE(listheadp, bp, b_vnbufs); 1051 bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN); 1052 vdrop(oldvp); 1053 } 1054 /* 1055 * If dirty, put on list of dirty buffers; otherwise insert onto list 1056 * of clean buffers. 
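	 * When this is the vnode's first dirty buffer it also goes on the
	 * syncer worklist; with the default syncdelay of 30 seconds that
	 * means about 10 seconds for directories, 15 for mounted block
	 * devices, and 30 for ordinary file data (see the switch below).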
1057 */ 1058 if (bp->b_flags & B_DELWRI) { 1059 struct buf *tbp; 1060 1061 listheadp = &newvp->v_dirtyblkhd; 1062 if ((newvp->v_flag & VONWORKLST) == 0) { 1063 switch (newvp->v_type) { 1064 case VDIR: 1065 delay = syncdelay / 3; 1066 break; 1067 case VBLK: 1068 if (newvp->v_specmountpoint != NULL) { 1069 delay = syncdelay / 2; 1070 break; 1071 } 1072 /* fall through */ 1073 default: 1074 delay = syncdelay; 1075 } 1076 vn_syncer_add_to_worklist(newvp, delay); 1077 } 1078 bp->b_xflags |= B_VNDIRTY; 1079 tbp = TAILQ_FIRST(listheadp); 1080 if (tbp == NULL || 1081 (bp->b_lblkno >= 0 && tbp->b_lblkno > bp->b_lblkno)) { 1082 TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs); 1083 } else { 1084 if (bp->b_lblkno >= 0) { 1085 struct buf *ttbp; 1086 while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) && 1087 (ttbp->b_lblkno < bp->b_lblkno)) { 1088 tbp = ttbp; 1089 } 1090 TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); 1091 } else { 1092 TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs); 1093 } 1094 } 1095 } else { 1096 bp->b_xflags |= B_VNCLEAN; 1097 TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs); 1098 if ((newvp->v_flag & VONWORKLST) && 1099 TAILQ_EMPTY(&newvp->v_dirtyblkhd)) { 1100 newvp->v_flag &= ~VONWORKLST; 1101 LIST_REMOVE(newvp, v_synclist); 1102 } 1103 } 1104 bp->b_vp = newvp; 1105 vhold(bp->b_vp); 1106 splx(s); 1107 } 1108 1109 /* 1110 * Create a vnode for a block device. 1111 * Used for mounting the root file system. 1112 */ 1113 int 1114 bdevvp(dev, vpp) 1115 dev_t dev; 1116 struct vnode **vpp; 1117 { 1118 register struct vnode *vp; 1119 struct vnode *nvp; 1120 int error; 1121 1122 /* XXX 255 is for mfs. */ 1123 if (dev == NODEV || (major(dev) != 255 && (major(dev) >= nblkdev || 1124 bdevsw[major(dev)] == NULL))) { 1125 *vpp = NULLVP; 1126 return (ENXIO); 1127 } 1128 error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp); 1129 if (error) { 1130 *vpp = NULLVP; 1131 return (error); 1132 } 1133 vp = nvp; 1134 vp->v_type = VBLK; 1135 if ((nvp = checkalias(vp, dev, (struct mount *)0)) != NULL) { 1136 vput(vp); 1137 vp = nvp; 1138 } 1139 *vpp = vp; 1140 return (0); 1141 } 1142 1143 /* 1144 * Check to see if the new vnode represents a special device 1145 * for which we already have a vnode (either because of 1146 * bdevvp() or because of a different vnode representing 1147 * the same block device). If such an alias exists, deallocate 1148 * the existing contents and return the aliased vnode. The 1149 * caller is responsible for filling it with its new contents. 1150 */ 1151 struct vnode * 1152 checkalias(nvp, nvp_rdev, mp) 1153 register struct vnode *nvp; 1154 dev_t nvp_rdev; 1155 struct mount *mp; 1156 { 1157 struct proc *p = curproc; /* XXX */ 1158 struct vnode *vp; 1159 struct vnode **vpp; 1160 1161 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 1162 return (NULLVP); 1163 1164 vpp = &speclisth[SPECHASH(nvp_rdev)]; 1165 loop: 1166 simple_lock(&spechash_slock); 1167 for (vp = *vpp; vp; vp = vp->v_specnext) { 1168 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) 1169 continue; 1170 /* 1171 * Alias, but not in use, so flush it out. 1172 * Only alias active device nodes. 1173 * Not sure why we don't re-use this like we do below. 1174 */ 1175 simple_lock(&vp->v_interlock); 1176 if (vp->v_usecount == 0) { 1177 simple_unlock(&spechash_slock); 1178 vgonel(vp, p); 1179 goto loop; 1180 } 1181 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { 1182 /* 1183 * It dissappeared, and we may have slept. 
			 * Restart from the beginning
			 */
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	/*
	 * It would be a lot clearer what is going on here if
	 * this had been expressed as:
	 *	if ( vp && (vp->v_tag == VT_NON))
	 * and the clauses had been swapped.
	 */
	if (vp == NULL || vp->v_tag != VT_NON) {
		/*
		 * Put the new vnode into the hash chain,
		 * and if there was an alias, connect them.
		 */
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/*
	 *	if ( vp && (vp->v_tag == VT_NON))
	 * We have a vnode alias, but it is trashed.
	 * Make it look like it's newly allocated (by getnewvnode()).
	 * The caller should use this instead.
	 */
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0) {
		simple_lock(&vp->v_interlock);
	}
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}

	vp->v_usecount++;

	if (VSHOULDBUSY(vp))
		vbusy(vp);
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active. We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (VSHOULDFREE(vp))
				vfree(vp);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

void
vref(struct vnode *vp)
{
	simple_lock(&vp->v_interlock);
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode put/release.
 * If count drops to zero, call inactive routine and return to freelist.
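 *
 * As a rule of thumb, a reference obtained with vref() is dropped with
 * vrele(), while a vnode held locked (for instance one returned by
 * vget() with a lock type in flags) is released with vput(), which
 * unlocks it as well:
 *
 *	vref(vp);   ...   vrele(vp);
 *	vget(vp, LK_EXCLUSIVE, p);   ...   vput(vp);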
1303 */ 1304 void 1305 vrele(vp) 1306 struct vnode *vp; 1307 { 1308 struct proc *p = curproc; /* XXX */ 1309 1310 #ifdef DIAGNOSTIC 1311 if (vp == NULL) 1312 panic("vrele: null vp"); 1313 #endif 1314 simple_lock(&vp->v_interlock); 1315 1316 if (vp->v_usecount > 1) { 1317 1318 vp->v_usecount--; 1319 simple_unlock(&vp->v_interlock); 1320 1321 return; 1322 } 1323 1324 if (vp->v_usecount == 1) { 1325 1326 vp->v_usecount--; 1327 if (VSHOULDFREE(vp)) 1328 vfree(vp); 1329 /* 1330 * If we are doing a vput, the node is already locked, and we must 1331 * call VOP_INACTIVE with the node locked. So, in the case of 1332 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1333 */ 1334 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { 1335 VOP_INACTIVE(vp, p); 1336 } 1337 1338 } else { 1339 #ifdef DIAGNOSTIC 1340 vprint("vrele: negative ref count", vp); 1341 simple_unlock(&vp->v_interlock); 1342 #endif 1343 panic("vrele: negative ref cnt"); 1344 } 1345 } 1346 1347 void 1348 vput(vp) 1349 struct vnode *vp; 1350 { 1351 struct proc *p = curproc; /* XXX */ 1352 1353 #ifdef DIAGNOSTIC 1354 if (vp == NULL) 1355 panic("vput: null vp"); 1356 #endif 1357 1358 simple_lock(&vp->v_interlock); 1359 1360 if (vp->v_usecount > 1) { 1361 1362 vp->v_usecount--; 1363 VOP_UNLOCK(vp, LK_INTERLOCK, p); 1364 return; 1365 1366 } 1367 1368 if (vp->v_usecount == 1) { 1369 1370 vp->v_usecount--; 1371 if (VSHOULDFREE(vp)) 1372 vfree(vp); 1373 /* 1374 * If we are doing a vput, the node is already locked, and we must 1375 * call VOP_INACTIVE with the node locked. So, in the case of 1376 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1377 */ 1378 simple_unlock(&vp->v_interlock); 1379 VOP_INACTIVE(vp, p); 1380 1381 } else { 1382 #ifdef DIAGNOSTIC 1383 vprint("vput: negative ref count", vp); 1384 #endif 1385 panic("vput: negative ref cnt"); 1386 } 1387 } 1388 1389 /* 1390 * Somebody doesn't want the vnode recycled. 1391 */ 1392 void 1393 vhold(vp) 1394 register struct vnode *vp; 1395 { 1396 int s; 1397 1398 s = splbio(); 1399 vp->v_holdcnt++; 1400 if (VSHOULDBUSY(vp)) 1401 vbusy(vp); 1402 splx(s); 1403 } 1404 1405 /* 1406 * One less who cares about this vnode. 1407 */ 1408 void 1409 vdrop(vp) 1410 register struct vnode *vp; 1411 { 1412 int s; 1413 1414 s = splbio(); 1415 if (vp->v_holdcnt <= 0) 1416 panic("vdrop: holdcnt"); 1417 vp->v_holdcnt--; 1418 if (VSHOULDFREE(vp)) 1419 vfree(vp); 1420 splx(s); 1421 } 1422 1423 /* 1424 * Remove any vnodes in the vnode table belonging to mount point mp. 1425 * 1426 * If MNT_NOFORCE is specified, there should not be any active ones, 1427 * return error if any are found (nb: this is a user error, not a 1428 * system error). If MNT_FORCE is specified, detach any active vnodes 1429 * that are found. 1430 */ 1431 #ifdef DIAGNOSTIC 1432 static int busyprt = 0; /* print out busy vnodes */ 1433 SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); 1434 #endif 1435 1436 int 1437 vflush(mp, skipvp, flags) 1438 struct mount *mp; 1439 struct vnode *skipvp; 1440 int flags; 1441 { 1442 struct proc *p = curproc; /* XXX */ 1443 struct vnode *vp, *nvp; 1444 int busy = 0; 1445 1446 simple_lock(&mntvnode_slock); 1447 loop: 1448 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { 1449 /* 1450 * Make sure this vnode wasn't reclaimed in getnewvnode(). 1451 * Start over if it has (it won't be on the list anymore). 1452 */ 1453 if (vp->v_mount != mp) 1454 goto loop; 1455 nvp = vp->v_mntvnodes.le_next; 1456 /* 1457 * Skip over a selected vnode. 
1458 */ 1459 if (vp == skipvp) 1460 continue; 1461 1462 simple_lock(&vp->v_interlock); 1463 /* 1464 * Skip over a vnodes marked VSYSTEM. 1465 */ 1466 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { 1467 simple_unlock(&vp->v_interlock); 1468 continue; 1469 } 1470 /* 1471 * If WRITECLOSE is set, only flush out regular file vnodes 1472 * open for writing. 1473 */ 1474 if ((flags & WRITECLOSE) && 1475 (vp->v_writecount == 0 || vp->v_type != VREG)) { 1476 simple_unlock(&vp->v_interlock); 1477 continue; 1478 } 1479 1480 /* 1481 * With v_usecount == 0, all we need to do is clear out the 1482 * vnode data structures and we are done. 1483 */ 1484 if (vp->v_usecount == 0) { 1485 simple_unlock(&mntvnode_slock); 1486 vgonel(vp, p); 1487 simple_lock(&mntvnode_slock); 1488 continue; 1489 } 1490 1491 /* 1492 * If FORCECLOSE is set, forcibly close the vnode. For block 1493 * or character devices, revert to an anonymous device. For 1494 * all other files, just kill them. 1495 */ 1496 if (flags & FORCECLOSE) { 1497 simple_unlock(&mntvnode_slock); 1498 if (vp->v_type != VBLK && vp->v_type != VCHR) { 1499 vgonel(vp, p); 1500 } else { 1501 vclean(vp, 0, p); 1502 vp->v_op = spec_vnodeop_p; 1503 insmntque(vp, (struct mount *) 0); 1504 } 1505 simple_lock(&mntvnode_slock); 1506 continue; 1507 } 1508 #ifdef DIAGNOSTIC 1509 if (busyprt) 1510 vprint("vflush: busy vnode", vp); 1511 #endif 1512 simple_unlock(&vp->v_interlock); 1513 busy++; 1514 } 1515 simple_unlock(&mntvnode_slock); 1516 if (busy) 1517 return (EBUSY); 1518 return (0); 1519 } 1520 1521 /* 1522 * Disassociate the underlying file system from a vnode. 1523 */ 1524 static void 1525 vclean(vp, flags, p) 1526 struct vnode *vp; 1527 int flags; 1528 struct proc *p; 1529 { 1530 int active; 1531 vm_object_t obj; 1532 1533 /* 1534 * Check to see if the vnode is in use. If so we have to reference it 1535 * before we clean it out so that its count cannot fall to zero and 1536 * generate a race against ourselves to recycle it. 1537 */ 1538 if ((active = vp->v_usecount)) 1539 vp->v_usecount++; 1540 1541 /* 1542 * Prevent the vnode from being recycled or brought into use while we 1543 * clean it out. 1544 */ 1545 if (vp->v_flag & VXLOCK) 1546 panic("vclean: deadlock"); 1547 vp->v_flag |= VXLOCK; 1548 /* 1549 * Even if the count is zero, the VOP_INACTIVE routine may still 1550 * have the object locked while it cleans it out. The VOP_LOCK 1551 * ensures that the VOP_INACTIVE routine is done with its work. 1552 * For active vnodes, it ensures that no other activity can 1553 * occur while the underlying object is being cleaned out. 1554 */ 1555 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); 1556 1557 /* 1558 * Clean out any buffers associated with the vnode. 1559 */ 1560 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); 1561 if (obj = vp->v_object) { 1562 if (obj->ref_count == 0) { 1563 /* 1564 * This is a normal way of shutting down the object/vnode 1565 * association. 1566 */ 1567 vm_object_terminate(obj); 1568 } else { 1569 /* 1570 * Woe to the process that tries to page now :-). 1571 */ 1572 vm_pager_deallocate(obj); 1573 } 1574 } 1575 1576 /* 1577 * If purging an active vnode, it must be closed and 1578 * deactivated before being reclaimed. Note that the 1579 * VOP_INACTIVE will unlock the vnode. 1580 */ 1581 if (active) { 1582 if (flags & DOCLOSE) 1583 VOP_CLOSE(vp, IO_NDELAY, NOCRED, p); 1584 VOP_INACTIVE(vp, p); 1585 } else { 1586 /* 1587 * Any other processes trying to obtain this lock must first 1588 * wait for VXLOCK to clear, then call the new lock operation. 
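		 * vget() above is the usual example of such a waiter:
		 *
		 *	if (vp->v_flag & VXLOCK) {
		 *		vp->v_flag |= VXWANT;
		 *		simple_unlock(&vp->v_interlock);
		 *		tsleep((caddr_t)vp, PINOD, "vget", 0);
		 *		return (ENOENT);
		 *	}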
1589 */ 1590 VOP_UNLOCK(vp, 0, p); 1591 } 1592 /* 1593 * Reclaim the vnode. 1594 */ 1595 if (VOP_RECLAIM(vp, p)) 1596 panic("vclean: cannot reclaim"); 1597 1598 if (active) 1599 vrele(vp); 1600 1601 cache_purge(vp); 1602 if (vp->v_vnlock) { 1603 #if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */ 1604 #ifdef DIAGNOSTIC 1605 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) 1606 vprint("vclean: lock not drained", vp); 1607 #endif 1608 #endif 1609 FREE(vp->v_vnlock, M_VNODE); 1610 vp->v_vnlock = NULL; 1611 } 1612 1613 if (VSHOULDFREE(vp)) 1614 vfree(vp); 1615 1616 /* 1617 * Done with purge, notify sleepers of the grim news. 1618 */ 1619 vp->v_op = dead_vnodeop_p; 1620 vn_pollgone(vp); 1621 vp->v_tag = VT_NON; 1622 vp->v_flag &= ~VXLOCK; 1623 if (vp->v_flag & VXWANT) { 1624 vp->v_flag &= ~VXWANT; 1625 wakeup((caddr_t) vp); 1626 } 1627 } 1628 1629 /* 1630 * Eliminate all activity associated with the requested vnode 1631 * and with all vnodes aliased to the requested vnode. 1632 */ 1633 int 1634 vop_revoke(ap) 1635 struct vop_revoke_args /* { 1636 struct vnode *a_vp; 1637 int a_flags; 1638 } */ *ap; 1639 { 1640 struct vnode *vp, *vq; 1641 struct proc *p = curproc; /* XXX */ 1642 1643 #ifdef DIAGNOSTIC 1644 if ((ap->a_flags & REVOKEALL) == 0) 1645 panic("vop_revoke"); 1646 #endif 1647 1648 vp = ap->a_vp; 1649 simple_lock(&vp->v_interlock); 1650 1651 if (vp->v_flag & VALIASED) { 1652 /* 1653 * If a vgone (or vclean) is already in progress, 1654 * wait until it is done and return. 1655 */ 1656 if (vp->v_flag & VXLOCK) { 1657 vp->v_flag |= VXWANT; 1658 simple_unlock(&vp->v_interlock); 1659 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); 1660 return (0); 1661 } 1662 /* 1663 * Ensure that vp will not be vgone'd while we 1664 * are eliminating its aliases. 1665 */ 1666 vp->v_flag |= VXLOCK; 1667 simple_unlock(&vp->v_interlock); 1668 while (vp->v_flag & VALIASED) { 1669 simple_lock(&spechash_slock); 1670 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1671 if (vq->v_rdev != vp->v_rdev || 1672 vq->v_type != vp->v_type || vp == vq) 1673 continue; 1674 simple_unlock(&spechash_slock); 1675 vgone(vq); 1676 break; 1677 } 1678 if (vq == NULLVP) { 1679 simple_unlock(&spechash_slock); 1680 } 1681 } 1682 /* 1683 * Remove the lock so that vgone below will 1684 * really eliminate the vnode after which time 1685 * vgone will awaken any sleepers. 1686 */ 1687 simple_lock(&vp->v_interlock); 1688 vp->v_flag &= ~VXLOCK; 1689 if (vp->v_flag & VXWANT) { 1690 vp->v_flag &= ~VXWANT; 1691 wakeup(vp); 1692 } 1693 } 1694 vgonel(vp, p); 1695 return (0); 1696 } 1697 1698 /* 1699 * Recycle an unused vnode to the front of the free list. 1700 * Release the passed interlock if the vnode will be recycled. 1701 */ 1702 int 1703 vrecycle(vp, inter_lkp, p) 1704 struct vnode *vp; 1705 struct simplelock *inter_lkp; 1706 struct proc *p; 1707 { 1708 1709 simple_lock(&vp->v_interlock); 1710 if (vp->v_usecount == 0) { 1711 if (inter_lkp) { 1712 simple_unlock(inter_lkp); 1713 } 1714 vgonel(vp, p); 1715 return (1); 1716 } 1717 simple_unlock(&vp->v_interlock); 1718 return (0); 1719 } 1720 1721 /* 1722 * Eliminate all activity associated with a vnode 1723 * in preparation for reuse. 1724 */ 1725 void 1726 vgone(vp) 1727 register struct vnode *vp; 1728 { 1729 struct proc *p = curproc; /* XXX */ 1730 1731 simple_lock(&vp->v_interlock); 1732 vgonel(vp, p); 1733 } 1734 1735 /* 1736 * vgone, with the vp interlock held. 
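 * Callers take the interlock themselves first, the way vgone() does
 * just above:
 *
 *	simple_lock(&vp->v_interlock);
 *	vgonel(vp, p);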
1737 */ 1738 static void 1739 vgonel(vp, p) 1740 struct vnode *vp; 1741 struct proc *p; 1742 { 1743 int s; 1744 struct vnode *vq; 1745 struct vnode *vx; 1746 1747 /* 1748 * If a vgone (or vclean) is already in progress, 1749 * wait until it is done and return. 1750 */ 1751 if (vp->v_flag & VXLOCK) { 1752 vp->v_flag |= VXWANT; 1753 simple_unlock(&vp->v_interlock); 1754 tsleep((caddr_t)vp, PINOD, "vgone", 0); 1755 return; 1756 } 1757 1758 /* 1759 * Clean out the filesystem specific data. 1760 */ 1761 vclean(vp, DOCLOSE, p); 1762 simple_lock(&vp->v_interlock); 1763 1764 /* 1765 * Delete from old mount point vnode list, if on one. 1766 */ 1767 if (vp->v_mount != NULL) 1768 insmntque(vp, (struct mount *)0); 1769 /* 1770 * If special device, remove it from special device alias list 1771 * if it is on one. 1772 */ 1773 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { 1774 simple_lock(&spechash_slock); 1775 if (*vp->v_hashchain == vp) { 1776 *vp->v_hashchain = vp->v_specnext; 1777 } else { 1778 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1779 if (vq->v_specnext != vp) 1780 continue; 1781 vq->v_specnext = vp->v_specnext; 1782 break; 1783 } 1784 if (vq == NULL) 1785 panic("missing bdev"); 1786 } 1787 if (vp->v_flag & VALIASED) { 1788 vx = NULL; 1789 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1790 if (vq->v_rdev != vp->v_rdev || 1791 vq->v_type != vp->v_type) 1792 continue; 1793 if (vx) 1794 break; 1795 vx = vq; 1796 } 1797 if (vx == NULL) 1798 panic("missing alias"); 1799 if (vq == NULL) 1800 vx->v_flag &= ~VALIASED; 1801 vp->v_flag &= ~VALIASED; 1802 } 1803 simple_unlock(&spechash_slock); 1804 FREE(vp->v_specinfo, M_VNODE); 1805 vp->v_specinfo = NULL; 1806 } 1807 1808 /* 1809 * If it is on the freelist and not already at the head, 1810 * move it to the head of the list. The test of the back 1811 * pointer and the reference count of zero is because 1812 * it will be removed from the free list by getnewvnode, 1813 * but will not have its reference count incremented until 1814 * after calling vgone. If the reference count were 1815 * incremented first, vgone would (incorrectly) try to 1816 * close the previous instance of the underlying object. 1817 */ 1818 if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) { 1819 s = splbio(); 1820 simple_lock(&vnode_free_list_slock); 1821 if (vp->v_flag & VFREE) { 1822 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1823 } else if (vp->v_flag & VTBFREE) { 1824 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 1825 vp->v_flag &= ~VTBFREE; 1826 freevnodes++; 1827 } else 1828 freevnodes++; 1829 vp->v_flag |= VFREE; 1830 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 1831 simple_unlock(&vnode_free_list_slock); 1832 splx(s); 1833 } 1834 1835 vp->v_type = VBAD; 1836 simple_unlock(&vp->v_interlock); 1837 } 1838 1839 /* 1840 * Lookup a vnode by device number. 1841 */ 1842 int 1843 vfinddev(dev, type, vpp) 1844 dev_t dev; 1845 enum vtype type; 1846 struct vnode **vpp; 1847 { 1848 register struct vnode *vp; 1849 int rc = 0; 1850 1851 simple_lock(&spechash_slock); 1852 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 1853 if (dev != vp->v_rdev || type != vp->v_type) 1854 continue; 1855 *vpp = vp; 1856 rc = 1; 1857 break; 1858 } 1859 simple_unlock(&spechash_slock); 1860 return (rc); 1861 } 1862 1863 /* 1864 * Calculate the total number of references to a special device. 
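 *
 * Aliases created by checkalias() each carry their own v_usecount, so
 * the count is summed over every vnode on the SPECHASH chain for this
 * device; e.g. two aliased vnodes with usecounts of 1 and 2 make
 * vcount() return 3.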
1865 */ 1866 int 1867 vcount(vp) 1868 register struct vnode *vp; 1869 { 1870 struct vnode *vq, *vnext; 1871 int count; 1872 1873 loop: 1874 if ((vp->v_flag & VALIASED) == 0) 1875 return (vp->v_usecount); 1876 simple_lock(&spechash_slock); 1877 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { 1878 vnext = vq->v_specnext; 1879 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) 1880 continue; 1881 /* 1882 * Alias, but not in use, so flush it out. 1883 */ 1884 if (vq->v_usecount == 0 && vq != vp) { 1885 simple_unlock(&spechash_slock); 1886 vgone(vq); 1887 goto loop; 1888 } 1889 count += vq->v_usecount; 1890 } 1891 simple_unlock(&spechash_slock); 1892 return (count); 1893 } 1894 /* 1895 * Print out a description of a vnode. 1896 */ 1897 static char *typename[] = 1898 {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; 1899 1900 void 1901 vprint(label, vp) 1902 char *label; 1903 register struct vnode *vp; 1904 { 1905 char buf[64]; 1906 1907 if (label != NULL) 1908 printf("%s: %p: ", label, (void *)vp); 1909 else 1910 printf("%p: ", (void *)vp); 1911 printf("type %s, usecount %d, writecount %d, refcount %d,", 1912 typename[vp->v_type], vp->v_usecount, vp->v_writecount, 1913 vp->v_holdcnt); 1914 buf[0] = '\0'; 1915 if (vp->v_flag & VROOT) 1916 strcat(buf, "|VROOT"); 1917 if (vp->v_flag & VTEXT) 1918 strcat(buf, "|VTEXT"); 1919 if (vp->v_flag & VSYSTEM) 1920 strcat(buf, "|VSYSTEM"); 1921 if (vp->v_flag & VXLOCK) 1922 strcat(buf, "|VXLOCK"); 1923 if (vp->v_flag & VXWANT) 1924 strcat(buf, "|VXWANT"); 1925 if (vp->v_flag & VBWAIT) 1926 strcat(buf, "|VBWAIT"); 1927 if (vp->v_flag & VALIASED) 1928 strcat(buf, "|VALIASED"); 1929 if (vp->v_flag & VDOOMED) 1930 strcat(buf, "|VDOOMED"); 1931 if (vp->v_flag & VFREE) 1932 strcat(buf, "|VFREE"); 1933 if (vp->v_flag & VOBJBUF) 1934 strcat(buf, "|VOBJBUF"); 1935 if (buf[0] != '\0') 1936 printf(" flags (%s)", &buf[1]); 1937 if (vp->v_data == NULL) { 1938 printf("\n"); 1939 } else { 1940 printf("\n\t"); 1941 VOP_PRINT(vp); 1942 } 1943 } 1944 1945 #ifdef DDB 1946 #include <ddb/ddb.h> 1947 /* 1948 * List all of the locked vnodes in the system. 1949 * Called when debugging the kernel. 1950 */ 1951 DB_SHOW_COMMAND(lockedvnodes, lockedvnodes) 1952 { 1953 struct proc *p = curproc; /* XXX */ 1954 struct mount *mp, *nmp; 1955 struct vnode *vp; 1956 1957 printf("Locked vnodes\n"); 1958 simple_lock(&mountlist_slock); 1959 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1960 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 1961 nmp = mp->mnt_list.cqe_next; 1962 continue; 1963 } 1964 for (vp = mp->mnt_vnodelist.lh_first; 1965 vp != NULL; 1966 vp = vp->v_mntvnodes.le_next) { 1967 if (VOP_ISLOCKED(vp)) 1968 vprint((char *)0, vp); 1969 } 1970 simple_lock(&mountlist_slock); 1971 nmp = mp->mnt_list.cqe_next; 1972 vfs_unbusy(mp, p); 1973 } 1974 simple_unlock(&mountlist_slock); 1975 } 1976 #endif 1977 1978 /* 1979 * Top level filesystem related information gathering. 1980 */ 1981 static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS); 1982 1983 static int 1984 vfs_sysctl SYSCTL_HANDLER_ARGS 1985 { 1986 int *name = (int *)arg1 - 1; /* XXX */ 1987 u_int namelen = arg2 + 1; /* XXX */ 1988 struct vfsconf *vfsp; 1989 1990 #if 1 || defined(COMPAT_PRELITE2) 1991 /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. 
*/ 1992 if (namelen == 1) 1993 return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); 1994 #endif 1995 1996 #ifdef notyet 1997 /* all sysctl names at this level are at least name and field */ 1998 if (namelen < 2) 1999 return (ENOTDIR); /* overloaded */ 2000 if (name[0] != VFS_GENERIC) { 2001 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 2002 if (vfsp->vfc_typenum == name[0]) 2003 break; 2004 if (vfsp == NULL) 2005 return (EOPNOTSUPP); 2006 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, 2007 oldp, oldlenp, newp, newlen, p)); 2008 } 2009 #endif 2010 switch (name[1]) { 2011 case VFS_MAXTYPENUM: 2012 if (namelen != 2) 2013 return (ENOTDIR); 2014 return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); 2015 case VFS_CONF: 2016 if (namelen != 3) 2017 return (ENOTDIR); /* overloaded */ 2018 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 2019 if (vfsp->vfc_typenum == name[2]) 2020 break; 2021 if (vfsp == NULL) 2022 return (EOPNOTSUPP); 2023 return (SYSCTL_OUT(req, vfsp, sizeof *vfsp)); 2024 } 2025 return (EOPNOTSUPP); 2026 } 2027 2028 SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, 2029 "Generic filesystem"); 2030 2031 #if 1 || defined(COMPAT_PRELITE2) 2032 2033 static int 2034 sysctl_ovfs_conf SYSCTL_HANDLER_ARGS 2035 { 2036 int error; 2037 struct vfsconf *vfsp; 2038 struct ovfsconf ovfs; 2039 2040 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 2041 ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ 2042 strcpy(ovfs.vfc_name, vfsp->vfc_name); 2043 ovfs.vfc_index = vfsp->vfc_typenum; 2044 ovfs.vfc_refcount = vfsp->vfc_refcount; 2045 ovfs.vfc_flags = vfsp->vfc_flags; 2046 error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); 2047 if (error) 2048 return error; 2049 } 2050 return 0; 2051 } 2052 2053 #endif /* 1 || COMPAT_PRELITE2 */ 2054 2055 #if 0 2056 #define KINFO_VNODESLOP 10 2057 /* 2058 * Dump vnode list (via sysctl). 2059 * Copyout address of vnode followed by vnode. 2060 */ 2061 /* ARGSUSED */ 2062 static int 2063 sysctl_vnode SYSCTL_HANDLER_ARGS 2064 { 2065 struct proc *p = curproc; /* XXX */ 2066 struct mount *mp, *nmp; 2067 struct vnode *nvp, *vp; 2068 int error; 2069 2070 #define VPTRSZ sizeof (struct vnode *) 2071 #define VNODESZ sizeof (struct vnode) 2072 2073 req->lock = 0; 2074 if (!req->oldptr) /* Make an estimate */ 2075 return (SYSCTL_OUT(req, 0, 2076 (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); 2077 2078 simple_lock(&mountlist_slock); 2079 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 2080 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 2081 nmp = mp->mnt_list.cqe_next; 2082 continue; 2083 } 2084 again: 2085 simple_lock(&mntvnode_slock); 2086 for (vp = mp->mnt_vnodelist.lh_first; 2087 vp != NULL; 2088 vp = nvp) { 2089 /* 2090 * Check that the vp is still associated with 2091 * this filesystem. RACE: could have been 2092 * recycled onto the same filesystem. 2093 */ 2094 if (vp->v_mount != mp) { 2095 simple_unlock(&mntvnode_slock); 2096 goto again; 2097 } 2098 nvp = vp->v_mntvnodes.le_next; 2099 simple_unlock(&mntvnode_slock); 2100 if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || 2101 (error = SYSCTL_OUT(req, vp, VNODESZ))) 2102 return (error); 2103 simple_lock(&mntvnode_slock); 2104 } 2105 simple_unlock(&mntvnode_slock); 2106 simple_lock(&mountlist_slock); 2107 nmp = mp->mnt_list.cqe_next; 2108 vfs_unbusy(mp, p); 2109 } 2110 simple_unlock(&mountlist_slock); 2111 2112 return (0); 2113 } 2114 #endif 2115 2116 /* 2117 * XXX 2118 * Exporting the vnode list on large systems causes them to crash. 
2119 * Exporting the vnode list on medium systems causes sysctl to coredump. 2120 */ 2121 #if 0 2122 SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 2123 0, 0, sysctl_vnode, "S,vnode", ""); 2124 #endif 2125 2126 /* 2127 * Check to see if a filesystem is mounted on a block device. 2128 */ 2129 int 2130 vfs_mountedon(vp) 2131 struct vnode *vp; 2132 { 2133 struct vnode *vq; 2134 int error = 0; 2135 2136 if (vp->v_specmountpoint != NULL) 2137 return (EBUSY); 2138 if (vp->v_flag & VALIASED) { 2139 simple_lock(&spechash_slock); 2140 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 2141 if (vq->v_rdev != vp->v_rdev || 2142 vq->v_type != vp->v_type) 2143 continue; 2144 if (vq->v_specmountpoint != NULL) { 2145 error = EBUSY; 2146 break; 2147 } 2148 } 2149 simple_unlock(&spechash_slock); 2150 } 2151 return (error); 2152 } 2153 2154 /* 2155 * Unmount all filesystems. The list is traversed in reverse order 2156 * of mounting to avoid dependencies. 2157 */ 2158 void 2159 vfs_unmountall() 2160 { 2161 struct mount *mp, *nmp; 2162 struct proc *p; 2163 int error; 2164 2165 if (curproc != NULL) 2166 p = curproc; 2167 else 2168 p = initproc; /* XXX XXX should this be proc0? */ 2169 /* 2170 * Since this only runs when rebooting, it is not interlocked. 2171 */ 2172 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 2173 nmp = mp->mnt_list.cqe_prev; 2174 error = dounmount(mp, MNT_FORCE, p); 2175 if (error) { 2176 printf("unmount of %s failed (", 2177 mp->mnt_stat.f_mntonname); 2178 if (error == EBUSY) 2179 printf("BUSY)\n"); 2180 else 2181 printf("%d)\n", error); 2182 } 2183 } 2184 } 2185 2186 /* 2187 * Build hash lists of net addresses and hang them off the mount point. 2188 * Called by ufs_mount() to set up the lists of export addresses. 
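 *
 * Each entry is one malloc'd block: a struct netcred followed by the
 * copied-in sockaddr and (optionally) its mask, i.e. roughly
 *
 *	np    = malloc(sizeof(struct netcred) + ex_addrlen + ex_masklen);
 *	saddr = (struct sockaddr *)(np + 1);
 *	smask = (struct sockaddr *)((caddr_t)saddr + ex_addrlen);
 *
 * before the pair is inserted into the per-family radix tree.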
2189 */ 2190 static int 2191 vfs_hang_addrlist(mp, nep, argp) 2192 struct mount *mp; 2193 struct netexport *nep; 2194 struct export_args *argp; 2195 { 2196 register struct netcred *np; 2197 register struct radix_node_head *rnh; 2198 register int i; 2199 struct radix_node *rn; 2200 struct sockaddr *saddr, *smask = 0; 2201 struct domain *dom; 2202 int error; 2203 2204 if (argp->ex_addrlen == 0) { 2205 if (mp->mnt_flag & MNT_DEFEXPORTED) 2206 return (EPERM); 2207 np = &nep->ne_defexported; 2208 np->netc_exflags = argp->ex_flags; 2209 np->netc_anon = argp->ex_anon; 2210 np->netc_anon.cr_ref = 1; 2211 mp->mnt_flag |= MNT_DEFEXPORTED; 2212 return (0); 2213 } 2214 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 2215 np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); 2216 bzero((caddr_t) np, i); 2217 saddr = (struct sockaddr *) (np + 1); 2218 if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) 2219 goto out; 2220 if (saddr->sa_len > argp->ex_addrlen) 2221 saddr->sa_len = argp->ex_addrlen; 2222 if (argp->ex_masklen) { 2223 smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); 2224 error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); 2225 if (error) 2226 goto out; 2227 if (smask->sa_len > argp->ex_masklen) 2228 smask->sa_len = argp->ex_masklen; 2229 } 2230 i = saddr->sa_family; 2231 if ((rnh = nep->ne_rtable[i]) == 0) { 2232 /* 2233 * Seems silly to initialize every AF when most are not used, 2234 * do so on demand here 2235 */ 2236 for (dom = domains; dom; dom = dom->dom_next) 2237 if (dom->dom_family == i && dom->dom_rtattach) { 2238 dom->dom_rtattach((void **) &nep->ne_rtable[i], 2239 dom->dom_rtoffset); 2240 break; 2241 } 2242 if ((rnh = nep->ne_rtable[i]) == 0) { 2243 error = ENOBUFS; 2244 goto out; 2245 } 2246 } 2247 rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, 2248 np->netc_rnodes); 2249 if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ 2250 error = EPERM; 2251 goto out; 2252 } 2253 np->netc_exflags = argp->ex_flags; 2254 np->netc_anon = argp->ex_anon; 2255 np->netc_anon.cr_ref = 1; 2256 return (0); 2257 out: 2258 free(np, M_NETADDR); 2259 return (error); 2260 } 2261 2262 /* ARGSUSED */ 2263 static int 2264 vfs_free_netcred(rn, w) 2265 struct radix_node *rn; 2266 void *w; 2267 { 2268 register struct radix_node_head *rnh = (struct radix_node_head *) w; 2269 2270 (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); 2271 free((caddr_t) rn, M_NETADDR); 2272 return (0); 2273 } 2274 2275 /* 2276 * Free the net address hash lists that are hanging off the mount points. 
2277 */ 2278 static void 2279 vfs_free_addrlist(nep) 2280 struct netexport *nep; 2281 { 2282 register int i; 2283 register struct radix_node_head *rnh; 2284 2285 for (i = 0; i <= AF_MAX; i++) 2286 if ((rnh = nep->ne_rtable[i])) { 2287 (*rnh->rnh_walktree) (rnh, vfs_free_netcred, 2288 (caddr_t) rnh); 2289 free((caddr_t) rnh, M_RTABLE); 2290 nep->ne_rtable[i] = 0; 2291 } 2292 } 2293 2294 int 2295 vfs_export(mp, nep, argp) 2296 struct mount *mp; 2297 struct netexport *nep; 2298 struct export_args *argp; 2299 { 2300 int error; 2301 2302 if (argp->ex_flags & MNT_DELEXPORT) { 2303 if (mp->mnt_flag & MNT_EXPUBLIC) { 2304 vfs_setpublicfs(NULL, NULL, NULL); 2305 mp->mnt_flag &= ~MNT_EXPUBLIC; 2306 } 2307 vfs_free_addrlist(nep); 2308 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 2309 } 2310 if (argp->ex_flags & MNT_EXPORTED) { 2311 if (argp->ex_flags & MNT_EXPUBLIC) { 2312 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 2313 return (error); 2314 mp->mnt_flag |= MNT_EXPUBLIC; 2315 } 2316 if ((error = vfs_hang_addrlist(mp, nep, argp))) 2317 return (error); 2318 mp->mnt_flag |= MNT_EXPORTED; 2319 } 2320 return (0); 2321 } 2322 2323 2324 /* 2325 * Set the publicly exported filesystem (WebNFS). Currently, only 2326 * one public filesystem is possible in the spec (RFC 2054 and 2055) 2327 */ 2328 int 2329 vfs_setpublicfs(mp, nep, argp) 2330 struct mount *mp; 2331 struct netexport *nep; 2332 struct export_args *argp; 2333 { 2334 int error; 2335 struct vnode *rvp; 2336 char *cp; 2337 2338 /* 2339 * mp == NULL -> invalidate the current info, the FS is 2340 * no longer exported. May be called from either vfs_export 2341 * or unmount, so check if it hasn't already been done. 2342 */ 2343 if (mp == NULL) { 2344 if (nfs_pub.np_valid) { 2345 nfs_pub.np_valid = 0; 2346 if (nfs_pub.np_index != NULL) { 2347 FREE(nfs_pub.np_index, M_TEMP); 2348 nfs_pub.np_index = NULL; 2349 } 2350 } 2351 return (0); 2352 } 2353 2354 /* 2355 * Only one allowed at a time. 2356 */ 2357 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 2358 return (EBUSY); 2359 2360 /* 2361 * Get real filehandle for root of exported FS. 2362 */ 2363 bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); 2364 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 2365 2366 if ((error = VFS_ROOT(mp, &rvp))) 2367 return (error); 2368 2369 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 2370 return (error); 2371 2372 vput(rvp); 2373 2374 /* 2375 * If an indexfile was specified, pull it in. 2376 */ 2377 if (argp->ex_indexfile != NULL) { 2378 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 2379 M_WAITOK); 2380 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 2381 MAXNAMLEN, (size_t *)0); 2382 if (!error) { 2383 /* 2384 * Check for illegal filenames. 2385 */ 2386 for (cp = nfs_pub.np_index; *cp; cp++) { 2387 if (*cp == '/') { 2388 error = EINVAL; 2389 break; 2390 } 2391 } 2392 } 2393 if (error) { 2394 FREE(nfs_pub.np_index, M_TEMP); 2395 return (error); 2396 } 2397 } 2398 2399 nfs_pub.np_mount = mp; 2400 nfs_pub.np_valid = 1; 2401 return (0); 2402 } 2403 2404 struct netcred * 2405 vfs_export_lookup(mp, nep, nam) 2406 register struct mount *mp; 2407 struct netexport *nep; 2408 struct sockaddr *nam; 2409 { 2410 register struct netcred *np; 2411 register struct radix_node_head *rnh; 2412 struct sockaddr *saddr; 2413 2414 np = NULL; 2415 if (mp->mnt_flag & MNT_EXPORTED) { 2416 /* 2417 * Lookup in the export list first. 
2418 */ 2419 if (nam != NULL) { 2420 saddr = nam; 2421 rnh = nep->ne_rtable[saddr->sa_family]; 2422 if (rnh != NULL) { 2423 np = (struct netcred *) 2424 (*rnh->rnh_matchaddr)((caddr_t)saddr, 2425 rnh); 2426 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 2427 np = NULL; 2428 } 2429 } 2430 /* 2431 * If no address match, use the default if it exists. 2432 */ 2433 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 2434 np = &nep->ne_defexported; 2435 } 2436 return (np); 2437 } 2438 2439 /* 2440 * perform msync on all vnodes under a mount point 2441 * the mount point must be locked. 2442 */ 2443 void 2444 vfs_msync(struct mount *mp, int flags) { 2445 struct vnode *vp, *nvp; 2446 struct vm_object *obj; 2447 int anyio, tries; 2448 2449 tries = 5; 2450 loop: 2451 anyio = 0; 2452 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 2453 2454 nvp = vp->v_mntvnodes.le_next; 2455 2456 if (vp->v_mount != mp) { 2457 goto loop; 2458 } 2459 2460 if (vp->v_flag & VXLOCK) /* XXX: what if MNT_WAIT? */ 2461 continue; 2462 2463 if (flags != MNT_WAIT) { 2464 obj = vp->v_object; 2465 if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0) 2466 continue; 2467 if (VOP_ISLOCKED(vp)) 2468 continue; 2469 } 2470 2471 simple_lock(&vp->v_interlock); 2472 if (vp->v_object && 2473 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 2474 if (!vget(vp, 2475 LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { 2476 if (vp->v_object) { 2477 vm_object_page_clean(vp->v_object, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : 0); 2478 anyio = 1; 2479 } 2480 vput(vp); 2481 } 2482 } else { 2483 simple_unlock(&vp->v_interlock); 2484 } 2485 } 2486 if (anyio && (--tries > 0)) 2487 goto loop; 2488 } 2489 2490 /* 2491 * Create the VM object needed for VMIO and mmap support. This 2492 * is done for all VREG files in the system. Some filesystems might 2493 * afford the additional metadata buffering capability of the 2494 * VMIO code by making the device node be VMIO mode also. 2495 * 2496 * If !waslocked, must be called with interlock. 2497 */ 2498 int 2499 vfs_object_create(vp, p, cred, waslocked) 2500 struct vnode *vp; 2501 struct proc *p; 2502 struct ucred *cred; 2503 int waslocked; 2504 { 2505 struct vattr vat; 2506 vm_object_t object; 2507 int error = 0; 2508 2509 if ((vp->v_type != VREG) && (vp->v_type != VBLK)) { 2510 if (!waslocked) 2511 simple_unlock(&vp->v_interlock); 2512 return 0; 2513 } 2514 2515 if (!waslocked) 2516 vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, p); 2517 2518 retry: 2519 if ((object = vp->v_object) == NULL) { 2520 if (vp->v_type == VREG) { 2521 if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) 2522 goto retn; 2523 object = vnode_pager_alloc(vp, vat.va_size, 0, 0); 2524 } else if (major(vp->v_rdev) < nblkdev && 2525 bdevsw[major(vp->v_rdev)] != NULL) { 2526 /* 2527 * This simply allocates the biggest object possible 2528 * for a VBLK vnode. This should be fixed, but doesn't 2529 * cause any problems (yet). 
2530 */ 2531 object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0); 2532 } 2533 object->ref_count--; 2534 vp->v_usecount--; 2535 } else { 2536 if (object->flags & OBJ_DEAD) { 2537 VOP_UNLOCK(vp, 0, p); 2538 tsleep(object, PVM, "vodead", 0); 2539 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 2540 goto retry; 2541 } 2542 } 2543 2544 if (vp->v_object) { 2545 vp->v_flag |= VOBJBUF; 2546 } 2547 2548 retn: 2549 if (!waslocked) { 2550 simple_lock(&vp->v_interlock); 2551 VOP_UNLOCK(vp, LK_INTERLOCK, p); 2552 } 2553 2554 return error; 2555 } 2556 2557 static void 2558 vfree(vp) 2559 struct vnode *vp; 2560 { 2561 int s; 2562 2563 s = splbio(); 2564 simple_lock(&vnode_free_list_slock); 2565 if (vp->v_flag & VTBFREE) { 2566 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 2567 vp->v_flag &= ~VTBFREE; 2568 } 2569 if (vp->v_flag & VAGE) { 2570 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 2571 } else { 2572 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 2573 } 2574 freevnodes++; 2575 simple_unlock(&vnode_free_list_slock); 2576 vp->v_flag &= ~VAGE; 2577 vp->v_flag |= VFREE; 2578 splx(s); 2579 } 2580 2581 void 2582 vbusy(vp) 2583 struct vnode *vp; 2584 { 2585 int s; 2586 2587 s = splbio(); 2588 simple_lock(&vnode_free_list_slock); 2589 if (vp->v_flag & VTBFREE) { 2590 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 2591 vp->v_flag &= ~VTBFREE; 2592 } else { 2593 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 2594 freevnodes--; 2595 } 2596 simple_unlock(&vnode_free_list_slock); 2597 vp->v_flag &= ~(VFREE|VAGE); 2598 splx(s); 2599 } 2600 2601 /* 2602 * Record a process's interest in events which might happen to 2603 * a vnode. Because poll uses the historic select-style interface 2604 * internally, this routine serves as both the ``check for any 2605 * pending events'' and the ``record my interest in future events'' 2606 * functions. (These are done together, while the lock is held, 2607 * to avoid race conditions.) 2608 */ 2609 int 2610 vn_pollrecord(vp, p, events) 2611 struct vnode *vp; 2612 struct proc *p; 2613 short events; 2614 { 2615 simple_lock(&vp->v_pollinfo.vpi_lock); 2616 if (vp->v_pollinfo.vpi_revents & events) { 2617 /* 2618 * This leaves events we are not interested 2619 * in available for the other process which 2620 * which presumably had requested them 2621 * (otherwise they would never have been 2622 * recorded). 2623 */ 2624 events &= vp->v_pollinfo.vpi_revents; 2625 vp->v_pollinfo.vpi_revents &= ~events; 2626 2627 simple_unlock(&vp->v_pollinfo.vpi_lock); 2628 return events; 2629 } 2630 vp->v_pollinfo.vpi_events |= events; 2631 selrecord(p, &vp->v_pollinfo.vpi_selinfo); 2632 simple_unlock(&vp->v_pollinfo.vpi_lock); 2633 return 0; 2634 } 2635 2636 /* 2637 * Note the occurrence of an event. If the VN_POLLEVENT macro is used, 2638 * it is possible for us to miss an event due to race conditions, but 2639 * that condition is expected to be rare, so for the moment it is the 2640 * preferred interface. 2641 */ 2642 void 2643 vn_pollevent(vp, events) 2644 struct vnode *vp; 2645 short events; 2646 { 2647 simple_lock(&vp->v_pollinfo.vpi_lock); 2648 if (vp->v_pollinfo.vpi_events & events) { 2649 /* 2650 * We clear vpi_events so that we don't 2651 * call selwakeup() twice if two events are 2652 * posted before the polling process(es) is 2653 * awakened. This also ensures that we take at 2654 * most one selwakeup() if the polling process 2655 * is no longer interested. However, it does 2656 * mean that only one event can be noticed at 2657 * a time. 
/*
 * Note the occurrence of an event.  If the VN_POLLEVENT macro is used,
 * it is possible for us to miss an event due to race conditions, but
 * that condition is expected to be rare, so for the moment it is the
 * preferred interface.
 */
void
vn_pollevent(vp, events)
	struct vnode *vp;
	short events;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_events & events) {
		/*
		 * We clear vpi_events so that we don't
		 * call selwakeup() twice if two events are
		 * posted before the polling process(es) is
		 * awakened.  This also ensures that we take at
		 * most one selwakeup() if the polling process
		 * is no longer interested.  However, it does
		 * mean that only one event can be noticed at
		 * a time.  (Perhaps we should only clear those
		 * event bits which we note?) XXX
		 */
		vp->v_pollinfo.vpi_events = 0;	/* &= ~events ??? */
		vp->v_pollinfo.vpi_revents |= events;
		selwakeup(&vp->v_pollinfo.vpi_selinfo);
	}
	simple_unlock(&vp->v_pollinfo.vpi_lock);
}

/*
 * Wake up anyone polling on vp because it is being revoked.
 * This depends on dead_poll() returning POLLHUP for correct
 * behavior.
 */
void
vn_pollgone(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_events) {
		vp->v_pollinfo.vpi_events = 0;
		selwakeup(&vp->v_pollinfo.vpi_selinfo);
	}
	simple_unlock(&vp->v_pollinfo.vpi_lock);
}



/*
 * Routine to create and manage a filesystem syncer vnode.
 */
#define sync_close ((int (*) __P((struct vop_close_args *)))nullop)
int	sync_fsync __P((struct vop_fsync_args *));
int	sync_inactive __P((struct vop_inactive_args *));
int	sync_reclaim __P((struct vop_reclaim_args *));
#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock)
#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock)
int	sync_print __P((struct vop_print_args *));
#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)

vop_t **sync_vnodeop_p;
struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
	{ &vop_default_desc,	(vop_t *) vop_eopnotsupp },
	{ &vop_close_desc,	(vop_t *) sync_close },		/* close */
	{ &vop_fsync_desc,	(vop_t *) sync_fsync },		/* fsync */
	{ &vop_inactive_desc,	(vop_t *) sync_inactive },	/* inactive */
	{ &vop_reclaim_desc,	(vop_t *) sync_reclaim },	/* reclaim */
	{ &vop_lock_desc,	(vop_t *) sync_lock },		/* lock */
	{ &vop_unlock_desc,	(vop_t *) sync_unlock },	/* unlock */
	{ &vop_print_desc,	(vop_t *) sync_print },		/* print */
	{ &vop_islocked_desc,	(vop_t *) sync_islocked },	/* islocked */
	{ NULL, NULL }
};
struct vnodeopv_desc sync_vnodeop_opv_desc =
	{ &sync_vnodeop_p, sync_vnodeop_entries };

VNODEOP_SET(sync_vnodeop_opv_desc);

/*
 * Create a new filesystem syncer vnode for the specified mount point.
 */
int
vfs_allocate_syncvnode(mp)
	struct mount *mp;
{
	struct vnode *vp;
	static long start, incr, next;
	int error;

	/* Allocate a new vnode */
	if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
		mp->mnt_syncer = NULL;
		return (error);
	}
	vp->v_type = VNON;
	/*
	 * Place the vnode onto the syncer worklist.  We attempt to
	 * scatter them about on the list so that they will go off
	 * at evenly distributed times even if all the filesystems
	 * are mounted at once.
	 */
	next += incr;
	if (next == 0 || next > syncer_maxdelay) {
		start /= 2;
		incr /= 2;
		if (start == 0) {
			start = syncer_maxdelay / 2;
			incr = syncer_maxdelay;
		}
		next = start;
	}
	vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
	mp->mnt_syncer = vp;
	return (0);
}
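/*
 * Worked example (illustrative, assuming the defaults declared earlier,
 * syncer_maxdelay = 32 and syncdelay = 30): the scatter logic in
 * vfs_allocate_syncvnode() above hands successive mounts the slots
 *
 *	16, 8, 24, 4, 12, 20, 28, 2, 6, 10, ...
 *
 * i.e. a progressively finer binary subdivision of the delay range, so
 * filesystems mounted at the same time do not all come due for their
 * lazy sync in the same second.
 */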
2756 */ 2757 int 2758 sync_fsync(ap) 2759 struct vop_fsync_args /* { 2760 struct vnode *a_vp; 2761 struct ucred *a_cred; 2762 int a_waitfor; 2763 struct proc *a_p; 2764 } */ *ap; 2765 { 2766 struct vnode *syncvp = ap->a_vp; 2767 struct mount *mp = syncvp->v_mount; 2768 struct proc *p = ap->a_p; 2769 int asyncflag; 2770 2771 /* 2772 * We only need to do something if this is a lazy evaluation. 2773 */ 2774 if (ap->a_waitfor != MNT_LAZY) 2775 return (0); 2776 2777 /* 2778 * Move ourselves to the back of the sync list. 2779 */ 2780 vn_syncer_add_to_worklist(syncvp, syncdelay); 2781 2782 /* 2783 * Walk the list of vnodes pushing all that are dirty and 2784 * not already on the sync list. 2785 */ 2786 simple_lock(&mountlist_slock); 2787 if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) { 2788 simple_unlock(&mountlist_slock); 2789 return (0); 2790 } 2791 asyncflag = mp->mnt_flag & MNT_ASYNC; 2792 mp->mnt_flag &= ~MNT_ASYNC; 2793 vfs_msync(mp, MNT_NOWAIT); 2794 VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); 2795 if (asyncflag) 2796 mp->mnt_flag |= MNT_ASYNC; 2797 vfs_unbusy(mp, p); 2798 return (0); 2799 } 2800 2801 /* 2802 * The syncer vnode is no referenced. 2803 */ 2804 int 2805 sync_inactive(ap) 2806 struct vop_inactive_args /* { 2807 struct vnode *a_vp; 2808 struct proc *a_p; 2809 } */ *ap; 2810 { 2811 2812 vgone(ap->a_vp); 2813 return (0); 2814 } 2815 2816 /* 2817 * The syncer vnode is no longer needed and is being decommissioned. 2818 */ 2819 int 2820 sync_reclaim(ap) 2821 struct vop_reclaim_args /* { 2822 struct vnode *a_vp; 2823 } */ *ap; 2824 { 2825 struct vnode *vp = ap->a_vp; 2826 2827 vp->v_mount->mnt_syncer = NULL; 2828 if (vp->v_flag & VONWORKLST) { 2829 LIST_REMOVE(vp, v_synclist); 2830 vp->v_flag &= ~VONWORKLST; 2831 } 2832 2833 return (0); 2834 } 2835 2836 /* 2837 * Print out a syncer vnode. 2838 */ 2839 int 2840 sync_print(ap) 2841 struct vop_print_args /* { 2842 struct vnode *a_vp; 2843 } */ *ap; 2844 { 2845 struct vnode *vp = ap->a_vp; 2846 2847 printf("syncer vnode"); 2848 if (vp->v_vnlock != NULL) 2849 lockmgr_printinfo(vp->v_vnlock); 2850 printf("\n"); 2851 return (0); 2852 } 2853