1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 
37 * 38 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 39 * $FreeBSD$ 40 */ 41 42 /* 43 * External virtual filesystem routines 44 */ 45 #include "opt_ddb.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/fcntl.h> 50 #include <sys/kernel.h> 51 #include <sys/proc.h> 52 #include <sys/kthread.h> 53 #include <sys/malloc.h> 54 #include <sys/mount.h> 55 #include <sys/socket.h> 56 #include <sys/vnode.h> 57 #include <sys/stat.h> 58 #include <sys/buf.h> 59 #include <sys/domain.h> 60 #include <sys/dirent.h> 61 #include <sys/vmmeter.h> 62 #include <sys/conf.h> 63 64 #include <machine/limits.h> 65 66 #include <vm/vm.h> 67 #include <vm/vm_param.h> 68 #include <vm/vm_prot.h> 69 #include <vm/vm_object.h> 70 #include <vm/vm_extern.h> 71 #include <vm/pmap.h> 72 #include <vm/vm_map.h> 73 #include <vm/vm_page.h> 74 #include <vm/vm_pager.h> 75 #include <vm/vnode_pager.h> 76 #include <vm/vm_zone.h> 77 #include <sys/sysctl.h> 78 79 static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); 80 81 static void insmntque __P((struct vnode *vp, struct mount *mp)); 82 static void vclean __P((struct vnode *vp, int flags, struct proc *p)); 83 static void vfree __P((struct vnode *)); 84 static void vgonel __P((struct vnode *vp, struct proc *p)); 85 static unsigned long numvnodes; 86 SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); 87 88 enum vtype iftovt_tab[16] = { 89 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 90 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 91 }; 92 int vttoif_tab[9] = { 93 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 94 S_IFSOCK, S_IFIFO, S_IFMT, 95 }; 96 97 static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ 98 struct tobefreelist vnode_tobefree_list; /* vnode free list */ 99 100 static u_long wantfreevnodes = 25; 101 SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); 102 static u_long freevnodes = 0; 103 SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); 104 105 static int reassignbufcalls; 106 SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, ""); 107 static int reassignbufloops; 108 SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW, &reassignbufloops, 0, ""); 109 static int reassignbufsortgood; 110 SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW, &reassignbufsortgood, 0, ""); 111 static int reassignbufsortbad; 112 SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad, 0, ""); 113 static int reassignbufmethod = 1; 114 SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, ""); 115 116 #ifdef ENABLE_VFS_IOOPT 117 int vfs_ioopt = 0; 118 SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); 119 #endif 120 121 struct mntlist mountlist; /* mounted filesystem list */ 122 struct simplelock mountlist_slock; 123 struct simplelock mntvnode_slock; 124 int nfs_mount_type = -1; 125 #ifndef NULL_SIMPLELOCKS 126 static struct simplelock mntid_slock; 127 static struct simplelock vnode_free_list_slock; 128 static struct simplelock spechash_slock; 129 #endif 130 struct nfs_public nfs_pub; /* publicly exported FS */ 131 static vm_zone_t vnode_zone; 132 133 /* 134 * The workitem queue. 
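 *
 * The variables below hold the tunable delays and the bucket array used by
 * the filesystem syncer; the mechanism itself is described in detail in the
 * block comment above vn_syncer_add_to_worklist(), further down in this file.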
135 */ 136 #define SYNCER_MAXDELAY 32 137 static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ 138 time_t syncdelay = 30; /* max time to delay syncing data */ 139 time_t filedelay = 30; /* time to delay syncing files */ 140 SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, ""); 141 time_t dirdelay = 29; /* time to delay syncing directories */ 142 SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, ""); 143 time_t metadelay = 28; /* time to delay syncing metadata */ 144 SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, ""); 145 static int rushjob; /* number of slots to run ASAP */ 146 static int stat_rush_requests; /* number of times I/O speeded up */ 147 SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, ""); 148 149 static int syncer_delayno = 0; 150 static long syncer_mask; 151 LIST_HEAD(synclist, vnode); 152 static struct synclist *syncer_workitem_pending; 153 154 int desiredvnodes; 155 SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, 156 &desiredvnodes, 0, "Maximum number of vnodes"); 157 158 static void vfs_free_addrlist __P((struct netexport *nep)); 159 static int vfs_free_netcred __P((struct radix_node *rn, void *w)); 160 static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, 161 struct export_args *argp)); 162 163 /* 164 * Initialize the vnode management data structures. 165 */ 166 void 167 vntblinit() 168 { 169 170 desiredvnodes = maxproc + cnt.v_page_count / 4; 171 simple_lock_init(&mntvnode_slock); 172 simple_lock_init(&mntid_slock); 173 simple_lock_init(&spechash_slock); 174 TAILQ_INIT(&vnode_free_list); 175 TAILQ_INIT(&vnode_tobefree_list); 176 simple_lock_init(&vnode_free_list_slock); 177 CIRCLEQ_INIT(&mountlist); 178 vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); 179 /* 180 * Initialize the filesystem syncer. 181 */ 182 syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, 183 &syncer_mask); 184 syncer_maxdelay = syncer_mask + 1; 185 } 186 187 /* 188 * Mark a mount point as busy. Used to synchronize access and to delay 189 * unmounting. Interlock is not released on failure. 190 */ 191 int 192 vfs_busy(mp, flags, interlkp, p) 193 struct mount *mp; 194 int flags; 195 struct simplelock *interlkp; 196 struct proc *p; 197 { 198 int lkflags; 199 200 if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 201 if (flags & LK_NOWAIT) 202 return (ENOENT); 203 mp->mnt_kern_flag |= MNTK_MWAIT; 204 if (interlkp) { 205 simple_unlock(interlkp); 206 } 207 /* 208 * Since all busy locks are shared except the exclusive 209 * lock granted when unmounting, the only place that a 210 * wakeup needs to be done is at the release of the 211 * exclusive lock at the end of dounmount. 212 */ 213 tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); 214 if (interlkp) { 215 simple_lock(interlkp); 216 } 217 return (ENOENT); 218 } 219 lkflags = LK_SHARED | LK_NOPAUSE; 220 if (interlkp) 221 lkflags |= LK_INTERLOCK; 222 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) 223 panic("vfs_busy: unexpected lock failure"); 224 return (0); 225 } 226 227 /* 228 * Free a busy filesystem. 229 */ 230 void 231 vfs_unbusy(mp, p) 232 struct mount *mp; 233 struct proc *p; 234 { 235 236 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); 237 } 238 239 /* 240 * Lookup a filesystem type, and if found allocate and initialize 241 * a mount structure for it. 242 * 243 * Devname is usually updated by mount(8) after booting. 
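 *
 * A typical caller is a filesystem's mountroot routine; a minimal sketch
 * (illustrative only, error handling abbreviated):
 *
 *	struct mount *mp;
 *	int error;
 *
 *	if ((error = vfs_rootmountalloc("ufs", "root_device", &mp)) != 0)
 *		return (error);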
244 */ 245 int 246 vfs_rootmountalloc(fstypename, devname, mpp) 247 char *fstypename; 248 char *devname; 249 struct mount **mpp; 250 { 251 struct proc *p = curproc; /* XXX */ 252 struct vfsconf *vfsp; 253 struct mount *mp; 254 255 if (fstypename == NULL) 256 return (ENODEV); 257 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 258 if (!strcmp(vfsp->vfc_name, fstypename)) 259 break; 260 if (vfsp == NULL) 261 return (ENODEV); 262 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 263 bzero((char *)mp, (u_long)sizeof(struct mount)); 264 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); 265 (void)vfs_busy(mp, LK_NOWAIT, 0, p); 266 LIST_INIT(&mp->mnt_vnodelist); 267 mp->mnt_vfc = vfsp; 268 mp->mnt_op = vfsp->vfc_vfsops; 269 mp->mnt_flag = MNT_RDONLY; 270 mp->mnt_vnodecovered = NULLVP; 271 vfsp->vfc_refcount++; 272 mp->mnt_stat.f_type = vfsp->vfc_typenum; 273 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 274 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 275 mp->mnt_stat.f_mntonname[0] = '/'; 276 mp->mnt_stat.f_mntonname[1] = 0; 277 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 278 *mpp = mp; 279 return (0); 280 } 281 282 /* 283 * Find an appropriate filesystem to use for the root. If a filesystem 284 * has not been preselected, walk through the list of known filesystems 285 * trying those that have mountroot routines, and try them until one 286 * works or we have tried them all. 287 */ 288 #ifdef notdef /* XXX JH */ 289 int 290 lite2_vfs_mountroot() 291 { 292 struct vfsconf *vfsp; 293 extern int (*lite2_mountroot) __P((void)); 294 int error; 295 296 if (lite2_mountroot != NULL) 297 return ((*lite2_mountroot)()); 298 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 299 if (vfsp->vfc_mountroot == NULL) 300 continue; 301 if ((error = (*vfsp->vfc_mountroot)()) == 0) 302 return (0); 303 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); 304 } 305 return (ENODEV); 306 } 307 #endif 308 309 /* 310 * Lookup a mount point by filesystem identifier. 311 */ 312 struct mount * 313 vfs_getvfs(fsid) 314 fsid_t *fsid; 315 { 316 register struct mount *mp; 317 318 simple_lock(&mountlist_slock); 319 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 320 mp = mp->mnt_list.cqe_next) { 321 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 322 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { 323 simple_unlock(&mountlist_slock); 324 return (mp); 325 } 326 } 327 simple_unlock(&mountlist_slock); 328 return ((struct mount *) 0); 329 } 330 331 /* 332 * Get a new unique fsid 333 */ 334 void 335 vfs_getnewfsid(mp) 336 struct mount *mp; 337 { 338 static u_short xxxfs_mntid; 339 340 fsid_t tfsid; 341 int mtype; 342 343 simple_lock(&mntid_slock); 344 mtype = mp->mnt_vfc->vfc_typenum; 345 mp->mnt_stat.f_fsid.val[0] = makeudev(255, mtype); 346 mp->mnt_stat.f_fsid.val[1] = mtype; 347 if (xxxfs_mntid == 0) 348 ++xxxfs_mntid; 349 tfsid.val[0] = makeudev(255, mtype + (xxxfs_mntid << 16)); 350 tfsid.val[1] = mtype; 351 if (mountlist.cqh_first != (void *)&mountlist) { 352 while (vfs_getvfs(&tfsid)) { 353 xxxfs_mntid++; 354 tfsid.val[0] = makeudev(255, 355 mtype + (xxxfs_mntid << 16)); 356 } 357 } 358 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 359 simple_unlock(&mntid_slock); 360 } 361 362 /* 363 * Knob to control the precision of file timestamps: 364 * 365 * 0 = seconds only; nanoseconds zeroed. 366 * 1 = seconds and nanoseconds, accurate within 1/HZ. 367 * 2 = seconds and nanoseconds, truncated to microseconds. 
368 * >=3 = seconds and nanoseconds, maximum precision. 369 */ 370 enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC }; 371 372 static int timestamp_precision = TSP_SEC; 373 SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW, 374 ×tamp_precision, 0, ""); 375 376 /* 377 * Get a current timestamp. 378 */ 379 void 380 vfs_timestamp(tsp) 381 struct timespec *tsp; 382 { 383 struct timeval tv; 384 385 switch (timestamp_precision) { 386 case TSP_SEC: 387 tsp->tv_sec = time_second; 388 tsp->tv_nsec = 0; 389 break; 390 case TSP_HZ: 391 getnanotime(tsp); 392 break; 393 case TSP_USEC: 394 microtime(&tv); 395 TIMEVAL_TO_TIMESPEC(&tv, tsp); 396 break; 397 case TSP_NSEC: 398 default: 399 nanotime(tsp); 400 break; 401 } 402 } 403 404 /* 405 * Set vnode attributes to VNOVAL 406 */ 407 void 408 vattr_null(vap) 409 register struct vattr *vap; 410 { 411 412 vap->va_type = VNON; 413 vap->va_size = VNOVAL; 414 vap->va_bytes = VNOVAL; 415 vap->va_mode = VNOVAL; 416 vap->va_nlink = VNOVAL; 417 vap->va_uid = VNOVAL; 418 vap->va_gid = VNOVAL; 419 vap->va_fsid = VNOVAL; 420 vap->va_fileid = VNOVAL; 421 vap->va_blocksize = VNOVAL; 422 vap->va_rdev = VNOVAL; 423 vap->va_atime.tv_sec = VNOVAL; 424 vap->va_atime.tv_nsec = VNOVAL; 425 vap->va_mtime.tv_sec = VNOVAL; 426 vap->va_mtime.tv_nsec = VNOVAL; 427 vap->va_ctime.tv_sec = VNOVAL; 428 vap->va_ctime.tv_nsec = VNOVAL; 429 vap->va_flags = VNOVAL; 430 vap->va_gen = VNOVAL; 431 vap->va_vaflags = 0; 432 } 433 434 /* 435 * Routines having to do with the management of the vnode table. 436 */ 437 extern vop_t **dead_vnodeop_p; 438 439 /* 440 * Return the next vnode from the free list. 441 */ 442 int 443 getnewvnode(tag, mp, vops, vpp) 444 enum vtagtype tag; 445 struct mount *mp; 446 vop_t **vops; 447 struct vnode **vpp; 448 { 449 int s; 450 struct proc *p = curproc; /* XXX */ 451 struct vnode *vp, *tvp, *nvp; 452 vm_object_t object; 453 TAILQ_HEAD(freelst, vnode) vnode_tmp_list; 454 455 /* 456 * We take the least recently used vnode from the freelist 457 * if we can get it and it has no cached pages, and no 458 * namecache entries are relative to it. 
	 * Otherwise we allocate a new vnode.
	 */

	s = splbio();
	simple_lock(&vnode_free_list_slock);
	TAILQ_INIT(&vnode_tmp_list);

	for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) {
		nvp = TAILQ_NEXT(vp, v_freelist);
		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
		if (vp->v_flag & VAGE) {
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		} else {
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		}
		vp->v_flag &= ~(VTBFREE|VAGE);
		vp->v_flag |= VFREE;
		if (vp->v_usecount)
			panic("tobe free vnode isn't");
		freevnodes++;
	}

	if (wantfreevnodes && freevnodes < wantfreevnodes) {
		vp = NULL;
	} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
		/*
		 * XXX: this is only here to be backwards compatible
		 */
		vp = NULL;
	} else {
		for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) {
			nvp = TAILQ_NEXT(vp, v_freelist);
			if (!simple_lock_try(&vp->v_interlock))
				continue;
			if (vp->v_usecount)
				panic("free vnode isn't");

			object = vp->v_object;
			if (object && (object->resident_page_count || object->ref_count)) {
				printf("object inconsistent state: RPC: %d, RC: %d\n",
					object->resident_page_count, object->ref_count);
				/* Don't recycle if it's caching some pages */
				TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
				TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist);
				continue;
			} else if (LIST_FIRST(&vp->v_cache_src)) {
				/* Don't recycle if active in the namecache */
				simple_unlock(&vp->v_interlock);
				continue;
			} else {
				break;
			}
		}
	}

	for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) {
		nvp = TAILQ_NEXT(tvp, v_freelist);
		TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist);
		simple_unlock(&tvp->v_interlock);
	}

	if (vp) {
		vp->v_flag |= VDOOMED;
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;
		simple_unlock(&vnode_free_list_slock);
		cache_purge(vp);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD) {
			vgonel(vp, p);
		} else {
			simple_unlock(&vp->v_interlock);
		}

#ifdef INVARIANTS
		{
			int s;

			if (vp->v_data)
				panic("cleaned vnode isn't");
			s = splbio();
			if (vp->v_numoutput)
				panic("Clean vnode has pending I/O's");
			splx(s);
		}
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
		vp->v_writecount = 0;	/* XXX */
		vp->v_maxio = 0;
	} else {
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *) zalloc(vnode_zone);
		bzero((char *) vp, sizeof *vp);
		simple_lock_init(&vp->v_interlock);
		vp->v_dd = vp;
		cache_purge(vp);
		LIST_INIT(&vp->v_cache_src);
		TAILQ_INIT(&vp->v_cache_dst);
		numvnodes++;
	}

	TAILQ_INIT(&vp->v_cleanblkhd);
	TAILQ_INIT(&vp->v_dirtyblkhd);
	vp->v_type = VNON;
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	splx(s);

	vfs_object_create(vp, p, p->p_ucred);
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
static void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
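	 * A vnode is linked through v_mntvnodes onto at most one mount
	 * point's mnt_vnodelist at a time, so any old linkage is broken
	 * before the new one is established.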
594 */ 595 if (vp->v_mount != NULL) 596 LIST_REMOVE(vp, v_mntvnodes); 597 /* 598 * Insert into list of vnodes for the new mount point, if available. 599 */ 600 if ((vp->v_mount = mp) == NULL) { 601 simple_unlock(&mntvnode_slock); 602 return; 603 } 604 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 605 simple_unlock(&mntvnode_slock); 606 } 607 608 /* 609 * Update outstanding I/O count and do wakeup if requested. 610 */ 611 void 612 vwakeup(bp) 613 register struct buf *bp; 614 { 615 register struct vnode *vp; 616 617 bp->b_flags &= ~B_WRITEINPROG; 618 if ((vp = bp->b_vp)) { 619 vp->v_numoutput--; 620 if (vp->v_numoutput < 0) 621 panic("vwakeup: neg numoutput"); 622 if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { 623 vp->v_flag &= ~VBWAIT; 624 wakeup((caddr_t) &vp->v_numoutput); 625 } 626 } 627 } 628 629 /* 630 * Flush out and invalidate all buffers associated with a vnode. 631 * Called with the underlying object locked. 632 */ 633 int 634 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) 635 register struct vnode *vp; 636 int flags; 637 struct ucred *cred; 638 struct proc *p; 639 int slpflag, slptimeo; 640 { 641 register struct buf *bp; 642 struct buf *nbp, *blist; 643 int s, error; 644 vm_object_t object; 645 646 if (flags & V_SAVE) { 647 s = splbio(); 648 while (vp->v_numoutput) { 649 vp->v_flag |= VBWAIT; 650 error = tsleep((caddr_t)&vp->v_numoutput, 651 slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo); 652 if (error) { 653 splx(s); 654 return (error); 655 } 656 } 657 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { 658 splx(s); 659 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0) 660 return (error); 661 s = splbio(); 662 if (vp->v_numoutput > 0 || 663 !TAILQ_EMPTY(&vp->v_dirtyblkhd)) 664 panic("vinvalbuf: dirty bufs"); 665 } 666 splx(s); 667 } 668 s = splbio(); 669 for (;;) { 670 blist = TAILQ_FIRST(&vp->v_cleanblkhd); 671 if (!blist) 672 blist = TAILQ_FIRST(&vp->v_dirtyblkhd); 673 if (!blist) 674 break; 675 676 for (bp = blist; bp; bp = nbp) { 677 nbp = TAILQ_NEXT(bp, b_vnbufs); 678 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { 679 error = BUF_TIMELOCK(bp, 680 LK_EXCLUSIVE | LK_SLEEPFAIL, 681 "vinvalbuf", slpflag, slptimeo); 682 if (error == ENOLCK) 683 break; 684 splx(s); 685 return (error); 686 } 687 /* 688 * XXX Since there are no node locks for NFS, I 689 * believe there is a slight chance that a delayed 690 * write will occur while sleeping just above, so 691 * check for it. Note that vfs_bio_awrite expects 692 * buffers to reside on a queue, while VOP_BWRITE and 693 * brelse do not. 694 */ 695 if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && 696 (flags & V_SAVE)) { 697 698 if (bp->b_vp == vp) { 699 if (bp->b_flags & B_CLUSTEROK) { 700 BUF_UNLOCK(bp); 701 vfs_bio_awrite(bp); 702 } else { 703 bremfree(bp); 704 bp->b_flags |= B_ASYNC; 705 VOP_BWRITE(bp->b_vp, bp); 706 } 707 } else { 708 bremfree(bp); 709 (void) VOP_BWRITE(bp->b_vp, bp); 710 } 711 break; 712 } 713 bremfree(bp); 714 bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF); 715 bp->b_flags &= ~B_ASYNC; 716 brelse(bp); 717 } 718 } 719 720 while (vp->v_numoutput > 0) { 721 vp->v_flag |= VBWAIT; 722 tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); 723 } 724 725 splx(s); 726 727 /* 728 * Destroy the copy in the VM cache, too. 729 */ 730 simple_lock(&vp->v_interlock); 731 object = vp->v_object; 732 if (object != NULL) { 733 vm_object_page_remove(object, 0, 0, 734 (flags & V_SAVE) ? 
TRUE : FALSE); 735 } 736 simple_unlock(&vp->v_interlock); 737 738 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd)) 739 panic("vinvalbuf: flush failed"); 740 return (0); 741 } 742 743 /* 744 * Truncate a file's buffer and pages to a specified length. This 745 * is in lieu of the old vinvalbuf mechanism, which performed unneeded 746 * sync activity. 747 */ 748 int 749 vtruncbuf(vp, cred, p, length, blksize) 750 register struct vnode *vp; 751 struct ucred *cred; 752 struct proc *p; 753 off_t length; 754 int blksize; 755 { 756 register struct buf *bp; 757 struct buf *nbp; 758 int s, anyfreed; 759 int trunclbn; 760 761 /* 762 * Round up to the *next* lbn. 763 */ 764 trunclbn = (length + blksize - 1) / blksize; 765 766 s = splbio(); 767 restart: 768 anyfreed = 1; 769 for (;anyfreed;) { 770 anyfreed = 0; 771 for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 772 nbp = TAILQ_NEXT(bp, b_vnbufs); 773 if (bp->b_lblkno >= trunclbn) { 774 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { 775 BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL); 776 goto restart; 777 } else { 778 bremfree(bp); 779 bp->b_flags |= (B_INVAL | B_RELBUF); 780 bp->b_flags &= ~B_ASYNC; 781 brelse(bp); 782 anyfreed = 1; 783 } 784 if (nbp && (((nbp->b_xflags & B_VNCLEAN) == 0)|| 785 (nbp->b_vp != vp) || 786 (nbp->b_flags & B_DELWRI))) { 787 goto restart; 788 } 789 } 790 } 791 792 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 793 nbp = TAILQ_NEXT(bp, b_vnbufs); 794 if (bp->b_lblkno >= trunclbn) { 795 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { 796 BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL); 797 goto restart; 798 } else { 799 bremfree(bp); 800 bp->b_flags |= (B_INVAL | B_RELBUF); 801 bp->b_flags &= ~B_ASYNC; 802 brelse(bp); 803 anyfreed = 1; 804 } 805 if (nbp && (((nbp->b_xflags & B_VNDIRTY) == 0)|| 806 (nbp->b_vp != vp) || 807 (nbp->b_flags & B_DELWRI) == 0)) { 808 goto restart; 809 } 810 } 811 } 812 } 813 814 if (length > 0) { 815 restartsync: 816 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 817 nbp = TAILQ_NEXT(bp, b_vnbufs); 818 if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) { 819 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { 820 BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL); 821 goto restart; 822 } else { 823 bremfree(bp); 824 if (bp->b_vp == vp) { 825 bp->b_flags |= B_ASYNC; 826 } else { 827 bp->b_flags &= ~B_ASYNC; 828 } 829 VOP_BWRITE(bp->b_vp, bp); 830 } 831 goto restartsync; 832 } 833 834 } 835 } 836 837 while (vp->v_numoutput > 0) { 838 vp->v_flag |= VBWAIT; 839 tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0); 840 } 841 842 splx(s); 843 844 vnode_pager_setsize(vp, length); 845 846 return (0); 847 } 848 849 /* 850 * Associate a buffer with a vnode. 851 */ 852 void 853 bgetvp(vp, bp) 854 register struct vnode *vp; 855 register struct buf *bp; 856 { 857 int s; 858 859 KASSERT(bp->b_vp == NULL, ("bgetvp: not free")); 860 861 vhold(vp); 862 bp->b_vp = vp; 863 bp->b_dev = vn_todev(vp); 864 /* 865 * Insert onto list for new vnode. 866 */ 867 s = splbio(); 868 bp->b_xflags |= B_VNCLEAN; 869 bp->b_xflags &= ~B_VNDIRTY; 870 TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs); 871 splx(s); 872 } 873 874 /* 875 * Disassociate a buffer from a vnode. 876 */ 877 void 878 brelvp(bp) 879 register struct buf *bp; 880 { 881 struct vnode *vp; 882 struct buflists *listheadp; 883 int s; 884 885 KASSERT(bp->b_vp != NULL, ("brelvp: NULL")); 886 887 /* 888 * Delete from old vnode list, if on one. 
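	 * The B_VNDIRTY and B_VNCLEAN xflags record which of the vnode's
	 * two buffer lists (v_dirtyblkhd or v_cleanblkhd) the buffer is
	 * currently on.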
	 */
	vp = bp->b_vp;
	s = splbio();
	if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) {
		if (bp->b_xflags & B_VNDIRTY)
			listheadp = &vp->v_dirtyblkhd;
		else
			listheadp = &vp->v_cleanblkhd;
		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
		bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN);
	}
	if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	splx(s);
	bp->b_vp = (struct vnode *) 0;
	vdrop(vp);
}

/*
 * The workitem queue.
 *
 * It is useful to delay writes of file data and filesystem metadata
 * for tens of seconds so that quickly created and deleted files need
 * not waste disk bandwidth being created and removed. To realize this,
 * we append vnodes to a "workitem" queue. When running with a soft
 * updates implementation, most pending metadata dependencies should
 * not wait for more than a few seconds. Thus, block devices that have
 * filesystems mounted on them are delayed only about half the time that
 * file data is delayed. Similarly, directory updates are more critical,
 * so they are delayed only about a third of the time that file data is
 * delayed. Thus, there are SYNCER_MAXDELAY queues that are processed
 * round-robin at a rate of one each second (driven off the filesystem
 * syncer process). The syncer_delayno variable indicates the next queue
 * that is to be processed. Items that need to be processed soon are
 * placed in this queue:
 *
 *	syncer_workitem_pending[syncer_delayno]
 *
 * A delay of fifteen seconds is done by placing the request fifteen
 * entries later in the queue:
 *
 *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
 *
 */

/*
 * Add an item to the syncer work queue.
 */
static void
vn_syncer_add_to_worklist(struct vnode *vp, int delay)
{
	int s, slot;

	s = splbio();

	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
	}

	if (delay > syncer_maxdelay - 2)
		delay = syncer_maxdelay - 2;
	slot = (syncer_delayno + delay) & syncer_mask;

	LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
	vp->v_flag |= VONWORKLST;
	splx(s);
}

struct proc *updateproc;
static void sched_sync __P((void));
static struct kproc_desc up_kp = {
	"syncer",
	sched_sync,
	&updateproc
};
SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)

/*
 * System filesystem synchronizer daemon.
 */
void
sched_sync(void)
{
	struct synclist *slp;
	struct vnode *vp;
	long starttime;
	int s;
	struct proc *p = updateproc;

	p->p_flag |= P_BUFEXHAUST;

	for (;;) {
		starttime = time_second;

		/*
		 * Push files whose dirty time has expired. Be careful
		 * of interrupt race on slp queue.
		 */
		s = splbio();
		slp = &syncer_workitem_pending[syncer_delayno];
		syncer_delayno += 1;
		if (syncer_delayno == syncer_maxdelay)
			syncer_delayno = 0;
		splx(s);

		while ((vp = LIST_FIRST(slp)) != NULL) {
			if (VOP_ISLOCKED(vp) == 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
				(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
				VOP_UNLOCK(vp, 0, p);
			}
			s = splbio();
			if (LIST_FIRST(slp) == vp) {
				/*
				 * Note: v_tag VT_VFS vps can remain on the
				 * worklist too with no dirty blocks, but
				 * since sync_fsync() moves them to a different
				 * slot we are safe.
1008 */ 1009 if (TAILQ_EMPTY(&vp->v_dirtyblkhd) && 1010 vp->v_type != VBLK) 1011 panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag); 1012 /* 1013 * Put us back on the worklist. The worklist 1014 * routine will remove us from our current 1015 * position and then add us back in at a later 1016 * position. 1017 */ 1018 vn_syncer_add_to_worklist(vp, syncdelay); 1019 } 1020 splx(s); 1021 } 1022 1023 /* 1024 * Do soft update processing. 1025 */ 1026 if (bioops.io_sync) 1027 (*bioops.io_sync)(NULL); 1028 1029 /* 1030 * The variable rushjob allows the kernel to speed up the 1031 * processing of the filesystem syncer process. A rushjob 1032 * value of N tells the filesystem syncer to process the next 1033 * N seconds worth of work on its queue ASAP. Currently rushjob 1034 * is used by the soft update code to speed up the filesystem 1035 * syncer process when the incore state is getting so far 1036 * ahead of the disk that the kernel memory pool is being 1037 * threatened with exhaustion. 1038 */ 1039 if (rushjob > 0) { 1040 rushjob -= 1; 1041 continue; 1042 } 1043 /* 1044 * If it has taken us less than a second to process the 1045 * current work, then wait. Otherwise start right over 1046 * again. We can still lose time if any single round 1047 * takes more than two seconds, but it does not really 1048 * matter as we are just trying to generally pace the 1049 * filesystem activity. 1050 */ 1051 if (time_second == starttime) 1052 tsleep(&lbolt, PPAUSE, "syncer", 0); 1053 } 1054 } 1055 1056 /* 1057 * Request the syncer daemon to speed up its work. 1058 * We never push it to speed up more than half of its 1059 * normal turn time, otherwise it could take over the cpu. 1060 */ 1061 int 1062 speedup_syncer() 1063 { 1064 int s; 1065 1066 s = splhigh(); 1067 if (updateproc->p_wchan == &lbolt) 1068 setrunnable(updateproc); 1069 splx(s); 1070 if (rushjob < syncdelay / 2) { 1071 rushjob += 1; 1072 stat_rush_requests += 1; 1073 return (1); 1074 } 1075 return(0); 1076 } 1077 1078 /* 1079 * Associate a p-buffer with a vnode. 1080 * 1081 * Also sets B_PAGING flag to indicate that vnode is not fully associated 1082 * with the buffer. i.e. the bp has not been linked into the vnode or 1083 * ref-counted. 1084 */ 1085 void 1086 pbgetvp(vp, bp) 1087 register struct vnode *vp; 1088 register struct buf *bp; 1089 { 1090 1091 KASSERT(bp->b_vp == NULL, ("pbgetvp: not free")); 1092 1093 bp->b_vp = vp; 1094 bp->b_flags |= B_PAGING; 1095 bp->b_dev = vn_todev(vp); 1096 } 1097 1098 /* 1099 * Disassociate a p-buffer from a vnode. 1100 */ 1101 void 1102 pbrelvp(bp) 1103 register struct buf *bp; 1104 { 1105 1106 KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL")); 1107 1108 #if !defined(MAX_PERF) 1109 /* XXX REMOVE ME */ 1110 if (bp->b_vnbufs.tqe_next != NULL) { 1111 panic( 1112 "relpbuf(): b_vp was probably reassignbuf()d %p %x", 1113 bp, 1114 (int)bp->b_flags 1115 ); 1116 } 1117 #endif 1118 bp->b_vp = (struct vnode *) 0; 1119 bp->b_flags &= ~B_PAGING; 1120 } 1121 1122 void 1123 pbreassignbuf(bp, newvp) 1124 struct buf *bp; 1125 struct vnode *newvp; 1126 { 1127 #if !defined(MAX_PERF) 1128 if ((bp->b_flags & B_PAGING) == 0) { 1129 panic( 1130 "pbreassignbuf() on non phys bp %p", 1131 bp 1132 ); 1133 } 1134 #endif 1135 bp->b_vp = newvp; 1136 } 1137 1138 /* 1139 * Reassign a buffer from one vnode to another. 1140 * Used to assign file specific control information 1141 * (indirect blocks) to the vnode to which they belong. 
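 *
 * As a side effect the buffer is moved onto the clean or dirty list of its
 * (possibly new) vnode according to B_DELWRI, and a vnode gaining its first
 * dirty buffer is placed on the syncer worklist.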
1142 */ 1143 void 1144 reassignbuf(bp, newvp) 1145 register struct buf *bp; 1146 register struct vnode *newvp; 1147 { 1148 struct buflists *listheadp; 1149 int delay; 1150 int s; 1151 1152 if (newvp == NULL) { 1153 printf("reassignbuf: NULL"); 1154 return; 1155 } 1156 ++reassignbufcalls; 1157 1158 #if !defined(MAX_PERF) 1159 /* 1160 * B_PAGING flagged buffers cannot be reassigned because their vp 1161 * is not fully linked in. 1162 */ 1163 if (bp->b_flags & B_PAGING) 1164 panic("cannot reassign paging buffer"); 1165 #endif 1166 1167 s = splbio(); 1168 /* 1169 * Delete from old vnode list, if on one. 1170 */ 1171 if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) { 1172 if (bp->b_xflags & B_VNDIRTY) 1173 listheadp = &bp->b_vp->v_dirtyblkhd; 1174 else 1175 listheadp = &bp->b_vp->v_cleanblkhd; 1176 TAILQ_REMOVE(listheadp, bp, b_vnbufs); 1177 bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN); 1178 if (bp->b_vp != newvp) { 1179 vdrop(bp->b_vp); 1180 bp->b_vp = NULL; /* for clarification */ 1181 } 1182 } 1183 /* 1184 * If dirty, put on list of dirty buffers; otherwise insert onto list 1185 * of clean buffers. 1186 */ 1187 if (bp->b_flags & B_DELWRI) { 1188 struct buf *tbp; 1189 1190 listheadp = &newvp->v_dirtyblkhd; 1191 if ((newvp->v_flag & VONWORKLST) == 0) { 1192 switch (newvp->v_type) { 1193 case VDIR: 1194 delay = dirdelay; 1195 break; 1196 case VBLK: 1197 if (newvp->v_specmountpoint != NULL) { 1198 delay = metadelay; 1199 break; 1200 } 1201 /* fall through */ 1202 default: 1203 delay = filedelay; 1204 } 1205 vn_syncer_add_to_worklist(newvp, delay); 1206 } 1207 bp->b_xflags |= B_VNDIRTY; 1208 tbp = TAILQ_FIRST(listheadp); 1209 if (tbp == NULL || 1210 bp->b_lblkno == 0 || 1211 (bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) { 1212 TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs); 1213 ++reassignbufsortgood; 1214 } else if (bp->b_lblkno < 0) { 1215 TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs); 1216 ++reassignbufsortgood; 1217 } else if (reassignbufmethod == 1) { 1218 /* 1219 * New sorting algorithm, only handle sequential case, 1220 * otherwise guess. 1221 */ 1222 if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL && 1223 (tbp->b_xflags & B_VNDIRTY)) { 1224 TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); 1225 ++reassignbufsortgood; 1226 } else { 1227 TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs); 1228 ++reassignbufsortbad; 1229 } 1230 } else { 1231 /* 1232 * Old sorting algorithm, scan queue and insert 1233 */ 1234 struct buf *ttbp; 1235 while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) && 1236 (ttbp->b_lblkno < bp->b_lblkno)) { 1237 ++reassignbufloops; 1238 tbp = ttbp; 1239 } 1240 TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); 1241 } 1242 } else { 1243 bp->b_xflags |= B_VNCLEAN; 1244 TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs); 1245 if ((newvp->v_flag & VONWORKLST) && 1246 TAILQ_EMPTY(&newvp->v_dirtyblkhd)) { 1247 newvp->v_flag &= ~VONWORKLST; 1248 LIST_REMOVE(newvp, v_synclist); 1249 } 1250 } 1251 if (bp->b_vp != newvp) { 1252 bp->b_vp = newvp; 1253 vhold(bp->b_vp); 1254 } 1255 splx(s); 1256 } 1257 1258 /* 1259 * Create a vnode for a block device. 1260 * Used for mounting the root file system. 
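 *
 * A minimal usage sketch (assuming the chosen root device has been stored
 * in the global "rootdev"; error handling omitted):
 *
 *	struct vnode *rootvp;
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("cannot set up root vnode");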
1261 */ 1262 int 1263 bdevvp(dev, vpp) 1264 dev_t dev; 1265 struct vnode **vpp; 1266 { 1267 register struct vnode *vp; 1268 struct vnode *nvp; 1269 int error; 1270 1271 if (dev == NODEV) { 1272 *vpp = NULLVP; 1273 return (ENXIO); 1274 } 1275 error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp); 1276 if (error) { 1277 *vpp = NULLVP; 1278 return (error); 1279 } 1280 vp = nvp; 1281 vp->v_type = VBLK; 1282 addalias(vp, dev); 1283 *vpp = vp; 1284 return (0); 1285 } 1286 1287 /* 1288 * Add vnode to the alias list hung off the dev_t. 1289 * 1290 * The reason for this gunk is that multiple vnodes can reference 1291 * the same physical device, so checking vp->v_usecount to see 1292 * how many users there are is inadequate; the v_usecount for 1293 * the vnodes need to be accumulated. vcount() does that. 1294 */ 1295 void 1296 addaliasu(nvp, nvp_rdev) 1297 struct vnode *nvp; 1298 udev_t nvp_rdev; 1299 { 1300 1301 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 1302 panic("addaliasu on non-special vnode"); 1303 1304 nvp->v_rdev = udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0); 1305 simple_lock(&spechash_slock); 1306 SLIST_INSERT_HEAD(&nvp->v_rdev->si_hlist, nvp, v_specnext); 1307 simple_unlock(&spechash_slock); 1308 } 1309 1310 void 1311 addalias(nvp, dev) 1312 struct vnode *nvp; 1313 dev_t dev; 1314 { 1315 1316 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 1317 panic("addalias on non-special vnode"); 1318 1319 nvp->v_rdev = dev; 1320 simple_lock(&spechash_slock); 1321 SLIST_INSERT_HEAD(&dev->si_hlist, nvp, v_specnext); 1322 simple_unlock(&spechash_slock); 1323 } 1324 1325 /* 1326 * Grab a particular vnode from the free list, increment its 1327 * reference count and lock it. The vnode lock bit is set if the 1328 * vnode is being eliminated in vgone. The process is awakened 1329 * when the transition is completed, and an error returned to 1330 * indicate that the vnode is no longer usable (possibly having 1331 * been changed to a new file system type). 1332 */ 1333 int 1334 vget(vp, flags, p) 1335 register struct vnode *vp; 1336 int flags; 1337 struct proc *p; 1338 { 1339 int error; 1340 1341 /* 1342 * If the vnode is in the process of being cleaned out for 1343 * another use, we wait for the cleaning to finish and then 1344 * return failure. Cleaning is determined by checking that 1345 * the VXLOCK flag is set. 1346 */ 1347 if ((flags & LK_INTERLOCK) == 0) { 1348 simple_lock(&vp->v_interlock); 1349 } 1350 if (vp->v_flag & VXLOCK) { 1351 vp->v_flag |= VXWANT; 1352 simple_unlock(&vp->v_interlock); 1353 tsleep((caddr_t)vp, PINOD, "vget", 0); 1354 return (ENOENT); 1355 } 1356 1357 vp->v_usecount++; 1358 1359 if (VSHOULDBUSY(vp)) 1360 vbusy(vp); 1361 if (flags & LK_TYPE_MASK) { 1362 if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { 1363 /* 1364 * must expand vrele here because we do not want 1365 * to call VOP_INACTIVE if the reference count 1366 * drops back to zero since it was never really 1367 * active. We must remove it from the free list 1368 * before sleeping so that multiple processes do 1369 * not try to recycle it. 1370 */ 1371 simple_lock(&vp->v_interlock); 1372 vp->v_usecount--; 1373 if (VSHOULDFREE(vp)) 1374 vfree(vp); 1375 simple_unlock(&vp->v_interlock); 1376 } 1377 return (error); 1378 } 1379 simple_unlock(&vp->v_interlock); 1380 return (0); 1381 } 1382 1383 void 1384 vref(struct vnode *vp) 1385 { 1386 simple_lock(&vp->v_interlock); 1387 vp->v_usecount++; 1388 simple_unlock(&vp->v_interlock); 1389 } 1390 1391 /* 1392 * Vnode put/release. 
1393 * If count drops to zero, call inactive routine and return to freelist. 1394 */ 1395 void 1396 vrele(vp) 1397 struct vnode *vp; 1398 { 1399 struct proc *p = curproc; /* XXX */ 1400 1401 KASSERT(vp != NULL, ("vrele: null vp")); 1402 1403 simple_lock(&vp->v_interlock); 1404 1405 if (vp->v_usecount > 1) { 1406 1407 vp->v_usecount--; 1408 simple_unlock(&vp->v_interlock); 1409 1410 return; 1411 } 1412 1413 if (vp->v_usecount == 1) { 1414 1415 vp->v_usecount--; 1416 if (VSHOULDFREE(vp)) 1417 vfree(vp); 1418 /* 1419 * If we are doing a vput, the node is already locked, and we must 1420 * call VOP_INACTIVE with the node locked. So, in the case of 1421 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1422 */ 1423 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { 1424 VOP_INACTIVE(vp, p); 1425 } 1426 1427 } else { 1428 #ifdef DIAGNOSTIC 1429 vprint("vrele: negative ref count", vp); 1430 simple_unlock(&vp->v_interlock); 1431 #endif 1432 panic("vrele: negative ref cnt"); 1433 } 1434 } 1435 1436 void 1437 vput(vp) 1438 struct vnode *vp; 1439 { 1440 struct proc *p = curproc; /* XXX */ 1441 1442 KASSERT(vp != NULL, ("vput: null vp")); 1443 1444 simple_lock(&vp->v_interlock); 1445 1446 if (vp->v_usecount > 1) { 1447 1448 vp->v_usecount--; 1449 VOP_UNLOCK(vp, LK_INTERLOCK, p); 1450 return; 1451 1452 } 1453 1454 if (vp->v_usecount == 1) { 1455 1456 vp->v_usecount--; 1457 if (VSHOULDFREE(vp)) 1458 vfree(vp); 1459 /* 1460 * If we are doing a vput, the node is already locked, and we must 1461 * call VOP_INACTIVE with the node locked. So, in the case of 1462 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1463 */ 1464 simple_unlock(&vp->v_interlock); 1465 VOP_INACTIVE(vp, p); 1466 1467 } else { 1468 #ifdef DIAGNOSTIC 1469 vprint("vput: negative ref count", vp); 1470 #endif 1471 panic("vput: negative ref cnt"); 1472 } 1473 } 1474 1475 /* 1476 * Somebody doesn't want the vnode recycled. 1477 */ 1478 void 1479 vhold(vp) 1480 register struct vnode *vp; 1481 { 1482 int s; 1483 1484 s = splbio(); 1485 vp->v_holdcnt++; 1486 if (VSHOULDBUSY(vp)) 1487 vbusy(vp); 1488 splx(s); 1489 } 1490 1491 /* 1492 * One less who cares about this vnode. 1493 */ 1494 void 1495 vdrop(vp) 1496 register struct vnode *vp; 1497 { 1498 int s; 1499 1500 s = splbio(); 1501 if (vp->v_holdcnt <= 0) 1502 panic("vdrop: holdcnt"); 1503 vp->v_holdcnt--; 1504 if (VSHOULDFREE(vp)) 1505 vfree(vp); 1506 splx(s); 1507 } 1508 1509 /* 1510 * Remove any vnodes in the vnode table belonging to mount point mp. 1511 * 1512 * If MNT_NOFORCE is specified, there should not be any active ones, 1513 * return error if any are found (nb: this is a user error, not a 1514 * system error). If MNT_FORCE is specified, detach any active vnodes 1515 * that are found. 1516 */ 1517 #ifdef DIAGNOSTIC 1518 static int busyprt = 0; /* print out busy vnodes */ 1519 SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); 1520 #endif 1521 1522 int 1523 vflush(mp, skipvp, flags) 1524 struct mount *mp; 1525 struct vnode *skipvp; 1526 int flags; 1527 { 1528 struct proc *p = curproc; /* XXX */ 1529 struct vnode *vp, *nvp; 1530 int busy = 0; 1531 1532 simple_lock(&mntvnode_slock); 1533 loop: 1534 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { 1535 /* 1536 * Make sure this vnode wasn't reclaimed in getnewvnode(). 1537 * Start over if it has (it won't be on the list anymore). 1538 */ 1539 if (vp->v_mount != mp) 1540 goto loop; 1541 nvp = vp->v_mntvnodes.le_next; 1542 /* 1543 * Skip over a selected vnode. 
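		 * (A NULL skipvp exempts nothing; callers pass a specific
		 * vnode here only when one vnode must survive the flush.)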
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file vnodes
		 * open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}

		/*
		 * With v_usecount == 0, all we need to do is clear out the
		 * vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}

		/*
		 * If FORCECLOSE is set, forcibly close the vnode. For block
		 * or character devices, revert to an anonymous device. For
		 * all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *) 0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;
	vm_object_t obj;

	/*
	 * Check to see if the vnode is in use. If so, we have to reference it
	 * before we clean it out so that its count cannot fall to zero and
	 * generate a race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount))
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or brought into use while we
	 * clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * Clean out any buffers associated with the vnode.
	 */
	vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	if ((obj = vp->v_object) != NULL) {
		if (obj->ref_count == 0) {
			/*
			 * This is a normal way of shutting down the object/vnode
			 * association.
			 */
			vm_object_terminate(obj);
		} else {
			/*
			 * Woe to the process that tries to page now :-).
			 */
			vm_pager_deallocate(obj);
		}
	}

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
1675 */ 1676 VOP_UNLOCK(vp, 0, p); 1677 } 1678 /* 1679 * Reclaim the vnode. 1680 */ 1681 if (VOP_RECLAIM(vp, p)) 1682 panic("vclean: cannot reclaim"); 1683 1684 if (active) 1685 vrele(vp); 1686 1687 cache_purge(vp); 1688 if (vp->v_vnlock) { 1689 FREE(vp->v_vnlock, M_VNODE); 1690 vp->v_vnlock = NULL; 1691 } 1692 1693 if (VSHOULDFREE(vp)) 1694 vfree(vp); 1695 1696 /* 1697 * Done with purge, notify sleepers of the grim news. 1698 */ 1699 vp->v_op = dead_vnodeop_p; 1700 vn_pollgone(vp); 1701 vp->v_tag = VT_NON; 1702 vp->v_flag &= ~VXLOCK; 1703 if (vp->v_flag & VXWANT) { 1704 vp->v_flag &= ~VXWANT; 1705 wakeup((caddr_t) vp); 1706 } 1707 } 1708 1709 /* 1710 * Eliminate all activity associated with the requested vnode 1711 * and with all vnodes aliased to the requested vnode. 1712 */ 1713 int 1714 vop_revoke(ap) 1715 struct vop_revoke_args /* { 1716 struct vnode *a_vp; 1717 int a_flags; 1718 } */ *ap; 1719 { 1720 struct vnode *vp, *vq; 1721 dev_t dev; 1722 1723 KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke")); 1724 1725 vp = ap->a_vp; 1726 /* 1727 * If a vgone (or vclean) is already in progress, 1728 * wait until it is done and return. 1729 */ 1730 if (vp->v_flag & VXLOCK) { 1731 vp->v_flag |= VXWANT; 1732 simple_unlock(&vp->v_interlock); 1733 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); 1734 return (0); 1735 } 1736 dev = vp->v_rdev; 1737 for (;;) { 1738 simple_lock(&spechash_slock); 1739 vq = SLIST_FIRST(&dev->si_hlist); 1740 simple_unlock(&spechash_slock); 1741 if (!vq) 1742 break; 1743 vgone(vq); 1744 } 1745 return (0); 1746 } 1747 1748 /* 1749 * Recycle an unused vnode to the front of the free list. 1750 * Release the passed interlock if the vnode will be recycled. 1751 */ 1752 int 1753 vrecycle(vp, inter_lkp, p) 1754 struct vnode *vp; 1755 struct simplelock *inter_lkp; 1756 struct proc *p; 1757 { 1758 1759 simple_lock(&vp->v_interlock); 1760 if (vp->v_usecount == 0) { 1761 if (inter_lkp) { 1762 simple_unlock(inter_lkp); 1763 } 1764 vgonel(vp, p); 1765 return (1); 1766 } 1767 simple_unlock(&vp->v_interlock); 1768 return (0); 1769 } 1770 1771 /* 1772 * Eliminate all activity associated with a vnode 1773 * in preparation for reuse. 1774 */ 1775 void 1776 vgone(vp) 1777 register struct vnode *vp; 1778 { 1779 struct proc *p = curproc; /* XXX */ 1780 1781 simple_lock(&vp->v_interlock); 1782 vgonel(vp, p); 1783 } 1784 1785 /* 1786 * vgone, with the vp interlock held. 1787 */ 1788 static void 1789 vgonel(vp, p) 1790 struct vnode *vp; 1791 struct proc *p; 1792 { 1793 int s; 1794 1795 /* 1796 * If a vgone (or vclean) is already in progress, 1797 * wait until it is done and return. 1798 */ 1799 if (vp->v_flag & VXLOCK) { 1800 vp->v_flag |= VXWANT; 1801 simple_unlock(&vp->v_interlock); 1802 tsleep((caddr_t)vp, PINOD, "vgone", 0); 1803 return; 1804 } 1805 1806 /* 1807 * Clean out the filesystem specific data. 1808 */ 1809 vclean(vp, DOCLOSE, p); 1810 simple_lock(&vp->v_interlock); 1811 1812 /* 1813 * Delete from old mount point vnode list, if on one. 1814 */ 1815 if (vp->v_mount != NULL) 1816 insmntque(vp, (struct mount *)0); 1817 /* 1818 * If special device, remove it from special device alias list 1819 * if it is on one. 1820 */ 1821 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_rdev != NULL) { 1822 simple_lock(&spechash_slock); 1823 SLIST_REMOVE(&vp->v_hashchain, vp, vnode, v_specnext); 1824 simple_unlock(&spechash_slock); 1825 vp->v_rdev = NULL; 1826 } 1827 1828 /* 1829 * If it is on the freelist and not already at the head, 1830 * move it to the head of the list. 
The test of the back 1831 * pointer and the reference count of zero is because 1832 * it will be removed from the free list by getnewvnode, 1833 * but will not have its reference count incremented until 1834 * after calling vgone. If the reference count were 1835 * incremented first, vgone would (incorrectly) try to 1836 * close the previous instance of the underlying object. 1837 */ 1838 if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) { 1839 s = splbio(); 1840 simple_lock(&vnode_free_list_slock); 1841 if (vp->v_flag & VFREE) { 1842 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1843 } else if (vp->v_flag & VTBFREE) { 1844 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 1845 vp->v_flag &= ~VTBFREE; 1846 freevnodes++; 1847 } else 1848 freevnodes++; 1849 vp->v_flag |= VFREE; 1850 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 1851 simple_unlock(&vnode_free_list_slock); 1852 splx(s); 1853 } 1854 1855 vp->v_type = VBAD; 1856 simple_unlock(&vp->v_interlock); 1857 } 1858 1859 /* 1860 * Lookup a vnode by device number. 1861 */ 1862 int 1863 vfinddev(dev, type, vpp) 1864 dev_t dev; 1865 enum vtype type; 1866 struct vnode **vpp; 1867 { 1868 struct vnode *vp; 1869 1870 simple_lock(&spechash_slock); 1871 SLIST_FOREACH(vp, &dev->si_hlist, v_specnext) { 1872 if (type == vp->v_type) { 1873 *vpp = vp; 1874 simple_unlock(&spechash_slock); 1875 return (1); 1876 } 1877 } 1878 simple_unlock(&spechash_slock); 1879 return (0); 1880 } 1881 1882 /* 1883 * Calculate the total number of references to a special device. 1884 */ 1885 int 1886 vcount(vp) 1887 struct vnode *vp; 1888 { 1889 struct vnode *vq, *vnext; 1890 int count; 1891 1892 count = 0; 1893 simple_lock(&spechash_slock); 1894 SLIST_FOREACH(vq, &vp->v_hashchain, v_specnext) 1895 count += vq->v_usecount; 1896 simple_unlock(&spechash_slock); 1897 return (count); 1898 } 1899 1900 /* 1901 * Print out a description of a vnode. 1902 */ 1903 static char *typename[] = 1904 {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; 1905 1906 void 1907 vprint(label, vp) 1908 char *label; 1909 struct vnode *vp; 1910 { 1911 char buf[96]; 1912 1913 if (label != NULL) 1914 printf("%s: %p: ", label, (void *)vp); 1915 else 1916 printf("%p: ", (void *)vp); 1917 printf("type %s, usecount %d, writecount %d, refcount %d,", 1918 typename[vp->v_type], vp->v_usecount, vp->v_writecount, 1919 vp->v_holdcnt); 1920 buf[0] = '\0'; 1921 if (vp->v_flag & VROOT) 1922 strcat(buf, "|VROOT"); 1923 if (vp->v_flag & VTEXT) 1924 strcat(buf, "|VTEXT"); 1925 if (vp->v_flag & VSYSTEM) 1926 strcat(buf, "|VSYSTEM"); 1927 if (vp->v_flag & VXLOCK) 1928 strcat(buf, "|VXLOCK"); 1929 if (vp->v_flag & VXWANT) 1930 strcat(buf, "|VXWANT"); 1931 if (vp->v_flag & VBWAIT) 1932 strcat(buf, "|VBWAIT"); 1933 if (vp->v_flag & VDOOMED) 1934 strcat(buf, "|VDOOMED"); 1935 if (vp->v_flag & VFREE) 1936 strcat(buf, "|VFREE"); 1937 if (vp->v_flag & VOBJBUF) 1938 strcat(buf, "|VOBJBUF"); 1939 if (buf[0] != '\0') 1940 printf(" flags (%s)", &buf[1]); 1941 if (vp->v_data == NULL) { 1942 printf("\n"); 1943 } else { 1944 printf("\n\t"); 1945 VOP_PRINT(vp); 1946 } 1947 } 1948 1949 #ifdef DDB 1950 #include <ddb/ddb.h> 1951 /* 1952 * List all of the locked vnodes in the system. 1953 * Called when debugging the kernel. 
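 * Invoked from the DDB prompt as "show lockedvnodes".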
1954 */ 1955 DB_SHOW_COMMAND(lockedvnodes, lockedvnodes) 1956 { 1957 struct proc *p = curproc; /* XXX */ 1958 struct mount *mp, *nmp; 1959 struct vnode *vp; 1960 1961 printf("Locked vnodes\n"); 1962 simple_lock(&mountlist_slock); 1963 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1964 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 1965 nmp = mp->mnt_list.cqe_next; 1966 continue; 1967 } 1968 for (vp = mp->mnt_vnodelist.lh_first; 1969 vp != NULL; 1970 vp = vp->v_mntvnodes.le_next) { 1971 if (VOP_ISLOCKED(vp)) 1972 vprint((char *)0, vp); 1973 } 1974 simple_lock(&mountlist_slock); 1975 nmp = mp->mnt_list.cqe_next; 1976 vfs_unbusy(mp, p); 1977 } 1978 simple_unlock(&mountlist_slock); 1979 } 1980 #endif 1981 1982 /* 1983 * Top level filesystem related information gathering. 1984 */ 1985 static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS); 1986 1987 static int 1988 vfs_sysctl SYSCTL_HANDLER_ARGS 1989 { 1990 int *name = (int *)arg1 - 1; /* XXX */ 1991 u_int namelen = arg2 + 1; /* XXX */ 1992 struct vfsconf *vfsp; 1993 1994 #if 1 || defined(COMPAT_PRELITE2) 1995 /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */ 1996 if (namelen == 1) 1997 return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); 1998 #endif 1999 2000 #ifdef notyet 2001 /* all sysctl names at this level are at least name and field */ 2002 if (namelen < 2) 2003 return (ENOTDIR); /* overloaded */ 2004 if (name[0] != VFS_GENERIC) { 2005 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 2006 if (vfsp->vfc_typenum == name[0]) 2007 break; 2008 if (vfsp == NULL) 2009 return (EOPNOTSUPP); 2010 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, 2011 oldp, oldlenp, newp, newlen, p)); 2012 } 2013 #endif 2014 switch (name[1]) { 2015 case VFS_MAXTYPENUM: 2016 if (namelen != 2) 2017 return (ENOTDIR); 2018 return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); 2019 case VFS_CONF: 2020 if (namelen != 3) 2021 return (ENOTDIR); /* overloaded */ 2022 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 2023 if (vfsp->vfc_typenum == name[2]) 2024 break; 2025 if (vfsp == NULL) 2026 return (EOPNOTSUPP); 2027 return (SYSCTL_OUT(req, vfsp, sizeof *vfsp)); 2028 } 2029 return (EOPNOTSUPP); 2030 } 2031 2032 SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, 2033 "Generic filesystem"); 2034 2035 #if 1 || defined(COMPAT_PRELITE2) 2036 2037 static int 2038 sysctl_ovfs_conf SYSCTL_HANDLER_ARGS 2039 { 2040 int error; 2041 struct vfsconf *vfsp; 2042 struct ovfsconf ovfs; 2043 2044 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 2045 ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ 2046 strcpy(ovfs.vfc_name, vfsp->vfc_name); 2047 ovfs.vfc_index = vfsp->vfc_typenum; 2048 ovfs.vfc_refcount = vfsp->vfc_refcount; 2049 ovfs.vfc_flags = vfsp->vfc_flags; 2050 error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); 2051 if (error) 2052 return error; 2053 } 2054 return 0; 2055 } 2056 2057 #endif /* 1 || COMPAT_PRELITE2 */ 2058 2059 #if 0 2060 #define KINFO_VNODESLOP 10 2061 /* 2062 * Dump vnode list (via sysctl). 2063 * Copyout address of vnode followed by vnode. 
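 * (This handler is currently compiled out; see the surrounding #if 0 and
 * the XXX comment below about crashes when exporting the vnode list.)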
2064 */ 2065 /* ARGSUSED */ 2066 static int 2067 sysctl_vnode SYSCTL_HANDLER_ARGS 2068 { 2069 struct proc *p = curproc; /* XXX */ 2070 struct mount *mp, *nmp; 2071 struct vnode *nvp, *vp; 2072 int error; 2073 2074 #define VPTRSZ sizeof (struct vnode *) 2075 #define VNODESZ sizeof (struct vnode) 2076 2077 req->lock = 0; 2078 if (!req->oldptr) /* Make an estimate */ 2079 return (SYSCTL_OUT(req, 0, 2080 (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); 2081 2082 simple_lock(&mountlist_slock); 2083 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 2084 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 2085 nmp = mp->mnt_list.cqe_next; 2086 continue; 2087 } 2088 again: 2089 simple_lock(&mntvnode_slock); 2090 for (vp = mp->mnt_vnodelist.lh_first; 2091 vp != NULL; 2092 vp = nvp) { 2093 /* 2094 * Check that the vp is still associated with 2095 * this filesystem. RACE: could have been 2096 * recycled onto the same filesystem. 2097 */ 2098 if (vp->v_mount != mp) { 2099 simple_unlock(&mntvnode_slock); 2100 goto again; 2101 } 2102 nvp = vp->v_mntvnodes.le_next; 2103 simple_unlock(&mntvnode_slock); 2104 if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || 2105 (error = SYSCTL_OUT(req, vp, VNODESZ))) 2106 return (error); 2107 simple_lock(&mntvnode_slock); 2108 } 2109 simple_unlock(&mntvnode_slock); 2110 simple_lock(&mountlist_slock); 2111 nmp = mp->mnt_list.cqe_next; 2112 vfs_unbusy(mp, p); 2113 } 2114 simple_unlock(&mountlist_slock); 2115 2116 return (0); 2117 } 2118 #endif 2119 2120 /* 2121 * XXX 2122 * Exporting the vnode list on large systems causes them to crash. 2123 * Exporting the vnode list on medium systems causes sysctl to coredump. 2124 */ 2125 #if 0 2126 SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 2127 0, 0, sysctl_vnode, "S,vnode", ""); 2128 #endif 2129 2130 /* 2131 * Check to see if a filesystem is mounted on a block device. 2132 */ 2133 int 2134 vfs_mountedon(vp) 2135 struct vnode *vp; 2136 { 2137 2138 if (vp->v_specmountpoint != NULL) 2139 return (EBUSY); 2140 return (0); 2141 } 2142 2143 /* 2144 * Unmount all filesystems. The list is traversed in reverse order 2145 * of mounting to avoid dependencies. 2146 */ 2147 void 2148 vfs_unmountall() 2149 { 2150 struct mount *mp, *nmp; 2151 struct proc *p; 2152 int error; 2153 2154 if (curproc != NULL) 2155 p = curproc; 2156 else 2157 p = initproc; /* XXX XXX should this be proc0? */ 2158 /* 2159 * Since this only runs when rebooting, it is not interlocked. 2160 */ 2161 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 2162 nmp = mp->mnt_list.cqe_prev; 2163 error = dounmount(mp, MNT_FORCE, p); 2164 if (error) { 2165 printf("unmount of %s failed (", 2166 mp->mnt_stat.f_mntonname); 2167 if (error == EBUSY) 2168 printf("BUSY)\n"); 2169 else 2170 printf("%d)\n", error); 2171 } 2172 } 2173 } 2174 2175 /* 2176 * Build hash lists of net addresses and hang them off the mount point. 2177 * Called by ufs_mount() to set up the lists of export addresses. 
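 *
 * Each address family gets its own radix tree in nep->ne_rtable[]; the
 * netcred structure (export flags plus anonymous credential) is embedded
 * in the radix node that is inserted, so vfs_export_lookup() can later
 * match a client address against it.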
2178 */ 2179 static int 2180 vfs_hang_addrlist(mp, nep, argp) 2181 struct mount *mp; 2182 struct netexport *nep; 2183 struct export_args *argp; 2184 { 2185 register struct netcred *np; 2186 register struct radix_node_head *rnh; 2187 register int i; 2188 struct radix_node *rn; 2189 struct sockaddr *saddr, *smask = 0; 2190 struct domain *dom; 2191 int error; 2192 2193 if (argp->ex_addrlen == 0) { 2194 if (mp->mnt_flag & MNT_DEFEXPORTED) 2195 return (EPERM); 2196 np = &nep->ne_defexported; 2197 np->netc_exflags = argp->ex_flags; 2198 np->netc_anon = argp->ex_anon; 2199 np->netc_anon.cr_ref = 1; 2200 mp->mnt_flag |= MNT_DEFEXPORTED; 2201 return (0); 2202 } 2203 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 2204 np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); 2205 bzero((caddr_t) np, i); 2206 saddr = (struct sockaddr *) (np + 1); 2207 if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) 2208 goto out; 2209 if (saddr->sa_len > argp->ex_addrlen) 2210 saddr->sa_len = argp->ex_addrlen; 2211 if (argp->ex_masklen) { 2212 smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); 2213 error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); 2214 if (error) 2215 goto out; 2216 if (smask->sa_len > argp->ex_masklen) 2217 smask->sa_len = argp->ex_masklen; 2218 } 2219 i = saddr->sa_family; 2220 if ((rnh = nep->ne_rtable[i]) == 0) { 2221 /* 2222 * Seems silly to initialize every AF when most are not used, 2223 * do so on demand here 2224 */ 2225 for (dom = domains; dom; dom = dom->dom_next) 2226 if (dom->dom_family == i && dom->dom_rtattach) { 2227 dom->dom_rtattach((void **) &nep->ne_rtable[i], 2228 dom->dom_rtoffset); 2229 break; 2230 } 2231 if ((rnh = nep->ne_rtable[i]) == 0) { 2232 error = ENOBUFS; 2233 goto out; 2234 } 2235 } 2236 rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, 2237 np->netc_rnodes); 2238 if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ 2239 error = EPERM; 2240 goto out; 2241 } 2242 np->netc_exflags = argp->ex_flags; 2243 np->netc_anon = argp->ex_anon; 2244 np->netc_anon.cr_ref = 1; 2245 return (0); 2246 out: 2247 free(np, M_NETADDR); 2248 return (error); 2249 } 2250 2251 /* ARGSUSED */ 2252 static int 2253 vfs_free_netcred(rn, w) 2254 struct radix_node *rn; 2255 void *w; 2256 { 2257 register struct radix_node_head *rnh = (struct radix_node_head *) w; 2258 2259 (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); 2260 free((caddr_t) rn, M_NETADDR); 2261 return (0); 2262 } 2263 2264 /* 2265 * Free the net address hash lists that are hanging off the mount points. 
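 *
 * This is the teardown half of vfs_hang_addrlist(): vfs_export()
 * below calls it when MNT_DELEXPORT is requested.  Each per-address-
 * family radix tree is walked with vfs_free_netcred(), which deletes
 * and frees the individual export records, and then the tree head
 * itself is released and its table slot cleared.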
2266 */ 2267 static void 2268 vfs_free_addrlist(nep) 2269 struct netexport *nep; 2270 { 2271 register int i; 2272 register struct radix_node_head *rnh; 2273 2274 for (i = 0; i <= AF_MAX; i++) 2275 if ((rnh = nep->ne_rtable[i])) { 2276 (*rnh->rnh_walktree) (rnh, vfs_free_netcred, 2277 (caddr_t) rnh); 2278 free((caddr_t) rnh, M_RTABLE); 2279 nep->ne_rtable[i] = 0; 2280 } 2281 } 2282 2283 int 2284 vfs_export(mp, nep, argp) 2285 struct mount *mp; 2286 struct netexport *nep; 2287 struct export_args *argp; 2288 { 2289 int error; 2290 2291 if (argp->ex_flags & MNT_DELEXPORT) { 2292 if (mp->mnt_flag & MNT_EXPUBLIC) { 2293 vfs_setpublicfs(NULL, NULL, NULL); 2294 mp->mnt_flag &= ~MNT_EXPUBLIC; 2295 } 2296 vfs_free_addrlist(nep); 2297 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 2298 } 2299 if (argp->ex_flags & MNT_EXPORTED) { 2300 if (argp->ex_flags & MNT_EXPUBLIC) { 2301 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 2302 return (error); 2303 mp->mnt_flag |= MNT_EXPUBLIC; 2304 } 2305 if ((error = vfs_hang_addrlist(mp, nep, argp))) 2306 return (error); 2307 mp->mnt_flag |= MNT_EXPORTED; 2308 } 2309 return (0); 2310 } 2311 2312 2313 /* 2314 * Set the publicly exported filesystem (WebNFS). Currently, only 2315 * one public filesystem is possible in the spec (RFC 2054 and 2055) 2316 */ 2317 int 2318 vfs_setpublicfs(mp, nep, argp) 2319 struct mount *mp; 2320 struct netexport *nep; 2321 struct export_args *argp; 2322 { 2323 int error; 2324 struct vnode *rvp; 2325 char *cp; 2326 2327 /* 2328 * mp == NULL -> invalidate the current info, the FS is 2329 * no longer exported. May be called from either vfs_export 2330 * or unmount, so check if it hasn't already been done. 2331 */ 2332 if (mp == NULL) { 2333 if (nfs_pub.np_valid) { 2334 nfs_pub.np_valid = 0; 2335 if (nfs_pub.np_index != NULL) { 2336 FREE(nfs_pub.np_index, M_TEMP); 2337 nfs_pub.np_index = NULL; 2338 } 2339 } 2340 return (0); 2341 } 2342 2343 /* 2344 * Only one allowed at a time. 2345 */ 2346 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 2347 return (EBUSY); 2348 2349 /* 2350 * Get real filehandle for root of exported FS. 2351 */ 2352 bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); 2353 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 2354 2355 if ((error = VFS_ROOT(mp, &rvp))) 2356 return (error); 2357 2358 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 2359 return (error); 2360 2361 vput(rvp); 2362 2363 /* 2364 * If an indexfile was specified, pull it in. 2365 */ 2366 if (argp->ex_indexfile != NULL) { 2367 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 2368 M_WAITOK); 2369 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 2370 MAXNAMLEN, (size_t *)0); 2371 if (!error) { 2372 /* 2373 * Check for illegal filenames. 2374 */ 2375 for (cp = nfs_pub.np_index; *cp; cp++) { 2376 if (*cp == '/') { 2377 error = EINVAL; 2378 break; 2379 } 2380 } 2381 } 2382 if (error) { 2383 FREE(nfs_pub.np_index, M_TEMP); 2384 return (error); 2385 } 2386 } 2387 2388 nfs_pub.np_mount = mp; 2389 nfs_pub.np_valid = 1; 2390 return (0); 2391 } 2392 2393 struct netcred * 2394 vfs_export_lookup(mp, nep, nam) 2395 register struct mount *mp; 2396 struct netexport *nep; 2397 struct sockaddr *nam; 2398 { 2399 register struct netcred *np; 2400 register struct radix_node_head *rnh; 2401 struct sockaddr *saddr; 2402 2403 np = NULL; 2404 if (mp->mnt_flag & MNT_EXPORTED) { 2405 /* 2406 * Lookup in the export list first. 
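		 *
		 * The key is the client's address as it arrived with the
		 * request.  A hypothetical NFS-style caller (sketch only;
		 * the local names are illustrative) would consume the
		 * result roughly as follows:
		 *
		 *	np = vfs_export_lookup(mp, nep, nam);
		 *	if (np == NULL)
		 *		return (EACCES);	(host not exported to)
		 *	exflags = np->netc_exflags;	(e.g. MNT_EXRDONLY)
		 *	anoncred = &np->netc_anon;	(cred for root mapping)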
2407 */ 2408 if (nam != NULL) { 2409 saddr = nam; 2410 rnh = nep->ne_rtable[saddr->sa_family]; 2411 if (rnh != NULL) { 2412 np = (struct netcred *) 2413 (*rnh->rnh_matchaddr)((caddr_t)saddr, 2414 rnh); 2415 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 2416 np = NULL; 2417 } 2418 } 2419 /* 2420 * If no address match, use the default if it exists. 2421 */ 2422 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 2423 np = &nep->ne_defexported; 2424 } 2425 return (np); 2426 } 2427 2428 /* 2429 * perform msync on all vnodes under a mount point 2430 * the mount point must be locked. 2431 */ 2432 void 2433 vfs_msync(struct mount *mp, int flags) { 2434 struct vnode *vp, *nvp; 2435 struct vm_object *obj; 2436 int anyio, tries; 2437 2438 tries = 5; 2439 loop: 2440 anyio = 0; 2441 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 2442 2443 nvp = vp->v_mntvnodes.le_next; 2444 2445 if (vp->v_mount != mp) { 2446 goto loop; 2447 } 2448 2449 if (vp->v_flag & VXLOCK) /* XXX: what if MNT_WAIT? */ 2450 continue; 2451 2452 if (flags != MNT_WAIT) { 2453 obj = vp->v_object; 2454 if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0) 2455 continue; 2456 if (VOP_ISLOCKED(vp)) 2457 continue; 2458 } 2459 2460 simple_lock(&vp->v_interlock); 2461 if (vp->v_object && 2462 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 2463 if (!vget(vp, 2464 LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { 2465 if (vp->v_object) { 2466 vm_object_page_clean(vp->v_object, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : 0); 2467 anyio = 1; 2468 } 2469 vput(vp); 2470 } 2471 } else { 2472 simple_unlock(&vp->v_interlock); 2473 } 2474 } 2475 if (anyio && (--tries > 0)) 2476 goto loop; 2477 } 2478 2479 /* 2480 * Create the VM object needed for VMIO and mmap support. This 2481 * is done for all VREG files in the system. Some filesystems might 2482 * afford the additional metadata buffering capability of the 2483 * VMIO code by making the device node be VMIO mode also. 2484 * 2485 * vp must be locked when vfs_object_create is called. 2486 */ 2487 int 2488 vfs_object_create(vp, p, cred) 2489 struct vnode *vp; 2490 struct proc *p; 2491 struct ucred *cred; 2492 { 2493 struct vattr vat; 2494 vm_object_t object; 2495 int error = 0; 2496 2497 if (vp->v_type != VBLK && vn_canvmio(vp) == FALSE) 2498 return 0; 2499 2500 retry: 2501 if ((object = vp->v_object) == NULL) { 2502 if (vp->v_type == VREG || vp->v_type == VDIR) { 2503 if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) 2504 goto retn; 2505 object = vnode_pager_alloc(vp, vat.va_size, 0, 0); 2506 } else if (devsw(vp->v_rdev) != NULL) { 2507 /* 2508 * This simply allocates the biggest object possible 2509 * for a VBLK vnode. This should be fixed, but doesn't 2510 * cause any problems (yet). 2511 */ 2512 object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0); 2513 } else { 2514 goto retn; 2515 } 2516 /* 2517 * Dereference the reference we just created. This assumes 2518 * that the object is associated with the vp. 
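		 *
		 * vnode_pager_alloc() hands the object back referenced and
		 * takes a reference on the vnode as well; since the object
		 * is now reachable through vp->v_object and its lifetime is
		 * tied to the vnode, keeping those extra references would
		 * only pin the pair in memory, so both are dropped again
		 * here.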
		 */
		object->ref_count--;
		vp->v_usecount--;
	} else {
		if (object->flags & OBJ_DEAD) {
			VOP_UNLOCK(vp, 0, p);
			tsleep(object, PVM, "vodead", 0);
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
			goto retry;
		}
	}

	KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object"));
	vp->v_flag |= VOBJBUF;

retn:
	return error;
}

static void
vfree(vp)
	struct vnode *vp;
{
	int s;

	s = splbio();
	simple_lock(&vnode_free_list_slock);
	if (vp->v_flag & VTBFREE) {
		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
		vp->v_flag &= ~VTBFREE;
	}
	if (vp->v_flag & VAGE) {
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	} else {
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	}
	freevnodes++;
	simple_unlock(&vnode_free_list_slock);
	vp->v_flag &= ~VAGE;
	vp->v_flag |= VFREE;
	splx(s);
}

void
vbusy(vp)
	struct vnode *vp;
{
	int s;

	s = splbio();
	simple_lock(&vnode_free_list_slock);
	if (vp->v_flag & VTBFREE) {
		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
		vp->v_flag &= ~VTBFREE;
	} else {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;
	}
	simple_unlock(&vnode_free_list_slock);
	vp->v_flag &= ~(VFREE|VAGE);
	splx(s);
}

/*
 * Record a process's interest in events which might happen to
 * a vnode.  Because poll uses the historic select-style interface
 * internally, this routine serves as both the ``check for any
 * pending events'' and the ``record my interest in future events''
 * functions.  (These are done together, while the lock is held,
 * to avoid race conditions.)
 */
int
vn_pollrecord(vp, p, events)
	struct vnode *vp;
	struct proc *p;
	short events;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_revents & events) {
		/*
		 * This leaves events we are not interested
		 * in available for the other process which
		 * presumably had requested them
		 * (otherwise they would never have been
		 * recorded).
		 */
		events &= vp->v_pollinfo.vpi_revents;
		vp->v_pollinfo.vpi_revents &= ~events;

		simple_unlock(&vp->v_pollinfo.vpi_lock);
		return events;
	}
	vp->v_pollinfo.vpi_events |= events;
	selrecord(p, &vp->v_pollinfo.vpi_selinfo);
	simple_unlock(&vp->v_pollinfo.vpi_lock);
	return 0;
}

/*
 * Note the occurrence of an event.  If the VN_POLLEVENT macro is used,
 * it is possible for us to miss an event due to race conditions, but
 * that condition is expected to be rare, so for the moment it is the
 * preferred interface.
 */
void
vn_pollevent(vp, events)
	struct vnode *vp;
	short events;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_events & events) {
		/*
		 * We clear vpi_events so that we don't
		 * call selwakeup() twice if two events are
		 * posted before the polling process(es) is
		 * awakened.  This also ensures that we take at
		 * most one selwakeup() if the polling process
		 * is no longer interested.  However, it does
		 * mean that only one event can be noticed at
		 * a time.  (Perhaps we should only clear those
		 * event bits which we note?) XXX
		 */
		vp->v_pollinfo.vpi_events = 0;	/* &= ~events ???
*/ 2642 vp->v_pollinfo.vpi_revents |= events; 2643 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2644 } 2645 simple_unlock(&vp->v_pollinfo.vpi_lock); 2646 } 2647 2648 /* 2649 * Wake up anyone polling on vp because it is being revoked. 2650 * This depends on dead_poll() returning POLLHUP for correct 2651 * behavior. 2652 */ 2653 void 2654 vn_pollgone(vp) 2655 struct vnode *vp; 2656 { 2657 simple_lock(&vp->v_pollinfo.vpi_lock); 2658 if (vp->v_pollinfo.vpi_events) { 2659 vp->v_pollinfo.vpi_events = 0; 2660 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2661 } 2662 simple_unlock(&vp->v_pollinfo.vpi_lock); 2663 } 2664 2665 2666 2667 /* 2668 * Routine to create and manage a filesystem syncer vnode. 2669 */ 2670 #define sync_close ((int (*) __P((struct vop_close_args *)))nullop) 2671 static int sync_fsync __P((struct vop_fsync_args *)); 2672 static int sync_inactive __P((struct vop_inactive_args *)); 2673 static int sync_reclaim __P((struct vop_reclaim_args *)); 2674 #define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) 2675 #define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) 2676 static int sync_print __P((struct vop_print_args *)); 2677 #define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked) 2678 2679 static vop_t **sync_vnodeop_p; 2680 static struct vnodeopv_entry_desc sync_vnodeop_entries[] = { 2681 { &vop_default_desc, (vop_t *) vop_eopnotsupp }, 2682 { &vop_close_desc, (vop_t *) sync_close }, /* close */ 2683 { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */ 2684 { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */ 2685 { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */ 2686 { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */ 2687 { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */ 2688 { &vop_print_desc, (vop_t *) sync_print }, /* print */ 2689 { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */ 2690 { NULL, NULL } 2691 }; 2692 static struct vnodeopv_desc sync_vnodeop_opv_desc = 2693 { &sync_vnodeop_p, sync_vnodeop_entries }; 2694 2695 VNODEOP_SET(sync_vnodeop_opv_desc); 2696 2697 /* 2698 * Create a new filesystem syncer vnode for the specified mount point. 2699 */ 2700 int 2701 vfs_allocate_syncvnode(mp) 2702 struct mount *mp; 2703 { 2704 struct vnode *vp; 2705 static long start, incr, next; 2706 int error; 2707 2708 /* Allocate a new vnode */ 2709 if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { 2710 mp->mnt_syncer = NULL; 2711 return (error); 2712 } 2713 vp->v_type = VNON; 2714 /* 2715 * Place the vnode onto the syncer worklist. We attempt to 2716 * scatter them about on the list so that they will go off 2717 * at evenly distributed times even if all the filesystems 2718 * are mounted at once. 2719 */ 2720 next += incr; 2721 if (next == 0 || next > syncer_maxdelay) { 2722 start /= 2; 2723 incr /= 2; 2724 if (start == 0) { 2725 start = syncer_maxdelay / 2; 2726 incr = syncer_maxdelay; 2727 } 2728 next = start; 2729 } 2730 vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); 2731 mp->mnt_syncer = vp; 2732 return (0); 2733 } 2734 2735 /* 2736 * Do a lazy sync of the filesystem. 
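 *
 * Each mount point gets one syncer vnode (allocated above); the
 * syncer daemon periodically fsyncs it with MNT_LAZY, which lands
 * here.  Rather than syncing the syncer vnode itself, the routine
 * below requeues the vnode syncdelay seconds into the future and
 * pushes the whole filesystem with vfs_msync() plus VFS_SYNC(),
 * temporarily clearing MNT_ASYNC so that the sync actually starts
 * the writes.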
 */
static int
sync_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *syncvp = ap->a_vp;
	struct mount *mp = syncvp->v_mount;
	struct proc *p = ap->a_p;
	int asyncflag;

	/*
	 * We only need to do something if this is a lazy evaluation.
	 */
	if (ap->a_waitfor != MNT_LAZY)
		return (0);

	/*
	 * Move ourselves to the back of the sync list.
	 */
	vn_syncer_add_to_worklist(syncvp, syncdelay);

	/*
	 * Walk the list of vnodes pushing all that are dirty and
	 * not already on the sync list.
	 */
	simple_lock(&mountlist_slock);
	if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) {
		simple_unlock(&mountlist_slock);
		return (0);
	}
	asyncflag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	vfs_msync(mp, MNT_NOWAIT);
	VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
	if (asyncflag)
		mp->mnt_flag |= MNT_ASYNC;
	vfs_unbusy(mp, p);
	return (0);
}

/*
 * The syncer vnode is no longer referenced.
 */
static int
sync_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
	} */ *ap;
{

	vgone(ap->a_vp);
	return (0);
}

/*
 * The syncer vnode is no longer needed and is being decommissioned.
 *
 * Modifications to the worklist must be protected at splbio().
 */
static int
sync_reclaim(ap)
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	int s;

	s = splbio();
	vp->v_mount->mnt_syncer = NULL;
	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
		vp->v_flag &= ~VONWORKLST;
	}
	splx(s);

	return (0);
}

/*
 * Print out a syncer vnode.
 */
static int
sync_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	printf("syncer vnode");
	if (vp->v_vnlock != NULL)
		lockmgr_printinfo(vp->v_vnlock);
	printf("\n");
	return (0);
}

/*
 * Extract the dev_t from a VBLK or VCHR vnode.
 */
dev_t
vn_todev(vp)
	struct vnode *vp;
{
	if (vp->v_type != VBLK && vp->v_type != VCHR)
		return (NODEV);
	return (vp->v_rdev);
}

/*
 * Check whether a vnode represents a disk device.
 */
int
vn_isdisk(vp)
	struct vnode *vp;
{
	if (vp->v_type != VBLK)
		return (0);
	if (!devsw(vp->v_rdev))
		return (0);
	if (!(devsw(vp->v_rdev)->d_flags & D_DISK))
		return (0);
	return (1);
}
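
/*
 * Example (sketch only, never compiled): a disk-backed filesystem's
 * mount routine would typically combine several of the helpers above
 * to vet its device vnode.  The function name and the exact policy
 * are illustrative and not part of this file; compare ffs_mountfs().
 */
#if 0
static int
example_check_devvp(devvp)
	struct vnode *devvp;
{
	int error;

	if (!vn_isdisk(devvp))			/* VBLK with a D_DISK driver? */
		return (ENOTBLK);
	if ((error = vfs_mountedon(devvp)) != 0)
		return (error);			/* already mounted on */
	if (vcount(devvp) > 1 && devvp != rootvp)
		return (EBUSY);			/* opened elsewhere */
	return (0);
}
#endif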