/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
 * $Id: vfs_subr.c,v 1.79 1997/03/04 18:31:56 bde Exp $
 */

/*
 * External virtual filesystem routines
 */
#include "opt_ddb.h"
#include "opt_devfs.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

#ifdef DDB
extern void	printlockedvnodes __P((void));
#endif
static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
extern void	vgonel __P((struct vnode *vp, struct proc *p));
unsigned long	numvnodes;
extern void	vfs_unmountroot __P((struct mount *rootfs));
extern void	vputrele __P((struct vnode *vp, int put));

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {						\
	LIST_REMOVE(bp, b_vnbufs);				\
	(bp)->b_vnbufs.le_next = NOLIST;			\
}
TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
static u_long freevnodes = 0;

struct mntlist mountlist;	/* mounted filesystem list */
struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
struct simplelock vnode_free_list_slock;
static struct simplelock spechash_slock;

int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");

static void	vfs_free_addrlist __P((struct netexport *nep));
static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
				       struct export_args *argp));

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	desiredvnodes = maxproc + vm_object_cache_max;
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_flag & MNT_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_flag |= MNT_MWAIT;
		if (interlkp) {
			simple_unlock(interlkp);
		}
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		if (interlkp) {
			simple_lock(interlkp);
		}
		return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	mp->mnt_stat.f_mntonname[1] = 0;
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
#ifdef notdef	/* XXX JH */
int
lite2_vfs_mountroot(void)
{
	struct vfsconf *vfsp;
	extern int (*lite2_mountroot)(void);
	int error;

	if (lite2_mountroot != NULL)
		return ((*lite2_mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}
#endif

/*
 * Lookup a mount point by filesystem identifier.
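 *
 * Illustrative sketch (not part of the original file): a caller holding a
 * filesystem id, e.g. one recovered from an NFS file handle, might locate
 * the mount point roughly as follows; fhp is a hypothetical fhandle_t *:
 *
 *	struct mount *mp;
 *
 *	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
 *		return (ESTALE);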
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *) 0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
	    vap->va_fsid = vap->va_fileid =
	    vap->va_blocksize = vap->va_rdev =
	    vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
	    vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern vop_t **dead_vnodeop_p;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	vop_t **vops;
	struct vnode **vpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;

	simple_lock(&vnode_free_list_slock);
retry:
	/*
	 * We allocate a new vnode if
	 * 1. we don't have any free
	 *    Pretty obvious, we actually used to panic, but that
	 *    is a silly thing to do.
	 * 2. we haven't filled our pool yet
	 *    We don't want to trash the incore (VM-)vnodecache.
	 * 3. less than 1/4th of our vnodes are free.
	 *    We don't want to trash the namei cache either.
	 */
	if (freevnodes < (numvnodes >> 2) ||
	    numvnodes < desiredvnodes ||
	    vnode_free_list.tqh_first == NULL) {
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *) malloc((u_long) sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *) vp, sizeof *vp);
		numvnodes++;
	} else {
		for (vp = vnode_free_list.tqh_first;
		    vp != NULLVP; vp = vp->v_freelist.tqe_next) {
			if (simple_lock_try(&vp->v_interlock))
				break;
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		if (vp->v_usage > 0) {
			simple_unlock(&vp->v_interlock);
			--vp->v_usage;
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			goto retry;
		}
		freevnodes--;

		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else {
			simple_unlock(&vp->v_interlock);
		}

#ifdef DIAGNOSTIC
		{
			int s;

			if (vp->v_data)
				panic("cleaned vnode isn't");
			s = splbio();
			if (vp->v_numoutput)
				panic("Clean vnode has pending I/O's");
			splx(s);
		}
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
		vp->v_writecount = 0;	/* XXX */
		vp->v_usage = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL) {
		simple_unlock(&mntvnode_slock);
		return;
	}
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp)) {
		vp->v_numoutput--;
		if (vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t) &vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
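 *
 * Illustrative sketch (not part of the original file): a filesystem that is
 * about to truncate or revoke a node typically flushes and saves dirty
 * buffers first, e.g.
 *
 *	if ((error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0)) != 0)
 *		return (error);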
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;
	vm_object_t object;

	if (flags & V_SAVE) {
		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}

	s = splbio();
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
				continue;
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t) bp,
				    slpflag | (PRIBIO + 1), "vinvalbuf",
				    slptimeo);
				if (error) {
					splx(s);
					return (error);
				}
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			/*
			 * XXX Since there are no node locks for NFS, I
			 * believe there is a slight chance that a delayed
			 * write will occur while sleeping just above, so
			 * check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF);
			brelse(bp);
		}
	}

	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
	}

	splx(s);

	/*
	 * Destroy the copy in the VM cache, too.
	 */
	object = vp->v_object;
	if (object != NULL) {
		vm_object_page_remove(object, 0, object->size,
		    (flags & V_SAVE) ? TRUE : FALSE);
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	s = splbio();
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	s = splbio();
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	splx(s);

	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Associate a p-buffer with a vnode.
 */
void
pbgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
#if defined(DIAGNOSTIC)
	if (bp->b_vp)
		panic("pbgetvp: not free");
#endif
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
}

/*
 * Disassociate a p-buffer from a vnode.
 */
void
pbrelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

#if defined(DIAGNOSTIC)
	if (bp->b_vp == (struct vnode *) 0)
		panic("pbrelvp: NULL");
#endif

	bp->b_vp = (struct vnode *) 0;
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	int s;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}

	s = splbio();
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers; otherwise insert onto list
	 * of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI) {
		struct buf *tbp;

		tbp = newvp->v_dirtyblkhd.lh_first;
		if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
			bufinsvn(bp, &newvp->v_dirtyblkhd);
		} else {
			while (tbp->b_vnbufs.le_next &&
			    (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
				tbp = tbp->b_vnbufs.le_next;
			}
			LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
		}
	} else {
		bufinsvn(bp, &newvp->v_cleanblkhd);
	}
	splx(s);
}

#ifndef DEVFS_ROOT
/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = 0;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
#endif /* !DEVFS_ROOT */

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0) {
		simple_lock(&vp->v_interlock);
	}
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		freevnodes--;
	}
	vp->v_usecount++;
	/*
	 * Create the VM object, if needed
	 */
	if ((vp->v_type == VREG) &&
	    ((vp->v_object == NULL) ||
	    (vp->v_object->flags & OBJ_VFS_REF) == 0)) {
		/*
		 * XXX vfs_object_create probably needs the interlock.
		 */
		simple_unlock(&vp->v_interlock);
		vfs_object_create(vp, curproc, curproc->p_ucred, 0);
		simple_lock(&vp->v_interlock);
	}
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)))
			vrele(vp);
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Stubs to use when there is no locking to be done on the underlying object.
 * A minimal shared lock is necessary to ensure that the underlying object
 * is not revoked while an operation is in progress. So, an active shared
 * count is maintained in an auxiliary vnode lock structure.
 */
int
vop_nolock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
#ifdef notyet
	/*
	 * This code cannot be used until all the non-locking filesystems
	 * (notably NFS) are converted to properly lock and release nodes.
	 * Also, certain vnode operations change the locking state within
	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
	 * and symlink).
	 * Ideally these operations should not change the
	 * lock state, but should be changed to let the caller of the
	 * function unlock them. Otherwise all intermediate vnode layers
	 * (such as union, umapfs, etc) must catch these functions to do
	 * the necessary locking at their layer. Note that the inactive
	 * and lookup operations also change their lock state, but this
	 * cannot be avoided, so these two operations will always need
	 * to be handled in intermediate layers.
	 */
	struct vnode *vp = ap->a_vp;
	int vnflags, flags = ap->a_flags;

	if (vp->v_vnlock == NULL) {
		if ((flags & LK_TYPE_MASK) == LK_DRAIN)
			return (0);
		MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
		    M_VNODE, M_WAITOK);
		lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	}
	switch (flags & LK_TYPE_MASK) {
	case LK_DRAIN:
		vnflags = LK_DRAIN;
		break;
	case LK_EXCLUSIVE:
	case LK_SHARED:
		vnflags = LK_SHARED;
		break;
	case LK_UPGRADE:
	case LK_EXCLUPGRADE:
	case LK_DOWNGRADE:
		return (0);
	case LK_RELEASE:
	default:
		panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
	}
	if (flags & LK_INTERLOCK)
		vnflags |= LK_INTERLOCK;
	return (lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
#else /* for now */
	/*
	 * Since we are not using the lock manager, we must clear
	 * the interlock here.
	 */
	if (ap->a_flags & LK_INTERLOCK) {
		simple_unlock(&ap->a_vp->v_interlock);
	}
	return (0);
#endif
}

/*
 * Do the inverse of vop_nolock, handling the interlock in a compatible way.
 */
int
vop_nounlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_vnlock == NULL) {
		if (ap->a_flags & LK_INTERLOCK)
			simple_unlock(&ap->a_vp->v_interlock);
		return (0);
	}
	return (lockmgr(vp->v_vnlock, LK_RELEASE | ap->a_flags,
	    &ap->a_vp->v_interlock, ap->a_p));
}

/*
 * Return whether or not the node is in use.
 */
int
vop_noislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_vnlock == NULL)
		return (0);
	return (lockstatus(vp->v_vnlock));
}

/* #ifdef DIAGNOSTIC */
/*
 * Vnode reference, just increment the count
 */
void
vref(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required");

	vp->v_usecount++;

	if ((vp->v_type == VREG) &&
	    ((vp->v_object == NULL) ||
	    ((vp->v_object->flags & OBJ_VFS_REF) == 0)) ) {
		/*
		 * We need to lock the VP during the time that
		 * the object is created. This is necessary to
		 * keep the system from re-entrantly doing it
		 * multiple times.
		 * XXX vfs_object_create probably needs the interlock?
		 */
		simple_unlock(&vp->v_interlock);
		vfs_object_create(vp, curproc, curproc->p_ucred, 0);
		return;
	}
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode put/release.
 * If count drops to zero, call inactive routine and return to freelist.
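 *
 * Illustrative sketch (not part of the original file): a caller that
 * obtained a locked, referenced vnode (e.g. from a LOCKLEAF namei lookup)
 * normally finishes with vput(), while a caller holding only a reference
 * uses vrele(); the surrounding names here are hypothetical:
 *
 *	if ((error = namei(ndp)) != 0)
 *		return (error);
 *	vp = ndp->ni_vp;
 *	...
 *	vput(vp);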
 */
void
vputrele(vp, put)
	struct vnode *vp;
	int put;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vputrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;

	if ((vp->v_usecount == 1) &&
	    vp->v_object &&
	    (vp->v_object->flags & OBJ_VFS_REF)) {
		vp->v_object->flags &= ~OBJ_VFS_REF;
		if (put) {
			VOP_UNLOCK(vp, LK_INTERLOCK, p);
		} else {
			simple_unlock(&vp->v_interlock);
		}
		vm_object_deallocate(vp->v_object);
		return;
	}

	if (vp->v_usecount > 0) {
		if (put) {
			VOP_UNLOCK(vp, LK_INTERLOCK, p);
		} else {
			simple_unlock(&vp->v_interlock);
		}
		return;
	}

	if (vp->v_usecount < 0) {
#ifdef DIAGNOSTIC
		vprint("vputrele: negative ref count", vp);
#endif
		panic("vputrele: negative ref cnt");
	}
	simple_lock(&vnode_free_list_slock);
	if (vp->v_flag & VAGE) {
		vp->v_flag &= ~VAGE;
		vp->v_usage = 0;
		if (vp->v_tag != VT_TFS)
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	} else {
		if (vp->v_tag != VT_TFS)
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	}
	freevnodes++;
	simple_unlock(&vnode_free_list_slock);

	/*
	 * If we are doing a vput, the node is already locked, and we must
	 * call VOP_INACTIVE with the node locked. So, in the case of
	 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
	 */
	if (put) {
		simple_unlock(&vp->v_interlock);
		VOP_INACTIVE(vp, p);
	} else if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
		VOP_INACTIVE(vp, p);
	}
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	struct vnode *vp;
{
	vputrele(vp, 1);
}

void
vrele(vp)
	struct vnode *vp;
{
	vputrele(vp, 0);
}

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
static int busyprt = 0;	/* print out busy vnodes */
SYSCTL_INT(_debug, 1, busyprt, CTLFLAG_RW, &busyprt, 0, "");
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/*
		 * Make sure this vnode wasn't reclaimed in getnewvnode().
		 * Start over if it has (it won't be on the list anymore).
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file vnodes
		 * open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}

		if (vp->v_object && (vp->v_object->flags & OBJ_VFS_REF)) {
			simple_unlock(&vp->v_interlock);
			simple_unlock(&mntvnode_slock);
			vm_object_reference(vp->v_object);
			pager_cache(vp->v_object, FALSE);
			vp->v_object->flags &= ~OBJ_VFS_REF;
			vm_object_deallocate(vp->v_object);
			simple_lock(&mntvnode_slock);
			simple_lock(&vp->v_interlock);
		}

		/*
		 * With v_usecount == 0, all we need to do is clear out the
		 * vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}

		/*
		 * If FORCECLOSE is set, forcibly close the vnode. For block
		 * or character devices, revert to an anonymous device. For
		 * all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *) 0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
static void
vclean(struct vnode *vp, int flags, struct proc *p)
{
	int active;

	/*
	 * Check to see if the vnode is in use. If so we have to reference it
	 * before we clean it out so that its count cannot fall to zero and
	 * generate a race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount))
		vp->v_usecount++;
	/*
	 * Prevent the vnode from being recycled or brought into use while we
	 * clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);
	cache_purge(vp);
	if (vp->v_vnlock) {
		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
			vprint("vclean: lock not drained", vp);
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t) vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if ((ap->a_flags & REVOKEALL) == 0)
		panic("vop_revoke");
#endif

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP) {
				simple_unlock(&spechash_slock);
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
	}
	vgonel(vp, p);
	return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp) {
			simple_unlock(inter_lkp);
		}
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}

	if (vp->v_object) {
		vp->v_object->flags |= OBJ_VNODE_GONE;
	}

	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}

	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
		    vnode_free_list.tqh_first != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}

	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Calculate the total number of references to a special device.
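 *
 * Illustrative sketch (not part of the original file): a device close
 * routine might use this to detect the last close of a device, e.g.
 *
 *	if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
 *		return (0);	/* not the last reference yet */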
 */
int
vcount(vp)
	register struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %ld,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DDB
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
static int	sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);

static int
vfs_sysctl SYSCTL_HANDLER_ARGS
{
	int *name = (int *)arg1 - 1;	/* XXX */
	u_int namelen = arg2 + 1;	/* XXX */
	struct vfsconf *vfsp;

#ifndef NO_COMPAT_PRELITE2
	/*
	 * Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC.
	 */
	if (namelen == 1)
		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
#endif

#ifdef notyet
	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);	/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
#endif
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		if (namelen != 2)
			return (ENOTDIR);
		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
	case VFS_CONF:
		if (namelen != 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
	}
	return (EOPNOTSUPP);
}

SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
	"Generic filesystem");

#ifndef NO_COMPAT_PRELITE2

static int
sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
{
	int error;
	struct vfsconf *vfsp;
	struct ovfsconf ovfs;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
		strcpy(ovfs.vfc_name, vfsp->vfc_name);
		ovfs.vfc_index = vfsp->vfc_typenum;
		ovfs.vfc_refcount = vfsp->vfc_refcount;
		ovfs.vfc_flags = vfsp->vfc_flags;
		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
		if (error)
			return error;
	}
	return 0;
}

#endif /* !NO_COMPAT_PRELITE2 */

int kinfo_vdebug = 1;
int kinfo_vgetfailed;

#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
static int
sysctl_vnode SYSCTL_HANDLER_ARGS
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)

	req->lock = 0;
	if (!req->oldptr) /* Make an estimate */
		return (SYSCTL_OUT(req, 0,
		    (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			simple_unlock(&mntvnode_slock);
			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
				return (error);
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);

	return (0);
}

SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
	0, 0, sysctl_vnode, "S,vnode", "");

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Unmount all filesystems. The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp;
	struct proc *p = initproc;	/* XXX XXX should this be proc0? */
	int error;

	/*
	 * Since this only runs when rebooting, it is not interlocked.
	 */
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
		error = dounmount(mp, MNT_FORCE, p);
		if (error) {
			printf("unmount of %s failed (",
			    mp->mnt_stat.f_mntonname);
			if (error == EBUSY)
				printf("BUSY)\n");
			else
				printf("%d)\n", error);
		}
	}
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
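 *
 * Illustrative sketch (not part of the original file): this is normally
 * reached indirectly through vfs_export() from a filesystem's mount
 * routine, roughly as in (ump and args are hypothetical locals):
 *
 *	if (args.export.ex_flags & MNT_EXPORTED)
 *		return (vfs_export(mp, &ump->um_export, &args.export));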
 */
static int
vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
    struct export_args *argp)
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t) np, i);
	saddr = (struct sockaddr *) (np + 1);
	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
		error = copyin(argp->ex_addr, (caddr_t) smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not used,
		 * do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **) &nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(struct radix_node *rn, void *w)
{
	register struct radix_node_head *rnh = (struct radix_node_head *) w;

	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t) rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(struct netexport *nep)
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i])) {
			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
			    (caddr_t) rnh);
			free((caddr_t) rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if ((error = vfs_hang_addrlist(mp, nep, argp)))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
				    (*rnh->rnh_matchaddr)((caddr_t)saddr,
				    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Perform msync on all vnodes under a mount point.
 * The mount point must be locked.
 */
void
vfs_msync(struct mount *mp, int flags)
{
	struct vnode *vp, *nvp;
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {

		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		if (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))
			continue;
		if (vp->v_object &&
		    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
			vm_object_page_clean(vp->v_object, 0, 0, TRUE, TRUE);
		}
	}
}

/*
 * Create the VM object needed for VMIO and mmap support. This
 * is done for all VREG files in the system. Some filesystems might
 * afford the additional metadata buffering capability of the
 * VMIO code by making the device node be VMIO mode also.
 */
int
vfs_object_create(vp, p, cred, waslocked)
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
	int waslocked;
{
	struct vattr vat;
	vm_object_t object;
	int error = 0;

retry:
	if ((object = vp->v_object) == NULL) {
		if (vp->v_type == VREG) {
			if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
				goto retn;
			(void) vnode_pager_alloc(vp,
			    OFF_TO_IDX(round_page(vat.va_size)), 0, 0);
		} else {
			/*
			 * This simply allocates the biggest object possible
			 * for a VBLK vnode. This should be fixed, but doesn't
			 * cause any problems (yet).
			 */
			(void) vnode_pager_alloc(vp, INT_MAX, 0, 0);
		}
		vp->v_object->flags |= OBJ_VFS_REF;
	} else {
		if (object->flags & OBJ_DEAD) {
			if (waslocked)
				VOP_UNLOCK(vp, 0, p);
			tsleep(object, PVM, "vodead", 0);
			if (waslocked)
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
			goto retry;
		}
		if ((object->flags & OBJ_VFS_REF) == 0) {
			object->flags |= OBJ_VFS_REF;
			vm_object_reference(object);
		}
	}
	if (vp->v_object)
		vp->v_flag |= VVMIO;

retn:
	return error;
}