/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
 * $Id$
 */

/*
 * External virtual filesystem routines
 */
#include "opt_ddb.h"
#include "opt_devfs.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

#ifdef DDB
extern void	printlockedvnodes __P((void));
#endif
static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
extern void	vgonel __P((struct vnode *vp, struct proc *p));
unsigned long	numvnodes;
extern void	vfs_unmountroot __P((struct mount *rootfs));

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {						\
	LIST_REMOVE(bp, b_vnbufs);				\
	(bp)->b_vnbufs.le_next = NOLIST;			\
}
TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
static u_long freevnodes = 0;

struct mntlist mountlist;	/* mounted filesystem list */
struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
struct simplelock vnode_free_list_slock;
static struct simplelock spechash_slock;

int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");

static void	vfs_free_addrlist __P((struct netexport *nep));
static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
				       struct export_args *argp));

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	desiredvnodes = maxproc + vm_object_cache_max;
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_flag & MNT_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_flag |= MNT_MWAIT;
		if (interlkp) {
			simple_unlock(interlkp);
		}
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		if (interlkp) {
			simple_lock(interlkp);
		}
		return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	mp->mnt_stat.f_mntonname[1] = 0;
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
#ifdef notdef	/* XXX JH */
int
lite2_vfs_mountroot(void)
{
	struct vfsconf *vfsp;
	extern int (*lite2_mountroot)(void);
	int error;

	if (lite2_mountroot != NULL)
		return ((*lite2_mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}
#endif

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *) 0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
	    vap->va_fsid = vap->va_fileid =
	    vap->va_blocksize = vap->va_rdev =
	    vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
	    vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Unmount and release the root filesystem.
 * Called by vfs_unmountall() during shutdown.
 */
void
vfs_unmountroot(struct mount *rootfs)
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp = rootfs;
	int error;

	if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
		printf("failed to unmount root\n");
		return;
	}
	mp->mnt_flag |= MNT_UNMOUNT;
	vnode_pager_umount(mp);	/* release cached vnodes */
	cache_purgevfs(mp);	/* remove cache entries for this file sys */

	if ((error = VFS_SYNC(mp, MNT_WAIT, initproc->p_ucred, initproc)))
		printf("sync of root filesystem failed (%d)\n", error);

	if ((error = VFS_UNMOUNT(mp, MNT_FORCE, initproc))) {
		printf("unmount of root filesystem failed (");
		if (error == EBUSY)
			printf("BUSY)\n");
		else
			printf("%d)\n", error);
	}
	mp->mnt_flag &= ~MNT_UNMOUNT;
	vfs_unbusy(mp, p);
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern vop_t **dead_vnodeop_p;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	vop_t **vops;
	struct vnode **vpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;

	simple_lock(&vnode_free_list_slock);
retry:
	/*
	 * We allocate a new vnode if
	 *	1. we don't have any free
	 *		Pretty obvious, we actually used to panic, but that
	 *		is a silly thing to do.
	 *	2. we haven't filled our pool yet
	 *		We don't want to trash the incore (VM-)vnodecache.
	 *	3. less than 1/4th of our vnodes are free.
	 *		We don't want to trash the namei cache either.
	 */
	if (freevnodes < (numvnodes >> 2) ||
	    numvnodes < desiredvnodes ||
	    vnode_free_list.tqh_first == NULL) {
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *) malloc((u_long) sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *) vp, sizeof *vp);
		numvnodes++;
	} else {
		for (vp = vnode_free_list.tqh_first;
		    vp != NULLVP; vp = vp->v_freelist.tqe_next) {
			if (simple_lock_try(&vp->v_interlock))
				break;
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		if (vp->v_usage > 0) {
			--vp->v_usage;
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			goto retry;
		}
		freevnodes--;

		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else {
			simple_unlock(&vp->v_interlock);
		}

#ifdef DIAGNOSTIC
		{
			int s;

			if (vp->v_data)
				panic("cleaned vnode isn't");
			s = splbio();
			if (vp->v_numoutput)
				panic("Clean vnode has pending I/O's");
			splx(s);
		}
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
		vp->v_writecount = 0;	/* XXX */
		vp->v_usage = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL) {
		simple_unlock(&mntvnode_slock);
		return;
	}
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp)) {
		vp->v_numoutput--;
		if (vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t) &vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;
	vm_object_t object;

	if (flags & V_SAVE) {
		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}

	s = splbio();
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
				continue;
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t) bp,
				    slpflag | (PRIBIO + 1), "vinvalbuf",
				    slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			/*
			 * XXX Since there are no node locks for NFS, I
			 * believe there is a slight chance that a delayed
			 * write will occur while sleeping just above, so
			 * check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF);
			brelse(bp);
		}
	}
	splx(s);

	s = splbio();
	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
	}
	splx(s);

	/*
	 * Destroy the copy in the VM cache, too.
	 */
	object = vp->v_object;
	if (object != NULL) {
		vm_object_page_remove(object, 0, object->size,
		    (flags & V_SAVE) ? TRUE : FALSE);
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	s = splbio();
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	s = splbio();
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	splx(s);

	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Associate a p-buffer with a vnode.
 */
void
pbgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	if (bp->b_vp)
		panic("pbgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
}

/*
 * Disassociate a p-buffer from a vnode.
 */
void
pbrelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("pbrelvp: NULL");

	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	int s;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}

	s = splbio();
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers; otherwise insert onto list
	 * of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI) {
		struct buf *tbp;

		tbp = newvp->v_dirtyblkhd.lh_first;
		if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
			bufinsvn(bp, &newvp->v_dirtyblkhd);
		} else {
			while (tbp->b_vnbufs.le_next &&
			    (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
				tbp = tbp->b_vnbufs.le_next;
			}
			LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
		}
	} else {
		bufinsvn(bp, &newvp->v_cleanblkhd);
	}
	splx(s);
}

#ifndef DEVFS_ROOT
/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = 0;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
#endif /* !DEVFS_ROOT */

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		simple_unlock(&spechash_slock);
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			goto loop;
		}
		simple_lock(&spechash_slock);
		break;
	}

	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0) {
		simple_lock(&vp->v_interlock);
	}
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		freevnodes--;
	}
	vp->v_usecount++;
	/*
	 * Create the VM object, if needed
	 */
	if ((vp->v_type == VREG) &&
	    ((vp->v_object == NULL) ||
	     (vp->v_object->flags & OBJ_VFS_REF) == 0)) {
		/*
		 * XXX vfs_object_create probably needs the interlock.
		 */
		simple_unlock(&vp->v_interlock);
		vfs_object_create(vp, curproc, curproc->p_ucred, 0);
		simple_lock(&vp->v_interlock);
	}
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0)
			vrele(vp);
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Stubs to use when there is no locking to be done on the underlying object.
 * A minimal shared lock is necessary to ensure that the underlying object
 * is not revoked while an operation is in progress. So, an active shared
 * count is maintained in an auxiliary vnode lock structure.
 */
int
vop_nolock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
#ifdef notyet
	/*
	 * This code cannot be used until all the non-locking filesystems
	 * (notably NFS) are converted to properly lock and release nodes.
	 * Also, certain vnode operations change the locking state within
	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
	 * and symlink).  Ideally these operations should not change the
	 * lock state, but should be changed to let the caller of the
	 * function unlock them. Otherwise all intermediate vnode layers
	 * (such as union, umapfs, etc) must catch these functions to do
	 * the necessary locking at their layer. Note that the inactive
	 * and lookup operations also change their lock state, but this
	 * cannot be avoided, so these two operations will always need
	 * to be handled in intermediate layers.
	 */
	struct vnode *vp = ap->a_vp;
	int vnflags, flags = ap->a_flags;

	if (vp->v_vnlock == NULL) {
		if ((flags & LK_TYPE_MASK) == LK_DRAIN)
			return (0);
		MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
		    M_VNODE, M_WAITOK);
		lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	}
	switch (flags & LK_TYPE_MASK) {
	case LK_DRAIN:
		vnflags = LK_DRAIN;
		break;
	case LK_EXCLUSIVE:
	case LK_SHARED:
		vnflags = LK_SHARED;
		break;
	case LK_UPGRADE:
	case LK_EXCLUPGRADE:
	case LK_DOWNGRADE:
		return (0);
	case LK_RELEASE:
	default:
		panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
	}
	if (flags & LK_INTERLOCK)
		vnflags |= LK_INTERLOCK;
	return (lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
#else /* for now */
	/*
	 * Since we are not using the lock manager, we must clear
	 * the interlock here.
	 */
	if (ap->a_flags & LK_INTERLOCK) {
		simple_unlock(&ap->a_vp->v_interlock);
	}
	return (0);
#endif
}

/*
 * Decrement the active use count.
 */
int
vop_nounlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_vnlock == NULL)
		return (0);
	return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p));
}

/*
 * Return whether or not the node is in use.
 */
int
vop_noislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_vnlock == NULL)
		return (0);
	return (lockstatus(vp->v_vnlock));
}

/* #ifdef DIAGNOSTIC */
/*
 * Vnode reference, just increment the count
 */
void
vref(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required");

	vp->v_usecount++;

	if ((vp->v_type == VREG) &&
	    ((vp->v_object == NULL) ||
	     ((vp->v_object->flags & OBJ_VFS_REF) == 0))) {
		/*
		 * We need to lock the vnode during the time that
		 * the object is created. This is necessary to
		 * keep the system from re-entrantly doing it
		 * multiple times.
		 * XXX vfs_object_create probably needs the interlock?
		 */
		simple_unlock(&vp->v_interlock);
		vfs_object_create(vp, curproc, curproc->p_ucred, 0);
		simple_lock(&vp->v_interlock);
	}
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	struct vnode *vp;
{
	VOP_UNLOCK(vp, 0, curproc);
	vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;

	if ((vp->v_usecount == 1) &&
	    vp->v_object &&
	    (vp->v_object->flags & OBJ_VFS_REF)) {
		vp->v_object->flags &= ~OBJ_VFS_REF;
		simple_unlock(&vp->v_interlock);
		vm_object_deallocate(vp->v_object);
		return;
	}

	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}

	if (vp->v_usecount < 0) {
#ifdef DIAGNOSTIC
		vprint("vrele: negative ref count", vp);
#endif
		panic("vrele: negative ref cnt");
	}
	simple_lock(&vnode_free_list_slock);
	if (vp->v_flag & VAGE) {
		if (vp->v_tag != VT_TFS)
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		vp->v_flag &= ~VAGE;
		vp->v_usage = 0;
	} else {
		if (vp->v_tag != VT_TFS)
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	}
	simple_unlock(&vnode_free_list_slock);

	freevnodes++;

	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0)
		VOP_INACTIVE(vp, p);
}

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
static int busyprt = 0;	/* print out busy vnodes */
SYSCTL_INT(_debug, 1, busyprt, CTLFLAG_RW, &busyprt, 0, "");
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/*
		 * Make sure this vnode wasn't reclaimed in getnewvnode().
		 * Start over if it has (it won't be on the list anymore).
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file vnodes
		 * open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}

		if (vp->v_object && (vp->v_object->flags & OBJ_VFS_REF)) {
			simple_unlock(&vp->v_interlock);
			simple_unlock(&mntvnode_slock);
			vm_object_reference(vp->v_object);
			pager_cache(vp->v_object, FALSE);
			vp->v_object->flags &= ~OBJ_VFS_REF;
			vm_object_deallocate(vp->v_object);
			simple_lock(&mntvnode_slock);
			simple_lock(&vp->v_interlock);
		}

		/*
		 * With v_usecount == 0, all we need to do is clear out the
		 * vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}

		/*
		 * If FORCECLOSE is set, forcibly close the vnode. For block
		 * or character devices, revert to an anonymous device. For
		 * all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *) 0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
static void
vclean(struct vnode *vp, int flags, struct proc *p)
{
	int active;

	/*
	 * Check to see if the vnode is in use. If so we have to reference it
	 * before we clean it out so that its count cannot fall to zero and
	 * generate a race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount))
		vp->v_usecount++;
	/*
	 * Prevent the vnode from being recycled or brought into use while we
	 * clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);
	cache_purge(vp);
	if (vp->v_vnlock) {
		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
			vprint("vclean: lock not drained", vp);
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t) vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if ((ap->a_flags & REVOKEALL) == 0)
		panic("vop_revoke");
#endif

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP) {
				simple_unlock(&spechash_slock);
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
	}
	vgonel(vp, p);
	return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp) {
			simple_unlock(inter_lkp);
		}
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}

	if (vp->v_object) {
		vp->v_object->flags |= OBJ_VNODE_GONE;
	}

	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}

	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
		    vnode_free_list.tqh_first != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}

	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (1);
	}
	return (0);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %ld,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DDB
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes(void)
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next)
			if (VOP_ISLOCKED(vp))
				vprint((char *) 0, vp);
	}
}
#endif

int kinfo_vdebug = 1;
int kinfo_vgetfailed;

#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
static int
sysctl_vnode SYSCTL_HANDLER_ARGS
{
	struct proc *p = curproc;	/* XXX */
	register struct mount *mp, *nmp;
	struct vnode *vp;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)

	req->lock = 0;
	if (!req->oldptr)	/* Make an estimate */
		return (SYSCTL_OUT(req, 0,
		    (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));

	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_next;
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p))
			continue;
again:
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next) {
			/*
			 * Check that the vp is still associated with this
			 * filesystem. RACE: could have been recycled onto
			 * the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				goto again;
			}
			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
			    (error = SYSCTL_OUT(req, vp, VNODESZ))) {
				vfs_unbusy(mp, p);
				return (error);
			}
		}
		vfs_unbusy(mp, p);
	}

	return (0);
}

SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
	0, 0, sysctl_vnode, "S,vnode", "");

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Unmount all filesystems. The list is traversed in reverse order
 * of mounting to avoid dependencies. Should only be called by halt().
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp, *rootfs = NULL;
	int error;

	/* unmount all but rootfs */
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;

		if (mp->mnt_flag & MNT_ROOTFS) {
			rootfs = mp;
			continue;
		}
		error = dounmount(mp, MNT_FORCE, initproc);
		if (error) {
			printf("unmount of %s failed (", mp->mnt_stat.f_mntonname);
			if (error == EBUSY)
				printf("BUSY)\n");
			else
				printf("%d)\n", error);
		}
	}

	/* and finally... */
	if (rootfs) {
		vfs_unmountroot(rootfs);
	} else {
		printf("no root filesystem\n");
	}
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
    struct export_args *argp)
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t) np, i);
	saddr = (struct sockaddr *) (np + 1);
	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not used,
		 * do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **) &nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(struct radix_node *rn, void *w)
{
	register struct radix_node_head *rnh = (struct radix_node_head *) w;

	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t) rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(struct netexport *nep)
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i])) {
			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
			    (caddr_t) rnh);
			free((caddr_t) rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

/*
 * Update the export list for a mount point: delete the existing export
 * address lists and/or install new ones, as requested by argp->ex_flags.
 */
int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if ((error = vfs_hang_addrlist(mp, nep, argp)))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

/*
 * Look up the export credentials for a client address on an exported
 * mount point, falling back to the default export if one is defined.
 */
struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
				    (*rnh->rnh_matchaddr)((caddr_t)saddr,
				    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Perform msync on all vnodes under a mount point.
 * The mount point must be locked.
 */
void
vfs_msync(struct mount *mp, int flags) {
	struct vnode *vp, *nvp;
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {

		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		if (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))
			continue;
		if (vp->v_object &&
		    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
			vm_object_page_clean(vp->v_object, 0, 0, TRUE, TRUE);
		}
	}
}

/*
 * Create the VM object needed for VMIO and mmap support. This
 * is done for all VREG files in the system. Some filesystems might
 * afford the additional metadata buffering capability of the
 * VMIO code by making the device node be VMIO mode also.
 */
int
vfs_object_create(vp, p, cred, waslocked)
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
	int waslocked;
{
	struct vattr vat;
	vm_object_t object;
	int error = 0;

retry:
	if ((object = vp->v_object) == NULL) {
		if (vp->v_type == VREG) {
			if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
				goto retn;
			(void) vnode_pager_alloc(vp,
			    OFF_TO_IDX(round_page(vat.va_size)), 0, 0);
		} else {
			/*
			 * This simply allocates the biggest object possible
			 * for a VBLK vnode. This should be fixed, but doesn't
			 * cause any problems (yet).
			 */
			(void) vnode_pager_alloc(vp, INT_MAX, 0, 0);
		}
		vp->v_object->flags |= OBJ_VFS_REF;
	} else {
		if (object->flags & OBJ_DEAD) {
			if (waslocked)
				VOP_UNLOCK(vp, 0, p);
			tsleep(object, PVM, "vodead", 0);
			if (waslocked)
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
			goto retry;
		}
		if ((object->flags & OBJ_VFS_REF) == 0) {
			object->flags |= OBJ_VFS_REF;
			vm_object_reference(object);
		}
	}
	if (vp->v_object)
		vp->v_flag |= VVMIO;

retn:
	return error;
}