/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 * $Id: vfs_subr.c,v 1.6 1994/08/22 17:05:00 davidg Exp $
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

void insmntque __P((struct vnode *, struct mount *));

enum vtype iftovt_tab[16] = {
        VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
        VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
        0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
        S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 */
#define bufinsvn(bp, dp)        LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define bufremvn(bp) {                                          \
        LIST_REMOVE(bp, b_vnbufs);                              \
        (bp)->b_vnbufs.le_next = NOLIST;                        \
}

TAILQ_HEAD(freelst, vnode) vnode_free_list;     /* vnode free list */
struct mntlist mountlist;                       /* mounted filesystem list */
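/*
 * Note on free-list discipline (a summary of the code below, added for
 * orientation): the list is kept in LRU order -- vrele() queues inactive
 * vnodes at the tail, getnewvnode() reclaims from the head, and vgone()
 * moves fully-cleaned vnodes to the head so that they are reused first.
 */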
/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

        TAILQ_INIT(&vnode_free_list);
        TAILQ_INIT(&mountlist);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
int
vfs_lock(mp)
        register struct mount *mp;
{

        while (mp->mnt_flag & MNT_MLOCK) {
                mp->mnt_flag |= MNT_MWAIT;
                sleep((caddr_t)mp, PVFS);
        }
        mp->mnt_flag |= MNT_MLOCK;
        return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
        register struct mount *mp;
{

        if ((mp->mnt_flag & MNT_MLOCK) == 0)
                panic("vfs_unlock: not locked");
        mp->mnt_flag &= ~MNT_MLOCK;
        if (mp->mnt_flag & MNT_MWAIT) {
                mp->mnt_flag &= ~MNT_MWAIT;
                wakeup((caddr_t)mp);
        }
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
int
vfs_busy(mp)
        register struct mount *mp;
{

        while (mp->mnt_flag & MNT_MPBUSY) {
                mp->mnt_flag |= MNT_MPWANT;
                sleep((caddr_t)&mp->mnt_flag, PVFS);
        }
        if (mp->mnt_flag & MNT_UNMOUNT)
                return (1);
        mp->mnt_flag |= MNT_MPBUSY;
        return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
void
vfs_unbusy(mp)
        register struct mount *mp;
{

        if ((mp->mnt_flag & MNT_MPBUSY) == 0)
                panic("vfs_unbusy: not busy");
        mp->mnt_flag &= ~MNT_MPBUSY;
        if (mp->mnt_flag & MNT_MPWANT) {
                mp->mnt_flag &= ~MNT_MPWANT;
                wakeup((caddr_t)&mp->mnt_flag);
        }
}

void
vfs_unmountroot(rootfs)
        struct mount *rootfs;
{
        struct mount *mp = rootfs;
        int error;

        if (vfs_busy(mp)) {
                printf("failed to unmount root\n");
                return;
        }

        mp->mnt_flag |= MNT_UNMOUNT;
        if (error = vfs_lock(mp)) {
                printf("lock of root filesystem failed (%d)\n", error);
                return;
        }

        vnode_pager_umount(mp); /* release cached vnodes */
        cache_purgevfs(mp);     /* remove cache entries for this file sys */

        if (error = VFS_SYNC(mp, MNT_WAIT, initproc->p_ucred, initproc))
                printf("sync of root filesystem failed (%d)\n", error);

        if (error = VFS_UNMOUNT(mp, MNT_FORCE, initproc)) {
                printf("unmount of root filesystem failed (");
                if (error == EBUSY)
                        printf("BUSY)\n");
                else
                        printf("%d)\n", error);
        }

        mp->mnt_flag &= ~MNT_UNMOUNT;
        vfs_unbusy(mp);
}

/*
 * Unmount all filesystems.  Should only be called by halt().
 */
void
vfs_unmountall()
{
        struct mount *mp, *mp_next, *rootfs = NULL;
        int error;

        /* unmount all but rootfs */
        for (mp = mountlist.tqh_first; mp != NULL; mp = mp_next) {
                mp_next = mp->mnt_list.tqe_next;

                if (mp->mnt_flag & MNT_ROOTFS) {
                        rootfs = mp;
                        continue;
                }

                error = dounmount(mp, MNT_FORCE, initproc);
                if (error) {
                        printf("unmount of %s failed (", mp->mnt_stat.f_mntonname);
                        if (error == EBUSY)
                                printf("BUSY)\n");
                        else
                                printf("%d)\n", error);
                }
        }

        /* and finally... */
        if (rootfs) {
                vfs_unmountroot(rootfs);
        } else {
                printf("no root filesystem\n");
        }
}
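/*
 * getvfs() below is the fsid-to-mount-point lookup; the NFS server, for
 * instance, uses it to turn the fsid embedded in a client file handle
 * back into a mount point.  Illustrative sketch (not part of this file):
 *
 *	if ((mp = getvfs(&fhp->fh_fsid)) == NULL)
 *		return (ESTALE);
 */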
/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
getvfs(fsid)
        fsid_t *fsid;
{
        register struct mount *mp;

        for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
                if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
                    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
                        return (mp);
        }
        return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
getnewfsid(mp, mtype)
        struct mount *mp;
        int mtype;
{
        static u_short xxxfs_mntid;

        fsid_t tfsid;

        mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
        mp->mnt_stat.f_fsid.val[1] = mtype;
        if (xxxfs_mntid == 0)
                ++xxxfs_mntid;
        tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
        tfsid.val[1] = mtype;
        if (mountlist.tqh_first != NULL) {
                while (getvfs(&tfsid)) {
                        tfsid.val[0]++;
                        xxxfs_mntid++;
                }
        }
        mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
        register struct vattr *vap;
{

        vap->va_type = VNON;
        vap->va_size = VNOVAL;
        vap->va_bytes = VNOVAL;
        vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
                vap->va_fsid = vap->va_fileid =
                vap->va_blocksize = vap->va_rdev =
                vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
                vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
                vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
                vap->va_flags = vap->va_gen = VNOVAL;
        vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)();
extern void vclean();
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
        enum vtagtype tag;
        struct mount *mp;
        int (**vops)();
        struct vnode **vpp;
{
        register struct vnode *vp;
        int s;

        if ((vnode_free_list.tqh_first == NULL &&
             numvnodes < 2 * desiredvnodes) ||
            numvnodes < desiredvnodes) {
                vp = (struct vnode *)malloc((u_long)sizeof *vp,
                    M_VNODE, M_WAITOK);
                bzero((char *)vp, sizeof *vp);
                numvnodes++;
        } else {
                if ((vp = vnode_free_list.tqh_first) == NULL) {
                        tablefull("vnode");
                        *vpp = 0;
                        return (ENFILE);
                }
                if (vp->v_usecount)
                        panic("free vnode isn't");
                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                /* see comment on why 0xdeadb is set at end of vgone (below) */
                vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
                vp->v_lease = NULL;
                if (vp->v_type != VBAD)
                        vgone(vp);
#ifdef DIAGNOSTIC
                if (vp->v_data)
                        panic("cleaned vnode isn't");
                s = splbio();
                if (vp->v_numoutput)
                        panic("Clean vnode has pending I/O's");
                splx(s);
#endif
                vp->v_flag = 0;
                vp->v_lastr = 0;
                vp->v_ralen = 0;
                vp->v_maxra = 0;
                vp->v_lastw = 0;
                vp->v_lasta = 0;
                vp->v_cstart = 0;
                vp->v_clen = 0;
                vp->v_socket = 0;
                vp->v_writecount = 0;   /* XXX */
        }
        vp->v_type = VNON;
        cache_purge(vp);
        vp->v_tag = tag;
        vp->v_op = vops;
        insmntque(vp, mp);
        *vpp = vp;
        vp->v_usecount = 1;
        vp->v_data = 0;
        return (0);
}
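/*
 * Typical use of getnewvnode() from a filesystem's vnode-allocation path
 * (illustrative sketch only, not part of this file):
 *
 *	if (error = getnewvnode(VT_UFS, mp, ufs_vnodeop_p, &vp))
 *		return (error);
 *	vp->v_data = ip;	-- attach the fs-private data
 */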
/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
        register struct vnode *vp;
        register struct mount *mp;
{

        /*
         * Delete from old mount point vnode list, if on one.
         */
        if (vp->v_mount != NULL)
                LIST_REMOVE(vp, v_mntvnodes);
        /*
         * Insert into list of vnodes for the new mount point, if available.
         */
        if ((vp->v_mount = mp) == NULL)
                return;
        LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
        register struct buf *bp;
{
        register struct vnode *vp;

        bp->b_flags &= ~B_WRITEINPROG;
        if (vp = bp->b_vp) {
                vp->v_numoutput--;
                if (vp->v_numoutput < 0)
                        panic("vwakeup: neg numoutput");
                if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
                        vp->v_flag &= ~VBWAIT;
                        wakeup((caddr_t)&vp->v_numoutput);
                }
        }
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
        register struct vnode *vp;
        int flags;
        struct ucred *cred;
        struct proc *p;
        int slpflag, slptimeo;
{
        register struct buf *bp;
        struct buf *nbp, *blist;
        int s, error;

        if (flags & V_SAVE) {
                if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
                        return (error);
                if (vp->v_dirtyblkhd.lh_first != NULL)
                        panic("vinvalbuf: dirty bufs");
        }
        for (;;) {
                if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
                        while (blist && blist->b_lblkno < 0)
                                blist = blist->b_vnbufs.le_next;
                if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
                    (flags & V_SAVEMETA))
                        while (blist && blist->b_lblkno < 0)
                                blist = blist->b_vnbufs.le_next;
                if (!blist)
                        break;

                for (bp = blist; bp; bp = nbp) {
                        nbp = bp->b_vnbufs.le_next;
                        if (flags & V_SAVEMETA && bp->b_lblkno < 0)
                                continue;
                        s = splbio();
                        if (bp->b_flags & B_BUSY) {
                                bp->b_flags |= B_WANTED;
                                error = tsleep((caddr_t)bp,
                                    slpflag | (PRIBIO + 1), "vinvalbuf",
                                    slptimeo);
                                splx(s);
                                if (error)
                                        return (error);
                                break;
                        }
                        bremfree(bp);
                        bp->b_flags |= B_BUSY;
                        splx(s);
                        /*
                         * XXX Since there are no node locks for NFS, I believe
                         * there is a slight chance that a delayed write will
                         * occur while sleeping just above, so check for it.
                         */
                        if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
                                (void) VOP_BWRITE(bp);
                                break;
                        }
                        bp->b_flags |= B_INVAL;
                        brelse(bp);
                }
        }
        if (!(flags & V_SAVEMETA) &&
            (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
                panic("vinvalbuf: flush failed");
        return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
        register struct vnode *vp;
        register struct buf *bp;
{

        if (bp->b_vp)
                panic("bgetvp: not free");
        VHOLD(vp);
        bp->b_vp = vp;
        if (vp->v_type == VBLK || vp->v_type == VCHR)
                bp->b_dev = vp->v_rdev;
        else
                bp->b_dev = NODEV;
        /*
         * Insert onto list for new vnode.
         */
        bufinsvn(bp, &vp->v_cleanblkhd);
}
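/*
 * bgetvp() and brelvp() bracket a buffer's association with a vnode:
 * bgetvp() takes a hold reference (VHOLD) so the vnode cannot be recycled
 * while a buffer still points at it, and brelvp() releases that hold
 * (HOLDRELE) when the buffer is torn away.
 */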
/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
        register struct buf *bp;
{
        struct vnode *vp;

        if (bp->b_vp == (struct vnode *) 0)
                panic("brelvp: NULL");
        /*
         * Delete from old vnode list, if on one.
         */
        if (bp->b_vnbufs.le_next != NOLIST)
                bufremvn(bp);
        vp = bp->b_vp;
        bp->b_vp = (struct vnode *) 0;
        HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
        register struct buf *bp;
        register struct vnode *newvp;
{
        register struct buflists *listheadp;

        if (newvp == NULL) {
                printf("reassignbuf: NULL\n");
                return;
        }
        /*
         * Delete from old vnode list, if on one.
         */
        if (bp->b_vnbufs.le_next != NOLIST)
                bufremvn(bp);
        /*
         * If dirty, put on list of dirty buffers;
         * otherwise insert onto list of clean buffers.
         */
        if (bp->b_flags & B_DELWRI)
                listheadp = &newvp->v_dirtyblkhd;
        else
                listheadp = &newvp->v_cleanblkhd;
        bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
        dev_t dev;
        struct vnode **vpp;
{
        register struct vnode *vp;
        struct vnode *nvp;
        int error;

        if (dev == NODEV)
                return (0);
        error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
        if (error) {
                *vpp = 0;
                return (error);
        }
        vp = nvp;
        vp->v_type = VBLK;
        if (nvp = checkalias(vp, dev, (struct mount *)0)) {
                vput(vp);
                vp = nvp;
        }
        *vpp = vp;
        return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device).  If such an alias exists, deallocate
 * the existing contents and return the aliased vnode.  The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
        register struct vnode *nvp;
        dev_t nvp_rdev;
        struct mount *mp;
{
        register struct vnode *vp;
        struct vnode **vpp;

        if (nvp->v_type != VBLK && nvp->v_type != VCHR)
                return (NULLVP);

        vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
        for (vp = *vpp; vp; vp = vp->v_specnext) {
                if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
                        continue;
                /*
                 * Alias, but not in use, so flush it out.
                 */
                if (vp->v_usecount == 0) {
                        vgone(vp);
                        goto loop;
                }
                if (vget(vp, 1))
                        goto loop;
                break;
        }
        if (vp == NULL || vp->v_tag != VT_NON) {
                MALLOC(nvp->v_specinfo, struct specinfo *,
                    sizeof(struct specinfo), M_VNODE, M_WAITOK);
                nvp->v_rdev = nvp_rdev;
                nvp->v_hashchain = vpp;
                nvp->v_specnext = *vpp;
                nvp->v_specflags = 0;
                *vpp = nvp;
                if (vp != NULL) {
                        nvp->v_flag |= VALIASED;
                        vp->v_flag |= VALIASED;
                        vput(vp);
                }
                return (NULLVP);
        }
        VOP_UNLOCK(vp);
        vclean(vp, 0);
        vp->v_op = nvp->v_op;
        vp->v_tag = nvp->v_tag;
        nvp->v_type = VNON;
        insmntque(vp, mp);
        return (vp);
}
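/*
 * Alias bookkeeping in brief: every device vnode sits on a hash chain
 * keyed by SPECHASH(rdev).  When two or more live vnodes refer to the
 * same device, each is marked VALIASED; checkalias() above is what keeps
 * a single active instance per device, and vcount() further below sums
 * usage across an alias set.
 */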
/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it.  The vnode lock bit is set if the
 * vnode is being eliminated in vgone.  The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, lockflag)
        register struct vnode *vp;
        int lockflag;
{

        /*
         * If the vnode is in the process of being cleaned out for
         * another use, we wait for the cleaning to finish and then
         * return failure.  Cleaning is determined either by checking
         * that the VXLOCK flag is set, or that the use count is
         * zero with the back pointer set to show that it has been
         * removed from the free list by getnewvnode.  The VXLOCK
         * flag may not have been set yet because vclean is blocked in
         * the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
         */
        if ((vp->v_flag & VXLOCK) ||
            (vp->v_usecount == 0 &&
             vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
                vp->v_flag |= VXWANT;
                sleep((caddr_t)vp, PINOD);
                return (1);
        }
        if (vp->v_usecount == 0)
                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
        vp->v_usecount++;
        if (lockflag)
                VOP_LOCK(vp);
        return (0);
}

/*
 * Vnode reference, just increment the count
 */
void
vref(vp)
        struct vnode *vp;
{

        if (vp->v_usecount <= 0)
                panic("vref used where vget required");
        vp->v_usecount++;
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
        register struct vnode *vp;
{

        VOP_UNLOCK(vp);
        vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
        register struct vnode *vp;
{

#ifdef DIAGNOSTIC
        if (vp == NULL)
                panic("vrele: null vp");
#endif
        vp->v_usecount--;
        if (vp->v_usecount > 0)
                return;
#ifdef DIAGNOSTIC
        if (vp->v_usecount != 0 /* || vp->v_writecount != 0 */) {
                vprint("vrele: bad ref count", vp);
                panic("vrele: ref cnt");
        }
#endif
        /*
         * insert at tail of LRU list
         */
        TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
        VOP_INACTIVE(vp);
}

/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
        register struct vnode *vp;
{

        vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
        register struct vnode *vp;
{

        if (vp->v_holdcnt <= 0)
                panic("holdrele: holdcnt");
        vp->v_holdcnt--;
}
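/*
 * vflush() below is the workhorse of unmount.  A filesystem's
 * VFS_UNMOUNT routine typically flushes every vnode it owns, e.g.
 * (illustrative sketch, not part of this file):
 *
 *	if (error = vflush(mp, NULLVP, flags))
 *		return (error);
 */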
/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error).  If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
int busyprt = 0;        /* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
        struct mount *mp;
        struct vnode *skipvp;
        int flags;
{
        register struct vnode *vp, *nvp;
        int busy = 0;

        if ((mp->mnt_flag & MNT_MPBUSY) == 0)
                panic("vflush: not busy");
loop:
        for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
                if (vp->v_mount != mp)
                        goto loop;
                nvp = vp->v_mntvnodes.le_next;
                /*
                 * Skip over a selected vnode.
                 */
                if (vp == skipvp)
                        continue;
                /*
                 * Skip over vnodes marked VSYSTEM.
                 */
                if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
                        continue;
                /*
                 * If WRITECLOSE is set, only flush out regular file
                 * vnodes open for writing.
                 */
                if ((flags & WRITECLOSE) &&
                    (vp->v_writecount == 0 || vp->v_type != VREG))
                        continue;
                /*
                 * With v_usecount == 0, all we need to do is clear
                 * out the vnode data structures and we are done.
                 */
                if (vp->v_usecount == 0) {
                        vgone(vp);
                        continue;
                }
                /*
                 * If FORCECLOSE is set, forcibly close the vnode.
                 * For block or character devices, revert to an
                 * anonymous device.  For all other files, just kill them.
                 */
                if (flags & FORCECLOSE) {
                        if (vp->v_type != VBLK && vp->v_type != VCHR) {
                                vgone(vp);
                        } else {
                                vclean(vp, 0);
                                vp->v_op = spec_vnodeop_p;
                                insmntque(vp, (struct mount *)0);
                        }
                        continue;
                }
#ifdef DIAGNOSTIC
                if (busyprt)
                        vprint("vflush: busy vnode", vp);
#endif
                busy++;
        }
        if (busy)
                return (EBUSY);
        return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags)
        register struct vnode *vp;
        int flags;
{
        int active;

        /*
         * Check to see if the vnode is in use.
         * If so we have to reference it before we clean it out
         * so that its count cannot fall to zero and generate a
         * race against ourselves to recycle it.
         */
        if (active = vp->v_usecount)
                VREF(vp);
        /*
         * Even if the count is zero, the VOP_INACTIVE routine may still
         * have the object locked while it cleans it out.  The VOP_LOCK
         * ensures that the VOP_INACTIVE routine is done with its work.
         * For active vnodes, it ensures that no other activity can
         * occur while the underlying object is being cleaned out.
         */
        VOP_LOCK(vp);
        /*
         * Prevent the vnode from being recycled or
         * brought into use while we clean it out.
         */
        if (vp->v_flag & VXLOCK)
                panic("vclean: deadlock");
        vp->v_flag |= VXLOCK;
        /*
         * Clean out any buffers associated with the vnode.
         */
        if (flags & DOCLOSE)
                vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
        /*
         * Any other processes trying to obtain this lock must first
         * wait for VXLOCK to clear, then call the new lock operation.
         */
        VOP_UNLOCK(vp);
        /*
         * If purging an active vnode, it must be closed and
         * deactivated before being reclaimed.
         */
        if (active) {
                if (flags & DOCLOSE)
                        VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
                VOP_INACTIVE(vp);
        }
        /*
         * Reclaim the vnode.
         */
        if (VOP_RECLAIM(vp))
                panic("vclean: cannot reclaim");
        if (active)
                vrele(vp);

        /*
         * Done with purge, notify sleepers of the grim news.
         */
        vp->v_op = dead_vnodeop_p;
        vp->v_tag = VT_NON;
        vp->v_flag &= ~VXLOCK;
        if (vp->v_flag & VXWANT) {
                vp->v_flag &= ~VXWANT;
                wakeup((caddr_t)vp);
        }
}
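/*
 * The VXLOCK/VXWANT protocol, for reference: vclean() holds VXLOCK for
 * the duration of the purge; anyone who finds VXLOCK set (see vget(),
 * vgone(), vgoneall()) sets VXWANT and sleeps on the vnode address, and
 * vclean() issues the wakeup once the vnode has been handed over to the
 * dead filesystem ops.
 */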
/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vgoneall(vp)
        register struct vnode *vp;
{
        register struct vnode *vq;

        if (vp->v_flag & VALIASED) {
                /*
                 * If a vgone (or vclean) is already in progress,
                 * wait until it is done and return.
                 */
                if (vp->v_flag & VXLOCK) {
                        vp->v_flag |= VXWANT;
                        sleep((caddr_t)vp, PINOD);
                        return;
                }
                /*
                 * Ensure that vp will not be vgone'd while we
                 * are eliminating its aliases.
                 */
                vp->v_flag |= VXLOCK;
                while (vp->v_flag & VALIASED) {
                        for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                                if (vq->v_rdev != vp->v_rdev ||
                                    vq->v_type != vp->v_type || vp == vq)
                                        continue;
                                vgone(vq);
                                break;
                        }
                }
                /*
                 * Remove the lock so that vgone below will
                 * really eliminate the vnode after which time
                 * vgone will awaken any sleepers.
                 */
                vp->v_flag &= ~VXLOCK;
        }
        vgone(vp);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
        register struct vnode *vp;
{
        register struct vnode *vq;
        struct vnode *vx;

        /*
         * If a vgone (or vclean) is already in progress,
         * wait until it is done and return.
         */
        if (vp->v_flag & VXLOCK) {
                vp->v_flag |= VXWANT;
                sleep((caddr_t)vp, PINOD);
                return;
        }
        /*
         * Clean out the filesystem specific data.
         */
        vclean(vp, DOCLOSE);
        /*
         * Delete from old mount point vnode list, if on one.
         */
        if (vp->v_mount != NULL) {
                LIST_REMOVE(vp, v_mntvnodes);
                vp->v_mount = NULL;
        }
        /*
         * If special device, remove it from special device alias list.
         */
        if (vp->v_type == VBLK || vp->v_type == VCHR) {
                if (*vp->v_hashchain == vp) {
                        *vp->v_hashchain = vp->v_specnext;
                } else {
                        for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                                if (vq->v_specnext != vp)
                                        continue;
                                vq->v_specnext = vp->v_specnext;
                                break;
                        }
                        if (vq == NULL)
                                panic("missing bdev");
                }
                if (vp->v_flag & VALIASED) {
                        vx = NULL;
                        for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                                if (vq->v_rdev != vp->v_rdev ||
                                    vq->v_type != vp->v_type)
                                        continue;
                                if (vx)
                                        break;
                                vx = vq;
                        }
                        if (vx == NULL)
                                panic("missing alias");
                        if (vq == NULL)
                                vx->v_flag &= ~VALIASED;
                        vp->v_flag &= ~VALIASED;
                }
                FREE(vp->v_specinfo, M_VNODE);
                vp->v_specinfo = NULL;
        }
        /*
         * If it is on the freelist and not already at the head,
         * move it to the head of the list.  The test of the back
         * pointer and the reference count of zero is because
         * it will be removed from the free list by getnewvnode,
         * but will not have its reference count incremented until
         * after calling vgone.  If the reference count were
         * incremented first, vgone would (incorrectly) try to
         * close the previous instance of the underlying object.
         * So, the back pointer is explicitly set to `0xdeadb' in
         * getnewvnode after removing it from the freelist to ensure
         * that we do not try to move it here.
         */
        if (vp->v_usecount == 0 &&
            vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
            vnode_free_list.tqh_first != vp) {
                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
        }
        vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
        dev_t dev;
        enum vtype type;
        struct vnode **vpp;
{
        register struct vnode *vp;

        for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
                if (dev != vp->v_rdev || type != vp->v_type)
                        continue;
                *vpp = vp;
                return (1);
        }
        return (0);
}
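/*
 * vcount() below is what device close routines consult to decide whether
 * a close is the last reference to the device; with aliases, a single
 * vnode's v_usecount is not enough.  Illustrative sketch (not part of
 * this file):
 *
 *	if (vcount(vp) > 1)
 *		return (0);	-- not the last close, keep device open
 */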
/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
        register struct vnode *vp;
{
        register struct vnode *vq, *vnext;
        int count;

loop:
        if ((vp->v_flag & VALIASED) == 0)
                return (vp->v_usecount);
        for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
                vnext = vq->v_specnext;
                if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
                        continue;
                /*
                 * Alias, but not in use, so flush it out.
                 */
                if (vq->v_usecount == 0 && vq != vp) {
                        vgone(vq);
                        goto loop;
                }
                count += vq->v_usecount;
        }
        return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
        char *label;
        register struct vnode *vp;
{
        char buf[64];

        if (label != NULL)
                printf("%s: ", label);
        printf("type %s, usecount %d, writecount %d, refcount %d,",
            typename[vp->v_type], vp->v_usecount, vp->v_writecount,
            vp->v_holdcnt);
        buf[0] = '\0';
        if (vp->v_flag & VROOT)
                strcat(buf, "|VROOT");
        if (vp->v_flag & VTEXT)
                strcat(buf, "|VTEXT");
        if (vp->v_flag & VSYSTEM)
                strcat(buf, "|VSYSTEM");
        if (vp->v_flag & VXLOCK)
                strcat(buf, "|VXLOCK");
        if (vp->v_flag & VXWANT)
                strcat(buf, "|VXWANT");
        if (vp->v_flag & VBWAIT)
                strcat(buf, "|VBWAIT");
        if (vp->v_flag & VALIASED)
                strcat(buf, "|VALIASED");
        if (buf[0] != '\0')
                printf(" flags (%s)", &buf[1]);
        if (vp->v_data == NULL) {
                printf("\n");
        } else {
                printf("\n\t");
                VOP_PRINT(vp);
        }
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
        register struct mount *mp;
        register struct vnode *vp;

        printf("Locked vnodes\n");
        for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
                for (vp = mp->mnt_vnodelist.lh_first;
                    vp != NULL;
                    vp = vp->v_mntvnodes.le_next)
                        if (VOP_ISLOCKED(vp))
                                vprint((char *)0, vp);
        }
}
#endif
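/*
 * sysctl_vnode() below follows the usual sysctl sizing convention: a
 * NULL `where' pointer means "report the space needed", padded by
 * KINFO_VNODESLOP entries to allow for vnodes created between the size
 * estimate and the actual copyout.
 */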
int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP 10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep)
        char *where;
        size_t *sizep;
{
        register struct mount *mp, *nmp;
        struct vnode *vp;
        register char *bp = where, *savebp;
        char *ewhere;
        int error;

#define VPTRSZ  sizeof (struct vnode *)
#define VNODESZ sizeof (struct vnode)
        if (where == NULL) {
                *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
                return (0);
        }
        ewhere = where + *sizep;

        for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
                nmp = mp->mnt_list.tqe_next;
                if (vfs_busy(mp))
                        continue;
                savebp = bp;
again:
                for (vp = mp->mnt_vnodelist.lh_first;
                    vp != NULL;
                    vp = vp->v_mntvnodes.le_next) {
                        /*
                         * Check that the vp is still associated with
                         * this filesystem.  RACE: could have been
                         * recycled onto the same filesystem.
                         */
                        if (vp->v_mount != mp) {
                                if (kinfo_vdebug)
                                        printf("kinfo: vp changed\n");
                                bp = savebp;
                                goto again;
                        }
                        if (bp + VPTRSZ + VNODESZ > ewhere) {
                                *sizep = bp - where;
                                return (ENOMEM);
                        }
                        if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
                            (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
                                return (error);
                        bp += VPTRSZ + VNODESZ;
                }
                vfs_unbusy(mp);
        }

        *sizep = bp - where;
        return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
        register struct vnode *vp;
{
        register struct vnode *vq;

        if (vp->v_specflags & SI_MOUNTEDON)
                return (EBUSY);
        if (vp->v_flag & VALIASED) {
                for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                        if (vq->v_rdev != vp->v_rdev ||
                            vq->v_type != vp->v_type)
                                continue;
                        if (vq->v_specflags & SI_MOUNTEDON)
                                return (EBUSY);
                }
        }
        return (0);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
        struct mount *mp;
        struct netexport *nep;
        struct export_args *argp;
{
        register struct netcred *np;
        register struct radix_node_head *rnh;
        register int i;
        struct radix_node *rn;
        struct sockaddr *saddr, *smask = 0;
        struct domain *dom;
        int error;

        if (argp->ex_addrlen == 0) {
                if (mp->mnt_flag & MNT_DEFEXPORTED)
                        return (EPERM);
                np = &nep->ne_defexported;
                np->netc_exflags = argp->ex_flags;
                np->netc_anon = argp->ex_anon;
                np->netc_anon.cr_ref = 1;
                mp->mnt_flag |= MNT_DEFEXPORTED;
                return (0);
        }
        i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
        np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
        bzero((caddr_t)np, i);
        saddr = (struct sockaddr *)(np + 1);
        if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
                goto out;
        if (saddr->sa_len > argp->ex_addrlen)
                saddr->sa_len = argp->ex_addrlen;
        if (argp->ex_masklen) {
                smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
                error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
                if (error)
                        goto out;
                if (smask->sa_len > argp->ex_masklen)
                        smask->sa_len = argp->ex_masklen;
        }
        i = saddr->sa_family;
        if ((rnh = nep->ne_rtable[i]) == 0) {
                /*
                 * Seems silly to initialize every AF when most are not
                 * used, do so on demand here
                 */
                for (dom = domains; dom; dom = dom->dom_next)
                        if (dom->dom_family == i && dom->dom_rtattach) {
                                dom->dom_rtattach((void **)&nep->ne_rtable[i],
                                    dom->dom_rtoffset);
                                break;
                        }
                if ((rnh = nep->ne_rtable[i]) == 0) {
                        error = ENOBUFS;
                        goto out;
                }
        }
        rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
            np->netc_rnodes);
        if (rn == 0 || np != (struct netcred *)rn) {    /* already exists */
                error = EPERM;
                goto out;
        }
        np->netc_exflags = argp->ex_flags;
        np->netc_anon = argp->ex_anon;
        np->netc_anon.cr_ref = 1;
        return (0);
out:
        free(np, M_NETADDR);
        return (error);
}
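/*
 * Layout note: each netcred entry is allocated as a single block with
 * the export address and mask stored directly after the structure,
 * which is why vfs_hang_addrlist() computes saddr as
 * (struct sockaddr *)(np + 1) and smask at ex_addrlen bytes beyond that.
 */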
/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
        struct radix_node *rn;
        caddr_t w;
{
        register struct radix_node_head *rnh = (struct radix_node_head *)w;

        (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
        free((caddr_t)rn, M_NETADDR);
        return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
        struct netexport *nep;
{
        register int i;
        register struct radix_node_head *rnh;

        for (i = 0; i <= AF_MAX; i++)
                if (rnh = nep->ne_rtable[i]) {
                        (*rnh->rnh_walktree)(rnh, vfs_free_netcred,
                            (caddr_t)rnh);
                        free((caddr_t)rnh, M_RTABLE);
                        nep->ne_rtable[i] = 0;
                }
}

int
vfs_export(mp, nep, argp)
        struct mount *mp;
        struct netexport *nep;
        struct export_args *argp;
{
        int error;

        if (argp->ex_flags & MNT_DELEXPORT) {
                vfs_free_addrlist(nep);
                mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
        }
        if (argp->ex_flags & MNT_EXPORTED) {
                if (error = vfs_hang_addrlist(mp, nep, argp))
                        return (error);
                mp->mnt_flag |= MNT_EXPORTED;
        }
        return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
        register struct mount *mp;
        struct netexport *nep;
        struct mbuf *nam;
{
        register struct netcred *np;
        register struct radix_node_head *rnh;
        struct sockaddr *saddr;

        np = NULL;
        if (mp->mnt_flag & MNT_EXPORTED) {
                /*
                 * Lookup in the export list first.
                 */
                if (nam != NULL) {
                        saddr = mtod(nam, struct sockaddr *);
                        rnh = nep->ne_rtable[saddr->sa_family];
                        if (rnh != NULL) {
                                np = (struct netcred *)
                                    (*rnh->rnh_matchaddr)((caddr_t)saddr,
                                        rnh);
                                if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
                                        np = NULL;
                        }
                }
                /*
                 * If no address match, use the default if it exists.
                 */
                if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
                        np = &nep->ne_defexported;
        }
        return (np);
}