/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

void	insmntque __P((struct vnode *, struct mount *));

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {					\
	LIST_REMOVE(bp, b_vnbufs);			\
	(bp)->b_vnbufs.le_next = NOLIST;		\
}

TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct mntlist mountlist;			/* mounted filesystem list */

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&mountlist);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
int
vfs_lock(mp)
	register struct mount *mp;
{

	while(mp->mnt_flag & MNT_MLOCK) {
		mp->mnt_flag |= MNT_MWAIT;
		sleep((caddr_t)mp, PVFS);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t)mp);
	}
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
int
vfs_busy(mp)
	register struct mount *mp;
{

	while(mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		sleep((caddr_t)&mp->mnt_flag, PVFS);
	}
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
void
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t)&mp->mnt_flag);
	}
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
			return (mp);
	}
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
getnewfsid(mp, mtype)
	struct mount *mp;
	int mtype;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;

	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.tqh_first != NULL) {
		while (getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)();
extern void vclean();
long numvnodes;
extern struct vattr va_null;

/*
 * Return the next vnode from the free list.
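 * A new vnode is allocated while the total stays below desiredvnodes
 * (or below twice that when the free list is empty); otherwise the
 * vnode at the head of the free list is recycled with vgone() below.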
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)();
	struct vnode **vpp;
{
	register struct vnode *vp;
	int s;

	if ((vnode_free_list.tqh_first == NULL &&
	     numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		if ((vp = vnode_free_list.tqh_first) == NULL) {
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgone(vp);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL)
		return;
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if (vp = bp->b_vp) {
		vp->v_numoutput--;
		if (vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			if (vp->v_numoutput < 0)
				panic("vwakeup: neg numoutput");
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
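 * With V_SAVE, dirty buffers are first written out with VOP_FSYNC;
 * with V_SAVEMETA, buffers holding metadata (negative logical block
 * numbers, i.e. indirect blocks) are left on the vnode's lists.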
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
				    slpflag | (PRIBIO + 1), "vinvalbuf",
				    slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = 0;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		if (vget(vp, 1))
			goto loop;
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, lockflag)
	register struct vnode *vp;
	int lockflag;
{

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined either by checking
	 * that the VXLOCK flag is set, or that the use count is
	 * zero with the back pointer set to show that it has been
	 * removed from the free list by getnewvnode. The VXLOCK
	 * flag may not have been set yet because vclean is blocked in
	 * the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
	 */
	if ((vp->v_flag & VXLOCK) ||
	    (vp->v_usecount == 0 &&
	     vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
		vp->v_flag |= VXWANT;
		sleep((caddr_t)vp, PINOD);
		return (1);
	}
	if (vp->v_usecount == 0)
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
	vp->v_usecount++;
	if (lockflag)
		VOP_LOCK(vp);
	return (0);
}

/*
 * Vnode reference, just increment the count
 */
void
vref(vp)
	struct vnode *vp;
{

	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	register struct vnode *vp;
{

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	register struct vnode *vp;
{

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0)
		return;
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	VOP_INACTIVE(vp);
}

/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgone(vp);
			} else {
				vclean(vp, 0);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		VREF(vp);
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp);
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
	/*
	 * Any other processes trying to obtain this lock must first
	 * wait for VXLOCK to clear, then call the new lock operation.
	 */
	VOP_UNLOCK(vp);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
		VOP_INACTIVE(vp);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vgoneall(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			sleep((caddr_t)vp, PINOD);
			return;
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
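 * The vnode is cleaned with vclean(), removed from its mount point
 * vnode list and any special device alias chain, and, if unreferenced,
 * moved to the head of the free list for prompt reuse.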
 */
void
vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		sleep((caddr_t)vp, PINOD);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL) {
		LIST_REMOVE(vp, v_mntvnodes);
		vp->v_mount = NULL;
	}
	/*
	 * If special device, remove it from special device alias list.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0 &&
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
	    vnode_free_list.tqh_first != vp) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (1);
	}
	return (0);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next)
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
	}
}
#endif

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep)
	char *where;
	size_t *sizep;
{
	register struct mount *mp, *nmp;
	struct vnode *vp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
		nmp = mp->mnt_list.tqe_next;
		if (vfs_busy(mp))
			continue;
		savebp = bp;
again:
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				*sizep = bp - where;
				return (ENOMEM);
			}
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
		}
		vfs_unbusy(mp);
	}

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
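 * Returns EBUSY if the device, or any alias of it, currently has a
 * filesystem mounted on it.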
 */
int
vfs_mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON)
				return (EBUSY);
		}
	}
	return (0);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t)np, i);
	saddr = (struct sockaddr *)(np + 1);
	if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here.
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
					dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
		np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	caddr_t w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if (rnh = nep->ne_rtable[i]) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred,
			    (caddr_t)rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (error = vfs_hang_addrlist(mp, nep, argp))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
					    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}