/*
 * Copyright (c) 1989, 1991, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
 * $Id: ffs_vfsops.c,v 1.86 1998/09/07 13:17:06 bde Exp $
 */

#include "opt_quota.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/disklabel.h>
#include <sys/malloc.h>

#include <miscfs/specfs/specdev.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

#include <vm/vm.h>
#include <vm/vm_prot.h>
#include <vm/vm_page.h>

static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part");

static int	ffs_sbupdate __P((struct ufsmount *, int));
static int	ffs_reload __P((struct mount *, struct ucred *, struct proc *));
static int	ffs_oldfscompat __P((struct fs *));
static int	ffs_mount __P((struct mount *, char *, caddr_t,
			struct nameidata *, struct proc *));
static int	ffs_init __P((struct vfsconf *));

static struct vfsops ufs_vfsops = {
	ffs_mount,
	ufs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	ffs_statfs,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	ffs_vptofh,
	ffs_init,
};

VFS_SET(ufs_vfsops, ufs, 0);
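/*
 * Explanatory note (sketch): the table above plugs the FFS entry points
 * into the generic VFS operations switch, so a VFS_MOUNT(), VFS_SYNC(),
 * etc. issued against an FFS mount dispatches through mnt_op to
 * ffs_mount(), ffs_sync(), and so on.  Entries with no FFS-specific
 * behaviour (start, root, quotactl) reuse the shared ufs_* routines.
 */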
/*
 * ffs_mount
 *
 * Called when mounting local physical media
 *
 * PARAMETERS:
 *		mountroot
 *			mp	mount point structure
 *			path	NULL (flag for root mount!!!)
 *			data	<unused>
 *			ndp	<unused>
 *			p	process (user credentials check [statfs])
 *
 *		mount
 *			mp	mount point structure
 *			path	path to mount point
 *			data	pointer to argument struct in user space
 *			ndp	mount point namei() return (used for
 *				credentials on reload), reused to look
 *				up block device.
 *			p	process (user credentials check)
 *
 * RETURNS:	0	Success
 *		!0	error number (errno.h)
 *
 * LOCK STATE:
 *
 *		ENTRY
 *			mount point is locked
 *		EXIT
 *			mount point is locked
 *
 * NOTES:
 *		A NULL path can be used for a flag since the mount
 *		system call will fail with EFAULT in copyinstr in
 *		namei() if it is a genuine NULL from the user.
 */
static int
ffs_mount(mp, path, data, ndp, p)
	struct mount		*mp;	/* mount struct pointer */
	char			*path;	/* path to mount point */
	caddr_t			data;	/* arguments to FS specific mount */
	struct nameidata	*ndp;	/* mount point credentials */
	struct proc		*p;	/* process requesting mount */
{
	size_t		size;
	int		err = 0;
	struct vnode	*devvp;

	struct ufs_args args;
	struct ufsmount *ump = 0;
	register struct fs *fs;
	int error, flags;
	mode_t accessmode;
	int ronly = 0;

	/*
	 * Use NULL path to flag a root mount
	 */
	if (path == NULL) {
		/*
		 ***
		 * Mounting root file system
		 ***
		 */

		if ((err = bdevvp(rootdev, &rootvp))) {
			printf("ffs_mountroot: can't find rootvp");
			return (err);
		}

		if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERR)
			mp->mnt_flag |= MNT_NOCLUSTERR;
		if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERW)
			mp->mnt_flag |= MNT_NOCLUSTERW;
		if ((err = ffs_mountfs(rootvp, mp, p, M_FFSNODE)) != 0) {
			/* fs specific cleanup (if any) */
			goto error_1;
		}

		goto dostatfs;		/* success */

	}

	/*
	 ***
	 * Mounting non-root file system or updating a file system
	 ***
	 */

	/* copy in user arguments */
	err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
	if (err)
		goto error_1;		/* can't get arguments */

	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 * Disallow clearing of the MNT_NOCLUSTERR and MNT_NOCLUSTERW
	 * flags if the block device requests them.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		devvp = ump->um_devvp;
		err = 0;
		ronly = fs->fs_ronly;	/* MNT_RELOAD might change this */
		if (bdevsw[major(ump->um_dev)]->d_flags & D_NOCLUSTERR)
			mp->mnt_flag |= MNT_NOCLUSTERR;
		if (bdevsw[major(ump->um_dev)]->d_flags & D_NOCLUSTERW)
			mp->mnt_flag |= MNT_NOCLUSTERW;
		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			if (mp->mnt_flag & MNT_SOFTDEP) {
				err = softdep_flushfiles(mp, flags, p);
			} else {
				err = ffs_flushfiles(mp, flags, p);
			}
		}
		if (!err && (mp->mnt_flag & MNT_RELOAD))
			err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
		if (err) {
			goto error_1;
		}
		if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
			if (!fs->fs_clean) {
				if (mp->mnt_flag & MNT_FORCE) {
					printf("WARNING: %s was not properly dismounted.\n", fs->fs_fsmnt);
				} else {
					printf("WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck.\n",
					    fs->fs_fsmnt);
					err = EPERM;
					goto error_1;
				}
			}
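			/*
			 * Note (sketch): this mirrors the fs_clean check in
			 * ffs_mountfs(); going read/write on a filesystem
			 * that was not cleanly unmounted is only allowed
			 * when MNT_FORCE is set.
			 */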

			/*
			 * If upgrade to read-write by non-root, then verify
			 * that user has necessary permissions on the device.
			 */
			if (p->p_ucred->cr_uid != 0) {
				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
				if (error = VOP_ACCESS(devvp, VREAD | VWRITE,
				    p->p_ucred, p)) {
					VOP_UNLOCK(devvp, 0, p);
					return (error);
				}
				VOP_UNLOCK(devvp, 0, p);
			}

			/* check to see if we need to start softdep */
			if (fs->fs_flags & FS_DOSOFTDEP) {
				err = softdep_mount(devvp, mp, fs, p->p_ucred);
				if (err)
					goto error_1;
			}

			ronly = 0;
		}
		/*
		 * Soft updates is incompatible with "async",
		 * so if we are doing softupdates stop the user
		 * from setting the async flag in an update.
		 * Softdep_mount() clears it in an initial mount
		 * or ro->rw remount.
		 */
		if (mp->mnt_flag & MNT_SOFTDEP) {
			mp->mnt_flag &= ~MNT_ASYNC;
		}
		/* if not updating name... */
		if (args.fspec == 0) {
			/*
			 * Process export requests.  Jumping to "success"
			 * will return the vfs_export() error code.
			 */
			err = vfs_export(mp, &ump->um_export, &args.export);
			goto success;
		}
	}

	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible block device.
	 */
	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
	err = namei(ndp);
	if (err) {
		/* can't get devvp! */
		goto error_1;
	}

	devvp = ndp->ni_vp;

	if (devvp->v_type != VBLK) {
		err = ENOTBLK;
		goto error_2;
	}
	if (major(devvp->v_rdev) >= nblkdev) {
		err = ENXIO;
		goto error_2;
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	if (p->p_ucred->cr_uid != 0) {
		accessmode = VREAD;
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			accessmode |= VWRITE;
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
		if (error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) {
			vput(devvp);
			return (error);
		}
		VOP_UNLOCK(devvp, 0, p);
	}

	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 ********************
		 * UPDATE
		 * If it's not the same vnode, or at least the same device,
		 * then it's not correct.
		 ********************
		 */

		if (devvp != ump->um_devvp) {
			if (devvp->v_rdev == ump->um_devvp->v_rdev) {
				vrele(devvp);
			} else {
				err = EINVAL;	/* needs translation */
			}
		} else
			vrele(devvp);
		/*
		 * Update device name only on success
		 */
		if (!err) {
			/* Save "mounted from" info for mount point (NULL pad) */
			copyinstr(args.fspec,
			    mp->mnt_stat.f_mntfromname,
			    MNAMELEN - 1,
			    &size);
			bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
		}
	} else {
		/*
		 ********************
		 * NEW MOUNT
		 ********************
		 */

		if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERR)
			mp->mnt_flag |= MNT_NOCLUSTERR;
		if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERW)
			mp->mnt_flag |= MNT_NOCLUSTERW;

		/*
		 * Since this is a new mount, we want the names for
		 * the device and the mount point copied in.  If an
		 * error occurs, the mountpoint is discarded by the
		 * upper level code.
		 */
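		/*
		 * Sketch of the idiom used below: copyinstr() copies at
		 * most MNAMELEN - 1 bytes (including the terminating NUL)
		 * and reports the actual length in "size"; the following
		 * bzero() clears the remainder, so f_mntonname and
		 * f_mntfromname are always NUL padded to MNAMELEN.
		 */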
		/* Save "last mounted on" info for mount point (NULL pad) */
		copyinstr(path,				/* mount point */
		    mp->mnt_stat.f_mntonname,		/* save area */
		    MNAMELEN - 1,			/* max size */
		    &size);				/* real size */
		bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);

		/* Save "mounted from" info for mount point (NULL pad) */
		copyinstr(args.fspec,			/* device name */
		    mp->mnt_stat.f_mntfromname,		/* save area */
		    MNAMELEN - 1,			/* max size */
		    &size);				/* real size */
		bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);

		err = ffs_mountfs(devvp, mp, p, M_FFSNODE);
	}
	if (err) {
		goto error_2;
	}

dostatfs:
	/*
	 * Initialize FS stat information in mount struct; uses both
	 * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname
	 *
	 * This code is common to root and non-root mounts
	 */
	(void)VFS_STATFS(mp, &mp->mnt_stat, p);

	goto success;


error_2:	/* error with devvp held */

	/* release devvp before failing */
	vrele(devvp);

error_1:	/* no state to back out */

success:
	if (!err && path && (mp->mnt_flag & MNT_UPDATE)) {
		/* update superblock after ro -> rw update */
		fs = ump->um_fs;
		if (!ronly && fs->fs_ronly) {
			fs->fs_ronly = 0;
			fs->fs_clean = 0;
			ffs_sbupdate(ump, MNT_WAIT);
		}
	}
	return (err);
}

/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix).  The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 *	1) invalidate all cached meta-data.
 *	2) re-read superblock from disk.
 *	3) re-read summary information from disk.
 *	4) invalidate all inactive vnodes.
 *	5) invalidate all cached file data.
 *	6) re-read inode data for all active vnodes.
 */
static int
ffs_reload(mp, cred, p)
	register struct mount *mp;
	struct ucred *cred;
	struct proc *p;
{
	register struct vnode *vp, *nvp, *devvp;
	struct inode *ip;
	struct csum *space;
	struct buf *bp;
	struct fs *fs, *newfs;
	struct partinfo dpart;
	dev_t dev;
	int i, blks, size, error;
	int32_t *lp;

	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		return (EINVAL);
	/*
	 * Step 1: invalidate all cached meta-data.
	 */
	devvp = VFSTOUFS(mp)->um_devvp;
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
	error = vinvalbuf(devvp, 0, cred, p, 0, 0);
	VOP_UNLOCK(devvp, 0, p);
	if (error)
		panic("ffs_reload: dirty1");

	dev = devvp->v_rdev;
	/*
	 * Only VMIO the backing device if the backing device is a real
	 * block device.  This excludes the original MFS implementation.
	 * Note that it is optional that the backing device be VMIOed.  This
	 * increases the opportunity for metadata caching.
	 */
	if ((devvp->v_type == VBLK) && (major(dev) < nblkdev)) {
		simple_lock(&devvp->v_interlock);
		vfs_object_create(devvp, p, p->p_ucred, 0);
	}
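	/*
	 * Illustrative note: SBOFF is the byte offset of the superblock
	 * (historically 8192), so with a 512-byte sector size the bread()
	 * below asks for block SBOFF/512 == 16, while a 2048-byte sector
	 * device would use block 4.  DEV_BSIZE is only the fallback when
	 * the driver cannot report its sector size through DIOCGPART.
	 */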

	/*
	 * Step 2: re-read superblock from disk.
	 */
	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
		size = DEV_BSIZE;
	else
		size = dpart.disklab->d_secsize;
	if (error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED, &bp))
		return (error);
	newfs = (struct fs *)bp->b_data;
	if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
	    newfs->fs_bsize < sizeof(struct fs)) {
		brelse(bp);
		return (EIO);		/* XXX needs translation */
	}
	fs = VFSTOUFS(mp)->um_fs;
	/*
	 * Copy pointer fields back into superblock before copying in	XXX
	 * new superblock.  These should really be in the ufsmount.	XXX
	 * Note that important parameters (eg fs_ncg) are unchanged.
	 */
	bcopy(&fs->fs_csp[0], &newfs->fs_csp[0], sizeof(fs->fs_csp));
	newfs->fs_maxcluster = fs->fs_maxcluster;
	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
	if (fs->fs_sbsize < SBSIZE)
		bp->b_flags |= B_INVAL;
	brelse(bp);
	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
	ffs_oldfscompat(fs);

	/*
	 * Step 3: re-read summary information from disk.
	 */
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = fs->fs_csp[0];
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
		    NOCRED, &bp);
		if (error)
			return (error);
		bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size);
		brelse(bp);
	}
	/*
	 * We no longer know anything about clusters per cylinder group.
	 */
	if (fs->fs_contigsumsize > 0) {
		lp = fs->fs_maxcluster;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
	}

loop:
	simple_lock(&mntvnode_slock);
	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
		if (vp->v_mount != mp) {
			simple_unlock(&mntvnode_slock);
			goto loop;
		}
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Step 4: invalidate all inactive vnodes.
		 */
		if (vrecycle(vp, &mntvnode_slock, p))
			goto loop;
		/*
		 * Step 5: invalidate all cached file data.
		 */
		simple_lock(&vp->v_interlock);
		simple_unlock(&mntvnode_slock);
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			goto loop;
		}
		if (vinvalbuf(vp, 0, cred, p, 0, 0))
			panic("ffs_reload: dirty2");
		/*
		 * Step 6: re-read inode data for all active vnodes.
		 */
		ip = VTOI(vp);
		error =
		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			vput(vp);
			return (error);
		}
		ip->i_din = *((struct dinode *)bp->b_data +
		    ino_to_fsbo(fs, ip->i_number));
		ip->i_effnlink = ip->i_nlink;
		brelse(bp);
		vput(vp);
		simple_lock(&mntvnode_slock);
	}
	simple_unlock(&mntvnode_slock);
	return (0);
}

/*
 * Common code for mount and mountroot
 */
int
ffs_mountfs(devvp, mp, p, malloctype)
	register struct vnode *devvp;
	struct mount *mp;
	struct proc *p;
	struct malloc_type *malloctype;
{
	register struct ufsmount *ump;
	struct buf *bp;
	register struct fs *fs;
	struct cg *cgp;
	dev_t dev;
	struct partinfo dpart;
	struct csum cstotal;
	caddr_t base, space;
	int error, i, cyl, blks, size, ronly;
	int32_t *lp;
	struct ucred *cred;
	u_int64_t maxfilesize;					/* XXX */
	size_t strsize;
	int ncount;

	dev = devvp->v_rdev;
	cred = p ? p->p_ucred : NOCRED;
	/*
	 * Disallow multiple mounts of the same device.
	 * Disallow mounting of a device that is currently in use
	 * (except for root, which might share swap device for miniroot).
	 * Flush out any old buffers remaining from a previous use.
	 */
	error = vfs_mountedon(devvp);
	if (error)
		return (error);
	ncount = vcount(devvp);

	if (ncount > 1 && devvp != rootvp)
		return (EBUSY);
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
	error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0);
	VOP_UNLOCK(devvp, 0, p);
	if (error)
		return (error);

	/*
	 * Only VMIO the backing device if the backing device is a real
	 * block device.  This excludes the original MFS implementation.
	 * Note that it is optional that the backing device be VMIOed.  This
	 * increases the opportunity for metadata caching.
	 */
	if ((devvp->v_type == VBLK) && (major(dev) < nblkdev)) {
		simple_lock(&devvp->v_interlock);
		vfs_object_create(devvp, p, p->p_ucred, 0);
	}

	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
	if (error)
		return (error);

	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0)
		size = DEV_BSIZE;
	else
		size = dpart.disklab->d_secsize;

	bp = NULL;
	ump = NULL;
	if (error = bread(devvp, SBLOCK, SBSIZE, cred, &bp))
		goto out;
	fs = (struct fs *)bp->b_data;
	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
	    fs->fs_bsize < sizeof(struct fs)) {
		error = EINVAL;		/* XXX needs translation */
		goto out;
	}
	fs->fs_fmod = 0;
	if (!fs->fs_clean) {
		if (ronly || (mp->mnt_flag & MNT_FORCE)) {
			printf("WARNING: %s was not properly dismounted.\n", fs->fs_fsmnt);
		} else {
			printf("WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck.\n",
			    fs->fs_fsmnt);
			error = EPERM;
			goto out;
		}
	}
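	/*
	 * Background (sketch): fs_clean is set by ffs_unmount() (and by
	 * fsck) once everything has been flushed, and is cleared again,
	 * with the superblock rewritten, whenever the filesystem is
	 * mounted or upgraded read/write below.  An unclean flag here
	 * therefore normally means the previous mount was never unmounted.
	 */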
	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
		error = EROFS;		/* needs translation */
		goto out;
	}
	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
	bzero((caddr_t)ump, sizeof *ump);
	ump->um_malloctype = malloctype;
	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
	    M_WAITOK);
	ump->um_blkatoff = ffs_blkatoff;
	ump->um_truncate = ffs_truncate;
	ump->um_update = ffs_update;
	ump->um_valloc = ffs_valloc;
	ump->um_vfree = ffs_vfree;
	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
	if (fs->fs_sbsize < SBSIZE)
		bp->b_flags |= B_INVAL;
	brelse(bp);
	bp = NULL;
	fs = ump->um_fs;
	fs->fs_ronly = ronly;
	if (ronly == 0) {
		fs->fs_fmod = 1;
		fs->fs_clean = 0;
	}
	size = fs->fs_cssize;
	blks = howmany(size, fs->fs_fsize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		if (error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
		    cred, &bp)) {
			free(base, M_UFSMNT);
			goto out;
		}
		bcopy(bp->b_data, space, (u_int)size);
		fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space;
		space += size;
		brelse(bp);
		bp = NULL;
	}
	if (fs->fs_contigsumsize > 0) {
		fs->fs_maxcluster = lp = (int32_t *)space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
	}
	mp->mnt_data = (qaddr_t)ump;
	mp->mnt_stat.f_fsid.val[0] = (long)dev;
	if (fs->fs_id[0] != 0 && fs->fs_id[1] != 0)
		mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
	else
		mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
	mp->mnt_flag |= MNT_LOCAL;
	ump->um_mountp = mp;
	ump->um_dev = dev;
	ump->um_devvp = devvp;
	ump->um_nindir = fs->fs_nindir;
	ump->um_bptrtodb = fs->fs_fsbtodb;
	ump->um_seqinc = fs->fs_frag;
	for (i = 0; i < MAXQUOTAS; i++)
		ump->um_quotas[i] = NULLVP;
	devvp->v_specmountpoint = mp;
	ffs_oldfscompat(fs);

	/*
	 * Set FS local "last mounted on" information (NULL pad)
	 */
	copystr(mp->mnt_stat.f_mntonname,	/* mount point */
	    fs->fs_fsmnt,			/* copy area */
	    sizeof(fs->fs_fsmnt) - 1,		/* max size */
	    &strsize);				/* real size */
	bzero(fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);

	if (mp->mnt_flag & MNT_ROOTFS) {
		/*
		 * Root mount; update timestamp in mount structure.
		 * this will be used by the common root mount code
		 * to update the system clock.
		 */
		mp->mnt_time = fs->fs_time;
	}

	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
		fs->fs_maxfilesize = maxfilesize;		/* XXX */
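	/*
	 * Illustrative arithmetic: the clamp above limits fs_maxfilesize
	 * to 2^30 blocks; with an 8K block size that is 2^43 - 1 bytes,
	 * i.e. just under 8TB.  The on-disk value is saved in
	 * um_savedmaxfilesize so that ffs_sbupdate() can write the
	 * original figure back out.
	 */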
	if (ronly == 0) {
		if ((fs->fs_flags & FS_DOSOFTDEP) &&
		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
			free(base, M_UFSMNT);
			goto out;
		}
		fs->fs_clean = 0;
		(void) ffs_sbupdate(ump, MNT_WAIT);
	}
	return (0);
out:
	devvp->v_specmountpoint = NULL;
	if (bp)
		brelse(bp);
	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
	if (ump) {
		free(ump->um_fs, M_UFSMNT);
		free(ump, M_UFSMNT);
		mp->mnt_data = (qaddr_t)0;
	}
	return (error);
}

/*
 * Sanity checks for old file systems.
 *
 * XXX - goes away some day.
 */
static int
ffs_oldfscompat(fs)
	struct fs *fs;
{

	fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect);	/* XXX */
	fs->fs_interleave = max(fs->fs_interleave, 1);		/* XXX */
	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
		fs->fs_nrpos = 8;				/* XXX */
	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
#if 0
		int i;						/* XXX */
		u_int64_t sizepb = fs->fs_bsize;		/* XXX */
								/* XXX */
		fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1;	/* XXX */
		for (i = 0; i < NIADDR; i++) {			/* XXX */
			sizepb *= NINDIR(fs);			/* XXX */
			fs->fs_maxfilesize += sizepb;		/* XXX */
		}						/* XXX */
#endif
		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
		fs->fs_qbmask = ~fs->fs_bmask;			/* XXX */
		fs->fs_qfmask = ~fs->fs_fmask;			/* XXX */
	}							/* XXX */
	return (0);
}

/*
 * unmount system call
 */
int
ffs_unmount(mp, mntflags, p)
	struct mount *mp;
	int mntflags;
	struct proc *p;
{
	register struct ufsmount *ump;
	register struct fs *fs;
	int error, flags;

	flags = 0;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
	}
	if (mp->mnt_flag & MNT_SOFTDEP) {
		if ((error = softdep_flushfiles(mp, flags, p)) != 0)
			return (error);
	} else {
		if ((error = ffs_flushfiles(mp, flags, p)) != 0)
			return (error);
	}
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	if (fs->fs_ronly == 0) {
		fs->fs_clean = 1;
		error = ffs_sbupdate(ump, MNT_WAIT);
		if (error) {
			fs->fs_clean = 0;
			return (error);
		}
	}
	ump->um_devvp->v_specmountpoint = NULL;

	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, p, 0, 0);
	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
	    NOCRED, p);

	vrele(ump->um_devvp);

	free(fs->fs_csp[0], M_UFSMNT);
	free(fs, M_UFSMNT);
	free(ump, M_UFSMNT);
	mp->mnt_data = (qaddr_t)0;
	mp->mnt_flag &= ~MNT_LOCAL;
	return (error);
}

/*
 * Flush out all the files in a filesystem.
 */
int
ffs_flushfiles(mp, flags, p)
	register struct mount *mp;
	int flags;
	struct proc *p;
{
	register struct ufsmount *ump;
	int error;

	ump = VFSTOUFS(mp);
#ifdef QUOTA
	if (mp->mnt_flag & MNT_QUOTA) {
		int i;
		error = vflush(mp, NULLVP, SKIPSYSTEM|flags);
		if (error)
			return (error);
		for (i = 0; i < MAXQUOTAS; i++) {
			if (ump->um_quotas[i] == NULLVP)
				continue;
			quotaoff(p, mp, i);
		}
		/*
		 * Here we fall through to vflush again to ensure
		 * that we have gotten rid of all the system vnodes.
		 */
	}
#endif
	/*
	 * Flush all the files.
	 */
	if ((error = vflush(mp, NULL, flags)) != 0)
		return (error);
	/*
	 * Flush filesystem metadata.
	 */
	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
	error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p);
	VOP_UNLOCK(ump->um_devvp, 0, p);
	return (error);
}
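/*
 * Note on the statfs figures computed below (illustrative numbers only):
 * f_bfree is reported in fragments, i.e. whole free blocks scaled by
 * fs_frag plus the loose free fragments.  With fs_frag == 8,
 * cs_nbfree == 100 and cs_nffree == 12 that works out to
 * 100 * 8 + 12 == 812 free fragments, while f_bavail additionally
 * subtracts the fs_minfree reserve via the freespace() macro.
 */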

/*
 * Get file system statistics.
 */
int
ffs_statfs(mp, sbp, p)
	struct mount *mp;
	register struct statfs *sbp;
	struct proc *p;
{
	register struct ufsmount *ump;
	register struct fs *fs;

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	if (fs->fs_magic != FS_MAGIC)
		panic("ffs_statfs");
	sbp->f_bsize = fs->fs_fsize;
	sbp->f_iosize = fs->fs_bsize;
	sbp->f_blocks = fs->fs_dsize;
	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
	    fs->fs_cstotal.cs_nffree;
	sbp->f_bavail = freespace(fs, fs->fs_minfree);
	sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO;
	sbp->f_ffree = fs->fs_cstotal.cs_nifree;
	if (sbp != &mp->mnt_stat) {
		sbp->f_type = mp->mnt_vfc->vfc_typenum;
		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
		    (caddr_t)&sbp->f_mntonname[0], MNAMELEN);
		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
		    (caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
	}
	return (0);
}

/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
 */
int
ffs_sync(mp, waitfor, cred, p)
	struct mount *mp;
	int waitfor;
	struct ucred *cred;
	struct proc *p;
{
	struct vnode *nvp, *vp;
	struct inode *ip;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	struct timeval tv;
	int error, allerror = 0;

	fs = ump->um_fs;
	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
		printf("fs = %s\n", fs->fs_fsmnt);
		panic("ffs_sync: rofs mod");
	}
	/*
	 * Write back each (modified) inode.
	 */
	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
		/*
		 * If the vnode that we are about to sync is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp)
			goto loop;
		simple_lock(&vp->v_interlock);
		nvp = vp->v_mntvnodes.le_next;
		ip = VTOI(vp);
		if ((vp->v_type == VNON) || ((ip->i_flag &
		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) &&
		    ((vp->v_dirtyblkhd.lh_first == NULL) || (waitfor == MNT_LAZY))) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		if (vp->v_type != VCHR) {
			simple_unlock(&mntvnode_slock);
			error =
			    vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
			if (error) {
				simple_lock(&mntvnode_slock);
				if (error == ENOENT)
					goto loop;
				continue;
			}
			if (error = VOP_FSYNC(vp, cred, waitfor, p))
				allerror = error;
			VOP_UNLOCK(vp, 0, p);
			vrele(vp);
			simple_lock(&mntvnode_slock);
		} else {
			simple_unlock(&mntvnode_slock);
			simple_unlock(&vp->v_interlock);
			getmicrotime(&tv);
			/* UFS_UPDATE(vp, &tv, &tv, waitfor == MNT_WAIT); */
			UFS_UPDATE(vp, &tv, &tv, 0);
			simple_lock(&mntvnode_slock);
		}
	}
	simple_unlock(&mntvnode_slock);
	/*
	 * Force stale file system control information to be flushed.
	 */
	if (waitfor != MNT_LAZY) {
		if (ump->um_mountp->mnt_flag & MNT_SOFTDEP)
			waitfor = MNT_NOWAIT;
		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
		if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0)
			allerror = error;
		VOP_UNLOCK(ump->um_devvp, 0, p);
	}
#ifdef QUOTA
	qsync(mp);
#endif
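	/*
	 * Note (sketch): fs_fmod is the in-core "superblock dirty" flag;
	 * it is set when the filesystem goes writable and by the block
	 * allocator, and ffs_sbupdate() clears it again, so the write-back
	 * below is skipped when nothing in the superblock has changed.
	 */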
	/*
	 * Write back modified superblock.
	 */
	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
		allerror = error;
	return (allerror);
}

/*
 * Look up a FFS dinode number to find its incore vnode, otherwise read it
 * in from disk.  If it is in core, wait for the lock bit to clear, then
 * return the inode locked.  Detection and handling of mount points must be
 * done by the calling routine.
 */
static int ffs_inode_hash_lock;

int
ffs_vget(mp, ino, vpp)
	struct mount *mp;
	ino_t ino;
	struct vnode **vpp;
{
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	dev_t dev;
	int error;

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
restart:
	if ((*vpp = ufs_ihashget(dev, ino)) != NULL) {
		return (0);
	}

	/*
	 * Lock out the creation of new entries in the FFS hash table in
	 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate
	 * may occur!
	 */
	if (ffs_inode_hash_lock) {
		while (ffs_inode_hash_lock) {
			ffs_inode_hash_lock = -1;
			tsleep(&ffs_inode_hash_lock, PVM, "ffsvgt", 0);
		}
		goto restart;
	}
	ffs_inode_hash_lock = 1;

	/*
	 * If this MALLOC() is performed after the getnewvnode()
	 * it might block, leaving a vnode with a NULL v_data to be
	 * found by ffs_sync() if a sync happens to fire right then,
	 * which will cause a panic because ffs_sync() blindly
	 * dereferences vp->v_data (as well it should).
	 */
	MALLOC(ip, struct inode *, sizeof(struct inode),
	    ump->um_malloctype, M_WAITOK);

	/* Allocate a new vnode/inode. */
	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
	if (error) {
		if (ffs_inode_hash_lock < 0)
			wakeup(&ffs_inode_hash_lock);
		ffs_inode_hash_lock = 0;
		*vpp = NULL;
		FREE(ip, ump->um_malloctype);
		return (error);
	}
	bzero((caddr_t)ip, sizeof(struct inode));
	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
	vp->v_data = ip;
	ip->i_vnode = vp;
	ip->i_fs = fs = ump->um_fs;
	ip->i_dev = dev;
	ip->i_number = ino;
#ifdef QUOTA
	{
		int i;
		for (i = 0; i < MAXQUOTAS; i++)
			ip->i_dquot[i] = NODQUOT;
	}
#endif
	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ufs_ihashins(ip);

	if (ffs_inode_hash_lock < 0)
		wakeup(&ffs_inode_hash_lock);
	ffs_inode_hash_lock = 0;

	/* Read in the disk contents for the inode, copy into the inode. */
	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain.  With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */
		brelse(bp);
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino));
	if (DOINGSOFTDEP(vp))
		softdep_load_inodeblock(ip);
	else
		ip->i_effnlink = ip->i_nlink;
	bqrelse(bp);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */
	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
	if (error) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	/*
	 * Finish inode initialization now that aliasing has been resolved.
	 */
	ip->i_devvp = ump->um_devvp;
	VREF(ip->i_devvp);
	/*
	 * Set up a generation number for this inode if it does not
	 * already have one.  This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		ip->i_gen = random() / 2 + 1;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ip->i_flag |= IN_MODIFIED;
	}
	/*
	 * Ensure that uid and gid are correct.  This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
	}						/* XXX */

	*vpp = vp;
	return (0);
}

/*
 * File handle to vnode
 *
 * Have to be really careful about stale file handles:
 * - check that the inode number is valid
 * - call ffs_vget() to get the locked inode
 * - check for an unallocated inode (i_mode == 0)
 * - check that the given client host has export rights and return
 *   those rights via exflagsp and credanonp
 */
int
ffs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
	register struct mount *mp;
	struct fid *fhp;
	struct sockaddr *nam;
	struct vnode **vpp;
	int *exflagsp;
	struct ucred **credanonp;
{
	register struct ufid *ufhp;
	struct fs *fs;

	ufhp = (struct ufid *)fhp;
	fs = VFSTOUFS(mp)->um_fs;
	if (ufhp->ufid_ino < ROOTINO ||
	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
		return (ESTALE);
	return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp));
}

/*
 * Vnode pointer to File handle
 */
/* ARGSUSED */
int
ffs_vptofh(vp, fhp)
	struct vnode *vp;
	struct fid *fhp;
{
	register struct inode *ip;
	register struct ufid *ufhp;

	ip = VTOI(vp);
	ufhp = (struct ufid *)fhp;
	ufhp->ufid_len = sizeof(struct ufid);
	ufhp->ufid_ino = ip->i_number;
	ufhp->ufid_gen = ip->i_gen;
	return (0);
}

/*
 * Initialize the filesystem; just use ufs_init.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{

	softdep_initialize();
	return (ufs_init(vfsp));
}

/*
 * Write a superblock and associated information back to disk.
 */
static int
ffs_sbupdate(mp, waitfor)
	struct ufsmount *mp;
	int waitfor;
{
	register struct fs *dfs, *fs = mp->um_fs;
	register struct buf *bp;
	int blks;
	caddr_t space;
	int i, size, error, allerror = 0;

	/*
	 * First write back the summary information.
	 */
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = (caddr_t)fs->fs_csp[0];
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
		    size, 0, 0);
		bcopy(space, bp->b_data, (u_int)size);
		space += size;
		if (waitfor != MNT_WAIT)
			bawrite(bp);
		else if (error = bwrite(bp))
			allerror = error;
	}
	/*
	 * Now write back the superblock itself.  If any errors occurred
	 * up to this point, then fail so that the superblock avoids
	 * being written out as clean.
	 */
	if (allerror)
		return (allerror);
	bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0);
	fs->fs_fmod = 0;
	fs->fs_time = time_second;
	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
	/* Restore compatibility to old file systems.		   XXX */
	dfs = (struct fs *)bp->b_data;				/* XXX */
	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
		dfs->fs_nrpos = -1;				/* XXX */
	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
		int32_t *lp, tmp;				/* XXX */
								/* XXX */
		lp = (int32_t *)&dfs->fs_qbmask;		/* XXX */
		tmp = lp[4];					/* XXX */
		for (i = 4; i > 0; i--)				/* XXX */
			lp[i] = lp[i-1];			/* XXX */
		lp[0] = tmp;					/* XXX */
	}							/* XXX */
	dfs->fs_maxfilesize = mp->um_savedmaxfilesize;		/* XXX */
	if (waitfor != MNT_WAIT)
		bawrite(bp);
	else if (error = bwrite(bp))
		allerror = error;
	return (allerror);
}