1 /* 2 * Copyright (c) 1989, 1991, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 34 * $FreeBSD$ 35 */ 36 37 #include "opt_quota.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/namei.h> 42 #include <sys/proc.h> 43 #include <sys/kernel.h> 44 #include <sys/vnode.h> 45 #include <sys/mount.h> 46 #include <sys/buf.h> 47 #include <sys/conf.h> 48 #include <sys/fcntl.h> 49 #include <sys/disklabel.h> 50 #include <sys/malloc.h> 51 52 #include <ufs/ufs/quota.h> 53 #include <ufs/ufs/ufsmount.h> 54 #include <ufs/ufs/inode.h> 55 #include <ufs/ufs/ufs_extern.h> 56 57 #include <ufs/ffs/fs.h> 58 #include <ufs/ffs/ffs_extern.h> 59 60 #include <vm/vm.h> 61 #include <vm/vm_prot.h> 62 #include <vm/vm_page.h> 63 64 static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part"); 65 66 static int ffs_sbupdate __P((struct ufsmount *, int)); 67 static int ffs_reload __P((struct mount *,struct ucred *,struct proc *)); 68 static int ffs_oldfscompat __P((struct fs *)); 69 static int ffs_mount __P((struct mount *, char *, caddr_t, 70 struct nameidata *, struct proc *)); 71 static int ffs_init __P((struct vfsconf *)); 72 73 static struct vfsops ufs_vfsops = { 74 ffs_mount, 75 ufs_start, 76 ffs_unmount, 77 ufs_root, 78 ufs_quotactl, 79 ffs_statfs, 80 ffs_sync, 81 ffs_vget, 82 ffs_fhtovp, 83 ufs_check_export, 84 ffs_vptofh, 85 ffs_init, 86 }; 87 88 VFS_SET(ufs_vfsops, ufs, 0); 89 90 /* 91 * ffs_mount 92 * 93 * Called when mounting local physical media 94 * 95 * PARAMETERS: 96 * mountroot 97 * mp mount point structure 98 * path NULL (flag for root mount!!!) 99 * data <unused> 100 * ndp <unused> 101 * p process (user credentials check [statfs]) 102 * 103 * mount 104 * mp mount point structure 105 * path path to mount point 106 * data pointer to argument struct in user space 107 * ndp mount point namei() return (used for 108 * credentials on reload), reused to look 109 * up block device. 110 * p process (user credentials check) 111 * 112 * RETURNS: 0 Success 113 * !0 error number (errno.h) 114 * 115 * LOCK STATE: 116 * 117 * ENTRY 118 * mount point is locked 119 * EXIT 120 * mount point is locked 121 * 122 * NOTES: 123 * A NULL path can be used for a flag since the mount 124 * system call will fail with EFAULT in copyinstr in 125 * namei() if it is a genuine NULL from the user. 126 */ 127 static int 128 ffs_mount( mp, path, data, ndp, p) 129 struct mount *mp; /* mount struct pointer*/ 130 char *path; /* path to mount point*/ 131 caddr_t data; /* arguments to FS specific mount*/ 132 struct nameidata *ndp; /* mount point credentials*/ 133 struct proc *p; /* process requesting mount*/ 134 { 135 size_t size; 136 int err = 0; 137 struct vnode *devvp; 138 139 struct ufs_args args; 140 struct ufsmount *ump = 0; 141 register struct fs *fs; 142 int error, flags, ronly = 0; 143 mode_t accessmode; 144 145 /* 146 * Use NULL path to flag a root mount 147 */ 148 if( path == NULL) { 149 /* 150 *** 151 * Mounting root file system 152 *** 153 */ 154 155 if ((err = bdevvp(rootdev, &rootvp))) { 156 printf("ffs_mountroot: can't find rootvp"); 157 return (err); 158 } 159 160 if( ( err = ffs_mountfs(rootvp, mp, p, M_FFSNODE)) != 0) { 161 /* fs specific cleanup (if any)*/ 162 goto error_1; 163 } 164 165 goto dostatfs; /* success*/ 166 167 } 168 169 /* 170 *** 171 * Mounting non-root file system or updating a file system 172 *** 173 */ 174 175 /* copy in user arguments*/ 176 err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args)); 177 if (err) 178 goto error_1; /* can't get arguments*/ 179 180 /* 181 * If updating, check whether changing from read-only to 182 * read/write; if there is no device name, that's all we do. 183 */ 184 if (mp->mnt_flag & MNT_UPDATE) { 185 ump = VFSTOUFS(mp); 186 fs = ump->um_fs; 187 devvp = ump->um_devvp; 188 err = 0; 189 ronly = fs->fs_ronly; /* MNT_RELOAD might change this */ 190 if (ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { 191 flags = WRITECLOSE; 192 if (mp->mnt_flag & MNT_FORCE) 193 flags |= FORCECLOSE; 194 if (mp->mnt_flag & MNT_SOFTDEP) { 195 err = softdep_flushfiles(mp, flags, p); 196 } else { 197 err = ffs_flushfiles(mp, flags, p); 198 } 199 ronly = 1; 200 } 201 if (!err && (mp->mnt_flag & MNT_RELOAD)) 202 err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p); 203 if (err) { 204 goto error_1; 205 } 206 if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { 207 /* 208 * If upgrade to read-write by non-root, then verify 209 * that user has necessary permissions on the device. 210 */ 211 if (p->p_ucred->cr_uid != 0) { 212 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 213 if ((error = VOP_ACCESS(devvp, VREAD | VWRITE, 214 p->p_ucred, p)) != 0) { 215 VOP_UNLOCK(devvp, 0, p); 216 return (error); 217 } 218 VOP_UNLOCK(devvp, 0, p); 219 } 220 221 if (fs->fs_clean == 0) { 222 if (mp->mnt_flag & MNT_FORCE) { 223 printf( 224 "WARNING: %s was not properly dismounted\n", 225 fs->fs_fsmnt); 226 } else { 227 printf( 228 "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 229 fs->fs_fsmnt); 230 err = EPERM; 231 goto error_1; 232 } 233 } 234 235 /* check to see if we need to start softdep */ 236 if (fs->fs_flags & FS_DOSOFTDEP) { 237 err = softdep_mount(devvp, mp, fs, p->p_ucred); 238 if (err) 239 goto error_1; 240 } 241 242 ronly = 0; 243 } 244 /* 245 * Soft updates is incompatible with "async", 246 * so if we are doing softupdates stop the user 247 * from setting the async flag in an update. 248 * Softdep_mount() clears it in an initial mount 249 * or ro->rw remount. 250 */ 251 if (mp->mnt_flag & MNT_SOFTDEP) { 252 mp->mnt_flag &= ~MNT_ASYNC; 253 } 254 /* if not updating name...*/ 255 if (args.fspec == 0) { 256 /* 257 * Process export requests. Jumping to "success" 258 * will return the vfs_export() error code. 259 */ 260 err = vfs_export(mp, &ump->um_export, &args.export); 261 goto success; 262 } 263 } 264 265 /* 266 * Not an update, or updating the name: look up the name 267 * and verify that it refers to a sensible block device. 268 */ 269 NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); 270 err = namei(ndp); 271 if (err) { 272 /* can't get devvp!*/ 273 goto error_1; 274 } 275 276 devvp = ndp->ni_vp; 277 278 if (!vn_isdisk(devvp)) { 279 err = ENOTBLK; 280 goto error_2; 281 } 282 283 /* 284 * If mount by non-root, then verify that user has necessary 285 * permissions on the device. 286 */ 287 if (p->p_ucred->cr_uid != 0) { 288 accessmode = VREAD; 289 if ((mp->mnt_flag & MNT_RDONLY) == 0) 290 accessmode |= VWRITE; 291 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 292 if ((error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) != 0) { 293 vput(devvp); 294 return (error); 295 } 296 VOP_UNLOCK(devvp, 0, p); 297 } 298 299 if (mp->mnt_flag & MNT_UPDATE) { 300 /* 301 ******************** 302 * UPDATE 303 * If it's not the same vnode, or at least the same device 304 * then it's not correct. 305 ******************** 306 */ 307 308 if (devvp != ump->um_devvp) { 309 if ( devvp->v_rdev == ump->um_devvp->v_rdev) { 310 vrele(devvp); 311 } else { 312 err = EINVAL; /* needs translation */ 313 } 314 } else 315 vrele(devvp); 316 /* 317 * Update device name only on success 318 */ 319 if( !err) { 320 /* Save "mounted from" info for mount point (NULL pad)*/ 321 copyinstr( args.fspec, 322 mp->mnt_stat.f_mntfromname, 323 MNAMELEN - 1, 324 &size); 325 bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 326 } 327 } else { 328 /* 329 ******************** 330 * NEW MOUNT 331 ******************** 332 */ 333 334 /* 335 * Since this is a new mount, we want the names for 336 * the device and the mount point copied in. If an 337 * error occurs, the mountpoint is discarded by the 338 * upper level code. 339 */ 340 /* Save "last mounted on" info for mount point (NULL pad)*/ 341 copyinstr( path, /* mount point*/ 342 mp->mnt_stat.f_mntonname, /* save area*/ 343 MNAMELEN - 1, /* max size*/ 344 &size); /* real size*/ 345 bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size); 346 347 /* Save "mounted from" info for mount point (NULL pad)*/ 348 copyinstr( args.fspec, /* device name*/ 349 mp->mnt_stat.f_mntfromname, /* save area*/ 350 MNAMELEN - 1, /* max size*/ 351 &size); /* real size*/ 352 bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 353 354 err = ffs_mountfs(devvp, mp, p, M_FFSNODE); 355 } 356 if (err) { 357 goto error_2; 358 } 359 360 dostatfs: 361 /* 362 * Initialize FS stat information in mount struct; uses both 363 * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname 364 * 365 * This code is common to root and non-root mounts 366 */ 367 (void)VFS_STATFS(mp, &mp->mnt_stat, p); 368 369 goto success; 370 371 372 error_2: /* error with devvp held*/ 373 374 /* release devvp before failing*/ 375 vrele(devvp); 376 377 error_1: /* no state to back out*/ 378 379 success: 380 if (!err && path && (mp->mnt_flag & MNT_UPDATE)) { 381 /* Update clean flag after changing read-onlyness. */ 382 fs = ump->um_fs; 383 if (ronly != fs->fs_ronly) { 384 fs->fs_ronly = ronly; 385 fs->fs_clean = ronly && 386 (fs->fs_flags & FS_UNCLEAN) == 0 ? 1 : 0; 387 ffs_sbupdate(ump, MNT_WAIT); 388 } 389 } 390 return (err); 391 } 392 393 /* 394 * Reload all incore data for a filesystem (used after running fsck on 395 * the root filesystem and finding things to fix). The filesystem must 396 * be mounted read-only. 397 * 398 * Things to do to update the mount: 399 * 1) invalidate all cached meta-data. 400 * 2) re-read superblock from disk. 401 * 3) re-read summary information from disk. 402 * 4) invalidate all inactive vnodes. 403 * 5) invalidate all cached file data. 404 * 6) re-read inode data for all active vnodes. 405 */ 406 static int 407 ffs_reload(mp, cred, p) 408 register struct mount *mp; 409 struct ucred *cred; 410 struct proc *p; 411 { 412 register struct vnode *vp, *nvp, *devvp; 413 struct inode *ip; 414 struct csum *space; 415 struct buf *bp; 416 struct fs *fs, *newfs; 417 struct partinfo dpart; 418 dev_t dev; 419 int i, blks, size, error; 420 int32_t *lp; 421 422 if ((mp->mnt_flag & MNT_RDONLY) == 0) 423 return (EINVAL); 424 /* 425 * Step 1: invalidate all cached meta-data. 426 */ 427 devvp = VFSTOUFS(mp)->um_devvp; 428 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 429 error = vinvalbuf(devvp, 0, cred, p, 0, 0); 430 VOP_UNLOCK(devvp, 0, p); 431 if (error) 432 panic("ffs_reload: dirty1"); 433 434 dev = devvp->v_rdev; 435 436 /* 437 * Only VMIO the backing device if the backing device is a real 438 * block device. See ffs_mountmfs() for more details. 439 */ 440 if (devvp->v_tag != VT_MFS && devvp->v_type == VBLK) { 441 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 442 vfs_object_create(devvp, p, p->p_ucred); 443 simple_lock(&devvp->v_interlock); 444 VOP_UNLOCK(devvp, LK_INTERLOCK, p); 445 } 446 447 /* 448 * Step 2: re-read superblock from disk. 449 */ 450 if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0) 451 size = DEV_BSIZE; 452 else 453 size = dpart.disklab->d_secsize; 454 if ((error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp)) != 0) 455 return (error); 456 newfs = (struct fs *)bp->b_data; 457 if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE || 458 newfs->fs_bsize < sizeof(struct fs)) { 459 brelse(bp); 460 return (EIO); /* XXX needs translation */ 461 } 462 fs = VFSTOUFS(mp)->um_fs; 463 /* 464 * Copy pointer fields back into superblock before copying in XXX 465 * new superblock. These should really be in the ufsmount. XXX 466 * Note that important parameters (eg fs_ncg) are unchanged. 467 */ 468 bcopy(&fs->fs_csp[0], &newfs->fs_csp[0], sizeof(fs->fs_csp)); 469 newfs->fs_maxcluster = fs->fs_maxcluster; 470 bcopy(newfs, fs, (u_int)fs->fs_sbsize); 471 if (fs->fs_sbsize < SBSIZE) 472 bp->b_flags |= B_INVAL; 473 brelse(bp); 474 mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; 475 ffs_oldfscompat(fs); 476 477 /* 478 * Step 3: re-read summary information from disk. 479 */ 480 blks = howmany(fs->fs_cssize, fs->fs_fsize); 481 space = fs->fs_csp[0]; 482 for (i = 0; i < blks; i += fs->fs_frag) { 483 size = fs->fs_bsize; 484 if (i + fs->fs_frag > blks) 485 size = (blks - i) * fs->fs_fsize; 486 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, 487 NOCRED, &bp); 488 if (error) 489 return (error); 490 bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size); 491 brelse(bp); 492 } 493 /* 494 * We no longer know anything about clusters per cylinder group. 495 */ 496 if (fs->fs_contigsumsize > 0) { 497 lp = fs->fs_maxcluster; 498 for (i = 0; i < fs->fs_ncg; i++) 499 *lp++ = fs->fs_contigsumsize; 500 } 501 502 loop: 503 simple_lock(&mntvnode_slock); 504 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 505 if (vp->v_mount != mp) { 506 simple_unlock(&mntvnode_slock); 507 goto loop; 508 } 509 nvp = vp->v_mntvnodes.le_next; 510 /* 511 * Step 4: invalidate all inactive vnodes. 512 */ 513 if (vrecycle(vp, &mntvnode_slock, p)) 514 goto loop; 515 /* 516 * Step 5: invalidate all cached file data. 517 */ 518 simple_lock(&vp->v_interlock); 519 simple_unlock(&mntvnode_slock); 520 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { 521 goto loop; 522 } 523 if (vinvalbuf(vp, 0, cred, p, 0, 0)) 524 panic("ffs_reload: dirty2"); 525 /* 526 * Step 6: re-read inode data for all active vnodes. 527 */ 528 ip = VTOI(vp); 529 error = 530 bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 531 (int)fs->fs_bsize, NOCRED, &bp); 532 if (error) { 533 vput(vp); 534 return (error); 535 } 536 ip->i_din = *((struct dinode *)bp->b_data + 537 ino_to_fsbo(fs, ip->i_number)); 538 ip->i_effnlink = ip->i_nlink; 539 brelse(bp); 540 vput(vp); 541 simple_lock(&mntvnode_slock); 542 } 543 simple_unlock(&mntvnode_slock); 544 return (0); 545 } 546 547 /* 548 * Common code for mount and mountroot 549 */ 550 int 551 ffs_mountfs(devvp, mp, p, malloctype) 552 register struct vnode *devvp; 553 struct mount *mp; 554 struct proc *p; 555 struct malloc_type *malloctype; 556 { 557 register struct ufsmount *ump; 558 struct buf *bp; 559 register struct fs *fs; 560 dev_t dev; 561 struct partinfo dpart; 562 caddr_t base, space; 563 int error, i, blks, size, ronly; 564 int32_t *lp; 565 struct ucred *cred; 566 u_int64_t maxfilesize; /* XXX */ 567 size_t strsize; 568 int ncount; 569 570 dev = devvp->v_rdev; 571 cred = p ? p->p_ucred : NOCRED; 572 /* 573 * Disallow multiple mounts of the same device. 574 * Disallow mounting of a device that is currently in use 575 * (except for root, which might share swap device for miniroot). 576 * Flush out any old buffers remaining from a previous use. 577 */ 578 error = vfs_mountedon(devvp); 579 if (error) 580 return (error); 581 ncount = vcount(devvp); 582 583 if (ncount > 1 && devvp != rootvp) 584 return (EBUSY); 585 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 586 error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0); 587 VOP_UNLOCK(devvp, 0, p); 588 if (error) 589 return (error); 590 591 /* 592 * Only VMIO the backing device if the backing device is a real 593 * block device. This excludes the original MFS implementation. 594 * Note that it is optional that the backing device be VMIOed. This 595 * increases the opportunity for metadata caching. 596 */ 597 if (devvp->v_tag != VT_MFS && devvp->v_type == VBLK) { 598 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 599 vfs_object_create(devvp, p, p->p_ucred); 600 simple_lock(&devvp->v_interlock); 601 VOP_UNLOCK(devvp, LK_INTERLOCK, p); 602 } 603 604 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 605 error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); 606 if (error) 607 return (error); 608 if (devvp->v_rdev->si_iosize_max > mp->mnt_iosize_max) 609 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 610 if (mp->mnt_iosize_max > MAXPHYS) 611 mp->mnt_iosize_max = MAXPHYS; 612 613 if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0) 614 size = DEV_BSIZE; 615 else 616 size = dpart.disklab->d_secsize; 617 618 bp = NULL; 619 ump = NULL; 620 if ((error = bread(devvp, SBLOCK, SBSIZE, cred, &bp)) != 0) 621 goto out; 622 fs = (struct fs *)bp->b_data; 623 if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE || 624 fs->fs_bsize < sizeof(struct fs)) { 625 error = EINVAL; /* XXX needs translation */ 626 goto out; 627 } 628 fs->fs_fmod = 0; 629 fs->fs_flags &= ~FS_UNCLEAN; 630 if (fs->fs_clean == 0) { 631 fs->fs_flags |= FS_UNCLEAN; 632 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 633 printf( 634 "WARNING: %s was not properly dismounted\n", 635 fs->fs_fsmnt); 636 } else { 637 printf( 638 "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 639 fs->fs_fsmnt); 640 error = EPERM; 641 goto out; 642 } 643 } 644 /* XXX updating 4.2 FFS superblocks trashes rotational layout tables */ 645 if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) { 646 error = EROFS; /* needs translation */ 647 goto out; 648 } 649 ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK); 650 bzero((caddr_t)ump, sizeof *ump); 651 ump->um_malloctype = malloctype; 652 ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, 653 M_WAITOK); 654 ump->um_blkatoff = ffs_blkatoff; 655 ump->um_truncate = ffs_truncate; 656 ump->um_update = ffs_update; 657 ump->um_valloc = ffs_valloc; 658 ump->um_vfree = ffs_vfree; 659 bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize); 660 if (fs->fs_sbsize < SBSIZE) 661 bp->b_flags |= B_INVAL; 662 brelse(bp); 663 bp = NULL; 664 fs = ump->um_fs; 665 fs->fs_ronly = ronly; 666 if (ronly == 0) { 667 fs->fs_fmod = 1; 668 fs->fs_clean = 0; 669 } 670 size = fs->fs_cssize; 671 blks = howmany(size, fs->fs_fsize); 672 if (fs->fs_contigsumsize > 0) 673 size += fs->fs_ncg * sizeof(int32_t); 674 base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK); 675 for (i = 0; i < blks; i += fs->fs_frag) { 676 size = fs->fs_bsize; 677 if (i + fs->fs_frag > blks) 678 size = (blks - i) * fs->fs_fsize; 679 if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, 680 cred, &bp)) != 0) { 681 free(base, M_UFSMNT); 682 goto out; 683 } 684 bcopy(bp->b_data, space, (u_int)size); 685 fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space; 686 space += size; 687 brelse(bp); 688 bp = NULL; 689 } 690 if (fs->fs_contigsumsize > 0) { 691 fs->fs_maxcluster = lp = (int32_t *)space; 692 for (i = 0; i < fs->fs_ncg; i++) 693 *lp++ = fs->fs_contigsumsize; 694 } 695 mp->mnt_data = (qaddr_t)ump; 696 mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0]; 697 mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1]; 698 if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 || 699 vfs_getvfs(&mp->mnt_stat.f_fsid)) 700 vfs_getnewfsid(mp); 701 mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; 702 mp->mnt_flag |= MNT_LOCAL; 703 ump->um_mountp = mp; 704 ump->um_dev = dev; 705 ump->um_devvp = devvp; 706 ump->um_nindir = fs->fs_nindir; 707 ump->um_bptrtodb = fs->fs_fsbtodb; 708 ump->um_seqinc = fs->fs_frag; 709 for (i = 0; i < MAXQUOTAS; i++) 710 ump->um_quotas[i] = NULLVP; 711 devvp->v_specmountpoint = mp; 712 ffs_oldfscompat(fs); 713 714 /* 715 * Set FS local "last mounted on" information (NULL pad) 716 */ 717 copystr( mp->mnt_stat.f_mntonname, /* mount point*/ 718 fs->fs_fsmnt, /* copy area*/ 719 sizeof(fs->fs_fsmnt) - 1, /* max size*/ 720 &strsize); /* real size*/ 721 bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize); 722 723 if( mp->mnt_flag & MNT_ROOTFS) { 724 /* 725 * Root mount; update timestamp in mount structure. 726 * this will be used by the common root mount code 727 * to update the system clock. 728 */ 729 mp->mnt_time = fs->fs_time; 730 } 731 732 ump->um_savedmaxfilesize = fs->fs_maxfilesize; /* XXX */ 733 maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1; /* XXX */ 734 if (fs->fs_maxfilesize > maxfilesize) /* XXX */ 735 fs->fs_maxfilesize = maxfilesize; /* XXX */ 736 if (ronly == 0) { 737 if ((fs->fs_flags & FS_DOSOFTDEP) && 738 (error = softdep_mount(devvp, mp, fs, cred)) != 0) { 739 free(base, M_UFSMNT); 740 goto out; 741 } 742 fs->fs_clean = 0; 743 (void) ffs_sbupdate(ump, MNT_WAIT); 744 } 745 return (0); 746 out: 747 devvp->v_specmountpoint = NULL; 748 if (bp) 749 brelse(bp); 750 (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p); 751 if (ump) { 752 free(ump->um_fs, M_UFSMNT); 753 free(ump, M_UFSMNT); 754 mp->mnt_data = (qaddr_t)0; 755 } 756 return (error); 757 } 758 759 /* 760 * Sanity checks for old file systems. 761 * 762 * XXX - goes away some day. 763 */ 764 static int 765 ffs_oldfscompat(fs) 766 struct fs *fs; 767 { 768 769 fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect); /* XXX */ 770 fs->fs_interleave = max(fs->fs_interleave, 1); /* XXX */ 771 if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ 772 fs->fs_nrpos = 8; /* XXX */ 773 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ 774 #if 0 775 int i; /* XXX */ 776 u_int64_t sizepb = fs->fs_bsize; /* XXX */ 777 /* XXX */ 778 fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1; /* XXX */ 779 for (i = 0; i < NIADDR; i++) { /* XXX */ 780 sizepb *= NINDIR(fs); /* XXX */ 781 fs->fs_maxfilesize += sizepb; /* XXX */ 782 } /* XXX */ 783 #endif 784 fs->fs_maxfilesize = (u_quad_t) 1LL << 39; 785 fs->fs_qbmask = ~fs->fs_bmask; /* XXX */ 786 fs->fs_qfmask = ~fs->fs_fmask; /* XXX */ 787 } /* XXX */ 788 return (0); 789 } 790 791 /* 792 * unmount system call 793 */ 794 int 795 ffs_unmount(mp, mntflags, p) 796 struct mount *mp; 797 int mntflags; 798 struct proc *p; 799 { 800 register struct ufsmount *ump; 801 register struct fs *fs; 802 int error, flags; 803 804 flags = 0; 805 if (mntflags & MNT_FORCE) { 806 flags |= FORCECLOSE; 807 } 808 if (mp->mnt_flag & MNT_SOFTDEP) { 809 if ((error = softdep_flushfiles(mp, flags, p)) != 0) 810 return (error); 811 } else { 812 if ((error = ffs_flushfiles(mp, flags, p)) != 0) 813 return (error); 814 } 815 ump = VFSTOUFS(mp); 816 fs = ump->um_fs; 817 if (fs->fs_ronly == 0) { 818 fs->fs_clean = fs->fs_flags & FS_UNCLEAN ? 0 : 1; 819 error = ffs_sbupdate(ump, MNT_WAIT); 820 if (error) { 821 fs->fs_clean = 0; 822 return (error); 823 } 824 } 825 ump->um_devvp->v_specmountpoint = NULL; 826 827 vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, p, 0, 0); 828 error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE, 829 NOCRED, p); 830 831 vrele(ump->um_devvp); 832 833 free(fs->fs_csp[0], M_UFSMNT); 834 free(fs, M_UFSMNT); 835 free(ump, M_UFSMNT); 836 mp->mnt_data = (qaddr_t)0; 837 mp->mnt_flag &= ~MNT_LOCAL; 838 return (error); 839 } 840 841 /* 842 * Flush out all the files in a filesystem. 843 */ 844 int 845 ffs_flushfiles(mp, flags, p) 846 register struct mount *mp; 847 int flags; 848 struct proc *p; 849 { 850 register struct ufsmount *ump; 851 int error; 852 853 ump = VFSTOUFS(mp); 854 #ifdef QUOTA 855 if (mp->mnt_flag & MNT_QUOTA) { 856 int i; 857 error = vflush(mp, NULLVP, SKIPSYSTEM|flags); 858 if (error) 859 return (error); 860 for (i = 0; i < MAXQUOTAS; i++) { 861 if (ump->um_quotas[i] == NULLVP) 862 continue; 863 quotaoff(p, mp, i); 864 } 865 /* 866 * Here we fall through to vflush again to ensure 867 * that we have gotten rid of all the system vnodes. 868 */ 869 } 870 #endif 871 /* 872 * Flush all the files. 873 */ 874 if ((error = vflush(mp, NULL, flags)) != 0) 875 return (error); 876 /* 877 * Flush filesystem metadata. 878 */ 879 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); 880 error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p); 881 VOP_UNLOCK(ump->um_devvp, 0, p); 882 return (error); 883 } 884 885 /* 886 * Get file system statistics. 887 */ 888 int 889 ffs_statfs(mp, sbp, p) 890 struct mount *mp; 891 register struct statfs *sbp; 892 struct proc *p; 893 { 894 register struct ufsmount *ump; 895 register struct fs *fs; 896 897 ump = VFSTOUFS(mp); 898 fs = ump->um_fs; 899 if (fs->fs_magic != FS_MAGIC) 900 panic("ffs_statfs"); 901 sbp->f_bsize = fs->fs_fsize; 902 sbp->f_iosize = fs->fs_bsize; 903 sbp->f_blocks = fs->fs_dsize; 904 sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag + 905 fs->fs_cstotal.cs_nffree; 906 sbp->f_bavail = freespace(fs, fs->fs_minfree); 907 sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO; 908 sbp->f_ffree = fs->fs_cstotal.cs_nifree; 909 if (sbp != &mp->mnt_stat) { 910 sbp->f_type = mp->mnt_vfc->vfc_typenum; 911 bcopy((caddr_t)mp->mnt_stat.f_mntonname, 912 (caddr_t)&sbp->f_mntonname[0], MNAMELEN); 913 bcopy((caddr_t)mp->mnt_stat.f_mntfromname, 914 (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); 915 } 916 return (0); 917 } 918 919 /* 920 * Go through the disk queues to initiate sandbagged IO; 921 * go through the inodes to write those that have been modified; 922 * initiate the writing of the super block if it has been modified. 923 * 924 * Note: we are always called with the filesystem marked `MPBUSY'. 925 */ 926 int 927 ffs_sync(mp, waitfor, cred, p) 928 struct mount *mp; 929 int waitfor; 930 struct ucred *cred; 931 struct proc *p; 932 { 933 struct vnode *nvp, *vp; 934 struct inode *ip; 935 struct ufsmount *ump = VFSTOUFS(mp); 936 struct fs *fs; 937 int error, allerror = 0; 938 939 fs = ump->um_fs; 940 if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */ 941 printf("fs = %s\n", fs->fs_fsmnt); 942 panic("ffs_sync: rofs mod"); 943 } 944 /* 945 * Write back each (modified) inode. 946 */ 947 simple_lock(&mntvnode_slock); 948 loop: 949 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 950 /* 951 * If the vnode that we are about to sync is no longer 952 * associated with this mount point, start over. 953 */ 954 if (vp->v_mount != mp) 955 goto loop; 956 simple_lock(&vp->v_interlock); 957 nvp = vp->v_mntvnodes.le_next; 958 ip = VTOI(vp); 959 if ((vp->v_type == VNON) || (((ip->i_flag & 960 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) && 961 (TAILQ_EMPTY(&vp->v_dirtyblkhd) || (waitfor == MNT_LAZY)))) { 962 simple_unlock(&vp->v_interlock); 963 continue; 964 } 965 if (vp->v_type != VCHR) { 966 simple_unlock(&mntvnode_slock); 967 error = 968 vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); 969 if (error) { 970 simple_lock(&mntvnode_slock); 971 if (error == ENOENT) 972 goto loop; 973 continue; 974 } 975 if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0) 976 allerror = error; 977 VOP_UNLOCK(vp, 0, p); 978 vrele(vp); 979 simple_lock(&mntvnode_slock); 980 } else { 981 simple_unlock(&mntvnode_slock); 982 simple_unlock(&vp->v_interlock); 983 /* UFS_UPDATE(vp, waitfor == MNT_WAIT); */ 984 UFS_UPDATE(vp, 0); 985 simple_lock(&mntvnode_slock); 986 } 987 } 988 simple_unlock(&mntvnode_slock); 989 /* 990 * Force stale file system control information to be flushed. 991 */ 992 if (waitfor != MNT_LAZY) { 993 if (ump->um_mountp->mnt_flag & MNT_SOFTDEP) 994 waitfor = MNT_NOWAIT; 995 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); 996 if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0) 997 allerror = error; 998 VOP_UNLOCK(ump->um_devvp, 0, p); 999 } 1000 #ifdef QUOTA 1001 qsync(mp); 1002 #endif 1003 /* 1004 * Write back modified superblock. 1005 */ 1006 if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0) 1007 allerror = error; 1008 return (allerror); 1009 } 1010 1011 /* 1012 * Look up a FFS dinode number to find its incore vnode, otherwise read it 1013 * in from disk. If it is in core, wait for the lock bit to clear, then 1014 * return the inode locked. Detection and handling of mount points must be 1015 * done by the calling routine. 1016 */ 1017 static int ffs_inode_hash_lock; 1018 1019 int 1020 ffs_vget(mp, ino, vpp) 1021 struct mount *mp; 1022 ino_t ino; 1023 struct vnode **vpp; 1024 { 1025 struct fs *fs; 1026 struct inode *ip; 1027 struct ufsmount *ump; 1028 struct buf *bp; 1029 struct vnode *vp; 1030 dev_t dev; 1031 int error; 1032 1033 ump = VFSTOUFS(mp); 1034 dev = ump->um_dev; 1035 restart: 1036 if ((*vpp = ufs_ihashget(dev, ino)) != NULL) { 1037 return (0); 1038 } 1039 1040 /* 1041 * Lock out the creation of new entries in the FFS hash table in 1042 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate 1043 * may occur! 1044 */ 1045 if (ffs_inode_hash_lock) { 1046 while (ffs_inode_hash_lock) { 1047 ffs_inode_hash_lock = -1; 1048 tsleep(&ffs_inode_hash_lock, PVM, "ffsvgt", 0); 1049 } 1050 goto restart; 1051 } 1052 ffs_inode_hash_lock = 1; 1053 1054 /* 1055 * If this MALLOC() is performed after the getnewvnode() 1056 * it might block, leaving a vnode with a NULL v_data to be 1057 * found by ffs_sync() if a sync happens to fire right then, 1058 * which will cause a panic because ffs_sync() blindly 1059 * dereferences vp->v_data (as well it should). 1060 */ 1061 MALLOC(ip, struct inode *, sizeof(struct inode), 1062 ump->um_malloctype, M_WAITOK); 1063 1064 /* Allocate a new vnode/inode. */ 1065 error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp); 1066 if (error) { 1067 if (ffs_inode_hash_lock < 0) 1068 wakeup(&ffs_inode_hash_lock); 1069 ffs_inode_hash_lock = 0; 1070 *vpp = NULL; 1071 FREE(ip, ump->um_malloctype); 1072 return (error); 1073 } 1074 bzero((caddr_t)ip, sizeof(struct inode)); 1075 lockinit(&ip->i_lock, PINOD, "inode", 0, 0); 1076 vp->v_data = ip; 1077 ip->i_vnode = vp; 1078 ip->i_fs = fs = ump->um_fs; 1079 ip->i_dev = dev; 1080 ip->i_number = ino; 1081 #ifdef QUOTA 1082 { 1083 int i; 1084 for (i = 0; i < MAXQUOTAS; i++) 1085 ip->i_dquot[i] = NODQUOT; 1086 } 1087 #endif 1088 /* 1089 * Put it onto its hash chain and lock it so that other requests for 1090 * this inode will block if they arrive while we are sleeping waiting 1091 * for old data structures to be purged or for the contents of the 1092 * disk portion of this inode to be read. 1093 */ 1094 ufs_ihashins(ip); 1095 1096 if (ffs_inode_hash_lock < 0) 1097 wakeup(&ffs_inode_hash_lock); 1098 ffs_inode_hash_lock = 0; 1099 1100 /* Read in the disk contents for the inode, copy into the inode. */ 1101 error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1102 (int)fs->fs_bsize, NOCRED, &bp); 1103 if (error) { 1104 /* 1105 * The inode does not contain anything useful, so it would 1106 * be misleading to leave it on its hash chain. With mode 1107 * still zero, it will be unlinked and returned to the free 1108 * list by vput(). 1109 */ 1110 brelse(bp); 1111 vput(vp); 1112 *vpp = NULL; 1113 return (error); 1114 } 1115 ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino)); 1116 if (DOINGSOFTDEP(vp)) 1117 softdep_load_inodeblock(ip); 1118 else 1119 ip->i_effnlink = ip->i_nlink; 1120 bqrelse(bp); 1121 1122 /* 1123 * Initialize the vnode from the inode, check for aliases. 1124 * Note that the underlying vnode may have changed. 1125 */ 1126 error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp); 1127 if (error) { 1128 vput(vp); 1129 *vpp = NULL; 1130 return (error); 1131 } 1132 /* 1133 * Finish inode initialization now that aliasing has been resolved. 1134 */ 1135 ip->i_devvp = ump->um_devvp; 1136 VREF(ip->i_devvp); 1137 /* 1138 * Set up a generation number for this inode if it does not 1139 * already have one. This should only happen on old filesystems. 1140 */ 1141 if (ip->i_gen == 0) { 1142 ip->i_gen = random() / 2 + 1; 1143 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1144 ip->i_flag |= IN_MODIFIED; 1145 } 1146 /* 1147 * Ensure that uid and gid are correct. This is a temporary 1148 * fix until fsck has been changed to do the update. 1149 */ 1150 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ 1151 ip->i_uid = ip->i_din.di_ouid; /* XXX */ 1152 ip->i_gid = ip->i_din.di_ogid; /* XXX */ 1153 } /* XXX */ 1154 1155 *vpp = vp; 1156 return (0); 1157 } 1158 1159 /* 1160 * File handle to vnode 1161 * 1162 * Have to be really careful about stale file handles: 1163 * - check that the inode number is valid 1164 * - call ffs_vget() to get the locked inode 1165 * - check for an unallocated inode (i_mode == 0) 1166 * - check that the given client host has export rights and return 1167 * those rights via. exflagsp and credanonp 1168 */ 1169 int 1170 ffs_fhtovp(mp, fhp, vpp) 1171 register struct mount *mp; 1172 struct fid *fhp; 1173 struct vnode **vpp; 1174 { 1175 register struct ufid *ufhp; 1176 struct fs *fs; 1177 1178 ufhp = (struct ufid *)fhp; 1179 fs = VFSTOUFS(mp)->um_fs; 1180 if (ufhp->ufid_ino < ROOTINO || 1181 ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg) 1182 return (ESTALE); 1183 return (ufs_fhtovp(mp, ufhp, vpp)); 1184 } 1185 1186 /* 1187 * Vnode pointer to File handle 1188 */ 1189 /* ARGSUSED */ 1190 int 1191 ffs_vptofh(vp, fhp) 1192 struct vnode *vp; 1193 struct fid *fhp; 1194 { 1195 register struct inode *ip; 1196 register struct ufid *ufhp; 1197 1198 ip = VTOI(vp); 1199 ufhp = (struct ufid *)fhp; 1200 ufhp->ufid_len = sizeof(struct ufid); 1201 ufhp->ufid_ino = ip->i_number; 1202 ufhp->ufid_gen = ip->i_gen; 1203 return (0); 1204 } 1205 1206 /* 1207 * Initialize the filesystem; just use ufs_init. 1208 */ 1209 static int 1210 ffs_init(vfsp) 1211 struct vfsconf *vfsp; 1212 { 1213 1214 softdep_initialize(); 1215 return (ufs_init(vfsp)); 1216 } 1217 1218 /* 1219 * Write a superblock and associated information back to disk. 1220 */ 1221 static int 1222 ffs_sbupdate(mp, waitfor) 1223 struct ufsmount *mp; 1224 int waitfor; 1225 { 1226 register struct fs *dfs, *fs = mp->um_fs; 1227 register struct buf *bp; 1228 int blks; 1229 caddr_t space; 1230 int i, size, error, allerror = 0; 1231 1232 /* 1233 * First write back the summary information. 1234 */ 1235 blks = howmany(fs->fs_cssize, fs->fs_fsize); 1236 space = (caddr_t)fs->fs_csp[0]; 1237 for (i = 0; i < blks; i += fs->fs_frag) { 1238 size = fs->fs_bsize; 1239 if (i + fs->fs_frag > blks) 1240 size = (blks - i) * fs->fs_fsize; 1241 bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i), 1242 size, 0, 0); 1243 bcopy(space, bp->b_data, (u_int)size); 1244 space += size; 1245 if (waitfor != MNT_WAIT) 1246 bawrite(bp); 1247 else if ((error = bwrite(bp)) != 0) 1248 allerror = error; 1249 } 1250 /* 1251 * Now write back the superblock itself. If any errors occurred 1252 * up to this point, then fail so that the superblock avoids 1253 * being written out as clean. 1254 */ 1255 if (allerror) 1256 return (allerror); 1257 bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0); 1258 fs->fs_fmod = 0; 1259 fs->fs_time = time_second; 1260 bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); 1261 /* Restore compatibility to old file systems. XXX */ 1262 dfs = (struct fs *)bp->b_data; /* XXX */ 1263 if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ 1264 dfs->fs_nrpos = -1; /* XXX */ 1265 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ 1266 int32_t *lp, tmp; /* XXX */ 1267 /* XXX */ 1268 lp = (int32_t *)&dfs->fs_qbmask; /* XXX */ 1269 tmp = lp[4]; /* XXX */ 1270 for (i = 4; i > 0; i--) /* XXX */ 1271 lp[i] = lp[i-1]; /* XXX */ 1272 lp[0] = tmp; /* XXX */ 1273 } /* XXX */ 1274 dfs->fs_maxfilesize = mp->um_savedmaxfilesize; /* XXX */ 1275 if (waitfor != MNT_WAIT) 1276 bawrite(bp); 1277 else if ((error = bwrite(bp)) != 0) 1278 allerror = error; 1279 return (allerror); 1280 } 1281