1 /* 2 * Copyright (c) 1989, 1991, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 34 * $FreeBSD$ 35 */ 36 37 #include "opt_quota.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/namei.h> 42 #include <sys/proc.h> 43 #include <sys/kernel.h> 44 #include <sys/vnode.h> 45 #include <sys/mount.h> 46 #include <sys/buf.h> 47 #include <sys/conf.h> 48 #include <sys/fcntl.h> 49 #include <sys/disklabel.h> 50 #include <sys/malloc.h> 51 52 #include <ufs/ufs/quota.h> 53 #include <ufs/ufs/ufsmount.h> 54 #include <ufs/ufs/inode.h> 55 #include <ufs/ufs/ufs_extern.h> 56 57 #include <ufs/ffs/fs.h> 58 #include <ufs/ffs/ffs_extern.h> 59 60 #include <vm/vm.h> 61 #include <vm/vm_page.h> 62 63 static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part"); 64 65 static int ffs_sbupdate __P((struct ufsmount *, int)); 66 static int ffs_reload __P((struct mount *,struct ucred *,struct proc *)); 67 static int ffs_oldfscompat __P((struct fs *)); 68 static int ffs_mount __P((struct mount *, char *, caddr_t, 69 struct nameidata *, struct proc *)); 70 static int ffs_init __P((struct vfsconf *)); 71 72 static struct vfsops ufs_vfsops = { 73 ffs_mount, 74 ufs_start, 75 ffs_unmount, 76 ufs_root, 77 ufs_quotactl, 78 ffs_statfs, 79 ffs_sync, 80 ffs_vget, 81 ffs_fhtovp, 82 ufs_check_export, 83 ffs_vptofh, 84 ffs_init, 85 }; 86 87 VFS_SET(ufs_vfsops, ufs, 0); 88 89 /* 90 * ffs_mount 91 * 92 * Called when mounting local physical media 93 * 94 * PARAMETERS: 95 * mountroot 96 * mp mount point structure 97 * path NULL (flag for root mount!!!) 98 * data <unused> 99 * ndp <unused> 100 * p process (user credentials check [statfs]) 101 * 102 * mount 103 * mp mount point structure 104 * path path to mount point 105 * data pointer to argument struct in user space 106 * ndp mount point namei() return (used for 107 * credentials on reload), reused to look 108 * up block device. 109 * p process (user credentials check) 110 * 111 * RETURNS: 0 Success 112 * !0 error number (errno.h) 113 * 114 * LOCK STATE: 115 * 116 * ENTRY 117 * mount point is locked 118 * EXIT 119 * mount point is locked 120 * 121 * NOTES: 122 * A NULL path can be used for a flag since the mount 123 * system call will fail with EFAULT in copyinstr in 124 * namei() if it is a genuine NULL from the user. 125 */ 126 static int 127 ffs_mount( mp, path, data, ndp, p) 128 struct mount *mp; /* mount struct pointer*/ 129 char *path; /* path to mount point*/ 130 caddr_t data; /* arguments to FS specific mount*/ 131 struct nameidata *ndp; /* mount point credentials*/ 132 struct proc *p; /* process requesting mount*/ 133 { 134 size_t size; 135 int err = 0; 136 struct vnode *devvp; 137 138 struct ufs_args args; 139 struct ufsmount *ump = 0; 140 register struct fs *fs; 141 int error, flags, ronly = 0; 142 mode_t accessmode; 143 144 /* 145 * Use NULL path to flag a root mount 146 */ 147 if( path == NULL) { 148 /* 149 *** 150 * Mounting root file system 151 *** 152 */ 153 154 if ((err = bdevvp(rootdev, &rootvp))) { 155 printf("ffs_mountroot: can't find rootvp"); 156 return (err); 157 } 158 159 if( ( err = ffs_mountfs(rootvp, mp, p, M_FFSNODE)) != 0) { 160 /* fs specific cleanup (if any)*/ 161 goto error_1; 162 } 163 164 goto dostatfs; /* success*/ 165 166 } 167 168 /* 169 *** 170 * Mounting non-root file system or updating a file system 171 *** 172 */ 173 174 /* copy in user arguments*/ 175 err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args)); 176 if (err) 177 goto error_1; /* can't get arguments*/ 178 179 /* 180 * If updating, check whether changing from read-only to 181 * read/write; if there is no device name, that's all we do. 182 */ 183 if (mp->mnt_flag & MNT_UPDATE) { 184 ump = VFSTOUFS(mp); 185 fs = ump->um_fs; 186 devvp = ump->um_devvp; 187 err = 0; 188 ronly = fs->fs_ronly; /* MNT_RELOAD might change this */ 189 if (ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { 190 flags = WRITECLOSE; 191 if (mp->mnt_flag & MNT_FORCE) 192 flags |= FORCECLOSE; 193 if (mp->mnt_flag & MNT_SOFTDEP) { 194 err = softdep_flushfiles(mp, flags, p); 195 } else { 196 err = ffs_flushfiles(mp, flags, p); 197 } 198 ronly = 1; 199 } 200 if (!err && (mp->mnt_flag & MNT_RELOAD)) 201 err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p); 202 if (err) { 203 goto error_1; 204 } 205 if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { 206 /* 207 * If upgrade to read-write by non-root, then verify 208 * that user has necessary permissions on the device. 209 */ 210 if (p->p_ucred->cr_uid != 0) { 211 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 212 if ((error = VOP_ACCESS(devvp, VREAD | VWRITE, 213 p->p_ucred, p)) != 0) { 214 VOP_UNLOCK(devvp, 0, p); 215 return (error); 216 } 217 VOP_UNLOCK(devvp, 0, p); 218 } 219 220 if (fs->fs_clean == 0) { 221 if (mp->mnt_flag & MNT_FORCE) { 222 printf( 223 "WARNING: %s was not properly dismounted\n", 224 fs->fs_fsmnt); 225 } else { 226 printf( 227 "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 228 fs->fs_fsmnt); 229 err = EPERM; 230 goto error_1; 231 } 232 } 233 234 /* check to see if we need to start softdep */ 235 if (fs->fs_flags & FS_DOSOFTDEP) { 236 err = softdep_mount(devvp, mp, fs, p->p_ucred); 237 if (err) 238 goto error_1; 239 } 240 241 ronly = 0; 242 } 243 /* 244 * Soft updates is incompatible with "async", 245 * so if we are doing softupdates stop the user 246 * from setting the async flag in an update. 247 * Softdep_mount() clears it in an initial mount 248 * or ro->rw remount. 249 */ 250 if (mp->mnt_flag & MNT_SOFTDEP) { 251 mp->mnt_flag &= ~MNT_ASYNC; 252 } 253 /* if not updating name...*/ 254 if (args.fspec == 0) { 255 /* 256 * Process export requests. Jumping to "success" 257 * will return the vfs_export() error code. 258 */ 259 err = vfs_export(mp, &ump->um_export, &args.export); 260 goto success; 261 } 262 } 263 264 /* 265 * Not an update, or updating the name: look up the name 266 * and verify that it refers to a sensible block device. 267 */ 268 NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); 269 err = namei(ndp); 270 if (err) { 271 /* can't get devvp!*/ 272 goto error_1; 273 } 274 275 devvp = ndp->ni_vp; 276 277 if (!vn_isdisk(devvp)) { 278 err = ENOTBLK; 279 goto error_2; 280 } 281 282 /* 283 * If mount by non-root, then verify that user has necessary 284 * permissions on the device. 285 */ 286 if (p->p_ucred->cr_uid != 0) { 287 accessmode = VREAD; 288 if ((mp->mnt_flag & MNT_RDONLY) == 0) 289 accessmode |= VWRITE; 290 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 291 if ((error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) != 0) { 292 vput(devvp); 293 return (error); 294 } 295 VOP_UNLOCK(devvp, 0, p); 296 } 297 298 if (mp->mnt_flag & MNT_UPDATE) { 299 /* 300 ******************** 301 * UPDATE 302 * If it's not the same vnode, or at least the same device 303 * then it's not correct. 304 ******************** 305 */ 306 307 if (devvp != ump->um_devvp) { 308 if ( devvp->v_rdev == ump->um_devvp->v_rdev) { 309 vrele(devvp); 310 } else { 311 err = EINVAL; /* needs translation */ 312 } 313 } else 314 vrele(devvp); 315 /* 316 * Update device name only on success 317 */ 318 if( !err) { 319 /* Save "mounted from" info for mount point (NULL pad)*/ 320 copyinstr( args.fspec, 321 mp->mnt_stat.f_mntfromname, 322 MNAMELEN - 1, 323 &size); 324 bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 325 } 326 } else { 327 /* 328 ******************** 329 * NEW MOUNT 330 ******************** 331 */ 332 333 /* 334 * Since this is a new mount, we want the names for 335 * the device and the mount point copied in. If an 336 * error occurs, the mountpoint is discarded by the 337 * upper level code. 338 */ 339 /* Save "last mounted on" info for mount point (NULL pad)*/ 340 copyinstr( path, /* mount point*/ 341 mp->mnt_stat.f_mntonname, /* save area*/ 342 MNAMELEN - 1, /* max size*/ 343 &size); /* real size*/ 344 bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size); 345 346 /* Save "mounted from" info for mount point (NULL pad)*/ 347 copyinstr( args.fspec, /* device name*/ 348 mp->mnt_stat.f_mntfromname, /* save area*/ 349 MNAMELEN - 1, /* max size*/ 350 &size); /* real size*/ 351 bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 352 353 err = ffs_mountfs(devvp, mp, p, M_FFSNODE); 354 } 355 if (err) { 356 goto error_2; 357 } 358 359 dostatfs: 360 /* 361 * Initialize FS stat information in mount struct; uses both 362 * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname 363 * 364 * This code is common to root and non-root mounts 365 */ 366 (void)VFS_STATFS(mp, &mp->mnt_stat, p); 367 368 goto success; 369 370 371 error_2: /* error with devvp held*/ 372 373 /* release devvp before failing*/ 374 vrele(devvp); 375 376 error_1: /* no state to back out*/ 377 378 success: 379 if (!err && path && (mp->mnt_flag & MNT_UPDATE)) { 380 /* Update clean flag after changing read-onlyness. */ 381 fs = ump->um_fs; 382 if (ronly != fs->fs_ronly) { 383 fs->fs_ronly = ronly; 384 fs->fs_clean = ronly && 385 (fs->fs_flags & FS_UNCLEAN) == 0 ? 1 : 0; 386 ffs_sbupdate(ump, MNT_WAIT); 387 } 388 } 389 return (err); 390 } 391 392 /* 393 * Reload all incore data for a filesystem (used after running fsck on 394 * the root filesystem and finding things to fix). The filesystem must 395 * be mounted read-only. 396 * 397 * Things to do to update the mount: 398 * 1) invalidate all cached meta-data. 399 * 2) re-read superblock from disk. 400 * 3) re-read summary information from disk. 401 * 4) invalidate all inactive vnodes. 402 * 5) invalidate all cached file data. 403 * 6) re-read inode data for all active vnodes. 404 */ 405 static int 406 ffs_reload(mp, cred, p) 407 register struct mount *mp; 408 struct ucred *cred; 409 struct proc *p; 410 { 411 register struct vnode *vp, *nvp, *devvp; 412 struct inode *ip; 413 struct csum *space; 414 struct buf *bp; 415 struct fs *fs, *newfs; 416 struct partinfo dpart; 417 dev_t dev; 418 int i, blks, size, error; 419 int32_t *lp; 420 421 if ((mp->mnt_flag & MNT_RDONLY) == 0) 422 return (EINVAL); 423 /* 424 * Step 1: invalidate all cached meta-data. 425 */ 426 devvp = VFSTOUFS(mp)->um_devvp; 427 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 428 error = vinvalbuf(devvp, 0, cred, p, 0, 0); 429 VOP_UNLOCK(devvp, 0, p); 430 if (error) 431 panic("ffs_reload: dirty1"); 432 433 dev = devvp->v_rdev; 434 435 /* 436 * Only VMIO the backing device if the backing device is a real 437 * block device. See ffs_mountmfs() for more details. 438 */ 439 if (devvp->v_tag != VT_MFS && devvp->v_type == VBLK) { 440 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 441 vfs_object_create(devvp, p, p->p_ucred); 442 simple_lock(&devvp->v_interlock); 443 VOP_UNLOCK(devvp, LK_INTERLOCK, p); 444 } 445 446 /* 447 * Step 2: re-read superblock from disk. 448 */ 449 if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0) 450 size = DEV_BSIZE; 451 else 452 size = dpart.disklab->d_secsize; 453 if ((error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp)) != 0) 454 return (error); 455 newfs = (struct fs *)bp->b_data; 456 if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE || 457 newfs->fs_bsize < sizeof(struct fs)) { 458 brelse(bp); 459 return (EIO); /* XXX needs translation */ 460 } 461 fs = VFSTOUFS(mp)->um_fs; 462 /* 463 * Copy pointer fields back into superblock before copying in XXX 464 * new superblock. These should really be in the ufsmount. XXX 465 * Note that important parameters (eg fs_ncg) are unchanged. 466 */ 467 bcopy(&fs->fs_csp[0], &newfs->fs_csp[0], sizeof(fs->fs_csp)); 468 newfs->fs_maxcluster = fs->fs_maxcluster; 469 bcopy(newfs, fs, (u_int)fs->fs_sbsize); 470 if (fs->fs_sbsize < SBSIZE) 471 bp->b_flags |= B_INVAL; 472 brelse(bp); 473 mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; 474 ffs_oldfscompat(fs); 475 476 /* 477 * Step 3: re-read summary information from disk. 478 */ 479 blks = howmany(fs->fs_cssize, fs->fs_fsize); 480 space = fs->fs_csp[0]; 481 for (i = 0; i < blks; i += fs->fs_frag) { 482 size = fs->fs_bsize; 483 if (i + fs->fs_frag > blks) 484 size = (blks - i) * fs->fs_fsize; 485 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, 486 NOCRED, &bp); 487 if (error) 488 return (error); 489 bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size); 490 brelse(bp); 491 } 492 /* 493 * We no longer know anything about clusters per cylinder group. 494 */ 495 if (fs->fs_contigsumsize > 0) { 496 lp = fs->fs_maxcluster; 497 for (i = 0; i < fs->fs_ncg; i++) 498 *lp++ = fs->fs_contigsumsize; 499 } 500 501 loop: 502 simple_lock(&mntvnode_slock); 503 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 504 if (vp->v_mount != mp) { 505 simple_unlock(&mntvnode_slock); 506 goto loop; 507 } 508 nvp = vp->v_mntvnodes.le_next; 509 /* 510 * Step 4: invalidate all inactive vnodes. 511 */ 512 if (vrecycle(vp, &mntvnode_slock, p)) 513 goto loop; 514 /* 515 * Step 5: invalidate all cached file data. 516 */ 517 simple_lock(&vp->v_interlock); 518 simple_unlock(&mntvnode_slock); 519 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { 520 goto loop; 521 } 522 if (vinvalbuf(vp, 0, cred, p, 0, 0)) 523 panic("ffs_reload: dirty2"); 524 /* 525 * Step 6: re-read inode data for all active vnodes. 526 */ 527 ip = VTOI(vp); 528 error = 529 bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 530 (int)fs->fs_bsize, NOCRED, &bp); 531 if (error) { 532 vput(vp); 533 return (error); 534 } 535 ip->i_din = *((struct dinode *)bp->b_data + 536 ino_to_fsbo(fs, ip->i_number)); 537 ip->i_effnlink = ip->i_nlink; 538 brelse(bp); 539 vput(vp); 540 simple_lock(&mntvnode_slock); 541 } 542 simple_unlock(&mntvnode_slock); 543 return (0); 544 } 545 546 /* 547 * Common code for mount and mountroot 548 */ 549 int 550 ffs_mountfs(devvp, mp, p, malloctype) 551 register struct vnode *devvp; 552 struct mount *mp; 553 struct proc *p; 554 struct malloc_type *malloctype; 555 { 556 register struct ufsmount *ump; 557 struct buf *bp; 558 register struct fs *fs; 559 dev_t dev; 560 struct partinfo dpart; 561 caddr_t base, space; 562 int error, i, blks, size, ronly; 563 int32_t *lp; 564 struct ucred *cred; 565 u_int64_t maxfilesize; /* XXX */ 566 size_t strsize; 567 int ncount; 568 569 dev = devvp->v_rdev; 570 cred = p ? p->p_ucred : NOCRED; 571 /* 572 * Disallow multiple mounts of the same device. 573 * Disallow mounting of a device that is currently in use 574 * (except for root, which might share swap device for miniroot). 575 * Flush out any old buffers remaining from a previous use. 576 */ 577 error = vfs_mountedon(devvp); 578 if (error) 579 return (error); 580 ncount = vcount(devvp); 581 582 if (ncount > 1 && devvp != rootvp) 583 return (EBUSY); 584 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 585 error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0); 586 VOP_UNLOCK(devvp, 0, p); 587 if (error) 588 return (error); 589 590 /* 591 * Only VMIO the backing device if the backing device is a real 592 * block device. This excludes the original MFS implementation. 593 * Note that it is optional that the backing device be VMIOed. This 594 * increases the opportunity for metadata caching. 595 */ 596 if (devvp->v_tag != VT_MFS && devvp->v_type == VBLK) { 597 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 598 vfs_object_create(devvp, p, p->p_ucred); 599 simple_lock(&devvp->v_interlock); 600 VOP_UNLOCK(devvp, LK_INTERLOCK, p); 601 } 602 603 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 604 error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); 605 if (error) 606 return (error); 607 if (devvp->v_rdev->si_iosize_max > mp->mnt_iosize_max) 608 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 609 if (mp->mnt_iosize_max > MAXPHYS) 610 mp->mnt_iosize_max = MAXPHYS; 611 612 if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0) 613 size = DEV_BSIZE; 614 else 615 size = dpart.disklab->d_secsize; 616 617 bp = NULL; 618 ump = NULL; 619 if ((error = bread(devvp, SBLOCK, SBSIZE, cred, &bp)) != 0) 620 goto out; 621 fs = (struct fs *)bp->b_data; 622 if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE || 623 fs->fs_bsize < sizeof(struct fs)) { 624 error = EINVAL; /* XXX needs translation */ 625 goto out; 626 } 627 fs->fs_fmod = 0; 628 fs->fs_flags &= ~FS_UNCLEAN; 629 if (fs->fs_clean == 0) { 630 fs->fs_flags |= FS_UNCLEAN; 631 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 632 printf( 633 "WARNING: %s was not properly dismounted\n", 634 fs->fs_fsmnt); 635 } else { 636 printf( 637 "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 638 fs->fs_fsmnt); 639 error = EPERM; 640 goto out; 641 } 642 } 643 /* XXX updating 4.2 FFS superblocks trashes rotational layout tables */ 644 if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) { 645 error = EROFS; /* needs translation */ 646 goto out; 647 } 648 ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK); 649 bzero((caddr_t)ump, sizeof *ump); 650 ump->um_malloctype = malloctype; 651 ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, 652 M_WAITOK); 653 ump->um_blkatoff = ffs_blkatoff; 654 ump->um_truncate = ffs_truncate; 655 ump->um_update = ffs_update; 656 ump->um_valloc = ffs_valloc; 657 ump->um_vfree = ffs_vfree; 658 bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize); 659 if (fs->fs_sbsize < SBSIZE) 660 bp->b_flags |= B_INVAL; 661 brelse(bp); 662 bp = NULL; 663 fs = ump->um_fs; 664 fs->fs_ronly = ronly; 665 if (ronly == 0) { 666 fs->fs_fmod = 1; 667 fs->fs_clean = 0; 668 } 669 size = fs->fs_cssize; 670 blks = howmany(size, fs->fs_fsize); 671 if (fs->fs_contigsumsize > 0) 672 size += fs->fs_ncg * sizeof(int32_t); 673 base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK); 674 for (i = 0; i < blks; i += fs->fs_frag) { 675 size = fs->fs_bsize; 676 if (i + fs->fs_frag > blks) 677 size = (blks - i) * fs->fs_fsize; 678 if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, 679 cred, &bp)) != 0) { 680 free(base, M_UFSMNT); 681 goto out; 682 } 683 bcopy(bp->b_data, space, (u_int)size); 684 fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space; 685 space += size; 686 brelse(bp); 687 bp = NULL; 688 } 689 if (fs->fs_contigsumsize > 0) { 690 fs->fs_maxcluster = lp = (int32_t *)space; 691 for (i = 0; i < fs->fs_ncg; i++) 692 *lp++ = fs->fs_contigsumsize; 693 } 694 mp->mnt_data = (qaddr_t)ump; 695 mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0]; 696 mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1]; 697 if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 || 698 vfs_getvfs(&mp->mnt_stat.f_fsid)) 699 vfs_getnewfsid(mp); 700 mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; 701 mp->mnt_flag |= MNT_LOCAL; 702 ump->um_mountp = mp; 703 ump->um_dev = dev; 704 ump->um_devvp = devvp; 705 ump->um_nindir = fs->fs_nindir; 706 ump->um_bptrtodb = fs->fs_fsbtodb; 707 ump->um_seqinc = fs->fs_frag; 708 for (i = 0; i < MAXQUOTAS; i++) 709 ump->um_quotas[i] = NULLVP; 710 devvp->v_specmountpoint = mp; 711 ffs_oldfscompat(fs); 712 713 /* 714 * Set FS local "last mounted on" information (NULL pad) 715 */ 716 copystr( mp->mnt_stat.f_mntonname, /* mount point*/ 717 fs->fs_fsmnt, /* copy area*/ 718 sizeof(fs->fs_fsmnt) - 1, /* max size*/ 719 &strsize); /* real size*/ 720 bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize); 721 722 if( mp->mnt_flag & MNT_ROOTFS) { 723 /* 724 * Root mount; update timestamp in mount structure. 725 * this will be used by the common root mount code 726 * to update the system clock. 727 */ 728 mp->mnt_time = fs->fs_time; 729 } 730 731 ump->um_savedmaxfilesize = fs->fs_maxfilesize; /* XXX */ 732 maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1; /* XXX */ 733 if (fs->fs_maxfilesize > maxfilesize) /* XXX */ 734 fs->fs_maxfilesize = maxfilesize; /* XXX */ 735 if (ronly == 0) { 736 if ((fs->fs_flags & FS_DOSOFTDEP) && 737 (error = softdep_mount(devvp, mp, fs, cred)) != 0) { 738 free(base, M_UFSMNT); 739 goto out; 740 } 741 fs->fs_clean = 0; 742 (void) ffs_sbupdate(ump, MNT_WAIT); 743 } 744 return (0); 745 out: 746 devvp->v_specmountpoint = NULL; 747 if (bp) 748 brelse(bp); 749 (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p); 750 if (ump) { 751 free(ump->um_fs, M_UFSMNT); 752 free(ump, M_UFSMNT); 753 mp->mnt_data = (qaddr_t)0; 754 } 755 return (error); 756 } 757 758 /* 759 * Sanity checks for old file systems. 760 * 761 * XXX - goes away some day. 762 */ 763 static int 764 ffs_oldfscompat(fs) 765 struct fs *fs; 766 { 767 768 fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect); /* XXX */ 769 fs->fs_interleave = max(fs->fs_interleave, 1); /* XXX */ 770 if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ 771 fs->fs_nrpos = 8; /* XXX */ 772 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ 773 #if 0 774 int i; /* XXX */ 775 u_int64_t sizepb = fs->fs_bsize; /* XXX */ 776 /* XXX */ 777 fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1; /* XXX */ 778 for (i = 0; i < NIADDR; i++) { /* XXX */ 779 sizepb *= NINDIR(fs); /* XXX */ 780 fs->fs_maxfilesize += sizepb; /* XXX */ 781 } /* XXX */ 782 #endif 783 fs->fs_maxfilesize = (u_quad_t) 1LL << 39; 784 fs->fs_qbmask = ~fs->fs_bmask; /* XXX */ 785 fs->fs_qfmask = ~fs->fs_fmask; /* XXX */ 786 } /* XXX */ 787 return (0); 788 } 789 790 /* 791 * unmount system call 792 */ 793 int 794 ffs_unmount(mp, mntflags, p) 795 struct mount *mp; 796 int mntflags; 797 struct proc *p; 798 { 799 register struct ufsmount *ump; 800 register struct fs *fs; 801 int error, flags; 802 803 flags = 0; 804 if (mntflags & MNT_FORCE) { 805 flags |= FORCECLOSE; 806 } 807 if (mp->mnt_flag & MNT_SOFTDEP) { 808 if ((error = softdep_flushfiles(mp, flags, p)) != 0) 809 return (error); 810 } else { 811 if ((error = ffs_flushfiles(mp, flags, p)) != 0) 812 return (error); 813 } 814 ump = VFSTOUFS(mp); 815 fs = ump->um_fs; 816 if (fs->fs_ronly == 0) { 817 fs->fs_clean = fs->fs_flags & FS_UNCLEAN ? 0 : 1; 818 error = ffs_sbupdate(ump, MNT_WAIT); 819 if (error) { 820 fs->fs_clean = 0; 821 return (error); 822 } 823 } 824 ump->um_devvp->v_specmountpoint = NULL; 825 826 vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, p, 0, 0); 827 error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE, 828 NOCRED, p); 829 830 vrele(ump->um_devvp); 831 832 free(fs->fs_csp[0], M_UFSMNT); 833 free(fs, M_UFSMNT); 834 free(ump, M_UFSMNT); 835 mp->mnt_data = (qaddr_t)0; 836 mp->mnt_flag &= ~MNT_LOCAL; 837 return (error); 838 } 839 840 /* 841 * Flush out all the files in a filesystem. 842 */ 843 int 844 ffs_flushfiles(mp, flags, p) 845 register struct mount *mp; 846 int flags; 847 struct proc *p; 848 { 849 register struct ufsmount *ump; 850 int error; 851 852 ump = VFSTOUFS(mp); 853 #ifdef QUOTA 854 if (mp->mnt_flag & MNT_QUOTA) { 855 int i; 856 error = vflush(mp, NULLVP, SKIPSYSTEM|flags); 857 if (error) 858 return (error); 859 for (i = 0; i < MAXQUOTAS; i++) { 860 if (ump->um_quotas[i] == NULLVP) 861 continue; 862 quotaoff(p, mp, i); 863 } 864 /* 865 * Here we fall through to vflush again to ensure 866 * that we have gotten rid of all the system vnodes. 867 */ 868 } 869 #endif 870 /* 871 * Flush all the files. 872 */ 873 if ((error = vflush(mp, NULL, flags)) != 0) 874 return (error); 875 /* 876 * Flush filesystem metadata. 877 */ 878 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); 879 error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p); 880 VOP_UNLOCK(ump->um_devvp, 0, p); 881 return (error); 882 } 883 884 /* 885 * Get file system statistics. 886 */ 887 int 888 ffs_statfs(mp, sbp, p) 889 struct mount *mp; 890 register struct statfs *sbp; 891 struct proc *p; 892 { 893 register struct ufsmount *ump; 894 register struct fs *fs; 895 896 ump = VFSTOUFS(mp); 897 fs = ump->um_fs; 898 if (fs->fs_magic != FS_MAGIC) 899 panic("ffs_statfs"); 900 sbp->f_bsize = fs->fs_fsize; 901 sbp->f_iosize = fs->fs_bsize; 902 sbp->f_blocks = fs->fs_dsize; 903 sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag + 904 fs->fs_cstotal.cs_nffree; 905 sbp->f_bavail = freespace(fs, fs->fs_minfree); 906 sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO; 907 sbp->f_ffree = fs->fs_cstotal.cs_nifree; 908 if (sbp != &mp->mnt_stat) { 909 sbp->f_type = mp->mnt_vfc->vfc_typenum; 910 bcopy((caddr_t)mp->mnt_stat.f_mntonname, 911 (caddr_t)&sbp->f_mntonname[0], MNAMELEN); 912 bcopy((caddr_t)mp->mnt_stat.f_mntfromname, 913 (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); 914 } 915 return (0); 916 } 917 918 /* 919 * Go through the disk queues to initiate sandbagged IO; 920 * go through the inodes to write those that have been modified; 921 * initiate the writing of the super block if it has been modified. 922 * 923 * Note: we are always called with the filesystem marked `MPBUSY'. 924 */ 925 int 926 ffs_sync(mp, waitfor, cred, p) 927 struct mount *mp; 928 int waitfor; 929 struct ucred *cred; 930 struct proc *p; 931 { 932 struct vnode *nvp, *vp; 933 struct inode *ip; 934 struct ufsmount *ump = VFSTOUFS(mp); 935 struct fs *fs; 936 int error, allerror = 0; 937 938 fs = ump->um_fs; 939 if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */ 940 printf("fs = %s\n", fs->fs_fsmnt); 941 panic("ffs_sync: rofs mod"); 942 } 943 /* 944 * Write back each (modified) inode. 945 */ 946 simple_lock(&mntvnode_slock); 947 loop: 948 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 949 /* 950 * If the vnode that we are about to sync is no longer 951 * associated with this mount point, start over. 952 */ 953 if (vp->v_mount != mp) 954 goto loop; 955 simple_lock(&vp->v_interlock); 956 nvp = vp->v_mntvnodes.le_next; 957 ip = VTOI(vp); 958 if ((vp->v_type == VNON) || (((ip->i_flag & 959 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) && 960 (TAILQ_EMPTY(&vp->v_dirtyblkhd) || (waitfor == MNT_LAZY)))) { 961 simple_unlock(&vp->v_interlock); 962 continue; 963 } 964 if (vp->v_type != VCHR) { 965 simple_unlock(&mntvnode_slock); 966 error = 967 vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); 968 if (error) { 969 simple_lock(&mntvnode_slock); 970 if (error == ENOENT) 971 goto loop; 972 continue; 973 } 974 if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0) 975 allerror = error; 976 VOP_UNLOCK(vp, 0, p); 977 vrele(vp); 978 simple_lock(&mntvnode_slock); 979 } else { 980 simple_unlock(&mntvnode_slock); 981 simple_unlock(&vp->v_interlock); 982 /* UFS_UPDATE(vp, waitfor == MNT_WAIT); */ 983 UFS_UPDATE(vp, 0); 984 simple_lock(&mntvnode_slock); 985 } 986 } 987 simple_unlock(&mntvnode_slock); 988 /* 989 * Force stale file system control information to be flushed. 990 */ 991 if (waitfor != MNT_LAZY) { 992 if (ump->um_mountp->mnt_flag & MNT_SOFTDEP) 993 waitfor = MNT_NOWAIT; 994 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); 995 if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0) 996 allerror = error; 997 VOP_UNLOCK(ump->um_devvp, 0, p); 998 } 999 #ifdef QUOTA 1000 qsync(mp); 1001 #endif 1002 /* 1003 * Write back modified superblock. 1004 */ 1005 if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0) 1006 allerror = error; 1007 return (allerror); 1008 } 1009 1010 /* 1011 * Look up a FFS dinode number to find its incore vnode, otherwise read it 1012 * in from disk. If it is in core, wait for the lock bit to clear, then 1013 * return the inode locked. Detection and handling of mount points must be 1014 * done by the calling routine. 1015 */ 1016 static int ffs_inode_hash_lock; 1017 1018 int 1019 ffs_vget(mp, ino, vpp) 1020 struct mount *mp; 1021 ino_t ino; 1022 struct vnode **vpp; 1023 { 1024 struct fs *fs; 1025 struct inode *ip; 1026 struct ufsmount *ump; 1027 struct buf *bp; 1028 struct vnode *vp; 1029 dev_t dev; 1030 int error; 1031 1032 ump = VFSTOUFS(mp); 1033 dev = ump->um_dev; 1034 restart: 1035 if ((*vpp = ufs_ihashget(dev, ino)) != NULL) { 1036 return (0); 1037 } 1038 1039 /* 1040 * Lock out the creation of new entries in the FFS hash table in 1041 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate 1042 * may occur! 1043 */ 1044 if (ffs_inode_hash_lock) { 1045 while (ffs_inode_hash_lock) { 1046 ffs_inode_hash_lock = -1; 1047 tsleep(&ffs_inode_hash_lock, PVM, "ffsvgt", 0); 1048 } 1049 goto restart; 1050 } 1051 ffs_inode_hash_lock = 1; 1052 1053 /* 1054 * If this MALLOC() is performed after the getnewvnode() 1055 * it might block, leaving a vnode with a NULL v_data to be 1056 * found by ffs_sync() if a sync happens to fire right then, 1057 * which will cause a panic because ffs_sync() blindly 1058 * dereferences vp->v_data (as well it should). 1059 */ 1060 MALLOC(ip, struct inode *, sizeof(struct inode), 1061 ump->um_malloctype, M_WAITOK); 1062 1063 /* Allocate a new vnode/inode. */ 1064 error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp); 1065 if (error) { 1066 if (ffs_inode_hash_lock < 0) 1067 wakeup(&ffs_inode_hash_lock); 1068 ffs_inode_hash_lock = 0; 1069 *vpp = NULL; 1070 FREE(ip, ump->um_malloctype); 1071 return (error); 1072 } 1073 bzero((caddr_t)ip, sizeof(struct inode)); 1074 lockinit(&ip->i_lock, PINOD, "inode", 0, 0); 1075 vp->v_data = ip; 1076 ip->i_vnode = vp; 1077 ip->i_fs = fs = ump->um_fs; 1078 ip->i_dev = dev; 1079 ip->i_number = ino; 1080 #ifdef QUOTA 1081 { 1082 int i; 1083 for (i = 0; i < MAXQUOTAS; i++) 1084 ip->i_dquot[i] = NODQUOT; 1085 } 1086 #endif 1087 /* 1088 * Put it onto its hash chain and lock it so that other requests for 1089 * this inode will block if they arrive while we are sleeping waiting 1090 * for old data structures to be purged or for the contents of the 1091 * disk portion of this inode to be read. 1092 */ 1093 ufs_ihashins(ip); 1094 1095 if (ffs_inode_hash_lock < 0) 1096 wakeup(&ffs_inode_hash_lock); 1097 ffs_inode_hash_lock = 0; 1098 1099 /* Read in the disk contents for the inode, copy into the inode. */ 1100 error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1101 (int)fs->fs_bsize, NOCRED, &bp); 1102 if (error) { 1103 /* 1104 * The inode does not contain anything useful, so it would 1105 * be misleading to leave it on its hash chain. With mode 1106 * still zero, it will be unlinked and returned to the free 1107 * list by vput(). 1108 */ 1109 brelse(bp); 1110 vput(vp); 1111 *vpp = NULL; 1112 return (error); 1113 } 1114 ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino)); 1115 if (DOINGSOFTDEP(vp)) 1116 softdep_load_inodeblock(ip); 1117 else 1118 ip->i_effnlink = ip->i_nlink; 1119 bqrelse(bp); 1120 1121 /* 1122 * Initialize the vnode from the inode, check for aliases. 1123 * Note that the underlying vnode may have changed. 1124 */ 1125 error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp); 1126 if (error) { 1127 vput(vp); 1128 *vpp = NULL; 1129 return (error); 1130 } 1131 /* 1132 * Finish inode initialization now that aliasing has been resolved. 1133 */ 1134 ip->i_devvp = ump->um_devvp; 1135 VREF(ip->i_devvp); 1136 /* 1137 * Set up a generation number for this inode if it does not 1138 * already have one. This should only happen on old filesystems. 1139 */ 1140 if (ip->i_gen == 0) { 1141 ip->i_gen = random() / 2 + 1; 1142 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1143 ip->i_flag |= IN_MODIFIED; 1144 } 1145 /* 1146 * Ensure that uid and gid are correct. This is a temporary 1147 * fix until fsck has been changed to do the update. 1148 */ 1149 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ 1150 ip->i_uid = ip->i_din.di_ouid; /* XXX */ 1151 ip->i_gid = ip->i_din.di_ogid; /* XXX */ 1152 } /* XXX */ 1153 1154 *vpp = vp; 1155 return (0); 1156 } 1157 1158 /* 1159 * File handle to vnode 1160 * 1161 * Have to be really careful about stale file handles: 1162 * - check that the inode number is valid 1163 * - call ffs_vget() to get the locked inode 1164 * - check for an unallocated inode (i_mode == 0) 1165 * - check that the given client host has export rights and return 1166 * those rights via. exflagsp and credanonp 1167 */ 1168 int 1169 ffs_fhtovp(mp, fhp, vpp) 1170 register struct mount *mp; 1171 struct fid *fhp; 1172 struct vnode **vpp; 1173 { 1174 register struct ufid *ufhp; 1175 struct fs *fs; 1176 1177 ufhp = (struct ufid *)fhp; 1178 fs = VFSTOUFS(mp)->um_fs; 1179 if (ufhp->ufid_ino < ROOTINO || 1180 ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg) 1181 return (ESTALE); 1182 return (ufs_fhtovp(mp, ufhp, vpp)); 1183 } 1184 1185 /* 1186 * Vnode pointer to File handle 1187 */ 1188 /* ARGSUSED */ 1189 int 1190 ffs_vptofh(vp, fhp) 1191 struct vnode *vp; 1192 struct fid *fhp; 1193 { 1194 register struct inode *ip; 1195 register struct ufid *ufhp; 1196 1197 ip = VTOI(vp); 1198 ufhp = (struct ufid *)fhp; 1199 ufhp->ufid_len = sizeof(struct ufid); 1200 ufhp->ufid_ino = ip->i_number; 1201 ufhp->ufid_gen = ip->i_gen; 1202 return (0); 1203 } 1204 1205 /* 1206 * Initialize the filesystem; just use ufs_init. 1207 */ 1208 static int 1209 ffs_init(vfsp) 1210 struct vfsconf *vfsp; 1211 { 1212 1213 softdep_initialize(); 1214 return (ufs_init(vfsp)); 1215 } 1216 1217 /* 1218 * Write a superblock and associated information back to disk. 1219 */ 1220 static int 1221 ffs_sbupdate(mp, waitfor) 1222 struct ufsmount *mp; 1223 int waitfor; 1224 { 1225 register struct fs *dfs, *fs = mp->um_fs; 1226 register struct buf *bp; 1227 int blks; 1228 caddr_t space; 1229 int i, size, error, allerror = 0; 1230 1231 /* 1232 * First write back the summary information. 1233 */ 1234 blks = howmany(fs->fs_cssize, fs->fs_fsize); 1235 space = (caddr_t)fs->fs_csp[0]; 1236 for (i = 0; i < blks; i += fs->fs_frag) { 1237 size = fs->fs_bsize; 1238 if (i + fs->fs_frag > blks) 1239 size = (blks - i) * fs->fs_fsize; 1240 bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i), 1241 size, 0, 0); 1242 bcopy(space, bp->b_data, (u_int)size); 1243 space += size; 1244 if (waitfor != MNT_WAIT) 1245 bawrite(bp); 1246 else if ((error = bwrite(bp)) != 0) 1247 allerror = error; 1248 } 1249 /* 1250 * Now write back the superblock itself. If any errors occurred 1251 * up to this point, then fail so that the superblock avoids 1252 * being written out as clean. 1253 */ 1254 if (allerror) 1255 return (allerror); 1256 bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0); 1257 fs->fs_fmod = 0; 1258 fs->fs_time = time_second; 1259 bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); 1260 /* Restore compatibility to old file systems. XXX */ 1261 dfs = (struct fs *)bp->b_data; /* XXX */ 1262 if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ 1263 dfs->fs_nrpos = -1; /* XXX */ 1264 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ 1265 int32_t *lp, tmp; /* XXX */ 1266 /* XXX */ 1267 lp = (int32_t *)&dfs->fs_qbmask; /* XXX */ 1268 tmp = lp[4]; /* XXX */ 1269 for (i = 4; i > 0; i--) /* XXX */ 1270 lp[i] = lp[i-1]; /* XXX */ 1271 lp[0] = tmp; /* XXX */ 1272 } /* XXX */ 1273 dfs->fs_maxfilesize = mp->um_savedmaxfilesize; /* XXX */ 1274 if (waitfor != MNT_WAIT) 1275 bawrite(bp); 1276 else if ((error = bwrite(bp)) != 0) 1277 allerror = error; 1278 return (allerror); 1279 } 1280