1 /* 2 * Copyright (c) 1989, 1991, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
 *
 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
 * $FreeBSD$
 */

#include "opt_quota.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/disklabel.h>
#include <sys/malloc.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

#include <vm/vm.h>
#include <vm/vm_page.h>

/*
 * Malloc type handed to ffs_mountfs() by ffs_mount(); ffs_mountfs() stores
 * it in um_malloctype and ffs_vget() allocates in-core inodes from it.
 */
static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part");

/* Prototypes for the routines that are private to this file. */
static int	ffs_sbupdate __P((struct ufsmount *, int));
static int	ffs_reload __P((struct mount *,struct ucred *,struct proc *));
static int	ffs_oldfscompat __P((struct fs *));
static int	ffs_mount __P((struct mount *, char *, caddr_t,
				struct nameidata *, struct proc *));
static int	ffs_init __P((struct vfsconf *));

/*
 * Filesystem operations vector for "ufs", registered by VFS_SET() below.
 * The initializer is positional: a mix of FFS-specific (ffs_*) and generic
 * UFS (ufs_*) entry points.
 * NOTE(review): the entries presumably follow the member order of
 * struct vfsops in <sys/mount.h> -- confirm before reordering or inserting.
 */
static struct vfsops ufs_vfsops = {
	ffs_mount,
	ufs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	ffs_statfs,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	ufs_check_export,
	ffs_vptofh,
	ffs_init,
};

VFS_SET(ufs_vfsops, ufs, 0);

/*
 * ffs_mount
 *
 * Called when mounting local physical media
 *
 * PARAMETERS:
 *		mountroot
 *			mp	mount point structure
 *			path	NULL (flag for root mount!!!)
 *			data	<unused>
 *			ndp	<unused>
 *			p	process (user credentials check [statfs])
 *
 *		mount
 *			mp	mount point structure
 *			path	path to mount point
 *			data	pointer to argument struct in user space
 *			ndp	mount point namei() return (used for
 *				credentials on reload), reused to look
 *				up block device.
 *			p	process (user credentials check)
 *
 * RETURNS:	0	Success
 *		!0	error number (errno.h)
 *
 * LOCK STATE:
 *
 *		ENTRY
 *			mount point is locked
 *		EXIT
 *			mount point is locked
 *
 * NOTES:
 *		A NULL path can be used for a flag since the mount
 *		system call will fail with EFAULT in copyinstr in
 *		namei() if it is a genuine NULL from the user.
 *
 *		Two error variables are in use: "err" is the value
 *		returned through the common exit at the bottom of the
 *		function, "error" is only used for the two VOP_ACCESS()
 *		checks, which return directly.
 *
 *		Exit labels: "error_2" drops the devvp reference and
 *		falls into "error_1", which is the same point as
 *		"success"; the read-only/clean bookkeeping there only
 *		runs when err is 0, path is non-NULL and MNT_UPDATE is
 *		set.
 */
static int
ffs_mount( mp, path, data, ndp, p)
        struct mount		*mp;	/* mount struct pointer*/
        char			*path;	/* path to mount point*/
        caddr_t			data;	/* arguments to FS specific mount*/
        struct nameidata	*ndp;	/* mount point credentials*/
        struct proc		*p;	/* process requesting mount*/
{
	size_t		size;
	int		err = 0;
	struct vnode	*devvp;

	struct ufs_args args;
	struct ufsmount *ump = 0;
	register struct fs *fs;
	int error, flags, ronly = 0;
	mode_t accessmode;

	/*
	 * Use NULL path to flag a root mount
	 */
	if( path == NULL) {
		/*
		 ***
		 * Mounting root file system
		 ***
		 */

		/* Obtain a vnode for the root device; stored in rootvp. */
		if ((err = bdevvp(rootdev, &rootvp))) {
			printf("ffs_mountroot: can't find rootvp\n");
			return (err);
		}

		if( ( err = ffs_mountfs(rootvp, mp, p, M_FFSNODE)) != 0) {
			/* fs specific cleanup (if any)*/
			goto error_1;
		}

		goto dostatfs;		/* success*/

	}

	/*
	 ***
	 * Mounting non-root file system or updating a file system
	 ***
	 */

	/* copy in user arguments*/
	err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
	if (err)
		goto error_1;		/* can't get arguments*/

	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		devvp = ump->um_devvp;
		err = 0;
		ronly = fs->fs_ronly;	/* MNT_RELOAD might change this */
		/* Downgrade read/write -> read-only: flush open-for-write files. */
		if (ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			if (mp->mnt_flag & MNT_SOFTDEP) {
				err = softdep_flushfiles(mp, flags, p);
			} else {
				err = ffs_flushfiles(mp, flags, p);
			}
			/*
			 * Set even if the flush failed; a failure leaves via
			 * error_1 below and the exit code ignores ronly when
			 * err is non-zero.
			 */
			ronly = 1;
		}
		if (!err && (mp->mnt_flag & MNT_RELOAD))
			err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
		if (err) {
			goto error_1;
		}
		/* Upgrade read-only -> read/write. */
		if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
			/*
			 * If upgrade to read-write by non-root, then verify
			 * that user has necessary permissions on the device.
			 */
			if (p->p_ucred->cr_uid != 0) {
				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
				    p->p_ucred, p)) != 0) {
					/*
					 * devvp is the mount's own vnode
					 * (ump->um_devvp); only the lock taken
					 * above is undone before the direct
					 * return.
					 */
					VOP_UNLOCK(devvp, 0, p);
					return (error);
				}
				VOP_UNLOCK(devvp, 0, p);
			}

			/* Refuse to go read/write on an unclean fs unless forced. */
			if (fs->fs_clean == 0) {
				if (mp->mnt_flag & MNT_FORCE) {
					printf(
"WARNING: %s was not properly dismounted\n",
					    fs->fs_fsmnt);
				} else {
					printf(
"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",
					    fs->fs_fsmnt);
					err = EPERM;
					goto error_1;
				}
			}

			/* check to see if we need to start softdep */
			if (fs->fs_flags & FS_DOSOFTDEP) {
				err = softdep_mount(devvp, mp, fs, p->p_ucred);
				if (err)
					goto error_1;
			}

			ronly = 0;
		}
		/*
		 * Soft updates is incompatible with "async",
		 * so if we are doing softupdates stop the user
		 * from setting the async flag in an update.
		 * Softdep_mount() clears it in an initial mount
		 * or ro->rw remount.
		 */
		if (mp->mnt_flag & MNT_SOFTDEP) {
			mp->mnt_flag &= ~MNT_ASYNC;
		}
		/* if not updating name...*/
		if (args.fspec == 0) {
			/*
			 * Process export requests.  Jumping to "success"
			 * will return the vfs_export() error code.
			 */
			err = vfs_export(mp, &ump->um_export, &args.export);
			goto success;
		}
	}

	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible block device.
	 */
	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
	err = namei(ndp);
	if (err) {
		/* can't get devvp!*/
		goto error_1;
	}

	/* From here on devvp is the vnode returned by namei(). */
	devvp = ndp->ni_vp;

	if (!vn_isdisk(devvp)) {
		err = ENOTBLK;
		goto error_2;
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	if (p->p_ucred->cr_uid != 0) {
		accessmode = VREAD;
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			accessmode |= VWRITE;
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
		if ((error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) != 0) {
			/* Direct return: vput() disposes of the locked devvp. */
			vput(devvp);
			return (error);
		}
		VOP_UNLOCK(devvp, 0, p);
	}

	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 ********************
		 * UPDATE
		 * If it's not the same vnode, or at least the same device
		 * then it's not correct.
		 ********************
		 */

		/*
		 * On a match the looked-up devvp is released here; on a
		 * mismatch it is released via error_2 below.
		 */
		if (devvp != ump->um_devvp) {
			if ( devvp->v_rdev == ump->um_devvp->v_rdev) {
				vrele(devvp);
			} else {
				err = EINVAL;	/* needs translation */
			}
		} else
			vrele(devvp);
		/*
		 * Update device name only on success
		 */
		if( !err) {
			/* Save "mounted from" info for mount point (NULL pad)*/
			/* NOTE(review): the copyinstr() result is not checked. */
			copyinstr(	args.fspec,
					mp->mnt_stat.f_mntfromname,
					MNAMELEN - 1,
					&size);
			bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
		}
	} else {
		/*
		 ********************
		 * NEW MOUNT
		 ********************
		 */

		/*
		 * Since this is a new mount, we want the names for
		 * the device and the mount point copied in.  If an
		 * error occurs,  the mountpoint is discarded by the
		 * upper level code.
		 */
		/* Save "last mounted on" info for mount point (NULL pad)*/
		copyinstr(	path,				/* mount point*/
				mp->mnt_stat.f_mntonname,	/* save area*/
				MNAMELEN - 1,			/* max size*/
				&size);				/* real size*/
		bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size);

		/* Save "mounted from" info for mount point (NULL pad)*/
		copyinstr(	args.fspec,			/* device name*/
				mp->mnt_stat.f_mntfromname,	/* save area*/
				MNAMELEN - 1,			/* max size*/
				&size);				/* real size*/
		bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);

		err = ffs_mountfs(devvp, mp, p, M_FFSNODE);
	}
	if (err) {
		goto error_2;
	}

dostatfs:
	/*
	 * Initialize FS stat information in mount struct; uses both
	 * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname
	 *
	 * This code is common to root and non-root mounts
	 */
	(void)VFS_STATFS(mp, &mp->mnt_stat, p);

	goto success;


error_2:	/* error with devvp held*/

	/* release devvp before failing*/
	vrele(devvp);

error_1:	/* no state to back out*/

success:
	/*
	 * Common exit for both success and failure.  ump is only
	 * dereferenced under MNT_UPDATE, where it was set above.
	 */
	if (!err && path && (mp->mnt_flag & MNT_UPDATE)) {
		/* Update clean flag after changing read-onlyness. */
		fs = ump->um_fs;
		if (ronly != fs->fs_ronly) {
			fs->fs_ronly = ronly;
			/* Clean only if now read-only and not marked unclean. */
			fs->fs_clean = ronly &&
			    (fs->fs_flags & FS_UNCLEAN) == 0 ? 1 : 0;
			ffs_sbupdate(ump, MNT_WAIT);
		}
	}
	return (err);
}

/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix). The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 *	1) invalidate all cached meta-data.
 *	2) re-read superblock from disk.
 *	3) re-read summary information from disk.
 *	4) invalidate all inactive vnodes.
 *	5) invalidate all cached file data.
 *	6) re-read inode data for all active vnodes.
404 */ 405 static int 406 ffs_reload(mp, cred, p) 407 register struct mount *mp; 408 struct ucred *cred; 409 struct proc *p; 410 { 411 register struct vnode *vp, *nvp, *devvp; 412 struct inode *ip; 413 struct csum *space; 414 struct buf *bp; 415 struct fs *fs, *newfs; 416 struct partinfo dpart; 417 dev_t dev; 418 int i, blks, size, error; 419 int32_t *lp; 420 421 if ((mp->mnt_flag & MNT_RDONLY) == 0) 422 return (EINVAL); 423 /* 424 * Step 1: invalidate all cached meta-data. 425 */ 426 devvp = VFSTOUFS(mp)->um_devvp; 427 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 428 error = vinvalbuf(devvp, 0, cred, p, 0, 0); 429 VOP_UNLOCK(devvp, 0, p); 430 if (error) 431 panic("ffs_reload: dirty1"); 432 433 dev = devvp->v_rdev; 434 435 /* 436 * Only VMIO the backing device if the backing device is a real 437 * block device. See ffs_mountmfs() for more details. 438 */ 439 if (devvp->v_tag != VT_MFS && devvp->v_type == VBLK) { 440 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 441 vfs_object_create(devvp, p, p->p_ucred); 442 simple_lock(&devvp->v_interlock); 443 VOP_UNLOCK(devvp, LK_INTERLOCK, p); 444 } 445 446 /* 447 * Step 2: re-read superblock from disk. 448 */ 449 if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0) 450 size = DEV_BSIZE; 451 else 452 size = dpart.disklab->d_secsize; 453 if ((error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp)) != 0) 454 return (error); 455 newfs = (struct fs *)bp->b_data; 456 if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE || 457 newfs->fs_bsize < sizeof(struct fs)) { 458 brelse(bp); 459 return (EIO); /* XXX needs translation */ 460 } 461 fs = VFSTOUFS(mp)->um_fs; 462 /* 463 * Copy pointer fields back into superblock before copying in XXX 464 * new superblock. These should really be in the ufsmount. XXX 465 * Note that important parameters (eg fs_ncg) are unchanged. 
466 */ 467 bcopy(&fs->fs_csp[0], &newfs->fs_csp[0], sizeof(fs->fs_csp)); 468 newfs->fs_maxcluster = fs->fs_maxcluster; 469 bcopy(newfs, fs, (u_int)fs->fs_sbsize); 470 if (fs->fs_sbsize < SBSIZE) 471 bp->b_flags |= B_INVAL; 472 brelse(bp); 473 mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; 474 ffs_oldfscompat(fs); 475 476 /* 477 * Step 3: re-read summary information from disk. 478 */ 479 blks = howmany(fs->fs_cssize, fs->fs_fsize); 480 space = fs->fs_csp[0]; 481 for (i = 0; i < blks; i += fs->fs_frag) { 482 size = fs->fs_bsize; 483 if (i + fs->fs_frag > blks) 484 size = (blks - i) * fs->fs_fsize; 485 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, 486 NOCRED, &bp); 487 if (error) 488 return (error); 489 bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size); 490 brelse(bp); 491 } 492 /* 493 * We no longer know anything about clusters per cylinder group. 494 */ 495 if (fs->fs_contigsumsize > 0) { 496 lp = fs->fs_maxcluster; 497 for (i = 0; i < fs->fs_ncg; i++) 498 *lp++ = fs->fs_contigsumsize; 499 } 500 501 loop: 502 simple_lock(&mntvnode_slock); 503 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 504 if (vp->v_mount != mp) { 505 simple_unlock(&mntvnode_slock); 506 goto loop; 507 } 508 nvp = vp->v_mntvnodes.le_next; 509 /* 510 * Step 4: invalidate all inactive vnodes. 511 */ 512 if (vrecycle(vp, &mntvnode_slock, p)) 513 goto loop; 514 /* 515 * Step 5: invalidate all cached file data. 516 */ 517 simple_lock(&vp->v_interlock); 518 simple_unlock(&mntvnode_slock); 519 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { 520 goto loop; 521 } 522 if (vinvalbuf(vp, 0, cred, p, 0, 0)) 523 panic("ffs_reload: dirty2"); 524 /* 525 * Step 6: re-read inode data for all active vnodes. 
526 */ 527 ip = VTOI(vp); 528 error = 529 bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 530 (int)fs->fs_bsize, NOCRED, &bp); 531 if (error) { 532 vput(vp); 533 return (error); 534 } 535 ip->i_din = *((struct dinode *)bp->b_data + 536 ino_to_fsbo(fs, ip->i_number)); 537 ip->i_effnlink = ip->i_nlink; 538 brelse(bp); 539 vput(vp); 540 simple_lock(&mntvnode_slock); 541 } 542 simple_unlock(&mntvnode_slock); 543 return (0); 544 } 545 546 /* 547 * Common code for mount and mountroot 548 */ 549 int 550 ffs_mountfs(devvp, mp, p, malloctype) 551 register struct vnode *devvp; 552 struct mount *mp; 553 struct proc *p; 554 struct malloc_type *malloctype; 555 { 556 register struct ufsmount *ump; 557 struct buf *bp; 558 register struct fs *fs; 559 dev_t dev; 560 struct partinfo dpart; 561 caddr_t base, space; 562 int error, i, blks, size, ronly; 563 int32_t *lp; 564 struct ucred *cred; 565 u_int64_t maxfilesize; /* XXX */ 566 size_t strsize; 567 int ncount; 568 569 dev = devvp->v_rdev; 570 cred = p ? p->p_ucred : NOCRED; 571 /* 572 * Disallow multiple mounts of the same device. 573 * Disallow mounting of a device that is currently in use 574 * (except for root, which might share swap device for miniroot). 575 * Flush out any old buffers remaining from a previous use. 576 */ 577 error = vfs_mountedon(devvp); 578 if (error) 579 return (error); 580 ncount = vcount(devvp); 581 582 if (ncount > 1 && devvp != rootvp) 583 return (EBUSY); 584 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 585 error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0); 586 VOP_UNLOCK(devvp, 0, p); 587 if (error) 588 return (error); 589 590 /* 591 * Only VMIO the backing device if the backing device is a real 592 * block device. This excludes the original MFS implementation. 593 * Note that it is optional that the backing device be VMIOed. This 594 * increases the opportunity for metadata caching. 
595 */ 596 if (devvp->v_tag != VT_MFS && devvp->v_type == VBLK) { 597 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 598 vfs_object_create(devvp, p, p->p_ucred); 599 simple_lock(&devvp->v_interlock); 600 VOP_UNLOCK(devvp, LK_INTERLOCK, p); 601 } 602 603 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 604 error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); 605 if (error) 606 return (error); 607 if (devvp->v_rdev->si_iosize_max > mp->mnt_iosize_max) 608 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 609 if (mp->mnt_iosize_max > MAXPHYS) 610 mp->mnt_iosize_max = MAXPHYS; 611 612 if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0) 613 size = DEV_BSIZE; 614 else 615 size = dpart.disklab->d_secsize; 616 617 bp = NULL; 618 ump = NULL; 619 if ((error = bread(devvp, SBLOCK, SBSIZE, cred, &bp)) != 0) 620 goto out; 621 fs = (struct fs *)bp->b_data; 622 if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE || 623 fs->fs_bsize < sizeof(struct fs)) { 624 error = EINVAL; /* XXX needs translation */ 625 goto out; 626 } 627 fs->fs_fmod = 0; 628 fs->fs_flags &= ~FS_UNCLEAN; 629 if (fs->fs_clean == 0) { 630 fs->fs_flags |= FS_UNCLEAN; 631 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 632 printf( 633 "WARNING: %s was not properly dismounted\n", 634 fs->fs_fsmnt); 635 } else { 636 printf( 637 "WARNING: R/W mount of %s denied. 
Filesystem is not clean - run fsck\n", 638 fs->fs_fsmnt); 639 error = EPERM; 640 goto out; 641 } 642 } 643 /* XXX updating 4.2 FFS superblocks trashes rotational layout tables */ 644 if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) { 645 error = EROFS; /* needs translation */ 646 goto out; 647 } 648 ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK); 649 bzero((caddr_t)ump, sizeof *ump); 650 ump->um_malloctype = malloctype; 651 ump->um_i_effnlink_valid = 1; 652 ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, 653 M_WAITOK); 654 ump->um_blkatoff = ffs_blkatoff; 655 ump->um_truncate = ffs_truncate; 656 ump->um_update = ffs_update; 657 ump->um_valloc = ffs_valloc; 658 ump->um_vfree = ffs_vfree; 659 bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize); 660 if (fs->fs_sbsize < SBSIZE) 661 bp->b_flags |= B_INVAL; 662 brelse(bp); 663 bp = NULL; 664 fs = ump->um_fs; 665 fs->fs_ronly = ronly; 666 if (ronly == 0) { 667 fs->fs_fmod = 1; 668 fs->fs_clean = 0; 669 } 670 size = fs->fs_cssize; 671 blks = howmany(size, fs->fs_fsize); 672 if (fs->fs_contigsumsize > 0) 673 size += fs->fs_ncg * sizeof(int32_t); 674 base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK); 675 for (i = 0; i < blks; i += fs->fs_frag) { 676 size = fs->fs_bsize; 677 if (i + fs->fs_frag > blks) 678 size = (blks - i) * fs->fs_fsize; 679 if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, 680 cred, &bp)) != 0) { 681 free(base, M_UFSMNT); 682 goto out; 683 } 684 bcopy(bp->b_data, space, (u_int)size); 685 fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space; 686 space += size; 687 brelse(bp); 688 bp = NULL; 689 } 690 if (fs->fs_contigsumsize > 0) { 691 fs->fs_maxcluster = lp = (int32_t *)space; 692 for (i = 0; i < fs->fs_ncg; i++) 693 *lp++ = fs->fs_contigsumsize; 694 } 695 mp->mnt_data = (qaddr_t)ump; 696 mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0]; 697 mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1]; 698 if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 || 699 vfs_getvfs(&mp->mnt_stat.f_fsid)) 700 
vfs_getnewfsid(mp); 701 mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; 702 mp->mnt_flag |= MNT_LOCAL; 703 ump->um_mountp = mp; 704 ump->um_dev = dev; 705 ump->um_devvp = devvp; 706 ump->um_nindir = fs->fs_nindir; 707 ump->um_bptrtodb = fs->fs_fsbtodb; 708 ump->um_seqinc = fs->fs_frag; 709 for (i = 0; i < MAXQUOTAS; i++) 710 ump->um_quotas[i] = NULLVP; 711 devvp->v_specmountpoint = mp; 712 ffs_oldfscompat(fs); 713 714 /* 715 * Set FS local "last mounted on" information (NULL pad) 716 */ 717 copystr( mp->mnt_stat.f_mntonname, /* mount point*/ 718 fs->fs_fsmnt, /* copy area*/ 719 sizeof(fs->fs_fsmnt) - 1, /* max size*/ 720 &strsize); /* real size*/ 721 bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize); 722 723 if( mp->mnt_flag & MNT_ROOTFS) { 724 /* 725 * Root mount; update timestamp in mount structure. 726 * this will be used by the common root mount code 727 * to update the system clock. 728 */ 729 mp->mnt_time = fs->fs_time; 730 } 731 732 ump->um_savedmaxfilesize = fs->fs_maxfilesize; /* XXX */ 733 maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1; /* XXX */ 734 if (fs->fs_maxfilesize > maxfilesize) /* XXX */ 735 fs->fs_maxfilesize = maxfilesize; /* XXX */ 736 if (ronly == 0) { 737 if ((fs->fs_flags & FS_DOSOFTDEP) && 738 (error = softdep_mount(devvp, mp, fs, cred)) != 0) { 739 free(base, M_UFSMNT); 740 goto out; 741 } 742 fs->fs_clean = 0; 743 (void) ffs_sbupdate(ump, MNT_WAIT); 744 } 745 return (0); 746 out: 747 devvp->v_specmountpoint = NULL; 748 if (bp) 749 brelse(bp); 750 (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p); 751 if (ump) { 752 free(ump->um_fs, M_UFSMNT); 753 free(ump, M_UFSMNT); 754 mp->mnt_data = (qaddr_t)0; 755 } 756 return (error); 757 } 758 759 /* 760 * Sanity checks for old file systems. 761 * 762 * XXX - goes away some day. 
763 */ 764 static int 765 ffs_oldfscompat(fs) 766 struct fs *fs; 767 { 768 769 fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect); /* XXX */ 770 fs->fs_interleave = max(fs->fs_interleave, 1); /* XXX */ 771 if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ 772 fs->fs_nrpos = 8; /* XXX */ 773 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ 774 #if 0 775 int i; /* XXX */ 776 u_int64_t sizepb = fs->fs_bsize; /* XXX */ 777 /* XXX */ 778 fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1; /* XXX */ 779 for (i = 0; i < NIADDR; i++) { /* XXX */ 780 sizepb *= NINDIR(fs); /* XXX */ 781 fs->fs_maxfilesize += sizepb; /* XXX */ 782 } /* XXX */ 783 #endif 784 fs->fs_maxfilesize = (u_quad_t) 1LL << 39; 785 fs->fs_qbmask = ~fs->fs_bmask; /* XXX */ 786 fs->fs_qfmask = ~fs->fs_fmask; /* XXX */ 787 } /* XXX */ 788 return (0); 789 } 790 791 /* 792 * unmount system call 793 */ 794 int 795 ffs_unmount(mp, mntflags, p) 796 struct mount *mp; 797 int mntflags; 798 struct proc *p; 799 { 800 register struct ufsmount *ump; 801 register struct fs *fs; 802 int error, flags; 803 804 flags = 0; 805 if (mntflags & MNT_FORCE) { 806 flags |= FORCECLOSE; 807 } 808 if (mp->mnt_flag & MNT_SOFTDEP) { 809 if ((error = softdep_flushfiles(mp, flags, p)) != 0) 810 return (error); 811 } else { 812 if ((error = ffs_flushfiles(mp, flags, p)) != 0) 813 return (error); 814 } 815 ump = VFSTOUFS(mp); 816 fs = ump->um_fs; 817 if (fs->fs_ronly == 0) { 818 fs->fs_clean = fs->fs_flags & FS_UNCLEAN ? 0 : 1; 819 error = ffs_sbupdate(ump, MNT_WAIT); 820 if (error) { 821 fs->fs_clean = 0; 822 return (error); 823 } 824 } 825 ump->um_devvp->v_specmountpoint = NULL; 826 827 vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, p, 0, 0); 828 error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? 
FREAD : FREAD|FWRITE, 829 NOCRED, p); 830 831 vrele(ump->um_devvp); 832 833 free(fs->fs_csp[0], M_UFSMNT); 834 free(fs, M_UFSMNT); 835 free(ump, M_UFSMNT); 836 mp->mnt_data = (qaddr_t)0; 837 mp->mnt_flag &= ~MNT_LOCAL; 838 return (error); 839 } 840 841 /* 842 * Flush out all the files in a filesystem. 843 */ 844 int 845 ffs_flushfiles(mp, flags, p) 846 register struct mount *mp; 847 int flags; 848 struct proc *p; 849 { 850 register struct ufsmount *ump; 851 int error; 852 853 ump = VFSTOUFS(mp); 854 #ifdef QUOTA 855 if (mp->mnt_flag & MNT_QUOTA) { 856 int i; 857 error = vflush(mp, NULLVP, SKIPSYSTEM|flags); 858 if (error) 859 return (error); 860 for (i = 0; i < MAXQUOTAS; i++) { 861 if (ump->um_quotas[i] == NULLVP) 862 continue; 863 quotaoff(p, mp, i); 864 } 865 /* 866 * Here we fall through to vflush again to ensure 867 * that we have gotten rid of all the system vnodes. 868 */ 869 } 870 #endif 871 /* 872 * Flush all the files. 873 */ 874 if ((error = vflush(mp, NULL, flags)) != 0) 875 return (error); 876 /* 877 * Flush filesystem metadata. 878 */ 879 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); 880 error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p); 881 VOP_UNLOCK(ump->um_devvp, 0, p); 882 return (error); 883 } 884 885 /* 886 * Get file system statistics. 
 *
 * Fills *sbp from the in-core superblock of mp and returns 0.  Panics
 * if the superblock magic number is wrong.  The type and the two name
 * fields are copied from mp->mnt_stat only when sbp is a different
 * structure.
 */
int
ffs_statfs(mp, sbp, p)
	struct mount *mp;
	register struct statfs *sbp;
	struct proc *p;
{
	register struct ufsmount *ump;
	register struct fs *fs;

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	if (fs->fs_magic != FS_MAGIC)
		panic("ffs_statfs");
	/* Counts are reported in fragment-sized units. */
	sbp->f_bsize = fs->fs_fsize;
	sbp->f_iosize = fs->fs_bsize;
	sbp->f_blocks = fs->fs_dsize;
	/* Free whole blocks converted to fragments, plus loose fragments. */
	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
		fs->fs_cstotal.cs_nffree;
	sbp->f_bavail = freespace(fs, fs->fs_minfree);
	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
	sbp->f_ffree = fs->fs_cstotal.cs_nifree;
	if (sbp != &mp->mnt_stat) {
		sbp->f_type = mp->mnt_vfc->vfc_typenum;
		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
	}
	return (0);
}

/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
 *
 * Returns 0, or the most recent error seen from VOP_FSYNC() or
 * ffs_sbupdate(); an error does not stop the scan.
 */
int
ffs_sync(mp, waitfor, cred, p)
	struct mount *mp;
	int waitfor;
	struct ucred *cred;
	struct proc *p;
{
	struct vnode *nvp, *vp;
	struct inode *ip;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, allerror = 0;

	fs = ump->um_fs;
	/* A read-only filesystem must never have a modified superblock. */
	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
		printf("fs = %s\n", fs->fs_fsmnt);
		panic("ffs_sync: rofs mod");
	}
	/*
	 * Write back each (modified) inode.
	 */
	simple_lock(&mntvnode_slock);
loop:
	/* mntvnode_slock is held at the top of every iteration. */
	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
		/*
		 * If the vnode that we are about to sync is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp)
			goto loop;
		simple_lock(&vp->v_interlock);
		nvp = vp->v_mntvnodes.le_next;
		ip = VTOI(vp);
		/*
		 * Skip vnodes with no type, and vnodes with no inode flags
		 * pending that either have no dirty buffers or are being
		 * synced lazily.
		 */
		if ((vp->v_type == VNON) || (((ip->i_flag &
		     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) &&
		    (TAILQ_EMPTY(&vp->v_dirtyblkhd) || (waitfor == MNT_LAZY)))) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		if (vp->v_type != VCHR) {
			/*
			 * Drop the list lock for the fsync; the vnode
			 * interlock is handed to vget() (LK_INTERLOCK).
			 */
			simple_unlock(&mntvnode_slock);
			error =
			  vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
			if (error) {
				simple_lock(&mntvnode_slock);
				/* ENOENT: rescan the list; otherwise skip it. */
				if (error == ENOENT)
					goto loop;
				continue;
			}
			if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0)
				allerror = error;
			VOP_UNLOCK(vp, 0, p);
			vrele(vp);
			simple_lock(&mntvnode_slock);
		} else {
			/* Character devices: only update the inode, no fsync. */
			simple_unlock(&mntvnode_slock);
			simple_unlock(&vp->v_interlock);
			/* UFS_UPDATE(vp, waitfor == MNT_WAIT); */
			UFS_UPDATE(vp, 0);
			simple_lock(&mntvnode_slock);
		}
	}
	simple_unlock(&mntvnode_slock);
	/*
	 * Force stale file system control information to be flushed.
	 */
	if (waitfor != MNT_LAZY) {
		/*
		 * With soft updates the device sync is not waited for.
		 * This also changes the waitfor value passed to
		 * ffs_sbupdate() below.
		 */
		if (ump->um_mountp->mnt_flag & MNT_SOFTDEP)
			waitfor = MNT_NOWAIT;
		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
		if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0)
			allerror = error;
		VOP_UNLOCK(ump->um_devvp, 0, p);
	}
#ifdef QUOTA
	qsync(mp);
#endif
	/*
	 * Write back modified superblock.
	 */
	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
		allerror = error;
	return (allerror);
}

/*
 * Look up a FFS dinode number to find its incore vnode, otherwise read it
 * in from disk.  If it is in core, wait for the lock bit to clear, then
 * return the inode locked.  Detection and handling of mount points must be
 * done by the calling routine.
1016 */ 1017 static int ffs_inode_hash_lock; 1018 1019 int 1020 ffs_vget(mp, ino, vpp) 1021 struct mount *mp; 1022 ino_t ino; 1023 struct vnode **vpp; 1024 { 1025 struct fs *fs; 1026 struct inode *ip; 1027 struct ufsmount *ump; 1028 struct buf *bp; 1029 struct vnode *vp; 1030 dev_t dev; 1031 int error; 1032 1033 ump = VFSTOUFS(mp); 1034 dev = ump->um_dev; 1035 restart: 1036 if ((*vpp = ufs_ihashget(dev, ino)) != NULL) { 1037 return (0); 1038 } 1039 1040 /* 1041 * Lock out the creation of new entries in the FFS hash table in 1042 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate 1043 * may occur! 1044 */ 1045 if (ffs_inode_hash_lock) { 1046 while (ffs_inode_hash_lock) { 1047 ffs_inode_hash_lock = -1; 1048 tsleep(&ffs_inode_hash_lock, PVM, "ffsvgt", 0); 1049 } 1050 goto restart; 1051 } 1052 ffs_inode_hash_lock = 1; 1053 1054 /* 1055 * If this MALLOC() is performed after the getnewvnode() 1056 * it might block, leaving a vnode with a NULL v_data to be 1057 * found by ffs_sync() if a sync happens to fire right then, 1058 * which will cause a panic because ffs_sync() blindly 1059 * dereferences vp->v_data (as well it should). 1060 */ 1061 MALLOC(ip, struct inode *, sizeof(struct inode), 1062 ump->um_malloctype, M_WAITOK); 1063 1064 /* Allocate a new vnode/inode. 
*/ 1065 error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp); 1066 if (error) { 1067 if (ffs_inode_hash_lock < 0) 1068 wakeup(&ffs_inode_hash_lock); 1069 ffs_inode_hash_lock = 0; 1070 *vpp = NULL; 1071 FREE(ip, ump->um_malloctype); 1072 return (error); 1073 } 1074 bzero((caddr_t)ip, sizeof(struct inode)); 1075 lockinit(&ip->i_lock, PINOD, "inode", 0, 0); 1076 vp->v_data = ip; 1077 ip->i_vnode = vp; 1078 ip->i_fs = fs = ump->um_fs; 1079 ip->i_dev = dev; 1080 ip->i_number = ino; 1081 #ifdef QUOTA 1082 { 1083 int i; 1084 for (i = 0; i < MAXQUOTAS; i++) 1085 ip->i_dquot[i] = NODQUOT; 1086 } 1087 #endif 1088 /* 1089 * Put it onto its hash chain and lock it so that other requests for 1090 * this inode will block if they arrive while we are sleeping waiting 1091 * for old data structures to be purged or for the contents of the 1092 * disk portion of this inode to be read. 1093 */ 1094 ufs_ihashins(ip); 1095 1096 if (ffs_inode_hash_lock < 0) 1097 wakeup(&ffs_inode_hash_lock); 1098 ffs_inode_hash_lock = 0; 1099 1100 /* Read in the disk contents for the inode, copy into the inode. */ 1101 error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1102 (int)fs->fs_bsize, NOCRED, &bp); 1103 if (error) { 1104 /* 1105 * The inode does not contain anything useful, so it would 1106 * be misleading to leave it on its hash chain. With mode 1107 * still zero, it will be unlinked and returned to the free 1108 * list by vput(). 1109 */ 1110 brelse(bp); 1111 vput(vp); 1112 *vpp = NULL; 1113 return (error); 1114 } 1115 ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino)); 1116 if (DOINGSOFTDEP(vp)) 1117 softdep_load_inodeblock(ip); 1118 else 1119 ip->i_effnlink = ip->i_nlink; 1120 bqrelse(bp); 1121 1122 /* 1123 * Initialize the vnode from the inode, check for aliases. 1124 * Note that the underlying vnode may have changed. 
1125 */ 1126 error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp); 1127 if (error) { 1128 vput(vp); 1129 *vpp = NULL; 1130 return (error); 1131 } 1132 /* 1133 * Finish inode initialization now that aliasing has been resolved. 1134 */ 1135 ip->i_devvp = ump->um_devvp; 1136 VREF(ip->i_devvp); 1137 /* 1138 * Set up a generation number for this inode if it does not 1139 * already have one. This should only happen on old filesystems. 1140 */ 1141 if (ip->i_gen == 0) { 1142 ip->i_gen = random() / 2 + 1; 1143 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1144 ip->i_flag |= IN_MODIFIED; 1145 } 1146 /* 1147 * Ensure that uid and gid are correct. This is a temporary 1148 * fix until fsck has been changed to do the update. 1149 */ 1150 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ 1151 ip->i_uid = ip->i_din.di_ouid; /* XXX */ 1152 ip->i_gid = ip->i_din.di_ogid; /* XXX */ 1153 } /* XXX */ 1154 1155 *vpp = vp; 1156 return (0); 1157 } 1158 1159 /* 1160 * File handle to vnode 1161 * 1162 * Have to be really careful about stale file handles: 1163 * - check that the inode number is valid 1164 * - call ffs_vget() to get the locked inode 1165 * - check for an unallocated inode (i_mode == 0) 1166 * - check that the given client host has export rights and return 1167 * those rights via. 
exflagsp and credanonp 1168 */ 1169 int 1170 ffs_fhtovp(mp, fhp, vpp) 1171 register struct mount *mp; 1172 struct fid *fhp; 1173 struct vnode **vpp; 1174 { 1175 register struct ufid *ufhp; 1176 struct fs *fs; 1177 1178 ufhp = (struct ufid *)fhp; 1179 fs = VFSTOUFS(mp)->um_fs; 1180 if (ufhp->ufid_ino < ROOTINO || 1181 ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg) 1182 return (ESTALE); 1183 return (ufs_fhtovp(mp, ufhp, vpp)); 1184 } 1185 1186 /* 1187 * Vnode pointer to File handle 1188 */ 1189 /* ARGSUSED */ 1190 int 1191 ffs_vptofh(vp, fhp) 1192 struct vnode *vp; 1193 struct fid *fhp; 1194 { 1195 register struct inode *ip; 1196 register struct ufid *ufhp; 1197 1198 ip = VTOI(vp); 1199 ufhp = (struct ufid *)fhp; 1200 ufhp->ufid_len = sizeof(struct ufid); 1201 ufhp->ufid_ino = ip->i_number; 1202 ufhp->ufid_gen = ip->i_gen; 1203 return (0); 1204 } 1205 1206 /* 1207 * Initialize the filesystem; just use ufs_init. 1208 */ 1209 static int 1210 ffs_init(vfsp) 1211 struct vfsconf *vfsp; 1212 { 1213 1214 softdep_initialize(); 1215 return (ufs_init(vfsp)); 1216 } 1217 1218 /* 1219 * Write a superblock and associated information back to disk. 1220 */ 1221 static int 1222 ffs_sbupdate(mp, waitfor) 1223 struct ufsmount *mp; 1224 int waitfor; 1225 { 1226 register struct fs *dfs, *fs = mp->um_fs; 1227 register struct buf *bp; 1228 int blks; 1229 caddr_t space; 1230 int i, size, error, allerror = 0; 1231 1232 /* 1233 * First write back the summary information. 
1234 */ 1235 blks = howmany(fs->fs_cssize, fs->fs_fsize); 1236 space = (caddr_t)fs->fs_csp[0]; 1237 for (i = 0; i < blks; i += fs->fs_frag) { 1238 size = fs->fs_bsize; 1239 if (i + fs->fs_frag > blks) 1240 size = (blks - i) * fs->fs_fsize; 1241 bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i), 1242 size, 0, 0); 1243 bcopy(space, bp->b_data, (u_int)size); 1244 space += size; 1245 if (waitfor != MNT_WAIT) 1246 bawrite(bp); 1247 else if ((error = bwrite(bp)) != 0) 1248 allerror = error; 1249 } 1250 /* 1251 * Now write back the superblock itself. If any errors occurred 1252 * up to this point, then fail so that the superblock avoids 1253 * being written out as clean. 1254 */ 1255 if (allerror) 1256 return (allerror); 1257 bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0); 1258 fs->fs_fmod = 0; 1259 fs->fs_time = time_second; 1260 bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); 1261 /* Restore compatibility to old file systems. XXX */ 1262 dfs = (struct fs *)bp->b_data; /* XXX */ 1263 if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ 1264 dfs->fs_nrpos = -1; /* XXX */ 1265 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ 1266 int32_t *lp, tmp; /* XXX */ 1267 /* XXX */ 1268 lp = (int32_t *)&dfs->fs_qbmask; /* XXX */ 1269 tmp = lp[4]; /* XXX */ 1270 for (i = 4; i > 0; i--) /* XXX */ 1271 lp[i] = lp[i-1]; /* XXX */ 1272 lp[0] = tmp; /* XXX */ 1273 } /* XXX */ 1274 dfs->fs_maxfilesize = mp->um_savedmaxfilesize; /* XXX */ 1275 if (waitfor != MNT_WAIT) 1276 bawrite(bp); 1277 else if ((error = bwrite(bp)) != 0) 1278 allerror = error; 1279 return (allerror); 1280 } 1281