/*-
 * Copyright (c) 1989, 1991, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_mac.h"
#include "opt_quota.h"
#include "opt_ufs.h"
#include "opt_ffs.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mutex.h>

#include <security/mac/mac_framework.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/gjournal.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

#include <vm/vm.h>
#include <vm/uma.h>
#include <vm/vm_page.h>

#include <geom/geom.h>
#include <geom/geom_vfs.h>

#include <ddb/ddb.h>

static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;

static int	ffs_reload(struct mount *, struct thread *);
static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
		    ufs2_daddr_t);
static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
static vfs_init_t ffs_init;
static vfs_uninit_t ffs_uninit;
static vfs_extattrctl_t ffs_extattrctl;
static vfs_cmount_t ffs_cmount;
static vfs_unmount_t ffs_unmount;
static vfs_mount_t ffs_mount;
static vfs_statfs_t ffs_statfs;
static vfs_fhtovp_t ffs_fhtovp;
static vfs_sync_t ffs_sync;

static struct vfsops ufs_vfsops = {
	.vfs_extattrctl =	ffs_extattrctl,
	.vfs_fhtovp =		ffs_fhtovp,
	.vfs_init =		ffs_init,
	.vfs_mount =		ffs_mount,
	.vfs_cmount =		ffs_cmount,
	.vfs_quotactl =		ufs_quotactl,
	.vfs_root =		ufs_root,
	.vfs_statfs =		ffs_statfs,
	.vfs_sync =		ffs_sync,
	.vfs_uninit =		ffs_uninit,
	.vfs_unmount =		ffs_unmount,
	.vfs_vget =		ffs_vget,
	.vfs_susp_clean =	process_deferred_inactive,
};

VFS_SET(ufs_vfsops, ufs, 0);
MODULE_VERSION(ufs, 1);
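
/*
 * Editor's note: the vfsops table above is how FFS plugs its entry points
 * into the VFS layer; to my understanding, entries left NULL are filled in
 * with the generic vfs_std* defaults when vfs_register() processes the
 * VFS_SET() declaration.  A minimal sketch of the registration pattern
 * (illustrative only, guarded out of the build):
 */
#if 0	/* illustrative sketch, not part of the build */
static struct vfsops example_vfsops = {
	.vfs_mount =	ffs_mount,	/* attach a new instance */
	.vfs_unmount =	ffs_unmount,	/* detach it */
	.vfs_root =	ufs_root,	/* return the root vnode */
	.vfs_statfs =	ffs_statfs,	/* fill in struct statfs */
	/* unset operations fall back to vfs_std* defaults */
};
VFS_SET(example_vfsops, examplefs, 0);
#endif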

static b_strategy_t ffs_geom_strategy;
static b_write_t ffs_bufwrite;

static struct buf_ops ffs_ops = {
	.bop_name =	"FFS",
	.bop_write =	ffs_bufwrite,
	.bop_strategy =	ffs_geom_strategy,
	.bop_sync =	bufsync,
#ifdef NO_FFS_SNAPSHOT
	.bop_bdflush =	bufbdflush,
#else
	.bop_bdflush =	ffs_bdflush,
#endif
};

static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr",
    "noclusterw", "noexec", "export", "force", "from", "multilabel",
    "snapshot", "nosuid", "suiddir", "nosymfollow", "sync",
    "union", NULL };

static int
ffs_mount(struct mount *mp)
{
	struct vnode *devvp;
	struct thread *td;
	struct ufsmount *ump = 0;
	struct fs *fs;
	int error, flags;
	u_int mntorflags, mntandnotflags;
	accmode_t accmode;
	struct nameidata ndp;
	char *fspec;

	td = curthread;
	if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
		return (EINVAL);
	if (uma_inode == NULL) {
		uma_inode = uma_zcreate("FFS inode",
		    sizeof(struct inode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
		uma_ufs1 = uma_zcreate("FFS1 dinode",
		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
		uma_ufs2 = uma_zcreate("FFS2 dinode",
		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
	}

	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
	if (error)
		return (error);

	mntorflags = 0;
	mntandnotflags = 0;
	if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
		mntorflags |= MNT_ACLS;

	if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) {
		mntorflags |= MNT_SNAPSHOT;
		/*
		 * Once we have set the MNT_SNAPSHOT flag, do not
		 * persist "snapshot" in the options list.
		 */
		vfs_deleteopt(mp->mnt_optnew, "snapshot");
		vfs_deleteopt(mp->mnt_opt, "snapshot");
	}

	MNT_ILOCK(mp);
	mp->mnt_flag = (mp->mnt_flag | mntorflags) & ~mntandnotflags;
	MNT_IUNLOCK(mp);
	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		devvp = ump->um_devvp;
		if (fs->fs_ronly == 0 &&
		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
			/*
			 * Flush any dirty data and suspend filesystem.
			 */
			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
				return (error);
			for (;;) {
				vn_finished_write(mp);
				if ((error = vfs_write_suspend(mp)) != 0)
					return (error);
				MNT_ILOCK(mp);
				if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
					/*
					 * Allow the secondary writes
					 * to proceed.
					 */
					mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
					    MNTK_SUSPEND2);
					wakeup(&mp->mnt_flag);
					MNT_IUNLOCK(mp);
					/*
					 * Allow the curthread to
					 * ignore the suspension to
					 * synchronize on-disk state.
					 */
					td->td_pflags |= TDP_IGNSUSP;
					break;
				}
				MNT_IUNLOCK(mp);
				vn_start_write(NULL, &mp, V_WAIT);
			}
			/*
			 * Check for and optionally get rid of files open
			 * for writing.
			 */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			if (mp->mnt_flag & MNT_SOFTDEP) {
				error = softdep_flushfiles(mp, flags, td);
			} else {
				error = ffs_flushfiles(mp, flags, td);
			}
			if (error) {
				vfs_write_resume(mp);
				return (error);
			}
			if (fs->fs_pendingblocks != 0 ||
			    fs->fs_pendinginodes != 0) {
				printf("%s: %s: blocks %jd files %d\n",
				    fs->fs_fsmnt, "update error",
				    (intmax_t)fs->fs_pendingblocks,
				    fs->fs_pendinginodes);
				fs->fs_pendingblocks = 0;
				fs->fs_pendinginodes = 0;
			}
			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
				fs->fs_clean = 1;
			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
				fs->fs_ronly = 0;
				fs->fs_clean = 0;
				vfs_write_resume(mp);
				return (error);
			}
			DROP_GIANT();
			g_topology_lock();
			g_access(ump->um_cp, 0, -1, 0);
			g_topology_unlock();
			PICKUP_GIANT();
			fs->fs_ronly = 1;
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_RDONLY;
			MNT_IUNLOCK(mp);
			/*
			 * Allow the writers to note that filesystem
			 * is ro now.
			 */
			vfs_write_resume(mp);
		}
		if ((mp->mnt_flag & MNT_RELOAD) &&
		    (error = ffs_reload(mp, td)) != 0)
			return (error);
		if (fs->fs_ronly &&
		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
			/*
			 * If upgrade to read-write by non-root, then verify
			 * that user has necessary permissions on the device.
			 */
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_ACCESS(devvp, VREAD | VWRITE,
			    td->td_ucred, td);
			if (error)
				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
			if (error) {
				VOP_UNLOCK(devvp, 0);
				return (error);
			}
			VOP_UNLOCK(devvp, 0);
			fs->fs_flags &= ~FS_UNCLEAN;
			if (fs->fs_clean == 0) {
				fs->fs_flags |= FS_UNCLEAN;
				if ((mp->mnt_flag & MNT_FORCE) ||
				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
				     (fs->fs_flags & FS_DOSOFTDEP))) {
					printf("WARNING: %s was not %s\n",
					    fs->fs_fsmnt,
					    "properly dismounted");
				} else {
					printf(
"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",
					    fs->fs_fsmnt);
					return (EPERM);
				}
			}
			DROP_GIANT();
			g_topology_lock();
			/*
			 * If we're the root device, we may not have an E count
			 * yet, get it now.
			 */
			if (ump->um_cp->ace == 0)
				error = g_access(ump->um_cp, 0, 1, 1);
			else
				error = g_access(ump->um_cp, 0, 1, 0);
			g_topology_unlock();
			PICKUP_GIANT();
			if (error)
				return (error);
			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
				return (error);
			fs->fs_ronly = 0;
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_RDONLY;
			MNT_IUNLOCK(mp);
			fs->fs_clean = 0;
			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
				vn_finished_write(mp);
				return (error);
			}
			/* check to see if we need to start softdep */
			if ((fs->fs_flags & FS_DOSOFTDEP) &&
			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
				vn_finished_write(mp);
				return (error);
			}
			if (fs->fs_snapinum[0] != 0)
				ffs_snapshot_mount(mp);
			vn_finished_write(mp);
		}
		/*
		 * Soft updates is incompatible with "async",
		 * so if we are doing softupdates stop the user
		 * from setting the async flag in an update.
		 * Softdep_mount() clears it in an initial mount
		 * or ro->rw remount.
		 */
		if (mp->mnt_flag & MNT_SOFTDEP) {
			/* XXX: Reset too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_ASYNC;
			MNT_IUNLOCK(mp);
		}
		/*
		 * Keep MNT_ACLS flag if it is stored in superblock.
		 */
		if ((fs->fs_flags & FS_ACLS) != 0) {
			/* XXX: Set too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_ACLS;
			MNT_IUNLOCK(mp);
		}

		/*
		 * If this is a snapshot request, take the snapshot.
		 */
		if (mp->mnt_flag & MNT_SNAPSHOT)
			return (ffs_snapshot(mp, fspec));
	}

	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible disk device.
	 */
	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
	if ((error = namei(&ndp)) != 0)
		return (error);
	NDFREE(&ndp, NDF_ONLY_PNBUF);
	devvp = ndp.ni_vp;
	if (!vn_isdisk(devvp, &error)) {
		vput(devvp);
		return (error);
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	accmode = VREAD;
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		accmode |= VWRITE;
	error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
	if (error)
		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
	if (error) {
		vput(devvp);
		return (error);
	}

	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * Update only
		 *
		 * If it's not the same vnode, or at least the same device
		 * then it's not correct.
		 */

		if (devvp->v_rdev != ump->um_devvp->v_rdev)
			error = EINVAL;	/* needs translation */
		vput(devvp);
		if (error)
			return (error);
	} else {
		/*
		 * New mount
		 *
		 * We need the name for the mount point (also used for
		 * "last mounted on") copied in. If an error occurs,
		 * the mount point is discarded by the upper level code.
		 * Note that vfs_mount() populates f_mntonname for us.
		 */
		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
			vrele(devvp);
			return (error);
		}
	}
	vfs_mountedfrom(mp, fspec);
	return (0);
}

/*
 * Compatibility with old mount system call.
 */

static int
ffs_cmount(struct mntarg *ma, void *data, int flags)
{
	struct ufs_args args;
	int error;

	if (data == NULL)
		return (EINVAL);
	error = copyin(data, &args, sizeof args);
	if (error)
		return (error);

	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
	ma = mount_arg(ma, "export", &args.export, sizeof args.export);
	error = kernel_mount(ma, flags);

	return (error);
}
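
/*
 * Editor's note: ffs_cmount() above is the bridge from the pre-nmount(2)
 * interface: the binary struct ufs_args is copied in from userland and
 * re-expressed as name/value mount options for the regular ffs_mount()
 * path.  A hedged userland illustration of the two calling styles (device
 * and mount point names are made up for the example):
 */
#if 0	/* illustrative sketch, not part of the build */
	/* old style: a fixed binary argument structure */
	struct ufs_args args = { .fspec = "/dev/ada0p2" };
	mount("ufs", "/mnt", MNT_RDONLY, &args);

	/* new style: the same request as an option list via nmount(2) */
	struct iovec iov[] = {
		{ "fstype", sizeof("fstype") }, { "ufs", sizeof("ufs") },
		{ "fspath", sizeof("fspath") }, { "/mnt", sizeof("/mnt") },
		{ "from", sizeof("from") },
		{ "/dev/ada0p2", sizeof("/dev/ada0p2") },
	};
	nmount(iov, sizeof(iov) / sizeof(iov[0]), MNT_RDONLY);
#endif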

/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix). The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 *	1) invalidate all cached meta-data.
 *	2) re-read superblock from disk.
 *	3) re-read summary information from disk.
 *	4) invalidate all inactive vnodes.
 *	5) invalidate all cached file data.
 *	6) re-read inode data for all active vnodes.
 */
static int
ffs_reload(struct mount *mp, struct thread *td)
{
	struct vnode *vp, *mvp, *devvp;
	struct inode *ip;
	void *space;
	struct buf *bp;
	struct fs *fs, *newfs;
	struct ufsmount *ump;
	ufs2_daddr_t sblockloc;
	int i, blks, size, error;
	int32_t *lp;

	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		return (EINVAL);
	ump = VFSTOUFS(mp);
	/*
	 * Step 1: invalidate all cached meta-data.
	 */
	devvp = VFSTOUFS(mp)->um_devvp;
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
	if (vinvalbuf(devvp, 0, 0, 0) != 0)
		panic("ffs_reload: dirty1");
	VOP_UNLOCK(devvp, 0);

	/*
	 * Step 2: re-read superblock from disk.
	 */
	fs = VFSTOUFS(mp)->um_fs;
	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
	    NOCRED, &bp)) != 0)
		return (error);
	newfs = (struct fs *)bp->b_data;
	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
	     newfs->fs_magic != FS_UFS2_MAGIC) ||
	    newfs->fs_bsize > MAXBSIZE ||
	    newfs->fs_bsize < sizeof(struct fs)) {
		brelse(bp);
		return (EIO);		/* XXX needs translation */
	}
	/*
	 * Copy pointer fields back into superblock before copying in	XXX
	 * new superblock. These should really be in the ufsmount.	XXX
	 * Note that important parameters (eg fs_ncg) are unchanged.
	 */
	newfs->fs_csp = fs->fs_csp;
	newfs->fs_maxcluster = fs->fs_maxcluster;
	newfs->fs_contigdirs = fs->fs_contigdirs;
	newfs->fs_active = fs->fs_active;
	/* The file system is still read-only. */
	newfs->fs_ronly = 1;
	sblockloc = fs->fs_sblockloc;
	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
	brelse(bp);
	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
	UFS_LOCK(ump);
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("%s: reload pending error: blocks %jd files %d\n",
		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
		    fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	UFS_UNLOCK(ump);

	/*
	 * Step 3: re-read summary information from disk.
	 */
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = fs->fs_csp;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
		    NOCRED, &bp);
		if (error)
			return (error);
		bcopy(bp->b_data, space, (u_int)size);
		space = (char *)space + size;
		brelse(bp);
	}
	/*
	 * We no longer know anything about clusters per cylinder group.
	 */
	if (fs->fs_contigsumsize > 0) {
		lp = fs->fs_maxcluster;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
	}

loop:
	MNT_ILOCK(mp);
	MNT_VNODE_FOREACH(vp, mp, mvp) {
		VI_LOCK(vp);
		if (vp->v_iflag & VI_DOOMED) {
			VI_UNLOCK(vp);
			continue;
		}
		MNT_IUNLOCK(mp);
		/*
		 * Step 4: invalidate all cached file data.
		 */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
			MNT_VNODE_FOREACH_ABORT(mp, mvp);
			goto loop;
		}
		if (vinvalbuf(vp, 0, 0, 0))
			panic("ffs_reload: dirty2");
		/*
		 * Step 5: re-read inode data for all active vnodes.
		 */
		ip = VTOI(vp);
		error =
		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			VOP_UNLOCK(vp, 0);
			vrele(vp);
			MNT_VNODE_FOREACH_ABORT(mp, mvp);
			return (error);
		}
		ffs_load_inode(bp, ip, fs, ip->i_number);
		ip->i_effnlink = ip->i_nlink;
		brelse(bp);
		VOP_UNLOCK(vp, 0);
		vrele(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	return (0);
}
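
/*
 * Editor's note: SBLOCKSEARCH, used just below, comes from <ufs/ffs/fs.h>
 * and expands to the candidate superblock offsets terminated by -1; in
 * this era it is effectively { SBLOCK_UFS2, SBLOCK_UFS1, SBLOCK_FLOPPY,
 * SBLOCK_PIGGY, -1 }, i.e. 64KB, 8KB, 0 and 256KB from the start of the
 * partition.  ffs_mountfs() simply walks the table until a superblock
 * with a valid magic number and a plausible block size is found.
 */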

/*
 * Possible superblock locations ordered from most to least likely.
 */
static int sblock_try[] = SBLOCKSEARCH;

/*
 * Common code for mount and mountroot
 */
static int
ffs_mountfs(devvp, mp, td)
	struct vnode *devvp;
	struct mount *mp;
	struct thread *td;
{
	struct ufsmount *ump;
	struct buf *bp;
	struct fs *fs;
	struct cdev *dev;
	void *space;
	ufs2_daddr_t sblockloc;
	int error, i, blks, size, ronly;
	int32_t *lp;
	struct ucred *cred;
	struct g_consumer *cp;
	struct mount *nmp;

	bp = NULL;
	ump = NULL;
	cred = td ? td->td_ucred : NOCRED;
	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;

	dev = devvp->v_rdev;
	dev_ref(dev);
	DROP_GIANT();
	g_topology_lock();
	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);

	/*
	 * If we are a root mount, drop the E flag so fsck can do its magic.
	 * We will pick it up again when we remount R/W.
	 */
	if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
		error = g_access(cp, 0, 0, -1);
	g_topology_unlock();
	PICKUP_GIANT();
	VOP_UNLOCK(devvp, 0);
	if (error)
		goto out;
	if (devvp->v_rdev->si_iosize_max != 0)
		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
	if (mp->mnt_iosize_max > MAXPHYS)
		mp->mnt_iosize_max = MAXPHYS;

	devvp->v_bufobj.bo_private = cp;
	devvp->v_bufobj.bo_ops = &ffs_ops;

	fs = NULL;
	sblockloc = 0;
	/*
	 * Try reading the superblock in each of its possible locations.
	 */
	for (i = 0; sblock_try[i] != -1; i++) {
		if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
			error = EINVAL;
			vfs_mount_error(mp,
			    "Invalid sectorsize %d for superblock size %d",
			    cp->provider->sectorsize, SBLOCKSIZE);
			goto out;
		}
		if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE,
		    cred, &bp)) != 0)
			goto out;
		fs = (struct fs *)bp->b_data;
		sblockloc = sblock_try[i];
		if ((fs->fs_magic == FS_UFS1_MAGIC ||
		     (fs->fs_magic == FS_UFS2_MAGIC &&
		      (fs->fs_sblockloc == sblockloc ||
		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
		    fs->fs_bsize <= MAXBSIZE &&
		    fs->fs_bsize >= sizeof(struct fs))
			break;
		brelse(bp);
		bp = NULL;
	}
	if (sblock_try[i] == -1) {
		error = EINVAL;		/* XXX needs translation */
		goto out;
	}
	fs->fs_fmod = 0;
	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indices */
	fs->fs_flags &= ~FS_UNCLEAN;
	if (fs->fs_clean == 0) {
		fs->fs_flags |= FS_UNCLEAN;
		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
		     (fs->fs_flags & FS_DOSOFTDEP))) {
			printf("WARNING: %s was not properly dismounted\n",
			    fs->fs_fsmnt);
		} else {
			printf(
"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",
			    fs->fs_fsmnt);
			error = EPERM;
			goto out;
		}
		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
		    (mp->mnt_flag & MNT_FORCE)) {
			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
			    (intmax_t)fs->fs_pendingblocks,
			    fs->fs_pendinginodes);
			fs->fs_pendingblocks = 0;
			fs->fs_pendinginodes = 0;
		}
	}
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("%s: mount pending error: blocks %jd files %d\n",
		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
		    fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	if ((fs->fs_flags & FS_GJOURNAL) != 0) {
#ifdef UFS_GJOURNAL
		/*
		 * Get journal provider name.
		 */
		size = 1024;
		mp->mnt_gjprovider = malloc(size, M_UFSMNT, M_WAITOK);
		if (g_io_getattr("GJOURNAL::provider", cp, &size,
		    mp->mnt_gjprovider) == 0) {
			mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, size,
			    M_UFSMNT, M_WAITOK);
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_GJOURNAL;
			MNT_IUNLOCK(mp);
		} else {
			printf(
"WARNING: %s: GJOURNAL flag on fs but no gjournal provider below\n",
			    mp->mnt_stat.f_mntonname);
			free(mp->mnt_gjprovider, M_UFSMNT);
			mp->mnt_gjprovider = NULL;
		}
#else
		printf(
"WARNING: %s: GJOURNAL flag on fs but no UFS_GJOURNAL support\n",
		    mp->mnt_stat.f_mntonname);
#endif
	} else {
		mp->mnt_gjprovider = NULL;
	}
	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
	ump->um_cp = cp;
	ump->um_bo = &devvp->v_bufobj;
	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		ump->um_fstype = UFS1;
		ump->um_balloc = ffs_balloc_ufs1;
	} else {
		ump->um_fstype = UFS2;
		ump->um_balloc = ffs_balloc_ufs2;
	}
	ump->um_blkatoff = ffs_blkatoff;
	ump->um_truncate = ffs_truncate;
	ump->um_update = ffs_update;
	ump->um_valloc = ffs_valloc;
	ump->um_vfree = ffs_vfree;
	ump->um_ifree = ffs_ifree;
	ump->um_rdonly = ffs_rdonly;
	mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
	if (fs->fs_sbsize < SBLOCKSIZE)
		bp->b_flags |= B_INVAL | B_NOCACHE;
	brelse(bp);
	bp = NULL;
	fs = ump->um_fs;
	ffs_oldfscompat_read(fs, ump, sblockloc);
	fs->fs_ronly = ronly;
	size = fs->fs_cssize;
	blks = howmany(size, fs->fs_fsize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	size += fs->fs_ncg * sizeof(u_int8_t);
	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
	fs->fs_csp = space;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
		    cred, &bp)) != 0) {
			free(fs->fs_csp, M_UFSMNT);
			goto out;
		}
		bcopy(bp->b_data, space, (u_int)size);
		space = (char *)space + size;
		brelse(bp);
		bp = NULL;
	}
	if (fs->fs_contigsumsize > 0) {
		fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
		space = lp;
	}
	size = fs->fs_ncg * sizeof(u_int8_t);
	fs->fs_contigdirs = (u_int8_t *)space;
	bzero(fs->fs_contigdirs, size);
	fs->fs_active = NULL;
	mp->mnt_data = ump;
	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
	nmp = NULL;
	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
	    (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
		if (nmp)
			vfs_rel(nmp);
		vfs_getnewfsid(mp);
	}
	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_LOCAL;
	MNT_IUNLOCK(mp);
	if ((fs->fs_flags & FS_MULTILABEL) != 0) {
#ifdef MAC
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_MULTILABEL;
		MNT_IUNLOCK(mp);
#else
		printf(
"WARNING: %s: multilabel flag on fs but no MAC support\n",
		    mp->mnt_stat.f_mntonname);
#endif
	}
	if ((fs->fs_flags & FS_ACLS) != 0) {
#ifdef UFS_ACL
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_ACLS;
		MNT_IUNLOCK(mp);
#else
		printf(
"WARNING: %s: ACLs flag on fs but no ACLs support\n",
		    mp->mnt_stat.f_mntonname);
#endif
	}
	ump->um_mountp = mp;
	ump->um_dev = dev;
	ump->um_devvp = devvp;
	ump->um_nindir = fs->fs_nindir;
	ump->um_bptrtodb = fs->fs_fsbtodb;
	ump->um_seqinc = fs->fs_frag;
	for (i = 0; i < MAXQUOTAS; i++)
		ump->um_quotas[i] = NULLVP;
#ifdef UFS_EXTATTR
	ufs_extattr_uepm_init(&ump->um_extattr);
#endif
	/*
	 * Set FS local "last mounted on" information (NULL pad)
	 */
	bzero(fs->fs_fsmnt, MAXMNTLEN);
	strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);

	if (mp->mnt_flag & MNT_ROOTFS) {
		/*
		 * Root mount; update timestamp in mount structure.
		 * This will be used by the common root mount code
		 * to update the system clock.
		 */
		mp->mnt_time = fs->fs_time;
	}

	if (ronly == 0) {
		if ((fs->fs_flags & FS_DOSOFTDEP) &&
		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
			free(fs->fs_csp, M_UFSMNT);
			goto out;
		}
		if (fs->fs_snapinum[0] != 0)
			ffs_snapshot_mount(mp);
		fs->fs_fmod = 1;
		fs->fs_clean = 0;
		(void) ffs_sbupdate(ump, MNT_WAIT, 0);
	}
	/*
	 * Initialize filesystem stat information in mount struct.
	 */
	MNT_ILOCK(mp);
	mp->mnt_kern_flag |= MNTK_MPSAFE | MNTK_LOOKUP_SHARED |
	    MNTK_EXTENDED_SHARED;
	MNT_IUNLOCK(mp);
#ifdef UFS_EXTATTR
#ifdef UFS_EXTATTR_AUTOSTART
	/*
	 * Auto-starting does the following:
	 *	- check for /.attribute in the fs, and extattr_start if so
	 *	- for each file in .attribute, enable that file with
	 *	  an attribute of the same name.
	 * Not clear how to report errors -- probably eat them.
	 * This would all happen while the filesystem was busy/not
	 * available, so would effectively be "atomic".
	 */
	mp->mnt_stat.f_iosize = fs->fs_bsize;
	(void) ufs_extattr_autostart(mp, td);
#endif /* !UFS_EXTATTR_AUTOSTART */
#endif /* !UFS_EXTATTR */
	return (0);
out:
	if (bp)
		brelse(bp);
	if (cp != NULL) {
		DROP_GIANT();
		g_topology_lock();
		g_vfs_close(cp);
		g_topology_unlock();
		PICKUP_GIANT();
	}
	if (ump) {
		mtx_destroy(UFS_MTX(ump));
		if (mp->mnt_gjprovider != NULL) {
			free(mp->mnt_gjprovider, M_UFSMNT);
			mp->mnt_gjprovider = NULL;
		}
		free(ump->um_fs, M_UFSMNT);
		free(ump, M_UFSMNT);
		mp->mnt_data = NULL;
	}
	dev_rel(dev);
	return (error);
}
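
/*
 * Editor's note: a condensed view of the ffs_mountfs() sequence above,
 * for orientation (error unwinding omitted):
 *
 *	g_vfs_open()		open the GEOM consumer for the device
 *	bread() superblock	probe each SBLOCKSEARCH offset in turn
 *	malloc() ufsmount/fs	build the in-core superblock copy
 *	bread() cs summary	read the cylinder-group summary area
 *	softdep_mount()		if FS_DOSOFTDEP and mounting read/write
 *	ffs_sbupdate()		mark the fs dirty (fs_clean = 0) on disk
 */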

#include <sys/sysctl.h>
static int bigcgs = 0;
SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");

/*
 * Sanity checks for loading old filesystem superblocks.
 * See ffs_oldfscompat_write below for unwound actions.
 *
 * XXX - Parts get retired eventually.
 * Unfortunately new bits get added.
 */
static void
ffs_oldfscompat_read(fs, ump, sblockloc)
	struct fs *fs;
	struct ufsmount *ump;
	ufs2_daddr_t sblockloc;
{
	off_t maxfilesize;

	/*
	 * If not yet done, update fs_flags location and value of fs_sblockloc.
	 */
	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
		fs->fs_flags = fs->fs_old_flags;
		fs->fs_old_flags |= FS_FLAGS_UPDATED;
		fs->fs_sblockloc = sblockloc;
	}
	/*
	 * If not yet done, update UFS1 superblock with new wider fields.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
		fs->fs_maxbsize = fs->fs_bsize;
		fs->fs_time = fs->fs_old_time;
		fs->fs_size = fs->fs_old_size;
		fs->fs_dsize = fs->fs_old_dsize;
		fs->fs_csaddr = fs->fs_old_csaddr;
		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
	}
	if (fs->fs_magic == FS_UFS1_MAGIC &&
	    fs->fs_old_inodefmt < FS_44INODEFMT) {
		fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
		fs->fs_qbmask = ~fs->fs_bmask;
		fs->fs_qfmask = ~fs->fs_fmask;
	}
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
		maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
		if (fs->fs_maxfilesize > maxfilesize)
			fs->fs_maxfilesize = maxfilesize;
	}
	/* Compatibility for old filesystems */
	if (fs->fs_avgfilesize <= 0)
		fs->fs_avgfilesize = AVFILESIZ;
	if (fs->fs_avgfpdir <= 0)
		fs->fs_avgfpdir = AFPDIR;
	if (bigcgs) {
		fs->fs_save_cgsize = fs->fs_cgsize;
		fs->fs_cgsize = fs->fs_bsize;
	}
}

/*
 * Unwinding superblock updates for old filesystems.
 * See ffs_oldfscompat_read above for details.
 *
 * XXX - Parts get retired eventually.
 * Unfortunately new bits get added.
 */
static void
ffs_oldfscompat_write(fs, ump)
	struct fs *fs;
	struct ufsmount *ump;
{

	/*
	 * Copy back UFS2 updated fields that UFS1 inspects.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		fs->fs_old_time = fs->fs_time;
		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
	}
	if (bigcgs) {
		fs->fs_cgsize = fs->fs_save_cgsize;
		fs->fs_save_cgsize = 0;
	}
}

/*
 * unmount system call
 */
static int
ffs_unmount(mp, mntflags)
	struct mount *mp;
	int mntflags;
{
	struct thread *td;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, flags, susp;
#ifdef UFS_EXTATTR
	int e_restart;
#endif

	flags = 0;
	td = curthread;
	fs = ump->um_fs;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		susp = fs->fs_ronly != 0;
	} else
		susp = 0;
#ifdef UFS_EXTATTR
	if ((error = ufs_extattr_stop(mp, td))) {
		if (error != EOPNOTSUPP)
			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
			    error);
		e_restart = 0;
	} else {
		ufs_extattr_uepm_destroy(&ump->um_extattr);
		e_restart = 1;
	}
#endif
	if (susp) {
		/*
		 * dounmount already called vn_start_write().
		 */
		for (;;) {
			vn_finished_write(mp);
			if ((error = vfs_write_suspend(mp)) != 0)
				return (error);
			MNT_ILOCK(mp);
			if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
				mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
				    MNTK_SUSPEND2);
				wakeup(&mp->mnt_flag);
				MNT_IUNLOCK(mp);
				td->td_pflags |= TDP_IGNSUSP;
				break;
			}
			MNT_IUNLOCK(mp);
			vn_start_write(NULL, &mp, V_WAIT);
		}
	}
	if (mp->mnt_flag & MNT_SOFTDEP)
		error = softdep_flushfiles(mp, flags, td);
	else
		error = ffs_flushfiles(mp, flags, td);
	if (error != 0 && error != ENXIO)
		goto fail;

	UFS_LOCK(ump);
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("%s: unmount pending error: blocks %jd files %d\n",
		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
		    fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	UFS_UNLOCK(ump);
	if (fs->fs_ronly == 0) {
		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
		error = ffs_sbupdate(ump, MNT_WAIT, 0);
		if (error && error != ENXIO) {
			fs->fs_clean = 0;
			goto fail;
		}
	}
	if (susp) {
		vfs_write_resume(mp);
		vn_start_write(NULL, &mp, V_WAIT);
	}
	DROP_GIANT();
	g_topology_lock();
	g_vfs_close(ump->um_cp);
	g_topology_unlock();
	PICKUP_GIANT();
	vrele(ump->um_devvp);
	dev_rel(ump->um_dev);
	mtx_destroy(UFS_MTX(ump));
	if (mp->mnt_gjprovider != NULL) {
		free(mp->mnt_gjprovider, M_UFSMNT);
		mp->mnt_gjprovider = NULL;
	}
	free(fs->fs_csp, M_UFSMNT);
	free(fs, M_UFSMNT);
	free(ump, M_UFSMNT);
	mp->mnt_data = NULL;
	MNT_ILOCK(mp);
	mp->mnt_flag &= ~MNT_LOCAL;
	MNT_IUNLOCK(mp);
	return (error);

fail:
	if (susp) {
		vfs_write_resume(mp);
		vn_start_write(NULL, &mp, V_WAIT);
	}
#ifdef UFS_EXTATTR
	if (e_restart) {
		ufs_extattr_uepm_init(&ump->um_extattr);
#ifdef UFS_EXTATTR_AUTOSTART
		(void) ufs_extattr_autostart(mp, td);
#endif
	}
#endif

	return (error);
}

/*
 * Flush out all the files in a filesystem.
 */
int
ffs_flushfiles(mp, flags, td)
	struct mount *mp;
	int flags;
	struct thread *td;
{
	struct ufsmount *ump;
	int error;

	ump = VFSTOUFS(mp);
#ifdef QUOTA
	if (mp->mnt_flag & MNT_QUOTA) {
		int i;
		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
		if (error)
			return (error);
		for (i = 0; i < MAXQUOTAS; i++) {
			quotaoff(td, mp, i);
		}
		/*
		 * Here we fall through to vflush again to ensure
		 * that we have gotten rid of all the system vnodes.
		 */
	}
#endif
	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
			return (error);
		ffs_snapshot_unmount(mp);
		flags |= FORCECLOSE;
		/*
		 * Here we fall through to vflush again to ensure
		 * that we have gotten rid of all the system vnodes.
		 */
	}
	/*
	 * Flush all the files.
	 */
	if ((error = vflush(mp, 0, flags, td)) != 0)
		return (error);
	/*
	 * Flush filesystem metadata.
	 */
	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
	VOP_UNLOCK(ump->um_devvp, 0);
	return (error);
}
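
/*
 * Editor's note: in ffs_statfs() below, free space is reported in
 * fragments.  Full free blocks contribute fs_frag fragments each, plus
 * the loose free fragments and any blocks pending release; f_bavail then
 * applies the minfree reserve via the freespace() macro.  Roughly (a
 * sketch of the arithmetic, using the macros from <ufs/ffs/fs.h>):
 *
 *	f_bfree  = cs_nbfree * fs_frag + cs_nffree + pending
 *	f_bavail = freespace(fs, fs_minfree) + pending
 */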

/*
 * Get filesystem statistics.
 */
static int
ffs_statfs(mp, sbp)
	struct mount *mp;
	struct statfs *sbp;
{
	struct ufsmount *ump;
	struct fs *fs;

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_statfs");
	sbp->f_version = STATFS_VERSION;
	sbp->f_bsize = fs->fs_fsize;
	sbp->f_iosize = fs->fs_bsize;
	sbp->f_blocks = fs->fs_dsize;
	UFS_LOCK(ump);
	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
	    dbtofsb(fs, fs->fs_pendingblocks);
	sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO;
	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
	UFS_UNLOCK(ump);
	sbp->f_namemax = NAME_MAX;
	return (0);
}

/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
 */
static int
ffs_sync(mp, waitfor)
	struct mount *mp;
	int waitfor;
{
	struct vnode *mvp, *vp, *devvp;
	struct thread *td;
	struct inode *ip;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, count, wait, lockreq, allerror = 0;
	int suspend;
	int suspended;
	int secondary_writes;
	int secondary_accwrites;
	int softdep_deps;
	int softdep_accdeps;
	struct bufobj *bo;

	td = curthread;
	fs = ump->um_fs;
	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
		printf("fs = %s\n", fs->fs_fsmnt);
		panic("ffs_sync: rofs mod");
	}
	/*
	 * Write back each (modified) inode.
	 */
	wait = 0;
	suspend = 0;
	suspended = 0;
	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
	if (waitfor == MNT_SUSPEND) {
		suspend = 1;
		waitfor = MNT_WAIT;
	}
	if (waitfor == MNT_WAIT) {
		wait = 1;
		lockreq = LK_EXCLUSIVE;
	}
	lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
	MNT_ILOCK(mp);
loop:
	/* Grab snapshot of secondary write counts */
	secondary_writes = mp->mnt_secondary_writes;
	secondary_accwrites = mp->mnt_secondary_accwrites;

	/* Grab snapshot of softdep dependency counts */
	MNT_IUNLOCK(mp);
	softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);
	MNT_ILOCK(mp);

	MNT_VNODE_FOREACH(vp, mp, mvp) {
		/*
		 * Depend on the mntvnode_slock to keep things stable enough
		 * for a quick test.  Since there might be hundreds of
		 * thousands of vnodes, we cannot afford even a subroutine
		 * call unless there's a good chance that we have work to do.
		 */
		VI_LOCK(vp);
		if (vp->v_iflag & VI_DOOMED) {
			VI_UNLOCK(vp);
			continue;
		}
		ip = VTOI(vp);
		if (vp->v_type == VNON || ((ip->i_flag &
		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
		    vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
			VI_UNLOCK(vp);
			continue;
		}
		MNT_IUNLOCK(mp);
		if ((error = vget(vp, lockreq, td)) != 0) {
			MNT_ILOCK(mp);
			if (error == ENOENT || error == ENOLCK) {
				MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
				goto loop;
			}
			continue;
		}
		if ((error = ffs_syncvnode(vp, waitfor)) != 0)
			allerror = error;
		vput(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	/*
	 * Force stale filesystem control information to be flushed.
	 */
	if (waitfor == MNT_WAIT) {
		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
			allerror = error;
		/* Flushed work items may create new vnodes to clean */
		if (allerror == 0 && count) {
			MNT_ILOCK(mp);
			goto loop;
		}
	}
#ifdef QUOTA
	qsync(mp);
#endif
	devvp = ump->um_devvp;
	bo = &devvp->v_bufobj;
	BO_LOCK(bo);
	if (waitfor != MNT_LAZY &&
	    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
		BO_UNLOCK(bo);
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
		if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
			allerror = error;
		VOP_UNLOCK(devvp, 0);
		if (allerror == 0 && waitfor == MNT_WAIT) {
			MNT_ILOCK(mp);
			goto loop;
		}
	} else if (suspend != 0) {
		if (softdep_check_suspend(mp,
		    devvp,
		    softdep_deps,
		    softdep_accdeps,
		    secondary_writes,
		    secondary_accwrites) != 0)
			goto loop;	/* More work needed */
		mtx_assert(MNT_MTX(mp), MA_OWNED);
		mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
		MNT_IUNLOCK(mp);
		suspended = 1;
	} else
		BO_UNLOCK(bo);
	/*
	 * Write back modified superblock.
	 */
	if (fs->fs_fmod != 0 &&
	    (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
		allerror = error;
	return (allerror);
}

int
ffs_vget(mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{
	return (ffs_vgetf(mp, ino, flags, vpp, 0));
}
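
/*
 * Editor's note: ffs_vgetf() below resolves inode-number-to-vnode lookups
 * through the vfs_hash facility.  Two threads may race to create a vnode
 * for the same inode; both allocate, and vfs_hash_insert() arbitrates,
 * handing the loser the winner's vnode in *vpp so it can discard its own
 * copy.  The general shape (a sketch, not the exact code):
 *
 *	if (vfs_hash_get(mp, ino, ...) found a vnode)
 *		return it;
 *	allocate vnode + in-core inode;
 *	vfs_hash_insert(vp, ino, ...);	-- may return a winner instead
 *	read the on-disk dinode and finish initialization;
 */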

int
ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
	int ffs_flags;
{
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	struct cdev *dev;
	int error;

	error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/*
	 * We must promote to an exclusive lock for vnode creation.  This
	 * can happen if lookup is passed LOCKSHARED.
	 */
	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
		flags &= ~LK_TYPE_MASK;
		flags |= LK_EXCLUSIVE;
	}

	/*
	 * We do not lock vnode creation as it is believed to be too
	 * expensive for such rare case as simultaneous creation of vnode
	 * for same ino by different processes.  We just allow them to race
	 * and check later to decide who wins.  Let the race begin!
	 */

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
	fs = ump->um_fs;

	/*
	 * If this malloc() is performed after the getnewvnode()
	 * it might block, leaving a vnode with a NULL v_data to be
	 * found by ffs_sync() if a sync happens to fire right then,
	 * which will cause a panic because ffs_sync() blindly
	 * dereferences vp->v_data (as well it should).
	 */
	ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);

	/* Allocate a new vnode/inode. */
	if (fs->fs_magic == FS_UFS1_MAGIC)
		error = getnewvnode("ufs", mp, &ffs_vnodeops1, &vp);
	else
		error = getnewvnode("ufs", mp, &ffs_vnodeops2, &vp);
	if (error) {
		*vpp = NULL;
		uma_zfree(uma_inode, ip);
		return (error);
	}
	/*
	 * FFS supports recursive locking.
	 */
	VN_LOCK_AREC(vp);
	vp->v_data = ip;
	vp->v_bufobj.bo_bsize = fs->fs_bsize;
	ip->i_vnode = vp;
	ip->i_ump = ump;
	ip->i_fs = fs;
	ip->i_dev = dev;
	ip->i_number = ino;
	ip->i_ea_refs = 0;
#ifdef QUOTA
	{
		int i;
		for (i = 0; i < MAXQUOTAS; i++)
			ip->i_dquot[i] = NODQUOT;
	}
#endif

	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
	if (ffs_flags & FFSV_FORCEINSMQ)
		vp->v_vflag |= VV_FORCEINSMQ;
	error = insmntque(vp, mp);
	if (error != 0) {
		*vpp = NULL;
		return (error);
	}
	vp->v_vflag &= ~VV_FORCEINSMQ;
	error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/* Read in the disk contents for the inode, copy into the inode. */
	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */
		brelse(bp);
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	if (ip->i_ump->um_fstype == UFS1)
		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
	else
		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
	ffs_load_inode(bp, ip, fs, ino);
	if (DOINGSOFTDEP(vp))
		softdep_load_inodeblock(ip);
	else
		ip->i_effnlink = ip->i_nlink;
	bqrelse(bp);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */
	if (ip->i_ump->um_fstype == UFS1)
		error = ufs_vinit(mp, &ffs_fifoops1, &vp);
	else
		error = ufs_vinit(mp, &ffs_fifoops2, &vp);
	if (error) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}

	/*
	 * Finish inode initialization.
	 */
	if (vp->v_type != VFIFO) {
		/* FFS supports shared locking for all files except fifos. */
		VN_LOCK_ASHARE(vp);
	}

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		ip->i_gen = arc4random() / 2 + 1;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
			ip->i_flag |= IN_MODIFIED;
			DIP_SET(ip, i_gen, ip->i_gen);
		}
	}
	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
	}						/* XXX */

#ifdef MAC
	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
		/*
		 * If this vnode is already allocated, and we're running
		 * multi-label, attempt to perform a label association
		 * from the extended attributes on the inode.
		 */
		error = mac_vnode_associate_extattr(mp, vp);
		if (error) {
			/* ufs_inactive will release ip->i_devvp ref. */
			vput(vp);
			*vpp = NULL;
			return (error);
		}
	}
#endif

	*vpp = vp;
	return (0);
}

/*
 * File handle to vnode
 *
 * Have to be really careful about stale file handles:
 * - check that the inode number is valid
 * - call ffs_vget() to get the locked inode
 * - check for an unallocated inode (i_mode == 0)
 * - check that the given client host has export rights and return
 *   those rights via exflagsp and credanonp
 */
static int
ffs_fhtovp(mp, fhp, vpp)
	struct mount *mp;
	struct fid *fhp;
	struct vnode **vpp;
{
	struct ufid *ufhp;
	struct fs *fs;

	ufhp = (struct ufid *)fhp;
	fs = VFSTOUFS(mp)->um_fs;
	if (ufhp->ufid_ino < ROOTINO ||
	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
		return (ESTALE);
	return (ufs_fhtovp(mp, ufhp, vpp));
}

/*
 * Initialize the filesystem.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{

	softdep_initialize();
	return (ufs_init(vfsp));
}

/*
 * Undo the work of ffs_init().
 */
static int
ffs_uninit(vfsp)
	struct vfsconf *vfsp;
{
	int ret;

	ret = ufs_uninit(vfsp);
	softdep_uninitialize();
	return (ret);
}

/*
 * Write a superblock and associated information back to disk.
 */
int
ffs_sbupdate(mp, waitfor, suspended)
	struct ufsmount *mp;
	int waitfor;
	int suspended;
{
	struct fs *fs = mp->um_fs;
	struct buf *sbbp;
	struct buf *bp;
	int blks;
	void *space;
	int i, size, error, allerror = 0;

	if (fs->fs_ronly == 1 &&
	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
	    (MNT_RDONLY | MNT_UPDATE))
		panic("ffs_sbupdate: write read-only filesystem");
	/*
	 * We use the superblock's buf to serialize calls to ffs_sbupdate().
	 */
	sbbp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc),
	    (int)fs->fs_sbsize, 0, 0, 0);
	/*
	 * First write back the summary information.
	 */
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = fs->fs_csp;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
		    size, 0, 0, 0);
		bcopy(space, bp->b_data, (u_int)size);
		space = (char *)space + size;
		if (suspended)
			bp->b_flags |= B_VALIDSUSPWRT;
		if (waitfor != MNT_WAIT)
			bawrite(bp);
		else if ((error = bwrite(bp)) != 0)
			allerror = error;
	}
	/*
	 * Now write back the superblock itself. If any errors occurred
	 * up to this point, then fail so that the superblock avoids
	 * being written out as clean.
	 */
	if (allerror) {
		brelse(sbbp);
		return (allerror);
	}
	bp = sbbp;
	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
		printf("%s: correcting fs_sblockloc from %jd to %d\n",
		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
		fs->fs_sblockloc = SBLOCK_UFS1;
	}
	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
		printf("%s: correcting fs_sblockloc from %jd to %d\n",
		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
		fs->fs_sblockloc = SBLOCK_UFS2;
	}
	fs->fs_fmod = 0;
	fs->fs_time = time_second;
	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
	if (suspended)
		bp->b_flags |= B_VALIDSUSPWRT;
	if (waitfor != MNT_WAIT)
		bawrite(bp);
	else if ((error = bwrite(bp)) != 0)
		allerror = error;
	return (allerror);
}

static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname)
{

#ifdef UFS_EXTATTR
	return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname));
#else
	return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname));
#endif
}

static void
ffs_ifree(struct ufsmount *ump, struct inode *ip)
{

	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
		uma_zfree(uma_ufs1, ip->i_din1);
	else if (ip->i_din2 != NULL)
		uma_zfree(uma_ufs2, ip->i_din2);
	uma_zfree(uma_inode, ip);
}

static int dobkgrdwrite = 1;
SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
    "Do background writes (honoring the BV_BKGRDWRITE flag)?");

/*
 * Complete a background write started from bwrite.
 */
static void
ffs_backgroundwritedone(struct buf *bp)
{
	struct bufobj *bufobj;
	struct buf *origbp;

	/*
	 * Find the original buffer that we are writing.
	 */
	bufobj = bp->b_bufobj;
	BO_LOCK(bufobj);
	if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
		panic("backgroundwritedone: lost buffer");
	/* Grab an extra reference to be dropped by the bufdone() below. */
	bufobj_wrefl(bufobj);
	BO_UNLOCK(bufobj);
	/*
	 * Process dependencies then return any unfinished ones.
	 */
	if (!LIST_EMPTY(&bp->b_dep))
		buf_complete(bp);
#ifdef SOFTUPDATES
	if (!LIST_EMPTY(&bp->b_dep))
		softdep_move_dependencies(bp, origbp);
#endif
	/*
	 * This buffer is marked B_NOCACHE so when it is released
	 * by biodone it will be tossed.
	 */
	bp->b_flags |= B_NOCACHE;
	bp->b_flags &= ~B_CACHE;
	bufdone(bp);
	BO_LOCK(bufobj);
	/*
	 * Clear the BV_BKGRDINPROG flag in the original buffer
	 * and awaken it if it is waiting for the write to complete.
	 * If BV_BKGRDINPROG is not set in the original buffer it must
	 * have been released and re-instantiated - which is not legal.
	 */
	KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
	    ("backgroundwritedone: lost buffer2"));
	origbp->b_vflags &= ~BV_BKGRDINPROG;
	if (origbp->b_vflags & BV_BKGRDWAIT) {
		origbp->b_vflags &= ~BV_BKGRDWAIT;
		wakeup(&origbp->b_xflags);
	}
	BO_UNLOCK(bufobj);
}
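
/*
 * Editor's note: ffs_bufwrite() below implements "background writes" for
 * buffers marked BX_BKGRDWRITE (in FFS these are the cylinder-group bitmap
 * buffers): instead of keeping the buffer busy for the duration of the
 * I/O, it clones the buffer, writes the clone asynchronously, and lets the
 * original be redirtied immediately.  ffs_backgroundwritedone() above is
 * the completion side, moving any unfinished softdep dependencies back to
 * the original buffer and waking any waiter.
 */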

/*
 * Write, release buffer on completion.  (Done by iodone
 * if async).  Do not bother writing anything if the buffer
 * is invalid.
 *
 * Note that we set B_CACHE here, indicating that buffer is
 * fully valid and thus cacheable.  This is true even of NFS
 * now so we set it generally.  This could be set either here
 * or in biodone() since the I/O is synchronous.  We put it
 * here.
 */
static int
ffs_bufwrite(struct buf *bp)
{
	int oldflags, s;
	struct buf *newbp;

	CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
	if (bp->b_flags & B_INVAL) {
		brelse(bp);
		return (0);
	}

	oldflags = bp->b_flags;

	if (!BUF_ISLOCKED(bp))
		panic("bufwrite: buffer is not busy???");
	s = splbio();
	/*
	 * If a background write is already in progress, delay
	 * writing this block if it is asynchronous. Otherwise
	 * wait for the background write to complete.
	 */
	BO_LOCK(bp->b_bufobj);
	if (bp->b_vflags & BV_BKGRDINPROG) {
		if (bp->b_flags & B_ASYNC) {
			BO_UNLOCK(bp->b_bufobj);
			splx(s);
			bdwrite(bp);
			return (0);
		}
		bp->b_vflags |= BV_BKGRDWAIT;
		msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj), PRIBIO, "bwrbg", 0);
		if (bp->b_vflags & BV_BKGRDINPROG)
			panic("bufwrite: still writing");
	}
	BO_UNLOCK(bp->b_bufobj);

	/* Mark the buffer clean */
	bundirty(bp);

	/*
	 * If this buffer is marked for background writing and we
	 * do not have to wait for it, make a copy and write the
	 * copy so as to leave this buffer ready for further use.
	 *
	 * This optimization eats a lot of memory.  If we have a page
	 * or buffer shortfall we can't do it.
	 */
	if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
	    (bp->b_flags & B_ASYNC) &&
	    !vm_page_count_severe() &&
	    !buf_dirty_count_severe()) {
		KASSERT(bp->b_iodone == NULL,
		    ("bufwrite: needs chained iodone (%p)", bp->b_iodone));

		/* get a new block */
		newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD);
		if (newbp == NULL)
			goto normal_write;

		/*
		 * set it to be identical to the old block.  We have to
		 * set b_lblkno and BKGRDMARKER before calling bgetvp()
		 * to avoid confusing the splay tree and gbincore().
		 */
		memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
		newbp->b_lblkno = bp->b_lblkno;
		newbp->b_xflags |= BX_BKGRDMARKER;
		BO_LOCK(bp->b_bufobj);
		bp->b_vflags |= BV_BKGRDINPROG;
		bgetvp(bp->b_vp, newbp);
		BO_UNLOCK(bp->b_bufobj);
		newbp->b_bufobj = &bp->b_vp->v_bufobj;
		newbp->b_blkno = bp->b_blkno;
		newbp->b_offset = bp->b_offset;
		newbp->b_iodone = ffs_backgroundwritedone;
		newbp->b_flags |= B_ASYNC;
		newbp->b_flags &= ~B_INVAL;

#ifdef SOFTUPDATES
		/* move over the dependencies */
		if (!LIST_EMPTY(&bp->b_dep))
			softdep_move_dependencies(bp, newbp);
#endif

		/*
		 * Initiate write on the copy, release the original to
		 * the B_LOCKED queue so that it cannot go away until
		 * the background write completes. If not locked it could go
		 * away and then be reconstituted while it was being written.
		 * If the reconstituted buffer were written, we could end up
		 * with two background copies being written at the same time.
		 */
		bqrelse(bp);
		bp = newbp;
	}

	/* Let the normal bufwrite do the rest for us */
normal_write:
	return (bufwrite(bp));
}

static void
ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
{
	struct vnode *vp;
	int error;
	struct buf *tbp;

	vp = bo->__bo_vnode;
	if (bp->b_iocmd == BIO_WRITE) {
		if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
		    bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
		    (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
			panic("ffs_geom_strategy: bad I/O");
		bp->b_flags &= ~B_VALIDSUSPWRT;
		if ((vp->v_vflag & VV_COPYONWRITE) &&
		    vp->v_rdev->si_snapdata != NULL) {
			if ((bp->b_flags & B_CLUSTER) != 0) {
				runningbufwakeup(bp);
				TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
					      b_cluster.cluster_entry) {
					error = ffs_copyonwrite(vp, tbp);
					if (error != 0 &&
					    error != EOPNOTSUPP) {
						bp->b_error = error;
						bp->b_ioflags |= BIO_ERROR;
						bufdone(bp);
						return;
					}
				}
				bp->b_runningbufspace = bp->b_bufsize;
				atomic_add_long(&runningbufspace,
				    bp->b_runningbufspace);
			} else {
				error = ffs_copyonwrite(vp, bp);
				if (error != 0 && error != EOPNOTSUPP) {
					bp->b_error = error;
					bp->b_ioflags |= BIO_ERROR;
					bufdone(bp);
					return;
				}
			}
		}
#ifdef SOFTUPDATES
		if ((bp->b_flags & B_CLUSTER) != 0) {
			TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
				      b_cluster.cluster_entry) {
				if (!LIST_EMPTY(&tbp->b_dep))
					buf_start(tbp);
			}
		} else {
			if (!LIST_EMPTY(&bp->b_dep))
				buf_start(bp);
		}
#endif
	}
	g_vfs_strategy(bo, bp);
}

#ifdef DDB

static void
db_print_ffs(struct ufsmount *ump)
{
	db_printf("mp %p %s devvp %p fs %p su_wl %d su_wl_in %d su_deps %d "
	    "su_req %d\n",
	    ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname,
	    ump->um_devvp, ump->um_fs, ump->softdep_on_worklist,
	    ump->softdep_on_worklist_inprogress, ump->softdep_deps,
	    ump->softdep_req);
}

DB_SHOW_COMMAND(ffs, db_show_ffs)
{
	struct mount *mp;
	struct ufsmount *ump;

	if (have_addr) {
		ump = VFSTOUFS((struct mount *)addr);
		db_print_ffs(ump);
		return;
	}

	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name))
			db_print_ffs(VFSTOUFS(mp));
	}
}

#endif /* DDB */