1 /*- 2 * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved. 3 * 4 * Further information about snapshots can be obtained from: 5 * 6 * Marshall Kirk McKusick http://www.mckusick.com/softdep/ 7 * 1614 Oxford Street mckusick@mckusick.com 8 * Berkeley, CA 94709-1608 +1-510-843-9542 9 * USA 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY 22 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR 25 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)ffs_snapshot.c 8.11 (McKusick) 7/23/00 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <sys/param.h> 40 #include <sys/kernel.h> 41 #include <sys/systm.h> 42 #include <sys/conf.h> 43 #include <sys/bio.h> 44 #include <sys/buf.h> 45 #include <sys/proc.h> 46 #include <sys/namei.h> 47 #include <sys/sched.h> 48 #include <sys/stat.h> 49 #include <sys/malloc.h> 50 #include <sys/mount.h> 51 #include <sys/resource.h> 52 #include <sys/resourcevar.h> 53 #include <sys/vnode.h> 54 55 #include <geom/geom.h> 56 57 #include <ufs/ufs/extattr.h> 58 #include <ufs/ufs/quota.h> 59 #include <ufs/ufs/ufsmount.h> 60 #include <ufs/ufs/inode.h> 61 #include <ufs/ufs/ufs_extern.h> 62 63 #include <ufs/ffs/fs.h> 64 #include <ufs/ffs/ffs_extern.h> 65 66 #define KERNCRED thread0.td_ucred 67 #define DEBUG 1 68 69 #include "opt_ffs.h" 70 71 #ifdef NO_FFS_SNAPSHOT 72 int 73 ffs_snapshot(mp, snapfile) 74 struct mount *mp; 75 char *snapfile; 76 { 77 return (EINVAL); 78 } 79 80 int 81 ffs_snapblkfree(fs, devvp, bno, size, inum) 82 struct fs *fs; 83 struct vnode *devvp; 84 ufs2_daddr_t bno; 85 long size; 86 ino_t inum; 87 { 88 return (EINVAL); 89 } 90 91 void 92 ffs_snapremove(vp) 93 struct vnode *vp; 94 { 95 } 96 97 void 98 ffs_snapshot_mount(mp) 99 struct mount *mp; 100 { 101 } 102 103 void 104 ffs_snapshot_unmount(mp) 105 struct mount *mp; 106 { 107 } 108 109 void 110 ffs_snapgone(ip) 111 struct inode *ip; 112 { 113 } 114 115 int 116 ffs_copyonwrite(devvp, bp) 117 struct vnode *devvp; 118 struct buf *bp; 119 { 120 return (EINVAL); 121 } 122 123 #else 124 125 TAILQ_HEAD(snaphead, inode); 126 127 struct snapdata { 128 struct snaphead sn_head; 129 daddr_t sn_listsize; 130 daddr_t *sn_blklist; 131 struct lock sn_lock; 132 }; 133 134 static int cgaccount(int, struct vnode *, struct buf *, int); 135 static int expunge_ufs1(struct vnode *, struct inode *, struct fs *, 136 int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *, 137 ufs_lbn_t, int), int); 138 static int indiracct_ufs1(struct vnode *, struct vnode *, int, 139 ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *, 140 int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *, 141 ufs_lbn_t, int), int); 142 static int fullacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, 143 struct fs *, ufs_lbn_t, int); 144 static int snapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, 145 struct fs *, ufs_lbn_t, int); 146 static int mapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, 147 struct fs *, ufs_lbn_t, int); 148 static int expunge_ufs2(struct vnode *, struct inode *, struct fs *, 149 int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *, 150 ufs_lbn_t, int), int); 151 static int indiracct_ufs2(struct vnode *, struct vnode *, int, 152 ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *, 153 int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *, 154 ufs_lbn_t, int), int); 155 static int fullacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 156 struct fs *, ufs_lbn_t, int); 157 static int snapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 158 struct fs *, ufs_lbn_t, int); 159 static int mapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 160 struct fs *, ufs_lbn_t, int); 161 static int readblock(struct vnode *vp, struct buf *, ufs2_daddr_t); 162 static void process_deferred_inactive(struct mount *); 163 164 /* 165 * To ensure the consistency of snapshots across crashes, we must 166 * synchronously write out copied blocks before allowing the 167 * originals to be modified. Because of the rather severe speed 168 * penalty that this imposes, the following flag allows this 169 * crash persistence to be disabled. 170 */ 171 int dopersistence = 0; 172 173 #ifdef DEBUG 174 #include <sys/sysctl.h> 175 SYSCTL_INT(_debug, OID_AUTO, dopersistence, CTLFLAG_RW, &dopersistence, 0, ""); 176 static int snapdebug = 0; 177 SYSCTL_INT(_debug, OID_AUTO, snapdebug, CTLFLAG_RW, &snapdebug, 0, ""); 178 int collectsnapstats = 0; 179 SYSCTL_INT(_debug, OID_AUTO, collectsnapstats, CTLFLAG_RW, &collectsnapstats, 180 0, ""); 181 #endif /* DEBUG */ 182 183 /* 184 * Create a snapshot file and initialize it for the filesystem. 185 */ 186 int 187 ffs_snapshot(mp, snapfile) 188 struct mount *mp; 189 char *snapfile; 190 { 191 ufs2_daddr_t numblks, blkno, *blkp, *snapblklist; 192 int error, cg, snaploc; 193 int i, size, len, loc; 194 int flag = mp->mnt_flag; 195 struct timespec starttime = {0, 0}, endtime; 196 char saved_nice = 0; 197 long redo = 0, snaplistsize = 0; 198 int32_t *lp; 199 void *space; 200 struct fs *copy_fs = NULL, *fs; 201 struct thread *td = curthread; 202 struct inode *ip, *xp; 203 struct buf *bp, *nbp, *ibp, *sbp = NULL; 204 struct nameidata nd; 205 struct mount *wrtmp; 206 struct vattr vat; 207 struct vnode *vp, *xvp, *mvp, *devvp; 208 struct uio auio; 209 struct iovec aiov; 210 struct snapdata *sn; 211 struct ufsmount *ump; 212 213 ump = VFSTOUFS(mp); 214 fs = ump->um_fs; 215 /* 216 * XXX: make sure we don't go to out1 before we setup sn 217 */ 218 sn = (void *)0xdeadbeef; 219 220 /* 221 * Need to serialize access to snapshot code per filesystem. 222 */ 223 /* 224 * Assign a snapshot slot in the superblock. 225 */ 226 UFS_LOCK(ump); 227 for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) 228 if (fs->fs_snapinum[snaploc] == 0) 229 break; 230 UFS_UNLOCK(ump); 231 if (snaploc == FSMAXSNAP) 232 return (ENOSPC); 233 /* 234 * Create the snapshot file. 235 */ 236 restart: 237 NDINIT(&nd, CREATE, LOCKPARENT | LOCKLEAF, UIO_SYSSPACE, snapfile, td); 238 if ((error = namei(&nd)) != 0) 239 return (error); 240 if (nd.ni_vp != NULL) { 241 vput(nd.ni_vp); 242 error = EEXIST; 243 } 244 if (nd.ni_dvp->v_mount != mp) 245 error = EXDEV; 246 if (error) { 247 NDFREE(&nd, NDF_ONLY_PNBUF); 248 if (nd.ni_dvp == nd.ni_vp) 249 vrele(nd.ni_dvp); 250 else 251 vput(nd.ni_dvp); 252 return (error); 253 } 254 VATTR_NULL(&vat); 255 vat.va_type = VREG; 256 vat.va_mode = S_IRUSR; 257 vat.va_vaflags |= VA_EXCLUSIVE; 258 if (VOP_GETWRITEMOUNT(nd.ni_dvp, &wrtmp)) 259 wrtmp = NULL; 260 if (wrtmp != mp) 261 panic("ffs_snapshot: mount mismatch"); 262 if (vn_start_write(NULL, &wrtmp, V_NOWAIT) != 0) { 263 NDFREE(&nd, NDF_ONLY_PNBUF); 264 vput(nd.ni_dvp); 265 if ((error = vn_start_write(NULL, &wrtmp, 266 V_XSLEEP | PCATCH)) != 0) 267 return (error); 268 goto restart; 269 } 270 VOP_LEASE(nd.ni_dvp, td, KERNCRED, LEASE_WRITE); 271 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vat); 272 vhold(nd.ni_dvp); 273 vput(nd.ni_dvp); 274 if (error) { 275 NDFREE(&nd, NDF_ONLY_PNBUF); 276 vn_finished_write(wrtmp); 277 vdrop(nd.ni_dvp); 278 return (error); 279 } 280 vp = nd.ni_vp; 281 ip = VTOI(vp); 282 devvp = ip->i_devvp; 283 /* 284 * Allocate and copy the last block contents so as to be able 285 * to set size to that of the filesystem. 286 */ 287 numblks = howmany(fs->fs_size, fs->fs_frag); 288 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)), 289 fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp); 290 if (error) 291 goto out; 292 ip->i_size = lblktosize(fs, (off_t)numblks); 293 DIP_SET(ip, i_size, ip->i_size); 294 ip->i_flag |= IN_CHANGE | IN_UPDATE; 295 if ((error = readblock(vp, bp, numblks - 1)) != 0) 296 goto out; 297 bawrite(bp); 298 /* 299 * Preallocate critical data structures so that we can copy 300 * them in without further allocation after we suspend all 301 * operations on the filesystem. We would like to just release 302 * the allocated buffers without writing them since they will 303 * be filled in below once we are ready to go, but this upsets 304 * the soft update code, so we go ahead and write the new buffers. 305 * 306 * Allocate all indirect blocks and mark all of them as not 307 * needing to be copied. 308 */ 309 for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) { 310 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno), 311 fs->fs_bsize, td->td_ucred, BA_METAONLY, &ibp); 312 if (error) 313 goto out; 314 bawrite(ibp); 315 } 316 /* 317 * Allocate copies for the superblock and its summary information. 318 */ 319 error = UFS_BALLOC(vp, fs->fs_sblockloc, fs->fs_sbsize, KERNCRED, 320 0, &nbp); 321 if (error) 322 goto out; 323 bawrite(nbp); 324 blkno = fragstoblks(fs, fs->fs_csaddr); 325 len = howmany(fs->fs_cssize, fs->fs_bsize); 326 for (loc = 0; loc < len; loc++) { 327 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)), 328 fs->fs_bsize, KERNCRED, 0, &nbp); 329 if (error) 330 goto out; 331 bawrite(nbp); 332 } 333 /* 334 * Allocate all cylinder group blocks. 335 */ 336 for (cg = 0; cg < fs->fs_ncg; cg++) { 337 error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 338 fs->fs_bsize, KERNCRED, 0, &nbp); 339 if (error) 340 goto out; 341 bawrite(nbp); 342 } 343 /* 344 * Copy all the cylinder group maps. Although the 345 * filesystem is still active, we hope that only a few 346 * cylinder groups will change between now and when we 347 * suspend operations. Thus, we will be able to quickly 348 * touch up the few cylinder groups that changed during 349 * the suspension period. 350 */ 351 len = howmany(fs->fs_ncg, NBBY); 352 MALLOC(space, void *, len, M_DEVBUF, M_WAITOK|M_ZERO); 353 UFS_LOCK(ump); 354 fs->fs_active = space; 355 UFS_UNLOCK(ump); 356 for (cg = 0; cg < fs->fs_ncg; cg++) { 357 error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 358 fs->fs_bsize, KERNCRED, 0, &nbp); 359 if (error) 360 goto out; 361 error = cgaccount(cg, vp, nbp, 1); 362 bawrite(nbp); 363 if (error) 364 goto out; 365 } 366 /* 367 * Change inode to snapshot type file. 368 */ 369 ip->i_flags |= SF_SNAPSHOT; 370 DIP_SET(ip, i_flags, ip->i_flags); 371 ip->i_flag |= IN_CHANGE | IN_UPDATE; 372 /* 373 * Ensure that the snapshot is completely on disk. 374 * Since we have marked it as a snapshot it is safe to 375 * unlock it as no process will be allowed to write to it. 376 */ 377 if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0) 378 goto out; 379 VOP_UNLOCK(vp, 0, td); 380 /* 381 * All allocations are done, so we can now snapshot the system. 382 * 383 * Recind nice scheduling while running with the filesystem suspended. 384 */ 385 if (td->td_proc->p_nice > 0) { 386 PROC_LOCK(td->td_proc); 387 mtx_lock_spin(&sched_lock); 388 saved_nice = td->td_proc->p_nice; 389 sched_nice(td->td_proc, 0); 390 mtx_unlock_spin(&sched_lock); 391 PROC_UNLOCK(td->td_proc); 392 } 393 /* 394 * Suspend operation on filesystem. 395 */ 396 for (;;) { 397 vn_finished_write(wrtmp); 398 if ((error = vfs_write_suspend(vp->v_mount)) != 0) { 399 vn_start_write(NULL, &wrtmp, V_WAIT); 400 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 401 goto out; 402 } 403 if (mp->mnt_kern_flag & MNTK_SUSPENDED) 404 break; 405 vn_start_write(NULL, &wrtmp, V_WAIT); 406 } 407 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 408 if (collectsnapstats) 409 nanotime(&starttime); 410 /* 411 * First, copy all the cylinder group maps that have changed. 412 */ 413 for (cg = 0; cg < fs->fs_ncg; cg++) { 414 if ((ACTIVECGNUM(fs, cg) & ACTIVECGOFF(cg)) != 0) 415 continue; 416 redo++; 417 error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 418 fs->fs_bsize, KERNCRED, 0, &nbp); 419 if (error) 420 goto out1; 421 error = cgaccount(cg, vp, nbp, 2); 422 bawrite(nbp); 423 if (error) 424 goto out1; 425 } 426 /* 427 * Grab a copy of the superblock and its summary information. 428 * We delay writing it until the suspension is released below. 429 */ 430 error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize, 431 KERNCRED, &sbp); 432 if (error) { 433 brelse(sbp); 434 sbp = NULL; 435 goto out1; 436 } 437 loc = blkoff(fs, fs->fs_sblockloc); 438 copy_fs = (struct fs *)(sbp->b_data + loc); 439 bcopy(fs, copy_fs, fs->fs_sbsize); 440 if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) 441 copy_fs->fs_clean = 1; 442 size = fs->fs_bsize < SBLOCKSIZE ? fs->fs_bsize : SBLOCKSIZE; 443 if (fs->fs_sbsize < size) 444 bzero(&sbp->b_data[loc + fs->fs_sbsize], size - fs->fs_sbsize); 445 size = blkroundup(fs, fs->fs_cssize); 446 if (fs->fs_contigsumsize > 0) 447 size += fs->fs_ncg * sizeof(int32_t); 448 space = malloc((u_long)size, M_UFSMNT, M_WAITOK); 449 copy_fs->fs_csp = space; 450 bcopy(fs->fs_csp, copy_fs->fs_csp, fs->fs_cssize); 451 space = (char *)space + fs->fs_cssize; 452 loc = howmany(fs->fs_cssize, fs->fs_fsize); 453 i = fs->fs_frag - loc % fs->fs_frag; 454 len = (i == fs->fs_frag) ? 0 : i * fs->fs_fsize; 455 if (len > 0) { 456 if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + loc), 457 len, KERNCRED, &bp)) != 0) { 458 brelse(bp); 459 free(copy_fs->fs_csp, M_UFSMNT); 460 bawrite(sbp); 461 sbp = NULL; 462 goto out1; 463 } 464 bcopy(bp->b_data, space, (u_int)len); 465 space = (char *)space + len; 466 bp->b_flags |= B_INVAL | B_NOCACHE; 467 brelse(bp); 468 } 469 if (fs->fs_contigsumsize > 0) { 470 copy_fs->fs_maxcluster = lp = space; 471 for (i = 0; i < fs->fs_ncg; i++) 472 *lp++ = fs->fs_contigsumsize; 473 } 474 /* 475 * We must check for active files that have been unlinked 476 * (e.g., with a zero link count). We have to expunge all 477 * trace of these files from the snapshot so that they are 478 * not reclaimed prematurely by fsck or unnecessarily dumped. 479 * We turn off the MNTK_SUSPENDED flag to avoid a panic from 480 * spec_strategy about writing on a suspended filesystem. 481 * Note that we skip unlinked snapshot files as they will 482 * be handled separately below. 483 * 484 * We also calculate the needed size for the snapshot list. 485 */ 486 snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) + 487 FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */; 488 MNT_ILOCK(mp); 489 mp->mnt_kern_flag &= ~MNTK_SUSPENDED; 490 loop: 491 MNT_VNODE_FOREACH(xvp, mp, mvp) { 492 VI_LOCK(xvp); 493 MNT_IUNLOCK(mp); 494 if ((xvp->v_iflag & VI_DOOMED) || 495 (xvp->v_usecount == 0 && 496 (xvp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == 0) || 497 xvp->v_type == VNON || 498 (VTOI(xvp)->i_flags & SF_SNAPSHOT)) { 499 VI_UNLOCK(xvp); 500 MNT_ILOCK(mp); 501 continue; 502 } 503 /* 504 * We can skip parent directory vnode because it must have 505 * this snapshot file in it. 506 */ 507 if (xvp == nd.ni_dvp) { 508 VI_UNLOCK(xvp); 509 MNT_ILOCK(mp); 510 continue; 511 } 512 vholdl(xvp); 513 if (vn_lock(xvp, LK_EXCLUSIVE | LK_INTERLOCK, td) != 0) { 514 MNT_ILOCK(mp); 515 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp); 516 vdrop(xvp); 517 goto loop; 518 } 519 VI_LOCK(xvp); 520 if (xvp->v_usecount == 0 && 521 (xvp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == 0) { 522 VI_UNLOCK(xvp); 523 VOP_UNLOCK(xvp, 0, td); 524 vdrop(xvp); 525 MNT_ILOCK(mp); 526 continue; 527 } 528 VI_UNLOCK(xvp); 529 if (snapdebug) 530 vprint("ffs_snapshot: busy vnode", xvp); 531 if (VOP_GETATTR(xvp, &vat, td->td_ucred, td) == 0 && 532 vat.va_nlink > 0) { 533 VOP_UNLOCK(xvp, 0, td); 534 vdrop(xvp); 535 MNT_ILOCK(mp); 536 continue; 537 } 538 xp = VTOI(xvp); 539 if (ffs_checkfreefile(copy_fs, vp, xp->i_number)) { 540 VOP_UNLOCK(xvp, 0, td); 541 vdrop(xvp); 542 MNT_ILOCK(mp); 543 continue; 544 } 545 /* 546 * If there is a fragment, clear it here. 547 */ 548 blkno = 0; 549 loc = howmany(xp->i_size, fs->fs_bsize) - 1; 550 if (loc < NDADDR) { 551 len = fragroundup(fs, blkoff(fs, xp->i_size)); 552 if (len != 0 && len < fs->fs_bsize) { 553 ffs_blkfree(ump, copy_fs, vp, 554 DIP(xp, i_db[loc]), len, xp->i_number); 555 blkno = DIP(xp, i_db[loc]); 556 DIP_SET(xp, i_db[loc], 0); 557 } 558 } 559 snaplistsize += 1; 560 if (xp->i_ump->um_fstype == UFS1) 561 error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1, 562 BLK_NOCOPY); 563 else 564 error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2, 565 BLK_NOCOPY); 566 if (blkno) 567 DIP_SET(xp, i_db[loc], blkno); 568 if (!error) 569 error = ffs_freefile(ump, copy_fs, vp, xp->i_number, 570 xp->i_mode); 571 VOP_UNLOCK(xvp, 0, td); 572 vdrop(xvp); 573 if (error) { 574 free(copy_fs->fs_csp, M_UFSMNT); 575 bawrite(sbp); 576 sbp = NULL; 577 MNT_VNODE_FOREACH_ABORT(mp, mvp); 578 goto out1; 579 } 580 MNT_ILOCK(mp); 581 } 582 MNT_IUNLOCK(mp); 583 vdrop(nd.ni_dvp); 584 /* 585 * If there already exist snapshots on this filesystem, grab a 586 * reference to their shared lock. If this is the first snapshot 587 * on this filesystem, we need to allocate a lock for the snapshots 588 * to share. In either case, acquire the snapshot lock and give 589 * up our original private lock. 590 */ 591 VI_LOCK(devvp); 592 sn = devvp->v_rdev->si_snapdata; 593 if (sn != NULL) { 594 xp = TAILQ_FIRST(&sn->sn_head); 595 VI_UNLOCK(devvp); 596 VI_LOCK(vp); 597 vp->v_vnlock = &sn->sn_lock; 598 } else { 599 VI_UNLOCK(devvp); 600 sn = malloc(sizeof *sn, M_UFSMNT, M_WAITOK | M_ZERO); 601 TAILQ_INIT(&sn->sn_head); 602 lockinit(&sn->sn_lock, PVFS, "snaplk", VLKTIMEOUT, 603 LK_CANRECURSE | LK_NOSHARE); 604 VI_LOCK(vp); 605 vp->v_vnlock = &sn->sn_lock; 606 mp_fixme("si_snapdata setting is racey."); 607 devvp->v_rdev->si_snapdata = sn; 608 xp = NULL; 609 } 610 lockmgr(vp->v_vnlock, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY, 611 VI_MTX(vp), td); 612 transferlockers(&vp->v_lock, vp->v_vnlock); 613 lockmgr(&vp->v_lock, LK_RELEASE, NULL, td); 614 /* 615 * If this is the first snapshot on this filesystem, then we need 616 * to allocate the space for the list of preallocated snapshot blocks. 617 * This list will be refined below, but this preliminary one will 618 * keep us out of deadlock until the full one is ready. 619 */ 620 if (xp == NULL) { 621 MALLOC(snapblklist, daddr_t *, snaplistsize * sizeof(daddr_t), 622 M_UFSMNT, M_WAITOK); 623 blkp = &snapblklist[1]; 624 *blkp++ = lblkno(fs, fs->fs_sblockloc); 625 blkno = fragstoblks(fs, fs->fs_csaddr); 626 for (cg = 0; cg < fs->fs_ncg; cg++) { 627 if (fragstoblks(fs, cgtod(fs, cg) > blkno)) 628 break; 629 *blkp++ = fragstoblks(fs, cgtod(fs, cg)); 630 } 631 len = howmany(fs->fs_cssize, fs->fs_bsize); 632 for (loc = 0; loc < len; loc++) 633 *blkp++ = blkno + loc; 634 for (; cg < fs->fs_ncg; cg++) 635 *blkp++ = fragstoblks(fs, cgtod(fs, cg)); 636 snapblklist[0] = blkp - snapblklist; 637 VI_LOCK(devvp); 638 if (sn->sn_blklist != NULL) 639 panic("ffs_snapshot: non-empty list"); 640 sn->sn_blklist = snapblklist; 641 sn->sn_listsize = blkp - snapblklist; 642 VI_UNLOCK(devvp); 643 } 644 /* 645 * Record snapshot inode. Since this is the newest snapshot, 646 * it must be placed at the end of the list. 647 */ 648 VI_LOCK(devvp); 649 fs->fs_snapinum[snaploc] = ip->i_number; 650 if (ip->i_nextsnap.tqe_prev != 0) 651 panic("ffs_snapshot: %d already on list", ip->i_number); 652 TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap); 653 devvp->v_vflag |= VV_COPYONWRITE; 654 VI_UNLOCK(devvp); 655 ASSERT_VOP_LOCKED(vp, "ffs_snapshot vp"); 656 vp->v_vflag |= VV_SYSTEM; 657 out1: 658 KASSERT(sn != (void *)0xdeadbeef, ("email phk@ and mckusick@")); 659 /* 660 * Resume operation on filesystem. 661 */ 662 vfs_write_resume(vp->v_mount); 663 vn_start_write(NULL, &wrtmp, V_WAIT); 664 if (collectsnapstats && starttime.tv_sec > 0) { 665 nanotime(&endtime); 666 timespecsub(&endtime, &starttime); 667 printf("%s: suspended %ld.%03ld sec, redo %ld of %d\n", 668 vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec, 669 endtime.tv_nsec / 1000000, redo, fs->fs_ncg); 670 } 671 if (sbp == NULL) 672 goto out; 673 /* 674 * Copy allocation information from all the snapshots in 675 * this snapshot and then expunge them from its view. 676 */ 677 TAILQ_FOREACH(xp, &sn->sn_head, i_nextsnap) { 678 if (xp == ip) 679 break; 680 if (xp->i_ump->um_fstype == UFS1) 681 error = expunge_ufs1(vp, xp, fs, snapacct_ufs1, 682 BLK_SNAP); 683 else 684 error = expunge_ufs2(vp, xp, fs, snapacct_ufs2, 685 BLK_SNAP); 686 if (error) { 687 fs->fs_snapinum[snaploc] = 0; 688 goto done; 689 } 690 } 691 /* 692 * Allocate space for the full list of preallocated snapshot blocks. 693 */ 694 MALLOC(snapblklist, daddr_t *, snaplistsize * sizeof(daddr_t), 695 M_UFSMNT, M_WAITOK); 696 ip->i_snapblklist = &snapblklist[1]; 697 /* 698 * Expunge the blocks used by the snapshots from the set of 699 * blocks marked as used in the snapshot bitmaps. Also, collect 700 * the list of allocated blocks in i_snapblklist. 701 */ 702 if (ip->i_ump->um_fstype == UFS1) 703 error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, BLK_SNAP); 704 else 705 error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, BLK_SNAP); 706 if (error) { 707 fs->fs_snapinum[snaploc] = 0; 708 FREE(snapblklist, M_UFSMNT); 709 goto done; 710 } 711 if (snaplistsize < ip->i_snapblklist - snapblklist) 712 panic("ffs_snapshot: list too small"); 713 snaplistsize = ip->i_snapblklist - snapblklist; 714 snapblklist[0] = snaplistsize; 715 ip->i_snapblklist = 0; 716 /* 717 * Write out the list of allocated blocks to the end of the snapshot. 718 */ 719 auio.uio_iov = &aiov; 720 auio.uio_iovcnt = 1; 721 aiov.iov_base = (void *)snapblklist; 722 aiov.iov_len = snaplistsize * sizeof(daddr_t); 723 auio.uio_resid = aiov.iov_len;; 724 auio.uio_offset = ip->i_size; 725 auio.uio_segflg = UIO_SYSSPACE; 726 auio.uio_rw = UIO_WRITE; 727 auio.uio_td = td; 728 if ((error = VOP_WRITE(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 729 fs->fs_snapinum[snaploc] = 0; 730 FREE(snapblklist, M_UFSMNT); 731 goto done; 732 } 733 /* 734 * Write the superblock and its summary information 735 * to the snapshot. 736 */ 737 blkno = fragstoblks(fs, fs->fs_csaddr); 738 len = howmany(fs->fs_cssize, fs->fs_bsize); 739 space = copy_fs->fs_csp; 740 for (loc = 0; loc < len; loc++) { 741 error = bread(vp, blkno + loc, fs->fs_bsize, KERNCRED, &nbp); 742 if (error) { 743 brelse(nbp); 744 fs->fs_snapinum[snaploc] = 0; 745 FREE(snapblklist, M_UFSMNT); 746 goto done; 747 } 748 bcopy(space, nbp->b_data, fs->fs_bsize); 749 space = (char *)space + fs->fs_bsize; 750 bawrite(nbp); 751 } 752 /* 753 * As this is the newest list, it is the most inclusive, so 754 * should replace the previous list. 755 */ 756 VI_LOCK(devvp); 757 space = sn->sn_blklist; 758 sn->sn_blklist = snapblklist; 759 sn->sn_listsize = snaplistsize; 760 VI_UNLOCK(devvp); 761 if (space != NULL) 762 FREE(space, M_UFSMNT); 763 /* 764 * If another process is currently writing the buffer containing 765 * the inode for this snapshot then a deadlock can occur. Drop 766 * the snapshot lock until the buffer has been written. 767 */ 768 VOP_UNLOCK(vp, 0, td); 769 (void) bread(ip->i_devvp, 770 fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 771 (int) fs->fs_bsize, NOCRED, &nbp); 772 brelse(nbp); 773 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 774 done: 775 FREE(copy_fs->fs_csp, M_UFSMNT); 776 bawrite(sbp); 777 out: 778 if (saved_nice > 0) { 779 PROC_LOCK(td->td_proc); 780 mtx_lock_spin(&sched_lock); 781 sched_nice(td->td_proc, saved_nice); 782 mtx_unlock_spin(&sched_lock); 783 PROC_UNLOCK(td->td_proc); 784 } 785 UFS_LOCK(ump); 786 if (fs->fs_active != 0) { 787 FREE(fs->fs_active, M_DEVBUF); 788 fs->fs_active = 0; 789 } 790 UFS_UNLOCK(ump); 791 mp->mnt_flag = flag; 792 if (error) 793 (void) ffs_truncate(vp, (off_t)0, 0, NOCRED, td); 794 (void) ffs_syncvnode(vp, MNT_WAIT); 795 if (error) 796 vput(vp); 797 else 798 VOP_UNLOCK(vp, 0, td); 799 vn_finished_write(wrtmp); 800 process_deferred_inactive(mp); 801 return (error); 802 } 803 804 /* 805 * Copy a cylinder group map. All the unallocated blocks are marked 806 * BLK_NOCOPY so that the snapshot knows that it need not copy them 807 * if they are later written. If passno is one, then this is a first 808 * pass, so only setting needs to be done. If passno is 2, then this 809 * is a revision to a previous pass which must be undone as the 810 * replacement pass is done. 811 */ 812 static int 813 cgaccount(cg, vp, nbp, passno) 814 int cg; 815 struct vnode *vp; 816 struct buf *nbp; 817 int passno; 818 { 819 struct buf *bp, *ibp; 820 struct inode *ip; 821 struct cg *cgp; 822 struct fs *fs; 823 ufs2_daddr_t base, numblks; 824 int error, len, loc, indiroff; 825 826 ip = VTOI(vp); 827 fs = ip->i_fs; 828 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 829 (int)fs->fs_cgsize, KERNCRED, &bp); 830 if (error) { 831 brelse(bp); 832 return (error); 833 } 834 cgp = (struct cg *)bp->b_data; 835 if (!cg_chkmagic(cgp)) { 836 brelse(bp); 837 return (EIO); 838 } 839 UFS_LOCK(ip->i_ump); 840 ACTIVESET(fs, cg); 841 UFS_UNLOCK(ip->i_ump); 842 bcopy(bp->b_data, nbp->b_data, fs->fs_cgsize); 843 if (fs->fs_cgsize < fs->fs_bsize) 844 bzero(&nbp->b_data[fs->fs_cgsize], 845 fs->fs_bsize - fs->fs_cgsize); 846 cgp = (struct cg *)nbp->b_data; 847 bqrelse(bp); 848 if (passno == 2) 849 nbp->b_flags |= B_VALIDSUSPWRT; 850 numblks = howmany(fs->fs_size, fs->fs_frag); 851 len = howmany(fs->fs_fpg, fs->fs_frag); 852 base = cgbase(fs, cg) / fs->fs_frag; 853 if (base + len >= numblks) 854 len = numblks - base - 1; 855 loc = 0; 856 if (base < NDADDR) { 857 for ( ; loc < NDADDR; loc++) { 858 if (ffs_isblock(fs, cg_blksfree(cgp), loc)) 859 DIP_SET(ip, i_db[loc], BLK_NOCOPY); 860 else if (passno == 2 && DIP(ip, i_db[loc])== BLK_NOCOPY) 861 DIP_SET(ip, i_db[loc], 0); 862 else if (passno == 1 && DIP(ip, i_db[loc])== BLK_NOCOPY) 863 panic("ffs_snapshot: lost direct block"); 864 } 865 } 866 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)), 867 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 868 if (error) { 869 return (error); 870 } 871 indiroff = (base + loc - NDADDR) % NINDIR(fs); 872 for ( ; loc < len; loc++, indiroff++) { 873 if (indiroff >= NINDIR(fs)) { 874 if (passno == 2) 875 ibp->b_flags |= B_VALIDSUSPWRT; 876 bawrite(ibp); 877 error = UFS_BALLOC(vp, 878 lblktosize(fs, (off_t)(base + loc)), 879 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 880 if (error) { 881 return (error); 882 } 883 indiroff = 0; 884 } 885 if (ip->i_ump->um_fstype == UFS1) { 886 if (ffs_isblock(fs, cg_blksfree(cgp), loc)) 887 ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 888 BLK_NOCOPY; 889 else if (passno == 2 && ((ufs1_daddr_t *)(ibp->b_data)) 890 [indiroff] == BLK_NOCOPY) 891 ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 0; 892 else if (passno == 1 && ((ufs1_daddr_t *)(ibp->b_data)) 893 [indiroff] == BLK_NOCOPY) 894 panic("ffs_snapshot: lost indirect block"); 895 continue; 896 } 897 if (ffs_isblock(fs, cg_blksfree(cgp), loc)) 898 ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = BLK_NOCOPY; 899 else if (passno == 2 && 900 ((ufs2_daddr_t *)(ibp->b_data)) [indiroff] == BLK_NOCOPY) 901 ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = 0; 902 else if (passno == 1 && 903 ((ufs2_daddr_t *)(ibp->b_data)) [indiroff] == BLK_NOCOPY) 904 panic("ffs_snapshot: lost indirect block"); 905 } 906 if (passno == 2) 907 ibp->b_flags |= B_VALIDSUSPWRT; 908 bdwrite(ibp); 909 return (0); 910 } 911 912 /* 913 * Before expunging a snapshot inode, note all the 914 * blocks that it claims with BLK_SNAP so that fsck will 915 * be able to account for those blocks properly and so 916 * that this snapshot knows that it need not copy them 917 * if the other snapshot holding them is freed. This code 918 * is reproduced once each for UFS1 and UFS2. 919 */ 920 static int 921 expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype) 922 struct vnode *snapvp; 923 struct inode *cancelip; 924 struct fs *fs; 925 int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, 926 struct fs *, ufs_lbn_t, int); 927 int expungetype; 928 { 929 int i, error, indiroff; 930 ufs_lbn_t lbn, rlbn; 931 ufs2_daddr_t len, blkno, numblks, blksperindir; 932 struct ufs1_dinode *dip; 933 struct thread *td = curthread; 934 struct buf *bp; 935 936 /* 937 * Prepare to expunge the inode. If its inode block has not 938 * yet been copied, then allocate and fill the copy. 939 */ 940 lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number)); 941 blkno = 0; 942 if (lbn < NDADDR) { 943 blkno = VTOI(snapvp)->i_din1->di_db[lbn]; 944 } else { 945 td->td_pflags |= TDP_COWINPROGRESS; 946 error = ffs_balloc_ufs1(snapvp, lblktosize(fs, (off_t)lbn), 947 fs->fs_bsize, KERNCRED, BA_METAONLY, &bp); 948 td->td_pflags &= ~TDP_COWINPROGRESS; 949 if (error) 950 return (error); 951 indiroff = (lbn - NDADDR) % NINDIR(fs); 952 blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff]; 953 bqrelse(bp); 954 } 955 if (blkno != 0) { 956 if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp))) 957 return (error); 958 } else { 959 error = ffs_balloc_ufs1(snapvp, lblktosize(fs, (off_t)lbn), 960 fs->fs_bsize, KERNCRED, 0, &bp); 961 if (error) 962 return (error); 963 if ((error = readblock(snapvp, bp, lbn)) != 0) 964 return (error); 965 } 966 /* 967 * Set a snapshot inode to be a zero length file, regular files 968 * to be completely unallocated. 969 */ 970 dip = (struct ufs1_dinode *)bp->b_data + 971 ino_to_fsbo(fs, cancelip->i_number); 972 if (expungetype == BLK_NOCOPY) 973 dip->di_mode = 0; 974 dip->di_size = 0; 975 dip->di_blocks = 0; 976 dip->di_flags &= ~SF_SNAPSHOT; 977 bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs1_daddr_t)); 978 bdwrite(bp); 979 /* 980 * Now go through and expunge all the blocks in the file 981 * using the function requested. 982 */ 983 numblks = howmany(cancelip->i_size, fs->fs_bsize); 984 if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_db[0], 985 &cancelip->i_din1->di_db[NDADDR], fs, 0, expungetype))) 986 return (error); 987 if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_ib[0], 988 &cancelip->i_din1->di_ib[NIADDR], fs, -1, expungetype))) 989 return (error); 990 blksperindir = 1; 991 lbn = -NDADDR; 992 len = numblks - NDADDR; 993 rlbn = NDADDR; 994 for (i = 0; len > 0 && i < NIADDR; i++) { 995 error = indiracct_ufs1(snapvp, ITOV(cancelip), i, 996 cancelip->i_din1->di_ib[i], lbn, rlbn, len, 997 blksperindir, fs, acctfunc, expungetype); 998 if (error) 999 return (error); 1000 blksperindir *= NINDIR(fs); 1001 lbn -= blksperindir + 1; 1002 len -= blksperindir; 1003 rlbn += blksperindir; 1004 } 1005 return (0); 1006 } 1007 1008 /* 1009 * Descend an indirect block chain for vnode cancelvp accounting for all 1010 * its indirect blocks in snapvp. 1011 */ 1012 static int 1013 indiracct_ufs1(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, 1014 blksperindir, fs, acctfunc, expungetype) 1015 struct vnode *snapvp; 1016 struct vnode *cancelvp; 1017 int level; 1018 ufs1_daddr_t blkno; 1019 ufs_lbn_t lbn; 1020 ufs_lbn_t rlbn; 1021 ufs_lbn_t remblks; 1022 ufs_lbn_t blksperindir; 1023 struct fs *fs; 1024 int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, 1025 struct fs *, ufs_lbn_t, int); 1026 int expungetype; 1027 { 1028 int error, num, i; 1029 ufs_lbn_t subblksperindir; 1030 struct indir indirs[NIADDR + 2]; 1031 ufs1_daddr_t last, *bap; 1032 struct buf *bp; 1033 1034 if (blkno == 0) { 1035 if (expungetype == BLK_NOCOPY) 1036 return (0); 1037 panic("indiracct_ufs1: missing indir"); 1038 } 1039 if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0) 1040 return (error); 1041 if (lbn != indirs[num - 1 - level].in_lbn || num < 2) 1042 panic("indiracct_ufs1: botched params"); 1043 /* 1044 * We have to expand bread here since it will deadlock looking 1045 * up the block number for any blocks that are not in the cache. 1046 */ 1047 bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0); 1048 bp->b_blkno = fsbtodb(fs, blkno); 1049 if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 && 1050 (error = readblock(cancelvp, bp, fragstoblks(fs, blkno)))) { 1051 brelse(bp); 1052 return (error); 1053 } 1054 /* 1055 * Account for the block pointers in this indirect block. 1056 */ 1057 last = howmany(remblks, blksperindir); 1058 if (last > NINDIR(fs)) 1059 last = NINDIR(fs); 1060 MALLOC(bap, ufs1_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK); 1061 bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); 1062 bqrelse(bp); 1063 error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, 1064 level == 0 ? rlbn : -1, expungetype); 1065 if (error || level == 0) 1066 goto out; 1067 /* 1068 * Account for the block pointers in each of the indirect blocks 1069 * in the levels below us. 1070 */ 1071 subblksperindir = blksperindir / NINDIR(fs); 1072 for (lbn++, level--, i = 0; i < last; i++) { 1073 error = indiracct_ufs1(snapvp, cancelvp, level, bap[i], lbn, 1074 rlbn, remblks, subblksperindir, fs, acctfunc, expungetype); 1075 if (error) 1076 goto out; 1077 rlbn += blksperindir; 1078 lbn -= blksperindir; 1079 remblks -= blksperindir; 1080 } 1081 out: 1082 FREE(bap, M_DEVBUF); 1083 return (error); 1084 } 1085 1086 /* 1087 * Do both snap accounting and map accounting. 1088 */ 1089 static int 1090 fullacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype) 1091 struct vnode *vp; 1092 ufs1_daddr_t *oldblkp, *lastblkp; 1093 struct fs *fs; 1094 ufs_lbn_t lblkno; 1095 int exptype; /* BLK_SNAP or BLK_NOCOPY */ 1096 { 1097 int error; 1098 1099 if ((error = snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype))) 1100 return (error); 1101 return (mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)); 1102 } 1103 1104 /* 1105 * Identify a set of blocks allocated in a snapshot inode. 1106 */ 1107 static int 1108 snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 1109 struct vnode *vp; 1110 ufs1_daddr_t *oldblkp, *lastblkp; 1111 struct fs *fs; 1112 ufs_lbn_t lblkno; 1113 int expungetype; /* BLK_SNAP or BLK_NOCOPY */ 1114 { 1115 struct inode *ip = VTOI(vp); 1116 ufs1_daddr_t blkno, *blkp; 1117 ufs_lbn_t lbn; 1118 struct buf *ibp; 1119 int error; 1120 1121 for ( ; oldblkp < lastblkp; oldblkp++) { 1122 blkno = *oldblkp; 1123 if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP) 1124 continue; 1125 lbn = fragstoblks(fs, blkno); 1126 if (lbn < NDADDR) { 1127 blkp = &ip->i_din1->di_db[lbn]; 1128 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1129 } else { 1130 error = ffs_balloc_ufs1(vp, lblktosize(fs, (off_t)lbn), 1131 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 1132 if (error) 1133 return (error); 1134 blkp = &((ufs1_daddr_t *)(ibp->b_data)) 1135 [(lbn - NDADDR) % NINDIR(fs)]; 1136 } 1137 /* 1138 * If we are expunging a snapshot vnode and we 1139 * find a block marked BLK_NOCOPY, then it is 1140 * one that has been allocated to this snapshot after 1141 * we took our current snapshot and can be ignored. 1142 */ 1143 if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) { 1144 if (lbn >= NDADDR) 1145 brelse(ibp); 1146 } else { 1147 if (*blkp != 0) 1148 panic("snapacct_ufs1: bad block"); 1149 *blkp = expungetype; 1150 if (lbn >= NDADDR) 1151 bdwrite(ibp); 1152 } 1153 } 1154 return (0); 1155 } 1156 1157 /* 1158 * Account for a set of blocks allocated in a snapshot inode. 1159 */ 1160 static int 1161 mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 1162 struct vnode *vp; 1163 ufs1_daddr_t *oldblkp, *lastblkp; 1164 struct fs *fs; 1165 ufs_lbn_t lblkno; 1166 int expungetype; 1167 { 1168 ufs1_daddr_t blkno; 1169 struct inode *ip; 1170 ino_t inum; 1171 int acctit; 1172 1173 ip = VTOI(vp); 1174 inum = ip->i_number; 1175 if (lblkno == -1) 1176 acctit = 0; 1177 else 1178 acctit = 1; 1179 for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) { 1180 blkno = *oldblkp; 1181 if (blkno == 0 || blkno == BLK_NOCOPY) 1182 continue; 1183 if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP) 1184 *ip->i_snapblklist++ = lblkno; 1185 if (blkno == BLK_SNAP) 1186 blkno = blkstofrags(fs, lblkno); 1187 ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum); 1188 } 1189 return (0); 1190 } 1191 1192 /* 1193 * Before expunging a snapshot inode, note all the 1194 * blocks that it claims with BLK_SNAP so that fsck will 1195 * be able to account for those blocks properly and so 1196 * that this snapshot knows that it need not copy them 1197 * if the other snapshot holding them is freed. This code 1198 * is reproduced once each for UFS1 and UFS2. 1199 */ 1200 static int 1201 expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype) 1202 struct vnode *snapvp; 1203 struct inode *cancelip; 1204 struct fs *fs; 1205 int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 1206 struct fs *, ufs_lbn_t, int); 1207 int expungetype; 1208 { 1209 int i, error, indiroff; 1210 ufs_lbn_t lbn, rlbn; 1211 ufs2_daddr_t len, blkno, numblks, blksperindir; 1212 struct ufs2_dinode *dip; 1213 struct thread *td = curthread; 1214 struct buf *bp; 1215 1216 /* 1217 * Prepare to expunge the inode. If its inode block has not 1218 * yet been copied, then allocate and fill the copy. 1219 */ 1220 lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number)); 1221 blkno = 0; 1222 if (lbn < NDADDR) { 1223 blkno = VTOI(snapvp)->i_din2->di_db[lbn]; 1224 } else { 1225 td->td_pflags |= TDP_COWINPROGRESS; 1226 error = ffs_balloc_ufs2(snapvp, lblktosize(fs, (off_t)lbn), 1227 fs->fs_bsize, KERNCRED, BA_METAONLY, &bp); 1228 td->td_pflags &= ~TDP_COWINPROGRESS; 1229 if (error) 1230 return (error); 1231 indiroff = (lbn - NDADDR) % NINDIR(fs); 1232 blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff]; 1233 bqrelse(bp); 1234 } 1235 if (blkno != 0) { 1236 if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp))) 1237 return (error); 1238 } else { 1239 error = ffs_balloc_ufs2(snapvp, lblktosize(fs, (off_t)lbn), 1240 fs->fs_bsize, KERNCRED, 0, &bp); 1241 if (error) 1242 return (error); 1243 if ((error = readblock(snapvp, bp, lbn)) != 0) 1244 return (error); 1245 } 1246 /* 1247 * Set a snapshot inode to be a zero length file, regular files 1248 * to be completely unallocated. 1249 */ 1250 dip = (struct ufs2_dinode *)bp->b_data + 1251 ino_to_fsbo(fs, cancelip->i_number); 1252 if (expungetype == BLK_NOCOPY) 1253 dip->di_mode = 0; 1254 dip->di_size = 0; 1255 dip->di_blocks = 0; 1256 dip->di_flags &= ~SF_SNAPSHOT; 1257 bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs2_daddr_t)); 1258 bdwrite(bp); 1259 /* 1260 * Now go through and expunge all the blocks in the file 1261 * using the function requested. 1262 */ 1263 numblks = howmany(cancelip->i_size, fs->fs_bsize); 1264 if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_db[0], 1265 &cancelip->i_din2->di_db[NDADDR], fs, 0, expungetype))) 1266 return (error); 1267 if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_ib[0], 1268 &cancelip->i_din2->di_ib[NIADDR], fs, -1, expungetype))) 1269 return (error); 1270 blksperindir = 1; 1271 lbn = -NDADDR; 1272 len = numblks - NDADDR; 1273 rlbn = NDADDR; 1274 for (i = 0; len > 0 && i < NIADDR; i++) { 1275 error = indiracct_ufs2(snapvp, ITOV(cancelip), i, 1276 cancelip->i_din2->di_ib[i], lbn, rlbn, len, 1277 blksperindir, fs, acctfunc, expungetype); 1278 if (error) 1279 return (error); 1280 blksperindir *= NINDIR(fs); 1281 lbn -= blksperindir + 1; 1282 len -= blksperindir; 1283 rlbn += blksperindir; 1284 } 1285 return (0); 1286 } 1287 1288 /* 1289 * Descend an indirect block chain for vnode cancelvp accounting for all 1290 * its indirect blocks in snapvp. 1291 */ 1292 static int 1293 indiracct_ufs2(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, 1294 blksperindir, fs, acctfunc, expungetype) 1295 struct vnode *snapvp; 1296 struct vnode *cancelvp; 1297 int level; 1298 ufs2_daddr_t blkno; 1299 ufs_lbn_t lbn; 1300 ufs_lbn_t rlbn; 1301 ufs_lbn_t remblks; 1302 ufs_lbn_t blksperindir; 1303 struct fs *fs; 1304 int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 1305 struct fs *, ufs_lbn_t, int); 1306 int expungetype; 1307 { 1308 int error, num, i; 1309 ufs_lbn_t subblksperindir; 1310 struct indir indirs[NIADDR + 2]; 1311 ufs2_daddr_t last, *bap; 1312 struct buf *bp; 1313 1314 if (blkno == 0) { 1315 if (expungetype == BLK_NOCOPY) 1316 return (0); 1317 panic("indiracct_ufs2: missing indir"); 1318 } 1319 if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0) 1320 return (error); 1321 if (lbn != indirs[num - 1 - level].in_lbn || num < 2) 1322 panic("indiracct_ufs2: botched params"); 1323 /* 1324 * We have to expand bread here since it will deadlock looking 1325 * up the block number for any blocks that are not in the cache. 1326 */ 1327 bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0); 1328 bp->b_blkno = fsbtodb(fs, blkno); 1329 if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 && 1330 (error = readblock(cancelvp, bp, fragstoblks(fs, blkno)))) { 1331 brelse(bp); 1332 return (error); 1333 } 1334 /* 1335 * Account for the block pointers in this indirect block. 1336 */ 1337 last = howmany(remblks, blksperindir); 1338 if (last > NINDIR(fs)) 1339 last = NINDIR(fs); 1340 MALLOC(bap, ufs2_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK); 1341 bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); 1342 bqrelse(bp); 1343 error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, 1344 level == 0 ? rlbn : -1, expungetype); 1345 if (error || level == 0) 1346 goto out; 1347 /* 1348 * Account for the block pointers in each of the indirect blocks 1349 * in the levels below us. 1350 */ 1351 subblksperindir = blksperindir / NINDIR(fs); 1352 for (lbn++, level--, i = 0; i < last; i++) { 1353 error = indiracct_ufs2(snapvp, cancelvp, level, bap[i], lbn, 1354 rlbn, remblks, subblksperindir, fs, acctfunc, expungetype); 1355 if (error) 1356 goto out; 1357 rlbn += blksperindir; 1358 lbn -= blksperindir; 1359 remblks -= blksperindir; 1360 } 1361 out: 1362 FREE(bap, M_DEVBUF); 1363 return (error); 1364 } 1365 1366 /* 1367 * Do both snap accounting and map accounting. 1368 */ 1369 static int 1370 fullacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype) 1371 struct vnode *vp; 1372 ufs2_daddr_t *oldblkp, *lastblkp; 1373 struct fs *fs; 1374 ufs_lbn_t lblkno; 1375 int exptype; /* BLK_SNAP or BLK_NOCOPY */ 1376 { 1377 int error; 1378 1379 if ((error = snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype))) 1380 return (error); 1381 return (mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)); 1382 } 1383 1384 /* 1385 * Identify a set of blocks allocated in a snapshot inode. 1386 */ 1387 static int 1388 snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 1389 struct vnode *vp; 1390 ufs2_daddr_t *oldblkp, *lastblkp; 1391 struct fs *fs; 1392 ufs_lbn_t lblkno; 1393 int expungetype; /* BLK_SNAP or BLK_NOCOPY */ 1394 { 1395 struct inode *ip = VTOI(vp); 1396 ufs2_daddr_t blkno, *blkp; 1397 ufs_lbn_t lbn; 1398 struct buf *ibp; 1399 int error; 1400 1401 for ( ; oldblkp < lastblkp; oldblkp++) { 1402 blkno = *oldblkp; 1403 if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP) 1404 continue; 1405 lbn = fragstoblks(fs, blkno); 1406 if (lbn < NDADDR) { 1407 blkp = &ip->i_din2->di_db[lbn]; 1408 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1409 } else { 1410 error = ffs_balloc_ufs2(vp, lblktosize(fs, (off_t)lbn), 1411 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 1412 if (error) 1413 return (error); 1414 blkp = &((ufs2_daddr_t *)(ibp->b_data)) 1415 [(lbn - NDADDR) % NINDIR(fs)]; 1416 } 1417 /* 1418 * If we are expunging a snapshot vnode and we 1419 * find a block marked BLK_NOCOPY, then it is 1420 * one that has been allocated to this snapshot after 1421 * we took our current snapshot and can be ignored. 1422 */ 1423 if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) { 1424 if (lbn >= NDADDR) 1425 brelse(ibp); 1426 } else { 1427 if (*blkp != 0) 1428 panic("snapacct_ufs2: bad block"); 1429 *blkp = expungetype; 1430 if (lbn >= NDADDR) 1431 bdwrite(ibp); 1432 } 1433 } 1434 return (0); 1435 } 1436 1437 /* 1438 * Account for a set of blocks allocated in a snapshot inode. 1439 */ 1440 static int 1441 mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 1442 struct vnode *vp; 1443 ufs2_daddr_t *oldblkp, *lastblkp; 1444 struct fs *fs; 1445 ufs_lbn_t lblkno; 1446 int expungetype; 1447 { 1448 ufs2_daddr_t blkno; 1449 struct inode *ip; 1450 ino_t inum; 1451 int acctit; 1452 1453 ip = VTOI(vp); 1454 inum = ip->i_number; 1455 if (lblkno == -1) 1456 acctit = 0; 1457 else 1458 acctit = 1; 1459 for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) { 1460 blkno = *oldblkp; 1461 if (blkno == 0 || blkno == BLK_NOCOPY) 1462 continue; 1463 if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP) 1464 *ip->i_snapblklist++ = lblkno; 1465 if (blkno == BLK_SNAP) 1466 blkno = blkstofrags(fs, lblkno); 1467 ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum); 1468 } 1469 return (0); 1470 } 1471 1472 /* 1473 * Decrement extra reference on snapshot when last name is removed. 1474 * It will not be freed until the last open reference goes away. 1475 */ 1476 void 1477 ffs_snapgone(ip) 1478 struct inode *ip; 1479 { 1480 struct inode *xp; 1481 struct fs *fs; 1482 int snaploc; 1483 struct snapdata *sn; 1484 struct ufsmount *ump; 1485 1486 /* 1487 * Find snapshot in incore list. 1488 */ 1489 xp = NULL; 1490 sn = ip->i_devvp->v_rdev->si_snapdata; 1491 if (sn != NULL) 1492 TAILQ_FOREACH(xp, &sn->sn_head, i_nextsnap) 1493 if (xp == ip) 1494 break; 1495 if (xp != NULL) 1496 vrele(ITOV(ip)); 1497 else if (snapdebug) 1498 printf("ffs_snapgone: lost snapshot vnode %d\n", 1499 ip->i_number); 1500 /* 1501 * Delete snapshot inode from superblock. Keep list dense. 1502 */ 1503 fs = ip->i_fs; 1504 ump = ip->i_ump; 1505 UFS_LOCK(ump); 1506 for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) 1507 if (fs->fs_snapinum[snaploc] == ip->i_number) 1508 break; 1509 if (snaploc < FSMAXSNAP) { 1510 for (snaploc++; snaploc < FSMAXSNAP; snaploc++) { 1511 if (fs->fs_snapinum[snaploc] == 0) 1512 break; 1513 fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc]; 1514 } 1515 fs->fs_snapinum[snaploc - 1] = 0; 1516 } 1517 UFS_UNLOCK(ump); 1518 } 1519 1520 /* 1521 * Prepare a snapshot file for being removed. 1522 */ 1523 void 1524 ffs_snapremove(vp) 1525 struct vnode *vp; 1526 { 1527 struct inode *ip; 1528 struct vnode *devvp; 1529 struct lock *lkp; 1530 struct buf *ibp; 1531 struct fs *fs; 1532 struct thread *td = curthread; 1533 ufs2_daddr_t numblks, blkno, dblk, *snapblklist; 1534 int error, loc, last; 1535 struct snapdata *sn; 1536 1537 ip = VTOI(vp); 1538 fs = ip->i_fs; 1539 devvp = ip->i_devvp; 1540 sn = devvp->v_rdev->si_snapdata; 1541 /* 1542 * If active, delete from incore list (this snapshot may 1543 * already have been in the process of being deleted, so 1544 * would not have been active). 1545 * 1546 * Clear copy-on-write flag if last snapshot. 1547 */ 1548 if (ip->i_nextsnap.tqe_prev != 0) { 1549 lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL, td); 1550 VI_LOCK(devvp); 1551 TAILQ_REMOVE(&sn->sn_head, ip, i_nextsnap); 1552 ip->i_nextsnap.tqe_prev = 0; 1553 lkp = vp->v_vnlock; 1554 vp->v_vnlock = &vp->v_lock; 1555 lockmgr(lkp, LK_RELEASE, NULL, td); 1556 if (TAILQ_FIRST(&sn->sn_head) != 0) { 1557 VI_UNLOCK(devvp); 1558 } else { 1559 snapblklist = sn->sn_blklist; 1560 sn->sn_blklist = 0; 1561 sn->sn_listsize = 0; 1562 devvp->v_rdev->si_snapdata = NULL; 1563 devvp->v_vflag &= ~VV_COPYONWRITE; 1564 lockmgr(lkp, LK_DRAIN|LK_INTERLOCK, VI_MTX(devvp), td); 1565 lockmgr(lkp, LK_RELEASE, NULL, td); 1566 lockdestroy(lkp); 1567 free(sn, M_UFSMNT); 1568 FREE(snapblklist, M_UFSMNT); 1569 } 1570 } 1571 /* 1572 * Clear all BLK_NOCOPY fields. Pass any block claims to other 1573 * snapshots that want them (see ffs_snapblkfree below). 1574 */ 1575 for (blkno = 1; blkno < NDADDR; blkno++) { 1576 dblk = DIP(ip, i_db[blkno]); 1577 if (dblk == 0) 1578 continue; 1579 if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) 1580 DIP_SET(ip, i_db[blkno], 0); 1581 else if ((dblk == blkstofrags(fs, blkno) && 1582 ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize, 1583 ip->i_number))) { 1584 DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - 1585 btodb(fs->fs_bsize)); 1586 DIP_SET(ip, i_db[blkno], 0); 1587 } 1588 } 1589 numblks = howmany(ip->i_size, fs->fs_bsize); 1590 for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) { 1591 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno), 1592 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 1593 if (error) 1594 continue; 1595 if (fs->fs_size - blkno > NINDIR(fs)) 1596 last = NINDIR(fs); 1597 else 1598 last = fs->fs_size - blkno; 1599 for (loc = 0; loc < last; loc++) { 1600 if (ip->i_ump->um_fstype == UFS1) { 1601 dblk = ((ufs1_daddr_t *)(ibp->b_data))[loc]; 1602 if (dblk == 0) 1603 continue; 1604 if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) 1605 ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0; 1606 else if ((dblk == blkstofrags(fs, blkno) && 1607 ffs_snapblkfree(fs, ip->i_devvp, dblk, 1608 fs->fs_bsize, ip->i_number))) { 1609 ip->i_din1->di_blocks -= 1610 btodb(fs->fs_bsize); 1611 ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0; 1612 } 1613 continue; 1614 } 1615 dblk = ((ufs2_daddr_t *)(ibp->b_data))[loc]; 1616 if (dblk == 0) 1617 continue; 1618 if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) 1619 ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0; 1620 else if ((dblk == blkstofrags(fs, blkno) && 1621 ffs_snapblkfree(fs, ip->i_devvp, dblk, 1622 fs->fs_bsize, ip->i_number))) { 1623 ip->i_din2->di_blocks -= btodb(fs->fs_bsize); 1624 ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0; 1625 } 1626 } 1627 bawrite(ibp); 1628 } 1629 /* 1630 * Clear snapshot flag and drop reference. 1631 */ 1632 ip->i_flags &= ~SF_SNAPSHOT; 1633 DIP_SET(ip, i_flags, ip->i_flags); 1634 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1635 } 1636 1637 /* 1638 * Notification that a block is being freed. Return zero if the free 1639 * should be allowed to proceed. Return non-zero if the snapshot file 1640 * wants to claim the block. The block will be claimed if it is an 1641 * uncopied part of one of the snapshots. It will be freed if it is 1642 * either a BLK_NOCOPY or has already been copied in all of the snapshots. 1643 * If a fragment is being freed, then all snapshots that care about 1644 * it must make a copy since a snapshot file can only claim full sized 1645 * blocks. Note that if more than one snapshot file maps the block, 1646 * we can pick one at random to claim it. Since none of the snapshots 1647 * can change, we are assurred that they will all see the same unmodified 1648 * image. When deleting a snapshot file (see ffs_snapremove above), we 1649 * must push any of these claimed blocks to one of the other snapshots 1650 * that maps it. These claimed blocks are easily identified as they will 1651 * have a block number equal to their logical block number within the 1652 * snapshot. A copied block can never have this property because they 1653 * must always have been allocated from a BLK_NOCOPY location. 1654 */ 1655 int 1656 ffs_snapblkfree(fs, devvp, bno, size, inum) 1657 struct fs *fs; 1658 struct vnode *devvp; 1659 ufs2_daddr_t bno; 1660 long size; 1661 ino_t inum; 1662 { 1663 struct buf *ibp, *cbp, *savedcbp = 0; 1664 struct thread *td = curthread; 1665 struct inode *ip; 1666 struct vnode *vp = NULL; 1667 ufs_lbn_t lbn; 1668 ufs2_daddr_t blkno; 1669 int indiroff = 0, error = 0, claimedblk = 0; 1670 struct snapdata *sn; 1671 1672 lbn = fragstoblks(fs, bno); 1673 retry: 1674 VI_LOCK(devvp); 1675 sn = devvp->v_rdev->si_snapdata; 1676 if (sn == NULL) { 1677 VI_UNLOCK(devvp); 1678 return (0); 1679 } 1680 if (lockmgr(&sn->sn_lock, 1681 LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, 1682 VI_MTX(devvp), td) != 0) 1683 goto retry; 1684 TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) { 1685 vp = ITOV(ip); 1686 /* 1687 * Lookup block being written. 1688 */ 1689 if (lbn < NDADDR) { 1690 blkno = DIP(ip, i_db[lbn]); 1691 } else { 1692 td->td_pflags |= TDP_COWINPROGRESS; 1693 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 1694 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 1695 td->td_pflags &= ~TDP_COWINPROGRESS; 1696 if (error) 1697 break; 1698 indiroff = (lbn - NDADDR) % NINDIR(fs); 1699 if (ip->i_ump->um_fstype == UFS1) 1700 blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff]; 1701 else 1702 blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff]; 1703 } 1704 /* 1705 * Check to see if block needs to be copied. 1706 */ 1707 if (blkno == 0) { 1708 /* 1709 * A block that we map is being freed. If it has not 1710 * been claimed yet, we will claim or copy it (below). 1711 */ 1712 claimedblk = 1; 1713 } else if (blkno == BLK_SNAP) { 1714 /* 1715 * No previous snapshot claimed the block, 1716 * so it will be freed and become a BLK_NOCOPY 1717 * (don't care) for us. 1718 */ 1719 if (claimedblk) 1720 panic("snapblkfree: inconsistent block type"); 1721 if (lbn < NDADDR) { 1722 DIP_SET(ip, i_db[lbn], BLK_NOCOPY); 1723 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1724 } else if (ip->i_ump->um_fstype == UFS1) { 1725 ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 1726 BLK_NOCOPY; 1727 bdwrite(ibp); 1728 } else { 1729 ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = 1730 BLK_NOCOPY; 1731 bdwrite(ibp); 1732 } 1733 continue; 1734 } else /* BLK_NOCOPY or default */ { 1735 /* 1736 * If the snapshot has already copied the block 1737 * (default), or does not care about the block, 1738 * it is not needed. 1739 */ 1740 if (lbn >= NDADDR) 1741 bqrelse(ibp); 1742 continue; 1743 } 1744 /* 1745 * If this is a full size block, we will just grab it 1746 * and assign it to the snapshot inode. Otherwise we 1747 * will proceed to copy it. See explanation for this 1748 * routine as to why only a single snapshot needs to 1749 * claim this block. 1750 */ 1751 if (size == fs->fs_bsize) { 1752 #ifdef DEBUG 1753 if (snapdebug) 1754 printf("%s %d lbn %jd from inum %d\n", 1755 "Grabonremove: snapino", ip->i_number, 1756 (intmax_t)lbn, inum); 1757 #endif 1758 if (lbn < NDADDR) { 1759 DIP_SET(ip, i_db[lbn], bno); 1760 } else if (ip->i_ump->um_fstype == UFS1) { 1761 ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = bno; 1762 bdwrite(ibp); 1763 } else { 1764 ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = bno; 1765 bdwrite(ibp); 1766 } 1767 DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + btodb(size)); 1768 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1769 lockmgr(vp->v_vnlock, LK_RELEASE, NULL, td); 1770 return (1); 1771 } 1772 if (lbn >= NDADDR) 1773 bqrelse(ibp); 1774 /* 1775 * Allocate the block into which to do the copy. Note that this 1776 * allocation will never require any additional allocations for 1777 * the snapshot inode. 1778 */ 1779 td->td_pflags |= TDP_COWINPROGRESS; 1780 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 1781 fs->fs_bsize, KERNCRED, 0, &cbp); 1782 td->td_pflags &= ~TDP_COWINPROGRESS; 1783 if (error) 1784 break; 1785 #ifdef DEBUG 1786 if (snapdebug) 1787 printf("%s%d lbn %jd %s %d size %ld to blkno %jd\n", 1788 "Copyonremove: snapino ", ip->i_number, 1789 (intmax_t)lbn, "for inum", inum, size, 1790 (intmax_t)cbp->b_blkno); 1791 #endif 1792 /* 1793 * If we have already read the old block contents, then 1794 * simply copy them to the new block. Note that we need 1795 * to synchronously write snapshots that have not been 1796 * unlinked, and hence will be visible after a crash, 1797 * to ensure their integrity. 1798 */ 1799 if (savedcbp != 0) { 1800 bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); 1801 bawrite(cbp); 1802 if (dopersistence && ip->i_effnlink > 0) 1803 (void) ffs_syncvnode(vp, MNT_WAIT); 1804 continue; 1805 } 1806 /* 1807 * Otherwise, read the old block contents into the buffer. 1808 */ 1809 if ((error = readblock(vp, cbp, lbn)) != 0) { 1810 bzero(cbp->b_data, fs->fs_bsize); 1811 bawrite(cbp); 1812 if (dopersistence && ip->i_effnlink > 0) 1813 (void) ffs_syncvnode(vp, MNT_WAIT); 1814 break; 1815 } 1816 savedcbp = cbp; 1817 } 1818 /* 1819 * Note that we need to synchronously write snapshots that 1820 * have not been unlinked, and hence will be visible after 1821 * a crash, to ensure their integrity. 1822 */ 1823 if (savedcbp) { 1824 vp = savedcbp->b_vp; 1825 bawrite(savedcbp); 1826 if (dopersistence && VTOI(vp)->i_effnlink > 0) 1827 (void) ffs_syncvnode(vp, MNT_WAIT); 1828 } 1829 /* 1830 * If we have been unable to allocate a block in which to do 1831 * the copy, then return non-zero so that the fragment will 1832 * not be freed. Although space will be lost, the snapshot 1833 * will stay consistent. 1834 */ 1835 lockmgr(vp->v_vnlock, LK_RELEASE, NULL, td); 1836 return (error); 1837 } 1838 1839 /* 1840 * Associate snapshot files when mounting. 1841 */ 1842 void 1843 ffs_snapshot_mount(mp) 1844 struct mount *mp; 1845 { 1846 struct ufsmount *ump = VFSTOUFS(mp); 1847 struct vnode *devvp = ump->um_devvp; 1848 struct fs *fs = ump->um_fs; 1849 struct thread *td = curthread; 1850 struct snapdata *sn; 1851 struct vnode *vp; 1852 struct inode *ip; 1853 struct uio auio; 1854 struct iovec aiov; 1855 void *snapblklist; 1856 char *reason; 1857 daddr_t snaplistsize; 1858 int error, snaploc, loc; 1859 1860 /* 1861 * XXX The following needs to be set before ffs_truncate or 1862 * VOP_READ can be called. 1863 */ 1864 mp->mnt_stat.f_iosize = fs->fs_bsize; 1865 /* 1866 * Process each snapshot listed in the superblock. 1867 */ 1868 vp = NULL; 1869 sn = devvp->v_rdev->si_snapdata; 1870 for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) { 1871 if (fs->fs_snapinum[snaploc] == 0) 1872 break; 1873 if ((error = ffs_vget(mp, fs->fs_snapinum[snaploc], 1874 LK_EXCLUSIVE, &vp)) != 0){ 1875 printf("ffs_snapshot_mount: vget failed %d\n", error); 1876 continue; 1877 } 1878 ip = VTOI(vp); 1879 if ((ip->i_flags & SF_SNAPSHOT) == 0 || ip->i_size == 1880 lblktosize(fs, howmany(fs->fs_size, fs->fs_frag))) { 1881 if ((ip->i_flags & SF_SNAPSHOT) == 0) { 1882 reason = "non-snapshot"; 1883 } else { 1884 reason = "old format snapshot"; 1885 (void)ffs_truncate(vp, (off_t)0, 0, NOCRED, td); 1886 (void)ffs_syncvnode(vp, MNT_WAIT); 1887 } 1888 printf("ffs_snapshot_mount: %s inode %d\n", 1889 reason, fs->fs_snapinum[snaploc]); 1890 vput(vp); 1891 vp = NULL; 1892 for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) { 1893 if (fs->fs_snapinum[loc] == 0) 1894 break; 1895 fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc]; 1896 } 1897 fs->fs_snapinum[loc - 1] = 0; 1898 snaploc--; 1899 continue; 1900 } 1901 /* 1902 * If there already exist snapshots on this filesystem, grab a 1903 * reference to their shared lock. If this is the first snapshot 1904 * on this filesystem, we need to allocate a lock for the 1905 * snapshots to share. In either case, acquire the snapshot 1906 * lock and give up our original private lock. 1907 */ 1908 VI_LOCK(devvp); 1909 if (sn != NULL) { 1910 1911 VI_UNLOCK(devvp); 1912 VI_LOCK(vp); 1913 vp->v_vnlock = &sn->sn_lock; 1914 } else { 1915 VI_UNLOCK(devvp); 1916 sn = malloc(sizeof *sn, M_UFSMNT, M_WAITOK | M_ZERO); 1917 TAILQ_INIT(&sn->sn_head); 1918 lockinit(&sn->sn_lock, PVFS, "snaplk", VLKTIMEOUT, 1919 LK_CANRECURSE | LK_NOSHARE); 1920 VI_LOCK(vp); 1921 vp->v_vnlock = &sn->sn_lock; 1922 devvp->v_rdev->si_snapdata = sn; 1923 } 1924 lockmgr(vp->v_vnlock, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY, 1925 VI_MTX(vp), td); 1926 transferlockers(&vp->v_lock, vp->v_vnlock); 1927 lockmgr(&vp->v_lock, LK_RELEASE, NULL, td); 1928 /* 1929 * Link it onto the active snapshot list. 1930 */ 1931 VI_LOCK(devvp); 1932 if (ip->i_nextsnap.tqe_prev != 0) 1933 panic("ffs_snapshot_mount: %d already on list", 1934 ip->i_number); 1935 else 1936 TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap); 1937 vp->v_vflag |= VV_SYSTEM; 1938 VI_UNLOCK(devvp); 1939 VOP_UNLOCK(vp, 0, td); 1940 } 1941 /* 1942 * No usable snapshots found. 1943 */ 1944 if (vp == NULL) 1945 return; 1946 /* 1947 * Allocate the space for the block hints list. We always want to 1948 * use the list from the newest snapshot. 1949 */ 1950 auio.uio_iov = &aiov; 1951 auio.uio_iovcnt = 1; 1952 aiov.iov_base = (void *)&snaplistsize; 1953 aiov.iov_len = sizeof(snaplistsize); 1954 auio.uio_resid = aiov.iov_len; 1955 auio.uio_offset = 1956 lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)); 1957 auio.uio_segflg = UIO_SYSSPACE; 1958 auio.uio_rw = UIO_READ; 1959 auio.uio_td = td; 1960 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1961 if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 1962 printf("ffs_snapshot_mount: read_1 failed %d\n", error); 1963 VOP_UNLOCK(vp, 0, td); 1964 return; 1965 } 1966 MALLOC(snapblklist, void *, snaplistsize * sizeof(daddr_t), 1967 M_UFSMNT, M_WAITOK); 1968 auio.uio_iovcnt = 1; 1969 aiov.iov_base = snapblklist; 1970 aiov.iov_len = snaplistsize * sizeof (daddr_t); 1971 auio.uio_resid = aiov.iov_len; 1972 auio.uio_offset -= sizeof(snaplistsize); 1973 if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 1974 printf("ffs_snapshot_mount: read_2 failed %d\n", error); 1975 VOP_UNLOCK(vp, 0, td); 1976 FREE(snapblklist, M_UFSMNT); 1977 return; 1978 } 1979 VOP_UNLOCK(vp, 0, td); 1980 VI_LOCK(devvp); 1981 ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_mount"); 1982 sn->sn_listsize = snaplistsize; 1983 sn->sn_blklist = (daddr_t *)snapblklist; 1984 devvp->v_vflag |= VV_COPYONWRITE; 1985 VI_UNLOCK(devvp); 1986 } 1987 1988 /* 1989 * Disassociate snapshot files when unmounting. 1990 */ 1991 void 1992 ffs_snapshot_unmount(mp) 1993 struct mount *mp; 1994 { 1995 struct vnode *devvp = VFSTOUFS(mp)->um_devvp; 1996 struct snapdata *sn; 1997 struct inode *xp; 1998 struct vnode *vp; 1999 2000 sn = devvp->v_rdev->si_snapdata; 2001 VI_LOCK(devvp); 2002 while ((xp = TAILQ_FIRST(&sn->sn_head)) != 0) { 2003 vp = ITOV(xp); 2004 vp->v_vnlock = &vp->v_lock; 2005 TAILQ_REMOVE(&sn->sn_head, xp, i_nextsnap); 2006 xp->i_nextsnap.tqe_prev = 0; 2007 if (xp->i_effnlink > 0) { 2008 VI_UNLOCK(devvp); 2009 vrele(vp); 2010 VI_LOCK(devvp); 2011 } 2012 } 2013 devvp->v_rdev->si_snapdata = NULL; 2014 devvp->v_vflag &= ~VV_COPYONWRITE; 2015 VI_UNLOCK(devvp); 2016 if (sn->sn_blklist != NULL) { 2017 FREE(sn->sn_blklist, M_UFSMNT); 2018 sn->sn_blklist = NULL; 2019 sn->sn_listsize = 0; 2020 } 2021 lockdestroy(&sn->sn_lock); 2022 free(sn, M_UFSMNT); 2023 ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_unmount"); 2024 } 2025 2026 /* 2027 * Check for need to copy block that is about to be written, 2028 * copying the block if necessary. 2029 */ 2030 int 2031 ffs_copyonwrite(devvp, bp) 2032 struct vnode *devvp; 2033 struct buf *bp; 2034 { 2035 struct snapdata *sn; 2036 struct buf *ibp, *cbp, *savedcbp = 0; 2037 struct thread *td = curthread; 2038 struct fs *fs; 2039 struct inode *ip; 2040 struct vnode *vp = 0; 2041 ufs2_daddr_t lbn, blkno, *snapblklist; 2042 int lower, upper, mid, indiroff, error = 0; 2043 int launched_async_io, prev_norunningbuf; 2044 2045 if ((VTOI(bp->b_vp)->i_flags & SF_SNAPSHOT) != 0) 2046 return (0); /* Update on a snapshot file */ 2047 if (td->td_pflags & TDP_COWINPROGRESS) 2048 panic("ffs_copyonwrite: recursive call"); 2049 /* 2050 * First check to see if it is in the preallocated list. 2051 * By doing this check we avoid several potential deadlocks. 2052 */ 2053 VI_LOCK(devvp); 2054 sn = devvp->v_rdev->si_snapdata; 2055 if (sn == NULL || 2056 TAILQ_FIRST(&sn->sn_head) == NULL) { 2057 VI_UNLOCK(devvp); 2058 return (0); /* No snapshot */ 2059 } 2060 ip = TAILQ_FIRST(&sn->sn_head); 2061 fs = ip->i_fs; 2062 lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno)); 2063 snapblklist = sn->sn_blklist; 2064 upper = sn->sn_listsize - 1; 2065 lower = 1; 2066 while (lower <= upper) { 2067 mid = (lower + upper) / 2; 2068 if (snapblklist[mid] == lbn) 2069 break; 2070 if (snapblklist[mid] < lbn) 2071 lower = mid + 1; 2072 else 2073 upper = mid - 1; 2074 } 2075 if (lower <= upper) { 2076 VI_UNLOCK(devvp); 2077 return (0); 2078 } 2079 launched_async_io = 0; 2080 prev_norunningbuf = td->td_pflags & TDP_NORUNNINGBUF; 2081 /* 2082 * Since I/O on bp isn't yet in progress and it may be blocked 2083 * for a long time waiting on snaplk, back it out of 2084 * runningbufspace, possibly waking other threads waiting for space. 2085 */ 2086 runningbufwakeup(bp); 2087 /* 2088 * Not in the precomputed list, so check the snapshots. 2089 */ 2090 while (lockmgr(&sn->sn_lock, 2091 LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, 2092 VI_MTX(devvp), td) != 0) { 2093 VI_LOCK(devvp); 2094 sn = devvp->v_rdev->si_snapdata; 2095 if (sn == NULL || 2096 TAILQ_FIRST(&sn->sn_head) == NULL) { 2097 VI_UNLOCK(devvp); 2098 if (bp->b_runningbufspace) 2099 atomic_add_int(&runningbufspace, 2100 bp->b_runningbufspace); 2101 return (0); /* Snapshot gone */ 2102 } 2103 } 2104 TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) { 2105 vp = ITOV(ip); 2106 /* 2107 * We ensure that everything of our own that needs to be 2108 * copied will be done at the time that ffs_snapshot is 2109 * called. Thus we can skip the check here which can 2110 * deadlock in doing the lookup in UFS_BALLOC. 2111 */ 2112 if (bp->b_vp == vp) 2113 continue; 2114 /* 2115 * Check to see if block needs to be copied. We do not have 2116 * to hold the snapshot lock while doing this lookup as it 2117 * will never require any additional allocations for the 2118 * snapshot inode. 2119 */ 2120 if (lbn < NDADDR) { 2121 blkno = DIP(ip, i_db[lbn]); 2122 } else { 2123 td->td_pflags |= TDP_COWINPROGRESS | TDP_NORUNNINGBUF; 2124 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 2125 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 2126 td->td_pflags &= ~TDP_COWINPROGRESS; 2127 if (error) 2128 break; 2129 indiroff = (lbn - NDADDR) % NINDIR(fs); 2130 if (ip->i_ump->um_fstype == UFS1) 2131 blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff]; 2132 else 2133 blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff]; 2134 bqrelse(ibp); 2135 } 2136 #ifdef DIAGNOSTIC 2137 if (blkno == BLK_SNAP && bp->b_lblkno >= 0) 2138 panic("ffs_copyonwrite: bad copy block"); 2139 #endif 2140 if (blkno != 0) 2141 continue; 2142 /* 2143 * Allocate the block into which to do the copy. Since 2144 * multiple processes may all try to copy the same block, 2145 * we have to recheck our need to do a copy if we sleep 2146 * waiting for the lock. 2147 * 2148 * Because all snapshots on a filesystem share a single 2149 * lock, we ensure that we will never be in competition 2150 * with another process to allocate a block. 2151 */ 2152 td->td_pflags |= TDP_COWINPROGRESS | TDP_NORUNNINGBUF; 2153 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 2154 fs->fs_bsize, KERNCRED, 0, &cbp); 2155 td->td_pflags &= ~TDP_COWINPROGRESS; 2156 if (error) 2157 break; 2158 #ifdef DEBUG 2159 if (snapdebug) { 2160 printf("Copyonwrite: snapino %d lbn %jd for ", 2161 ip->i_number, (intmax_t)lbn); 2162 if (bp->b_vp == devvp) 2163 printf("fs metadata"); 2164 else 2165 printf("inum %d", VTOI(bp->b_vp)->i_number); 2166 printf(" lblkno %jd to blkno %jd\n", 2167 (intmax_t)bp->b_lblkno, (intmax_t)cbp->b_blkno); 2168 } 2169 #endif 2170 /* 2171 * If we have already read the old block contents, then 2172 * simply copy them to the new block. Note that we need 2173 * to synchronously write snapshots that have not been 2174 * unlinked, and hence will be visible after a crash, 2175 * to ensure their integrity. 2176 */ 2177 if (savedcbp != 0) { 2178 bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); 2179 bawrite(cbp); 2180 if (dopersistence && ip->i_effnlink > 0) 2181 (void) ffs_syncvnode(vp, MNT_WAIT); 2182 else 2183 launched_async_io = 1; 2184 continue; 2185 } 2186 /* 2187 * Otherwise, read the old block contents into the buffer. 2188 */ 2189 if ((error = readblock(vp, cbp, lbn)) != 0) { 2190 bzero(cbp->b_data, fs->fs_bsize); 2191 bawrite(cbp); 2192 if (dopersistence && ip->i_effnlink > 0) 2193 (void) ffs_syncvnode(vp, MNT_WAIT); 2194 else 2195 launched_async_io = 1; 2196 break; 2197 } 2198 savedcbp = cbp; 2199 } 2200 /* 2201 * Note that we need to synchronously write snapshots that 2202 * have not been unlinked, and hence will be visible after 2203 * a crash, to ensure their integrity. 2204 */ 2205 if (savedcbp) { 2206 vp = savedcbp->b_vp; 2207 bawrite(savedcbp); 2208 if (dopersistence && VTOI(vp)->i_effnlink > 0) 2209 (void) ffs_syncvnode(vp, MNT_WAIT); 2210 else 2211 launched_async_io = 1; 2212 } 2213 lockmgr(vp->v_vnlock, LK_RELEASE, NULL, td); 2214 td->td_pflags = (td->td_pflags & ~TDP_NORUNNINGBUF) | 2215 prev_norunningbuf; 2216 if (launched_async_io && (td->td_pflags & TDP_NORUNNINGBUF) == 0) 2217 waitrunningbufspace(); 2218 /* 2219 * I/O on bp will now be started, so count it in runningbufspace. 2220 */ 2221 if (bp->b_runningbufspace) 2222 atomic_add_int(&runningbufspace, bp->b_runningbufspace); 2223 return (error); 2224 } 2225 2226 /* 2227 * Read the specified block into the given buffer. 2228 * Much of this boiler-plate comes from bwrite(). 2229 */ 2230 static int 2231 readblock(vp, bp, lbn) 2232 struct vnode *vp; 2233 struct buf *bp; 2234 ufs2_daddr_t lbn; 2235 { 2236 struct inode *ip = VTOI(vp); 2237 struct bio *bip; 2238 2239 bip = g_alloc_bio(); 2240 bip->bio_cmd = BIO_READ; 2241 bip->bio_offset = dbtob(fsbtodb(ip->i_fs, blkstofrags(ip->i_fs, lbn))); 2242 bip->bio_data = bp->b_data; 2243 bip->bio_length = bp->b_bcount; 2244 2245 g_io_request(bip, ip->i_devvp->v_bufobj.bo_private); 2246 2247 do 2248 msleep(bip, NULL, PRIBIO, "snaprdb", hz/10); 2249 while (!(bip->bio_flags & BIO_DONE)); 2250 bp->b_error = bip->bio_error; 2251 g_destroy_bio(bip); 2252 return (bp->b_error); 2253 } 2254 2255 2256 /* 2257 * Process file deletes that were deferred by ufs_inactive() due to 2258 * the file system being suspended. 2259 */ 2260 static void 2261 process_deferred_inactive(struct mount *mp) 2262 { 2263 struct vnode *vp, *mvp; 2264 struct thread *td; 2265 int error; 2266 2267 td = curthread; 2268 (void) vn_start_secondary_write(NULL, &mp, V_WAIT); 2269 MNT_ILOCK(mp); 2270 loop: 2271 MNT_VNODE_FOREACH(vp, mp, mvp) { 2272 VI_LOCK(vp); 2273 if ((vp->v_iflag & (VI_DOOMED | VI_OWEINACT)) != VI_OWEINACT || 2274 vp->v_usecount > 0 || 2275 vp->v_type == VNON) { 2276 VI_UNLOCK(vp); 2277 continue; 2278 } 2279 MNT_IUNLOCK(mp); 2280 vholdl(vp); 2281 error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, td); 2282 if (error != 0) { 2283 vdrop(vp); 2284 MNT_ILOCK(mp); 2285 if (error == ENOENT) 2286 continue; /* vnode recycled */ 2287 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp); 2288 goto loop; 2289 } 2290 VI_LOCK(vp); 2291 if ((vp->v_iflag & VI_OWEINACT) == 0) { 2292 VI_UNLOCK(vp); 2293 VOP_UNLOCK(vp, 0, td); 2294 vdrop(vp); 2295 MNT_ILOCK(mp); 2296 continue; 2297 } 2298 2299 VNASSERT((vp->v_iflag & VI_DOINGINACT) == 0, vp, 2300 ("process_deferred_inactive: " 2301 "recursed on VI_DOINGINACT")); 2302 vp->v_iflag |= VI_DOINGINACT; 2303 vp->v_iflag &= ~VI_OWEINACT; 2304 VI_UNLOCK(vp); 2305 (void) VOP_INACTIVE(vp, td); 2306 VI_LOCK(vp); 2307 VNASSERT(vp->v_iflag & VI_DOINGINACT, vp, 2308 ("process_deferred_inactive: lost VI_DOINGINACT")); 2309 VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp, 2310 ("process_deferred_inactive: got VI_OWEINACT")); 2311 vp->v_iflag &= ~VI_DOINGINACT; 2312 VI_UNLOCK(vp); 2313 VOP_UNLOCK(vp, 0, td); 2314 vdrop(vp); 2315 MNT_ILOCK(mp); 2316 } 2317 MNT_IUNLOCK(mp); 2318 vn_finished_secondary_write(mp); 2319 } 2320 #endif 2321