1 /*- 2 * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved. 3 * 4 * Further information about snapshots can be obtained from: 5 * 6 * Marshall Kirk McKusick http://www.mckusick.com/softdep/ 7 * 1614 Oxford Street mckusick@mckusick.com 8 * Berkeley, CA 94709-1608 +1-510-843-9542 9 * USA 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY 22 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR 25 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
 *
 *	@(#)ffs_snapshot.c	8.11 (McKusick)	7/23/00
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_quota.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/sched.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/vnode.h>

#include <geom/geom.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

/* Credential used for internal snapshot I/O: kernel thread 0's ucred. */
#define KERNCRED thread0.td_ucred
#define DEBUG 1

#include "opt_ffs.h"

#ifdef NO_FFS_SNAPSHOT
/*
 * Stub implementations used when snapshot support is configured out of
 * the kernel (options NO_FFS_SNAPSHOT).  The int-returning entry points
 * fail with EINVAL; the void ones are no-ops.
 */
int
ffs_snapshot(mp, snapfile)
	struct mount *mp;
	char *snapfile;
{
	return (EINVAL);
}

int
ffs_snapblkfree(fs, devvp, bno, size, inum)
	struct fs *fs;
	struct vnode *devvp;
	ufs2_daddr_t bno;
	long size;
	ino_t inum;
{
	return (EINVAL);
}

void
ffs_snapremove(vp)
	struct vnode *vp;
{
}

void
ffs_snapshot_mount(mp)
	struct mount *mp;
{
}

void
ffs_snapshot_unmount(mp)
	struct mount *mp;
{
}

void
ffs_snapgone(ip)
	struct inode *ip;
{
}

int
ffs_copyonwrite(devvp, bp)
	struct vnode *devvp;
	struct buf *bp;
{
	return (EINVAL);
}

#else

TAILQ_HEAD(snaphead, inode);

/*
 * Per-device snapshot bookkeeping, hung off the device vnode
 * (devvp->v_rdev->si_snapdata).  All snapshots on one filesystem
 * share this structure and its lock.
 */
struct snapdata {
	struct snaphead sn_head;	/* list of snapshot inodes, oldest first */
	daddr_t sn_listsize;		/* number of entries in sn_blklist */
	daddr_t *sn_blklist;		/* preallocated snapshot block list */
	struct lock sn_lock;		/* lock shared by all snapshot vnodes */
};

/* Forward declarations; the *_ufs1/*_ufs2 pairs are parallel copies. */
static int cgaccount(int, struct vnode *, struct buf *, int);
static int expunge_ufs1(struct vnode *, struct inode *, struct fs *,
    int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
    ufs_lbn_t, int), int);
static int indiracct_ufs1(struct vnode *, struct vnode *, int,
    ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
    int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
    ufs_lbn_t, int), int);
static int fullacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int snapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int mapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int expunge_ufs2(struct vnode *, struct inode *, struct fs *,
    int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
    ufs_lbn_t, int), int);
static int indiracct_ufs2(struct vnode *, struct vnode *, int,
    ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
    int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
    ufs_lbn_t, int), int);
static int fullacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int snapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int mapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int readblock(struct vnode *vp, struct buf *, ufs2_daddr_t);
static void process_deferred_inactive(struct mount *);
static void try_free_snapdata(struct vnode *devvp, struct thread *td);

/*
 * To ensure the consistency of snapshots across crashes, we must
 * synchronously write out copied blocks before allowing the
 * originals to be modified. Because of the rather severe speed
 * penalty that this imposes, the following flag allows this
 * crash persistence to be disabled.
173 */ 174 int dopersistence = 0; 175 176 #ifdef DEBUG 177 #include <sys/sysctl.h> 178 SYSCTL_INT(_debug, OID_AUTO, dopersistence, CTLFLAG_RW, &dopersistence, 0, ""); 179 static int snapdebug = 0; 180 SYSCTL_INT(_debug, OID_AUTO, snapdebug, CTLFLAG_RW, &snapdebug, 0, ""); 181 int collectsnapstats = 0; 182 SYSCTL_INT(_debug, OID_AUTO, collectsnapstats, CTLFLAG_RW, &collectsnapstats, 183 0, ""); 184 #endif /* DEBUG */ 185 186 /* 187 * Create a snapshot file and initialize it for the filesystem. 188 */ 189 int 190 ffs_snapshot(mp, snapfile) 191 struct mount *mp; 192 char *snapfile; 193 { 194 ufs2_daddr_t numblks, blkno, *blkp, *snapblklist; 195 int error, cg, snaploc; 196 int i, size, len, loc; 197 int flag = mp->mnt_flag; 198 struct timespec starttime = {0, 0}, endtime; 199 char saved_nice = 0; 200 long redo = 0, snaplistsize = 0; 201 int32_t *lp; 202 void *space; 203 struct fs *copy_fs = NULL, *fs; 204 struct thread *td = curthread; 205 struct inode *ip, *xp; 206 struct buf *bp, *nbp, *ibp, *sbp = NULL; 207 struct nameidata nd; 208 struct mount *wrtmp; 209 struct vattr vat; 210 struct vnode *vp, *xvp, *mvp, *devvp; 211 struct uio auio; 212 struct iovec aiov; 213 struct snapdata *sn; 214 struct ufsmount *ump; 215 216 ump = VFSTOUFS(mp); 217 fs = ump->um_fs; 218 sn = NULL; 219 220 /* 221 * Need to serialize access to snapshot code per filesystem. 222 */ 223 /* 224 * Assign a snapshot slot in the superblock. 225 */ 226 UFS_LOCK(ump); 227 for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) 228 if (fs->fs_snapinum[snaploc] == 0) 229 break; 230 UFS_UNLOCK(ump); 231 if (snaploc == FSMAXSNAP) 232 return (ENOSPC); 233 /* 234 * Create the snapshot file. 
235 */ 236 restart: 237 NDINIT(&nd, CREATE, LOCKPARENT | LOCKLEAF, UIO_SYSSPACE, snapfile, td); 238 if ((error = namei(&nd)) != 0) 239 return (error); 240 if (nd.ni_vp != NULL) { 241 vput(nd.ni_vp); 242 error = EEXIST; 243 } 244 if (nd.ni_dvp->v_mount != mp) 245 error = EXDEV; 246 if (error) { 247 NDFREE(&nd, NDF_ONLY_PNBUF); 248 if (nd.ni_dvp == nd.ni_vp) 249 vrele(nd.ni_dvp); 250 else 251 vput(nd.ni_dvp); 252 return (error); 253 } 254 VATTR_NULL(&vat); 255 vat.va_type = VREG; 256 vat.va_mode = S_IRUSR; 257 vat.va_vaflags |= VA_EXCLUSIVE; 258 if (VOP_GETWRITEMOUNT(nd.ni_dvp, &wrtmp)) 259 wrtmp = NULL; 260 if (wrtmp != mp) 261 panic("ffs_snapshot: mount mismatch"); 262 vfs_rel(wrtmp); 263 if (vn_start_write(NULL, &wrtmp, V_NOWAIT) != 0) { 264 NDFREE(&nd, NDF_ONLY_PNBUF); 265 vput(nd.ni_dvp); 266 if ((error = vn_start_write(NULL, &wrtmp, 267 V_XSLEEP | PCATCH)) != 0) 268 return (error); 269 goto restart; 270 } 271 VOP_LEASE(nd.ni_dvp, td, KERNCRED, LEASE_WRITE); 272 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vat); 273 VOP_UNLOCK(nd.ni_dvp, 0, td); 274 if (error) { 275 NDFREE(&nd, NDF_ONLY_PNBUF); 276 vn_finished_write(wrtmp); 277 vrele(nd.ni_dvp); 278 return (error); 279 } 280 vp = nd.ni_vp; 281 ip = VTOI(vp); 282 devvp = ip->i_devvp; 283 /* 284 * Allocate and copy the last block contents so as to be able 285 * to set size to that of the filesystem. 286 */ 287 numblks = howmany(fs->fs_size, fs->fs_frag); 288 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)), 289 fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp); 290 if (error) 291 goto out; 292 ip->i_size = lblktosize(fs, (off_t)numblks); 293 DIP_SET(ip, i_size, ip->i_size); 294 ip->i_flag |= IN_CHANGE | IN_UPDATE; 295 error = readblock(vp, bp, numblks - 1); 296 bawrite(bp); 297 if (error != 0) 298 goto out; 299 /* 300 * Preallocate critical data structures so that we can copy 301 * them in without further allocation after we suspend all 302 * operations on the filesystem. 
We would like to just release 303 * the allocated buffers without writing them since they will 304 * be filled in below once we are ready to go, but this upsets 305 * the soft update code, so we go ahead and write the new buffers. 306 * 307 * Allocate all indirect blocks and mark all of them as not 308 * needing to be copied. 309 */ 310 for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) { 311 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno), 312 fs->fs_bsize, td->td_ucred, BA_METAONLY, &ibp); 313 if (error) 314 goto out; 315 bawrite(ibp); 316 } 317 /* 318 * Allocate copies for the superblock and its summary information. 319 */ 320 error = UFS_BALLOC(vp, fs->fs_sblockloc, fs->fs_sbsize, KERNCRED, 321 0, &nbp); 322 if (error) 323 goto out; 324 bawrite(nbp); 325 blkno = fragstoblks(fs, fs->fs_csaddr); 326 len = howmany(fs->fs_cssize, fs->fs_bsize); 327 for (loc = 0; loc < len; loc++) { 328 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)), 329 fs->fs_bsize, KERNCRED, 0, &nbp); 330 if (error) 331 goto out; 332 bawrite(nbp); 333 } 334 /* 335 * Allocate all cylinder group blocks. 336 */ 337 for (cg = 0; cg < fs->fs_ncg; cg++) { 338 error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 339 fs->fs_bsize, KERNCRED, 0, &nbp); 340 if (error) 341 goto out; 342 bawrite(nbp); 343 } 344 /* 345 * Copy all the cylinder group maps. Although the 346 * filesystem is still active, we hope that only a few 347 * cylinder groups will change between now and when we 348 * suspend operations. Thus, we will be able to quickly 349 * touch up the few cylinder groups that changed during 350 * the suspension period. 
351 */ 352 len = howmany(fs->fs_ncg, NBBY); 353 MALLOC(space, void *, len, M_DEVBUF, M_WAITOK|M_ZERO); 354 UFS_LOCK(ump); 355 fs->fs_active = space; 356 UFS_UNLOCK(ump); 357 for (cg = 0; cg < fs->fs_ncg; cg++) { 358 error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 359 fs->fs_bsize, KERNCRED, 0, &nbp); 360 if (error) 361 goto out; 362 error = cgaccount(cg, vp, nbp, 1); 363 bawrite(nbp); 364 if (error) 365 goto out; 366 } 367 #ifdef QUOTA 368 /* 369 * Turn off disk quotas for snapshot file. 370 */ 371 (void) chkdq(ip, -DIP(ip, i_blocks), KERNCRED, FORCE); 372 for (i = 0; i < MAXQUOTAS; i++) { 373 if (ip->i_dquot[i] != NODQUOT) { 374 dqrele(vp, ip->i_dquot[i]); 375 ip->i_dquot[i] = NODQUOT; 376 } 377 } 378 #endif 379 /* 380 * Change inode to snapshot type file. 381 */ 382 ip->i_flags |= SF_SNAPSHOT; 383 DIP_SET(ip, i_flags, ip->i_flags); 384 ip->i_flag |= IN_CHANGE | IN_UPDATE; 385 /* 386 * Ensure that the snapshot is completely on disk. 387 * Since we have marked it as a snapshot it is safe to 388 * unlock it as no process will be allowed to write to it. 389 */ 390 if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0) 391 goto out; 392 VOP_UNLOCK(vp, 0, td); 393 /* 394 * All allocations are done, so we can now snapshot the system. 395 * 396 * Recind nice scheduling while running with the filesystem suspended. 397 */ 398 if (td->td_proc->p_nice > 0) { 399 PROC_LOCK(td->td_proc); 400 mtx_lock_spin(&sched_lock); 401 saved_nice = td->td_proc->p_nice; 402 sched_nice(td->td_proc, 0); 403 mtx_unlock_spin(&sched_lock); 404 PROC_UNLOCK(td->td_proc); 405 } 406 /* 407 * Suspend operation on filesystem. 
408 */ 409 for (;;) { 410 vn_finished_write(wrtmp); 411 if ((error = vfs_write_suspend(vp->v_mount)) != 0) { 412 vn_start_write(NULL, &wrtmp, V_WAIT); 413 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 414 goto out; 415 } 416 if (mp->mnt_kern_flag & MNTK_SUSPENDED) 417 break; 418 vn_start_write(NULL, &wrtmp, V_WAIT); 419 } 420 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 421 if (ip->i_effnlink == 0) { 422 error = ENOENT; /* Snapshot file unlinked */ 423 goto out1; 424 } 425 if (collectsnapstats) 426 nanotime(&starttime); 427 428 /* The last block might have changed. Copy it again to be sure. */ 429 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)), 430 fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp); 431 if (error != 0) 432 goto out1; 433 error = readblock(vp, bp, numblks - 1); 434 bp->b_flags |= B_VALIDSUSPWRT; 435 bawrite(bp); 436 if (error != 0) 437 goto out1; 438 /* 439 * First, copy all the cylinder group maps that have changed. 440 */ 441 for (cg = 0; cg < fs->fs_ncg; cg++) { 442 if ((ACTIVECGNUM(fs, cg) & ACTIVECGOFF(cg)) != 0) 443 continue; 444 redo++; 445 error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 446 fs->fs_bsize, KERNCRED, 0, &nbp); 447 if (error) 448 goto out1; 449 error = cgaccount(cg, vp, nbp, 2); 450 bawrite(nbp); 451 if (error) 452 goto out1; 453 } 454 /* 455 * Grab a copy of the superblock and its summary information. 456 * We delay writing it until the suspension is released below. 457 */ 458 error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize, 459 KERNCRED, &sbp); 460 if (error) { 461 brelse(sbp); 462 sbp = NULL; 463 goto out1; 464 } 465 loc = blkoff(fs, fs->fs_sblockloc); 466 copy_fs = (struct fs *)(sbp->b_data + loc); 467 bcopy(fs, copy_fs, fs->fs_sbsize); 468 if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) 469 copy_fs->fs_clean = 1; 470 size = fs->fs_bsize < SBLOCKSIZE ? 
fs->fs_bsize : SBLOCKSIZE; 471 if (fs->fs_sbsize < size) 472 bzero(&sbp->b_data[loc + fs->fs_sbsize], size - fs->fs_sbsize); 473 size = blkroundup(fs, fs->fs_cssize); 474 if (fs->fs_contigsumsize > 0) 475 size += fs->fs_ncg * sizeof(int32_t); 476 space = malloc((u_long)size, M_UFSMNT, M_WAITOK); 477 copy_fs->fs_csp = space; 478 bcopy(fs->fs_csp, copy_fs->fs_csp, fs->fs_cssize); 479 space = (char *)space + fs->fs_cssize; 480 loc = howmany(fs->fs_cssize, fs->fs_fsize); 481 i = fs->fs_frag - loc % fs->fs_frag; 482 len = (i == fs->fs_frag) ? 0 : i * fs->fs_fsize; 483 if (len > 0) { 484 if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + loc), 485 len, KERNCRED, &bp)) != 0) { 486 brelse(bp); 487 free(copy_fs->fs_csp, M_UFSMNT); 488 bawrite(sbp); 489 sbp = NULL; 490 goto out1; 491 } 492 bcopy(bp->b_data, space, (u_int)len); 493 space = (char *)space + len; 494 bp->b_flags |= B_INVAL | B_NOCACHE; 495 brelse(bp); 496 } 497 if (fs->fs_contigsumsize > 0) { 498 copy_fs->fs_maxcluster = lp = space; 499 for (i = 0; i < fs->fs_ncg; i++) 500 *lp++ = fs->fs_contigsumsize; 501 } 502 /* 503 * We must check for active files that have been unlinked 504 * (e.g., with a zero link count). We have to expunge all 505 * trace of these files from the snapshot so that they are 506 * not reclaimed prematurely by fsck or unnecessarily dumped. 507 * We turn off the MNTK_SUSPENDED flag to avoid a panic from 508 * spec_strategy about writing on a suspended filesystem. 509 * Note that we skip unlinked snapshot files as they will 510 * be handled separately below. 511 * 512 * We also calculate the needed size for the snapshot list. 
513 */ 514 snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) + 515 FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */; 516 MNT_ILOCK(mp); 517 mp->mnt_kern_flag &= ~MNTK_SUSPENDED; 518 loop: 519 MNT_VNODE_FOREACH(xvp, mp, mvp) { 520 VI_LOCK(xvp); 521 MNT_IUNLOCK(mp); 522 if ((xvp->v_iflag & VI_DOOMED) || 523 (xvp->v_usecount == 0 && 524 (xvp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == 0) || 525 xvp->v_type == VNON || 526 (VTOI(xvp)->i_flags & SF_SNAPSHOT)) { 527 VI_UNLOCK(xvp); 528 MNT_ILOCK(mp); 529 continue; 530 } 531 /* 532 * We can skip parent directory vnode because it must have 533 * this snapshot file in it. 534 */ 535 if (xvp == nd.ni_dvp) { 536 VI_UNLOCK(xvp); 537 MNT_ILOCK(mp); 538 continue; 539 } 540 vholdl(xvp); 541 if (vn_lock(xvp, LK_EXCLUSIVE | LK_INTERLOCK, td) != 0) { 542 MNT_ILOCK(mp); 543 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp); 544 vdrop(xvp); 545 goto loop; 546 } 547 VI_LOCK(xvp); 548 if (xvp->v_usecount == 0 && 549 (xvp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == 0) { 550 VI_UNLOCK(xvp); 551 VOP_UNLOCK(xvp, 0, td); 552 vdrop(xvp); 553 MNT_ILOCK(mp); 554 continue; 555 } 556 VI_UNLOCK(xvp); 557 if (snapdebug) 558 vprint("ffs_snapshot: busy vnode", xvp); 559 if (VOP_GETATTR(xvp, &vat, td->td_ucred, td) == 0 && 560 vat.va_nlink > 0) { 561 VOP_UNLOCK(xvp, 0, td); 562 vdrop(xvp); 563 MNT_ILOCK(mp); 564 continue; 565 } 566 xp = VTOI(xvp); 567 if (ffs_checkfreefile(copy_fs, vp, xp->i_number)) { 568 VOP_UNLOCK(xvp, 0, td); 569 vdrop(xvp); 570 MNT_ILOCK(mp); 571 continue; 572 } 573 /* 574 * If there is a fragment, clear it here. 
575 */ 576 blkno = 0; 577 loc = howmany(xp->i_size, fs->fs_bsize) - 1; 578 if (loc < NDADDR) { 579 len = fragroundup(fs, blkoff(fs, xp->i_size)); 580 if (len != 0 && len < fs->fs_bsize) { 581 ffs_blkfree(ump, copy_fs, vp, 582 DIP(xp, i_db[loc]), len, xp->i_number); 583 blkno = DIP(xp, i_db[loc]); 584 DIP_SET(xp, i_db[loc], 0); 585 } 586 } 587 snaplistsize += 1; 588 if (xp->i_ump->um_fstype == UFS1) 589 error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1, 590 BLK_NOCOPY); 591 else 592 error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2, 593 BLK_NOCOPY); 594 if (blkno) 595 DIP_SET(xp, i_db[loc], blkno); 596 if (!error) 597 error = ffs_freefile(ump, copy_fs, vp, xp->i_number, 598 xp->i_mode); 599 VOP_UNLOCK(xvp, 0, td); 600 vdrop(xvp); 601 if (error) { 602 free(copy_fs->fs_csp, M_UFSMNT); 603 bawrite(sbp); 604 sbp = NULL; 605 MNT_VNODE_FOREACH_ABORT(mp, mvp); 606 goto out1; 607 } 608 MNT_ILOCK(mp); 609 } 610 MNT_IUNLOCK(mp); 611 /* 612 * If there already exist snapshots on this filesystem, grab a 613 * reference to their shared lock. If this is the first snapshot 614 * on this filesystem, we need to allocate a lock for the snapshots 615 * to share. In either case, acquire the snapshot lock and give 616 * up our original private lock. 
617 */ 618 VI_LOCK(devvp); 619 sn = devvp->v_rdev->si_snapdata; 620 if (sn != NULL) { 621 xp = TAILQ_FIRST(&sn->sn_head); 622 VI_UNLOCK(devvp); 623 VI_LOCK(vp); 624 vp->v_vnlock = &sn->sn_lock; 625 } else { 626 VI_UNLOCK(devvp); 627 sn = malloc(sizeof *sn, M_UFSMNT, M_WAITOK | M_ZERO); 628 TAILQ_INIT(&sn->sn_head); 629 lockinit(&sn->sn_lock, PVFS, "snaplk", VLKTIMEOUT, 630 LK_CANRECURSE | LK_NOSHARE); 631 VI_LOCK(vp); 632 vp->v_vnlock = &sn->sn_lock; 633 mp_fixme("si_snapdata setting is racey."); 634 devvp->v_rdev->si_snapdata = sn; 635 xp = NULL; 636 } 637 lockmgr(vp->v_vnlock, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY, 638 VI_MTX(vp), td); 639 lockmgr(&vp->v_lock, LK_RELEASE, NULL, td); 640 /* 641 * If this is the first snapshot on this filesystem, then we need 642 * to allocate the space for the list of preallocated snapshot blocks. 643 * This list will be refined below, but this preliminary one will 644 * keep us out of deadlock until the full one is ready. 645 */ 646 if (xp == NULL) { 647 MALLOC(snapblklist, daddr_t *, snaplistsize * sizeof(daddr_t), 648 M_UFSMNT, M_WAITOK); 649 blkp = &snapblklist[1]; 650 *blkp++ = lblkno(fs, fs->fs_sblockloc); 651 blkno = fragstoblks(fs, fs->fs_csaddr); 652 for (cg = 0; cg < fs->fs_ncg; cg++) { 653 if (fragstoblks(fs, cgtod(fs, cg) > blkno)) 654 break; 655 *blkp++ = fragstoblks(fs, cgtod(fs, cg)); 656 } 657 len = howmany(fs->fs_cssize, fs->fs_bsize); 658 for (loc = 0; loc < len; loc++) 659 *blkp++ = blkno + loc; 660 for (; cg < fs->fs_ncg; cg++) 661 *blkp++ = fragstoblks(fs, cgtod(fs, cg)); 662 snapblklist[0] = blkp - snapblklist; 663 VI_LOCK(devvp); 664 if (sn->sn_blklist != NULL) 665 panic("ffs_snapshot: non-empty list"); 666 sn->sn_blklist = snapblklist; 667 sn->sn_listsize = blkp - snapblklist; 668 VI_UNLOCK(devvp); 669 } 670 /* 671 * Record snapshot inode. Since this is the newest snapshot, 672 * it must be placed at the end of the list. 
673 */ 674 VI_LOCK(devvp); 675 fs->fs_snapinum[snaploc] = ip->i_number; 676 if (ip->i_nextsnap.tqe_prev != 0) 677 panic("ffs_snapshot: %d already on list", ip->i_number); 678 TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap); 679 devvp->v_vflag |= VV_COPYONWRITE; 680 VI_UNLOCK(devvp); 681 ASSERT_VOP_LOCKED(vp, "ffs_snapshot vp"); 682 vp->v_vflag |= VV_SYSTEM; 683 out1: 684 KASSERT((sn != NULL && sbp != NULL && error == 0) || 685 (sn == NULL && sbp == NULL && error != 0), 686 ("email phk@ and mckusick@")); 687 /* 688 * Resume operation on filesystem. 689 */ 690 vfs_write_resume(vp->v_mount); 691 vn_start_write(NULL, &wrtmp, V_WAIT); 692 if (collectsnapstats && starttime.tv_sec > 0) { 693 nanotime(&endtime); 694 timespecsub(&endtime, &starttime); 695 printf("%s: suspended %ld.%03ld sec, redo %ld of %d\n", 696 vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec, 697 endtime.tv_nsec / 1000000, redo, fs->fs_ncg); 698 } 699 if (sbp == NULL) 700 goto out; 701 /* 702 * Copy allocation information from all the snapshots in 703 * this snapshot and then expunge them from its view. 704 */ 705 TAILQ_FOREACH(xp, &sn->sn_head, i_nextsnap) { 706 if (xp == ip) 707 break; 708 if (xp->i_ump->um_fstype == UFS1) 709 error = expunge_ufs1(vp, xp, fs, snapacct_ufs1, 710 BLK_SNAP); 711 else 712 error = expunge_ufs2(vp, xp, fs, snapacct_ufs2, 713 BLK_SNAP); 714 if (error == 0 && xp->i_effnlink == 0) { 715 error = ffs_freefile(ump, 716 copy_fs, 717 vp, 718 xp->i_number, 719 xp->i_mode); 720 } 721 if (error) { 722 fs->fs_snapinum[snaploc] = 0; 723 goto done; 724 } 725 } 726 /* 727 * Allocate space for the full list of preallocated snapshot blocks. 728 */ 729 MALLOC(snapblklist, daddr_t *, snaplistsize * sizeof(daddr_t), 730 M_UFSMNT, M_WAITOK); 731 ip->i_snapblklist = &snapblklist[1]; 732 /* 733 * Expunge the blocks used by the snapshots from the set of 734 * blocks marked as used in the snapshot bitmaps. Also, collect 735 * the list of allocated blocks in i_snapblklist. 
736 */ 737 if (ip->i_ump->um_fstype == UFS1) 738 error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, BLK_SNAP); 739 else 740 error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, BLK_SNAP); 741 if (error) { 742 fs->fs_snapinum[snaploc] = 0; 743 FREE(snapblklist, M_UFSMNT); 744 goto done; 745 } 746 if (snaplistsize < ip->i_snapblklist - snapblklist) 747 panic("ffs_snapshot: list too small"); 748 snaplistsize = ip->i_snapblklist - snapblklist; 749 snapblklist[0] = snaplistsize; 750 ip->i_snapblklist = 0; 751 /* 752 * Write out the list of allocated blocks to the end of the snapshot. 753 */ 754 auio.uio_iov = &aiov; 755 auio.uio_iovcnt = 1; 756 aiov.iov_base = (void *)snapblklist; 757 aiov.iov_len = snaplistsize * sizeof(daddr_t); 758 auio.uio_resid = aiov.iov_len;; 759 auio.uio_offset = ip->i_size; 760 auio.uio_segflg = UIO_SYSSPACE; 761 auio.uio_rw = UIO_WRITE; 762 auio.uio_td = td; 763 if ((error = VOP_WRITE(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 764 fs->fs_snapinum[snaploc] = 0; 765 FREE(snapblklist, M_UFSMNT); 766 goto done; 767 } 768 /* 769 * Write the superblock and its summary information 770 * to the snapshot. 771 */ 772 blkno = fragstoblks(fs, fs->fs_csaddr); 773 len = howmany(fs->fs_cssize, fs->fs_bsize); 774 space = copy_fs->fs_csp; 775 for (loc = 0; loc < len; loc++) { 776 error = bread(vp, blkno + loc, fs->fs_bsize, KERNCRED, &nbp); 777 if (error) { 778 brelse(nbp); 779 fs->fs_snapinum[snaploc] = 0; 780 FREE(snapblklist, M_UFSMNT); 781 goto done; 782 } 783 bcopy(space, nbp->b_data, fs->fs_bsize); 784 space = (char *)space + fs->fs_bsize; 785 bawrite(nbp); 786 } 787 /* 788 * As this is the newest list, it is the most inclusive, so 789 * should replace the previous list. 
790 */ 791 VI_LOCK(devvp); 792 space = sn->sn_blklist; 793 sn->sn_blklist = snapblklist; 794 sn->sn_listsize = snaplistsize; 795 VI_UNLOCK(devvp); 796 if (space != NULL) 797 FREE(space, M_UFSMNT); 798 /* 799 * If another process is currently writing the buffer containing 800 * the inode for this snapshot then a deadlock can occur. Drop 801 * the snapshot lock until the buffer has been written. 802 */ 803 VREF(vp); /* Protect against ffs_snapgone() */ 804 VOP_UNLOCK(vp, 0, td); 805 (void) bread(ip->i_devvp, 806 fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 807 (int) fs->fs_bsize, NOCRED, &nbp); 808 brelse(nbp); 809 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 810 if (ip->i_effnlink == 0) 811 error = ENOENT; /* Snapshot file unlinked */ 812 else 813 vrele(vp); /* Drop extra reference */ 814 done: 815 FREE(copy_fs->fs_csp, M_UFSMNT); 816 bawrite(sbp); 817 out: 818 if (saved_nice > 0) { 819 PROC_LOCK(td->td_proc); 820 mtx_lock_spin(&sched_lock); 821 sched_nice(td->td_proc, saved_nice); 822 mtx_unlock_spin(&sched_lock); 823 PROC_UNLOCK(td->td_proc); 824 } 825 UFS_LOCK(ump); 826 if (fs->fs_active != 0) { 827 FREE(fs->fs_active, M_DEVBUF); 828 fs->fs_active = 0; 829 } 830 UFS_UNLOCK(ump); 831 mp->mnt_flag = flag; 832 if (error) 833 (void) ffs_truncate(vp, (off_t)0, 0, NOCRED, td); 834 (void) ffs_syncvnode(vp, MNT_WAIT); 835 if (error) 836 vput(vp); 837 else 838 VOP_UNLOCK(vp, 0, td); 839 vrele(nd.ni_dvp); 840 vn_finished_write(wrtmp); 841 process_deferred_inactive(mp); 842 return (error); 843 } 844 845 /* 846 * Copy a cylinder group map. All the unallocated blocks are marked 847 * BLK_NOCOPY so that the snapshot knows that it need not copy them 848 * if they are later written. If passno is one, then this is a first 849 * pass, so only setting needs to be done. If passno is 2, then this 850 * is a revision to a previous pass which must be undone as the 851 * replacement pass is done. 
 */
static int
cgaccount(cg, vp, nbp, passno)
	int cg;			/* cylinder group number to copy */
	struct vnode *vp;	/* snapshot vnode */
	struct buf *nbp;	/* buffer receiving the cg copy */
	int passno;		/* 1 = initial pass, 2 = touch-up pass */
{
	struct buf *bp, *ibp;
	struct inode *ip;
	struct cg *cgp;
	struct fs *fs;
	ufs2_daddr_t base, numblks;
	int error, len, loc, indiroff;

	ip = VTOI(vp);
	fs = ip->i_fs;
	/* Read the on-disk cylinder group and sanity-check its magic. */
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, KERNCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (EIO);
	}
	/* Mark this cg as copied so the post-suspension pass can skip it. */
	UFS_LOCK(ip->i_ump);
	ACTIVESET(fs, cg);
	UFS_UNLOCK(ip->i_ump);
	bcopy(bp->b_data, nbp->b_data, fs->fs_cgsize);
	if (fs->fs_cgsize < fs->fs_bsize)
		bzero(&nbp->b_data[fs->fs_cgsize],
		    fs->fs_bsize - fs->fs_cgsize);
	cgp = (struct cg *)nbp->b_data;
	bqrelse(bp);
	if (passno == 2)
		nbp->b_flags |= B_VALIDSUSPWRT;
	numblks = howmany(fs->fs_size, fs->fs_frag);
	len = howmany(fs->fs_fpg, fs->fs_frag);
	base = cgbase(fs, cg) / fs->fs_frag;
	if (base + len >= numblks)
		len = numblks - base - 1;
	loc = 0;
	/*
	 * Walk the cg's free-block bitmap.  Free blocks are marked
	 * BLK_NOCOPY in the snapshot's block pointers; on pass 2 any
	 * block that became allocated has its BLK_NOCOPY undone.
	 * Direct block pointers first, then indirect block entries.
	 */
	if (base < NDADDR) {
		for ( ; loc < NDADDR; loc++) {
			if (ffs_isblock(fs, cg_blksfree(cgp), loc))
				DIP_SET(ip, i_db[loc], BLK_NOCOPY);
			else if (passno == 2 && DIP(ip, i_db[loc])== BLK_NOCOPY)
				DIP_SET(ip, i_db[loc], 0);
			else if (passno == 1 && DIP(ip, i_db[loc])== BLK_NOCOPY)
				panic("ffs_snapshot: lost direct block");
		}
	}
	error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)),
	    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
	if (error) {
		return (error);
	}
	indiroff = (base + loc - NDADDR) % NINDIR(fs);
	for ( ; loc < len; loc++, indiroff++) {
		if (indiroff >= NINDIR(fs)) {
			/* Crossed into the next indirect block. */
			if (passno == 2)
				ibp->b_flags |= B_VALIDSUSPWRT;
			bawrite(ibp);
			error = UFS_BALLOC(vp,
			    lblktosize(fs, (off_t)(base + loc)),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			if (error) {
				return (error);
			}
			indiroff = 0;
		}
		if (ip->i_ump->um_fstype == UFS1) {
			if (ffs_isblock(fs, cg_blksfree(cgp), loc))
				((ufs1_daddr_t *)(ibp->b_data))[indiroff] =
				    BLK_NOCOPY;
			else if (passno == 2 && ((ufs1_daddr_t *)(ibp->b_data))
			    [indiroff] == BLK_NOCOPY)
				((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 0;
			else if (passno == 1 && ((ufs1_daddr_t *)(ibp->b_data))
			    [indiroff] == BLK_NOCOPY)
				panic("ffs_snapshot: lost indirect block");
			continue;
		}
		if (ffs_isblock(fs, cg_blksfree(cgp), loc))
			((ufs2_daddr_t *)(ibp->b_data))[indiroff] = BLK_NOCOPY;
		else if (passno == 2 &&
		    ((ufs2_daddr_t *)(ibp->b_data)) [indiroff] == BLK_NOCOPY)
			((ufs2_daddr_t *)(ibp->b_data))[indiroff] = 0;
		else if (passno == 1 &&
		    ((ufs2_daddr_t *)(ibp->b_data)) [indiroff] == BLK_NOCOPY)
			panic("ffs_snapshot: lost indirect block");
	}
	if (passno == 2)
		ibp->b_flags |= B_VALIDSUSPWRT;
	bdwrite(ibp);
	return (0);
}

/*
 * Before expunging a snapshot inode, note all the
 * blocks that it claims with BLK_SNAP so that fsck will
 * be able to account for those blocks properly and so
 * that this snapshot knows that it need not copy them
 * if the other snapshot holding them is freed. This code
 * is reproduced once each for UFS1 and UFS2.
 */
static int
expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype)
	struct vnode *snapvp;	/* the snapshot being built */
	struct inode *cancelip;	/* inode whose blocks are being expunged */
	struct fs *fs;
	int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
	    struct fs *, ufs_lbn_t, int);
	int expungetype;	/* BLK_SNAP or BLK_NOCOPY */
{
	int i, error, indiroff;
	ufs_lbn_t lbn, rlbn;
	ufs2_daddr_t len, blkno, numblks, blksperindir;
	struct ufs1_dinode *dip;
	struct thread *td = curthread;
	struct buf *bp;

	/*
	 * Prepare to expunge the inode. If its inode block has not
	 * yet been copied, then allocate and fill the copy.
	 */
	lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
	blkno = 0;
	if (lbn < NDADDR) {
		blkno = VTOI(snapvp)->i_din1->di_db[lbn];
	} else {
		/* TDP_COWINPROGRESS keeps ffs_copyonwrite from recursing. */
		td->td_pflags |= TDP_COWINPROGRESS;
		error = ffs_balloc_ufs1(snapvp, lblktosize(fs, (off_t)lbn),
		   fs->fs_bsize, KERNCRED, BA_METAONLY, &bp);
		td->td_pflags &= ~TDP_COWINPROGRESS;
		if (error)
			return (error);
		indiroff = (lbn - NDADDR) % NINDIR(fs);
		blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff];
		bqrelse(bp);
	}
	if (blkno != 0) {
		if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp)))
			return (error);
	} else {
		error = ffs_balloc_ufs1(snapvp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, 0, &bp);
		if (error)
			return (error);
		if ((error = readblock(snapvp, bp, lbn)) != 0)
			return (error);
	}
	/*
	 * Set a snapshot inode to be a zero length file, regular files
	 * or unlinked snapshots to be completely unallocated.
	 */
	dip = (struct ufs1_dinode *)bp->b_data +
	    ino_to_fsbo(fs, cancelip->i_number);
	if (expungetype == BLK_NOCOPY || cancelip->i_effnlink == 0)
		dip->di_mode = 0;
	dip->di_size = 0;
	dip->di_blocks = 0;
	dip->di_flags &= ~SF_SNAPSHOT;
	bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs1_daddr_t));
	bdwrite(bp);
	/*
	 * Now go through and expunge all the blocks in the file
	 * using the function requested.
	 */
	numblks = howmany(cancelip->i_size, fs->fs_bsize);
	/* Account for direct blocks, then the indirect pointers (lbn -1). */
	if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_db[0],
	    &cancelip->i_din1->di_db[NDADDR], fs, 0, expungetype)))
		return (error);
	if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_ib[0],
	    &cancelip->i_din1->di_ib[NIADDR], fs, -1, expungetype)))
		return (error);
	blksperindir = 1;
	lbn = -NDADDR;
	len = numblks - NDADDR;
	rlbn = NDADDR;
	for (i = 0; len > 0 && i < NIADDR; i++) {
		error = indiracct_ufs1(snapvp, ITOV(cancelip), i,
		    cancelip->i_din1->di_ib[i], lbn, rlbn, len,
		    blksperindir, fs, acctfunc, expungetype);
		if (error)
			return (error);
		blksperindir *= NINDIR(fs);
		lbn -= blksperindir + 1;
		len -= blksperindir;
		rlbn += blksperindir;
	}
	return (0);
}

/*
 * Descend an indirect block chain for vnode cancelvp accounting for all
 * its indirect blocks in snapvp.
 */
static int
indiracct_ufs1(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks,
	    blksperindir, fs, acctfunc, expungetype)
	struct vnode *snapvp;	/* snapshot vnode */
	struct vnode *cancelvp;	/* vnode being expunged */
	int level;		/* indirection level (0 = single indirect) */
	ufs1_daddr_t blkno;	/* disk address of this indirect block */
	ufs_lbn_t lbn;		/* logical (negative) block of indirect */
	ufs_lbn_t rlbn;		/* first data lbn covered by this indirect */
	ufs_lbn_t remblks;	/* data blocks remaining to account */
	ufs_lbn_t blksperindir;	/* data blocks per pointer at this level */
	struct fs *fs;
	int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
	    struct fs *, ufs_lbn_t, int);
	int expungetype;	/* BLK_SNAP or BLK_NOCOPY */
{
	int error, num, i;
	ufs_lbn_t subblksperindir;
	struct indir indirs[NIADDR + 2];
	ufs1_daddr_t last, *bap;
	struct buf *bp;

	if (blkno == 0) {
		/* A hole is only legitimate when cancelling a snapshot. */
		if (expungetype == BLK_NOCOPY)
			return (0);
		panic("indiracct_ufs1: missing indir");
	}
	if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
		return (error);
	if (lbn != indirs[num - 1 - level].in_lbn || num < 2)
		panic("indiracct_ufs1: botched params");
	/*
	 * We have to expand bread here since it will deadlock looking
	 * up the block number for any blocks that are not in the cache.
	 */
	bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0);
	bp->b_blkno = fsbtodb(fs, blkno);
	if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
	    (error = readblock(cancelvp, bp, fragstoblks(fs, blkno)))) {
		brelse(bp);
		return (error);
	}
	/*
	 * Account for the block pointers in this indirect block.
	 */
	last = howmany(remblks, blksperindir);
	if (last > NINDIR(fs))
		last = NINDIR(fs);
	/* Work on a private copy so the buffer can be released early. */
	MALLOC(bap, ufs1_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK);
	bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize);
	bqrelse(bp);
	error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs,
	    level == 0 ? rlbn : -1, expungetype);
	if (error || level == 0)
		goto out;
	/*
	 * Account for the block pointers in each of the indirect blocks
	 * in the levels below us.
	 */
	subblksperindir = blksperindir / NINDIR(fs);
	for (lbn++, level--, i = 0; i < last; i++) {
		error = indiracct_ufs1(snapvp, cancelvp, level, bap[i], lbn,
		    rlbn, remblks, subblksperindir, fs, acctfunc, expungetype);
		if (error)
			goto out;
		rlbn += blksperindir;
		lbn -= blksperindir;
		remblks -= blksperindir;
	}
out:
	FREE(bap, M_DEVBUF);
	return (error);
}

/*
 * Do both snap accounting and map accounting.
 */
static int
fullacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)
	struct vnode *vp;
	ufs1_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int exptype;	/* BLK_SNAP or BLK_NOCOPY */
{
	int error;

	if ((error = snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)))
		return (error);
	return (mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype));
}

/*
 * Identify a set of blocks allocated in a snapshot inode.
 */
static int
snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
	struct vnode *vp;	/* snapshot vnode whose pointers are marked */
	ufs1_daddr_t *oldblkp, *lastblkp;	/* range of pointers to scan */
	struct fs *fs;
	ufs_lbn_t lblkno;	/* unused here; kept for acctfunc signature */
	int expungetype;	/* BLK_SNAP or BLK_NOCOPY */
{
	struct inode *ip = VTOI(vp);
	ufs1_daddr_t blkno, *blkp;
	ufs_lbn_t lbn;
	struct buf *ibp;
	int error;

	for ( ; oldblkp < lastblkp; oldblkp++) {
		blkno = *oldblkp;
		if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
			continue;
		/* Locate the snapshot's own pointer for this block. */
		lbn = fragstoblks(fs, blkno);
		if (lbn < NDADDR) {
			blkp = &ip->i_din1->di_db[lbn];
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			error = ffs_balloc_ufs1(vp, lblktosize(fs, (off_t)lbn),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			if (error)
				return (error);
			blkp = &((ufs1_daddr_t *)(ibp->b_data))
			    [(lbn - NDADDR) % NINDIR(fs)];
		}
		/*
		 * If we are expunging a snapshot vnode and we
		 * find a block marked BLK_NOCOPY, then it is
		 * one that has been allocated to this snapshot after
		 * we took our current snapshot and can be ignored.
		 */
		if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) {
			if (lbn >= NDADDR)
				brelse(ibp);
		} else {
			if (*blkp != 0)
				panic("snapacct_ufs1: bad block");
			*blkp = expungetype;
			if (lbn >= NDADDR)
				bdwrite(ibp);
		}
	}
	return (0);
}

/*
 * Account for a set of blocks allocated in a snapshot inode.
1200 */ 1201 static int 1202 mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 1203 struct vnode *vp; 1204 ufs1_daddr_t *oldblkp, *lastblkp; 1205 struct fs *fs; 1206 ufs_lbn_t lblkno; 1207 int expungetype; 1208 { 1209 ufs1_daddr_t blkno; 1210 struct inode *ip; 1211 ino_t inum; 1212 int acctit; 1213 1214 ip = VTOI(vp); 1215 inum = ip->i_number; 1216 if (lblkno == -1) 1217 acctit = 0; 1218 else 1219 acctit = 1; 1220 for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) { 1221 blkno = *oldblkp; 1222 if (blkno == 0 || blkno == BLK_NOCOPY) 1223 continue; 1224 if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP) 1225 *ip->i_snapblklist++ = lblkno; 1226 if (blkno == BLK_SNAP) 1227 blkno = blkstofrags(fs, lblkno); 1228 ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum); 1229 } 1230 return (0); 1231 } 1232 1233 /* 1234 * Before expunging a snapshot inode, note all the 1235 * blocks that it claims with BLK_SNAP so that fsck will 1236 * be able to account for those blocks properly and so 1237 * that this snapshot knows that it need not copy them 1238 * if the other snapshot holding them is freed. This code 1239 * is reproduced once each for UFS1 and UFS2. 1240 */ 1241 static int 1242 expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype) 1243 struct vnode *snapvp; 1244 struct inode *cancelip; 1245 struct fs *fs; 1246 int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 1247 struct fs *, ufs_lbn_t, int); 1248 int expungetype; 1249 { 1250 int i, error, indiroff; 1251 ufs_lbn_t lbn, rlbn; 1252 ufs2_daddr_t len, blkno, numblks, blksperindir; 1253 struct ufs2_dinode *dip; 1254 struct thread *td = curthread; 1255 struct buf *bp; 1256 1257 /* 1258 * Prepare to expunge the inode. If its inode block has not 1259 * yet been copied, then allocate and fill the copy. 
1260 */ 1261 lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number)); 1262 blkno = 0; 1263 if (lbn < NDADDR) { 1264 blkno = VTOI(snapvp)->i_din2->di_db[lbn]; 1265 } else { 1266 td->td_pflags |= TDP_COWINPROGRESS; 1267 error = ffs_balloc_ufs2(snapvp, lblktosize(fs, (off_t)lbn), 1268 fs->fs_bsize, KERNCRED, BA_METAONLY, &bp); 1269 td->td_pflags &= ~TDP_COWINPROGRESS; 1270 if (error) 1271 return (error); 1272 indiroff = (lbn - NDADDR) % NINDIR(fs); 1273 blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff]; 1274 bqrelse(bp); 1275 } 1276 if (blkno != 0) { 1277 if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp))) 1278 return (error); 1279 } else { 1280 error = ffs_balloc_ufs2(snapvp, lblktosize(fs, (off_t)lbn), 1281 fs->fs_bsize, KERNCRED, 0, &bp); 1282 if (error) 1283 return (error); 1284 if ((error = readblock(snapvp, bp, lbn)) != 0) 1285 return (error); 1286 } 1287 /* 1288 * Set a snapshot inode to be a zero length file, regular files 1289 * to be completely unallocated. 1290 */ 1291 dip = (struct ufs2_dinode *)bp->b_data + 1292 ino_to_fsbo(fs, cancelip->i_number); 1293 if (expungetype == BLK_NOCOPY) 1294 dip->di_mode = 0; 1295 dip->di_size = 0; 1296 dip->di_blocks = 0; 1297 dip->di_flags &= ~SF_SNAPSHOT; 1298 bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs2_daddr_t)); 1299 bdwrite(bp); 1300 /* 1301 * Now go through and expunge all the blocks in the file 1302 * using the function requested. 
1303 */ 1304 numblks = howmany(cancelip->i_size, fs->fs_bsize); 1305 if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_db[0], 1306 &cancelip->i_din2->di_db[NDADDR], fs, 0, expungetype))) 1307 return (error); 1308 if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_ib[0], 1309 &cancelip->i_din2->di_ib[NIADDR], fs, -1, expungetype))) 1310 return (error); 1311 blksperindir = 1; 1312 lbn = -NDADDR; 1313 len = numblks - NDADDR; 1314 rlbn = NDADDR; 1315 for (i = 0; len > 0 && i < NIADDR; i++) { 1316 error = indiracct_ufs2(snapvp, ITOV(cancelip), i, 1317 cancelip->i_din2->di_ib[i], lbn, rlbn, len, 1318 blksperindir, fs, acctfunc, expungetype); 1319 if (error) 1320 return (error); 1321 blksperindir *= NINDIR(fs); 1322 lbn -= blksperindir + 1; 1323 len -= blksperindir; 1324 rlbn += blksperindir; 1325 } 1326 return (0); 1327 } 1328 1329 /* 1330 * Descend an indirect block chain for vnode cancelvp accounting for all 1331 * its indirect blocks in snapvp. 1332 */ 1333 static int 1334 indiracct_ufs2(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, 1335 blksperindir, fs, acctfunc, expungetype) 1336 struct vnode *snapvp; 1337 struct vnode *cancelvp; 1338 int level; 1339 ufs2_daddr_t blkno; 1340 ufs_lbn_t lbn; 1341 ufs_lbn_t rlbn; 1342 ufs_lbn_t remblks; 1343 ufs_lbn_t blksperindir; 1344 struct fs *fs; 1345 int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 1346 struct fs *, ufs_lbn_t, int); 1347 int expungetype; 1348 { 1349 int error, num, i; 1350 ufs_lbn_t subblksperindir; 1351 struct indir indirs[NIADDR + 2]; 1352 ufs2_daddr_t last, *bap; 1353 struct buf *bp; 1354 1355 if (blkno == 0) { 1356 if (expungetype == BLK_NOCOPY) 1357 return (0); 1358 panic("indiracct_ufs2: missing indir"); 1359 } 1360 if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0) 1361 return (error); 1362 if (lbn != indirs[num - 1 - level].in_lbn || num < 2) 1363 panic("indiracct_ufs2: botched params"); 1364 /* 1365 * We have to expand bread here since it will deadlock looking 
1366 * up the block number for any blocks that are not in the cache. 1367 */ 1368 bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0); 1369 bp->b_blkno = fsbtodb(fs, blkno); 1370 if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 && 1371 (error = readblock(cancelvp, bp, fragstoblks(fs, blkno)))) { 1372 brelse(bp); 1373 return (error); 1374 } 1375 /* 1376 * Account for the block pointers in this indirect block. 1377 */ 1378 last = howmany(remblks, blksperindir); 1379 if (last > NINDIR(fs)) 1380 last = NINDIR(fs); 1381 MALLOC(bap, ufs2_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK); 1382 bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); 1383 bqrelse(bp); 1384 error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, 1385 level == 0 ? rlbn : -1, expungetype); 1386 if (error || level == 0) 1387 goto out; 1388 /* 1389 * Account for the block pointers in each of the indirect blocks 1390 * in the levels below us. 1391 */ 1392 subblksperindir = blksperindir / NINDIR(fs); 1393 for (lbn++, level--, i = 0; i < last; i++) { 1394 error = indiracct_ufs2(snapvp, cancelvp, level, bap[i], lbn, 1395 rlbn, remblks, subblksperindir, fs, acctfunc, expungetype); 1396 if (error) 1397 goto out; 1398 rlbn += blksperindir; 1399 lbn -= blksperindir; 1400 remblks -= blksperindir; 1401 } 1402 out: 1403 FREE(bap, M_DEVBUF); 1404 return (error); 1405 } 1406 1407 /* 1408 * Do both snap accounting and map accounting. 1409 */ 1410 static int 1411 fullacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype) 1412 struct vnode *vp; 1413 ufs2_daddr_t *oldblkp, *lastblkp; 1414 struct fs *fs; 1415 ufs_lbn_t lblkno; 1416 int exptype; /* BLK_SNAP or BLK_NOCOPY */ 1417 { 1418 int error; 1419 1420 if ((error = snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype))) 1421 return (error); 1422 return (mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)); 1423 } 1424 1425 /* 1426 * Identify a set of blocks allocated in a snapshot inode. 
1427 */ 1428 static int 1429 snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 1430 struct vnode *vp; 1431 ufs2_daddr_t *oldblkp, *lastblkp; 1432 struct fs *fs; 1433 ufs_lbn_t lblkno; 1434 int expungetype; /* BLK_SNAP or BLK_NOCOPY */ 1435 { 1436 struct inode *ip = VTOI(vp); 1437 ufs2_daddr_t blkno, *blkp; 1438 ufs_lbn_t lbn; 1439 struct buf *ibp; 1440 int error; 1441 1442 for ( ; oldblkp < lastblkp; oldblkp++) { 1443 blkno = *oldblkp; 1444 if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP) 1445 continue; 1446 lbn = fragstoblks(fs, blkno); 1447 if (lbn < NDADDR) { 1448 blkp = &ip->i_din2->di_db[lbn]; 1449 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1450 } else { 1451 error = ffs_balloc_ufs2(vp, lblktosize(fs, (off_t)lbn), 1452 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 1453 if (error) 1454 return (error); 1455 blkp = &((ufs2_daddr_t *)(ibp->b_data)) 1456 [(lbn - NDADDR) % NINDIR(fs)]; 1457 } 1458 /* 1459 * If we are expunging a snapshot vnode and we 1460 * find a block marked BLK_NOCOPY, then it is 1461 * one that has been allocated to this snapshot after 1462 * we took our current snapshot and can be ignored. 1463 */ 1464 if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) { 1465 if (lbn >= NDADDR) 1466 brelse(ibp); 1467 } else { 1468 if (*blkp != 0) 1469 panic("snapacct_ufs2: bad block"); 1470 *blkp = expungetype; 1471 if (lbn >= NDADDR) 1472 bdwrite(ibp); 1473 } 1474 } 1475 return (0); 1476 } 1477 1478 /* 1479 * Account for a set of blocks allocated in a snapshot inode. 
1480 */ 1481 static int 1482 mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 1483 struct vnode *vp; 1484 ufs2_daddr_t *oldblkp, *lastblkp; 1485 struct fs *fs; 1486 ufs_lbn_t lblkno; 1487 int expungetype; 1488 { 1489 ufs2_daddr_t blkno; 1490 struct inode *ip; 1491 ino_t inum; 1492 int acctit; 1493 1494 ip = VTOI(vp); 1495 inum = ip->i_number; 1496 if (lblkno == -1) 1497 acctit = 0; 1498 else 1499 acctit = 1; 1500 for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) { 1501 blkno = *oldblkp; 1502 if (blkno == 0 || blkno == BLK_NOCOPY) 1503 continue; 1504 if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP) 1505 *ip->i_snapblklist++ = lblkno; 1506 if (blkno == BLK_SNAP) 1507 blkno = blkstofrags(fs, lblkno); 1508 ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum); 1509 } 1510 return (0); 1511 } 1512 1513 /* 1514 * Decrement extra reference on snapshot when last name is removed. 1515 * It will not be freed until the last open reference goes away. 1516 */ 1517 void 1518 ffs_snapgone(ip) 1519 struct inode *ip; 1520 { 1521 struct inode *xp; 1522 struct fs *fs; 1523 int snaploc; 1524 struct snapdata *sn; 1525 struct ufsmount *ump; 1526 1527 /* 1528 * Find snapshot in incore list. 1529 */ 1530 xp = NULL; 1531 sn = ip->i_devvp->v_rdev->si_snapdata; 1532 if (sn != NULL) 1533 TAILQ_FOREACH(xp, &sn->sn_head, i_nextsnap) 1534 if (xp == ip) 1535 break; 1536 if (xp != NULL) 1537 vrele(ITOV(ip)); 1538 else if (snapdebug) 1539 printf("ffs_snapgone: lost snapshot vnode %d\n", 1540 ip->i_number); 1541 /* 1542 * Delete snapshot inode from superblock. Keep list dense. 
1543 */ 1544 fs = ip->i_fs; 1545 ump = ip->i_ump; 1546 UFS_LOCK(ump); 1547 for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) 1548 if (fs->fs_snapinum[snaploc] == ip->i_number) 1549 break; 1550 if (snaploc < FSMAXSNAP) { 1551 for (snaploc++; snaploc < FSMAXSNAP; snaploc++) { 1552 if (fs->fs_snapinum[snaploc] == 0) 1553 break; 1554 fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc]; 1555 } 1556 fs->fs_snapinum[snaploc - 1] = 0; 1557 } 1558 UFS_UNLOCK(ump); 1559 } 1560 1561 /* 1562 * Prepare a snapshot file for being removed. 1563 */ 1564 void 1565 ffs_snapremove(vp) 1566 struct vnode *vp; 1567 { 1568 struct inode *ip; 1569 struct vnode *devvp; 1570 struct buf *ibp; 1571 struct fs *fs; 1572 struct thread *td = curthread; 1573 ufs2_daddr_t numblks, blkno, dblk; 1574 int error, loc, last; 1575 struct snapdata *sn; 1576 1577 ip = VTOI(vp); 1578 fs = ip->i_fs; 1579 devvp = ip->i_devvp; 1580 /* 1581 * If active, delete from incore list (this snapshot may 1582 * already have been in the process of being deleted, so 1583 * would not have been active). 1584 * 1585 * Clear copy-on-write flag if last snapshot. 1586 */ 1587 VI_LOCK(devvp); 1588 if (ip->i_nextsnap.tqe_prev != 0) { 1589 sn = devvp->v_rdev->si_snapdata; 1590 TAILQ_REMOVE(&sn->sn_head, ip, i_nextsnap); 1591 ip->i_nextsnap.tqe_prev = 0; 1592 VI_UNLOCK(devvp); 1593 lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL, td); 1594 VI_LOCK(vp); 1595 KASSERT(vp->v_vnlock == &sn->sn_lock, 1596 ("ffs_snapremove: lost lock mutation")); 1597 vp->v_vnlock = &vp->v_lock; 1598 VI_UNLOCK(vp); 1599 VI_LOCK(devvp); 1600 lockmgr(&sn->sn_lock, LK_RELEASE, NULL, td); 1601 try_free_snapdata(devvp, td); 1602 } else 1603 VI_UNLOCK(devvp); 1604 /* 1605 * Clear all BLK_NOCOPY fields. Pass any block claims to other 1606 * snapshots that want them (see ffs_snapblkfree below). 
1607 */ 1608 for (blkno = 1; blkno < NDADDR; blkno++) { 1609 dblk = DIP(ip, i_db[blkno]); 1610 if (dblk == 0) 1611 continue; 1612 if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) 1613 DIP_SET(ip, i_db[blkno], 0); 1614 else if ((dblk == blkstofrags(fs, blkno) && 1615 ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize, 1616 ip->i_number))) { 1617 DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - 1618 btodb(fs->fs_bsize)); 1619 DIP_SET(ip, i_db[blkno], 0); 1620 } 1621 } 1622 numblks = howmany(ip->i_size, fs->fs_bsize); 1623 for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) { 1624 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno), 1625 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 1626 if (error) 1627 continue; 1628 if (fs->fs_size - blkno > NINDIR(fs)) 1629 last = NINDIR(fs); 1630 else 1631 last = fs->fs_size - blkno; 1632 for (loc = 0; loc < last; loc++) { 1633 if (ip->i_ump->um_fstype == UFS1) { 1634 dblk = ((ufs1_daddr_t *)(ibp->b_data))[loc]; 1635 if (dblk == 0) 1636 continue; 1637 if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) 1638 ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0; 1639 else if ((dblk == blkstofrags(fs, blkno) && 1640 ffs_snapblkfree(fs, ip->i_devvp, dblk, 1641 fs->fs_bsize, ip->i_number))) { 1642 ip->i_din1->di_blocks -= 1643 btodb(fs->fs_bsize); 1644 ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0; 1645 } 1646 continue; 1647 } 1648 dblk = ((ufs2_daddr_t *)(ibp->b_data))[loc]; 1649 if (dblk == 0) 1650 continue; 1651 if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) 1652 ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0; 1653 else if ((dblk == blkstofrags(fs, blkno) && 1654 ffs_snapblkfree(fs, ip->i_devvp, dblk, 1655 fs->fs_bsize, ip->i_number))) { 1656 ip->i_din2->di_blocks -= btodb(fs->fs_bsize); 1657 ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0; 1658 } 1659 } 1660 bawrite(ibp); 1661 } 1662 /* 1663 * Clear snapshot flag and drop reference. 
1664 */ 1665 ip->i_flags &= ~SF_SNAPSHOT; 1666 DIP_SET(ip, i_flags, ip->i_flags); 1667 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1668 #ifdef QUOTA 1669 /* 1670 * Reenable disk quotas for ex-snapshot file. 1671 */ 1672 if (!getinoquota(ip)) 1673 (void) chkdq(ip, DIP(ip, i_blocks), KERNCRED, FORCE); 1674 #endif 1675 } 1676 1677 /* 1678 * Notification that a block is being freed. Return zero if the free 1679 * should be allowed to proceed. Return non-zero if the snapshot file 1680 * wants to claim the block. The block will be claimed if it is an 1681 * uncopied part of one of the snapshots. It will be freed if it is 1682 * either a BLK_NOCOPY or has already been copied in all of the snapshots. 1683 * If a fragment is being freed, then all snapshots that care about 1684 * it must make a copy since a snapshot file can only claim full sized 1685 * blocks. Note that if more than one snapshot file maps the block, 1686 * we can pick one at random to claim it. Since none of the snapshots 1687 * can change, we are assurred that they will all see the same unmodified 1688 * image. When deleting a snapshot file (see ffs_snapremove above), we 1689 * must push any of these claimed blocks to one of the other snapshots 1690 * that maps it. These claimed blocks are easily identified as they will 1691 * have a block number equal to their logical block number within the 1692 * snapshot. A copied block can never have this property because they 1693 * must always have been allocated from a BLK_NOCOPY location. 
1694 */ 1695 int 1696 ffs_snapblkfree(fs, devvp, bno, size, inum) 1697 struct fs *fs; 1698 struct vnode *devvp; 1699 ufs2_daddr_t bno; 1700 long size; 1701 ino_t inum; 1702 { 1703 struct buf *ibp, *cbp, *savedcbp = 0; 1704 struct thread *td = curthread; 1705 struct inode *ip; 1706 struct vnode *vp = NULL; 1707 ufs_lbn_t lbn; 1708 ufs2_daddr_t blkno; 1709 int indiroff = 0, error = 0, claimedblk = 0; 1710 struct snapdata *sn; 1711 1712 lbn = fragstoblks(fs, bno); 1713 retry: 1714 VI_LOCK(devvp); 1715 sn = devvp->v_rdev->si_snapdata; 1716 if (sn == NULL) { 1717 VI_UNLOCK(devvp); 1718 return (0); 1719 } 1720 if (lockmgr(&sn->sn_lock, 1721 LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, 1722 VI_MTX(devvp), td) != 0) 1723 goto retry; 1724 TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) { 1725 vp = ITOV(ip); 1726 /* 1727 * Lookup block being written. 1728 */ 1729 if (lbn < NDADDR) { 1730 blkno = DIP(ip, i_db[lbn]); 1731 } else { 1732 td->td_pflags |= TDP_COWINPROGRESS; 1733 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 1734 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 1735 td->td_pflags &= ~TDP_COWINPROGRESS; 1736 if (error) 1737 break; 1738 indiroff = (lbn - NDADDR) % NINDIR(fs); 1739 if (ip->i_ump->um_fstype == UFS1) 1740 blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff]; 1741 else 1742 blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff]; 1743 } 1744 /* 1745 * Check to see if block needs to be copied. 1746 */ 1747 if (blkno == 0) { 1748 /* 1749 * A block that we map is being freed. If it has not 1750 * been claimed yet, we will claim or copy it (below). 1751 */ 1752 claimedblk = 1; 1753 } else if (blkno == BLK_SNAP) { 1754 /* 1755 * No previous snapshot claimed the block, 1756 * so it will be freed and become a BLK_NOCOPY 1757 * (don't care) for us. 
1758 */ 1759 if (claimedblk) 1760 panic("snapblkfree: inconsistent block type"); 1761 if (lbn < NDADDR) { 1762 DIP_SET(ip, i_db[lbn], BLK_NOCOPY); 1763 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1764 } else if (ip->i_ump->um_fstype == UFS1) { 1765 ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 1766 BLK_NOCOPY; 1767 bdwrite(ibp); 1768 } else { 1769 ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = 1770 BLK_NOCOPY; 1771 bdwrite(ibp); 1772 } 1773 continue; 1774 } else /* BLK_NOCOPY or default */ { 1775 /* 1776 * If the snapshot has already copied the block 1777 * (default), or does not care about the block, 1778 * it is not needed. 1779 */ 1780 if (lbn >= NDADDR) 1781 bqrelse(ibp); 1782 continue; 1783 } 1784 /* 1785 * If this is a full size block, we will just grab it 1786 * and assign it to the snapshot inode. Otherwise we 1787 * will proceed to copy it. See explanation for this 1788 * routine as to why only a single snapshot needs to 1789 * claim this block. 1790 */ 1791 if (size == fs->fs_bsize) { 1792 #ifdef DEBUG 1793 if (snapdebug) 1794 printf("%s %d lbn %jd from inum %d\n", 1795 "Grabonremove: snapino", ip->i_number, 1796 (intmax_t)lbn, inum); 1797 #endif 1798 if (lbn < NDADDR) { 1799 DIP_SET(ip, i_db[lbn], bno); 1800 } else if (ip->i_ump->um_fstype == UFS1) { 1801 ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = bno; 1802 bdwrite(ibp); 1803 } else { 1804 ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = bno; 1805 bdwrite(ibp); 1806 } 1807 DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + btodb(size)); 1808 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1809 lockmgr(vp->v_vnlock, LK_RELEASE, NULL, td); 1810 return (1); 1811 } 1812 if (lbn >= NDADDR) 1813 bqrelse(ibp); 1814 /* 1815 * Allocate the block into which to do the copy. Note that this 1816 * allocation will never require any additional allocations for 1817 * the snapshot inode. 
1818 */ 1819 td->td_pflags |= TDP_COWINPROGRESS; 1820 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 1821 fs->fs_bsize, KERNCRED, 0, &cbp); 1822 td->td_pflags &= ~TDP_COWINPROGRESS; 1823 if (error) 1824 break; 1825 #ifdef DEBUG 1826 if (snapdebug) 1827 printf("%s%d lbn %jd %s %d size %ld to blkno %jd\n", 1828 "Copyonremove: snapino ", ip->i_number, 1829 (intmax_t)lbn, "for inum", inum, size, 1830 (intmax_t)cbp->b_blkno); 1831 #endif 1832 /* 1833 * If we have already read the old block contents, then 1834 * simply copy them to the new block. Note that we need 1835 * to synchronously write snapshots that have not been 1836 * unlinked, and hence will be visible after a crash, 1837 * to ensure their integrity. 1838 */ 1839 if (savedcbp != 0) { 1840 bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); 1841 bawrite(cbp); 1842 if (dopersistence && ip->i_effnlink > 0) 1843 (void) ffs_syncvnode(vp, MNT_WAIT); 1844 continue; 1845 } 1846 /* 1847 * Otherwise, read the old block contents into the buffer. 1848 */ 1849 if ((error = readblock(vp, cbp, lbn)) != 0) { 1850 bzero(cbp->b_data, fs->fs_bsize); 1851 bawrite(cbp); 1852 if (dopersistence && ip->i_effnlink > 0) 1853 (void) ffs_syncvnode(vp, MNT_WAIT); 1854 break; 1855 } 1856 savedcbp = cbp; 1857 } 1858 /* 1859 * Note that we need to synchronously write snapshots that 1860 * have not been unlinked, and hence will be visible after 1861 * a crash, to ensure their integrity. 1862 */ 1863 if (savedcbp) { 1864 vp = savedcbp->b_vp; 1865 bawrite(savedcbp); 1866 if (dopersistence && VTOI(vp)->i_effnlink > 0) 1867 (void) ffs_syncvnode(vp, MNT_WAIT); 1868 } 1869 /* 1870 * If we have been unable to allocate a block in which to do 1871 * the copy, then return non-zero so that the fragment will 1872 * not be freed. Although space will be lost, the snapshot 1873 * will stay consistent. 1874 */ 1875 lockmgr(vp->v_vnlock, LK_RELEASE, NULL, td); 1876 return (error); 1877 } 1878 1879 /* 1880 * Associate snapshot files when mounting. 
1881 */ 1882 void 1883 ffs_snapshot_mount(mp) 1884 struct mount *mp; 1885 { 1886 struct ufsmount *ump = VFSTOUFS(mp); 1887 struct vnode *devvp = ump->um_devvp; 1888 struct fs *fs = ump->um_fs; 1889 struct thread *td = curthread; 1890 struct snapdata *sn; 1891 struct vnode *vp; 1892 struct vnode *lastvp; 1893 struct inode *ip; 1894 struct uio auio; 1895 struct iovec aiov; 1896 void *snapblklist; 1897 char *reason; 1898 daddr_t snaplistsize; 1899 int error, snaploc, loc; 1900 1901 /* 1902 * XXX The following needs to be set before ffs_truncate or 1903 * VOP_READ can be called. 1904 */ 1905 mp->mnt_stat.f_iosize = fs->fs_bsize; 1906 /* 1907 * Process each snapshot listed in the superblock. 1908 */ 1909 vp = NULL; 1910 lastvp = NULL; 1911 sn = devvp->v_rdev->si_snapdata; 1912 for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) { 1913 if (fs->fs_snapinum[snaploc] == 0) 1914 break; 1915 if ((error = ffs_vget(mp, fs->fs_snapinum[snaploc], 1916 LK_EXCLUSIVE, &vp)) != 0){ 1917 printf("ffs_snapshot_mount: vget failed %d\n", error); 1918 continue; 1919 } 1920 ip = VTOI(vp); 1921 if ((ip->i_flags & SF_SNAPSHOT) == 0 || ip->i_size == 1922 lblktosize(fs, howmany(fs->fs_size, fs->fs_frag))) { 1923 if ((ip->i_flags & SF_SNAPSHOT) == 0) { 1924 reason = "non-snapshot"; 1925 } else { 1926 reason = "old format snapshot"; 1927 (void)ffs_truncate(vp, (off_t)0, 0, NOCRED, td); 1928 (void)ffs_syncvnode(vp, MNT_WAIT); 1929 } 1930 printf("ffs_snapshot_mount: %s inode %d\n", 1931 reason, fs->fs_snapinum[snaploc]); 1932 vput(vp); 1933 vp = NULL; 1934 for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) { 1935 if (fs->fs_snapinum[loc] == 0) 1936 break; 1937 fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc]; 1938 } 1939 fs->fs_snapinum[loc - 1] = 0; 1940 snaploc--; 1941 continue; 1942 } 1943 /* 1944 * If there already exist snapshots on this filesystem, grab a 1945 * reference to their shared lock. 
If this is the first snapshot 1946 * on this filesystem, we need to allocate a lock for the 1947 * snapshots to share. In either case, acquire the snapshot 1948 * lock and give up our original private lock. 1949 */ 1950 VI_LOCK(devvp); 1951 if (sn != NULL) { 1952 1953 VI_UNLOCK(devvp); 1954 VI_LOCK(vp); 1955 vp->v_vnlock = &sn->sn_lock; 1956 } else { 1957 VI_UNLOCK(devvp); 1958 sn = malloc(sizeof *sn, M_UFSMNT, M_WAITOK | M_ZERO); 1959 TAILQ_INIT(&sn->sn_head); 1960 lockinit(&sn->sn_lock, PVFS, "snaplk", VLKTIMEOUT, 1961 LK_CANRECURSE | LK_NOSHARE); 1962 VI_LOCK(vp); 1963 vp->v_vnlock = &sn->sn_lock; 1964 devvp->v_rdev->si_snapdata = sn; 1965 } 1966 lockmgr(vp->v_vnlock, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY, 1967 VI_MTX(vp), td); 1968 lockmgr(&vp->v_lock, LK_RELEASE, NULL, td); 1969 /* 1970 * Link it onto the active snapshot list. 1971 */ 1972 VI_LOCK(devvp); 1973 if (ip->i_nextsnap.tqe_prev != 0) 1974 panic("ffs_snapshot_mount: %d already on list", 1975 ip->i_number); 1976 else 1977 TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap); 1978 vp->v_vflag |= VV_SYSTEM; 1979 VI_UNLOCK(devvp); 1980 VOP_UNLOCK(vp, 0, td); 1981 lastvp = vp; 1982 } 1983 vp = lastvp; 1984 /* 1985 * No usable snapshots found. 1986 */ 1987 if (vp == NULL) 1988 return; 1989 /* 1990 * Allocate the space for the block hints list. We always want to 1991 * use the list from the newest snapshot. 
1992 */ 1993 auio.uio_iov = &aiov; 1994 auio.uio_iovcnt = 1; 1995 aiov.iov_base = (void *)&snaplistsize; 1996 aiov.iov_len = sizeof(snaplistsize); 1997 auio.uio_resid = aiov.iov_len; 1998 auio.uio_offset = 1999 lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)); 2000 auio.uio_segflg = UIO_SYSSPACE; 2001 auio.uio_rw = UIO_READ; 2002 auio.uio_td = td; 2003 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 2004 if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 2005 printf("ffs_snapshot_mount: read_1 failed %d\n", error); 2006 VOP_UNLOCK(vp, 0, td); 2007 return; 2008 } 2009 MALLOC(snapblklist, void *, snaplistsize * sizeof(daddr_t), 2010 M_UFSMNT, M_WAITOK); 2011 auio.uio_iovcnt = 1; 2012 aiov.iov_base = snapblklist; 2013 aiov.iov_len = snaplistsize * sizeof (daddr_t); 2014 auio.uio_resid = aiov.iov_len; 2015 auio.uio_offset -= sizeof(snaplistsize); 2016 if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 2017 printf("ffs_snapshot_mount: read_2 failed %d\n", error); 2018 VOP_UNLOCK(vp, 0, td); 2019 FREE(snapblklist, M_UFSMNT); 2020 return; 2021 } 2022 VOP_UNLOCK(vp, 0, td); 2023 VI_LOCK(devvp); 2024 ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_mount"); 2025 sn->sn_listsize = snaplistsize; 2026 sn->sn_blklist = (daddr_t *)snapblklist; 2027 devvp->v_vflag |= VV_COPYONWRITE; 2028 VI_UNLOCK(devvp); 2029 } 2030 2031 /* 2032 * Disassociate snapshot files when unmounting. 
 */
void
ffs_snapshot_unmount(mp)
	struct mount *mp;
{
	struct vnode *devvp = VFSTOUFS(mp)->um_devvp;
	struct snapdata *sn;
	struct inode *xp;
	struct vnode *vp;
	struct thread *td = curthread;

	VI_LOCK(devvp);
	sn = devvp->v_rdev->si_snapdata;
	while (sn != NULL && (xp = TAILQ_FIRST(&sn->sn_head)) != NULL) {
		vp = ITOV(xp);
		TAILQ_REMOVE(&sn->sn_head, xp, i_nextsnap);
		/* Clearing tqe_prev marks the inode as off the snapshot list. */
		xp->i_nextsnap.tqe_prev = 0;
		/*
		 * Take the shared snapshot lock (dropping the devvp
		 * interlock via LK_INTERLOCK), then the vnode's private
		 * lock, so that we can safely give the vnode back its
		 * own lock below.
		 */
		lockmgr(&sn->sn_lock,
			LK_INTERLOCK | LK_EXCLUSIVE,
			VI_MTX(devvp),
			td);
		VI_LOCK(vp);
		lockmgr(&vp->v_lock,
			LK_INTERLOCK | LK_EXCLUSIVE,
			VI_MTX(vp), td);
		/*
		 * Re-acquire the interlock: v_vnlock is mutated under the
		 * vnode interlock.
		 */
		VI_LOCK(vp);
		KASSERT(vp->v_vnlock == &sn->sn_lock,
		    ("ffs_snapshot_unmount: lost lock mutation"));
		vp->v_vnlock = &vp->v_lock;
		VI_UNLOCK(vp);
		lockmgr(&vp->v_lock, LK_RELEASE, NULL, td);
		lockmgr(&sn->sn_lock, LK_RELEASE, NULL, td);
		/*
		 * Drop the reference held on a still-linked snapshot file;
		 * unlinked snapshots (i_effnlink == 0) hold no such ref.
		 */
		if (xp->i_effnlink > 0)
			vrele(vp);
		/* Re-fetch under the interlock; the list may have changed. */
		VI_LOCK(devvp);
		sn = devvp->v_rdev->si_snapdata;
	}
	/* try_free_snapdata() consumes the devvp interlock on all paths. */
	try_free_snapdata(devvp, td);
	ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_unmount");
}

/*
 * Check for need to copy block that is about to be written,
 * copying the block if necessary.
 */
int
ffs_copyonwrite(devvp, bp)
	struct vnode *devvp;
	struct buf *bp;
{
	struct snapdata *sn;
	struct buf *ibp, *cbp, *savedcbp = 0;
	struct thread *td = curthread;
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp = 0;
	ufs2_daddr_t lbn, blkno, *snapblklist;
	int lower, upper, mid, indiroff, error = 0;
	int launched_async_io, prev_norunningbuf;
	long saved_runningbufspace;

	if ((VTOI(bp->b_vp)->i_flags & SF_SNAPSHOT) != 0)
		return (0);		/* Update on a snapshot file */
	if (td->td_pflags & TDP_COWINPROGRESS)
		panic("ffs_copyonwrite: recursive call");
	/*
	 * First check to see if it is in the preallocated list.
	 * By doing this check we avoid several potential deadlocks.
	 */
	VI_LOCK(devvp);
	sn = devvp->v_rdev->si_snapdata;
	if (sn == NULL ||
	    TAILQ_FIRST(&sn->sn_head) == NULL) {
		VI_UNLOCK(devvp);
		return (0);		/* No snapshot */
	}
	ip = TAILQ_FIRST(&sn->sn_head);
	fs = ip->i_fs;
	lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno));
	/*
	 * Binary search of the sorted preallocated-block list.
	 * NOTE(review): the search starts at index 1; slot 0 appears to be
	 * reserved (presumably for the list length) — confirm against the
	 * code that builds sn_blklist.
	 */
	snapblklist = sn->sn_blklist;
	upper = sn->sn_listsize - 1;
	lower = 1;
	while (lower <= upper) {
		mid = (lower + upper) / 2;
		if (snapblklist[mid] == lbn)
			break;
		if (snapblklist[mid] < lbn)
			lower = mid + 1;
		else
			upper = mid - 1;
	}
	/* lower <= upper here means we broke out on a match: already copied. */
	if (lower <= upper) {
		VI_UNLOCK(devvp);
		return (0);
	}
	launched_async_io = 0;
	prev_norunningbuf = td->td_pflags & TDP_NORUNNINGBUF;
	/*
	 * Since I/O on bp isn't yet in progress and it may be blocked
	 * for a long time waiting on snaplk, back it out of
	 * runningbufspace, possibly waking other threads waiting for space.
	 */
	saved_runningbufspace = bp->b_runningbufspace;
	if (saved_runningbufspace != 0)
		runningbufwakeup(bp);
	/*
	 * Not in the precomputed list, so check the snapshots.
	 * LK_SLEEPFAIL makes lockmgr() return failure if we slept, in
	 * which case the snapshot list may have changed and we must
	 * re-evaluate it from scratch.
	 */
	while (lockmgr(&sn->sn_lock,
		       LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
		       VI_MTX(devvp), td) != 0) {
		VI_LOCK(devvp);
		sn = devvp->v_rdev->si_snapdata;
		if (sn == NULL ||
		    TAILQ_FIRST(&sn->sn_head) == NULL) {
			VI_UNLOCK(devvp);
			/* Restore the runningbufspace we backed out above. */
			if (saved_runningbufspace != 0) {
				bp->b_runningbufspace = saved_runningbufspace;
				atomic_add_int(&runningbufspace,
					       bp->b_runningbufspace);
			}
			return (0);		/* Snapshot gone */
		}
	}
	TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) {
		vp = ITOV(ip);
		/*
		 * We ensure that everything of our own that needs to be
		 * copied will be done at the time that ffs_snapshot is
		 * called. Thus we can skip the check here which can
		 * deadlock in doing the lookup in UFS_BALLOC.
		 */
		if (bp->b_vp == vp)
			continue;
		/*
		 * Check to see if block needs to be copied. We do not have
		 * to hold the snapshot lock while doing this lookup as it
		 * will never require any additional allocations for the
		 * snapshot inode.
		 */
		if (lbn < NDADDR) {
			blkno = DIP(ip, i_db[lbn]);
		} else {
			/*
			 * TDP_COWINPROGRESS guards against recursing back
			 * into this function from the allocator.
			 */
			td->td_pflags |= TDP_COWINPROGRESS | TDP_NORUNNINGBUF;
			error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
			   fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			td->td_pflags &= ~TDP_COWINPROGRESS;
			if (error)
				break;
			indiroff = (lbn - NDADDR) % NINDIR(fs);
			if (ip->i_ump->um_fstype == UFS1)
				blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff];
			else
				blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff];
			bqrelse(ibp);
		}
#ifdef DIAGNOSTIC
		if (blkno == BLK_SNAP && bp->b_lblkno >= 0)
			panic("ffs_copyonwrite: bad copy block");
#endif
		/* Nonzero blkno: the snapshot already holds this block. */
		if (blkno != 0)
			continue;
		/*
		 * Allocate the block into which to do the copy. Since
		 * multiple processes may all try to copy the same block,
		 * we have to recheck our need to do a copy if we sleep
		 * waiting for the lock.
		 *
		 * Because all snapshots on a filesystem share a single
		 * lock, we ensure that we will never be in competition
		 * with another process to allocate a block.
		 */
		td->td_pflags |= TDP_COWINPROGRESS | TDP_NORUNNINGBUF;
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, 0, &cbp);
		td->td_pflags &= ~TDP_COWINPROGRESS;
		if (error)
			break;
#ifdef DEBUG
		if (snapdebug) {
			printf("Copyonwrite: snapino %d lbn %jd for ",
			    ip->i_number, (intmax_t)lbn);
			if (bp->b_vp == devvp)
				printf("fs metadata");
			else
				printf("inum %d", VTOI(bp->b_vp)->i_number);
			printf(" lblkno %jd to blkno %jd\n",
			    (intmax_t)bp->b_lblkno, (intmax_t)cbp->b_blkno);
		}
#endif
		/*
		 * If we have already read the old block contents, then
		 * simply copy them to the new block. Note that we need
		 * to synchronously write snapshots that have not been
		 * unlinked, and hence will be visible after a crash,
		 * to ensure their integrity.
		 */
		if (savedcbp != 0) {
			bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
			bawrite(cbp);
			if (dopersistence && ip->i_effnlink > 0)
				(void) ffs_syncvnode(vp, MNT_WAIT);
			else
				launched_async_io = 1;
			continue;
		}
		/*
		 * Otherwise, read the old block contents into the buffer.
		 * On a read error the copy is zero-filled and we stop
		 * processing further snapshots.
		 */
		if ((error = readblock(vp, cbp, lbn)) != 0) {
			bzero(cbp->b_data, fs->fs_bsize);
			bawrite(cbp);
			if (dopersistence && ip->i_effnlink > 0)
				(void) ffs_syncvnode(vp, MNT_WAIT);
			else
				launched_async_io = 1;
			break;
		}
		/* Keep the first copy around to seed the remaining snapshots. */
		savedcbp = cbp;
	}
	/*
	 * Note that we need to synchronously write snapshots that
	 * have not been unlinked, and hence will be visible after
	 * a crash, to ensure their integrity.
	 */
	if (savedcbp) {
		vp = savedcbp->b_vp;
		bawrite(savedcbp);
		if (dopersistence && VTOI(vp)->i_effnlink > 0)
			(void) ffs_syncvnode(vp, MNT_WAIT);
		else
			launched_async_io = 1;
	}
	/*
	 * vp->v_vnlock is the shared snapshot lock taken above
	 * (snapshot vnodes share sn_lock as their v_vnlock).
	 */
	lockmgr(vp->v_vnlock, LK_RELEASE, NULL, td);
	/* Restore the caller's TDP_NORUNNINGBUF state. */
	td->td_pflags = (td->td_pflags & ~TDP_NORUNNINGBUF) |
		prev_norunningbuf;
	if (launched_async_io && (td->td_pflags & TDP_NORUNNINGBUF) == 0)
		waitrunningbufspace();
	/*
	 * I/O on bp will now be started, so count it in runningbufspace.
	 */
	if (saved_runningbufspace != 0) {
		bp->b_runningbufspace = saved_runningbufspace;
		atomic_add_int(&runningbufspace, bp->b_runningbufspace);
	}
	return (error);
}

/*
 * Read the specified block into the given buffer.
 * Much of this boiler-plate comes from bwrite().
 *
 * Bypasses the buffer cache: issues a raw BIO_READ directly to the
 * underlying GEOM provider of the device vnode and waits for it.
 * Returns 0 on success or the bio error code, also stored in b_error.
 */
static int
readblock(vp, bp, lbn)
	struct vnode *vp;
	struct buf *bp;
	ufs2_daddr_t lbn;
{
	struct inode *ip = VTOI(vp);
	struct bio *bip;

	bip = g_alloc_bio();
	bip->bio_cmd = BIO_READ;
	bip->bio_offset = dbtob(fsbtodb(ip->i_fs, blkstofrags(ip->i_fs, lbn)));
	bip->bio_data = bp->b_data;
	bip->bio_length = bp->b_bcount;
	/* No completion callback: we wait synchronously below. */
	bip->bio_done = NULL;

	g_io_request(bip, ip->i_devvp->v_bufobj.bo_private);
	bp->b_error = biowait(bip, "snaprdb");
	g_destroy_bio(bip);
	return (bp->b_error);
}


/*
 * Process file deletes that were deferred by ufs_inactive() due to
 * the file system being suspended.
 */
static void
process_deferred_inactive(struct mount *mp)
{
	struct vnode *vp, *mvp;
	struct thread *td;
	int error;

	td = curthread;
	/* Block filesystem suspension while we run the deferred inactives. */
	(void) vn_start_secondary_write(NULL, &mp, V_WAIT);
	MNT_ILOCK(mp);
 loop:
	MNT_VNODE_FOREACH(vp, mp, mvp) {
		VI_LOCK(vp);
		/*
		 * Only vnodes that owe an inactivation (VI_OWEINACT set,
		 * VI_DOOMED clear), are unreferenced, and have a type
		 * are of interest.
		 */
		if ((vp->v_iflag & (VI_DOOMED | VI_OWEINACT)) != VI_OWEINACT ||
		    vp->v_usecount > 0 ||
		    vp->v_type == VNON) {
			VI_UNLOCK(vp);
			continue;
		}
		MNT_IUNLOCK(mp);
		vholdl(vp);
		error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, td);
		if (error != 0) {
			vdrop(vp);
			MNT_ILOCK(mp);
			if (error == ENOENT)
				continue;	/* vnode recycled */
			/*
			 * Any other lock failure: abort this scan and
			 * restart it from the beginning.
			 */
			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
			goto loop;
		}
		VI_LOCK(vp);
		/* Re-check: someone may have done the inactive while we slept. */
		if ((vp->v_iflag & VI_OWEINACT) == 0) {
			VI_UNLOCK(vp);
			VOP_UNLOCK(vp, 0, td);
			vdrop(vp);
			MNT_ILOCK(mp);
			continue;
		}

		VNASSERT((vp->v_iflag & VI_DOINGINACT) == 0, vp,
		    ("process_deferred_inactive: "
		     "recursed on VI_DOINGINACT"));
		/* Mirror vinactive(): flag the vnode while VOP_INACTIVE runs. */
		vp->v_iflag |= VI_DOINGINACT;
		vp->v_iflag &= ~VI_OWEINACT;
		VI_UNLOCK(vp);
		(void) VOP_INACTIVE(vp, td);
		VI_LOCK(vp);
		VNASSERT(vp->v_iflag & VI_DOINGINACT, vp,
		    ("process_deferred_inactive: lost VI_DOINGINACT"));
		VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp,
		    ("process_deferred_inactive: got VI_OWEINACT"));
		vp->v_iflag &= ~VI_DOINGINACT;
		VI_UNLOCK(vp);
		VOP_UNLOCK(vp, 0, td);
		vdrop(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	vn_finished_secondary_write(mp);
}

/*
 * Try to free snapdata associated with devvp.
 *
 * Caller must hold the devvp interlock; it is released on every path
 * out of this function.  The snapdata is torn down only when no
 * snapshots remain on its list and copy-on-write is still flagged on
 * the device vnode.
 */
static void
try_free_snapdata(struct vnode *devvp,
	struct thread *td)
{
	struct snapdata *sn;
	ufs2_daddr_t *snapblklist;

	sn = devvp->v_rdev->si_snapdata;

	if (sn == NULL || TAILQ_FIRST(&sn->sn_head) != NULL ||
	    (devvp->v_vflag & VV_COPYONWRITE) == 0) {
		VI_UNLOCK(devvp);
		return;
	}

	/* Detach the snapdata from the device before tearing it down. */
	devvp->v_rdev->si_snapdata = NULL;
	devvp->v_vflag &= ~VV_COPYONWRITE;
	snapblklist = sn->sn_blklist;
	sn->sn_blklist = NULL;
	sn->sn_listsize = 0;
	/*
	 * LK_DRAIN waits out all other users of the shared snapshot
	 * lock (dropping the devvp interlock via LK_INTERLOCK) before
	 * it is destroyed and freed.
	 */
	lockmgr(&sn->sn_lock, LK_DRAIN|LK_INTERLOCK, VI_MTX(devvp), td);
	lockmgr(&sn->sn_lock, LK_RELEASE, NULL, td);
	lockdestroy(&sn->sn_lock);
	free(sn, M_UFSMNT);
	if (snapblklist != NULL)
		FREE(snapblklist, M_UFSMNT);
}
#endif