/*-
 * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved.
 *
 * Further information about snapshots can be obtained from:
 *
 *	Marshall Kirk McKusick		http://www.mckusick.com/softdep/
 *	1614 Oxford Street		mckusick@mckusick.com
 *	Berkeley, CA 94709-1608		+1-510-843-9542
 *	USA
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_snapshot.c	8.11 (McKusick) 7/23/00
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_quota.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/sched.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/vnode.h>

#include <geom/geom.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

#define KERNCRED thread0.td_ucred
#define DEBUG 1

#include "opt_ffs.h"

#ifdef NO_FFS_SNAPSHOT
int
ffs_snapshot(mp, snapfile)
	struct mount *mp;
	char *snapfile;
{
	return (EINVAL);
}

int
ffs_snapblkfree(fs, devvp, bno, size, inum)
	struct fs *fs;
	struct vnode *devvp;
	ufs2_daddr_t bno;
	long size;
	ino_t inum;
{
	return (EINVAL);
}

void
ffs_snapremove(vp)
	struct vnode *vp;
{
}

void
ffs_snapshot_mount(mp)
	struct mount *mp;
{
}

void
ffs_snapshot_unmount(mp)
	struct mount *mp;
{
}

void
ffs_snapgone(ip)
	struct inode *ip;
{
}

int
ffs_copyonwrite(devvp, bp)
	struct vnode *devvp;
	struct buf *bp;
{
	return (EINVAL);
}

#else

TAILQ_HEAD(snaphead, inode);

struct snapdata {
	struct snaphead sn_head;
	daddr_t sn_listsize;
	daddr_t *sn_blklist;
	struct lock sn_lock;
};
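
/*
 * A snapdata hangs off each snapshotted device (si_snapdata): sn_head
 * chains the active snapshot inodes, sn_lock is the lock those vnodes
 * all share, and sn_blklist/sn_listsize describe the list of blocks
 * already owned by snapshots that ffs_copyonwrite() checks first.
 */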

static int cgaccount(int, struct vnode *, struct buf *, int);
static int expunge_ufs1(struct vnode *, struct inode *, struct fs *,
    int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
    ufs_lbn_t, int), int);
static int indiracct_ufs1(struct vnode *, struct vnode *, int,
    ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
    int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
    ufs_lbn_t, int), int);
static int fullacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int snapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int mapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int expunge_ufs2(struct vnode *, struct inode *, struct fs *,
    int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
    ufs_lbn_t, int), int);
static int indiracct_ufs2(struct vnode *, struct vnode *, int,
    ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
    int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
    ufs_lbn_t, int), int);
static int fullacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int snapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int mapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int readblock(struct vnode *vp, struct buf *, ufs2_daddr_t);
static void process_deferred_inactive(struct mount *);
static void try_free_snapdata(struct vnode *devvp, struct thread *td);

/*
 * To ensure the consistency of snapshots across crashes, we must
 * synchronously write out copied blocks before allowing the
 * originals to be modified. Because of the rather severe speed
 * penalty that this imposes, the following flag allows this
 * crash persistence to be disabled.
 */
int dopersistence = 0;

#ifdef DEBUG
#include <sys/sysctl.h>
SYSCTL_INT(_debug, OID_AUTO, dopersistence, CTLFLAG_RW, &dopersistence, 0, "");
static int snapdebug = 0;
SYSCTL_INT(_debug, OID_AUTO, snapdebug, CTLFLAG_RW, &snapdebug, 0, "");
int collectsnapstats = 0;
SYSCTL_INT(_debug, OID_AUTO, collectsnapstats, CTLFLAG_RW, &collectsnapstats,
	0, "");
#endif /* DEBUG */

/*
 * Create a snapshot file and initialize it for the filesystem.
 */
int
ffs_snapshot(mp, snapfile)
	struct mount *mp;
	char *snapfile;
{
	ufs2_daddr_t numblks, blkno, *blkp, *snapblklist;
	int error, cg, snaploc;
	int i, size, len, loc;
	int flag;
	struct timespec starttime = {0, 0}, endtime;
	char saved_nice = 0;
	long redo = 0, snaplistsize = 0;
	int32_t *lp;
	void *space;
	struct fs *copy_fs = NULL, *fs;
	struct thread *td = curthread;
	struct inode *ip, *xp;
	struct buf *bp, *nbp, *ibp, *sbp = NULL;
	struct nameidata nd;
	struct mount *wrtmp;
	struct vattr vat;
	struct vnode *vp, *xvp, *mvp, *devvp;
	struct uio auio;
	struct iovec aiov;
	struct snapdata *sn;
	struct ufsmount *ump;

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	sn = NULL;
	MNT_ILOCK(mp);
	flag = mp->mnt_flag;
	MNT_IUNLOCK(mp);
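	/*
	 * The mount flags saved in "flag" are restored, quota flags
	 * excepted, on the way out (see the "out:" path below).
	 */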
	/*
	 * Need to serialize access to snapshot code per filesystem.
	 */
	/*
	 * Assign a snapshot slot in the superblock.
	 */
	UFS_LOCK(ump);
	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
		if (fs->fs_snapinum[snaploc] == 0)
			break;
	UFS_UNLOCK(ump);
	if (snaploc == FSMAXSNAP)
		return (ENOSPC);
	/*
	 * Create the snapshot file.
	 */
restart:
	NDINIT(&nd, CREATE, LOCKPARENT | LOCKLEAF, UIO_SYSSPACE, snapfile, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	if (nd.ni_vp != NULL) {
		vput(nd.ni_vp);
		error = EEXIST;
	}
	if (nd.ni_dvp->v_mount != mp)
		error = EXDEV;
	if (error) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_dvp == nd.ni_vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		return (error);
	}
	VATTR_NULL(&vat);
	vat.va_type = VREG;
	vat.va_mode = S_IRUSR;
	vat.va_vaflags |= VA_EXCLUSIVE;
	if (VOP_GETWRITEMOUNT(nd.ni_dvp, &wrtmp))
		wrtmp = NULL;
	if (wrtmp != mp)
		panic("ffs_snapshot: mount mismatch");
	vfs_rel(wrtmp);
	if (vn_start_write(NULL, &wrtmp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &wrtmp,
		    V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VOP_LEASE(nd.ni_dvp, td, KERNCRED, LEASE_WRITE);
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vat);
	VOP_UNLOCK(nd.ni_dvp, 0, td);
	if (error) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vn_finished_write(wrtmp);
		vrele(nd.ni_dvp);
		return (error);
	}
	vp = nd.ni_vp;
	ip = VTOI(vp);
	devvp = ip->i_devvp;
	/*
	 * Allocate and copy the last block contents so as to be able
	 * to set size to that of the filesystem.
	 */
	numblks = howmany(fs->fs_size, fs->fs_frag);
	error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)),
	    fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp);
	if (error)
		goto out;
	ip->i_size = lblktosize(fs, (off_t)numblks);
	DIP_SET(ip, i_size, ip->i_size);
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	error = readblock(vp, bp, numblks - 1);
	bawrite(bp);
	if (error != 0)
		goto out;
	/*
	 * Preallocate critical data structures so that we can copy
	 * them in without further allocation after we suspend all
	 * operations on the filesystem. We would like to just release
	 * the allocated buffers without writing them since they will
	 * be filled in below once we are ready to go, but this upsets
	 * the soft update code, so we go ahead and write the new buffers.
	 *
	 * Allocate all indirect blocks and mark all of them as not
	 * needing to be copied.
	 */
	for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno),
		    fs->fs_bsize, td->td_ucred, BA_METAONLY, &ibp);
		if (error)
			goto out;
		bawrite(ibp);
	}
	/*
	 * Allocate copies for the superblock and its summary information.
	 */
	error = UFS_BALLOC(vp, fs->fs_sblockloc, fs->fs_sbsize, KERNCRED,
	    0, &nbp);
	if (error)
		goto out;
	bawrite(nbp);
	blkno = fragstoblks(fs, fs->fs_csaddr);
	len = howmany(fs->fs_cssize, fs->fs_bsize);
	for (loc = 0; loc < len; loc++) {
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)),
		    fs->fs_bsize, KERNCRED, 0, &nbp);
		if (error)
			goto out;
		bawrite(nbp);
	}
	/*
	 * Allocate all cylinder group blocks.
	 */
	for (cg = 0; cg < fs->fs_ncg; cg++) {
		error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)),
		    fs->fs_bsize, KERNCRED, 0, &nbp);
		if (error)
			goto out;
		bawrite(nbp);
	}
	/*
	 * Copy all the cylinder group maps. Although the
	 * filesystem is still active, we hope that only a few
	 * cylinder groups will change between now and when we
	 * suspend operations. Thus, we will be able to quickly
	 * touch up the few cylinder groups that changed during
	 * the suspension period.
	 */
	len = howmany(fs->fs_ncg, NBBY);
	MALLOC(space, void *, len, M_DEVBUF, M_WAITOK|M_ZERO);
	UFS_LOCK(ump);
	fs->fs_active = space;
	UFS_UNLOCK(ump);
	for (cg = 0; cg < fs->fs_ncg; cg++) {
		error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)),
		    fs->fs_bsize, KERNCRED, 0, &nbp);
		if (error)
			goto out;
		error = cgaccount(cg, vp, nbp, 1);
		bawrite(nbp);
		if (error)
			goto out;
	}
#ifdef QUOTA
	/*
	 * Turn off disk quotas for snapshot file.
	 */
	(void) chkdq(ip, -DIP(ip, i_blocks), KERNCRED, FORCE);
	for (i = 0; i < MAXQUOTAS; i++) {
		if (ip->i_dquot[i] != NODQUOT) {
			dqrele(vp, ip->i_dquot[i]);
			ip->i_dquot[i] = NODQUOT;
		}
	}
#endif
	/*
	 * Change inode to snapshot type file.
	 */
	ip->i_flags |= SF_SNAPSHOT;
	DIP_SET(ip, i_flags, ip->i_flags);
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	/*
	 * Ensure that the snapshot is completely on disk.
	 * Since we have marked it as a snapshot it is safe to
	 * unlock it as no process will be allowed to write to it.
	 */
	if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0)
		goto out;
	VOP_UNLOCK(vp, 0, td);
	/*
	 * All allocations are done, so we can now snapshot the system.
	 *
	 * Rescind nice scheduling while running with the filesystem suspended.
	 */
	if (td->td_proc->p_nice > 0) {
		PROC_LOCK(td->td_proc);
		mtx_lock_spin(&sched_lock);
		saved_nice = td->td_proc->p_nice;
		sched_nice(td->td_proc, 0);
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(td->td_proc);
	}
	/*
	 * Suspend operation on filesystem.
	 */
	for (;;) {
		vn_finished_write(wrtmp);
		if ((error = vfs_write_suspend(vp->v_mount)) != 0) {
			vn_start_write(NULL, &wrtmp, V_WAIT);
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
			goto out;
		}
		if (mp->mnt_kern_flag & MNTK_SUSPENDED)
			break;
		vn_start_write(NULL, &wrtmp, V_WAIT);
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	if (ip->i_effnlink == 0) {
		error = ENOENT;		/* Snapshot file unlinked */
		goto out1;
	}
	if (collectsnapstats)
		nanotime(&starttime);

	/* The last block might have changed. Copy it again to be sure. */
	error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)),
	    fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp);
	if (error != 0)
		goto out1;
	error = readblock(vp, bp, numblks - 1);
	bp->b_flags |= B_VALIDSUSPWRT;
	bawrite(bp);
	if (error != 0)
		goto out1;
	/*
	 * First, copy all the cylinder group maps that have changed.
	 */
	for (cg = 0; cg < fs->fs_ncg; cg++) {
		if ((ACTIVECGNUM(fs, cg) & ACTIVECGOFF(cg)) != 0)
			continue;
		redo++;
		error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)),
		    fs->fs_bsize, KERNCRED, 0, &nbp);
		if (error)
			goto out1;
		error = cgaccount(cg, vp, nbp, 2);
		bawrite(nbp);
		if (error)
			goto out1;
	}
	/*
	 * Grab a copy of the superblock and its summary information.
	 * We delay writing it until the suspension is released below.
	 */
	error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize,
	    KERNCRED, &sbp);
	if (error) {
		brelse(sbp);
		sbp = NULL;
		goto out1;
	}
	loc = blkoff(fs, fs->fs_sblockloc);
	copy_fs = (struct fs *)(sbp->b_data + loc);
	bcopy(fs, copy_fs, fs->fs_sbsize);
	if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
		copy_fs->fs_clean = 1;
	size = fs->fs_bsize < SBLOCKSIZE ? fs->fs_bsize : SBLOCKSIZE;
	if (fs->fs_sbsize < size)
		bzero(&sbp->b_data[loc + fs->fs_sbsize], size - fs->fs_sbsize);
	size = blkroundup(fs, fs->fs_cssize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
	copy_fs->fs_csp = space;
	bcopy(fs->fs_csp, copy_fs->fs_csp, fs->fs_cssize);
	space = (char *)space + fs->fs_cssize;
	loc = howmany(fs->fs_cssize, fs->fs_fsize);
	i = fs->fs_frag - loc % fs->fs_frag;
	len = (i == fs->fs_frag) ? 0 : i * fs->fs_fsize;
	if (len > 0) {
		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + loc),
		    len, KERNCRED, &bp)) != 0) {
			brelse(bp);
			free(copy_fs->fs_csp, M_UFSMNT);
			bawrite(sbp);
			sbp = NULL;
			goto out1;
		}
		bcopy(bp->b_data, space, (u_int)len);
		space = (char *)space + len;
		bp->b_flags |= B_INVAL | B_NOCACHE;
		brelse(bp);
	}
	if (fs->fs_contigsumsize > 0) {
		copy_fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
	}
	/*
	 * We must check for active files that have been unlinked
	 * (e.g., with a zero link count). We have to expunge all
	 * trace of these files from the snapshot so that they are
	 * not reclaimed prematurely by fsck or unnecessarily dumped.
	 * We turn off the MNTK_SUSPENDED flag to avoid a panic from
	 * spec_strategy about writing on a suspended filesystem.
	 * Note that we skip unlinked snapshot files as they will
	 * be handled separately below.
	 *
	 * We also calculate the needed size for the snapshot list.
	 */
	snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) +
	    FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */;
	MNT_ILOCK(mp);
	mp->mnt_kern_flag &= ~MNTK_SUSPENDED;
loop:
	MNT_VNODE_FOREACH(xvp, mp, mvp) {
		VI_LOCK(xvp);
		MNT_IUNLOCK(mp);
		if ((xvp->v_iflag & VI_DOOMED) ||
		    (xvp->v_usecount == 0 &&
		     (xvp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == 0) ||
		    xvp->v_type == VNON ||
		    (VTOI(xvp)->i_flags & SF_SNAPSHOT)) {
			VI_UNLOCK(xvp);
			MNT_ILOCK(mp);
			continue;
		}
		/*
		 * We can skip parent directory vnode because it must have
		 * this snapshot file in it.
		 */
		if (xvp == nd.ni_dvp) {
			VI_UNLOCK(xvp);
			MNT_ILOCK(mp);
			continue;
		}
		vholdl(xvp);
		if (vn_lock(xvp, LK_EXCLUSIVE | LK_INTERLOCK, td) != 0) {
			MNT_ILOCK(mp);
			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
			vdrop(xvp);
			goto loop;
		}
		VI_LOCK(xvp);
		if (xvp->v_usecount == 0 &&
		    (xvp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == 0) {
			VI_UNLOCK(xvp);
			VOP_UNLOCK(xvp, 0, td);
			vdrop(xvp);
			MNT_ILOCK(mp);
			continue;
		}
		VI_UNLOCK(xvp);
		if (snapdebug)
			vprint("ffs_snapshot: busy vnode", xvp);
		if (VOP_GETATTR(xvp, &vat, td->td_ucred, td) == 0 &&
		    vat.va_nlink > 0) {
			VOP_UNLOCK(xvp, 0, td);
			vdrop(xvp);
			MNT_ILOCK(mp);
			continue;
		}
		xp = VTOI(xvp);
		if (ffs_checkfreefile(copy_fs, vp, xp->i_number)) {
			VOP_UNLOCK(xvp, 0, td);
			vdrop(xvp);
			MNT_ILOCK(mp);
			continue;
		}
		/*
		 * If there is a fragment, clear it here.
		 */
		blkno = 0;
		loc = howmany(xp->i_size, fs->fs_bsize) - 1;
		if (loc < NDADDR) {
			len = fragroundup(fs, blkoff(fs, xp->i_size));
			if (len != 0 && len < fs->fs_bsize) {
				ffs_blkfree(ump, copy_fs, vp,
				    DIP(xp, i_db[loc]), len, xp->i_number);
				blkno = DIP(xp, i_db[loc]);
				DIP_SET(xp, i_db[loc], 0);
			}
		}
		snaplistsize += 1;
		if (xp->i_ump->um_fstype == UFS1)
			error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1,
			    BLK_NOCOPY);
		else
			error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2,
			    BLK_NOCOPY);
		if (blkno)
			DIP_SET(xp, i_db[loc], blkno);
		if (!error)
			error = ffs_freefile(ump, copy_fs, vp, xp->i_number,
			    xp->i_mode);
		VOP_UNLOCK(xvp, 0, td);
		vdrop(xvp);
		if (error) {
			free(copy_fs->fs_csp, M_UFSMNT);
			bawrite(sbp);
			sbp = NULL;
			MNT_VNODE_FOREACH_ABORT(mp, mvp);
			goto out1;
		}
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	/*
	 * If there already exist snapshots on this filesystem, grab a
	 * reference to their shared lock. If this is the first snapshot
	 * on this filesystem, we need to allocate a lock for the snapshots
	 * to share. In either case, acquire the snapshot lock and give
	 * up our original private lock.
	 */
	VI_LOCK(devvp);
	sn = devvp->v_rdev->si_snapdata;
	if (sn != NULL) {
		xp = TAILQ_FIRST(&sn->sn_head);
		VI_UNLOCK(devvp);
		VI_LOCK(vp);
		vp->v_vnlock = &sn->sn_lock;
	} else {
		VI_UNLOCK(devvp);
		sn = malloc(sizeof *sn, M_UFSMNT, M_WAITOK | M_ZERO);
		TAILQ_INIT(&sn->sn_head);
		lockinit(&sn->sn_lock, PVFS, "snaplk", VLKTIMEOUT,
		    LK_CANRECURSE | LK_NOSHARE);
		VI_LOCK(vp);
		vp->v_vnlock = &sn->sn_lock;
		mp_fixme("si_snapdata setting is racey.");
		devvp->v_rdev->si_snapdata = sn;
		xp = NULL;
	}
	lockmgr(vp->v_vnlock, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY,
	    VI_MTX(vp), td);
	lockmgr(&vp->v_lock, LK_RELEASE, NULL, td);
	/*
	 * If this is the first snapshot on this filesystem, then we need
	 * to allocate the space for the list of preallocated snapshot blocks.
	 * This list will be refined below, but this preliminary one will
	 * keep us out of deadlock until the full one is ready.
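	 * The list is kept in ascending logical block number order, with
	 * its total length stored in entry zero, the form in which
	 * ffs_copyonwrite() searches it.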
	 */
	if (xp == NULL) {
		MALLOC(snapblklist, daddr_t *, snaplistsize * sizeof(daddr_t),
		    M_UFSMNT, M_WAITOK);
		blkp = &snapblklist[1];
		*blkp++ = lblkno(fs, fs->fs_sblockloc);
		blkno = fragstoblks(fs, fs->fs_csaddr);
		for (cg = 0; cg < fs->fs_ncg; cg++) {
			if (fragstoblks(fs, cgtod(fs, cg)) > blkno)
				break;
			*blkp++ = fragstoblks(fs, cgtod(fs, cg));
		}
		len = howmany(fs->fs_cssize, fs->fs_bsize);
		for (loc = 0; loc < len; loc++)
			*blkp++ = blkno + loc;
		for (; cg < fs->fs_ncg; cg++)
			*blkp++ = fragstoblks(fs, cgtod(fs, cg));
		snapblklist[0] = blkp - snapblklist;
		VI_LOCK(devvp);
		if (sn->sn_blklist != NULL)
			panic("ffs_snapshot: non-empty list");
		sn->sn_blklist = snapblklist;
		sn->sn_listsize = blkp - snapblklist;
		VI_UNLOCK(devvp);
	}
	/*
	 * Record snapshot inode. Since this is the newest snapshot,
	 * it must be placed at the end of the list.
	 */
	VI_LOCK(devvp);
	fs->fs_snapinum[snaploc] = ip->i_number;
	if (ip->i_nextsnap.tqe_prev != 0)
		panic("ffs_snapshot: %d already on list", ip->i_number);
	TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap);
	devvp->v_vflag |= VV_COPYONWRITE;
	VI_UNLOCK(devvp);
	ASSERT_VOP_LOCKED(vp, "ffs_snapshot vp");
	vp->v_vflag |= VV_SYSTEM;
out1:
	KASSERT((sn != NULL && sbp != NULL && error == 0) ||
		(sn == NULL && sbp == NULL && error != 0),
		("email phk@ and mckusick@"));
	/*
	 * Resume operation on filesystem.
	 */
	vfs_write_resume(vp->v_mount);
	vn_start_write(NULL, &wrtmp, V_WAIT);
	if (collectsnapstats && starttime.tv_sec > 0) {
		nanotime(&endtime);
		timespecsub(&endtime, &starttime);
		printf("%s: suspended %ld.%03ld sec, redo %ld of %d\n",
		    vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec,
		    endtime.tv_nsec / 1000000, redo, fs->fs_ncg);
	}
	if (sbp == NULL)
		goto out;
	/*
	 * Copy allocation information from all the snapshots in
	 * this snapshot and then expunge them from its view.
	 */
	TAILQ_FOREACH(xp, &sn->sn_head, i_nextsnap) {
		if (xp == ip)
			break;
		if (xp->i_ump->um_fstype == UFS1)
			error = expunge_ufs1(vp, xp, fs, snapacct_ufs1,
			    BLK_SNAP);
		else
			error = expunge_ufs2(vp, xp, fs, snapacct_ufs2,
			    BLK_SNAP);
		if (error == 0 && xp->i_effnlink == 0) {
			error = ffs_freefile(ump, copy_fs, vp,
			    xp->i_number, xp->i_mode);
		}
		if (error) {
			fs->fs_snapinum[snaploc] = 0;
			goto done;
		}
	}
	/*
	 * Allocate space for the full list of preallocated snapshot blocks.
	 */
	MALLOC(snapblklist, daddr_t *, snaplistsize * sizeof(daddr_t),
	    M_UFSMNT, M_WAITOK);
	ip->i_snapblklist = &snapblklist[1];
	/*
	 * Expunge the blocks used by the snapshots from the set of
	 * blocks marked as used in the snapshot bitmaps. Also, collect
	 * the list of allocated blocks in i_snapblklist.
	 */
	if (ip->i_ump->um_fstype == UFS1)
		error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, BLK_SNAP);
	else
		error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, BLK_SNAP);
	if (error) {
		fs->fs_snapinum[snaploc] = 0;
		FREE(snapblklist, M_UFSMNT);
		goto done;
	}
	if (snaplistsize < ip->i_snapblklist - snapblklist)
		panic("ffs_snapshot: list too small");
	snaplistsize = ip->i_snapblklist - snapblklist;
	snapblklist[0] = snaplistsize;
	ip->i_snapblklist = 0;
	/*
	 * Write out the list of allocated blocks to the end of the snapshot.
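	 * It is written at the file offset recorded as the snapshot's size
	 * above, which is where ffs_snapshot_mount() later reads it back.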
	 */
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = (void *)snapblklist;
	aiov.iov_len = snaplistsize * sizeof(daddr_t);
	auio.uio_resid = aiov.iov_len;
	auio.uio_offset = ip->i_size;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = td;
	if ((error = VOP_WRITE(vp, &auio, IO_UNIT, td->td_ucred)) != 0) {
		fs->fs_snapinum[snaploc] = 0;
		FREE(snapblklist, M_UFSMNT);
		goto done;
	}
	/*
	 * Write the superblock and its summary information
	 * to the snapshot.
	 */
	blkno = fragstoblks(fs, fs->fs_csaddr);
	len = howmany(fs->fs_cssize, fs->fs_bsize);
	space = copy_fs->fs_csp;
	for (loc = 0; loc < len; loc++) {
		error = bread(vp, blkno + loc, fs->fs_bsize, KERNCRED, &nbp);
		if (error) {
			brelse(nbp);
			fs->fs_snapinum[snaploc] = 0;
			FREE(snapblklist, M_UFSMNT);
			goto done;
		}
		bcopy(space, nbp->b_data, fs->fs_bsize);
		space = (char *)space + fs->fs_bsize;
		bawrite(nbp);
	}
	/*
	 * As this is the newest list, it is the most inclusive, so
	 * should replace the previous list.
	 */
	VI_LOCK(devvp);
	space = sn->sn_blklist;
	sn->sn_blklist = snapblklist;
	sn->sn_listsize = snaplistsize;
	VI_UNLOCK(devvp);
	if (space != NULL)
		FREE(space, M_UFSMNT);
	/*
	 * If another process is currently writing the buffer containing
	 * the inode for this snapshot then a deadlock can occur. Drop
	 * the snapshot lock until the buffer has been written.
	 */
	VREF(vp);	/* Protect against ffs_snapgone() */
	VOP_UNLOCK(vp, 0, td);
	(void) bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
	    (int)fs->fs_bsize, NOCRED, &nbp);
	brelse(nbp);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	if (ip->i_effnlink == 0)
		error = ENOENT;		/* Snapshot file unlinked */
	else
		vrele(vp);		/* Drop extra reference */
done:
	FREE(copy_fs->fs_csp, M_UFSMNT);
	bawrite(sbp);
out:
	if (saved_nice > 0) {
		PROC_LOCK(td->td_proc);
		mtx_lock_spin(&sched_lock);
		sched_nice(td->td_proc, saved_nice);
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(td->td_proc);
	}
	UFS_LOCK(ump);
	if (fs->fs_active != 0) {
		FREE(fs->fs_active, M_DEVBUF);
		fs->fs_active = 0;
	}
	UFS_UNLOCK(ump);
	MNT_ILOCK(mp);
	mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | (flag & ~MNT_QUOTA);
	MNT_IUNLOCK(mp);
	if (error)
		(void) ffs_truncate(vp, (off_t)0, 0, NOCRED, td);
	(void) ffs_syncvnode(vp, MNT_WAIT);
	if (error)
		vput(vp);
	else
		VOP_UNLOCK(vp, 0, td);
	vrele(nd.ni_dvp);
	vn_finished_write(wrtmp);
	process_deferred_inactive(mp);
	return (error);
}

/*
 * Copy a cylinder group map. All the unallocated blocks are marked
 * BLK_NOCOPY so that the snapshot knows that it need not copy them
 * if they are later written. If passno is one, then this is a first
 * pass, so only setting needs to be done. If passno is 2, then this
 * is a revision to a previous pass which must be undone as the
 * replacement pass is done.
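 * Each group copied is also flagged in the fs_active bitmap (ACTIVESET
 * below); the pass-two loop in ffs_snapshot() recopies only the groups
 * whose bit has since been cleared, i.e. those that changed while the
 * filesystem was still active.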
 */
static int
cgaccount(cg, vp, nbp, passno)
	int cg;
	struct vnode *vp;
	struct buf *nbp;
	int passno;
{
	struct buf *bp, *ibp;
	struct inode *ip;
	struct cg *cgp;
	struct fs *fs;
	ufs2_daddr_t base, numblks;
	int error, len, loc, indiroff;

	ip = VTOI(vp);
	fs = ip->i_fs;
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, KERNCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (EIO);
	}
	UFS_LOCK(ip->i_ump);
	ACTIVESET(fs, cg);
	UFS_UNLOCK(ip->i_ump);
	bcopy(bp->b_data, nbp->b_data, fs->fs_cgsize);
	if (fs->fs_cgsize < fs->fs_bsize)
		bzero(&nbp->b_data[fs->fs_cgsize],
		    fs->fs_bsize - fs->fs_cgsize);
	cgp = (struct cg *)nbp->b_data;
	bqrelse(bp);
	if (passno == 2)
		nbp->b_flags |= B_VALIDSUSPWRT;
	numblks = howmany(fs->fs_size, fs->fs_frag);
	len = howmany(fs->fs_fpg, fs->fs_frag);
	base = cgbase(fs, cg) / fs->fs_frag;
	if (base + len >= numblks)
		len = numblks - base - 1;
	loc = 0;
	if (base < NDADDR) {
		for ( ; loc < NDADDR; loc++) {
			if (ffs_isblock(fs, cg_blksfree(cgp), loc))
				DIP_SET(ip, i_db[loc], BLK_NOCOPY);
			else if (passno == 2 &&
			    DIP(ip, i_db[loc]) == BLK_NOCOPY)
				DIP_SET(ip, i_db[loc], 0);
			else if (passno == 1 &&
			    DIP(ip, i_db[loc]) == BLK_NOCOPY)
				panic("ffs_snapshot: lost direct block");
		}
	}
	error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)),
	    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
	if (error) {
		return (error);
	}
	indiroff = (base + loc - NDADDR) % NINDIR(fs);
	for ( ; loc < len; loc++, indiroff++) {
		if (indiroff >= NINDIR(fs)) {
			if (passno == 2)
				ibp->b_flags |= B_VALIDSUSPWRT;
			bawrite(ibp);
			error = UFS_BALLOC(vp,
			    lblktosize(fs, (off_t)(base + loc)),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			if (error) {
				return (error);
			}
			indiroff = 0;
		}
		if (ip->i_ump->um_fstype == UFS1) {
			if (ffs_isblock(fs, cg_blksfree(cgp), loc))
				((ufs1_daddr_t *)(ibp->b_data))[indiroff] =
				    BLK_NOCOPY;
			else if (passno == 2 && ((ufs1_daddr_t *)(ibp->b_data))
			    [indiroff] == BLK_NOCOPY)
				((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 0;
			else if (passno == 1 && ((ufs1_daddr_t *)(ibp->b_data))
			    [indiroff] == BLK_NOCOPY)
				panic("ffs_snapshot: lost indirect block");
			continue;
		}
		if (ffs_isblock(fs, cg_blksfree(cgp), loc))
			((ufs2_daddr_t *)(ibp->b_data))[indiroff] = BLK_NOCOPY;
		else if (passno == 2 &&
		    ((ufs2_daddr_t *)(ibp->b_data))[indiroff] == BLK_NOCOPY)
			((ufs2_daddr_t *)(ibp->b_data))[indiroff] = 0;
		else if (passno == 1 &&
		    ((ufs2_daddr_t *)(ibp->b_data))[indiroff] == BLK_NOCOPY)
			panic("ffs_snapshot: lost indirect block");
	}
	if (passno == 2)
		ibp->b_flags |= B_VALIDSUSPWRT;
	bdwrite(ibp);
	return (0);
}

/*
 * Before expunging a snapshot inode, note all the
 * blocks that it claims with BLK_SNAP so that fsck will
 * be able to account for those blocks properly and so
 * that this snapshot knows that it need not copy them
 * if the other snapshot holding them is freed. This code
 * is reproduced once each for UFS1 and UFS2.
 */
static int
expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype)
	struct vnode *snapvp;
	struct inode *cancelip;
	struct fs *fs;
	int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
	    struct fs *, ufs_lbn_t, int);
	int expungetype;
{
	int i, error, indiroff;
	ufs_lbn_t lbn, rlbn;
	ufs2_daddr_t len, blkno, numblks, blksperindir;
	struct ufs1_dinode *dip;
	struct thread *td = curthread;
	struct buf *bp;

	/*
	 * Prepare to expunge the inode. If its inode block has not
	 * yet been copied, then allocate and fill the copy.
	 */
	lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
	blkno = 0;
	if (lbn < NDADDR) {
		blkno = VTOI(snapvp)->i_din1->di_db[lbn];
	} else {
		td->td_pflags |= TDP_COWINPROGRESS;
		error = ffs_balloc_ufs1(snapvp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, BA_METAONLY, &bp);
		td->td_pflags &= ~TDP_COWINPROGRESS;
		if (error)
			return (error);
		indiroff = (lbn - NDADDR) % NINDIR(fs);
		blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff];
		bqrelse(bp);
	}
	if (blkno != 0) {
		if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp)))
			return (error);
	} else {
		error = ffs_balloc_ufs1(snapvp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, 0, &bp);
		if (error)
			return (error);
		if ((error = readblock(snapvp, bp, lbn)) != 0)
			return (error);
	}
	/*
	 * Set a snapshot inode to be a zero length file, regular files
	 * or unlinked snapshots to be completely unallocated.
	 */
	dip = (struct ufs1_dinode *)bp->b_data +
	    ino_to_fsbo(fs, cancelip->i_number);
	if (expungetype == BLK_NOCOPY || cancelip->i_effnlink == 0)
		dip->di_mode = 0;
	dip->di_size = 0;
	dip->di_blocks = 0;
	dip->di_flags &= ~SF_SNAPSHOT;
	bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs1_daddr_t));
	bdwrite(bp);
	/*
	 * Now go through and expunge all the blocks in the file
	 * using the function requested.
	 */
	numblks = howmany(cancelip->i_size, fs->fs_bsize);
	if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_db[0],
	    &cancelip->i_din1->di_db[NDADDR], fs, 0, expungetype)))
		return (error);
	if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_ib[0],
	    &cancelip->i_din1->di_ib[NIADDR], fs, -1, expungetype)))
		return (error);
	blksperindir = 1;
	lbn = -NDADDR;
	len = numblks - NDADDR;
	rlbn = NDADDR;
	for (i = 0; len > 0 && i < NIADDR; i++) {
		error = indiracct_ufs1(snapvp, ITOV(cancelip), i,
		    cancelip->i_din1->di_ib[i], lbn, rlbn, len,
		    blksperindir, fs, acctfunc, expungetype);
		if (error)
			return (error);
		blksperindir *= NINDIR(fs);
		lbn -= blksperindir + 1;
		len -= blksperindir;
		rlbn += blksperindir;
	}
	return (0);
}

/*
 * Descend an indirect block chain for vnode cancelvp accounting for all
 * its indirect blocks in snapvp.
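 * Indirect blocks are addressed with the negative logical block numbers
 * used by ufs_getlbns(), which is why the descent starts at lbn -NDADDR.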
 */
static int
indiracct_ufs1(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks,
	blksperindir, fs, acctfunc, expungetype)
	struct vnode *snapvp;
	struct vnode *cancelvp;
	int level;
	ufs1_daddr_t blkno;
	ufs_lbn_t lbn;
	ufs_lbn_t rlbn;
	ufs_lbn_t remblks;
	ufs_lbn_t blksperindir;
	struct fs *fs;
	int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
	    struct fs *, ufs_lbn_t, int);
	int expungetype;
{
	int error, num, i;
	ufs_lbn_t subblksperindir;
	struct indir indirs[NIADDR + 2];
	ufs1_daddr_t last, *bap;
	struct buf *bp;

	if (blkno == 0) {
		if (expungetype == BLK_NOCOPY)
			return (0);
		panic("indiracct_ufs1: missing indir");
	}
	if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
		return (error);
	if (lbn != indirs[num - 1 - level].in_lbn || num < 2)
		panic("indiracct_ufs1: botched params");
	/*
	 * We have to expand bread here since it will deadlock looking
	 * up the block number for any blocks that are not in the cache.
	 */
	bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0);
	bp->b_blkno = fsbtodb(fs, blkno);
	if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
	    (error = readblock(cancelvp, bp, fragstoblks(fs, blkno)))) {
		brelse(bp);
		return (error);
	}
	/*
	 * Account for the block pointers in this indirect block.
	 */
	last = howmany(remblks, blksperindir);
	if (last > NINDIR(fs))
		last = NINDIR(fs);
	MALLOC(bap, ufs1_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK);
	bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize);
	bqrelse(bp);
	error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs,
	    level == 0 ? rlbn : -1, expungetype);
	if (error || level == 0)
		goto out;
	/*
	 * Account for the block pointers in each of the indirect blocks
	 * in the levels below us.
	 */
	subblksperindir = blksperindir / NINDIR(fs);
	for (lbn++, level--, i = 0; i < last; i++) {
		error = indiracct_ufs1(snapvp, cancelvp, level, bap[i], lbn,
		    rlbn, remblks, subblksperindir, fs, acctfunc, expungetype);
		if (error)
			goto out;
		rlbn += blksperindir;
		lbn -= blksperindir;
		remblks -= blksperindir;
	}
out:
	FREE(bap, M_DEVBUF);
	return (error);
}

/*
 * Do both snap accounting and map accounting.
 */
static int
fullacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)
	struct vnode *vp;
	ufs1_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int exptype;	/* BLK_SNAP or BLK_NOCOPY */
{
	int error;

	if ((error = snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)))
		return (error);
	return (mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype));
}

/*
 * Identify a set of blocks allocated in a snapshot inode.
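 * Each location claimed by the snapshot is overwritten with the expunge
 * type itself (BLK_SNAP or BLK_NOCOPY) in place of a real block address.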
 */
static int
snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
	struct vnode *vp;
	ufs1_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int expungetype;	/* BLK_SNAP or BLK_NOCOPY */
{
	struct inode *ip = VTOI(vp);
	ufs1_daddr_t blkno, *blkp;
	ufs_lbn_t lbn;
	struct buf *ibp;
	int error;

	for ( ; oldblkp < lastblkp; oldblkp++) {
		blkno = *oldblkp;
		if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
			continue;
		lbn = fragstoblks(fs, blkno);
		if (lbn < NDADDR) {
			blkp = &ip->i_din1->di_db[lbn];
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			error = ffs_balloc_ufs1(vp, lblktosize(fs, (off_t)lbn),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			if (error)
				return (error);
			blkp = &((ufs1_daddr_t *)(ibp->b_data))
			    [(lbn - NDADDR) % NINDIR(fs)];
		}
		/*
		 * If we are expunging a snapshot vnode and we
		 * find a block marked BLK_NOCOPY, then it is
		 * one that has been allocated to this snapshot after
		 * we took our current snapshot and can be ignored.
		 */
		if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) {
			if (lbn >= NDADDR)
				brelse(ibp);
		} else {
			if (*blkp != 0)
				panic("snapacct_ufs1: bad block");
			*blkp = expungetype;
			if (lbn >= NDADDR)
				bdwrite(ibp);
		}
	}
	return (0);
}

/*
 * Account for a set of blocks allocated in a snapshot inode.
 */
static int
mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
	struct vnode *vp;
	ufs1_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int expungetype;
{
	ufs1_daddr_t blkno;
	struct inode *ip;
	ino_t inum;
	int acctit;

	ip = VTOI(vp);
	inum = ip->i_number;
	if (lblkno == -1)
		acctit = 0;
	else
		acctit = 1;
	for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) {
		blkno = *oldblkp;
		if (blkno == 0 || blkno == BLK_NOCOPY)
			continue;
		if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP)
			*ip->i_snapblklist++ = lblkno;
		if (blkno == BLK_SNAP)
			blkno = blkstofrags(fs, lblkno);
		ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum);
	}
	return (0);
}

/*
 * Before expunging a snapshot inode, note all the
 * blocks that it claims with BLK_SNAP so that fsck will
 * be able to account for those blocks properly and so
 * that this snapshot knows that it need not copy them
 * if the other snapshot holding them is freed. This code
 * is reproduced once each for UFS1 and UFS2.
 */
static int
expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype)
	struct vnode *snapvp;
	struct inode *cancelip;
	struct fs *fs;
	int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
	    struct fs *, ufs_lbn_t, int);
	int expungetype;
{
	int i, error, indiroff;
	ufs_lbn_t lbn, rlbn;
	ufs2_daddr_t len, blkno, numblks, blksperindir;
	struct ufs2_dinode *dip;
	struct thread *td = curthread;
	struct buf *bp;

	/*
	 * Prepare to expunge the inode. If its inode block has not
	 * yet been copied, then allocate and fill the copy.
	 */
	lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
	blkno = 0;
	if (lbn < NDADDR) {
		blkno = VTOI(snapvp)->i_din2->di_db[lbn];
	} else {
		td->td_pflags |= TDP_COWINPROGRESS;
		error = ffs_balloc_ufs2(snapvp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, BA_METAONLY, &bp);
		td->td_pflags &= ~TDP_COWINPROGRESS;
		if (error)
			return (error);
		indiroff = (lbn - NDADDR) % NINDIR(fs);
		blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff];
		bqrelse(bp);
	}
	if (blkno != 0) {
		if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp)))
			return (error);
	} else {
		error = ffs_balloc_ufs2(snapvp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, 0, &bp);
		if (error)
			return (error);
		if ((error = readblock(snapvp, bp, lbn)) != 0)
			return (error);
	}
	/*
	 * Set a snapshot inode to be a zero length file, regular files
	 * to be completely unallocated.
	 */
	dip = (struct ufs2_dinode *)bp->b_data +
	    ino_to_fsbo(fs, cancelip->i_number);
	if (expungetype == BLK_NOCOPY)
		dip->di_mode = 0;
	dip->di_size = 0;
	dip->di_blocks = 0;
	dip->di_flags &= ~SF_SNAPSHOT;
	bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs2_daddr_t));
	bdwrite(bp);
	/*
	 * Now go through and expunge all the blocks in the file
	 * using the function requested.
	 */
	numblks = howmany(cancelip->i_size, fs->fs_bsize);
	if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_db[0],
	    &cancelip->i_din2->di_db[NDADDR], fs, 0, expungetype)))
		return (error);
	if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_ib[0],
	    &cancelip->i_din2->di_ib[NIADDR], fs, -1, expungetype)))
		return (error);
	blksperindir = 1;
	lbn = -NDADDR;
	len = numblks - NDADDR;
	rlbn = NDADDR;
	for (i = 0; len > 0 && i < NIADDR; i++) {
		error = indiracct_ufs2(snapvp, ITOV(cancelip), i,
		    cancelip->i_din2->di_ib[i], lbn, rlbn, len,
		    blksperindir, fs, acctfunc, expungetype);
		if (error)
			return (error);
		blksperindir *= NINDIR(fs);
		lbn -= blksperindir + 1;
		len -= blksperindir;
		rlbn += blksperindir;
	}
	return (0);
}

/*
 * Descend an indirect block chain for vnode cancelvp accounting for all
 * its indirect blocks in snapvp.
 */
static int
indiracct_ufs2(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks,
	blksperindir, fs, acctfunc, expungetype)
	struct vnode *snapvp;
	struct vnode *cancelvp;
	int level;
	ufs2_daddr_t blkno;
	ufs_lbn_t lbn;
	ufs_lbn_t rlbn;
	ufs_lbn_t remblks;
	ufs_lbn_t blksperindir;
	struct fs *fs;
	int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
	    struct fs *, ufs_lbn_t, int);
	int expungetype;
{
	int error, num, i;
	ufs_lbn_t subblksperindir;
	struct indir indirs[NIADDR + 2];
	ufs2_daddr_t last, *bap;
	struct buf *bp;

	if (blkno == 0) {
		if (expungetype == BLK_NOCOPY)
			return (0);
		panic("indiracct_ufs2: missing indir");
	}
	if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
		return (error);
	if (lbn != indirs[num - 1 - level].in_lbn || num < 2)
		panic("indiracct_ufs2: botched params");
	/*
	 * We have to expand bread here since it will deadlock looking
	 * up the block number for any blocks that are not in the cache.
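	 * Instead getblk() is used and the physical block number filled
	 * in by hand, with readblock() fetching the contents only when
	 * the buffer is not already valid.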
	 */
	bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0);
	bp->b_blkno = fsbtodb(fs, blkno);
	if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
	    (error = readblock(cancelvp, bp, fragstoblks(fs, blkno)))) {
		brelse(bp);
		return (error);
	}
	/*
	 * Account for the block pointers in this indirect block.
	 */
	last = howmany(remblks, blksperindir);
	if (last > NINDIR(fs))
		last = NINDIR(fs);
	MALLOC(bap, ufs2_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK);
	bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize);
	bqrelse(bp);
	error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs,
	    level == 0 ? rlbn : -1, expungetype);
	if (error || level == 0)
		goto out;
	/*
	 * Account for the block pointers in each of the indirect blocks
	 * in the levels below us.
	 */
	subblksperindir = blksperindir / NINDIR(fs);
	for (lbn++, level--, i = 0; i < last; i++) {
		error = indiracct_ufs2(snapvp, cancelvp, level, bap[i], lbn,
		    rlbn, remblks, subblksperindir, fs, acctfunc, expungetype);
		if (error)
			goto out;
		rlbn += blksperindir;
		lbn -= blksperindir;
		remblks -= blksperindir;
	}
out:
	FREE(bap, M_DEVBUF);
	return (error);
}

/*
 * Do both snap accounting and map accounting.
 */
static int
fullacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)
	struct vnode *vp;
	ufs2_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int exptype;	/* BLK_SNAP or BLK_NOCOPY */
{
	int error;

	if ((error = snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)))
		return (error);
	return (mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype));
}

/*
 * Identify a set of blocks allocated in a snapshot inode.
 */
static int
snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
	struct vnode *vp;
	ufs2_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int expungetype;	/* BLK_SNAP or BLK_NOCOPY */
{
	struct inode *ip = VTOI(vp);
	ufs2_daddr_t blkno, *blkp;
	ufs_lbn_t lbn;
	struct buf *ibp;
	int error;

	for ( ; oldblkp < lastblkp; oldblkp++) {
		blkno = *oldblkp;
		if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
			continue;
		lbn = fragstoblks(fs, blkno);
		if (lbn < NDADDR) {
			blkp = &ip->i_din2->di_db[lbn];
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			error = ffs_balloc_ufs2(vp, lblktosize(fs, (off_t)lbn),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			if (error)
				return (error);
			blkp = &((ufs2_daddr_t *)(ibp->b_data))
			    [(lbn - NDADDR) % NINDIR(fs)];
		}
		/*
		 * If we are expunging a snapshot vnode and we
		 * find a block marked BLK_NOCOPY, then it is
		 * one that has been allocated to this snapshot after
		 * we took our current snapshot and can be ignored.
		 */
		if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) {
			if (lbn >= NDADDR)
				brelse(ibp);
		} else {
			if (*blkp != 0)
				panic("snapacct_ufs2: bad block");
			*blkp = expungetype;
			if (lbn >= NDADDR)
				bdwrite(ibp);
		}
	}
	return (0);
}

/*
 * Account for a set of blocks allocated in a snapshot inode.
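 * Entries recorded as BLK_SNAP are freed at their own logical address,
 * the defining property of a claimed, never-copied block (see the
 * comment above ffs_snapblkfree() below).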
 */
static int
mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
	struct vnode *vp;
	ufs2_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int expungetype;
{
	ufs2_daddr_t blkno;
	struct inode *ip;
	ino_t inum;
	int acctit;

	ip = VTOI(vp);
	inum = ip->i_number;
	if (lblkno == -1)
		acctit = 0;
	else
		acctit = 1;
	for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) {
		blkno = *oldblkp;
		if (blkno == 0 || blkno == BLK_NOCOPY)
			continue;
		if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP)
			*ip->i_snapblklist++ = lblkno;
		if (blkno == BLK_SNAP)
			blkno = blkstofrags(fs, lblkno);
		ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum);
	}
	return (0);
}

/*
 * Decrement extra reference on snapshot when last name is removed.
 * It will not be freed until the last open reference goes away.
 */
void
ffs_snapgone(ip)
	struct inode *ip;
{
	struct inode *xp;
	struct fs *fs;
	int snaploc;
	struct snapdata *sn;
	struct ufsmount *ump;

	/*
	 * Find snapshot in incore list.
	 */
	xp = NULL;
	sn = ip->i_devvp->v_rdev->si_snapdata;
	if (sn != NULL)
		TAILQ_FOREACH(xp, &sn->sn_head, i_nextsnap)
			if (xp == ip)
				break;
	if (xp != NULL)
		vrele(ITOV(ip));
	else if (snapdebug)
		printf("ffs_snapgone: lost snapshot vnode %d\n",
		    ip->i_number);
	/*
	 * Delete snapshot inode from superblock. Keep list dense.
	 */
	fs = ip->i_fs;
	ump = ip->i_ump;
	UFS_LOCK(ump);
	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
		if (fs->fs_snapinum[snaploc] == ip->i_number)
			break;
	if (snaploc < FSMAXSNAP) {
		for (snaploc++; snaploc < FSMAXSNAP; snaploc++) {
			if (fs->fs_snapinum[snaploc] == 0)
				break;
			fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc];
		}
		fs->fs_snapinum[snaploc - 1] = 0;
	}
	UFS_UNLOCK(ump);
}

/*
 * Prepare a snapshot file for being removed.
 */
void
ffs_snapremove(vp)
	struct vnode *vp;
{
	struct inode *ip;
	struct vnode *devvp;
	struct buf *ibp;
	struct fs *fs;
	struct thread *td = curthread;
	ufs2_daddr_t numblks, blkno, dblk;
	int error, loc, last;
	struct snapdata *sn;

	ip = VTOI(vp);
	fs = ip->i_fs;
	devvp = ip->i_devvp;
	/*
	 * If active, delete from incore list (this snapshot may
	 * already have been in the process of being deleted, so
	 * would not have been active).
	 *
	 * Clear copy-on-write flag if last snapshot.
	 */
	VI_LOCK(devvp);
	if (ip->i_nextsnap.tqe_prev != 0) {
		sn = devvp->v_rdev->si_snapdata;
		TAILQ_REMOVE(&sn->sn_head, ip, i_nextsnap);
		ip->i_nextsnap.tqe_prev = 0;
		VI_UNLOCK(devvp);
		lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL, td);
		VI_LOCK(vp);
		KASSERT(vp->v_vnlock == &sn->sn_lock,
		    ("ffs_snapremove: lost lock mutation"));
		vp->v_vnlock = &vp->v_lock;
		VI_UNLOCK(vp);
		VI_LOCK(devvp);
		lockmgr(&sn->sn_lock, LK_RELEASE, NULL, td);
		try_free_snapdata(devvp, td);
	} else
		VI_UNLOCK(devvp);
	/*
	 * Clear all BLK_NOCOPY fields. Pass any block claims to other
	 * snapshots that want them (see ffs_snapblkfree below).
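	 * A claim is recognizable because the stored address equals
	 * blkstofrags(fs, lbn) for its own logical block.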
	 */
	for (blkno = 1; blkno < NDADDR; blkno++) {
		dblk = DIP(ip, i_db[blkno]);
		if (dblk == 0)
			continue;
		if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
			DIP_SET(ip, i_db[blkno], 0);
		else if ((dblk == blkstofrags(fs, blkno) &&
		     ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize,
		     ip->i_number))) {
			DIP_SET(ip, i_blocks, DIP(ip, i_blocks) -
			    btodb(fs->fs_bsize));
			DIP_SET(ip, i_db[blkno], 0);
		}
	}
	numblks = howmany(ip->i_size, fs->fs_bsize);
	for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno),
		    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
		if (error)
			continue;
		if (fs->fs_size - blkno > NINDIR(fs))
			last = NINDIR(fs);
		else
			last = fs->fs_size - blkno;
		for (loc = 0; loc < last; loc++) {
			if (ip->i_ump->um_fstype == UFS1) {
				dblk = ((ufs1_daddr_t *)(ibp->b_data))[loc];
				if (dblk == 0)
					continue;
				if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
					((ufs1_daddr_t *)(ibp->b_data))[loc] = 0;
				else if ((dblk == blkstofrags(fs, blkno) &&
				     ffs_snapblkfree(fs, ip->i_devvp, dblk,
				     fs->fs_bsize, ip->i_number))) {
					ip->i_din1->di_blocks -=
					    btodb(fs->fs_bsize);
					((ufs1_daddr_t *)(ibp->b_data))[loc] = 0;
				}
				continue;
			}
			dblk = ((ufs2_daddr_t *)(ibp->b_data))[loc];
			if (dblk == 0)
				continue;
			if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
				((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
			else if ((dblk == blkstofrags(fs, blkno) &&
			     ffs_snapblkfree(fs, ip->i_devvp, dblk,
			     fs->fs_bsize, ip->i_number))) {
				ip->i_din2->di_blocks -= btodb(fs->fs_bsize);
				((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
			}
		}
		bawrite(ibp);
	}
	/*
	 * Clear snapshot flag and drop reference.
	 */
	ip->i_flags &= ~SF_SNAPSHOT;
	DIP_SET(ip, i_flags, ip->i_flags);
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
#ifdef QUOTA
	/*
	 * Reenable disk quotas for ex-snapshot file.
	 */
	if (!getinoquota(ip))
		(void) chkdq(ip, DIP(ip, i_blocks), KERNCRED, FORCE);
#endif
}

/*
 * Notification that a block is being freed. Return zero if the free
 * should be allowed to proceed. Return non-zero if the snapshot file
 * wants to claim the block. The block will be claimed if it is an
 * uncopied part of one of the snapshots. It will be freed if it is
 * either a BLK_NOCOPY or has already been copied in all of the snapshots.
 * If a fragment is being freed, then all snapshots that care about
 * it must make a copy since a snapshot file can only claim full sized
 * blocks. Note that if more than one snapshot file maps the block,
 * we can pick one at random to claim it. Since none of the snapshots
 * can change, we are assured that they will all see the same unmodified
 * image. When deleting a snapshot file (see ffs_snapremove above), we
 * must push any of these claimed blocks to one of the other snapshots
 * that maps it. These claimed blocks are easily identified as they will
 * have a block number equal to their logical block number within the
 * snapshot. A copied block can never have this property because they
 * must always have been allocated from a BLK_NOCOPY location.
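 * For example, if logical block 100 of a snapshot maps physical address
 * blkstofrags(fs, 100), that block was claimed, never copied.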
 */
int
ffs_snapblkfree(fs, devvp, bno, size, inum)
	struct fs *fs;
	struct vnode *devvp;
	ufs2_daddr_t bno;
	long size;
	ino_t inum;
{
	struct buf *ibp, *cbp, *savedcbp = 0;
	struct thread *td = curthread;
	struct inode *ip;
	struct vnode *vp = NULL;
	ufs_lbn_t lbn;
	ufs2_daddr_t blkno;
	int indiroff = 0, error = 0, claimedblk = 0;
	struct snapdata *sn;

	lbn = fragstoblks(fs, bno);
retry:
	VI_LOCK(devvp);
	sn = devvp->v_rdev->si_snapdata;
	if (sn == NULL) {
		VI_UNLOCK(devvp);
		return (0);
	}
	if (lockmgr(&sn->sn_lock,
	    LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
	    VI_MTX(devvp), td) != 0)
		goto retry;
	TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) {
		vp = ITOV(ip);
		/*
		 * Lookup block being written.
		 */
		if (lbn < NDADDR) {
			blkno = DIP(ip, i_db[lbn]);
		} else {
			td->td_pflags |= TDP_COWINPROGRESS;
			error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			td->td_pflags &= ~TDP_COWINPROGRESS;
			if (error)
				break;
			indiroff = (lbn - NDADDR) % NINDIR(fs);
			if (ip->i_ump->um_fstype == UFS1)
				blkno = ((ufs1_daddr_t *)(ibp->b_data))[indiroff];
			else
				blkno = ((ufs2_daddr_t *)(ibp->b_data))[indiroff];
		}
		/*
		 * Check to see if block needs to be copied.
		 */
		if (blkno == 0) {
			/*
			 * A block that we map is being freed. If it has not
			 * been claimed yet, we will claim or copy it (below).
			 */
			claimedblk = 1;
		} else if (blkno == BLK_SNAP) {
			/*
			 * No previous snapshot claimed the block,
			 * so it will be freed and become a BLK_NOCOPY
			 * (don't care) for us.
			 */
			if (claimedblk)
				panic("snapblkfree: inconsistent block type");
			if (lbn < NDADDR) {
				DIP_SET(ip, i_db[lbn], BLK_NOCOPY);
				ip->i_flag |= IN_CHANGE | IN_UPDATE;
			} else if (ip->i_ump->um_fstype == UFS1) {
				((ufs1_daddr_t *)(ibp->b_data))[indiroff] =
				    BLK_NOCOPY;
				bdwrite(ibp);
			} else {
				((ufs2_daddr_t *)(ibp->b_data))[indiroff] =
				    BLK_NOCOPY;
				bdwrite(ibp);
			}
			continue;
		} else /* BLK_NOCOPY or default */ {
			/*
			 * If the snapshot has already copied the block
			 * (default), or does not care about the block,
			 * it is not needed.
			 */
			if (lbn >= NDADDR)
				bqrelse(ibp);
			continue;
		}
		/*
		 * If this is a full size block, we will just grab it
		 * and assign it to the snapshot inode. Otherwise we
		 * will proceed to copy it. See explanation for this
		 * routine as to why only a single snapshot needs to
		 * claim this block.
		 */
		if (size == fs->fs_bsize) {
#ifdef DEBUG
			if (snapdebug)
				printf("%s %d lbn %jd from inum %d\n",
				    "Grabonremove: snapino", ip->i_number,
				    (intmax_t)lbn, inum);
#endif
			if (lbn < NDADDR) {
				DIP_SET(ip, i_db[lbn], bno);
			} else if (ip->i_ump->um_fstype == UFS1) {
				((ufs1_daddr_t *)(ibp->b_data))[indiroff] = bno;
				bdwrite(ibp);
			} else {
				((ufs2_daddr_t *)(ibp->b_data))[indiroff] = bno;
				bdwrite(ibp);
			}
			DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + btodb(size));
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			lockmgr(vp->v_vnlock, LK_RELEASE, NULL, td);
			return (1);
		}
		if (lbn >= NDADDR)
			bqrelse(ibp);
		/*
		 * Allocate the block into which to do the copy. Note that this
		 * allocation will never require any additional allocations
		 * for the snapshot inode.
		 */
		td->td_pflags |= TDP_COWINPROGRESS;
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, 0, &cbp);
		td->td_pflags &= ~TDP_COWINPROGRESS;
		if (error)
			break;
#ifdef DEBUG
		if (snapdebug)
			printf("%s%d lbn %jd %s %d size %ld to blkno %jd\n",
			    "Copyonremove: snapino ", ip->i_number,
			    (intmax_t)lbn, "for inum", inum, size,
			    (intmax_t)cbp->b_blkno);
#endif
		/*
		 * If we have already read the old block contents, then
		 * simply copy them to the new block. Note that we need
		 * to synchronously write snapshots that have not been
		 * unlinked, and hence will be visible after a crash,
		 * to ensure their integrity.
		 */
		if (savedcbp != 0) {
			bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
			bawrite(cbp);
			if (dopersistence && ip->i_effnlink > 0)
				(void) ffs_syncvnode(vp, MNT_WAIT);
			continue;
		}
		/*
		 * Otherwise, read the old block contents into the buffer.
		 */
		if ((error = readblock(vp, cbp, lbn)) != 0) {
			bzero(cbp->b_data, fs->fs_bsize);
			bawrite(cbp);
			if (dopersistence && ip->i_effnlink > 0)
				(void) ffs_syncvnode(vp, MNT_WAIT);
			break;
		}
		savedcbp = cbp;
	}
	/*
	 * Note that we need to synchronously write snapshots that
	 * have not been unlinked, and hence will be visible after
	 * a crash, to ensure their integrity.
	 */
	if (savedcbp) {
		vp = savedcbp->b_vp;
		bawrite(savedcbp);
		if (dopersistence && VTOI(vp)->i_effnlink > 0)
			(void) ffs_syncvnode(vp, MNT_WAIT);
	}
	/*
	 * If we have been unable to allocate a block in which to do
	 * the copy, then return non-zero so that the fragment will
	 * not be freed. Although space will be lost, the snapshot
	 * will stay consistent.
	 */
	lockmgr(vp->v_vnlock, LK_RELEASE, NULL, td);
	return (error);
}

/*
 * Associate snapshot files when mounting.
 */
void
ffs_snapshot_mount(mp)
	struct mount *mp;
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct vnode *devvp = ump->um_devvp;
	struct fs *fs = ump->um_fs;
	struct thread *td = curthread;
	struct snapdata *sn;
	struct vnode *vp;
	struct vnode *lastvp;
	struct inode *ip;
	struct uio auio;
	struct iovec aiov;
	void *snapblklist;
	char *reason;
	daddr_t snaplistsize;
	int error, snaploc, loc;

	/*
	 * XXX The following needs to be set before ffs_truncate or
	 * VOP_READ can be called.
	 */
	mp->mnt_stat.f_iosize = fs->fs_bsize;
	/*
	 * Process each snapshot listed in the superblock.
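	 * Inodes that turn out not to be snapshots, or that use the old
	 * format without an appended hint list, are discarded and the
	 * superblock list is compacted around them.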

/*
 * Associate snapshot files when mounting.
 */
void
ffs_snapshot_mount(mp)
	struct mount *mp;
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct vnode *devvp = ump->um_devvp;
	struct fs *fs = ump->um_fs;
	struct thread *td = curthread;
	struct snapdata *sn;
	struct vnode *vp;
	struct vnode *lastvp;
	struct inode *ip;
	struct uio auio;
	struct iovec aiov;
	void *snapblklist;
	char *reason;
	daddr_t snaplistsize;
	int error, snaploc, loc;

	/*
	 * XXX The following needs to be set before ffs_truncate or
	 * VOP_READ can be called.
	 */
	mp->mnt_stat.f_iosize = fs->fs_bsize;
	/*
	 * Process each snapshot listed in the superblock.
	 */
	vp = NULL;
	lastvp = NULL;
	sn = devvp->v_rdev->si_snapdata;
	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) {
		if (fs->fs_snapinum[snaploc] == 0)
			break;
		if ((error = ffs_vget(mp, fs->fs_snapinum[snaploc],
		    LK_EXCLUSIVE, &vp)) != 0) {
			printf("ffs_snapshot_mount: vget failed %d\n", error);
			continue;
		}
		ip = VTOI(vp);
		if ((ip->i_flags & SF_SNAPSHOT) == 0 || ip->i_size ==
		    lblktosize(fs, howmany(fs->fs_size, fs->fs_frag))) {
			if ((ip->i_flags & SF_SNAPSHOT) == 0) {
				reason = "non-snapshot";
			} else {
				reason = "old format snapshot";
				(void)ffs_truncate(vp, (off_t)0, 0, NOCRED, td);
				(void)ffs_syncvnode(vp, MNT_WAIT);
			}
			printf("ffs_snapshot_mount: %s inode %d\n",
			    reason, fs->fs_snapinum[snaploc]);
			vput(vp);
			vp = NULL;
			for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) {
				if (fs->fs_snapinum[loc] == 0)
					break;
				fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc];
			}
			fs->fs_snapinum[loc - 1] = 0;
			snaploc--;
			continue;
		}
		/*
		 * If there already exist snapshots on this filesystem, grab a
		 * reference to their shared lock. If this is the first snapshot
		 * on this filesystem, we need to allocate a lock for the
		 * snapshots to share. In either case, acquire the snapshot
		 * lock and give up our original private lock.
		 */
		VI_LOCK(devvp);
		if (sn != NULL) {
			VI_UNLOCK(devvp);
			VI_LOCK(vp);
			vp->v_vnlock = &sn->sn_lock;
		} else {
			VI_UNLOCK(devvp);
			sn = malloc(sizeof *sn, M_UFSMNT, M_WAITOK | M_ZERO);
			TAILQ_INIT(&sn->sn_head);
			lockinit(&sn->sn_lock, PVFS, "snaplk", VLKTIMEOUT,
			    LK_CANRECURSE | LK_NOSHARE);
			VI_LOCK(vp);
			vp->v_vnlock = &sn->sn_lock;
			devvp->v_rdev->si_snapdata = sn;
		}
		lockmgr(vp->v_vnlock, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY,
		    VI_MTX(vp), td);
		lockmgr(&vp->v_lock, LK_RELEASE, NULL, td);
		/*
		 * Link it onto the active snapshot list.
		 */
		VI_LOCK(devvp);
		if (ip->i_nextsnap.tqe_prev != 0)
			panic("ffs_snapshot_mount: %d already on list",
			    ip->i_number);
		else
			TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap);
		vp->v_vflag |= VV_SYSTEM;
		VI_UNLOCK(devvp);
		VOP_UNLOCK(vp, 0, td);
		lastvp = vp;
	}
	vp = lastvp;
	/*
	 * No usable snapshots found.
	 */
	if (vp == NULL)
		return;
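	/*
	 * The block hints list is stored at the end of the snapshot file,
	 * immediately after the last block of the snapshot image proper,
	 * with its own length as element zero:
	 *
	 *	offset lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)):
	 *		snapblklist[0] = snaplistsize
	 *		snapblklist[1 .. snaplistsize - 1] = logical block
	 *		    numbers that never need to be copied
	 *
	 * That layout is why the second read below backs uio_offset up by
	 * sizeof(snaplistsize) to re-read that word as element zero, and
	 * why the binary search in ffs_copyonwrite() starts at index 1.
	 */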
	/*
	 * Allocate the space for the block hints list. We always want to
	 * use the list from the newest snapshot.
	 */
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = (void *)&snaplistsize;
	aiov.iov_len = sizeof(snaplistsize);
	auio.uio_resid = aiov.iov_len;
	auio.uio_offset = lblktosize(fs, howmany(fs->fs_size, fs->fs_frag));
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) {
		printf("ffs_snapshot_mount: read_1 failed %d\n", error);
		VOP_UNLOCK(vp, 0, td);
		return;
	}
	MALLOC(snapblklist, void *, snaplistsize * sizeof(daddr_t),
	    M_UFSMNT, M_WAITOK);
	auio.uio_iovcnt = 1;
	aiov.iov_base = snapblklist;
	aiov.iov_len = snaplistsize * sizeof (daddr_t);
	auio.uio_resid = aiov.iov_len;
	auio.uio_offset -= sizeof(snaplistsize);
	if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) {
		printf("ffs_snapshot_mount: read_2 failed %d\n", error);
		VOP_UNLOCK(vp, 0, td);
		FREE(snapblklist, M_UFSMNT);
		return;
	}
	VOP_UNLOCK(vp, 0, td);
	VI_LOCK(devvp);
	ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_mount");
	sn->sn_listsize = snaplistsize;
	sn->sn_blklist = (daddr_t *)snapblklist;
	devvp->v_vflag |= VV_COPYONWRITE;
	VI_UNLOCK(devvp);
}

/*
 * Disassociate snapshot files when unmounting.
 */
void
ffs_snapshot_unmount(mp)
	struct mount *mp;
{
	struct vnode *devvp = VFSTOUFS(mp)->um_devvp;
	struct snapdata *sn;
	struct inode *xp;
	struct vnode *vp;
	struct thread *td = curthread;

	VI_LOCK(devvp);
	sn = devvp->v_rdev->si_snapdata;
	while (sn != NULL && (xp = TAILQ_FIRST(&sn->sn_head)) != NULL) {
		vp = ITOV(xp);
		TAILQ_REMOVE(&sn->sn_head, xp, i_nextsnap);
		xp->i_nextsnap.tqe_prev = 0;
		lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE,
		    VI_MTX(devvp), td);
		VI_LOCK(vp);
		lockmgr(&vp->v_lock, LK_INTERLOCK | LK_EXCLUSIVE,
		    VI_MTX(vp), td);
		VI_LOCK(vp);
		KASSERT(vp->v_vnlock == &sn->sn_lock,
		    ("ffs_snapshot_unmount: lost lock mutation"));
		vp->v_vnlock = &vp->v_lock;
		VI_UNLOCK(vp);
		lockmgr(&vp->v_lock, LK_RELEASE, NULL, td);
		lockmgr(&sn->sn_lock, LK_RELEASE, NULL, td);
		if (xp->i_effnlink > 0)
			vrele(vp);
		VI_LOCK(devvp);
		sn = devvp->v_rdev->si_snapdata;
	}
	try_free_snapdata(devvp, td);
	ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_unmount");
}

/*
 * Check for need to copy block that is about to be written,
 * copying the block if necessary.
 */
int
ffs_copyonwrite(devvp, bp)
	struct vnode *devvp;
	struct buf *bp;
{
	struct snapdata *sn;
	struct buf *ibp, *cbp, *savedcbp = 0;
	struct thread *td = curthread;
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp = 0;
	ufs2_daddr_t lbn, blkno, *snapblklist;
	int lower, upper, mid, indiroff, error = 0;
	int launched_async_io, prev_norunningbuf;
	long saved_runningbufspace;

	if (devvp != bp->b_vp && (VTOI(bp->b_vp)->i_flags & SF_SNAPSHOT) != 0)
		return (0);		/* Update on a snapshot file */
	if (td->td_pflags & TDP_COWINPROGRESS)
		panic("ffs_copyonwrite: recursive call");
	/*
	 * First check to see if it is in the preallocated list.
	 * By doing this check we avoid several potential deadlocks.
	 */
	VI_LOCK(devvp);
	sn = devvp->v_rdev->si_snapdata;
	if (sn == NULL ||
	    TAILQ_FIRST(&sn->sn_head) == NULL) {
		VI_UNLOCK(devvp);
		return (0);		/* No snapshot */
	}
	ip = TAILQ_FIRST(&sn->sn_head);
	fs = ip->i_fs;
	lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno));
	snapblklist = sn->sn_blklist;
	upper = sn->sn_listsize - 1;
	lower = 1;
	while (lower <= upper) {
		mid = (lower + upper) / 2;
		if (snapblklist[mid] == lbn)
			break;
		if (snapblklist[mid] < lbn)
			lower = mid + 1;
		else
			upper = mid - 1;
	}
	if (lower <= upper) {
		VI_UNLOCK(devvp);
		return (0);
	}
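	/*
	 * The loop above is a standard binary search over
	 * snapblklist[1 .. sn_listsize - 1], which is kept in ascending
	 * order; element 0 holds the list length and is skipped.  An
	 * equivalent standalone form (illustrative sketch only, with a
	 * hypothetical name):
	 *
	 *	static int
	 *	in_blklist(ufs2_daddr_t *list, int size, ufs2_daddr_t lbn)
	 *	{
	 *		int lower = 1, upper = size - 1, mid;
	 *
	 *		while (lower <= upper) {
	 *			mid = (lower + upper) / 2;
	 *			if (list[mid] == lbn)
	 *				return (1);
	 *			if (list[mid] < lbn)
	 *				lower = mid + 1;
	 *			else
	 *				upper = mid - 1;
	 *		}
	 *		return (0);
	 *	}
	 *
	 * A hit means the block is already known to need no copying, so
	 * the write may proceed without taking the snapshot lock.
	 */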
	launched_async_io = 0;
	prev_norunningbuf = td->td_pflags & TDP_NORUNNINGBUF;
	/*
	 * Since I/O on bp isn't yet in progress and it may be blocked
	 * for a long time waiting on snaplk, back it out of
	 * runningbufspace, possibly waking other threads waiting for space.
	 */
	saved_runningbufspace = bp->b_runningbufspace;
	if (saved_runningbufspace != 0)
		runningbufwakeup(bp);
	/*
	 * Not in the precomputed list, so check the snapshots.
	 */
	while (lockmgr(&sn->sn_lock,
	    LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
	    VI_MTX(devvp), td) != 0) {
		VI_LOCK(devvp);
		sn = devvp->v_rdev->si_snapdata;
		if (sn == NULL ||
		    TAILQ_FIRST(&sn->sn_head) == NULL) {
			VI_UNLOCK(devvp);
			if (saved_runningbufspace != 0) {
				bp->b_runningbufspace = saved_runningbufspace;
				atomic_add_int(&runningbufspace,
				    bp->b_runningbufspace);
			}
			return (0);		/* Snapshot gone */
		}
	}
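	/*
	 * LK_SLEEPFAIL makes the lockmgr() call above return failure if
	 * it had to sleep rather than returning with the lock held.
	 * Since the snapshot list may have changed while we slept (the
	 * last snapshot may have been removed), each failed attempt
	 * re-reads si_snapdata and re-checks that a snapshot still
	 * exists before retrying; on success the lock is held and the
	 * list is known to be stable.
	 */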
	TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) {
		vp = ITOV(ip);
		/*
		 * We ensure that everything of our own that needs to be
		 * copied will be done at the time that ffs_snapshot is
		 * called. Thus we can skip the check here which can
		 * deadlock in doing the lookup in UFS_BALLOC.
		 */
		if (bp->b_vp == vp)
			continue;
		/*
		 * Check to see if block needs to be copied. We do not have
		 * to hold the snapshot lock while doing this lookup as it
		 * will never require any additional allocations for the
		 * snapshot inode.
		 */
		if (lbn < NDADDR) {
			blkno = DIP(ip, i_db[lbn]);
		} else {
			td->td_pflags |= TDP_COWINPROGRESS | TDP_NORUNNINGBUF;
			error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			td->td_pflags &= ~TDP_COWINPROGRESS;
			if (error)
				break;
			indiroff = (lbn - NDADDR) % NINDIR(fs);
			if (ip->i_ump->um_fstype == UFS1)
				blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff];
			else
				blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff];
			bqrelse(ibp);
		}
#ifdef DIAGNOSTIC
		if (blkno == BLK_SNAP && bp->b_lblkno >= 0)
			panic("ffs_copyonwrite: bad copy block");
#endif
		if (blkno != 0)
			continue;
		/*
		 * Allocate the block into which to do the copy. Since
		 * multiple processes may all try to copy the same block,
		 * we have to recheck our need to do a copy if we sleep
		 * waiting for the lock.
		 *
		 * Because all snapshots on a filesystem share a single
		 * lock, we ensure that we will never be in competition
		 * with another process to allocate a block.
		 */
		td->td_pflags |= TDP_COWINPROGRESS | TDP_NORUNNINGBUF;
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, 0, &cbp);
		td->td_pflags &= ~TDP_COWINPROGRESS;
		if (error)
			break;
#ifdef DEBUG
		if (snapdebug) {
			printf("Copyonwrite: snapino %d lbn %jd for ",
			    ip->i_number, (intmax_t)lbn);
			if (bp->b_vp == devvp)
				printf("fs metadata");
			else
				printf("inum %d", VTOI(bp->b_vp)->i_number);
			printf(" lblkno %jd to blkno %jd\n",
			    (intmax_t)bp->b_lblkno, (intmax_t)cbp->b_blkno);
		}
#endif
		/*
		 * If we have already read the old block contents, then
		 * simply copy them to the new block. Note that we need
		 * to synchronously write snapshots that have not been
		 * unlinked, and hence will be visible after a crash,
		 * to ensure their integrity.
		 */
		if (savedcbp != 0) {
			bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
			bawrite(cbp);
			if (dopersistence && ip->i_effnlink > 0)
				(void) ffs_syncvnode(vp, MNT_WAIT);
			else
				launched_async_io = 1;
			continue;
		}
		/*
		 * Otherwise, read the old block contents into the buffer.
		 */
		if ((error = readblock(vp, cbp, lbn)) != 0) {
			bzero(cbp->b_data, fs->fs_bsize);
			bawrite(cbp);
			if (dopersistence && ip->i_effnlink > 0)
				(void) ffs_syncvnode(vp, MNT_WAIT);
			else
				launched_async_io = 1;
			break;
		}
		savedcbp = cbp;
	}
	/*
	 * Note that we need to synchronously write snapshots that
	 * have not been unlinked, and hence will be visible after
	 * a crash, to ensure their integrity.
	 */
	if (savedcbp) {
		vp = savedcbp->b_vp;
		bawrite(savedcbp);
		if (dopersistence && VTOI(vp)->i_effnlink > 0)
			(void) ffs_syncvnode(vp, MNT_WAIT);
		else
			launched_async_io = 1;
	}
	lockmgr(vp->v_vnlock, LK_RELEASE, NULL, td);
	td->td_pflags = (td->td_pflags & ~TDP_NORUNNINGBUF) |
	    prev_norunningbuf;
	if (launched_async_io && (td->td_pflags & TDP_NORUNNINGBUF) == 0)
		waitrunningbufspace();
	/*
	 * I/O on bp will now be started, so count it in runningbufspace.
	 */
	if (saved_runningbufspace != 0) {
		bp->b_runningbufspace = saved_runningbufspace;
		atomic_add_int(&runningbufspace, bp->b_runningbufspace);
	}
	return (error);
}

/*
 * Read the specified block into the given buffer.
 * Much of this boiler-plate comes from bwrite().
 */
static int
readblock(vp, bp, lbn)
	struct vnode *vp;
	struct buf *bp;
	ufs2_daddr_t lbn;
{
	struct inode *ip = VTOI(vp);
	struct bio *bip;

	bip = g_alloc_bio();
	bip->bio_cmd = BIO_READ;
	bip->bio_offset = dbtob(fsbtodb(ip->i_fs, blkstofrags(ip->i_fs, lbn)));
	bip->bio_data = bp->b_data;
	bip->bio_length = bp->b_bcount;
	bip->bio_done = NULL;

	g_io_request(bip, ip->i_devvp->v_bufobj.bo_private);
	bp->b_error = biowait(bip, "snaprdb");
	g_destroy_bio(bip);
	return (bp->b_error);
}
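
/*
 * readblock() addresses the disk by byte offset: blkstofrags() turns
 * the logical block number into a fragment number, fsbtodb() turns
 * fragments into DEV_BSIZE disk blocks, and dbtob() turns those into
 * bytes.  As a worked example, on a filesystem with 16K blocks and 2K
 * fragments (8 fragments per block, fsbtodb() shifting by 2), logical
 * block 5 maps to fragment 40, disk block 160, byte offset 81920.
 * The numbers are illustrative; the actual shifts come from the
 * superblock.
 */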

/*
 * Process file deletes that were deferred by ufs_inactive() due to
 * the file system being suspended. Transfer IN_LAZYACCESS into
 * IN_MODIFIED for vnodes that were accessed during suspension.
 */
static void
process_deferred_inactive(struct mount *mp)
{
	struct vnode *vp, *mvp;
	struct inode *ip;
	struct thread *td;
	int error;

	td = curthread;
	(void) vn_start_secondary_write(NULL, &mp, V_WAIT);
	MNT_ILOCK(mp);
loop:
	MNT_VNODE_FOREACH(vp, mp, mvp) {
		VI_LOCK(vp);
		/*
		 * IN_LAZYACCESS is checked here without holding any
		 * vnode lock, but this flag is set only while holding
		 * vnode interlock.
		 */
		if (vp->v_type == VNON || (vp->v_iflag & VI_DOOMED) != 0 ||
		    ((VTOI(vp)->i_flag & IN_LAZYACCESS) == 0 &&
		    ((vp->v_iflag & VI_OWEINACT) == 0 ||
		    vp->v_usecount > 0))) {
			VI_UNLOCK(vp);
			continue;
		}
		MNT_IUNLOCK(mp);
		vholdl(vp);
		error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, td);
		if (error != 0) {
			vdrop(vp);
			MNT_ILOCK(mp);
			if (error == ENOENT)
				continue;	/* vnode recycled */
			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
			goto loop;
		}
		ip = VTOI(vp);
		if ((ip->i_flag & IN_LAZYACCESS) != 0) {
			ip->i_flag &= ~IN_LAZYACCESS;
			ip->i_flag |= IN_MODIFIED;
		}
		VI_LOCK(vp);
		if ((vp->v_iflag & VI_OWEINACT) == 0 || vp->v_usecount > 0) {
			VI_UNLOCK(vp);
			VOP_UNLOCK(vp, 0, td);
			vdrop(vp);
			MNT_ILOCK(mp);
			continue;
		}

		VNASSERT((vp->v_iflag & VI_DOINGINACT) == 0, vp,
		    ("process_deferred_inactive: "
		    "recursed on VI_DOINGINACT"));
		vp->v_iflag |= VI_DOINGINACT;
		vp->v_iflag &= ~VI_OWEINACT;
		VI_UNLOCK(vp);
		(void) VOP_INACTIVE(vp, td);
		VI_LOCK(vp);
		VNASSERT(vp->v_iflag & VI_DOINGINACT, vp,
		    ("process_deferred_inactive: lost VI_DOINGINACT"));
		VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp,
		    ("process_deferred_inactive: got VI_OWEINACT"));
		vp->v_iflag &= ~VI_DOINGINACT;
		VI_UNLOCK(vp);
		VOP_UNLOCK(vp, 0, td);
		vdrop(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	vn_finished_secondary_write(mp);
}

/*
 * Try to free snapdata associated with devvp. Called with the vnode
 * interlock of devvp held; the interlock is dropped on all return paths.
 */
static void
try_free_snapdata(struct vnode *devvp, struct thread *td)
{
	struct snapdata *sn;
	ufs2_daddr_t *snapblklist;

	sn = devvp->v_rdev->si_snapdata;

	if (sn == NULL || TAILQ_FIRST(&sn->sn_head) != NULL ||
	    (devvp->v_vflag & VV_COPYONWRITE) == 0) {
		VI_UNLOCK(devvp);
		return;
	}

	devvp->v_rdev->si_snapdata = NULL;
	devvp->v_vflag &= ~VV_COPYONWRITE;
	snapblklist = sn->sn_blklist;
	sn->sn_blklist = NULL;
	sn->sn_listsize = 0;
	lockmgr(&sn->sn_lock, LK_DRAIN | LK_INTERLOCK, VI_MTX(devvp), td);
	lockmgr(&sn->sn_lock, LK_RELEASE, NULL, td);
	lockdestroy(&sn->sn_lock);
	free(sn, M_UFSMNT);
	if (snapblklist != NULL)
		FREE(snapblklist, M_UFSMNT);
}
#endif