1 /*- 2 * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved. 3 * 4 * Further information about snapshots can be obtained from: 5 * 6 * Marshall Kirk McKusick http://www.mckusick.com/softdep/ 7 * 1614 Oxford Street mckusick@mckusick.com 8 * Berkeley, CA 94709-1608 +1-510-843-9542 9 * USA 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY 22 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR 25 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)ffs_snapshot.c 8.11 (McKusick) 7/23/00 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include "opt_quota.h" 40 41 #include <sys/param.h> 42 #include <sys/kernel.h> 43 #include <sys/systm.h> 44 #include <sys/conf.h> 45 #include <sys/bio.h> 46 #include <sys/buf.h> 47 #include <sys/fcntl.h> 48 #include <sys/proc.h> 49 #include <sys/namei.h> 50 #include <sys/sched.h> 51 #include <sys/stat.h> 52 #include <sys/malloc.h> 53 #include <sys/mount.h> 54 #include <sys/resource.h> 55 #include <sys/resourcevar.h> 56 #include <sys/vnode.h> 57 58 #include <geom/geom.h> 59 60 #include <ufs/ufs/extattr.h> 61 #include <ufs/ufs/quota.h> 62 #include <ufs/ufs/ufsmount.h> 63 #include <ufs/ufs/inode.h> 64 #include <ufs/ufs/ufs_extern.h> 65 66 #include <ufs/ffs/fs.h> 67 #include <ufs/ffs/ffs_extern.h> 68 69 #define KERNCRED thread0.td_ucred 70 #define DEBUG 1 71 72 #include "opt_ffs.h" 73 74 #ifdef NO_FFS_SNAPSHOT 75 int 76 ffs_snapshot(mp, snapfile) 77 struct mount *mp; 78 char *snapfile; 79 { 80 return (EINVAL); 81 } 82 83 int 84 ffs_snapblkfree(fs, devvp, bno, size, inum) 85 struct fs *fs; 86 struct vnode *devvp; 87 ufs2_daddr_t bno; 88 long size; 89 ino_t inum; 90 { 91 return (EINVAL); 92 } 93 94 void 95 ffs_snapremove(vp) 96 struct vnode *vp; 97 { 98 } 99 100 void 101 ffs_snapshot_mount(mp) 102 struct mount *mp; 103 { 104 } 105 106 void 107 ffs_snapshot_unmount(mp) 108 struct mount *mp; 109 { 110 } 111 112 void 113 ffs_snapgone(ip) 114 struct inode *ip; 115 { 116 } 117 118 int 119 ffs_copyonwrite(devvp, bp) 120 struct vnode *devvp; 121 struct buf *bp; 122 { 123 return (EINVAL); 124 } 125 126 #else 127 128 TAILQ_HEAD(snaphead, inode); 129 130 struct snapdata { 131 LIST_ENTRY(snapdata) sn_link; 132 struct snaphead sn_head; 133 daddr_t sn_listsize; 134 daddr_t *sn_blklist; 135 struct lock sn_lock; 136 }; 137 138 LIST_HEAD(, snapdata) snapfree; 139 static struct mtx snapfree_lock; 140 MTX_SYSINIT(ffs_snapfree, &snapfree_lock, "snapdata free list", MTX_DEF); 141 142 static int cgaccount(int, struct vnode *, struct buf *, int); 143 static int expunge_ufs1(struct vnode *, struct inode *, struct fs *, 144 int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *, 145 ufs_lbn_t, int), int, int); 146 static int indiracct_ufs1(struct vnode *, struct vnode *, int, 147 ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *, 148 int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *, 149 ufs_lbn_t, int), int); 150 static int fullacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, 151 struct fs *, ufs_lbn_t, int); 152 static int snapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, 153 struct fs *, ufs_lbn_t, int); 154 static int mapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, 155 struct fs *, ufs_lbn_t, int); 156 static int expunge_ufs2(struct vnode *, struct inode *, struct fs *, 157 int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *, 158 ufs_lbn_t, int), int, int); 159 static int indiracct_ufs2(struct vnode *, struct vnode *, int, 160 ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *, 161 int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *, 162 ufs_lbn_t, int), int); 163 static int fullacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 164 struct fs *, ufs_lbn_t, int); 165 static int snapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 166 struct fs *, ufs_lbn_t, int); 167 static int mapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 168 struct fs *, ufs_lbn_t, int); 169 static int readblock(struct vnode *vp, struct buf *, ufs2_daddr_t); 170 static void try_free_snapdata(struct vnode *devvp); 171 static struct snapdata *ffs_snapdata_acquire(struct vnode *devvp); 172 static int ffs_bp_snapblk(struct vnode *, struct buf *); 173 174 /* 175 * To ensure the consistency of snapshots across crashes, we must 176 * synchronously write out copied blocks before allowing the 177 * originals to be modified. Because of the rather severe speed 178 * penalty that this imposes, the following flag allows this 179 * crash persistence to be disabled. 180 */ 181 int dopersistence = 0; 182 183 #ifdef DEBUG 184 #include <sys/sysctl.h> 185 SYSCTL_INT(_debug, OID_AUTO, dopersistence, CTLFLAG_RW, &dopersistence, 0, ""); 186 static int snapdebug = 0; 187 SYSCTL_INT(_debug, OID_AUTO, snapdebug, CTLFLAG_RW, &snapdebug, 0, ""); 188 int collectsnapstats = 0; 189 SYSCTL_INT(_debug, OID_AUTO, collectsnapstats, CTLFLAG_RW, &collectsnapstats, 190 0, ""); 191 #endif /* DEBUG */ 192 193 /* 194 * Create a snapshot file and initialize it for the filesystem. 195 */ 196 int 197 ffs_snapshot(mp, snapfile) 198 struct mount *mp; 199 char *snapfile; 200 { 201 ufs2_daddr_t numblks, blkno, *blkp, *snapblklist; 202 int error, cg, snaploc; 203 int i, size, len, loc; 204 int flag; 205 struct timespec starttime = {0, 0}, endtime; 206 char saved_nice = 0; 207 long redo = 0, snaplistsize = 0; 208 int32_t *lp; 209 void *space; 210 struct fs *copy_fs = NULL, *fs; 211 struct thread *td = curthread; 212 struct inode *ip, *xp; 213 struct buf *bp, *nbp, *ibp, *sbp = NULL; 214 struct nameidata nd; 215 struct mount *wrtmp; 216 struct vattr vat; 217 struct vnode *vp, *xvp, *mvp, *devvp; 218 struct uio auio; 219 struct iovec aiov; 220 struct snapdata *sn; 221 struct ufsmount *ump; 222 223 ump = VFSTOUFS(mp); 224 fs = ump->um_fs; 225 sn = NULL; 226 MNT_ILOCK(mp); 227 flag = mp->mnt_flag; 228 MNT_IUNLOCK(mp); 229 230 /* 231 * Need to serialize access to snapshot code per filesystem. 232 */ 233 /* 234 * Assign a snapshot slot in the superblock. 235 */ 236 UFS_LOCK(ump); 237 for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) 238 if (fs->fs_snapinum[snaploc] == 0) 239 break; 240 UFS_UNLOCK(ump); 241 if (snaploc == FSMAXSNAP) 242 return (ENOSPC); 243 /* 244 * Create the snapshot file. 245 */ 246 restart: 247 NDINIT(&nd, CREATE, LOCKPARENT | LOCKLEAF, UIO_SYSSPACE, snapfile, td); 248 if ((error = namei(&nd)) != 0) 249 return (error); 250 if (nd.ni_vp != NULL) { 251 vput(nd.ni_vp); 252 error = EEXIST; 253 } 254 if (nd.ni_dvp->v_mount != mp) 255 error = EXDEV; 256 if (error) { 257 NDFREE(&nd, NDF_ONLY_PNBUF); 258 if (nd.ni_dvp == nd.ni_vp) 259 vrele(nd.ni_dvp); 260 else 261 vput(nd.ni_dvp); 262 return (error); 263 } 264 VATTR_NULL(&vat); 265 vat.va_type = VREG; 266 vat.va_mode = S_IRUSR; 267 vat.va_vaflags |= VA_EXCLUSIVE; 268 if (VOP_GETWRITEMOUNT(nd.ni_dvp, &wrtmp)) 269 wrtmp = NULL; 270 if (wrtmp != mp) 271 panic("ffs_snapshot: mount mismatch"); 272 vfs_rel(wrtmp); 273 if (vn_start_write(NULL, &wrtmp, V_NOWAIT) != 0) { 274 NDFREE(&nd, NDF_ONLY_PNBUF); 275 vput(nd.ni_dvp); 276 if ((error = vn_start_write(NULL, &wrtmp, 277 V_XSLEEP | PCATCH)) != 0) 278 return (error); 279 goto restart; 280 } 281 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vat); 282 VOP_UNLOCK(nd.ni_dvp, 0); 283 if (error) { 284 NDFREE(&nd, NDF_ONLY_PNBUF); 285 vn_finished_write(wrtmp); 286 vrele(nd.ni_dvp); 287 return (error); 288 } 289 vp = nd.ni_vp; 290 vp->v_vflag |= VV_SYSTEM; 291 ip = VTOI(vp); 292 devvp = ip->i_devvp; 293 /* 294 * Allocate and copy the last block contents so as to be able 295 * to set size to that of the filesystem. 296 */ 297 numblks = howmany(fs->fs_size, fs->fs_frag); 298 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)), 299 fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp); 300 if (error) 301 goto out; 302 ip->i_size = lblktosize(fs, (off_t)numblks); 303 DIP_SET(ip, i_size, ip->i_size); 304 ip->i_flag |= IN_CHANGE | IN_UPDATE; 305 error = readblock(vp, bp, numblks - 1); 306 bawrite(bp); 307 if (error != 0) 308 goto out; 309 /* 310 * Preallocate critical data structures so that we can copy 311 * them in without further allocation after we suspend all 312 * operations on the filesystem. We would like to just release 313 * the allocated buffers without writing them since they will 314 * be filled in below once we are ready to go, but this upsets 315 * the soft update code, so we go ahead and write the new buffers. 316 * 317 * Allocate all indirect blocks and mark all of them as not 318 * needing to be copied. 319 */ 320 for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) { 321 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno), 322 fs->fs_bsize, td->td_ucred, BA_METAONLY, &ibp); 323 if (error) 324 goto out; 325 bawrite(ibp); 326 } 327 /* 328 * Allocate copies for the superblock and its summary information. 329 */ 330 error = UFS_BALLOC(vp, fs->fs_sblockloc, fs->fs_sbsize, KERNCRED, 331 0, &nbp); 332 if (error) 333 goto out; 334 bawrite(nbp); 335 blkno = fragstoblks(fs, fs->fs_csaddr); 336 len = howmany(fs->fs_cssize, fs->fs_bsize); 337 for (loc = 0; loc < len; loc++) { 338 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)), 339 fs->fs_bsize, KERNCRED, 0, &nbp); 340 if (error) 341 goto out; 342 bawrite(nbp); 343 } 344 /* 345 * Allocate all cylinder group blocks. 346 */ 347 for (cg = 0; cg < fs->fs_ncg; cg++) { 348 error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 349 fs->fs_bsize, KERNCRED, 0, &nbp); 350 if (error) 351 goto out; 352 bawrite(nbp); 353 if (cg % 10 == 0) 354 ffs_syncvnode(vp, MNT_WAIT); 355 } 356 /* 357 * Copy all the cylinder group maps. Although the 358 * filesystem is still active, we hope that only a few 359 * cylinder groups will change between now and when we 360 * suspend operations. Thus, we will be able to quickly 361 * touch up the few cylinder groups that changed during 362 * the suspension period. 363 */ 364 len = howmany(fs->fs_ncg, NBBY); 365 space = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); 366 UFS_LOCK(ump); 367 fs->fs_active = space; 368 UFS_UNLOCK(ump); 369 for (cg = 0; cg < fs->fs_ncg; cg++) { 370 error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 371 fs->fs_bsize, KERNCRED, 0, &nbp); 372 if (error) 373 goto out; 374 error = cgaccount(cg, vp, nbp, 1); 375 bawrite(nbp); 376 if (cg % 10 == 0) 377 ffs_syncvnode(vp, MNT_WAIT); 378 if (error) 379 goto out; 380 } 381 /* 382 * Change inode to snapshot type file. 383 */ 384 ip->i_flags |= SF_SNAPSHOT; 385 DIP_SET(ip, i_flags, ip->i_flags); 386 ip->i_flag |= IN_CHANGE | IN_UPDATE; 387 /* 388 * Ensure that the snapshot is completely on disk. 389 * Since we have marked it as a snapshot it is safe to 390 * unlock it as no process will be allowed to write to it. 391 */ 392 if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0) 393 goto out; 394 VOP_UNLOCK(vp, 0); 395 /* 396 * All allocations are done, so we can now snapshot the system. 397 * 398 * Recind nice scheduling while running with the filesystem suspended. 399 */ 400 if (td->td_proc->p_nice > 0) { 401 struct proc *p; 402 403 p = td->td_proc; 404 PROC_LOCK(p); 405 saved_nice = p->p_nice; 406 sched_nice(p, 0); 407 PROC_UNLOCK(p); 408 } 409 /* 410 * Suspend operation on filesystem. 411 */ 412 for (;;) { 413 vn_finished_write(wrtmp); 414 if ((error = vfs_write_suspend(vp->v_mount)) != 0) { 415 vn_start_write(NULL, &wrtmp, V_WAIT); 416 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 417 goto out; 418 } 419 if (mp->mnt_kern_flag & MNTK_SUSPENDED) 420 break; 421 vn_start_write(NULL, &wrtmp, V_WAIT); 422 } 423 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 424 if (ip->i_effnlink == 0) { 425 error = ENOENT; /* Snapshot file unlinked */ 426 goto out1; 427 } 428 if (collectsnapstats) 429 nanotime(&starttime); 430 431 /* The last block might have changed. Copy it again to be sure. */ 432 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)), 433 fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp); 434 if (error != 0) 435 goto out1; 436 error = readblock(vp, bp, numblks - 1); 437 bp->b_flags |= B_VALIDSUSPWRT; 438 bawrite(bp); 439 if (error != 0) 440 goto out1; 441 /* 442 * First, copy all the cylinder group maps that have changed. 443 */ 444 for (cg = 0; cg < fs->fs_ncg; cg++) { 445 if ((ACTIVECGNUM(fs, cg) & ACTIVECGOFF(cg)) != 0) 446 continue; 447 redo++; 448 error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 449 fs->fs_bsize, KERNCRED, 0, &nbp); 450 if (error) 451 goto out1; 452 error = cgaccount(cg, vp, nbp, 2); 453 bawrite(nbp); 454 if (error) 455 goto out1; 456 } 457 /* 458 * Grab a copy of the superblock and its summary information. 459 * We delay writing it until the suspension is released below. 460 */ 461 error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize, 462 KERNCRED, &sbp); 463 if (error) { 464 brelse(sbp); 465 sbp = NULL; 466 goto out1; 467 } 468 loc = blkoff(fs, fs->fs_sblockloc); 469 copy_fs = (struct fs *)(sbp->b_data + loc); 470 bcopy(fs, copy_fs, fs->fs_sbsize); 471 if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) 472 copy_fs->fs_clean = 1; 473 size = fs->fs_bsize < SBLOCKSIZE ? fs->fs_bsize : SBLOCKSIZE; 474 if (fs->fs_sbsize < size) 475 bzero(&sbp->b_data[loc + fs->fs_sbsize], size - fs->fs_sbsize); 476 size = blkroundup(fs, fs->fs_cssize); 477 if (fs->fs_contigsumsize > 0) 478 size += fs->fs_ncg * sizeof(int32_t); 479 space = malloc((u_long)size, M_UFSMNT, M_WAITOK); 480 copy_fs->fs_csp = space; 481 bcopy(fs->fs_csp, copy_fs->fs_csp, fs->fs_cssize); 482 space = (char *)space + fs->fs_cssize; 483 loc = howmany(fs->fs_cssize, fs->fs_fsize); 484 i = fs->fs_frag - loc % fs->fs_frag; 485 len = (i == fs->fs_frag) ? 0 : i * fs->fs_fsize; 486 if (len > 0) { 487 if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + loc), 488 len, KERNCRED, &bp)) != 0) { 489 brelse(bp); 490 free(copy_fs->fs_csp, M_UFSMNT); 491 bawrite(sbp); 492 sbp = NULL; 493 goto out1; 494 } 495 bcopy(bp->b_data, space, (u_int)len); 496 space = (char *)space + len; 497 bp->b_flags |= B_INVAL | B_NOCACHE; 498 brelse(bp); 499 } 500 if (fs->fs_contigsumsize > 0) { 501 copy_fs->fs_maxcluster = lp = space; 502 for (i = 0; i < fs->fs_ncg; i++) 503 *lp++ = fs->fs_contigsumsize; 504 } 505 /* 506 * We must check for active files that have been unlinked 507 * (e.g., with a zero link count). We have to expunge all 508 * trace of these files from the snapshot so that they are 509 * not reclaimed prematurely by fsck or unnecessarily dumped. 510 * We turn off the MNTK_SUSPENDED flag to avoid a panic from 511 * spec_strategy about writing on a suspended filesystem. 512 * Note that we skip unlinked snapshot files as they will 513 * be handled separately below. 514 * 515 * We also calculate the needed size for the snapshot list. 516 */ 517 snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) + 518 FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */; 519 MNT_ILOCK(mp); 520 mp->mnt_kern_flag &= ~MNTK_SUSPENDED; 521 loop: 522 MNT_VNODE_FOREACH(xvp, mp, mvp) { 523 VI_LOCK(xvp); 524 MNT_IUNLOCK(mp); 525 if ((xvp->v_iflag & VI_DOOMED) || 526 (xvp->v_usecount == 0 && 527 (xvp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == 0) || 528 xvp->v_type == VNON || 529 (VTOI(xvp)->i_flags & SF_SNAPSHOT)) { 530 VI_UNLOCK(xvp); 531 MNT_ILOCK(mp); 532 continue; 533 } 534 /* 535 * We can skip parent directory vnode because it must have 536 * this snapshot file in it. 537 */ 538 if (xvp == nd.ni_dvp) { 539 VI_UNLOCK(xvp); 540 MNT_ILOCK(mp); 541 continue; 542 } 543 vholdl(xvp); 544 if (vn_lock(xvp, LK_EXCLUSIVE | LK_INTERLOCK) != 0) { 545 MNT_ILOCK(mp); 546 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp); 547 vdrop(xvp); 548 goto loop; 549 } 550 VI_LOCK(xvp); 551 if (xvp->v_usecount == 0 && 552 (xvp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == 0) { 553 VI_UNLOCK(xvp); 554 VOP_UNLOCK(xvp, 0); 555 vdrop(xvp); 556 MNT_ILOCK(mp); 557 continue; 558 } 559 VI_UNLOCK(xvp); 560 if (snapdebug) 561 vprint("ffs_snapshot: busy vnode", xvp); 562 if (VOP_GETATTR(xvp, &vat, td->td_ucred) == 0 && 563 vat.va_nlink > 0) { 564 VOP_UNLOCK(xvp, 0); 565 vdrop(xvp); 566 MNT_ILOCK(mp); 567 continue; 568 } 569 xp = VTOI(xvp); 570 if (ffs_checkfreefile(copy_fs, vp, xp->i_number)) { 571 VOP_UNLOCK(xvp, 0); 572 vdrop(xvp); 573 MNT_ILOCK(mp); 574 continue; 575 } 576 /* 577 * If there is a fragment, clear it here. 578 */ 579 blkno = 0; 580 loc = howmany(xp->i_size, fs->fs_bsize) - 1; 581 if (loc < NDADDR) { 582 len = fragroundup(fs, blkoff(fs, xp->i_size)); 583 if (len != 0 && len < fs->fs_bsize) { 584 ffs_blkfree(ump, copy_fs, vp, 585 DIP(xp, i_db[loc]), len, xp->i_number, 586 NULL); 587 blkno = DIP(xp, i_db[loc]); 588 DIP_SET(xp, i_db[loc], 0); 589 } 590 } 591 snaplistsize += 1; 592 if (xp->i_ump->um_fstype == UFS1) 593 error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1, 594 BLK_NOCOPY, 1); 595 else 596 error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2, 597 BLK_NOCOPY, 1); 598 if (blkno) 599 DIP_SET(xp, i_db[loc], blkno); 600 if (!error) 601 error = ffs_freefile(ump, copy_fs, vp, xp->i_number, 602 xp->i_mode, NULL); 603 VOP_UNLOCK(xvp, 0); 604 vdrop(xvp); 605 if (error) { 606 free(copy_fs->fs_csp, M_UFSMNT); 607 bawrite(sbp); 608 sbp = NULL; 609 MNT_VNODE_FOREACH_ABORT(mp, mvp); 610 goto out1; 611 } 612 MNT_ILOCK(mp); 613 } 614 MNT_IUNLOCK(mp); 615 /* 616 * Erase the journal file from the snapshot. 617 */ 618 if (fs->fs_flags & FS_SUJ) { 619 error = softdep_journal_lookup(mp, &xvp); 620 if (error) { 621 free(copy_fs->fs_csp, M_UFSMNT); 622 bawrite(sbp); 623 sbp = NULL; 624 goto out1; 625 } 626 xp = VTOI(xvp); 627 if (xp->i_ump->um_fstype == UFS1) 628 error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1, 629 BLK_NOCOPY, 0); 630 else 631 error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2, 632 BLK_NOCOPY, 0); 633 vput(xvp); 634 } 635 /* 636 * Acquire a lock on the snapdata structure, creating it if necessary. 637 */ 638 sn = ffs_snapdata_acquire(devvp); 639 /* 640 * Change vnode to use shared snapshot lock instead of the original 641 * private lock. 642 */ 643 vp->v_vnlock = &sn->sn_lock; 644 lockmgr(&vp->v_lock, LK_RELEASE, NULL); 645 xp = TAILQ_FIRST(&sn->sn_head); 646 /* 647 * If this is the first snapshot on this filesystem, then we need 648 * to allocate the space for the list of preallocated snapshot blocks. 649 * This list will be refined below, but this preliminary one will 650 * keep us out of deadlock until the full one is ready. 651 */ 652 if (xp == NULL) { 653 snapblklist = malloc(snaplistsize * sizeof(daddr_t), 654 M_UFSMNT, M_WAITOK); 655 blkp = &snapblklist[1]; 656 *blkp++ = lblkno(fs, fs->fs_sblockloc); 657 blkno = fragstoblks(fs, fs->fs_csaddr); 658 for (cg = 0; cg < fs->fs_ncg; cg++) { 659 if (fragstoblks(fs, cgtod(fs, cg) > blkno)) 660 break; 661 *blkp++ = fragstoblks(fs, cgtod(fs, cg)); 662 } 663 len = howmany(fs->fs_cssize, fs->fs_bsize); 664 for (loc = 0; loc < len; loc++) 665 *blkp++ = blkno + loc; 666 for (; cg < fs->fs_ncg; cg++) 667 *blkp++ = fragstoblks(fs, cgtod(fs, cg)); 668 snapblklist[0] = blkp - snapblklist; 669 VI_LOCK(devvp); 670 if (sn->sn_blklist != NULL) 671 panic("ffs_snapshot: non-empty list"); 672 sn->sn_blklist = snapblklist; 673 sn->sn_listsize = blkp - snapblklist; 674 VI_UNLOCK(devvp); 675 } 676 /* 677 * Record snapshot inode. Since this is the newest snapshot, 678 * it must be placed at the end of the list. 679 */ 680 VI_LOCK(devvp); 681 fs->fs_snapinum[snaploc] = ip->i_number; 682 if (ip->i_nextsnap.tqe_prev != 0) 683 panic("ffs_snapshot: %d already on list", ip->i_number); 684 TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap); 685 devvp->v_vflag |= VV_COPYONWRITE; 686 VI_UNLOCK(devvp); 687 ASSERT_VOP_LOCKED(vp, "ffs_snapshot vp"); 688 out1: 689 KASSERT((sn != NULL && sbp != NULL && error == 0) || 690 (sn == NULL && sbp == NULL && error != 0), 691 ("email phk@ and mckusick@")); 692 /* 693 * Resume operation on filesystem. 694 */ 695 vfs_write_resume(vp->v_mount); 696 vn_start_write(NULL, &wrtmp, V_WAIT); 697 if (collectsnapstats && starttime.tv_sec > 0) { 698 nanotime(&endtime); 699 timespecsub(&endtime, &starttime); 700 printf("%s: suspended %ld.%03ld sec, redo %ld of %d\n", 701 vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec, 702 endtime.tv_nsec / 1000000, redo, fs->fs_ncg); 703 } 704 if (sbp == NULL) 705 goto out; 706 /* 707 * Copy allocation information from all the snapshots in 708 * this snapshot and then expunge them from its view. 709 */ 710 TAILQ_FOREACH(xp, &sn->sn_head, i_nextsnap) { 711 if (xp == ip) 712 break; 713 if (xp->i_ump->um_fstype == UFS1) 714 error = expunge_ufs1(vp, xp, fs, snapacct_ufs1, 715 BLK_SNAP, 0); 716 else 717 error = expunge_ufs2(vp, xp, fs, snapacct_ufs2, 718 BLK_SNAP, 0); 719 if (error == 0 && xp->i_effnlink == 0) { 720 error = ffs_freefile(ump, 721 copy_fs, 722 vp, 723 xp->i_number, 724 xp->i_mode, NULL); 725 } 726 if (error) { 727 fs->fs_snapinum[snaploc] = 0; 728 goto done; 729 } 730 } 731 /* 732 * Allocate space for the full list of preallocated snapshot blocks. 733 */ 734 snapblklist = malloc(snaplistsize * sizeof(daddr_t), 735 M_UFSMNT, M_WAITOK); 736 ip->i_snapblklist = &snapblklist[1]; 737 /* 738 * Expunge the blocks used by the snapshots from the set of 739 * blocks marked as used in the snapshot bitmaps. Also, collect 740 * the list of allocated blocks in i_snapblklist. 741 */ 742 if (ip->i_ump->um_fstype == UFS1) 743 error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, 744 BLK_SNAP, 0); 745 else 746 error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, 747 BLK_SNAP, 0); 748 if (error) { 749 fs->fs_snapinum[snaploc] = 0; 750 free(snapblklist, M_UFSMNT); 751 goto done; 752 } 753 if (snaplistsize < ip->i_snapblklist - snapblklist) 754 panic("ffs_snapshot: list too small"); 755 snaplistsize = ip->i_snapblklist - snapblklist; 756 snapblklist[0] = snaplistsize; 757 ip->i_snapblklist = 0; 758 /* 759 * Write out the list of allocated blocks to the end of the snapshot. 760 */ 761 auio.uio_iov = &aiov; 762 auio.uio_iovcnt = 1; 763 aiov.iov_base = (void *)snapblklist; 764 aiov.iov_len = snaplistsize * sizeof(daddr_t); 765 auio.uio_resid = aiov.iov_len; 766 auio.uio_offset = ip->i_size; 767 auio.uio_segflg = UIO_SYSSPACE; 768 auio.uio_rw = UIO_WRITE; 769 auio.uio_td = td; 770 if ((error = VOP_WRITE(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 771 fs->fs_snapinum[snaploc] = 0; 772 free(snapblklist, M_UFSMNT); 773 goto done; 774 } 775 /* 776 * Write the superblock and its summary information 777 * to the snapshot. 778 */ 779 blkno = fragstoblks(fs, fs->fs_csaddr); 780 len = howmany(fs->fs_cssize, fs->fs_bsize); 781 space = copy_fs->fs_csp; 782 for (loc = 0; loc < len; loc++) { 783 error = bread(vp, blkno + loc, fs->fs_bsize, KERNCRED, &nbp); 784 if (error) { 785 brelse(nbp); 786 fs->fs_snapinum[snaploc] = 0; 787 free(snapblklist, M_UFSMNT); 788 goto done; 789 } 790 bcopy(space, nbp->b_data, fs->fs_bsize); 791 space = (char *)space + fs->fs_bsize; 792 bawrite(nbp); 793 } 794 /* 795 * As this is the newest list, it is the most inclusive, so 796 * should replace the previous list. 797 */ 798 VI_LOCK(devvp); 799 space = sn->sn_blklist; 800 sn->sn_blklist = snapblklist; 801 sn->sn_listsize = snaplistsize; 802 VI_UNLOCK(devvp); 803 if (space != NULL) 804 free(space, M_UFSMNT); 805 /* 806 * If another process is currently writing the buffer containing 807 * the inode for this snapshot then a deadlock can occur. Drop 808 * the snapshot lock until the buffer has been written. 809 */ 810 VREF(vp); /* Protect against ffs_snapgone() */ 811 VOP_UNLOCK(vp, 0); 812 (void) bread(ip->i_devvp, 813 fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 814 (int) fs->fs_bsize, NOCRED, &nbp); 815 brelse(nbp); 816 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 817 if (ip->i_effnlink == 0) 818 error = ENOENT; /* Snapshot file unlinked */ 819 else 820 vrele(vp); /* Drop extra reference */ 821 done: 822 free(copy_fs->fs_csp, M_UFSMNT); 823 bawrite(sbp); 824 out: 825 NDFREE(&nd, NDF_ONLY_PNBUF); 826 if (saved_nice > 0) { 827 struct proc *p; 828 829 p = td->td_proc; 830 PROC_LOCK(p); 831 sched_nice(td->td_proc, saved_nice); 832 PROC_UNLOCK(td->td_proc); 833 } 834 UFS_LOCK(ump); 835 if (fs->fs_active != 0) { 836 free(fs->fs_active, M_DEVBUF); 837 fs->fs_active = 0; 838 } 839 UFS_UNLOCK(ump); 840 MNT_ILOCK(mp); 841 mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | (flag & ~MNT_QUOTA); 842 MNT_IUNLOCK(mp); 843 if (error) 844 (void) ffs_truncate(vp, (off_t)0, 0, NOCRED, td); 845 (void) ffs_syncvnode(vp, MNT_WAIT); 846 if (error) 847 vput(vp); 848 else 849 VOP_UNLOCK(vp, 0); 850 vrele(nd.ni_dvp); 851 vn_finished_write(wrtmp); 852 process_deferred_inactive(mp); 853 return (error); 854 } 855 856 /* 857 * Copy a cylinder group map. All the unallocated blocks are marked 858 * BLK_NOCOPY so that the snapshot knows that it need not copy them 859 * if they are later written. If passno is one, then this is a first 860 * pass, so only setting needs to be done. If passno is 2, then this 861 * is a revision to a previous pass which must be undone as the 862 * replacement pass is done. 863 */ 864 static int 865 cgaccount(cg, vp, nbp, passno) 866 int cg; 867 struct vnode *vp; 868 struct buf *nbp; 869 int passno; 870 { 871 struct buf *bp, *ibp; 872 struct inode *ip; 873 struct cg *cgp; 874 struct fs *fs; 875 ufs2_daddr_t base, numblks; 876 int error, len, loc, indiroff; 877 878 ip = VTOI(vp); 879 fs = ip->i_fs; 880 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 881 (int)fs->fs_cgsize, KERNCRED, &bp); 882 if (error) { 883 brelse(bp); 884 return (error); 885 } 886 cgp = (struct cg *)bp->b_data; 887 if (!cg_chkmagic(cgp)) { 888 brelse(bp); 889 return (EIO); 890 } 891 UFS_LOCK(ip->i_ump); 892 ACTIVESET(fs, cg); 893 /* 894 * Recomputation of summary information might not have been performed 895 * at mount time. Sync up summary information for current cylinder 896 * group while data is in memory to ensure that result of background 897 * fsck is slightly more consistent. 898 */ 899 fs->fs_cs(fs, cg) = cgp->cg_cs; 900 UFS_UNLOCK(ip->i_ump); 901 bcopy(bp->b_data, nbp->b_data, fs->fs_cgsize); 902 if (fs->fs_cgsize < fs->fs_bsize) 903 bzero(&nbp->b_data[fs->fs_cgsize], 904 fs->fs_bsize - fs->fs_cgsize); 905 cgp = (struct cg *)nbp->b_data; 906 bqrelse(bp); 907 if (passno == 2) 908 nbp->b_flags |= B_VALIDSUSPWRT; 909 numblks = howmany(fs->fs_size, fs->fs_frag); 910 len = howmany(fs->fs_fpg, fs->fs_frag); 911 base = cgbase(fs, cg) / fs->fs_frag; 912 if (base + len >= numblks) 913 len = numblks - base - 1; 914 loc = 0; 915 if (base < NDADDR) { 916 for ( ; loc < NDADDR; loc++) { 917 if (ffs_isblock(fs, cg_blksfree(cgp), loc)) 918 DIP_SET(ip, i_db[loc], BLK_NOCOPY); 919 else if (passno == 2 && DIP(ip, i_db[loc])== BLK_NOCOPY) 920 DIP_SET(ip, i_db[loc], 0); 921 else if (passno == 1 && DIP(ip, i_db[loc])== BLK_NOCOPY) 922 panic("ffs_snapshot: lost direct block"); 923 } 924 } 925 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)), 926 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 927 if (error) { 928 return (error); 929 } 930 indiroff = (base + loc - NDADDR) % NINDIR(fs); 931 for ( ; loc < len; loc++, indiroff++) { 932 if (indiroff >= NINDIR(fs)) { 933 if (passno == 2) 934 ibp->b_flags |= B_VALIDSUSPWRT; 935 bawrite(ibp); 936 error = UFS_BALLOC(vp, 937 lblktosize(fs, (off_t)(base + loc)), 938 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 939 if (error) { 940 return (error); 941 } 942 indiroff = 0; 943 } 944 if (ip->i_ump->um_fstype == UFS1) { 945 if (ffs_isblock(fs, cg_blksfree(cgp), loc)) 946 ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 947 BLK_NOCOPY; 948 else if (passno == 2 && ((ufs1_daddr_t *)(ibp->b_data)) 949 [indiroff] == BLK_NOCOPY) 950 ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 0; 951 else if (passno == 1 && ((ufs1_daddr_t *)(ibp->b_data)) 952 [indiroff] == BLK_NOCOPY) 953 panic("ffs_snapshot: lost indirect block"); 954 continue; 955 } 956 if (ffs_isblock(fs, cg_blksfree(cgp), loc)) 957 ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = BLK_NOCOPY; 958 else if (passno == 2 && 959 ((ufs2_daddr_t *)(ibp->b_data)) [indiroff] == BLK_NOCOPY) 960 ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = 0; 961 else if (passno == 1 && 962 ((ufs2_daddr_t *)(ibp->b_data)) [indiroff] == BLK_NOCOPY) 963 panic("ffs_snapshot: lost indirect block"); 964 } 965 if (passno == 2) 966 ibp->b_flags |= B_VALIDSUSPWRT; 967 bdwrite(ibp); 968 return (0); 969 } 970 971 /* 972 * Before expunging a snapshot inode, note all the 973 * blocks that it claims with BLK_SNAP so that fsck will 974 * be able to account for those blocks properly and so 975 * that this snapshot knows that it need not copy them 976 * if the other snapshot holding them is freed. This code 977 * is reproduced once each for UFS1 and UFS2. 978 */ 979 static int 980 expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype, clearmode) 981 struct vnode *snapvp; 982 struct inode *cancelip; 983 struct fs *fs; 984 int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, 985 struct fs *, ufs_lbn_t, int); 986 int expungetype; 987 int clearmode; 988 { 989 int i, error, indiroff; 990 ufs_lbn_t lbn, rlbn; 991 ufs2_daddr_t len, blkno, numblks, blksperindir; 992 struct ufs1_dinode *dip; 993 struct thread *td = curthread; 994 struct buf *bp; 995 996 /* 997 * Prepare to expunge the inode. If its inode block has not 998 * yet been copied, then allocate and fill the copy. 999 */ 1000 lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number)); 1001 blkno = 0; 1002 if (lbn < NDADDR) { 1003 blkno = VTOI(snapvp)->i_din1->di_db[lbn]; 1004 } else { 1005 if (DOINGSOFTDEP(snapvp)) 1006 softdep_prealloc(snapvp, MNT_WAIT); 1007 td->td_pflags |= TDP_COWINPROGRESS; 1008 error = ffs_balloc_ufs1(snapvp, lblktosize(fs, (off_t)lbn), 1009 fs->fs_bsize, KERNCRED, BA_METAONLY, &bp); 1010 td->td_pflags &= ~TDP_COWINPROGRESS; 1011 if (error) 1012 return (error); 1013 indiroff = (lbn - NDADDR) % NINDIR(fs); 1014 blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff]; 1015 bqrelse(bp); 1016 } 1017 if (blkno != 0) { 1018 if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp))) 1019 return (error); 1020 } else { 1021 error = ffs_balloc_ufs1(snapvp, lblktosize(fs, (off_t)lbn), 1022 fs->fs_bsize, KERNCRED, 0, &bp); 1023 if (error) 1024 return (error); 1025 if ((error = readblock(snapvp, bp, lbn)) != 0) 1026 return (error); 1027 } 1028 /* 1029 * Set a snapshot inode to be a zero length file, regular files 1030 * or unlinked snapshots to be completely unallocated. 1031 */ 1032 dip = (struct ufs1_dinode *)bp->b_data + 1033 ino_to_fsbo(fs, cancelip->i_number); 1034 if (clearmode || cancelip->i_effnlink == 0) 1035 dip->di_mode = 0; 1036 dip->di_size = 0; 1037 dip->di_blocks = 0; 1038 dip->di_flags &= ~SF_SNAPSHOT; 1039 bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs1_daddr_t)); 1040 bdwrite(bp); 1041 /* 1042 * Now go through and expunge all the blocks in the file 1043 * using the function requested. 1044 */ 1045 numblks = howmany(cancelip->i_size, fs->fs_bsize); 1046 if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_db[0], 1047 &cancelip->i_din1->di_db[NDADDR], fs, 0, expungetype))) 1048 return (error); 1049 if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_ib[0], 1050 &cancelip->i_din1->di_ib[NIADDR], fs, -1, expungetype))) 1051 return (error); 1052 blksperindir = 1; 1053 lbn = -NDADDR; 1054 len = numblks - NDADDR; 1055 rlbn = NDADDR; 1056 for (i = 0; len > 0 && i < NIADDR; i++) { 1057 error = indiracct_ufs1(snapvp, ITOV(cancelip), i, 1058 cancelip->i_din1->di_ib[i], lbn, rlbn, len, 1059 blksperindir, fs, acctfunc, expungetype); 1060 if (error) 1061 return (error); 1062 blksperindir *= NINDIR(fs); 1063 lbn -= blksperindir + 1; 1064 len -= blksperindir; 1065 rlbn += blksperindir; 1066 } 1067 return (0); 1068 } 1069 1070 /* 1071 * Descend an indirect block chain for vnode cancelvp accounting for all 1072 * its indirect blocks in snapvp. 1073 */ 1074 static int 1075 indiracct_ufs1(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, 1076 blksperindir, fs, acctfunc, expungetype) 1077 struct vnode *snapvp; 1078 struct vnode *cancelvp; 1079 int level; 1080 ufs1_daddr_t blkno; 1081 ufs_lbn_t lbn; 1082 ufs_lbn_t rlbn; 1083 ufs_lbn_t remblks; 1084 ufs_lbn_t blksperindir; 1085 struct fs *fs; 1086 int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, 1087 struct fs *, ufs_lbn_t, int); 1088 int expungetype; 1089 { 1090 int error, num, i; 1091 ufs_lbn_t subblksperindir; 1092 struct indir indirs[NIADDR + 2]; 1093 ufs1_daddr_t last, *bap; 1094 struct buf *bp; 1095 1096 if (blkno == 0) { 1097 if (expungetype == BLK_NOCOPY) 1098 return (0); 1099 panic("indiracct_ufs1: missing indir"); 1100 } 1101 if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0) 1102 return (error); 1103 if (lbn != indirs[num - 1 - level].in_lbn || num < 2) 1104 panic("indiracct_ufs1: botched params"); 1105 /* 1106 * We have to expand bread here since it will deadlock looking 1107 * up the block number for any blocks that are not in the cache. 1108 */ 1109 bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0); 1110 bp->b_blkno = fsbtodb(fs, blkno); 1111 if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 && 1112 (error = readblock(cancelvp, bp, fragstoblks(fs, blkno)))) { 1113 brelse(bp); 1114 return (error); 1115 } 1116 /* 1117 * Account for the block pointers in this indirect block. 1118 */ 1119 last = howmany(remblks, blksperindir); 1120 if (last > NINDIR(fs)) 1121 last = NINDIR(fs); 1122 bap = malloc(fs->fs_bsize, M_DEVBUF, M_WAITOK); 1123 bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); 1124 bqrelse(bp); 1125 error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, 1126 level == 0 ? rlbn : -1, expungetype); 1127 if (error || level == 0) 1128 goto out; 1129 /* 1130 * Account for the block pointers in each of the indirect blocks 1131 * in the levels below us. 1132 */ 1133 subblksperindir = blksperindir / NINDIR(fs); 1134 for (lbn++, level--, i = 0; i < last; i++) { 1135 error = indiracct_ufs1(snapvp, cancelvp, level, bap[i], lbn, 1136 rlbn, remblks, subblksperindir, fs, acctfunc, expungetype); 1137 if (error) 1138 goto out; 1139 rlbn += blksperindir; 1140 lbn -= blksperindir; 1141 remblks -= blksperindir; 1142 } 1143 out: 1144 free(bap, M_DEVBUF); 1145 return (error); 1146 } 1147 1148 /* 1149 * Do both snap accounting and map accounting. 1150 */ 1151 static int 1152 fullacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype) 1153 struct vnode *vp; 1154 ufs1_daddr_t *oldblkp, *lastblkp; 1155 struct fs *fs; 1156 ufs_lbn_t lblkno; 1157 int exptype; /* BLK_SNAP or BLK_NOCOPY */ 1158 { 1159 int error; 1160 1161 if ((error = snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype))) 1162 return (error); 1163 return (mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)); 1164 } 1165 1166 /* 1167 * Identify a set of blocks allocated in a snapshot inode. 1168 */ 1169 static int 1170 snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 1171 struct vnode *vp; 1172 ufs1_daddr_t *oldblkp, *lastblkp; 1173 struct fs *fs; 1174 ufs_lbn_t lblkno; 1175 int expungetype; /* BLK_SNAP or BLK_NOCOPY */ 1176 { 1177 struct inode *ip = VTOI(vp); 1178 ufs1_daddr_t blkno, *blkp; 1179 ufs_lbn_t lbn; 1180 struct buf *ibp; 1181 int error; 1182 1183 for ( ; oldblkp < lastblkp; oldblkp++) { 1184 blkno = *oldblkp; 1185 if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP) 1186 continue; 1187 lbn = fragstoblks(fs, blkno); 1188 if (lbn < NDADDR) { 1189 blkp = &ip->i_din1->di_db[lbn]; 1190 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1191 } else { 1192 error = ffs_balloc_ufs1(vp, lblktosize(fs, (off_t)lbn), 1193 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 1194 if (error) 1195 return (error); 1196 blkp = &((ufs1_daddr_t *)(ibp->b_data)) 1197 [(lbn - NDADDR) % NINDIR(fs)]; 1198 } 1199 /* 1200 * If we are expunging a snapshot vnode and we 1201 * find a block marked BLK_NOCOPY, then it is 1202 * one that has been allocated to this snapshot after 1203 * we took our current snapshot and can be ignored. 1204 */ 1205 if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) { 1206 if (lbn >= NDADDR) 1207 brelse(ibp); 1208 } else { 1209 if (*blkp != 0) 1210 panic("snapacct_ufs1: bad block"); 1211 *blkp = expungetype; 1212 if (lbn >= NDADDR) 1213 bdwrite(ibp); 1214 } 1215 } 1216 return (0); 1217 } 1218 1219 /* 1220 * Account for a set of blocks allocated in a snapshot inode. 1221 */ 1222 static int 1223 mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 1224 struct vnode *vp; 1225 ufs1_daddr_t *oldblkp, *lastblkp; 1226 struct fs *fs; 1227 ufs_lbn_t lblkno; 1228 int expungetype; 1229 { 1230 ufs1_daddr_t blkno; 1231 struct inode *ip; 1232 ino_t inum; 1233 int acctit; 1234 1235 ip = VTOI(vp); 1236 inum = ip->i_number; 1237 if (lblkno == -1) 1238 acctit = 0; 1239 else 1240 acctit = 1; 1241 for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) { 1242 blkno = *oldblkp; 1243 if (blkno == 0 || blkno == BLK_NOCOPY) 1244 continue; 1245 if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP) 1246 *ip->i_snapblklist++ = lblkno; 1247 if (blkno == BLK_SNAP) 1248 blkno = blkstofrags(fs, lblkno); 1249 ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, NULL); 1250 } 1251 return (0); 1252 } 1253 1254 /* 1255 * Before expunging a snapshot inode, note all the 1256 * blocks that it claims with BLK_SNAP so that fsck will 1257 * be able to account for those blocks properly and so 1258 * that this snapshot knows that it need not copy them 1259 * if the other snapshot holding them is freed. This code 1260 * is reproduced once each for UFS1 and UFS2. 1261 */ 1262 static int 1263 expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype, clearmode) 1264 struct vnode *snapvp; 1265 struct inode *cancelip; 1266 struct fs *fs; 1267 int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 1268 struct fs *, ufs_lbn_t, int); 1269 int expungetype; 1270 int clearmode; 1271 { 1272 int i, error, indiroff; 1273 ufs_lbn_t lbn, rlbn; 1274 ufs2_daddr_t len, blkno, numblks, blksperindir; 1275 struct ufs2_dinode *dip; 1276 struct thread *td = curthread; 1277 struct buf *bp; 1278 1279 /* 1280 * Prepare to expunge the inode. If its inode block has not 1281 * yet been copied, then allocate and fill the copy. 1282 */ 1283 lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number)); 1284 blkno = 0; 1285 if (lbn < NDADDR) { 1286 blkno = VTOI(snapvp)->i_din2->di_db[lbn]; 1287 } else { 1288 if (DOINGSOFTDEP(snapvp)) 1289 softdep_prealloc(snapvp, MNT_WAIT); 1290 td->td_pflags |= TDP_COWINPROGRESS; 1291 error = ffs_balloc_ufs2(snapvp, lblktosize(fs, (off_t)lbn), 1292 fs->fs_bsize, KERNCRED, BA_METAONLY, &bp); 1293 td->td_pflags &= ~TDP_COWINPROGRESS; 1294 if (error) 1295 return (error); 1296 indiroff = (lbn - NDADDR) % NINDIR(fs); 1297 blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff]; 1298 bqrelse(bp); 1299 } 1300 if (blkno != 0) { 1301 if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp))) 1302 return (error); 1303 } else { 1304 error = ffs_balloc_ufs2(snapvp, lblktosize(fs, (off_t)lbn), 1305 fs->fs_bsize, KERNCRED, 0, &bp); 1306 if (error) 1307 return (error); 1308 if ((error = readblock(snapvp, bp, lbn)) != 0) 1309 return (error); 1310 } 1311 /* 1312 * Set a snapshot inode to be a zero length file, regular files 1313 * to be completely unallocated. 1314 */ 1315 dip = (struct ufs2_dinode *)bp->b_data + 1316 ino_to_fsbo(fs, cancelip->i_number); 1317 if (clearmode || cancelip->i_effnlink == 0) 1318 dip->di_mode = 0; 1319 dip->di_size = 0; 1320 dip->di_blocks = 0; 1321 dip->di_flags &= ~SF_SNAPSHOT; 1322 bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs2_daddr_t)); 1323 bdwrite(bp); 1324 /* 1325 * Now go through and expunge all the blocks in the file 1326 * using the function requested. 1327 */ 1328 numblks = howmany(cancelip->i_size, fs->fs_bsize); 1329 if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_db[0], 1330 &cancelip->i_din2->di_db[NDADDR], fs, 0, expungetype))) 1331 return (error); 1332 if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_ib[0], 1333 &cancelip->i_din2->di_ib[NIADDR], fs, -1, expungetype))) 1334 return (error); 1335 blksperindir = 1; 1336 lbn = -NDADDR; 1337 len = numblks - NDADDR; 1338 rlbn = NDADDR; 1339 for (i = 0; len > 0 && i < NIADDR; i++) { 1340 error = indiracct_ufs2(snapvp, ITOV(cancelip), i, 1341 cancelip->i_din2->di_ib[i], lbn, rlbn, len, 1342 blksperindir, fs, acctfunc, expungetype); 1343 if (error) 1344 return (error); 1345 blksperindir *= NINDIR(fs); 1346 lbn -= blksperindir + 1; 1347 len -= blksperindir; 1348 rlbn += blksperindir; 1349 } 1350 return (0); 1351 } 1352 1353 /* 1354 * Descend an indirect block chain for vnode cancelvp accounting for all 1355 * its indirect blocks in snapvp. 1356 */ 1357 static int 1358 indiracct_ufs2(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, 1359 blksperindir, fs, acctfunc, expungetype) 1360 struct vnode *snapvp; 1361 struct vnode *cancelvp; 1362 int level; 1363 ufs2_daddr_t blkno; 1364 ufs_lbn_t lbn; 1365 ufs_lbn_t rlbn; 1366 ufs_lbn_t remblks; 1367 ufs_lbn_t blksperindir; 1368 struct fs *fs; 1369 int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 1370 struct fs *, ufs_lbn_t, int); 1371 int expungetype; 1372 { 1373 int error, num, i; 1374 ufs_lbn_t subblksperindir; 1375 struct indir indirs[NIADDR + 2]; 1376 ufs2_daddr_t last, *bap; 1377 struct buf *bp; 1378 1379 if (blkno == 0) { 1380 if (expungetype == BLK_NOCOPY) 1381 return (0); 1382 panic("indiracct_ufs2: missing indir"); 1383 } 1384 if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0) 1385 return (error); 1386 if (lbn != indirs[num - 1 - level].in_lbn || num < 2) 1387 panic("indiracct_ufs2: botched params"); 1388 /* 1389 * We have to expand bread here since it will deadlock looking 1390 * up the block number for any blocks that are not in the cache. 1391 */ 1392 bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0); 1393 bp->b_blkno = fsbtodb(fs, blkno); 1394 if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 && 1395 (error = readblock(cancelvp, bp, fragstoblks(fs, blkno)))) { 1396 brelse(bp); 1397 return (error); 1398 } 1399 /* 1400 * Account for the block pointers in this indirect block. 1401 */ 1402 last = howmany(remblks, blksperindir); 1403 if (last > NINDIR(fs)) 1404 last = NINDIR(fs); 1405 bap = malloc(fs->fs_bsize, M_DEVBUF, M_WAITOK); 1406 bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); 1407 bqrelse(bp); 1408 error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, 1409 level == 0 ? rlbn : -1, expungetype); 1410 if (error || level == 0) 1411 goto out; 1412 /* 1413 * Account for the block pointers in each of the indirect blocks 1414 * in the levels below us. 1415 */ 1416 subblksperindir = blksperindir / NINDIR(fs); 1417 for (lbn++, level--, i = 0; i < last; i++) { 1418 error = indiracct_ufs2(snapvp, cancelvp, level, bap[i], lbn, 1419 rlbn, remblks, subblksperindir, fs, acctfunc, expungetype); 1420 if (error) 1421 goto out; 1422 rlbn += blksperindir; 1423 lbn -= blksperindir; 1424 remblks -= blksperindir; 1425 } 1426 out: 1427 free(bap, M_DEVBUF); 1428 return (error); 1429 } 1430 1431 /* 1432 * Do both snap accounting and map accounting. 1433 */ 1434 static int 1435 fullacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype) 1436 struct vnode *vp; 1437 ufs2_daddr_t *oldblkp, *lastblkp; 1438 struct fs *fs; 1439 ufs_lbn_t lblkno; 1440 int exptype; /* BLK_SNAP or BLK_NOCOPY */ 1441 { 1442 int error; 1443 1444 if ((error = snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype))) 1445 return (error); 1446 return (mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)); 1447 } 1448 1449 /* 1450 * Identify a set of blocks allocated in a snapshot inode. 1451 */ 1452 static int 1453 snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 1454 struct vnode *vp; 1455 ufs2_daddr_t *oldblkp, *lastblkp; 1456 struct fs *fs; 1457 ufs_lbn_t lblkno; 1458 int expungetype; /* BLK_SNAP or BLK_NOCOPY */ 1459 { 1460 struct inode *ip = VTOI(vp); 1461 ufs2_daddr_t blkno, *blkp; 1462 ufs_lbn_t lbn; 1463 struct buf *ibp; 1464 int error; 1465 1466 for ( ; oldblkp < lastblkp; oldblkp++) { 1467 blkno = *oldblkp; 1468 if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP) 1469 continue; 1470 lbn = fragstoblks(fs, blkno); 1471 if (lbn < NDADDR) { 1472 blkp = &ip->i_din2->di_db[lbn]; 1473 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1474 } else { 1475 error = ffs_balloc_ufs2(vp, lblktosize(fs, (off_t)lbn), 1476 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 1477 if (error) 1478 return (error); 1479 blkp = &((ufs2_daddr_t *)(ibp->b_data)) 1480 [(lbn - NDADDR) % NINDIR(fs)]; 1481 } 1482 /* 1483 * If we are expunging a snapshot vnode and we 1484 * find a block marked BLK_NOCOPY, then it is 1485 * one that has been allocated to this snapshot after 1486 * we took our current snapshot and can be ignored. 1487 */ 1488 if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) { 1489 if (lbn >= NDADDR) 1490 brelse(ibp); 1491 } else { 1492 if (*blkp != 0) 1493 panic("snapacct_ufs2: bad block"); 1494 *blkp = expungetype; 1495 if (lbn >= NDADDR) 1496 bdwrite(ibp); 1497 } 1498 } 1499 return (0); 1500 } 1501 1502 /* 1503 * Account for a set of blocks allocated in a snapshot inode. 1504 */ 1505 static int 1506 mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 1507 struct vnode *vp; 1508 ufs2_daddr_t *oldblkp, *lastblkp; 1509 struct fs *fs; 1510 ufs_lbn_t lblkno; 1511 int expungetype; 1512 { 1513 ufs2_daddr_t blkno; 1514 struct inode *ip; 1515 ino_t inum; 1516 int acctit; 1517 1518 ip = VTOI(vp); 1519 inum = ip->i_number; 1520 if (lblkno == -1) 1521 acctit = 0; 1522 else 1523 acctit = 1; 1524 for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) { 1525 blkno = *oldblkp; 1526 if (blkno == 0 || blkno == BLK_NOCOPY) 1527 continue; 1528 if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP) 1529 *ip->i_snapblklist++ = lblkno; 1530 if (blkno == BLK_SNAP) 1531 blkno = blkstofrags(fs, lblkno); 1532 ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, NULL); 1533 } 1534 return (0); 1535 } 1536 1537 /* 1538 * Decrement extra reference on snapshot when last name is removed. 1539 * It will not be freed until the last open reference goes away. 1540 */ 1541 void 1542 ffs_snapgone(ip) 1543 struct inode *ip; 1544 { 1545 struct inode *xp; 1546 struct fs *fs; 1547 int snaploc; 1548 struct snapdata *sn; 1549 struct ufsmount *ump; 1550 1551 /* 1552 * Find snapshot in incore list. 1553 */ 1554 xp = NULL; 1555 sn = ip->i_devvp->v_rdev->si_snapdata; 1556 if (sn != NULL) 1557 TAILQ_FOREACH(xp, &sn->sn_head, i_nextsnap) 1558 if (xp == ip) 1559 break; 1560 if (xp != NULL) 1561 vrele(ITOV(ip)); 1562 else if (snapdebug) 1563 printf("ffs_snapgone: lost snapshot vnode %d\n", 1564 ip->i_number); 1565 /* 1566 * Delete snapshot inode from superblock. Keep list dense. 1567 */ 1568 fs = ip->i_fs; 1569 ump = ip->i_ump; 1570 UFS_LOCK(ump); 1571 for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) 1572 if (fs->fs_snapinum[snaploc] == ip->i_number) 1573 break; 1574 if (snaploc < FSMAXSNAP) { 1575 for (snaploc++; snaploc < FSMAXSNAP; snaploc++) { 1576 if (fs->fs_snapinum[snaploc] == 0) 1577 break; 1578 fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc]; 1579 } 1580 fs->fs_snapinum[snaploc - 1] = 0; 1581 } 1582 UFS_UNLOCK(ump); 1583 } 1584 1585 /* 1586 * Prepare a snapshot file for being removed. 1587 */ 1588 void 1589 ffs_snapremove(vp) 1590 struct vnode *vp; 1591 { 1592 struct inode *ip; 1593 struct vnode *devvp; 1594 struct buf *ibp; 1595 struct fs *fs; 1596 ufs2_daddr_t numblks, blkno, dblk; 1597 int error, loc, last; 1598 struct snapdata *sn; 1599 1600 ip = VTOI(vp); 1601 fs = ip->i_fs; 1602 devvp = ip->i_devvp; 1603 /* 1604 * If active, delete from incore list (this snapshot may 1605 * already have been in the process of being deleted, so 1606 * would not have been active). 1607 * 1608 * Clear copy-on-write flag if last snapshot. 1609 */ 1610 VI_LOCK(devvp); 1611 if (ip->i_nextsnap.tqe_prev != 0) { 1612 sn = devvp->v_rdev->si_snapdata; 1613 TAILQ_REMOVE(&sn->sn_head, ip, i_nextsnap); 1614 ip->i_nextsnap.tqe_prev = 0; 1615 VI_UNLOCK(devvp); 1616 lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL); 1617 KASSERT(vp->v_vnlock == &sn->sn_lock, 1618 ("ffs_snapremove: lost lock mutation")); 1619 vp->v_vnlock = &vp->v_lock; 1620 VI_LOCK(devvp); 1621 lockmgr(&sn->sn_lock, LK_RELEASE, NULL); 1622 try_free_snapdata(devvp); 1623 } else 1624 VI_UNLOCK(devvp); 1625 /* 1626 * Clear all BLK_NOCOPY fields. Pass any block claims to other 1627 * snapshots that want them (see ffs_snapblkfree below). 1628 */ 1629 for (blkno = 1; blkno < NDADDR; blkno++) { 1630 dblk = DIP(ip, i_db[blkno]); 1631 if (dblk == 0) 1632 continue; 1633 if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) 1634 DIP_SET(ip, i_db[blkno], 0); 1635 else if ((dblk == blkstofrags(fs, blkno) && 1636 ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize, 1637 ip->i_number))) { 1638 DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - 1639 btodb(fs->fs_bsize)); 1640 DIP_SET(ip, i_db[blkno], 0); 1641 } 1642 } 1643 numblks = howmany(ip->i_size, fs->fs_bsize); 1644 for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) { 1645 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno), 1646 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 1647 if (error) 1648 continue; 1649 if (fs->fs_size - blkno > NINDIR(fs)) 1650 last = NINDIR(fs); 1651 else 1652 last = fs->fs_size - blkno; 1653 for (loc = 0; loc < last; loc++) { 1654 if (ip->i_ump->um_fstype == UFS1) { 1655 dblk = ((ufs1_daddr_t *)(ibp->b_data))[loc]; 1656 if (dblk == 0) 1657 continue; 1658 if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) 1659 ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0; 1660 else if ((dblk == blkstofrags(fs, blkno) && 1661 ffs_snapblkfree(fs, ip->i_devvp, dblk, 1662 fs->fs_bsize, ip->i_number))) { 1663 ip->i_din1->di_blocks -= 1664 btodb(fs->fs_bsize); 1665 ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0; 1666 } 1667 continue; 1668 } 1669 dblk = ((ufs2_daddr_t *)(ibp->b_data))[loc]; 1670 if (dblk == 0) 1671 continue; 1672 if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) 1673 ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0; 1674 else if ((dblk == blkstofrags(fs, blkno) && 1675 ffs_snapblkfree(fs, ip->i_devvp, dblk, 1676 fs->fs_bsize, ip->i_number))) { 1677 ip->i_din2->di_blocks -= btodb(fs->fs_bsize); 1678 ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0; 1679 } 1680 } 1681 bawrite(ibp); 1682 } 1683 /* 1684 * Clear snapshot flag and drop reference. 1685 */ 1686 ip->i_flags &= ~SF_SNAPSHOT; 1687 DIP_SET(ip, i_flags, ip->i_flags); 1688 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1689 /* 1690 * The dirtied indirects must be written out before 1691 * softdep_setup_freeblocks() is called. Otherwise indir_trunc() 1692 * may find indirect pointers using the magic BLK_* values. 1693 */ 1694 if (DOINGSOFTDEP(vp)) 1695 ffs_syncvnode(vp, MNT_WAIT); 1696 #ifdef QUOTA 1697 /* 1698 * Reenable disk quotas for ex-snapshot file. 1699 */ 1700 if (!getinoquota(ip)) 1701 (void) chkdq(ip, DIP(ip, i_blocks), KERNCRED, FORCE); 1702 #endif 1703 } 1704 1705 /* 1706 * Notification that a block is being freed. Return zero if the free 1707 * should be allowed to proceed. Return non-zero if the snapshot file 1708 * wants to claim the block. The block will be claimed if it is an 1709 * uncopied part of one of the snapshots. It will be freed if it is 1710 * either a BLK_NOCOPY or has already been copied in all of the snapshots. 1711 * If a fragment is being freed, then all snapshots that care about 1712 * it must make a copy since a snapshot file can only claim full sized 1713 * blocks. Note that if more than one snapshot file maps the block, 1714 * we can pick one at random to claim it. Since none of the snapshots 1715 * can change, we are assurred that they will all see the same unmodified 1716 * image. When deleting a snapshot file (see ffs_snapremove above), we 1717 * must push any of these claimed blocks to one of the other snapshots 1718 * that maps it. These claimed blocks are easily identified as they will 1719 * have a block number equal to their logical block number within the 1720 * snapshot. A copied block can never have this property because they 1721 * must always have been allocated from a BLK_NOCOPY location. 1722 */ 1723 int 1724 ffs_snapblkfree(fs, devvp, bno, size, inum) 1725 struct fs *fs; 1726 struct vnode *devvp; 1727 ufs2_daddr_t bno; 1728 long size; 1729 ino_t inum; 1730 { 1731 struct buf *ibp, *cbp, *savedcbp = 0; 1732 struct thread *td = curthread; 1733 struct inode *ip; 1734 struct vnode *vp = NULL; 1735 ufs_lbn_t lbn; 1736 ufs2_daddr_t blkno; 1737 int indiroff = 0, error = 0, claimedblk = 0; 1738 struct snapdata *sn; 1739 1740 lbn = fragstoblks(fs, bno); 1741 retry: 1742 VI_LOCK(devvp); 1743 sn = devvp->v_rdev->si_snapdata; 1744 if (sn == NULL) { 1745 VI_UNLOCK(devvp); 1746 return (0); 1747 } 1748 if (lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, 1749 VI_MTX(devvp)) != 0) 1750 goto retry; 1751 TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) { 1752 vp = ITOV(ip); 1753 if (DOINGSOFTDEP(vp)) 1754 softdep_prealloc(vp, MNT_WAIT); 1755 /* 1756 * Lookup block being written. 1757 */ 1758 if (lbn < NDADDR) { 1759 blkno = DIP(ip, i_db[lbn]); 1760 } else { 1761 td->td_pflags |= TDP_COWINPROGRESS; 1762 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 1763 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 1764 td->td_pflags &= ~TDP_COWINPROGRESS; 1765 if (error) 1766 break; 1767 indiroff = (lbn - NDADDR) % NINDIR(fs); 1768 if (ip->i_ump->um_fstype == UFS1) 1769 blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff]; 1770 else 1771 blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff]; 1772 } 1773 /* 1774 * Check to see if block needs to be copied. 1775 */ 1776 if (blkno == 0) { 1777 /* 1778 * A block that we map is being freed. If it has not 1779 * been claimed yet, we will claim or copy it (below). 1780 */ 1781 claimedblk = 1; 1782 } else if (blkno == BLK_SNAP) { 1783 /* 1784 * No previous snapshot claimed the block, 1785 * so it will be freed and become a BLK_NOCOPY 1786 * (don't care) for us. 1787 */ 1788 if (claimedblk) 1789 panic("snapblkfree: inconsistent block type"); 1790 if (lbn < NDADDR) { 1791 DIP_SET(ip, i_db[lbn], BLK_NOCOPY); 1792 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1793 } else if (ip->i_ump->um_fstype == UFS1) { 1794 ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 1795 BLK_NOCOPY; 1796 bdwrite(ibp); 1797 } else { 1798 ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = 1799 BLK_NOCOPY; 1800 bdwrite(ibp); 1801 } 1802 continue; 1803 } else /* BLK_NOCOPY or default */ { 1804 /* 1805 * If the snapshot has already copied the block 1806 * (default), or does not care about the block, 1807 * it is not needed. 1808 */ 1809 if (lbn >= NDADDR) 1810 bqrelse(ibp); 1811 continue; 1812 } 1813 /* 1814 * If this is a full size block, we will just grab it 1815 * and assign it to the snapshot inode. Otherwise we 1816 * will proceed to copy it. See explanation for this 1817 * routine as to why only a single snapshot needs to 1818 * claim this block. 1819 */ 1820 if (size == fs->fs_bsize) { 1821 #ifdef DEBUG 1822 if (snapdebug) 1823 printf("%s %d lbn %jd from inum %d\n", 1824 "Grabonremove: snapino", ip->i_number, 1825 (intmax_t)lbn, inum); 1826 #endif 1827 if (lbn < NDADDR) { 1828 DIP_SET(ip, i_db[lbn], bno); 1829 } else if (ip->i_ump->um_fstype == UFS1) { 1830 ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = bno; 1831 bdwrite(ibp); 1832 } else { 1833 ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = bno; 1834 bdwrite(ibp); 1835 } 1836 DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + btodb(size)); 1837 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1838 lockmgr(vp->v_vnlock, LK_RELEASE, NULL); 1839 return (1); 1840 } 1841 if (lbn >= NDADDR) 1842 bqrelse(ibp); 1843 /* 1844 * Allocate the block into which to do the copy. Note that this 1845 * allocation will never require any additional allocations for 1846 * the snapshot inode. 1847 */ 1848 td->td_pflags |= TDP_COWINPROGRESS; 1849 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 1850 fs->fs_bsize, KERNCRED, 0, &cbp); 1851 td->td_pflags &= ~TDP_COWINPROGRESS; 1852 if (error) 1853 break; 1854 #ifdef DEBUG 1855 if (snapdebug) 1856 printf("%s%d lbn %jd %s %d size %ld to blkno %jd\n", 1857 "Copyonremove: snapino ", ip->i_number, 1858 (intmax_t)lbn, "for inum", inum, size, 1859 (intmax_t)cbp->b_blkno); 1860 #endif 1861 /* 1862 * If we have already read the old block contents, then 1863 * simply copy them to the new block. Note that we need 1864 * to synchronously write snapshots that have not been 1865 * unlinked, and hence will be visible after a crash, 1866 * to ensure their integrity. 1867 */ 1868 if (savedcbp != 0) { 1869 bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); 1870 bawrite(cbp); 1871 if (dopersistence && ip->i_effnlink > 0) 1872 (void) ffs_syncvnode(vp, MNT_WAIT); 1873 continue; 1874 } 1875 /* 1876 * Otherwise, read the old block contents into the buffer. 1877 */ 1878 if ((error = readblock(vp, cbp, lbn)) != 0) { 1879 bzero(cbp->b_data, fs->fs_bsize); 1880 bawrite(cbp); 1881 if (dopersistence && ip->i_effnlink > 0) 1882 (void) ffs_syncvnode(vp, MNT_WAIT); 1883 break; 1884 } 1885 savedcbp = cbp; 1886 } 1887 /* 1888 * Note that we need to synchronously write snapshots that 1889 * have not been unlinked, and hence will be visible after 1890 * a crash, to ensure their integrity. 1891 */ 1892 if (savedcbp) { 1893 vp = savedcbp->b_vp; 1894 bawrite(savedcbp); 1895 if (dopersistence && VTOI(vp)->i_effnlink > 0) 1896 (void) ffs_syncvnode(vp, MNT_WAIT); 1897 } 1898 /* 1899 * If we have been unable to allocate a block in which to do 1900 * the copy, then return non-zero so that the fragment will 1901 * not be freed. Although space will be lost, the snapshot 1902 * will stay consistent. 1903 */ 1904 lockmgr(vp->v_vnlock, LK_RELEASE, NULL); 1905 return (error); 1906 } 1907 1908 /* 1909 * Associate snapshot files when mounting. 1910 */ 1911 void 1912 ffs_snapshot_mount(mp) 1913 struct mount *mp; 1914 { 1915 struct ufsmount *ump = VFSTOUFS(mp); 1916 struct vnode *devvp = ump->um_devvp; 1917 struct fs *fs = ump->um_fs; 1918 struct thread *td = curthread; 1919 struct snapdata *sn; 1920 struct vnode *vp; 1921 struct vnode *lastvp; 1922 struct inode *ip; 1923 struct uio auio; 1924 struct iovec aiov; 1925 void *snapblklist; 1926 char *reason; 1927 daddr_t snaplistsize; 1928 int error, snaploc, loc; 1929 1930 /* 1931 * XXX The following needs to be set before ffs_truncate or 1932 * VOP_READ can be called. 1933 */ 1934 mp->mnt_stat.f_iosize = fs->fs_bsize; 1935 /* 1936 * Process each snapshot listed in the superblock. 1937 */ 1938 vp = NULL; 1939 lastvp = NULL; 1940 sn = NULL; 1941 for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) { 1942 if (fs->fs_snapinum[snaploc] == 0) 1943 break; 1944 if ((error = ffs_vget(mp, fs->fs_snapinum[snaploc], 1945 LK_EXCLUSIVE, &vp)) != 0){ 1946 printf("ffs_snapshot_mount: vget failed %d\n", error); 1947 continue; 1948 } 1949 ip = VTOI(vp); 1950 if ((ip->i_flags & SF_SNAPSHOT) == 0 || ip->i_size == 1951 lblktosize(fs, howmany(fs->fs_size, fs->fs_frag))) { 1952 if ((ip->i_flags & SF_SNAPSHOT) == 0) { 1953 reason = "non-snapshot"; 1954 } else { 1955 reason = "old format snapshot"; 1956 (void)ffs_truncate(vp, (off_t)0, 0, NOCRED, td); 1957 (void)ffs_syncvnode(vp, MNT_WAIT); 1958 } 1959 printf("ffs_snapshot_mount: %s inode %d\n", 1960 reason, fs->fs_snapinum[snaploc]); 1961 vput(vp); 1962 vp = NULL; 1963 for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) { 1964 if (fs->fs_snapinum[loc] == 0) 1965 break; 1966 fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc]; 1967 } 1968 fs->fs_snapinum[loc - 1] = 0; 1969 snaploc--; 1970 continue; 1971 } 1972 /* 1973 * Acquire a lock on the snapdata structure, creating it if 1974 * necessary. 1975 */ 1976 sn = ffs_snapdata_acquire(devvp); 1977 /* 1978 * Change vnode to use shared snapshot lock instead of the 1979 * original private lock. 1980 */ 1981 vp->v_vnlock = &sn->sn_lock; 1982 lockmgr(&vp->v_lock, LK_RELEASE, NULL); 1983 /* 1984 * Link it onto the active snapshot list. 1985 */ 1986 VI_LOCK(devvp); 1987 if (ip->i_nextsnap.tqe_prev != 0) 1988 panic("ffs_snapshot_mount: %d already on list", 1989 ip->i_number); 1990 else 1991 TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap); 1992 vp->v_vflag |= VV_SYSTEM; 1993 VI_UNLOCK(devvp); 1994 VOP_UNLOCK(vp, 0); 1995 lastvp = vp; 1996 } 1997 vp = lastvp; 1998 /* 1999 * No usable snapshots found. 2000 */ 2001 if (sn == NULL || vp == NULL) 2002 return; 2003 /* 2004 * Allocate the space for the block hints list. We always want to 2005 * use the list from the newest snapshot. 2006 */ 2007 auio.uio_iov = &aiov; 2008 auio.uio_iovcnt = 1; 2009 aiov.iov_base = (void *)&snaplistsize; 2010 aiov.iov_len = sizeof(snaplistsize); 2011 auio.uio_resid = aiov.iov_len; 2012 auio.uio_offset = 2013 lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)); 2014 auio.uio_segflg = UIO_SYSSPACE; 2015 auio.uio_rw = UIO_READ; 2016 auio.uio_td = td; 2017 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2018 if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 2019 printf("ffs_snapshot_mount: read_1 failed %d\n", error); 2020 VOP_UNLOCK(vp, 0); 2021 return; 2022 } 2023 snapblklist = malloc(snaplistsize * sizeof(daddr_t), 2024 M_UFSMNT, M_WAITOK); 2025 auio.uio_iovcnt = 1; 2026 aiov.iov_base = snapblklist; 2027 aiov.iov_len = snaplistsize * sizeof (daddr_t); 2028 auio.uio_resid = aiov.iov_len; 2029 auio.uio_offset -= sizeof(snaplistsize); 2030 if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 2031 printf("ffs_snapshot_mount: read_2 failed %d\n", error); 2032 VOP_UNLOCK(vp, 0); 2033 free(snapblklist, M_UFSMNT); 2034 return; 2035 } 2036 VOP_UNLOCK(vp, 0); 2037 VI_LOCK(devvp); 2038 ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_mount"); 2039 sn->sn_listsize = snaplistsize; 2040 sn->sn_blklist = (daddr_t *)snapblklist; 2041 devvp->v_vflag |= VV_COPYONWRITE; 2042 VI_UNLOCK(devvp); 2043 } 2044 2045 /* 2046 * Disassociate snapshot files when unmounting. 2047 */ 2048 void 2049 ffs_snapshot_unmount(mp) 2050 struct mount *mp; 2051 { 2052 struct vnode *devvp = VFSTOUFS(mp)->um_devvp; 2053 struct snapdata *sn; 2054 struct inode *xp; 2055 struct vnode *vp; 2056 2057 VI_LOCK(devvp); 2058 sn = devvp->v_rdev->si_snapdata; 2059 while (sn != NULL && (xp = TAILQ_FIRST(&sn->sn_head)) != NULL) { 2060 vp = ITOV(xp); 2061 TAILQ_REMOVE(&sn->sn_head, xp, i_nextsnap); 2062 xp->i_nextsnap.tqe_prev = 0; 2063 lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE, 2064 VI_MTX(devvp)); 2065 lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL); 2066 KASSERT(vp->v_vnlock == &sn->sn_lock, 2067 ("ffs_snapshot_unmount: lost lock mutation")); 2068 vp->v_vnlock = &vp->v_lock; 2069 lockmgr(&vp->v_lock, LK_RELEASE, NULL); 2070 lockmgr(&sn->sn_lock, LK_RELEASE, NULL); 2071 if (xp->i_effnlink > 0) 2072 vrele(vp); 2073 VI_LOCK(devvp); 2074 sn = devvp->v_rdev->si_snapdata; 2075 } 2076 try_free_snapdata(devvp); 2077 ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_unmount"); 2078 } 2079 2080 /* 2081 * Check the buffer block to be belong to device buffer that shall be 2082 * locked after snaplk. devvp shall be locked on entry, and will be 2083 * leaved locked upon exit. 2084 */ 2085 static int 2086 ffs_bp_snapblk(devvp, bp) 2087 struct vnode *devvp; 2088 struct buf *bp; 2089 { 2090 struct snapdata *sn; 2091 struct fs *fs; 2092 ufs2_daddr_t lbn, *snapblklist; 2093 int lower, upper, mid; 2094 2095 ASSERT_VI_LOCKED(devvp, "ffs_bp_snapblk"); 2096 KASSERT(devvp->v_type == VCHR, ("Not a device %p", devvp)); 2097 sn = devvp->v_rdev->si_snapdata; 2098 if (sn == NULL || TAILQ_FIRST(&sn->sn_head) == NULL) 2099 return (0); 2100 fs = TAILQ_FIRST(&sn->sn_head)->i_fs; 2101 lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno)); 2102 snapblklist = sn->sn_blklist; 2103 upper = sn->sn_listsize - 1; 2104 lower = 1; 2105 while (lower <= upper) { 2106 mid = (lower + upper) / 2; 2107 if (snapblklist[mid] == lbn) 2108 break; 2109 if (snapblklist[mid] < lbn) 2110 lower = mid + 1; 2111 else 2112 upper = mid - 1; 2113 } 2114 if (lower <= upper) 2115 return (1); 2116 return (0); 2117 } 2118 2119 void 2120 ffs_bdflush(bo, bp) 2121 struct bufobj *bo; 2122 struct buf *bp; 2123 { 2124 struct thread *td; 2125 struct vnode *vp, *devvp; 2126 struct buf *nbp; 2127 int bp_bdskip; 2128 2129 if (bo->bo_dirty.bv_cnt <= dirtybufthresh) 2130 return; 2131 2132 td = curthread; 2133 vp = bp->b_vp; 2134 devvp = bo->__bo_vnode; 2135 KASSERT(vp == devvp, ("devvp != vp %p %p", bo, bp)); 2136 2137 VI_LOCK(devvp); 2138 bp_bdskip = ffs_bp_snapblk(devvp, bp); 2139 if (bp_bdskip) 2140 bdwriteskip++; 2141 VI_UNLOCK(devvp); 2142 if (bo->bo_dirty.bv_cnt > dirtybufthresh + 10 && !bp_bdskip) { 2143 (void) VOP_FSYNC(vp, MNT_NOWAIT, td); 2144 altbufferflushes++; 2145 } else { 2146 BO_LOCK(bo); 2147 /* 2148 * Try to find a buffer to flush. 2149 */ 2150 TAILQ_FOREACH(nbp, &bo->bo_dirty.bv_hd, b_bobufs) { 2151 if ((nbp->b_vflags & BV_BKGRDINPROG) || 2152 BUF_LOCK(nbp, 2153 LK_EXCLUSIVE | LK_NOWAIT, NULL)) 2154 continue; 2155 if (bp == nbp) 2156 panic("bdwrite: found ourselves"); 2157 BO_UNLOCK(bo); 2158 /* 2159 * Don't countdeps with the bo lock 2160 * held. 2161 */ 2162 if (buf_countdeps(nbp, 0)) { 2163 BO_LOCK(bo); 2164 BUF_UNLOCK(nbp); 2165 continue; 2166 } 2167 if (bp_bdskip) { 2168 VI_LOCK(devvp); 2169 if (!ffs_bp_snapblk(vp, nbp)) { 2170 if (BO_MTX(bo) != VI_MTX(vp)) { 2171 VI_UNLOCK(devvp); 2172 BO_LOCK(bo); 2173 } 2174 BUF_UNLOCK(nbp); 2175 continue; 2176 } 2177 VI_UNLOCK(devvp); 2178 } 2179 if (nbp->b_flags & B_CLUSTEROK) { 2180 vfs_bio_awrite(nbp); 2181 } else { 2182 bremfree(nbp); 2183 bawrite(nbp); 2184 } 2185 dirtybufferflushes++; 2186 break; 2187 } 2188 if (nbp == NULL) 2189 BO_UNLOCK(bo); 2190 } 2191 } 2192 2193 /* 2194 * Check for need to copy block that is about to be written, 2195 * copying the block if necessary. 2196 */ 2197 int 2198 ffs_copyonwrite(devvp, bp) 2199 struct vnode *devvp; 2200 struct buf *bp; 2201 { 2202 struct snapdata *sn; 2203 struct buf *ibp, *cbp, *savedcbp = 0; 2204 struct thread *td = curthread; 2205 struct fs *fs; 2206 struct inode *ip; 2207 struct vnode *vp = 0; 2208 ufs2_daddr_t lbn, blkno, *snapblklist; 2209 int lower, upper, mid, indiroff, error = 0; 2210 int launched_async_io, prev_norunningbuf; 2211 long saved_runningbufspace; 2212 2213 if (devvp != bp->b_vp && (VTOI(bp->b_vp)->i_flags & SF_SNAPSHOT) != 0) 2214 return (0); /* Update on a snapshot file */ 2215 if (td->td_pflags & TDP_COWINPROGRESS) 2216 panic("ffs_copyonwrite: recursive call"); 2217 /* 2218 * First check to see if it is in the preallocated list. 2219 * By doing this check we avoid several potential deadlocks. 2220 */ 2221 VI_LOCK(devvp); 2222 sn = devvp->v_rdev->si_snapdata; 2223 if (sn == NULL || 2224 TAILQ_EMPTY(&sn->sn_head)) { 2225 VI_UNLOCK(devvp); 2226 return (0); /* No snapshot */ 2227 } 2228 ip = TAILQ_FIRST(&sn->sn_head); 2229 fs = ip->i_fs; 2230 lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno)); 2231 snapblklist = sn->sn_blklist; 2232 upper = sn->sn_listsize - 1; 2233 lower = 1; 2234 while (lower <= upper) { 2235 mid = (lower + upper) / 2; 2236 if (snapblklist[mid] == lbn) 2237 break; 2238 if (snapblklist[mid] < lbn) 2239 lower = mid + 1; 2240 else 2241 upper = mid - 1; 2242 } 2243 if (lower <= upper) { 2244 VI_UNLOCK(devvp); 2245 return (0); 2246 } 2247 launched_async_io = 0; 2248 prev_norunningbuf = td->td_pflags & TDP_NORUNNINGBUF; 2249 /* 2250 * Since I/O on bp isn't yet in progress and it may be blocked 2251 * for a long time waiting on snaplk, back it out of 2252 * runningbufspace, possibly waking other threads waiting for space. 2253 */ 2254 saved_runningbufspace = bp->b_runningbufspace; 2255 if (saved_runningbufspace != 0) 2256 runningbufwakeup(bp); 2257 /* 2258 * Not in the precomputed list, so check the snapshots. 2259 */ 2260 while (lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, 2261 VI_MTX(devvp)) != 0) { 2262 VI_LOCK(devvp); 2263 sn = devvp->v_rdev->si_snapdata; 2264 if (sn == NULL || 2265 TAILQ_EMPTY(&sn->sn_head)) { 2266 VI_UNLOCK(devvp); 2267 if (saved_runningbufspace != 0) { 2268 bp->b_runningbufspace = saved_runningbufspace; 2269 atomic_add_long(&runningbufspace, 2270 bp->b_runningbufspace); 2271 } 2272 return (0); /* Snapshot gone */ 2273 } 2274 } 2275 TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) { 2276 vp = ITOV(ip); 2277 if (DOINGSOFTDEP(vp)) 2278 softdep_prealloc(vp, MNT_WAIT); 2279 /* 2280 * We ensure that everything of our own that needs to be 2281 * copied will be done at the time that ffs_snapshot is 2282 * called. Thus we can skip the check here which can 2283 * deadlock in doing the lookup in UFS_BALLOC. 2284 */ 2285 if (bp->b_vp == vp) 2286 continue; 2287 /* 2288 * Check to see if block needs to be copied. We do not have 2289 * to hold the snapshot lock while doing this lookup as it 2290 * will never require any additional allocations for the 2291 * snapshot inode. 2292 */ 2293 if (lbn < NDADDR) { 2294 blkno = DIP(ip, i_db[lbn]); 2295 } else { 2296 td->td_pflags |= TDP_COWINPROGRESS | TDP_NORUNNINGBUF; 2297 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 2298 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 2299 td->td_pflags &= ~TDP_COWINPROGRESS; 2300 if (error) 2301 break; 2302 indiroff = (lbn - NDADDR) % NINDIR(fs); 2303 if (ip->i_ump->um_fstype == UFS1) 2304 blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff]; 2305 else 2306 blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff]; 2307 bqrelse(ibp); 2308 } 2309 #ifdef INVARIANTS 2310 if (blkno == BLK_SNAP && bp->b_lblkno >= 0) 2311 panic("ffs_copyonwrite: bad copy block"); 2312 #endif 2313 if (blkno != 0) 2314 continue; 2315 /* 2316 * Allocate the block into which to do the copy. Since 2317 * multiple processes may all try to copy the same block, 2318 * we have to recheck our need to do a copy if we sleep 2319 * waiting for the lock. 2320 * 2321 * Because all snapshots on a filesystem share a single 2322 * lock, we ensure that we will never be in competition 2323 * with another process to allocate a block. 2324 */ 2325 td->td_pflags |= TDP_COWINPROGRESS | TDP_NORUNNINGBUF; 2326 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 2327 fs->fs_bsize, KERNCRED, 0, &cbp); 2328 td->td_pflags &= ~TDP_COWINPROGRESS; 2329 if (error) 2330 break; 2331 #ifdef DEBUG 2332 if (snapdebug) { 2333 printf("Copyonwrite: snapino %d lbn %jd for ", 2334 ip->i_number, (intmax_t)lbn); 2335 if (bp->b_vp == devvp) 2336 printf("fs metadata"); 2337 else 2338 printf("inum %d", VTOI(bp->b_vp)->i_number); 2339 printf(" lblkno %jd to blkno %jd\n", 2340 (intmax_t)bp->b_lblkno, (intmax_t)cbp->b_blkno); 2341 } 2342 #endif 2343 /* 2344 * If we have already read the old block contents, then 2345 * simply copy them to the new block. Note that we need 2346 * to synchronously write snapshots that have not been 2347 * unlinked, and hence will be visible after a crash, 2348 * to ensure their integrity. 2349 */ 2350 if (savedcbp != 0) { 2351 bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); 2352 bawrite(cbp); 2353 if (dopersistence && ip->i_effnlink > 0) 2354 (void) ffs_syncvnode(vp, MNT_WAIT); 2355 else 2356 launched_async_io = 1; 2357 continue; 2358 } 2359 /* 2360 * Otherwise, read the old block contents into the buffer. 2361 */ 2362 if ((error = readblock(vp, cbp, lbn)) != 0) { 2363 bzero(cbp->b_data, fs->fs_bsize); 2364 bawrite(cbp); 2365 if (dopersistence && ip->i_effnlink > 0) 2366 (void) ffs_syncvnode(vp, MNT_WAIT); 2367 else 2368 launched_async_io = 1; 2369 break; 2370 } 2371 savedcbp = cbp; 2372 } 2373 /* 2374 * Note that we need to synchronously write snapshots that 2375 * have not been unlinked, and hence will be visible after 2376 * a crash, to ensure their integrity. 2377 */ 2378 if (savedcbp) { 2379 vp = savedcbp->b_vp; 2380 bawrite(savedcbp); 2381 if (dopersistence && VTOI(vp)->i_effnlink > 0) 2382 (void) ffs_syncvnode(vp, MNT_WAIT); 2383 else 2384 launched_async_io = 1; 2385 } 2386 lockmgr(vp->v_vnlock, LK_RELEASE, NULL); 2387 td->td_pflags = (td->td_pflags & ~TDP_NORUNNINGBUF) | 2388 prev_norunningbuf; 2389 if (launched_async_io && (td->td_pflags & TDP_NORUNNINGBUF) == 0) 2390 waitrunningbufspace(); 2391 /* 2392 * I/O on bp will now be started, so count it in runningbufspace. 2393 */ 2394 if (saved_runningbufspace != 0) { 2395 bp->b_runningbufspace = saved_runningbufspace; 2396 atomic_add_long(&runningbufspace, bp->b_runningbufspace); 2397 } 2398 return (error); 2399 } 2400 2401 /* 2402 * Read the specified block into the given buffer. 2403 * Much of this boiler-plate comes from bwrite(). 2404 */ 2405 static int 2406 readblock(vp, bp, lbn) 2407 struct vnode *vp; 2408 struct buf *bp; 2409 ufs2_daddr_t lbn; 2410 { 2411 struct inode *ip = VTOI(vp); 2412 struct bio *bip; 2413 2414 bip = g_alloc_bio(); 2415 bip->bio_cmd = BIO_READ; 2416 bip->bio_offset = dbtob(fsbtodb(ip->i_fs, blkstofrags(ip->i_fs, lbn))); 2417 bip->bio_data = bp->b_data; 2418 bip->bio_length = bp->b_bcount; 2419 bip->bio_done = NULL; 2420 2421 g_io_request(bip, ip->i_devvp->v_bufobj.bo_private); 2422 bp->b_error = biowait(bip, "snaprdb"); 2423 g_destroy_bio(bip); 2424 return (bp->b_error); 2425 } 2426 2427 #endif 2428 2429 /* 2430 * Process file deletes that were deferred by ufs_inactive() due to 2431 * the file system being suspended. Transfer IN_LAZYACCESS into 2432 * IN_MODIFIED for vnodes that were accessed during suspension. 2433 */ 2434 void 2435 process_deferred_inactive(struct mount *mp) 2436 { 2437 struct vnode *vp, *mvp; 2438 struct inode *ip; 2439 struct thread *td; 2440 int error; 2441 2442 td = curthread; 2443 (void) vn_start_secondary_write(NULL, &mp, V_WAIT); 2444 MNT_ILOCK(mp); 2445 loop: 2446 MNT_VNODE_FOREACH(vp, mp, mvp) { 2447 VI_LOCK(vp); 2448 /* 2449 * IN_LAZYACCESS is checked here without holding any 2450 * vnode lock, but this flag is set only while holding 2451 * vnode interlock. 2452 */ 2453 if (vp->v_type == VNON || (vp->v_iflag & VI_DOOMED) != 0 || 2454 ((VTOI(vp)->i_flag & IN_LAZYACCESS) == 0 && 2455 ((vp->v_iflag & VI_OWEINACT) == 0 || 2456 vp->v_usecount > 0))) { 2457 VI_UNLOCK(vp); 2458 continue; 2459 } 2460 MNT_IUNLOCK(mp); 2461 vholdl(vp); 2462 error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK); 2463 if (error != 0) { 2464 vdrop(vp); 2465 MNT_ILOCK(mp); 2466 if (error == ENOENT) 2467 continue; /* vnode recycled */ 2468 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp); 2469 goto loop; 2470 } 2471 ip = VTOI(vp); 2472 if ((ip->i_flag & IN_LAZYACCESS) != 0) { 2473 ip->i_flag &= ~IN_LAZYACCESS; 2474 ip->i_flag |= IN_MODIFIED; 2475 } 2476 VI_LOCK(vp); 2477 if ((vp->v_iflag & VI_OWEINACT) == 0 || vp->v_usecount > 0) { 2478 VI_UNLOCK(vp); 2479 VOP_UNLOCK(vp, 0); 2480 vdrop(vp); 2481 MNT_ILOCK(mp); 2482 continue; 2483 } 2484 2485 VNASSERT((vp->v_iflag & VI_DOINGINACT) == 0, vp, 2486 ("process_deferred_inactive: " 2487 "recursed on VI_DOINGINACT")); 2488 vp->v_iflag |= VI_DOINGINACT; 2489 vp->v_iflag &= ~VI_OWEINACT; 2490 VI_UNLOCK(vp); 2491 (void) VOP_INACTIVE(vp, td); 2492 VI_LOCK(vp); 2493 VNASSERT(vp->v_iflag & VI_DOINGINACT, vp, 2494 ("process_deferred_inactive: lost VI_DOINGINACT")); 2495 VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp, 2496 ("process_deferred_inactive: got VI_OWEINACT")); 2497 vp->v_iflag &= ~VI_DOINGINACT; 2498 VI_UNLOCK(vp); 2499 VOP_UNLOCK(vp, 0); 2500 vdrop(vp); 2501 MNT_ILOCK(mp); 2502 } 2503 MNT_IUNLOCK(mp); 2504 vn_finished_secondary_write(mp); 2505 } 2506 2507 #ifndef NO_FFS_SNAPSHOT 2508 2509 static struct snapdata * 2510 ffs_snapdata_alloc(void) 2511 { 2512 struct snapdata *sn; 2513 2514 /* 2515 * Fetch a snapdata from the free list if there is one available. 2516 */ 2517 mtx_lock(&snapfree_lock); 2518 sn = LIST_FIRST(&snapfree); 2519 if (sn != NULL) 2520 LIST_REMOVE(sn, sn_link); 2521 mtx_unlock(&snapfree_lock); 2522 if (sn != NULL) 2523 return (sn); 2524 /* 2525 * If there were no free snapdatas allocate one. 2526 */ 2527 sn = malloc(sizeof *sn, M_UFSMNT, M_WAITOK | M_ZERO); 2528 TAILQ_INIT(&sn->sn_head); 2529 lockinit(&sn->sn_lock, PVFS, "snaplk", VLKTIMEOUT, 2530 LK_CANRECURSE | LK_NOSHARE); 2531 return (sn); 2532 } 2533 2534 /* 2535 * The snapdata is never freed because we can not be certain that 2536 * there are no threads sleeping on the snap lock. Persisting 2537 * them permanently avoids costly synchronization in ffs_lock(). 2538 */ 2539 static void 2540 ffs_snapdata_free(struct snapdata *sn) 2541 { 2542 mtx_lock(&snapfree_lock); 2543 LIST_INSERT_HEAD(&snapfree, sn, sn_link); 2544 mtx_unlock(&snapfree_lock); 2545 } 2546 2547 /* Try to free snapdata associated with devvp */ 2548 static void 2549 try_free_snapdata(struct vnode *devvp) 2550 { 2551 struct snapdata *sn; 2552 ufs2_daddr_t *snapblklist; 2553 2554 ASSERT_VI_LOCKED(devvp, "try_free_snapdata"); 2555 sn = devvp->v_rdev->si_snapdata; 2556 2557 if (sn == NULL || TAILQ_FIRST(&sn->sn_head) != NULL || 2558 (devvp->v_vflag & VV_COPYONWRITE) == 0) { 2559 VI_UNLOCK(devvp); 2560 return; 2561 } 2562 2563 devvp->v_rdev->si_snapdata = NULL; 2564 devvp->v_vflag &= ~VV_COPYONWRITE; 2565 lockmgr(&sn->sn_lock, LK_DRAIN|LK_INTERLOCK, VI_MTX(devvp)); 2566 snapblklist = sn->sn_blklist; 2567 sn->sn_blklist = NULL; 2568 sn->sn_listsize = 0; 2569 lockmgr(&sn->sn_lock, LK_RELEASE, NULL); 2570 if (snapblklist != NULL) 2571 free(snapblklist, M_UFSMNT); 2572 ffs_snapdata_free(sn); 2573 } 2574 2575 static struct snapdata * 2576 ffs_snapdata_acquire(struct vnode *devvp) 2577 { 2578 struct snapdata *nsn; 2579 struct snapdata *sn; 2580 2581 /* 2582 * Allocate a free snapdata. This is done before acquiring the 2583 * devvp lock to avoid allocation while the devvp interlock is 2584 * held. 2585 */ 2586 nsn = ffs_snapdata_alloc(); 2587 /* 2588 * If there snapshots already exist on this filesystem grab a 2589 * reference to the shared lock. Otherwise this is the first 2590 * snapshot on this filesystem and we need to use our 2591 * pre-allocated snapdata. 2592 */ 2593 VI_LOCK(devvp); 2594 if (devvp->v_rdev->si_snapdata == NULL) { 2595 devvp->v_rdev->si_snapdata = nsn; 2596 nsn = NULL; 2597 } 2598 sn = devvp->v_rdev->si_snapdata; 2599 /* 2600 * Acquire the snapshot lock. 2601 */ 2602 lockmgr(&sn->sn_lock, 2603 LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY, VI_MTX(devvp)); 2604 /* 2605 * Free any unused snapdata. 2606 */ 2607 if (nsn != NULL) 2608 ffs_snapdata_free(nsn); 2609 2610 return (sn); 2611 } 2612 2613 #endif 2614