/*
 * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved.
 *
 * Further information about snapshots can be obtained from:
 *
 *	Marshall Kirk McKusick		http://www.mckusick.com/softdep/
 *	1614 Oxford Street		mckusick@mckusick.com
 *	Berkeley, CA 94709-1608		+1-510-843-9542
 *	USA
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_snapshot.c	8.11 (McKusick) 7/23/00
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/stdint.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/disklabel.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/vnode.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

#define KERNCRED thread0.td_ucred
#define DEBUG 1

static int cgaccount(int, struct vnode *, struct buf *, int);
static int expunge_ufs1(struct vnode *, struct inode *, struct fs *,
    int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
    ufs_lbn_t, int), int);
static int indiracct_ufs1(struct vnode *, struct vnode *, int,
    ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
    int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
    ufs_lbn_t, int), int);
static int fullacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int snapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int mapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int expunge_ufs2(struct vnode *, struct inode *, struct fs *,
    int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
    ufs_lbn_t, int), int);
static int indiracct_ufs2(struct vnode *, struct vnode *, int,
    ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
    int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
    ufs_lbn_t, int), int);
static int fullacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int snapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int mapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int ffs_copyonwrite(struct vnode *, struct buf *);
static int readblock(struct buf *, ufs2_daddr_t);
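
/*
 * Two distinguished values are stored in snapshot block pointers in
 * addition to real disk addresses (see cgaccount() and the expunge
 * routines below): BLK_NOCOPY marks a block that was unallocated when
 * the snapshot was taken (or was allocated to the snapshot afterwards)
 * and so need never be copied; BLK_SNAP marks a block claimed by
 * another snapshot, noted so that fsck can account for it and so that
 * this snapshot need not copy it either.
 */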

/*
 * To ensure the consistency of snapshots across crashes, we must
 * synchronously write out copied blocks before allowing the
 * originals to be modified. Because of the rather severe speed
 * penalty that this imposes, the following flag allows this
 * crash persistence to be disabled.
 */
int dopersistence = 0;

#ifdef DEBUG
#include <sys/sysctl.h>
SYSCTL_INT(_debug, OID_AUTO, dopersistence, CTLFLAG_RW, &dopersistence, 0, "");
int snapdebug = 0;
SYSCTL_INT(_debug, OID_AUTO, snapdebug, CTLFLAG_RW, &snapdebug, 0, "");
int collectsnapstats = 0;
SYSCTL_INT(_debug, OID_AUTO, collectsnapstats, CTLFLAG_RW, &collectsnapstats,
	0, "");
#endif /* DEBUG */

/*
 * Create a snapshot file and initialize it for the filesystem.
 */
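/*
 * In outline: assign a free slot in the superblock fs_snapinum table;
 * create the snapshot file; preallocate its indirect blocks and copies
 * of the superblock, summary information, and cylinder group maps;
 * suspend the filesystem; recopy only the cylinder groups that changed
 * while allocating; expunge unlinked files and earlier snapshots from
 * the snapshot's view; hook the inode onto the device's snapshot list
 * and enable copy-on-write; then resume operation.
 */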
int
ffs_snapshot(mp, snapfile)
	struct mount *mp;
	char *snapfile;
{
	ufs2_daddr_t numblks, blkno;
	int error, cg, snaploc;
	int i, size, len, loc;
	int flag = mp->mnt_flag;
	struct timespec starttime = {0, 0}, endtime;
	char saved_nice = 0;
	long redo = 0;
	int32_t *lp;
	void *space;
	struct fs *copy_fs = NULL, *fs = VFSTOUFS(mp)->um_fs;
	struct snaphead *snaphead;
	struct thread *td = curthread;
	struct inode *ip, *xp;
	struct buf *bp, *nbp, *ibp, *sbp = NULL;
	struct nameidata nd;
	struct mount *wrtmp;
	struct vattr vat;
	struct vnode *vp, *xvp, *nvp;

	/*
	 * Need to serialize access to snapshot code per filesystem.
	 */
	/*
	 * Assign a snapshot slot in the superblock.
	 */
	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
		if (fs->fs_snapinum[snaploc] == 0)
			break;
	if (snaploc == FSMAXSNAP)
		return (ENOSPC);
	/*
	 * Create the snapshot file.
	 */
restart:
	NDINIT(&nd, CREATE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, snapfile, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	if (nd.ni_vp != NULL) {
		vput(nd.ni_vp);
		error = EEXIST;
	}
	if (nd.ni_dvp->v_mount != mp)
		error = EXDEV;
	if (error) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_dvp == nd.ni_vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		return (error);
	}
	VATTR_NULL(&vat);
	vat.va_type = VREG;
	vat.va_mode = S_IRUSR;
	vat.va_vaflags |= VA_EXCLUSIVE;
	if (VOP_GETWRITEMOUNT(nd.ni_dvp, &wrtmp))
		wrtmp = NULL;
	if (wrtmp != mp)
		panic("ffs_snapshot: mount mismatch");
	if (vn_start_write(NULL, &wrtmp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &wrtmp,
		    V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VOP_LEASE(nd.ni_dvp, td, KERNCRED, LEASE_WRITE);
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vat);
	vput(nd.ni_dvp);
	if (error) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vn_finished_write(wrtmp);
		return (error);
	}
	vp = nd.ni_vp;
	ip = VTOI(vp);
	/*
	 * Allocate and copy the last block contents so as to be able
	 * to set size to that of the filesystem.
	 */
	numblks = howmany(fs->fs_size, fs->fs_frag);
	error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)),
	    fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp);
	if (error)
		goto out;
	ip->i_size = lblktosize(fs, (off_t)numblks);
	DIP(ip, i_size) = ip->i_size;
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	if ((error = readblock(bp, numblks - 1)) != 0)
		goto out;
	bawrite(bp);
	/*
	 * Preallocate critical data structures so that we can copy
	 * them in without further allocation after we suspend all
	 * operations on the filesystem. We would like to just release
	 * the allocated buffers without writing them since they will
	 * be filled in below once we are ready to go, but this upsets
	 * the soft update code, so we go ahead and write the new buffers.
	 *
	 * Allocate all indirect blocks and mark all of them as not
	 * needing to be copied.
	 */
	for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno),
		    fs->fs_bsize, td->td_ucred, BA_METAONLY, &ibp);
		if (error)
			goto out;
		bdwrite(ibp);
	}
	/*
	 * Allocate copies for the superblock and its summary information.
	 */
	error = UFS_BALLOC(vp, lfragtosize(fs, fs->fs_sblockloc),
	    fs->fs_sbsize, KERNCRED, 0, &nbp);
	if (error)
		goto out;
	bawrite(nbp);
	blkno = fragstoblks(fs, fs->fs_csaddr);
	len = howmany(fs->fs_cssize, fs->fs_bsize);
	for (loc = 0; loc < len; loc++) {
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)),
		    fs->fs_bsize, KERNCRED, 0, &nbp);
		if (error)
			goto out;
		bawrite(nbp);
	}
	/*
	 * Allocate all cylinder group blocks.
	 */
	for (cg = 0; cg < fs->fs_ncg; cg++) {
		error = UFS_BALLOC(vp, (off_t)(cgtod(fs, cg)) << fs->fs_fshift,
		    fs->fs_bsize, KERNCRED, 0, &nbp);
		if (error)
			goto out;
		bdwrite(nbp);
	}
	/*
	 * Copy all the cylinder group maps. Although the
	 * filesystem is still active, we hope that only a few
	 * cylinder groups will change between now and when we
	 * suspend operations. Thus, we will be able to quickly
	 * touch up the few cylinder groups that changed during
	 * the suspension period.
	 */
	len = howmany(fs->fs_ncg, NBBY);
	MALLOC(fs->fs_active, int *, len, M_DEVBUF, M_WAITOK);
	bzero(fs->fs_active, len);
	for (cg = 0; cg < fs->fs_ncg; cg++) {
		error = bread(vp, fragstoblks(fs, cgtod(fs, cg)), fs->fs_bsize,
		    KERNCRED, &nbp);
		if (error) {
			brelse(nbp);
			goto out;
		}
		error = cgaccount(cg, vp, nbp, 1);
		bawrite(nbp);
		if (error)
			goto out;
	}
	/*
	 * Change inode to snapshot type file.
	 */
	ip->i_flags |= SF_SNAPSHOT;
	DIP(ip, i_flags) = ip->i_flags;
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	/*
	 * Ensure that the snapshot is completely on disk.
	 */
	if ((error = VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td)) != 0)
		goto out;
	/*
	 * All allocations are done, so we can now snapshot the system.
	 *
	 * Rescind nice scheduling while running with the filesystem suspended.
	 */
	if (td->td_ksegrp->kg_nice > 0) {
		saved_nice = td->td_ksegrp->kg_nice;
		td->td_ksegrp->kg_nice = 0;
	}
	/*
	 * Suspend operation on filesystem.
	 */
	for (;;) {
		vn_finished_write(wrtmp);
		vfs_write_suspend(vp->v_mount);
		if (mp->mnt_kern_flag & MNTK_SUSPENDED)
			break;
		vn_start_write(NULL, &wrtmp, V_WAIT);
	}
	if (collectsnapstats)
		nanotime(&starttime);
	/*
	 * First, copy all the cylinder group maps that have changed.
	 */
	for (cg = 0; cg < fs->fs_ncg; cg++) {
		if ((ACTIVECGNUM(fs, cg) & ACTIVECGOFF(cg)) != 0)
			continue;
		redo++;
		error = bread(vp, fragstoblks(fs, cgtod(fs, cg)), fs->fs_bsize,
		    KERNCRED, &nbp);
		if (error) {
			brelse(nbp);
			goto out1;
		}
		error = cgaccount(cg, vp, nbp, 2);
		bawrite(nbp);
		if (error)
			goto out1;
	}
	/*
	 * Grab a copy of the superblock and its summary information.
	 * We delay writing it until the suspension is released below.
	 */
	error = bread(vp, fragstoblks(fs, fs->fs_sblockloc), fs->fs_bsize,
	    KERNCRED, &sbp);
	if (error) {
		brelse(sbp);
		sbp = NULL;
		goto out1;
	}
	loc = blkoff(fs, lfragtosize(fs, fs->fs_sblockloc));
	copy_fs = (struct fs *)(sbp->b_data + loc);
	bcopy(fs, copy_fs, fs->fs_sbsize);
	if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
		copy_fs->fs_clean = 1;
	if (fs->fs_sbsize < SBLOCKSIZE)
		bzero(&sbp->b_data[loc + fs->fs_sbsize],
		    SBLOCKSIZE - fs->fs_sbsize);
	size = blkroundup(fs, fs->fs_cssize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
	copy_fs->fs_csp = space;
	bcopy(fs->fs_csp, copy_fs->fs_csp, fs->fs_cssize);
	space = (char *)space + fs->fs_cssize;
	loc = howmany(fs->fs_cssize, fs->fs_fsize);
	i = fs->fs_frag - loc % fs->fs_frag;
	len = (i == fs->fs_frag) ? 0 : i * fs->fs_fsize;
	if (len > 0) {
		if ((error = bread(ip->i_devvp,
		    fsbtodb(fs, fs->fs_csaddr + loc),
		    len, KERNCRED, &bp)) != 0) {
			brelse(bp);
			free(copy_fs->fs_csp, M_UFSMNT);
			bawrite(sbp);
			sbp = NULL;
			goto out1;
		}
		bcopy(bp->b_data, space, (u_int)len);
		space = (char *)space + len;
		bp->b_flags |= B_INVAL | B_NOCACHE;
		brelse(bp);
	}
	if (fs->fs_contigsumsize > 0) {
		copy_fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
	}
	/*
	 * We must check for active files that have been unlinked
	 * (i.e., with a zero link count). We have to expunge all
	 * trace of these files from the snapshot so that they are
	 * not reclaimed prematurely by fsck or unnecessarily dumped.
	 * We turn off the MNTK_SUSPENDED flag to avoid a panic from
	 * spec_strategy about writing on a suspended filesystem.
	 */
	mp->mnt_kern_flag &= ~MNTK_SUSPENDED;
	mtx_lock(&mntvnode_mtx);
loop:
	for (xvp = TAILQ_FIRST(&mp->mnt_nvnodelist); xvp; xvp = nvp) {
		/*
		 * Make sure this vnode wasn't reclaimed in getnewvnode().
		 * Start over if it has (it won't be on the list anymore).
		 */
		if (xvp->v_mount != mp)
			goto loop;
		nvp = TAILQ_NEXT(xvp, v_nmntvnodes);
		mtx_unlock(&mntvnode_mtx);
		mtx_lock(&xvp->v_interlock);
		if (xvp->v_usecount == 0 || xvp->v_type == VNON ||
		    (VOP_GETATTR(xvp, &vat, td->td_proc->p_ucred, td) == 0 &&
		    vat.va_nlink > 0)) {
			mtx_unlock(&xvp->v_interlock);
			mtx_lock(&mntvnode_mtx);
			continue;
		}
		if (snapdebug)
			vprint("ffs_snapshot: busy vnode", xvp);
		if (vn_lock(xvp, LK_EXCLUSIVE | LK_INTERLOCK, td) != 0)
			goto loop;
		xp = VTOI(xvp);
		/*
		 * If there is a fragment, clear it here.
		 */
		blkno = 0;
		loc = howmany(xp->i_size, fs->fs_bsize) - 1;
		if (loc < NDADDR) {
			len = fragroundup(fs, blkoff(fs, xp->i_size));
			if (len < fs->fs_bsize) {
				ffs_blkfree(copy_fs, vp, DIP(xp, i_db[loc]),
				    len, xp->i_number);
				blkno = DIP(xp, i_db[loc]);
				DIP(xp, i_db[loc]) = 0;
			}
		}
		if (xp->i_ump->um_fstype == UFS1)
			error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1,
			    BLK_NOCOPY);
		else
			error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2,
			    BLK_NOCOPY);
		if (blkno)
			DIP(xp, i_db[loc]) = blkno;
		if (!error)
			error = ffs_freefile(copy_fs, vp, xp->i_number,
			    xp->i_mode);
		VOP_UNLOCK(xvp, 0, td);
		if (error) {
			free(copy_fs->fs_csp, M_UFSMNT);
			bawrite(sbp);
			sbp = NULL;
			goto out1;
		}
		mtx_lock(&mntvnode_mtx);
	}
	mtx_unlock(&mntvnode_mtx);
	/*
	 * Record snapshot inode. Since this is the newest snapshot,
	 * it must be placed at the end of the list.
	 */
	fs->fs_snapinum[snaploc] = ip->i_number;
	if (ip->i_nextsnap.tqe_prev != 0)
		panic("ffs_snapshot: %d already on list", ip->i_number);
	snaphead = &ip->i_devvp->v_rdev->si_snapshots;
	TAILQ_INSERT_TAIL(snaphead, ip, i_nextsnap);

	ASSERT_VOP_LOCKED(ip->i_devvp, "ffs_snapshot devvp");
	ip->i_devvp->v_rdev->si_copyonwrite = ffs_copyonwrite;
	ip->i_devvp->v_vflag |= VV_COPYONWRITE;
	ASSERT_VOP_LOCKED(vp, "ffs_snapshot vp");
	vp->v_vflag |= VV_SYSTEM;
out1:
	/*
	 * Resume operation on filesystem.
	 */
	vfs_write_resume(vp->v_mount);
	if (saved_nice > 0)
		td->td_ksegrp->kg_nice = saved_nice;
	vn_start_write(NULL, &wrtmp, V_WAIT);
	if (collectsnapstats && starttime.tv_sec > 0) {
		nanotime(&endtime);
		timespecsub(&endtime, &starttime);
		printf("%s: suspended %d.%03ld sec, redo %ld of %d\n",
		    vp->v_mount->mnt_stat.f_mntonname, endtime.tv_sec,
		    endtime.tv_nsec / 1000000, redo, fs->fs_ncg);
	}
	if (sbp == NULL)
		goto out;
	/*
	 * Copy allocation information from all the other snapshots
	 * into this snapshot and then expunge them from its view.
	 */
	snaphead = &ip->i_devvp->v_rdev->si_snapshots;
	TAILQ_FOREACH(xp, snaphead, i_nextsnap) {
		if (xp == ip)
			break;
		if (xp->i_ump->um_fstype == UFS1)
			error = expunge_ufs1(vp, xp, fs, snapacct_ufs1,
			    BLK_SNAP);
		else
			error = expunge_ufs2(vp, xp, fs, snapacct_ufs2,
			    BLK_SNAP);
		if (error) {
			fs->fs_snapinum[snaploc] = 0;
			goto done;
		}
	}
	/*
	 * Expunge the blocks used by the snapshots from the set of
	 * blocks marked as used in the snapshot bitmaps.
	 */
	if (ip->i_ump->um_fstype == UFS1)
		error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, BLK_SNAP);
	else
		error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, BLK_SNAP);
	if (error) {
		fs->fs_snapinum[snaploc] = 0;
		goto done;
	}
	/*
	 * Write the superblock and its summary information
	 * to the snapshot.
	 */
	blkno = fragstoblks(fs, fs->fs_csaddr);
	len = howmany(fs->fs_cssize, fs->fs_bsize);
	space = copy_fs->fs_csp;
	for (loc = 0; loc < len; loc++) {
		error = bread(vp, blkno + loc, fs->fs_bsize, KERNCRED, &nbp);
		if (error) {
			brelse(nbp);
			fs->fs_snapinum[snaploc] = 0;
			goto done;
		}
		bcopy(space, nbp->b_data, fs->fs_bsize);
		space = (char *)space + fs->fs_bsize;
		bawrite(nbp);
	}
done:
	free(copy_fs->fs_csp, M_UFSMNT);
	bawrite(sbp);
out:
	if (fs->fs_active != 0) {
		FREE(fs->fs_active, M_DEVBUF);
		fs->fs_active = 0;
	}
	mp->mnt_flag = flag;
	if (error)
		(void) UFS_TRUNCATE(vp, (off_t)0, 0, NOCRED, td);
	(void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
	if (error)
		vput(vp);
	else
		VOP_UNLOCK(vp, 0, td);
	vn_finished_write(wrtmp);
	return (error);
}
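
/*
 * Snapshots are requested from userland with a mount update (e.g.
 * mount -u -o snapshot); see ffs_mount(), which passes the pathname
 * of the file to be created down to ffs_snapshot() above.
 */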

/*
 * Copy a cylinder group map. All the unallocated blocks are marked
 * BLK_NOCOPY so that the snapshot knows that it need not copy them
 * if they are later written. If passno is 1, then this is a first
 * pass, so only setting needs to be done. If passno is 2, then this
 * is a revision to a previous pass which must be undone as the
 * replacement pass is done.
 */
static int
cgaccount(cg, vp, nbp, passno)
	int cg;
	struct vnode *vp;
	struct buf *nbp;
	int passno;
{
	struct buf *bp, *ibp;
	struct inode *ip;
	struct cg *cgp;
	struct fs *fs;
	ufs2_daddr_t base, numblks;
	int error, len, loc, indiroff;

	ip = VTOI(vp);
	fs = ip->i_fs;
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, KERNCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (EIO);
	}
	atomic_set_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg));
	bcopy(bp->b_data, nbp->b_data, fs->fs_cgsize);
	if (fs->fs_cgsize < fs->fs_bsize)
		bzero(&nbp->b_data[fs->fs_cgsize],
		    fs->fs_bsize - fs->fs_cgsize);
	if (passno == 2)
		nbp->b_flags |= B_VALIDSUSPWRT;
	numblks = howmany(fs->fs_size, fs->fs_frag);
	len = howmany(fs->fs_fpg, fs->fs_frag);
	base = cg * fs->fs_fpg / fs->fs_frag;
	if (base + len >= numblks)
		len = numblks - base - 1;
	loc = 0;
	if (base < NDADDR) {
		for ( ; loc < NDADDR; loc++) {
			if (ffs_isblock(fs, cg_blksfree(cgp), loc))
				DIP(ip, i_db[loc]) = BLK_NOCOPY;
			else if (passno == 2 &&
			    DIP(ip, i_db[loc]) == BLK_NOCOPY)
				DIP(ip, i_db[loc]) = 0;
			else if (passno == 1 &&
			    DIP(ip, i_db[loc]) == BLK_NOCOPY)
				panic("ffs_snapshot: lost direct block");
		}
	}
	error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)),
	    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
	if (error) {
		brelse(bp);
		return (error);
	}
	indiroff = (base + loc - NDADDR) % NINDIR(fs);
	for ( ; loc < len; loc++, indiroff++) {
		if (indiroff >= NINDIR(fs)) {
			if (passno == 2)
				ibp->b_flags |= B_VALIDSUSPWRT;
			bawrite(ibp);
			error = UFS_BALLOC(vp,
			    lblktosize(fs, (off_t)(base + loc)),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			if (error) {
				brelse(bp);
				return (error);
			}
			indiroff = 0;
		}
		if (ip->i_ump->um_fstype == UFS1) {
			if (ffs_isblock(fs, cg_blksfree(cgp), loc))
				((ufs1_daddr_t *)(ibp->b_data))[indiroff] =
				    BLK_NOCOPY;
			else if (passno == 2 && ((ufs1_daddr_t *)(ibp->b_data))
			    [indiroff] == BLK_NOCOPY)
				((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 0;
			else if (passno == 1 && ((ufs1_daddr_t *)(ibp->b_data))
			    [indiroff] == BLK_NOCOPY)
				panic("ffs_snapshot: lost indirect block");
			continue;
		}
		if (ffs_isblock(fs, cg_blksfree(cgp), loc))
			((ufs2_daddr_t *)(ibp->b_data))[indiroff] = BLK_NOCOPY;
		else if (passno == 2 &&
		    ((ufs2_daddr_t *)(ibp->b_data))[indiroff] == BLK_NOCOPY)
			((ufs2_daddr_t *)(ibp->b_data))[indiroff] = 0;
		else if (passno == 1 &&
		    ((ufs2_daddr_t *)(ibp->b_data))[indiroff] == BLK_NOCOPY)
			panic("ffs_snapshot: lost indirect block");
	}
	bqrelse(bp);
	if (passno == 2)
		ibp->b_flags |= B_VALIDSUSPWRT;
	bdwrite(ibp);
	return (0);
}

/*
 * Before expunging a snapshot inode, note all the
 * blocks that it claims with BLK_SNAP so that fsck will
 * be able to account for those blocks properly and so
 * that this snapshot knows that it need not copy them
 * if the other snapshot holding them is freed. This code
 * is reproduced once each for UFS1 and UFS2.
 */
static int
expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype)
	struct vnode *snapvp;
	struct inode *cancelip;
	struct fs *fs;
	int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
	    struct fs *, ufs_lbn_t, int);
	int expungetype;
{
	int i, error, indiroff;
	ufs_lbn_t lbn, rlbn;
	ufs2_daddr_t len, blkno, numblks, blksperindir;
	struct ufs1_dinode *dip;
	struct thread *td = curthread;
	struct buf *bp;

	numblks = howmany(cancelip->i_size, fs->fs_bsize);
	if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_db[0],
	    &cancelip->i_din1->di_ib[NIADDR], fs, 0, expungetype)))
		return (error);
	blksperindir = 1;
	lbn = -NDADDR;
	len = numblks - NDADDR;
	rlbn = NDADDR;
	for (i = 0; len > 0 && i < NIADDR; i++) {
		error = indiracct_ufs1(snapvp, ITOV(cancelip), i,
		    cancelip->i_din1->di_ib[i], lbn, rlbn, len,
		    blksperindir, fs, acctfunc, expungetype);
		if (error)
			return (error);
		blksperindir *= NINDIR(fs);
		lbn -= blksperindir + 1;
		len -= blksperindir;
		rlbn += blksperindir;
	}
	/*
	 * Prepare to expunge the inode. If its inode block has not
	 * yet been copied, then allocate and fill the copy.
	 */
	lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
	blkno = 0;
	if (lbn < NDADDR) {
		blkno = cancelip->i_din1->di_db[lbn];
	} else {
		td->td_proc->p_flag |= P_COWINPROGRESS;
		error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, BA_METAONLY, &bp);
		td->td_proc->p_flag &= ~P_COWINPROGRESS;
		if (error)
			return (error);
		indiroff = (lbn - NDADDR) % NINDIR(fs);
		blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff];
		bqrelse(bp);
	}
	error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn),
	    fs->fs_bsize, KERNCRED, 0, &bp);
	if (error)
		return (error);
	if (blkno == 0 && (error = readblock(bp, lbn)))
		return (error);
	/*
	 * Set a snapshot inode to be a zero length file, regular files
	 * to be completely unallocated.
	 */
	dip = (struct ufs1_dinode *)bp->b_data +
	    ino_to_fsbo(fs, cancelip->i_number);
	if (expungetype == BLK_NOCOPY)
		dip->di_mode = 0;
	dip->di_size = 0;
	dip->di_blocks = 0;
	dip->di_flags &= ~SF_SNAPSHOT;
	bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs1_daddr_t));
	bdwrite(bp);
	return (0);
}
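
/*
 * The lbn/rlbn bookkeeping above and in the indiracct routines relies
 * on the convention that indirect blocks are cached on a vnode at
 * negative logical block numbers, as computed by ufs_getlbns(): the
 * single indirect block lives at -NDADDR, the double indirect at
 * -(NDADDR + NINDIR(fs) + 1), and so on. In each call, lbn is the
 * (negative) address of the indirect block itself and rlbn is the
 * first data block that it maps.
 */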

/*
 * Descend an indirect block chain for vnode cancelvp accounting for all
 * its indirect blocks in snapvp.
 */
static int
indiracct_ufs1(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks,
	blksperindir, fs, acctfunc, expungetype)
	struct vnode *snapvp;
	struct vnode *cancelvp;
	int level;
	ufs1_daddr_t blkno;
	ufs_lbn_t lbn;
	ufs_lbn_t rlbn;
	ufs_lbn_t remblks;
	ufs_lbn_t blksperindir;
	struct fs *fs;
	int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
	    struct fs *, ufs_lbn_t, int);
	int expungetype;
{
	int error, num, i;
	ufs_lbn_t subblksperindir;
	struct indir indirs[NIADDR + 2];
	ufs1_daddr_t last, *bap;
	struct buf *bp;

	if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
		return (error);
	if (lbn != indirs[num - 1 - level].in_lbn || blkno == 0 || num < 2)
		panic("indiracct: botched params");
	/*
	 * We have to expand bread here since it will deadlock looking
	 * up the block number for any blocks that are not in the cache.
	 */
	bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0);
	bp->b_blkno = fsbtodb(fs, blkno);
	if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
	    (error = readblock(bp, fragstoblks(fs, blkno)))) {
		brelse(bp);
		return (error);
	}
	/*
	 * Account for the block pointers in this indirect block.
	 */
	last = howmany(remblks, blksperindir);
	if (last > NINDIR(fs))
		last = NINDIR(fs);
	MALLOC(bap, ufs1_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK);
	bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize);
	bqrelse(bp);
	error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, rlbn, expungetype);
	if (error || level == 0)
		goto out;
	/*
	 * Account for the block pointers in each of the indirect blocks
	 * in the levels below us.
	 */
	subblksperindir = blksperindir / NINDIR(fs);
	for (lbn++, level--, i = 0; i < last; i++) {
		error = indiracct_ufs1(snapvp, cancelvp, level, bap[i], lbn,
		    rlbn, remblks, subblksperindir, fs, acctfunc, expungetype);
		if (error)
			goto out;
		rlbn += blksperindir;
		lbn -= blksperindir;
		remblks -= blksperindir;
	}
out:
	FREE(bap, M_DEVBUF);
	return (error);
}

/*
 * Do both snap accounting and map accounting.
 */
static int
fullacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)
	struct vnode *vp;
	ufs1_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int exptype;	/* BLK_SNAP or BLK_NOCOPY */
{
	int error;

	if ((error = snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)))
		return (error);
	return (mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype));
}

/*
 * Identify a set of blocks allocated in a snapshot inode.
 */
static int
snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
	struct vnode *vp;
	ufs1_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int expungetype;	/* BLK_SNAP or BLK_NOCOPY */
{
	struct inode *ip = VTOI(vp);
	ufs1_daddr_t blkno, *blkp;
	ufs_lbn_t lbn;
	struct buf *ibp;
	int error;

	for ( ; oldblkp < lastblkp; oldblkp++) {
		blkno = *oldblkp;
		if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
			continue;
		lbn = fragstoblks(fs, blkno);
		if (lbn < NDADDR) {
			blkp = &ip->i_din1->di_db[lbn];
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			if (error)
				return (error);
			blkp = &((ufs1_daddr_t *)(ibp->b_data))
			    [(lbn - NDADDR) % NINDIR(fs)];
		}
		/*
		 * If we are expunging a snapshot vnode and we
		 * find a block marked BLK_NOCOPY, then it is
		 * one that has been allocated to this snapshot after
		 * we took our current snapshot and can be ignored.
		 */
		if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) {
			if (lbn >= NDADDR)
				brelse(ibp);
		} else {
			if (*blkp != 0)
				panic("snapacct: bad block");
			*blkp = expungetype;
			if (lbn >= NDADDR)
				bdwrite(ibp);
		}
	}
	return (0);
}

/*
 * Account for a set of blocks allocated in a snapshot inode.
 */
static int
mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
	struct vnode *vp;
	ufs1_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int expungetype;
{
	ufs1_daddr_t blkno;
	ino_t inum;

	inum = VTOI(vp)->i_number;
	for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) {
		blkno = *oldblkp;
		if (blkno == 0 || blkno == BLK_NOCOPY)
			continue;
		if (blkno == BLK_SNAP)
			blkno = blkstofrags(fs, lblkno);
		ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum);
	}
	return (0);
}

/*
 * Before expunging a snapshot inode, note all the
 * blocks that it claims with BLK_SNAP so that fsck will
 * be able to account for those blocks properly and so
 * that this snapshot knows that it need not copy them
 * if the other snapshot holding them is freed. This code
 * is reproduced once each for UFS1 and UFS2.
 */
static int
expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype)
	struct vnode *snapvp;
	struct inode *cancelip;
	struct fs *fs;
	int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
	    struct fs *, ufs_lbn_t, int);
	int expungetype;
{
	int i, error, indiroff;
	ufs_lbn_t lbn, rlbn;
	ufs2_daddr_t len, blkno, numblks, blksperindir;
	struct ufs2_dinode *dip;
	struct thread *td = curthread;
	struct buf *bp;

	numblks = howmany(cancelip->i_size, fs->fs_bsize);
	if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_db[0],
	    &cancelip->i_din2->di_ib[NIADDR], fs, 0, expungetype)))
		return (error);
	blksperindir = 1;
	lbn = -NDADDR;
	len = numblks - NDADDR;
	rlbn = NDADDR;
	for (i = 0; len > 0 && i < NIADDR; i++) {
		error = indiracct_ufs2(snapvp, ITOV(cancelip), i,
		    cancelip->i_din2->di_ib[i], lbn, rlbn, len,
		    blksperindir, fs, acctfunc, expungetype);
		if (error)
			return (error);
		blksperindir *= NINDIR(fs);
		lbn -= blksperindir + 1;
		len -= blksperindir;
		rlbn += blksperindir;
	}
	/*
	 * Prepare to expunge the inode. If its inode block has not
	 * yet been copied, then allocate and fill the copy.
	 */
	lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
	blkno = 0;
	if (lbn < NDADDR) {
		blkno = cancelip->i_din2->di_db[lbn];
	} else {
		td->td_proc->p_flag |= P_COWINPROGRESS;
		error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, BA_METAONLY, &bp);
		td->td_proc->p_flag &= ~P_COWINPROGRESS;
		if (error)
			return (error);
		indiroff = (lbn - NDADDR) % NINDIR(fs);
		blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff];
		bqrelse(bp);
	}
	error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn),
	    fs->fs_bsize, KERNCRED, 0, &bp);
	if (error)
		return (error);
	if (blkno == 0 && (error = readblock(bp, lbn)))
		return (error);
	/*
	 * Set a snapshot inode to be a zero length file, regular files
	 * to be completely unallocated.
	 */
	dip = (struct ufs2_dinode *)bp->b_data +
	    ino_to_fsbo(fs, cancelip->i_number);
	if (expungetype == BLK_NOCOPY)
		dip->di_mode = 0;
	dip->di_size = 0;
	dip->di_blocks = 0;
	dip->di_flags &= ~SF_SNAPSHOT;
	bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs2_daddr_t));
	bdwrite(bp);
	return (0);
}

/*
 * Descend an indirect block chain for vnode cancelvp accounting for all
 * its indirect blocks in snapvp.
 */
static int
indiracct_ufs2(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks,
	blksperindir, fs, acctfunc, expungetype)
	struct vnode *snapvp;
	struct vnode *cancelvp;
	int level;
	ufs2_daddr_t blkno;
	ufs_lbn_t lbn;
	ufs_lbn_t rlbn;
	ufs_lbn_t remblks;
	ufs_lbn_t blksperindir;
	struct fs *fs;
	int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
	    struct fs *, ufs_lbn_t, int);
	int expungetype;
{
	int error, num, i;
	ufs_lbn_t subblksperindir;
	struct indir indirs[NIADDR + 2];
	ufs2_daddr_t last, *bap;
	struct buf *bp;

	if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
		return (error);
	if (lbn != indirs[num - 1 - level].in_lbn || blkno == 0 || num < 2)
		panic("indiracct: botched params");
	/*
	 * We have to expand bread here since it will deadlock looking
	 * up the block number for any blocks that are not in the cache.
	 */
	bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0);
	bp->b_blkno = fsbtodb(fs, blkno);
	if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
	    (error = readblock(bp, fragstoblks(fs, blkno)))) {
		brelse(bp);
		return (error);
	}
	/*
	 * Account for the block pointers in this indirect block.
	 */
	last = howmany(remblks, blksperindir);
	if (last > NINDIR(fs))
		last = NINDIR(fs);
	MALLOC(bap, ufs2_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK);
	bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize);
	bqrelse(bp);
	error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, rlbn, expungetype);
	if (error || level == 0)
		goto out;
	/*
	 * Account for the block pointers in each of the indirect blocks
	 * in the levels below us.
	 */
	subblksperindir = blksperindir / NINDIR(fs);
	for (lbn++, level--, i = 0; i < last; i++) {
		error = indiracct_ufs2(snapvp, cancelvp, level, bap[i], lbn,
		    rlbn, remblks, subblksperindir, fs, acctfunc, expungetype);
		if (error)
			goto out;
		rlbn += blksperindir;
		lbn -= blksperindir;
		remblks -= blksperindir;
	}
out:
	FREE(bap, M_DEVBUF);
	return (error);
}

/*
 * Do both snap accounting and map accounting.
 */
static int
fullacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)
	struct vnode *vp;
	ufs2_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int exptype;	/* BLK_SNAP or BLK_NOCOPY */
{
	int error;

	if ((error = snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)))
		return (error);
	return (mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype));
}

/*
 * Identify a set of blocks allocated in a snapshot inode.
 */
static int
snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
	struct vnode *vp;
	ufs2_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int expungetype;	/* BLK_SNAP or BLK_NOCOPY */
{
	struct inode *ip = VTOI(vp);
	ufs2_daddr_t blkno, *blkp;
	ufs_lbn_t lbn;
	struct buf *ibp;
	int error;

	for ( ; oldblkp < lastblkp; oldblkp++) {
		blkno = *oldblkp;
		if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
			continue;
		lbn = fragstoblks(fs, blkno);
		if (lbn < NDADDR) {
			blkp = &ip->i_din2->di_db[lbn];
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			if (error)
				return (error);
			blkp = &((ufs2_daddr_t *)(ibp->b_data))
			    [(lbn - NDADDR) % NINDIR(fs)];
		}
		/*
		 * If we are expunging a snapshot vnode and we
		 * find a block marked BLK_NOCOPY, then it is
		 * one that has been allocated to this snapshot after
		 * we took our current snapshot and can be ignored.
		 */
		if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) {
			if (lbn >= NDADDR)
				brelse(ibp);
		} else {
			if (*blkp != 0)
				panic("snapacct: bad block");
			*blkp = expungetype;
			if (lbn >= NDADDR)
				bdwrite(ibp);
		}
	}
	return (0);
}

/*
 * Account for a set of blocks allocated in a snapshot inode.
 */
static int
mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
	struct vnode *vp;
	ufs2_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int expungetype;
{
	ufs2_daddr_t blkno;
	ino_t inum;

	inum = VTOI(vp)->i_number;
	for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) {
		blkno = *oldblkp;
		if (blkno == 0 || blkno == BLK_NOCOPY)
			continue;
		if (blkno == BLK_SNAP)
			blkno = blkstofrags(fs, lblkno);
		ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum);
	}
	return (0);
}

/*
 * Decrement extra reference on snapshot when last name is removed.
 * It will not be freed until the last open reference goes away.
 */
void
ffs_snapgone(ip)
	struct inode *ip;
{
	struct inode *xp;
	struct fs *fs;
	int snaploc;

	/*
	 * Find snapshot in incore list.
	 */
	TAILQ_FOREACH(xp, &ip->i_devvp->v_rdev->si_snapshots, i_nextsnap)
		if (xp == ip)
			break;
	if (xp == 0)
		printf("ffs_snapgone: lost snapshot vnode %d\n",
		    ip->i_number);
	else
		vrele(ITOV(ip));
	/*
	 * Delete snapshot inode from superblock. Keep list dense.
	 */
	fs = ip->i_fs;
	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
		if (fs->fs_snapinum[snaploc] == ip->i_number)
			break;
	if (snaploc < FSMAXSNAP) {
		for (snaploc++; snaploc < FSMAXSNAP; snaploc++) {
			if (fs->fs_snapinum[snaploc] == 0)
				break;
			fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc];
		}
		fs->fs_snapinum[snaploc - 1] = 0;
	}
}

/*
 * Prepare a snapshot file for being removed.
 */
void
ffs_snapremove(vp)
	struct vnode *vp;
{
	struct inode *ip;
	struct vnode *devvp;
	struct buf *ibp;
	struct fs *fs;
	ufs2_daddr_t numblks, blkno, dblk;
	int error, loc, last;

	ip = VTOI(vp);
	fs = ip->i_fs;
	/*
	 * If active, delete from incore list (this snapshot may
	 * already have been in the process of being deleted, so
	 * would not have been active).
	 *
	 * Clear copy-on-write flag if last snapshot.
	 */
	if (ip->i_nextsnap.tqe_prev != 0) {
		devvp = ip->i_devvp;
		TAILQ_REMOVE(&devvp->v_rdev->si_snapshots, ip, i_nextsnap);
		ip->i_nextsnap.tqe_prev = 0;
		ASSERT_VOP_LOCKED(devvp, "ffs_snapremove devvp");
		if (TAILQ_FIRST(&devvp->v_rdev->si_snapshots) == 0) {
			devvp->v_rdev->si_copyonwrite = 0;
			devvp->v_vflag &= ~VV_COPYONWRITE;
		}
	}
	/*
	 * Clear all BLK_NOCOPY fields. Pass any block claims to other
	 * snapshots that want them (see ffs_snapblkfree below).
	 */
	for (blkno = 1; blkno < NDADDR; blkno++) {
		dblk = DIP(ip, i_db[blkno]);
		if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
			DIP(ip, i_db[blkno]) = 0;
		else if ((dblk == blkstofrags(fs, blkno) &&
		     ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize,
		     ip->i_number))) {
			DIP(ip, i_blocks) -= btodb(fs->fs_bsize);
			DIP(ip, i_db[blkno]) = 0;
		}
	}
	numblks = howmany(ip->i_size, fs->fs_bsize);
	for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno),
		    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
		if (error)
			continue;
		if (fs->fs_size - blkno > NINDIR(fs))
			last = NINDIR(fs);
		else
			last = fs->fs_size - blkno;
		for (loc = 0; loc < last; loc++) {
			if (ip->i_ump->um_fstype == UFS1) {
				dblk = ((ufs1_daddr_t *)(ibp->b_data))[loc];
				if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
					((ufs1_daddr_t *)(ibp->b_data))[loc] = 0;
				else if ((dblk == blkstofrags(fs, blkno) &&
				     ffs_snapblkfree(fs, ip->i_devvp, dblk,
				     fs->fs_bsize, ip->i_number))) {
					ip->i_din1->di_blocks -=
					    btodb(fs->fs_bsize);
					((ufs1_daddr_t *)(ibp->b_data))[loc] = 0;
				}
				continue;
			}
			dblk = ((ufs2_daddr_t *)(ibp->b_data))[loc];
			if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
				((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
			else if ((dblk == blkstofrags(fs, blkno) &&
			     ffs_snapblkfree(fs, ip->i_devvp, dblk,
			     fs->fs_bsize, ip->i_number))) {
				ip->i_din2->di_blocks -= btodb(fs->fs_bsize);
				((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
			}
		}
		bawrite(ibp);
	}
	/*
	 * Clear snapshot flag and drop reference.
	 */
	ip->i_flags &= ~SF_SNAPSHOT;
	DIP(ip, i_flags) = ip->i_flags;
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
}

/*
 * Notification that a block is being freed. Return zero if the free
 * should be allowed to proceed. Return non-zero if the snapshot file
 * wants to claim the block. The block will be claimed if it is an
 * uncopied part of one of the snapshots. It will be freed if it is
 * either a BLK_NOCOPY or has already been copied in all of the snapshots.
 * If a fragment is being freed, then all snapshots that care about
 * it must make a copy since a snapshot file can only claim full sized
 * blocks. Note that if more than one snapshot file maps the block,
 * we can pick one at random to claim it. Since none of the snapshots
 * can change, we are assured that they will all see the same unmodified
 * image. When deleting a snapshot file (see ffs_snapremove above), we
 * must push any of these claimed blocks to one of the other snapshots
 * that maps it. These claimed blocks are easily identified as they will
 * have a block number equal to their logical block number within the
 * snapshot. A copied block can never have this property because it
 * must always have been allocated from a BLK_NOCOPY location.
 */
int
ffs_snapblkfree(fs, devvp, bno, size, inum)
	struct fs *fs;
	struct vnode *devvp;
	ufs2_daddr_t bno;
	long size;
	ino_t inum;
{
	struct buf *ibp, *cbp, *savedcbp = 0;
	struct thread *td = curthread;
	struct inode *ip;
	struct vnode *vp;
	ufs_lbn_t lbn;
	ufs2_daddr_t blkno;
	int indiroff = 0, error = 0, claimedblk = 0;
	struct snaphead *snaphead;

	lbn = fragstoblks(fs, bno);
	snaphead = &devvp->v_rdev->si_snapshots;
	TAILQ_FOREACH(ip, snaphead, i_nextsnap) {
		vp = ITOV(ip);
		/*
		 * Lookup block being written.
		 */
		if (lbn < NDADDR) {
			blkno = DIP(ip, i_db[lbn]);
		} else {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
			td->td_proc->p_flag |= P_COWINPROGRESS;
			error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			td->td_proc->p_flag &= ~P_COWINPROGRESS;
			VOP_UNLOCK(vp, 0, td);
			if (error)
				break;
			indiroff = (lbn - NDADDR) % NINDIR(fs);
			if (ip->i_ump->um_fstype == UFS1)
				blkno = ((ufs1_daddr_t *)(ibp->b_data))[indiroff];
			else
				blkno = ((ufs2_daddr_t *)(ibp->b_data))[indiroff];
		}
		/*
		 * Check to see if block needs to be copied.
		 */
		if (blkno == 0) {
			/*
			 * A block that we map is being freed. If it has not
			 * been claimed yet, we will claim or copy it (below).
			 */
			claimedblk = 1;
		} else if (blkno == BLK_SNAP) {
			/*
			 * No previous snapshot claimed the block,
			 * so it will be freed and become a BLK_NOCOPY
			 * (don't care) for us.
			 */
			if (claimedblk)
				panic("snapblkfree: inconsistent block type");
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
			if (lbn < NDADDR) {
				DIP(ip, i_db[lbn]) = BLK_NOCOPY;
				ip->i_flag |= IN_CHANGE | IN_UPDATE;
			} else if (ip->i_ump->um_fstype == UFS1) {
				((ufs1_daddr_t *)(ibp->b_data))[indiroff] =
				    BLK_NOCOPY;
				bdwrite(ibp);
			} else {
				((ufs2_daddr_t *)(ibp->b_data))[indiroff] =
				    BLK_NOCOPY;
				bdwrite(ibp);
			}
			VOP_UNLOCK(vp, 0, td);
			continue;
		} else /* BLK_NOCOPY or default */ {
			/*
			 * If the snapshot has already copied the block
			 * (default), or does not care about the block,
			 * it is not needed.
			 */
			if (lbn >= NDADDR)
				bqrelse(ibp);
			continue;
		}
		/*
		 * If this is a full size block, we will just grab it
		 * and assign it to the snapshot inode. Otherwise we
		 * will proceed to copy it. See explanation for this
		 * routine as to why only a single snapshot needs to
		 * claim this block.
		 */
		if (size == fs->fs_bsize) {
#ifdef DEBUG
			if (snapdebug)
				printf("%s %d lbn %jd from inum %d\n",
				    "Grabonremove: snapino", ip->i_number,
				    (intmax_t)lbn, inum);
#endif
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
			if (lbn < NDADDR) {
				DIP(ip, i_db[lbn]) = bno;
			} else if (ip->i_ump->um_fstype == UFS1) {
				((ufs1_daddr_t *)(ibp->b_data))[indiroff] = bno;
				bdwrite(ibp);
			} else {
				((ufs2_daddr_t *)(ibp->b_data))[indiroff] = bno;
				bdwrite(ibp);
			}
			DIP(ip, i_blocks) += btodb(size);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			VOP_UNLOCK(vp, 0, td);
			return (1);
		}
		if (lbn >= NDADDR)
			bqrelse(ibp);
		/*
		 * Allocate the block into which to do the copy. Note that this
		 * allocation will never require any additional allocations for
		 * the snapshot inode.
		 */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		td->td_proc->p_flag |= P_COWINPROGRESS;
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, 0, &cbp);
		td->td_proc->p_flag &= ~P_COWINPROGRESS;
		if (error) {
			VOP_UNLOCK(vp, 0, td);
			break;
		}
#ifdef DEBUG
		if (snapdebug)
			printf("%s%d lbn %jd %s %d size %ld to blkno %jd\n",
			    "Copyonremove: snapino ", ip->i_number,
			    (intmax_t)lbn, "for inum", inum, size,
			    (intmax_t)cbp->b_blkno);
#endif
		/*
		 * If we have already read the old block contents, then
		 * simply copy them to the new block. Note that we need
		 * to synchronously write snapshots that have not been
		 * unlinked, and hence will be visible after a crash,
		 * to ensure their integrity.
		 */
		if (savedcbp != 0) {
			bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
			bawrite(cbp);
			if (dopersistence && ip->i_effnlink > 0)
				(void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
			VOP_UNLOCK(vp, 0, td);
			continue;
		}
		/*
		 * Otherwise, read the old block contents into the buffer.
		 */
		if ((error = readblock(cbp, lbn)) != 0) {
			bzero(cbp->b_data, fs->fs_bsize);
			bawrite(cbp);
			if (dopersistence && ip->i_effnlink > 0)
				(void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
			VOP_UNLOCK(vp, 0, td);
			break;
		}
		VOP_UNLOCK(vp, 0, td);
		savedcbp = cbp;
	}
	/*
	 * Note that we need to synchronously write snapshots that
	 * have not been unlinked, and hence will be visible after
	 * a crash, to ensure their integrity.
	 */
	if (savedcbp) {
		vp = savedcbp->b_vp;
		bawrite(savedcbp);
		if (dopersistence && VTOI(vp)->i_effnlink > 0) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
			(void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
			VOP_UNLOCK(vp, 0, td);
		}
	}
	/*
	 * If we have been unable to allocate a block in which to do
	 * the copy, then return non-zero so that the fragment will
	 * not be freed. Although space will be lost, the snapshot
	 * will stay consistent.
	 */
	return (error);
}
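
/*
 * ffs_snapblkfree() is invoked from the block-freeing path (see the
 * VV_COPYONWRITE check in ffs_blkfree()) so that an uncopied block
 * can be handed to a snapshot instead of being returned to the free
 * list.
 */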

/*
 * Associate snapshot files when mounting.
 */
void
ffs_snapshot_mount(mp)
	struct mount *mp;
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs = ump->um_fs;
	struct thread *td = curthread;
	struct snaphead *snaphead;
	struct vnode *vp;
	struct inode *ip;
	int error, snaploc, loc;

	snaphead = &ump->um_devvp->v_rdev->si_snapshots;
	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) {
		if (fs->fs_snapinum[snaploc] == 0)
			return;
		if ((error = VFS_VGET(mp, fs->fs_snapinum[snaploc],
		    LK_EXCLUSIVE, &vp)) != 0) {
			printf("ffs_snapshot_mount: vget failed %d\n", error);
			continue;
		}
		ip = VTOI(vp);
		if ((ip->i_flags & SF_SNAPSHOT) == 0) {
			printf("ffs_snapshot_mount: non-snapshot inode %d\n",
			    fs->fs_snapinum[snaploc]);
			vput(vp);
			for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) {
				if (fs->fs_snapinum[loc] == 0)
					break;
				fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc];
			}
			fs->fs_snapinum[loc - 1] = 0;
			snaploc--;
			continue;
		}
		if (ip->i_nextsnap.tqe_prev != 0)
			panic("ffs_snapshot_mount: %d already on list",
			    ip->i_number);
		else
			TAILQ_INSERT_TAIL(snaphead, ip, i_nextsnap);
		vp->v_vflag |= VV_SYSTEM;
		ump->um_devvp->v_rdev->si_copyonwrite = ffs_copyonwrite;
		ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_snapshot_mount");
		ump->um_devvp->v_vflag |= VV_COPYONWRITE;
		VOP_UNLOCK(vp, 0, td);
	}
}

/*
 * Disassociate snapshot files when unmounting.
 */
void
ffs_snapshot_unmount(mp)
	struct mount *mp;
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct snaphead *snaphead = &ump->um_devvp->v_rdev->si_snapshots;
	struct inode *xp;

	while ((xp = TAILQ_FIRST(snaphead)) != 0) {
		TAILQ_REMOVE(snaphead, xp, i_nextsnap);
		xp->i_nextsnap.tqe_prev = 0;
		if (xp->i_effnlink > 0)
			vrele(ITOV(xp));
	}
	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_snapshot_unmount");
	ump->um_devvp->v_rdev->si_copyonwrite = 0;
	ump->um_devvp->v_vflag &= ~VV_COPYONWRITE;
}

/*
 * Check for need to copy block that is about to be written,
 * copying the block if necessary.
 */
static int
ffs_copyonwrite(devvp, bp)
	struct vnode *devvp;
	struct buf *bp;
{
	struct buf *ibp, *cbp, *savedcbp = 0;
	struct thread *td = curthread;
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp;
	ufs2_daddr_t lbn, blkno;
	int indiroff, error = 0;

	fs = TAILQ_FIRST(&devvp->v_rdev->si_snapshots)->i_fs;
	lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno));
	if (td->td_proc->p_flag & P_COWINPROGRESS)
		panic("ffs_copyonwrite: recursive call");
	TAILQ_FOREACH(ip, &devvp->v_rdev->si_snapshots, i_nextsnap) {
		vp = ITOV(ip);
		/*
		 * We ensure that everything of our own that needs to be
		 * copied will be done at the time that ffs_snapshot is
		 * called. Thus we can skip the check here which can
		 * deadlock in doing the lookup in UFS_BALLOC.
		 */
		if (bp->b_vp == vp)
			continue;
		/*
		 * Check to see if block needs to be copied. We have to
		 * be able to do the UFS_BALLOC without blocking, otherwise
		 * we may get in a deadlock with another process also
		 * trying to allocate. If we find ourselves unable to
		 * get the buffer lock, we unlock the snapshot vnode,
		 * sleep briefly, and try again.
		 */
retry:
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		if (lbn < NDADDR) {
			blkno = DIP(ip, i_db[lbn]);
		} else {
			td->td_proc->p_flag |= P_COWINPROGRESS;
			error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
			    fs->fs_bsize, KERNCRED, BA_METAONLY | BA_NOWAIT, &ibp);
			td->td_proc->p_flag &= ~P_COWINPROGRESS;
			if (error) {
				VOP_UNLOCK(vp, 0, td);
				if (error != EWOULDBLOCK)
					break;
				tsleep(vp, td->td_ksegrp->kg_user_pri, "nap", 1);
				goto retry;
			}
			indiroff = (lbn - NDADDR) % NINDIR(fs);
			if (ip->i_ump->um_fstype == UFS1)
				blkno = ((ufs1_daddr_t *)(ibp->b_data))[indiroff];
			else
				blkno = ((ufs2_daddr_t *)(ibp->b_data))[indiroff];
			bqrelse(ibp);
		}
#ifdef DIAGNOSTIC
		if (blkno == BLK_SNAP && bp->b_lblkno >= 0)
			panic("ffs_copyonwrite: bad copy block");
#endif
		if (blkno != 0) {
			VOP_UNLOCK(vp, 0, td);
			continue;
		}
		/*
		 * Allocate the block into which to do the copy. Note that this
		 * allocation will never require any additional allocations for
		 * the snapshot inode.
		 */
		td->td_proc->p_flag |= P_COWINPROGRESS;
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, BA_NOWAIT, &cbp);
		td->td_proc->p_flag &= ~P_COWINPROGRESS;
		if (error) {
			VOP_UNLOCK(vp, 0, td);
			if (error != EWOULDBLOCK)
				break;
			tsleep(vp, td->td_ksegrp->kg_user_pri, "nap", 1);
			goto retry;
		}
#ifdef DEBUG
		if (snapdebug) {
			printf("Copyonwrite: snapino %d lbn %jd for ",
			    ip->i_number, (intmax_t)lbn);
			if (bp->b_vp == devvp)
				printf("fs metadata");
			else
				printf("inum %d", VTOI(bp->b_vp)->i_number);
			printf(" lblkno %jd to blkno %jd\n",
			    (intmax_t)bp->b_lblkno, (intmax_t)cbp->b_blkno);
		}
#endif
		/*
		 * If we have already read the old block contents, then
		 * simply copy them to the new block. Note that we need
		 * to synchronously write snapshots that have not been
		 * unlinked, and hence will be visible after a crash,
		 * to ensure their integrity.
		 */
		if (savedcbp != 0) {
			bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
			bawrite(cbp);
			if (dopersistence && ip->i_effnlink > 0)
				(void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
			VOP_UNLOCK(vp, 0, td);
			continue;
		}
		/*
		 * Otherwise, read the old block contents into the buffer.
		 */
		if ((error = readblock(cbp, lbn)) != 0) {
			bzero(cbp->b_data, fs->fs_bsize);
			bawrite(cbp);
			if (dopersistence && ip->i_effnlink > 0)
				(void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
			VOP_UNLOCK(vp, 0, td);
			break;
		}
		savedcbp = cbp;
		VOP_UNLOCK(vp, 0, td);
	}
	/*
	 * Note that we need to synchronously write snapshots that
	 * have not been unlinked, and hence will be visible after
	 * a crash, to ensure their integrity.
	 */
	if (savedcbp) {
		vp = savedcbp->b_vp;
		bawrite(savedcbp);
		if (dopersistence && VTOI(vp)->i_effnlink > 0) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
			(void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
			VOP_UNLOCK(vp, 0, td);
		}
	}
	return (error);
}
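
/*
 * ffs_copyonwrite() is not called directly by the filesystem; it is
 * installed in si_copyonwrite above and invoked from the device write
 * path (cf. the spec_strategy reference in ffs_snapshot()) for every
 * write to a device marked VV_COPYONWRITE.
 */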

/*
 * Read the specified block into the given buffer.
 * Much of this boiler-plate comes from bwrite().
 */
static int
readblock(bp, lbn)
	struct buf *bp;
	ufs2_daddr_t lbn;
{
	struct uio auio;
	struct iovec aiov;
	struct thread *td = curthread;
	struct inode *ip = VTOI(bp->b_vp);

	aiov.iov_base = bp->b_data;
	aiov.iov_len = bp->b_bcount;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = dbtob(fsbtodb(ip->i_fs, blkstofrags(ip->i_fs, lbn)));
	auio.uio_resid = bp->b_bcount;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_td = td;
	return (physio(ip->i_devvp->v_rdev, &auio, 0));
}