/*
 * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved.
 *
 * Further information about snapshots can be obtained from:
 *
 *	Marshall Kirk McKusick		http://www.mckusick.com/softdep/
 *	1614 Oxford Street		mckusick@mckusick.com
 *	Berkeley, CA 94709-1608		+1-510-843-9542
 *	USA
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_snapshot.c	8.11 (McKusick) 7/23/00
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/stdint.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/disklabel.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/vnode.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

#define KERNCRED thread0.td_ucred
#define DEBUG 1

static int cgaccount(int, struct vnode *, struct buf *, int);
static int expunge_ufs1(struct vnode *, struct inode *, struct fs *,
    int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
    ufs_lbn_t, int), int);
static int indiracct_ufs1(struct vnode *, struct vnode *, int,
    ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
    int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
    ufs_lbn_t, int), int);
static int fullacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int snapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int mapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int expunge_ufs2(struct vnode *, struct inode *, struct fs *,
    int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
    ufs_lbn_t, int), int);
static int indiracct_ufs2(struct vnode *, struct vnode *, int,
    ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
    int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
    ufs_lbn_t, int), int);
static int fullacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int snapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int mapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int ffs_copyonwrite(struct vnode *, struct buf *);
static int readblock(struct buf *, ufs2_daddr_t);

/*
 * To ensure the consistency of snapshots across crashes, we must
 * synchronously write out copied blocks before allowing the
 * originals to be modified.  Because of the rather severe speed
 * penalty that this imposes, the following flag allows this
 * crash persistence to be disabled.
 */
int dopersistence = 0;

#ifdef DEBUG
#include <sys/sysctl.h>
SYSCTL_INT(_debug, OID_AUTO, dopersistence, CTLFLAG_RW, &dopersistence, 0, "");
int snapdebug = 0;
SYSCTL_INT(_debug, OID_AUTO, snapdebug, CTLFLAG_RW, &snapdebug, 0, "");
int collectsnapstats = 0;
SYSCTL_INT(_debug, OID_AUTO, collectsnapstats, CTLFLAG_RW, &collectsnapstats,
    0, "");
#endif /* DEBUG */
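
/*
 * The knobs above are exported under the sysctl "debug" tree, so in a
 * kernel built with DEBUG defined they can be toggled at run time, for
 * example:
 *
 *	sysctl debug.dopersistence=1	(sync copied blocks for crash safety)
 *	sysctl debug.snapdebug=1	(verbose snapshot tracing)
 *	sysctl debug.collectsnapstats=1	(report suspension time, redo count)
 *
 * The sysctl(8) invocations are only illustrative; the variable names are
 * the ones registered by the SYSCTL_INT() entries above.
 */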

/*
 * Create a snapshot file and initialize it for the filesystem.
 */
int
ffs_snapshot(mp, snapfile)
	struct mount *mp;
	char *snapfile;
{
	ufs2_daddr_t numblks, blkno;
	int error, cg, snaploc;
	int i, size, len, loc;
	int flag = mp->mnt_flag;
	struct timespec starttime = {0, 0}, endtime;
	char saved_nice = 0;
	long redo = 0;
	int32_t *lp;
	void *space;
	struct fs *copy_fs = NULL, *fs = VFSTOUFS(mp)->um_fs;
	struct snaphead *snaphead;
	struct thread *td = curthread;
	struct inode *ip, *xp;
	struct buf *bp, *nbp, *ibp, *sbp = NULL;
	struct nameidata nd;
	struct mount *wrtmp;
	struct vattr vat;
	struct vnode *vp, *xvp, *nvp;

	/*
	 * Need to serialize access to snapshot code per filesystem.
	 */
	/*
	 * Assign a snapshot slot in the superblock.
	 */
	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
		if (fs->fs_snapinum[snaploc] == 0)
			break;
	if (snaploc == FSMAXSNAP)
		return (ENOSPC);
	/*
	 * Create the snapshot file.
	 */
restart:
	NDINIT(&nd, CREATE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, snapfile, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	if (nd.ni_vp != NULL) {
		vput(nd.ni_vp);
		error = EEXIST;
	}
	if (nd.ni_dvp->v_mount != mp)
		error = EXDEV;
	if (error) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_dvp == nd.ni_vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		return (error);
	}
	VATTR_NULL(&vat);
	vat.va_type = VREG;
	vat.va_mode = S_IRUSR;
	vat.va_vaflags |= VA_EXCLUSIVE;
	if (VOP_GETWRITEMOUNT(nd.ni_dvp, &wrtmp))
		wrtmp = NULL;
	if (wrtmp != mp)
		panic("ffs_snapshot: mount mismatch");
	if (vn_start_write(NULL, &wrtmp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &wrtmp,
		    V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VOP_LEASE(nd.ni_dvp, td, KERNCRED, LEASE_WRITE);
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vat);
	vput(nd.ni_dvp);
	if (error) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vn_finished_write(wrtmp);
		return (error);
	}
	vp = nd.ni_vp;
	ip = VTOI(vp);
	/*
	 * Allocate and copy the last block contents so as to be able
	 * to set size to that of the filesystem.
	 */
	numblks = howmany(fs->fs_size, fs->fs_frag);
	error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)),
	    fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp);
	if (error)
		goto out;
	ip->i_size = lblktosize(fs, (off_t)numblks);
	DIP(ip, i_size) = ip->i_size;
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	if ((error = readblock(bp, numblks - 1)) != 0)
		goto out;
	bawrite(bp);
	/*
	 * Preallocate critical data structures so that we can copy
	 * them in without further allocation after we suspend all
	 * operations on the filesystem.  We would like to just release
	 * the allocated buffers without writing them since they will
	 * be filled in below once we are ready to go, but this upsets
	 * the soft update code, so we go ahead and write the new buffers.
	 *
	 * Allocate all indirect blocks and mark all of them as not
	 * needing to be copied.
	 */
	for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno),
		    fs->fs_bsize, td->td_ucred, BA_METAONLY, &ibp);
		if (error)
			goto out;
		bdwrite(ibp);
	}
	/*
	 * Allocate copies for the superblock and its summary information.
	 */
	error = UFS_BALLOC(vp, lfragtosize(fs, fs->fs_sblockloc),
	    fs->fs_sbsize, KERNCRED, 0, &nbp);
	if (error)
		goto out;
	bawrite(nbp);
	blkno = fragstoblks(fs, fs->fs_csaddr);
	len = howmany(fs->fs_cssize, fs->fs_bsize);
	for (loc = 0; loc < len; loc++) {
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)),
		    fs->fs_bsize, KERNCRED, 0, &nbp);
		if (error)
			goto out;
		bawrite(nbp);
	}
	/*
	 * Allocate all cylinder group blocks.
	 */
	for (cg = 0; cg < fs->fs_ncg; cg++) {
		error = UFS_BALLOC(vp, (off_t)(cgtod(fs, cg)) << fs->fs_fshift,
		    fs->fs_bsize, KERNCRED, 0, &nbp);
		if (error)
			goto out;
		bdwrite(nbp);
	}
	/*
	 * Copy all the cylinder group maps.  Although the
	 * filesystem is still active, we hope that only a few
	 * cylinder groups will change between now and when we
	 * suspend operations.  Thus, we will be able to quickly
	 * touch up the few cylinder groups that changed during
	 * the suspension period.
	 */
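	/*
	 * Note on the mechanism (illustrative): fs_active is a bitmap with
	 * one bit per cylinder group.  cgaccount() sets a group's bit when
	 * the group is copied; the allocation code elsewhere in FFS is
	 * assumed to clear the bit again whenever it dirties that group.
	 * After suspension, only groups whose bits are clear need to be
	 * copied a second time (the "redo" count reported by the
	 * collectsnapstats statistics below).
	 */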
	len = howmany(fs->fs_ncg, NBBY);
	MALLOC(fs->fs_active, int *, len, M_DEVBUF, M_WAITOK);
	bzero(fs->fs_active, len);
	for (cg = 0; cg < fs->fs_ncg; cg++) {
		error = bread(vp, fragstoblks(fs, cgtod(fs, cg)), fs->fs_bsize,
		    KERNCRED, &nbp);
		if (error) {
			brelse(nbp);
			goto out;
		}
		error = cgaccount(cg, vp, nbp, 1);
		bawrite(nbp);
		if (error)
			goto out;
	}
	/*
	 * Change inode to snapshot type file.
	 */
	ip->i_flags |= SF_SNAPSHOT;
	DIP(ip, i_flags) = ip->i_flags;
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	/*
	 * Ensure that the snapshot is completely on disk.
	 */
	if ((error = VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td)) != 0)
		goto out;
	/*
	 * All allocations are done, so we can now snapshot the system.
	 *
	 * Rescind nice scheduling while running with the filesystem suspended.
	 */
	if (td->td_ksegrp->kg_nice > 0) {
		saved_nice = td->td_ksegrp->kg_nice;
		td->td_ksegrp->kg_nice = 0;
	}
	/*
	 * Suspend operation on filesystem.
	 */
	for (;;) {
		vn_finished_write(wrtmp);
		vfs_write_suspend(vp->v_mount);
		if (mp->mnt_kern_flag & MNTK_SUSPENDED)
			break;
		vn_start_write(NULL, &wrtmp, V_WAIT);
	}
	if (collectsnapstats)
		nanotime(&starttime);
	/*
	 * First, copy all the cylinder group maps that have changed.
	 */
	for (cg = 0; cg < fs->fs_ncg; cg++) {
		if ((ACTIVECGNUM(fs, cg) & ACTIVECGOFF(cg)) != 0)
			continue;
		redo++;
		error = bread(vp, fragstoblks(fs, cgtod(fs, cg)), fs->fs_bsize,
		    KERNCRED, &nbp);
		if (error) {
			brelse(nbp);
			goto out1;
		}
		error = cgaccount(cg, vp, nbp, 2);
		bawrite(nbp);
		if (error)
			goto out1;
	}
	/*
	 * Grab a copy of the superblock and its summary information.
	 * We delay writing it until the suspension is released below.
	 */
	error = bread(vp, fragstoblks(fs, fs->fs_sblockloc), fs->fs_bsize,
	    KERNCRED, &sbp);
	if (error) {
		brelse(sbp);
		sbp = NULL;
		goto out1;
	}
	loc = blkoff(fs, lfragtosize(fs, fs->fs_sblockloc));
	copy_fs = (struct fs *)(sbp->b_data + loc);
	bcopy(fs, copy_fs, fs->fs_sbsize);
	if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
		copy_fs->fs_clean = 1;
	if (fs->fs_sbsize < SBLOCKSIZE)
		bzero(&sbp->b_data[loc + fs->fs_sbsize],
		    SBLOCKSIZE - fs->fs_sbsize);
	size = blkroundup(fs, fs->fs_cssize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
	copy_fs->fs_csp = space;
	bcopy(fs->fs_csp, copy_fs->fs_csp, fs->fs_cssize);
	(char *)space += fs->fs_cssize;
	loc = howmany(fs->fs_cssize, fs->fs_fsize);
	i = fs->fs_frag - loc % fs->fs_frag;
	len = (i == fs->fs_frag) ? 0 : i * fs->fs_fsize;
	if (len > 0) {
		if ((error = bread(ip->i_devvp,
		    fsbtodb(fs, fs->fs_csaddr + loc),
		    len, KERNCRED, &bp)) != 0) {
			brelse(bp);
			free(copy_fs->fs_csp, M_UFSMNT);
			bawrite(sbp);
			sbp = NULL;
			goto out1;
		}
		bcopy(bp->b_data, space, (u_int)len);
		(char *)space += len;
		bp->b_flags |= B_INVAL | B_NOCACHE;
		brelse(bp);
	}
	if (fs->fs_contigsumsize > 0) {
		copy_fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
	}
	/*
	 * We must check for active files that have been unlinked
	 * (e.g., with a zero link count).  We have to expunge all
	 * trace of these files from the snapshot so that they are
	 * not reclaimed prematurely by fsck or unnecessarily dumped.
	 * We turn off the MNTK_SUSPENDED flag to avoid a panic from
	 * spec_strategy about writing on a suspended filesystem.
	 */
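	/*
	 * Sketch of what the loop below does: for every vnode on the mount
	 * that is still open but has no remaining names (va_nlink == 0),
	 * its blocks are marked BLK_NOCOPY in the snapshot via
	 * expunge_ufs1/expunge_ufs2 with fullacct_*, and its inode is freed
	 * in the snapshot's copy of the filesystem (copy_fs).  The snapshot
	 * thus records the file as already deleted, which is what fsck and
	 * dump should see.
	 */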
	mp->mnt_kern_flag &= ~MNTK_SUSPENDED;
	mtx_lock(&mntvnode_mtx);
loop:
	for (xvp = TAILQ_FIRST(&mp->mnt_nvnodelist); xvp; xvp = nvp) {
		/*
		 * Make sure this vnode wasn't reclaimed in getnewvnode().
		 * Start over if it has (it won't be on the list anymore).
		 */
		if (xvp->v_mount != mp)
			goto loop;
		nvp = TAILQ_NEXT(xvp, v_nmntvnodes);
		mtx_unlock(&mntvnode_mtx);
		mtx_lock(&xvp->v_interlock);
		if (xvp->v_usecount == 0 || xvp->v_type == VNON ||
		    (VOP_GETATTR(xvp, &vat, td->td_proc->p_ucred, td) == 0 &&
		    vat.va_nlink > 0)) {
			mtx_unlock(&xvp->v_interlock);
			mtx_lock(&mntvnode_mtx);
			continue;
		}
		if (snapdebug)
			vprint("ffs_snapshot: busy vnode", xvp);
		if (vn_lock(xvp, LK_EXCLUSIVE | LK_INTERLOCK, td) != 0)
			goto loop;
		xp = VTOI(xvp);
		/*
		 * If there is a fragment, clear it here.
		 */
		blkno = 0;
		loc = howmany(xp->i_size, fs->fs_bsize) - 1;
		if (loc < NDADDR) {
			len = fragroundup(fs, blkoff(fs, xp->i_size));
			if (len < fs->fs_bsize) {
				ffs_blkfree(copy_fs, vp, DIP(xp, i_db[loc]),
				    len, xp->i_number);
				blkno = DIP(xp, i_db[loc]);
				DIP(xp, i_db[loc]) = 0;
			}
		}
		if (xp->i_ump->um_fstype == UFS1)
			error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1,
			    BLK_NOCOPY);
		else
			error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2,
			    BLK_NOCOPY);
		if (blkno)
			DIP(xp, i_db[loc]) = blkno;
		if (!error)
			error = ffs_freefile(copy_fs, vp, xp->i_number,
			    xp->i_mode);
		VOP_UNLOCK(xvp, 0, td);
		if (error) {
			free(copy_fs->fs_csp, M_UFSMNT);
			bawrite(sbp);
			sbp = NULL;
			goto out1;
		}
		mtx_lock(&mntvnode_mtx);
	}
	mtx_unlock(&mntvnode_mtx);
	/*
	 * Record snapshot inode.  Since this is the newest snapshot,
	 * it must be placed at the end of the list.
	 */
	fs->fs_snapinum[snaploc] = ip->i_number;
	if (ip->i_nextsnap.tqe_prev != 0)
		panic("ffs_snapshot: %d already on list", ip->i_number);
	snaphead = &ip->i_devvp->v_rdev->si_snapshots;
	TAILQ_INSERT_TAIL(snaphead, ip, i_nextsnap);
	ip->i_devvp->v_rdev->si_copyonwrite = ffs_copyonwrite;
	ip->i_devvp->v_flag |= VCOPYONWRITE;
	vp->v_flag |= VSYSTEM;
out1:
	/*
	 * Resume operation on filesystem.
	 */
	vfs_write_resume(vp->v_mount);
	if (saved_nice > 0)
		td->td_ksegrp->kg_nice = saved_nice;
	vn_start_write(NULL, &wrtmp, V_WAIT);
	if (collectsnapstats && starttime.tv_sec > 0) {
		nanotime(&endtime);
		timespecsub(&endtime, &starttime);
		printf("%s: suspended %d.%03ld sec, redo %ld of %d\n",
		    vp->v_mount->mnt_stat.f_mntonname, endtime.tv_sec,
		    endtime.tv_nsec / 1000000, redo, fs->fs_ncg);
	}
	if (sbp == NULL)
		goto out;
	/*
	 * Copy allocation information from all the snapshots in
	 * this snapshot and then expunge them from its view.
	 */
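	/*
	 * Illustration of the two steps that follow: first, every block
	 * belonging to an older snapshot is marked BLK_SNAP in this
	 * snapshot's own maps (snapacct_*), so it will never be copied;
	 * second, the blocks claimed by this snapshot itself are freed in
	 * copy_fs (mapacct_*), so that the bitmaps saved in the snapshot
	 * account for the snapshot files' own space correctly.
	 */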
	snaphead = &ip->i_devvp->v_rdev->si_snapshots;
	TAILQ_FOREACH(xp, snaphead, i_nextsnap) {
		if (xp == ip)
			break;
		if (xp->i_ump->um_fstype == UFS1)
			error = expunge_ufs1(vp, xp, fs, snapacct_ufs1,
			    BLK_SNAP);
		else
			error = expunge_ufs2(vp, xp, fs, snapacct_ufs2,
			    BLK_SNAP);
		if (error) {
			fs->fs_snapinum[snaploc] = 0;
			goto done;
		}
	}
	/*
	 * Expunge the blocks used by the snapshots from the set of
	 * blocks marked as used in the snapshot bitmaps.
	 */
	if (ip->i_ump->um_fstype == UFS1)
		error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, BLK_SNAP);
	else
		error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, BLK_SNAP);
	if (error) {
		fs->fs_snapinum[snaploc] = 0;
		goto done;
	}
	/*
	 * Write the superblock and its summary information
	 * to the snapshot.
	 */
	blkno = fragstoblks(fs, fs->fs_csaddr);
	len = howmany(fs->fs_cssize, fs->fs_bsize);
	space = copy_fs->fs_csp;
	for (loc = 0; loc < len; loc++) {
		error = bread(vp, blkno + loc, fs->fs_bsize, KERNCRED, &nbp);
		if (error) {
			brelse(nbp);
			fs->fs_snapinum[snaploc] = 0;
			goto done;
		}
		bcopy(space, nbp->b_data, fs->fs_bsize);
		space = (char *)space + fs->fs_bsize;
		bawrite(nbp);
	}
done:
	free(copy_fs->fs_csp, M_UFSMNT);
	bawrite(sbp);
out:
	if (fs->fs_active != 0) {
		FREE(fs->fs_active, M_DEVBUF);
		fs->fs_active = 0;
	}
	mp->mnt_flag = flag;
	if (error)
		(void) UFS_TRUNCATE(vp, (off_t)0, 0, NOCRED, td);
	(void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
	if (error)
		vput(vp);
	else
		VOP_UNLOCK(vp, 0, td);
	vn_finished_write(wrtmp);
	return (error);
}

/*
 * Copy a cylinder group map.  All the unallocated blocks are marked
 * BLK_NOCOPY so that the snapshot knows that it need not copy them
 * if they are later written.  If passno is one, then this is a first
 * pass, so only setting needs to be done.  If passno is 2, then this
 * is a revision to a previous pass which must be undone as the
 * replacement pass is done.
 */
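/*
 * A concrete example of the two passes (block numbers illustrative):
 * suppose block 100 of a cylinder group is free during pass 1; its
 * snapshot pointer is set to BLK_NOCOPY.  If that block is then allocated
 * to some file before the filesystem is suspended, pass 2 sees it in use
 * and resets the pointer from BLK_NOCOPY back to 0, so the block will be
 * copied on its next write.  If pass 1 finds an in-use block whose pointer
 * is already BLK_NOCOPY, something has lost track of the block and the
 * code below panics.
 */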
static int
cgaccount(cg, vp, nbp, passno)
	int cg;
	struct vnode *vp;
	struct buf *nbp;
	int passno;
{
	struct buf *bp, *ibp;
	struct inode *ip;
	struct cg *cgp;
	struct fs *fs;
	ufs2_daddr_t base, numblks;
	int error, len, loc, indiroff;

	ip = VTOI(vp);
	fs = ip->i_fs;
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, KERNCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (EIO);
	}
	atomic_set_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg));
	bcopy(bp->b_data, nbp->b_data, fs->fs_cgsize);
	if (fs->fs_cgsize < fs->fs_bsize)
		bzero(&nbp->b_data[fs->fs_cgsize],
		    fs->fs_bsize - fs->fs_cgsize);
	if (passno == 2)
		nbp->b_flags |= B_VALIDSUSPWRT;
	numblks = howmany(fs->fs_size, fs->fs_frag);
	len = howmany(fs->fs_fpg, fs->fs_frag);
	base = cg * fs->fs_fpg / fs->fs_frag;
	if (base + len >= numblks)
		len = numblks - base - 1;
	loc = 0;
	if (base < NDADDR) {
		for ( ; loc < NDADDR; loc++) {
			if (ffs_isblock(fs, cg_blksfree(cgp), loc))
				DIP(ip, i_db[loc]) = BLK_NOCOPY;
			else if (passno == 2 && DIP(ip, i_db[loc]) == BLK_NOCOPY)
				DIP(ip, i_db[loc]) = 0;
			else if (passno == 1 && DIP(ip, i_db[loc]) == BLK_NOCOPY)
				panic("ffs_snapshot: lost direct block");
		}
	}
	error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)),
	    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
	if (error) {
		brelse(bp);
		return (error);
	}
	indiroff = (base + loc - NDADDR) % NINDIR(fs);
	for ( ; loc < len; loc++, indiroff++) {
		if (indiroff >= NINDIR(fs)) {
			if (passno == 2)
				ibp->b_flags |= B_VALIDSUSPWRT;
			bawrite(ibp);
			error = UFS_BALLOC(vp,
			    lblktosize(fs, (off_t)(base + loc)),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			if (error) {
				brelse(bp);
				return (error);
			}
			indiroff = 0;
		}
		if (ip->i_ump->um_fstype == UFS1) {
			if (ffs_isblock(fs, cg_blksfree(cgp), loc))
				((ufs1_daddr_t *)(ibp->b_data))[indiroff] =
				    BLK_NOCOPY;
			else if (passno == 2 && ((ufs1_daddr_t *)(ibp->b_data))
			    [indiroff] == BLK_NOCOPY)
				((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 0;
			else if (passno == 1 && ((ufs1_daddr_t *)(ibp->b_data))
			    [indiroff] == BLK_NOCOPY)
				panic("ffs_snapshot: lost indirect block");
			continue;
		}
		if (ffs_isblock(fs, cg_blksfree(cgp), loc))
			((ufs2_daddr_t *)(ibp->b_data))[indiroff] = BLK_NOCOPY;
		else if (passno == 2 &&
		    ((ufs2_daddr_t *)(ibp->b_data))[indiroff] == BLK_NOCOPY)
			((ufs2_daddr_t *)(ibp->b_data))[indiroff] = 0;
		else if (passno == 1 &&
		    ((ufs2_daddr_t *)(ibp->b_data))[indiroff] == BLK_NOCOPY)
			panic("ffs_snapshot: lost indirect block");
	}
	bqrelse(bp);
	if (passno == 2)
		ibp->b_flags |= B_VALIDSUSPWRT;
	bdwrite(ibp);
	return (0);
}

/*
 * Before expunging a snapshot inode, note all the
 * blocks that it claims with BLK_SNAP so that fsck will
 * be able to account for those blocks properly and so
 * that this snapshot knows that it need not copy them
 * if the other snapshot holding them is freed.  This code
 * is reproduced once each for UFS1 and UFS2.
 */
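/*
 * The three accounting callbacks passed to expunge_ufs1() compose as
 * follows (a summary, not new behavior): snapacct_ufs1() stamps each
 * block claimed by the inode being expunged into this snapshot's maps as
 * BLK_SNAP (or BLK_NOCOPY); mapacct_ufs1() frees those blocks from the
 * bitmaps of the struct fs it is handed; and fullacct_ufs1() simply runs
 * snapacct followed by mapacct, for the unlinked-file case.
 */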
static int
expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype)
	struct vnode *snapvp;
	struct inode *cancelip;
	struct fs *fs;
	int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
	    struct fs *, ufs_lbn_t, int);
	int expungetype;
{
	int i, error, indiroff;
	ufs_lbn_t lbn, rlbn;
	ufs2_daddr_t len, blkno, numblks, blksperindir;
	struct ufs1_dinode *dip;
	struct thread *td = curthread;
	struct buf *bp;

	numblks = howmany(cancelip->i_size, fs->fs_bsize);
	if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_db[0],
	    &cancelip->i_din1->di_ib[NIADDR], fs, 0, expungetype)))
		return (error);
	blksperindir = 1;
	lbn = -NDADDR;
	len = numblks - NDADDR;
	rlbn = NDADDR;
	for (i = 0; len > 0 && i < NIADDR; i++) {
		error = indiracct_ufs1(snapvp, ITOV(cancelip), i,
		    cancelip->i_din1->di_ib[i], lbn, rlbn, len,
		    blksperindir, fs, acctfunc, expungetype);
		if (error)
			return (error);
		blksperindir *= NINDIR(fs);
		lbn -= blksperindir + 1;
		len -= blksperindir;
		rlbn += blksperindir;
	}
	/*
	 * Prepare to expunge the inode.  If its inode block has not
	 * yet been copied, then allocate and fill the copy.
	 */
	lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
	blkno = 0;
	if (lbn < NDADDR) {
		blkno = cancelip->i_din1->di_db[lbn];
	} else {
		td->td_proc->p_flag |= P_COWINPROGRESS;
		error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, BA_METAONLY, &bp);
		td->td_proc->p_flag &= ~P_COWINPROGRESS;
		if (error)
			return (error);
		indiroff = (lbn - NDADDR) % NINDIR(fs);
		blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff];
		bqrelse(bp);
	}
	error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn),
	    fs->fs_bsize, KERNCRED, 0, &bp);
	if (error)
		return (error);
	if (blkno == 0 && (error = readblock(bp, lbn)))
		return (error);
	/*
	 * Set a snapshot inode to be a zero length file, regular files
	 * to be completely unallocated.
	 */
	dip = (struct ufs1_dinode *)bp->b_data +
	    ino_to_fsbo(fs, cancelip->i_number);
	if (expungetype == BLK_NOCOPY)
		dip->di_mode = 0;
	dip->di_size = 0;
	dip->di_blocks = 0;
	dip->di_flags &= ~SF_SNAPSHOT;
	bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs1_daddr_t));
	bdwrite(bp);
	return (0);
}

/*
 * Descend an indirect block chain for vnode cancelvp accounting for all
 * its indirect blocks in snapvp.
 */
static int
indiracct_ufs1(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks,
	blksperindir, fs, acctfunc, expungetype)
	struct vnode *snapvp;
	struct vnode *cancelvp;
	int level;
	ufs1_daddr_t blkno;
	ufs_lbn_t lbn;
	ufs_lbn_t rlbn;
	ufs_lbn_t remblks;
	ufs_lbn_t blksperindir;
	struct fs *fs;
	int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
	    struct fs *, ufs_lbn_t, int);
	int expungetype;
{
	int error, num, i;
	ufs_lbn_t subblksperindir;
	struct indir indirs[NIADDR + 2];
	ufs1_daddr_t last, *bap;
	struct buf *bp;

	if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
		return (error);
	if (lbn != indirs[num - 1 - level].in_lbn || blkno == 0 || num < 2)
		panic("indiracct: botched params");
	/*
	 * We have to expand bread here since it will deadlock looking
	 * up the block number for any blocks that are not in the cache.
	 */
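	/*
	 * That is, instead of bread(cancelvp, lbn, ...), which would call
	 * back into the filesystem to translate lbn to a device block, the
	 * code below gets an empty buffer with getblk(), fills in b_blkno
	 * from the already-known address, and reads it with readblock()
	 * only when the buffer does not already hold valid data.
	 */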
	bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0);
	bp->b_blkno = fsbtodb(fs, blkno);
	if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
	    (error = readblock(bp, fragstoblks(fs, blkno)))) {
		brelse(bp);
		return (error);
	}
	/*
	 * Account for the block pointers in this indirect block.
	 */
	last = howmany(remblks, blksperindir);
	if (last > NINDIR(fs))
		last = NINDIR(fs);
	MALLOC(bap, ufs1_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK);
	bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize);
	bqrelse(bp);
	error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, rlbn, expungetype);
	if (error || level == 0)
		goto out;
	/*
	 * Account for the block pointers in each of the indirect blocks
	 * in the levels below us.
	 */
	subblksperindir = blksperindir / NINDIR(fs);
	for (lbn++, level--, i = 0; i < last; i++) {
		error = indiracct_ufs1(snapvp, cancelvp, level, bap[i], lbn,
		    rlbn, remblks, subblksperindir, fs, acctfunc, expungetype);
		if (error)
			goto out;
		rlbn += blksperindir;
		lbn -= blksperindir;
		remblks -= blksperindir;
	}
out:
	FREE(bap, M_DEVBUF);
	return (error);
}

/*
 * Do both snap accounting and map accounting.
 */
static int
fullacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)
	struct vnode *vp;
	ufs1_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int exptype;	/* BLK_SNAP or BLK_NOCOPY */
{
	int error;

	if ((error = snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)))
		return (error);
	return (mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype));
}

/*
 * Identify a set of blocks allocated in a snapshot inode.
 */
static int
snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
	struct vnode *vp;
	ufs1_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int expungetype;	/* BLK_SNAP or BLK_NOCOPY */
{
	struct inode *ip = VTOI(vp);
	ufs1_daddr_t blkno, *blkp;
	ufs_lbn_t lbn;
	struct buf *ibp;
	int error;

	for ( ; oldblkp < lastblkp; oldblkp++) {
		blkno = *oldblkp;
		if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
			continue;
		lbn = fragstoblks(fs, blkno);
		if (lbn < NDADDR) {
			blkp = &ip->i_din1->di_db[lbn];
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			if (error)
				return (error);
			blkp = &((ufs1_daddr_t *)(ibp->b_data))
			    [(lbn - NDADDR) % NINDIR(fs)];
		}
		/*
		 * If we are expunging a snapshot vnode and we
		 * find a block marked BLK_NOCOPY, then it is
		 * one that has been allocated to this snapshot after
		 * we took our current snapshot and can be ignored.
		 */
		if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) {
			if (lbn >= NDADDR)
				brelse(ibp);
		} else {
			if (*blkp != 0)
				panic("snapacct: bad block");
			*blkp = expungetype;
			if (lbn >= NDADDR)
				bdwrite(ibp);
		}
	}
	return (0);
}

/*
 * Account for a set of blocks allocated in a snapshot inode.
 */
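/*
 * One subtlety worth an example: a block that the snapshot has claimed
 * rather than copied is recorded as BLK_SNAP by the time it reaches
 * mapacct below, so its real disk address must be recomputed from its
 * logical position, blkstofrags(fs, lblkno), before it can be freed.
 */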
static int
mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
	struct vnode *vp;
	ufs1_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int expungetype;
{
	ufs1_daddr_t blkno;
	ino_t inum;

	inum = VTOI(vp)->i_number;
	for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) {
		blkno = *oldblkp;
		if (blkno == 0 || blkno == BLK_NOCOPY)
			continue;
		if (blkno == BLK_SNAP)
			blkno = blkstofrags(fs, lblkno);
		ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum);
	}
	return (0);
}

/*
 * Before expunging a snapshot inode, note all the
 * blocks that it claims with BLK_SNAP so that fsck will
 * be able to account for those blocks properly and so
 * that this snapshot knows that it need not copy them
 * if the other snapshot holding them is freed.  This code
 * is reproduced once each for UFS1 and UFS2.
 */
static int
expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype)
	struct vnode *snapvp;
	struct inode *cancelip;
	struct fs *fs;
	int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
	    struct fs *, ufs_lbn_t, int);
	int expungetype;
{
	int i, error, indiroff;
	ufs_lbn_t lbn, rlbn;
	ufs2_daddr_t len, blkno, numblks, blksperindir;
	struct ufs2_dinode *dip;
	struct thread *td = curthread;
	struct buf *bp;

	numblks = howmany(cancelip->i_size, fs->fs_bsize);
	if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_db[0],
	    &cancelip->i_din2->di_ib[NIADDR], fs, 0, expungetype)))
		return (error);
	blksperindir = 1;
	lbn = -NDADDR;
	len = numblks - NDADDR;
	rlbn = NDADDR;
	for (i = 0; len > 0 && i < NIADDR; i++) {
		error = indiracct_ufs2(snapvp, ITOV(cancelip), i,
		    cancelip->i_din2->di_ib[i], lbn, rlbn, len,
		    blksperindir, fs, acctfunc, expungetype);
		if (error)
			return (error);
		blksperindir *= NINDIR(fs);
		lbn -= blksperindir + 1;
		len -= blksperindir;
		rlbn += blksperindir;
	}
	/*
	 * Prepare to expunge the inode.  If its inode block has not
	 * yet been copied, then allocate and fill the copy.
	 */
	lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
	blkno = 0;
	if (lbn < NDADDR) {
		blkno = cancelip->i_din2->di_db[lbn];
	} else {
		td->td_proc->p_flag |= P_COWINPROGRESS;
		error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, BA_METAONLY, &bp);
		td->td_proc->p_flag &= ~P_COWINPROGRESS;
		if (error)
			return (error);
		indiroff = (lbn - NDADDR) % NINDIR(fs);
		blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff];
		bqrelse(bp);
	}
	error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn),
	    fs->fs_bsize, KERNCRED, 0, &bp);
	if (error)
		return (error);
	if (blkno == 0 && (error = readblock(bp, lbn)))
		return (error);
	/*
	 * Set a snapshot inode to be a zero length file, regular files
	 * to be completely unallocated.
	 */
	dip = (struct ufs2_dinode *)bp->b_data +
	    ino_to_fsbo(fs, cancelip->i_number);
	if (expungetype == BLK_NOCOPY)
		dip->di_mode = 0;
	dip->di_size = 0;
	dip->di_blocks = 0;
	dip->di_flags &= ~SF_SNAPSHOT;
	bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs2_daddr_t));
	bdwrite(bp);
	return (0);
}

/*
 * Descend an indirect block chain for vnode cancelvp accounting for all
 * its indirect blocks in snapvp.
 */
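/*
 * Worked example of the traversal arithmetic in expunge_ufs2() above
 * (values assumed for illustration): with NDADDR = 12 direct blocks and
 * NINDIR(fs) = 2048 pointers per indirect block, the single indirect
 * covers logical blocks 12..2059, the double indirect the next 2048*2048
 * blocks, and so on; rlbn advances by blksperindir at each level, while
 * lbn walks the negative logical block numbers that UFS assigns to the
 * indirect blocks themselves.
 */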
static int
indiracct_ufs2(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks,
	blksperindir, fs, acctfunc, expungetype)
	struct vnode *snapvp;
	struct vnode *cancelvp;
	int level;
	ufs2_daddr_t blkno;
	ufs_lbn_t lbn;
	ufs_lbn_t rlbn;
	ufs_lbn_t remblks;
	ufs_lbn_t blksperindir;
	struct fs *fs;
	int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
	    struct fs *, ufs_lbn_t, int);
	int expungetype;
{
	int error, num, i;
	ufs_lbn_t subblksperindir;
	struct indir indirs[NIADDR + 2];
	ufs2_daddr_t last, *bap;
	struct buf *bp;

	if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
		return (error);
	if (lbn != indirs[num - 1 - level].in_lbn || blkno == 0 || num < 2)
		panic("indiracct: botched params");
	/*
	 * We have to expand bread here since it will deadlock looking
	 * up the block number for any blocks that are not in the cache.
	 */
	bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0);
	bp->b_blkno = fsbtodb(fs, blkno);
	if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
	    (error = readblock(bp, fragstoblks(fs, blkno)))) {
		brelse(bp);
		return (error);
	}
	/*
	 * Account for the block pointers in this indirect block.
	 */
	last = howmany(remblks, blksperindir);
	if (last > NINDIR(fs))
		last = NINDIR(fs);
	MALLOC(bap, ufs2_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK);
	bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize);
	bqrelse(bp);
	error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, rlbn, expungetype);
	if (error || level == 0)
		goto out;
	/*
	 * Account for the block pointers in each of the indirect blocks
	 * in the levels below us.
	 */
	subblksperindir = blksperindir / NINDIR(fs);
	for (lbn++, level--, i = 0; i < last; i++) {
		error = indiracct_ufs2(snapvp, cancelvp, level, bap[i], lbn,
		    rlbn, remblks, subblksperindir, fs, acctfunc, expungetype);
		if (error)
			goto out;
		rlbn += blksperindir;
		lbn -= blksperindir;
		remblks -= blksperindir;
	}
out:
	FREE(bap, M_DEVBUF);
	return (error);
}

/*
 * Do both snap accounting and map accounting.
 */
static int
fullacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)
	struct vnode *vp;
	ufs2_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int exptype;	/* BLK_SNAP or BLK_NOCOPY */
{
	int error;

	if ((error = snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)))
		return (error);
	return (mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype));
}

/*
 * Identify a set of blocks allocated in a snapshot inode.
 */
static int
snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
	struct vnode *vp;
	ufs2_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int expungetype;	/* BLK_SNAP or BLK_NOCOPY */
{
	struct inode *ip = VTOI(vp);
	ufs2_daddr_t blkno, *blkp;
	ufs_lbn_t lbn;
	struct buf *ibp;
	int error;

	for ( ; oldblkp < lastblkp; oldblkp++) {
		blkno = *oldblkp;
		if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
			continue;
		lbn = fragstoblks(fs, blkno);
		if (lbn < NDADDR) {
			blkp = &ip->i_din2->di_db[lbn];
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			if (error)
				return (error);
			blkp = &((ufs2_daddr_t *)(ibp->b_data))
			    [(lbn - NDADDR) % NINDIR(fs)];
		}
		/*
		 * If we are expunging a snapshot vnode and we
		 * find a block marked BLK_NOCOPY, then it is
		 * one that has been allocated to this snapshot after
		 * we took our current snapshot and can be ignored.
		 */
		if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) {
			if (lbn >= NDADDR)
				brelse(ibp);
		} else {
			if (*blkp != 0)
				panic("snapacct: bad block");
			*blkp = expungetype;
			if (lbn >= NDADDR)
				bdwrite(ibp);
		}
	}
	return (0);
}

/*
 * Account for a set of blocks allocated in a snapshot inode.
 */
static int
mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
	struct vnode *vp;
	ufs2_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int expungetype;
{
	ufs2_daddr_t blkno;
	ino_t inum;

	inum = VTOI(vp)->i_number;
	for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) {
		blkno = *oldblkp;
		if (blkno == 0 || blkno == BLK_NOCOPY)
			continue;
		if (blkno == BLK_SNAP)
			blkno = blkstofrags(fs, lblkno);
		ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum);
	}
	return (0);
}

/*
 * Decrement extra reference on snapshot when last name is removed.
 * It will not be freed until the last open reference goes away.
 */
void
ffs_snapgone(ip)
	struct inode *ip;
{
	struct inode *xp;
	struct fs *fs;
	int snaploc;

	/*
	 * Find snapshot in incore list.
	 */
	TAILQ_FOREACH(xp, &ip->i_devvp->v_rdev->si_snapshots, i_nextsnap)
		if (xp == ip)
			break;
	if (xp == 0)
		printf("ffs_snapgone: lost snapshot vnode %d\n",
		    ip->i_number);
	else
		vrele(ITOV(ip));
	/*
	 * Delete snapshot inode from superblock.  Keep list dense.
	 */
	fs = ip->i_fs;
	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
		if (fs->fs_snapinum[snaploc] == ip->i_number)
			break;
	if (snaploc < FSMAXSNAP) {
		for (snaploc++; snaploc < FSMAXSNAP; snaploc++) {
			if (fs->fs_snapinum[snaploc] == 0)
				break;
			fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc];
		}
		fs->fs_snapinum[snaploc - 1] = 0;
	}
}

/*
 * Prepare a snapshot file for being removed.
 */
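/*
 * Outline of the removal path below (descriptive only): the snapshot is
 * taken off the per-device list, clearing the copy-on-write hook if it
 * was the last snapshot, and then every block pointer is examined.
 * Pointers holding BLK_NOCOPY or BLK_SNAP are simply zeroed; a pointer
 * whose value equals blkstofrags(fs, blkno), i.e. a block this snapshot
 * had claimed rather than copied, is offered to the remaining snapshots
 * through ffs_snapblkfree() before it is released.
 */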
void
ffs_snapremove(vp)
	struct vnode *vp;
{
	struct inode *ip;
	struct vnode *devvp;
	struct buf *ibp;
	struct fs *fs;
	ufs2_daddr_t numblks, blkno, dblk;
	int error, loc, last;

	ip = VTOI(vp);
	fs = ip->i_fs;
	/*
	 * If active, delete from incore list (this snapshot may
	 * already have been in the process of being deleted, so
	 * would not have been active).
	 *
	 * Clear copy-on-write flag if last snapshot.
	 */
	if (ip->i_nextsnap.tqe_prev != 0) {
		devvp = ip->i_devvp;
		TAILQ_REMOVE(&devvp->v_rdev->si_snapshots, ip, i_nextsnap);
		ip->i_nextsnap.tqe_prev = 0;
		if (TAILQ_FIRST(&devvp->v_rdev->si_snapshots) == 0) {
			devvp->v_rdev->si_copyonwrite = 0;
			devvp->v_flag &= ~VCOPYONWRITE;
		}
	}
	/*
	 * Clear all BLK_NOCOPY fields.  Pass any block claims to other
	 * snapshots that want them (see ffs_snapblkfree below).
	 */
	for (blkno = 1; blkno < NDADDR; blkno++) {
		dblk = DIP(ip, i_db[blkno]);
		if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
			DIP(ip, i_db[blkno]) = 0;
		else if ((dblk == blkstofrags(fs, blkno) &&
		     ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize,
		     ip->i_number))) {
			DIP(ip, i_blocks) -= btodb(fs->fs_bsize);
			DIP(ip, i_db[blkno]) = 0;
		}
	}
	numblks = howmany(ip->i_size, fs->fs_bsize);
	for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno),
		    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
		if (error)
			continue;
		if (fs->fs_size - blkno > NINDIR(fs))
			last = NINDIR(fs);
		else
			last = fs->fs_size - blkno;
		for (loc = 0; loc < last; loc++) {
			if (ip->i_ump->um_fstype == UFS1) {
				dblk = ((ufs1_daddr_t *)(ibp->b_data))[loc];
				if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
					((ufs1_daddr_t *)(ibp->b_data))[loc] = 0;
				else if ((dblk == blkstofrags(fs, blkno) &&
				     ffs_snapblkfree(fs, ip->i_devvp, dblk,
				     fs->fs_bsize, ip->i_number))) {
					ip->i_din1->di_blocks -=
					    btodb(fs->fs_bsize);
					((ufs1_daddr_t *)(ibp->b_data))[loc] = 0;
				}
				continue;
			}
			dblk = ((ufs2_daddr_t *)(ibp->b_data))[loc];
			if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
				((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
			else if ((dblk == blkstofrags(fs, blkno) &&
			     ffs_snapblkfree(fs, ip->i_devvp, dblk,
			     fs->fs_bsize, ip->i_number))) {
				ip->i_din2->di_blocks -= btodb(fs->fs_bsize);
				((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
			}
		}
		bawrite(ibp);
	}
	/*
	 * Clear snapshot flag and drop reference.
	 */
	ip->i_flags &= ~SF_SNAPSHOT;
	DIP(ip, i_flags) = ip->i_flags;
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
}

/*
 * Notification that a block is being freed.  Return zero if the free
 * should be allowed to proceed.  Return non-zero if the snapshot file
 * wants to claim the block.  The block will be claimed if it is an
 * uncopied part of one of the snapshots.  It will be freed if it is
 * either a BLK_NOCOPY or has already been copied in all of the snapshots.
 * If a fragment is being freed, then all snapshots that care about
 * it must make a copy since a snapshot file can only claim full sized
 * blocks.  Note that if more than one snapshot file maps the block,
 * we can pick one at random to claim it.  Since none of the snapshots
 * can change, we are assured that they will all see the same unmodified
 * image.  When deleting a snapshot file (see ffs_snapremove above), we
 * must push any of these claimed blocks to one of the other snapshots
 * that maps it.  These claimed blocks are easily identified as they will
 * have a block number equal to their logical block number within the
 * snapshot.  A copied block can never have this property because they
 * must always have been allocated from a BLK_NOCOPY location.
 */
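/*
 * Example of the fragment rule above (sizes assumed for illustration):
 * on a filesystem with 8K blocks and 1K fragments, freeing a 1K fragment
 * cannot be satisfied by claiming, since a snapshot can only hold
 * full-sized blocks; every snapshot that still maps the containing block
 * must copy it instead, which is why the copy path below is taken
 * whenever size != fs->fs_bsize.
 */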
int
ffs_snapblkfree(fs, devvp, bno, size, inum)
	struct fs *fs;
	struct vnode *devvp;
	ufs2_daddr_t bno;
	long size;
	ino_t inum;
{
	struct buf *ibp, *cbp, *savedcbp = 0;
	struct thread *td = curthread;
	struct inode *ip;
	struct vnode *vp;
	ufs_lbn_t lbn;
	ufs2_daddr_t blkno;
	int indiroff = 0, error = 0, claimedblk = 0;
	struct snaphead *snaphead;

	lbn = fragstoblks(fs, bno);
	snaphead = &devvp->v_rdev->si_snapshots;
	TAILQ_FOREACH(ip, snaphead, i_nextsnap) {
		vp = ITOV(ip);
		/*
		 * Lookup block being written.
		 */
		if (lbn < NDADDR) {
			blkno = DIP(ip, i_db[lbn]);
		} else {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
			td->td_proc->p_flag |= P_COWINPROGRESS;
			error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			td->td_proc->p_flag &= ~P_COWINPROGRESS;
			VOP_UNLOCK(vp, 0, td);
			if (error)
				break;
			indiroff = (lbn - NDADDR) % NINDIR(fs);
			if (ip->i_ump->um_fstype == UFS1)
				blkno = ((ufs1_daddr_t *)(ibp->b_data))[indiroff];
			else
				blkno = ((ufs2_daddr_t *)(ibp->b_data))[indiroff];
		}
		/*
		 * Check to see if block needs to be copied.
		 */
		if (blkno == 0) {
			/*
			 * A block that we map is being freed.  If it has not
			 * been claimed yet, we will claim or copy it (below).
			 */
			claimedblk = 1;
		} else if (blkno == BLK_SNAP) {
			/*
			 * No previous snapshot claimed the block,
			 * so it will be freed and become a BLK_NOCOPY
			 * (don't care) for us.
			 */
			if (claimedblk)
				panic("snapblkfree: inconsistent block type");
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
			if (lbn < NDADDR) {
				DIP(ip, i_db[lbn]) = BLK_NOCOPY;
				ip->i_flag |= IN_CHANGE | IN_UPDATE;
			} else if (ip->i_ump->um_fstype == UFS1) {
				((ufs1_daddr_t *)(ibp->b_data))[indiroff] =
				    BLK_NOCOPY;
				bdwrite(ibp);
			} else {
				((ufs2_daddr_t *)(ibp->b_data))[indiroff] =
				    BLK_NOCOPY;
				bdwrite(ibp);
			}
			VOP_UNLOCK(vp, 0, td);
			continue;
		} else /* BLK_NOCOPY or default */ {
			/*
			 * If the snapshot has already copied the block
			 * (default), or does not care about the block,
			 * it is not needed.
			 */
			if (lbn >= NDADDR)
				bqrelse(ibp);
			continue;
		}
		/*
		 * If this is a full size block, we will just grab it
		 * and assign it to the snapshot inode.  Otherwise we
		 * will proceed to copy it.  See explanation for this
		 * routine as to why only a single snapshot needs to
		 * claim this block.
		 */
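		/*
		 * To summarize the cases handled below: a full-sized block
		 * is grabbed by recording bno in the snapshot and returning
		 * 1, which stops the free; anything smaller is copied into
		 * a freshly allocated snapshot block, after which the free
		 * is allowed to proceed (the return of 0 at the end).
		 */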
		if (size == fs->fs_bsize) {
#ifdef DEBUG
			if (snapdebug)
				printf("%s %d lbn %jd from inum %d\n",
				    "Grabonremove: snapino", ip->i_number,
				    (intmax_t)lbn, inum);
#endif
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
			if (lbn < NDADDR) {
				DIP(ip, i_db[lbn]) = bno;
			} else if (ip->i_ump->um_fstype == UFS1) {
				((ufs1_daddr_t *)(ibp->b_data))[indiroff] = bno;
				bdwrite(ibp);
			} else {
				((ufs2_daddr_t *)(ibp->b_data))[indiroff] = bno;
				bdwrite(ibp);
			}
			DIP(ip, i_blocks) += btodb(size);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			VOP_UNLOCK(vp, 0, td);
			return (1);
		}
		if (lbn >= NDADDR)
			bqrelse(ibp);
		/*
		 * Allocate the block into which to do the copy.  Note that
		 * this allocation will never require any additional
		 * allocations for the snapshot inode.
		 */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		td->td_proc->p_flag |= P_COWINPROGRESS;
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, 0, &cbp);
		td->td_proc->p_flag &= ~P_COWINPROGRESS;
		if (error) {
			VOP_UNLOCK(vp, 0, td);
			break;
		}
#ifdef DEBUG
		if (snapdebug)
			printf("%s%d lbn %jd %s %d size %ld to blkno %jd\n",
			    "Copyonremove: snapino ", ip->i_number,
			    (intmax_t)lbn, "for inum", inum, size,
			    (intmax_t)cbp->b_blkno);
#endif
		/*
		 * If we have already read the old block contents, then
		 * simply copy them to the new block.  Note that we need
		 * to synchronously write snapshots that have not been
		 * unlinked, and hence will be visible after a crash,
		 * to ensure their integrity.
		 */
		if (savedcbp != 0) {
			bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
			bawrite(cbp);
			if (dopersistence && ip->i_effnlink > 0)
				(void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
			VOP_UNLOCK(vp, 0, td);
			continue;
		}
		/*
		 * Otherwise, read the old block contents into the buffer.
		 */
		if ((error = readblock(cbp, lbn)) != 0) {
			bzero(cbp->b_data, fs->fs_bsize);
			bawrite(cbp);
			if (dopersistence && ip->i_effnlink > 0)
				(void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
			VOP_UNLOCK(vp, 0, td);
			break;
		}
		VOP_UNLOCK(vp, 0, td);
		savedcbp = cbp;
	}
	/*
	 * Note that we need to synchronously write snapshots that
	 * have not been unlinked, and hence will be visible after
	 * a crash, to ensure their integrity.
	 */
	if (savedcbp) {
		vp = savedcbp->b_vp;
		bawrite(savedcbp);
		if (dopersistence && VTOI(vp)->i_effnlink > 0) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
			(void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
			VOP_UNLOCK(vp, 0, td);
		}
	}
	/*
	 * If we have been unable to allocate a block in which to do
	 * the copy, then return non-zero so that the fragment will
	 * not be freed.  Although space will be lost, the snapshot
	 * will stay consistent.
	 */
	return (error);
}

/*
 * Associate snapshot files when mounting.
 */
void
ffs_snapshot_mount(mp)
	struct mount *mp;
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs = ump->um_fs;
	struct thread *td = curthread;
	struct snaphead *snaphead;
	struct vnode *vp;
	struct inode *ip;
	int error, snaploc, loc;

	snaphead = &ump->um_devvp->v_rdev->si_snapshots;
	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) {
		if (fs->fs_snapinum[snaploc] == 0)
			return;
		if ((error = VFS_VGET(mp, fs->fs_snapinum[snaploc],
		    LK_EXCLUSIVE, &vp)) != 0) {
			printf("ffs_snapshot_mount: vget failed %d\n", error);
			continue;
		}
		ip = VTOI(vp);
		if ((ip->i_flags & SF_SNAPSHOT) == 0) {
			printf("ffs_snapshot_mount: non-snapshot inode %d\n",
			    fs->fs_snapinum[snaploc]);
			vput(vp);
			for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) {
				if (fs->fs_snapinum[loc] == 0)
					break;
				fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc];
			}
			fs->fs_snapinum[loc - 1] = 0;
			snaploc--;
			continue;
		}
		if (ip->i_nextsnap.tqe_prev != 0)
			panic("ffs_snapshot_mount: %d already on list",
			    ip->i_number);
		else
			TAILQ_INSERT_TAIL(snaphead, ip, i_nextsnap);
		vp->v_flag |= VSYSTEM;
		ump->um_devvp->v_rdev->si_copyonwrite = ffs_copyonwrite;
		ump->um_devvp->v_flag |= VCOPYONWRITE;
		VOP_UNLOCK(vp, 0, td);
	}
}

/*
 * Disassociate snapshot files when unmounting.
 */
void
ffs_snapshot_unmount(mp)
	struct mount *mp;
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct snaphead *snaphead = &ump->um_devvp->v_rdev->si_snapshots;
	struct inode *xp;

	while ((xp = TAILQ_FIRST(snaphead)) != 0) {
		TAILQ_REMOVE(snaphead, xp, i_nextsnap);
		xp->i_nextsnap.tqe_prev = 0;
		if (xp->i_effnlink > 0)
			vrele(ITOV(xp));
	}
	ump->um_devvp->v_rdev->si_copyonwrite = 0;
	ump->um_devvp->v_flag &= ~VCOPYONWRITE;
}

/*
 * Check for need to copy block that is about to be written,
 * copying the block if necessary.
 */
static int
ffs_copyonwrite(devvp, bp)
	struct vnode *devvp;
	struct buf *bp;
{
	struct buf *ibp, *cbp, *savedcbp = 0;
	struct thread *td = curthread;
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp;
	ufs2_daddr_t lbn, blkno;
	int indiroff, error = 0;

	fs = TAILQ_FIRST(&devvp->v_rdev->si_snapshots)->i_fs;
	lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno));
	if (td->td_proc->p_flag & P_COWINPROGRESS)
		panic("ffs_copyonwrite: recursive call");
	TAILQ_FOREACH(ip, &devvp->v_rdev->si_snapshots, i_nextsnap) {
		vp = ITOV(ip);
		/*
		 * We ensure that everything of our own that needs to be
		 * copied will be done at the time that ffs_snapshot is
		 * called.  Thus we can skip the check here which can
		 * deadlock in doing the lookup in UFS_BALLOC.
		 */
		if (bp->b_vp == vp)
			continue;
		/*
		 * Check to see if block needs to be copied.  We have to
		 * be able to do the UFS_BALLOC without blocking, otherwise
		 * we may get in a deadlock with another process also
		 * trying to allocate.  If we find ourselves unable to
		 * get the buffer lock, we unlock the snapshot vnode,
		 * sleep briefly, and try again.
		 */
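		/*
		 * In practice (a summary of the retry protocol below):
		 * UFS_BALLOC is called with BA_NOWAIT, and a return of
		 * EWOULDBLOCK means only that a buffer lock was contended,
		 * so the snapshot vnode is unlocked, the thread naps for a
		 * tick, and the lookup is retried; any other error aborts
		 * the copy-on-write scan.
		 */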
retry:
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		if (lbn < NDADDR) {
			blkno = DIP(ip, i_db[lbn]);
		} else {
			td->td_proc->p_flag |= P_COWINPROGRESS;
			error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
			    fs->fs_bsize, KERNCRED, BA_METAONLY | BA_NOWAIT,
			    &ibp);
			td->td_proc->p_flag &= ~P_COWINPROGRESS;
			if (error) {
				VOP_UNLOCK(vp, 0, td);
				if (error != EWOULDBLOCK)
					break;
				tsleep(vp, td->td_ksegrp->kg_user_pri,
				    "nap", 1);
				goto retry;
			}
			indiroff = (lbn - NDADDR) % NINDIR(fs);
			if (ip->i_ump->um_fstype == UFS1)
				blkno = ((ufs1_daddr_t *)(ibp->b_data))[indiroff];
			else
				blkno = ((ufs2_daddr_t *)(ibp->b_data))[indiroff];
			bqrelse(ibp);
		}
#ifdef DIAGNOSTIC
		if (blkno == BLK_SNAP && bp->b_lblkno >= 0)
			panic("ffs_copyonwrite: bad copy block");
#endif
		if (blkno != 0) {
			VOP_UNLOCK(vp, 0, td);
			continue;
		}
		/*
		 * Allocate the block into which to do the copy.  Note that
		 * this allocation will never require any additional
		 * allocations for the snapshot inode.
		 */
		td->td_proc->p_flag |= P_COWINPROGRESS;
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, BA_NOWAIT, &cbp);
		td->td_proc->p_flag &= ~P_COWINPROGRESS;
		if (error) {
			VOP_UNLOCK(vp, 0, td);
			if (error != EWOULDBLOCK)
				break;
			tsleep(vp, td->td_ksegrp->kg_user_pri, "nap", 1);
			goto retry;
		}
#ifdef DEBUG
		if (snapdebug) {
			printf("Copyonwrite: snapino %d lbn %jd for ",
			    ip->i_number, (intmax_t)lbn);
			if (bp->b_vp == devvp)
				printf("fs metadata");
			else
				printf("inum %d", VTOI(bp->b_vp)->i_number);
			printf(" lblkno %jd to blkno %jd\n",
			    (intmax_t)bp->b_lblkno, (intmax_t)cbp->b_blkno);
		}
#endif
		/*
		 * If we have already read the old block contents, then
		 * simply copy them to the new block.  Note that we need
		 * to synchronously write snapshots that have not been
		 * unlinked, and hence will be visible after a crash,
		 * to ensure their integrity.
		 */
		if (savedcbp != 0) {
			bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
			bawrite(cbp);
			if (dopersistence && ip->i_effnlink > 0)
				(void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
			VOP_UNLOCK(vp, 0, td);
			continue;
		}
		/*
		 * Otherwise, read the old block contents into the buffer.
		 */
		if ((error = readblock(cbp, lbn)) != 0) {
			bzero(cbp->b_data, fs->fs_bsize);
			bawrite(cbp);
			if (dopersistence && ip->i_effnlink > 0)
				(void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
			VOP_UNLOCK(vp, 0, td);
			break;
		}
		savedcbp = cbp;
		VOP_UNLOCK(vp, 0, td);
	}
	/*
	 * Note that we need to synchronously write snapshots that
	 * have not been unlinked, and hence will be visible after
	 * a crash, to ensure their integrity.
	 */
	if (savedcbp) {
		vp = savedcbp->b_vp;
		bawrite(savedcbp);
		if (dopersistence && VTOI(vp)->i_effnlink > 0) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
			(void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
			VOP_UNLOCK(vp, 0, td);
		}
	}
	return (error);
}

/*
 * Read the specified block into the given buffer.
 * Much of this boiler-plate comes from bwrite().
 */
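/*
 * Worth noting (descriptive): the "lbn" handed to readblock() is a
 * filesystem-relative block number, not a file offset; it is converted
 * straight to a device byte offset (blkstofrags/fsbtodb/dbtob) and read
 * with physio() on the underlying device, bypassing the file-level block
 * mapping and copy-on-write paths that the callers above are in the
 * middle of.
 */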
static int
readblock(bp, lbn)
	struct buf *bp;
	ufs2_daddr_t lbn;
{
	struct uio auio;
	struct iovec aiov;
	struct thread *td = curthread;
	struct inode *ip = VTOI(bp->b_vp);

	aiov.iov_base = bp->b_data;
	aiov.iov_len = bp->b_bcount;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = dbtob(fsbtodb(ip->i_fs, blkstofrags(ip->i_fs, lbn)));
	auio.uio_resid = bp->b_bcount;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_td = td;
	return (physio(ip->i_devvp->v_rdev, &auio, 0));
}