1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)ffs_inode.c 8.13 (Berkeley) 4/21/95 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_quota.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/bio.h> 42 #include <sys/buf.h> 43 #include <sys/malloc.h> 44 #include <sys/mount.h> 45 #include <sys/proc.h> 46 #include <sys/racct.h> 47 #include <sys/random.h> 48 #include <sys/resourcevar.h> 49 #include <sys/rwlock.h> 50 #include <sys/stat.h> 51 #include <sys/vmmeter.h> 52 #include <sys/vnode.h> 53 54 #include <vm/vm.h> 55 #include <vm/vm_extern.h> 56 #include <vm/vm_object.h> 57 58 #include <ufs/ufs/extattr.h> 59 #include <ufs/ufs/quota.h> 60 #include <ufs/ufs/ufsmount.h> 61 #include <ufs/ufs/inode.h> 62 #include <ufs/ufs/ufs_extern.h> 63 64 #include <ufs/ffs/fs.h> 65 #include <ufs/ffs/ffs_extern.h> 66 67 static int ffs_indirtrunc(struct inode *, ufs2_daddr_t, ufs2_daddr_t, 68 ufs2_daddr_t, int, ufs2_daddr_t *); 69 70 /* 71 * Update the access, modified, and inode change times as specified by the 72 * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. Write the inode 73 * to disk if the IN_MODIFIED flag is set (it may be set initially, or by 74 * the timestamp update). The IN_LAZYMOD flag is set to force a write 75 * later if not now. The IN_LAZYACCESS is set instead of IN_MODIFIED if the fs 76 * is currently being suspended (or is suspended) and vnode has been accessed. 77 * If we write now, then clear IN_MODIFIED, IN_LAZYACCESS and IN_LAZYMOD to 78 * reflect the presumably successful write, and if waitfor is set, then wait 79 * for the write to complete. 80 */ 81 int 82 ffs_update(vp, waitfor) 83 struct vnode *vp; 84 int waitfor; 85 { 86 struct fs *fs; 87 struct buf *bp; 88 struct inode *ip; 89 daddr_t bn; 90 int flags, error; 91 92 ASSERT_VOP_ELOCKED(vp, "ffs_update"); 93 ufs_itimes(vp); 94 ip = VTOI(vp); 95 if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0) 96 return (0); 97 ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED); 98 /* 99 * The IN_SIZEMOD and IN_IBLKDATA flags indicate changes to the 100 * file size and block pointer fields in the inode. When these 101 * fields have been changed, the fsync() and fsyncdata() system 102 * calls must write the inode to ensure their semantics that the 103 * file is on stable store. 104 * 105 * The IN_SIZEMOD and IN_IBLKDATA flags cannot be cleared until 106 * a synchronous write of the inode is done. If they are cleared 107 * on an asynchronous write, then the inode may not yet have been 108 * written to the disk when an fsync() or fsyncdata() call is done. 109 * Absent these flags, these calls would not know that they needed 110 * to write the inode. Thus, these flags only can be cleared on 111 * synchronous writes of the inode. Since the inode will be locked 112 * for the duration of the I/O that writes it to disk, no fsync() 113 * or fsyncdata() will be able to run before the on-disk inode 114 * is complete. 115 */ 116 if (waitfor) 117 ip->i_flag &= ~(IN_SIZEMOD | IN_IBLKDATA); 118 fs = ITOFS(ip); 119 if (fs->fs_ronly && ITOUMP(ip)->um_fsckpid == 0) 120 return (0); 121 /* 122 * If we are updating a snapshot and another process is currently 123 * writing the buffer containing the inode for this snapshot then 124 * a deadlock can occur when it tries to check the snapshot to see 125 * if that block needs to be copied. Thus when updating a snapshot 126 * we check to see if the buffer is already locked, and if it is 127 * we drop the snapshot lock until the buffer has been written 128 * and is available to us. We have to grab a reference to the 129 * snapshot vnode to prevent it from being removed while we are 130 * waiting for the buffer. 131 */ 132 flags = 0; 133 if (IS_SNAPSHOT(ip)) 134 flags = GB_LOCK_NOWAIT; 135 loop: 136 bn = fsbtodb(fs, ino_to_fsba(fs, ip->i_number)); 137 error = ffs_breadz(VFSTOUFS(vp->v_mount), ITODEVVP(ip), bn, bn, 138 (int) fs->fs_bsize, NULL, NULL, 0, NOCRED, flags, NULL, &bp); 139 if (error != 0) { 140 if (error != EBUSY) 141 return (error); 142 KASSERT((IS_SNAPSHOT(ip)), ("EBUSY from non-snapshot")); 143 /* 144 * Wait for our inode block to become available. 145 * 146 * Hold a reference to the vnode to protect against 147 * ffs_snapgone(). Since we hold a reference, it can only 148 * get reclaimed (VIRF_DOOMED flag) in a forcible downgrade 149 * or unmount. For an unmount, the entire filesystem will be 150 * gone, so we cannot attempt to touch anything associated 151 * with it while the vnode is unlocked; all we can do is 152 * pause briefly and try again. If when we relock the vnode 153 * we discover that it has been reclaimed, updating it is no 154 * longer necessary and we can just return an error. 155 */ 156 vref(vp); 157 VOP_UNLOCK(vp); 158 pause("ffsupd", 1); 159 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 160 vrele(vp); 161 if (VN_IS_DOOMED(vp)) 162 return (ENOENT); 163 goto loop; 164 } 165 if (DOINGSOFTDEP(vp)) 166 softdep_update_inodeblock(ip, bp, waitfor); 167 else if (ip->i_effnlink != ip->i_nlink) 168 panic("ffs_update: bad link cnt"); 169 if (I_IS_UFS1(ip)) { 170 *((struct ufs1_dinode *)bp->b_data + 171 ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1; 172 /* 173 * XXX: FIX? The entropy here is desirable, 174 * but the harvesting may be expensive 175 */ 176 random_harvest_queue(&(ip->i_din1), sizeof(ip->i_din1), RANDOM_FS_ATIME); 177 } else { 178 ffs_update_dinode_ckhash(fs, ip->i_din2); 179 *((struct ufs2_dinode *)bp->b_data + 180 ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2; 181 /* 182 * XXX: FIX? The entropy here is desirable, 183 * but the harvesting may be expensive 184 */ 185 random_harvest_queue(&(ip->i_din2), sizeof(ip->i_din2), RANDOM_FS_ATIME); 186 } 187 if (waitfor) { 188 error = bwrite(bp); 189 if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), error)) 190 error = 0; 191 } else if (vm_page_count_severe() || buf_dirty_count_severe()) { 192 bawrite(bp); 193 error = 0; 194 } else { 195 if (bp->b_bufsize == fs->fs_bsize) 196 bp->b_flags |= B_CLUSTEROK; 197 bdwrite(bp); 198 error = 0; 199 } 200 return (error); 201 } 202 203 #define SINGLE 0 /* index of single indirect block */ 204 #define DOUBLE 1 /* index of double indirect block */ 205 #define TRIPLE 2 /* index of triple indirect block */ 206 /* 207 * Truncate the inode ip to at most length size, freeing the 208 * disk blocks. 209 */ 210 int 211 ffs_truncate(vp, length, flags, cred) 212 struct vnode *vp; 213 off_t length; 214 int flags; 215 struct ucred *cred; 216 { 217 struct inode *ip; 218 ufs2_daddr_t bn, lbn, lastblock, lastiblock[UFS_NIADDR]; 219 ufs2_daddr_t indir_lbn[UFS_NIADDR], oldblks[UFS_NDADDR + UFS_NIADDR]; 220 ufs2_daddr_t newblks[UFS_NDADDR + UFS_NIADDR]; 221 ufs2_daddr_t count, blocksreleased = 0, datablocks, blkno; 222 struct bufobj *bo; 223 struct fs *fs; 224 struct buf *bp; 225 struct ufsmount *ump; 226 int softdeptrunc, journaltrunc; 227 int needextclean, extblocks; 228 int offset, size, level, nblocks; 229 int i, error, allerror, indiroff, waitforupdate; 230 u_long key; 231 off_t osize; 232 233 ip = VTOI(vp); 234 ump = VFSTOUFS(vp->v_mount); 235 fs = ump->um_fs; 236 bo = &vp->v_bufobj; 237 238 ASSERT_VOP_LOCKED(vp, "ffs_truncate"); 239 240 if (length < 0) 241 return (EINVAL); 242 if (length > fs->fs_maxfilesize) 243 return (EFBIG); 244 #ifdef QUOTA 245 error = getinoquota(ip); 246 if (error) 247 return (error); 248 #endif 249 /* 250 * Historically clients did not have to specify which data 251 * they were truncating. So, if not specified, we assume 252 * traditional behavior, e.g., just the normal data. 253 */ 254 if ((flags & (IO_EXT | IO_NORMAL)) == 0) 255 flags |= IO_NORMAL; 256 if (!DOINGSOFTDEP(vp) && !DOINGASYNC(vp)) 257 flags |= IO_SYNC; 258 waitforupdate = (flags & IO_SYNC) != 0 || !DOINGASYNC(vp); 259 /* 260 * If we are truncating the extended-attributes, and cannot 261 * do it with soft updates, then do it slowly here. If we are 262 * truncating both the extended attributes and the file contents 263 * (e.g., the file is being unlinked), then pick it off with 264 * soft updates below. 265 */ 266 allerror = 0; 267 needextclean = 0; 268 softdeptrunc = 0; 269 journaltrunc = DOINGSUJ(vp); 270 journaltrunc = 0; /* XXX temp patch until bug found */ 271 if (journaltrunc == 0 && DOINGSOFTDEP(vp) && length == 0) 272 softdeptrunc = !softdep_slowdown(vp); 273 extblocks = 0; 274 datablocks = DIP(ip, i_blocks); 275 if (fs->fs_magic == FS_UFS2_MAGIC && ip->i_din2->di_extsize > 0) { 276 extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize)); 277 datablocks -= extblocks; 278 } 279 if ((flags & IO_EXT) && extblocks > 0) { 280 if (length != 0) 281 panic("ffs_truncate: partial trunc of extdata"); 282 if (softdeptrunc || journaltrunc) { 283 if ((flags & IO_NORMAL) == 0) 284 goto extclean; 285 needextclean = 1; 286 } else { 287 if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) 288 return (error); 289 #ifdef QUOTA 290 (void) chkdq(ip, -extblocks, NOCRED, FORCE); 291 #endif 292 vinvalbuf(vp, V_ALT, 0, 0); 293 vn_pages_remove(vp, 294 OFF_TO_IDX(lblktosize(fs, -extblocks)), 0); 295 osize = ip->i_din2->di_extsize; 296 ip->i_din2->di_blocks -= extblocks; 297 ip->i_din2->di_extsize = 0; 298 for (i = 0; i < UFS_NXADDR; i++) { 299 oldblks[i] = ip->i_din2->di_extb[i]; 300 ip->i_din2->di_extb[i] = 0; 301 } 302 UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE); 303 if ((error = ffs_update(vp, waitforupdate))) 304 return (error); 305 for (i = 0; i < UFS_NXADDR; i++) { 306 if (oldblks[i] == 0) 307 continue; 308 ffs_blkfree(ump, fs, ITODEVVP(ip), oldblks[i], 309 sblksize(fs, osize, i), ip->i_number, 310 vp->v_type, NULL, SINGLETON_KEY); 311 } 312 } 313 } 314 if ((flags & IO_NORMAL) == 0) 315 return (0); 316 if (vp->v_type == VLNK && 317 (ip->i_size < vp->v_mount->mnt_maxsymlinklen || 318 datablocks == 0)) { 319 #ifdef INVARIANTS 320 if (length != 0) 321 panic("ffs_truncate: partial truncate of symlink"); 322 #endif 323 bzero(SHORTLINK(ip), (u_int)ip->i_size); 324 ip->i_size = 0; 325 DIP_SET(ip, i_size, 0); 326 UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); 327 if (needextclean) 328 goto extclean; 329 return (ffs_update(vp, waitforupdate)); 330 } 331 if (ip->i_size == length) { 332 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE); 333 if (needextclean) 334 goto extclean; 335 return (ffs_update(vp, 0)); 336 } 337 if (fs->fs_ronly) 338 panic("ffs_truncate: read-only filesystem"); 339 if (IS_SNAPSHOT(ip)) 340 ffs_snapremove(vp); 341 vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; 342 osize = ip->i_size; 343 /* 344 * Lengthen the size of the file. We must ensure that the 345 * last byte of the file is allocated. Since the smallest 346 * value of osize is 0, length will be at least 1. 347 */ 348 if (osize < length) { 349 vnode_pager_setsize(vp, length); 350 flags |= BA_CLRBUF; 351 error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); 352 if (error) { 353 vnode_pager_setsize(vp, osize); 354 return (error); 355 } 356 ip->i_size = length; 357 DIP_SET(ip, i_size, length); 358 if (bp->b_bufsize == fs->fs_bsize) 359 bp->b_flags |= B_CLUSTEROK; 360 if (flags & IO_SYNC) 361 bwrite(bp); 362 else if (DOINGASYNC(vp)) 363 bdwrite(bp); 364 else 365 bawrite(bp); 366 UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); 367 return (ffs_update(vp, waitforupdate)); 368 } 369 /* 370 * Lookup block number for a given offset. Zero length files 371 * have no blocks, so return a blkno of -1. 372 */ 373 lbn = lblkno(fs, length - 1); 374 if (length == 0) { 375 blkno = -1; 376 } else if (lbn < UFS_NDADDR) { 377 blkno = DIP(ip, i_db[lbn]); 378 } else { 379 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), fs->fs_bsize, 380 cred, BA_METAONLY, &bp); 381 if (error) 382 return (error); 383 indiroff = (lbn - UFS_NDADDR) % NINDIR(fs); 384 if (I_IS_UFS1(ip)) 385 blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff]; 386 else 387 blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff]; 388 /* 389 * If the block number is non-zero, then the indirect block 390 * must have been previously allocated and need not be written. 391 * If the block number is zero, then we may have allocated 392 * the indirect block and hence need to write it out. 393 */ 394 if (blkno != 0) 395 brelse(bp); 396 else if (flags & IO_SYNC) 397 bwrite(bp); 398 else 399 bdwrite(bp); 400 } 401 /* 402 * If the block number at the new end of the file is zero, 403 * then we must allocate it to ensure that the last block of 404 * the file is allocated. Soft updates does not handle this 405 * case, so here we have to clean up the soft updates data 406 * structures describing the allocation past the truncation 407 * point. Finding and deallocating those structures is a lot of 408 * work. Since partial truncation with a hole at the end occurs 409 * rarely, we solve the problem by syncing the file so that it 410 * will have no soft updates data structures left. 411 */ 412 if (blkno == 0 && (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) 413 return (error); 414 if (blkno != 0 && DOINGSOFTDEP(vp)) { 415 if (softdeptrunc == 0 && journaltrunc == 0) { 416 /* 417 * If soft updates cannot handle this truncation, 418 * clean up soft dependency data structures and 419 * fall through to the synchronous truncation. 420 */ 421 if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) 422 return (error); 423 } else { 424 flags = IO_NORMAL | (needextclean ? IO_EXT: 0); 425 if (journaltrunc) 426 softdep_journal_freeblocks(ip, cred, length, 427 flags); 428 else 429 softdep_setup_freeblocks(ip, length, flags); 430 ASSERT_VOP_LOCKED(vp, "ffs_truncate1"); 431 if (journaltrunc == 0) { 432 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE); 433 error = ffs_update(vp, 0); 434 } 435 return (error); 436 } 437 } 438 /* 439 * Shorten the size of the file. If the last block of the 440 * shortened file is unallocated, we must allocate it. 441 * Additionally, if the file is not being truncated to a 442 * block boundary, the contents of the partial block 443 * following the end of the file must be zero'ed in 444 * case it ever becomes accessible again because of 445 * subsequent file growth. Directories however are not 446 * zero'ed as they should grow back initialized to empty. 447 */ 448 offset = blkoff(fs, length); 449 if (blkno != 0 && offset == 0) { 450 ip->i_size = length; 451 DIP_SET(ip, i_size, length); 452 UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); 453 } else { 454 lbn = lblkno(fs, length); 455 flags |= BA_CLRBUF; 456 error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); 457 if (error) 458 return (error); 459 /* 460 * When we are doing soft updates and the UFS_BALLOC 461 * above fills in a direct block hole with a full sized 462 * block that will be truncated down to a fragment below, 463 * we must flush out the block dependency with an FSYNC 464 * so that we do not get a soft updates inconsistency 465 * when we create the fragment below. 466 */ 467 if (DOINGSOFTDEP(vp) && lbn < UFS_NDADDR && 468 fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize && 469 (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) 470 return (error); 471 ip->i_size = length; 472 DIP_SET(ip, i_size, length); 473 size = blksize(fs, ip, lbn); 474 if (vp->v_type != VDIR && offset != 0) 475 bzero((char *)bp->b_data + offset, 476 (u_int)(size - offset)); 477 /* Kirk's code has reallocbuf(bp, size, 1) here */ 478 allocbuf(bp, size); 479 if (bp->b_bufsize == fs->fs_bsize) 480 bp->b_flags |= B_CLUSTEROK; 481 if (flags & IO_SYNC) 482 bwrite(bp); 483 else if (DOINGASYNC(vp)) 484 bdwrite(bp); 485 else 486 bawrite(bp); 487 UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); 488 } 489 /* 490 * Calculate index into inode's block list of 491 * last direct and indirect blocks (if any) 492 * which we want to keep. Lastblock is -1 when 493 * the file is truncated to 0. 494 */ 495 lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; 496 lastiblock[SINGLE] = lastblock - UFS_NDADDR; 497 lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); 498 lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); 499 nblocks = btodb(fs->fs_bsize); 500 /* 501 * Update file and block pointers on disk before we start freeing 502 * blocks. If we crash before free'ing blocks below, the blocks 503 * will be returned to the free list. lastiblock values are also 504 * normalized to -1 for calls to ffs_indirtrunc below. 505 */ 506 for (level = TRIPLE; level >= SINGLE; level--) { 507 oldblks[UFS_NDADDR + level] = DIP(ip, i_ib[level]); 508 if (lastiblock[level] < 0) { 509 DIP_SET(ip, i_ib[level], 0); 510 lastiblock[level] = -1; 511 } 512 } 513 for (i = 0; i < UFS_NDADDR; i++) { 514 oldblks[i] = DIP(ip, i_db[i]); 515 if (i > lastblock) 516 DIP_SET(ip, i_db[i], 0); 517 } 518 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE); 519 allerror = ffs_update(vp, waitforupdate); 520 521 /* 522 * Having written the new inode to disk, save its new configuration 523 * and put back the old block pointers long enough to process them. 524 * Note that we save the new block configuration so we can check it 525 * when we are done. 526 */ 527 for (i = 0; i < UFS_NDADDR; i++) { 528 newblks[i] = DIP(ip, i_db[i]); 529 DIP_SET(ip, i_db[i], oldblks[i]); 530 } 531 for (i = 0; i < UFS_NIADDR; i++) { 532 newblks[UFS_NDADDR + i] = DIP(ip, i_ib[i]); 533 DIP_SET(ip, i_ib[i], oldblks[UFS_NDADDR + i]); 534 } 535 ip->i_size = osize; 536 DIP_SET(ip, i_size, osize); 537 UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); 538 539 error = vtruncbuf(vp, length, fs->fs_bsize); 540 if (error && (allerror == 0)) 541 allerror = error; 542 543 /* 544 * Indirect blocks first. 545 */ 546 indir_lbn[SINGLE] = -UFS_NDADDR; 547 indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; 548 indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; 549 for (level = TRIPLE; level >= SINGLE; level--) { 550 bn = DIP(ip, i_ib[level]); 551 if (bn != 0) { 552 error = ffs_indirtrunc(ip, indir_lbn[level], 553 fsbtodb(fs, bn), lastiblock[level], level, &count); 554 if (error) 555 allerror = error; 556 blocksreleased += count; 557 if (lastiblock[level] < 0) { 558 DIP_SET(ip, i_ib[level], 0); 559 ffs_blkfree(ump, fs, ump->um_devvp, bn, 560 fs->fs_bsize, ip->i_number, 561 vp->v_type, NULL, SINGLETON_KEY); 562 blocksreleased += nblocks; 563 } 564 } 565 if (lastiblock[level] >= 0) 566 goto done; 567 } 568 569 /* 570 * All whole direct blocks or frags. 571 */ 572 key = ffs_blkrelease_start(ump, ump->um_devvp, ip->i_number); 573 for (i = UFS_NDADDR - 1; i > lastblock; i--) { 574 long bsize; 575 576 bn = DIP(ip, i_db[i]); 577 if (bn == 0) 578 continue; 579 DIP_SET(ip, i_db[i], 0); 580 bsize = blksize(fs, ip, i); 581 ffs_blkfree(ump, fs, ump->um_devvp, bn, bsize, ip->i_number, 582 vp->v_type, NULL, key); 583 blocksreleased += btodb(bsize); 584 } 585 ffs_blkrelease_finish(ump, key); 586 if (lastblock < 0) 587 goto done; 588 589 /* 590 * Finally, look for a change in size of the 591 * last direct block; release any frags. 592 */ 593 bn = DIP(ip, i_db[lastblock]); 594 if (bn != 0) { 595 long oldspace, newspace; 596 597 /* 598 * Calculate amount of space we're giving 599 * back as old block size minus new block size. 600 */ 601 oldspace = blksize(fs, ip, lastblock); 602 ip->i_size = length; 603 DIP_SET(ip, i_size, length); 604 UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); 605 newspace = blksize(fs, ip, lastblock); 606 if (newspace == 0) 607 panic("ffs_truncate: newspace"); 608 if (oldspace - newspace > 0) { 609 /* 610 * Block number of space to be free'd is 611 * the old block # plus the number of frags 612 * required for the storage we're keeping. 613 */ 614 bn += numfrags(fs, newspace); 615 ffs_blkfree(ump, fs, ump->um_devvp, bn, 616 oldspace - newspace, ip->i_number, vp->v_type, 617 NULL, SINGLETON_KEY); 618 blocksreleased += btodb(oldspace - newspace); 619 } 620 } 621 done: 622 #ifdef INVARIANTS 623 for (level = SINGLE; level <= TRIPLE; level++) 624 if (newblks[UFS_NDADDR + level] != DIP(ip, i_ib[level])) 625 panic("ffs_truncate1: level %d newblks %jd != i_ib %jd", 626 level, (intmax_t)newblks[UFS_NDADDR + level], 627 (intmax_t)DIP(ip, i_ib[level])); 628 for (i = 0; i < UFS_NDADDR; i++) 629 if (newblks[i] != DIP(ip, i_db[i])) 630 panic("ffs_truncate2: blkno %d newblks %jd != i_db %jd", 631 i, (intmax_t)newblks[UFS_NDADDR + level], 632 (intmax_t)DIP(ip, i_ib[level])); 633 BO_LOCK(bo); 634 if (length == 0 && 635 (fs->fs_magic != FS_UFS2_MAGIC || ip->i_din2->di_extsize == 0) && 636 (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0)) 637 panic("ffs_truncate3: vp = %p, buffers: dirty = %d, clean = %d", 638 vp, bo->bo_dirty.bv_cnt, bo->bo_clean.bv_cnt); 639 BO_UNLOCK(bo); 640 #endif /* INVARIANTS */ 641 /* 642 * Put back the real size. 643 */ 644 ip->i_size = length; 645 DIP_SET(ip, i_size, length); 646 if (DIP(ip, i_blocks) >= blocksreleased) 647 DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - blocksreleased); 648 else /* sanity */ 649 DIP_SET(ip, i_blocks, 0); 650 UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE); 651 #ifdef QUOTA 652 (void) chkdq(ip, -blocksreleased, NOCRED, FORCE); 653 #endif 654 return (allerror); 655 656 extclean: 657 if (journaltrunc) 658 softdep_journal_freeblocks(ip, cred, length, IO_EXT); 659 else 660 softdep_setup_freeblocks(ip, length, IO_EXT); 661 return (ffs_update(vp, waitforupdate)); 662 } 663 664 /* 665 * Release blocks associated with the inode ip and stored in the indirect 666 * block bn. Blocks are free'd in LIFO order up to (but not including) 667 * lastbn. If level is greater than SINGLE, the block is an indirect block 668 * and recursive calls to indirtrunc must be used to cleanse other indirect 669 * blocks. 670 */ 671 static int 672 ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp) 673 struct inode *ip; 674 ufs2_daddr_t lbn, lastbn; 675 ufs2_daddr_t dbn; 676 int level; 677 ufs2_daddr_t *countp; 678 { 679 struct buf *bp; 680 struct fs *fs; 681 struct ufsmount *ump; 682 struct vnode *vp; 683 caddr_t copy = NULL; 684 u_long key; 685 int i, nblocks, error = 0, allerror = 0; 686 ufs2_daddr_t nb, nlbn, last; 687 ufs2_daddr_t blkcount, factor, blocksreleased = 0; 688 ufs1_daddr_t *bap1 = NULL; 689 ufs2_daddr_t *bap2 = NULL; 690 #define BAP(ip, i) (I_IS_UFS1(ip) ? bap1[i] : bap2[i]) 691 692 fs = ITOFS(ip); 693 ump = ITOUMP(ip); 694 695 /* 696 * Calculate index in current block of last 697 * block to be kept. -1 indicates the entire 698 * block so we need not calculate the index. 699 */ 700 factor = lbn_offset(fs, level); 701 last = lastbn; 702 if (lastbn > 0) 703 last /= factor; 704 nblocks = btodb(fs->fs_bsize); 705 /* 706 * Get buffer of block pointers, zero those entries corresponding 707 * to blocks to be free'd, and update on disk copy first. Since 708 * double(triple) indirect before single(double) indirect, calls 709 * to VOP_BMAP() on these blocks will fail. However, we already 710 * have the on-disk address, so we just pass it to bread() instead 711 * of having bread() attempt to calculate it using VOP_BMAP(). 712 */ 713 vp = ITOV(ip); 714 error = ffs_breadz(ump, vp, lbn, dbn, (int)fs->fs_bsize, NULL, NULL, 0, 715 NOCRED, 0, NULL, &bp); 716 if (error) { 717 *countp = 0; 718 return (error); 719 } 720 721 if (I_IS_UFS1(ip)) 722 bap1 = (ufs1_daddr_t *)bp->b_data; 723 else 724 bap2 = (ufs2_daddr_t *)bp->b_data; 725 if (lastbn != -1) { 726 copy = malloc(fs->fs_bsize, M_TEMP, M_WAITOK); 727 bcopy((caddr_t)bp->b_data, copy, (u_int)fs->fs_bsize); 728 for (i = last + 1; i < NINDIR(fs); i++) 729 if (I_IS_UFS1(ip)) 730 bap1[i] = 0; 731 else 732 bap2[i] = 0; 733 if (DOINGASYNC(vp)) { 734 bdwrite(bp); 735 } else { 736 error = bwrite(bp); 737 if (error) 738 allerror = error; 739 } 740 if (I_IS_UFS1(ip)) 741 bap1 = (ufs1_daddr_t *)copy; 742 else 743 bap2 = (ufs2_daddr_t *)copy; 744 } 745 746 /* 747 * Recursively free totally unused blocks. 748 */ 749 key = ffs_blkrelease_start(ump, ITODEVVP(ip), ip->i_number); 750 for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; 751 i--, nlbn += factor) { 752 nb = BAP(ip, i); 753 if (nb == 0) 754 continue; 755 if (level > SINGLE) { 756 if ((error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), 757 (ufs2_daddr_t)-1, level - 1, &blkcount)) != 0) 758 allerror = error; 759 blocksreleased += blkcount; 760 } 761 ffs_blkfree(ump, fs, ITODEVVP(ip), nb, fs->fs_bsize, 762 ip->i_number, vp->v_type, NULL, key); 763 blocksreleased += nblocks; 764 } 765 ffs_blkrelease_finish(ump, key); 766 767 /* 768 * Recursively free last partial block. 769 */ 770 if (level > SINGLE && lastbn >= 0) { 771 last = lastbn % factor; 772 nb = BAP(ip, i); 773 if (nb != 0) { 774 error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), 775 last, level - 1, &blkcount); 776 if (error) 777 allerror = error; 778 blocksreleased += blkcount; 779 } 780 } 781 if (copy != NULL) { 782 free(copy, M_TEMP); 783 } else { 784 bp->b_flags |= B_INVAL | B_NOCACHE; 785 brelse(bp); 786 } 787 788 *countp = blocksreleased; 789 return (allerror); 790 } 791 792 int 793 ffs_rdonly(struct inode *ip) 794 { 795 796 return (ITOFS(ip)->fs_ronly != 0); 797 } 798