xref: /freebsd/sys/ufs/ffs/ffs_inode.c (revision 63f537551380d2dab29fa402ad1269feae17e594)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1982, 1986, 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)ffs_inode.c	8.13 (Berkeley) 4/21/95
32  */
33 
34 #include <sys/cdefs.h>
35 #include "opt_ufs.h"
36 #include "opt_quota.h"
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/bio.h>
41 #include <sys/buf.h>
42 #include <sys/malloc.h>
43 #include <sys/mount.h>
44 #include <sys/proc.h>
45 #include <sys/racct.h>
46 #include <sys/random.h>
47 #include <sys/resourcevar.h>
48 #include <sys/rwlock.h>
49 #include <sys/stat.h>
50 #include <sys/vmmeter.h>
51 #include <sys/vnode.h>
52 
53 #include <vm/vm.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_object.h>
56 
57 #include <ufs/ufs/extattr.h>
58 #include <ufs/ufs/quota.h>
59 #include <ufs/ufs/ufsmount.h>
60 #include <ufs/ufs/inode.h>
61 #include <ufs/ufs/dir.h>
62 #ifdef UFS_DIRHASH
63 #include <ufs/ufs/dirhash.h>
64 #endif
65 #include <ufs/ufs/ufs_extern.h>
66 
67 #include <ufs/ffs/fs.h>
68 #include <ufs/ffs/ffs_extern.h>
69 
70 static int ffs_indirtrunc(struct inode *, ufs2_daddr_t, ufs2_daddr_t,
71 	    ufs2_daddr_t, int, ufs2_daddr_t *);
72 
73 static void
74 ffs_inode_bwrite(struct vnode *vp, struct buf *bp, int flags)
75 {
76 	if ((flags & IO_SYNC) != 0)
77 		bwrite(bp);
78 	else if (DOINGASYNC(vp))
79 		bdwrite(bp);
80 	else
81 		bawrite(bp);
82 }
83 
84 /*
85  * Update the access, modified, and inode change times as specified by the
86  * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively.  Write the inode
87  * to disk if the IN_MODIFIED flag is set (it may be set initially, or by
88  * the timestamp update).  The IN_LAZYMOD flag is set to force a write
89  * later if not now.  The IN_LAZYACCESS is set instead of IN_MODIFIED if the fs
90  * is currently being suspended (or is suspended) and vnode has been accessed.
91  * If we write now, then clear IN_MODIFIED, IN_LAZYACCESS and IN_LAZYMOD to
92  * reflect the presumably successful write, and if waitfor is set, then wait
93  * for the write to complete.
94  */
95 int
96 ffs_update(struct vnode *vp, int waitfor)
97 {
98 	struct fs *fs;
99 	struct buf *bp;
100 	struct inode *ip;
101 	daddr_t bn;
102 	int flags, error;
103 
104 	ASSERT_VOP_ELOCKED(vp, "ffs_update");
105 	ufs_itimes(vp);
106 	ip = VTOI(vp);
107 	if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0)
108 		return (0);
109 	ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED);
110 	/*
111 	 * The IN_SIZEMOD and IN_IBLKDATA flags indicate changes to the
112 	 * file size and block pointer fields in the inode. When these
113 	 * fields have been changed, the fsync() and fsyncdata() system
114 	 * calls must write the inode to ensure their semantics that the
115 	 * file is on stable store.
116 	 *
117 	 * The IN_SIZEMOD and IN_IBLKDATA flags cannot be cleared until
118 	 * a synchronous write of the inode is done. If they are cleared
119 	 * on an asynchronous write, then the inode may not yet have been
120 	 * written to the disk when an fsync() or fsyncdata() call is done.
121 	 * Absent these flags, these calls would not know that they needed
122 	 * to write the inode. Thus, these flags only can be cleared on
123 	 * synchronous writes of the inode. Since the inode will be locked
124 	 * for the duration of the I/O that writes it to disk, no fsync()
125 	 * or fsyncdata() will be able to run before the on-disk inode
126 	 * is complete.
127 	 */
128 	if (waitfor)
129 		ip->i_flag &= ~(IN_SIZEMOD | IN_IBLKDATA);
130 	fs = ITOFS(ip);
131 	if (fs->fs_ronly)
132 		return (0);
133 	/*
134 	 * If we are updating a snapshot and another process is currently
135 	 * writing the buffer containing the inode for this snapshot then
136 	 * a deadlock can occur when it tries to check the snapshot to see
137 	 * if that block needs to be copied. Thus when updating a snapshot
138 	 * we check to see if the buffer is already locked, and if it is
139 	 * we drop the snapshot lock until the buffer has been written
140 	 * and is available to us. We have to grab a reference to the
141 	 * snapshot vnode to prevent it from being removed while we are
142 	 * waiting for the buffer.
143 	 */
144 loop:
145 	flags = 0;
146 	if (IS_SNAPSHOT(ip))
147 		flags = GB_LOCK_NOWAIT;
148 	bn = fsbtodb(fs, ino_to_fsba(fs, ip->i_number));
149 	error = ffs_breadz(VFSTOUFS(vp->v_mount), ITODEVVP(ip), bn, bn,
150 	     (int) fs->fs_bsize, NULL, NULL, 0, NOCRED, flags, NULL, &bp);
151 	if (error != 0) {
152 		/*
153 		 * If EBUSY was returned without GB_LOCK_NOWAIT (which
154 		 * requests trylock for buffer lock), it is for some
155 		 * other reason and we should not handle it specially.
156 		 */
157 		if (error != EBUSY || (flags & GB_LOCK_NOWAIT) == 0)
158 			return (error);
159 
160 		/*
161 		 * Wait for our inode block to become available.
162 		 *
163 		 * Hold a reference to the vnode to protect against
164 		 * ffs_snapgone(). Since we hold a reference, it can only
165 		 * get reclaimed (VIRF_DOOMED flag) in a forcible downgrade
166 		 * or unmount. For an unmount, the entire filesystem will be
167 		 * gone, so we cannot attempt to touch anything associated
168 		 * with it while the vnode is unlocked; all we can do is
169 		 * pause briefly and try again. If when we relock the vnode
170 		 * we discover that it has been reclaimed, updating it is no
171 		 * longer necessary and we can just return an error.
172 		 */
173 		vref(vp);
174 		VOP_UNLOCK(vp);
175 		pause("ffsupd", 1);
176 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
177 		vrele(vp);
178 		if (!IS_UFS(vp))
179 			return (ENOENT);
180 
181 		/*
182 		 * Recalculate flags, because the vnode was relocked and
183 		 * could no longer be a snapshot.
184 		 */
185 		goto loop;
186 	}
187 	if (DOINGSOFTDEP(vp))
188 		softdep_update_inodeblock(ip, bp, waitfor);
189 	else if (ip->i_effnlink != ip->i_nlink)
190 		panic("ffs_update: bad link cnt");
191 	if (I_IS_UFS1(ip)) {
192 		*((struct ufs1_dinode *)bp->b_data +
193 		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;
194 		/*
195 		 * XXX: FIX? The entropy here is desirable,
196 		 * but the harvesting may be expensive
197 		 */
198 		random_harvest_queue(&(ip->i_din1), sizeof(ip->i_din1), RANDOM_FS_ATIME);
199 	} else {
200 		ffs_update_dinode_ckhash(fs, ip->i_din2);
201 		*((struct ufs2_dinode *)bp->b_data +
202 		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
203 		/*
204 		 * XXX: FIX? The entropy here is desirable,
205 		 * but the harvesting may be expensive
206 		 */
207 		random_harvest_queue(&(ip->i_din2), sizeof(ip->i_din2), RANDOM_FS_ATIME);
208 	}
209 	if (waitfor) {
210 		error = bwrite(bp);
211 		if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), error))
212 			error = 0;
213 	} else if (vm_page_count_severe() || buf_dirty_count_severe()) {
214 		bawrite(bp);
215 		error = 0;
216 	} else {
217 		if (bp->b_bufsize == fs->fs_bsize)
218 			bp->b_flags |= B_CLUSTEROK;
219 		bdwrite(bp);
220 		error = 0;
221 	}
222 	return (error);
223 }
224 
225 #define	SINGLE	0	/* index of single indirect block */
226 #define	DOUBLE	1	/* index of double indirect block */
227 #define	TRIPLE	2	/* index of triple indirect block */
228 /*
229  * Truncate the inode ip to at most length size, freeing the
230  * disk blocks.
231  */
232 int
233 ffs_truncate(struct vnode *vp,
234 	off_t length,
235 	int flags,
236 	struct ucred *cred)
237 {
238 	struct inode *ip;
239 	ufs2_daddr_t bn, lbn, lastblock, lastiblock[UFS_NIADDR];
240 	ufs2_daddr_t indir_lbn[UFS_NIADDR], oldblks[UFS_NDADDR + UFS_NIADDR];
241 	ufs2_daddr_t newblks[UFS_NDADDR + UFS_NIADDR];
242 	ufs2_daddr_t count, blocksreleased = 0, blkno;
243 	struct bufobj *bo __diagused;
244 	struct fs *fs;
245 	struct buf *bp;
246 	struct ufsmount *ump;
247 	int softdeptrunc, journaltrunc;
248 	int needextclean, extblocks;
249 	int offset, size, level, nblocks;
250 	int i, error, allerror, indiroff, waitforupdate;
251 	uint64_t key;
252 	off_t osize;
253 
254 	ip = VTOI(vp);
255 	ump = VFSTOUFS(vp->v_mount);
256 	fs = ump->um_fs;
257 	bo = &vp->v_bufobj;
258 
259 	ASSERT_VOP_LOCKED(vp, "ffs_truncate");
260 
261 	if (length < 0)
262 		return (EINVAL);
263 	if (length > fs->fs_maxfilesize)
264 		return (EFBIG);
265 #ifdef QUOTA
266 	error = getinoquota(ip);
267 	if (error)
268 		return (error);
269 #endif
270 	/*
271 	 * Historically clients did not have to specify which data
272 	 * they were truncating. So, if not specified, we assume
273 	 * traditional behavior, e.g., just the normal data.
274 	 */
275 	if ((flags & (IO_EXT | IO_NORMAL)) == 0)
276 		flags |= IO_NORMAL;
277 	if (!DOINGSOFTDEP(vp) && !DOINGASYNC(vp))
278 		flags |= IO_SYNC;
279 	waitforupdate = (flags & IO_SYNC) != 0 || !DOINGASYNC(vp);
280 	/*
281 	 * If we are truncating the extended-attributes, and cannot
282 	 * do it with soft updates, then do it slowly here. If we are
283 	 * truncating both the extended attributes and the file contents
284 	 * (e.g., the file is being unlinked), then pick it off with
285 	 * soft updates below.
286 	 */
287 	allerror = 0;
288 	needextclean = 0;
289 	softdeptrunc = 0;
290 	journaltrunc = DOINGSUJ(vp);
291 	journaltrunc = 0;	/* XXX temp patch until bug found */
292 	if (journaltrunc == 0 && DOINGSOFTDEP(vp) && length == 0)
293 		softdeptrunc = !softdep_slowdown(vp);
294 	extblocks = 0;
295 	if (fs->fs_magic == FS_UFS2_MAGIC && ip->i_din2->di_extsize > 0) {
296 		extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize));
297 	}
298 	if ((flags & IO_EXT) && extblocks > 0) {
299 		if (length != 0)
300 			panic("ffs_truncate: partial trunc of extdata");
301 		if (softdeptrunc || journaltrunc) {
302 			if ((flags & IO_NORMAL) == 0)
303 				goto extclean;
304 			needextclean = 1;
305 		} else {
306 			if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
307 				return (error);
308 #ifdef QUOTA
309 			(void) chkdq(ip, -extblocks, NOCRED, FORCE);
310 #endif
311 			vinvalbuf(vp, V_ALT, 0, 0);
312 			vn_pages_remove(vp,
313 			    OFF_TO_IDX(lblktosize(fs, -extblocks)), 0);
314 			osize = ip->i_din2->di_extsize;
315 			ip->i_din2->di_blocks -= extblocks;
316 			ip->i_din2->di_extsize = 0;
317 			for (i = 0; i < UFS_NXADDR; i++) {
318 				oldblks[i] = ip->i_din2->di_extb[i];
319 				ip->i_din2->di_extb[i] = 0;
320 			}
321 			UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE);
322 			if ((error = ffs_update(vp, waitforupdate)))
323 				return (error);
324 			for (i = 0; i < UFS_NXADDR; i++) {
325 				if (oldblks[i] == 0)
326 					continue;
327 				ffs_blkfree(ump, fs, ITODEVVP(ip), oldblks[i],
328 				    sblksize(fs, osize, i), ip->i_number,
329 				    vp->v_type, NULL, SINGLETON_KEY);
330 			}
331 		}
332 	}
333 	if ((flags & IO_NORMAL) == 0)
334 		return (0);
335 	if (vp->v_type == VLNK && ip->i_size < ump->um_maxsymlinklen) {
336 #ifdef INVARIANTS
337 		if (length != 0)
338 			panic("ffs_truncate: partial truncate of symlink");
339 #endif
340 		bzero(DIP(ip, i_shortlink), (uint64_t)ip->i_size);
341 		ip->i_size = 0;
342 		DIP_SET(ip, i_size, 0);
343 		UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
344 		if (needextclean)
345 			goto extclean;
346 		return (ffs_update(vp, waitforupdate));
347 	}
348 	if (ip->i_size == length) {
349 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
350 		if (needextclean)
351 			goto extclean;
352 		return (ffs_update(vp, 0));
353 	}
354 	if (fs->fs_ronly)
355 		panic("ffs_truncate: read-only filesystem");
356 	if (IS_SNAPSHOT(ip))
357 		ffs_snapremove(vp);
358 	cluster_init_vn(&ip->i_clusterw);
359 	osize = ip->i_size;
360 	/*
361 	 * Lengthen the size of the file. We must ensure that the
362 	 * last byte of the file is allocated. Since the smallest
363 	 * value of osize is 0, length will be at least 1.
364 	 */
365 	if (osize < length) {
366 		vnode_pager_setsize(vp, length);
367 		flags |= BA_CLRBUF;
368 		error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
369 		if (error) {
370 			vnode_pager_setsize(vp, osize);
371 			return (error);
372 		}
373 		ip->i_size = length;
374 		DIP_SET(ip, i_size, length);
375 		if (bp->b_bufsize == fs->fs_bsize)
376 			bp->b_flags |= B_CLUSTEROK;
377 		ffs_inode_bwrite(vp, bp, flags);
378 		UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
379 		return (ffs_update(vp, waitforupdate));
380 	}
381 	/*
382 	 * Lookup block number for a given offset. Zero length files
383 	 * have no blocks, so return a blkno of -1.
384 	 */
385 	lbn = lblkno(fs, length - 1);
386 	if (length == 0) {
387 		blkno = -1;
388 	} else if (lbn < UFS_NDADDR) {
389 		blkno = DIP(ip, i_db[lbn]);
390 	} else {
391 		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), fs->fs_bsize,
392 		    cred, BA_METAONLY, &bp);
393 		if (error)
394 			return (error);
395 		indiroff = (lbn - UFS_NDADDR) % NINDIR(fs);
396 		if (I_IS_UFS1(ip))
397 			blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff];
398 		else
399 			blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff];
400 		/*
401 		 * If the block number is non-zero, then the indirect block
402 		 * must have been previously allocated and need not be written.
403 		 * If the block number is zero, then we may have allocated
404 		 * the indirect block and hence need to write it out.
405 		 */
406 		if (blkno != 0)
407 			brelse(bp);
408 		else if (flags & IO_SYNC)
409 			bwrite(bp);
410 		else
411 			bdwrite(bp);
412 	}
413 	/*
414 	 * If the block number at the new end of the file is zero,
415 	 * then we must allocate it to ensure that the last block of
416 	 * the file is allocated. Soft updates does not handle this
417 	 * case, so here we have to clean up the soft updates data
418 	 * structures describing the allocation past the truncation
419 	 * point. Finding and deallocating those structures is a lot of
420 	 * work. Since partial truncation with a hole at the end occurs
421 	 * rarely, we solve the problem by syncing the file so that it
422 	 * will have no soft updates data structures left.
423 	 */
424 	if (blkno == 0 && (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
425 		return (error);
426 	if (blkno != 0 && DOINGSOFTDEP(vp)) {
427 		if (softdeptrunc == 0 && journaltrunc == 0) {
428 			/*
429 			 * If soft updates cannot handle this truncation,
430 			 * clean up soft dependency data structures and
431 			 * fall through to the synchronous truncation.
432 			 */
433 			if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
434 				return (error);
435 		} else {
436 			flags = IO_NORMAL | (needextclean ? IO_EXT: 0);
437 			if (journaltrunc)
438 				softdep_journal_freeblocks(ip, cred, length,
439 				    flags);
440 			else
441 				softdep_setup_freeblocks(ip, length, flags);
442 			ASSERT_VOP_LOCKED(vp, "ffs_truncate1");
443 			if (journaltrunc == 0) {
444 				UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
445 				error = ffs_update(vp, 0);
446 			}
447 			return (error);
448 		}
449 	}
450 	/*
451 	 * Shorten the size of the file. If the last block of the
452 	 * shortened file is unallocated, we must allocate it.
453 	 * Additionally, if the file is not being truncated to a
454 	 * block boundary, the contents of the partial block
455 	 * following the end of the file must be zero'ed in
456 	 * case it ever becomes accessible again because of
457 	 * subsequent file growth. Directories however are not
458 	 * zero'ed as they should grow back initialized to empty.
459 	 */
460 	offset = blkoff(fs, length);
461 	if (blkno != 0 && offset == 0) {
462 		ip->i_size = length;
463 		DIP_SET(ip, i_size, length);
464 		UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
465 #ifdef UFS_DIRHASH
466 		if (vp->v_type == VDIR && ip->i_dirhash != NULL)
467 			ufsdirhash_dirtrunc(ip, length);
468 #endif
469 	} else {
470 		lbn = lblkno(fs, length);
471 		flags |= BA_CLRBUF;
472 		error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
473 		if (error)
474 			return (error);
475 		ffs_inode_bwrite(vp, bp, flags);
476 
477 		/*
478 		 * When we are doing soft updates and the UFS_BALLOC
479 		 * above fills in a direct block hole with a full sized
480 		 * block that will be truncated down to a fragment below,
481 		 * we must flush out the block dependency with an FSYNC
482 		 * so that we do not get a soft updates inconsistency
483 		 * when we create the fragment below.
484 		 */
485 		if (DOINGSOFTDEP(vp) && lbn < UFS_NDADDR &&
486 		    fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize &&
487 		    (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
488 			return (error);
489 
490 		error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
491 		if (error)
492 			return (error);
493 		ip->i_size = length;
494 		DIP_SET(ip, i_size, length);
495 #ifdef UFS_DIRHASH
496 		if (vp->v_type == VDIR && ip->i_dirhash != NULL)
497 			ufsdirhash_dirtrunc(ip, length);
498 #endif
499 		size = blksize(fs, ip, lbn);
500 		if (vp->v_type != VDIR && offset != 0)
501 			bzero((char *)bp->b_data + offset,
502 			    (uint64_t)(size - offset));
503 		/* Kirk's code has reallocbuf(bp, size, 1) here */
504 		allocbuf(bp, size);
505 		if (bp->b_bufsize == fs->fs_bsize)
506 			bp->b_flags |= B_CLUSTEROK;
507 		ffs_inode_bwrite(vp, bp, flags);
508 		UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
509 	}
510 	/*
511 	 * Calculate index into inode's block list of
512 	 * last direct and indirect blocks (if any)
513 	 * which we want to keep.  Lastblock is -1 when
514 	 * the file is truncated to 0.
515 	 */
516 	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
517 	lastiblock[SINGLE] = lastblock - UFS_NDADDR;
518 	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
519 	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
520 	nblocks = btodb(fs->fs_bsize);
521 	/*
522 	 * Update file and block pointers on disk before we start freeing
523 	 * blocks.  If we crash before free'ing blocks below, the blocks
524 	 * will be returned to the free list.  lastiblock values are also
525 	 * normalized to -1 for calls to ffs_indirtrunc below.
526 	 */
527 	for (level = TRIPLE; level >= SINGLE; level--) {
528 		oldblks[UFS_NDADDR + level] = DIP(ip, i_ib[level]);
529 		if (lastiblock[level] < 0) {
530 			DIP_SET(ip, i_ib[level], 0);
531 			lastiblock[level] = -1;
532 		}
533 	}
534 	for (i = 0; i < UFS_NDADDR; i++) {
535 		oldblks[i] = DIP(ip, i_db[i]);
536 		if (i > lastblock)
537 			DIP_SET(ip, i_db[i], 0);
538 	}
539 	UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
540 	allerror = ffs_update(vp, waitforupdate);
541 
542 	/*
543 	 * Having written the new inode to disk, save its new configuration
544 	 * and put back the old block pointers long enough to process them.
545 	 * Note that we save the new block configuration so we can check it
546 	 * when we are done.
547 	 */
548 	for (i = 0; i < UFS_NDADDR; i++) {
549 		newblks[i] = DIP(ip, i_db[i]);
550 		DIP_SET(ip, i_db[i], oldblks[i]);
551 	}
552 	for (i = 0; i < UFS_NIADDR; i++) {
553 		newblks[UFS_NDADDR + i] = DIP(ip, i_ib[i]);
554 		DIP_SET(ip, i_ib[i], oldblks[UFS_NDADDR + i]);
555 	}
556 	ip->i_size = osize;
557 	DIP_SET(ip, i_size, osize);
558 	UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
559 
560 	error = vtruncbuf(vp, length, fs->fs_bsize);
561 	if (error && (allerror == 0))
562 		allerror = error;
563 
564 	/*
565 	 * Indirect blocks first.
566 	 */
567 	indir_lbn[SINGLE] = -UFS_NDADDR;
568 	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1;
569 	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1;
570 	for (level = TRIPLE; level >= SINGLE; level--) {
571 		bn = DIP(ip, i_ib[level]);
572 		if (bn != 0) {
573 			error = ffs_indirtrunc(ip, indir_lbn[level],
574 			    fsbtodb(fs, bn), lastiblock[level], level, &count);
575 			if (error)
576 				allerror = error;
577 			blocksreleased += count;
578 			if (lastiblock[level] < 0) {
579 				DIP_SET(ip, i_ib[level], 0);
580 				ffs_blkfree(ump, fs, ump->um_devvp, bn,
581 				    fs->fs_bsize, ip->i_number,
582 				    vp->v_type, NULL, SINGLETON_KEY);
583 				blocksreleased += nblocks;
584 			}
585 		}
586 		if (lastiblock[level] >= 0)
587 			goto done;
588 	}
589 
590 	/*
591 	 * All whole direct blocks or frags.
592 	 */
593 	key = ffs_blkrelease_start(ump, ump->um_devvp, ip->i_number);
594 	for (i = UFS_NDADDR - 1; i > lastblock; i--) {
595 		long bsize;
596 
597 		bn = DIP(ip, i_db[i]);
598 		if (bn == 0)
599 			continue;
600 		DIP_SET(ip, i_db[i], 0);
601 		bsize = blksize(fs, ip, i);
602 		ffs_blkfree(ump, fs, ump->um_devvp, bn, bsize, ip->i_number,
603 		    vp->v_type, NULL, key);
604 		blocksreleased += btodb(bsize);
605 	}
606 	ffs_blkrelease_finish(ump, key);
607 	if (lastblock < 0)
608 		goto done;
609 
610 	/*
611 	 * Finally, look for a change in size of the
612 	 * last direct block; release any frags.
613 	 */
614 	bn = DIP(ip, i_db[lastblock]);
615 	if (bn != 0) {
616 		long oldspace, newspace;
617 
618 		/*
619 		 * Calculate amount of space we're giving
620 		 * back as old block size minus new block size.
621 		 */
622 		oldspace = blksize(fs, ip, lastblock);
623 		ip->i_size = length;
624 		DIP_SET(ip, i_size, length);
625 		UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
626 		newspace = blksize(fs, ip, lastblock);
627 		if (newspace == 0)
628 			panic("ffs_truncate: newspace");
629 		if (oldspace - newspace > 0) {
630 			/*
631 			 * Block number of space to be free'd is
632 			 * the old block # plus the number of frags
633 			 * required for the storage we're keeping.
634 			 */
635 			bn += numfrags(fs, newspace);
636 			ffs_blkfree(ump, fs, ump->um_devvp, bn,
637 			   oldspace - newspace, ip->i_number, vp->v_type,
638 			   NULL, SINGLETON_KEY);
639 			blocksreleased += btodb(oldspace - newspace);
640 		}
641 	}
642 done:
643 #ifdef INVARIANTS
644 	for (level = SINGLE; level <= TRIPLE; level++)
645 		if (newblks[UFS_NDADDR + level] != DIP(ip, i_ib[level]))
646 			panic("ffs_truncate1: level %d newblks %jd != i_ib %jd",
647 			    level, (intmax_t)newblks[UFS_NDADDR + level],
648 			    (intmax_t)DIP(ip, i_ib[level]));
649 	for (i = 0; i < UFS_NDADDR; i++)
650 		if (newblks[i] != DIP(ip, i_db[i]))
651 			panic("ffs_truncate2: blkno %d newblks %jd != i_db %jd",
652 			    i, (intmax_t)newblks[UFS_NDADDR + level],
653 			    (intmax_t)DIP(ip, i_ib[level]));
654 	BO_LOCK(bo);
655 	if (length == 0 &&
656 	    (fs->fs_magic != FS_UFS2_MAGIC || ip->i_din2->di_extsize == 0) &&
657 	    (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0))
658 		panic("ffs_truncate3: vp = %p, buffers: dirty = %d, clean = %d",
659 			vp, bo->bo_dirty.bv_cnt, bo->bo_clean.bv_cnt);
660 	BO_UNLOCK(bo);
661 #endif /* INVARIANTS */
662 	/*
663 	 * Put back the real size.
664 	 */
665 	ip->i_size = length;
666 	DIP_SET(ip, i_size, length);
667 	if (DIP(ip, i_blocks) >= blocksreleased)
668 		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - blocksreleased);
669 	else	/* sanity */
670 		DIP_SET(ip, i_blocks, 0);
671 	UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE);
672 #ifdef QUOTA
673 	(void) chkdq(ip, -blocksreleased, NOCRED, FORCE);
674 #endif
675 	return (allerror);
676 
677 extclean:
678 	if (journaltrunc)
679 		softdep_journal_freeblocks(ip, cred, length, IO_EXT);
680 	else
681 		softdep_setup_freeblocks(ip, length, IO_EXT);
682 	return (ffs_update(vp, waitforupdate));
683 }
684 
685 /*
686  * Release blocks associated with the inode ip and stored in the indirect
687  * block bn.  Blocks are free'd in LIFO order up to (but not including)
688  * lastbn.  If level is greater than SINGLE, the block is an indirect block
689  * and recursive calls to indirtrunc must be used to cleanse other indirect
690  * blocks.
691  */
692 static int
693 ffs_indirtrunc(struct inode *ip,
694 	ufs2_daddr_t lbn,
695 	ufs2_daddr_t dbn,
696 	ufs2_daddr_t lastbn,
697 	int level,
698 	ufs2_daddr_t *countp)
699 {
700 	struct buf *bp;
701 	struct fs *fs;
702 	struct ufsmount *ump;
703 	struct vnode *vp;
704 	caddr_t copy = NULL;
705 	uint64_t key;
706 	int i, nblocks, error = 0, allerror = 0;
707 	ufs2_daddr_t nb, nlbn, last;
708 	ufs2_daddr_t blkcount, factor, blocksreleased = 0;
709 	ufs1_daddr_t *bap1 = NULL;
710 	ufs2_daddr_t *bap2 = NULL;
711 #define BAP(ip, i) (I_IS_UFS1(ip) ? bap1[i] : bap2[i])
712 
713 	fs = ITOFS(ip);
714 	ump = ITOUMP(ip);
715 
716 	/*
717 	 * Calculate index in current block of last
718 	 * block to be kept.  -1 indicates the entire
719 	 * block so we need not calculate the index.
720 	 */
721 	factor = lbn_offset(fs, level);
722 	last = lastbn;
723 	if (lastbn > 0)
724 		last /= factor;
725 	nblocks = btodb(fs->fs_bsize);
726 	/*
727 	 * Get buffer of block pointers, zero those entries corresponding
728 	 * to blocks to be free'd, and update on disk copy first.  Since
729 	 * double(triple) indirect before single(double) indirect, calls
730 	 * to VOP_BMAP() on these blocks will fail.  However, we already
731 	 * have the on-disk address, so we just pass it to bread() instead
732 	 * of having bread() attempt to calculate it using VOP_BMAP().
733 	 */
734 	vp = ITOV(ip);
735 	error = ffs_breadz(ump, vp, lbn, dbn, (int)fs->fs_bsize, NULL, NULL, 0,
736 	    NOCRED, 0, NULL, &bp);
737 	if (error) {
738 		*countp = 0;
739 		return (error);
740 	}
741 
742 	if (I_IS_UFS1(ip))
743 		bap1 = (ufs1_daddr_t *)bp->b_data;
744 	else
745 		bap2 = (ufs2_daddr_t *)bp->b_data;
746 	if (lastbn != -1) {
747 		copy = malloc(fs->fs_bsize, M_TEMP, M_WAITOK);
748 		bcopy((caddr_t)bp->b_data, copy, (uint64_t)fs->fs_bsize);
749 		for (i = last + 1; i < NINDIR(fs); i++)
750 			if (I_IS_UFS1(ip))
751 				bap1[i] = 0;
752 			else
753 				bap2[i] = 0;
754 		if (DOINGASYNC(vp)) {
755 			bdwrite(bp);
756 		} else {
757 			error = bwrite(bp);
758 			if (error)
759 				allerror = error;
760 		}
761 		if (I_IS_UFS1(ip))
762 			bap1 = (ufs1_daddr_t *)copy;
763 		else
764 			bap2 = (ufs2_daddr_t *)copy;
765 	}
766 
767 	/*
768 	 * Recursively free totally unused blocks.
769 	 */
770 	key = ffs_blkrelease_start(ump, ITODEVVP(ip), ip->i_number);
771 	for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
772 	    i--, nlbn += factor) {
773 		nb = BAP(ip, i);
774 		if (nb == 0)
775 			continue;
776 		if (level > SINGLE) {
777 			if ((error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
778 			    (ufs2_daddr_t)-1, level - 1, &blkcount)) != 0)
779 				allerror = error;
780 			blocksreleased += blkcount;
781 		}
782 		ffs_blkfree(ump, fs, ITODEVVP(ip), nb, fs->fs_bsize,
783 		    ip->i_number, vp->v_type, NULL, key);
784 		blocksreleased += nblocks;
785 	}
786 	ffs_blkrelease_finish(ump, key);
787 
788 	/*
789 	 * Recursively free last partial block.
790 	 */
791 	if (level > SINGLE && lastbn >= 0) {
792 		last = lastbn % factor;
793 		nb = BAP(ip, i);
794 		if (nb != 0) {
795 			error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
796 			    last, level - 1, &blkcount);
797 			if (error)
798 				allerror = error;
799 			blocksreleased += blkcount;
800 		}
801 	}
802 	if (copy != NULL) {
803 		free(copy, M_TEMP);
804 	} else {
805 		bp->b_flags |= B_INVAL | B_NOCACHE;
806 		brelse(bp);
807 	}
808 
809 	*countp = blocksreleased;
810 	return (allerror);
811 }
812 
813 int
814 ffs_rdonly(struct inode *ip)
815 {
816 
817 	return (ITOFS(ip)->fs_ronly != 0);
818 }
819