/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_ufs.h"
#include "opt_quota.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/racct.h>
#include <sys/random.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/stat.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#ifdef UFS_DIRHASH
#include <ufs/ufs/dirhash.h>
#endif
#include <ufs/ufs/ufs_extern.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

static int ffs_indirtrunc(struct inode *, ufs2_daddr_t, ufs2_daddr_t,
            ufs2_daddr_t, int, ufs2_daddr_t *);

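/*
 * Write out a buffer using the I/O strategy called for by the flags:
 * synchronously for IO_SYNC, delayed if the mount is running in async
 * mode, and asynchronously otherwise.
 */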
static void
ffs_inode_bwrite(struct vnode *vp, struct buf *bp, int flags)
{
        if ((flags & IO_SYNC) != 0)
                bwrite(bp);
        else if (DOINGASYNC(vp))
                bdwrite(bp);
        else
                bawrite(bp);
}
81
82 /*
83 * Update the access, modified, and inode change times as specified by the
84 * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. Write the inode
85 * to disk if the IN_MODIFIED flag is set (it may be set initially, or by
86 * the timestamp update). The IN_LAZYMOD flag is set to force a write
87 * later if not now. The IN_LAZYACCESS is set instead of IN_MODIFIED if the fs
88 * is currently being suspended (or is suspended) and vnode has been accessed.
89 * If we write now, then clear IN_MODIFIED, IN_LAZYACCESS and IN_LAZYMOD to
90 * reflect the presumably successful write, and if waitfor is set, then wait
91 * for the write to complete.
92 */
int
ffs_update(struct vnode *vp, int waitfor)
{
        struct fs *fs;
        struct buf *bp;
        struct inode *ip;
        daddr_t bn;
        int flags, error;

        ASSERT_VOP_ELOCKED(vp, "ffs_update");
        ufs_itimes(vp);
        ip = VTOI(vp);
        if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0)
                return (0);
        ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED);
        /*
         * The IN_SIZEMOD and IN_IBLKDATA flags indicate changes to the
         * file size and block pointer fields in the inode.  When these
         * fields have been changed, the fsync() and fdatasync() system
         * calls must write the inode to honor their guarantee that the
         * file is on stable store.
         *
         * The IN_SIZEMOD and IN_IBLKDATA flags cannot be cleared until
         * a synchronous write of the inode is done.  If they are cleared
         * on an asynchronous write, then the inode may not yet have been
         * written to the disk when an fsync() or fdatasync() call is done.
         * Absent these flags, these calls would not know that they needed
         * to write the inode.  Thus, these flags only can be cleared on
         * synchronous writes of the inode.  Since the inode will be locked
         * for the duration of the I/O that writes it to disk, no fsync()
         * or fdatasync() will be able to run before the on-disk inode
         * is complete.
         */
        if (waitfor)
                ip->i_flag &= ~(IN_SIZEMOD | IN_IBLKDATA);
        fs = ITOFS(ip);
        if (fs->fs_ronly)
                return (0);
        /*
         * If we are updating a snapshot and another process is currently
         * writing the buffer containing the inode for this snapshot then
         * a deadlock can occur when it tries to check the snapshot to see
         * if that block needs to be copied.  Thus when updating a snapshot
         * we check to see if the buffer is already locked, and if it is
         * we drop the snapshot lock until the buffer has been written
         * and is available to us.  We have to grab a reference to the
         * snapshot vnode to prevent it from being removed while we are
         * waiting for the buffer.
         */
loop:
        flags = 0;
        if (IS_SNAPSHOT(ip))
                flags = GB_LOCK_NOWAIT;
        bn = fsbtodb(fs, ino_to_fsba(fs, ip->i_number));
        error = ffs_breadz(VFSTOUFS(vp->v_mount), ITODEVVP(ip), bn, bn,
            (int)fs->fs_bsize, NULL, NULL, 0, NOCRED, flags, NULL, &bp);
        if (error != 0) {
                /*
                 * If EBUSY was returned without GB_LOCK_NOWAIT (which
                 * requests trylock for buffer lock), it is for some
                 * other reason and we should not handle it specially.
                 */
                if (error != EBUSY || (flags & GB_LOCK_NOWAIT) == 0)
                        return (error);

                /*
                 * Wait for our inode block to become available.
                 *
                 * Hold a reference to the vnode to protect against
                 * ffs_snapgone().  Since we hold a reference, it can only
                 * get reclaimed (VIRF_DOOMED flag) in a forcible downgrade
                 * or unmount.  For an unmount, the entire filesystem will be
                 * gone, so we cannot attempt to touch anything associated
                 * with it while the vnode is unlocked; all we can do is
                 * pause briefly and try again.  If when we relock the vnode
                 * we discover that it has been reclaimed, updating it is no
                 * longer necessary and we can just return an error.
                 */
                vref(vp);
                VOP_UNLOCK(vp);
                pause("ffsupd", 1);
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                vrele(vp);
                if (!IS_UFS(vp))
                        return (ENOENT);

                /*
                 * Recalculate flags, because the vnode was relocked and
                 * could no longer be a snapshot.
                 */
                goto loop;
        }
        if (DOINGSOFTDEP(vp))
                softdep_update_inodeblock(ip, bp, waitfor);
        else if (ip->i_effnlink != ip->i_nlink)
                panic("ffs_update: bad link cnt");
        if (I_IS_UFS1(ip)) {
                *((struct ufs1_dinode *)bp->b_data +
                    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;
                /*
                 * XXX: FIX? The entropy here is desirable,
                 * but the harvesting may be expensive
                 */
                random_harvest_queue(&(ip->i_din1), sizeof(ip->i_din1),
                    RANDOM_FS_ATIME);
        } else {
                ffs_update_dinode_ckhash(fs, ip->i_din2);
                *((struct ufs2_dinode *)bp->b_data +
                    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
                /*
                 * XXX: FIX? The entropy here is desirable,
                 * but the harvesting may be expensive
                 */
                random_harvest_queue(&(ip->i_din2), sizeof(ip->i_din2),
                    RANDOM_FS_ATIME);
        }
        if (waitfor) {
                error = bwrite(bp);
                if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), error))
                        error = 0;
        } else if (vm_page_count_severe() || buf_dirty_count_severe()) {
                bawrite(bp);
                error = 0;
        } else {
                if (bp->b_bufsize == fs->fs_bsize)
                        bp->b_flags |= B_CLUSTEROK;
                bdwrite(bp);
                error = 0;
        }
        return (error);
}

#define SINGLE  0       /* index of single indirect block */
#define DOUBLE  1       /* index of double indirect block */
#define TRIPLE  2       /* index of triple indirect block */
/*
 * Truncate the inode ip to at most length size, freeing the
 * disk blocks.
 */
int
ffs_truncate(struct vnode *vp,
        off_t length,
        int flags,
        struct ucred *cred)
{
        struct inode *ip;
        ufs2_daddr_t bn, lbn, lastblock, lastiblock[UFS_NIADDR];
        ufs2_daddr_t indir_lbn[UFS_NIADDR], oldblks[UFS_NDADDR + UFS_NIADDR];
#ifdef INVARIANTS
        ufs2_daddr_t newblks[UFS_NDADDR + UFS_NIADDR];
#endif
        ufs2_daddr_t count, blocksreleased = 0, blkno;
        struct bufobj *bo __diagused;
        struct fs *fs;
        struct buf *bp;
        struct ufsmount *ump;
        int softdeptrunc, journaltrunc;
        int needextclean, extblocks;
        int offset, size, level, nblocks;
        int i, error, allerror, indiroff, waitforupdate;
        uint64_t key;
        off_t osize;

        ip = VTOI(vp);
        ump = VFSTOUFS(vp->v_mount);
        fs = ump->um_fs;
        bo = &vp->v_bufobj;

        ASSERT_VOP_LOCKED(vp, "ffs_truncate");

        if (length < 0)
                return (EINVAL);
        if (length > fs->fs_maxfilesize)
                return (EFBIG);
#ifdef QUOTA
        error = getinoquota(ip);
        if (error)
                return (error);
#endif
        /*
         * Historically clients did not have to specify which data
         * they were truncating.  So, if not specified, we assume
         * traditional behavior, e.g., just the normal data.
         */
        if ((flags & (IO_EXT | IO_NORMAL)) == 0)
                flags |= IO_NORMAL;
        if (!DOINGSOFTDEP(vp) && !DOINGASYNC(vp))
                flags |= IO_SYNC;
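        /*
         * Wait for the inode updates below when the caller asked for
         * synchronous I/O, or when the mount is not running in async mode.
         */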
        waitforupdate = (flags & IO_SYNC) != 0 || !DOINGASYNC(vp);
        /*
         * If we are truncating the extended attributes, and cannot
         * do it with soft updates, then do it slowly here.  If we are
         * truncating both the extended attributes and the file contents
         * (e.g., the file is being unlinked), then pick it off with
         * soft updates below.
         */
        allerror = 0;
        needextclean = 0;
        softdeptrunc = 0;
        journaltrunc = DOINGSUJ(vp);
        journaltrunc = 0;       /* XXX temp patch until bug found */
        if (journaltrunc == 0 && DOINGSOFTDEP(vp) && length == 0)
                softdeptrunc = !softdep_slowdown(vp);
        extblocks = 0;
        if (fs->fs_magic == FS_UFS2_MAGIC && ip->i_din2->di_extsize > 0) {
                extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize));
        }
        if ((flags & IO_EXT) && extblocks > 0) {
                if (length != 0)
                        panic("ffs_truncate: partial trunc of extdata");
                if (softdeptrunc || journaltrunc) {
                        if ((flags & IO_NORMAL) == 0)
                                goto extclean;
                        needextclean = 1;
                } else {
                        if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
                                return (error);
#ifdef QUOTA
                        (void) chkdq(ip, -extblocks, NOCRED, FORCE);
#endif
                        vinvalbuf(vp, V_ALT, 0, 0);
                        vn_pages_remove(vp,
                            OFF_TO_IDX(lblktosize(fs, -extblocks)), 0);
                        osize = ip->i_din2->di_extsize;
                        ip->i_din2->di_blocks -= extblocks;
                        ip->i_din2->di_extsize = 0;
                        for (i = 0; i < UFS_NXADDR; i++) {
                                oldblks[i] = ip->i_din2->di_extb[i];
                                ip->i_din2->di_extb[i] = 0;
                        }
                        UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE);
                        if ((error = ffs_update(vp, waitforupdate)))
                                return (error);
                        for (i = 0; i < UFS_NXADDR; i++) {
                                if (oldblks[i] == 0)
                                        continue;
                                ffs_blkfree(ump, fs, ITODEVVP(ip), oldblks[i],
                                    sblksize(fs, osize, i), ip->i_number,
                                    vp->v_type, NULL, SINGLETON_KEY);
                        }
                }
        }
        if ((flags & IO_NORMAL) == 0)
                return (0);
        if (vp->v_type == VLNK && ip->i_size < ump->um_maxsymlinklen) {
#ifdef INVARIANTS
                if (length != 0)
                        panic("ffs_truncate: partial truncate of symlink");
#endif
                bzero(DIP(ip, i_shortlink), (uint64_t)ip->i_size);
                ip->i_size = 0;
                DIP_SET(ip, i_size, 0);
                UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
                if (needextclean)
                        goto extclean;
                return (ffs_update(vp, waitforupdate));
        }
        if (ip->i_size == length) {
                UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
                if (needextclean)
                        goto extclean;
                return (ffs_update(vp, 0));
        }
        if (fs->fs_ronly)
                panic("ffs_truncate: read-only filesystem");
        if (IS_SNAPSHOT(ip))
                ffs_snapremove(vp);
        cluster_init_vn(&ip->i_clusterw);
        osize = ip->i_size;
        /*
         * Lengthen the size of the file.  We must ensure that the
         * last byte of the file is allocated.  Since the smallest
         * value of osize is 0, length will be at least 1.
         */
        if (osize < length) {
                vnode_pager_setsize(vp, length);
                flags |= BA_CLRBUF;
                error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
                if (error) {
                        vnode_pager_setsize(vp, osize);
                        return (error);
                }
                ip->i_size = length;
                DIP_SET(ip, i_size, length);
                if (bp->b_bufsize == fs->fs_bsize)
                        bp->b_flags |= B_CLUSTEROK;
                ffs_inode_bwrite(vp, bp, flags);
                UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
                return (ffs_update(vp, waitforupdate));
        }
        /*
         * Lookup block number for a given offset.  Zero length files
         * have no blocks, so return a blkno of -1.
         */
        lbn = lblkno(fs, length - 1);
        if (length == 0) {
                blkno = -1;
        } else if (lbn < UFS_NDADDR) {
                blkno = DIP(ip, i_db[lbn]);
        } else {
                error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), fs->fs_bsize,
                    cred, BA_METAONLY, &bp);
                if (error)
                        return (error);
                indiroff = (lbn - UFS_NDADDR) % NINDIR(fs);
                if (I_IS_UFS1(ip))
                        blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff];
                else
                        blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff];
                /*
                 * If the block number is non-zero, then the indirect block
                 * must have been previously allocated and need not be written.
                 * If the block number is zero, then we may have allocated
                 * the indirect block and hence need to write it out.
                 */
                if (blkno != 0)
                        brelse(bp);
                else if (flags & IO_SYNC)
                        bwrite(bp);
                else
                        bdwrite(bp);
        }
        /*
         * If the block number at the new end of the file is zero,
         * then we must allocate it to ensure that the last block of
         * the file is allocated.  Soft updates does not handle this
         * case, so here we have to clean up the soft updates data
         * structures describing the allocation past the truncation
         * point.  Finding and deallocating those structures is a lot of
         * work.  Since partial truncation with a hole at the end occurs
         * rarely, we solve the problem by syncing the file so that it
         * will have no soft updates data structures left.
         */
        if (blkno == 0 && (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
                return (error);
        if (blkno != 0 && DOINGSOFTDEP(vp)) {
                if (softdeptrunc == 0 && journaltrunc == 0) {
                        /*
                         * If soft updates cannot handle this truncation,
                         * clean up soft dependency data structures and
                         * fall through to the synchronous truncation.
                         */
                        if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
                                return (error);
                } else {
                        flags = IO_NORMAL | (needextclean ? IO_EXT : 0);
                        if (journaltrunc)
                                softdep_journal_freeblocks(ip, cred, length,
                                    flags);
                        else
                                softdep_setup_freeblocks(ip, length, flags);
                        ASSERT_VOP_LOCKED(vp, "ffs_truncate1");
                        if (journaltrunc == 0) {
                                UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
                                error = ffs_update(vp, 0);
                        }
                        return (error);
                }
        }
        /*
         * Shorten the size of the file.  If the last block of the
         * shortened file is unallocated, we must allocate it.
         * Additionally, if the file is not being truncated to a
         * block boundary, the contents of the partial block
         * following the end of the file must be zero'ed in
         * case it ever becomes accessible again because of
         * subsequent file growth.  Directories however are not
         * zero'ed as they should grow back initialized to empty.
         */
        offset = blkoff(fs, length);
        if (blkno != 0 && offset == 0) {
                ip->i_size = length;
                DIP_SET(ip, i_size, length);
                UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
#ifdef UFS_DIRHASH
                if (vp->v_type == VDIR && ip->i_dirhash != NULL)
                        ufsdirhash_dirtrunc(ip, length);
#endif
        } else {
                lbn = lblkno(fs, length);
                flags |= BA_CLRBUF;
                error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
                if (error)
                        return (error);
                ffs_inode_bwrite(vp, bp, flags);

                /*
                 * When we are doing soft updates and the UFS_BALLOC
                 * above fills in a direct block hole with a full sized
                 * block that will be truncated down to a fragment below,
                 * we must flush out the block dependency with an FSYNC
                 * so that we do not get a soft updates inconsistency
                 * when we create the fragment below.
                 */
                if (DOINGSOFTDEP(vp) && lbn < UFS_NDADDR &&
                    fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize &&
                    (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
                        return (error);

                error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
                if (error)
                        return (error);
                ip->i_size = length;
                DIP_SET(ip, i_size, length);
#ifdef UFS_DIRHASH
                if (vp->v_type == VDIR && ip->i_dirhash != NULL)
                        ufsdirhash_dirtrunc(ip, length);
#endif
                size = blksize(fs, ip, lbn);
                if (vp->v_type != VDIR && offset != 0)
                        bzero((char *)bp->b_data + offset,
                            (uint64_t)(size - offset));
                /* Kirk's code has reallocbuf(bp, size, 1) here */
                allocbuf(bp, size);
                if (bp->b_bufsize == fs->fs_bsize)
                        bp->b_flags |= B_CLUSTEROK;
                ffs_inode_bwrite(vp, bp, flags);
                UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
        }
        /*
         * Calculate index into inode's block list of
         * last direct and indirect blocks (if any)
         * which we want to keep.  Lastblock is -1 when
         * the file is truncated to 0.
         */
        lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
        lastiblock[SINGLE] = lastblock - UFS_NDADDR;
        lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
        lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
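        /*
         * For example, truncating to length 0 gives lastblock == -1 and
         * makes all three lastiblock values negative, so every direct
         * and indirect block is released.  (With 32KB blocks and 64-bit
         * UFS2 block pointers, NINDIR(fs) == 4096; figures here are
         * illustrative only.)
         */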
        nblocks = btodb(fs->fs_bsize);
        /*
         * Update file and block pointers on disk before we start freeing
         * blocks.  If we crash before free'ing blocks below, the blocks
         * will be returned to the free list.  lastiblock values are also
         * normalized to -1 for calls to ffs_indirtrunc below.
         */
        for (level = TRIPLE; level >= SINGLE; level--) {
                oldblks[UFS_NDADDR + level] = DIP(ip, i_ib[level]);
                if (lastiblock[level] < 0) {
                        DIP_SET(ip, i_ib[level], 0);
                        lastiblock[level] = -1;
                }
        }
        for (i = 0; i < UFS_NDADDR; i++) {
                oldblks[i] = DIP(ip, i_db[i]);
                if (i > lastblock)
                        DIP_SET(ip, i_db[i], 0);
        }
        UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
        allerror = ffs_update(vp, waitforupdate);

        /*
         * Having written the new inode to disk, save its new configuration
         * and put back the old block pointers long enough to process them.
         * Note that we save the new block configuration so we can check it
         * when we are done.
         */
        for (i = 0; i < UFS_NDADDR; i++) {
#ifdef INVARIANTS
                newblks[i] = DIP(ip, i_db[i]);
#endif
                DIP_SET(ip, i_db[i], oldblks[i]);
        }
        for (i = 0; i < UFS_NIADDR; i++) {
#ifdef INVARIANTS
                newblks[UFS_NDADDR + i] = DIP(ip, i_ib[i]);
#endif
                DIP_SET(ip, i_ib[i], oldblks[UFS_NDADDR + i]);
        }
        ip->i_size = osize;
        DIP_SET(ip, i_size, osize);
        UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);

        error = vtruncbuf(vp, length, fs->fs_bsize);
        if (error && (allerror == 0))
                allerror = error;

        /*
         * Indirect blocks first.
         */
        indir_lbn[SINGLE] = -UFS_NDADDR;
        indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1;
        indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1;
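        /*
         * Indirect blocks are identified in the vnode's buffer cache by
         * negative logical block numbers (the metadata lbns computed in
         * ufs_bmap.c); the values above are the lbns of the single,
         * double, and triple indirect blocks themselves.
         */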
        for (level = TRIPLE; level >= SINGLE; level--) {
                bn = DIP(ip, i_ib[level]);
                if (bn != 0) {
                        error = ffs_indirtrunc(ip, indir_lbn[level],
                            fsbtodb(fs, bn), lastiblock[level], level, &count);
                        if (error)
                                allerror = error;
                        blocksreleased += count;
                        if (lastiblock[level] < 0) {
                                DIP_SET(ip, i_ib[level], 0);
                                ffs_blkfree(ump, fs, ump->um_devvp, bn,
                                    fs->fs_bsize, ip->i_number,
                                    vp->v_type, NULL, SINGLETON_KEY);
                                blocksreleased += nblocks;
                        }
                }
                if (lastiblock[level] >= 0)
                        goto done;
        }

        /*
         * All whole direct blocks or frags.
         */
        key = ffs_blkrelease_start(ump, ump->um_devvp, ip->i_number);
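        /*
         * The key groups this inode's frees into one batch so that the
         * lower layers can coalesce them (e.g., into fewer TRIM requests).
         */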
        for (i = UFS_NDADDR - 1; i > lastblock; i--) {
                long bsize;

                bn = DIP(ip, i_db[i]);
                if (bn == 0)
                        continue;
                DIP_SET(ip, i_db[i], 0);
                bsize = blksize(fs, ip, i);
                ffs_blkfree(ump, fs, ump->um_devvp, bn, bsize, ip->i_number,
                    vp->v_type, NULL, key);
                blocksreleased += btodb(bsize);
        }
        ffs_blkrelease_finish(ump, key);
        if (lastblock < 0)
                goto done;

        /*
         * Finally, look for a change in size of the
         * last direct block; release any frags.
         */
        bn = DIP(ip, i_db[lastblock]);
        if (bn != 0) {
                long oldspace, newspace;

                /*
                 * Calculate amount of space we're giving
                 * back as old block size minus new block size.
                 */
                oldspace = blksize(fs, ip, lastblock);
                ip->i_size = length;
                DIP_SET(ip, i_size, length);
                UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
                newspace = blksize(fs, ip, lastblock);
                if (newspace == 0)
                        panic("ffs_truncate: newspace");
                if (oldspace - newspace > 0) {
                        /*
                         * Block number of space to be free'd is
                         * the old block # plus the number of frags
                         * required for the storage we're keeping.
                         */
                        bn += numfrags(fs, newspace);
                        ffs_blkfree(ump, fs, ump->um_devvp, bn,
                            oldspace - newspace, ip->i_number, vp->v_type,
                            NULL, SINGLETON_KEY);
                        blocksreleased += btodb(oldspace - newspace);
                }
        }
done:
#ifdef INVARIANTS
        for (level = SINGLE; level <= TRIPLE; level++)
                if (newblks[UFS_NDADDR + level] != DIP(ip, i_ib[level]))
                        panic("ffs_truncate1: level %d newblks %jd != i_ib %jd",
                            level, (intmax_t)newblks[UFS_NDADDR + level],
                            (intmax_t)DIP(ip, i_ib[level]));
        for (i = 0; i < UFS_NDADDR; i++)
                if (newblks[i] != DIP(ip, i_db[i]))
                        panic("ffs_truncate2: blkno %d newblks %jd != i_db %jd",
                            i, (intmax_t)newblks[i],
                            (intmax_t)DIP(ip, i_db[i]));
        BO_LOCK(bo);
        if (length == 0 &&
            (fs->fs_magic != FS_UFS2_MAGIC || ip->i_din2->di_extsize == 0) &&
            (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0))
                panic("ffs_truncate3: vp = %p, buffers: dirty = %d, clean = %d",
                    vp, bo->bo_dirty.bv_cnt, bo->bo_clean.bv_cnt);
        BO_UNLOCK(bo);
#endif /* INVARIANTS */
        /*
         * Put back the real size.
         */
        ip->i_size = length;
        DIP_SET(ip, i_size, length);
        if (DIP(ip, i_blocks) >= blocksreleased)
                DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - blocksreleased);
        else    /* sanity */
                DIP_SET(ip, i_blocks, 0);
        UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE);
#ifdef QUOTA
        (void) chkdq(ip, -blocksreleased, NOCRED, FORCE);
#endif
        return (allerror);

extclean:
        if (journaltrunc)
                softdep_journal_freeblocks(ip, cred, length, IO_EXT);
        else
                softdep_setup_freeblocks(ip, length, IO_EXT);
        return (ffs_update(vp, waitforupdate));
}

/*
 * Release blocks associated with the inode ip and stored in the indirect
 * block bn.  Blocks are free'd in LIFO order up to (but not including)
 * lastbn.  If level is greater than SINGLE, the block is an indirect block
 * and recursive calls to indirtrunc must be used to cleanse other indirect
 * blocks.
 */
static int
ffs_indirtrunc(struct inode *ip,
        ufs2_daddr_t lbn,
        ufs2_daddr_t dbn,
        ufs2_daddr_t lastbn,
        int level,
        ufs2_daddr_t *countp)
{
        struct buf *bp;
        struct fs *fs;
        struct ufsmount *ump;
        struct vnode *vp;
        caddr_t copy = NULL;
        uint64_t key;
        int i, nblocks, error = 0, allerror = 0;
        ufs2_daddr_t nb, nlbn, last;
        ufs2_daddr_t blkcount, factor, blocksreleased = 0;
        ufs1_daddr_t *bap1 = NULL;
        ufs2_daddr_t *bap2 = NULL;
#define BAP(ip, i) (I_IS_UFS1(ip) ? bap1[i] : bap2[i])
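/* BAP() reads entry i from whichever pointer array matches the inode format. */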

        fs = ITOFS(ip);
        ump = ITOUMP(ip);

        /*
         * Calculate index in current block of last
         * block to be kept.  -1 indicates the entire
         * block so we need not calculate the index.
         */
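        /*
         * For example, at the DOUBLE level each pointer in this block
         * spans lbn_offset(fs, DOUBLE) == NINDIR(fs) data blocks, so the
         * index of the last entry to keep is lastbn / NINDIR(fs).
         * (Illustrative figures only.)
         */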
        factor = lbn_offset(fs, level);
        last = lastbn;
        if (lastbn > 0)
                last /= factor;
        nblocks = btodb(fs->fs_bsize);
        /*
         * Get buffer of block pointers, zero those entries corresponding
         * to blocks to be free'd, and update on disk copy first.  Since
         * the double (triple) indirect blocks are processed before the
         * single (double) indirect blocks they reference, calls to
         * VOP_BMAP() on these blocks will fail.  However, we already
         * have the on-disk address, so we just pass it to bread() instead
         * of having bread() attempt to calculate it using VOP_BMAP().
         */
        vp = ITOV(ip);
        error = ffs_breadz(ump, vp, lbn, dbn, (int)fs->fs_bsize, NULL, NULL, 0,
            NOCRED, 0, NULL, &bp);
        if (error) {
                *countp = 0;
                return (error);
        }

        if (I_IS_UFS1(ip))
                bap1 = (ufs1_daddr_t *)bp->b_data;
        else
                bap2 = (ufs2_daddr_t *)bp->b_data;
        if (lastbn != -1) {
                copy = malloc(fs->fs_bsize, M_TEMP, M_WAITOK);
                bcopy((caddr_t)bp->b_data, copy, (uint64_t)fs->fs_bsize);
                for (i = last + 1; i < NINDIR(fs); i++)
                        if (I_IS_UFS1(ip))
                                bap1[i] = 0;
                        else
                                bap2[i] = 0;
                if (DOINGASYNC(vp)) {
                        bdwrite(bp);
                } else {
                        error = bwrite(bp);
                        if (error)
                                allerror = error;
                }
                if (I_IS_UFS1(ip))
                        bap1 = (ufs1_daddr_t *)copy;
                else
                        bap2 = (ufs2_daddr_t *)copy;
        }

        /*
         * Recursively free totally unused blocks.
         */
        key = ffs_blkrelease_start(ump, ITODEVVP(ip), ip->i_number);
        for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
            i--, nlbn += factor) {
                nb = BAP(ip, i);
                if (nb == 0)
                        continue;
                if (level > SINGLE) {
                        if ((error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
                            (ufs2_daddr_t)-1, level - 1, &blkcount)) != 0)
                                allerror = error;
                        blocksreleased += blkcount;
                }
                ffs_blkfree(ump, fs, ITODEVVP(ip), nb, fs->fs_bsize,
                    ip->i_number, vp->v_type, NULL, key);
                blocksreleased += nblocks;
        }
        ffs_blkrelease_finish(ump, key);

        /*
         * Recursively free last partial block.
         */
        if (level > SINGLE && lastbn >= 0) {
                last = lastbn % factor;
                nb = BAP(ip, i);
                if (nb != 0) {
                        error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
                            last, level - 1, &blkcount);
                        if (error)
                                allerror = error;
                        blocksreleased += blkcount;
                }
        }
        if (copy != NULL) {
                free(copy, M_TEMP);
        } else {
                bp->b_flags |= B_INVAL | B_NOCACHE;
                brelse(bp);
        }

        *countp = blocksreleased;
        return (allerror);
}

int
ffs_rdonly(struct inode *ip)
{

        return (ITOFS(ip)->fs_ronly != 0);
}