xref: /freebsd/sys/ufs/ffs/ffs_balloc.c (revision 99282790b7d01ec3c4072621d46a0d7302517ad4)
1 /*-
2  * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND BSD-3-Clause)
3  *
4  * Copyright (c) 2002 Networks Associates Technology, Inc.
5  * All rights reserved.
6  *
7  * This software was developed for the FreeBSD Project by Marshall
8  * Kirk McKusick and Network Associates Laboratories, the Security
9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11  * research program
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * Copyright (c) 1982, 1986, 1989, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
62  */
63 
64 #include <sys/cdefs.h>
65 __FBSDID("$FreeBSD$");
66 
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/bio.h>
70 #include <sys/buf.h>
71 #include <sys/lock.h>
72 #include <sys/mount.h>
73 #include <sys/vnode.h>
74 #include <sys/vmmeter.h>
75 
76 #include <ufs/ufs/quota.h>
77 #include <ufs/ufs/inode.h>
78 #include <ufs/ufs/ufs_extern.h>
79 #include <ufs/ufs/extattr.h>
80 #include <ufs/ufs/ufsmount.h>
81 
82 #include <ufs/ffs/fs.h>
83 #include <ufs/ffs/ffs_extern.h>
84 
85 /*
86  * Balloc defines the structure of filesystem storage
87  * by allocating the physical blocks on a device given
88  * the inode and the logical block number in a file.
89  * This is the allocation strategy for UFS1. Below is
90  * the allocation strategy for UFS2.
91  */
92 int
93 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
94     struct ucred *cred, int flags, struct buf **bpp)
95 {
96 	struct inode *ip;
97 	struct ufs1_dinode *dp;
98 	ufs_lbn_t lbn, lastlbn;
99 	struct fs *fs;
100 	ufs1_daddr_t nb;
101 	struct buf *bp, *nbp;
102 	struct mount *mp;
103 	struct ufsmount *ump;
104 	struct indir indirs[UFS_NIADDR + 2];
105 	int deallocated, osize, nsize, num, i, error;
106 	ufs2_daddr_t newb;
107 	ufs1_daddr_t *bap, pref;
108 	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
109 	ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
110 	int unwindidx = -1;
111 	int saved_inbdflush;
112 	int gbflags, reclaimed;
113 
114 	ip = VTOI(vp);
115 	dp = ip->i_din1;
116 	fs = ITOFS(ip);
117 	mp = ITOVFS(ip);
118 	ump = ITOUMP(ip);
119 	lbn = lblkno(fs, startoffset);
120 	size = blkoff(fs, startoffset) + size;
121 	reclaimed = 0;
122 	if (size > fs->fs_bsize)
123 		panic("ffs_balloc_ufs1: blk too big");
124 	*bpp = NULL;
125 	if (flags & IO_EXT)
126 		return (EOPNOTSUPP);
127 	if (lbn < 0)
128 		return (EFBIG);
129 	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
130 
131 	if (DOINGSOFTDEP(vp))
132 		softdep_prealloc(vp, MNT_WAIT);
133 	/*
134 	 * If the next write will extend the file into a new block,
135 	 * and the file is currently composed of a fragment
136 	 * this fragment has to be extended to be a full block.
137 	 */
138 	lastlbn = lblkno(fs, ip->i_size);
139 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
140 		nb = lastlbn;
141 		osize = blksize(fs, ip, nb);
142 		if (osize < fs->fs_bsize && osize > 0) {
143 			UFS_LOCK(ump);
144 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
145 			   ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
146 			   &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
147 			   cred, &bp);
148 			if (error)
149 				return (error);
150 			if (DOINGSOFTDEP(vp))
151 				softdep_setup_allocdirect(ip, nb,
152 				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
153 				    fs->fs_bsize, osize, bp);
154 			ip->i_size = smalllblktosize(fs, nb + 1);
155 			dp->di_size = ip->i_size;
156 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
157 			UFS_INODE_SET_FLAG(ip,
158 			    IN_SIZEMOD | IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
159 			if (flags & IO_SYNC)
160 				bwrite(bp);
161 			else if (DOINGASYNC(vp))
162 				bdwrite(bp);
163 			else
164 				bawrite(bp);
165 		}
166 	}
167 	/*
168 	 * The first UFS_NDADDR blocks are direct blocks
169 	 */
170 	if (lbn < UFS_NDADDR) {
171 		if (flags & BA_METAONLY)
172 			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
173 		nb = dp->di_db[lbn];
174 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
175 			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
176 			if (error) {
177 				return (error);
178 			}
179 			bp->b_blkno = fsbtodb(fs, nb);
180 			*bpp = bp;
181 			return (0);
182 		}
183 		if (nb != 0) {
184 			/*
185 			 * Consider need to reallocate a fragment.
186 			 */
187 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
188 			nsize = fragroundup(fs, size);
189 			if (nsize <= osize) {
190 				error = bread(vp, lbn, osize, NOCRED, &bp);
191 				if (error) {
192 					return (error);
193 				}
194 				bp->b_blkno = fsbtodb(fs, nb);
195 			} else {
196 				UFS_LOCK(ump);
197 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
198 				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
199 				    &dp->di_db[0]), osize, nsize, flags,
200 				    cred, &bp);
201 				if (error)
202 					return (error);
203 				if (DOINGSOFTDEP(vp))
204 					softdep_setup_allocdirect(ip, lbn,
205 					    dbtofsb(fs, bp->b_blkno), nb,
206 					    nsize, osize, bp);
207 			}
208 		} else {
209 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
210 				nsize = fragroundup(fs, size);
211 			else
212 				nsize = fs->fs_bsize;
213 			UFS_LOCK(ump);
214 			error = ffs_alloc(ip, lbn,
215 			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
216 			    nsize, flags, cred, &newb);
217 			if (error)
218 				return (error);
219 			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
220 			bp->b_blkno = fsbtodb(fs, newb);
221 			if (flags & BA_CLRBUF)
222 				vfs_bio_clrbuf(bp);
223 			if (DOINGSOFTDEP(vp))
224 				softdep_setup_allocdirect(ip, lbn, newb, 0,
225 				    nsize, 0, bp);
226 		}
227 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
228 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
229 		*bpp = bp;
230 		return (0);
231 	}
232 	/*
233 	 * Determine the number of levels of indirection.
234 	 */
235 	pref = 0;
236 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
237 		return(error);
238 #ifdef INVARIANTS
239 	if (num < 1)
240 		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
241 #endif
242 	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
243 	/*
244 	 * Fetch the first indirect block allocating if necessary.
245 	 */
246 	--num;
247 	nb = dp->di_ib[indirs[0].in_off];
248 	allocib = NULL;
249 	allocblk = allociblk;
250 	lbns_remfree = lbns;
251 	if (nb == 0) {
252 		UFS_LOCK(ump);
253 		pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
254 		    (ufs1_daddr_t *)0);
255 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
256 		    flags, cred, &newb)) != 0) {
257 			curthread_pflags_restore(saved_inbdflush);
258 			return (error);
259 		}
260 		pref = newb + fs->fs_frag;
261 		nb = newb;
262 		MPASS(allocblk < allociblk + nitems(allociblk));
263 		MPASS(lbns_remfree < lbns + nitems(lbns));
264 		*allocblk++ = nb;
265 		*lbns_remfree++ = indirs[1].in_lbn;
266 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
267 		bp->b_blkno = fsbtodb(fs, nb);
268 		vfs_bio_clrbuf(bp);
269 		if (DOINGSOFTDEP(vp)) {
270 			softdep_setup_allocdirect(ip,
271 			    UFS_NDADDR + indirs[0].in_off, newb, 0,
272 			    fs->fs_bsize, 0, bp);
273 			bdwrite(bp);
274 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
275 			if (bp->b_bufsize == fs->fs_bsize)
276 				bp->b_flags |= B_CLUSTEROK;
277 			bdwrite(bp);
278 		} else {
279 			if ((error = bwrite(bp)) != 0)
280 				goto fail;
281 		}
282 		allocib = &dp->di_ib[indirs[0].in_off];
283 		*allocib = nb;
284 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
285 	}
286 	/*
287 	 * Fetch through the indirect blocks, allocating as necessary.
288 	 */
289 retry:
290 	for (i = 1;;) {
291 		error = bread(vp,
292 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
293 		if (error) {
294 			goto fail;
295 		}
296 		bap = (ufs1_daddr_t *)bp->b_data;
297 		nb = bap[indirs[i].in_off];
298 		if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
299 		    fs->fs_bsize)) != 0) {
300 			brelse(bp);
301 			goto fail;
302 		}
303 		if (i == num)
304 			break;
305 		i += 1;
306 		if (nb != 0) {
307 			bqrelse(bp);
308 			continue;
309 		}
310 		UFS_LOCK(ump);
311 		/*
312 		 * If parent indirect has just been allocated, try to cluster
313 		 * immediately following it.
314 		 */
315 		if (pref == 0)
316 			pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
317 			    (ufs1_daddr_t *)0);
318 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
319 		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
320 			brelse(bp);
321 			UFS_LOCK(ump);
322 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
323 				softdep_request_cleanup(fs, vp, cred,
324 				    FLUSH_BLOCKS_WAIT);
325 				UFS_UNLOCK(ump);
326 				goto retry;
327 			}
328 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
329 			    ppsratecheck(&ump->um_last_fullmsg,
330 			    &ump->um_secs_fullmsg, 1)) {
331 				UFS_UNLOCK(ump);
332 				ffs_fserr(fs, ip->i_number, "filesystem full");
333 				uprintf("\n%s: write failed, filesystem "
334 				    "is full\n", fs->fs_fsmnt);
335 			} else {
336 				UFS_UNLOCK(ump);
337 			}
338 			goto fail;
339 		}
340 		pref = newb + fs->fs_frag;
341 		nb = newb;
342 		MPASS(allocblk < allociblk + nitems(allociblk));
343 		MPASS(lbns_remfree < lbns + nitems(lbns));
344 		*allocblk++ = nb;
345 		*lbns_remfree++ = indirs[i].in_lbn;
346 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
347 		nbp->b_blkno = fsbtodb(fs, nb);
348 		vfs_bio_clrbuf(nbp);
349 		if (DOINGSOFTDEP(vp)) {
350 			softdep_setup_allocindir_meta(nbp, ip, bp,
351 			    indirs[i - 1].in_off, nb);
352 			bdwrite(nbp);
353 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
354 			if (nbp->b_bufsize == fs->fs_bsize)
355 				nbp->b_flags |= B_CLUSTEROK;
356 			bdwrite(nbp);
357 		} else {
358 			if ((error = bwrite(nbp)) != 0) {
359 				brelse(bp);
360 				goto fail;
361 			}
362 		}
363 		bap[indirs[i - 1].in_off] = nb;
364 		if (allocib == NULL && unwindidx < 0)
365 			unwindidx = i - 1;
366 		/*
367 		 * If required, write synchronously, otherwise use
368 		 * delayed write.
369 		 */
370 		if (flags & IO_SYNC) {
371 			bwrite(bp);
372 		} else {
373 			if (bp->b_bufsize == fs->fs_bsize)
374 				bp->b_flags |= B_CLUSTEROK;
375 			bdwrite(bp);
376 		}
377 	}
378 	/*
379 	 * If asked only for the indirect block, then return it.
380 	 */
381 	if (flags & BA_METAONLY) {
382 		curthread_pflags_restore(saved_inbdflush);
383 		*bpp = bp;
384 		return (0);
385 	}
386 	/*
387 	 * Get the data block, allocating if necessary.
388 	 */
389 	if (nb == 0) {
390 		UFS_LOCK(ump);
391 		/*
392 		 * If allocating metadata at the front of the cylinder
393 		 * group and parent indirect block has just been allocated,
394 		 * then cluster next to it if it is the first indirect in
395 		 * the file. Otherwise it has been allocated in the metadata
396 		 * area, so we want to find our own place out in the data area.
397 		 */
398 		if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
399 			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
400 			    &bap[0]);
401 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
402 		    flags | IO_BUFLOCKED, cred, &newb);
403 		if (error) {
404 			brelse(bp);
405 			UFS_LOCK(ump);
406 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
407 				softdep_request_cleanup(fs, vp, cred,
408 				    FLUSH_BLOCKS_WAIT);
409 				UFS_UNLOCK(ump);
410 				goto retry;
411 			}
412 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
413 			    ppsratecheck(&ump->um_last_fullmsg,
414 			    &ump->um_secs_fullmsg, 1)) {
415 				UFS_UNLOCK(ump);
416 				ffs_fserr(fs, ip->i_number, "filesystem full");
417 				uprintf("\n%s: write failed, filesystem "
418 				    "is full\n", fs->fs_fsmnt);
419 			} else {
420 				UFS_UNLOCK(ump);
421 			}
422 			goto fail;
423 		}
424 		nb = newb;
425 		MPASS(allocblk < allociblk + nitems(allociblk));
426 		MPASS(lbns_remfree < lbns + nitems(lbns));
427 		*allocblk++ = nb;
428 		*lbns_remfree++ = lbn;
429 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
430 		nbp->b_blkno = fsbtodb(fs, nb);
431 		if (flags & BA_CLRBUF)
432 			vfs_bio_clrbuf(nbp);
433 		if (DOINGSOFTDEP(vp))
434 			softdep_setup_allocindir_page(ip, lbn, bp,
435 			    indirs[i].in_off, nb, 0, nbp);
436 		bap[indirs[i].in_off] = nb;
437 		/*
438 		 * If required, write synchronously, otherwise use
439 		 * delayed write.
440 		 */
441 		if (flags & IO_SYNC) {
442 			bwrite(bp);
443 		} else {
444 			if (bp->b_bufsize == fs->fs_bsize)
445 				bp->b_flags |= B_CLUSTEROK;
446 			bdwrite(bp);
447 		}
448 		curthread_pflags_restore(saved_inbdflush);
449 		*bpp = nbp;
450 		return (0);
451 	}
452 	brelse(bp);
453 	if (flags & BA_CLRBUF) {
454 		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
455 		if (seqcount != 0 &&
456 		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
457 		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
458 			error = cluster_read(vp, ip->i_size, lbn,
459 			    (int)fs->fs_bsize, NOCRED,
460 			    MAXBSIZE, seqcount, gbflags, &nbp);
461 		} else {
462 			error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
463 			    gbflags, &nbp);
464 		}
465 		if (error) {
466 			brelse(nbp);
467 			goto fail;
468 		}
469 	} else {
470 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
471 		nbp->b_blkno = fsbtodb(fs, nb);
472 	}
473 	curthread_pflags_restore(saved_inbdflush);
474 	*bpp = nbp;
475 	return (0);
476 fail:
477 	curthread_pflags_restore(saved_inbdflush);
478 	/*
479 	 * If we have failed to allocate any blocks, simply return the error.
480 	 * This is the usual case and avoids the need to fsync the file.
481 	 */
482 	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
483 		return (error);
484 	/*
485 	 * If we have failed part way through block allocation, we
486 	 * have to deallocate any indirect blocks that we have allocated.
487 	 * We have to fsync the file before we start to get rid of all
488 	 * of its dependencies so that we do not leave them dangling.
489 	 * We have to sync it at the end so that the soft updates code
490 	 * does not find any untracked changes. Although this is really
491 	 * slow, running out of disk space is not expected to be a common
492 	 * occurrence. The error return from fsync is ignored as we already
493 	 * have an error to return to the user.
494 	 *
495 	 * XXX Still have to journal the free below
496 	 */
497 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
498 	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
499 	     blkp < allocblk; blkp++, lbns_remfree++) {
500 		/*
501 		 * We shall not leave the freed blocks on the vnode
502 		 * buffer object lists.
503 		 */
504 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
505 		    GB_NOCREAT | GB_UNMAPPED);
506 		if (bp != NULL) {
507 			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
508 			    ("mismatch1 l %jd %jd b %ju %ju",
509 			    (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
510 			    (uintmax_t)bp->b_blkno,
511 			    (uintmax_t)fsbtodb(fs, *blkp)));
512 			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
513 			bp->b_flags &= ~(B_ASYNC | B_CACHE);
514 			brelse(bp);
515 		}
516 		deallocated += fs->fs_bsize;
517 	}
518 	if (allocib != NULL) {
519 		*allocib = 0;
520 	} else if (unwindidx >= 0) {
521 		int r;
522 
523 		r = bread(vp, indirs[unwindidx].in_lbn,
524 		    (int)fs->fs_bsize, NOCRED, &bp);
525 		if (r) {
526 			panic("Could not unwind indirect block, error %d", r);
527 			brelse(bp);
528 		} else {
529 			bap = (ufs1_daddr_t *)bp->b_data;
530 			bap[indirs[unwindidx].in_off] = 0;
531 			if (flags & IO_SYNC) {
532 				bwrite(bp);
533 			} else {
534 				if (bp->b_bufsize == fs->fs_bsize)
535 					bp->b_flags |= B_CLUSTEROK;
536 				bdwrite(bp);
537 			}
538 		}
539 	}
540 	if (deallocated) {
541 #ifdef QUOTA
542 		/*
543 		 * Restore user's disk quota because allocation failed.
544 		 */
545 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
546 #endif
547 		dp->di_blocks -= btodb(deallocated);
548 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
549 	}
550 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
551 	/*
552 	 * After the buffers are invalidated and on-disk pointers are
553 	 * cleared, free the blocks.
554 	 */
555 	for (blkp = allociblk; blkp < allocblk; blkp++) {
556 #ifdef INVARIANTS
557 		if (blkp == allociblk)
558 			lbns_remfree = lbns;
559 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
560 		    GB_NOCREAT | GB_UNMAPPED);
561 		if (bp != NULL) {
562 			panic("zombie1 %jd %ju %ju",
563 			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
564 			    (uintmax_t)fsbtodb(fs, *blkp));
565 		}
566 		lbns_remfree++;
567 #endif
568 		ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
569 		    ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
570 	}
571 	return (error);
572 }
573 
574 /*
575  * Balloc defines the structure of file system storage
576  * by allocating the physical blocks on a device given
577  * the inode and the logical block number in a file.
578  * This is the allocation strategy for UFS2. Above is
579  * the allocation strategy for UFS1.
580  */
581 int
582 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
583     struct ucred *cred, int flags, struct buf **bpp)
584 {
585 	struct inode *ip;
586 	struct ufs2_dinode *dp;
587 	ufs_lbn_t lbn, lastlbn;
588 	struct fs *fs;
589 	struct buf *bp, *nbp;
590 	struct mount *mp;
591 	struct ufsmount *ump;
592 	struct indir indirs[UFS_NIADDR + 2];
593 	ufs2_daddr_t nb, newb, *bap, pref;
594 	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
595 	ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
596 	int deallocated, osize, nsize, num, i, error;
597 	int unwindidx = -1;
598 	int saved_inbdflush;
599 	int gbflags, reclaimed;
600 
601 	ip = VTOI(vp);
602 	dp = ip->i_din2;
603 	fs = ITOFS(ip);
604 	mp = ITOVFS(ip);
605 	ump = ITOUMP(ip);
606 	lbn = lblkno(fs, startoffset);
607 	size = blkoff(fs, startoffset) + size;
608 	reclaimed = 0;
609 	if (size > fs->fs_bsize)
610 		panic("ffs_balloc_ufs2: blk too big");
611 	*bpp = NULL;
612 	if (lbn < 0)
613 		return (EFBIG);
614 	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
615 
616 	if (DOINGSOFTDEP(vp))
617 		softdep_prealloc(vp, MNT_WAIT);
618 
619 	/*
620 	 * Check for allocating external data.
621 	 */
622 	if (flags & IO_EXT) {
623 		if (lbn >= UFS_NXADDR)
624 			return (EFBIG);
625 		/*
626 		 * If the next write will extend the data into a new block,
627 		 * and the data is currently composed of a fragment
628 		 * this fragment has to be extended to be a full block.
629 		 */
630 		lastlbn = lblkno(fs, dp->di_extsize);
631 		if (lastlbn < lbn) {
632 			nb = lastlbn;
633 			osize = sblksize(fs, dp->di_extsize, nb);
634 			if (osize < fs->fs_bsize && osize > 0) {
635 				UFS_LOCK(ump);
636 				error = ffs_realloccg(ip, -1 - nb,
637 				    dp->di_extb[nb],
638 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
639 				    &dp->di_extb[0]), osize,
640 				    (int)fs->fs_bsize, flags, cred, &bp);
641 				if (error)
642 					return (error);
643 				if (DOINGSOFTDEP(vp))
644 					softdep_setup_allocext(ip, nb,
645 					    dbtofsb(fs, bp->b_blkno),
646 					    dp->di_extb[nb],
647 					    fs->fs_bsize, osize, bp);
648 				dp->di_extsize = smalllblktosize(fs, nb + 1);
649 				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
650 				bp->b_xflags |= BX_ALTDATA;
651 				UFS_INODE_SET_FLAG(ip,
652 				    IN_SIZEMOD | IN_CHANGE | IN_IBLKDATA);
653 				if (flags & IO_SYNC)
654 					bwrite(bp);
655 				else
656 					bawrite(bp);
657 			}
658 		}
659 		/*
660 		 * All blocks are direct blocks
661 		 */
662 		if (flags & BA_METAONLY)
663 			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
664 		nb = dp->di_extb[lbn];
665 		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
666 			error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
667 			    gbflags, &bp);
668 			if (error) {
669 				return (error);
670 			}
671 			bp->b_blkno = fsbtodb(fs, nb);
672 			bp->b_xflags |= BX_ALTDATA;
673 			*bpp = bp;
674 			return (0);
675 		}
676 		if (nb != 0) {
677 			/*
678 			 * Consider need to reallocate a fragment.
679 			 */
680 			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
681 			nsize = fragroundup(fs, size);
682 			if (nsize <= osize) {
683 				error = bread_gb(vp, -1 - lbn, osize, NOCRED,
684 				    gbflags, &bp);
685 				if (error) {
686 					return (error);
687 				}
688 				bp->b_blkno = fsbtodb(fs, nb);
689 				bp->b_xflags |= BX_ALTDATA;
690 			} else {
691 				UFS_LOCK(ump);
692 				error = ffs_realloccg(ip, -1 - lbn,
693 				    dp->di_extb[lbn],
694 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
695 				    &dp->di_extb[0]), osize, nsize, flags,
696 				    cred, &bp);
697 				if (error)
698 					return (error);
699 				bp->b_xflags |= BX_ALTDATA;
700 				if (DOINGSOFTDEP(vp))
701 					softdep_setup_allocext(ip, lbn,
702 					    dbtofsb(fs, bp->b_blkno), nb,
703 					    nsize, osize, bp);
704 			}
705 		} else {
706 			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
707 				nsize = fragroundup(fs, size);
708 			else
709 				nsize = fs->fs_bsize;
710 			UFS_LOCK(ump);
711 			error = ffs_alloc(ip, lbn,
712 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
713 			   nsize, flags, cred, &newb);
714 			if (error)
715 				return (error);
716 			bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
717 			bp->b_blkno = fsbtodb(fs, newb);
718 			bp->b_xflags |= BX_ALTDATA;
719 			if (flags & BA_CLRBUF)
720 				vfs_bio_clrbuf(bp);
721 			if (DOINGSOFTDEP(vp))
722 				softdep_setup_allocext(ip, lbn, newb, 0,
723 				    nsize, 0, bp);
724 		}
725 		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
726 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_IBLKDATA);
727 		*bpp = bp;
728 		return (0);
729 	}
730 	/*
731 	 * If the next write will extend the file into a new block,
732 	 * and the file is currently composed of a fragment
733 	 * this fragment has to be extended to be a full block.
734 	 */
735 	lastlbn = lblkno(fs, ip->i_size);
736 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
737 		nb = lastlbn;
738 		osize = blksize(fs, ip, nb);
739 		if (osize < fs->fs_bsize && osize > 0) {
740 			UFS_LOCK(ump);
741 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
742 			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
743 			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
744 			    flags, cred, &bp);
745 			if (error)
746 				return (error);
747 			if (DOINGSOFTDEP(vp))
748 				softdep_setup_allocdirect(ip, nb,
749 				    dbtofsb(fs, bp->b_blkno),
750 				    dp->di_db[nb],
751 				    fs->fs_bsize, osize, bp);
752 			ip->i_size = smalllblktosize(fs, nb + 1);
753 			dp->di_size = ip->i_size;
754 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
755 			UFS_INODE_SET_FLAG(ip,
756 			    IN_SIZEMOD |IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
757 			if (flags & IO_SYNC)
758 				bwrite(bp);
759 			else
760 				bawrite(bp);
761 		}
762 	}
763 	/*
764 	 * The first UFS_NDADDR blocks are direct blocks
765 	 */
766 	if (lbn < UFS_NDADDR) {
767 		if (flags & BA_METAONLY)
768 			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
769 		nb = dp->di_db[lbn];
770 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
771 			error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
772 			    gbflags, &bp);
773 			if (error) {
774 				return (error);
775 			}
776 			bp->b_blkno = fsbtodb(fs, nb);
777 			*bpp = bp;
778 			return (0);
779 		}
780 		if (nb != 0) {
781 			/*
782 			 * Consider need to reallocate a fragment.
783 			 */
784 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
785 			nsize = fragroundup(fs, size);
786 			if (nsize <= osize) {
787 				error = bread_gb(vp, lbn, osize, NOCRED,
788 				    gbflags, &bp);
789 				if (error) {
790 					return (error);
791 				}
792 				bp->b_blkno = fsbtodb(fs, nb);
793 			} else {
794 				UFS_LOCK(ump);
795 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
796 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
797 				    &dp->di_db[0]), osize, nsize, flags,
798 				    cred, &bp);
799 				if (error)
800 					return (error);
801 				if (DOINGSOFTDEP(vp))
802 					softdep_setup_allocdirect(ip, lbn,
803 					    dbtofsb(fs, bp->b_blkno), nb,
804 					    nsize, osize, bp);
805 			}
806 		} else {
807 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
808 				nsize = fragroundup(fs, size);
809 			else
810 				nsize = fs->fs_bsize;
811 			UFS_LOCK(ump);
812 			error = ffs_alloc(ip, lbn,
813 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
814 				&dp->di_db[0]), nsize, flags, cred, &newb);
815 			if (error)
816 				return (error);
817 			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
818 			bp->b_blkno = fsbtodb(fs, newb);
819 			if (flags & BA_CLRBUF)
820 				vfs_bio_clrbuf(bp);
821 			if (DOINGSOFTDEP(vp))
822 				softdep_setup_allocdirect(ip, lbn, newb, 0,
823 				    nsize, 0, bp);
824 		}
825 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
826 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
827 		*bpp = bp;
828 		return (0);
829 	}
830 	/*
831 	 * Determine the number of levels of indirection.
832 	 */
833 	pref = 0;
834 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
835 		return(error);
836 #ifdef INVARIANTS
837 	if (num < 1)
838 		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
839 #endif
840 	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
841 	/*
842 	 * Fetch the first indirect block allocating if necessary.
843 	 */
844 	--num;
845 	nb = dp->di_ib[indirs[0].in_off];
846 	allocib = NULL;
847 	allocblk = allociblk;
848 	lbns_remfree = lbns;
849 	if (nb == 0) {
850 		UFS_LOCK(ump);
851 		pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
852 		    (ufs2_daddr_t *)0);
853 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
854 		    flags, cred, &newb)) != 0) {
855 			curthread_pflags_restore(saved_inbdflush);
856 			return (error);
857 		}
858 		pref = newb + fs->fs_frag;
859 		nb = newb;
860 		MPASS(allocblk < allociblk + nitems(allociblk));
861 		MPASS(lbns_remfree < lbns + nitems(lbns));
862 		*allocblk++ = nb;
863 		*lbns_remfree++ = indirs[1].in_lbn;
864 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
865 		    GB_UNMAPPED);
866 		bp->b_blkno = fsbtodb(fs, nb);
867 		vfs_bio_clrbuf(bp);
868 		if (DOINGSOFTDEP(vp)) {
869 			softdep_setup_allocdirect(ip,
870 			    UFS_NDADDR + indirs[0].in_off, newb, 0,
871 			    fs->fs_bsize, 0, bp);
872 			bdwrite(bp);
873 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
874 			if (bp->b_bufsize == fs->fs_bsize)
875 				bp->b_flags |= B_CLUSTEROK;
876 			bdwrite(bp);
877 		} else {
878 			if ((error = bwrite(bp)) != 0)
879 				goto fail;
880 		}
881 		allocib = &dp->di_ib[indirs[0].in_off];
882 		*allocib = nb;
883 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
884 	}
885 	/*
886 	 * Fetch through the indirect blocks, allocating as necessary.
887 	 */
888 retry:
889 	for (i = 1;;) {
890 		error = bread(vp,
891 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
892 		if (error) {
893 			goto fail;
894 		}
895 		bap = (ufs2_daddr_t *)bp->b_data;
896 		nb = bap[indirs[i].in_off];
897 		if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
898 		    fs->fs_bsize)) != 0) {
899 			brelse(bp);
900 			goto fail;
901 		}
902 		if (i == num)
903 			break;
904 		i += 1;
905 		if (nb != 0) {
906 			bqrelse(bp);
907 			continue;
908 		}
909 		UFS_LOCK(ump);
910 		/*
911 		 * If parent indirect has just been allocated, try to cluster
912 		 * immediately following it.
913 		 */
914 		if (pref == 0)
915 			pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
916 			    (ufs2_daddr_t *)0);
917 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
918 		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
919 			brelse(bp);
920 			UFS_LOCK(ump);
921 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
922 				softdep_request_cleanup(fs, vp, cred,
923 				    FLUSH_BLOCKS_WAIT);
924 				UFS_UNLOCK(ump);
925 				goto retry;
926 			}
927 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
928 			    ppsratecheck(&ump->um_last_fullmsg,
929 			    &ump->um_secs_fullmsg, 1)) {
930 				UFS_UNLOCK(ump);
931 				ffs_fserr(fs, ip->i_number, "filesystem full");
932 				uprintf("\n%s: write failed, filesystem "
933 				    "is full\n", fs->fs_fsmnt);
934 			} else {
935 				UFS_UNLOCK(ump);
936 			}
937 			goto fail;
938 		}
939 		pref = newb + fs->fs_frag;
940 		nb = newb;
941 		MPASS(allocblk < allociblk + nitems(allociblk));
942 		MPASS(lbns_remfree < lbns + nitems(lbns));
943 		*allocblk++ = nb;
944 		*lbns_remfree++ = indirs[i].in_lbn;
945 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
946 		    GB_UNMAPPED);
947 		nbp->b_blkno = fsbtodb(fs, nb);
948 		vfs_bio_clrbuf(nbp);
949 		if (DOINGSOFTDEP(vp)) {
950 			softdep_setup_allocindir_meta(nbp, ip, bp,
951 			    indirs[i - 1].in_off, nb);
952 			bdwrite(nbp);
953 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
954 			if (nbp->b_bufsize == fs->fs_bsize)
955 				nbp->b_flags |= B_CLUSTEROK;
956 			bdwrite(nbp);
957 		} else {
958 			if ((error = bwrite(nbp)) != 0) {
959 				brelse(bp);
960 				goto fail;
961 			}
962 		}
963 		bap[indirs[i - 1].in_off] = nb;
964 		if (allocib == NULL && unwindidx < 0)
965 			unwindidx = i - 1;
966 		/*
967 		 * If required, write synchronously, otherwise use
968 		 * delayed write.
969 		 */
970 		if (flags & IO_SYNC) {
971 			bwrite(bp);
972 		} else {
973 			if (bp->b_bufsize == fs->fs_bsize)
974 				bp->b_flags |= B_CLUSTEROK;
975 			bdwrite(bp);
976 		}
977 	}
978 	/*
979 	 * If asked only for the indirect block, then return it.
980 	 */
981 	if (flags & BA_METAONLY) {
982 		curthread_pflags_restore(saved_inbdflush);
983 		*bpp = bp;
984 		return (0);
985 	}
986 	/*
987 	 * Get the data block, allocating if necessary.
988 	 */
989 	if (nb == 0) {
990 		UFS_LOCK(ump);
991 		/*
992 		 * If allocating metadata at the front of the cylinder
993 		 * group and parent indirect block has just been allocated,
994 		 * then cluster next to it if it is the first indirect in
995 		 * the file. Otherwise it has been allocated in the metadata
996 		 * area, so we want to find our own place out in the data area.
997 		 */
998 		if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
999 			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
1000 			    &bap[0]);
1001 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
1002 		    flags | IO_BUFLOCKED, cred, &newb);
1003 		if (error) {
1004 			brelse(bp);
1005 			UFS_LOCK(ump);
1006 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
1007 				softdep_request_cleanup(fs, vp, cred,
1008 				    FLUSH_BLOCKS_WAIT);
1009 				UFS_UNLOCK(ump);
1010 				goto retry;
1011 			}
1012 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
1013 			    ppsratecheck(&ump->um_last_fullmsg,
1014 			    &ump->um_secs_fullmsg, 1)) {
1015 				UFS_UNLOCK(ump);
1016 				ffs_fserr(fs, ip->i_number, "filesystem full");
1017 				uprintf("\n%s: write failed, filesystem "
1018 				    "is full\n", fs->fs_fsmnt);
1019 			} else {
1020 				UFS_UNLOCK(ump);
1021 			}
1022 			goto fail;
1023 		}
1024 		nb = newb;
1025 		MPASS(allocblk < allociblk + nitems(allociblk));
1026 		MPASS(lbns_remfree < lbns + nitems(lbns));
1027 		*allocblk++ = nb;
1028 		*lbns_remfree++ = lbn;
1029 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1030 		nbp->b_blkno = fsbtodb(fs, nb);
1031 		if (flags & BA_CLRBUF)
1032 			vfs_bio_clrbuf(nbp);
1033 		if (DOINGSOFTDEP(vp))
1034 			softdep_setup_allocindir_page(ip, lbn, bp,
1035 			    indirs[i].in_off, nb, 0, nbp);
1036 		bap[indirs[i].in_off] = nb;
1037 		/*
1038 		 * If required, write synchronously, otherwise use
1039 		 * delayed write.
1040 		 */
1041 		if (flags & IO_SYNC) {
1042 			bwrite(bp);
1043 		} else {
1044 			if (bp->b_bufsize == fs->fs_bsize)
1045 				bp->b_flags |= B_CLUSTEROK;
1046 			bdwrite(bp);
1047 		}
1048 		curthread_pflags_restore(saved_inbdflush);
1049 		*bpp = nbp;
1050 		return (0);
1051 	}
1052 	brelse(bp);
1053 	/*
1054 	 * If requested clear invalid portions of the buffer.  If we
1055 	 * have to do a read-before-write (typical if BA_CLRBUF is set),
1056 	 * try to do some read-ahead in the sequential case to reduce
1057 	 * the number of I/O transactions.
1058 	 */
1059 	if (flags & BA_CLRBUF) {
1060 		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
1061 		if (seqcount != 0 &&
1062 		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
1063 		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
1064 			error = cluster_read(vp, ip->i_size, lbn,
1065 			    (int)fs->fs_bsize, NOCRED,
1066 			    MAXBSIZE, seqcount, gbflags, &nbp);
1067 		} else {
1068 			error = bread_gb(vp, lbn, (int)fs->fs_bsize,
1069 			    NOCRED, gbflags, &nbp);
1070 		}
1071 		if (error) {
1072 			brelse(nbp);
1073 			goto fail;
1074 		}
1075 	} else {
1076 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1077 		nbp->b_blkno = fsbtodb(fs, nb);
1078 	}
1079 	curthread_pflags_restore(saved_inbdflush);
1080 	*bpp = nbp;
1081 	return (0);
1082 fail:
1083 	curthread_pflags_restore(saved_inbdflush);
1084 	/*
1085 	 * If we have failed to allocate any blocks, simply return the error.
1086 	 * This is the usual case and avoids the need to fsync the file.
1087 	 */
1088 	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
1089 		return (error);
1090 	/*
1091 	 * If we have failed part way through block allocation, we
1092 	 * have to deallocate any indirect blocks that we have allocated.
1093 	 * We have to fsync the file before we start to get rid of all
1094 	 * of its dependencies so that we do not leave them dangling.
1095 	 * We have to sync it at the end so that the soft updates code
1096 	 * does not find any untracked changes. Although this is really
1097 	 * slow, running out of disk space is not expected to be a common
1098 	 * occurrence. The error return from fsync is ignored as we already
1099 	 * have an error to return to the user.
1100 	 *
1101 	 * XXX Still have to journal the free below
1102 	 */
1103 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
1104 	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
1105 	     blkp < allocblk; blkp++, lbns_remfree++) {
1106 		/*
1107 		 * We shall not leave the freed blocks on the vnode
1108 		 * buffer object lists.
1109 		 */
1110 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1111 		    GB_NOCREAT | GB_UNMAPPED);
1112 		if (bp != NULL) {
1113 			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
1114 			    ("mismatch2 l %jd %jd b %ju %ju",
1115 			    (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
1116 			    (uintmax_t)bp->b_blkno,
1117 			    (uintmax_t)fsbtodb(fs, *blkp)));
1118 			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
1119 			bp->b_flags &= ~(B_ASYNC | B_CACHE);
1120 			brelse(bp);
1121 		}
1122 		deallocated += fs->fs_bsize;
1123 	}
1124 	if (allocib != NULL) {
1125 		*allocib = 0;
1126 	} else if (unwindidx >= 0) {
1127 		int r;
1128 
1129 		r = bread(vp, indirs[unwindidx].in_lbn,
1130 		    (int)fs->fs_bsize, NOCRED, &bp);
1131 		if (r) {
1132 			panic("Could not unwind indirect block, error %d", r);
1133 			brelse(bp);
1134 		} else {
1135 			bap = (ufs2_daddr_t *)bp->b_data;
1136 			bap[indirs[unwindidx].in_off] = 0;
1137 			if (flags & IO_SYNC) {
1138 				bwrite(bp);
1139 			} else {
1140 				if (bp->b_bufsize == fs->fs_bsize)
1141 					bp->b_flags |= B_CLUSTEROK;
1142 				bdwrite(bp);
1143 			}
1144 		}
1145 	}
1146 	if (deallocated) {
1147 #ifdef QUOTA
1148 		/*
1149 		 * Restore user's disk quota because allocation failed.
1150 		 */
1151 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
1152 #endif
1153 		dp->di_blocks -= btodb(deallocated);
1154 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
1155 	}
1156 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
1157 	/*
1158 	 * After the buffers are invalidated and on-disk pointers are
1159 	 * cleared, free the blocks.
1160 	 */
1161 	for (blkp = allociblk; blkp < allocblk; blkp++) {
1162 #ifdef INVARIANTS
1163 		if (blkp == allociblk)
1164 			lbns_remfree = lbns;
1165 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1166 		    GB_NOCREAT | GB_UNMAPPED);
1167 		if (bp != NULL) {
1168 			panic("zombie2 %jd %ju %ju",
1169 			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
1170 			    (uintmax_t)fsbtodb(fs, *blkp));
1171 		}
1172 		lbns_remfree++;
1173 #endif
1174 		ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
1175 		    ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
1176 	}
1177 	return (error);
1178 }
1179