xref: /freebsd/sys/ufs/ffs/ffs_balloc.c (revision bdcbfde31e8e9b343f113a1956384bdf30d1ed62)
1 /*-
2  * SPDX-License-Identifier: (BSD-2-Clause AND BSD-3-Clause)
3  *
4  * Copyright (c) 2002 Networks Associates Technology, Inc.
5  * All rights reserved.
6  *
7  * This software was developed for the FreeBSD Project by Marshall
8  * Kirk McKusick and Network Associates Laboratories, the Security
9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11  * research program
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * Copyright (c) 1982, 1986, 1989, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  */
61 
62 #include <sys/cdefs.h>
63 #include <sys/param.h>
64 #include <sys/systm.h>
65 #include <sys/bio.h>
66 #include <sys/buf.h>
67 #include <sys/lock.h>
68 #include <sys/mount.h>
69 #include <sys/stat.h>
70 #include <sys/vnode.h>
71 #include <sys/vmmeter.h>
72 
73 #include <ufs/ufs/quota.h>
74 #include <ufs/ufs/inode.h>
75 #include <ufs/ufs/ufs_extern.h>
76 #include <ufs/ufs/extattr.h>
77 #include <ufs/ufs/ufsmount.h>
78 
79 #include <ufs/ffs/fs.h>
80 #include <ufs/ffs/ffs_extern.h>
81 
82 /*
83  * Balloc defines the structure of filesystem storage
84  * by allocating the physical blocks on a device given
85  * the inode and the logical block number in a file.
86  * This is the allocation strategy for UFS1. Below is
87  * the allocation strategy for UFS2.
88  */
89 int
90 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
91     struct ucred *cred, int flags, struct buf **bpp)
92 {
93 	struct inode *ip;
94 	struct ufs1_dinode *dp;
95 	ufs_lbn_t lbn, lastlbn;
96 	struct fs *fs;
97 	ufs1_daddr_t nb;
98 	struct buf *bp, *nbp;
99 	struct mount *mp;
100 	struct ufsmount *ump;
101 	struct indir indirs[UFS_NIADDR + 2];
102 	int deallocated, osize, nsize, num, i, error;
103 	ufs2_daddr_t newb;
104 	ufs1_daddr_t *bap, pref;	/* pref: preferred blkno for next allocation */
105 	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1]; /* blocks allocated here, for unwind on failure */
106 	ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1]; /* logical block numbers of those blocks */
107 	int unwindidx = -1;	/* indirs[] level whose on-disk pointer must be cleared on failure */
108 	int saved_inbdflush;
109 	int gbflags, reclaimed;	/* reclaimed: set once softdep cleanup has been tried */
110 
111 	ip = VTOI(vp);
112 	dp = ip->i_din1;
113 	fs = ITOFS(ip);
114 	mp = ITOVFS(ip);
115 	ump = ITOUMP(ip);
116 	lbn = lblkno(fs, startoffset);	/* logical block to be allocated */
117 	size = blkoff(fs, startoffset) + size;	/* bytes needed within that block */
118 	reclaimed = 0;
119 	if (size > fs->fs_bsize)
120 		panic("ffs_balloc_ufs1: blk too big");
121 	*bpp = NULL;
122 	if (flags & IO_EXT)
123 		return (EOPNOTSUPP);	/* no external-data area in UFS1 */
124 	if (lbn < 0)
125 		return (EFBIG);
126 	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
127 
128 	vn_seqc_write_begin(vp);	/* closed at "done" by vn_seqc_write_end() */
129 
130 	/*
131 	 * If the next write will extend the file into a new block,
132 	 * and the file is currently composed of a fragment
133 	 * this fragment has to be extended to be a full block.
134 	 */
135 	lastlbn = lblkno(fs, ip->i_size);
136 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
137 		nb = lastlbn;
138 		osize = blksize(fs, ip, nb);
139 		if (osize < fs->fs_bsize && osize > 0) {
140 			UFS_LOCK(ump);	/* lock is released by ffs_realloccg() */
141 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
142 			   ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
143 			   &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
144 			   cred, &bp);
145 			if (error)
146 				goto done;
147 			if (DOINGSOFTDEP(vp))
148 				softdep_setup_allocdirect(ip, nb,
149 				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
150 				    fs->fs_bsize, osize, bp);
151 			ip->i_size = smalllblktosize(fs, nb + 1);
152 			dp->di_size = ip->i_size;
153 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
154 			UFS_INODE_SET_FLAG(ip,
155 			    IN_SIZEMOD | IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
156 			if (flags & IO_SYNC)
157 				bwrite(bp);
158 			else if (DOINGASYNC(vp))
159 				bdwrite(bp);
160 			else
161 				bawrite(bp);
162 		}
163 	}
164 	/*
165 	 * The first UFS_NDADDR blocks are direct blocks
166 	 */
167 	if (lbn < UFS_NDADDR) {
168 		if (flags & BA_METAONLY)
169 			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
170 		nb = dp->di_db[lbn];
171 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
172 			/* Block already allocated and fully covered by the file. */
173 			if ((flags & BA_CLRBUF) != 0) {
174 				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
175 				    &bp);
176 				if (error != 0)
177 					goto done;
178 			} else {
179 				bp = getblk(vp, lbn, fs->fs_bsize, 0, 0,
180 				    gbflags);
181 				if (bp == NULL) {
182 					error = EIO;
183 					goto done;
184 				}
185 				vfs_bio_clrbuf(bp);
186 			}
187 			bp->b_blkno = fsbtodb(fs, nb);
188 			*bpp = bp;
189 			error = 0;
190 			goto done;
191 		}
192 		if (nb != 0) {
193 			/*
194 			 * Consider need to reallocate a fragment.
195 			 */
196 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
197 			nsize = fragroundup(fs, size);
198 			if (nsize <= osize) {
199 				error = bread(vp, lbn, osize, NOCRED, &bp);
200 				if (error)
201 					goto done;
202 				bp->b_blkno = fsbtodb(fs, nb);
203 			} else {
204 				UFS_LOCK(ump);	/* released by ffs_realloccg() */
205 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
206 				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
207 				    &dp->di_db[0]), osize, nsize, flags,
208 				    cred, &bp);
209 				if (error)
210 					goto done;
211 				if (DOINGSOFTDEP(vp))
212 					softdep_setup_allocdirect(ip, lbn,
213 					    dbtofsb(fs, bp->b_blkno), nb,
214 					    nsize, osize, bp);
215 			}
216 		} else {
217 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
218 				nsize = fragroundup(fs, size);
219 			else
220 				nsize = fs->fs_bsize;
221 			UFS_LOCK(ump);	/* released by ffs_alloc() */
222 			error = ffs_alloc(ip, lbn,
223 			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
224 			    nsize, flags, cred, &newb);
225 			if (error)
226 				goto done;
227 			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
228 			bp->b_blkno = fsbtodb(fs, newb);
229 			if (flags & BA_CLRBUF)
230 				vfs_bio_clrbuf(bp);
231 			if (DOINGSOFTDEP(vp))
232 				softdep_setup_allocdirect(ip, lbn, newb, 0,
233 				    nsize, 0, bp);
234 		}
235 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
236 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
237 		*bpp = bp;
238 		error = 0;
239 		goto done;
240 	}
241 	/*
242 	 * Determine the number of levels of indirection.
243 	 */
244 	pref = 0;
245 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
246 		goto done;
247 #ifdef INVARIANTS
248 	if (num < 1)
249 		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
250 #endif
251 	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);	/* restored on every exit path below */
252 	/*
253 	 * Fetch the first indirect block allocating if necessary.
254 	 */
255 	--num;	/* indirs[num] is now the last indirect level */
256 	nb = dp->di_ib[indirs[0].in_off];
257 	allocib = NULL;
258 	allocblk = allociblk;
259 	lbns_remfree = lbns;
260 	if (nb == 0) {
261 		UFS_LOCK(ump);	/* released by ffs_alloc() */
262 		pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
263 		    (ufs1_daddr_t *)0);
264 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
265 		    flags, cred, &newb)) != 0) {
266 			curthread_pflags_restore(saved_inbdflush);
267 			goto done;
268 		}
269 		pref = newb + fs->fs_frag;
270 		nb = newb;
271 		MPASS(allocblk < allociblk + nitems(allociblk));
272 		MPASS(lbns_remfree < lbns + nitems(lbns));
273 		*allocblk++ = nb;
274 		*lbns_remfree++ = indirs[1].in_lbn;
275 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
276 		bp->b_blkno = fsbtodb(fs, nb);
277 		vfs_bio_clrbuf(bp);
278 		if (DOINGSOFTDEP(vp)) {
279 			softdep_setup_allocdirect(ip,
280 			    UFS_NDADDR + indirs[0].in_off, newb, 0,
281 			    fs->fs_bsize, 0, bp);
282 			bdwrite(bp);
283 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
284 			if (bp->b_bufsize == fs->fs_bsize)
285 				bp->b_flags |= B_CLUSTEROK;
286 			bdwrite(bp);
287 		} else {
288 			if ((error = bwrite(bp)) != 0)
289 				goto fail;
290 		}
291 		allocib = &dp->di_ib[indirs[0].in_off];
292 		*allocib = nb;
293 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
294 	}
295 	/*
296 	 * Fetch through the indirect blocks, allocating as necessary.
297 	 */
298 retry:	/* taken at most once overall, after a softdep cleanup attempt */
299 	for (i = 1;;) {
300 		error = bread(vp,
301 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
302 		if (error) {
303 			goto fail;
304 		}
305 		bap = (ufs1_daddr_t *)bp->b_data;
306 		nb = bap[indirs[i].in_off];
307 		if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
308 		    fs->fs_bsize)) != 0) {
309 			brelse(bp);
310 			goto fail;
311 		}
312 		if (i == num)
313 			break;	/* nb is now the data block number (or 0) */
314 		i += 1;
315 		if (nb != 0) {
316 			bqrelse(bp);
317 			continue;
318 		}
319 		UFS_LOCK(ump);	/* released by ffs_alloc() below */
320 		/*
321 		 * If parent indirect has just been allocated, try to cluster
322 		 * immediately following it.
323 		 */
324 		if (pref == 0)
325 			pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
326 			    (ufs1_daddr_t *)0);
327 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
328 		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
329 			brelse(bp);
330 			UFS_LOCK(ump);	/* re-acquire after failed ffs_alloc() */
331 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
332 				softdep_request_cleanup(fs, vp, cred,
333 				    FLUSH_BLOCKS_WAIT);
334 				UFS_UNLOCK(ump);
335 				goto retry;
336 			}
337 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
338 			    ppsratecheck(&ump->um_last_fullmsg,
339 			    &ump->um_secs_fullmsg, 1)) {
340 				UFS_UNLOCK(ump);
341 				ffs_fserr(fs, ip->i_number, "filesystem full");
342 				uprintf("\n%s: write failed, filesystem "
343 				    "is full\n", fs->fs_fsmnt);
344 			} else {
345 				UFS_UNLOCK(ump);
346 			}
347 			goto fail;
348 		}
349 		pref = newb + fs->fs_frag;
350 		nb = newb;
351 		MPASS(allocblk < allociblk + nitems(allociblk));
352 		MPASS(lbns_remfree < lbns + nitems(lbns));
353 		*allocblk++ = nb;
354 		*lbns_remfree++ = indirs[i].in_lbn;
355 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
356 		nbp->b_blkno = fsbtodb(fs, nb);
357 		vfs_bio_clrbuf(nbp);
358 		if (DOINGSOFTDEP(vp)) {
359 			softdep_setup_allocindir_meta(nbp, ip, bp,
360 			    indirs[i - 1].in_off, nb);
361 			bdwrite(nbp);
362 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
363 			if (nbp->b_bufsize == fs->fs_bsize)
364 				nbp->b_flags |= B_CLUSTEROK;
365 			bdwrite(nbp);
366 		} else {
367 			if ((error = bwrite(nbp)) != 0) {
368 				brelse(bp);
369 				goto fail;
370 			}
371 		}
372 		bap[indirs[i - 1].in_off] = nb;
373 		if (allocib == NULL && unwindidx < 0)
374 			unwindidx = i - 1;
375 		/*
376 		 * If required, write synchronously, otherwise use
377 		 * delayed write.
378 		 */
379 		if (flags & IO_SYNC) {
380 			bwrite(bp);
381 		} else {
382 			if (bp->b_bufsize == fs->fs_bsize)
383 				bp->b_flags |= B_CLUSTEROK;
384 			bdwrite(bp);
385 		}
386 	}
387 	/*
388 	 * If asked only for the indirect block, then return it.
389 	 */
390 	if (flags & BA_METAONLY) {
391 		curthread_pflags_restore(saved_inbdflush);
392 		*bpp = bp;
393 		error = 0;
394 		goto done;
395 	}
396 	/*
397 	 * Get the data block, allocating if necessary.
398 	 */
399 	if (nb == 0) {
400 		UFS_LOCK(ump);	/* released by ffs_alloc() below */
401 		/*
402 		 * If allocating metadata at the front of the cylinder
403 		 * group and parent indirect block has just been allocated,
404 		 * then cluster next to it if it is the first indirect in
405 		 * the file. Otherwise it has been allocated in the metadata
406 		 * area, so we want to find our own place out in the data area.
407 		 */
408 		if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
409 			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
410 			    &bap[0]);
411 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
412 		    flags | IO_BUFLOCKED, cred, &newb);
413 		if (error) {
414 			brelse(bp);
415 			UFS_LOCK(ump);	/* re-acquire after failed ffs_alloc() */
416 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
417 				softdep_request_cleanup(fs, vp, cred,
418 				    FLUSH_BLOCKS_WAIT);
419 				UFS_UNLOCK(ump);
420 				goto retry;
421 			}
422 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
423 			    ppsratecheck(&ump->um_last_fullmsg,
424 			    &ump->um_secs_fullmsg, 1)) {
425 				UFS_UNLOCK(ump);
426 				ffs_fserr(fs, ip->i_number, "filesystem full");
427 				uprintf("\n%s: write failed, filesystem "
428 				    "is full\n", fs->fs_fsmnt);
429 			} else {
430 				UFS_UNLOCK(ump);
431 			}
432 			goto fail;
433 		}
434 		nb = newb;
435 		MPASS(allocblk < allociblk + nitems(allociblk));
436 		MPASS(lbns_remfree < lbns + nitems(lbns));
437 		*allocblk++ = nb;
438 		*lbns_remfree++ = lbn;
439 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
440 		nbp->b_blkno = fsbtodb(fs, nb);
441 		if (flags & BA_CLRBUF)
442 			vfs_bio_clrbuf(nbp);
443 		if (DOINGSOFTDEP(vp))
444 			softdep_setup_allocindir_page(ip, lbn, bp,
445 			    indirs[i].in_off, nb, 0, nbp);
446 		bap[indirs[i].in_off] = nb;
447 		/*
448 		 * If required, write synchronously, otherwise use
449 		 * delayed write.
450 		 */
451 		if (flags & IO_SYNC) {
452 			bwrite(bp);
453 		} else {
454 			if (bp->b_bufsize == fs->fs_bsize)
455 				bp->b_flags |= B_CLUSTEROK;
456 			bdwrite(bp);
457 		}
458 		curthread_pflags_restore(saved_inbdflush);
459 		*bpp = nbp;
460 		error = 0;
461 		goto done;
462 	}
463 	brelse(bp);	/* done with the last indirect; data block already exists */
464 	if (flags & BA_CLRBUF) {
465 		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
466 		if (seqcount != 0 &&
467 		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
468 		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
469 			error = cluster_read(vp, ip->i_size, lbn,
470 			    (int)fs->fs_bsize, NOCRED,
471 			    MAXBSIZE, seqcount, gbflags, &nbp);
472 		} else {
473 			error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
474 			    gbflags, &nbp);
475 		}
476 		if (error) {
477 			brelse(nbp);
478 			goto fail;
479 		}
480 	} else {
481 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
482 		nbp->b_blkno = fsbtodb(fs, nb);
483 	}
484 	curthread_pflags_restore(saved_inbdflush);
485 	*bpp = nbp;
486 	error = 0;
487 	goto done;
488 fail:
489 	curthread_pflags_restore(saved_inbdflush);
490 	/*
491 	 * If we have failed to allocate any blocks, simply return the error.
492 	 * This is the usual case and avoids the need to fsync the file.
493 	 */
494 	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
495 		goto done;
496 	/*
497 	 * If we have failed part way through block allocation, we
498 	 * have to deallocate any indirect blocks that we have allocated.
499 	 * We have to fsync the file before we start to get rid of all
500 	 * of its dependencies so that we do not leave them dangling.
501 	 * We have to sync it at the end so that the soft updates code
502 	 * does not find any untracked changes. Although this is really
503 	 * slow, running out of disk space is not expected to be a common
504 	 * occurrence. The error return from fsync is ignored as we already
505 	 * have an error to return to the user.
506 	 *
507 	 * XXX Still have to journal the free below
508 	 */
509 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
510 	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
511 	     blkp < allocblk; blkp++, lbns_remfree++) {
512 		/*
513 		 * We shall not leave the freed blocks on the vnode
514 		 * buffer object lists.
515 		 */
516 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
517 		    GB_NOCREAT | GB_UNMAPPED);
518 		if (bp != NULL) {
519 			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
520 			    ("mismatch1 l %jd %jd b %ju %ju",
521 			    (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
522 			    (uintmax_t)bp->b_blkno,
523 			    (uintmax_t)fsbtodb(fs, *blkp)));
524 			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
525 			bp->b_flags &= ~(B_ASYNC | B_CACHE);
526 			brelse(bp);
527 		}
528 		deallocated += fs->fs_bsize;
529 	}
530 	if (allocib != NULL) {
531 		*allocib = 0;	/* detach the newly created first indirect from the inode */
532 	} else if (unwindidx >= 0) {
533 		int r;
534 
535 		r = bread(vp, indirs[unwindidx].in_lbn,
536 		    (int)fs->fs_bsize, NOCRED, &bp);
537 		if (r) {
538 			panic("Could not unwind indirect block, error %d", r);
539 			brelse(bp);	/* unreachable after panic */
540 		} else {
541 			bap = (ufs1_daddr_t *)bp->b_data;
542 			bap[indirs[unwindidx].in_off] = 0;
543 			if (flags & IO_SYNC) {
544 				bwrite(bp);
545 			} else {
546 				if (bp->b_bufsize == fs->fs_bsize)
547 					bp->b_flags |= B_CLUSTEROK;
548 				bdwrite(bp);
549 			}
550 		}
551 	}
552 	if (deallocated) {
553 #ifdef QUOTA
554 		/*
555 		 * Restore user's disk quota because allocation failed.
556 		 */
557 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
558 #endif
559 		dp->di_blocks -= btodb(deallocated);
560 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
561 	}
562 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
563 	/*
564 	 * After the buffers are invalidated and on-disk pointers are
565 	 * cleared, free the blocks.
566 	 */
567 	for (blkp = allociblk; blkp < allocblk; blkp++) {
568 #ifdef INVARIANTS
569 		/* Verify that no freed block still lingers in the buffer cache. */
570 		if (blkp == allociblk)
571 			lbns_remfree = lbns;
572 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
573 		    GB_NOCREAT | GB_UNMAPPED);
574 		if (bp != NULL) {
575 			panic("zombie1 %jd %ju %ju",
576 			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
577 			    (uintmax_t)fsbtodb(fs, *blkp));
578 		}
579 		lbns_remfree++;
580 #endif
581 		ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
582 		    ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
583 	}
584 done:
585 	vn_seqc_write_end(vp);
586 	return (error);
587 }
586 
587 /*
588  * Balloc defines the structure of file system storage
589  * by allocating the physical blocks on a device given
590  * the inode and the logical block number in a file.
591  * This is the allocation strategy for UFS2. Above is
592  * the allocation strategy for UFS1.
593  */
594 int
595 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
596     struct ucred *cred, int flags, struct buf **bpp)
597 {
598 	struct inode *ip;
599 	struct ufs2_dinode *dp;
600 	ufs_lbn_t lbn, lastlbn;
601 	struct fs *fs;
602 	struct buf *bp, *nbp;
603 	struct mount *mp;
604 	struct ufsmount *ump;
605 	struct indir indirs[UFS_NIADDR + 2];
606 	ufs2_daddr_t nb, newb, *bap, pref;
607 	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
608 	ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
609 	int deallocated, osize, nsize, num, i, error;
610 	int unwindidx = -1;
611 	int saved_inbdflush;
612 	int gbflags, gbwflag, reclaimed;
613 
614 	ip = VTOI(vp);
615 	dp = ip->i_din2;
616 	fs = ITOFS(ip);
617 	mp = ITOVFS(ip);
618 	ump = ITOUMP(ip);
619 	lbn = lblkno(fs, startoffset);
620 	size = blkoff(fs, startoffset) + size;
621 	reclaimed = 0;
622 	if (size > fs->fs_bsize)
623 		panic("ffs_balloc_ufs2: blk too big");
624 	*bpp = NULL;
625 	if (lbn < 0)
626 		return (EFBIG);
627 	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
628 #ifdef WITNESS
629 	gbwflag = IS_SNAPSHOT(ip) ? GB_NOWITNESS : 0;
630 	gbflags |= gbwflag;
631 #else
632 	gbwflag = 0;
633 #endif
634 
635 	vn_seqc_write_begin(vp);
636 
637 	/*
638 	 * Check for allocating external data.
639 	 */
640 	if (flags & IO_EXT) {
641 		if (lbn >= UFS_NXADDR) {
642 			error = EFBIG;
643 			goto done;
644 		}
645 
646 		/*
647 		 * If the next write will extend the data into a new block,
648 		 * and the data is currently composed of a fragment
649 		 * this fragment has to be extended to be a full block.
650 		 */
651 		lastlbn = lblkno(fs, dp->di_extsize);
652 		if (lastlbn < lbn) {
653 			nb = lastlbn;
654 			osize = sblksize(fs, dp->di_extsize, nb);
655 			if (osize < fs->fs_bsize && osize > 0) {
656 				UFS_LOCK(ump);
657 				error = ffs_realloccg(ip, -1 - nb,
658 				    dp->di_extb[nb],
659 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
660 				    &dp->di_extb[0]), osize,
661 				    (int)fs->fs_bsize, flags, cred, &bp);
662 				if (error)
663 					goto done;
664 				if (DOINGSOFTDEP(vp))
665 					softdep_setup_allocext(ip, nb,
666 					    dbtofsb(fs, bp->b_blkno),
667 					    dp->di_extb[nb],
668 					    fs->fs_bsize, osize, bp);
669 				dp->di_extsize = smalllblktosize(fs, nb + 1);
670 				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
671 				bp->b_xflags |= BX_ALTDATA;
672 				UFS_INODE_SET_FLAG(ip,
673 				    IN_SIZEMOD | IN_CHANGE | IN_IBLKDATA);
674 				if (flags & IO_SYNC)
675 					bwrite(bp);
676 				else
677 					bawrite(bp);
678 			}
679 		}
680 		/*
681 		 * All blocks are direct blocks
682 		 */
683 		if (flags & BA_METAONLY)
684 			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
685 		nb = dp->di_extb[lbn];
686 		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
687 			error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
688 			    gbflags, &bp);
689 			if (error)
690 				goto done;
691 			bp->b_blkno = fsbtodb(fs, nb);
692 			bp->b_xflags |= BX_ALTDATA;
693 			*bpp = bp;
694 			goto done;
695 		}
696 		if (nb != 0) {
697 			/*
698 			 * Consider need to reallocate a fragment.
699 			 */
700 			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
701 			nsize = fragroundup(fs, size);
702 			if (nsize <= osize) {
703 				error = bread_gb(vp, -1 - lbn, osize, NOCRED,
704 				    gbflags, &bp);
705 				if (error)
706 					goto done;
707 				bp->b_blkno = fsbtodb(fs, nb);
708 				bp->b_xflags |= BX_ALTDATA;
709 			} else {
710 				UFS_LOCK(ump);
711 				error = ffs_realloccg(ip, -1 - lbn,
712 				    dp->di_extb[lbn],
713 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
714 				    &dp->di_extb[0]), osize, nsize, flags,
715 				    cred, &bp);
716 				if (error)
717 					goto done;
718 				bp->b_xflags |= BX_ALTDATA;
719 				if (DOINGSOFTDEP(vp))
720 					softdep_setup_allocext(ip, lbn,
721 					    dbtofsb(fs, bp->b_blkno), nb,
722 					    nsize, osize, bp);
723 			}
724 		} else {
725 			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
726 				nsize = fragroundup(fs, size);
727 			else
728 				nsize = fs->fs_bsize;
729 			UFS_LOCK(ump);
730 			error = ffs_alloc(ip, lbn,
731 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
732 			   nsize, flags, cred, &newb);
733 			if (error)
734 				goto done;
735 			bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
736 			bp->b_blkno = fsbtodb(fs, newb);
737 			bp->b_xflags |= BX_ALTDATA;
738 			if (flags & BA_CLRBUF)
739 				vfs_bio_clrbuf(bp);
740 			if (DOINGSOFTDEP(vp))
741 				softdep_setup_allocext(ip, lbn, newb, 0,
742 				    nsize, 0, bp);
743 		}
744 		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
745 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_IBLKDATA);
746 		*bpp = bp;
747 		error = 0;
748 		goto done;
749 	}
750 	/*
751 	 * If the next write will extend the file into a new block,
752 	 * and the file is currently composed of a fragment
753 	 * this fragment has to be extended to be a full block.
754 	 */
755 	lastlbn = lblkno(fs, ip->i_size);
756 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
757 		nb = lastlbn;
758 		osize = blksize(fs, ip, nb);
759 		if (osize < fs->fs_bsize && osize > 0) {
760 			UFS_LOCK(ump);
761 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
762 			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
763 			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
764 			    flags, cred, &bp);
765 			if (error)
766 				goto done;
767 			if (DOINGSOFTDEP(vp))
768 				softdep_setup_allocdirect(ip, nb,
769 				    dbtofsb(fs, bp->b_blkno),
770 				    dp->di_db[nb],
771 				    fs->fs_bsize, osize, bp);
772 			ip->i_size = smalllblktosize(fs, nb + 1);
773 			dp->di_size = ip->i_size;
774 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
775 			UFS_INODE_SET_FLAG(ip,
776 			    IN_SIZEMOD |IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
777 			if (flags & IO_SYNC)
778 				bwrite(bp);
779 			else
780 				bawrite(bp);
781 		}
782 	}
783 	/*
784 	 * The first UFS_NDADDR blocks are direct blocks
785 	 */
786 	if (lbn < UFS_NDADDR) {
787 		if (flags & BA_METAONLY)
788 			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
789 		nb = dp->di_db[lbn];
790 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
791 			if ((flags & BA_CLRBUF) != 0) {
792 				error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
793 				    gbflags, &bp);
794 				if (error != 0)
795 					goto done;
796 			} else {
797 				bp = getblk(vp, lbn, fs->fs_bsize, 0, 0,
798 				    gbflags);
799 				if (bp == NULL) {
800 					error = EIO;
801 					goto done;
802 				}
803 				vfs_bio_clrbuf(bp);
804 			}
805 			bp->b_blkno = fsbtodb(fs, nb);
806 			*bpp = bp;
807 			error = 0;
808 			goto done;
809 		}
810 		if (nb != 0) {
811 			/*
812 			 * Consider need to reallocate a fragment.
813 			 */
814 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
815 			nsize = fragroundup(fs, size);
816 			if (nsize <= osize) {
817 				error = bread_gb(vp, lbn, osize, NOCRED,
818 				    gbflags, &bp);
819 				if (error)
820 					goto done;
821 				bp->b_blkno = fsbtodb(fs, nb);
822 			} else {
823 				UFS_LOCK(ump);
824 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
825 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
826 				    &dp->di_db[0]), osize, nsize, flags,
827 				    cred, &bp);
828 				if (error)
829 					goto done;
830 				if (DOINGSOFTDEP(vp))
831 					softdep_setup_allocdirect(ip, lbn,
832 					    dbtofsb(fs, bp->b_blkno), nb,
833 					    nsize, osize, bp);
834 			}
835 		} else {
836 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
837 				nsize = fragroundup(fs, size);
838 			else
839 				nsize = fs->fs_bsize;
840 			UFS_LOCK(ump);
841 			error = ffs_alloc(ip, lbn,
842 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
843 				&dp->di_db[0]), nsize, flags, cred, &newb);
844 			if (error)
845 				goto done;
846 			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
847 			bp->b_blkno = fsbtodb(fs, newb);
848 			if (flags & BA_CLRBUF)
849 				vfs_bio_clrbuf(bp);
850 			if (DOINGSOFTDEP(vp))
851 				softdep_setup_allocdirect(ip, lbn, newb, 0,
852 				    nsize, 0, bp);
853 		}
854 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
855 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
856 		*bpp = bp;
857 		error = 0;
858 		goto done;
859 	}
860 	/*
861 	 * Determine the number of levels of indirection.
862 	 */
863 	pref = 0;
864 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
865 		goto done;
866 #ifdef INVARIANTS
867 	if (num < 1)
868 		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
869 #endif
870 	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
871 	/*
872 	 * Fetch the first indirect block allocating if necessary.
873 	 */
874 	--num;
875 	nb = dp->di_ib[indirs[0].in_off];
876 	allocib = NULL;
877 	allocblk = allociblk;
878 	lbns_remfree = lbns;
879 	if (nb == 0) {
880 		UFS_LOCK(ump);
881 		pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
882 		    (ufs2_daddr_t *)0);
883 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
884 		    flags, cred, &newb)) != 0) {
885 			curthread_pflags_restore(saved_inbdflush);
886 			goto done;
887 		}
888 		pref = newb + fs->fs_frag;
889 		nb = newb;
890 		MPASS(allocblk < allociblk + nitems(allociblk));
891 		MPASS(lbns_remfree < lbns + nitems(lbns));
892 		*allocblk++ = nb;
893 		*lbns_remfree++ = indirs[1].in_lbn;
894 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
895 		    GB_UNMAPPED | gbwflag);
896 		bp->b_blkno = fsbtodb(fs, nb);
897 		vfs_bio_clrbuf(bp);
898 		if (DOINGSOFTDEP(vp)) {
899 			softdep_setup_allocdirect(ip,
900 			    UFS_NDADDR + indirs[0].in_off, newb, 0,
901 			    fs->fs_bsize, 0, bp);
902 			bdwrite(bp);
903 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
904 			if (bp->b_bufsize == fs->fs_bsize)
905 				bp->b_flags |= B_CLUSTEROK;
906 			bdwrite(bp);
907 		} else {
908 			if ((error = bwrite(bp)) != 0)
909 				goto fail;
910 		}
911 		allocib = &dp->di_ib[indirs[0].in_off];
912 		*allocib = nb;
913 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
914 	}
915 	/*
916 	 * Fetch through the indirect blocks, allocating as necessary.
917 	 */
918 retry:
	/*
	 * Restart point: taken (at most once, see "reclaimed") after
	 * softdep_request_cleanup() has tried to reclaim space following
	 * a failed block allocation.
	 */
919 	for (i = 1;;) {
920 		error = bread_gb(vp, indirs[i].in_lbn, (int)fs->fs_bsize,
921 		    NOCRED, gbwflag, &bp);
922 		if (error) {
923 			goto fail;
924 		}
925 		bap = (ufs2_daddr_t *)bp->b_data;
926 		nb = bap[indirs[i].in_off];
927 		if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
928 		    fs->fs_bsize)) != 0) {
929 			brelse(bp);
930 			goto fail;
931 		}
		/* Level "num" is the indirect that maps the data block. */
932 		if (i == num)
933 			break;
934 		i += 1;
		/* This level is already allocated; just descend into it. */
935 		if (nb != 0) {
936 			bqrelse(bp);
937 			continue;
938 		}
939 		UFS_LOCK(ump);
940 		/*
941 		 * If parent indirect has just been allocated, try to cluster
942 		 * immediately following it.
943 		 */
944 		if (pref == 0)
945 			pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
946 			    (ufs2_daddr_t *)0);
947 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
948 		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
949 			brelse(bp);
			/*
			 * NOTE(review): ffs_alloc() appears to return with
			 * the UFS lock dropped (it is retaken here and in
			 * the identical error path below) -- confirm against
			 * ffs_alloc()'s locking contract.
			 */
950 			UFS_LOCK(ump);
951 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
				/*
				 * First failure only: flush soft-updates
				 * dependencies to reclaim space, then retry
				 * the whole walk once.
				 */
952 				softdep_request_cleanup(fs, vp, cred,
953 				    FLUSH_BLOCKS_WAIT);
954 				UFS_UNLOCK(ump);
955 				goto retry;
956 			}
957 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
958 			    ppsratecheck(&ump->um_last_fullmsg,
959 			    &ump->um_secs_fullmsg, 1)) {
960 				UFS_UNLOCK(ump);
961 				ffs_fserr(fs, ip->i_number, "filesystem full");
962 				uprintf("\n%s: write failed, filesystem "
963 				    "is full\n", fs->fs_fsmnt);
964 			} else {
965 				UFS_UNLOCK(ump);
966 			}
967 			goto fail;
968 		}
		/* Prefer to cluster the next allocation right after this one. */
969 		pref = newb + fs->fs_frag;
970 		nb = newb;
		/*
		 * Record the new block and its lbn so the fail path can
		 * invalidate the buffer and free the block on error.
		 */
971 		MPASS(allocblk < allociblk + nitems(allociblk));
972 		MPASS(lbns_remfree < lbns + nitems(lbns));
973 		*allocblk++ = nb;
974 		*lbns_remfree++ = indirs[i].in_lbn;
975 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
976 		    GB_UNMAPPED);
977 		nbp->b_blkno = fsbtodb(fs, nb);
978 		vfs_bio_clrbuf(nbp);
979 		if (DOINGSOFTDEP(vp)) {
980 			softdep_setup_allocindir_meta(nbp, ip, bp,
981 			    indirs[i - 1].in_off, nb);
982 			bdwrite(nbp);
983 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
984 			if (nbp->b_bufsize == fs->fs_bsize)
985 				nbp->b_flags |= B_CLUSTEROK;
986 			bdwrite(nbp);
987 		} else {
988 			if ((error = bwrite(nbp)) != 0) {
989 				brelse(bp);
990 				goto fail;
991 			}
992 		}
		/* Hook the new indirect block into its parent. */
993 		bap[indirs[i - 1].in_off] = nb;
		/*
		 * Remember the shallowest in-core parent entry we modified,
		 * so the fail path knows which pointer to zero when
		 * unwinding (only when the top-level di_ib slot, tracked by
		 * allocib, was not itself newly set).
		 */
994 		if (allocib == NULL && unwindidx < 0)
995 			unwindidx = i - 1;
996 		/*
997 		 * If required, write synchronously, otherwise use
998 		 * delayed write.
999 		 */
1000 		if (flags & IO_SYNC) {
1001 			bwrite(bp);
1002 		} else {
1003 			if (bp->b_bufsize == fs->fs_bsize)
1004 				bp->b_flags |= B_CLUSTEROK;
1005 			bdwrite(bp);
1006 		}
1007 	}
1008 	/*
1009 	 * If asked only for the indirect block, then return it.
1010 	 */
1011 	if (flags & BA_METAONLY) {
1012 		curthread_pflags_restore(saved_inbdflush);
1013 		*bpp = bp;
1014 		error = 0;
1015 		goto done;
1016 	}
1017 	/*
1018 	 * Get the data block, allocating if necessary.
1019 	 */
1020 	if (nb == 0) {
1021 		UFS_LOCK(ump);
1022 		/*
1023 		 * If allocating metadata at the front of the cylinder
1024 		 * group and parent indirect block has just been allocated,
1025 		 * then cluster next to it if it is the first indirect in
1026 		 * the file. Otherwise it has been allocated in the metadata
1027 		 * area, so we want to find our own place out in the data area.
1028 		 */
1029 		if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
1030 			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
1031 			    &bap[0]);
1032 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
1033 		    flags | IO_BUFLOCKED, cred, &newb);
1034 		if (error) {
1035 			brelse(bp);
			/* Same lock-handoff pattern as the indirect loop above. */
1036 			UFS_LOCK(ump);
1037 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
1038 				softdep_request_cleanup(fs, vp, cred,
1039 				    FLUSH_BLOCKS_WAIT);
1040 				UFS_UNLOCK(ump);
1041 				goto retry;
1042 			}
1043 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
1044 			    ppsratecheck(&ump->um_last_fullmsg,
1045 			    &ump->um_secs_fullmsg, 1)) {
1046 				UFS_UNLOCK(ump);
1047 				ffs_fserr(fs, ip->i_number, "filesystem full");
1048 				uprintf("\n%s: write failed, filesystem "
1049 				    "is full\n", fs->fs_fsmnt);
1050 			} else {
1051 				UFS_UNLOCK(ump);
1052 			}
1053 			goto fail;
1054 		}
1055 		nb = newb;
		/* Track the new data block for the failure unwind, too. */
1056 		MPASS(allocblk < allociblk + nitems(allociblk));
1057 		MPASS(lbns_remfree < lbns + nitems(lbns));
1058 		*allocblk++ = nb;
1059 		*lbns_remfree++ = lbn;
1060 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1061 		nbp->b_blkno = fsbtodb(fs, nb);
1062 		if (flags & BA_CLRBUF)
1063 			vfs_bio_clrbuf(nbp);
1064 		if (DOINGSOFTDEP(vp))
1065 			softdep_setup_allocindir_page(ip, lbn, bp,
1066 			    indirs[i].in_off, nb, 0, nbp);
		/* Point the final-level indirect at the new data block. */
1067 		bap[indirs[i].in_off] = nb;
1068 		/*
1069 		 * If required, write synchronously, otherwise use
1070 		 * delayed write.
1071 		 */
1072 		if (flags & IO_SYNC) {
1073 			bwrite(bp);
1074 		} else {
1075 			if (bp->b_bufsize == fs->fs_bsize)
1076 				bp->b_flags |= B_CLUSTEROK;
1077 			bdwrite(bp);
1078 		}
1079 		curthread_pflags_restore(saved_inbdflush);
1080 		*bpp = nbp;
1081 		error = 0;
1082 		goto done;
1083 	}
	/* Data block already exists; done with the last indirect buffer. */
1084 	brelse(bp);
1085 	/*
1086 	 * If requested clear invalid portions of the buffer.  If we
1087 	 * have to do a read-before-write (typical if BA_CLRBUF is set),
1088 	 * try to do some read-ahead in the sequential case to reduce
1089 	 * the number of I/O transactions.
1090 	 */
1091 	if (flags & BA_CLRBUF) {
1092 		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
1093 		if (seqcount != 0 &&
1094 		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
1095 		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
1096 			error = cluster_read(vp, ip->i_size, lbn,
1097 			    (int)fs->fs_bsize, NOCRED,
1098 			    MAXBSIZE, seqcount, gbflags, &nbp);
1099 		} else {
1100 			error = bread_gb(vp, lbn, (int)fs->fs_bsize,
1101 			    NOCRED, gbflags, &nbp);
1102 		}
1103 		if (error) {
1104 			brelse(nbp);
1105 			goto fail;
1106 		}
1107 	} else {
1108 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1109 		nbp->b_blkno = fsbtodb(fs, nb);
1110 	}
1111 	curthread_pflags_restore(saved_inbdflush);
1112 	*bpp = nbp;
1113 	error = 0;
1114 	goto done;
1115 fail:
1116 	curthread_pflags_restore(saved_inbdflush);
1117 	/*
1118 	 * If we have failed to allocate any blocks, simply return the error.
1119 	 * This is the usual case and avoids the need to fsync the file.
1120 	 */
1121 	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
1122 		goto done;
1123 	/*
1124 	 * If we have failed part way through block allocation, we
1125 	 * have to deallocate any indirect blocks that we have allocated.
1126 	 * We have to fsync the file before we start to get rid of all
1127 	 * of its dependencies so that we do not leave them dangling.
1128 	 * We have to sync it at the end so that the soft updates code
1129 	 * does not find any untracked changes. Although this is really
1130 	 * slow, running out of disk space is not expected to be a common
1131 	 * occurrence. The error return from fsync is ignored as we already
1132 	 * have an error to return to the user.
1133 	 *
1134 	 * XXX Still have to journal the free below
1135 	 */
1136 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
1137 	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
1138 	     blkp < allocblk; blkp++, lbns_remfree++) {
1139 		/*
1140 		 * We shall not leave the freed blocks on the vnode
1141 		 * buffer object lists.
1142 		 */
1143 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1144 		    GB_NOCREAT | GB_UNMAPPED | gbwflag);
1145 		if (bp != NULL) {
1146 			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
1147 			    ("mismatch2 l %jd %jd b %ju %ju",
1148 			    (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
1149 			    (uintmax_t)bp->b_blkno,
1150 			    (uintmax_t)fsbtodb(fs, *blkp)));
			/* Discard the buffer so it cannot be written back. */
1151 			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
1152 			bp->b_flags &= ~(B_ASYNC | B_CACHE);
1153 			brelse(bp);
1154 		}
1155 		deallocated += fs->fs_bsize;
1156 	}
	/*
	 * Clear the single on-disk pointer that roots everything we
	 * allocated: either the inode's di_ib slot, or the entry in the
	 * shallowest pre-existing indirect block (unwindidx).
	 */
1157 	if (allocib != NULL) {
1158 		*allocib = 0;
1159 	} else if (unwindidx >= 0) {
1160 		int r;
1161 
1162 		r = bread_gb(vp, indirs[unwindidx].in_lbn,
1163 		    (int)fs->fs_bsize, NOCRED, gbwflag, &bp);
1164 		if (r) {
1165 			panic("Could not unwind indirect block, error %d", r);
			/* NOTREACHED: panic() does not return; brelse() is defensive. */
1166 			brelse(bp);
1167 		} else {
1168 			bap = (ufs2_daddr_t *)bp->b_data;
1169 			bap[indirs[unwindidx].in_off] = 0;
1170 			if (flags & IO_SYNC) {
1171 				bwrite(bp);
1172 			} else {
1173 				if (bp->b_bufsize == fs->fs_bsize)
1174 					bp->b_flags |= B_CLUSTEROK;
1175 				bdwrite(bp);
1176 			}
1177 		}
1178 	}
1179 	if (deallocated) {
1180 #ifdef QUOTA
1181 		/*
1182 		 * Restore user's disk quota because allocation failed.
1183 		 */
1184 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
1185 #endif
1186 		dp->di_blocks -= btodb(deallocated);
1187 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
1188 	}
1189 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
1190 	/*
1191 	 * After the buffers are invalidated and on-disk pointers are
1192 	 * cleared, free the blocks.
1193 	 */
1194 	for (blkp = allociblk; blkp < allocblk; blkp++) {
1195 #ifdef INVARIANTS
		/* Assert no cached buffer still maps the block being freed. */
1196 		if (blkp == allociblk)
1197 			lbns_remfree = lbns;
1198 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1199 		    GB_NOCREAT | GB_UNMAPPED | gbwflag);
1200 		if (bp != NULL) {
1201 			panic("zombie2 %jd %ju %ju",
1202 			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
1203 			    (uintmax_t)fsbtodb(fs, *blkp));
1204 		}
1205 		lbns_remfree++;
1206 #endif
1207 		ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
1208 		    ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
1209 	}
1210 done:
1211 	vn_seqc_write_end(vp);
1212 	return (error);
1213 }
1214