xref: /freebsd/sys/ufs/ffs/ffs_balloc.c (revision f2d48b5e2c3b45850585e4d7aee324fe148afbf2)
1 /*-
2  * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND BSD-3-Clause)
3  *
4  * Copyright (c) 2002 Networks Associates Technology, Inc.
5  * All rights reserved.
6  *
7  * This software was developed for the FreeBSD Project by Marshall
8  * Kirk McKusick and Network Associates Laboratories, the Security
9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11  * research program
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * Copyright (c) 1982, 1986, 1989, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
62  */
63 
64 #include <sys/cdefs.h>
65 __FBSDID("$FreeBSD$");
66 
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/bio.h>
70 #include <sys/buf.h>
71 #include <sys/lock.h>
72 #include <sys/mount.h>
73 #include <sys/vnode.h>
74 #include <sys/vmmeter.h>
75 
76 #include <ufs/ufs/quota.h>
77 #include <ufs/ufs/inode.h>
78 #include <ufs/ufs/ufs_extern.h>
79 #include <ufs/ufs/extattr.h>
80 #include <ufs/ufs/ufsmount.h>
81 
82 #include <ufs/ffs/fs.h>
83 #include <ufs/ffs/ffs_extern.h>
84 
85 /*
86  * Balloc defines the structure of filesystem storage
87  * by allocating the physical blocks on a device given
88  * the inode and the logical block number in a file.
89  * This is the allocation strategy for UFS1. Below is
90  * the allocation strategy for UFS2.
 *
 * Arguments, as they are used in this function:
 *	vp		vnode of the file; its inode is obtained with VTOI().
 *	startoffset	byte offset in the file; the logical block is
 *			derived from it with lblkno().
 *	size		byte count starting at startoffset; the in-block
 *			offset plus size must not exceed fs_bsize (panic
 *			otherwise).
 *	cred		credential passed to the allocation routines (and to
 *			chkdq() when a failed allocation is unwound).
 *	flags		IO_ and BA_ flags.  IO_EXT is rejected with
 *			EOPNOTSUPP; with BA_METAONLY the last indirect
 *			block is returned instead of the data block.
 *	bpp		set to NULL on entry; on success it receives the
 *			buffer for the requested block.
 *
 * Returns 0 on success, otherwise an errno value: EFBIG for a negative
 * logical block number, EIO when getblk() returns no buffer for an
 * already allocated direct block, or the error returned by one of the
 * allocation, lookup, read or write calls made below.
91  */
92 int
93 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
94     struct ucred *cred, int flags, struct buf **bpp)
95 {
96 	struct inode *ip;
97 	struct ufs1_dinode *dp;
98 	ufs_lbn_t lbn, lastlbn;
99 	struct fs *fs;
100 	ufs1_daddr_t nb;
101 	struct buf *bp, *nbp;
102 	struct mount *mp;
103 	struct ufsmount *ump;
104 	struct indir indirs[UFS_NIADDR + 2];
105 	int deallocated, osize, nsize, num, i, error;
106 	ufs2_daddr_t newb;
107 	ufs1_daddr_t *bap, pref;
108 	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
109 	ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
110 	int unwindidx = -1;
111 	int saved_inbdflush;
112 	int gbflags, reclaimed;
113 
114 	ip = VTOI(vp);
115 	dp = ip->i_din1;
116 	fs = ITOFS(ip);
117 	mp = ITOVFS(ip);
118 	ump = ITOUMP(ip);
119 	lbn = lblkno(fs, startoffset);
	/* From here on, size is the end offset of the request within its block. */
120 	size = blkoff(fs, startoffset) + size;
121 	reclaimed = 0;
122 	if (size > fs->fs_bsize)
123 		panic("ffs_balloc_ufs1: blk too big");
124 	*bpp = NULL;
	/* IO_EXT is refused by this UFS1 routine; ffs_balloc_ufs2() handles it. */
125 	if (flags & IO_EXT)
126 		return (EOPNOTSUPP);
127 	if (lbn < 0)
128 		return (EFBIG);
	/* BA_UNMAPPED callers get GB_UNMAPPED passed to the buffer lookups. */
129 	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
130 
131 	if (DOINGSOFTDEP(vp))
132 		softdep_prealloc(vp, MNT_WAIT);
133 	/*
134 	 * If the next write will extend the file into a new block,
135 	 * and the file is currently composed of a fragment
136 	 * this fragment has to be extended to be a full block.
137 	 */
138 	lastlbn = lblkno(fs, ip->i_size);
139 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
140 		nb = lastlbn;
141 		osize = blksize(fs, ip, nb);
142 		if (osize < fs->fs_bsize && osize > 0) {
			/*
			 * NOTE(review): no UFS_UNLOCK() is paired with this
			 * lock inside this function; presumably the allocator
			 * called next releases it -- confirm.
			 */
143 			UFS_LOCK(ump);
144 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
145 			   ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
146 			   &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
147 			   cred, &bp);
148 			if (error)
149 				return (error);
150 			if (DOINGSOFTDEP(vp))
151 				softdep_setup_allocdirect(ip, nb,
152 				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
153 				    fs->fs_bsize, osize, bp);
			/* The file now ends exactly at the end of block nb. */
154 			ip->i_size = smalllblktosize(fs, nb + 1);
155 			dp->di_size = ip->i_size;
156 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
157 			UFS_INODE_SET_FLAG(ip,
158 			    IN_SIZEMOD | IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
159 			if (flags & IO_SYNC)
160 				bwrite(bp);
161 			else if (DOINGASYNC(vp))
162 				bdwrite(bp);
163 			else
164 				bawrite(bp);
165 		}
166 	}
167 	/*
168 	 * The first UFS_NDADDR blocks are direct blocks
169 	 */
170 	if (lbn < UFS_NDADDR) {
171 		if (flags & BA_METAONLY)
172 			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
173 		nb = dp->di_db[lbn];
		/*
		 * Block is allocated and lies entirely below the current
		 * file size: hand back its buffer, read from disk only
		 * when BA_CLRBUF is set.
		 */
174 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
175 			if ((flags & BA_CLRBUF) != 0) {
176 				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
177 				    &bp);
178 				if (error != 0)
179 					return (error);
180 			} else {
181 				bp = getblk(vp, lbn, fs->fs_bsize, 0, 0,
182 				    gbflags);
183 				if (bp == NULL)
184 					return (EIO);
185 				vfs_bio_clrbuf(bp);
186 			}
187 			bp->b_blkno = fsbtodb(fs, nb);
188 			*bpp = bp;
189 			return (0);
190 		}
191 		if (nb != 0) {
192 			/*
193 			 * Consider need to reallocate a fragment.
194 			 */
195 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
196 			nsize = fragroundup(fs, size);
			/* The existing fragment is already large enough. */
197 			if (nsize <= osize) {
198 				error = bread(vp, lbn, osize, NOCRED, &bp);
199 				if (error) {
200 					return (error);
201 				}
202 				bp->b_blkno = fsbtodb(fs, nb);
203 			} else {
204 				UFS_LOCK(ump);
205 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
206 				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
207 				    &dp->di_db[0]), osize, nsize, flags,
208 				    cred, &bp);
209 				if (error)
210 					return (error);
211 				if (DOINGSOFTDEP(vp))
212 					softdep_setup_allocdirect(ip, lbn,
213 					    dbtofsb(fs, bp->b_blkno), nb,
214 					    nsize, osize, bp);
215 			}
216 		} else {
			/*
			 * No block yet: allocate a fragment-rounded size when
			 * the block extends past the current file size,
			 * otherwise a full block.
			 */
217 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
218 				nsize = fragroundup(fs, size);
219 			else
220 				nsize = fs->fs_bsize;
221 			UFS_LOCK(ump);
222 			error = ffs_alloc(ip, lbn,
223 			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
224 			    nsize, flags, cred, &newb);
225 			if (error)
226 				return (error);
227 			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
228 			bp->b_blkno = fsbtodb(fs, newb);
229 			if (flags & BA_CLRBUF)
230 				vfs_bio_clrbuf(bp);
231 			if (DOINGSOFTDEP(vp))
232 				softdep_setup_allocdirect(ip, lbn, newb, 0,
233 				    nsize, 0, bp);
234 		}
235 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
236 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
237 		*bpp = bp;
238 		return (0);
239 	}
240 	/*
241 	 * Determine the number of levels of indirection.
242 	 */
243 	pref = 0;
244 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
245 		return(error);
246 #ifdef INVARIANTS
247 	if (num < 1)
248 		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
249 #endif
	/*
	 * Set TDP_INBDFLUSH on the current thread; every return past
	 * this point restores the saved value first.
	 */
250 	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
251 	/*
252 	 * Fetch the first indirect block allocating if necessary.
253 	 */
	/* After the decrement, the walk below stops when i reaches num. */
254 	--num;
255 	nb = dp->di_ib[indirs[0].in_off];
256 	allocib = NULL;
	/*
	 * allociblk[] and lbns[] collect, in parallel, the disk address
	 * and logical block number of every block allocated from here on
	 * so that the fail path can release them.
	 */
257 	allocblk = allociblk;
258 	lbns_remfree = lbns;
259 	if (nb == 0) {
260 		UFS_LOCK(ump);
261 		pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
262 		    (ufs1_daddr_t *)0);
263 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
264 		    flags, cred, &newb)) != 0) {
265 			curthread_pflags_restore(saved_inbdflush);
266 			return (error);
267 		}
268 		pref = newb + fs->fs_frag;
269 		nb = newb;
270 		MPASS(allocblk < allociblk + nitems(allociblk));
271 		MPASS(lbns_remfree < lbns + nitems(lbns));
272 		*allocblk++ = nb;
273 		*lbns_remfree++ = indirs[1].in_lbn;
274 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
275 		bp->b_blkno = fsbtodb(fs, nb);
276 		vfs_bio_clrbuf(bp);
277 		if (DOINGSOFTDEP(vp)) {
278 			softdep_setup_allocdirect(ip,
279 			    UFS_NDADDR + indirs[0].in_off, newb, 0,
280 			    fs->fs_bsize, 0, bp);
281 			bdwrite(bp);
282 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
283 			if (bp->b_bufsize == fs->fs_bsize)
284 				bp->b_flags |= B_CLUSTEROK;
285 			bdwrite(bp);
286 		} else {
287 			if ((error = bwrite(bp)) != 0)
288 				goto fail;
289 		}
		/* Remember the inode slot so the fail path can zero it. */
290 		allocib = &dp->di_ib[indirs[0].in_off];
291 		*allocib = nb;
292 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
293 	}
294 	/*
295 	 * Fetch through the indirect blocks, allocating as necessary.
296 	 */
297 retry:
298 	for (i = 1;;) {
299 		error = bread(vp,
300 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
301 		if (error) {
302 			goto fail;
303 		}
304 		bap = (ufs1_daddr_t *)bp->b_data;
305 		nb = bap[indirs[i].in_off];
306 		if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
307 		    fs->fs_bsize)) != 0) {
308 			brelse(bp);
309 			goto fail;
310 		}
311 		if (i == num)
312 			break;
		/*
		 * i is advanced before the test below, so from here on
		 * indirs[i] names the next-level block and indirs[i - 1]
		 * the slot in bp that points to it.
		 */
313 		i += 1;
314 		if (nb != 0) {
315 			bqrelse(bp);
316 			continue;
317 		}
318 		UFS_LOCK(ump);
319 		/*
320 		 * If parent indirect has just been allocated, try to cluster
321 		 * immediately following it.
322 		 */
323 		if (pref == 0)
324 			pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
325 			    (ufs1_daddr_t *)0);
326 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
327 		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
328 			brelse(bp);
			/* Lock is taken again here and dropped on each branch below. */
329 			UFS_LOCK(ump);
			/*
			 * On the first failure with soft updates active,
			 * request a cleanup and restart the walk once.
			 */
330 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
331 				softdep_request_cleanup(fs, vp, cred,
332 				    FLUSH_BLOCKS_WAIT);
333 				UFS_UNLOCK(ump);
334 				goto retry;
335 			}
336 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
337 			    ppsratecheck(&ump->um_last_fullmsg,
338 			    &ump->um_secs_fullmsg, 1)) {
339 				UFS_UNLOCK(ump);
340 				ffs_fserr(fs, ip->i_number, "filesystem full");
341 				uprintf("\n%s: write failed, filesystem "
342 				    "is full\n", fs->fs_fsmnt);
343 			} else {
344 				UFS_UNLOCK(ump);
345 			}
346 			goto fail;
347 		}
348 		pref = newb + fs->fs_frag;
349 		nb = newb;
350 		MPASS(allocblk < allociblk + nitems(allociblk));
351 		MPASS(lbns_remfree < lbns + nitems(lbns));
352 		*allocblk++ = nb;
353 		*lbns_remfree++ = indirs[i].in_lbn;
354 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
355 		nbp->b_blkno = fsbtodb(fs, nb);
356 		vfs_bio_clrbuf(nbp);
357 		if (DOINGSOFTDEP(vp)) {
358 			softdep_setup_allocindir_meta(nbp, ip, bp,
359 			    indirs[i - 1].in_off, nb);
360 			bdwrite(nbp);
361 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
362 			if (nbp->b_bufsize == fs->fs_bsize)
363 				nbp->b_flags |= B_CLUSTEROK;
364 			bdwrite(nbp);
365 		} else {
366 			if ((error = bwrite(nbp)) != 0) {
367 				brelse(bp);
368 				goto fail;
369 			}
370 		}
371 		bap[indirs[i - 1].in_off] = nb;
		/*
		 * When the top-level indirect was not allocated by this
		 * call, remember the first level whose pointer was filled
		 * in so the fail path can clear it again.
		 */
372 		if (allocib == NULL && unwindidx < 0)
373 			unwindidx = i - 1;
374 		/*
375 		 * If required, write synchronously, otherwise use
376 		 * delayed write.
377 		 */
378 		if (flags & IO_SYNC) {
379 			bwrite(bp);
380 		} else {
381 			if (bp->b_bufsize == fs->fs_bsize)
382 				bp->b_flags |= B_CLUSTEROK;
383 			bdwrite(bp);
384 		}
385 	}
386 	/*
387 	 * If asked only for the indirect block, then return it.
388 	 */
389 	if (flags & BA_METAONLY) {
390 		curthread_pflags_restore(saved_inbdflush);
391 		*bpp = bp;
392 		return (0);
393 	}
394 	/*
395 	 * Get the data block, allocating if necessary.
396 	 */
397 	if (nb == 0) {
398 		UFS_LOCK(ump);
399 		/*
400 		 * If allocating metadata at the front of the cylinder
401 		 * group and parent indirect block has just been allocated,
402 		 * then cluster next to it if it is the first indirect in
403 		 * the file. Otherwise it has been allocated in the metadata
404 		 * area, so we want to find our own place out in the data area.
405 		 */
406 		if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
407 			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
408 			    &bap[0]);
409 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
410 		    flags | IO_BUFLOCKED, cred, &newb);
411 		if (error) {
			/* Same failure handling as in the indirect-block walk. */
412 			brelse(bp);
413 			UFS_LOCK(ump);
414 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
415 				softdep_request_cleanup(fs, vp, cred,
416 				    FLUSH_BLOCKS_WAIT);
417 				UFS_UNLOCK(ump);
418 				goto retry;
419 			}
420 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
421 			    ppsratecheck(&ump->um_last_fullmsg,
422 			    &ump->um_secs_fullmsg, 1)) {
423 				UFS_UNLOCK(ump);
424 				ffs_fserr(fs, ip->i_number, "filesystem full");
425 				uprintf("\n%s: write failed, filesystem "
426 				    "is full\n", fs->fs_fsmnt);
427 			} else {
428 				UFS_UNLOCK(ump);
429 			}
430 			goto fail;
431 		}
432 		nb = newb;
433 		MPASS(allocblk < allociblk + nitems(allociblk));
434 		MPASS(lbns_remfree < lbns + nitems(lbns));
435 		*allocblk++ = nb;
436 		*lbns_remfree++ = lbn;
437 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
438 		nbp->b_blkno = fsbtodb(fs, nb);
439 		if (flags & BA_CLRBUF)
440 			vfs_bio_clrbuf(nbp);
441 		if (DOINGSOFTDEP(vp))
442 			softdep_setup_allocindir_page(ip, lbn, bp,
443 			    indirs[i].in_off, nb, 0, nbp);
444 		bap[indirs[i].in_off] = nb;
445 		/*
446 		 * If required, write synchronously, otherwise use
447 		 * delayed write.
448 		 */
449 		if (flags & IO_SYNC) {
450 			bwrite(bp);
451 		} else {
452 			if (bp->b_bufsize == fs->fs_bsize)
453 				bp->b_flags |= B_CLUSTEROK;
454 			bdwrite(bp);
455 		}
456 		curthread_pflags_restore(saved_inbdflush);
457 		*bpp = nbp;
458 		return (0);
459 	}
	/*
	 * The data block already exists, so the indirect block buffer is
	 * released.  With BA_CLRBUF the block is read (through
	 * cluster_read() when a sequential count is supplied, cluster
	 * reads are not disabled on the mount and neither severe-shortage
	 * check fires); otherwise only a buffer is obtained for it.
	 */
460 	brelse(bp);
461 	if (flags & BA_CLRBUF) {
462 		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
463 		if (seqcount != 0 &&
464 		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
465 		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
466 			error = cluster_read(vp, ip->i_size, lbn,
467 			    (int)fs->fs_bsize, NOCRED,
468 			    MAXBSIZE, seqcount, gbflags, &nbp);
469 		} else {
470 			error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
471 			    gbflags, &nbp);
472 		}
473 		if (error) {
474 			brelse(nbp);
475 			goto fail;
476 		}
477 	} else {
478 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
479 		nbp->b_blkno = fsbtodb(fs, nb);
480 	}
481 	curthread_pflags_restore(saved_inbdflush);
482 	*bpp = nbp;
483 	return (0);
484 fail:
485 	curthread_pflags_restore(saved_inbdflush);
486 	/*
487 	 * If we have failed to allocate any blocks, simply return the error.
488 	 * This is the usual case and avoids the need to fsync the file.
489 	 */
490 	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
491 		return (error);
492 	/*
493 	 * If we have failed part way through block allocation, we
494 	 * have to deallocate any indirect blocks that we have allocated.
495 	 * We have to fsync the file before we start to get rid of all
496 	 * of its dependencies so that we do not leave them dangling.
497 	 * We have to sync it at the end so that the soft updates code
498 	 * does not find any untracked changes. Although this is really
499 	 * slow, running out of disk space is not expected to be a common
500 	 * occurrence. The error return from fsync is ignored as we already
501 	 * have an error to return to the user.
502 	 *
503 	 * XXX Still have to journal the free below
504 	 */
505 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
	/*
	 * First pass over the recorded allocations: invalidate any buffer
	 * still present for each block and total up the bytes to give back.
	 */
506 	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
507 	     blkp < allocblk; blkp++, lbns_remfree++) {
508 		/*
509 		 * We shall not leave the freed blocks on the vnode
510 		 * buffer object lists.
511 		 */
512 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
513 		    GB_NOCREAT | GB_UNMAPPED);
514 		if (bp != NULL) {
515 			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
516 			    ("mismatch1 l %jd %jd b %ju %ju",
517 			    (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
518 			    (uintmax_t)bp->b_blkno,
519 			    (uintmax_t)fsbtodb(fs, *blkp)));
520 			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
521 			bp->b_flags &= ~(B_ASYNC | B_CACHE);
522 			brelse(bp);
523 		}
524 		deallocated += fs->fs_bsize;
525 	}
	/*
	 * Clear the pointer that linked the new blocks into the file:
	 * either the inode's indirect slot or the entry recorded in
	 * unwindidx.
	 */
526 	if (allocib != NULL) {
527 		*allocib = 0;
528 	} else if (unwindidx >= 0) {
529 		int r;
530 
531 		r = bread(vp, indirs[unwindidx].in_lbn,
532 		    (int)fs->fs_bsize, NOCRED, &bp);
533 		if (r) {
			/*
			 * NOTE(review): the brelse() below follows panic();
			 * presumably it is never reached -- confirm.
			 */
534 			panic("Could not unwind indirect block, error %d", r);
535 			brelse(bp);
536 		} else {
537 			bap = (ufs1_daddr_t *)bp->b_data;
538 			bap[indirs[unwindidx].in_off] = 0;
539 			if (flags & IO_SYNC) {
540 				bwrite(bp);
541 			} else {
542 				if (bp->b_bufsize == fs->fs_bsize)
543 					bp->b_flags |= B_CLUSTEROK;
544 				bdwrite(bp);
545 			}
546 		}
547 	}
548 	if (deallocated) {
549 #ifdef QUOTA
550 		/*
551 		 * Restore user's disk quota because allocation failed.
552 		 */
553 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
554 #endif
555 		dp->di_blocks -= btodb(deallocated);
556 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
557 	}
558 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
559 	/*
560 	 * After the buffers are invalidated and on-disk pointers are
561 	 * cleared, free the blocks.
562 	 */
563 	for (blkp = allociblk; blkp < allocblk; blkp++) {
564 #ifdef INVARIANTS
		/*
		 * Debug check: panic if a buffer can still be found for a
		 * block that is about to be freed.
		 */
565 		if (blkp == allociblk)
566 			lbns_remfree = lbns;
567 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
568 		    GB_NOCREAT | GB_UNMAPPED);
569 		if (bp != NULL) {
570 			panic("zombie1 %jd %ju %ju",
571 			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
572 			    (uintmax_t)fsbtodb(fs, *blkp));
573 		}
574 		lbns_remfree++;
575 #endif
576 		ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
577 		    ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
578 	}
579 	return (error);
580 }
581 
582 /*
583  * Balloc defines the structure of file system storage
584  * by allocating the physical blocks on a device given
585  * the inode and the logical block number in a file.
586  * This is the allocation strategy for UFS2. Above is
587  * the allocation strategy for UFS1.
588  */
589 int
590 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
591     struct ucred *cred, int flags, struct buf **bpp)
592 {
593 	struct inode *ip;
594 	struct ufs2_dinode *dp;
595 	ufs_lbn_t lbn, lastlbn;
596 	struct fs *fs;
597 	struct buf *bp, *nbp;
598 	struct mount *mp;
599 	struct ufsmount *ump;
600 	struct indir indirs[UFS_NIADDR + 2];
601 	ufs2_daddr_t nb, newb, *bap, pref;
602 	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
603 	ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
604 	int deallocated, osize, nsize, num, i, error;
605 	int unwindidx = -1;
606 	int saved_inbdflush;
607 	int gbflags, reclaimed;
608 
609 	ip = VTOI(vp);
610 	dp = ip->i_din2;
611 	fs = ITOFS(ip);
612 	mp = ITOVFS(ip);
613 	ump = ITOUMP(ip);
614 	lbn = lblkno(fs, startoffset);
615 	size = blkoff(fs, startoffset) + size;
616 	reclaimed = 0;
617 	if (size > fs->fs_bsize)
618 		panic("ffs_balloc_ufs2: blk too big");
619 	*bpp = NULL;
620 	if (lbn < 0)
621 		return (EFBIG);
622 	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
623 
624 	if (DOINGSOFTDEP(vp))
625 		softdep_prealloc(vp, MNT_WAIT);
626 
627 	/*
628 	 * Check for allocating external data.
629 	 */
630 	if (flags & IO_EXT) {
631 		if (lbn >= UFS_NXADDR)
632 			return (EFBIG);
633 		/*
634 		 * If the next write will extend the data into a new block,
635 		 * and the data is currently composed of a fragment
636 		 * this fragment has to be extended to be a full block.
637 		 */
638 		lastlbn = lblkno(fs, dp->di_extsize);
639 		if (lastlbn < lbn) {
640 			nb = lastlbn;
641 			osize = sblksize(fs, dp->di_extsize, nb);
642 			if (osize < fs->fs_bsize && osize > 0) {
643 				UFS_LOCK(ump);
644 				error = ffs_realloccg(ip, -1 - nb,
645 				    dp->di_extb[nb],
646 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
647 				    &dp->di_extb[0]), osize,
648 				    (int)fs->fs_bsize, flags, cred, &bp);
649 				if (error)
650 					return (error);
651 				if (DOINGSOFTDEP(vp))
652 					softdep_setup_allocext(ip, nb,
653 					    dbtofsb(fs, bp->b_blkno),
654 					    dp->di_extb[nb],
655 					    fs->fs_bsize, osize, bp);
656 				dp->di_extsize = smalllblktosize(fs, nb + 1);
657 				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
658 				bp->b_xflags |= BX_ALTDATA;
659 				UFS_INODE_SET_FLAG(ip,
660 				    IN_SIZEMOD | IN_CHANGE | IN_IBLKDATA);
661 				if (flags & IO_SYNC)
662 					bwrite(bp);
663 				else
664 					bawrite(bp);
665 			}
666 		}
667 		/*
668 		 * All blocks are direct blocks
669 		 */
670 		if (flags & BA_METAONLY)
671 			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
672 		nb = dp->di_extb[lbn];
673 		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
674 			error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
675 			    gbflags, &bp);
676 			if (error) {
677 				return (error);
678 			}
679 			bp->b_blkno = fsbtodb(fs, nb);
680 			bp->b_xflags |= BX_ALTDATA;
681 			*bpp = bp;
682 			return (0);
683 		}
684 		if (nb != 0) {
685 			/*
686 			 * Consider need to reallocate a fragment.
687 			 */
688 			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
689 			nsize = fragroundup(fs, size);
690 			if (nsize <= osize) {
691 				error = bread_gb(vp, -1 - lbn, osize, NOCRED,
692 				    gbflags, &bp);
693 				if (error) {
694 					return (error);
695 				}
696 				bp->b_blkno = fsbtodb(fs, nb);
697 				bp->b_xflags |= BX_ALTDATA;
698 			} else {
699 				UFS_LOCK(ump);
700 				error = ffs_realloccg(ip, -1 - lbn,
701 				    dp->di_extb[lbn],
702 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
703 				    &dp->di_extb[0]), osize, nsize, flags,
704 				    cred, &bp);
705 				if (error)
706 					return (error);
707 				bp->b_xflags |= BX_ALTDATA;
708 				if (DOINGSOFTDEP(vp))
709 					softdep_setup_allocext(ip, lbn,
710 					    dbtofsb(fs, bp->b_blkno), nb,
711 					    nsize, osize, bp);
712 			}
713 		} else {
714 			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
715 				nsize = fragroundup(fs, size);
716 			else
717 				nsize = fs->fs_bsize;
718 			UFS_LOCK(ump);
719 			error = ffs_alloc(ip, lbn,
720 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
721 			   nsize, flags, cred, &newb);
722 			if (error)
723 				return (error);
724 			bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
725 			bp->b_blkno = fsbtodb(fs, newb);
726 			bp->b_xflags |= BX_ALTDATA;
727 			if (flags & BA_CLRBUF)
728 				vfs_bio_clrbuf(bp);
729 			if (DOINGSOFTDEP(vp))
730 				softdep_setup_allocext(ip, lbn, newb, 0,
731 				    nsize, 0, bp);
732 		}
733 		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
734 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_IBLKDATA);
735 		*bpp = bp;
736 		return (0);
737 	}
738 	/*
739 	 * If the next write will extend the file into a new block,
740 	 * and the file is currently composed of a fragment
741 	 * this fragment has to be extended to be a full block.
742 	 */
743 	lastlbn = lblkno(fs, ip->i_size);
744 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
745 		nb = lastlbn;
746 		osize = blksize(fs, ip, nb);
747 		if (osize < fs->fs_bsize && osize > 0) {
748 			UFS_LOCK(ump);
749 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
750 			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
751 			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
752 			    flags, cred, &bp);
753 			if (error)
754 				return (error);
755 			if (DOINGSOFTDEP(vp))
756 				softdep_setup_allocdirect(ip, nb,
757 				    dbtofsb(fs, bp->b_blkno),
758 				    dp->di_db[nb],
759 				    fs->fs_bsize, osize, bp);
760 			ip->i_size = smalllblktosize(fs, nb + 1);
761 			dp->di_size = ip->i_size;
762 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
763 			UFS_INODE_SET_FLAG(ip,
764 			    IN_SIZEMOD |IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
765 			if (flags & IO_SYNC)
766 				bwrite(bp);
767 			else
768 				bawrite(bp);
769 		}
770 	}
771 	/*
772 	 * The first UFS_NDADDR blocks are direct blocks
773 	 */
774 	if (lbn < UFS_NDADDR) {
775 		if (flags & BA_METAONLY)
776 			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
777 		nb = dp->di_db[lbn];
778 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
779 			if ((flags & BA_CLRBUF) != 0) {
780 				error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
781 				    gbflags, &bp);
782 				if (error != 0)
783 					return (error);
784 			} else {
785 				bp = getblk(vp, lbn, fs->fs_bsize, 0, 0,
786 				    gbflags);
787 				if (bp == NULL)
788 					return (EIO);
789 				vfs_bio_clrbuf(bp);
790 			}
791 			bp->b_blkno = fsbtodb(fs, nb);
792 			*bpp = bp;
793 			return (0);
794 		}
795 		if (nb != 0) {
796 			/*
797 			 * Consider need to reallocate a fragment.
798 			 */
799 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
800 			nsize = fragroundup(fs, size);
801 			if (nsize <= osize) {
802 				error = bread_gb(vp, lbn, osize, NOCRED,
803 				    gbflags, &bp);
804 				if (error) {
805 					return (error);
806 				}
807 				bp->b_blkno = fsbtodb(fs, nb);
808 			} else {
809 				UFS_LOCK(ump);
810 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
811 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
812 				    &dp->di_db[0]), osize, nsize, flags,
813 				    cred, &bp);
814 				if (error)
815 					return (error);
816 				if (DOINGSOFTDEP(vp))
817 					softdep_setup_allocdirect(ip, lbn,
818 					    dbtofsb(fs, bp->b_blkno), nb,
819 					    nsize, osize, bp);
820 			}
821 		} else {
822 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
823 				nsize = fragroundup(fs, size);
824 			else
825 				nsize = fs->fs_bsize;
826 			UFS_LOCK(ump);
827 			error = ffs_alloc(ip, lbn,
828 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
829 				&dp->di_db[0]), nsize, flags, cred, &newb);
830 			if (error)
831 				return (error);
832 			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
833 			bp->b_blkno = fsbtodb(fs, newb);
834 			if (flags & BA_CLRBUF)
835 				vfs_bio_clrbuf(bp);
836 			if (DOINGSOFTDEP(vp))
837 				softdep_setup_allocdirect(ip, lbn, newb, 0,
838 				    nsize, 0, bp);
839 		}
840 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
841 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
842 		*bpp = bp;
843 		return (0);
844 	}
845 	/*
846 	 * Determine the number of levels of indirection.
847 	 */
848 	pref = 0;
849 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
850 		return(error);
851 #ifdef INVARIANTS
852 	if (num < 1)
853 		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
854 #endif
855 	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
856 	/*
857 	 * Fetch the first indirect block allocating if necessary.
858 	 */
859 	--num;
860 	nb = dp->di_ib[indirs[0].in_off];
861 	allocib = NULL;
862 	allocblk = allociblk;
863 	lbns_remfree = lbns;
864 	if (nb == 0) {
865 		UFS_LOCK(ump);
866 		pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
867 		    (ufs2_daddr_t *)0);
868 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
869 		    flags, cred, &newb)) != 0) {
870 			curthread_pflags_restore(saved_inbdflush);
871 			return (error);
872 		}
873 		pref = newb + fs->fs_frag;
874 		nb = newb;
875 		MPASS(allocblk < allociblk + nitems(allociblk));
876 		MPASS(lbns_remfree < lbns + nitems(lbns));
877 		*allocblk++ = nb;
878 		*lbns_remfree++ = indirs[1].in_lbn;
879 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
880 		    GB_UNMAPPED);
881 		bp->b_blkno = fsbtodb(fs, nb);
882 		vfs_bio_clrbuf(bp);
883 		if (DOINGSOFTDEP(vp)) {
884 			softdep_setup_allocdirect(ip,
885 			    UFS_NDADDR + indirs[0].in_off, newb, 0,
886 			    fs->fs_bsize, 0, bp);
887 			bdwrite(bp);
888 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
889 			if (bp->b_bufsize == fs->fs_bsize)
890 				bp->b_flags |= B_CLUSTEROK;
891 			bdwrite(bp);
892 		} else {
893 			if ((error = bwrite(bp)) != 0)
894 				goto fail;
895 		}
896 		allocib = &dp->di_ib[indirs[0].in_off];
897 		*allocib = nb;
898 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
899 	}
900 	/*
901 	 * Fetch through the indirect blocks, allocating as necessary.
902 	 */
903 retry:
904 	for (i = 1;;) {
905 		error = bread(vp,
906 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
907 		if (error) {
908 			goto fail;
909 		}
910 		bap = (ufs2_daddr_t *)bp->b_data;
911 		nb = bap[indirs[i].in_off];
912 		if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
913 		    fs->fs_bsize)) != 0) {
914 			brelse(bp);
915 			goto fail;
916 		}
917 		if (i == num)
918 			break;
919 		i += 1;
920 		if (nb != 0) {
921 			bqrelse(bp);
922 			continue;
923 		}
924 		UFS_LOCK(ump);
925 		/*
926 		 * If parent indirect has just been allocated, try to cluster
927 		 * immediately following it.
928 		 */
929 		if (pref == 0)
930 			pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
931 			    (ufs2_daddr_t *)0);
932 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
933 		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
934 			brelse(bp);
935 			UFS_LOCK(ump);
936 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
937 				softdep_request_cleanup(fs, vp, cred,
938 				    FLUSH_BLOCKS_WAIT);
939 				UFS_UNLOCK(ump);
940 				goto retry;
941 			}
942 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
943 			    ppsratecheck(&ump->um_last_fullmsg,
944 			    &ump->um_secs_fullmsg, 1)) {
945 				UFS_UNLOCK(ump);
946 				ffs_fserr(fs, ip->i_number, "filesystem full");
947 				uprintf("\n%s: write failed, filesystem "
948 				    "is full\n", fs->fs_fsmnt);
949 			} else {
950 				UFS_UNLOCK(ump);
951 			}
952 			goto fail;
953 		}
954 		pref = newb + fs->fs_frag;
955 		nb = newb;
956 		MPASS(allocblk < allociblk + nitems(allociblk));
957 		MPASS(lbns_remfree < lbns + nitems(lbns));
958 		*allocblk++ = nb;
959 		*lbns_remfree++ = indirs[i].in_lbn;
960 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
961 		    GB_UNMAPPED);
962 		nbp->b_blkno = fsbtodb(fs, nb);
963 		vfs_bio_clrbuf(nbp);
964 		if (DOINGSOFTDEP(vp)) {
965 			softdep_setup_allocindir_meta(nbp, ip, bp,
966 			    indirs[i - 1].in_off, nb);
967 			bdwrite(nbp);
968 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
969 			if (nbp->b_bufsize == fs->fs_bsize)
970 				nbp->b_flags |= B_CLUSTEROK;
971 			bdwrite(nbp);
972 		} else {
973 			if ((error = bwrite(nbp)) != 0) {
974 				brelse(bp);
975 				goto fail;
976 			}
977 		}
978 		bap[indirs[i - 1].in_off] = nb;
979 		if (allocib == NULL && unwindidx < 0)
980 			unwindidx = i - 1;
981 		/*
982 		 * If required, write synchronously, otherwise use
983 		 * delayed write.
984 		 */
985 		if (flags & IO_SYNC) {
986 			bwrite(bp);
987 		} else {
988 			if (bp->b_bufsize == fs->fs_bsize)
989 				bp->b_flags |= B_CLUSTEROK;
990 			bdwrite(bp);
991 		}
992 	}
993 	/*
994 	 * If asked only for the indirect block, then return it.
995 	 */
996 	if (flags & BA_METAONLY) {
997 		curthread_pflags_restore(saved_inbdflush);
998 		*bpp = bp;
999 		return (0);
1000 	}
1001 	/*
1002 	 * Get the data block, allocating if necessary.
1003 	 */
1004 	if (nb == 0) {
1005 		UFS_LOCK(ump);
1006 		/*
1007 		 * If allocating metadata at the front of the cylinder
1008 		 * group and parent indirect block has just been allocated,
1009 		 * then cluster next to it if it is the first indirect in
1010 		 * the file. Otherwise it has been allocated in the metadata
1011 		 * area, so we want to find our own place out in the data area.
1012 		 */
1013 		if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
1014 			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
1015 			    &bap[0]);
1016 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
1017 		    flags | IO_BUFLOCKED, cred, &newb);
1018 		if (error) {
1019 			brelse(bp);
1020 			UFS_LOCK(ump);
1021 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
1022 				softdep_request_cleanup(fs, vp, cred,
1023 				    FLUSH_BLOCKS_WAIT);
1024 				UFS_UNLOCK(ump);
1025 				goto retry;
1026 			}
1027 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
1028 			    ppsratecheck(&ump->um_last_fullmsg,
1029 			    &ump->um_secs_fullmsg, 1)) {
1030 				UFS_UNLOCK(ump);
1031 				ffs_fserr(fs, ip->i_number, "filesystem full");
1032 				uprintf("\n%s: write failed, filesystem "
1033 				    "is full\n", fs->fs_fsmnt);
1034 			} else {
1035 				UFS_UNLOCK(ump);
1036 			}
1037 			goto fail;
1038 		}
1039 		nb = newb;
1040 		MPASS(allocblk < allociblk + nitems(allociblk));
1041 		MPASS(lbns_remfree < lbns + nitems(lbns));
1042 		*allocblk++ = nb;
1043 		*lbns_remfree++ = lbn;
1044 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1045 		nbp->b_blkno = fsbtodb(fs, nb);
1046 		if (flags & BA_CLRBUF)
1047 			vfs_bio_clrbuf(nbp);
1048 		if (DOINGSOFTDEP(vp))
1049 			softdep_setup_allocindir_page(ip, lbn, bp,
1050 			    indirs[i].in_off, nb, 0, nbp);
1051 		bap[indirs[i].in_off] = nb;
1052 		/*
1053 		 * If required, write synchronously, otherwise use
1054 		 * delayed write.
1055 		 */
1056 		if (flags & IO_SYNC) {
1057 			bwrite(bp);
1058 		} else {
1059 			if (bp->b_bufsize == fs->fs_bsize)
1060 				bp->b_flags |= B_CLUSTEROK;
1061 			bdwrite(bp);
1062 		}
1063 		curthread_pflags_restore(saved_inbdflush);
1064 		*bpp = nbp;
1065 		return (0);
1066 	}
1067 	brelse(bp);
1068 	/*
1069 	 * If requested clear invalid portions of the buffer.  If we
1070 	 * have to do a read-before-write (typical if BA_CLRBUF is set),
1071 	 * try to do some read-ahead in the sequential case to reduce
1072 	 * the number of I/O transactions.
1073 	 */
1074 	if (flags & BA_CLRBUF) {
1075 		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
1076 		if (seqcount != 0 &&
1077 		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
1078 		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
1079 			error = cluster_read(vp, ip->i_size, lbn,
1080 			    (int)fs->fs_bsize, NOCRED,
1081 			    MAXBSIZE, seqcount, gbflags, &nbp);
1082 		} else {
1083 			error = bread_gb(vp, lbn, (int)fs->fs_bsize,
1084 			    NOCRED, gbflags, &nbp);
1085 		}
1086 		if (error) {
1087 			brelse(nbp);
1088 			goto fail;
1089 		}
1090 	} else {
1091 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1092 		nbp->b_blkno = fsbtodb(fs, nb);
1093 	}
1094 	curthread_pflags_restore(saved_inbdflush);
1095 	*bpp = nbp;
1096 	return (0);
1097 fail:
1098 	curthread_pflags_restore(saved_inbdflush);
1099 	/*
1100 	 * If we have failed to allocate any blocks, simply return the error.
1101 	 * This is the usual case and avoids the need to fsync the file.
1102 	 */
1103 	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
1104 		return (error);
1105 	/*
1106 	 * If we have failed part way through block allocation, we
1107 	 * have to deallocate any indirect blocks that we have allocated.
1108 	 * We have to fsync the file before we start to get rid of all
1109 	 * of its dependencies so that we do not leave them dangling.
1110 	 * We have to sync it at the end so that the soft updates code
1111 	 * does not find any untracked changes. Although this is really
1112 	 * slow, running out of disk space is not expected to be a common
1113 	 * occurrence. The error return from fsync is ignored as we already
1114 	 * have an error to return to the user.
1115 	 *
1116 	 * XXX Still have to journal the free below
1117 	 */
1118 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
1119 	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
1120 	     blkp < allocblk; blkp++, lbns_remfree++) {
1121 		/*
1122 		 * We shall not leave the freed blocks on the vnode
1123 		 * buffer object lists.
1124 		 */
1125 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1126 		    GB_NOCREAT | GB_UNMAPPED);
1127 		if (bp != NULL) {
1128 			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
1129 			    ("mismatch2 l %jd %jd b %ju %ju",
1130 			    (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
1131 			    (uintmax_t)bp->b_blkno,
1132 			    (uintmax_t)fsbtodb(fs, *blkp)));
1133 			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
1134 			bp->b_flags &= ~(B_ASYNC | B_CACHE);
1135 			brelse(bp);
1136 		}
1137 		deallocated += fs->fs_bsize;
1138 	}
1139 	if (allocib != NULL) {
1140 		*allocib = 0;
1141 	} else if (unwindidx >= 0) {
1142 		int r;
1143 
1144 		r = bread(vp, indirs[unwindidx].in_lbn,
1145 		    (int)fs->fs_bsize, NOCRED, &bp);
1146 		if (r) {
1147 			panic("Could not unwind indirect block, error %d", r);
1148 			brelse(bp);
1149 		} else {
1150 			bap = (ufs2_daddr_t *)bp->b_data;
1151 			bap[indirs[unwindidx].in_off] = 0;
1152 			if (flags & IO_SYNC) {
1153 				bwrite(bp);
1154 			} else {
1155 				if (bp->b_bufsize == fs->fs_bsize)
1156 					bp->b_flags |= B_CLUSTEROK;
1157 				bdwrite(bp);
1158 			}
1159 		}
1160 	}
1161 	if (deallocated) {
1162 #ifdef QUOTA
1163 		/*
1164 		 * Restore user's disk quota because allocation failed.
1165 		 */
1166 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
1167 #endif
1168 		dp->di_blocks -= btodb(deallocated);
1169 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
1170 	}
1171 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
1172 	/*
1173 	 * After the buffers are invalidated and on-disk pointers are
1174 	 * cleared, free the blocks.
1175 	 */
1176 	for (blkp = allociblk; blkp < allocblk; blkp++) {
1177 #ifdef INVARIANTS
1178 		if (blkp == allociblk)
1179 			lbns_remfree = lbns;
1180 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1181 		    GB_NOCREAT | GB_UNMAPPED);
1182 		if (bp != NULL) {
1183 			panic("zombie2 %jd %ju %ju",
1184 			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
1185 			    (uintmax_t)fsbtodb(fs, *blkp));
1186 		}
1187 		lbns_remfree++;
1188 #endif
1189 		ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
1190 		    ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
1191 	}
1192 	return (error);
1193 }
1194