xref: /freebsd/sys/ufs/ffs/ffs_balloc.c (revision 963f5dc7a30624e95d72fb7f87b8892651164e46)
1 /*-
2  * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND BSD-3-Clause)
3  *
4  * Copyright (c) 2002 Networks Associates Technology, Inc.
5  * All rights reserved.
6  *
7  * This software was developed for the FreeBSD Project by Marshall
8  * Kirk McKusick and Network Associates Laboratories, the Security
9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11  * research program
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * Copyright (c) 1982, 1986, 1989, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
62  */
63 
64 #include <sys/cdefs.h>
65 __FBSDID("$FreeBSD$");
66 
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/bio.h>
70 #include <sys/buf.h>
71 #include <sys/lock.h>
72 #include <sys/mount.h>
73 #include <sys/vnode.h>
74 #include <sys/vmmeter.h>
75 
76 #include <ufs/ufs/quota.h>
77 #include <ufs/ufs/inode.h>
78 #include <ufs/ufs/ufs_extern.h>
79 #include <ufs/ufs/extattr.h>
80 #include <ufs/ufs/ufsmount.h>
81 
82 #include <ufs/ffs/fs.h>
83 #include <ufs/ffs/ffs_extern.h>
84 
85 /*
86  * Balloc defines the structure of filesystem storage
87  * by allocating the physical blocks on a device given
88  * the inode and the logical block number in a file.
89  * This is the allocation strategy for UFS1. Below is
90  * the allocation strategy for UFS2.
 *
 * Arguments:
 *	vp		vnode of the file; its inode supplies the UFS1
 *			dinode, the superblock, the mount and the ufsmount.
 *	startoffset	byte offset in the file; selects the logical block.
 *	size		byte count starting at startoffset. The offset
 *			within the block is added to it, and the function
 *			panics if the sum exceeds fs_bsize.
 *	cred		credentials handed to the block allocator, to the
 *			quota code and to the soft updates cleanup request.
 *	flags		IO_ and BA_ flags:
 *			IO_EXT		rejected with EOPNOTSUPP.
 *			BA_UNMAPPED	buffers are requested with
 *					GB_UNMAPPED.
 *			BA_CLRBUF	existing blocks are read; newly
 *					allocated data blocks are passed to
 *					vfs_bio_clrbuf().
 *			BA_METAONLY	return the last indirect block rather
 *					than the data block (panics for a
 *					direct block).
 *			IO_SYNC		modified blocks are written with
 *					bwrite() instead of a delayed write.
 *			BA_SEQMASK	bits carrying a sequential access
 *					count used to choose cluster_read().
 *	bpp		result pointer; set to NULL on entry and to the
 *			buffer of the requested block on success.
 *
 * Returns 0 on success, EOPNOTSUPP for IO_EXT, EFBIG for a negative
 * logical block number, EIO if getblk() returns NULL for an existing
 * direct block, or the error reported by ufs_getlbns(), bread(),
 * bread_gb(), cluster_read(), bwrite(), UFS_CHECK_BLKNO(), ffs_alloc()
 * or ffs_realloccg().
91  */
92 int
93 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
94     struct ucred *cred, int flags, struct buf **bpp)
95 {
96 	struct inode *ip;
97 	struct ufs1_dinode *dp;
98 	ufs_lbn_t lbn, lastlbn;
99 	struct fs *fs;
100 	ufs1_daddr_t nb;
101 	struct buf *bp, *nbp;
102 	struct mount *mp;
103 	struct ufsmount *ump;
104 	struct indir indirs[UFS_NIADDR + 2];
105 	int deallocated, osize, nsize, num, i, error;
106 	ufs2_daddr_t newb;
107 	ufs1_daddr_t *bap, pref;
108 	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
109 	ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
110 	int unwindidx = -1;
111 	int saved_inbdflush;
112 	int gbflags, reclaimed;
113 
	/*
	 * Bookkeeping used by the indirect block path: allociblk[] collects
	 * the disk addresses of the blocks allocated by this call and lbns[]
	 * the matching logical block numbers, so that the fail path can
	 * invalidate their buffers and free them. allocib points at the
	 * inode's indirect pointer if this call filled it in, and unwindidx
	 * is the indirs[] index of the first already existing indirect block
	 * that received a new pointer (-1 if none). reclaimed counts the
	 * allocation failures seen while soft updates are active; only the
	 * first one triggers a cleanup request and a retry.
	 */
114 	ip = VTOI(vp);
115 	dp = ip->i_din1;
116 	fs = ITOFS(ip);
117 	mp = ITOVFS(ip);
118 	ump = ITOUMP(ip);
	/*
	 * Convert the byte range into a logical block number and, by adding
	 * the offset within the block, turn size into the end offset of the
	 * request inside that block.
	 */
119 	lbn = lblkno(fs, startoffset);
120 	size = blkoff(fs, startoffset) + size;
121 	reclaimed = 0;
122 	if (size > fs->fs_bsize)
123 		panic("ffs_balloc_ufs1: blk too big");
124 	*bpp = NULL;
125 	if (flags & IO_EXT)
126 		return (EOPNOTSUPP);
127 	if (lbn < 0)
128 		return (EFBIG);
129 	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
130 
	/*
	 * The two early returns above precede this call; every later exit
	 * goes through the done label, which calls vn_seqc_write_end().
	 */
131 	vn_seqc_write_begin(vp);
132 
133 	/*
134 	 * If the next write will extend the file into a new block,
135 	 * and the file is currently composed of a fragment
136 	 * this fragment has to be extended to be a full block.
137 	 */
138 	lastlbn = lblkno(fs, ip->i_size);
139 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
140 		nb = lastlbn;
141 		osize = blksize(fs, ip, nb);
142 		if (osize < fs->fs_bsize && osize > 0) {
			/*
			 * The allocator is entered with the UFS lock held.
			 * Nothing in this function unlocks it afterwards on
			 * the success path, so the callee presumably drops
			 * it -- TODO confirm against ffs_realloccg().
			 */
143 			UFS_LOCK(ump);
144 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
145 			   ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
146 			   &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
147 			   cred, &bp);
148 			if (error)
149 				goto done;
150 			if (DOINGSOFTDEP(vp))
151 				softdep_setup_allocdirect(ip, nb,
152 				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
153 				    fs->fs_bsize, osize, bp);
154 			ip->i_size = smalllblktosize(fs, nb + 1);
155 			dp->di_size = ip->i_size;
156 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
157 			UFS_INODE_SET_FLAG(ip,
158 			    IN_SIZEMOD | IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
			/*
			 * Push the extended block out: bwrite() for IO_SYNC,
			 * bdwrite() when DOINGASYNC(vp) is true, bawrite()
			 * otherwise. The result of bwrite() is not checked
			 * here.
			 */
159 			if (flags & IO_SYNC)
160 				bwrite(bp);
161 			else if (DOINGASYNC(vp))
162 				bdwrite(bp);
163 			else
164 				bawrite(bp);
165 		}
166 	}
167 	/*
168 	 * The first UFS_NDADDR blocks are direct blocks
169 	 */
170 	if (lbn < UFS_NDADDR) {
171 		if (flags & BA_METAONLY)
172 			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
173 		nb = dp->di_db[lbn];
		/*
		 * The block is already allocated and the file size reaches
		 * smalllblktosize(fs, lbn + 1): no allocation is needed, only
		 * a buffer. With BA_CLRBUF the block is read; without it the
		 * buffer is obtained with getblk() and passed to
		 * vfs_bio_clrbuf().
		 */
174 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
175 			if ((flags & BA_CLRBUF) != 0) {
176 				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
177 				    &bp);
178 				if (error != 0)
179 					goto done;
180 			} else {
181 				bp = getblk(vp, lbn, fs->fs_bsize, 0, 0,
182 				    gbflags);
183 				if (bp == NULL) {
184 					error = EIO;
185 					goto done;
186 				}
187 				vfs_bio_clrbuf(bp);
188 			}
189 			bp->b_blkno = fsbtodb(fs, nb);
190 			*bpp = bp;
191 			error = 0;
192 			goto done;
193 		}
194 		if (nb != 0) {
195 			/*
196 			 * Consider need to reallocate a fragment.
197 			 */
			/*
			 * osize is the fragment-rounded size of the current
			 * tail of the file, nsize the fragment-rounded size
			 * this request needs. If the existing allocation is
			 * large enough it is simply read; otherwise it is
			 * grown through ffs_realloccg().
			 */
198 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
199 			nsize = fragroundup(fs, size);
200 			if (nsize <= osize) {
201 				error = bread(vp, lbn, osize, NOCRED, &bp);
202 				if (error)
203 					goto done;
204 				bp->b_blkno = fsbtodb(fs, nb);
205 			} else {
206 				UFS_LOCK(ump);
207 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
208 				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
209 				    &dp->di_db[0]), osize, nsize, flags,
210 				    cred, &bp);
211 				if (error)
212 					goto done;
213 				if (DOINGSOFTDEP(vp))
214 					softdep_setup_allocdirect(ip, lbn,
215 					    dbtofsb(fs, bp->b_blkno), nb,
216 					    nsize, osize, bp);
217 			}
218 		} else {
			/*
			 * No block is allocated yet. Allocate only as many
			 * fragments as the request needs when the file size
			 * is below smalllblktosize(fs, lbn + 1), a full
			 * block otherwise.
			 */
219 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
220 				nsize = fragroundup(fs, size);
221 			else
222 				nsize = fs->fs_bsize;
223 			UFS_LOCK(ump);
224 			error = ffs_alloc(ip, lbn,
225 			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
226 			    nsize, flags, cred, &newb);
227 			if (error)
228 				goto done;
229 			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
230 			bp->b_blkno = fsbtodb(fs, newb);
231 			if (flags & BA_CLRBUF)
232 				vfs_bio_clrbuf(bp);
233 			if (DOINGSOFTDEP(vp))
234 				softdep_setup_allocdirect(ip, lbn, newb, 0,
235 				    nsize, 0, bp);
236 		}
		/*
		 * Store the block address in the inode and flag the inode
		 * as modified; the buffer is returned to the caller.
		 */
237 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
238 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
239 		*bpp = bp;
240 		error = 0;
241 		goto done;
242 	}
243 	/*
244 	 * Determine the number of levels of indirection.
245 	 */
246 	pref = 0;
247 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
248 		goto done;
249 #ifdef INVARIANTS
250 	if (num < 1)
251 		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
252 #endif
	/*
	 * Set TDP_INBDFLUSH on the current thread while indirect blocks are
	 * handled; every exit path below restores the saved value with
	 * curthread_pflags_restore().
	 */
253 	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
254 	/*
255 	 * Fetch the first indirect block allocating if necessary.
256 	 */
	/*
	 * After the decrement num is the indirs[] index at which the walk
	 * below stops (the loop breaks when i == num).
	 */
257 	--num;
258 	nb = dp->di_ib[indirs[0].in_off];
259 	allocib = NULL;
260 	allocblk = allociblk;
261 	lbns_remfree = lbns;
262 	if (nb == 0) {
		/*
		 * The inode has no indirect block in this slot yet.
		 * Allocate one, remember it for the fail path, and hand it
		 * to bdwrite() or bwrite() before the pointer is stored in
		 * the inode.
		 */
263 		UFS_LOCK(ump);
264 		pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
265 		    (ufs1_daddr_t *)0);
266 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
267 		    flags, cred, &newb)) != 0) {
268 			curthread_pflags_restore(saved_inbdflush);
269 			goto done;
270 		}
		/* Aim the next allocation just past the new block. */
271 		pref = newb + fs->fs_frag;
272 		nb = newb;
273 		MPASS(allocblk < allociblk + nitems(allociblk));
274 		MPASS(lbns_remfree < lbns + nitems(lbns));
275 		*allocblk++ = nb;
276 		*lbns_remfree++ = indirs[1].in_lbn;
277 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
278 		bp->b_blkno = fsbtodb(fs, nb);
279 		vfs_bio_clrbuf(bp);
280 		if (DOINGSOFTDEP(vp)) {
281 			softdep_setup_allocdirect(ip,
282 			    UFS_NDADDR + indirs[0].in_off, newb, 0,
283 			    fs->fs_bsize, 0, bp);
284 			bdwrite(bp);
285 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
286 			if (bp->b_bufsize == fs->fs_bsize)
287 				bp->b_flags |= B_CLUSTEROK;
288 			bdwrite(bp);
289 		} else {
290 			if ((error = bwrite(bp)) != 0)
291 				goto fail;
292 		}
293 		allocib = &dp->di_ib[indirs[0].in_off];
294 		*allocib = nb;
295 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
296 	}
297 	/*
298 	 * Fetch through the indirect blocks, allocating as necessary.
299 	 */
300 retry:
301 	for (i = 1;;) {
		/*
		 * Read the indirect block for this level and pick up the
		 * entry that leads towards lbn; the entry is checked with
		 * UFS_CHECK_BLKNO() before it is used.
		 */
302 		error = bread(vp,
303 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
304 		if (error) {
305 			goto fail;
306 		}
307 		bap = (ufs1_daddr_t *)bp->b_data;
308 		nb = bap[indirs[i].in_off];
309 		if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
310 		    fs->fs_bsize)) != 0) {
311 			brelse(bp);
312 			goto fail;
313 		}
314 		if (i == num)
315 			break;
		/*
		 * From here on indirs[i] describes the next lower indirect
		 * block and indirs[i - 1] the one held in bp.
		 */
316 		i += 1;
317 		if (nb != 0) {
318 			bqrelse(bp);
319 			continue;
320 		}
321 		UFS_LOCK(ump);
322 		/*
323 		 * If parent indirect has just been allocated, try to cluster
324 		 * immediately following it.
325 		 */
326 		if (pref == 0)
327 			pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
328 			    (ufs1_daddr_t *)0);
329 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
330 		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
			/*
			 * Allocation failed. Drop the parent buffer and take
			 * the UFS lock again for the calls below. On the first
			 * failure with soft updates active, request a cleanup
			 * and restart the walk; otherwise emit a rate-limited
			 * "filesystem full" message, unless
			 * ffs_fsfail_cleanup_locked() returns nonzero, and
			 * unwind.
			 */
331 			brelse(bp);
332 			UFS_LOCK(ump);
333 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
334 				softdep_request_cleanup(fs, vp, cred,
335 				    FLUSH_BLOCKS_WAIT);
336 				UFS_UNLOCK(ump);
337 				goto retry;
338 			}
339 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
340 			    ppsratecheck(&ump->um_last_fullmsg,
341 			    &ump->um_secs_fullmsg, 1)) {
342 				UFS_UNLOCK(ump);
343 				ffs_fserr(fs, ip->i_number, "filesystem full");
344 				uprintf("\n%s: write failed, filesystem "
345 				    "is full\n", fs->fs_fsmnt);
346 			} else {
347 				UFS_UNLOCK(ump);
348 			}
349 			goto fail;
350 		}
351 		pref = newb + fs->fs_frag;
352 		nb = newb;
353 		MPASS(allocblk < allociblk + nitems(allociblk));
354 		MPASS(lbns_remfree < lbns + nitems(lbns));
355 		*allocblk++ = nb;
356 		*lbns_remfree++ = indirs[i].in_lbn;
357 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
358 		nbp->b_blkno = fsbtodb(fs, nb);
359 		vfs_bio_clrbuf(nbp);
360 		if (DOINGSOFTDEP(vp)) {
361 			softdep_setup_allocindir_meta(nbp, ip, bp,
362 			    indirs[i - 1].in_off, nb);
363 			bdwrite(nbp);
364 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
365 			if (nbp->b_bufsize == fs->fs_bsize)
366 				nbp->b_flags |= B_CLUSTEROK;
367 			bdwrite(nbp);
368 		} else {
369 			if ((error = bwrite(nbp)) != 0) {
370 				brelse(bp);
371 				goto fail;
372 			}
373 		}
		/*
		 * Link the new block into its parent. If the inode's own
		 * indirect pointer was not set by this call, remember the
		 * first parent modified so the fail path can clear the entry.
		 */
374 		bap[indirs[i - 1].in_off] = nb;
375 		if (allocib == NULL && unwindidx < 0)
376 			unwindidx = i - 1;
377 		/*
378 		 * If required, write synchronously, otherwise use
379 		 * delayed write.
380 		 */
381 		if (flags & IO_SYNC) {
382 			bwrite(bp);
383 		} else {
384 			if (bp->b_bufsize == fs->fs_bsize)
385 				bp->b_flags |= B_CLUSTEROK;
386 			bdwrite(bp);
387 		}
388 	}
	/*
	 * The loop exits with bp holding the last indirect block and nb the
	 * entry for the data block (zero if it is not allocated).
	 */
389 	/*
390 	 * If asked only for the indirect block, then return it.
391 	 */
392 	if (flags & BA_METAONLY) {
393 		curthread_pflags_restore(saved_inbdflush);
394 		*bpp = bp;
395 		error = 0;
396 		goto done;
397 	}
398 	/*
399 	 * Get the data block, allocating if necessary.
400 	 */
401 	if (nb == 0) {
402 		UFS_LOCK(ump);
403 		/*
404 		 * If allocating metadata at the front of the cylinder
405 		 * group and parent indirect block has just been allocated,
406 		 * then cluster next to it if it is the first indirect in
407 		 * the file. Otherwise it has been allocated in the metadata
408 		 * area, so we want to find our own place out in the data area.
409 		 */
410 		if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
411 			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
412 			    &bap[0]);
413 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
414 		    flags | IO_BUFLOCKED, cred, &newb);
415 		if (error) {
			/*
			 * Same failure handling as in the indirect block
			 * loop above.
			 */
416 			brelse(bp);
417 			UFS_LOCK(ump);
418 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
419 				softdep_request_cleanup(fs, vp, cred,
420 				    FLUSH_BLOCKS_WAIT);
421 				UFS_UNLOCK(ump);
422 				goto retry;
423 			}
424 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
425 			    ppsratecheck(&ump->um_last_fullmsg,
426 			    &ump->um_secs_fullmsg, 1)) {
427 				UFS_UNLOCK(ump);
428 				ffs_fserr(fs, ip->i_number, "filesystem full");
429 				uprintf("\n%s: write failed, filesystem "
430 				    "is full\n", fs->fs_fsmnt);
431 			} else {
432 				UFS_UNLOCK(ump);
433 			}
434 			goto fail;
435 		}
436 		nb = newb;
437 		MPASS(allocblk < allociblk + nitems(allociblk));
438 		MPASS(lbns_remfree < lbns + nitems(lbns));
439 		*allocblk++ = nb;
440 		*lbns_remfree++ = lbn;
441 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
442 		nbp->b_blkno = fsbtodb(fs, nb);
443 		if (flags & BA_CLRBUF)
444 			vfs_bio_clrbuf(nbp);
445 		if (DOINGSOFTDEP(vp))
446 			softdep_setup_allocindir_page(ip, lbn, bp,
447 			    indirs[i].in_off, nb, 0, nbp);
448 		bap[indirs[i].in_off] = nb;
449 		/*
450 		 * If required, write synchronously, otherwise use
451 		 * delayed write.
452 		 */
453 		if (flags & IO_SYNC) {
454 			bwrite(bp);
455 		} else {
456 			if (bp->b_bufsize == fs->fs_bsize)
457 				bp->b_flags |= B_CLUSTEROK;
458 			bdwrite(bp);
459 		}
460 		curthread_pflags_restore(saved_inbdflush);
461 		*bpp = nbp;
462 		error = 0;
463 		goto done;
464 	}
	/*
	 * The data block already exists, so the indirect block is no longer
	 * needed.
	 */
465 	brelse(bp);
466 	if (flags & BA_CLRBUF) {
		/*
		 * Read the existing contents. cluster_read() is used when a
		 * sequential access count was passed in flags, the mount does
		 * not have MNT_NOCLUSTERR set and neither
		 * vm_page_count_severe() nor buf_dirty_count_severe() returns
		 * true; otherwise a plain bread_gb() is done.
		 */
467 		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
468 		if (seqcount != 0 &&
469 		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
470 		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
471 			error = cluster_read(vp, ip->i_size, lbn,
472 			    (int)fs->fs_bsize, NOCRED,
473 			    MAXBSIZE, seqcount, gbflags, &nbp);
474 		} else {
475 			error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
476 			    gbflags, &nbp);
477 		}
478 		if (error) {
			/*
			 * NOTE(review): whether the read routines leave a
			 * buffer in nbp on failure is not visible here --
			 * confirm that releasing it is correct.
			 */
479 			brelse(nbp);
480 			goto fail;
481 		}
482 	} else {
		/*
		 * The contents were not requested: only obtain a buffer and
		 * set its disk address.
		 */
483 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
484 		nbp->b_blkno = fsbtodb(fs, nb);
485 	}
486 	curthread_pflags_restore(saved_inbdflush);
487 	*bpp = nbp;
488 	error = 0;
489 	goto done;
490 fail:
491 	curthread_pflags_restore(saved_inbdflush);
492 	/*
493 	 * If we have failed to allocate any blocks, simply return the error.
494 	 * This is the usual case and avoids the need to fsync the file.
495 	 */
496 	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
497 		goto done;
498 	/*
499 	 * If we have failed part way through block allocation, we
500 	 * have to deallocate any indirect blocks that we have allocated.
501 	 * We have to fsync the file before we start to get rid of all
502 	 * of its dependencies so that we do not leave them dangling.
503 	 * We have to sync it at the end so that the soft updates code
504 	 * does not find any untracked changes. Although this is really
505 	 * slow, running out of disk space is not expected to be a common
506 	 * occurrence. The error return from fsync is ignored as we already
507 	 * have an error to return to the user.
508 	 *
509 	 * XXX Still have to journal the free below
510 	 */
511 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
	/*
	 * Walk the recorded allocations, invalidating any buffer that still
	 * exists for them and adding up the number of bytes to give back.
	 */
512 	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
513 	     blkp < allocblk; blkp++, lbns_remfree++) {
514 		/*
515 		 * We shall not leave the freed blocks on the vnode
516 		 * buffer object lists.
517 		 */
518 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
519 		    GB_NOCREAT | GB_UNMAPPED);
520 		if (bp != NULL) {
521 			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
522 			    ("mismatch1 l %jd %jd b %ju %ju",
523 			    (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
524 			    (uintmax_t)bp->b_blkno,
525 			    (uintmax_t)fsbtodb(fs, *blkp)));
526 			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
527 			bp->b_flags &= ~(B_ASYNC | B_CACHE);
528 			brelse(bp);
529 		}
530 		deallocated += fs->fs_bsize;
531 	}
	/*
	 * Clear the pointer that links the newly allocated chain into the
	 * file: either the inode's indirect pointer set by this call, or
	 * the entry in the already existing indirect block recorded in
	 * unwindidx.
	 */
532 	if (allocib != NULL) {
533 		*allocib = 0;
534 	} else if (unwindidx >= 0) {
535 		int r;
536 
537 		r = bread(vp, indirs[unwindidx].in_lbn,
538 		    (int)fs->fs_bsize, NOCRED, &bp);
539 		if (r) {
540 			panic("Could not unwind indirect block, error %d", r);
			/*
			 * NOTE(review): panic() is not expected to return,
			 * which would make this brelse() unreachable.
			 */
541 			brelse(bp);
542 		} else {
543 			bap = (ufs1_daddr_t *)bp->b_data;
544 			bap[indirs[unwindidx].in_off] = 0;
545 			if (flags & IO_SYNC) {
546 				bwrite(bp);
547 			} else {
548 				if (bp->b_bufsize == fs->fs_bsize)
549 					bp->b_flags |= B_CLUSTEROK;
550 				bdwrite(bp);
551 			}
552 		}
553 	}
554 	if (deallocated) {
555 #ifdef QUOTA
556 		/*
557 		 * Restore user's disk quota because allocation failed.
558 		 */
559 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
560 #endif
		/* Subtract the freed space from the inode's block count. */
561 		dp->di_blocks -= btodb(deallocated);
562 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
563 	}
564 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
565 	/*
566 	 * After the buffers are invalidated and on-disk pointers are
567 	 * cleared, free the blocks.
568 	 */
569 	for (blkp = allociblk; blkp < allocblk; blkp++) {
570 #ifdef INVARIANTS
		/*
		 * Debug check: no buffer may still exist for a logical block
		 * that is about to be freed.
		 */
571 		if (blkp == allociblk)
572 			lbns_remfree = lbns;
573 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
574 		    GB_NOCREAT | GB_UNMAPPED);
575 		if (bp != NULL) {
576 			panic("zombie1 %jd %ju %ju",
577 			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
578 			    (uintmax_t)fsbtodb(fs, *blkp));
579 		}
580 		lbns_remfree++;
581 #endif
582 		ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
583 		    ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
584 	}
	/* Common exit: pairs with the vn_seqc_write_begin() call above. */
585 done:
586 	vn_seqc_write_end(vp);
587 	return (error);
588 }
589 
590 /*
591  * Balloc defines the structure of file system storage
592  * by allocating the physical blocks on a device given
593  * the inode and the logical block number in a file.
594  * This is the allocation strategy for UFS2. Above is
595  * the allocation strategy for UFS1.
596  */
597 int
598 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
599     struct ucred *cred, int flags, struct buf **bpp)
600 {
601 	struct inode *ip;
602 	struct ufs2_dinode *dp;
603 	ufs_lbn_t lbn, lastlbn;
604 	struct fs *fs;
605 	struct buf *bp, *nbp;
606 	struct mount *mp;
607 	struct ufsmount *ump;
608 	struct indir indirs[UFS_NIADDR + 2];
609 	ufs2_daddr_t nb, newb, *bap, pref;
610 	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
611 	ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
612 	int deallocated, osize, nsize, num, i, error;
613 	int unwindidx = -1;
614 	int saved_inbdflush;
615 	int gbflags, reclaimed;
616 
617 	ip = VTOI(vp);
618 	dp = ip->i_din2;
619 	fs = ITOFS(ip);
620 	mp = ITOVFS(ip);
621 	ump = ITOUMP(ip);
622 	lbn = lblkno(fs, startoffset);
623 	size = blkoff(fs, startoffset) + size;
624 	reclaimed = 0;
625 	if (size > fs->fs_bsize)
626 		panic("ffs_balloc_ufs2: blk too big");
627 	*bpp = NULL;
628 	if (lbn < 0)
629 		return (EFBIG);
630 	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
631 
632 	vn_seqc_write_begin(vp);
633 
634 	/*
635 	 * Check for allocating external data.
636 	 */
637 	if (flags & IO_EXT) {
638 		if (lbn >= UFS_NXADDR) {
639 			error = EFBIG;
640 			goto done;
641 		}
642 
643 		/*
644 		 * If the next write will extend the data into a new block,
645 		 * and the data is currently composed of a fragment
646 		 * this fragment has to be extended to be a full block.
647 		 */
648 		lastlbn = lblkno(fs, dp->di_extsize);
649 		if (lastlbn < lbn) {
650 			nb = lastlbn;
651 			osize = sblksize(fs, dp->di_extsize, nb);
652 			if (osize < fs->fs_bsize && osize > 0) {
653 				UFS_LOCK(ump);
654 				error = ffs_realloccg(ip, -1 - nb,
655 				    dp->di_extb[nb],
656 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
657 				    &dp->di_extb[0]), osize,
658 				    (int)fs->fs_bsize, flags, cred, &bp);
659 				if (error)
660 					goto done;
661 				if (DOINGSOFTDEP(vp))
662 					softdep_setup_allocext(ip, nb,
663 					    dbtofsb(fs, bp->b_blkno),
664 					    dp->di_extb[nb],
665 					    fs->fs_bsize, osize, bp);
666 				dp->di_extsize = smalllblktosize(fs, nb + 1);
667 				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
668 				bp->b_xflags |= BX_ALTDATA;
669 				UFS_INODE_SET_FLAG(ip,
670 				    IN_SIZEMOD | IN_CHANGE | IN_IBLKDATA);
671 				if (flags & IO_SYNC)
672 					bwrite(bp);
673 				else
674 					bawrite(bp);
675 			}
676 		}
677 		/*
678 		 * All blocks are direct blocks
679 		 */
680 		if (flags & BA_METAONLY)
681 			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
682 		nb = dp->di_extb[lbn];
683 		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
684 			error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
685 			    gbflags, &bp);
686 			if (error)
687 				goto done;
688 			bp->b_blkno = fsbtodb(fs, nb);
689 			bp->b_xflags |= BX_ALTDATA;
690 			*bpp = bp;
691 			goto done;
692 		}
693 		if (nb != 0) {
694 			/*
695 			 * Consider need to reallocate a fragment.
696 			 */
697 			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
698 			nsize = fragroundup(fs, size);
699 			if (nsize <= osize) {
700 				error = bread_gb(vp, -1 - lbn, osize, NOCRED,
701 				    gbflags, &bp);
702 				if (error)
703 					goto done;
704 				bp->b_blkno = fsbtodb(fs, nb);
705 				bp->b_xflags |= BX_ALTDATA;
706 			} else {
707 				UFS_LOCK(ump);
708 				error = ffs_realloccg(ip, -1 - lbn,
709 				    dp->di_extb[lbn],
710 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
711 				    &dp->di_extb[0]), osize, nsize, flags,
712 				    cred, &bp);
713 				if (error)
714 					goto done;
715 				bp->b_xflags |= BX_ALTDATA;
716 				if (DOINGSOFTDEP(vp))
717 					softdep_setup_allocext(ip, lbn,
718 					    dbtofsb(fs, bp->b_blkno), nb,
719 					    nsize, osize, bp);
720 			}
721 		} else {
722 			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
723 				nsize = fragroundup(fs, size);
724 			else
725 				nsize = fs->fs_bsize;
726 			UFS_LOCK(ump);
727 			error = ffs_alloc(ip, lbn,
728 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
729 			   nsize, flags, cred, &newb);
730 			if (error)
731 				goto done;
732 			bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
733 			bp->b_blkno = fsbtodb(fs, newb);
734 			bp->b_xflags |= BX_ALTDATA;
735 			if (flags & BA_CLRBUF)
736 				vfs_bio_clrbuf(bp);
737 			if (DOINGSOFTDEP(vp))
738 				softdep_setup_allocext(ip, lbn, newb, 0,
739 				    nsize, 0, bp);
740 		}
741 		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
742 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_IBLKDATA);
743 		*bpp = bp;
744 		error = 0;
745 		goto done;
746 	}
747 	/*
748 	 * If the next write will extend the file into a new block,
749 	 * and the file is currently composed of a fragment
750 	 * this fragment has to be extended to be a full block.
751 	 */
752 	lastlbn = lblkno(fs, ip->i_size);
753 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
754 		nb = lastlbn;
755 		osize = blksize(fs, ip, nb);
756 		if (osize < fs->fs_bsize && osize > 0) {
757 			UFS_LOCK(ump);
758 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
759 			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
760 			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
761 			    flags, cred, &bp);
762 			if (error)
763 				goto done;
764 			if (DOINGSOFTDEP(vp))
765 				softdep_setup_allocdirect(ip, nb,
766 				    dbtofsb(fs, bp->b_blkno),
767 				    dp->di_db[nb],
768 				    fs->fs_bsize, osize, bp);
769 			ip->i_size = smalllblktosize(fs, nb + 1);
770 			dp->di_size = ip->i_size;
771 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
772 			UFS_INODE_SET_FLAG(ip,
773 			    IN_SIZEMOD |IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
774 			if (flags & IO_SYNC)
775 				bwrite(bp);
776 			else
777 				bawrite(bp);
778 		}
779 	}
780 	/*
781 	 * The first UFS_NDADDR blocks are direct blocks
782 	 */
783 	if (lbn < UFS_NDADDR) {
784 		if (flags & BA_METAONLY)
785 			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
786 		nb = dp->di_db[lbn];
787 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
788 			if ((flags & BA_CLRBUF) != 0) {
789 				error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
790 				    gbflags, &bp);
791 				if (error != 0)
792 					goto done;
793 			} else {
794 				bp = getblk(vp, lbn, fs->fs_bsize, 0, 0,
795 				    gbflags);
796 				if (bp == NULL) {
797 					error = EIO;
798 					goto done;
799 				}
800 				vfs_bio_clrbuf(bp);
801 			}
802 			bp->b_blkno = fsbtodb(fs, nb);
803 			*bpp = bp;
804 			error = 0;
805 			goto done;
806 		}
807 		if (nb != 0) {
808 			/*
809 			 * Consider need to reallocate a fragment.
810 			 */
811 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
812 			nsize = fragroundup(fs, size);
813 			if (nsize <= osize) {
814 				error = bread_gb(vp, lbn, osize, NOCRED,
815 				    gbflags, &bp);
816 				if (error)
817 					goto done;
818 				bp->b_blkno = fsbtodb(fs, nb);
819 			} else {
820 				UFS_LOCK(ump);
821 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
822 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
823 				    &dp->di_db[0]), osize, nsize, flags,
824 				    cred, &bp);
825 				if (error)
826 					goto done;
827 				if (DOINGSOFTDEP(vp))
828 					softdep_setup_allocdirect(ip, lbn,
829 					    dbtofsb(fs, bp->b_blkno), nb,
830 					    nsize, osize, bp);
831 			}
832 		} else {
833 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
834 				nsize = fragroundup(fs, size);
835 			else
836 				nsize = fs->fs_bsize;
837 			UFS_LOCK(ump);
838 			error = ffs_alloc(ip, lbn,
839 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
840 				&dp->di_db[0]), nsize, flags, cred, &newb);
841 			if (error)
842 				goto done;
843 			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
844 			bp->b_blkno = fsbtodb(fs, newb);
845 			if (flags & BA_CLRBUF)
846 				vfs_bio_clrbuf(bp);
847 			if (DOINGSOFTDEP(vp))
848 				softdep_setup_allocdirect(ip, lbn, newb, 0,
849 				    nsize, 0, bp);
850 		}
851 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
852 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
853 		*bpp = bp;
854 		error = 0;
855 		goto done;
856 	}
857 	/*
858 	 * Determine the number of levels of indirection.
859 	 */
860 	pref = 0;
861 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
862 		goto done;
863 #ifdef INVARIANTS
864 	if (num < 1)
865 		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
866 #endif
867 	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
868 	/*
869 	 * Fetch the first indirect block allocating if necessary.
870 	 */
871 	--num;
872 	nb = dp->di_ib[indirs[0].in_off];
873 	allocib = NULL;
874 	allocblk = allociblk;
875 	lbns_remfree = lbns;
876 	if (nb == 0) {
877 		UFS_LOCK(ump);
878 		pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
879 		    (ufs2_daddr_t *)0);
880 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
881 		    flags, cred, &newb)) != 0) {
882 			curthread_pflags_restore(saved_inbdflush);
883 			goto done;
884 		}
885 		pref = newb + fs->fs_frag;
886 		nb = newb;
887 		MPASS(allocblk < allociblk + nitems(allociblk));
888 		MPASS(lbns_remfree < lbns + nitems(lbns));
889 		*allocblk++ = nb;
890 		*lbns_remfree++ = indirs[1].in_lbn;
891 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
892 		    GB_UNMAPPED);
893 		bp->b_blkno = fsbtodb(fs, nb);
894 		vfs_bio_clrbuf(bp);
895 		if (DOINGSOFTDEP(vp)) {
896 			softdep_setup_allocdirect(ip,
897 			    UFS_NDADDR + indirs[0].in_off, newb, 0,
898 			    fs->fs_bsize, 0, bp);
899 			bdwrite(bp);
900 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
901 			if (bp->b_bufsize == fs->fs_bsize)
902 				bp->b_flags |= B_CLUSTEROK;
903 			bdwrite(bp);
904 		} else {
905 			if ((error = bwrite(bp)) != 0)
906 				goto fail;
907 		}
908 		allocib = &dp->di_ib[indirs[0].in_off];
909 		*allocib = nb;
910 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
911 	}
912 	/*
913 	 * Fetch through the indirect blocks, allocating as necessary.
914 	 */
915 retry:
916 	for (i = 1;;) {
917 		error = bread(vp,
918 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
919 		if (error) {
920 			goto fail;
921 		}
922 		bap = (ufs2_daddr_t *)bp->b_data;
923 		nb = bap[indirs[i].in_off];
924 		if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
925 		    fs->fs_bsize)) != 0) {
926 			brelse(bp);
927 			goto fail;
928 		}
929 		if (i == num)
930 			break;
931 		i += 1;
932 		if (nb != 0) {
933 			bqrelse(bp);
934 			continue;
935 		}
936 		UFS_LOCK(ump);
937 		/*
938 		 * If parent indirect has just been allocated, try to cluster
939 		 * immediately following it.
940 		 */
941 		if (pref == 0)
942 			pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
943 			    (ufs2_daddr_t *)0);
944 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
945 		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
946 			brelse(bp);
947 			UFS_LOCK(ump);
948 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
949 				softdep_request_cleanup(fs, vp, cred,
950 				    FLUSH_BLOCKS_WAIT);
951 				UFS_UNLOCK(ump);
952 				goto retry;
953 			}
954 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
955 			    ppsratecheck(&ump->um_last_fullmsg,
956 			    &ump->um_secs_fullmsg, 1)) {
957 				UFS_UNLOCK(ump);
958 				ffs_fserr(fs, ip->i_number, "filesystem full");
959 				uprintf("\n%s: write failed, filesystem "
960 				    "is full\n", fs->fs_fsmnt);
961 			} else {
962 				UFS_UNLOCK(ump);
963 			}
964 			goto fail;
965 		}
966 		pref = newb + fs->fs_frag;
967 		nb = newb;
968 		MPASS(allocblk < allociblk + nitems(allociblk));
969 		MPASS(lbns_remfree < lbns + nitems(lbns));
970 		*allocblk++ = nb;
971 		*lbns_remfree++ = indirs[i].in_lbn;
972 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
973 		    GB_UNMAPPED);
974 		nbp->b_blkno = fsbtodb(fs, nb);
975 		vfs_bio_clrbuf(nbp);
976 		if (DOINGSOFTDEP(vp)) {
977 			softdep_setup_allocindir_meta(nbp, ip, bp,
978 			    indirs[i - 1].in_off, nb);
979 			bdwrite(nbp);
980 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
981 			if (nbp->b_bufsize == fs->fs_bsize)
982 				nbp->b_flags |= B_CLUSTEROK;
983 			bdwrite(nbp);
984 		} else {
985 			if ((error = bwrite(nbp)) != 0) {
986 				brelse(bp);
987 				goto fail;
988 			}
989 		}
990 		bap[indirs[i - 1].in_off] = nb;
991 		if (allocib == NULL && unwindidx < 0)
992 			unwindidx = i - 1;
993 		/*
994 		 * If required, write synchronously, otherwise use
995 		 * delayed write.
996 		 */
997 		if (flags & IO_SYNC) {
998 			bwrite(bp);
999 		} else {
1000 			if (bp->b_bufsize == fs->fs_bsize)
1001 				bp->b_flags |= B_CLUSTEROK;
1002 			bdwrite(bp);
1003 		}
1004 	}
1005 	/*
1006 	 * If asked only for the indirect block, then return it.
1007 	 */
1008 	if (flags & BA_METAONLY) {
1009 		curthread_pflags_restore(saved_inbdflush);
1010 		*bpp = bp;
1011 		error = 0;
1012 		goto done;
1013 	}
1014 	/*
1015 	 * Get the data block, allocating if necessary.
1016 	 */
1017 	if (nb == 0) {
1018 		UFS_LOCK(ump);
1019 		/*
1020 		 * If allocating metadata at the front of the cylinder
1021 		 * group and parent indirect block has just been allocated,
1022 		 * then cluster next to it if it is the first indirect in
1023 		 * the file. Otherwise it has been allocated in the metadata
1024 		 * area, so we want to find our own place out in the data area.
1025 		 */
1026 		if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
1027 			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
1028 			    &bap[0]);
1029 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
1030 		    flags | IO_BUFLOCKED, cred, &newb);
1031 		if (error) {
1032 			brelse(bp);
1033 			UFS_LOCK(ump);
1034 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
1035 				softdep_request_cleanup(fs, vp, cred,
1036 				    FLUSH_BLOCKS_WAIT);
1037 				UFS_UNLOCK(ump);
1038 				goto retry;
1039 			}
1040 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
1041 			    ppsratecheck(&ump->um_last_fullmsg,
1042 			    &ump->um_secs_fullmsg, 1)) {
1043 				UFS_UNLOCK(ump);
1044 				ffs_fserr(fs, ip->i_number, "filesystem full");
1045 				uprintf("\n%s: write failed, filesystem "
1046 				    "is full\n", fs->fs_fsmnt);
1047 			} else {
1048 				UFS_UNLOCK(ump);
1049 			}
1050 			goto fail;
1051 		}
1052 		nb = newb;
1053 		MPASS(allocblk < allociblk + nitems(allociblk));
1054 		MPASS(lbns_remfree < lbns + nitems(lbns));
1055 		*allocblk++ = nb;
1056 		*lbns_remfree++ = lbn;
1057 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1058 		nbp->b_blkno = fsbtodb(fs, nb);
1059 		if (flags & BA_CLRBUF)
1060 			vfs_bio_clrbuf(nbp);
1061 		if (DOINGSOFTDEP(vp))
1062 			softdep_setup_allocindir_page(ip, lbn, bp,
1063 			    indirs[i].in_off, nb, 0, nbp);
1064 		bap[indirs[i].in_off] = nb;
1065 		/*
1066 		 * If required, write synchronously, otherwise use
1067 		 * delayed write.
1068 		 */
1069 		if (flags & IO_SYNC) {
1070 			bwrite(bp);
1071 		} else {
1072 			if (bp->b_bufsize == fs->fs_bsize)
1073 				bp->b_flags |= B_CLUSTEROK;
1074 			bdwrite(bp);
1075 		}
1076 		curthread_pflags_restore(saved_inbdflush);
1077 		*bpp = nbp;
1078 		error = 0;
1079 		goto done;
1080 	}
1081 	brelse(bp);
1082 	/*
1083 	 * If requested clear invalid portions of the buffer.  If we
1084 	 * have to do a read-before-write (typical if BA_CLRBUF is set),
1085 	 * try to do some read-ahead in the sequential case to reduce
1086 	 * the number of I/O transactions.
1087 	 */
1088 	if (flags & BA_CLRBUF) {
1089 		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
1090 		if (seqcount != 0 &&
1091 		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
1092 		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
1093 			error = cluster_read(vp, ip->i_size, lbn,
1094 			    (int)fs->fs_bsize, NOCRED,
1095 			    MAXBSIZE, seqcount, gbflags, &nbp);
1096 		} else {
1097 			error = bread_gb(vp, lbn, (int)fs->fs_bsize,
1098 			    NOCRED, gbflags, &nbp);
1099 		}
1100 		if (error) {
1101 			brelse(nbp);
1102 			goto fail;
1103 		}
1104 	} else {
1105 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1106 		nbp->b_blkno = fsbtodb(fs, nb);
1107 	}
1108 	curthread_pflags_restore(saved_inbdflush);
1109 	*bpp = nbp;
1110 	error = 0;
1111 	goto done;
1112 fail:
1113 	curthread_pflags_restore(saved_inbdflush);
1114 	/*
1115 	 * If we have failed to allocate any blocks, simply return the error.
1116 	 * This is the usual case and avoids the need to fsync the file.
1117 	 */
1118 	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
1119 		goto done;
1120 	/*
1121 	 * If we have failed part way through block allocation, we
1122 	 * have to deallocate any indirect blocks that we have allocated.
1123 	 * We have to fsync the file before we start to get rid of all
1124 	 * of its dependencies so that we do not leave them dangling.
1125 	 * We have to sync it at the end so that the soft updates code
1126 	 * does not find any untracked changes. Although this is really
1127 	 * slow, running out of disk space is not expected to be a common
1128 	 * occurrence. The error return from fsync is ignored as we already
1129 	 * have an error to return to the user.
1130 	 *
1131 	 * XXX Still have to journal the free below
1132 	 */
1133 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
1134 	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
1135 	     blkp < allocblk; blkp++, lbns_remfree++) {
1136 		/*
1137 		 * We shall not leave the freed blocks on the vnode
1138 		 * buffer object lists.
1139 		 */
1140 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1141 		    GB_NOCREAT | GB_UNMAPPED);
1142 		if (bp != NULL) {
1143 			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
1144 			    ("mismatch2 l %jd %jd b %ju %ju",
1145 			    (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
1146 			    (uintmax_t)bp->b_blkno,
1147 			    (uintmax_t)fsbtodb(fs, *blkp)));
1148 			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
1149 			bp->b_flags &= ~(B_ASYNC | B_CACHE);
1150 			brelse(bp);
1151 		}
1152 		deallocated += fs->fs_bsize;
1153 	}
1154 	if (allocib != NULL) {
1155 		*allocib = 0;
1156 	} else if (unwindidx >= 0) {
1157 		int r;
1158 
1159 		r = bread(vp, indirs[unwindidx].in_lbn,
1160 		    (int)fs->fs_bsize, NOCRED, &bp);
1161 		if (r) {
1162 			panic("Could not unwind indirect block, error %d", r);
1163 			brelse(bp);
1164 		} else {
1165 			bap = (ufs2_daddr_t *)bp->b_data;
1166 			bap[indirs[unwindidx].in_off] = 0;
1167 			if (flags & IO_SYNC) {
1168 				bwrite(bp);
1169 			} else {
1170 				if (bp->b_bufsize == fs->fs_bsize)
1171 					bp->b_flags |= B_CLUSTEROK;
1172 				bdwrite(bp);
1173 			}
1174 		}
1175 	}
1176 	if (deallocated) {
1177 #ifdef QUOTA
1178 		/*
1179 		 * Restore user's disk quota because allocation failed.
1180 		 */
1181 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
1182 #endif
1183 		dp->di_blocks -= btodb(deallocated);
1184 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
1185 	}
1186 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
1187 	/*
1188 	 * After the buffers are invalidated and on-disk pointers are
1189 	 * cleared, free the blocks.
1190 	 */
1191 	for (blkp = allociblk; blkp < allocblk; blkp++) {
1192 #ifdef INVARIANTS
1193 		if (blkp == allociblk)
1194 			lbns_remfree = lbns;
1195 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1196 		    GB_NOCREAT | GB_UNMAPPED);
1197 		if (bp != NULL) {
1198 			panic("zombie2 %jd %ju %ju",
1199 			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
1200 			    (uintmax_t)fsbtodb(fs, *blkp));
1201 		}
1202 		lbns_remfree++;
1203 #endif
1204 		ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
1205 		    ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
1206 	}
1207 done:
1208 	vn_seqc_write_end(vp);
1209 	return (error);
1210 }
1211