xref: /freebsd/sys/ufs/ffs/ffs_balloc.c (revision d0b2dbfa0ecf2bbc9709efc5e20baf8e4b44bbbf)
1 /*-
2  * SPDX-License-Identifier: (BSD-2-Clause AND BSD-3-Clause)
3  *
4  * Copyright (c) 2002 Networks Associates Technology, Inc.
5  * All rights reserved.
6  *
7  * This software was developed for the FreeBSD Project by Marshall
8  * Kirk McKusick and Network Associates Laboratories, the Security
9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11  * research program
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * Copyright (c) 1982, 1986, 1989, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
62  */
63 
64 #include <sys/cdefs.h>
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/bio.h>
68 #include <sys/buf.h>
69 #include <sys/lock.h>
70 #include <sys/mount.h>
71 #include <sys/stat.h>
72 #include <sys/vnode.h>
73 #include <sys/vmmeter.h>
74 
75 #include <ufs/ufs/quota.h>
76 #include <ufs/ufs/inode.h>
77 #include <ufs/ufs/ufs_extern.h>
78 #include <ufs/ufs/extattr.h>
79 #include <ufs/ufs/ufsmount.h>
80 
81 #include <ufs/ffs/fs.h>
82 #include <ufs/ffs/ffs_extern.h>
83 
84 /*
85  * Balloc defines the structure of filesystem storage
86  * by allocating the physical blocks on a device given
87  * the inode and the logical block number in a file.
88  * This is the allocation strategy for UFS1. Below is
89  * the allocation strategy for UFS2.
 *
 * Arguments:
 *	vp		vnode of the file; the inode is obtained with VTOI(vp).
 *	startoffset	byte offset in the file; the logical block handled
 *			is lblkno(fs, startoffset).
 *	size		byte count starting at startoffset.  It is turned
 *			into a count from the start of the block by adding
 *			blkoff(fs, startoffset); the routine panics if the
 *			sum exceeds fs_bsize.
 *	cred		credential handed to ffs_alloc(), ffs_realloccg(),
 *			softdep_request_cleanup() and, on unwind, chkdq().
 *	flags		IO_ and BA_ flags.  The bits tested here are IO_EXT,
 *			IO_SYNC, BA_UNMAPPED, BA_METAONLY, BA_CLRBUF and the
 *			BA_SEQMASK field; the whole word is also passed on
 *			to the allocator routines.
 *	bpp		output.  Set to NULL on entry.  On success it holds
 *			the buffer for the data block, or, when BA_METAONLY
 *			is set, the buffer of the last indirect block.
 *
 * Return value:
 *	0 on success.  EOPNOTSUPP if IO_EXT is set, EFBIG if the logical
 *	block number is negative, EIO if getblk() returns NULL for an
 *	existing direct block, otherwise the error returned by the helper
 *	that failed (ffs_realloccg, ffs_alloc, ufs_getlbns, bread, bwrite,
 *	UFS_CHECK_BLKNO, cluster_read, bread_gb).
 *
 * Side effects visible in this routine: the inode block pointers, i_size,
 * di_size, di_blocks and the inode flags may be modified, and blocks
 * allocated before a failure are freed again in the "fail" path.
90  */
91 int
92 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
93     struct ucred *cred, int flags, struct buf **bpp)
94 {
95 	struct inode *ip;
96 	struct ufs1_dinode *dp;
97 	ufs_lbn_t lbn, lastlbn;
98 	struct fs *fs;
99 	ufs1_daddr_t nb;
100 	struct buf *bp, *nbp;
101 	struct mount *mp;
102 	struct ufsmount *ump;
103 	struct indir indirs[UFS_NIADDR + 2];
104 	int deallocated, osize, nsize, num, i, error;
105 	ufs2_daddr_t newb;
106 	ufs1_daddr_t *bap, pref;
	/*
	 * allociblk[]/lbns[] record, in parallel, the disk address and the
	 * logical block number of every block allocated on the indirect
	 * path; allocblk and lbns_remfree are the fill cursors.  The "fail"
	 * path walks them to undo a partial allocation.
	 */
107 	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
108 	ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
	/* Index into indirs[] of the indirect block to patch on unwind. */
109 	int unwindidx = -1;
110 	int saved_inbdflush;
111 	int gbflags, reclaimed;
112 
113 	ip = VTOI(vp);
114 	dp = ip->i_din1;
115 	fs = ITOFS(ip);
116 	mp = ITOVFS(ip);
117 	ump = ITOUMP(ip);
118 	lbn = lblkno(fs, startoffset);
	/* From here on, size is measured from the start of block lbn. */
119 	size = blkoff(fs, startoffset) + size;
120 	reclaimed = 0;
121 	if (size > fs->fs_bsize)
122 		panic("ffs_balloc_ufs1: blk too big");
123 	*bpp = NULL;
	/* This routine rejects IO_EXT; ffs_balloc_ufs2() has an IO_EXT path. */
124 	if (flags & IO_EXT)
125 		return (EOPNOTSUPP);
126 	if (lbn < 0)
127 		return (EFBIG);
128 	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
129 
	/*
	 * Paired with vn_seqc_write_end() at "done".  Every exit below this
	 * point leaves through "done"; only the two returns above do not.
	 */
130 	vn_seqc_write_begin(vp);
131 
132 	/*
133 	 * If the next write will extend the file into a new block,
134 	 * and the file is currently composed of a fragment
135 	 * this fragment has to be extended to be a full block.
136 	 */
137 	lastlbn = lblkno(fs, ip->i_size);
138 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
139 		nb = lastlbn;
140 		osize = blksize(fs, ip, nb);
141 		if (osize < fs->fs_bsize && osize > 0) {
			/*
			 * NOTE(review): no UFS_UNLOCK() appears on this
			 * path; presumably ffs_realloccg() returns with
			 * the lock dropped -- confirm against ffs_alloc.c.
			 */
142 			UFS_LOCK(ump);
143 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
144 			   ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
145 			   &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
146 			   cred, &bp);
147 			if (error)
148 				goto done;
			/*
			 * The dependency is set up while dp->di_db[nb]
			 * still holds the old address, which is passed as
			 * the "old block" argument; the pointer is
			 * overwritten just below.
			 */
149 			if (DOINGSOFTDEP(vp))
150 				softdep_setup_allocdirect(ip, nb,
151 				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
152 				    fs->fs_bsize, osize, bp);
153 			ip->i_size = smalllblktosize(fs, nb + 1);
154 			dp->di_size = ip->i_size;
155 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
156 			UFS_INODE_SET_FLAG(ip,
157 			    IN_SIZEMOD | IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
			/* The return values of the writes below are not checked. */
158 			if (flags & IO_SYNC)
159 				bwrite(bp);
160 			else if (DOINGASYNC(vp))
161 				bdwrite(bp);
162 			else
163 				bawrite(bp);
164 		}
165 	}
166 	/*
167 	 * The first UFS_NDADDR blocks are direct blocks
168 	 */
169 	if (lbn < UFS_NDADDR) {
170 		if (flags & BA_METAONLY)
171 			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
172 		nb = dp->di_db[lbn];
		/*
		 * Case 1: a block is already assigned and i_size is at
		 * least smalllblktosize(fs, lbn + 1), so nothing has to be
		 * allocated.  With BA_CLRBUF the block is read in; without
		 * it the buffer is obtained with getblk() and cleared with
		 * vfs_bio_clrbuf().
		 */
173 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
174 			if ((flags & BA_CLRBUF) != 0) {
175 				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
176 				    &bp);
177 				if (error != 0)
178 					goto done;
179 			} else {
180 				bp = getblk(vp, lbn, fs->fs_bsize, 0, 0,
181 				    gbflags);
182 				if (bp == NULL) {
183 					error = EIO;
184 					goto done;
185 				}
186 				vfs_bio_clrbuf(bp);
187 			}
188 			bp->b_blkno = fsbtodb(fs, nb);
189 			*bpp = bp;
190 			error = 0;
191 			goto done;
192 		}
		/*
		 * Case 2: a block is assigned but the test above failed,
		 * i.e. i_size is below smalllblktosize(fs, lbn + 1).
		 */
193 		if (nb != 0) {
194 			/*
195 			 * Consider need to reallocate a fragment.
196 			 */
			/*
			 * osize is the current size of the last block as
			 * implied by i_size, nsize the size needed by this
			 * request; both are rounded up with fragroundup().
			 */
197 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
198 			nsize = fragroundup(fs, size);
199 			if (nsize <= osize) {
				/* Existing allocation is big enough: read it. */
200 				error = bread(vp, lbn, osize, NOCRED, &bp);
201 				if (error)
202 					goto done;
203 				bp->b_blkno = fsbtodb(fs, nb);
204 			} else {
				/* Grow the allocation from osize to nsize. */
205 				UFS_LOCK(ump);
206 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
207 				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
208 				    &dp->di_db[0]), osize, nsize, flags,
209 				    cred, &bp);
210 				if (error)
211 					goto done;
212 				if (DOINGSOFTDEP(vp))
213 					softdep_setup_allocdirect(ip, lbn,
214 					    dbtofsb(fs, bp->b_blkno), nb,
215 					    nsize, osize, bp);
216 			}
217 		} else {
			/*
			 * Case 3: no block assigned yet.  nsize is the
			 * fragment-rounded request when i_size is below
			 * smalllblktosize(fs, lbn + 1), otherwise a full
			 * block.
			 */
218 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
219 				nsize = fragroundup(fs, size);
220 			else
221 				nsize = fs->fs_bsize;
222 			UFS_LOCK(ump);
223 			error = ffs_alloc(ip, lbn,
224 			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
225 			    nsize, flags, cred, &newb);
226 			if (error)
227 				goto done;
228 			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
229 			bp->b_blkno = fsbtodb(fs, newb);
230 			if (flags & BA_CLRBUF)
231 				vfs_bio_clrbuf(bp);
232 			if (DOINGSOFTDEP(vp))
233 				softdep_setup_allocdirect(ip, lbn, newb, 0,
234 				    nsize, 0, bp);
235 		}
		/* Cases 2 and 3: store the block address in the inode. */
236 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
237 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
238 		*bpp = bp;
239 		error = 0;
240 		goto done;
241 	}
242 	/*
243 	 * Determine the number of levels of indirection.
244 	 */
245 	pref = 0;
246 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
247 		goto done;
248 #ifdef INVARIANTS
249 	if (num < 1)
250 		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
251 #endif
	/*
	 * TDP_INBDFLUSH is set for the rest of the indirect path.  Every
	 * exit below restores the saved value, either directly before
	 * "goto done" or at the top of "fail".
	 */
252 	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
253 	/*
254 	 * Fetch the first indirect block allocating if necessary.
255 	 */
	/*
	 * After the decrement, num is the indirs[] index at which the loop
	 * below stops (it breaks when i == num).
	 */
256 	--num;
257 	nb = dp->di_ib[indirs[0].in_off];
258 	allocib = NULL;
259 	allocblk = allociblk;
260 	lbns_remfree = lbns;
261 	if (nb == 0) {
262 		UFS_LOCK(ump);
263 		pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
264 		    (ufs1_daddr_t *)0);
265 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
266 		    flags, cred, &newb)) != 0) {
			/* Nothing has been allocated yet, so skip "fail". */
267 			curthread_pflags_restore(saved_inbdflush);
268 			goto done;
269 		}
		/* Next preference: the block right after the one just obtained. */
270 		pref = newb + fs->fs_frag;
271 		nb = newb;
272 		MPASS(allocblk < allociblk + nitems(allociblk));
273 		MPASS(lbns_remfree < lbns + nitems(lbns));
274 		*allocblk++ = nb;
275 		*lbns_remfree++ = indirs[1].in_lbn;
276 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
277 		bp->b_blkno = fsbtodb(fs, nb);
278 		vfs_bio_clrbuf(bp);
		/*
		 * Write out the zeroed indirect block: delayed when soft
		 * updates are in use, delayed (with B_CLUSTEROK for a full
		 * size buffer) when IO_SYNC is clear and DOINGASYNC(vp) is
		 * true, otherwise synchronously.  A failed synchronous
		 * write jumps to "fail" before the inode pointer is stored.
		 */
279 		if (DOINGSOFTDEP(vp)) {
280 			softdep_setup_allocdirect(ip,
281 			    UFS_NDADDR + indirs[0].in_off, newb, 0,
282 			    fs->fs_bsize, 0, bp);
283 			bdwrite(bp);
284 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
285 			if (bp->b_bufsize == fs->fs_bsize)
286 				bp->b_flags |= B_CLUSTEROK;
287 			bdwrite(bp);
288 		} else {
289 			if ((error = bwrite(bp)) != 0)
290 				goto fail;
291 		}
		/* Remember the inode slot so that "fail" can zero it again. */
292 		allocib = &dp->di_ib[indirs[0].in_off];
293 		*allocib = nb;
294 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
295 	}
296 	/*
297 	 * Fetch through the indirect blocks, allocating as necessary.
298 	 */
	/*
	 * The loop has no condition of its own: it ends with "break" when
	 * i == num (bp then holds the last indirect block and nb the entry
	 * for the data block) or leaves via "goto fail" / "goto retry".
	 */
299 retry:
300 	for (i = 1;;) {
301 		error = bread(vp,
302 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
303 		if (error) {
304 			goto fail;
305 		}
306 		bap = (ufs1_daddr_t *)bp->b_data;
307 		nb = bap[indirs[i].in_off];
308 		if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
309 		    fs->fs_bsize)) != 0) {
310 			brelse(bp);
311 			goto fail;
312 		}
313 		if (i == num)
314 			break;
		/*
		 * i is advanced before nb is examined.  From here to the
		 * end of the iteration indirs[i - 1] describes the block
		 * held in bp and indirs[i] the next-level block.
		 */
315 		i += 1;
316 		if (nb != 0) {
317 			bqrelse(bp);
318 			continue;
319 		}
320 		UFS_LOCK(ump);
321 		/*
322 		 * If parent indirect has just been allocated, try to cluster
323 		 * immediately following it.
324 		 */
325 		if (pref == 0)
326 			pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
327 			    (ufs1_daddr_t *)0);
		/*
		 * bp is still held across this call and IO_BUFLOCKED is
		 * added to the flags.  NOTE(review): presumably this tells
		 * the allocator that a buffer is locked -- confirm.
		 */
328 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
329 		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
330 			brelse(bp);
			/*
			 * The lock is taken again for the cleanup helpers
			 * and released on each of the branches below.
			 * NOTE(review): this implies ffs_alloc() returns
			 * unlocked on failure -- confirm.
			 */
331 			UFS_LOCK(ump);
			/* Only the first failure triggers a cleanup and retry. */
332 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
333 				softdep_request_cleanup(fs, vp, cred,
334 				    FLUSH_BLOCKS_WAIT);
335 				UFS_UNLOCK(ump);
336 				goto retry;
337 			}
338 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
339 			    ppsratecheck(&ump->um_last_fullmsg,
340 			    &ump->um_secs_fullmsg, 1)) {
341 				UFS_UNLOCK(ump);
342 				ffs_fserr(fs, ip->i_number, "filesystem full");
343 				uprintf("\n%s: write failed, filesystem "
344 				    "is full\n", fs->fs_fsmnt);
345 			} else {
346 				UFS_UNLOCK(ump);
347 			}
348 			goto fail;
349 		}
350 		pref = newb + fs->fs_frag;
351 		nb = newb;
352 		MPASS(allocblk < allociblk + nitems(allociblk));
353 		MPASS(lbns_remfree < lbns + nitems(lbns));
354 		*allocblk++ = nb;
355 		*lbns_remfree++ = indirs[i].in_lbn;
356 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
357 		nbp->b_blkno = fsbtodb(fs, nb);
358 		vfs_bio_clrbuf(nbp);
		/*
		 * Same write policy as for the first indirect block.  The
		 * new block is written (or queued) before the parent entry
		 * is stored below; a failed synchronous write releases the
		 * parent and unwinds without having modified it.
		 */
359 		if (DOINGSOFTDEP(vp)) {
360 			softdep_setup_allocindir_meta(nbp, ip, bp,
361 			    indirs[i - 1].in_off, nb);
362 			bdwrite(nbp);
363 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
364 			if (nbp->b_bufsize == fs->fs_bsize)
365 				nbp->b_flags |= B_CLUSTEROK;
366 			bdwrite(nbp);
367 		} else {
368 			if ((error = bwrite(nbp)) != 0) {
369 				brelse(bp);
370 				goto fail;
371 			}
372 		}
373 		bap[indirs[i - 1].in_off] = nb;
		/*
		 * If the top-level pointer in the inode was not allocated
		 * by this call, remember the first pre-existing indirect
		 * block that was modified; "fail" zeroes that entry.
		 */
374 		if (allocib == NULL && unwindidx < 0)
375 			unwindidx = i - 1;
376 		/*
377 		 * If required, write synchronously, otherwise use
378 		 * delayed write.
379 		 */
380 		if (flags & IO_SYNC) {
381 			bwrite(bp);
382 		} else {
383 			if (bp->b_bufsize == fs->fs_bsize)
384 				bp->b_flags |= B_CLUSTEROK;
385 			bdwrite(bp);
386 		}
387 	}
388 	/*
389 	 * If asked only for the indirect block, then return it.
390 	 */
391 	if (flags & BA_METAONLY) {
392 		curthread_pflags_restore(saved_inbdflush);
393 		*bpp = bp;
394 		error = 0;
395 		goto done;
396 	}
397 	/*
398 	 * Get the data block, allocating if necessary.
399 	 */
400 	if (nb == 0) {
401 		UFS_LOCK(ump);
402 		/*
403 		 * If allocating metadata at the front of the cylinder
404 		 * group and parent indirect block has just been allocated,
405 		 * then cluster next to it if it is the first indirect in
406 		 * the file. Otherwise it has been allocated in the metadata
407 		 * area, so we want to find our own place out in the data area.
408 		 */
409 		if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
410 			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
411 			    &bap[0]);
412 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
413 		    flags | IO_BUFLOCKED, cred, &newb);
414 		if (error) {
			/* Same failure handling as in the loop above. */
415 			brelse(bp);
416 			UFS_LOCK(ump);
417 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
418 				softdep_request_cleanup(fs, vp, cred,
419 				    FLUSH_BLOCKS_WAIT);
420 				UFS_UNLOCK(ump);
421 				goto retry;
422 			}
423 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
424 			    ppsratecheck(&ump->um_last_fullmsg,
425 			    &ump->um_secs_fullmsg, 1)) {
426 				UFS_UNLOCK(ump);
427 				ffs_fserr(fs, ip->i_number, "filesystem full");
428 				uprintf("\n%s: write failed, filesystem "
429 				    "is full\n", fs->fs_fsmnt);
430 			} else {
431 				UFS_UNLOCK(ump);
432 			}
433 			goto fail;
434 		}
435 		nb = newb;
436 		MPASS(allocblk < allociblk + nitems(allociblk));
437 		MPASS(lbns_remfree < lbns + nitems(lbns));
438 		*allocblk++ = nb;
439 		*lbns_remfree++ = lbn;
440 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
441 		nbp->b_blkno = fsbtodb(fs, nb);
442 		if (flags & BA_CLRBUF)
443 			vfs_bio_clrbuf(nbp);
444 		if (DOINGSOFTDEP(vp))
445 			softdep_setup_allocindir_page(ip, lbn, bp,
446 			    indirs[i].in_off, nb, 0, nbp);
		/* Store the new data block address in the indirect block. */
447 		bap[indirs[i].in_off] = nb;
448 		/*
449 		 * If required, write synchronously, otherwise use
450 		 * delayed write.
451 		 */
452 		if (flags & IO_SYNC) {
453 			bwrite(bp);
454 		} else {
455 			if (bp->b_bufsize == fs->fs_bsize)
456 				bp->b_flags |= B_CLUSTEROK;
457 			bdwrite(bp);
458 		}
459 		curthread_pflags_restore(saved_inbdflush);
460 		*bpp = nbp;
461 		error = 0;
462 		goto done;
463 	}
	/*
	 * The data block already exists: the indirect block is no longer
	 * needed.  With BA_CLRBUF the data block is read (through
	 * cluster_read() when a sequential count is supplied, cluster reads
	 * are not disabled on the mount and neither severity check fires);
	 * without it only a buffer is obtained and its address set.
	 */
464 	brelse(bp);
465 	if (flags & BA_CLRBUF) {
466 		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
467 		if (seqcount != 0 &&
468 		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
469 		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
470 			error = cluster_read(vp, ip->i_size, lbn,
471 			    (int)fs->fs_bsize, NOCRED,
472 			    MAXBSIZE, seqcount, gbflags, &nbp);
473 		} else {
474 			error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
475 			    gbflags, &nbp);
476 		}
477 		if (error) {
			/*
			 * NOTE(review): the state of nbp after a failed
			 * read is not visible here; this relies on
			 * brelse() coping with whatever the read routines
			 * leave in nbp (possibly NULL) -- confirm.
			 */
478 			brelse(nbp);
479 			goto fail;
480 		}
481 	} else {
482 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
483 		nbp->b_blkno = fsbtodb(fs, nb);
484 	}
485 	curthread_pflags_restore(saved_inbdflush);
486 	*bpp = nbp;
487 	error = 0;
488 	goto done;
489 fail:
490 	curthread_pflags_restore(saved_inbdflush);
491 	/*
492 	 * If we have failed to allocate any blocks, simply return the error.
493 	 * This is the usual case and avoids the need to fsync the file.
494 	 */
495 	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
496 		goto done;
497 	/*
498 	 * If we have failed part way through block allocation, we
499 	 * have to deallocate any indirect blocks that we have allocated.
500 	 * We have to fsync the file before we start to get rid of all
501 	 * of its dependencies so that we do not leave them dangling.
502 	 * We have to sync it at the end so that the soft updates code
503 	 * does not find any untracked changes. Although this is really
504 	 * slow, running out of disk space is not expected to be a common
505 	 * occurrence. The error return from fsync is ignored as we already
506 	 * have an error to return to the user.
507 	 *
508 	 * XXX Still have to journal the free below
509 	 */
510 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
	/*
	 * Pass 1: invalidate any cached buffer for each block recorded in
	 * allociblk[]/lbns[] and count the bytes being given back.
	 */
511 	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
512 	     blkp < allocblk; blkp++, lbns_remfree++) {
513 		/*
514 		 * We shall not leave the freed blocks on the vnode
515 		 * buffer object lists.
516 		 */
517 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
518 		    GB_NOCREAT | GB_UNMAPPED);
519 		if (bp != NULL) {
520 			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
521 			    ("mismatch1 l %jd %jd b %ju %ju",
522 			    (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
523 			    (uintmax_t)bp->b_blkno,
524 			    (uintmax_t)fsbtodb(fs, *blkp)));
525 			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
526 			bp->b_flags &= ~(B_ASYNC | B_CACHE);
527 			brelse(bp);
528 		}
529 		deallocated += fs->fs_bsize;
530 	}
	/*
	 * Clear the pointer that linked the new chain into the file: the
	 * inode slot if the first indirect block was allocated here,
	 * otherwise the entry in the pre-existing indirect block recorded
	 * in unwindidx.
	 */
531 	if (allocib != NULL) {
532 		*allocib = 0;
533 	} else if (unwindidx >= 0) {
534 		int r;
535 
536 		r = bread(vp, indirs[unwindidx].in_lbn,
537 		    (int)fs->fs_bsize, NOCRED, &bp);
538 		if (r) {
			/*
			 * NOTE(review): the brelse() below follows panic()
			 * and looks unreachable, assuming panic() does not
			 * return.
			 */
539 			panic("Could not unwind indirect block, error %d", r);
540 			brelse(bp);
541 		} else {
542 			bap = (ufs1_daddr_t *)bp->b_data;
543 			bap[indirs[unwindidx].in_off] = 0;
544 			if (flags & IO_SYNC) {
545 				bwrite(bp);
546 			} else {
547 				if (bp->b_bufsize == fs->fs_bsize)
548 					bp->b_flags |= B_CLUSTEROK;
549 				bdwrite(bp);
550 			}
551 		}
552 	}
553 	if (deallocated) {
554 #ifdef QUOTA
555 		/*
556 		 * Restore user's disk quota because allocation failed.
557 		 */
558 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
559 #endif
		/* Take the released blocks out of the inode's block count. */
560 		dp->di_blocks -= btodb(deallocated);
561 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
562 	}
563 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
564 	/*
565 	 * After the buffers are invalidated and on-disk pointers are
566 	 * cleared, free the blocks.
567 	 */
	/*
	 * Pass 2: hand each recorded block back with ffs_blkfree().  Under
	 * INVARIANTS, first check that no buffer for the block is still
	 * present and panic if one is.
	 */
568 	for (blkp = allociblk; blkp < allocblk; blkp++) {
569 #ifdef INVARIANTS
570 		if (blkp == allociblk)
571 			lbns_remfree = lbns;
572 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
573 		    GB_NOCREAT | GB_UNMAPPED);
574 		if (bp != NULL) {
575 			panic("zombie1 %jd %ju %ju",
576 			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
577 			    (uintmax_t)fsbtodb(fs, *blkp));
578 		}
579 		lbns_remfree++;
580 #endif
581 		ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
582 		    ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
583 	}
	/* Common exit: closes the vn_seqc_write_begin() made near the top. */
584 done:
585 	vn_seqc_write_end(vp);
586 	return (error);
587 }
588 
589 /*
590  * Balloc defines the structure of file system storage
591  * by allocating the physical blocks on a device given
592  * the inode and the logical block number in a file.
593  * This is the allocation strategy for UFS2. Above is
594  * the allocation strategy for UFS1.
595  */
596 int
597 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
598     struct ucred *cred, int flags, struct buf **bpp)
599 {
600 	struct inode *ip;
601 	struct ufs2_dinode *dp;
602 	ufs_lbn_t lbn, lastlbn;
603 	struct fs *fs;
604 	struct buf *bp, *nbp;
605 	struct mount *mp;
606 	struct ufsmount *ump;
607 	struct indir indirs[UFS_NIADDR + 2];
608 	ufs2_daddr_t nb, newb, *bap, pref;
609 	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
610 	ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
611 	int deallocated, osize, nsize, num, i, error;
612 	int unwindidx = -1;
613 	int saved_inbdflush;
614 	int gbflags, gbwflag, reclaimed;
615 
616 	ip = VTOI(vp);
617 	dp = ip->i_din2;
618 	fs = ITOFS(ip);
619 	mp = ITOVFS(ip);
620 	ump = ITOUMP(ip);
621 	lbn = lblkno(fs, startoffset);
622 	size = blkoff(fs, startoffset) + size;
623 	reclaimed = 0;
624 	if (size > fs->fs_bsize)
625 		panic("ffs_balloc_ufs2: blk too big");
626 	*bpp = NULL;
627 	if (lbn < 0)
628 		return (EFBIG);
629 	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
630 #ifdef WITNESS
631 	gbwflag = IS_SNAPSHOT(ip) ? GB_NOWITNESS : 0;
632 	gbflags |= gbwflag;
633 #else
634 	gbwflag = 0;
635 #endif
636 
637 	vn_seqc_write_begin(vp);
638 
639 	/*
640 	 * Check for allocating external data.
641 	 */
642 	if (flags & IO_EXT) {
643 		if (lbn >= UFS_NXADDR) {
644 			error = EFBIG;
645 			goto done;
646 		}
647 
648 		/*
649 		 * If the next write will extend the data into a new block,
650 		 * and the data is currently composed of a fragment
651 		 * this fragment has to be extended to be a full block.
652 		 */
653 		lastlbn = lblkno(fs, dp->di_extsize);
654 		if (lastlbn < lbn) {
655 			nb = lastlbn;
656 			osize = sblksize(fs, dp->di_extsize, nb);
657 			if (osize < fs->fs_bsize && osize > 0) {
658 				UFS_LOCK(ump);
659 				error = ffs_realloccg(ip, -1 - nb,
660 				    dp->di_extb[nb],
661 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
662 				    &dp->di_extb[0]), osize,
663 				    (int)fs->fs_bsize, flags, cred, &bp);
664 				if (error)
665 					goto done;
666 				if (DOINGSOFTDEP(vp))
667 					softdep_setup_allocext(ip, nb,
668 					    dbtofsb(fs, bp->b_blkno),
669 					    dp->di_extb[nb],
670 					    fs->fs_bsize, osize, bp);
671 				dp->di_extsize = smalllblktosize(fs, nb + 1);
672 				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
673 				bp->b_xflags |= BX_ALTDATA;
674 				UFS_INODE_SET_FLAG(ip,
675 				    IN_SIZEMOD | IN_CHANGE | IN_IBLKDATA);
676 				if (flags & IO_SYNC)
677 					bwrite(bp);
678 				else
679 					bawrite(bp);
680 			}
681 		}
682 		/*
683 		 * All blocks are direct blocks
684 		 */
685 		if (flags & BA_METAONLY)
686 			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
687 		nb = dp->di_extb[lbn];
688 		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
689 			error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
690 			    gbflags, &bp);
691 			if (error)
692 				goto done;
693 			bp->b_blkno = fsbtodb(fs, nb);
694 			bp->b_xflags |= BX_ALTDATA;
695 			*bpp = bp;
696 			goto done;
697 		}
698 		if (nb != 0) {
699 			/*
700 			 * Consider need to reallocate a fragment.
701 			 */
702 			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
703 			nsize = fragroundup(fs, size);
704 			if (nsize <= osize) {
705 				error = bread_gb(vp, -1 - lbn, osize, NOCRED,
706 				    gbflags, &bp);
707 				if (error)
708 					goto done;
709 				bp->b_blkno = fsbtodb(fs, nb);
710 				bp->b_xflags |= BX_ALTDATA;
711 			} else {
712 				UFS_LOCK(ump);
713 				error = ffs_realloccg(ip, -1 - lbn,
714 				    dp->di_extb[lbn],
715 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
716 				    &dp->di_extb[0]), osize, nsize, flags,
717 				    cred, &bp);
718 				if (error)
719 					goto done;
720 				bp->b_xflags |= BX_ALTDATA;
721 				if (DOINGSOFTDEP(vp))
722 					softdep_setup_allocext(ip, lbn,
723 					    dbtofsb(fs, bp->b_blkno), nb,
724 					    nsize, osize, bp);
725 			}
726 		} else {
727 			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
728 				nsize = fragroundup(fs, size);
729 			else
730 				nsize = fs->fs_bsize;
731 			UFS_LOCK(ump);
732 			error = ffs_alloc(ip, lbn,
733 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
734 			   nsize, flags, cred, &newb);
735 			if (error)
736 				goto done;
737 			bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
738 			bp->b_blkno = fsbtodb(fs, newb);
739 			bp->b_xflags |= BX_ALTDATA;
740 			if (flags & BA_CLRBUF)
741 				vfs_bio_clrbuf(bp);
742 			if (DOINGSOFTDEP(vp))
743 				softdep_setup_allocext(ip, lbn, newb, 0,
744 				    nsize, 0, bp);
745 		}
746 		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
747 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_IBLKDATA);
748 		*bpp = bp;
749 		error = 0;
750 		goto done;
751 	}
752 	/*
753 	 * If the next write will extend the file into a new block,
754 	 * and the file is currently composed of a fragment
755 	 * this fragment has to be extended to be a full block.
756 	 */
757 	lastlbn = lblkno(fs, ip->i_size);
758 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
759 		nb = lastlbn;
760 		osize = blksize(fs, ip, nb);
761 		if (osize < fs->fs_bsize && osize > 0) {
762 			UFS_LOCK(ump);
763 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
764 			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
765 			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
766 			    flags, cred, &bp);
767 			if (error)
768 				goto done;
769 			if (DOINGSOFTDEP(vp))
770 				softdep_setup_allocdirect(ip, nb,
771 				    dbtofsb(fs, bp->b_blkno),
772 				    dp->di_db[nb],
773 				    fs->fs_bsize, osize, bp);
774 			ip->i_size = smalllblktosize(fs, nb + 1);
775 			dp->di_size = ip->i_size;
776 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
777 			UFS_INODE_SET_FLAG(ip,
778 			    IN_SIZEMOD |IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
779 			if (flags & IO_SYNC)
780 				bwrite(bp);
781 			else
782 				bawrite(bp);
783 		}
784 	}
785 	/*
786 	 * The first UFS_NDADDR blocks are direct blocks
787 	 */
788 	if (lbn < UFS_NDADDR) {
789 		if (flags & BA_METAONLY)
790 			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
791 		nb = dp->di_db[lbn];
792 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
793 			if ((flags & BA_CLRBUF) != 0) {
794 				error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
795 				    gbflags, &bp);
796 				if (error != 0)
797 					goto done;
798 			} else {
799 				bp = getblk(vp, lbn, fs->fs_bsize, 0, 0,
800 				    gbflags);
801 				if (bp == NULL) {
802 					error = EIO;
803 					goto done;
804 				}
805 				vfs_bio_clrbuf(bp);
806 			}
807 			bp->b_blkno = fsbtodb(fs, nb);
808 			*bpp = bp;
809 			error = 0;
810 			goto done;
811 		}
812 		if (nb != 0) {
813 			/*
814 			 * Consider need to reallocate a fragment.
815 			 */
816 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
817 			nsize = fragroundup(fs, size);
818 			if (nsize <= osize) {
819 				error = bread_gb(vp, lbn, osize, NOCRED,
820 				    gbflags, &bp);
821 				if (error)
822 					goto done;
823 				bp->b_blkno = fsbtodb(fs, nb);
824 			} else {
825 				UFS_LOCK(ump);
826 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
827 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
828 				    &dp->di_db[0]), osize, nsize, flags,
829 				    cred, &bp);
830 				if (error)
831 					goto done;
832 				if (DOINGSOFTDEP(vp))
833 					softdep_setup_allocdirect(ip, lbn,
834 					    dbtofsb(fs, bp->b_blkno), nb,
835 					    nsize, osize, bp);
836 			}
837 		} else {
838 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
839 				nsize = fragroundup(fs, size);
840 			else
841 				nsize = fs->fs_bsize;
842 			UFS_LOCK(ump);
843 			error = ffs_alloc(ip, lbn,
844 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
845 				&dp->di_db[0]), nsize, flags, cred, &newb);
846 			if (error)
847 				goto done;
848 			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
849 			bp->b_blkno = fsbtodb(fs, newb);
850 			if (flags & BA_CLRBUF)
851 				vfs_bio_clrbuf(bp);
852 			if (DOINGSOFTDEP(vp))
853 				softdep_setup_allocdirect(ip, lbn, newb, 0,
854 				    nsize, 0, bp);
855 		}
856 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
857 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
858 		*bpp = bp;
859 		error = 0;
860 		goto done;
861 	}
862 	/*
863 	 * Determine the number of levels of indirection.
864 	 */
865 	pref = 0;
866 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
867 		goto done;
868 #ifdef INVARIANTS
869 	if (num < 1)
870 		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
871 #endif
872 	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
873 	/*
874 	 * Fetch the first indirect block allocating if necessary.
875 	 */
876 	--num;
877 	nb = dp->di_ib[indirs[0].in_off];
878 	allocib = NULL;
879 	allocblk = allociblk;
880 	lbns_remfree = lbns;
881 	if (nb == 0) {
882 		UFS_LOCK(ump);
883 		pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
884 		    (ufs2_daddr_t *)0);
885 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
886 		    flags, cred, &newb)) != 0) {
887 			curthread_pflags_restore(saved_inbdflush);
888 			goto done;
889 		}
890 		pref = newb + fs->fs_frag;
891 		nb = newb;
892 		MPASS(allocblk < allociblk + nitems(allociblk));
893 		MPASS(lbns_remfree < lbns + nitems(lbns));
894 		*allocblk++ = nb;
895 		*lbns_remfree++ = indirs[1].in_lbn;
896 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
897 		    GB_UNMAPPED | gbwflag);
898 		bp->b_blkno = fsbtodb(fs, nb);
899 		vfs_bio_clrbuf(bp);
900 		if (DOINGSOFTDEP(vp)) {
901 			softdep_setup_allocdirect(ip,
902 			    UFS_NDADDR + indirs[0].in_off, newb, 0,
903 			    fs->fs_bsize, 0, bp);
904 			bdwrite(bp);
905 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
906 			if (bp->b_bufsize == fs->fs_bsize)
907 				bp->b_flags |= B_CLUSTEROK;
908 			bdwrite(bp);
909 		} else {
910 			if ((error = bwrite(bp)) != 0)
911 				goto fail;
912 		}
913 		allocib = &dp->di_ib[indirs[0].in_off];
914 		*allocib = nb;
915 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
916 	}
917 	/*
918 	 * Fetch through the indirect blocks, allocating as necessary.
919 	 */
920 retry:
921 	for (i = 1;;) {
922 		error = bread_gb(vp, indirs[i].in_lbn, (int)fs->fs_bsize,
923 		    NOCRED, gbwflag, &bp);
924 		if (error) {
925 			goto fail;
926 		}
927 		bap = (ufs2_daddr_t *)bp->b_data;
928 		nb = bap[indirs[i].in_off];
929 		if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
930 		    fs->fs_bsize)) != 0) {
931 			brelse(bp);
932 			goto fail;
933 		}
934 		if (i == num)
935 			break;
936 		i += 1;
937 		if (nb != 0) {
938 			bqrelse(bp);
939 			continue;
940 		}
941 		UFS_LOCK(ump);
942 		/*
943 		 * If parent indirect has just been allocated, try to cluster
944 		 * immediately following it.
945 		 */
946 		if (pref == 0)
947 			pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
948 			    (ufs2_daddr_t *)0);
949 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
950 		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
951 			brelse(bp);
952 			UFS_LOCK(ump);
953 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
954 				softdep_request_cleanup(fs, vp, cred,
955 				    FLUSH_BLOCKS_WAIT);
956 				UFS_UNLOCK(ump);
957 				goto retry;
958 			}
959 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
960 			    ppsratecheck(&ump->um_last_fullmsg,
961 			    &ump->um_secs_fullmsg, 1)) {
962 				UFS_UNLOCK(ump);
963 				ffs_fserr(fs, ip->i_number, "filesystem full");
964 				uprintf("\n%s: write failed, filesystem "
965 				    "is full\n", fs->fs_fsmnt);
966 			} else {
967 				UFS_UNLOCK(ump);
968 			}
969 			goto fail;
970 		}
971 		pref = newb + fs->fs_frag;
972 		nb = newb;
973 		MPASS(allocblk < allociblk + nitems(allociblk));
974 		MPASS(lbns_remfree < lbns + nitems(lbns));
975 		*allocblk++ = nb;
976 		*lbns_remfree++ = indirs[i].in_lbn;
977 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
978 		    GB_UNMAPPED);
979 		nbp->b_blkno = fsbtodb(fs, nb);
980 		vfs_bio_clrbuf(nbp);
981 		if (DOINGSOFTDEP(vp)) {
982 			softdep_setup_allocindir_meta(nbp, ip, bp,
983 			    indirs[i - 1].in_off, nb);
984 			bdwrite(nbp);
985 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
986 			if (nbp->b_bufsize == fs->fs_bsize)
987 				nbp->b_flags |= B_CLUSTEROK;
988 			bdwrite(nbp);
989 		} else {
990 			if ((error = bwrite(nbp)) != 0) {
991 				brelse(bp);
992 				goto fail;
993 			}
994 		}
995 		bap[indirs[i - 1].in_off] = nb;
996 		if (allocib == NULL && unwindidx < 0)
997 			unwindidx = i - 1;
998 		/*
999 		 * If required, write synchronously, otherwise use
1000 		 * delayed write.
1001 		 */
1002 		if (flags & IO_SYNC) {
1003 			bwrite(bp);
1004 		} else {
1005 			if (bp->b_bufsize == fs->fs_bsize)
1006 				bp->b_flags |= B_CLUSTEROK;
1007 			bdwrite(bp);
1008 		}
1009 	}
1010 	/*
1011 	 * If asked only for the indirect block, then return it.
1012 	 */
1013 	if (flags & BA_METAONLY) {
1014 		curthread_pflags_restore(saved_inbdflush);
1015 		*bpp = bp;
1016 		error = 0;
1017 		goto done;
1018 	}
1019 	/*
1020 	 * Get the data block, allocating if necessary.
1021 	 */
1022 	if (nb == 0) {
1023 		UFS_LOCK(ump);
1024 		/*
1025 		 * If allocating metadata at the front of the cylinder
1026 		 * group and parent indirect block has just been allocated,
1027 		 * then cluster next to it if it is the first indirect in
1028 		 * the file. Otherwise it has been allocated in the metadata
1029 		 * area, so we want to find our own place out in the data area.
1030 		 */
1031 		if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
1032 			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
1033 			    &bap[0]);
1034 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
1035 		    flags | IO_BUFLOCKED, cred, &newb);
1036 		if (error) {
1037 			brelse(bp);
1038 			UFS_LOCK(ump);
1039 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
1040 				softdep_request_cleanup(fs, vp, cred,
1041 				    FLUSH_BLOCKS_WAIT);
1042 				UFS_UNLOCK(ump);
1043 				goto retry;
1044 			}
1045 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
1046 			    ppsratecheck(&ump->um_last_fullmsg,
1047 			    &ump->um_secs_fullmsg, 1)) {
1048 				UFS_UNLOCK(ump);
1049 				ffs_fserr(fs, ip->i_number, "filesystem full");
1050 				uprintf("\n%s: write failed, filesystem "
1051 				    "is full\n", fs->fs_fsmnt);
1052 			} else {
1053 				UFS_UNLOCK(ump);
1054 			}
1055 			goto fail;
1056 		}
1057 		nb = newb;
1058 		MPASS(allocblk < allociblk + nitems(allociblk));
1059 		MPASS(lbns_remfree < lbns + nitems(lbns));
1060 		*allocblk++ = nb;
1061 		*lbns_remfree++ = lbn;
1062 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1063 		nbp->b_blkno = fsbtodb(fs, nb);
1064 		if (flags & BA_CLRBUF)
1065 			vfs_bio_clrbuf(nbp);
1066 		if (DOINGSOFTDEP(vp))
1067 			softdep_setup_allocindir_page(ip, lbn, bp,
1068 			    indirs[i].in_off, nb, 0, nbp);
1069 		bap[indirs[i].in_off] = nb;
1070 		/*
1071 		 * If required, write synchronously, otherwise use
1072 		 * delayed write.
1073 		 */
1074 		if (flags & IO_SYNC) {
1075 			bwrite(bp);
1076 		} else {
1077 			if (bp->b_bufsize == fs->fs_bsize)
1078 				bp->b_flags |= B_CLUSTEROK;
1079 			bdwrite(bp);
1080 		}
1081 		curthread_pflags_restore(saved_inbdflush);
1082 		*bpp = nbp;
1083 		error = 0;
1084 		goto done;
1085 	}
1086 	brelse(bp);
1087 	/*
1088 	 * If requested clear invalid portions of the buffer.  If we
1089 	 * have to do a read-before-write (typical if BA_CLRBUF is set),
1090 	 * try to do some read-ahead in the sequential case to reduce
1091 	 * the number of I/O transactions.
1092 	 */
1093 	if (flags & BA_CLRBUF) {
1094 		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
1095 		if (seqcount != 0 &&
1096 		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
1097 		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
1098 			error = cluster_read(vp, ip->i_size, lbn,
1099 			    (int)fs->fs_bsize, NOCRED,
1100 			    MAXBSIZE, seqcount, gbflags, &nbp);
1101 		} else {
1102 			error = bread_gb(vp, lbn, (int)fs->fs_bsize,
1103 			    NOCRED, gbflags, &nbp);
1104 		}
1105 		if (error) {
1106 			brelse(nbp);
1107 			goto fail;
1108 		}
1109 	} else {
1110 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1111 		nbp->b_blkno = fsbtodb(fs, nb);
1112 	}
1113 	curthread_pflags_restore(saved_inbdflush);
1114 	*bpp = nbp;
1115 	error = 0;
1116 	goto done;
1117 fail:
1118 	curthread_pflags_restore(saved_inbdflush);
1119 	/*
1120 	 * If we have failed to allocate any blocks, simply return the error.
1121 	 * This is the usual case and avoids the need to fsync the file.
1122 	 */
1123 	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
1124 		goto done;
1125 	/*
1126 	 * If we have failed part way through block allocation, we
1127 	 * have to deallocate any indirect blocks that we have allocated.
1128 	 * We have to fsync the file before we start to get rid of all
1129 	 * of its dependencies so that we do not leave them dangling.
1130 	 * We have to sync it at the end so that the soft updates code
1131 	 * does not find any untracked changes. Although this is really
1132 	 * slow, running out of disk space is not expected to be a common
1133 	 * occurrence. The error return from fsync is ignored as we already
1134 	 * have an error to return to the user.
1135 	 *
1136 	 * XXX Still have to journal the free below
1137 	 */
1138 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
1139 	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
1140 	     blkp < allocblk; blkp++, lbns_remfree++) {
1141 		/*
1142 		 * We shall not leave the freed blocks on the vnode
1143 		 * buffer object lists.
1144 		 */
1145 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1146 		    GB_NOCREAT | GB_UNMAPPED | gbwflag);
1147 		if (bp != NULL) {
1148 			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
1149 			    ("mismatch2 l %jd %jd b %ju %ju",
1150 			    (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
1151 			    (uintmax_t)bp->b_blkno,
1152 			    (uintmax_t)fsbtodb(fs, *blkp)));
1153 			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
1154 			bp->b_flags &= ~(B_ASYNC | B_CACHE);
1155 			brelse(bp);
1156 		}
1157 		deallocated += fs->fs_bsize;
1158 	}
1159 	if (allocib != NULL) {
1160 		*allocib = 0;
1161 	} else if (unwindidx >= 0) {
1162 		int r;
1163 
1164 		r = bread_gb(vp, indirs[unwindidx].in_lbn,
1165 		    (int)fs->fs_bsize, NOCRED, gbwflag, &bp);
1166 		if (r) {
1167 			panic("Could not unwind indirect block, error %d", r);
1168 			brelse(bp);
1169 		} else {
1170 			bap = (ufs2_daddr_t *)bp->b_data;
1171 			bap[indirs[unwindidx].in_off] = 0;
1172 			if (flags & IO_SYNC) {
1173 				bwrite(bp);
1174 			} else {
1175 				if (bp->b_bufsize == fs->fs_bsize)
1176 					bp->b_flags |= B_CLUSTEROK;
1177 				bdwrite(bp);
1178 			}
1179 		}
1180 	}
1181 	if (deallocated) {
1182 #ifdef QUOTA
1183 		/*
1184 		 * Restore user's disk quota because allocation failed.
1185 		 */
1186 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
1187 #endif
1188 		dp->di_blocks -= btodb(deallocated);
1189 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
1190 	}
1191 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
1192 	/*
1193 	 * After the buffers are invalidated and on-disk pointers are
1194 	 * cleared, free the blocks.
1195 	 */
1196 	for (blkp = allociblk; blkp < allocblk; blkp++) {
1197 #ifdef INVARIANTS
1198 		if (blkp == allociblk)
1199 			lbns_remfree = lbns;
1200 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1201 		    GB_NOCREAT | GB_UNMAPPED | gbwflag);
1202 		if (bp != NULL) {
1203 			panic("zombie2 %jd %ju %ju",
1204 			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
1205 			    (uintmax_t)fsbtodb(fs, *blkp));
1206 		}
1207 		lbns_remfree++;
1208 #endif
1209 		ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
1210 		    ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
1211 	}
1212 done:
1213 	vn_seqc_write_end(vp);
1214 	return (error);
1215 }
1216