xref: /freebsd/sys/ufs/ffs/ffs_balloc.c (revision 3ff01b231dfa83d518854c63e7c9cd1debd1139e)
1 /*-
2  * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND BSD-3-Clause)
3  *
4  * Copyright (c) 2002 Networks Associates Technology, Inc.
5  * All rights reserved.
6  *
7  * This software was developed for the FreeBSD Project by Marshall
8  * Kirk McKusick and Network Associates Laboratories, the Security
9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11  * research program
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * Copyright (c) 1982, 1986, 1989, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
62  */
63 
64 #include <sys/cdefs.h>
65 __FBSDID("$FreeBSD$");
66 
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/bio.h>
70 #include <sys/buf.h>
71 #include <sys/lock.h>
72 #include <sys/mount.h>
73 #include <sys/vnode.h>
74 #include <sys/vmmeter.h>
75 
76 #include <ufs/ufs/quota.h>
77 #include <ufs/ufs/inode.h>
78 #include <ufs/ufs/ufs_extern.h>
79 #include <ufs/ufs/extattr.h>
80 #include <ufs/ufs/ufsmount.h>
81 
82 #include <ufs/ffs/fs.h>
83 #include <ufs/ffs/ffs_extern.h>
84 
85 /*
86  * Balloc defines the structure of filesystem storage
87  * by allocating the physical blocks on a device given
88  * the inode and the logical block number in a file.
89  * This is the allocation strategy for UFS1. Below is
90  * the allocation strategy for UFS2.
91  */
/*
 * Contract:
 *	vp	- vnode of the file being written (locked by the caller)
 *	startoffset - byte offset in the file where the write begins
 *	size	- number of bytes to be written at startoffset
 *	cred	- credentials charged for any new block allocations
 *	flags	- IO_*/BA_* control flags: IO_SYNC (synchronous metadata
 *		  writes), BA_CLRBUF (caller will only partially overwrite
 *		  the block, so existing contents must be read/zeroed),
 *		  BA_METAONLY (return the final indirect block instead of
 *		  the data block), BA_UNMAPPED, BA_SEQMASK hint bits
 *	bpp	- on success *bpp holds the buffer for the requested block
 * Returns 0 on success or an errno.  On failure, every block allocated
 * during this call is freed and all on-disk pointers to them are undone
 * (see the "fail:" path below).
 */
92 int
93 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
94     struct ucred *cred, int flags, struct buf **bpp)
95 {
96 	struct inode *ip;
97 	struct ufs1_dinode *dp;
98 	ufs_lbn_t lbn, lastlbn;
99 	struct fs *fs;
100 	ufs1_daddr_t nb;
101 	struct buf *bp, *nbp;
102 	struct mount *mp;
103 	struct ufsmount *ump;
104 	struct indir indirs[UFS_NIADDR + 2];
105 	int deallocated, osize, nsize, num, i, error;
106 	ufs2_daddr_t newb;
107 	ufs1_daddr_t *bap, pref;
	/* allociblk[]/lbns[] record every block allocated by this call so the
	 * fail: path can free them; allocib remembers the inode indirect
	 * pointer to clear on unwind. */
108 	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
109 	ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
110 	int unwindidx = -1;
111 	int saved_inbdflush;
112 	int gbflags, reclaimed;
113 
114 	ip = VTOI(vp);
115 	dp = ip->i_din1;
116 	fs = ITOFS(ip);
117 	mp = ITOVFS(ip);
118 	ump = ITOUMP(ip);
119 	lbn = lblkno(fs, startoffset);
	/* size becomes the extent within lbn's block: in-block offset of the
	 * write plus the byte count, used below to size fragments. */
120 	size = blkoff(fs, startoffset) + size;
	/* reclaimed: set once a softdep space-reclaim flush has been tried
	 * after an allocation failure, so we retry at most one time. */
121 	reclaimed = 0;
122 	if (size > fs->fs_bsize)
123 		panic("ffs_balloc_ufs1: blk too big");
124 	*bpp = NULL;
125 	if (flags & IO_EXT)
126 		return (EOPNOTSUPP);
127 	if (lbn < 0)
128 		return (EFBIG);
129 	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
130 
131 	/*
132 	 * If the next write will extend the file into a new block,
133 	 * and the file is currently composed of a fragment
134 	 * this fragment has to be extended to be a full block.
135 	 */
136 	lastlbn = lblkno(fs, ip->i_size);
137 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
138 		nb = lastlbn;
139 		osize = blksize(fs, ip, nb);
140 		if (osize < fs->fs_bsize && osize > 0) {
			/* ffs_realloccg() (and ffs_alloc() below) must be
			 * entered with the ump mutex held; they return with
			 * it dropped, on success and on failure alike. */
141 			UFS_LOCK(ump);
142 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
143 			   ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
144 			   &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
145 			   cred, &bp);
146 			if (error)
147 				return (error);
148 			if (DOINGSOFTDEP(vp))
149 				softdep_setup_allocdirect(ip, nb,
150 				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
151 				    fs->fs_bsize, osize, bp);
152 			ip->i_size = smalllblktosize(fs, nb + 1);
153 			dp->di_size = ip->i_size;
154 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
155 			UFS_INODE_SET_FLAG(ip,
156 			    IN_SIZEMOD | IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
157 			if (flags & IO_SYNC)
158 				bwrite(bp);
159 			else if (DOINGASYNC(vp))
160 				bdwrite(bp);
161 			else
162 				bawrite(bp);
163 		}
164 	}
165 	/*
166 	 * The first UFS_NDADDR blocks are direct blocks
167 	 */
168 	if (lbn < UFS_NDADDR) {
169 		if (flags & BA_METAONLY)
170 			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
171 		nb = dp->di_db[lbn];
		/* Block already allocated at full size: just hand back a
		 * buffer mapped to it. */
172 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
173 			if ((flags & BA_CLRBUF) != 0) {
174 				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
175 				    &bp);
176 				if (error != 0)
177 					return (error);
178 			} else {
179 				bp = getblk(vp, lbn, fs->fs_bsize, 0, 0,
180 				    gbflags);
181 				if (bp == NULL)
182 					return (EIO);
183 				vfs_bio_clrbuf(bp);
184 			}
185 			bp->b_blkno = fsbtodb(fs, nb);
186 			*bpp = bp;
187 			return (0);
188 		}
189 		if (nb != 0) {
190 			/*
191 			 * Consider need to reallocate a fragment.
192 			 */
193 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
194 			nsize = fragroundup(fs, size);
195 			if (nsize <= osize) {
				/* Existing fragment is already big enough. */
196 				error = bread(vp, lbn, osize, NOCRED, &bp);
197 				if (error) {
198 					return (error);
199 				}
200 				bp->b_blkno = fsbtodb(fs, nb);
201 			} else {
202 				UFS_LOCK(ump);
203 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
204 				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
205 				    &dp->di_db[0]), osize, nsize, flags,
206 				    cred, &bp);
207 				if (error)
208 					return (error);
209 				if (DOINGSOFTDEP(vp))
210 					softdep_setup_allocdirect(ip, lbn,
211 					    dbtofsb(fs, bp->b_blkno), nb,
212 					    nsize, osize, bp);
213 			}
214 		} else {
			/* No block yet: allocate a fragment if this is the
			 * last, partial block of the file, else a full block. */
215 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
216 				nsize = fragroundup(fs, size);
217 			else
218 				nsize = fs->fs_bsize;
219 			UFS_LOCK(ump);
220 			error = ffs_alloc(ip, lbn,
221 			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
222 			    nsize, flags, cred, &newb);
223 			if (error)
224 				return (error);
225 			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
226 			bp->b_blkno = fsbtodb(fs, newb);
227 			if (flags & BA_CLRBUF)
228 				vfs_bio_clrbuf(bp);
229 			if (DOINGSOFTDEP(vp))
230 				softdep_setup_allocdirect(ip, lbn, newb, 0,
231 				    nsize, 0, bp);
232 		}
233 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
234 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
235 		*bpp = bp;
236 		return (0);
237 	}
238 	/*
239 	 * Determine the number of levels of indirection.
240 	 */
241 	pref = 0;
242 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
243 		return(error);
244 #ifdef INVARIANTS
245 	if (num < 1)
246 		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
247 #endif
	/* NOTE(review): TDP_INBDFLUSH appears to prevent recursive entry into
	 * buffer-daemon flushing while we hold buffers here -- confirm against
	 * kern/vfs_bio.c before relying on this. */
248 	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
249 	/*
250 	 * Fetch the first indirect block allocating if necessary.
251 	 */
252 	--num;
253 	nb = dp->di_ib[indirs[0].in_off];
254 	allocib = NULL;
255 	allocblk = allociblk;
256 	lbns_remfree = lbns;
257 	if (nb == 0) {
258 		UFS_LOCK(ump);
259 		pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
260 		    (ufs1_daddr_t *)0);
261 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
262 		    flags, cred, &newb)) != 0) {
263 			curthread_pflags_restore(saved_inbdflush);
264 			return (error);
265 		}
266 		pref = newb + fs->fs_frag;
267 		nb = newb;
268 		MPASS(allocblk < allociblk + nitems(allociblk));
269 		MPASS(lbns_remfree < lbns + nitems(lbns));
		/* Record the new block for the fail: unwind path. */
270 		*allocblk++ = nb;
271 		*lbns_remfree++ = indirs[1].in_lbn;
272 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
273 		bp->b_blkno = fsbtodb(fs, nb);
274 		vfs_bio_clrbuf(bp);
275 		if (DOINGSOFTDEP(vp)) {
276 			softdep_setup_allocdirect(ip,
277 			    UFS_NDADDR + indirs[0].in_off, newb, 0,
278 			    fs->fs_bsize, 0, bp);
279 			bdwrite(bp);
280 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
281 			if (bp->b_bufsize == fs->fs_bsize)
282 				bp->b_flags |= B_CLUSTEROK;
283 			bdwrite(bp);
284 		} else {
			/* Without soft updates the indirect must be on disk
			 * before the inode points at it. */
285 			if ((error = bwrite(bp)) != 0)
286 				goto fail;
287 		}
288 		allocib = &dp->di_ib[indirs[0].in_off];
289 		*allocib = nb;
290 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
291 	}
292 	/*
293 	 * Fetch through the indirect blocks, allocating as necessary.
294 	 */
295 retry:
296 	for (i = 1;;) {
297 		error = bread(vp,
298 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
299 		if (error) {
300 			goto fail;
301 		}
302 		bap = (ufs1_daddr_t *)bp->b_data;
303 		nb = bap[indirs[i].in_off];
		/* Sanity-check the on-disk block pointer before trusting it. */
304 		if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
305 		    fs->fs_bsize)) != 0) {
306 			brelse(bp);
307 			goto fail;
308 		}
309 		if (i == num)
310 			break;
311 		i += 1;
312 		if (nb != 0) {
313 			bqrelse(bp);
314 			continue;
315 		}
316 		UFS_LOCK(ump);
317 		/*
318 		 * If parent indirect has just been allocated, try to cluster
319 		 * immediately following it.
320 		 */
321 		if (pref == 0)
322 			pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
323 			    (ufs1_daddr_t *)0);
324 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
325 		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
326 			brelse(bp);
			/* ffs_alloc() dropped the ump lock on failure; retake
			 * it for the cleanup/rate-limit decisions below. */
327 			UFS_LOCK(ump);
328 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
				/* Try once to reclaim space by flushing
				 * softdep work, then retry the allocation. */
329 				softdep_request_cleanup(fs, vp, cred,
330 				    FLUSH_BLOCKS_WAIT);
331 				UFS_UNLOCK(ump);
332 				goto retry;
333 			}
334 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
335 			    ppsratecheck(&ump->um_last_fullmsg,
336 			    &ump->um_secs_fullmsg, 1)) {
337 				UFS_UNLOCK(ump);
338 				ffs_fserr(fs, ip->i_number, "filesystem full");
339 				uprintf("\n%s: write failed, filesystem "
340 				    "is full\n", fs->fs_fsmnt);
341 			} else {
342 				UFS_UNLOCK(ump);
343 			}
344 			goto fail;
345 		}
346 		pref = newb + fs->fs_frag;
347 		nb = newb;
348 		MPASS(allocblk < allociblk + nitems(allociblk));
349 		MPASS(lbns_remfree < lbns + nitems(lbns));
350 		*allocblk++ = nb;
351 		*lbns_remfree++ = indirs[i].in_lbn;
352 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
353 		nbp->b_blkno = fsbtodb(fs, nb);
354 		vfs_bio_clrbuf(nbp);
355 		if (DOINGSOFTDEP(vp)) {
356 			softdep_setup_allocindir_meta(nbp, ip, bp,
357 			    indirs[i - 1].in_off, nb);
358 			bdwrite(nbp);
359 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
360 			if (nbp->b_bufsize == fs->fs_bsize)
361 				nbp->b_flags |= B_CLUSTEROK;
362 			bdwrite(nbp);
363 		} else {
364 			if ((error = bwrite(nbp)) != 0) {
365 				brelse(bp);
366 				goto fail;
367 			}
368 		}
369 		bap[indirs[i - 1].in_off] = nb;
		/* First on-disk parent pointer we dirtied outside the inode:
		 * remember it so the fail: path can zero it back out. */
370 		if (allocib == NULL && unwindidx < 0)
371 			unwindidx = i - 1;
372 		/*
373 		 * If required, write synchronously, otherwise use
374 		 * delayed write.
375 		 */
376 		if (flags & IO_SYNC) {
377 			bwrite(bp);
378 		} else {
379 			if (bp->b_bufsize == fs->fs_bsize)
380 				bp->b_flags |= B_CLUSTEROK;
381 			bdwrite(bp);
382 		}
383 	}
384 	/*
385 	 * If asked only for the indirect block, then return it.
386 	 */
387 	if (flags & BA_METAONLY) {
388 		curthread_pflags_restore(saved_inbdflush);
389 		*bpp = bp;
390 		return (0);
391 	}
392 	/*
393 	 * Get the data block, allocating if necessary.
394 	 */
395 	if (nb == 0) {
396 		UFS_LOCK(ump);
397 		/*
398 		 * If allocating metadata at the front of the cylinder
399 		 * group and parent indirect block has just been allocated,
400 		 * then cluster next to it if it is the first indirect in
401 		 * the file. Otherwise it has been allocated in the metadata
402 		 * area, so we want to find our own place out in the data area.
403 		 */
404 		if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
405 			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
406 			    &bap[0]);
407 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
408 		    flags | IO_BUFLOCKED, cred, &newb);
409 		if (error) {
410 			brelse(bp);
			/* Same recovery dance as in the indirect loop above:
			 * one softdep reclaim retry, then report and fail. */
411 			UFS_LOCK(ump);
412 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
413 				softdep_request_cleanup(fs, vp, cred,
414 				    FLUSH_BLOCKS_WAIT);
415 				UFS_UNLOCK(ump);
416 				goto retry;
417 			}
418 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
419 			    ppsratecheck(&ump->um_last_fullmsg,
420 			    &ump->um_secs_fullmsg, 1)) {
421 				UFS_UNLOCK(ump);
422 				ffs_fserr(fs, ip->i_number, "filesystem full");
423 				uprintf("\n%s: write failed, filesystem "
424 				    "is full\n", fs->fs_fsmnt);
425 			} else {
426 				UFS_UNLOCK(ump);
427 			}
428 			goto fail;
429 		}
430 		nb = newb;
431 		MPASS(allocblk < allociblk + nitems(allociblk));
432 		MPASS(lbns_remfree < lbns + nitems(lbns));
433 		*allocblk++ = nb;
434 		*lbns_remfree++ = lbn;
435 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
436 		nbp->b_blkno = fsbtodb(fs, nb);
437 		if (flags & BA_CLRBUF)
438 			vfs_bio_clrbuf(nbp);
439 		if (DOINGSOFTDEP(vp))
440 			softdep_setup_allocindir_page(ip, lbn, bp,
441 			    indirs[i].in_off, nb, 0, nbp);
442 		bap[indirs[i].in_off] = nb;
443 		/*
444 		 * If required, write synchronously, otherwise use
445 		 * delayed write.
446 		 */
447 		if (flags & IO_SYNC) {
448 			bwrite(bp);
449 		} else {
450 			if (bp->b_bufsize == fs->fs_bsize)
451 				bp->b_flags |= B_CLUSTEROK;
452 			bdwrite(bp);
453 		}
454 		curthread_pflags_restore(saved_inbdflush);
455 		*bpp = nbp;
456 		return (0);
457 	}
458 	brelse(bp);
	/* Data block already exists: read it in (clustered read-ahead when
	 * the caller hints sequential access) if it will be partially
	 * overwritten, else just map a buffer onto it. */
459 	if (flags & BA_CLRBUF) {
460 		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
461 		if (seqcount != 0 &&
462 		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
463 		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
464 			error = cluster_read(vp, ip->i_size, lbn,
465 			    (int)fs->fs_bsize, NOCRED,
466 			    MAXBSIZE, seqcount, gbflags, &nbp);
467 		} else {
468 			error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
469 			    gbflags, &nbp);
470 		}
471 		if (error) {
472 			brelse(nbp);
473 			goto fail;
474 		}
475 	} else {
476 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
477 		nbp->b_blkno = fsbtodb(fs, nb);
478 	}
479 	curthread_pflags_restore(saved_inbdflush);
480 	*bpp = nbp;
481 	return (0);
482 fail:
483 	curthread_pflags_restore(saved_inbdflush);
484 	/*
485 	 * If we have failed to allocate any blocks, simply return the error.
486 	 * This is the usual case and avoids the need to fsync the file.
487 	 */
488 	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
489 		return (error);
490 	/*
491 	 * If we have failed part way through block allocation, we
492 	 * have to deallocate any indirect blocks that we have allocated.
493 	 * We have to fsync the file before we start to get rid of all
494 	 * of its dependencies so that we do not leave them dangling.
495 	 * We have to sync it at the end so that the soft updates code
496 	 * does not find any untracked changes. Although this is really
497 	 * slow, running out of disk space is not expected to be a common
498 	 * occurrence. The error return from fsync is ignored as we already
499 	 * have an error to return to the user.
500 	 *
501 	 * XXX Still have to journal the free below
502 	 */
503 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
504 	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
505 	     blkp < allocblk; blkp++, lbns_remfree++) {
506 		/*
507 		 * We shall not leave the freed blocks on the vnode
508 		 * buffer object lists.
509 		 */
510 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
511 		    GB_NOCREAT | GB_UNMAPPED);
512 		if (bp != NULL) {
513 			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
514 			    ("mismatch1 l %jd %jd b %ju %ju",
515 			    (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
516 			    (uintmax_t)bp->b_blkno,
517 			    (uintmax_t)fsbtodb(fs, *blkp)));
518 			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
519 			bp->b_flags &= ~(B_ASYNC | B_CACHE);
520 			brelse(bp);
521 		}
522 		deallocated += fs->fs_bsize;
523 	}
524 	if (allocib != NULL) {
		/* Inode held the only new pointer; clearing it detaches the
		 * whole allocated chain at once. */
525 		*allocib = 0;
526 	} else if (unwindidx >= 0) {
527 		int r;
528 
529 		r = bread(vp, indirs[unwindidx].in_lbn,
530 		    (int)fs->fs_bsize, NOCRED, &bp);
531 		if (r) {
			/* NOTE(review): brelse() here is unreachable after
			 * panic(); harmless dead code. */
532 			panic("Could not unwind indirect block, error %d", r);
533 			brelse(bp);
534 		} else {
535 			bap = (ufs1_daddr_t *)bp->b_data;
536 			bap[indirs[unwindidx].in_off] = 0;
537 			if (flags & IO_SYNC) {
538 				bwrite(bp);
539 			} else {
540 				if (bp->b_bufsize == fs->fs_bsize)
541 					bp->b_flags |= B_CLUSTEROK;
542 				bdwrite(bp);
543 			}
544 		}
545 	}
546 	if (deallocated) {
547 #ifdef QUOTA
548 		/*
549 		 * Restore user's disk quota because allocation failed.
550 		 */
551 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
552 #endif
553 		dp->di_blocks -= btodb(deallocated);
554 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
555 	}
556 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
557 	/*
558 	 * After the buffers are invalidated and on-disk pointers are
559 	 * cleared, free the blocks.
560 	 */
561 	for (blkp = allociblk; blkp < allocblk; blkp++) {
562 #ifdef INVARIANTS
563 		if (blkp == allociblk)
564 			lbns_remfree = lbns;
		/* A buffer still present here would mean the invalidation
		 * loop above missed a block we are about to free. */
565 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
566 		    GB_NOCREAT | GB_UNMAPPED);
567 		if (bp != NULL) {
568 			panic("zombie1 %jd %ju %ju",
569 			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
570 			    (uintmax_t)fsbtodb(fs, *blkp));
571 		}
572 		lbns_remfree++;
573 #endif
574 		ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
575 		    ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
576 	}
577 	return (error);
578 }
579 
580 /*
581  * Balloc defines the structure of file system storage
582  * by allocating the physical blocks on a device given
583  * the inode and the logical block number in a file.
584  * This is the allocation strategy for UFS2. Above is
585  * the allocation strategy for UFS1.
586  */
587 int
588 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
589     struct ucred *cred, int flags, struct buf **bpp)
590 {
591 	struct inode *ip;
592 	struct ufs2_dinode *dp;
593 	ufs_lbn_t lbn, lastlbn;
594 	struct fs *fs;
595 	struct buf *bp, *nbp;
596 	struct mount *mp;
597 	struct ufsmount *ump;
598 	struct indir indirs[UFS_NIADDR + 2];
599 	ufs2_daddr_t nb, newb, *bap, pref;
600 	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
601 	ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
602 	int deallocated, osize, nsize, num, i, error;
603 	int unwindidx = -1;
604 	int saved_inbdflush;
605 	int gbflags, reclaimed;
606 
607 	ip = VTOI(vp);
608 	dp = ip->i_din2;
609 	fs = ITOFS(ip);
610 	mp = ITOVFS(ip);
611 	ump = ITOUMP(ip);
612 	lbn = lblkno(fs, startoffset);
613 	size = blkoff(fs, startoffset) + size;
614 	reclaimed = 0;
615 	if (size > fs->fs_bsize)
616 		panic("ffs_balloc_ufs2: blk too big");
617 	*bpp = NULL;
618 	if (lbn < 0)
619 		return (EFBIG);
620 	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
621 
622 	/*
623 	 * Check for allocating external data.
624 	 */
625 	if (flags & IO_EXT) {
626 		if (lbn >= UFS_NXADDR)
627 			return (EFBIG);
628 		/*
629 		 * If the next write will extend the data into a new block,
630 		 * and the data is currently composed of a fragment
631 		 * this fragment has to be extended to be a full block.
632 		 */
633 		lastlbn = lblkno(fs, dp->di_extsize);
634 		if (lastlbn < lbn) {
635 			nb = lastlbn;
636 			osize = sblksize(fs, dp->di_extsize, nb);
637 			if (osize < fs->fs_bsize && osize > 0) {
638 				UFS_LOCK(ump);
639 				error = ffs_realloccg(ip, -1 - nb,
640 				    dp->di_extb[nb],
641 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
642 				    &dp->di_extb[0]), osize,
643 				    (int)fs->fs_bsize, flags, cred, &bp);
644 				if (error)
645 					return (error);
646 				if (DOINGSOFTDEP(vp))
647 					softdep_setup_allocext(ip, nb,
648 					    dbtofsb(fs, bp->b_blkno),
649 					    dp->di_extb[nb],
650 					    fs->fs_bsize, osize, bp);
651 				dp->di_extsize = smalllblktosize(fs, nb + 1);
652 				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
653 				bp->b_xflags |= BX_ALTDATA;
654 				UFS_INODE_SET_FLAG(ip,
655 				    IN_SIZEMOD | IN_CHANGE | IN_IBLKDATA);
656 				if (flags & IO_SYNC)
657 					bwrite(bp);
658 				else
659 					bawrite(bp);
660 			}
661 		}
662 		/*
663 		 * All blocks are direct blocks
664 		 */
665 		if (flags & BA_METAONLY)
666 			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
667 		nb = dp->di_extb[lbn];
668 		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
669 			error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
670 			    gbflags, &bp);
671 			if (error) {
672 				return (error);
673 			}
674 			bp->b_blkno = fsbtodb(fs, nb);
675 			bp->b_xflags |= BX_ALTDATA;
676 			*bpp = bp;
677 			return (0);
678 		}
679 		if (nb != 0) {
680 			/*
681 			 * Consider need to reallocate a fragment.
682 			 */
683 			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
684 			nsize = fragroundup(fs, size);
685 			if (nsize <= osize) {
686 				error = bread_gb(vp, -1 - lbn, osize, NOCRED,
687 				    gbflags, &bp);
688 				if (error) {
689 					return (error);
690 				}
691 				bp->b_blkno = fsbtodb(fs, nb);
692 				bp->b_xflags |= BX_ALTDATA;
693 			} else {
694 				UFS_LOCK(ump);
695 				error = ffs_realloccg(ip, -1 - lbn,
696 				    dp->di_extb[lbn],
697 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
698 				    &dp->di_extb[0]), osize, nsize, flags,
699 				    cred, &bp);
700 				if (error)
701 					return (error);
702 				bp->b_xflags |= BX_ALTDATA;
703 				if (DOINGSOFTDEP(vp))
704 					softdep_setup_allocext(ip, lbn,
705 					    dbtofsb(fs, bp->b_blkno), nb,
706 					    nsize, osize, bp);
707 			}
708 		} else {
709 			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
710 				nsize = fragroundup(fs, size);
711 			else
712 				nsize = fs->fs_bsize;
713 			UFS_LOCK(ump);
714 			error = ffs_alloc(ip, lbn,
715 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
716 			   nsize, flags, cred, &newb);
717 			if (error)
718 				return (error);
719 			bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
720 			bp->b_blkno = fsbtodb(fs, newb);
721 			bp->b_xflags |= BX_ALTDATA;
722 			if (flags & BA_CLRBUF)
723 				vfs_bio_clrbuf(bp);
724 			if (DOINGSOFTDEP(vp))
725 				softdep_setup_allocext(ip, lbn, newb, 0,
726 				    nsize, 0, bp);
727 		}
728 		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
729 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_IBLKDATA);
730 		*bpp = bp;
731 		return (0);
732 	}
733 	/*
734 	 * If the next write will extend the file into a new block,
735 	 * and the file is currently composed of a fragment
736 	 * this fragment has to be extended to be a full block.
737 	 */
738 	lastlbn = lblkno(fs, ip->i_size);
739 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
740 		nb = lastlbn;
741 		osize = blksize(fs, ip, nb);
742 		if (osize < fs->fs_bsize && osize > 0) {
743 			UFS_LOCK(ump);
744 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
745 			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
746 			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
747 			    flags, cred, &bp);
748 			if (error)
749 				return (error);
750 			if (DOINGSOFTDEP(vp))
751 				softdep_setup_allocdirect(ip, nb,
752 				    dbtofsb(fs, bp->b_blkno),
753 				    dp->di_db[nb],
754 				    fs->fs_bsize, osize, bp);
755 			ip->i_size = smalllblktosize(fs, nb + 1);
756 			dp->di_size = ip->i_size;
757 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
758 			UFS_INODE_SET_FLAG(ip,
759 			    IN_SIZEMOD |IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
760 			if (flags & IO_SYNC)
761 				bwrite(bp);
762 			else
763 				bawrite(bp);
764 		}
765 	}
766 	/*
767 	 * The first UFS_NDADDR blocks are direct blocks
768 	 */
769 	if (lbn < UFS_NDADDR) {
770 		if (flags & BA_METAONLY)
771 			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
772 		nb = dp->di_db[lbn];
773 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
774 			if ((flags & BA_CLRBUF) != 0) {
775 				error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
776 				    gbflags, &bp);
777 				if (error != 0)
778 					return (error);
779 			} else {
780 				bp = getblk(vp, lbn, fs->fs_bsize, 0, 0,
781 				    gbflags);
782 				if (bp == NULL)
783 					return (EIO);
784 				vfs_bio_clrbuf(bp);
785 			}
786 			bp->b_blkno = fsbtodb(fs, nb);
787 			*bpp = bp;
788 			return (0);
789 		}
790 		if (nb != 0) {
791 			/*
792 			 * Consider need to reallocate a fragment.
793 			 */
794 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
795 			nsize = fragroundup(fs, size);
796 			if (nsize <= osize) {
797 				error = bread_gb(vp, lbn, osize, NOCRED,
798 				    gbflags, &bp);
799 				if (error) {
800 					return (error);
801 				}
802 				bp->b_blkno = fsbtodb(fs, nb);
803 			} else {
804 				UFS_LOCK(ump);
805 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
806 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
807 				    &dp->di_db[0]), osize, nsize, flags,
808 				    cred, &bp);
809 				if (error)
810 					return (error);
811 				if (DOINGSOFTDEP(vp))
812 					softdep_setup_allocdirect(ip, lbn,
813 					    dbtofsb(fs, bp->b_blkno), nb,
814 					    nsize, osize, bp);
815 			}
816 		} else {
817 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
818 				nsize = fragroundup(fs, size);
819 			else
820 				nsize = fs->fs_bsize;
821 			UFS_LOCK(ump);
822 			error = ffs_alloc(ip, lbn,
823 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
824 				&dp->di_db[0]), nsize, flags, cred, &newb);
825 			if (error)
826 				return (error);
827 			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
828 			bp->b_blkno = fsbtodb(fs, newb);
829 			if (flags & BA_CLRBUF)
830 				vfs_bio_clrbuf(bp);
831 			if (DOINGSOFTDEP(vp))
832 				softdep_setup_allocdirect(ip, lbn, newb, 0,
833 				    nsize, 0, bp);
834 		}
835 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
836 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
837 		*bpp = bp;
838 		return (0);
839 	}
840 	/*
841 	 * Determine the number of levels of indirection.
842 	 */
843 	pref = 0;
844 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
845 		return(error);
846 #ifdef INVARIANTS
847 	if (num < 1)
848 		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
849 #endif
850 	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
851 	/*
852 	 * Fetch the first indirect block allocating if necessary.
853 	 */
854 	--num;
855 	nb = dp->di_ib[indirs[0].in_off];
856 	allocib = NULL;
857 	allocblk = allociblk;
858 	lbns_remfree = lbns;
859 	if (nb == 0) {
860 		UFS_LOCK(ump);
861 		pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
862 		    (ufs2_daddr_t *)0);
863 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
864 		    flags, cred, &newb)) != 0) {
865 			curthread_pflags_restore(saved_inbdflush);
866 			return (error);
867 		}
868 		pref = newb + fs->fs_frag;
869 		nb = newb;
870 		MPASS(allocblk < allociblk + nitems(allociblk));
871 		MPASS(lbns_remfree < lbns + nitems(lbns));
872 		*allocblk++ = nb;
873 		*lbns_remfree++ = indirs[1].in_lbn;
874 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
875 		    GB_UNMAPPED);
876 		bp->b_blkno = fsbtodb(fs, nb);
877 		vfs_bio_clrbuf(bp);
878 		if (DOINGSOFTDEP(vp)) {
879 			softdep_setup_allocdirect(ip,
880 			    UFS_NDADDR + indirs[0].in_off, newb, 0,
881 			    fs->fs_bsize, 0, bp);
882 			bdwrite(bp);
883 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
884 			if (bp->b_bufsize == fs->fs_bsize)
885 				bp->b_flags |= B_CLUSTEROK;
886 			bdwrite(bp);
887 		} else {
888 			if ((error = bwrite(bp)) != 0)
889 				goto fail;
890 		}
891 		allocib = &dp->di_ib[indirs[0].in_off];
892 		*allocib = nb;
893 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
894 	}
895 	/*
896 	 * Fetch through the indirect blocks, allocating as necessary.
897 	 */
898 retry:
899 	for (i = 1;;) {
900 		error = bread(vp,
901 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
902 		if (error) {
903 			goto fail;
904 		}
905 		bap = (ufs2_daddr_t *)bp->b_data;
906 		nb = bap[indirs[i].in_off];
907 		if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
908 		    fs->fs_bsize)) != 0) {
909 			brelse(bp);
910 			goto fail;
911 		}
912 		if (i == num)
913 			break;
914 		i += 1;
915 		if (nb != 0) {
916 			bqrelse(bp);
917 			continue;
918 		}
919 		UFS_LOCK(ump);
920 		/*
921 		 * If parent indirect has just been allocated, try to cluster
922 		 * immediately following it.
923 		 */
924 		if (pref == 0)
925 			pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
926 			    (ufs2_daddr_t *)0);
927 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
928 		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
929 			brelse(bp);
930 			UFS_LOCK(ump);
931 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
932 				softdep_request_cleanup(fs, vp, cred,
933 				    FLUSH_BLOCKS_WAIT);
934 				UFS_UNLOCK(ump);
935 				goto retry;
936 			}
937 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
938 			    ppsratecheck(&ump->um_last_fullmsg,
939 			    &ump->um_secs_fullmsg, 1)) {
940 				UFS_UNLOCK(ump);
941 				ffs_fserr(fs, ip->i_number, "filesystem full");
942 				uprintf("\n%s: write failed, filesystem "
943 				    "is full\n", fs->fs_fsmnt);
944 			} else {
945 				UFS_UNLOCK(ump);
946 			}
947 			goto fail;
948 		}
949 		pref = newb + fs->fs_frag;
950 		nb = newb;
951 		MPASS(allocblk < allociblk + nitems(allociblk));
952 		MPASS(lbns_remfree < lbns + nitems(lbns));
953 		*allocblk++ = nb;
954 		*lbns_remfree++ = indirs[i].in_lbn;
955 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
956 		    GB_UNMAPPED);
957 		nbp->b_blkno = fsbtodb(fs, nb);
958 		vfs_bio_clrbuf(nbp);
959 		if (DOINGSOFTDEP(vp)) {
960 			softdep_setup_allocindir_meta(nbp, ip, bp,
961 			    indirs[i - 1].in_off, nb);
962 			bdwrite(nbp);
963 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
964 			if (nbp->b_bufsize == fs->fs_bsize)
965 				nbp->b_flags |= B_CLUSTEROK;
966 			bdwrite(nbp);
967 		} else {
968 			if ((error = bwrite(nbp)) != 0) {
969 				brelse(bp);
970 				goto fail;
971 			}
972 		}
973 		bap[indirs[i - 1].in_off] = nb;
974 		if (allocib == NULL && unwindidx < 0)
975 			unwindidx = i - 1;
976 		/*
977 		 * If required, write synchronously, otherwise use
978 		 * delayed write.
979 		 */
980 		if (flags & IO_SYNC) {
981 			bwrite(bp);
982 		} else {
983 			if (bp->b_bufsize == fs->fs_bsize)
984 				bp->b_flags |= B_CLUSTEROK;
985 			bdwrite(bp);
986 		}
987 	}
988 	/*
989 	 * If asked only for the indirect block, then return it.
990 	 */
991 	if (flags & BA_METAONLY) {
992 		curthread_pflags_restore(saved_inbdflush);
993 		*bpp = bp;
994 		return (0);
995 	}
996 	/*
997 	 * Get the data block, allocating if necessary.
998 	 */
999 	if (nb == 0) {
1000 		UFS_LOCK(ump);
1001 		/*
1002 		 * If allocating metadata at the front of the cylinder
1003 		 * group and parent indirect block has just been allocated,
1004 		 * then cluster next to it if it is the first indirect in
1005 		 * the file. Otherwise it has been allocated in the metadata
1006 		 * area, so we want to find our own place out in the data area.
1007 		 */
1008 		if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
1009 			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
1010 			    &bap[0]);
1011 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
1012 		    flags | IO_BUFLOCKED, cred, &newb);
1013 		if (error) {
1014 			brelse(bp);
1015 			UFS_LOCK(ump);
1016 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
1017 				softdep_request_cleanup(fs, vp, cred,
1018 				    FLUSH_BLOCKS_WAIT);
1019 				UFS_UNLOCK(ump);
1020 				goto retry;
1021 			}
1022 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
1023 			    ppsratecheck(&ump->um_last_fullmsg,
1024 			    &ump->um_secs_fullmsg, 1)) {
1025 				UFS_UNLOCK(ump);
1026 				ffs_fserr(fs, ip->i_number, "filesystem full");
1027 				uprintf("\n%s: write failed, filesystem "
1028 				    "is full\n", fs->fs_fsmnt);
1029 			} else {
1030 				UFS_UNLOCK(ump);
1031 			}
1032 			goto fail;
1033 		}
1034 		nb = newb;
1035 		MPASS(allocblk < allociblk + nitems(allociblk));
1036 		MPASS(lbns_remfree < lbns + nitems(lbns));
1037 		*allocblk++ = nb;
1038 		*lbns_remfree++ = lbn;
1039 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1040 		nbp->b_blkno = fsbtodb(fs, nb);
1041 		if (flags & BA_CLRBUF)
1042 			vfs_bio_clrbuf(nbp);
1043 		if (DOINGSOFTDEP(vp))
1044 			softdep_setup_allocindir_page(ip, lbn, bp,
1045 			    indirs[i].in_off, nb, 0, nbp);
1046 		bap[indirs[i].in_off] = nb;
1047 		/*
1048 		 * If required, write synchronously, otherwise use
1049 		 * delayed write.
1050 		 */
1051 		if (flags & IO_SYNC) {
1052 			bwrite(bp);
1053 		} else {
1054 			if (bp->b_bufsize == fs->fs_bsize)
1055 				bp->b_flags |= B_CLUSTEROK;
1056 			bdwrite(bp);
1057 		}
1058 		curthread_pflags_restore(saved_inbdflush);
1059 		*bpp = nbp;
1060 		return (0);
1061 	}
1062 	brelse(bp);
1063 	/*
1064 	 * If requested clear invalid portions of the buffer.  If we
1065 	 * have to do a read-before-write (typical if BA_CLRBUF is set),
1066 	 * try to do some read-ahead in the sequential case to reduce
1067 	 * the number of I/O transactions.
1068 	 */
1069 	if (flags & BA_CLRBUF) {
1070 		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
1071 		if (seqcount != 0 &&
1072 		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
1073 		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
1074 			error = cluster_read(vp, ip->i_size, lbn,
1075 			    (int)fs->fs_bsize, NOCRED,
1076 			    MAXBSIZE, seqcount, gbflags, &nbp);
1077 		} else {
1078 			error = bread_gb(vp, lbn, (int)fs->fs_bsize,
1079 			    NOCRED, gbflags, &nbp);
1080 		}
1081 		if (error) {
1082 			brelse(nbp);
1083 			goto fail;
1084 		}
1085 	} else {
1086 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1087 		nbp->b_blkno = fsbtodb(fs, nb);
1088 	}
1089 	curthread_pflags_restore(saved_inbdflush);
1090 	*bpp = nbp;
1091 	return (0);
1092 fail:
1093 	curthread_pflags_restore(saved_inbdflush);
1094 	/*
1095 	 * If we have failed to allocate any blocks, simply return the error.
1096 	 * This is the usual case and avoids the need to fsync the file.
1097 	 */
1098 	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
1099 		return (error);
1100 	/*
1101 	 * If we have failed part way through block allocation, we
1102 	 * have to deallocate any indirect blocks that we have allocated.
1103 	 * We have to fsync the file before we start to get rid of all
1104 	 * of its dependencies so that we do not leave them dangling.
1105 	 * We have to sync it at the end so that the soft updates code
1106 	 * does not find any untracked changes. Although this is really
1107 	 * slow, running out of disk space is not expected to be a common
1108 	 * occurrence. The error return from fsync is ignored as we already
1109 	 * have an error to return to the user.
1110 	 *
1111 	 * XXX Still have to journal the free below
1112 	 */
1113 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
1114 	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
1115 	     blkp < allocblk; blkp++, lbns_remfree++) {
1116 		/*
1117 		 * We shall not leave the freed blocks on the vnode
1118 		 * buffer object lists.
1119 		 */
1120 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1121 		    GB_NOCREAT | GB_UNMAPPED);
1122 		if (bp != NULL) {
1123 			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
1124 			    ("mismatch2 l %jd %jd b %ju %ju",
1125 			    (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
1126 			    (uintmax_t)bp->b_blkno,
1127 			    (uintmax_t)fsbtodb(fs, *blkp)));
1128 			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
1129 			bp->b_flags &= ~(B_ASYNC | B_CACHE);
1130 			brelse(bp);
1131 		}
1132 		deallocated += fs->fs_bsize;
1133 	}
1134 	if (allocib != NULL) {
1135 		*allocib = 0;
1136 	} else if (unwindidx >= 0) {
1137 		int r;
1138 
1139 		r = bread(vp, indirs[unwindidx].in_lbn,
1140 		    (int)fs->fs_bsize, NOCRED, &bp);
1141 		if (r) {
1142 			panic("Could not unwind indirect block, error %d", r);
1143 			brelse(bp);
1144 		} else {
1145 			bap = (ufs2_daddr_t *)bp->b_data;
1146 			bap[indirs[unwindidx].in_off] = 0;
1147 			if (flags & IO_SYNC) {
1148 				bwrite(bp);
1149 			} else {
1150 				if (bp->b_bufsize == fs->fs_bsize)
1151 					bp->b_flags |= B_CLUSTEROK;
1152 				bdwrite(bp);
1153 			}
1154 		}
1155 	}
1156 	if (deallocated) {
1157 #ifdef QUOTA
1158 		/*
1159 		 * Restore user's disk quota because allocation failed.
1160 		 */
1161 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
1162 #endif
1163 		dp->di_blocks -= btodb(deallocated);
1164 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
1165 	}
1166 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
1167 	/*
1168 	 * After the buffers are invalidated and on-disk pointers are
1169 	 * cleared, free the blocks.
1170 	 */
1171 	for (blkp = allociblk; blkp < allocblk; blkp++) {
1172 #ifdef INVARIANTS
1173 		if (blkp == allociblk)
1174 			lbns_remfree = lbns;
1175 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1176 		    GB_NOCREAT | GB_UNMAPPED);
1177 		if (bp != NULL) {
1178 			panic("zombie2 %jd %ju %ju",
1179 			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
1180 			    (uintmax_t)fsbtodb(fs, *blkp));
1181 		}
1182 		lbns_remfree++;
1183 #endif
1184 		ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
1185 		    ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
1186 	}
1187 	return (error);
1188 }
1189