xref: /freebsd/sys/ufs/ffs/ffs_balloc.c (revision 8483b844e7e48dbfc61c6e38908921c4fa691d4c)
1 /*-
2  * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND BSD-3-Clause)
3  *
4  * Copyright (c) 2002 Networks Associates Technology, Inc.
5  * All rights reserved.
6  *
7  * This software was developed for the FreeBSD Project by Marshall
8  * Kirk McKusick and Network Associates Laboratories, the Security
9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11  * research program
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * Copyright (c) 1982, 1986, 1989, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
62  */
63 
64 #include <sys/cdefs.h>
65 __FBSDID("$FreeBSD$");
66 
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/bio.h>
70 #include <sys/buf.h>
71 #include <sys/lock.h>
72 #include <sys/mount.h>
73 #include <sys/vnode.h>
74 #include <sys/vmmeter.h>
75 
76 #include <ufs/ufs/quota.h>
77 #include <ufs/ufs/inode.h>
78 #include <ufs/ufs/ufs_extern.h>
79 #include <ufs/ufs/extattr.h>
80 #include <ufs/ufs/ufsmount.h>
81 
82 #include <ufs/ffs/fs.h>
83 #include <ufs/ffs/ffs_extern.h>
84 
85 /*
86  * Balloc defines the structure of filesystem storage
87  * by allocating the physical blocks on a device given
88  * the inode and the logical block number in a file.
89  * This is the allocation strategy for UFS1. Below is
90  * the allocation strategy for UFS2.
91  */
/*
 * ffs_balloc_ufs1() -- ensure that the UFS1 on-disk block backing the
 * byte range [startoffset, startoffset + size) of vnode vp exists,
 * allocating data and indirect blocks as needed.
 *
 * Parameters:
 *	vp		(locked) vnode of the file.
 *	startoffset	byte offset of the write; with 'size' it determines
 *			the logical block number and required fragment size.
 *	size		number of bytes of the write; after adding the
 *			in-block offset it must not exceed fs_bsize.
 *	cred		credentials charged for the allocation.
 *	flags		BA_*/IO_* flags (BA_CLRBUF, BA_METAONLY, BA_UNMAPPED,
 *			BA_SEQMASK, IO_SYNC, ...); IO_EXT is rejected since
 *			UFS1 has no external attribute area.
 *	bpp		on success, *bpp is the locked buffer for the data
 *			block (or for the indirect block if BA_METAONLY).
 *
 * Returns 0 on success or an errno; on failure, any blocks allocated
 * part-way through are unwound and freed (see the 'fail' path).
 *
 * Locking note: UFS_LOCK(ump) is taken immediately before each
 * ffs_alloc()/ffs_realloccg() call and is not explicitly released here;
 * the allocator consumes the lock (observe that the error paths below
 * re-take UFS_LOCK before touching ump fields).  NOTE(review): inferred
 * from the call pattern in this file -- confirm against ffs_alloc().
 */
92 int
93 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
94     struct ucred *cred, int flags, struct buf **bpp)
95 {
96 	struct inode *ip;
97 	struct ufs1_dinode *dp;
98 	ufs_lbn_t lbn, lastlbn;
99 	struct fs *fs;
100 	ufs1_daddr_t nb;
101 	struct buf *bp, *nbp;
102 	struct mount *mp;
103 	struct ufsmount *ump;
104 	struct indir indirs[UFS_NIADDR + 2];
105 	int deallocated, osize, nsize, num, i, error;
106 	ufs2_daddr_t newb;
107 	ufs1_daddr_t *bap, pref;
	/*
	 * allociblk[]/lbns[] record every block allocated by this call
	 * (block number and logical block number, in parallel) so the
	 * 'fail' path can invalidate the buffers and free the blocks.
	 * allocib, when non-NULL, points at the inode slot holding the
	 * newly created first-level indirect, for the same purpose.
	 */
108 	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
109 	ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
110 	int unwindidx = -1;
111 	int saved_inbdflush;
112 	int gbflags, reclaimed;
113 
114 	ip = VTOI(vp);
115 	dp = ip->i_din1;
116 	fs = ITOFS(ip);
117 	mp = ITOVFS(ip);
118 	ump = ITOUMP(ip);
119 	lbn = lblkno(fs, startoffset);
	/* Fold the in-block offset in; 'size' is now the needed frag size. */
120 	size = blkoff(fs, startoffset) + size;
121 	reclaimed = 0;
122 	if (size > fs->fs_bsize)
123 		panic("ffs_balloc_ufs1: blk too big");
124 	*bpp = NULL;
125 	if (flags & IO_EXT)
126 		return (EOPNOTSUPP);
127 	if (lbn < 0)
128 		return (EFBIG);
129 	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
130 
131 	if (DOINGSOFTDEP(vp))
132 		softdep_prealloc(vp, MNT_WAIT);
133 	/*
134 	 * If the next write will extend the file into a new block,
135 	 * and the file is currently composed of a fragment
136 	 * this fragment has to be extended to be a full block.
137 	 */
138 	lastlbn = lblkno(fs, ip->i_size);
139 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
140 		nb = lastlbn;
141 		osize = blksize(fs, ip, nb);
142 		if (osize < fs->fs_bsize && osize > 0) {
143 			UFS_LOCK(ump);
144 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
145 			   ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
146 			   &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
147 			   cred, &bp);
148 			if (error)
149 				return (error);
150 			if (DOINGSOFTDEP(vp))
151 				softdep_setup_allocdirect(ip, nb,
152 				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
153 				    fs->fs_bsize, osize, bp);
154 			ip->i_size = smalllblktosize(fs, nb + 1);
155 			dp->di_size = ip->i_size;
156 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
157 			UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
158 			if (flags & IO_SYNC)
159 				bwrite(bp);
160 			else if (DOINGASYNC(vp))
161 				bdwrite(bp);
162 			else
163 				bawrite(bp);
164 		}
165 	}
166 	/*
167 	 * The first UFS_NDADDR blocks are direct blocks
168 	 */
169 	if (lbn < UFS_NDADDR) {
170 		if (flags & BA_METAONLY)
171 			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
172 		nb = dp->di_db[lbn];
		/*
		 * Block already allocated at full size needed: just read
		 * it in, patch in the physical block number, and return.
		 */
173 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
174 			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
175 			if (error) {
176 				return (error);
177 			}
178 			bp->b_blkno = fsbtodb(fs, nb);
179 			*bpp = bp;
180 			return (0);
181 		}
182 		if (nb != 0) {
183 			/*
184 			 * Consider need to reallocate a fragment.
185 			 */
186 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
187 			nsize = fragroundup(fs, size);
188 			if (nsize <= osize) {
189 				error = bread(vp, lbn, osize, NOCRED, &bp);
190 				if (error) {
191 					return (error);
192 				}
193 				bp->b_blkno = fsbtodb(fs, nb);
194 			} else {
195 				UFS_LOCK(ump);
196 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
197 				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
198 				    &dp->di_db[0]), osize, nsize, flags,
199 				    cred, &bp);
200 				if (error)
201 					return (error);
202 				if (DOINGSOFTDEP(vp))
203 					softdep_setup_allocdirect(ip, lbn,
204 					    dbtofsb(fs, bp->b_blkno), nb,
205 					    nsize, osize, bp);
206 			}
207 		} else {
			/*
			 * No block yet: allocate a fragment if this is the
			 * last, partial block of the file, else a full block.
			 */
208 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
209 				nsize = fragroundup(fs, size);
210 			else
211 				nsize = fs->fs_bsize;
212 			UFS_LOCK(ump);
213 			error = ffs_alloc(ip, lbn,
214 			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
215 			    nsize, flags, cred, &newb);
216 			if (error)
217 				return (error);
218 			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
219 			bp->b_blkno = fsbtodb(fs, newb);
220 			if (flags & BA_CLRBUF)
221 				vfs_bio_clrbuf(bp);
222 			if (DOINGSOFTDEP(vp))
223 				softdep_setup_allocdirect(ip, lbn, newb, 0,
224 				    nsize, 0, bp);
225 		}
226 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
227 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
228 		*bpp = bp;
229 		return (0);
230 	}
231 	/*
232 	 * Determine the number of levels of indirection.
233 	 */
234 	pref = 0;
235 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
236 		return(error);
237 #ifdef INVARIANTS
238 	if (num < 1)
239 		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
240 #endif
241 	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
242 	/*
243 	 * Fetch the first indirect block allocating if necessary.
244 	 */
245 	--num;
246 	nb = dp->di_ib[indirs[0].in_off];
247 	allocib = NULL;
248 	allocblk = allociblk;
249 	lbns_remfree = lbns;
250 	if (nb == 0) {
251 		UFS_LOCK(ump);
252 		pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
253 		    (ufs1_daddr_t *)0);
254 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
255 		    flags, cred, &newb)) != 0) {
256 			curthread_pflags_restore(saved_inbdflush);
257 			return (error);
258 		}
		/* Bias the next allocation to follow this indirect block. */
259 		pref = newb + fs->fs_frag;
260 		nb = newb;
261 		MPASS(allocblk < allociblk + nitems(allociblk));
262 		MPASS(lbns_remfree < lbns + nitems(lbns));
263 		*allocblk++ = nb;
264 		*lbns_remfree++ = indirs[1].in_lbn;
265 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
266 		bp->b_blkno = fsbtodb(fs, nb);
267 		vfs_bio_clrbuf(bp);
268 		if (DOINGSOFTDEP(vp)) {
269 			softdep_setup_allocdirect(ip,
270 			    UFS_NDADDR + indirs[0].in_off, newb, 0,
271 			    fs->fs_bsize, 0, bp);
272 			bdwrite(bp);
273 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
274 			if (bp->b_bufsize == fs->fs_bsize)
275 				bp->b_flags |= B_CLUSTEROK;
276 			bdwrite(bp);
277 		} else {
			/*
			 * Without soft updates the new indirect must be on
			 * disk before the inode points at it.
			 */
278 			if ((error = bwrite(bp)) != 0)
279 				goto fail;
280 		}
281 		allocib = &dp->di_ib[indirs[0].in_off];
282 		*allocib = nb;
283 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
284 	}
285 	/*
286 	 * Fetch through the indirect blocks, allocating as necessary.
	 *
	 * 'retry' is re-entered at most once (gated by 'reclaimed') after
	 * asking the soft updates code to flush and reclaim free space.
287 	 */
288 retry:
289 	for (i = 1;;) {
290 		error = bread(vp,
291 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
292 		if (error) {
293 			goto fail;
294 		}
295 		bap = (ufs1_daddr_t *)bp->b_data;
296 		nb = bap[indirs[i].in_off];
297 		if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
298 		    fs->fs_bsize)) != 0) {
299 			brelse(bp);
300 			goto fail;
301 		}
302 		if (i == num)
303 			break;
304 		i += 1;
305 		if (nb != 0) {
306 			bqrelse(bp);
307 			continue;
308 		}
309 		UFS_LOCK(ump);
310 		/*
311 		 * If parent indirect has just been allocated, try to cluster
312 		 * immediately following it.
313 		 */
314 		if (pref == 0)
315 			pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
316 			    (ufs1_daddr_t *)0);
317 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
318 		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
319 			brelse(bp);
			/*
			 * ffs_alloc() dropped the UFS lock on failure;
			 * re-take it for the cleanup/ratecheck below.
			 */
320 			UFS_LOCK(ump);
321 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
322 				softdep_request_cleanup(fs, vp, cred,
323 				    FLUSH_BLOCKS_WAIT);
324 				UFS_UNLOCK(ump);
325 				goto retry;
326 			}
327 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
328 			    ppsratecheck(&ump->um_last_fullmsg,
329 			    &ump->um_secs_fullmsg, 1)) {
330 				UFS_UNLOCK(ump);
331 				ffs_fserr(fs, ip->i_number, "filesystem full");
332 				uprintf("\n%s: write failed, filesystem "
333 				    "is full\n", fs->fs_fsmnt);
334 			} else {
335 				UFS_UNLOCK(ump);
336 			}
337 			goto fail;
338 		}
339 		pref = newb + fs->fs_frag;
340 		nb = newb;
341 		MPASS(allocblk < allociblk + nitems(allociblk));
342 		MPASS(lbns_remfree < lbns + nitems(lbns));
343 		*allocblk++ = nb;
344 		*lbns_remfree++ = indirs[i].in_lbn;
345 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
346 		nbp->b_blkno = fsbtodb(fs, nb);
347 		vfs_bio_clrbuf(nbp);
348 		if (DOINGSOFTDEP(vp)) {
349 			softdep_setup_allocindir_meta(nbp, ip, bp,
350 			    indirs[i - 1].in_off, nb);
351 			bdwrite(nbp);
352 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
353 			if (nbp->b_bufsize == fs->fs_bsize)
354 				nbp->b_flags |= B_CLUSTEROK;
355 			bdwrite(nbp);
356 		} else {
357 			if ((error = bwrite(nbp)) != 0) {
358 				brelse(bp);
359 				goto fail;
360 			}
361 		}
		/* Hook the child into its parent indirect block. */
362 		bap[indirs[i - 1].in_off] = nb;
		/*
		 * Remember the deepest parent that existed before this call
		 * so the failure path knows which on-disk pointer to clear.
		 */
363 		if (allocib == NULL && unwindidx < 0)
364 			unwindidx = i - 1;
365 		/*
366 		 * If required, write synchronously, otherwise use
367 		 * delayed write.
368 		 */
369 		if (flags & IO_SYNC) {
370 			bwrite(bp);
371 		} else {
372 			if (bp->b_bufsize == fs->fs_bsize)
373 				bp->b_flags |= B_CLUSTEROK;
374 			bdwrite(bp);
375 		}
376 	}
377 	/*
378 	 * If asked only for the indirect block, then return it.
379 	 */
380 	if (flags & BA_METAONLY) {
381 		curthread_pflags_restore(saved_inbdflush);
382 		*bpp = bp;
383 		return (0);
384 	}
385 	/*
386 	 * Get the data block, allocating if necessary.
387 	 */
388 	if (nb == 0) {
389 		UFS_LOCK(ump);
390 		/*
391 		 * If allocating metadata at the front of the cylinder
392 		 * group and parent indirect block has just been allocated,
393 		 * then cluster next to it if it is the first indirect in
394 		 * the file. Otherwise it has been allocated in the metadata
395 		 * area, so we want to find our own place out in the data area.
396 		 */
397 		if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
398 			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
399 			    &bap[0]);
400 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
401 		    flags | IO_BUFLOCKED, cred, &newb);
402 		if (error) {
403 			brelse(bp);
			/* Same error protocol as the indirect loop above. */
404 			UFS_LOCK(ump);
405 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
406 				softdep_request_cleanup(fs, vp, cred,
407 				    FLUSH_BLOCKS_WAIT);
408 				UFS_UNLOCK(ump);
409 				goto retry;
410 			}
411 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
412 			    ppsratecheck(&ump->um_last_fullmsg,
413 			    &ump->um_secs_fullmsg, 1)) {
414 				UFS_UNLOCK(ump);
415 				ffs_fserr(fs, ip->i_number, "filesystem full");
416 				uprintf("\n%s: write failed, filesystem "
417 				    "is full\n", fs->fs_fsmnt);
418 			} else {
419 				UFS_UNLOCK(ump);
420 			}
421 			goto fail;
422 		}
423 		nb = newb;
424 		MPASS(allocblk < allociblk + nitems(allociblk));
425 		MPASS(lbns_remfree < lbns + nitems(lbns));
426 		*allocblk++ = nb;
427 		*lbns_remfree++ = lbn;
428 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
429 		nbp->b_blkno = fsbtodb(fs, nb);
430 		if (flags & BA_CLRBUF)
431 			vfs_bio_clrbuf(nbp);
432 		if (DOINGSOFTDEP(vp))
433 			softdep_setup_allocindir_page(ip, lbn, bp,
434 			    indirs[i].in_off, nb, 0, nbp);
435 		bap[indirs[i].in_off] = nb;
436 		/*
437 		 * If required, write synchronously, otherwise use
438 		 * delayed write.
439 		 */
440 		if (flags & IO_SYNC) {
441 			bwrite(bp);
442 		} else {
443 			if (bp->b_bufsize == fs->fs_bsize)
444 				bp->b_flags |= B_CLUSTEROK;
445 			bdwrite(bp);
446 		}
447 		curthread_pflags_restore(saved_inbdflush);
448 		*bpp = nbp;
449 		return (0);
450 	}
451 	brelse(bp);
	/*
	 * Data block already exists.  For BA_CLRBUF we must read its
	 * current contents (possibly via read-ahead clustering when the
	 * access pattern is sequential and memory is not tight).
	 */
452 	if (flags & BA_CLRBUF) {
453 		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
454 		if (seqcount != 0 &&
455 		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
456 		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
457 			error = cluster_read(vp, ip->i_size, lbn,
458 			    (int)fs->fs_bsize, NOCRED,
459 			    MAXBSIZE, seqcount, gbflags, &nbp);
460 		} else {
461 			error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
462 			    gbflags, &nbp);
463 		}
464 		if (error) {
465 			brelse(nbp);
466 			goto fail;
467 		}
468 	} else {
469 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
470 		nbp->b_blkno = fsbtodb(fs, nb);
471 	}
472 	curthread_pflags_restore(saved_inbdflush);
473 	*bpp = nbp;
474 	return (0);
475 fail:
476 	curthread_pflags_restore(saved_inbdflush);
477 	/*
478 	 * If we have failed to allocate any blocks, simply return the error.
479 	 * This is the usual case and avoids the need to fsync the file.
480 	 */
481 	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
482 		return (error);
483 	/*
484 	 * If we have failed part way through block allocation, we
485 	 * have to deallocate any indirect blocks that we have allocated.
486 	 * We have to fsync the file before we start to get rid of all
487 	 * of its dependencies so that we do not leave them dangling.
488 	 * We have to sync it at the end so that the soft updates code
489 	 * does not find any untracked changes. Although this is really
490 	 * slow, running out of disk space is not expected to be a common
491 	 * occurrence. The error return from fsync is ignored as we already
492 	 * have an error to return to the user.
493 	 *
494 	 * XXX Still have to journal the free below
495 	 */
496 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
497 	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
498 	     blkp < allocblk; blkp++, lbns_remfree++) {
499 		/*
500 		 * We shall not leave the freed blocks on the vnode
501 		 * buffer object lists.
502 		 */
503 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
504 		    GB_NOCREAT | GB_UNMAPPED);
505 		if (bp != NULL) {
506 			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
507 			    ("mismatch1 l %jd %jd b %ju %ju",
508 			    (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
509 			    (uintmax_t)bp->b_blkno,
510 			    (uintmax_t)fsbtodb(fs, *blkp)));
511 			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
512 			bp->b_flags &= ~(B_ASYNC | B_CACHE);
513 			brelse(bp);
514 		}
515 		deallocated += fs->fs_bsize;
516 	}
	/*
	 * Clear whichever on-disk pointer referenced the allocated chain:
	 * either the inode's indirect slot (allocib) or the entry in the
	 * deepest pre-existing indirect block (unwindidx).
	 */
517 	if (allocib != NULL) {
518 		*allocib = 0;
519 	} else if (unwindidx >= 0) {
520 		int r;
521 
522 		r = bread(vp, indirs[unwindidx].in_lbn,
523 		    (int)fs->fs_bsize, NOCRED, &bp);
524 		if (r) {
			/* brelse() is unreachable after panic(). */
525 			panic("Could not unwind indirect block, error %d", r);
526 			brelse(bp);
527 		} else {
528 			bap = (ufs1_daddr_t *)bp->b_data;
529 			bap[indirs[unwindidx].in_off] = 0;
530 			if (flags & IO_SYNC) {
531 				bwrite(bp);
532 			} else {
533 				if (bp->b_bufsize == fs->fs_bsize)
534 					bp->b_flags |= B_CLUSTEROK;
535 				bdwrite(bp);
536 			}
537 		}
538 	}
539 	if (deallocated) {
540 #ifdef QUOTA
541 		/*
542 		 * Restore user's disk quota because allocation failed.
543 		 */
544 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
545 #endif
546 		dp->di_blocks -= btodb(deallocated);
547 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
548 	}
549 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
550 	/*
551 	 * After the buffers are invalidated and on-disk pointers are
552 	 * cleared, free the blocks.
553 	 */
554 	for (blkp = allociblk; blkp < allocblk; blkp++) {
555 #ifdef INVARIANTS
		/*
		 * Sanity check: no buffer for a freed block may remain on
		 * the vnode's lists (would be a use-after-free "zombie").
		 */
556 		if (blkp == allociblk)
557 			lbns_remfree = lbns;
558 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
559 		    GB_NOCREAT | GB_UNMAPPED);
560 		if (bp != NULL) {
561 			panic("zombie1 %jd %ju %ju",
562 			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
563 			    (uintmax_t)fsbtodb(fs, *blkp));
564 		}
565 		lbns_remfree++;
566 #endif
567 		ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
568 		    ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
569 	}
570 	return (error);
571 }
572 
573 /*
574  * Balloc defines the structure of file system storage
575  * by allocating the physical blocks on a device given
576  * the inode and the logical block number in a file.
577  * This is the allocation strategy for UFS2. Above is
578  * the allocation strategy for UFS1.
579  */
580 int
581 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
582     struct ucred *cred, int flags, struct buf **bpp)
583 {
584 	struct inode *ip;
585 	struct ufs2_dinode *dp;
586 	ufs_lbn_t lbn, lastlbn;
587 	struct fs *fs;
588 	struct buf *bp, *nbp;
589 	struct mount *mp;
590 	struct ufsmount *ump;
591 	struct indir indirs[UFS_NIADDR + 2];
592 	ufs2_daddr_t nb, newb, *bap, pref;
593 	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
594 	ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
595 	int deallocated, osize, nsize, num, i, error;
596 	int unwindidx = -1;
597 	int saved_inbdflush;
598 	int gbflags, reclaimed;
599 
600 	ip = VTOI(vp);
601 	dp = ip->i_din2;
602 	fs = ITOFS(ip);
603 	mp = ITOVFS(ip);
604 	ump = ITOUMP(ip);
605 	lbn = lblkno(fs, startoffset);
606 	size = blkoff(fs, startoffset) + size;
607 	reclaimed = 0;
608 	if (size > fs->fs_bsize)
609 		panic("ffs_balloc_ufs2: blk too big");
610 	*bpp = NULL;
611 	if (lbn < 0)
612 		return (EFBIG);
613 	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
614 
615 	if (DOINGSOFTDEP(vp))
616 		softdep_prealloc(vp, MNT_WAIT);
617 
618 	/*
619 	 * Check for allocating external data.
620 	 */
621 	if (flags & IO_EXT) {
622 		if (lbn >= UFS_NXADDR)
623 			return (EFBIG);
624 		/*
625 		 * If the next write will extend the data into a new block,
626 		 * and the data is currently composed of a fragment
627 		 * this fragment has to be extended to be a full block.
628 		 */
629 		lastlbn = lblkno(fs, dp->di_extsize);
630 		if (lastlbn < lbn) {
631 			nb = lastlbn;
632 			osize = sblksize(fs, dp->di_extsize, nb);
633 			if (osize < fs->fs_bsize && osize > 0) {
634 				UFS_LOCK(ump);
635 				error = ffs_realloccg(ip, -1 - nb,
636 				    dp->di_extb[nb],
637 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
638 				    &dp->di_extb[0]), osize,
639 				    (int)fs->fs_bsize, flags, cred, &bp);
640 				if (error)
641 					return (error);
642 				if (DOINGSOFTDEP(vp))
643 					softdep_setup_allocext(ip, nb,
644 					    dbtofsb(fs, bp->b_blkno),
645 					    dp->di_extb[nb],
646 					    fs->fs_bsize, osize, bp);
647 				dp->di_extsize = smalllblktosize(fs, nb + 1);
648 				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
649 				bp->b_xflags |= BX_ALTDATA;
650 				UFS_INODE_SET_FLAG(ip, IN_CHANGE);
651 				if (flags & IO_SYNC)
652 					bwrite(bp);
653 				else
654 					bawrite(bp);
655 			}
656 		}
657 		/*
658 		 * All blocks are direct blocks
659 		 */
660 		if (flags & BA_METAONLY)
661 			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
662 		nb = dp->di_extb[lbn];
663 		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
664 			error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
665 			    gbflags, &bp);
666 			if (error) {
667 				return (error);
668 			}
669 			bp->b_blkno = fsbtodb(fs, nb);
670 			bp->b_xflags |= BX_ALTDATA;
671 			*bpp = bp;
672 			return (0);
673 		}
674 		if (nb != 0) {
675 			/*
676 			 * Consider need to reallocate a fragment.
677 			 */
678 			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
679 			nsize = fragroundup(fs, size);
680 			if (nsize <= osize) {
681 				error = bread_gb(vp, -1 - lbn, osize, NOCRED,
682 				    gbflags, &bp);
683 				if (error) {
684 					return (error);
685 				}
686 				bp->b_blkno = fsbtodb(fs, nb);
687 				bp->b_xflags |= BX_ALTDATA;
688 			} else {
689 				UFS_LOCK(ump);
690 				error = ffs_realloccg(ip, -1 - lbn,
691 				    dp->di_extb[lbn],
692 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
693 				    &dp->di_extb[0]), osize, nsize, flags,
694 				    cred, &bp);
695 				if (error)
696 					return (error);
697 				bp->b_xflags |= BX_ALTDATA;
698 				if (DOINGSOFTDEP(vp))
699 					softdep_setup_allocext(ip, lbn,
700 					    dbtofsb(fs, bp->b_blkno), nb,
701 					    nsize, osize, bp);
702 			}
703 		} else {
704 			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
705 				nsize = fragroundup(fs, size);
706 			else
707 				nsize = fs->fs_bsize;
708 			UFS_LOCK(ump);
709 			error = ffs_alloc(ip, lbn,
710 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
711 			   nsize, flags, cred, &newb);
712 			if (error)
713 				return (error);
714 			bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
715 			bp->b_blkno = fsbtodb(fs, newb);
716 			bp->b_xflags |= BX_ALTDATA;
717 			if (flags & BA_CLRBUF)
718 				vfs_bio_clrbuf(bp);
719 			if (DOINGSOFTDEP(vp))
720 				softdep_setup_allocext(ip, lbn, newb, 0,
721 				    nsize, 0, bp);
722 		}
723 		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
724 		UFS_INODE_SET_FLAG(ip, IN_CHANGE);
725 		*bpp = bp;
726 		return (0);
727 	}
728 	/*
729 	 * If the next write will extend the file into a new block,
730 	 * and the file is currently composed of a fragment
731 	 * this fragment has to be extended to be a full block.
732 	 */
733 	lastlbn = lblkno(fs, ip->i_size);
734 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
735 		nb = lastlbn;
736 		osize = blksize(fs, ip, nb);
737 		if (osize < fs->fs_bsize && osize > 0) {
738 			UFS_LOCK(ump);
739 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
740 			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
741 			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
742 			    flags, cred, &bp);
743 			if (error)
744 				return (error);
745 			if (DOINGSOFTDEP(vp))
746 				softdep_setup_allocdirect(ip, nb,
747 				    dbtofsb(fs, bp->b_blkno),
748 				    dp->di_db[nb],
749 				    fs->fs_bsize, osize, bp);
750 			ip->i_size = smalllblktosize(fs, nb + 1);
751 			dp->di_size = ip->i_size;
752 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
753 			UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
754 			if (flags & IO_SYNC)
755 				bwrite(bp);
756 			else
757 				bawrite(bp);
758 		}
759 	}
760 	/*
761 	 * The first UFS_NDADDR blocks are direct blocks
762 	 */
763 	if (lbn < UFS_NDADDR) {
764 		if (flags & BA_METAONLY)
765 			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
766 		nb = dp->di_db[lbn];
767 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
768 			error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
769 			    gbflags, &bp);
770 			if (error) {
771 				return (error);
772 			}
773 			bp->b_blkno = fsbtodb(fs, nb);
774 			*bpp = bp;
775 			return (0);
776 		}
777 		if (nb != 0) {
778 			/*
779 			 * Consider need to reallocate a fragment.
780 			 */
781 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
782 			nsize = fragroundup(fs, size);
783 			if (nsize <= osize) {
784 				error = bread_gb(vp, lbn, osize, NOCRED,
785 				    gbflags, &bp);
786 				if (error) {
787 					return (error);
788 				}
789 				bp->b_blkno = fsbtodb(fs, nb);
790 			} else {
791 				UFS_LOCK(ump);
792 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
793 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
794 				    &dp->di_db[0]), osize, nsize, flags,
795 				    cred, &bp);
796 				if (error)
797 					return (error);
798 				if (DOINGSOFTDEP(vp))
799 					softdep_setup_allocdirect(ip, lbn,
800 					    dbtofsb(fs, bp->b_blkno), nb,
801 					    nsize, osize, bp);
802 			}
803 		} else {
804 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
805 				nsize = fragroundup(fs, size);
806 			else
807 				nsize = fs->fs_bsize;
808 			UFS_LOCK(ump);
809 			error = ffs_alloc(ip, lbn,
810 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
811 				&dp->di_db[0]), nsize, flags, cred, &newb);
812 			if (error)
813 				return (error);
814 			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
815 			bp->b_blkno = fsbtodb(fs, newb);
816 			if (flags & BA_CLRBUF)
817 				vfs_bio_clrbuf(bp);
818 			if (DOINGSOFTDEP(vp))
819 				softdep_setup_allocdirect(ip, lbn, newb, 0,
820 				    nsize, 0, bp);
821 		}
822 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
823 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
824 		*bpp = bp;
825 		return (0);
826 	}
827 	/*
828 	 * Determine the number of levels of indirection.
829 	 */
830 	pref = 0;
831 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
832 		return(error);
833 #ifdef INVARIANTS
834 	if (num < 1)
835 		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
836 #endif
837 	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
838 	/*
839 	 * Fetch the first indirect block allocating if necessary.
840 	 */
841 	--num;
842 	nb = dp->di_ib[indirs[0].in_off];
843 	allocib = NULL;
844 	allocblk = allociblk;
845 	lbns_remfree = lbns;
846 	if (nb == 0) {
847 		UFS_LOCK(ump);
848 		pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
849 		    (ufs2_daddr_t *)0);
850 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
851 		    flags, cred, &newb)) != 0) {
852 			curthread_pflags_restore(saved_inbdflush);
853 			return (error);
854 		}
855 		pref = newb + fs->fs_frag;
856 		nb = newb;
857 		MPASS(allocblk < allociblk + nitems(allociblk));
858 		MPASS(lbns_remfree < lbns + nitems(lbns));
859 		*allocblk++ = nb;
860 		*lbns_remfree++ = indirs[1].in_lbn;
861 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
862 		    GB_UNMAPPED);
863 		bp->b_blkno = fsbtodb(fs, nb);
864 		vfs_bio_clrbuf(bp);
865 		if (DOINGSOFTDEP(vp)) {
866 			softdep_setup_allocdirect(ip,
867 			    UFS_NDADDR + indirs[0].in_off, newb, 0,
868 			    fs->fs_bsize, 0, bp);
869 			bdwrite(bp);
870 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
871 			if (bp->b_bufsize == fs->fs_bsize)
872 				bp->b_flags |= B_CLUSTEROK;
873 			bdwrite(bp);
874 		} else {
875 			if ((error = bwrite(bp)) != 0)
876 				goto fail;
877 		}
878 		allocib = &dp->di_ib[indirs[0].in_off];
879 		*allocib = nb;
880 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
881 	}
882 	/*
883 	 * Fetch through the indirect blocks, allocating as necessary.
884 	 */
885 retry:
886 	for (i = 1;;) {
887 		error = bread(vp,
888 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
889 		if (error) {
890 			goto fail;
891 		}
892 		bap = (ufs2_daddr_t *)bp->b_data;
893 		nb = bap[indirs[i].in_off];
894 		if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
895 		    fs->fs_bsize)) != 0) {
896 			brelse(bp);
897 			goto fail;
898 		}
899 		if (i == num)
900 			break;
901 		i += 1;
902 		if (nb != 0) {
903 			bqrelse(bp);
904 			continue;
905 		}
906 		UFS_LOCK(ump);
907 		/*
908 		 * If parent indirect has just been allocated, try to cluster
909 		 * immediately following it.
910 		 */
911 		if (pref == 0)
912 			pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
913 			    (ufs2_daddr_t *)0);
914 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
915 		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
916 			brelse(bp);
917 			UFS_LOCK(ump);
918 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
919 				softdep_request_cleanup(fs, vp, cred,
920 				    FLUSH_BLOCKS_WAIT);
921 				UFS_UNLOCK(ump);
922 				goto retry;
923 			}
924 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
925 			    ppsratecheck(&ump->um_last_fullmsg,
926 			    &ump->um_secs_fullmsg, 1)) {
927 				UFS_UNLOCK(ump);
928 				ffs_fserr(fs, ip->i_number, "filesystem full");
929 				uprintf("\n%s: write failed, filesystem "
930 				    "is full\n", fs->fs_fsmnt);
931 			} else {
932 				UFS_UNLOCK(ump);
933 			}
934 			goto fail;
935 		}
936 		pref = newb + fs->fs_frag;
937 		nb = newb;
938 		MPASS(allocblk < allociblk + nitems(allociblk));
939 		MPASS(lbns_remfree < lbns + nitems(lbns));
940 		*allocblk++ = nb;
941 		*lbns_remfree++ = indirs[i].in_lbn;
942 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
943 		    GB_UNMAPPED);
944 		nbp->b_blkno = fsbtodb(fs, nb);
945 		vfs_bio_clrbuf(nbp);
946 		if (DOINGSOFTDEP(vp)) {
947 			softdep_setup_allocindir_meta(nbp, ip, bp,
948 			    indirs[i - 1].in_off, nb);
949 			bdwrite(nbp);
950 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
951 			if (nbp->b_bufsize == fs->fs_bsize)
952 				nbp->b_flags |= B_CLUSTEROK;
953 			bdwrite(nbp);
954 		} else {
955 			if ((error = bwrite(nbp)) != 0) {
956 				brelse(bp);
957 				goto fail;
958 			}
959 		}
960 		bap[indirs[i - 1].in_off] = nb;
961 		if (allocib == NULL && unwindidx < 0)
962 			unwindidx = i - 1;
963 		/*
964 		 * If required, write synchronously, otherwise use
965 		 * delayed write.
966 		 */
967 		if (flags & IO_SYNC) {
968 			bwrite(bp);
969 		} else {
970 			if (bp->b_bufsize == fs->fs_bsize)
971 				bp->b_flags |= B_CLUSTEROK;
972 			bdwrite(bp);
973 		}
974 	}
975 	/*
976 	 * If asked only for the indirect block, then return it.
977 	 */
978 	if (flags & BA_METAONLY) {
979 		curthread_pflags_restore(saved_inbdflush);
980 		*bpp = bp;
981 		return (0);
982 	}
983 	/*
984 	 * Get the data block, allocating if necessary.
985 	 */
986 	if (nb == 0) {
987 		UFS_LOCK(ump);
988 		/*
989 		 * If allocating metadata at the front of the cylinder
990 		 * group and parent indirect block has just been allocated,
991 		 * then cluster next to it if it is the first indirect in
992 		 * the file. Otherwise it has been allocated in the metadata
993 		 * area, so we want to find our own place out in the data area.
994 		 */
995 		if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
996 			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
997 			    &bap[0]);
998 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
999 		    flags | IO_BUFLOCKED, cred, &newb);
1000 		if (error) {
1001 			brelse(bp);
1002 			UFS_LOCK(ump);
1003 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
1004 				softdep_request_cleanup(fs, vp, cred,
1005 				    FLUSH_BLOCKS_WAIT);
1006 				UFS_UNLOCK(ump);
1007 				goto retry;
1008 			}
1009 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
1010 			    ppsratecheck(&ump->um_last_fullmsg,
1011 			    &ump->um_secs_fullmsg, 1)) {
1012 				UFS_UNLOCK(ump);
1013 				ffs_fserr(fs, ip->i_number, "filesystem full");
1014 				uprintf("\n%s: write failed, filesystem "
1015 				    "is full\n", fs->fs_fsmnt);
1016 			} else {
1017 				UFS_UNLOCK(ump);
1018 			}
1019 			goto fail;
1020 		}
1021 		nb = newb;
1022 		MPASS(allocblk < allociblk + nitems(allociblk));
1023 		MPASS(lbns_remfree < lbns + nitems(lbns));
1024 		*allocblk++ = nb;
1025 		*lbns_remfree++ = lbn;
1026 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1027 		nbp->b_blkno = fsbtodb(fs, nb);
1028 		if (flags & BA_CLRBUF)
1029 			vfs_bio_clrbuf(nbp);
1030 		if (DOINGSOFTDEP(vp))
1031 			softdep_setup_allocindir_page(ip, lbn, bp,
1032 			    indirs[i].in_off, nb, 0, nbp);
1033 		bap[indirs[i].in_off] = nb;
1034 		/*
1035 		 * If required, write synchronously, otherwise use
1036 		 * delayed write.
1037 		 */
1038 		if (flags & IO_SYNC) {
1039 			bwrite(bp);
1040 		} else {
1041 			if (bp->b_bufsize == fs->fs_bsize)
1042 				bp->b_flags |= B_CLUSTEROK;
1043 			bdwrite(bp);
1044 		}
1045 		curthread_pflags_restore(saved_inbdflush);
1046 		*bpp = nbp;
1047 		return (0);
1048 	}
1049 	brelse(bp);
1050 	/*
1051 	 * If requested clear invalid portions of the buffer.  If we
1052 	 * have to do a read-before-write (typical if BA_CLRBUF is set),
1053 	 * try to do some read-ahead in the sequential case to reduce
1054 	 * the number of I/O transactions.
1055 	 */
1056 	if (flags & BA_CLRBUF) {
1057 		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
1058 		if (seqcount != 0 &&
1059 		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
1060 		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
1061 			error = cluster_read(vp, ip->i_size, lbn,
1062 			    (int)fs->fs_bsize, NOCRED,
1063 			    MAXBSIZE, seqcount, gbflags, &nbp);
1064 		} else {
1065 			error = bread_gb(vp, lbn, (int)fs->fs_bsize,
1066 			    NOCRED, gbflags, &nbp);
1067 		}
1068 		if (error) {
1069 			brelse(nbp);
1070 			goto fail;
1071 		}
1072 	} else {
1073 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1074 		nbp->b_blkno = fsbtodb(fs, nb);
1075 	}
1076 	curthread_pflags_restore(saved_inbdflush);
1077 	*bpp = nbp;
1078 	return (0);
1079 fail:
1080 	curthread_pflags_restore(saved_inbdflush);
1081 	/*
1082 	 * If we have failed to allocate any blocks, simply return the error.
1083 	 * This is the usual case and avoids the need to fsync the file.
1084 	 */
1085 	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
1086 		return (error);
1087 	/*
1088 	 * If we have failed part way through block allocation, we
1089 	 * have to deallocate any indirect blocks that we have allocated.
1090 	 * We have to fsync the file before we start to get rid of all
1091 	 * of its dependencies so that we do not leave them dangling.
1092 	 * We have to sync it at the end so that the soft updates code
1093 	 * does not find any untracked changes. Although this is really
1094 	 * slow, running out of disk space is not expected to be a common
1095 	 * occurrence. The error return from fsync is ignored as we already
1096 	 * have an error to return to the user.
1097 	 *
1098 	 * XXX Still have to journal the free below
1099 	 */
1100 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
1101 	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
1102 	     blkp < allocblk; blkp++, lbns_remfree++) {
1103 		/*
1104 		 * We shall not leave the freed blocks on the vnode
1105 		 * buffer object lists.
1106 		 */
1107 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1108 		    GB_NOCREAT | GB_UNMAPPED);
1109 		if (bp != NULL) {
1110 			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
1111 			    ("mismatch2 l %jd %jd b %ju %ju",
1112 			    (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
1113 			    (uintmax_t)bp->b_blkno,
1114 			    (uintmax_t)fsbtodb(fs, *blkp)));
1115 			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
1116 			bp->b_flags &= ~(B_ASYNC | B_CACHE);
1117 			brelse(bp);
1118 		}
1119 		deallocated += fs->fs_bsize;
1120 	}
1121 	if (allocib != NULL) {
1122 		*allocib = 0;
1123 	} else if (unwindidx >= 0) {
1124 		int r;
1125 
1126 		r = bread(vp, indirs[unwindidx].in_lbn,
1127 		    (int)fs->fs_bsize, NOCRED, &bp);
1128 		if (r) {
1129 			panic("Could not unwind indirect block, error %d", r);
1130 			brelse(bp);
1131 		} else {
1132 			bap = (ufs2_daddr_t *)bp->b_data;
1133 			bap[indirs[unwindidx].in_off] = 0;
1134 			if (flags & IO_SYNC) {
1135 				bwrite(bp);
1136 			} else {
1137 				if (bp->b_bufsize == fs->fs_bsize)
1138 					bp->b_flags |= B_CLUSTEROK;
1139 				bdwrite(bp);
1140 			}
1141 		}
1142 	}
1143 	if (deallocated) {
1144 #ifdef QUOTA
1145 		/*
1146 		 * Restore user's disk quota because allocation failed.
1147 		 */
1148 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
1149 #endif
1150 		dp->di_blocks -= btodb(deallocated);
1151 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
1152 	}
1153 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
1154 	/*
1155 	 * After the buffers are invalidated and on-disk pointers are
1156 	 * cleared, free the blocks.
1157 	 */
1158 	for (blkp = allociblk; blkp < allocblk; blkp++) {
1159 #ifdef INVARIANTS
1160 		if (blkp == allociblk)
1161 			lbns_remfree = lbns;
1162 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1163 		    GB_NOCREAT | GB_UNMAPPED);
1164 		if (bp != NULL) {
1165 			panic("zombie2 %jd %ju %ju",
1166 			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
1167 			    (uintmax_t)fsbtodb(fs, *blkp));
1168 		}
1169 		lbns_remfree++;
1170 #endif
1171 		ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
1172 		    ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
1173 	}
1174 	return (error);
1175 }
1176