xref: /freebsd/sys/ufs/ffs/ffs_balloc.c (revision 09e8dea79366f1e5b3a73e8a271b26e4b6bf2e6a)
1 /*
2  * Copyright (c) 2002 Networks Associates Technology, Inc.
3  * All rights reserved.
4  *
5  * This software was developed for the FreeBSD Project by Marshall
6  * Kirk McKusick and Network Associates Laboratories, the Security
7  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9  * research program
10  *
11  * Copyright (c) 1982, 1989, 1993
12  *	The Regents of the University of California.  All rights reserved.
13  * (c) UNIX System Laboratories, Inc.
14  * Copyright (c) 1982, 1986, 1989, 1993
15  *	The Regents of the University of California.  All rights reserved.
16  *
17  * Redistribution and use in source and binary forms, with or without
18  * modification, are permitted provided that the following conditions
19  * are met:
20  * 1. Redistributions of source code must retain the above copyright
21  *    notice, this list of conditions and the following disclaimer.
22  * 2. Redistributions in binary form must reproduce the above copyright
23  *    notice, this list of conditions and the following disclaimer in the
24  *    documentation and/or other materials provided with the distribution.
25  * 3. All advertising materials mentioning features or use of this software
26  *    must display the following acknowledgement:
27  *	This product includes software developed by the University of
28  *	California, Berkeley and its contributors.
29  * 4. Neither the name of the University nor the names of its contributors
30  *    may be used to endorse or promote products derived from this software
31  *    without specific prior written permission.
32  *
33  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
34  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
37  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
39  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
41  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
42  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43  * SUCH DAMAGE.
44  *
45  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
46  * $FreeBSD$
47  */
48 
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/bio.h>
52 #include <sys/buf.h>
53 #include <sys/lock.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 
57 #include <ufs/ufs/quota.h>
58 #include <ufs/ufs/inode.h>
59 #include <ufs/ufs/ufs_extern.h>
60 
61 #include <ufs/ffs/fs.h>
62 #include <ufs/ffs/ffs_extern.h>
63 
64 /*
65  * Balloc defines the structure of filesystem storage
66  * by allocating the physical blocks on a device given
67  * the inode and the logical block number in a file.
68  * This is the allocation strategy for UFS1. Below is
69  * the allocation strategy for UFS2.
70  */
71 int
72 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
73     struct ucred *cred, int flags, struct buf **bpp)
74 {
75 	struct inode *ip;
76 	ufs_lbn_t lbn, lastlbn;
77 	struct fs *fs;
78 	ufs1_daddr_t nb;
79 	struct buf *bp, *nbp;
80 	struct indir indirs[NIADDR + 2];
81 	int deallocated, osize, nsize, num, i, error;
82 	ufs2_daddr_t newb;
83 	ufs1_daddr_t *bap, pref;
84 	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
85 	int unwindidx = -1;
86 	struct thread *td = curthread;	/* XXX */
87 
88 	ip = VTOI(vp);
89 	fs = ip->i_fs;
90 	lbn = lblkno(fs, startoffset);
91 	size = blkoff(fs, startoffset) + size;
92 	if (size > fs->fs_bsize)
93 		panic("ffs_balloc_ufs1: blk too big");
94 	*bpp = NULL;
95 	if (lbn < 0)
96 		return (EFBIG);
97 
98 	/*
99 	 * If the next write will extend the file into a new block,
100 	 * and the file is currently composed of a fragment,
101 	 * this fragment has to be extended to be a full block.
102 	 */
103 	lastlbn = lblkno(fs, ip->i_size);
104 	if (lastlbn < NDADDR && lastlbn < lbn) {
105 		nb = lastlbn;
106 		osize = blksize(fs, ip, nb);
107 		if (osize < fs->fs_bsize && osize > 0) {
108 			error = ffs_realloccg(ip, nb,
109 				ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
110 				    &ip->i_din1->di_db[0]),
111 				osize, (int)fs->fs_bsize, cred, &bp);
112 			if (error)
113 				return (error);
114 			if (DOINGSOFTDEP(vp))
115 				softdep_setup_allocdirect(ip, nb,
116 				    dbtofsb(fs, bp->b_blkno),
117 				    ip->i_din1->di_db[nb],
118 				    fs->fs_bsize, osize, bp);
119 			ip->i_size = smalllblktosize(fs, nb + 1);
120 			ip->i_din1->di_size = ip->i_size;
121 			ip->i_din1->di_db[nb] = dbtofsb(fs, bp->b_blkno);
122 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
123 			if (flags & BA_SYNC)
124 				bwrite(bp);
125 			else
126 				bawrite(bp);
127 		}
128 	}
129 	/*
130 	 * The first NDADDR blocks are direct blocks
131 	 */
132 	if (lbn < NDADDR) {
133 		if (flags & BA_METAONLY)
134 			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
135 		nb = ip->i_din1->di_db[lbn];
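		/*
		 * The block is already allocated and the file extends at
		 * least to the end of it, so it must be a full-sized block;
		 * just read it in and return it.
		 */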
136 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
137 			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
138 			if (error) {
139 				brelse(bp);
140 				return (error);
141 			}
142 			bp->b_blkno = fsbtodb(fs, nb);
143 			*bpp = bp;
144 			return (0);
145 		}
146 		if (nb != 0) {
147 			/*
148 			 * Consider the need to reallocate a fragment.
149 			 */
150 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
151 			nsize = fragroundup(fs, size);
152 			if (nsize <= osize) {
153 				error = bread(vp, lbn, osize, NOCRED, &bp);
154 				if (error) {
155 					brelse(bp);
156 					return (error);
157 				}
158 				bp->b_blkno = fsbtodb(fs, nb);
159 			} else {
160 				error = ffs_realloccg(ip, lbn,
161 				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
162 					&ip->i_din1->di_db[0]),
163 				    osize, nsize, cred, &bp);
164 				if (error)
165 					return (error);
166 				if (DOINGSOFTDEP(vp))
167 					softdep_setup_allocdirect(ip, lbn,
168 					    dbtofsb(fs, bp->b_blkno), nb,
169 					    nsize, osize, bp);
170 			}
171 		} else {
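			/*
			 * Nothing is allocated at this offset yet.  If the
			 * file does not extend past the end of this block,
			 * only a fragment is needed; otherwise allocate a
			 * full block.
			 */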
172 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
173 				nsize = fragroundup(fs, size);
174 			else
175 				nsize = fs->fs_bsize;
176 			error = ffs_alloc(ip, lbn,
177 			    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
178 				&ip->i_din1->di_db[0]),
179 			    nsize, cred, &newb);
180 			if (error)
181 				return (error);
182 			bp = getblk(vp, lbn, nsize, 0, 0);
183 			bp->b_blkno = fsbtodb(fs, newb);
184 			if (flags & BA_CLRBUF)
185 				vfs_bio_clrbuf(bp);
186 			if (DOINGSOFTDEP(vp))
187 				softdep_setup_allocdirect(ip, lbn, newb, 0,
188 				    nsize, 0, bp);
189 		}
190 		ip->i_din1->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
191 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
192 		*bpp = bp;
193 		return (0);
194 	}
195 	/*
196 	 * Determine the number of levels of indirection.
197 	 */
198 	pref = 0;
199 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
200 		return (error);
201 #ifdef DIAGNOSTIC
202 	if (num < 1)
203 		panic("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
204 #endif
205 	/*
206 	 * Fetch the first indirect block, allocating if necessary.
207 	 */
208 	--num;
209 	nb = ip->i_din1->di_ib[indirs[0].in_off];
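	/*
	 * allociblk[] records every block allocated below so that the
	 * fail path can free them again, and allocib remembers the inode
	 * indirect pointer that gets filled in so that it can be cleared
	 * if the allocation cannot be completed.
	 */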
210 	allocib = NULL;
211 	allocblk = allociblk;
212 	if (nb == 0) {
213 		pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
214 	        if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
215 		    cred, &newb)) != 0)
216 			return (error);
217 		nb = newb;
218 		*allocblk++ = nb;
219 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
220 		bp->b_blkno = fsbtodb(fs, nb);
221 		vfs_bio_clrbuf(bp);
222 		if (DOINGSOFTDEP(vp)) {
223 			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
224 			    newb, 0, fs->fs_bsize, 0, bp);
225 			bdwrite(bp);
226 		} else {
227 			/*
228 			 * Write synchronously so that indirect blocks
229 			 * never point at garbage.
230 			 */
231 			if (DOINGASYNC(vp))
232 				bdwrite(bp);
233 			else if ((error = bwrite(bp)) != 0)
234 				goto fail;
235 		}
236 		allocib = &ip->i_din1->di_ib[indirs[0].in_off];
237 		*allocib = nb;
238 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
239 	}
240 	/*
241 	 * Fetch through the indirect blocks, allocating as necessary.
242 	 */
243 	for (i = 1;;) {
244 		error = bread(vp,
245 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
246 		if (error) {
247 			brelse(bp);
248 			goto fail;
249 		}
250 		bap = (ufs1_daddr_t *)bp->b_data;
251 		nb = bap[indirs[i].in_off];
252 		if (i == num)
253 			break;
254 		i += 1;
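		/*
		 * If the indirect block at the next level down already
		 * exists, release this buffer and descend to it; otherwise
		 * allocate it below.
		 */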
255 		if (nb != 0) {
256 			bqrelse(bp);
257 			continue;
258 		}
259 		if (pref == 0)
260 			pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
261 		if ((error =
262 		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
263 			brelse(bp);
264 			goto fail;
265 		}
266 		nb = newb;
267 		*allocblk++ = nb;
268 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
269 		nbp->b_blkno = fsbtodb(fs, nb);
270 		vfs_bio_clrbuf(nbp);
271 		if (DOINGSOFTDEP(vp)) {
272 			softdep_setup_allocindir_meta(nbp, ip, bp,
273 			    indirs[i - 1].in_off, nb);
274 			bdwrite(nbp);
275 		} else {
276 			/*
277 			 * Write synchronously so that indirect blocks
278 			 * never point at garbage.
279 			 */
280 			if ((error = bwrite(nbp)) != 0) {
281 				brelse(bp);
282 				goto fail;
283 			}
284 		}
285 		bap[indirs[i - 1].in_off] = nb;
286 		if (allocib == NULL && unwindidx < 0)
287 			unwindidx = i - 1;
288 		/*
289 		 * If required, write synchronously, otherwise use
290 		 * delayed write.
291 		 */
292 		if (flags & BA_SYNC) {
293 			bwrite(bp);
294 		} else {
295 			if (bp->b_bufsize == fs->fs_bsize)
296 				bp->b_flags |= B_CLUSTEROK;
297 			bdwrite(bp);
298 		}
299 	}
300 	/*
301 	 * If asked only for the indirect block, then return it.
302 	 */
303 	if (flags & BA_METAONLY) {
304 		*bpp = bp;
305 		return (0);
306 	}
307 	/*
308 	 * Get the data block, allocating if necessary.
309 	 */
310 	if (nb == 0) {
311 		pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]);
312 		error = ffs_alloc(ip,
313 		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
314 		if (error) {
315 			brelse(bp);
316 			goto fail;
317 		}
318 		nb = newb;
319 		*allocblk++ = nb;
320 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
321 		nbp->b_blkno = fsbtodb(fs, nb);
322 		if (flags & BA_CLRBUF)
323 			vfs_bio_clrbuf(nbp);
324 		if (DOINGSOFTDEP(vp))
325 			softdep_setup_allocindir_page(ip, lbn, bp,
326 			    indirs[i].in_off, nb, 0, nbp);
327 		bap[indirs[i].in_off] = nb;
328 		/*
329 		 * If required, write synchronously, otherwise use
330 		 * delayed write.
331 		 */
332 		if (flags & BA_SYNC) {
333 			bwrite(bp);
334 		} else {
335 			if (bp->b_bufsize == fs->fs_bsize)
336 				bp->b_flags |= B_CLUSTEROK;
337 			bdwrite(bp);
338 		}
339 		*bpp = nbp;
340 		return (0);
341 	}
342 	brelse(bp);
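	/*
	 * The data block already exists.  With BA_CLRBUF the caller needs
	 * the block's current contents, so read it in; otherwise just get
	 * a buffer and map it to the existing block.
	 */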
343 	if (flags & BA_CLRBUF) {
344 		error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
345 		if (error) {
346 			brelse(nbp);
347 			goto fail;
348 		}
349 	} else {
350 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
351 		nbp->b_blkno = fsbtodb(fs, nb);
352 	}
353 	*bpp = nbp;
354 	return (0);
355 fail:
356 	/*
357 	 * If we have failed part way through block allocation, we
358 	 * have to deallocate any indirect blocks that we have allocated.
359 	 * We have to fsync the file before we start to get rid of all
360 	 * of its dependencies so that we do not leave them dangling.
361 	 * We have to sync it at the end so that the soft updates code
362 	 * does not find any untracked changes. Although this is really
363 	 * slow, running out of disk space is not expected to be a common
364 	 * occurrence. The error return from fsync is ignored as we already
365 	 * have an error to return to the user.
366 	 */
367 	(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
368 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
369 		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
370 		deallocated += fs->fs_bsize;
371 	}
372 	if (allocib != NULL) {
373 		*allocib = 0;
374 	} else if (unwindidx >= 0) {
375 		int r;
376 
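		/*
		 * Re-read the indirect block that was updated and clear the
		 * entry that pointed at the first of the blocks freed above.
		 */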
377 		r = bread(vp, indirs[unwindidx].in_lbn,
378 		    (int)fs->fs_bsize, NOCRED, &bp);
379 		if (r) {
380 			panic("Could not unwind indirect block, error %d", r);
381 			brelse(bp);
382 		} else {
383 			bap = (ufs1_daddr_t *)bp->b_data;
384 			bap[indirs[unwindidx].in_off] = 0;
385 			if (flags & BA_SYNC) {
386 				bwrite(bp);
387 			} else {
388 				if (bp->b_bufsize == fs->fs_bsize)
389 					bp->b_flags |= B_CLUSTEROK;
390 				bdwrite(bp);
391 			}
392 		}
393 	}
394 	if (deallocated) {
395 #ifdef QUOTA
396 		/*
397 		 * Restore user's disk quota because allocation failed.
398 		 */
399 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
400 #endif
401 		ip->i_din1->di_blocks -= btodb(deallocated);
402 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
403 	}
404 	(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
405 	return (error);
406 }
407 
408 /*
409  * Balloc defines the structure of file system storage
410  * by allocating the physical blocks on a device given
411  * the inode and the logical block number in a file.
412  * This is the allocation strategy for UFS2. Above is
413  * the allocation strategy for UFS1.
414  */
415 int
416 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
417     struct ucred *cred, int flags, struct buf **bpp)
418 {
419 	struct inode *ip;
420 	ufs_lbn_t lbn, lastlbn;
421 	struct fs *fs;
422 	struct buf *bp, *nbp;
423 	struct indir indirs[NIADDR + 2];
424 	ufs2_daddr_t nb, newb, *bap, pref;
425 	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
426 	int deallocated, osize, nsize, num, i, error;
427 	int unwindidx = -1;
428 	struct thread *td = curthread;	/* XXX */
429 
430 	ip = VTOI(vp);
431 	fs = ip->i_fs;
432 	lbn = lblkno(fs, startoffset);
433 	size = blkoff(fs, startoffset) + size;
434 	if (size > fs->fs_bsize)
435 		panic("ffs_balloc_ufs2: blk too big");
436 	*bpp = NULL;
437 	if (lbn < 0)
438 		return (EFBIG);
439 
440 	/*
441 	 * If the next write will extend the file into a new block,
442 	 * and the file is currently composed of a fragment,
443 	 * this fragment has to be extended to be a full block.
444 	 */
445 	lastlbn = lblkno(fs, ip->i_size);
446 	if (lastlbn < NDADDR && lastlbn < lbn) {
447 		nb = lastlbn;
448 		osize = blksize(fs, ip, nb);
449 		if (osize < fs->fs_bsize && osize > 0) {
450 			error = ffs_realloccg(ip, nb,
451 				ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
452 				    &ip->i_din2->di_db[0]),
453 				osize, (int)fs->fs_bsize, cred, &bp);
454 			if (error)
455 				return (error);
456 			if (DOINGSOFTDEP(vp))
457 				softdep_setup_allocdirect(ip, nb,
458 				    dbtofsb(fs, bp->b_blkno),
459 				    ip->i_din2->di_db[nb],
460 				    fs->fs_bsize, osize, bp);
461 			ip->i_size = smalllblktosize(fs, nb + 1);
462 			ip->i_din2->di_size = ip->i_size;
463 			ip->i_din2->di_db[nb] = dbtofsb(fs, bp->b_blkno);
464 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
465 			if (flags & BA_SYNC)
466 				bwrite(bp);
467 			else
468 				bawrite(bp);
469 		}
470 	}
471 	/*
472 	 * The first NDADDR blocks are direct blocks
473 	 */
474 	if (lbn < NDADDR) {
475 		if (flags & BA_METAONLY)
476 			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
477 		nb = ip->i_din2->di_db[lbn];
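		/*
		 * The block is already allocated and the file extends at
		 * least to the end of it, so it must be a full-sized block;
		 * just read it in and return it.
		 */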
478 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
479 			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
480 			if (error) {
481 				brelse(bp);
482 				return (error);
483 			}
484 			bp->b_blkno = fsbtodb(fs, nb);
485 			*bpp = bp;
486 			return (0);
487 		}
488 		if (nb != 0) {
489 			/*
490 			 * Consider the need to reallocate a fragment.
491 			 */
492 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
493 			nsize = fragroundup(fs, size);
494 			if (nsize <= osize) {
495 				error = bread(vp, lbn, osize, NOCRED, &bp);
496 				if (error) {
497 					brelse(bp);
498 					return (error);
499 				}
500 				bp->b_blkno = fsbtodb(fs, nb);
501 			} else {
502 				error = ffs_realloccg(ip, lbn,
503 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
504 					&ip->i_din2->di_db[0]),
505 				    osize, nsize, cred, &bp);
506 				if (error)
507 					return (error);
508 				if (DOINGSOFTDEP(vp))
509 					softdep_setup_allocdirect(ip, lbn,
510 					    dbtofsb(fs, bp->b_blkno), nb,
511 					    nsize, osize, bp);
512 			}
513 		} else {
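			/*
			 * Nothing is allocated at this offset yet.  If the
			 * file does not extend past the end of this block,
			 * only a fragment is needed; otherwise allocate a
			 * full block.
			 */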
514 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
515 				nsize = fragroundup(fs, size);
516 			else
517 				nsize = fs->fs_bsize;
518 			error = ffs_alloc(ip, lbn,
519 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
520 				&ip->i_din2->di_db[0]),
521 			    nsize, cred, &newb);
522 			if (error)
523 				return (error);
524 			bp = getblk(vp, lbn, nsize, 0, 0);
525 			bp->b_blkno = fsbtodb(fs, newb);
526 			if (flags & BA_CLRBUF)
527 				vfs_bio_clrbuf(bp);
528 			if (DOINGSOFTDEP(vp))
529 				softdep_setup_allocdirect(ip, lbn, newb, 0,
530 				    nsize, 0, bp);
531 		}
532 		ip->i_din2->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
533 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
534 		*bpp = bp;
535 		return (0);
536 	}
537 	/*
538 	 * Determine the number of levels of indirection.
539 	 */
540 	pref = 0;
541 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
542 		return (error);
543 #ifdef DIAGNOSTIC
544 	if (num < 1)
545 		panic("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
546 #endif
547 	/*
548 	 * Fetch the first indirect block, allocating if necessary.
549 	 */
550 	--num;
551 	nb = ip->i_din2->di_ib[indirs[0].in_off];
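	/*
	 * allociblk[] records every block allocated below so that the
	 * fail path can free them again, and allocib remembers the inode
	 * indirect pointer that gets filled in so that it can be cleared
	 * if the allocation cannot be completed.
	 */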
552 	allocib = NULL;
553 	allocblk = allociblk;
554 	if (nb == 0) {
555 		pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
556 	        if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
557 		    cred, &newb)) != 0)
558 			return (error);
559 		nb = newb;
560 		*allocblk++ = nb;
561 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
562 		bp->b_blkno = fsbtodb(fs, nb);
563 		vfs_bio_clrbuf(bp);
564 		if (DOINGSOFTDEP(vp)) {
565 			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
566 			    newb, 0, fs->fs_bsize, 0, bp);
567 			bdwrite(bp);
568 		} else {
569 			/*
570 			 * Write synchronously so that indirect blocks
571 			 * never point at garbage.
572 			 */
573 			if (DOINGASYNC(vp))
574 				bdwrite(bp);
575 			else if ((error = bwrite(bp)) != 0)
576 				goto fail;
577 		}
578 		allocib = &ip->i_din2->di_ib[indirs[0].in_off];
579 		*allocib = nb;
580 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
581 	}
582 	/*
583 	 * Fetch through the indirect blocks, allocating as necessary.
584 	 */
585 	for (i = 1;;) {
586 		error = bread(vp,
587 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
588 		if (error) {
589 			brelse(bp);
590 			goto fail;
591 		}
592 		bap = (ufs2_daddr_t *)bp->b_data;
593 		nb = bap[indirs[i].in_off];
594 		if (i == num)
595 			break;
596 		i += 1;
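		/*
		 * If the indirect block at the next level down already
		 * exists, release this buffer and descend to it; otherwise
		 * allocate it below.
		 */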
597 		if (nb != 0) {
598 			bqrelse(bp);
599 			continue;
600 		}
601 		if (pref == 0)
602 			pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
603 		if ((error =
604 		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
605 			brelse(bp);
606 			goto fail;
607 		}
608 		nb = newb;
609 		*allocblk++ = nb;
610 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
611 		nbp->b_blkno = fsbtodb(fs, nb);
612 		vfs_bio_clrbuf(nbp);
613 		if (DOINGSOFTDEP(vp)) {
614 			softdep_setup_allocindir_meta(nbp, ip, bp,
615 			    indirs[i - 1].in_off, nb);
616 			bdwrite(nbp);
617 		} else {
618 			/*
619 			 * Write synchronously so that indirect blocks
620 			 * never point at garbage.
621 			 */
622 			if ((error = bwrite(nbp)) != 0) {
623 				brelse(bp);
624 				goto fail;
625 			}
626 		}
627 		bap[indirs[i - 1].in_off] = nb;
628 		if (allocib == NULL && unwindidx < 0)
629 			unwindidx = i - 1;
630 		/*
631 		 * If required, write synchronously, otherwise use
632 		 * delayed write.
633 		 */
634 		if (flags & BA_SYNC) {
635 			bwrite(bp);
636 		} else {
637 			if (bp->b_bufsize == fs->fs_bsize)
638 				bp->b_flags |= B_CLUSTEROK;
639 			bdwrite(bp);
640 		}
641 	}
642 	/*
643 	 * If asked only for the indirect block, then return it.
644 	 */
645 	if (flags & BA_METAONLY) {
646 		*bpp = bp;
647 		return (0);
648 	}
649 	/*
650 	 * Get the data block, allocating if necessary.
651 	 */
652 	if (nb == 0) {
653 		pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]);
654 		error = ffs_alloc(ip,
655 		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
656 		if (error) {
657 			brelse(bp);
658 			goto fail;
659 		}
660 		nb = newb;
661 		*allocblk++ = nb;
662 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
663 		nbp->b_blkno = fsbtodb(fs, nb);
664 		if (flags & BA_CLRBUF)
665 			vfs_bio_clrbuf(nbp);
666 		if (DOINGSOFTDEP(vp))
667 			softdep_setup_allocindir_page(ip, lbn, bp,
668 			    indirs[i].in_off, nb, 0, nbp);
669 		bap[indirs[i].in_off] = nb;
670 		/*
671 		 * If required, write synchronously, otherwise use
672 		 * delayed write.
673 		 */
674 		if (flags & BA_SYNC) {
675 			bwrite(bp);
676 		} else {
677 			if (bp->b_bufsize == fs->fs_bsize)
678 				bp->b_flags |= B_CLUSTEROK;
679 			bdwrite(bp);
680 		}
681 		*bpp = nbp;
682 		return (0);
683 	}
684 	brelse(bp);
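	/*
	 * The data block already exists.  With BA_CLRBUF the caller needs
	 * the block's current contents, so read it in; otherwise just get
	 * a buffer and map it to the existing block.
	 */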
685 	if (flags & BA_CLRBUF) {
686 		error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
687 		if (error) {
688 			brelse(nbp);
689 			goto fail;
690 		}
691 	} else {
692 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
693 		nbp->b_blkno = fsbtodb(fs, nb);
694 	}
695 	*bpp = nbp;
696 	return (0);
697 fail:
698 	/*
699 	 * If we have failed part way through block allocation, we
700 	 * have to deallocate any indirect blocks that we have allocated.
701 	 * We have to fsync the file before we start to get rid of all
702 	 * of its dependencies so that we do not leave them dangling.
703 	 * We have to sync it at the end so that the soft updates code
704 	 * does not find any untracked changes. Although this is really
705 	 * slow, running out of disk space is not expected to be a common
706 	 * occurrence. The error return from fsync is ignored as we already
707 	 * have an error to return to the user.
708 	 */
709 	(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
710 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
711 		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
712 		deallocated += fs->fs_bsize;
713 	}
714 	if (allocib != NULL) {
715 		*allocib = 0;
716 	} else if (unwindidx >= 0) {
717 		int r;
718 
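		/*
		 * Re-read the indirect block that was updated and clear the
		 * entry that pointed at the first of the blocks freed above.
		 */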
719 		r = bread(vp, indirs[unwindidx].in_lbn,
720 		    (int)fs->fs_bsize, NOCRED, &bp);
721 		if (r) {
722 			panic("Could not unwind indirect block, error %d", r);
723 			brelse(bp);
724 		} else {
725 			bap = (ufs2_daddr_t *)bp->b_data;
726 			bap[indirs[unwindidx].in_off] = 0;
727 			if (flags & BA_SYNC) {
728 				bwrite(bp);
729 			} else {
730 				if (bp->b_bufsize == fs->fs_bsize)
731 					bp->b_flags |= B_CLUSTEROK;
732 				bdwrite(bp);
733 			}
734 		}
735 	}
736 	if (deallocated) {
737 #ifdef QUOTA
738 		/*
739 		 * Restore user's disk quota because allocation failed.
740 		 */
741 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
742 #endif
743 		ip->i_din2->di_blocks -= btodb(deallocated);
744 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
745 	}
746 	(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
747 	return (error);
748 }
749