xref: /freebsd/sys/ufs/ffs/ffs_balloc.c (revision ae83180158c4c937f170e31eff311b18c0286a93)
1 /*
2  * Copyright (c) 2002 Networks Associates Technology, Inc.
3  * All rights reserved.
4  *
5  * This software was developed for the FreeBSD Project by Marshall
6  * Kirk McKusick and Network Associates Laboratories, the Security
7  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9  * research program
10  *
11  * Copyright (c) 1982, 1989, 1993
12  *	The Regents of the University of California.  All rights reserved.
13  * (c) UNIX System Laboratories, Inc.
14  * Copyright (c) 1982, 1986, 1989, 1993
15  *	The Regents of the University of California.  All rights reserved.
16  *
17  * Redistribution and use in source and binary forms, with or without
18  * modification, are permitted provided that the following conditions
19  * are met:
20  * 1. Redistributions of source code must retain the above copyright
21  *    notice, this list of conditions and the following disclaimer.
22  * 2. Redistributions in binary form must reproduce the above copyright
23  *    notice, this list of conditions and the following disclaimer in the
24  *    documentation and/or other materials provided with the distribution.
25  * 3. All advertising materials mentioning features or use of this software
26  *    must display the following acknowledgement:
27  *	This product includes software developed by the University of
28  *	California, Berkeley and its contributors.
29  * 4. Neither the name of the University nor the names of its contributors
30  *    may be used to endorse or promote products derived from this software
31  *    without specific prior written permission.
32  *
33  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
34  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
37  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
39  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
41  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
42  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43  * SUCH DAMAGE.
44  *
45  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
46  * $FreeBSD$
47  */
48 
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/bio.h>
52 #include <sys/buf.h>
53 #include <sys/lock.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 
57 #include <ufs/ufs/quota.h>
58 #include <ufs/ufs/inode.h>
59 #include <ufs/ufs/ufs_extern.h>
60 
61 #include <ufs/ffs/fs.h>
62 #include <ufs/ffs/ffs_extern.h>
63 
64 /*
65  * Balloc defines the structure of filesystem storage
66  * by allocating the physical blocks on a device, given
67  * the inode and the logical block number in a file.
68  * This is the allocation strategy for UFS1. Below is
69  * the allocation strategy for UFS2.
70  */
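/*
 * Illustrative sketch, not part of the original source: the logical
 * block number alone decides how much indirection balloc must build.
 * Assuming the usual NDADDR of 12 direct pointers and, for example,
 * an 8K block size (so NINDIR(fs) == 8192 / sizeof(ufs1_daddr_t) ==
 * 2048 for UFS1), the mapping is roughly:
 *
 *	lbn 0 .. 11                      direct blocks, di_db[]
 *	next 2048 blocks                 single indirect, di_ib[0]
 *	next 2048 * 2048 blocks          double indirect, di_ib[1]
 *	next 2048 * 2048 * 2048 blocks   triple indirect, di_ib[2]
 *
 * ufs_getlbns() below computes which of these cases applies.
 */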
71 int
72 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
73     struct ucred *cred, int flags, struct buf **bpp)
74 {
75 	struct inode *ip;
76 	struct ufs1_dinode *dp;
77 	ufs_lbn_t lbn, lastlbn;
78 	struct fs *fs;
79 	ufs1_daddr_t nb;
80 	struct buf *bp, *nbp;
81 	struct indir indirs[NIADDR + 2];
82 	int deallocated, osize, nsize, num, i, error;
83 	ufs2_daddr_t newb;
84 	ufs1_daddr_t *bap, pref;
85 	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
86 	int unwindidx = -1;
87 	struct thread *td = curthread;	/* XXX */
88 
89 	ip = VTOI(vp);
90 	dp = ip->i_din1;
91 	fs = ip->i_fs;
92 	lbn = lblkno(fs, startoffset);
93 	size = blkoff(fs, startoffset) + size;
94 	if (size > fs->fs_bsize)
95 		panic("ffs_balloc_ufs1: blk too big");
96 	*bpp = NULL;
97 	if (flags & IO_EXT)
98 		return (EOPNOTSUPP);
99 	if (lbn < 0)
100 		return (EFBIG);
101 
102 	/*
103 	 * If the next write will extend the file into a new block,
104 	 * and the file currently ends in a fragment, that fragment
105 	 * has to be extended to be a full block.
106 	 */
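	/*
	 * Worked example (illustrative numbers, not from this code): with
	 * fs_bsize 16384 and fs_fsize 2048, a 5000-byte file ends in a
	 * 6144-byte fragment run.  A write that starts a later block must
	 * first let ffs_realloccg() below grow that run to a full
	 * 16384-byte block, preserving the rule that only a file's last
	 * block may be a fragment.
	 */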
107 	lastlbn = lblkno(fs, ip->i_size);
108 	if (lastlbn < NDADDR && lastlbn < lbn) {
109 		nb = lastlbn;
110 		osize = blksize(fs, ip, nb);
111 		if (osize < fs->fs_bsize && osize > 0) {
112 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
113 			   ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
114 			   &dp->di_db[0]), osize, (int)fs->fs_bsize, cred, &bp);
115 			if (error)
116 				return (error);
117 			if (DOINGSOFTDEP(vp))
118 				softdep_setup_allocdirect(ip, nb,
119 				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
120 				    fs->fs_bsize, osize, bp);
121 			ip->i_size = smalllblktosize(fs, nb + 1);
122 			dp->di_size = ip->i_size;
123 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
124 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
125 			if (flags & IO_SYNC)
126 				bwrite(bp);
127 			else
128 				bawrite(bp);
129 		}
130 	}
131 	/*
132 	 * The first NDADDR blocks are direct blocks
133 	 */
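	/*
	 * Flag sketch, inferred from the uses in this function: BA_CLRBUF
	 * asks for a zero-filled buffer (partial-block write), BA_METAONLY
	 * asks for the indirect block itself rather than the data block,
	 * and IO_SYNC forces the metadata updates to be written
	 * synchronously.
	 */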
134 	if (lbn < NDADDR) {
135 		if (flags & BA_METAONLY)
136 			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
137 		nb = dp->di_db[lbn];
138 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
139 			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
140 			if (error) {
141 				brelse(bp);
142 				return (error);
143 			}
144 			bp->b_blkno = fsbtodb(fs, nb);
145 			*bpp = bp;
146 			return (0);
147 		}
148 		if (nb != 0) {
149 			/*
150 			 * Consider the need to reallocate a fragment.
151 			 */
152 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
153 			nsize = fragroundup(fs, size);
154 			if (nsize <= osize) {
155 				error = bread(vp, lbn, osize, NOCRED, &bp);
156 				if (error) {
157 					brelse(bp);
158 					return (error);
159 				}
160 				bp->b_blkno = fsbtodb(fs, nb);
161 			} else {
162 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
163 				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
164 				    &dp->di_db[0]), osize, nsize, cred, &bp);
165 				if (error)
166 					return (error);
167 				if (DOINGSOFTDEP(vp))
168 					softdep_setup_allocdirect(ip, lbn,
169 					    dbtofsb(fs, bp->b_blkno), nb,
170 					    nsize, osize, bp);
171 			}
172 		} else {
173 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
174 				nsize = fragroundup(fs, size);
175 			else
176 				nsize = fs->fs_bsize;
177 			error = ffs_alloc(ip, lbn,
178 			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
179 			    nsize, cred, &newb);
180 			if (error)
181 				return (error);
182 			bp = getblk(vp, lbn, nsize, 0, 0);
183 			bp->b_blkno = fsbtodb(fs, newb);
184 			if (flags & BA_CLRBUF)
185 				vfs_bio_clrbuf(bp);
186 			if (DOINGSOFTDEP(vp))
187 				softdep_setup_allocdirect(ip, lbn, newb, 0,
188 				    nsize, 0, bp);
189 		}
190 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
191 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
192 		*bpp = bp;
193 		return (0);
194 	}
195 	/*
196 	 * Determine the number of levels of indirection.
197 	 */
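	/*
	 * Sketch of the result, as used below: indirs[0].in_off indexes
	 * di_ib[], indirs[1..num].in_lbn are the logical block numbers of
	 * the indirect blocks on the path to lbn, and the in_off of the
	 * last entry locates the data block pointer within the final
	 * indirect block; num (after the decrement below) is therefore
	 * 1, 2, or 3.
	 */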
198 	pref = 0;
199 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
200 		return (error);
201 #ifdef DIAGNOSTIC
202 	if (num < 1)
203 		panic("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
204 #endif
205 	/*
206 	 * Fetch the first indirect block, allocating it if necessary.
207 	 */
208 	--num;
209 	nb = dp->di_ib[indirs[0].in_off];
210 	allocib = NULL;
211 	allocblk = allociblk;
212 	if (nb == 0) {
213 		pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
214 	        if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
215 		    cred, &newb)) != 0)
216 			return (error);
217 		nb = newb;
218 		*allocblk++ = nb;
219 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
220 		bp->b_blkno = fsbtodb(fs, nb);
221 		vfs_bio_clrbuf(bp);
222 		if (DOINGSOFTDEP(vp)) {
223 			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
224 			    newb, 0, fs->fs_bsize, 0, bp);
225 			bdwrite(bp);
226 		} else {
227 			/*
228 			 * Write synchronously so that indirect blocks
229 			 * never point at garbage.
230 			 */
231 			if (DOINGASYNC(vp))
232 				bdwrite(bp);
233 			else if ((error = bwrite(bp)) != 0)
234 				goto fail;
235 		}
236 		allocib = &dp->di_ib[indirs[0].in_off];
237 		*allocib = nb;
238 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
239 	}
240 	/*
241 	 * Fetch through the indirect blocks, allocating as necessary.
242 	 */
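	/*
	 * Loop sketch: on each pass bp holds the indirect block for level i
	 * and bap its pointer array.  Missing intermediate indirect blocks
	 * are allocated, zeroed, and recorded in allociblk[] so a later
	 * failure can free them; the loop exits with i == num and nb set to
	 * the existing data block number, or 0 if one must be allocated.
	 */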
243 	for (i = 1;;) {
244 		error = bread(vp,
245 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
246 		if (error) {
247 			brelse(bp);
248 			goto fail;
249 		}
250 		bap = (ufs1_daddr_t *)bp->b_data;
251 		nb = bap[indirs[i].in_off];
252 		if (i == num)
253 			break;
254 		i += 1;
255 		if (nb != 0) {
256 			bqrelse(bp);
257 			continue;
258 		}
259 		if (pref == 0)
260 			pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
261 		if ((error =
262 		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
263 			brelse(bp);
264 			goto fail;
265 		}
266 		nb = newb;
267 		*allocblk++ = nb;
268 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
269 		nbp->b_blkno = fsbtodb(fs, nb);
270 		vfs_bio_clrbuf(nbp);
271 		if (DOINGSOFTDEP(vp)) {
272 			softdep_setup_allocindir_meta(nbp, ip, bp,
273 			    indirs[i - 1].in_off, nb);
274 			bdwrite(nbp);
275 		} else {
276 			/*
277 			 * Write synchronously so that indirect blocks
278 			 * never point at garbage.
279 			 */
280 			if ((error = bwrite(nbp)) != 0) {
281 				brelse(bp);
282 				goto fail;
283 			}
284 		}
285 		bap[indirs[i - 1].in_off] = nb;
286 		if (allocib == NULL && unwindidx < 0)
287 			unwindidx = i - 1;
288 		/*
289 		 * If required, write synchronously, otherwise use
290 		 * delayed write.
291 		 */
292 		if (flags & IO_SYNC) {
293 			bwrite(bp);
294 		} else {
295 			if (bp->b_bufsize == fs->fs_bsize)
296 				bp->b_flags |= B_CLUSTEROK;
297 			bdwrite(bp);
298 		}
299 	}
300 	/*
301 	 * If asked only for the indirect block, then return it.
302 	 */
303 	if (flags & BA_METAONLY) {
304 		*bpp = bp;
305 		return (0);
306 	}
307 	/*
308 	 * Get the data block, allocating if necessary.
309 	 */
310 	if (nb == 0) {
311 		pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]);
312 		error = ffs_alloc(ip,
313 		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
314 		if (error) {
315 			brelse(bp);
316 			goto fail;
317 		}
318 		nb = newb;
319 		*allocblk++ = nb;
320 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
321 		nbp->b_blkno = fsbtodb(fs, nb);
322 		if (flags & BA_CLRBUF)
323 			vfs_bio_clrbuf(nbp);
324 		if (DOINGSOFTDEP(vp))
325 			softdep_setup_allocindir_page(ip, lbn, bp,
326 			    indirs[i].in_off, nb, 0, nbp);
327 		bap[indirs[i].in_off] = nb;
328 		/*
329 		 * If required, write synchronously, otherwise use
330 		 * delayed write.
331 		 */
332 		if (flags & IO_SYNC) {
333 			bwrite(bp);
334 		} else {
335 			if (bp->b_bufsize == fs->fs_bsize)
336 				bp->b_flags |= B_CLUSTEROK;
337 			bdwrite(bp);
338 		}
339 		*bpp = nbp;
340 		return (0);
341 	}
342 	brelse(bp);
343 	if (flags & BA_CLRBUF) {
344 		error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
345 		if (error) {
346 			brelse(nbp);
347 			goto fail;
348 		}
349 	} else {
350 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
351 		nbp->b_blkno = fsbtodb(fs, nb);
352 	}
353 	*bpp = nbp;
354 	return (0);
355 fail:
356 	/*
357 	 * If we have failed part way through block allocation, we
358 	 * have to deallocate any indirect blocks that we have allocated.
359 	 * We have to fsync the file before we start to get rid of all
360 	 * of its dependencies so that we do not leave them dangling.
361 	 * We have to sync it at the end so that the soft updates code
362 	 * does not find any untracked changes. Although this is really
363 	 * slow, running out of disk space is not expected to be a common
364 	 * occurrence. The error return from fsync is ignored as we already
365 	 * have an error to return to the user.
366 	 */
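	/*
	 * Unwind sketch: every block allocated above was remembered in
	 * allociblk[], so it can simply be freed here.  allocib (or the
	 * indirect-block entry at unwindidx) is the one pointer that first
	 * attached the new subtree to the inode, and clearing it detaches
	 * everything that was allocated in this call.
	 */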
367 	(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
368 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
369 		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
370 		deallocated += fs->fs_bsize;
371 	}
372 	if (allocib != NULL) {
373 		*allocib = 0;
374 	} else if (unwindidx >= 0) {
375 		int r;
376 
377 		r = bread(vp, indirs[unwindidx].in_lbn,
378 		    (int)fs->fs_bsize, NOCRED, &bp);
379 		if (r) {
380 			panic("Could not unwind indirect block, error %d", r);
381 			brelse(bp);
382 		} else {
383 			bap = (ufs1_daddr_t *)bp->b_data;
384 			bap[indirs[unwindidx].in_off] = 0;
385 			if (flags & IO_SYNC) {
386 				bwrite(bp);
387 			} else {
388 				if (bp->b_bufsize == fs->fs_bsize)
389 					bp->b_flags |= B_CLUSTEROK;
390 				bdwrite(bp);
391 			}
392 		}
393 	}
394 	if (deallocated) {
395 #ifdef QUOTA
396 		/*
397 		 * Restore user's disk quota because allocation failed.
398 		 */
399 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
400 #endif
401 		dp->di_blocks -= btodb(deallocated);
402 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
403 	}
404 	(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
405 	return (error);
406 }
407 
408 /*
409  * Balloc defines the structure of file system storage
410  * by allocating the physical blocks on a device, given
411  * the inode and the logical block number in a file.
412  * This is the allocation strategy for UFS2. Above is
413  * the allocation strategy for UFS1.
414  */
415 int
416 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
417     struct ucred *cred, int flags, struct buf **bpp)
418 {
419 	struct inode *ip;
420 	struct ufs2_dinode *dp;
421 	ufs_lbn_t lbn, lastlbn;
422 	struct fs *fs;
423 	struct buf *bp, *nbp;
424 	struct indir indirs[NIADDR + 2];
425 	ufs2_daddr_t nb, newb, *bap, pref;
426 	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
427 	int deallocated, osize, nsize, num, i, error;
428 	int unwindidx = -1;
429 	struct thread *td = curthread;	/* XXX */
430 
431 	ip = VTOI(vp);
432 	dp = ip->i_din2;
433 	fs = ip->i_fs;
434 	lbn = lblkno(fs, startoffset);
435 	size = blkoff(fs, startoffset) + size;
436 	if (size > fs->fs_bsize)
437 		panic("ffs_balloc_ufs2: blk too big");
438 	*bpp = NULL;
439 	if (lbn < 0)
440 		return (EFBIG);
441 
442 	/*
443 	 * Check for allocating external data.
444 	 */
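	/*
	 * Sketch, derived from the code below: the external attribute data
	 * of a UFS2 inode is limited to NXADDR direct blocks (di_extb[]).
	 * Its buffers are addressed with negative logical block numbers,
	 * -1 - lbn, so they never collide with the regular data blocks of
	 * the vnode, and they are marked BX_ALTDATA.
	 */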
445 	if (flags & IO_EXT) {
446 		if (lbn >= NXADDR)
447 			return (EFBIG);
448 		/*
449 		 * If the next write will extend the data into a new block,
450 	 * and the data currently ends in a fragment, that fragment
451 	 * has to be extended to be a full block.
452 		 */
453 		lastlbn = lblkno(fs, dp->di_extsize);
454 		if (lastlbn < lbn) {
455 			nb = lastlbn;
456 			osize = sblksize(fs, dp->di_extsize, nb);
457 			if (osize < fs->fs_bsize && osize > 0) {
458 				error = ffs_realloccg(ip, -1 - nb,
459 				    dp->di_extb[nb],
460 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
461 				    &dp->di_extb[0]), osize,
462 				    (int)fs->fs_bsize, cred, &bp);
463 				if (error)
464 					return (error);
465 				if (DOINGSOFTDEP(vp))
466 					softdep_setup_allocext(ip, nb,
467 					    dbtofsb(fs, bp->b_blkno),
468 					    dp->di_extb[nb],
469 					    fs->fs_bsize, osize, bp);
470 				dp->di_extsize = smalllblktosize(fs, nb + 1);
471 				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
472 				bp->b_xflags |= BX_ALTDATA;
473 				ip->i_flag |= IN_CHANGE | IN_UPDATE;
474 				if (flags & IO_SYNC)
475 					bwrite(bp);
476 				else
477 					bawrite(bp);
478 			}
479 		}
480 		/*
481 		 * All blocks are direct blocks
482 		 */
483 		if (flags & BA_METAONLY)
484 			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
485 		nb = dp->di_extb[lbn];
486 		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
487 			error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
488 			if (error) {
489 				brelse(bp);
490 				return (error);
491 			}
492 			bp->b_blkno = fsbtodb(fs, nb);
493 			bp->b_xflags |= BX_ALTDATA;
494 			*bpp = bp;
495 			return (0);
496 		}
497 		if (nb != 0) {
498 			/*
499 			 * Consider the need to reallocate a fragment.
500 			 */
501 			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
502 			nsize = fragroundup(fs, size);
503 			if (nsize <= osize) {
504 				error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
505 				if (error) {
506 					brelse(bp);
507 					return (error);
508 				}
509 				bp->b_blkno = fsbtodb(fs, nb);
510 				bp->b_xflags |= BX_ALTDATA;
511 			} else {
512 				error = ffs_realloccg(ip, -1 - lbn,
513 				    dp->di_extb[lbn],
514 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
515 				    &dp->di_extb[0]), osize, nsize, cred, &bp);
516 				if (error)
517 					return (error);
518 				bp->b_xflags |= BX_ALTDATA;
519 				if (DOINGSOFTDEP(vp))
520 					softdep_setup_allocext(ip, lbn,
521 					    dbtofsb(fs, bp->b_blkno), nb,
522 					    nsize, osize, bp);
523 			}
524 		} else {
525 			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
526 				nsize = fragroundup(fs, size);
527 			else
528 				nsize = fs->fs_bsize;
529 			error = ffs_alloc(ip, lbn,
530 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
531 			   nsize, cred, &newb);
532 			if (error)
533 				return (error);
534 			bp = getblk(vp, -1 - lbn, nsize, 0, 0);
535 			bp->b_blkno = fsbtodb(fs, newb);
536 			bp->b_xflags |= BX_ALTDATA;
537 			if (flags & BA_CLRBUF)
538 				vfs_bio_clrbuf(bp);
539 			if (DOINGSOFTDEP(vp))
540 				softdep_setup_allocext(ip, lbn, newb, 0,
541 				    nsize, 0, bp);
542 		}
543 		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
544 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
545 		*bpp = bp;
546 		return (0);
547 	}
548 	/*
549 	 * If the next write will extend the file into a new block,
550 	 * and the file currently ends in a fragment, that fragment
551 	 * has to be extended to be a full block.
552 	 */
553 	lastlbn = lblkno(fs, ip->i_size);
554 	if (lastlbn < NDADDR && lastlbn < lbn) {
555 		nb = lastlbn;
556 		osize = blksize(fs, ip, nb);
557 		if (osize < fs->fs_bsize && osize > 0) {
558 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
559 				ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
560 				    &dp->di_db[0]), osize, (int)fs->fs_bsize,
561 				    cred, &bp);
562 			if (error)
563 				return (error);
564 			if (DOINGSOFTDEP(vp))
565 				softdep_setup_allocdirect(ip, nb,
566 				    dbtofsb(fs, bp->b_blkno),
567 				    dp->di_db[nb],
568 				    fs->fs_bsize, osize, bp);
569 			ip->i_size = smalllblktosize(fs, nb + 1);
570 			dp->di_size = ip->i_size;
571 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
572 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
573 			if (flags & IO_SYNC)
574 				bwrite(bp);
575 			else
576 				bawrite(bp);
577 		}
578 	}
579 	/*
580 	 * The first NDADDR blocks are direct blocks
581 	 */
582 	if (lbn < NDADDR) {
583 		if (flags & BA_METAONLY)
584 			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
585 		nb = dp->di_db[lbn];
586 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
587 			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
588 			if (error) {
589 				brelse(bp);
590 				return (error);
591 			}
592 			bp->b_blkno = fsbtodb(fs, nb);
593 			*bpp = bp;
594 			return (0);
595 		}
596 		if (nb != 0) {
597 			/*
598 			 * Consider the need to reallocate a fragment.
599 			 */
600 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
601 			nsize = fragroundup(fs, size);
602 			if (nsize <= osize) {
603 				error = bread(vp, lbn, osize, NOCRED, &bp);
604 				if (error) {
605 					brelse(bp);
606 					return (error);
607 				}
608 				bp->b_blkno = fsbtodb(fs, nb);
609 			} else {
610 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
611 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
612 				       &dp->di_db[0]), osize, nsize, cred, &bp);
613 				if (error)
614 					return (error);
615 				if (DOINGSOFTDEP(vp))
616 					softdep_setup_allocdirect(ip, lbn,
617 					    dbtofsb(fs, bp->b_blkno), nb,
618 					    nsize, osize, bp);
619 			}
620 		} else {
621 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
622 				nsize = fragroundup(fs, size);
623 			else
624 				nsize = fs->fs_bsize;
625 			error = ffs_alloc(ip, lbn,
626 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
627 				&dp->di_db[0]), nsize, cred, &newb);
628 			if (error)
629 				return (error);
630 			bp = getblk(vp, lbn, nsize, 0, 0);
631 			bp->b_blkno = fsbtodb(fs, newb);
632 			if (flags & BA_CLRBUF)
633 				vfs_bio_clrbuf(bp);
634 			if (DOINGSOFTDEP(vp))
635 				softdep_setup_allocdirect(ip, lbn, newb, 0,
636 				    nsize, 0, bp);
637 		}
638 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
639 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
640 		*bpp = bp;
641 		return (0);
642 	}
643 	/*
644 	 * Determine the number of levels of indirection.
645 	 */
646 	pref = 0;
647 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
648 		return (error);
649 #ifdef DIAGNOSTIC
650 	if (num < 1)
651 		panic("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
652 #endif
653 	/*
654 	 * Fetch the first indirect block, allocating it if necessary.
655 	 */
656 	--num;
657 	nb = dp->di_ib[indirs[0].in_off];
658 	allocib = NULL;
659 	allocblk = allociblk;
660 	if (nb == 0) {
661 		pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
662 	        if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
663 		    cred, &newb)) != 0)
664 			return (error);
665 		nb = newb;
666 		*allocblk++ = nb;
667 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
668 		bp->b_blkno = fsbtodb(fs, nb);
669 		vfs_bio_clrbuf(bp);
670 		if (DOINGSOFTDEP(vp)) {
671 			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
672 			    newb, 0, fs->fs_bsize, 0, bp);
673 			bdwrite(bp);
674 		} else {
675 			/*
676 			 * Write synchronously so that indirect blocks
677 			 * never point at garbage.
678 			 */
679 			if (DOINGASYNC(vp))
680 				bdwrite(bp);
681 			else if ((error = bwrite(bp)) != 0)
682 				goto fail;
683 		}
684 		allocib = &dp->di_ib[indirs[0].in_off];
685 		*allocib = nb;
686 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
687 	}
688 	/*
689 	 * Fetch through the indirect blocks, allocating as necessary.
690 	 */
691 	for (i = 1;;) {
692 		error = bread(vp,
693 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
694 		if (error) {
695 			brelse(bp);
696 			goto fail;
697 		}
698 		bap = (ufs2_daddr_t *)bp->b_data;
699 		nb = bap[indirs[i].in_off];
700 		if (i == num)
701 			break;
702 		i += 1;
703 		if (nb != 0) {
704 			bqrelse(bp);
705 			continue;
706 		}
707 		if (pref == 0)
708 			pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
709 		if ((error =
710 		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
711 			brelse(bp);
712 			goto fail;
713 		}
714 		nb = newb;
715 		*allocblk++ = nb;
716 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
717 		nbp->b_blkno = fsbtodb(fs, nb);
718 		vfs_bio_clrbuf(nbp);
719 		if (DOINGSOFTDEP(vp)) {
720 			softdep_setup_allocindir_meta(nbp, ip, bp,
721 			    indirs[i - 1].in_off, nb);
722 			bdwrite(nbp);
723 		} else {
724 			/*
725 			 * Write synchronously so that indirect blocks
726 			 * never point at garbage.
727 			 */
728 			if ((error = bwrite(nbp)) != 0) {
729 				brelse(bp);
730 				goto fail;
731 			}
732 		}
733 		bap[indirs[i - 1].in_off] = nb;
734 		if (allocib == NULL && unwindidx < 0)
735 			unwindidx = i - 1;
736 		/*
737 		 * If required, write synchronously, otherwise use
738 		 * delayed write.
739 		 */
740 		if (flags & IO_SYNC) {
741 			bwrite(bp);
742 		} else {
743 			if (bp->b_bufsize == fs->fs_bsize)
744 				bp->b_flags |= B_CLUSTEROK;
745 			bdwrite(bp);
746 		}
747 	}
748 	/*
749 	 * If asked only for the indirect block, then return it.
750 	 */
751 	if (flags & BA_METAONLY) {
752 		*bpp = bp;
753 		return (0);
754 	}
755 	/*
756 	 * Get the data block, allocating if necessary.
757 	 */
758 	if (nb == 0) {
759 		pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]);
760 		error = ffs_alloc(ip,
761 		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
762 		if (error) {
763 			brelse(bp);
764 			goto fail;
765 		}
766 		nb = newb;
767 		*allocblk++ = nb;
768 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
769 		nbp->b_blkno = fsbtodb(fs, nb);
770 		if (flags & BA_CLRBUF)
771 			vfs_bio_clrbuf(nbp);
772 		if (DOINGSOFTDEP(vp))
773 			softdep_setup_allocindir_page(ip, lbn, bp,
774 			    indirs[i].in_off, nb, 0, nbp);
775 		bap[indirs[i].in_off] = nb;
776 		/*
777 		 * If required, write synchronously, otherwise use
778 		 * delayed write.
779 		 */
780 		if (flags & IO_SYNC) {
781 			bwrite(bp);
782 		} else {
783 			if (bp->b_bufsize == fs->fs_bsize)
784 				bp->b_flags |= B_CLUSTEROK;
785 			bdwrite(bp);
786 		}
787 		*bpp = nbp;
788 		return (0);
789 	}
790 	brelse(bp);
791 	if (flags & BA_CLRBUF) {
792 		error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
793 		if (error) {
794 			brelse(nbp);
795 			goto fail;
796 		}
797 	} else {
798 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
799 		nbp->b_blkno = fsbtodb(fs, nb);
800 	}
801 	*bpp = nbp;
802 	return (0);
803 fail:
804 	/*
805 	 * If we have failed part way through block allocation, we
806 	 * have to deallocate any indirect blocks that we have allocated.
807 	 * We have to fsync the file before we start to get rid of all
808 	 * of its dependencies so that we do not leave them dangling.
809 	 * We have to sync it at the end so that the soft updates code
810 	 * does not find any untracked changes. Although this is really
811 	 * slow, running out of disk space is not expected to be a common
812 	 * occurrence. The error return from fsync is ignored as we already
813 	 * have an error to return to the user.
814 	 */
815 	(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
816 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
817 		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
818 		deallocated += fs->fs_bsize;
819 	}
820 	if (allocib != NULL) {
821 		*allocib = 0;
822 	} else if (unwindidx >= 0) {
823 		int r;
824 
825 		r = bread(vp, indirs[unwindidx].in_lbn,
826 		    (int)fs->fs_bsize, NOCRED, &bp);
827 		if (r) {
828 			panic("Could not unwind indirect block, error %d", r);
829 			brelse(bp);
830 		} else {
831 			bap = (ufs2_daddr_t *)bp->b_data;
832 			bap[indirs[unwindidx].in_off] = 0;
833 			if (flags & IO_SYNC) {
834 				bwrite(bp);
835 			} else {
836 				if (bp->b_bufsize == fs->fs_bsize)
837 					bp->b_flags |= B_CLUSTEROK;
838 				bdwrite(bp);
839 			}
840 		}
841 	}
842 	if (deallocated) {
843 #ifdef QUOTA
844 		/*
845 		 * Restore user's disk quota because allocation failed.
846 		 */
847 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
848 #endif
849 		dp->di_blocks -= btodb(deallocated);
850 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
851 	}
852 	(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
853 	return (error);
854 }
855