xref: /freebsd/sys/ufs/ffs/ffs_balloc.c (revision f0a75d274af375d15b97b830966b99a02b7db911)
1 /*-
2  * Copyright (c) 2002 Networks Associates Technology, Inc.
3  * All rights reserved.
4  *
5  * This software was developed for the FreeBSD Project by Marshall
6  * Kirk McKusick and Network Associates Laboratories, the Security
7  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9  * research program
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * Copyright (c) 1982, 1986, 1989, 1993
33  *	The Regents of the University of California.  All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 4. Neither the name of the University nor the names of its contributors
44  *    may be used to endorse or promote products derived from this software
45  *    without specific prior written permission.
46  *
47  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57  * SUCH DAMAGE.
58  *
59  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
60  */
61 
62 #include <sys/cdefs.h>
63 __FBSDID("$FreeBSD$");
64 
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/bio.h>
68 #include <sys/buf.h>
69 #include <sys/lock.h>
70 #include <sys/mount.h>
71 #include <sys/vnode.h>
72 
73 #include <ufs/ufs/quota.h>
74 #include <ufs/ufs/inode.h>
75 #include <ufs/ufs/ufs_extern.h>
76 #include <ufs/ufs/extattr.h>
77 #include <ufs/ufs/ufsmount.h>
78 
79 #include <ufs/ffs/fs.h>
80 #include <ufs/ffs/ffs_extern.h>
81 
82 /*
83  * Balloc defines the structure of filesystem storage
84  * by allocating the physical blocks on a device given
85  * the inode and the logical block number in a file.
86  * This is the allocation strategy for UFS1. Below is
87  * the allocation strategy for UFS2.
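 *
 * The buffer containing the requested block is returned in *bpp.  The
 * flags argument modifies the allocation: BA_CLRBUF asks that the buffer
 * contents be made valid (newly allocated blocks are zeroed; existing
 * blocks found through indirect blocks are read rather than merely
 * mapped), BA_METAONLY returns the final indirect block holding the
 * block pointer rather than the data block itself, and IO_SYNC forces
 * fragment extensions and indirect block updates to be written
 * synchronously rather than with delayed writes.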
88  */
89 int
90 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
91     struct ucred *cred, int flags, struct buf **bpp)
92 {
93 	struct inode *ip;
94 	struct ufs1_dinode *dp;
95 	ufs_lbn_t lbn, lastlbn;
96 	struct fs *fs;
97 	ufs1_daddr_t nb;
98 	struct buf *bp, *nbp;
99 	struct ufsmount *ump;
100 	struct indir indirs[NIADDR + 2];
101 	int deallocated, osize, nsize, num, i, error;
102 	ufs2_daddr_t newb;
103 	ufs1_daddr_t *bap, pref;
104 	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
105 	int unwindidx = -1;
106 
107 	ip = VTOI(vp);
108 	dp = ip->i_din1;
109 	fs = ip->i_fs;
110 	ump = ip->i_ump;
111 	lbn = lblkno(fs, startoffset);
112 	size = blkoff(fs, startoffset) + size;
113 	if (size > fs->fs_bsize)
114 		panic("ffs_balloc_ufs1: blk too big");
115 	*bpp = NULL;
116 	if (flags & IO_EXT)
117 		return (EOPNOTSUPP);
118 	if (lbn < 0)
119 		return (EFBIG);
120 
121 	/*
122 	 * If the next write will extend the file into a new block,
123 	 * and the file is currently composed of a fragment,
124 	 * this fragment has to be extended to be a full block.
125 	 */
126 	lastlbn = lblkno(fs, ip->i_size);
127 	if (lastlbn < NDADDR && lastlbn < lbn) {
128 		nb = lastlbn;
129 		osize = blksize(fs, ip, nb);
130 		if (osize < fs->fs_bsize && osize > 0) {
131 			UFS_LOCK(ump);
132 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
133 			   ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
134 			   &dp->di_db[0]), osize, (int)fs->fs_bsize, cred, &bp);
135 			if (error)
136 				return (error);
137 			if (DOINGSOFTDEP(vp))
138 				softdep_setup_allocdirect(ip, nb,
139 				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
140 				    fs->fs_bsize, osize, bp);
141 			ip->i_size = smalllblktosize(fs, nb + 1);
142 			dp->di_size = ip->i_size;
143 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
144 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
145 			if (flags & IO_SYNC)
146 				bwrite(bp);
147 			else
148 				bawrite(bp);
149 		}
150 	}
151 	/*
152 	 * The first NDADDR blocks are direct blocks
153 	 */
154 	if (lbn < NDADDR) {
155 		if (flags & BA_METAONLY)
156 			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
157 		nb = dp->di_db[lbn];
158 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
159 			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
160 			if (error) {
161 				brelse(bp);
162 				return (error);
163 			}
164 			bp->b_blkno = fsbtodb(fs, nb);
165 			*bpp = bp;
166 			return (0);
167 		}
168 		if (nb != 0) {
169 			/*
170 			 * Consider need to reallocate a fragment.
171 			 */
172 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
173 			nsize = fragroundup(fs, size);
174 			if (nsize <= osize) {
175 				error = bread(vp, lbn, osize, NOCRED, &bp);
176 				if (error) {
177 					brelse(bp);
178 					return (error);
179 				}
180 				bp->b_blkno = fsbtodb(fs, nb);
181 			} else {
182 				UFS_LOCK(ump);
183 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
184 				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
185 				    &dp->di_db[0]), osize, nsize, cred, &bp);
186 				if (error)
187 					return (error);
188 				if (DOINGSOFTDEP(vp))
189 					softdep_setup_allocdirect(ip, lbn,
190 					    dbtofsb(fs, bp->b_blkno), nb,
191 					    nsize, osize, bp);
192 			}
193 		} else {
194 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
195 				nsize = fragroundup(fs, size);
196 			else
197 				nsize = fs->fs_bsize;
198 			UFS_LOCK(ump);
199 			error = ffs_alloc(ip, lbn,
200 			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
201 			    nsize, cred, &newb);
202 			if (error)
203 				return (error);
204 			bp = getblk(vp, lbn, nsize, 0, 0, 0);
205 			bp->b_blkno = fsbtodb(fs, newb);
206 			if (flags & BA_CLRBUF)
207 				vfs_bio_clrbuf(bp);
208 			if (DOINGSOFTDEP(vp))
209 				softdep_setup_allocdirect(ip, lbn, newb, 0,
210 				    nsize, 0, bp);
211 		}
212 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
213 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
214 		*bpp = bp;
215 		return (0);
216 	}
217 	/*
218 	 * Determine the number of levels of indirection.
219 	 */
220 	pref = 0;
221 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
222 		return(error);
223 #ifdef DIAGNOSTIC
224 	if (num < 1)
225 		panic("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
226 #endif
227 	/*
228 	 * Fetch the first indirect block, allocating if necessary.
229 	 */
230 	--num;
231 	nb = dp->di_ib[indirs[0].in_off];
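	/*
	 * allociblk[] records each block allocated below so that the
	 * failure path can free them; allocib and unwindidx identify
	 * the block pointer that must be cleared to detach a partially
	 * built indirect chain if the allocation fails.
	 */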
232 	allocib = NULL;
233 	allocblk = allociblk;
234 	if (nb == 0) {
235 		UFS_LOCK(ump);
236 		pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
237 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
238 		    cred, &newb)) != 0)
239 			return (error);
240 		nb = newb;
241 		*allocblk++ = nb;
242 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
243 		bp->b_blkno = fsbtodb(fs, nb);
244 		vfs_bio_clrbuf(bp);
245 		if (DOINGSOFTDEP(vp)) {
246 			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
247 			    newb, 0, fs->fs_bsize, 0, bp);
248 			bdwrite(bp);
249 		} else {
250 			/*
251 			 * Write synchronously so that indirect blocks
252 			 * never point at garbage.
253 			 */
254 			if (DOINGASYNC(vp))
255 				bdwrite(bp);
256 			else if ((error = bwrite(bp)) != 0)
257 				goto fail;
258 		}
259 		allocib = &dp->di_ib[indirs[0].in_off];
260 		*allocib = nb;
261 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
262 	}
263 	/*
264 	 * Fetch through the indirect blocks, allocating as necessary.
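	 * On exit from the loop bp is the buffer for the final-level
	 * indirect block and nb is the data block number that it
	 * references (zero if the data block is not yet allocated).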
265 	 */
266 	for (i = 1;;) {
267 		error = bread(vp,
268 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
269 		if (error) {
270 			brelse(bp);
271 			goto fail;
272 		}
273 		bap = (ufs1_daddr_t *)bp->b_data;
274 		nb = bap[indirs[i].in_off];
275 		if (i == num)
276 			break;
277 		i += 1;
278 		if (nb != 0) {
279 			bqrelse(bp);
280 			continue;
281 		}
282 		UFS_LOCK(ump);
283 		if (pref == 0)
284 			pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
285 		if ((error =
286 		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
287 			brelse(bp);
288 			goto fail;
289 		}
290 		nb = newb;
291 		*allocblk++ = nb;
292 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
293 		nbp->b_blkno = fsbtodb(fs, nb);
294 		vfs_bio_clrbuf(nbp);
295 		if (DOINGSOFTDEP(vp)) {
296 			softdep_setup_allocindir_meta(nbp, ip, bp,
297 			    indirs[i - 1].in_off, nb);
298 			bdwrite(nbp);
299 		} else {
300 			/*
301 			 * Write synchronously so that indirect blocks
302 			 * never point at garbage.
303 			 */
304 			if ((error = bwrite(nbp)) != 0) {
305 				brelse(bp);
306 				goto fail;
307 			}
308 		}
309 		bap[indirs[i - 1].in_off] = nb;
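		/*
		 * Remember the first pre-existing indirect block that
		 * received a new block pointer so that the failure path
		 * can zero that entry when unwinding.
		 */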
310 		if (allocib == NULL && unwindidx < 0)
311 			unwindidx = i - 1;
312 		/*
313 		 * If required, write synchronously, otherwise use
314 		 * delayed write.
315 		 */
316 		if (flags & IO_SYNC) {
317 			bwrite(bp);
318 		} else {
319 			if (bp->b_bufsize == fs->fs_bsize)
320 				bp->b_flags |= B_CLUSTEROK;
321 			bdwrite(bp);
322 		}
323 	}
324 	/*
325 	 * If asked only for the indirect block, then return it.
326 	 */
327 	if (flags & BA_METAONLY) {
328 		*bpp = bp;
329 		return (0);
330 	}
331 	/*
332 	 * Get the data block, allocating if necessary.
333 	 */
334 	if (nb == 0) {
335 		UFS_LOCK(ump);
336 		pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]);
337 		error = ffs_alloc(ip,
338 		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
339 		if (error) {
340 			brelse(bp);
341 			goto fail;
342 		}
343 		nb = newb;
344 		*allocblk++ = nb;
345 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
346 		nbp->b_blkno = fsbtodb(fs, nb);
347 		if (flags & BA_CLRBUF)
348 			vfs_bio_clrbuf(nbp);
349 		if (DOINGSOFTDEP(vp))
350 			softdep_setup_allocindir_page(ip, lbn, bp,
351 			    indirs[i].in_off, nb, 0, nbp);
352 		bap[indirs[i].in_off] = nb;
353 		/*
354 		 * If required, write synchronously, otherwise use
355 		 * delayed write.
356 		 */
357 		if (flags & IO_SYNC) {
358 			bwrite(bp);
359 		} else {
360 			if (bp->b_bufsize == fs->fs_bsize)
361 				bp->b_flags |= B_CLUSTEROK;
362 			bdwrite(bp);
363 		}
364 		*bpp = nbp;
365 		return (0);
366 	}
367 	brelse(bp);
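	/*
	 * If requested clear invalid portions of the buffer.  If we
	 * have to do a read-before-write (typical if BA_CLRBUF is set),
	 * try to do some read-ahead in the sequential case to reduce
	 * the number of I/O transactions.
	 */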
368 	if (flags & BA_CLRBUF) {
369 		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
370 		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
371 			error = cluster_read(vp, ip->i_size, lbn,
372 			    (int)fs->fs_bsize, NOCRED,
373 			    MAXBSIZE, seqcount, &nbp);
374 		} else {
375 			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
376 		}
377 		if (error) {
378 			brelse(nbp);
379 			goto fail;
380 		}
381 	} else {
382 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
383 		nbp->b_blkno = fsbtodb(fs, nb);
384 	}
385 	*bpp = nbp;
386 	return (0);
387 fail:
388 	/*
389 	 * If we have failed to allocate any blocks, simply return the error.
390 	 * This is the usual case and avoids the need to fsync the file.
391 	 */
392 	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
393 		return (error);
394 	/*
395 	 * If we have failed part way through block allocation, we
396 	 * have to deallocate any indirect blocks that we have allocated.
397 	 * We have to fsync the file before we start to get rid of all
398 	 * of its dependencies so that we do not leave them dangling.
399 	 * We have to sync it at the end so that the soft updates code
400 	 * does not find any untracked changes. Although this is really
401 	 * slow, running out of disk space is not expected to be a common
402 	 * occurrence. The error return from fsync is ignored as we already
403 	 * have an error to return to the user.
404 	 */
405 	(void) ffs_syncvnode(vp, MNT_WAIT);
406 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
407 		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
408 		    ip->i_number);
409 		deallocated += fs->fs_bsize;
410 	}
411 	if (allocib != NULL) {
412 		*allocib = 0;
413 	} else if (unwindidx >= 0) {
414 		int r;
415 
416 		r = bread(vp, indirs[unwindidx].in_lbn,
417 		    (int)fs->fs_bsize, NOCRED, &bp);
418 		if (r) {
419 			panic("Could not unwind indirect block, error %d", r);
420 			brelse(bp);
421 		} else {
422 			bap = (ufs1_daddr_t *)bp->b_data;
423 			bap[indirs[unwindidx].in_off] = 0;
424 			if (flags & IO_SYNC) {
425 				bwrite(bp);
426 			} else {
427 				if (bp->b_bufsize == fs->fs_bsize)
428 					bp->b_flags |= B_CLUSTEROK;
429 				bdwrite(bp);
430 			}
431 		}
432 	}
433 	if (deallocated) {
434 #ifdef QUOTA
435 		/*
436 		 * Restore user's disk quota because allocation failed.
437 		 */
438 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
439 #endif
440 		dp->di_blocks -= btodb(deallocated);
441 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
442 	}
443 	(void) ffs_syncvnode(vp, MNT_WAIT);
444 	return (error);
445 }
446 
447 /*
448  * Balloc defines the structure of filesystem storage
449  * by allocating the physical blocks on a device given
450  * the inode and the logical block number in a file.
451  * This is the allocation strategy for UFS2. Above is
452  * the allocation strategy for UFS1.
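 *
 * The flags are handled as in the UFS1 routine above, with the
 * addition of IO_EXT, which directs the allocation at the inode's
 * external (extended attribute) data area rather than at the file
 * data.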
453  */
454 int
455 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
456     struct ucred *cred, int flags, struct buf **bpp)
457 {
458 	struct inode *ip;
459 	struct ufs2_dinode *dp;
460 	ufs_lbn_t lbn, lastlbn;
461 	struct fs *fs;
462 	struct buf *bp, *nbp;
463 	struct ufsmount *ump;
464 	struct indir indirs[NIADDR + 2];
465 	ufs2_daddr_t nb, newb, *bap, pref;
466 	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
467 	int deallocated, osize, nsize, num, i, error;
468 	int unwindidx = -1;
469 
470 	ip = VTOI(vp);
471 	dp = ip->i_din2;
472 	fs = ip->i_fs;
473 	ump = ip->i_ump;
474 	lbn = lblkno(fs, startoffset);
475 	size = blkoff(fs, startoffset) + size;
476 	if (size > fs->fs_bsize)
477 		panic("ffs_balloc_ufs2: blk too big");
478 	*bpp = NULL;
479 	if (lbn < 0)
480 		return (EFBIG);
481 
482 	/*
483 	 * Check for allocating external data.
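	 * The external attribute area is limited to the NXADDR direct
	 * blocks recorded in di_extb[] and is addressed in the buffer
	 * cache with negative logical block numbers (-1 - lbn).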
484 	 */
485 	if (flags & IO_EXT) {
486 		if (lbn >= NXADDR)
487 			return (EFBIG);
488 		/*
489 		 * If the next write will extend the data into a new block,
490 		 * and the data is currently composed of a fragment,
491 		 * this fragment has to be extended to be a full block.
492 		 */
493 		lastlbn = lblkno(fs, dp->di_extsize);
494 		if (lastlbn < lbn) {
495 			nb = lastlbn;
496 			osize = sblksize(fs, dp->di_extsize, nb);
497 			if (osize < fs->fs_bsize && osize > 0) {
498 				UFS_LOCK(ump);
499 				error = ffs_realloccg(ip, -1 - nb,
500 				    dp->di_extb[nb],
501 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
502 				    &dp->di_extb[0]), osize,
503 				    (int)fs->fs_bsize, cred, &bp);
504 				if (error)
505 					return (error);
506 				if (DOINGSOFTDEP(vp))
507 					softdep_setup_allocext(ip, nb,
508 					    dbtofsb(fs, bp->b_blkno),
509 					    dp->di_extb[nb],
510 					    fs->fs_bsize, osize, bp);
511 				dp->di_extsize = smalllblktosize(fs, nb + 1);
512 				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
513 				bp->b_xflags |= BX_ALTDATA;
514 				ip->i_flag |= IN_CHANGE | IN_UPDATE;
515 				if (flags & IO_SYNC)
516 					bwrite(bp);
517 				else
518 					bawrite(bp);
519 			}
520 		}
521 		/*
522 		 * All blocks are direct blocks
523 		 */
524 		if (flags & BA_METAONLY)
525 			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
526 		nb = dp->di_extb[lbn];
527 		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
528 			error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
529 			if (error) {
530 				brelse(bp);
531 				return (error);
532 			}
533 			bp->b_blkno = fsbtodb(fs, nb);
534 			bp->b_xflags |= BX_ALTDATA;
535 			*bpp = bp;
536 			return (0);
537 		}
538 		if (nb != 0) {
539 			/*
540 			 * Consider need to reallocate a fragment.
541 			 */
542 			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
543 			nsize = fragroundup(fs, size);
544 			if (nsize <= osize) {
545 				error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
546 				if (error) {
547 					brelse(bp);
548 					return (error);
549 				}
550 				bp->b_blkno = fsbtodb(fs, nb);
551 				bp->b_xflags |= BX_ALTDATA;
552 			} else {
553 				UFS_LOCK(ump);
554 				error = ffs_realloccg(ip, -1 - lbn,
555 				    dp->di_extb[lbn],
556 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
557 				    &dp->di_extb[0]), osize, nsize, cred, &bp);
558 				if (error)
559 					return (error);
560 				bp->b_xflags |= BX_ALTDATA;
561 				if (DOINGSOFTDEP(vp))
562 					softdep_setup_allocext(ip, lbn,
563 					    dbtofsb(fs, bp->b_blkno), nb,
564 					    nsize, osize, bp);
565 			}
566 		} else {
567 			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
568 				nsize = fragroundup(fs, size);
569 			else
570 				nsize = fs->fs_bsize;
571 			UFS_LOCK(ump);
572 			error = ffs_alloc(ip, lbn,
573 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
574 			   nsize, cred, &newb);
575 			if (error)
576 				return (error);
577 			bp = getblk(vp, -1 - lbn, nsize, 0, 0, 0);
578 			bp->b_blkno = fsbtodb(fs, newb);
579 			bp->b_xflags |= BX_ALTDATA;
580 			if (flags & BA_CLRBUF)
581 				vfs_bio_clrbuf(bp);
582 			if (DOINGSOFTDEP(vp))
583 				softdep_setup_allocext(ip, lbn, newb, 0,
584 				    nsize, 0, bp);
585 		}
586 		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
587 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
588 		*bpp = bp;
589 		return (0);
590 	}
591 	/*
592 	 * If the next write will extend the file into a new block,
593 	 * and the file is currently composed of a fragment,
594 	 * this fragment has to be extended to be a full block.
595 	 */
596 	lastlbn = lblkno(fs, ip->i_size);
597 	if (lastlbn < NDADDR && lastlbn < lbn) {
598 		nb = lastlbn;
599 		osize = blksize(fs, ip, nb);
600 		if (osize < fs->fs_bsize && osize > 0) {
601 			UFS_LOCK(ump);
602 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
603 				ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
604 				    &dp->di_db[0]), osize, (int)fs->fs_bsize,
605 				    cred, &bp);
606 			if (error)
607 				return (error);
608 			if (DOINGSOFTDEP(vp))
609 				softdep_setup_allocdirect(ip, nb,
610 				    dbtofsb(fs, bp->b_blkno),
611 				    dp->di_db[nb],
612 				    fs->fs_bsize, osize, bp);
613 			ip->i_size = smalllblktosize(fs, nb + 1);
614 			dp->di_size = ip->i_size;
615 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
616 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
617 			if (flags & IO_SYNC)
618 				bwrite(bp);
619 			else
620 				bawrite(bp);
621 		}
622 	}
623 	/*
624 	 * The first NDADDR blocks are direct blocks
625 	 */
626 	if (lbn < NDADDR) {
627 		if (flags & BA_METAONLY)
628 			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
629 		nb = dp->di_db[lbn];
630 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
631 			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
632 			if (error) {
633 				brelse(bp);
634 				return (error);
635 			}
636 			bp->b_blkno = fsbtodb(fs, nb);
637 			*bpp = bp;
638 			return (0);
639 		}
640 		if (nb != 0) {
641 			/*
642 			 * Consider need to reallocate a fragment.
643 			 */
644 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
645 			nsize = fragroundup(fs, size);
646 			if (nsize <= osize) {
647 				error = bread(vp, lbn, osize, NOCRED, &bp);
648 				if (error) {
649 					brelse(bp);
650 					return (error);
651 				}
652 				bp->b_blkno = fsbtodb(fs, nb);
653 			} else {
654 				UFS_LOCK(ump);
655 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
656 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
657 				       &dp->di_db[0]), osize, nsize, cred, &bp);
658 				if (error)
659 					return (error);
660 				if (DOINGSOFTDEP(vp))
661 					softdep_setup_allocdirect(ip, lbn,
662 					    dbtofsb(fs, bp->b_blkno), nb,
663 					    nsize, osize, bp);
664 			}
665 		} else {
666 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
667 				nsize = fragroundup(fs, size);
668 			else
669 				nsize = fs->fs_bsize;
670 			UFS_LOCK(ump);
671 			error = ffs_alloc(ip, lbn,
672 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
673 				&dp->di_db[0]), nsize, cred, &newb);
674 			if (error)
675 				return (error);
676 			bp = getblk(vp, lbn, nsize, 0, 0, 0);
677 			bp->b_blkno = fsbtodb(fs, newb);
678 			if (flags & BA_CLRBUF)
679 				vfs_bio_clrbuf(bp);
680 			if (DOINGSOFTDEP(vp))
681 				softdep_setup_allocdirect(ip, lbn, newb, 0,
682 				    nsize, 0, bp);
683 		}
684 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
685 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
686 		*bpp = bp;
687 		return (0);
688 	}
689 	/*
690 	 * Determine the number of levels of indirection.
691 	 */
692 	pref = 0;
693 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
694 		return(error);
695 #ifdef DIAGNOSTIC
696 	if (num < 1)
697 		panic("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
698 #endif
699 	/*
700 	 * Fetch the first indirect block allocating if necessary.
701 	 */
702 	--num;
703 	nb = dp->di_ib[indirs[0].in_off];
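	/*
	 * allociblk[] records each block allocated below so that the
	 * failure path can free them; allocib and unwindidx identify
	 * the block pointer that must be cleared to detach a partially
	 * built indirect chain if the allocation fails.
	 */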
704 	allocib = NULL;
705 	allocblk = allociblk;
706 	if (nb == 0) {
707 		UFS_LOCK(ump);
708 		pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
709 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
710 		    cred, &newb)) != 0)
711 			return (error);
712 		nb = newb;
713 		*allocblk++ = nb;
714 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
715 		bp->b_blkno = fsbtodb(fs, nb);
716 		vfs_bio_clrbuf(bp);
717 		if (DOINGSOFTDEP(vp)) {
718 			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
719 			    newb, 0, fs->fs_bsize, 0, bp);
720 			bdwrite(bp);
721 		} else {
722 			/*
723 			 * Write synchronously so that indirect blocks
724 			 * never point at garbage.
725 			 */
726 			if (DOINGASYNC(vp))
727 				bdwrite(bp);
728 			else if ((error = bwrite(bp)) != 0)
729 				goto fail;
730 		}
731 		allocib = &dp->di_ib[indirs[0].in_off];
732 		*allocib = nb;
733 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
734 	}
735 	/*
736 	 * Fetch through the indirect blocks, allocating as necessary.
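	 * On exit from the loop bp is the buffer for the final-level
	 * indirect block and nb is the data block number that it
	 * references (zero if the data block is not yet allocated).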
737 	 */
738 	for (i = 1;;) {
739 		error = bread(vp,
740 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
741 		if (error) {
742 			brelse(bp);
743 			goto fail;
744 		}
745 		bap = (ufs2_daddr_t *)bp->b_data;
746 		nb = bap[indirs[i].in_off];
747 		if (i == num)
748 			break;
749 		i += 1;
750 		if (nb != 0) {
751 			bqrelse(bp);
752 			continue;
753 		}
754 		UFS_LOCK(ump);
755 		if (pref == 0)
756 			pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
757 		if ((error =
758 		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
759 			brelse(bp);
760 			goto fail;
761 		}
762 		nb = newb;
763 		*allocblk++ = nb;
764 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
765 		nbp->b_blkno = fsbtodb(fs, nb);
766 		vfs_bio_clrbuf(nbp);
767 		if (DOINGSOFTDEP(vp)) {
768 			softdep_setup_allocindir_meta(nbp, ip, bp,
769 			    indirs[i - 1].in_off, nb);
770 			bdwrite(nbp);
771 		} else {
772 			/*
773 			 * Write synchronously so that indirect blocks
774 			 * never point at garbage.
775 			 */
776 			if ((error = bwrite(nbp)) != 0) {
777 				brelse(bp);
778 				goto fail;
779 			}
780 		}
781 		bap[indirs[i - 1].in_off] = nb;
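		/*
		 * Remember the first pre-existing indirect block that
		 * received a new block pointer so that the failure path
		 * can zero that entry when unwinding.
		 */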
782 		if (allocib == NULL && unwindidx < 0)
783 			unwindidx = i - 1;
784 		/*
785 		 * If required, write synchronously, otherwise use
786 		 * delayed write.
787 		 */
788 		if (flags & IO_SYNC) {
789 			bwrite(bp);
790 		} else {
791 			if (bp->b_bufsize == fs->fs_bsize)
792 				bp->b_flags |= B_CLUSTEROK;
793 			bdwrite(bp);
794 		}
795 	}
796 	/*
797 	 * If asked only for the indirect block, then return it.
798 	 */
799 	if (flags & BA_METAONLY) {
800 		*bpp = bp;
801 		return (0);
802 	}
803 	/*
804 	 * Get the data block, allocating if necessary.
805 	 */
806 	if (nb == 0) {
807 		UFS_LOCK(ump);
808 		pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]);
809 		error = ffs_alloc(ip,
810 		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
811 		if (error) {
812 			brelse(bp);
813 			goto fail;
814 		}
815 		nb = newb;
816 		*allocblk++ = nb;
817 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
818 		nbp->b_blkno = fsbtodb(fs, nb);
819 		if (flags & BA_CLRBUF)
820 			vfs_bio_clrbuf(nbp);
821 		if (DOINGSOFTDEP(vp))
822 			softdep_setup_allocindir_page(ip, lbn, bp,
823 			    indirs[i].in_off, nb, 0, nbp);
824 		bap[indirs[i].in_off] = nb;
825 		/*
826 		 * If required, write synchronously, otherwise use
827 		 * delayed write.
828 		 */
829 		if (flags & IO_SYNC) {
830 			bwrite(bp);
831 		} else {
832 			if (bp->b_bufsize == fs->fs_bsize)
833 				bp->b_flags |= B_CLUSTEROK;
834 			bdwrite(bp);
835 		}
836 		*bpp = nbp;
837 		return (0);
838 	}
839 	brelse(bp);
840 	/*
841 	 * If requested clear invalid portions of the buffer.  If we
842 	 * have to do a read-before-write (typical if BA_CLRBUF is set),
843 	 * try to do some read-ahead in the sequential case to reduce
844 	 * the number of I/O transactions.
845 	 */
846 	if (flags & BA_CLRBUF) {
847 		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
848 		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
849 			error = cluster_read(vp, ip->i_size, lbn,
850 			    (int)fs->fs_bsize, NOCRED,
851 			    MAXBSIZE, seqcount, &nbp);
852 		} else {
853 			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
854 		}
855 		if (error) {
856 			brelse(nbp);
857 			goto fail;
858 		}
859 	} else {
860 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
861 		nbp->b_blkno = fsbtodb(fs, nb);
862 	}
863 	*bpp = nbp;
864 	return (0);
865 fail:
866 	/*
867 	 * If we have failed to allocate any blocks, simply return the error.
868 	 * This is the usual case and avoids the need to fsync the file.
869 	 */
870 	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
871 		return (error);
872 	/*
873 	 * If we have failed part way through block allocation, we
874 	 * have to deallocate any indirect blocks that we have allocated.
875 	 * We have to fsync the file before we start to get rid of all
876 	 * of its dependencies so that we do not leave them dangling.
877 	 * We have to sync it at the end so that the soft updates code
878 	 * does not find any untracked changes. Although this is really
879 	 * slow, running out of disk space is not expected to be a common
880 	 * occurrence. The error return from fsync is ignored as we already
881 	 * have an error to return to the user.
882 	 */
883 	(void) ffs_syncvnode(vp, MNT_WAIT);
884 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
885 		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
886 		    ip->i_number);
887 		deallocated += fs->fs_bsize;
888 	}
889 	if (allocib != NULL) {
890 		*allocib = 0;
891 	} else if (unwindidx >= 0) {
892 		int r;
893 
894 		r = bread(vp, indirs[unwindidx].in_lbn,
895 		    (int)fs->fs_bsize, NOCRED, &bp);
896 		if (r) {
897 			panic("Could not unwind indirect block, error %d", r);
898 			brelse(bp);
899 		} else {
900 			bap = (ufs2_daddr_t *)bp->b_data;
901 			bap[indirs[unwindidx].in_off] = 0;
902 			if (flags & IO_SYNC) {
903 				bwrite(bp);
904 			} else {
905 				if (bp->b_bufsize == fs->fs_bsize)
906 					bp->b_flags |= B_CLUSTEROK;
907 				bdwrite(bp);
908 			}
909 		}
910 	}
911 	if (deallocated) {
912 #ifdef QUOTA
913 		/*
914 		 * Restore user's disk quota because allocation failed.
915 		 */
916 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
917 #endif
918 		dp->di_blocks -= btodb(deallocated);
919 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
920 	}
921 	(void) ffs_syncvnode(vp, MNT_WAIT);
922 	return (error);
923 }
924