xref: /freebsd/sys/ufs/ffs/ffs_balloc.c (revision 5521ff5a4d1929056e7ffc982fac3341ca54df7c)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
34  * $FreeBSD$
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/bio.h>
40 #include <sys/buf.h>
41 #include <sys/lock.h>
42 #include <sys/mount.h>
43 #include <sys/vnode.h>
44 
45 #include <ufs/ufs/quota.h>
46 #include <ufs/ufs/inode.h>
47 #include <ufs/ufs/ufs_extern.h>
48 
49 #include <ufs/ffs/fs.h>
50 #include <ufs/ffs/ffs_extern.h>
51 
52 /*
53  * Balloc defines the structure of file system storage
54  * by allocating the physical blocks on a device given
55  * the inode and the logical block number in a file.
56  */
57 int
58 ffs_balloc(struct vnode *a_vp, off_t a_startoffset, int a_size,
59     struct ucred *a_cred, int a_flags, struct buf **a_bpp)
60 {
61 	struct inode *ip;
62 	ufs_daddr_t lbn;
63 	int size;
64 	struct ucred *cred;
65 	int flags;
66 	struct fs *fs;
67 	ufs_daddr_t nb;
68 	struct buf *bp, *nbp;
69 	struct vnode *vp;
70 	struct indir indirs[NIADDR + 2];
71 	ufs_daddr_t newb, *bap, pref;
72 	int deallocated, osize, nsize, num, i, error;
73 	ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
74 	int unwindidx = -1;
75 	struct proc *p = curproc;	/* XXX */
76 
77 	vp = a_vp;
78 	ip = VTOI(vp);
79 	fs = ip->i_fs;
80 	lbn = lblkno(fs, a_startoffset);
81 	size = blkoff(fs, a_startoffset) + a_size;
82 	if (size > fs->fs_bsize)
83 		panic("ffs_balloc: blk too big");
84 	*a_bpp = NULL;
85 	if (lbn < 0)
86 		return (EFBIG);
87 	cred = a_cred;
88 	flags = a_flags;
89 
90 	/*
91 	 * If the next write will extend the file into a new block,
92 	 * and the file is currently composed of a fragment
93 	 * this fragment has to be extended to be a full block.
94 	 */
95 	nb = lblkno(fs, ip->i_size);
96 	if (nb < NDADDR && nb < lbn) {
97 		osize = blksize(fs, ip, nb);
98 		if (osize < fs->fs_bsize && osize > 0) {
99 			error = ffs_realloccg(ip, nb,
100 				ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]),
101 				osize, (int)fs->fs_bsize, cred, &bp);
102 			if (error)
103 				return (error);
104 			if (DOINGSOFTDEP(vp))
105 				softdep_setup_allocdirect(ip, nb,
106 				    dbtofsb(fs, bp->b_blkno), ip->i_db[nb],
107 				    fs->fs_bsize, osize, bp);
108 			ip->i_size = smalllblktosize(fs, nb + 1);
109 			ip->i_db[nb] = dbtofsb(fs, bp->b_blkno);
110 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
111 			if (flags & B_SYNC)
112 				bwrite(bp);
113 			else
114 				bawrite(bp);
115 		}
116 	}
117 	/*
118 	 * The first NDADDR blocks are direct blocks
119 	 */
120 	if (lbn < NDADDR) {
121 		if (flags & B_METAONLY)
122 			panic("ffs_balloc: B_METAONLY for direct block");
123 		nb = ip->i_db[lbn];
124 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
125 			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
126 			if (error) {
127 				brelse(bp);
128 				return (error);
129 			}
130 			bp->b_blkno = fsbtodb(fs, nb);
131 			*a_bpp = bp;
132 			return (0);
133 		}
134 		if (nb != 0) {
135 			/*
136 			 * Consider need to reallocate a fragment.
137 			 */
138 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
139 			nsize = fragroundup(fs, size);
140 			if (nsize <= osize) {
141 				error = bread(vp, lbn, osize, NOCRED, &bp);
142 				if (error) {
143 					brelse(bp);
144 					return (error);
145 				}
146 				bp->b_blkno = fsbtodb(fs, nb);
147 			} else {
148 				error = ffs_realloccg(ip, lbn,
149 				    ffs_blkpref(ip, lbn, (int)lbn,
150 					&ip->i_db[0]), osize, nsize, cred, &bp);
151 				if (error)
152 					return (error);
153 				if (DOINGSOFTDEP(vp))
154 					softdep_setup_allocdirect(ip, lbn,
155 					    dbtofsb(fs, bp->b_blkno), nb,
156 					    nsize, osize, bp);
157 			}
158 		} else {
159 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
160 				nsize = fragroundup(fs, size);
161 			else
162 				nsize = fs->fs_bsize;
163 			error = ffs_alloc(ip, lbn,
164 			    ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]),
165 			    nsize, cred, &newb);
166 			if (error)
167 				return (error);
168 			bp = getblk(vp, lbn, nsize, 0, 0);
169 			bp->b_blkno = fsbtodb(fs, newb);
170 			if (flags & B_CLRBUF)
171 				vfs_bio_clrbuf(bp);
172 			if (DOINGSOFTDEP(vp))
173 				softdep_setup_allocdirect(ip, lbn, newb, 0,
174 				    nsize, 0, bp);
175 		}
176 		ip->i_db[lbn] = dbtofsb(fs, bp->b_blkno);
177 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
178 		*a_bpp = bp;
179 		return (0);
180 	}
181 	/*
182 	 * Determine the number of levels of indirection.
183 	 */
184 	pref = 0;
185 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
186 		return(error);
187 #ifdef DIAGNOSTIC
188 	if (num < 1)
189 		panic ("ffs_balloc: ufs_bmaparray returned indirect block");
190 #endif
191 	/*
192 	 * Fetch the first indirect block allocating if necessary.
193 	 */
194 	--num;
195 	nb = ip->i_ib[indirs[0].in_off];
196 	allocib = NULL;
197 	allocblk = allociblk;
198 	if (nb == 0) {
199 		pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
200 	        if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
201 		    cred, &newb)) != 0)
202 			return (error);
203 		nb = newb;
204 		*allocblk++ = nb;
205 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
206 		bp->b_blkno = fsbtodb(fs, nb);
207 		vfs_bio_clrbuf(bp);
208 		if (DOINGSOFTDEP(vp)) {
209 			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
210 			    newb, 0, fs->fs_bsize, 0, bp);
211 			bdwrite(bp);
212 		} else {
213 			/*
214 			 * Write synchronously so that indirect blocks
215 			 * never point at garbage.
216 			 */
217 			if (DOINGASYNC(vp))
218 				bdwrite(bp);
219 			else if ((error = bwrite(bp)) != 0)
220 				goto fail;
221 		}
222 		allocib = &ip->i_ib[indirs[0].in_off];
223 		*allocib = nb;
224 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
225 	}
226 	/*
227 	 * Fetch through the indirect blocks, allocating as necessary.
228 	 */
229 	for (i = 1;;) {
230 		error = bread(vp,
231 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
232 		if (error) {
233 			brelse(bp);
234 			goto fail;
235 		}
236 		bap = (ufs_daddr_t *)bp->b_data;
237 		nb = bap[indirs[i].in_off];
238 		if (i == num)
239 			break;
240 		i += 1;
241 		if (nb != 0) {
242 			bqrelse(bp);
243 			continue;
244 		}
245 		if (pref == 0)
246 			pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
247 		if ((error =
248 		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
249 			brelse(bp);
250 			goto fail;
251 		}
252 		nb = newb;
253 		*allocblk++ = nb;
254 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
255 		nbp->b_blkno = fsbtodb(fs, nb);
256 		vfs_bio_clrbuf(nbp);
257 		if (DOINGSOFTDEP(vp)) {
258 			softdep_setup_allocindir_meta(nbp, ip, bp,
259 			    indirs[i - 1].in_off, nb);
260 			bdwrite(nbp);
261 		} else {
262 			/*
263 			 * Write synchronously so that indirect blocks
264 			 * never point at garbage.
265 			 */
266 			if ((error = bwrite(nbp)) != 0) {
267 				brelse(bp);
268 				goto fail;
269 			}
270 		}
271 		bap[indirs[i - 1].in_off] = nb;
272 		if (allocib == NULL && unwindidx < 0)
273 			unwindidx = i - 1;
274 		/*
275 		 * If required, write synchronously, otherwise use
276 		 * delayed write.
277 		 */
278 		if (flags & B_SYNC) {
279 			bwrite(bp);
280 		} else {
281 			if (bp->b_bufsize == fs->fs_bsize)
282 				bp->b_flags |= B_CLUSTEROK;
283 			bdwrite(bp);
284 		}
285 	}
286 	/*
287 	 * If asked only for the indirect block, then return it.
288 	 */
289 	if (flags & B_METAONLY) {
290 		*a_bpp = bp;
291 		return (0);
292 	}
293 	/*
294 	 * Get the data block, allocating if necessary.
295 	 */
296 	if (nb == 0) {
297 		pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
298 		error = ffs_alloc(ip,
299 		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
300 		if (error) {
301 			brelse(bp);
302 			goto fail;
303 		}
304 		nb = newb;
305 		*allocblk++ = nb;
306 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
307 		nbp->b_blkno = fsbtodb(fs, nb);
308 		if (flags & B_CLRBUF)
309 			vfs_bio_clrbuf(nbp);
310 		if (DOINGSOFTDEP(vp))
311 			softdep_setup_allocindir_page(ip, lbn, bp,
312 			    indirs[i].in_off, nb, 0, nbp);
313 		bap[indirs[i].in_off] = nb;
314 		/*
315 		 * If required, write synchronously, otherwise use
316 		 * delayed write.
317 		 */
318 		if (flags & B_SYNC) {
319 			bwrite(bp);
320 		} else {
321 			if (bp->b_bufsize == fs->fs_bsize)
322 				bp->b_flags |= B_CLUSTEROK;
323 			bdwrite(bp);
324 		}
325 		*a_bpp = nbp;
326 		return (0);
327 	}
328 	brelse(bp);
329 	if (flags & B_CLRBUF) {
330 		error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
331 		if (error) {
332 			brelse(nbp);
333 			goto fail;
334 		}
335 	} else {
336 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
337 		nbp->b_blkno = fsbtodb(fs, nb);
338 	}
339 	*a_bpp = nbp;
340 	return (0);
341 fail:
342 	/*
343 	 * If we have failed part way through block allocation, we
344 	 * have to deallocate any indirect blocks that we have allocated.
345 	 * We have to fsync the file before we start to get rid of all
346 	 * of its dependencies so that we do not leave them dangling.
347 	 * We have to sync it at the end so that the soft updates code
348 	 * does not find any untracked changes. Although this is really
349 	 * slow, running out of disk space is not expected to be a common
350 	 * occurence. The error return from fsync is ignored as we already
351 	 * have an error to return to the user.
352 	 */
353 	(void) VOP_FSYNC(vp, cred, MNT_WAIT, p);
354 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
355 		ffs_blkfree(ip, *blkp, fs->fs_bsize);
356 		deallocated += fs->fs_bsize;
357 	}
358 	if (allocib != NULL) {
359 		*allocib = 0;
360 	} else if (unwindidx >= 0) {
361 		int r;
362 
363 		r = bread(vp, indirs[unwindidx].in_lbn,
364 		    (int)fs->fs_bsize, NOCRED, &bp);
365 		if (r) {
366 			panic("Could not unwind indirect block, error %d", r);
367 			brelse(bp);
368 		} else {
369 			bap = (ufs_daddr_t *)bp->b_data;
370 			bap[indirs[unwindidx].in_off] = 0;
371 			if (flags & B_SYNC) {
372 				bwrite(bp);
373 			} else {
374 				if (bp->b_bufsize == fs->fs_bsize)
375 					bp->b_flags |= B_CLUSTEROK;
376 				bdwrite(bp);
377 			}
378 		}
379 	}
380 	if (deallocated) {
381 #ifdef QUOTA
382 		/*
383 		 * Restore user's disk quota because allocation failed.
384 		 */
385 		(void) chkdq(ip, (long)-btodb(deallocated), cred, FORCE);
386 #endif
387 		ip->i_blocks -= btodb(deallocated);
388 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
389 	}
390 	(void) VOP_FSYNC(vp, cred, MNT_WAIT, p);
391 	return (error);
392 }
393