xref: /freebsd/sys/fs/ext2fs/ext2_alloc.c (revision 5a0bba9007c527b18db7f9b64f06b486cda4fe9d)
1 /*-
2  *  modified for Lites 1.1
3  *
4  *  Aug 1995, Godmar Back (gback@cs.utah.edu)
5  *  University of Utah, Department of Computer Science
6  */
7 /*-
8  * Copyright (c) 1982, 1986, 1989, 1993
9  *	The Regents of the University of California.  All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)ffs_alloc.c	8.8 (Berkeley) 2/21/94
36  * $FreeBSD$
37  */
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/conf.h>
42 #include <sys/vnode.h>
43 #include <sys/stat.h>
44 #include <sys/mount.h>
45 #include <sys/syslog.h>
46 #include <sys/buf.h>
47 
48 #include <fs/ext2fs/inode.h>
49 #include <fs/ext2fs/ext2_mount.h>
50 #include <fs/ext2fs/ext2fs.h>
51 #include <fs/ext2fs/fs.h>
52 #include <fs/ext2fs/ext2_extern.h>
53 
54 static daddr_t	ext2_alloccg(struct inode *, int, daddr_t, int);
55 static u_long	ext2_dirpref(struct inode *);
56 static void	ext2_fserr(struct m_ext2fs *, uid_t, char *);
57 static u_long	ext2_hashalloc(struct inode *, int, long, int,
58 				daddr_t (*)(struct inode *, int, daddr_t,
59 						int));
60 static daddr_t	ext2_nodealloccg(struct inode *, int, daddr_t, int);
61 static daddr_t  ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);
62 #ifdef FANCY_REALLOC
63 static int	ext2_reallocblks(struct vop_reallocblks_args *);
64 #endif
65 
66 /*
67  * Allocate a block in the file system.
68  *
69  * A preference may be optionally specified. If a preference is given
70  * the following hierarchy is used to allocate a block:
71  *   1) allocate the requested block.
72  *   2) allocate a rotationally optimal block in the same cylinder.
73  *   3) allocate a block in the same cylinder group.
74  *   4) quadradically rehash into other cylinder groups, until an
75  *        available block is located.
76  * If no block preference is given the following hierarchy is used
77  * to allocate a block:
78  *   1) allocate a block in the cylinder group that contains the
79  *        inode for the file.
80  *   2) quadradically rehash into other cylinder groups, until an
81  *        available block is located.
82  */
83 int
84 ext2_alloc(ip, lbn, bpref, size, cred, bnp)
85 	struct inode *ip;
86 	int32_t lbn, bpref;
87 	int size;
88 	struct ucred *cred;
89 	int32_t *bnp;
90 {
91 	struct m_ext2fs *fs;
92 	struct ext2mount *ump;
93 	int32_t bno;
94 	int cg;
95 	*bnp = 0;
96 	fs = ip->i_e2fs;
97 	ump = ip->i_ump;
98 	mtx_assert(EXT2_MTX(ump), MA_OWNED);
99 #ifdef DIAGNOSTIC
100 	if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) {
101 		vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n",
102 		    (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt);
103 		panic("ext2_alloc: bad size");
104 	}
105 	if (cred == NOCRED)
106 		panic("ext2_alloc: missing credential");
107 #endif /* DIAGNOSTIC */
108 	if (size == fs->e2fs_bsize && fs->e2fs->e2fs_fbcount == 0)
109 		goto nospace;
110 	if (cred->cr_uid != 0 &&
111 		fs->e2fs->e2fs_fbcount < fs->e2fs->e2fs_rbcount)
112 		goto nospace;
113 	if (bpref >= fs->e2fs->e2fs_bcount)
114 		bpref = 0;
115 	if (bpref == 0)
116                 cg = ino_to_cg(fs, ip->i_number);
117         else
118                 cg = dtog(fs, bpref);
119         bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
120                                                  ext2_alloccg);
121         if (bno > 0) {
122 		/* set next_alloc fields as done in block_getblk */
123 		ip->i_next_alloc_block = lbn;
124 		ip->i_next_alloc_goal = bno;
125 
126                 ip->i_blocks += btodb(fs->e2fs_bsize);
127                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
128                 *bnp = bno;
129                 return (0);
130         }
131 nospace:
132 	EXT2_UNLOCK(ump);
133 	ext2_fserr(fs, cred->cr_uid, "file system full");
134 	uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
135 	return (ENOSPC);
136 }
137 
138 /*
139  * Reallocate a sequence of blocks into a contiguous sequence of blocks.
140  *
141  * The vnode and an array of buffer pointers for a range of sequential
142  * logical blocks to be made contiguous is given. The allocator attempts
143  * to find a range of sequential blocks starting as close as possible to
144  * an fs_rotdelay offset from the end of the allocation for the logical
145  * block immediately preceding the current range. If successful, the
146  * physical block numbers in the buffer pointers and in the inode are
147  * changed to reflect the new allocation. If unsuccessful, the allocation
148  * is left unchanged. The success in doing the reallocation is returned.
149  * Note that the error return is not reflected back to the user. Rather
150  * the previous block allocation will be used.
151  */
152 
153 #ifdef FANCY_REALLOC
154 SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW, 0, "EXT2FS filesystem");
155 
156 static int doasyncfree = 1;
157 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0,
158     "Use asychronous writes to update block pointers when freeing blocks");
159 
160 static int doreallocblks = 1;
161 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");
162 #endif
163 
164 int
165 ext2_reallocblks(ap)
166 	struct vop_reallocblks_args /* {
167 		struct vnode *a_vp;
168 		struct cluster_save *a_buflist;
169 	} */ *ap;
170 {
171 #ifndef FANCY_REALLOC
172 /* printf("ext2_reallocblks not implemented\n"); */
173 return ENOSPC;
174 #else
175 
176 	struct m_ext2fs *fs;
177 	struct inode *ip;
178 	struct vnode *vp;
179 	struct buf *sbp, *ebp;
180 	int32_t *bap, *sbap, *ebap = 0;
181 	struct ext2mount *ump;
182 	struct cluster_save *buflist;
183 	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
184 	int32_t start_lbn, end_lbn, soff, newblk, blkno =0;
185 	int i, len, start_lvl, end_lvl, pref, ssize;
186 
187 	vp = ap->a_vp;
188 	ip = VTOI(vp);
189 	fs = ip->i_e2fs;
190 	ump = ip->i_ump;
191 #ifdef UNKLAR
192 	if (fs->fs_contigsumsize <= 0)
193 		return (ENOSPC);
194 #endif
195 	buflist = ap->a_buflist;
196 	len = buflist->bs_nchildren;
197 	start_lbn = buflist->bs_children[0]->b_lblkno;
198 	end_lbn = start_lbn + len - 1;
199 #ifdef DIAGNOSTIC
200 	for (i = 1; i < len; i++)
201 		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
202 			panic("ext2_reallocblks: non-cluster");
203 #endif
204 	/*
205 	 * If the latest allocation is in a new cylinder group, assume that
206 	 * the filesystem has decided to move and do not force it back to
207 	 * the previous cylinder group.
208 	 */
209 	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
210 	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
211 		return (ENOSPC);
212 	if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
213 	    ext2_getlbns(vp, end_lbn, end_ap, &end_lvl))
214 		return (ENOSPC);
215 	/*
216 	 * Get the starting offset and block map for the first block.
217 	 */
218 	if (start_lvl == 0) {
219 		sbap = &ip->i_db[0];
220 		soff = start_lbn;
221 	} else {
222 		idp = &start_ap[start_lvl - 1];
223 		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) {
224 			brelse(sbp);
225 			return (ENOSPC);
226 		}
227 		sbap = (int32_t *)sbp->b_data;
228 		soff = idp->in_off;
229 	}
230 	/*
231 	 * Find the preferred location for the cluster.
232 	 */
233 	EXT2_LOCK(ump);
234 	pref = ext2_blkpref(ip, start_lbn, soff, sbap, blkno);
235 	/*
236 	 * If the block range spans two block maps, get the second map.
237 	 */
238 	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
239 		ssize = len;
240 	} else {
241 #ifdef DIAGNOSTIC
242 		if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
243 			panic("ext2_reallocblk: start == end");
244 #endif
245 		ssize = len - (idp->in_off + 1);
246 		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)){
247 			EXT2_UNLOCK(ump);
248 			goto fail;
249 		}
250 		ebap = (int32_t *)ebp->b_data;
251 	}
252 	/*
253 	 * Search the block map looking for an allocation of the desired size.
254 	 */
255 	if ((newblk = (int32_t)ext2_hashalloc(ip, dtog(fs, pref), pref,
256 	    len, ext2_clusteralloc)) == 0){
257 		EXT2_UNLOCK(ump);
258 		goto fail;
259 	}
260 	/*
261 	 * We have found a new contiguous block.
262 	 *
263 	 * First we have to replace the old block pointers with the new
264 	 * block pointers in the inode and indirect blocks associated
265 	 * with the file.
266 	 */
267 	blkno = newblk;
268 	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
269 		if (i == ssize)
270 			bap = ebap;
271 			soff = -i;
272 #ifdef DIAGNOSTIC
273 		if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
274 			panic("ext2_reallocblks: alloc mismatch");
275 #endif
276 		*bap++ = blkno;
277 	}
278 	/*
279 	 * Next we must write out the modified inode and indirect blocks.
280 	 * For strict correctness, the writes should be synchronous since
281 	 * the old block values may have been written to disk. In practise
282 	 * they are almost never written, but if we are concerned about
283 	 * strict correctness, the `doasyncfree' flag should be set to zero.
284 	 *
285 	 * The test on `doasyncfree' should be changed to test a flag
286 	 * that shows whether the associated buffers and inodes have
287 	 * been written. The flag should be set when the cluster is
288 	 * started and cleared whenever the buffer or inode is flushed.
289 	 * We can then check below to see if it is set, and do the
290 	 * synchronous write only when it has been cleared.
291 	 */
292 	if (sbap != &ip->i_db[0]) {
293 		if (doasyncfree)
294 			bdwrite(sbp);
295 		else
296 			bwrite(sbp);
297 	} else {
298 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
299 		if (!doasyncfree)
300 			ext2_update(vp, 1);
301 	}
302 	if (ssize < len) {
303 		if (doasyncfree)
304 			bdwrite(ebp);
305 		else
306 			bwrite(ebp);
307 	}
308 	/*
309 	 * Last, free the old blocks and assign the new blocks to the buffers.
310 	 */
311 	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
312 		ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
313 		    fs->e2fs_bsize);
314 		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
315 	}
316 	return (0);
317 
318 fail:
319 	if (ssize < len)
320 		brelse(ebp);
321 	if (sbap != &ip->i_db[0])
322 		brelse(sbp);
323 	return (ENOSPC);
324 
325 #endif /* FANCY_REALLOC */
326 }
327 
328 /*
329  * Allocate an inode in the file system.
330  *
331  */
332 int
333 ext2_valloc(pvp, mode, cred, vpp)
334 	struct vnode *pvp;
335 	int mode;
336 	struct ucred *cred;
337 	struct vnode **vpp;
338 {
339 	struct inode *pip;
340 	struct m_ext2fs *fs;
341 	struct inode *ip;
342 	struct ext2mount *ump;
343 	ino_t ino, ipref;
344 	int i, error, cg;
345 
346 	*vpp = NULL;
347 	pip = VTOI(pvp);
348 	fs = pip->i_e2fs;
349 	ump = pip->i_ump;
350 
351 	EXT2_LOCK(ump);
352 	if (fs->e2fs->e2fs_ficount == 0)
353 		goto noinodes;
354 	/*
355 	 * If it is a directory then obtain a cylinder group based on
356 	 * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is
357 	 * always the next inode.
358 	 */
359 	if((mode & IFMT) == IFDIR) {
360 		cg = ext2_dirpref(pip);
361 		if (fs->e2fs_contigdirs[cg] < 255)
362 			fs->e2fs_contigdirs[cg]++;
363 	} else {
364 		cg = ino_to_cg(fs, pip->i_number);
365 		if (fs->e2fs_contigdirs[cg] > 0)
366 			fs->e2fs_contigdirs[cg]--;
367 	}
368 	ipref = cg * fs->e2fs->e2fs_ipg + 1;
369 	ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg);
370 
371 	if (ino == 0)
372 		goto noinodes;
373 	error = VFS_VGET(pvp->v_mount, ino, LK_EXCLUSIVE, vpp);
374 	if (error) {
375 		ext2_vfree(pvp, ino, mode);
376 		return (error);
377 	}
378 	ip = VTOI(*vpp);
379 
380 	/*
381 	  the question is whether using VGET was such good idea at all -
382 	  Linux doesn't read the old inode in when it's allocating a
383 	  new one. I will set at least i_size & i_blocks the zero.
384 	*/
385 	ip->i_mode = 0;
386 	ip->i_size = 0;
387 	ip->i_blocks = 0;
388 	ip->i_flags = 0;
389         /* now we want to make sure that the block pointers are zeroed out */
390         for (i = 0; i < NDADDR; i++)
391                 ip->i_db[i] = 0;
392         for (i = 0; i < NIADDR; i++)
393                 ip->i_ib[i] = 0;
394 
395 	/*
396 	 * Set up a new generation number for this inode.
397 	 * XXX check if this makes sense in ext2
398 	 */
399 	if (ip->i_gen == 0 || ++ip->i_gen == 0)
400 		ip->i_gen = random() / 2 + 1;
401 /*
402 printf("ext2_valloc: allocated inode %d\n", ino);
403 */
404 	return (0);
405 noinodes:
406 	EXT2_UNLOCK(ump);
407 	ext2_fserr(fs, cred->cr_uid, "out of inodes");
408 	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
409 	return (ENOSPC);
410 }
411 
412 /*
413  * Find a cylinder to place a directory.
414  *
415  * The policy implemented by this algorithm is to allocate a
416  * directory inode in the same cylinder group as its parent
417  * directory, but also to reserve space for its files inodes
418  * and data. Restrict the number of directories which may be
419  * allocated one after another in the same cylinder group
420  * without intervening allocation of files.
421  *
422  * If we allocate a first level directory then force allocation
423  * in another cylinder group.
424  *
425  */
426 static u_long
427 ext2_dirpref(struct inode *pip)
428 {
429 	struct m_ext2fs *fs;
430         int cg, prefcg, dirsize, cgsize;
431 	int avgifree, avgbfree, avgndir, curdirsize;
432 	int minifree, minbfree, maxndir;
433 	int mincg, minndir;
434 	int maxcontigdirs;
435 
436 	mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED);
437 	fs = pip->i_e2fs;
438 
439  	avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount;
440 	avgbfree = fs->e2fs->e2fs_fbcount / fs->e2fs_gcount;
441 	avgndir  = fs->e2fs_total_dir / fs->e2fs_gcount;
442 
443 	/*
444 	 * Force allocation in another cg if creating a first level dir.
445 	 */
446 	ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref");
447 	if (ITOV(pip)->v_vflag & VV_ROOT) {
448 		prefcg = arc4random() % fs->e2fs_gcount;
449 		mincg = prefcg;
450 		minndir = fs->e2fs_ipg;
451 		for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
452 			if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
453 			    fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
454 			    fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
455 				mincg = cg;
456 				minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
457 			}
458 		for (cg = 0; cg < prefcg; cg++)
459 			if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
460                             fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
461                             fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
462                                 mincg = cg;
463                                 minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
464                         }
465 
466 		return (mincg);
467 	}
468 
469 	/*
470 	 * Count various limits which used for
471 	 * optimal allocation of a directory inode.
472 	 */
473 	maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg);
474 	minifree = avgifree - avgifree / 4;
475 	if (minifree < 1)
476 		minifree = 1;
477 	minbfree = avgbfree - avgbfree / 4;
478 	if (minbfree < 1)
479 		minbfree = 1;
480 	cgsize = fs->e2fs_fsize * fs->e2fs_fpg;
481 	dirsize = AVGDIRSIZE;
482 	curdirsize = avgndir ? (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0;
483 	if (dirsize < curdirsize)
484 		dirsize = curdirsize;
485 	if (dirsize <= 0)
486 		maxcontigdirs = 0;		/* dirsize overflowed */
487 	else
488 		maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255);
489 	maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR);
490 	if (maxcontigdirs == 0)
491 		maxcontigdirs = 1;
492 
493 	/*
494 	 * Limit number of dirs in one cg and reserve space for
495 	 * regular files, but only if we have no deficit in
496 	 * inodes or space.
497 	 */
498 	prefcg = ino_to_cg(fs, pip->i_number);
499 	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
500 		if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
501 		    fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
502 	    	    fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
503 			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
504 				return (cg);
505 		}
506 	for (cg = 0; cg < prefcg; cg++)
507 		if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
508 		    fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
509 	    	    fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
510 			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
511 				return (cg);
512 		}
513 	/*
514 	 * This is a backstop when we have deficit in space.
515 	 */
516 	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
517 		if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
518 			return (cg);
519 	for (cg = 0; cg < prefcg; cg++)
520 		if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
521 			break;
522 	return (cg);
523 }
524 
525 /*
526  * Select the desired position for the next block in a file.
527  *
528  * we try to mimic what Remy does in inode_getblk/block_getblk
529  *
530  * we note: blocknr == 0 means that we're about to allocate either
531  * a direct block or a pointer block at the first level of indirection
532  * (In other words, stuff that will go in i_db[] or i_ib[])
533  *
534  * blocknr != 0 means that we're allocating a block that is none
535  * of the above. Then, blocknr tells us the number of the block
536  * that will hold the pointer
537  */
538 int32_t
539 ext2_blkpref(ip, lbn, indx, bap, blocknr)
540 	struct inode *ip;
541 	int32_t lbn;
542 	int indx;
543 	int32_t *bap;
544 	int32_t blocknr;
545 {
546 	int	tmp;
547 	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
548 
549 	/* if the next block is actually what we thought it is,
550 	   then set the goal to what we thought it should be
551 	*/
552 	if(ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0)
553 		return ip->i_next_alloc_goal;
554 
555 	/* now check whether we were provided with an array that basically
556 	   tells us previous blocks to which we want to stay closeby
557 	*/
558 	if(bap)
559                 for (tmp = indx - 1; tmp >= 0; tmp--)
560 			if (bap[tmp])
561 				return bap[tmp];
562 
563 	/* else let's fall back to the blocknr, or, if there is none,
564 	   follow the rule that a block should be allocated near its inode
565 	*/
566 	return blocknr ? blocknr :
567 			(int32_t)(ip->i_block_group *
568 			EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) +
569 			ip->i_e2fs->e2fs->e2fs_first_dblock;
570 }
571 
572 /*
573  * Implement the cylinder overflow algorithm.
574  *
575  * The policy implemented by this algorithm is:
576  *   1) allocate the block in its requested cylinder group.
577  *   2) quadradically rehash on the cylinder group number.
578  *   3) brute force search for a free block.
579  */
580 static u_long
581 ext2_hashalloc(struct inode *ip, int cg, long pref, int size,
582                 daddr_t (*allocator)(struct inode *, int, daddr_t, int))
583 {
584 	struct m_ext2fs *fs;
585 	ino_t result;
586 	int i, icg = cg;
587 
588 	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
589 	fs = ip->i_e2fs;
590 	/*
591 	 * 1: preferred cylinder group
592 	 */
593 	result = (*allocator)(ip, cg, pref, size);
594 	if (result)
595 		return (result);
596 	/*
597 	 * 2: quadratic rehash
598 	 */
599 	for (i = 1; i < fs->e2fs_gcount; i *= 2) {
600 		cg += i;
601 		if (cg >= fs->e2fs_gcount)
602 			cg -= fs->e2fs_gcount;
603 		result = (*allocator)(ip, cg, 0, size);
604 		if (result)
605 			return (result);
606 	}
607 	/*
608 	 * 3: brute force search
609 	 * Note that we start at i == 2, since 0 was checked initially,
610 	 * and 1 is always checked in the quadratic rehash.
611 	 */
612 	cg = (icg + 2) % fs->e2fs_gcount;
613 	for (i = 2; i < fs->e2fs_gcount; i++) {
614 		result = (*allocator)(ip, cg, 0, size);
615 		if (result)
616 			return (result);
617 		cg++;
618 		if (cg == fs->e2fs_gcount)
619 			cg = 0;
620 	}
621 	return (0);
622 }
623 
624 /*
625  * Determine whether a block can be allocated.
626  *
627  * Check to see if a block of the appropriate size is available,
628  * and if it is, allocate it.
629  */
630 static daddr_t
631 ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
632 {
633 	struct m_ext2fs *fs;
634 	struct buf *bp;
635 	struct ext2mount *ump;
636 	daddr_t bno, runstart, runlen;
637 	int bit, loc, end, error, start;
638 	char *bbp;
639 	/* XXX ondisk32 */
640 	fs = ip->i_e2fs;
641 	ump = ip->i_ump;
642 	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
643 		return (0);
644 	EXT2_UNLOCK(ump);
645 	error = bread(ip->i_devvp, fsbtodb(fs,
646 		fs->e2fs_gd[cg].ext2bgd_b_bitmap),
647 		(int)fs->e2fs_bsize, NOCRED, &bp);
648 	if (error) {
649 		brelse(bp);
650 		EXT2_LOCK(ump);
651 		return (0);
652 	}
653 	bbp = (char *)bp->b_data;
654 
655 	if (dtog(fs, bpref) != cg)
656 		bpref = 0;
657 	if (bpref != 0) {
658 		bpref = dtogd(fs, bpref);
659 		/*
660 		 * if the requested block is available, use it
661 		 */
662 		if (isclr(bbp, bpref)) {
663 			bno = bpref;
664 			goto gotit;
665 		}
666 	}
667 	/*
668 	 * no blocks in the requested cylinder, so take next
669 	 * available one in this cylinder group.
670 	 * first try to get 8 contigous blocks, then fall back to a single
671 	 * block.
672 	 */
673 	if (bpref)
674 		start = dtogd(fs, bpref) / NBBY;
675 	else
676 		start = 0;
677 	end = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
678 retry:
679 	runlen = 0;
680 	runstart = 0;
681 	for (loc = start; loc < end; loc++) {
682 		if (bbp[loc] == (char)0xff) {
683 			runlen = 0;
684 			continue;
685 		}
686 
687 		/* Start of a run, find the number of high clear bits. */
688 		if (runlen == 0) {
689 			bit = fls(bbp[loc]);
690 			runlen = NBBY - bit;
691 			runstart = loc * NBBY + bit;
692 		} else if (bbp[loc] == 0) {
693 			/* Continue a run. */
694 			runlen += NBBY;
695 		} else {
696 			/*
697 			 * Finish the current run.  If it isn't long
698 			 * enough, start a new one.
699 			 */
700 			bit = ffs(bbp[loc]) - 1;
701 			runlen += bit;
702 			if (runlen >= 8) {
703 				bno = runstart;
704 				goto gotit;
705 			}
706 
707 			/* Run was too short, start a new one. */
708 			bit = fls(bbp[loc]);
709 			runlen = NBBY - bit;
710 			runstart = loc * NBBY + bit;
711 		}
712 
713 		/* If the current run is long enough, use it. */
714 		if (runlen >= 8) {
715 			bno = runstart;
716 			goto gotit;
717 		}
718 	}
719 	if (start != 0) {
720 		end = start;
721 		start = 0;
722 		goto retry;
723 	}
724 
725 	bno = ext2_mapsearch(fs, bbp, bpref);
726 	if (bno < 0){
727 		brelse(bp);
728 		EXT2_LOCK(ump);
729 		return (0);
730 	}
731 gotit:
732 #ifdef DIAGNOSTIC
733 	if (isset(bbp, bno)) {
734 		printf("ext2fs_alloccgblk: cg=%d bno=%jd fs=%s\n",
735 			cg, (intmax_t)bno, fs->e2fs_fsmnt);
736 		panic("ext2fs_alloccg: dup alloc");
737 	}
738 #endif
739 	setbit(bbp, bno);
740 	EXT2_LOCK(ump);
741 	fs->e2fs->e2fs_fbcount--;
742 	fs->e2fs_gd[cg].ext2bgd_nbfree--;
743 	fs->e2fs_fmod = 1;
744 	EXT2_UNLOCK(ump);
745 	bdwrite(bp);
746 	return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);
747 }
748 
749 /*
750  * Determine whether an inode can be allocated.
751  *
752  * Check to see if an inode is available, and if it is,
753  * allocate it using tode in the specified cylinder group.
754  */
755 static daddr_t
756 ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
757 {
758 	struct m_ext2fs *fs;
759 	struct buf *bp;
760 	struct ext2mount *ump;
761 	int error, start, len, loc, map, i;
762 	char *ibp;
763 	ipref--; /* to avoid a lot of (ipref -1) */
764 	if (ipref == -1)
765 		ipref = 0;
766 	fs = ip->i_e2fs;
767 	ump = ip->i_ump;
768 	if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
769 		return (0);
770 	EXT2_UNLOCK(ump);
771 	error = bread(ip->i_devvp, fsbtodb(fs,
772 		fs->e2fs_gd[cg].ext2bgd_i_bitmap),
773 		(int)fs->e2fs_bsize, NOCRED, &bp);
774 	if (error) {
775 		brelse(bp);
776 		EXT2_LOCK(ump);
777 		return (0);
778 	}
779 	ibp = (char *)bp->b_data;
780 	if (ipref) {
781 		ipref %= fs->e2fs->e2fs_ipg;
782 		if (isclr(ibp, ipref))
783 			goto gotit;
784 	}
785 	start = ipref / NBBY;
786 	len = howmany(fs->e2fs->e2fs_ipg - ipref, NBBY);
787 	loc = skpc(0xff, len, &ibp[start]);
788 	if (loc == 0) {
789 		len = start + 1;
790 		start = 0;
791 		loc = skpc(0xff, len, &ibp[0]);
792 		if (loc == 0) {
793 			printf("cg = %d, ipref = %lld, fs = %s\n",
794 				cg, (long long)ipref, fs->e2fs_fsmnt);
795 			panic("ext2fs_nodealloccg: map corrupted");
796 			/* NOTREACHED */
797 		}
798 	}
799 	i = start + len - loc;
800 	map = ibp[i];
801 	ipref = i * NBBY;
802 	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
803 		if ((map & i) == 0) {
804 			goto gotit;
805 		}
806 	}
807 	printf("fs = %s\n", fs->e2fs_fsmnt);
808 	panic("ext2fs_nodealloccg: block not in map");
809 	/* NOTREACHED */
810 gotit:
811 	setbit(ibp, ipref);
812 	EXT2_LOCK(ump);
813 	fs->e2fs_gd[cg].ext2bgd_nifree--;
814 	fs->e2fs->e2fs_ficount--;
815 	fs->e2fs_fmod = 1;
816 	if ((mode & IFMT) == IFDIR) {
817 		fs->e2fs_gd[cg].ext2bgd_ndirs++;
818 		fs->e2fs_total_dir++;
819 	}
820 	EXT2_UNLOCK(ump);
821 	bdwrite(bp);
822 	return (cg * fs->e2fs->e2fs_ipg + ipref +1);
823 }
824 
825 /*
826  * Free a block or fragment.
827  *
828  */
829 void
830 ext2_blkfree(ip, bno, size)
831 	struct inode *ip;
832 	int32_t bno;
833 	long size;
834 {
835 	struct m_ext2fs *fs;
836 	struct buf *bp;
837 	struct ext2mount *ump;
838 	int cg, error;
839 	char *bbp;
840 
841 	fs = ip->i_e2fs;
842 	ump = ip->i_ump;
843 	cg = dtog(fs, bno);
844 	if ((u_int)bno >= fs->e2fs->e2fs_bcount) {
845                 printf("bad block %lld, ino %llu\n", (long long)bno,
846                     (unsigned long long)ip->i_number);
847                 ext2_fserr(fs, ip->i_uid, "bad block");
848                 return;
849         }
850         error = bread(ip->i_devvp,
851                 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
852                 (int)fs->e2fs_bsize, NOCRED, &bp);
853         if (error) {
854                 brelse(bp);
855                 return;
856         }
857         bbp = (char *)bp->b_data;
858         bno = dtogd(fs, bno);
859         if (isclr(bbp, bno)) {
860                 printf("block = %lld, fs = %s\n",
861                      (long long)bno, fs->e2fs_fsmnt);
862                 panic("blkfree: freeing free block");
863         }
864         clrbit(bbp, bno);
865 	EXT2_LOCK(ump);
866         fs->e2fs->e2fs_fbcount++;
867         fs->e2fs_gd[cg].ext2bgd_nbfree++;
868         fs->e2fs_fmod = 1;
869 	EXT2_UNLOCK(ump);
870         bdwrite(bp);
871 }
872 
873 /*
874  * Free an inode.
875  *
876  */
877 int
878 ext2_vfree(pvp, ino, mode)
879 	struct vnode *pvp;
880 	ino_t ino;
881 	int mode;
882 {
883 	struct m_ext2fs *fs;
884 	struct inode *pip;
885 	struct buf *bp;
886 	struct ext2mount *ump;
887 	int error, cg;
888 	char * ibp;
889 /*	mode_t save_i_mode; */
890 
891 	pip = VTOI(pvp);
892 	fs = pip->i_e2fs;
893 	ump = pip->i_ump;
894 	if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount)
895 		panic("ext2_vfree: range: devvp = %p, ino = %d, fs = %s",
896 		    pip->i_devvp, ino, fs->e2fs_fsmnt);
897 
898 	cg = ino_to_cg(fs, ino);
899 	error = bread(pip->i_devvp,
900 		fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap),
901 		(int)fs->e2fs_bsize, NOCRED, &bp);
902 	if (error) {
903 		brelse(bp);
904 		return (0);
905 	}
906 	ibp = (char *)bp->b_data;
907 	ino = (ino - 1) % fs->e2fs->e2fs_ipg;
908 	if (isclr(ibp, ino)) {
909 		printf("ino = %llu, fs = %s\n",
910 			 (unsigned long long)ino, fs->e2fs_fsmnt);
911 		if (fs->e2fs_ronly == 0)
912 			panic("ifree: freeing free inode");
913 	}
914 	clrbit(ibp, ino);
915 	EXT2_LOCK(ump);
916 	fs->e2fs->e2fs_ficount++;
917 	fs->e2fs_gd[cg].ext2bgd_nifree++;
918 	if ((mode & IFMT) == IFDIR) {
919 		fs->e2fs_gd[cg].ext2bgd_ndirs--;
920 		fs->e2fs_total_dir--;
921 	}
922 	fs->e2fs_fmod = 1;
923 	EXT2_UNLOCK(ump);
924 	bdwrite(bp);
925 	return (0);
926 }
927 
928 /*
929  * Find a block in the specified cylinder group.
930  *
931  * It is a panic if a request is made to find a block if none are
932  * available.
933  */
934 static daddr_t
935 ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
936 {
937 	daddr_t bno;
938 	int start, len, loc, i, map;
939 
940 	/*
941 	 * find the fragment by searching through the free block
942 	 * map for an appropriate bit pattern
943 	 */
944 	if (bpref)
945 		start = dtogd(fs, bpref) / NBBY;
946 	else
947 		start = 0;
948 	len = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
949 	loc = skpc(0xff, len, &bbp[start]);
950 	if (loc == 0) {
951 		len = start + 1;
952 		start = 0;
953 		loc = skpc(0xff, len, &bbp[start]);
954 		if (loc == 0) {
955 			printf("start = %d, len = %d, fs = %s\n",
956 				start, len, fs->e2fs_fsmnt);
957 			panic("ext2fs_alloccg: map corrupted");
958 			/* NOTREACHED */
959 		}
960 	}
961 	i = start + len - loc;
962 	map = bbp[i];
963 	bno = i * NBBY;
964 	for (i = 1; i < (1 << NBBY); i <<= 1, bno++) {
965 		if ((map & i) == 0)
966 			return (bno);
967 	}
968 	printf("fs = %s\n", fs->e2fs_fsmnt);
969 	panic("ext2fs_mapsearch: block not in map");
970 	/* NOTREACHED */
971 }
972 
973 /*
974  * Fserr prints the name of a file system with an error diagnostic.
975  *
976  * The form of the error message is:
977  *	fs: error message
978  */
979 static void
980 ext2_fserr(fs, uid, cp)
981 	struct m_ext2fs *fs;
982 	uid_t uid;
983 	char *cp;
984 {
985 
986 	log(LOG_ERR, "uid %u on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
987 }
988 
989 int
990 cg_has_sb(int i)
991 {
992         int a3, a5, a7;
993 
994         if (i == 0 || i == 1)
995                 return 1;
996         for (a3 = 3, a5 = 5, a7 = 7;
997             a3 <= i || a5 <= i || a7 <= i;
998             a3 *= 3, a5 *= 5, a7 *= 7)
999                 if (i == a3 || i == a5 || i == a7)
1000                         return 1;
1001         return 0;
1002 }
1003