xref: /freebsd/sys/fs/ext2fs/ext2_alloc.c (revision aa64588d28258aef88cc33b8043112e8856948d0)
1 /*-
2  *  modified for Lites 1.1
3  *
4  *  Aug 1995, Godmar Back (gback@cs.utah.edu)
5  *  University of Utah, Department of Computer Science
6  */
7 /*-
8  * Copyright (c) 1982, 1986, 1989, 1993
9  *	The Regents of the University of California.  All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)ffs_alloc.c	8.8 (Berkeley) 2/21/94
36  * $FreeBSD$
37  */
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/conf.h>
42 #include <sys/vnode.h>
43 #include <sys/stat.h>
44 #include <sys/mount.h>
45 #include <sys/syslog.h>
46 #include <sys/buf.h>
47 
48 #include <fs/ext2fs/inode.h>
49 #include <fs/ext2fs/ext2_mount.h>
50 #include <fs/ext2fs/ext2fs.h>
51 #include <fs/ext2fs/fs.h>
52 #include <fs/ext2fs/ext2_extern.h>
53 
54 static daddr_t	ext2_alloccg(struct inode *, int, daddr_t, int);
55 static u_long	ext2_dirpref(struct inode *);
56 static void	ext2_fserr(struct m_ext2fs *, uid_t, char *);
57 static u_long	ext2_hashalloc(struct inode *, int, long, int,
58 				daddr_t (*)(struct inode *, int, daddr_t,
59 						int));
60 static daddr_t	ext2_nodealloccg(struct inode *, int, daddr_t, int);
61 static daddr_t  ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);
62 /*
63  * Allocate a block in the file system.
64  *
65  * A preference may be optionally specified. If a preference is given
66  * the following hierarchy is used to allocate a block:
67  *   1) allocate the requested block.
68  *   2) allocate a rotationally optimal block in the same cylinder.
69  *   3) allocate a block in the same cylinder group.
70  *   4) quadradically rehash into other cylinder groups, until an
71  *        available block is located.
72  * If no block preference is given the following hierarchy is used
73  * to allocate a block:
74  *   1) allocate a block in the cylinder group that contains the
75  *        inode for the file.
76  *   2) quadradically rehash into other cylinder groups, until an
77  *        available block is located.
78  *
79  * A preference may be optionally specified. If a preference is given
80  * the following hierarchy is used to allocate a block:
81  *   1) allocate the requested block.
82  *   2) allocate a rotationally optimal block in the same cylinder.
83  *   3) allocate a block in the same cylinder group.
84  *   4) quadradically rehash into other cylinder groups, until an
85  *        available block is located.
86  * If no block preference is given the following hierarchy is used
87  * to allocate a block:
88  *   1) allocate a block in the cylinder group that contains the
89  *        inode for the file.
90  *   2) quadradically rehash into other cylinder groups, until an
91  *        available block is located.
92  */
93 
94 int
95 ext2_alloc(ip, lbn, bpref, size, cred, bnp)
96 	struct inode *ip;
97 	int32_t lbn, bpref;
98 	int size;
99 	struct ucred *cred;
100 	int32_t *bnp;
101 {
102 	struct m_ext2fs *fs;
103 	struct ext2mount *ump;
104 	int32_t bno;
105 	int cg;
106 	*bnp = 0;
107 	fs = ip->i_e2fs;
108 	ump = ip->i_ump;
109 	mtx_assert(EXT2_MTX(ump), MA_OWNED);
110 #ifdef DIAGNOSTIC
111 	if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) {
112 		vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n",
113 		    (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt);
114 		panic("ext2_alloc: bad size");
115 	}
116 	if (cred == NOCRED)
117 		panic("ext2_alloc: missing credential");
118 #endif /* DIAGNOSTIC */
119 	if (size == fs->e2fs_bsize && fs->e2fs->e2fs_fbcount == 0)
120 		goto nospace;
121 	if (cred->cr_uid != 0 &&
122 		fs->e2fs->e2fs_fbcount < fs->e2fs->e2fs_rbcount)
123 		goto nospace;
124 	if (bpref >= fs->e2fs->e2fs_bcount)
125 		bpref = 0;
126 	 if (bpref == 0)
127                 cg = ino_to_cg(fs, ip->i_number);
128         else
129                 cg = dtog(fs, bpref);
130         bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
131                                                  ext2_alloccg);
132         if (bno > 0) {
133                 ip->i_blocks += btodb(fs->e2fs_bsize);
134                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
135                 *bnp = bno;
136                 return (0);
137         }
138 nospace:
139 	EXT2_UNLOCK(ump);
140 	ext2_fserr(fs, cred->cr_uid, "file system full");
141 	uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
142 	return (ENOSPC);
143 }
144 
145 /*
146  * Reallocate a sequence of blocks into a contiguous sequence of blocks.
147  *
148  * The vnode and an array of buffer pointers for a range of sequential
149  * logical blocks to be made contiguous is given. The allocator attempts
150  * to find a range of sequential blocks starting as close as possible to
151  * an fs_rotdelay offset from the end of the allocation for the logical
152  * block immediately preceding the current range. If successful, the
153  * physical block numbers in the buffer pointers and in the inode are
154  * changed to reflect the new allocation. If unsuccessful, the allocation
155  * is left unchanged. The success in doing the reallocation is returned.
156  * Note that the error return is not reflected back to the user. Rather
157  * the previous block allocation will be used.
158  */
159 
160 #ifdef FANCY_REALLOC
161 #include <sys/sysctl.h>
162 static int doasyncfree = 1;
163 static int doreallocblks = 1;
164 
165 #ifdef	OPT_DEBUG
166 SYSCTL_INT(_debug, 14, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");
167 #endif	/* OPT_DEBUG */
168 #endif
169 
170 int
171 ext2_reallocblks(ap)
172 	struct vop_reallocblks_args /* {
173 		struct vnode *a_vp;
174 		struct cluster_save *a_buflist;
175 	} */ *ap;
176 {
177 #ifndef FANCY_REALLOC
178 /* printf("ext2_reallocblks not implemented\n"); */
179 return ENOSPC;
180 #else
181 
182 	struct m_ext2fs *fs;
183 	struct inode *ip;
184 	struct vnode *vp;
185 	struct buf *sbp, *ebp;
186 	int32_t *bap, *sbap, *ebap = 0;
187 	struct ext2mount *ump;
188 	struct cluster_save *buflist;
189 	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
190 	int32_t start_lbn, end_lbn, soff, newblk, blkno =0;
191 	int i, len, start_lvl, end_lvl, pref, ssize;
192 
193 	vp = ap->a_vp;
194 	ip = VTOI(vp);
195 	fs = ip->i_e2fs;
196 	ump = ip->i_ump;
197 #ifdef UNKLAR
198 	if (fs->fs_contigsumsize <= 0)
199 		return (ENOSPC);
200 #endif
201 	buflist = ap->a_buflist;
202 	len = buflist->bs_nchildren;
203 	start_lbn = buflist->bs_children[0]->b_lblkno;
204 	end_lbn = start_lbn + len - 1;
205 #ifdef DIAGNOSTIC
206 	for (i = 1; i < len; i++)
207 		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
208 			panic("ext2_reallocblks: non-cluster");
209 #endif
210 	/*
211 	 * If the latest allocation is in a new cylinder group, assume that
212 	 * the filesystem has decided to move and do not force it back to
213 	 * the previous cylinder group.
214 	 */
215 	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
216 	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
217 		return (ENOSPC);
218 	if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
219 	    ext2_getlbns(vp, end_lbn, end_ap, &end_lvl))
220 		return (ENOSPC);
221 	/*
222 	 * Get the starting offset and block map for the first block.
223 	 */
224 	if (start_lvl == 0) {
225 		sbap = &ip->i_db[0];
226 		soff = start_lbn;
227 	} else {
228 		idp = &start_ap[start_lvl - 1];
229 		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) {
230 			brelse(sbp);
231 			return (ENOSPC);
232 		}
233 		sbap = (int32_t *)sbp->b_data;
234 		soff = idp->in_off;
235 	}
236 	/*
237 	 * Find the preferred location for the cluster.
238 	 */
239 	EXT2_LOCK(ump);
240 	pref = ext2_blkpref(ip, start_lbn, soff, sbap, blkno);
241 	/*
242 	 * If the block range spans two block maps, get the second map.
243 	 */
244 	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
245 		ssize = len;
246 	} else {
247 #ifdef DIAGNOSTIC
248 		if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
249 			panic("ext2_reallocblk: start == end");
250 #endif
251 		ssize = len - (idp->in_off + 1);
252 		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)){
253 			EXT2_UNLOCK(ump);
254 			goto fail;
255 		}
256 		ebap = (int32_t *)ebp->b_data;
257 	}
258 	/*
259 	 * Search the block map looking for an allocation of the desired size.
260 	 */
261 	if ((newblk = (int32_t)ext2_hashalloc(ip, dtog(fs, pref), pref,
262 	    len, ext2_clusteralloc)) == 0){
263 		EXT2_UNLOCK(ump);
264 		goto fail;
265 	}
266 	/*
267 	 * We have found a new contiguous block.
268 	 *
269 	 * First we have to replace the old block pointers with the new
270 	 * block pointers in the inode and indirect blocks associated
271 	 * with the file.
272 	 */
273 	blkno = newblk;
274 	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
275 		if (i == ssize)
276 			bap = ebap;
277 			soff = -i;
278 #ifdef DIAGNOSTIC
279 		if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
280 			panic("ext2_reallocblks: alloc mismatch");
281 #endif
282 		*bap++ = blkno;
283 	}
284 	/*
285 	 * Next we must write out the modified inode and indirect blocks.
286 	 * For strict correctness, the writes should be synchronous since
287 	 * the old block values may have been written to disk. In practise
288 	 * they are almost never written, but if we are concerned about
289 	 * strict correctness, the `doasyncfree' flag should be set to zero.
290 	 *
291 	 * The test on `doasyncfree' should be changed to test a flag
292 	 * that shows whether the associated buffers and inodes have
293 	 * been written. The flag should be set when the cluster is
294 	 * started and cleared whenever the buffer or inode is flushed.
295 	 * We can then check below to see if it is set, and do the
296 	 * synchronous write only when it has been cleared.
297 	 */
298 	if (sbap != &ip->i_db[0]) {
299 		if (doasyncfree)
300 			bdwrite(sbp);
301 		else
302 			bwrite(sbp);
303 	} else {
304 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
305 		if (!doasyncfree)
306 			ext2_update(vp, 1);
307 	}
308 	if (ssize < len) {
309 		if (doasyncfree)
310 			bdwrite(ebp);
311 		else
312 			bwrite(ebp);
313 	}
314 	/*
315 	 * Last, free the old blocks and assign the new blocks to the buffers.
316 	 */
317 	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
318 		ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
319 		    fs->e2fs_bsize);
320 		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
321 	}
322 	return (0);
323 
324 fail:
325 	if (ssize < len)
326 		brelse(ebp);
327 	if (sbap != &ip->i_db[0])
328 		brelse(sbp);
329 	return (ENOSPC);
330 
331 #endif /* FANCY_REALLOC */
332 }
333 
334 /*
335  * Allocate an inode in the file system.
336  *
337  */
338 int
339 ext2_valloc(pvp, mode, cred, vpp)
340 	struct vnode *pvp;
341 	int mode;
342 	struct ucred *cred;
343 	struct vnode **vpp;
344 {
345 	struct inode *pip;
346 	struct m_ext2fs *fs;
347 	struct inode *ip;
348 	struct ext2mount *ump;
349 	ino_t ino, ipref;
350 	int i, error, cg;
351 
352 	*vpp = NULL;
353 	pip = VTOI(pvp);
354 	fs = pip->i_e2fs;
355 	ump = pip->i_ump;
356 
357 	EXT2_LOCK(ump);
358 	if (fs->e2fs->e2fs_ficount == 0)
359 		goto noinodes;
360 	/*
361 	 * If it is a directory then obtain a cylinder group based on
362 	 * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is
363 	 * always the next inode.
364 	 */
365 	if((mode & IFMT) == IFDIR) {
366 		cg = ext2_dirpref(pip);
367 		if (fs->e2fs_contigdirs[cg] < 255)
368 			fs->e2fs_contigdirs[cg]++;
369 	} else {
370 		cg = ino_to_cg(fs, pip->i_number);
371 		if (fs->e2fs_contigdirs[cg] > 0)
372 			fs->e2fs_contigdirs[cg]--;
373 	}
374 	ipref = cg * fs->e2fs->e2fs_ipg + 1;
375 	ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg);
376 
377 	if (ino == 0)
378 		goto noinodes;
379 	error = VFS_VGET(pvp->v_mount, ino, LK_EXCLUSIVE, vpp);
380 	if (error) {
381 		ext2_vfree(pvp, ino, mode);
382 		return (error);
383 	}
384 	ip = VTOI(*vpp);
385 
386 	/*
387 	  the question is whether using VGET was such good idea at all -
388 	  Linux doesn't read the old inode in when it's allocating a
389 	  new one. I will set at least i_size & i_blocks the zero.
390 	*/
391 	ip->i_mode = 0;
392 	ip->i_size = 0;
393 	ip->i_blocks = 0;
394 	ip->i_flags = 0;
395         /* now we want to make sure that the block pointers are zeroed out */
396         for (i = 0; i < NDADDR; i++)
397                 ip->i_db[i] = 0;
398         for (i = 0; i < NIADDR; i++)
399                 ip->i_ib[i] = 0;
400 
401 	/*
402 	 * Set up a new generation number for this inode.
403 	 * XXX check if this makes sense in ext2
404 	 */
405 	if (ip->i_gen == 0 || ++ip->i_gen == 0)
406 		ip->i_gen = random() / 2 + 1;
407 /*
408 printf("ext2_valloc: allocated inode %d\n", ino);
409 */
410 	return (0);
411 noinodes:
412 	EXT2_UNLOCK(ump);
413 	ext2_fserr(fs, cred->cr_uid, "out of inodes");
414 	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
415 	return (ENOSPC);
416 }
417 
418 /*
419  * Find a cylinder to place a directory.
420  *
421  * The policy implemented by this algorithm is to allocate a
422  * directory inode in the same cylinder group as its parent
423  * directory, but also to reserve space for its files inodes
424  * and data. Restrict the number of directories which may be
425  * allocated one after another in the same cylinder group
426  * without intervening allocation of files.
427  *
428  * If we allocate a first level directory then force allocation
429  * in another cylinder group.
430  *
431  */
432 static u_long
433 ext2_dirpref(struct inode *pip)
434 {
435 	struct m_ext2fs *fs;
436         int cg, prefcg, dirsize, cgsize;
437 	int avgifree, avgbfree, avgndir, curdirsize;
438 	int minifree, minbfree, maxndir;
439 	int mincg, minndir;
440 	int maxcontigdirs;
441 
442 	mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED);
443 	fs = pip->i_e2fs;
444 
445  	avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount;
446 	avgbfree = fs->e2fs->e2fs_fbcount / fs->e2fs_gcount;
447 	avgndir  = fs->e2fs_total_dir / fs->e2fs_gcount;
448 
449 	/*
450 	 * Force allocation in another cg if creating a first level dir.
451 	 */
452 	ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref");
453 	if (ITOV(pip)->v_vflag & VV_ROOT) {
454 		prefcg = arc4random() % fs->e2fs_gcount;
455 		mincg = prefcg;
456 		minndir = fs->e2fs_ipg;
457 		for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
458 			if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
459 			    fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
460 			    fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
461 				mincg = cg;
462 				minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
463 			}
464 		for (cg = 0; cg < prefcg; cg++)
465 			if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
466                             fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
467                             fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
468                                 mincg = cg;
469                                 minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
470                         }
471 
472 		return (mincg);
473 	}
474 
475 	/*
476 	 * Count various limits which used for
477 	 * optimal allocation of a directory inode.
478 	 */
479 	maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg);
480 	minifree = avgifree - avgifree / 4;
481 	if (minifree < 1)
482 		minifree = 1;
483 	minbfree = avgbfree - avgbfree / 4;
484 	if (minbfree < 1)
485 		minbfree = 1;
486 	cgsize = fs->e2fs_fsize * fs->e2fs_fpg;
487 	dirsize = AVGDIRSIZE;
488 	curdirsize = avgndir ? (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0;
489 	if (dirsize < curdirsize)
490 		dirsize = curdirsize;
491 	if (dirsize <= 0)
492 		maxcontigdirs = 0;		/* dirsize overflowed */
493 	else
494 		maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255);
495 	maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR);
496 	if (maxcontigdirs == 0)
497 		maxcontigdirs = 1;
498 
499 	/*
500 	 * Limit number of dirs in one cg and reserve space for
501 	 * regular files, but only if we have no deficit in
502 	 * inodes or space.
503 	 */
504 	prefcg = ino_to_cg(fs, pip->i_number);
505 	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
506 		if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
507 		    fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
508 	    	    fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
509 			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
510 				return (cg);
511 		}
512 	for (cg = 0; cg < prefcg; cg++)
513 		if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
514 		    fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
515 	    	    fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
516 			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
517 				return (cg);
518 		}
519 	/*
520 	 * This is a backstop when we have deficit in space.
521 	 */
522 	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
523 		if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
524 			return (cg);
525 	for (cg = 0; cg < prefcg; cg++)
526 		if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
527 			break;
528 	return (cg);
529 }
530 
531 /*
532  * Select the desired position for the next block in a file.
533  *
534  * we try to mimic what Remy does in inode_getblk/block_getblk
535  *
536  * we note: blocknr == 0 means that we're about to allocate either
537  * a direct block or a pointer block at the first level of indirection
538  * (In other words, stuff that will go in i_db[] or i_ib[])
539  *
540  * blocknr != 0 means that we're allocating a block that is none
541  * of the above. Then, blocknr tells us the number of the block
542  * that will hold the pointer
543  */
544 int32_t
545 ext2_blkpref(ip, lbn, indx, bap, blocknr)
546 	struct inode *ip;
547 	int32_t lbn;
548 	int indx;
549 	int32_t *bap;
550 	int32_t blocknr;
551 {
552 	int	tmp;
553 	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
554 
555 	/* if the next block is actually what we thought it is,
556 	   then set the goal to what we thought it should be
557 	*/
558 	if(ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0)
559 		return ip->i_next_alloc_goal;
560 
561 	/* now check whether we were provided with an array that basically
562 	   tells us previous blocks to which we want to stay closeby
563 	*/
564 	if(bap)
565                 for (tmp = indx - 1; tmp >= 0; tmp--)
566 			if (bap[tmp])
567 				return bap[tmp];
568 
569 	/* else let's fall back to the blocknr, or, if there is none,
570 	   follow the rule that a block should be allocated near its inode
571 	*/
572 	return blocknr ? blocknr :
573 			(int32_t)(ip->i_block_group *
574 			EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) +
575 			ip->i_e2fs->e2fs->e2fs_first_dblock;
576 }
577 
578 /*
579  * Implement the cylinder overflow algorithm.
580  *
581  * The policy implemented by this algorithm is:
582  *   1) allocate the block in its requested cylinder group.
583  *   2) quadradically rehash on the cylinder group number.
584  *   3) brute force search for a free block.
585  */
586 static u_long
587 ext2_hashalloc(struct inode *ip, int cg, long pref, int size,
588                 daddr_t (*allocator)(struct inode *, int, daddr_t, int))
589 {
590 	struct m_ext2fs *fs;
591 	ino_t result;
592 	int i, icg = cg;
593 
594 	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
595 	fs = ip->i_e2fs;
596 	/*
597 	 * 1: preferred cylinder group
598 	 */
599 	result = (*allocator)(ip, cg, pref, size);
600 	if (result)
601 		return (result);
602 	/*
603 	 * 2: quadratic rehash
604 	 */
605 	for (i = 1; i < fs->e2fs_gcount; i *= 2) {
606 		cg += i;
607 		if (cg >= fs->e2fs_gcount)
608 			cg -= fs->e2fs_gcount;
609 		result = (*allocator)(ip, cg, 0, size);
610 		if (result)
611 			return (result);
612 	}
613 	/*
614 	 * 3: brute force search
615 	 * Note that we start at i == 2, since 0 was checked initially,
616 	 * and 1 is always checked in the quadratic rehash.
617 	 */
618 	cg = (icg + 2) % fs->e2fs_gcount;
619 	for (i = 2; i < fs->e2fs_gcount; i++) {
620 		result = (*allocator)(ip, cg, 0, size);
621 		if (result)
622 			return (result);
623 		cg++;
624 		if (cg == fs->e2fs_gcount)
625 			cg = 0;
626 	}
627 	return (0);
628 }
629 
630 /*
631  * Determine whether a block can be allocated.
632  *
633  * Check to see if a block of the appropriate size is available,
634  * and if it is, allocate it.
635  */
636 static daddr_t
637 ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
638 {
639 	struct m_ext2fs *fs;
640 	struct buf *bp;
641 	struct ext2mount *ump;
642 	int error, bno, start, end, loc;
643 	char *bbp;
644 	/* XXX ondisk32 */
645 	fs = ip->i_e2fs;
646 	ump = ip->i_ump;
647 	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
648 		return (0);
649 	EXT2_UNLOCK(ump);
650 	error = bread(ip->i_devvp, fsbtodb(fs,
651 		fs->e2fs_gd[cg].ext2bgd_b_bitmap),
652 		(int)fs->e2fs_bsize, NOCRED, &bp);
653 	if (error) {
654 		brelse(bp);
655 		EXT2_LOCK(ump);
656 		return (0);
657 	}
658 	bbp = (char *)bp->b_data;
659 
660 	if (dtog(fs, bpref) != cg)
661 		bpref = 0;
662 	if (bpref != 0) {
663 		bpref = dtogd(fs, bpref);
664 		/*
665 		 * if the requested block is available, use it
666 		 */
667 		if (isclr(bbp, bpref)) {
668 			bno = bpref;
669 			goto gotit;
670 		}
671 	}
672 	/*
673 	 * no blocks in the requested cylinder, so take next
674 	 * available one in this cylinder group.
675 	 * first try to get 8 contigous blocks, then fall back to a single
676 	 * block.
677 	 */
678 	if (bpref)
679 		start = dtogd(fs, bpref) / NBBY;
680 	else
681 		start = 0;
682 	end = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
683 	for (loc = start; loc < end; loc++) {
684 		if (bbp[loc] == 0) {
685 			bno = loc * NBBY;
686 			goto gotit;
687 		}
688 	}
689 	for (loc = 0; loc < start; loc++) {
690 		if (bbp[loc] == 0) {
691 			bno = loc * NBBY;
692 			goto gotit;
693 		}
694 	}
695 
696 	bno = ext2_mapsearch(fs, bbp, bpref);
697 	if (bno < 0){
698 		brelse(bp);
699 		EXT2_LOCK(ump);
700 		return (0);
701 	}
702 gotit:
703 #ifdef DIAGNOSTIC
704 	if (isset(bbp, (daddr_t)bno)) {
705 		printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n",
706 			cg, bno, fs->e2fs_fsmnt);
707 		panic("ext2fs_alloccg: dup alloc");
708 	}
709 #endif
710 	setbit(bbp, (daddr_t)bno);
711 	EXT2_LOCK(ump);
712 	fs->e2fs->e2fs_fbcount--;
713 	fs->e2fs_gd[cg].ext2bgd_nbfree--;
714 	fs->e2fs_fmod = 1;
715 	EXT2_UNLOCK(ump);
716 	bdwrite(bp);
717 	return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);
718 }
719 
720 /*
721  * Determine whether an inode can be allocated.
722  *
723  * Check to see if an inode is available, and if it is,
724  * allocate it using tode in the specified cylinder group.
725  */
726 static daddr_t
727 ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
728 {
729 	struct m_ext2fs *fs;
730 	struct buf *bp;
731 	struct ext2mount *ump;
732 	int error, start, len, loc, map, i;
733 	char *ibp;
734 	ipref--; /* to avoid a lot of (ipref -1) */
735 	if (ipref == -1)
736 		ipref = 0;
737 	fs = ip->i_e2fs;
738 	ump = ip->i_ump;
739 	if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
740 		return (0);
741 	EXT2_UNLOCK(ump);
742 	error = bread(ip->i_devvp, fsbtodb(fs,
743 		fs->e2fs_gd[cg].ext2bgd_i_bitmap),
744 		(int)fs->e2fs_bsize, NOCRED, &bp);
745 	if (error) {
746 		brelse(bp);
747 		EXT2_LOCK(ump);
748 		return (0);
749 	}
750 	ibp = (char *)bp->b_data;
751 	if (ipref) {
752 		ipref %= fs->e2fs->e2fs_ipg;
753 		if (isclr(ibp, ipref))
754 			goto gotit;
755 	}
756 	start = ipref / NBBY;
757 	len = howmany(fs->e2fs->e2fs_ipg - ipref, NBBY);
758 	loc = skpc(0xff, len, &ibp[start]);
759 	if (loc == 0) {
760 		len = start + 1;
761 		start = 0;
762 		loc = skpc(0xff, len, &ibp[0]);
763 		if (loc == 0) {
764 			printf("cg = %d, ipref = %lld, fs = %s\n",
765 				cg, (long long)ipref, fs->e2fs_fsmnt);
766 			panic("ext2fs_nodealloccg: map corrupted");
767 			/* NOTREACHED */
768 		}
769 	}
770 	i = start + len - loc;
771 	map = ibp[i];
772 	ipref = i * NBBY;
773 	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
774 		if ((map & i) == 0) {
775 			goto gotit;
776 		}
777 	}
778 	printf("fs = %s\n", fs->e2fs_fsmnt);
779 	panic("ext2fs_nodealloccg: block not in map");
780 	/* NOTREACHED */
781 gotit:
782 	setbit(ibp, ipref);
783 	EXT2_LOCK(ump);
784 	fs->e2fs_gd[cg].ext2bgd_nifree--;
785 	fs->e2fs->e2fs_ficount--;
786 	fs->e2fs_fmod = 1;
787 	if ((mode & IFMT) == IFDIR) {
788 		fs->e2fs_gd[cg].ext2bgd_ndirs++;
789 		fs->e2fs_total_dir++;
790 	}
791 	EXT2_UNLOCK(ump);
792 	bdwrite(bp);
793 	return (cg * fs->e2fs->e2fs_ipg + ipref +1);
794 }
795 
796 /*
797  * Free a block or fragment.
798  *
799  */
800 void
801 ext2_blkfree(ip, bno, size)
802 	struct inode *ip;
803 	int32_t bno;
804 	long size;
805 {
806 	struct m_ext2fs *fs;
807 	struct buf *bp;
808 	struct ext2mount *ump;
809 	int cg, error;
810 	char *bbp;
811 
812 	fs = ip->i_e2fs;
813 	ump = ip->i_ump;
814 	cg = dtog(fs, bno);
815 	if ((u_int)bno >= fs->e2fs->e2fs_bcount) {
816                 printf("bad block %lld, ino %llu\n", (long long)bno,
817                     (unsigned long long)ip->i_number);
818                 ext2_fserr(fs, ip->i_uid, "bad block");
819                 return;
820         }
821         error = bread(ip->i_devvp,
822                 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
823                 (int)fs->e2fs_bsize, NOCRED, &bp);
824         if (error) {
825                 brelse(bp);
826                 return;
827         }
828         bbp = (char *)bp->b_data;
829         bno = dtogd(fs, bno);
830         if (isclr(bbp, bno)) {
831                 printf("block = %lld, fs = %s\n",
832                      (long long)bno, fs->e2fs_fsmnt);
833                 panic("blkfree: freeing free block");
834         }
835         clrbit(bbp, bno);
836 	EXT2_LOCK(ump);
837         fs->e2fs->e2fs_fbcount++;
838         fs->e2fs_gd[cg].ext2bgd_nbfree++;
839         fs->e2fs_fmod = 1;
840 	EXT2_UNLOCK(ump);
841         bdwrite(bp);
842 }
843 
844 /*
845  * Free an inode.
846  *
847  */
848 int
849 ext2_vfree(pvp, ino, mode)
850 	struct vnode *pvp;
851 	ino_t ino;
852 	int mode;
853 {
854 	struct m_ext2fs *fs;
855 	struct inode *pip;
856 	struct buf *bp;
857 	struct ext2mount *ump;
858 	int error, cg;
859 	char * ibp;
860 /*	mode_t save_i_mode; */
861 
862 	pip = VTOI(pvp);
863 	fs = pip->i_e2fs;
864 	ump = pip->i_ump;
865 	if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount)
866 		panic("ext2_vfree: range: devvp = %p, ino = %d, fs = %s",
867 		    pip->i_devvp, ino, fs->e2fs_fsmnt);
868 
869 	cg = ino_to_cg(fs, ino);
870 	error = bread(pip->i_devvp,
871 		fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap),
872 		(int)fs->e2fs_bsize, NOCRED, &bp);
873 	if (error) {
874 		brelse(bp);
875 		return (0);
876 	}
877 	ibp = (char *)bp->b_data;
878 	ino = (ino - 1) % fs->e2fs->e2fs_ipg;
879 	if (isclr(ibp, ino)) {
880 		printf("ino = %llu, fs = %s\n",
881 			 (unsigned long long)ino, fs->e2fs_fsmnt);
882 		if (fs->e2fs_ronly == 0)
883 			panic("ifree: freeing free inode");
884 	}
885 	clrbit(ibp, ino);
886 	EXT2_LOCK(ump);
887 	fs->e2fs->e2fs_ficount++;
888 	fs->e2fs_gd[cg].ext2bgd_nifree++;
889 	if ((mode & IFMT) == IFDIR) {
890 		fs->e2fs_gd[cg].ext2bgd_ndirs--;
891 		fs->e2fs_total_dir--;
892 	}
893 	fs->e2fs_fmod = 1;
894 	EXT2_UNLOCK(ump);
895 	bdwrite(bp);
896 	return (0);
897 }
898 
899 /*
900  * Find a block in the specified cylinder group.
901  *
902  * It is a panic if a request is made to find a block if none are
903  * available.
904  */
905 static daddr_t
906 ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
907 {
908 	daddr_t bno;
909 	int start, len, loc, i, map;
910 
911 	/*
912 	 * find the fragment by searching through the free block
913 	 * map for an appropriate bit pattern
914 	 */
915 	if (bpref)
916 		start = dtogd(fs, bpref) / NBBY;
917 	else
918 		start = 0;
919 	len = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
920 	loc = skpc(0xff, len, &bbp[start]);
921 	if (loc == 0) {
922 		len = start + 1;
923 		start = 0;
924 		loc = skpc(0xff, len, &bbp[start]);
925 		if (loc == 0) {
926 			printf("start = %d, len = %d, fs = %s\n",
927 				start, len, fs->e2fs_fsmnt);
928 			panic("ext2fs_alloccg: map corrupted");
929 			/* NOTREACHED */
930 		}
931 	}
932 	i = start + len - loc;
933 	map = bbp[i];
934 	bno = i * NBBY;
935 	for (i = 1; i < (1 << NBBY); i <<= 1, bno++) {
936 		if ((map & i) == 0)
937 			return (bno);
938 	}
939 	printf("fs = %s\n", fs->e2fs_fsmnt);
940 	panic("ext2fs_mapsearch: block not in map");
941 	/* NOTREACHED */
942 }
943 
944 /*
945  * Fserr prints the name of a file system with an error diagnostic.
946  *
947  * The form of the error message is:
948  *	fs: error message
949  */
950 static void
951 ext2_fserr(fs, uid, cp)
952 	struct m_ext2fs *fs;
953 	uid_t uid;
954 	char *cp;
955 {
956 
957 	log(LOG_ERR, "uid %u on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
958 }
959 
960 int
961 cg_has_sb(int i)
962 {
963         int a3, a5, a7;
964 
965         if (i == 0 || i == 1)
966                 return 1;
967         for (a3 = 3, a5 = 5, a7 = 7;
968             a3 <= i || a5 <= i || a7 <= i;
969             a3 *= 3, a5 *= 5, a7 *= 7)
970                 if (i == a3 || i == a5 || i == a7)
971                         return 1;
972         return 0;
973 }
974