xref: /freebsd/sys/fs/ext2fs/ext2_alloc.c (revision 8f861da99cb9865b2f1ef6098ad074150f368c23)
1 /*-
2  *  modified for Lites 1.1
3  *
4  *  Aug 1995, Godmar Back (gback@cs.utah.edu)
5  *  University of Utah, Department of Computer Science
6  */
7 /*-
8  * Copyright (c) 1982, 1986, 1989, 1993
9  *	The Regents of the University of California.  All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)ffs_alloc.c	8.8 (Berkeley) 2/21/94
36  * $FreeBSD$
37  */
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/conf.h>
42 #include <sys/vnode.h>
43 #include <sys/stat.h>
44 #include <sys/mount.h>
45 #include <sys/syslog.h>
46 #include <sys/buf.h>
47 
48 #include <fs/ext2fs/inode.h>
49 #include <fs/ext2fs/ext2_mount.h>
50 #include <fs/ext2fs/ext2fs.h>
51 #include <fs/ext2fs/fs.h>
52 #include <fs/ext2fs/ext2_extern.h>
53 
54 static daddr_t	ext2_alloccg(struct inode *, int, daddr_t, int);
55 static u_long	ext2_dirpref(struct inode *);
56 static void	ext2_fserr(struct m_ext2fs *, uid_t, char *);
57 static u_long	ext2_hashalloc(struct inode *, int, long, int,
58 				daddr_t (*)(struct inode *, int, daddr_t,
59 						int));
60 static daddr_t	ext2_nodealloccg(struct inode *, int, daddr_t, int);
61 static daddr_t  ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);
62 /*
63  * Allocate a block in the file system.
64  *
65  * A preference may be optionally specified. If a preference is given
66  * the following hierarchy is used to allocate a block:
67  *   1) allocate the requested block.
68  *   2) allocate a rotationally optimal block in the same cylinder.
69  *   3) allocate a block in the same cylinder group.
70  *   4) quadradically rehash into other cylinder groups, until an
71  *        available block is located.
72  * If no block preference is given the following hierarchy is used
73  * to allocate a block:
74  *   1) allocate a block in the cylinder group that contains the
75  *        inode for the file.
76  *   2) quadradically rehash into other cylinder groups, until an
77  *        available block is located.
78  */
79 
80 int
81 ext2_alloc(ip, lbn, bpref, size, cred, bnp)
82 	struct inode *ip;
83 	int32_t lbn, bpref;
84 	int size;
85 	struct ucred *cred;
86 	int32_t *bnp;
87 {
88 	struct m_ext2fs *fs;
89 	struct ext2mount *ump;
90 	int32_t bno;
91 	int cg;
92 	*bnp = 0;
93 	fs = ip->i_e2fs;
94 	ump = ip->i_ump;
95 	mtx_assert(EXT2_MTX(ump), MA_OWNED);
96 #ifdef DIAGNOSTIC
97 	if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) {
98 		vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n",
99 		    (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt);
100 		panic("ext2_alloc: bad size");
101 	}
102 	if (cred == NOCRED)
103 		panic("ext2_alloc: missing credential");
104 #endif /* DIAGNOSTIC */
105 	if (size == fs->e2fs_bsize && fs->e2fs->e2fs_fbcount == 0)
106 		goto nospace;
107 	if (cred->cr_uid != 0 &&
108 		fs->e2fs->e2fs_fbcount < fs->e2fs->e2fs_rbcount)
109 		goto nospace;
110 	if (bpref >= fs->e2fs->e2fs_bcount)
111 		bpref = 0;
112 	 if (bpref == 0)
113                 cg = ino_to_cg(fs, ip->i_number);
114         else
115                 cg = dtog(fs, bpref);
116         bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
117                                                  ext2_alloccg);
118         if (bno > 0) {
119                 ip->i_blocks += btodb(fs->e2fs_bsize);
120                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
121                 *bnp = bno;
122                 return (0);
123         }
124 nospace:
125 	EXT2_UNLOCK(ump);
126 	ext2_fserr(fs, cred->cr_uid, "file system full");
127 	uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
128 	return (ENOSPC);
129 }
130 
131 /*
132  * Reallocate a sequence of blocks into a contiguous sequence of blocks.
133  *
134  * The vnode and an array of buffer pointers for a range of sequential
135  * logical blocks to be made contiguous is given. The allocator attempts
136  * to find a range of sequential blocks starting as close as possible to
137  * an fs_rotdelay offset from the end of the allocation for the logical
138  * block immediately preceding the current range. If successful, the
139  * physical block numbers in the buffer pointers and in the inode are
140  * changed to reflect the new allocation. If unsuccessful, the allocation
141  * is left unchanged. The success in doing the reallocation is returned.
142  * Note that the error return is not reflected back to the user. Rather
143  * the previous block allocation will be used.
144  */
145 
146 #ifdef FANCY_REALLOC
147 #include <sys/sysctl.h>
148 static int doasyncfree = 1;
149 static int doreallocblks = 1;
150 
151 #ifdef	OPT_DEBUG
152 SYSCTL_INT(_debug, 14, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");
153 #endif	/* OPT_DEBUG */
154 #endif
155 
156 int
157 ext2_reallocblks(ap)
158 	struct vop_reallocblks_args /* {
159 		struct vnode *a_vp;
160 		struct cluster_save *a_buflist;
161 	} */ *ap;
162 {
163 #ifndef FANCY_REALLOC
164 /* printf("ext2_reallocblks not implemented\n"); */
165 return ENOSPC;
166 #else
167 
168 	struct m_ext2fs *fs;
169 	struct inode *ip;
170 	struct vnode *vp;
171 	struct buf *sbp, *ebp;
172 	int32_t *bap, *sbap, *ebap = 0;
173 	struct ext2mount *ump;
174 	struct cluster_save *buflist;
175 	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
176 	int32_t start_lbn, end_lbn, soff, newblk, blkno =0;
177 	int i, len, start_lvl, end_lvl, pref, ssize;
178 
179 	vp = ap->a_vp;
180 	ip = VTOI(vp);
181 	fs = ip->i_e2fs;
182 	ump = ip->i_ump;
183 #ifdef UNKLAR
184 	if (fs->fs_contigsumsize <= 0)
185 		return (ENOSPC);
186 #endif
187 	buflist = ap->a_buflist;
188 	len = buflist->bs_nchildren;
189 	start_lbn = buflist->bs_children[0]->b_lblkno;
190 	end_lbn = start_lbn + len - 1;
191 #ifdef DIAGNOSTIC
192 	for (i = 1; i < len; i++)
193 		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
194 			panic("ext2_reallocblks: non-cluster");
195 #endif
196 	/*
197 	 * If the latest allocation is in a new cylinder group, assume that
198 	 * the filesystem has decided to move and do not force it back to
199 	 * the previous cylinder group.
200 	 */
201 	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
202 	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
203 		return (ENOSPC);
204 	if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
205 	    ext2_getlbns(vp, end_lbn, end_ap, &end_lvl))
206 		return (ENOSPC);
207 	/*
208 	 * Get the starting offset and block map for the first block.
209 	 */
210 	if (start_lvl == 0) {
211 		sbap = &ip->i_db[0];
212 		soff = start_lbn;
213 	} else {
214 		idp = &start_ap[start_lvl - 1];
215 		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) {
216 			brelse(sbp);
217 			return (ENOSPC);
218 		}
219 		sbap = (int32_t *)sbp->b_data;
220 		soff = idp->in_off;
221 	}
222 	/*
223 	 * Find the preferred location for the cluster.
224 	 */
225 	EXT2_LOCK(ump);
226 	pref = ext2_blkpref(ip, start_lbn, soff, sbap, blkno);
227 	/*
228 	 * If the block range spans two block maps, get the second map.
229 	 */
230 	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
231 		ssize = len;
232 	} else {
233 #ifdef DIAGNOSTIC
234 		if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
235 			panic("ext2_reallocblk: start == end");
236 #endif
237 		ssize = len - (idp->in_off + 1);
238 		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)){
239 			EXT2_UNLOCK(ump);
240 			goto fail;
241 		}
242 		ebap = (int32_t *)ebp->b_data;
243 	}
244 	/*
245 	 * Search the block map looking for an allocation of the desired size.
246 	 */
247 	if ((newblk = (int32_t)ext2_hashalloc(ip, dtog(fs, pref), pref,
248 	    len, ext2_clusteralloc)) == 0){
249 		EXT2_UNLOCK(ump);
250 		goto fail;
251 	}
252 	/*
253 	 * We have found a new contiguous block.
254 	 *
255 	 * First we have to replace the old block pointers with the new
256 	 * block pointers in the inode and indirect blocks associated
257 	 * with the file.
258 	 */
259 	blkno = newblk;
260 	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
261 		if (i == ssize)
262 			bap = ebap;
263 			soff = -i;
264 #ifdef DIAGNOSTIC
265 		if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
266 			panic("ext2_reallocblks: alloc mismatch");
267 #endif
268 		*bap++ = blkno;
269 	}
270 	/*
271 	 * Next we must write out the modified inode and indirect blocks.
272 	 * For strict correctness, the writes should be synchronous since
273 	 * the old block values may have been written to disk. In practise
274 	 * they are almost never written, but if we are concerned about
275 	 * strict correctness, the `doasyncfree' flag should be set to zero.
276 	 *
277 	 * The test on `doasyncfree' should be changed to test a flag
278 	 * that shows whether the associated buffers and inodes have
279 	 * been written. The flag should be set when the cluster is
280 	 * started and cleared whenever the buffer or inode is flushed.
281 	 * We can then check below to see if it is set, and do the
282 	 * synchronous write only when it has been cleared.
283 	 */
284 	if (sbap != &ip->i_db[0]) {
285 		if (doasyncfree)
286 			bdwrite(sbp);
287 		else
288 			bwrite(sbp);
289 	} else {
290 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
291 		if (!doasyncfree)
292 			ext2_update(vp, 1);
293 	}
294 	if (ssize < len) {
295 		if (doasyncfree)
296 			bdwrite(ebp);
297 		else
298 			bwrite(ebp);
299 	}
300 	/*
301 	 * Last, free the old blocks and assign the new blocks to the buffers.
302 	 */
303 	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
304 		ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
305 		    fs->e2fs_bsize);
306 		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
307 	}
308 	return (0);
309 
310 fail:
311 	if (ssize < len)
312 		brelse(ebp);
313 	if (sbap != &ip->i_db[0])
314 		brelse(sbp);
315 	return (ENOSPC);
316 
317 #endif /* FANCY_REALLOC */
318 }
319 
320 /*
321  * Allocate an inode in the file system.
322  *
323  */
324 int
325 ext2_valloc(pvp, mode, cred, vpp)
326 	struct vnode *pvp;
327 	int mode;
328 	struct ucred *cred;
329 	struct vnode **vpp;
330 {
331 	struct inode *pip;
332 	struct m_ext2fs *fs;
333 	struct inode *ip;
334 	struct ext2mount *ump;
335 	ino_t ino, ipref;
336 	int i, error, cg;
337 
338 	*vpp = NULL;
339 	pip = VTOI(pvp);
340 	fs = pip->i_e2fs;
341 	ump = pip->i_ump;
342 
343 	EXT2_LOCK(ump);
344 	if (fs->e2fs->e2fs_ficount == 0)
345 		goto noinodes;
346 	/*
347 	 * If it is a directory then obtain a cylinder group based on
348 	 * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is
349 	 * always the next inode.
350 	 */
351 	if((mode & IFMT) == IFDIR) {
352 		cg = ext2_dirpref(pip);
353 		if (fs->e2fs_contigdirs[cg] < 255)
354 			fs->e2fs_contigdirs[cg]++;
355 	} else {
356 		cg = ino_to_cg(fs, pip->i_number);
357 		if (fs->e2fs_contigdirs[cg] > 0)
358 			fs->e2fs_contigdirs[cg]--;
359 	}
360 	ipref = cg * fs->e2fs->e2fs_ipg + 1;
361 	ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg);
362 
363 	if (ino == 0)
364 		goto noinodes;
365 	error = VFS_VGET(pvp->v_mount, ino, LK_EXCLUSIVE, vpp);
366 	if (error) {
367 		ext2_vfree(pvp, ino, mode);
368 		return (error);
369 	}
370 	ip = VTOI(*vpp);
371 
372 	/*
373 	  the question is whether using VGET was such good idea at all -
374 	  Linux doesn't read the old inode in when it's allocating a
375 	  new one. I will set at least i_size & i_blocks the zero.
376 	*/
377 	ip->i_mode = 0;
378 	ip->i_size = 0;
379 	ip->i_blocks = 0;
380 	ip->i_flags = 0;
381         /* now we want to make sure that the block pointers are zeroed out */
382         for (i = 0; i < NDADDR; i++)
383                 ip->i_db[i] = 0;
384         for (i = 0; i < NIADDR; i++)
385                 ip->i_ib[i] = 0;
386 
387 	/*
388 	 * Set up a new generation number for this inode.
389 	 * XXX check if this makes sense in ext2
390 	 */
391 	if (ip->i_gen == 0 || ++ip->i_gen == 0)
392 		ip->i_gen = random() / 2 + 1;
393 /*
394 printf("ext2_valloc: allocated inode %d\n", ino);
395 */
396 	return (0);
397 noinodes:
398 	EXT2_UNLOCK(ump);
399 	ext2_fserr(fs, cred->cr_uid, "out of inodes");
400 	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
401 	return (ENOSPC);
402 }
403 
404 /*
405  * Find a cylinder to place a directory.
406  *
407  * The policy implemented by this algorithm is to allocate a
408  * directory inode in the same cylinder group as its parent
409  * directory, but also to reserve space for its files inodes
410  * and data. Restrict the number of directories which may be
411  * allocated one after another in the same cylinder group
412  * without intervening allocation of files.
413  *
414  * If we allocate a first level directory then force allocation
415  * in another cylinder group.
416  *
417  */
418 static u_long
419 ext2_dirpref(struct inode *pip)
420 {
421 	struct m_ext2fs *fs;
422         int cg, prefcg, dirsize, cgsize;
423 	int avgifree, avgbfree, avgndir, curdirsize;
424 	int minifree, minbfree, maxndir;
425 	int mincg, minndir;
426 	int maxcontigdirs;
427 
428 	mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED);
429 	fs = pip->i_e2fs;
430 
431  	avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount;
432 	avgbfree = fs->e2fs->e2fs_fbcount / fs->e2fs_gcount;
433 	avgndir  = fs->e2fs_total_dir / fs->e2fs_gcount;
434 
435 	/*
436 	 * Force allocation in another cg if creating a first level dir.
437 	 */
438 	ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref");
439 	if (ITOV(pip)->v_vflag & VV_ROOT) {
440 		prefcg = arc4random() % fs->e2fs_gcount;
441 		mincg = prefcg;
442 		minndir = fs->e2fs_ipg;
443 		for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
444 			if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
445 			    fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
446 			    fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
447 				mincg = cg;
448 				minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
449 			}
450 		for (cg = 0; cg < prefcg; cg++)
451 			if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
452                             fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
453                             fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
454                                 mincg = cg;
455                                 minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
456                         }
457 
458 		return (mincg);
459 	}
460 
461 	/*
462 	 * Count various limits which used for
463 	 * optimal allocation of a directory inode.
464 	 */
465 	maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg);
466 	minifree = avgifree - avgifree / 4;
467 	if (minifree < 1)
468 		minifree = 1;
469 	minbfree = avgbfree - avgbfree / 4;
470 	if (minbfree < 1)
471 		minbfree = 1;
472 	cgsize = fs->e2fs_fsize * fs->e2fs_fpg;
473 	dirsize = AVGDIRSIZE;
474 	curdirsize = avgndir ? (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0;
475 	if (dirsize < curdirsize)
476 		dirsize = curdirsize;
477 	if (dirsize <= 0)
478 		maxcontigdirs = 0;		/* dirsize overflowed */
479 	else
480 		maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255);
481 	maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR);
482 	if (maxcontigdirs == 0)
483 		maxcontigdirs = 1;
484 
485 	/*
486 	 * Limit number of dirs in one cg and reserve space for
487 	 * regular files, but only if we have no deficit in
488 	 * inodes or space.
489 	 */
490 	prefcg = ino_to_cg(fs, pip->i_number);
491 	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
492 		if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
493 		    fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
494 	    	    fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
495 			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
496 				return (cg);
497 		}
498 	for (cg = 0; cg < prefcg; cg++)
499 		if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
500 		    fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
501 	    	    fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
502 			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
503 				return (cg);
504 		}
505 	/*
506 	 * This is a backstop when we have deficit in space.
507 	 */
508 	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
509 		if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
510 			return (cg);
511 	for (cg = 0; cg < prefcg; cg++)
512 		if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
513 			break;
514 	return (cg);
515 }
516 
517 /*
518  * Select the desired position for the next block in a file.
519  *
520  * we try to mimic what Remy does in inode_getblk/block_getblk
521  *
522  * we note: blocknr == 0 means that we're about to allocate either
523  * a direct block or a pointer block at the first level of indirection
524  * (In other words, stuff that will go in i_db[] or i_ib[])
525  *
526  * blocknr != 0 means that we're allocating a block that is none
527  * of the above. Then, blocknr tells us the number of the block
528  * that will hold the pointer
529  */
530 int32_t
531 ext2_blkpref(ip, lbn, indx, bap, blocknr)
532 	struct inode *ip;
533 	int32_t lbn;
534 	int indx;
535 	int32_t *bap;
536 	int32_t blocknr;
537 {
538 	int	tmp;
539 	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
540 
541 	/* if the next block is actually what we thought it is,
542 	   then set the goal to what we thought it should be
543 	*/
544 	if(ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0)
545 		return ip->i_next_alloc_goal;
546 
547 	/* now check whether we were provided with an array that basically
548 	   tells us previous blocks to which we want to stay closeby
549 	*/
550 	if(bap)
551                 for (tmp = indx - 1; tmp >= 0; tmp--)
552 			if (bap[tmp])
553 				return bap[tmp];
554 
555 	/* else let's fall back to the blocknr, or, if there is none,
556 	   follow the rule that a block should be allocated near its inode
557 	*/
558 	return blocknr ? blocknr :
559 			(int32_t)(ip->i_block_group *
560 			EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) +
561 			ip->i_e2fs->e2fs->e2fs_first_dblock;
562 }
563 
564 /*
565  * Implement the cylinder overflow algorithm.
566  *
567  * The policy implemented by this algorithm is:
568  *   1) allocate the block in its requested cylinder group.
569  *   2) quadradically rehash on the cylinder group number.
570  *   3) brute force search for a free block.
571  */
572 static u_long
573 ext2_hashalloc(struct inode *ip, int cg, long pref, int size,
574                 daddr_t (*allocator)(struct inode *, int, daddr_t, int))
575 {
576 	struct m_ext2fs *fs;
577 	ino_t result;
578 	int i, icg = cg;
579 
580 	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
581 	fs = ip->i_e2fs;
582 	/*
583 	 * 1: preferred cylinder group
584 	 */
585 	result = (*allocator)(ip, cg, pref, size);
586 	if (result)
587 		return (result);
588 	/*
589 	 * 2: quadratic rehash
590 	 */
591 	for (i = 1; i < fs->e2fs_gcount; i *= 2) {
592 		cg += i;
593 		if (cg >= fs->e2fs_gcount)
594 			cg -= fs->e2fs_gcount;
595 		result = (*allocator)(ip, cg, 0, size);
596 		if (result)
597 			return (result);
598 	}
599 	/*
600 	 * 3: brute force search
601 	 * Note that we start at i == 2, since 0 was checked initially,
602 	 * and 1 is always checked in the quadratic rehash.
603 	 */
604 	cg = (icg + 2) % fs->e2fs_gcount;
605 	for (i = 2; i < fs->e2fs_gcount; i++) {
606 		result = (*allocator)(ip, cg, 0, size);
607 		if (result)
608 			return (result);
609 		cg++;
610 		if (cg == fs->e2fs_gcount)
611 			cg = 0;
612 	}
613 	return (0);
614 }
615 
616 /*
617  * Determine whether a block can be allocated.
618  *
619  * Check to see if a block of the appropriate size is available,
620  * and if it is, allocate it.
621  */
622 static daddr_t
623 ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
624 {
625 	struct m_ext2fs *fs;
626 	struct buf *bp;
627 	struct ext2mount *ump;
628 	int error, bno, start, end, loc;
629 	char *bbp;
630 	/* XXX ondisk32 */
631 	fs = ip->i_e2fs;
632 	ump = ip->i_ump;
633 	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
634 		return (0);
635 	EXT2_UNLOCK(ump);
636 	error = bread(ip->i_devvp, fsbtodb(fs,
637 		fs->e2fs_gd[cg].ext2bgd_b_bitmap),
638 		(int)fs->e2fs_bsize, NOCRED, &bp);
639 	if (error) {
640 		brelse(bp);
641 		EXT2_LOCK(ump);
642 		return (0);
643 	}
644 	bbp = (char *)bp->b_data;
645 
646 	if (dtog(fs, bpref) != cg)
647 		bpref = 0;
648 	if (bpref != 0) {
649 		bpref = dtogd(fs, bpref);
650 		/*
651 		 * if the requested block is available, use it
652 		 */
653 		if (isclr(bbp, bpref)) {
654 			bno = bpref;
655 			goto gotit;
656 		}
657 	}
658 	/*
659 	 * no blocks in the requested cylinder, so take next
660 	 * available one in this cylinder group.
661 	 * first try to get 8 contigous blocks, then fall back to a single
662 	 * block.
663 	 */
664 	if (bpref)
665 		start = dtogd(fs, bpref) / NBBY;
666 	else
667 		start = 0;
668 	end = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
669 	for (loc = start; loc < end; loc++) {
670 		if (bbp[loc] == 0) {
671 			bno = loc * NBBY;
672 			goto gotit;
673 		}
674 	}
675 	for (loc = 0; loc < start; loc++) {
676 		if (bbp[loc] == 0) {
677 			bno = loc * NBBY;
678 			goto gotit;
679 		}
680 	}
681 
682 	bno = ext2_mapsearch(fs, bbp, bpref);
683 	if (bno < 0){
684 		brelse(bp);
685 		EXT2_LOCK(ump);
686 		return (0);
687 	}
688 gotit:
689 #ifdef DIAGNOSTIC
690 	if (isset(bbp, (daddr_t)bno)) {
691 		printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n",
692 			cg, bno, fs->e2fs_fsmnt);
693 		panic("ext2fs_alloccg: dup alloc");
694 	}
695 #endif
696 	setbit(bbp, (daddr_t)bno);
697 	EXT2_LOCK(ump);
698 	fs->e2fs->e2fs_fbcount--;
699 	fs->e2fs_gd[cg].ext2bgd_nbfree--;
700 	fs->e2fs_fmod = 1;
701 	EXT2_UNLOCK(ump);
702 	bdwrite(bp);
703 	return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);
704 }
705 
706 /*
707  * Determine whether an inode can be allocated.
708  *
709  * Check to see if an inode is available, and if it is,
710  * allocate it using tode in the specified cylinder group.
711  */
712 static daddr_t
713 ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
714 {
715 	struct m_ext2fs *fs;
716 	struct buf *bp;
717 	struct ext2mount *ump;
718 	int error, start, len, loc, map, i;
719 	char *ibp;
720 	ipref--; /* to avoid a lot of (ipref -1) */
721 	if (ipref == -1)
722 		ipref = 0;
723 	fs = ip->i_e2fs;
724 	ump = ip->i_ump;
725 	if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
726 		return (0);
727 	EXT2_UNLOCK(ump);
728 	error = bread(ip->i_devvp, fsbtodb(fs,
729 		fs->e2fs_gd[cg].ext2bgd_i_bitmap),
730 		(int)fs->e2fs_bsize, NOCRED, &bp);
731 	if (error) {
732 		brelse(bp);
733 		EXT2_LOCK(ump);
734 		return (0);
735 	}
736 	ibp = (char *)bp->b_data;
737 	if (ipref) {
738 		ipref %= fs->e2fs->e2fs_ipg;
739 		if (isclr(ibp, ipref))
740 			goto gotit;
741 	}
742 	start = ipref / NBBY;
743 	len = howmany(fs->e2fs->e2fs_ipg - ipref, NBBY);
744 	loc = skpc(0xff, len, &ibp[start]);
745 	if (loc == 0) {
746 		len = start + 1;
747 		start = 0;
748 		loc = skpc(0xff, len, &ibp[0]);
749 		if (loc == 0) {
750 			printf("cg = %d, ipref = %lld, fs = %s\n",
751 				cg, (long long)ipref, fs->e2fs_fsmnt);
752 			panic("ext2fs_nodealloccg: map corrupted");
753 			/* NOTREACHED */
754 		}
755 	}
756 	i = start + len - loc;
757 	map = ibp[i];
758 	ipref = i * NBBY;
759 	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
760 		if ((map & i) == 0) {
761 			goto gotit;
762 		}
763 	}
764 	printf("fs = %s\n", fs->e2fs_fsmnt);
765 	panic("ext2fs_nodealloccg: block not in map");
766 	/* NOTREACHED */
767 gotit:
768 	setbit(ibp, ipref);
769 	EXT2_LOCK(ump);
770 	fs->e2fs_gd[cg].ext2bgd_nifree--;
771 	fs->e2fs->e2fs_ficount--;
772 	fs->e2fs_fmod = 1;
773 	if ((mode & IFMT) == IFDIR) {
774 		fs->e2fs_gd[cg].ext2bgd_ndirs++;
775 		fs->e2fs_total_dir++;
776 	}
777 	EXT2_UNLOCK(ump);
778 	bdwrite(bp);
779 	return (cg * fs->e2fs->e2fs_ipg + ipref +1);
780 }
781 
782 /*
783  * Free a block or fragment.
784  *
785  */
786 void
787 ext2_blkfree(ip, bno, size)
788 	struct inode *ip;
789 	int32_t bno;
790 	long size;
791 {
792 	struct m_ext2fs *fs;
793 	struct buf *bp;
794 	struct ext2mount *ump;
795 	int cg, error;
796 	char *bbp;
797 
798 	fs = ip->i_e2fs;
799 	ump = ip->i_ump;
800 	cg = dtog(fs, bno);
801 	if ((u_int)bno >= fs->e2fs->e2fs_bcount) {
802                 printf("bad block %lld, ino %llu\n", (long long)bno,
803                     (unsigned long long)ip->i_number);
804                 ext2_fserr(fs, ip->i_uid, "bad block");
805                 return;
806         }
807         error = bread(ip->i_devvp,
808                 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
809                 (int)fs->e2fs_bsize, NOCRED, &bp);
810         if (error) {
811                 brelse(bp);
812                 return;
813         }
814         bbp = (char *)bp->b_data;
815         bno = dtogd(fs, bno);
816         if (isclr(bbp, bno)) {
817                 printf("block = %lld, fs = %s\n",
818                      (long long)bno, fs->e2fs_fsmnt);
819                 panic("blkfree: freeing free block");
820         }
821         clrbit(bbp, bno);
822 	EXT2_LOCK(ump);
823         fs->e2fs->e2fs_fbcount++;
824         fs->e2fs_gd[cg].ext2bgd_nbfree++;
825         fs->e2fs_fmod = 1;
826 	EXT2_UNLOCK(ump);
827         bdwrite(bp);
828 }
829 
830 /*
831  * Free an inode.
832  *
833  */
834 int
835 ext2_vfree(pvp, ino, mode)
836 	struct vnode *pvp;
837 	ino_t ino;
838 	int mode;
839 {
840 	struct m_ext2fs *fs;
841 	struct inode *pip;
842 	struct buf *bp;
843 	struct ext2mount *ump;
844 	int error, cg;
845 	char * ibp;
846 /*	mode_t save_i_mode; */
847 
848 	pip = VTOI(pvp);
849 	fs = pip->i_e2fs;
850 	ump = pip->i_ump;
851 	if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount)
852 		panic("ext2_vfree: range: devvp = %p, ino = %d, fs = %s",
853 		    pip->i_devvp, ino, fs->e2fs_fsmnt);
854 
855 	cg = ino_to_cg(fs, ino);
856 	error = bread(pip->i_devvp,
857 		fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap),
858 		(int)fs->e2fs_bsize, NOCRED, &bp);
859 	if (error) {
860 		brelse(bp);
861 		return (0);
862 	}
863 	ibp = (char *)bp->b_data;
864 	ino = (ino - 1) % fs->e2fs->e2fs_ipg;
865 	if (isclr(ibp, ino)) {
866 		printf("ino = %llu, fs = %s\n",
867 			 (unsigned long long)ino, fs->e2fs_fsmnt);
868 		if (fs->e2fs_ronly == 0)
869 			panic("ifree: freeing free inode");
870 	}
871 	clrbit(ibp, ino);
872 	EXT2_LOCK(ump);
873 	fs->e2fs->e2fs_ficount++;
874 	fs->e2fs_gd[cg].ext2bgd_nifree++;
875 	if ((mode & IFMT) == IFDIR) {
876 		fs->e2fs_gd[cg].ext2bgd_ndirs--;
877 		fs->e2fs_total_dir--;
878 	}
879 	fs->e2fs_fmod = 1;
880 	EXT2_UNLOCK(ump);
881 	bdwrite(bp);
882 	return (0);
883 }
884 
885 /*
886  * Find a block in the specified cylinder group.
887  *
888  * It is a panic if a request is made to find a block if none are
889  * available.
890  */
891 static daddr_t
892 ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
893 {
894 	daddr_t bno;
895 	int start, len, loc, i, map;
896 
897 	/*
898 	 * find the fragment by searching through the free block
899 	 * map for an appropriate bit pattern
900 	 */
901 	if (bpref)
902 		start = dtogd(fs, bpref) / NBBY;
903 	else
904 		start = 0;
905 	len = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
906 	loc = skpc(0xff, len, &bbp[start]);
907 	if (loc == 0) {
908 		len = start + 1;
909 		start = 0;
910 		loc = skpc(0xff, len, &bbp[start]);
911 		if (loc == 0) {
912 			printf("start = %d, len = %d, fs = %s\n",
913 				start, len, fs->e2fs_fsmnt);
914 			panic("ext2fs_alloccg: map corrupted");
915 			/* NOTREACHED */
916 		}
917 	}
918 	i = start + len - loc;
919 	map = bbp[i];
920 	bno = i * NBBY;
921 	for (i = 1; i < (1 << NBBY); i <<= 1, bno++) {
922 		if ((map & i) == 0)
923 			return (bno);
924 	}
925 	printf("fs = %s\n", fs->e2fs_fsmnt);
926 	panic("ext2fs_mapsearch: block not in map");
927 	/* NOTREACHED */
928 }
929 
930 /*
931  * Fserr prints the name of a file system with an error diagnostic.
932  *
933  * The form of the error message is:
934  *	fs: error message
935  */
936 static void
937 ext2_fserr(fs, uid, cp)
938 	struct m_ext2fs *fs;
939 	uid_t uid;
940 	char *cp;
941 {
942 
943 	log(LOG_ERR, "uid %u on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
944 }
945 
946 int
947 cg_has_sb(int i)
948 {
949         int a3, a5, a7;
950 
951         if (i == 0 || i == 1)
952                 return 1;
953         for (a3 = 3, a5 = 5, a7 = 7;
954             a3 <= i || a5 <= i || a7 <= i;
955             a3 *= 3, a5 *= 5, a7 *= 7)
956                 if (i == a3 || i == a5 || i == a7)
957                         return 1;
958         return 0;
959 }
960