xref: /freebsd/sys/fs/ext2fs/ext2_alloc.c (revision 0f27aaf940f2fa5a6540285537b33115a96161a4)
1 /*-
2  *  modified for Lites 1.1
3  *
4  *  Aug 1995, Godmar Back (gback@cs.utah.edu)
5  *  University of Utah, Department of Computer Science
6  */
7 /*-
8  * Copyright (c) 1982, 1986, 1989, 1993
9  *	The Regents of the University of California.  All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)ffs_alloc.c	8.8 (Berkeley) 2/21/94
36  * $FreeBSD$
37  */
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/conf.h>
42 #include <sys/vnode.h>
43 #include <sys/stat.h>
44 #include <sys/mount.h>
45 #include <sys/syslog.h>
46 #include <sys/buf.h>
47 
48 #include <fs/ext2fs/inode.h>
49 #include <fs/ext2fs/ext2_mount.h>
50 #include <fs/ext2fs/ext2fs.h>
51 #include <fs/ext2fs/fs.h>
52 #include <fs/ext2fs/ext2_extern.h>
53 
54 static daddr_t	ext2_alloccg(struct inode *, int, daddr_t, int);
55 static u_long	ext2_dirpref(struct inode *);
56 static void	ext2_fserr(struct m_ext2fs *, uid_t, char *);
57 static u_long	ext2_hashalloc(struct inode *, int, long, int,
58 				daddr_t (*)(struct inode *, int, daddr_t,
59 						int));
60 static daddr_t	ext2_nodealloccg(struct inode *, int, daddr_t, int);
61 static daddr_t  ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);
62 /*
63  * Allocate a block in the file system.
64  *
65  * A preference may be optionally specified. If a preference is given
66  * the following hierarchy is used to allocate a block:
67  *   1) allocate the requested block.
68  *   2) allocate a rotationally optimal block in the same cylinder.
69  *   3) allocate a block in the same cylinder group.
70  *   4) quadradically rehash into other cylinder groups, until an
71  *        available block is located.
72  * If no block preference is given the following hierarchy is used
73  * to allocate a block:
74  *   1) allocate a block in the cylinder group that contains the
75  *        inode for the file.
76  *   2) quadradically rehash into other cylinder groups, until an
77  *        available block is located.
78  */
79 int
80 ext2_alloc(ip, lbn, bpref, size, cred, bnp)
81 	struct inode *ip;
82 	int32_t lbn, bpref;
83 	int size;
84 	struct ucred *cred;
85 	int32_t *bnp;
86 {
87 	struct m_ext2fs *fs;
88 	struct ext2mount *ump;
89 	int32_t bno;
90 	int cg;
91 	*bnp = 0;
92 	fs = ip->i_e2fs;
93 	ump = ip->i_ump;
94 	mtx_assert(EXT2_MTX(ump), MA_OWNED);
95 #ifdef DIAGNOSTIC
96 	if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) {
97 		vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n",
98 		    (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt);
99 		panic("ext2_alloc: bad size");
100 	}
101 	if (cred == NOCRED)
102 		panic("ext2_alloc: missing credential");
103 #endif /* DIAGNOSTIC */
104 	if (size == fs->e2fs_bsize && fs->e2fs->e2fs_fbcount == 0)
105 		goto nospace;
106 	if (cred->cr_uid != 0 &&
107 		fs->e2fs->e2fs_fbcount < fs->e2fs->e2fs_rbcount)
108 		goto nospace;
109 	if (bpref >= fs->e2fs->e2fs_bcount)
110 		bpref = 0;
111 	 if (bpref == 0)
112                 cg = ino_to_cg(fs, ip->i_number);
113         else
114                 cg = dtog(fs, bpref);
115         bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
116                                                  ext2_alloccg);
117         if (bno > 0) {
118                 ip->i_blocks += btodb(fs->e2fs_bsize);
119                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
120                 *bnp = bno;
121                 return (0);
122         }
123 nospace:
124 	EXT2_UNLOCK(ump);
125 	ext2_fserr(fs, cred->cr_uid, "file system full");
126 	uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
127 	return (ENOSPC);
128 }
129 
130 /*
131  * Reallocate a sequence of blocks into a contiguous sequence of blocks.
132  *
133  * The vnode and an array of buffer pointers for a range of sequential
134  * logical blocks to be made contiguous is given. The allocator attempts
135  * to find a range of sequential blocks starting as close as possible to
136  * an fs_rotdelay offset from the end of the allocation for the logical
137  * block immediately preceding the current range. If successful, the
138  * physical block numbers in the buffer pointers and in the inode are
139  * changed to reflect the new allocation. If unsuccessful, the allocation
140  * is left unchanged. The success in doing the reallocation is returned.
141  * Note that the error return is not reflected back to the user. Rather
142  * the previous block allocation will be used.
143  */
144 
145 #ifdef FANCY_REALLOC
146 #include <sys/sysctl.h>
147 static int doasyncfree = 1;
148 static int doreallocblks = 1;
149 
150 #ifdef	OPT_DEBUG
151 SYSCTL_INT(_debug, 14, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");
152 #endif	/* OPT_DEBUG */
153 #endif
154 
155 int
156 ext2_reallocblks(ap)
157 	struct vop_reallocblks_args /* {
158 		struct vnode *a_vp;
159 		struct cluster_save *a_buflist;
160 	} */ *ap;
161 {
162 #ifndef FANCY_REALLOC
163 /* printf("ext2_reallocblks not implemented\n"); */
164 return ENOSPC;
165 #else
166 
167 	struct m_ext2fs *fs;
168 	struct inode *ip;
169 	struct vnode *vp;
170 	struct buf *sbp, *ebp;
171 	int32_t *bap, *sbap, *ebap = 0;
172 	struct ext2mount *ump;
173 	struct cluster_save *buflist;
174 	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
175 	int32_t start_lbn, end_lbn, soff, newblk, blkno =0;
176 	int i, len, start_lvl, end_lvl, pref, ssize;
177 
178 	vp = ap->a_vp;
179 	ip = VTOI(vp);
180 	fs = ip->i_e2fs;
181 	ump = ip->i_ump;
182 #ifdef UNKLAR
183 	if (fs->fs_contigsumsize <= 0)
184 		return (ENOSPC);
185 #endif
186 	buflist = ap->a_buflist;
187 	len = buflist->bs_nchildren;
188 	start_lbn = buflist->bs_children[0]->b_lblkno;
189 	end_lbn = start_lbn + len - 1;
190 #ifdef DIAGNOSTIC
191 	for (i = 1; i < len; i++)
192 		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
193 			panic("ext2_reallocblks: non-cluster");
194 #endif
195 	/*
196 	 * If the latest allocation is in a new cylinder group, assume that
197 	 * the filesystem has decided to move and do not force it back to
198 	 * the previous cylinder group.
199 	 */
200 	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
201 	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
202 		return (ENOSPC);
203 	if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
204 	    ext2_getlbns(vp, end_lbn, end_ap, &end_lvl))
205 		return (ENOSPC);
206 	/*
207 	 * Get the starting offset and block map for the first block.
208 	 */
209 	if (start_lvl == 0) {
210 		sbap = &ip->i_db[0];
211 		soff = start_lbn;
212 	} else {
213 		idp = &start_ap[start_lvl - 1];
214 		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) {
215 			brelse(sbp);
216 			return (ENOSPC);
217 		}
218 		sbap = (int32_t *)sbp->b_data;
219 		soff = idp->in_off;
220 	}
221 	/*
222 	 * Find the preferred location for the cluster.
223 	 */
224 	EXT2_LOCK(ump);
225 	pref = ext2_blkpref(ip, start_lbn, soff, sbap, blkno);
226 	/*
227 	 * If the block range spans two block maps, get the second map.
228 	 */
229 	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
230 		ssize = len;
231 	} else {
232 #ifdef DIAGNOSTIC
233 		if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
234 			panic("ext2_reallocblk: start == end");
235 #endif
236 		ssize = len - (idp->in_off + 1);
237 		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)){
238 			EXT2_UNLOCK(ump);
239 			goto fail;
240 		}
241 		ebap = (int32_t *)ebp->b_data;
242 	}
243 	/*
244 	 * Search the block map looking for an allocation of the desired size.
245 	 */
246 	if ((newblk = (int32_t)ext2_hashalloc(ip, dtog(fs, pref), pref,
247 	    len, ext2_clusteralloc)) == 0){
248 		EXT2_UNLOCK(ump);
249 		goto fail;
250 	}
251 	/*
252 	 * We have found a new contiguous block.
253 	 *
254 	 * First we have to replace the old block pointers with the new
255 	 * block pointers in the inode and indirect blocks associated
256 	 * with the file.
257 	 */
258 	blkno = newblk;
259 	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
260 		if (i == ssize)
261 			bap = ebap;
262 			soff = -i;
263 #ifdef DIAGNOSTIC
264 		if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
265 			panic("ext2_reallocblks: alloc mismatch");
266 #endif
267 		*bap++ = blkno;
268 	}
269 	/*
270 	 * Next we must write out the modified inode and indirect blocks.
271 	 * For strict correctness, the writes should be synchronous since
272 	 * the old block values may have been written to disk. In practise
273 	 * they are almost never written, but if we are concerned about
274 	 * strict correctness, the `doasyncfree' flag should be set to zero.
275 	 *
276 	 * The test on `doasyncfree' should be changed to test a flag
277 	 * that shows whether the associated buffers and inodes have
278 	 * been written. The flag should be set when the cluster is
279 	 * started and cleared whenever the buffer or inode is flushed.
280 	 * We can then check below to see if it is set, and do the
281 	 * synchronous write only when it has been cleared.
282 	 */
283 	if (sbap != &ip->i_db[0]) {
284 		if (doasyncfree)
285 			bdwrite(sbp);
286 		else
287 			bwrite(sbp);
288 	} else {
289 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
290 		if (!doasyncfree)
291 			ext2_update(vp, 1);
292 	}
293 	if (ssize < len) {
294 		if (doasyncfree)
295 			bdwrite(ebp);
296 		else
297 			bwrite(ebp);
298 	}
299 	/*
300 	 * Last, free the old blocks and assign the new blocks to the buffers.
301 	 */
302 	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
303 		ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
304 		    fs->e2fs_bsize);
305 		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
306 	}
307 	return (0);
308 
309 fail:
310 	if (ssize < len)
311 		brelse(ebp);
312 	if (sbap != &ip->i_db[0])
313 		brelse(sbp);
314 	return (ENOSPC);
315 
316 #endif /* FANCY_REALLOC */
317 }
318 
319 /*
320  * Allocate an inode in the file system.
321  *
322  */
323 int
324 ext2_valloc(pvp, mode, cred, vpp)
325 	struct vnode *pvp;
326 	int mode;
327 	struct ucred *cred;
328 	struct vnode **vpp;
329 {
330 	struct inode *pip;
331 	struct m_ext2fs *fs;
332 	struct inode *ip;
333 	struct ext2mount *ump;
334 	ino_t ino, ipref;
335 	int i, error, cg;
336 
337 	*vpp = NULL;
338 	pip = VTOI(pvp);
339 	fs = pip->i_e2fs;
340 	ump = pip->i_ump;
341 
342 	EXT2_LOCK(ump);
343 	if (fs->e2fs->e2fs_ficount == 0)
344 		goto noinodes;
345 	/*
346 	 * If it is a directory then obtain a cylinder group based on
347 	 * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is
348 	 * always the next inode.
349 	 */
350 	if((mode & IFMT) == IFDIR) {
351 		cg = ext2_dirpref(pip);
352 		if (fs->e2fs_contigdirs[cg] < 255)
353 			fs->e2fs_contigdirs[cg]++;
354 	} else {
355 		cg = ino_to_cg(fs, pip->i_number);
356 		if (fs->e2fs_contigdirs[cg] > 0)
357 			fs->e2fs_contigdirs[cg]--;
358 	}
359 	ipref = cg * fs->e2fs->e2fs_ipg + 1;
360 	ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg);
361 
362 	if (ino == 0)
363 		goto noinodes;
364 	error = VFS_VGET(pvp->v_mount, ino, LK_EXCLUSIVE, vpp);
365 	if (error) {
366 		ext2_vfree(pvp, ino, mode);
367 		return (error);
368 	}
369 	ip = VTOI(*vpp);
370 
371 	/*
372 	  the question is whether using VGET was such good idea at all -
373 	  Linux doesn't read the old inode in when it's allocating a
374 	  new one. I will set at least i_size & i_blocks the zero.
375 	*/
376 	ip->i_mode = 0;
377 	ip->i_size = 0;
378 	ip->i_blocks = 0;
379 	ip->i_flags = 0;
380         /* now we want to make sure that the block pointers are zeroed out */
381         for (i = 0; i < NDADDR; i++)
382                 ip->i_db[i] = 0;
383         for (i = 0; i < NIADDR; i++)
384                 ip->i_ib[i] = 0;
385 
386 	/*
387 	 * Set up a new generation number for this inode.
388 	 * XXX check if this makes sense in ext2
389 	 */
390 	if (ip->i_gen == 0 || ++ip->i_gen == 0)
391 		ip->i_gen = random() / 2 + 1;
392 /*
393 printf("ext2_valloc: allocated inode %d\n", ino);
394 */
395 	return (0);
396 noinodes:
397 	EXT2_UNLOCK(ump);
398 	ext2_fserr(fs, cred->cr_uid, "out of inodes");
399 	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
400 	return (ENOSPC);
401 }
402 
403 /*
404  * Find a cylinder to place a directory.
405  *
406  * The policy implemented by this algorithm is to allocate a
407  * directory inode in the same cylinder group as its parent
408  * directory, but also to reserve space for its files inodes
409  * and data. Restrict the number of directories which may be
410  * allocated one after another in the same cylinder group
411  * without intervening allocation of files.
412  *
413  * If we allocate a first level directory then force allocation
414  * in another cylinder group.
415  *
416  */
417 static u_long
418 ext2_dirpref(struct inode *pip)
419 {
420 	struct m_ext2fs *fs;
421         int cg, prefcg, dirsize, cgsize;
422 	int avgifree, avgbfree, avgndir, curdirsize;
423 	int minifree, minbfree, maxndir;
424 	int mincg, minndir;
425 	int maxcontigdirs;
426 
427 	mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED);
428 	fs = pip->i_e2fs;
429 
430  	avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount;
431 	avgbfree = fs->e2fs->e2fs_fbcount / fs->e2fs_gcount;
432 	avgndir  = fs->e2fs_total_dir / fs->e2fs_gcount;
433 
434 	/*
435 	 * Force allocation in another cg if creating a first level dir.
436 	 */
437 	ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref");
438 	if (ITOV(pip)->v_vflag & VV_ROOT) {
439 		prefcg = arc4random() % fs->e2fs_gcount;
440 		mincg = prefcg;
441 		minndir = fs->e2fs_ipg;
442 		for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
443 			if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
444 			    fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
445 			    fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
446 				mincg = cg;
447 				minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
448 			}
449 		for (cg = 0; cg < prefcg; cg++)
450 			if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
451                             fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
452                             fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
453                                 mincg = cg;
454                                 minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
455                         }
456 
457 		return (mincg);
458 	}
459 
460 	/*
461 	 * Count various limits which used for
462 	 * optimal allocation of a directory inode.
463 	 */
464 	maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg);
465 	minifree = avgifree - avgifree / 4;
466 	if (minifree < 1)
467 		minifree = 1;
468 	minbfree = avgbfree - avgbfree / 4;
469 	if (minbfree < 1)
470 		minbfree = 1;
471 	cgsize = fs->e2fs_fsize * fs->e2fs_fpg;
472 	dirsize = AVGDIRSIZE;
473 	curdirsize = avgndir ? (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0;
474 	if (dirsize < curdirsize)
475 		dirsize = curdirsize;
476 	if (dirsize <= 0)
477 		maxcontigdirs = 0;		/* dirsize overflowed */
478 	else
479 		maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255);
480 	maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR);
481 	if (maxcontigdirs == 0)
482 		maxcontigdirs = 1;
483 
484 	/*
485 	 * Limit number of dirs in one cg and reserve space for
486 	 * regular files, but only if we have no deficit in
487 	 * inodes or space.
488 	 */
489 	prefcg = ino_to_cg(fs, pip->i_number);
490 	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
491 		if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
492 		    fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
493 	    	    fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
494 			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
495 				return (cg);
496 		}
497 	for (cg = 0; cg < prefcg; cg++)
498 		if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
499 		    fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
500 	    	    fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
501 			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
502 				return (cg);
503 		}
504 	/*
505 	 * This is a backstop when we have deficit in space.
506 	 */
507 	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
508 		if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
509 			return (cg);
510 	for (cg = 0; cg < prefcg; cg++)
511 		if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
512 			break;
513 	return (cg);
514 }
515 
516 /*
517  * Select the desired position for the next block in a file.
518  *
519  * we try to mimic what Remy does in inode_getblk/block_getblk
520  *
521  * we note: blocknr == 0 means that we're about to allocate either
522  * a direct block or a pointer block at the first level of indirection
523  * (In other words, stuff that will go in i_db[] or i_ib[])
524  *
525  * blocknr != 0 means that we're allocating a block that is none
526  * of the above. Then, blocknr tells us the number of the block
527  * that will hold the pointer
528  */
529 int32_t
530 ext2_blkpref(ip, lbn, indx, bap, blocknr)
531 	struct inode *ip;
532 	int32_t lbn;
533 	int indx;
534 	int32_t *bap;
535 	int32_t blocknr;
536 {
537 	int	tmp;
538 	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
539 
540 	/* if the next block is actually what we thought it is,
541 	   then set the goal to what we thought it should be
542 	*/
543 	if(ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0)
544 		return ip->i_next_alloc_goal;
545 
546 	/* now check whether we were provided with an array that basically
547 	   tells us previous blocks to which we want to stay closeby
548 	*/
549 	if(bap)
550                 for (tmp = indx - 1; tmp >= 0; tmp--)
551 			if (bap[tmp])
552 				return bap[tmp];
553 
554 	/* else let's fall back to the blocknr, or, if there is none,
555 	   follow the rule that a block should be allocated near its inode
556 	*/
557 	return blocknr ? blocknr :
558 			(int32_t)(ip->i_block_group *
559 			EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) +
560 			ip->i_e2fs->e2fs->e2fs_first_dblock;
561 }
562 
563 /*
564  * Implement the cylinder overflow algorithm.
565  *
566  * The policy implemented by this algorithm is:
567  *   1) allocate the block in its requested cylinder group.
568  *   2) quadradically rehash on the cylinder group number.
569  *   3) brute force search for a free block.
570  */
571 static u_long
572 ext2_hashalloc(struct inode *ip, int cg, long pref, int size,
573                 daddr_t (*allocator)(struct inode *, int, daddr_t, int))
574 {
575 	struct m_ext2fs *fs;
576 	ino_t result;
577 	int i, icg = cg;
578 
579 	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
580 	fs = ip->i_e2fs;
581 	/*
582 	 * 1: preferred cylinder group
583 	 */
584 	result = (*allocator)(ip, cg, pref, size);
585 	if (result)
586 		return (result);
587 	/*
588 	 * 2: quadratic rehash
589 	 */
590 	for (i = 1; i < fs->e2fs_gcount; i *= 2) {
591 		cg += i;
592 		if (cg >= fs->e2fs_gcount)
593 			cg -= fs->e2fs_gcount;
594 		result = (*allocator)(ip, cg, 0, size);
595 		if (result)
596 			return (result);
597 	}
598 	/*
599 	 * 3: brute force search
600 	 * Note that we start at i == 2, since 0 was checked initially,
601 	 * and 1 is always checked in the quadratic rehash.
602 	 */
603 	cg = (icg + 2) % fs->e2fs_gcount;
604 	for (i = 2; i < fs->e2fs_gcount; i++) {
605 		result = (*allocator)(ip, cg, 0, size);
606 		if (result)
607 			return (result);
608 		cg++;
609 		if (cg == fs->e2fs_gcount)
610 			cg = 0;
611 	}
612 	return (0);
613 }
614 
615 /*
616  * Determine whether a block can be allocated.
617  *
618  * Check to see if a block of the appropriate size is available,
619  * and if it is, allocate it.
620  */
621 static daddr_t
622 ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
623 {
624 	struct m_ext2fs *fs;
625 	struct buf *bp;
626 	struct ext2mount *ump;
627 	int error, bno, start, end, loc;
628 	char *bbp;
629 	/* XXX ondisk32 */
630 	fs = ip->i_e2fs;
631 	ump = ip->i_ump;
632 	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
633 		return (0);
634 	EXT2_UNLOCK(ump);
635 	error = bread(ip->i_devvp, fsbtodb(fs,
636 		fs->e2fs_gd[cg].ext2bgd_b_bitmap),
637 		(int)fs->e2fs_bsize, NOCRED, &bp);
638 	if (error) {
639 		brelse(bp);
640 		EXT2_LOCK(ump);
641 		return (0);
642 	}
643 	bbp = (char *)bp->b_data;
644 
645 	if (dtog(fs, bpref) != cg)
646 		bpref = 0;
647 	if (bpref != 0) {
648 		bpref = dtogd(fs, bpref);
649 		/*
650 		 * if the requested block is available, use it
651 		 */
652 		if (isclr(bbp, bpref)) {
653 			bno = bpref;
654 			goto gotit;
655 		}
656 	}
657 	/*
658 	 * no blocks in the requested cylinder, so take next
659 	 * available one in this cylinder group.
660 	 * first try to get 8 contigous blocks, then fall back to a single
661 	 * block.
662 	 */
663 	if (bpref)
664 		start = dtogd(fs, bpref) / NBBY;
665 	else
666 		start = 0;
667 	end = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
668 	for (loc = start; loc < end; loc++) {
669 		if (bbp[loc] == 0) {
670 			bno = loc * NBBY;
671 			goto gotit;
672 		}
673 	}
674 	for (loc = 0; loc < start; loc++) {
675 		if (bbp[loc] == 0) {
676 			bno = loc * NBBY;
677 			goto gotit;
678 		}
679 	}
680 
681 	bno = ext2_mapsearch(fs, bbp, bpref);
682 	if (bno < 0){
683 		brelse(bp);
684 		EXT2_LOCK(ump);
685 		return (0);
686 	}
687 gotit:
688 #ifdef DIAGNOSTIC
689 	if (isset(bbp, (daddr_t)bno)) {
690 		printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n",
691 			cg, bno, fs->e2fs_fsmnt);
692 		panic("ext2fs_alloccg: dup alloc");
693 	}
694 #endif
695 	setbit(bbp, (daddr_t)bno);
696 	EXT2_LOCK(ump);
697 	fs->e2fs->e2fs_fbcount--;
698 	fs->e2fs_gd[cg].ext2bgd_nbfree--;
699 	fs->e2fs_fmod = 1;
700 	EXT2_UNLOCK(ump);
701 	bdwrite(bp);
702 	return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);
703 }
704 
705 /*
706  * Determine whether an inode can be allocated.
707  *
708  * Check to see if an inode is available, and if it is,
709  * allocate it using tode in the specified cylinder group.
710  */
711 static daddr_t
712 ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
713 {
714 	struct m_ext2fs *fs;
715 	struct buf *bp;
716 	struct ext2mount *ump;
717 	int error, start, len, loc, map, i;
718 	char *ibp;
719 	ipref--; /* to avoid a lot of (ipref -1) */
720 	if (ipref == -1)
721 		ipref = 0;
722 	fs = ip->i_e2fs;
723 	ump = ip->i_ump;
724 	if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
725 		return (0);
726 	EXT2_UNLOCK(ump);
727 	error = bread(ip->i_devvp, fsbtodb(fs,
728 		fs->e2fs_gd[cg].ext2bgd_i_bitmap),
729 		(int)fs->e2fs_bsize, NOCRED, &bp);
730 	if (error) {
731 		brelse(bp);
732 		EXT2_LOCK(ump);
733 		return (0);
734 	}
735 	ibp = (char *)bp->b_data;
736 	if (ipref) {
737 		ipref %= fs->e2fs->e2fs_ipg;
738 		if (isclr(ibp, ipref))
739 			goto gotit;
740 	}
741 	start = ipref / NBBY;
742 	len = howmany(fs->e2fs->e2fs_ipg - ipref, NBBY);
743 	loc = skpc(0xff, len, &ibp[start]);
744 	if (loc == 0) {
745 		len = start + 1;
746 		start = 0;
747 		loc = skpc(0xff, len, &ibp[0]);
748 		if (loc == 0) {
749 			printf("cg = %d, ipref = %lld, fs = %s\n",
750 				cg, (long long)ipref, fs->e2fs_fsmnt);
751 			panic("ext2fs_nodealloccg: map corrupted");
752 			/* NOTREACHED */
753 		}
754 	}
755 	i = start + len - loc;
756 	map = ibp[i];
757 	ipref = i * NBBY;
758 	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
759 		if ((map & i) == 0) {
760 			goto gotit;
761 		}
762 	}
763 	printf("fs = %s\n", fs->e2fs_fsmnt);
764 	panic("ext2fs_nodealloccg: block not in map");
765 	/* NOTREACHED */
766 gotit:
767 	setbit(ibp, ipref);
768 	EXT2_LOCK(ump);
769 	fs->e2fs_gd[cg].ext2bgd_nifree--;
770 	fs->e2fs->e2fs_ficount--;
771 	fs->e2fs_fmod = 1;
772 	if ((mode & IFMT) == IFDIR) {
773 		fs->e2fs_gd[cg].ext2bgd_ndirs++;
774 		fs->e2fs_total_dir++;
775 	}
776 	EXT2_UNLOCK(ump);
777 	bdwrite(bp);
778 	return (cg * fs->e2fs->e2fs_ipg + ipref +1);
779 }
780 
781 /*
782  * Free a block or fragment.
783  *
784  */
785 void
786 ext2_blkfree(ip, bno, size)
787 	struct inode *ip;
788 	int32_t bno;
789 	long size;
790 {
791 	struct m_ext2fs *fs;
792 	struct buf *bp;
793 	struct ext2mount *ump;
794 	int cg, error;
795 	char *bbp;
796 
797 	fs = ip->i_e2fs;
798 	ump = ip->i_ump;
799 	cg = dtog(fs, bno);
800 	if ((u_int)bno >= fs->e2fs->e2fs_bcount) {
801                 printf("bad block %lld, ino %llu\n", (long long)bno,
802                     (unsigned long long)ip->i_number);
803                 ext2_fserr(fs, ip->i_uid, "bad block");
804                 return;
805         }
806         error = bread(ip->i_devvp,
807                 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
808                 (int)fs->e2fs_bsize, NOCRED, &bp);
809         if (error) {
810                 brelse(bp);
811                 return;
812         }
813         bbp = (char *)bp->b_data;
814         bno = dtogd(fs, bno);
815         if (isclr(bbp, bno)) {
816                 printf("block = %lld, fs = %s\n",
817                      (long long)bno, fs->e2fs_fsmnt);
818                 panic("blkfree: freeing free block");
819         }
820         clrbit(bbp, bno);
821 	EXT2_LOCK(ump);
822         fs->e2fs->e2fs_fbcount++;
823         fs->e2fs_gd[cg].ext2bgd_nbfree++;
824         fs->e2fs_fmod = 1;
825 	EXT2_UNLOCK(ump);
826         bdwrite(bp);
827 }
828 
829 /*
830  * Free an inode.
831  *
832  */
833 int
834 ext2_vfree(pvp, ino, mode)
835 	struct vnode *pvp;
836 	ino_t ino;
837 	int mode;
838 {
839 	struct m_ext2fs *fs;
840 	struct inode *pip;
841 	struct buf *bp;
842 	struct ext2mount *ump;
843 	int error, cg;
844 	char * ibp;
845 /*	mode_t save_i_mode; */
846 
847 	pip = VTOI(pvp);
848 	fs = pip->i_e2fs;
849 	ump = pip->i_ump;
850 	if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount)
851 		panic("ext2_vfree: range: devvp = %p, ino = %d, fs = %s",
852 		    pip->i_devvp, ino, fs->e2fs_fsmnt);
853 
854 	cg = ino_to_cg(fs, ino);
855 	error = bread(pip->i_devvp,
856 		fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap),
857 		(int)fs->e2fs_bsize, NOCRED, &bp);
858 	if (error) {
859 		brelse(bp);
860 		return (0);
861 	}
862 	ibp = (char *)bp->b_data;
863 	ino = (ino - 1) % fs->e2fs->e2fs_ipg;
864 	if (isclr(ibp, ino)) {
865 		printf("ino = %llu, fs = %s\n",
866 			 (unsigned long long)ino, fs->e2fs_fsmnt);
867 		if (fs->e2fs_ronly == 0)
868 			panic("ifree: freeing free inode");
869 	}
870 	clrbit(ibp, ino);
871 	EXT2_LOCK(ump);
872 	fs->e2fs->e2fs_ficount++;
873 	fs->e2fs_gd[cg].ext2bgd_nifree++;
874 	if ((mode & IFMT) == IFDIR) {
875 		fs->e2fs_gd[cg].ext2bgd_ndirs--;
876 		fs->e2fs_total_dir--;
877 	}
878 	fs->e2fs_fmod = 1;
879 	EXT2_UNLOCK(ump);
880 	bdwrite(bp);
881 	return (0);
882 }
883 
884 /*
885  * Find a block in the specified cylinder group.
886  *
887  * It is a panic if a request is made to find a block if none are
888  * available.
889  */
890 static daddr_t
891 ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
892 {
893 	daddr_t bno;
894 	int start, len, loc, i, map;
895 
896 	/*
897 	 * find the fragment by searching through the free block
898 	 * map for an appropriate bit pattern
899 	 */
900 	if (bpref)
901 		start = dtogd(fs, bpref) / NBBY;
902 	else
903 		start = 0;
904 	len = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
905 	loc = skpc(0xff, len, &bbp[start]);
906 	if (loc == 0) {
907 		len = start + 1;
908 		start = 0;
909 		loc = skpc(0xff, len, &bbp[start]);
910 		if (loc == 0) {
911 			printf("start = %d, len = %d, fs = %s\n",
912 				start, len, fs->e2fs_fsmnt);
913 			panic("ext2fs_alloccg: map corrupted");
914 			/* NOTREACHED */
915 		}
916 	}
917 	i = start + len - loc;
918 	map = bbp[i];
919 	bno = i * NBBY;
920 	for (i = 1; i < (1 << NBBY); i <<= 1, bno++) {
921 		if ((map & i) == 0)
922 			return (bno);
923 	}
924 	printf("fs = %s\n", fs->e2fs_fsmnt);
925 	panic("ext2fs_mapsearch: block not in map");
926 	/* NOTREACHED */
927 }
928 
929 /*
930  * Fserr prints the name of a file system with an error diagnostic.
931  *
932  * The form of the error message is:
933  *	fs: error message
934  */
935 static void
936 ext2_fserr(fs, uid, cp)
937 	struct m_ext2fs *fs;
938 	uid_t uid;
939 	char *cp;
940 {
941 
942 	log(LOG_ERR, "uid %u on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
943 }
944 
945 int
946 cg_has_sb(int i)
947 {
948         int a3, a5, a7;
949 
950         if (i == 0 || i == 1)
951                 return 1;
952         for (a3 = 3, a5 = 5, a7 = 7;
953             a3 <= i || a5 <= i || a7 <= i;
954             a3 *= 3, a5 *= 5, a7 *= 7)
955                 if (i == a3 || i == a5 || i == a7)
956                         return 1;
957         return 0;
958 }
959