/*
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_alloc.c	8.18 (Berkeley) 5/26/95
 * $FreeBSD$
 */

#include "opt_quota.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

typedef ufs2_daddr_t allocfcn_t(struct inode *ip, int cg, ufs2_daddr_t bpref,
				  int size);

static ufs2_daddr_t ffs_alloccg(struct inode *, int, ufs2_daddr_t, int);
static ufs2_daddr_t
	      ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t);
#ifdef DIAGNOSTIC
static int	ffs_checkblk(struct inode *, ufs2_daddr_t, long);
#endif
static ufs2_daddr_t ffs_clusteralloc(struct inode *, int, ufs2_daddr_t, int);
static ino_t	ffs_dirpref(struct inode *);
static ufs2_daddr_t ffs_fragextend(struct inode *, int, ufs2_daddr_t, int, int);
static void	ffs_fserr(struct fs *, ino_t, char *);
static ufs2_daddr_t	ffs_hashalloc
		(struct inode *, int, ufs2_daddr_t, int, allocfcn_t *);
static ufs2_daddr_t ffs_nodealloccg(struct inode *, int, ufs2_daddr_t, int);
static ufs1_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs2_daddr_t, int);
static int	ffs_reallocblks_ufs1(struct vop_reallocblks_args *);
static int	ffs_reallocblks_ufs2(struct vop_reallocblks_args *);

/*
 * Allocate a block in the filesystem.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 */
int
ffs_alloc(ip, lbn, bpref, size, cred, bnp)
	struct inode *ip;
	ufs2_daddr_t lbn, bpref;
	int size;
	struct ucred *cred;
	ufs2_daddr_t *bnp;
{
	struct fs *fs;
	ufs2_daddr_t bno;
	int cg, reclaimed;
#ifdef QUOTA
	int error;
#endif

	*bnp = 0;
	fs = ip->i_fs;
#ifdef DIAGNOSTIC
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = %s, bsize = %ld, size = %d, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_bsize, size,
		    fs->fs_fsmnt);
		panic("ffs_alloc: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_alloc: missing credential");
#endif /* DIAGNOSTIC */
	reclaimed = 0;
retry:
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (suser_cred(cred, PRISON_ROOT) &&
	    freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0)
		goto nospace;
#ifdef QUOTA
	error = chkdq(ip, btodb(size), cred, 0);
	if (error)
		return (error);
#endif
	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = ino_to_cg(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg);
	if (bno > 0) {
		DIP(ip, i_blocks) += btodb(size);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bnp = bno;
		return (0);
	}
#ifdef QUOTA
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, -btodb(size), cred, FORCE);
#endif
nospace:
	if (fs->fs_pendingblocks > 0 && reclaimed == 0) {
		reclaimed = 1;
		softdep_request_cleanup(fs, ITOV(ip));
		goto retry;
	}
	ffs_fserr(fs, ip->i_number, "filesystem full");
	uprintf("\n%s: write failed, filesystem is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}

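/*
 * Illustrative sketch, not compiled into the kernel: the size check and
 * fragment accounting above expressed as plain arithmetic.  The numbers
 * and the name example_numfrags are assumptions for illustration, not
 * part of the FFS API.  For a hypothetical 16K/2K filesystem
 * (fs_frag == 8), a valid request is a positive multiple of fs_fsize
 * that does not exceed fs_bsize.
 */
#if 0
static int
example_numfrags(long bsize, long fsize, long size)
{

	if (size <= 0 || size > bsize || size % fsize != 0)
		return (-1);		/* ffs_alloc() panics on this */
	return (size / fsize);		/* fragments charged, 1..fs_frag */
}
#endif
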
/*
 * Reallocate a fragment to a bigger size
 *
 * The number and size of the old block are given, and a preference
 * and new size are also specified. The allocator attempts to extend
 * the original block. Failing that, the regular block allocator is
 * invoked to get an appropriate block.
 */
int
ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, cred, bpp)
	struct inode *ip;
	ufs2_daddr_t lbprev;
	ufs2_daddr_t bprev;
	ufs2_daddr_t bpref;
	int osize, nsize;
	struct ucred *cred;
	struct buf **bpp;
{
	struct vnode *vp;
	struct fs *fs;
	struct buf *bp;
	int cg, request, error, reclaimed;
	ufs2_daddr_t bno;

	*bpp = 0;
	vp = ITOV(ip);
	fs = ip->i_fs;
#ifdef DIAGNOSTIC
	if (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
		panic("ffs_realloccg: allocation on suspended filesystem");
	if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
	    (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
		printf(
		"dev = %s, bsize = %ld, osize = %d, nsize = %d, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_bsize, osize,
		    nsize, fs->fs_fsmnt);
		panic("ffs_realloccg: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_realloccg: missing credential");
#endif /* DIAGNOSTIC */
	reclaimed = 0;
retry:
	if (suser_cred(cred, PRISON_ROOT) &&
	    freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0)
		goto nospace;
	if (bprev == 0) {
		printf("dev = %s, bsize = %ld, bprev = %jd, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_bsize, (intmax_t)bprev,
		    fs->fs_fsmnt);
		panic("ffs_realloccg: bad bprev");
	}
	/*
	 * Allocate the extra space in the buffer.
	 */
	error = bread(vp, lbprev, osize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}

	if (bp->b_blkno == bp->b_lblkno) {
		if (lbprev >= NDADDR)
			panic("ffs_realloccg: lbprev out of range");
		bp->b_blkno = fsbtodb(fs, bprev);
	}

#ifdef QUOTA
	error = chkdq(ip, btodb(nsize - osize), cred, 0);
	if (error) {
		brelse(bp);
		return (error);
	}
#endif
	/*
	 * Check for extension in the existing location.
	 */
	cg = dtog(fs, bprev);
	bno = ffs_fragextend(ip, cg, bprev, osize, nsize);
	if (bno) {
		if (bp->b_blkno != fsbtodb(fs, bno))
			panic("ffs_realloccg: bad blockno");
		DIP(ip, i_blocks) += btodb(nsize - osize);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		allocbuf(bp, nsize);
		bp->b_flags |= B_DONE;
		bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
		*bpp = bp;
		return (0);
	}
	/*
	 * Allocate a new disk location.
	 */
	if (bpref >= fs->fs_size)
		bpref = 0;
	switch ((int)fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment. Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow. If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
		 */
		request = nsize;
		if (fs->fs_minfree <= 5 ||
		    fs->fs_cstotal.cs_nffree >
		    (off_t)fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;
		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying to
		 * grow a small fragment to a larger fragment. To save time,
		 * we allocate a full sized block, then free the unused portion.
		 * If the file continues to grow, the `ffs_fragextend' call
		 * above will be able to grow it in place without further
		 * copying. If aberrant programs cause disk fragmentation to
		 * grow within 2% of the free reserve, we choose to begin
		 * optimizing for space.
		 */
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    (off_t)fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;
		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		printf("dev = %s, optim = %ld, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_optim, fs->fs_fsmnt);
		panic("ffs_realloccg: bad optim");
		/* NOTREACHED */
	}
	bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg);
	if (bno > 0) {
		bp->b_blkno = fsbtodb(fs, bno);
		if (!DOINGSOFTDEP(vp))
			ffs_blkfree(fs, ip->i_devvp, bprev, (long)osize,
			    ip->i_number);
		if (nsize < request)
			ffs_blkfree(fs, ip->i_devvp, bno + numfrags(fs, nsize),
			    (long)(request - nsize), ip->i_number);
		DIP(ip, i_blocks) += btodb(nsize - osize);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		allocbuf(bp, nsize);
		bp->b_flags |= B_DONE;
		bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
		*bpp = bp;
		return (0);
	}
#ifdef QUOTA
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, -btodb(nsize - osize), cred, FORCE);
#endif
	brelse(bp);
nospace:
	/*
	 * no space available
	 */
	if (fs->fs_pendingblocks > 0 && reclaimed == 0) {
		reclaimed = 1;
		softdep_request_cleanup(fs, vp);
		goto retry;
	}
	ffs_fserr(fs, ip->i_number, "filesystem full");
	uprintf("\n%s: write failed, filesystem is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}

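/*
 * Illustrative sketch, not compiled into the kernel: the two thresholds
 * used by the space/time heuristic above.  The superblock values here
 * are hypothetical.  SPACE flips to TIME once free fragments fall below
 * half of the minimum free reserve; TIME flips back to SPACE once free
 * fragments grow to (fs_minfree - 2) percent of the data blocks.
 */
#if 0
static void
example_optim_thresholds(void)
{
	int64_t dsize = 1000000;	/* fs_dsize: data blocks, in frags */
	int minfree = 8;		/* fs_minfree: reserve, percent */
	int64_t to_time = dsize * minfree / (2 * 100);
	int64_t to_space = dsize * (minfree - 2) / 100;

	printf("SPACE->TIME below %jd free frags, TIME->SPACE at %jd\n",
	    (intmax_t)to_time, (intmax_t)to_space);
}
#endif
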
/*
 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
 *
 * The vnode and an array of buffer pointers for a range of sequential
 * logical blocks to be made contiguous is given. The allocator attempts
 * to find a range of sequential blocks starting as close as possible
 * to the end of the allocation for the logical block immediately
 * preceding the current range. If successful, the physical block numbers
 * in the buffer pointers and in the inode are changed to reflect the new
 * allocation. If unsuccessful, the allocation is left unchanged. The
 * success or failure of the reallocation is returned. Note that an error
 * return is not reflected back to the user. Rather the previous block
 * allocation will be used.
 */

SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem");

static int doasyncfree = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");

static int doreallocblks = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");

#ifdef DEBUG
static volatile int prtrealloc = 0;
#endif

int
ffs_reallocblks(ap)
	struct vop_reallocblks_args /* {
		struct vnode *a_vp;
		struct cluster_save *a_buflist;
	} */ *ap;
{

	if (doreallocblks == 0)
		return (ENOSPC);
	if (VTOI(ap->a_vp)->i_ump->um_fstype == UFS1)
		return (ffs_reallocblks_ufs1(ap));
	return (ffs_reallocblks_ufs2(ap));
}

static int
ffs_reallocblks_ufs1(ap)
	struct vop_reallocblks_args /* {
		struct vnode *a_vp;
		struct cluster_save *a_buflist;
	} */ *ap;
{
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp;
	struct buf *sbp, *ebp;
	ufs1_daddr_t *bap, *sbap, *ebap = 0;
	struct cluster_save *buflist;
	ufs_lbn_t start_lbn, end_lbn;
	ufs1_daddr_t soff, newblk, blkno;
	ufs2_daddr_t pref;
	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
	int i, len, start_lvl, end_lvl, ssize;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;
	if (fs->fs_contigsumsize <= 0)
		return (ENOSPC);
	buflist = ap->a_buflist;
	len = buflist->bs_nchildren;
	start_lbn = buflist->bs_children[0]->b_lblkno;
	end_lbn = start_lbn + len - 1;
#ifdef DIAGNOSTIC
	for (i = 0; i < len; i++)
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 1");
	for (i = 1; i < len; i++)
		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
			panic("ffs_reallocblks: non-logical cluster");
	blkno = buflist->bs_children[0]->b_blkno;
	ssize = fsbtodb(fs, fs->fs_frag);
	for (i = 1; i < len - 1; i++)
		if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
			panic("ffs_reallocblks: non-physical cluster %d", i);
#endif
	/*
	 * If the latest allocation is in a new cylinder group, assume that
	 * the filesystem has decided to move and do not force it back to
	 * the previous cylinder group.
	 */
	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
		return (ENOSPC);
	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
		return (ENOSPC);
	/*
	 * Get the starting offset and block map for the first block.
	 */
	if (start_lvl == 0) {
		sbap = &ip->i_din1->di_db[0];
		soff = start_lbn;
	} else {
		idp = &start_ap[start_lvl - 1];
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
			brelse(sbp);
			return (ENOSPC);
		}
		sbap = (ufs1_daddr_t *)sbp->b_data;
		soff = idp->in_off;
	}
	/*
	 * Find the preferred location for the cluster.
	 */
	pref = ffs_blkpref_ufs1(ip, start_lbn, soff, sbap);
	/*
	 * If the block range spans two block maps, get the second map.
	 */
	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
		ssize = len;
	} else {
#ifdef DIAGNOSTIC
		if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
			panic("ffs_reallocblk: start == end");
#endif
		ssize = len - (idp->in_off + 1);
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
			goto fail;
		ebap = (ufs1_daddr_t *)ebp->b_data;
	}
	/*
	 * Search the block map looking for an allocation of the desired size.
	 */
	if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
	    len, ffs_clusteralloc)) == 0)
		goto fail;
	/*
	 * We have found a new contiguous block.
	 *
	 * First we have to replace the old block pointers with the new
	 * block pointers in the inode and indirect blocks associated
	 * with the file.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
		    (intmax_t)start_lbn, (intmax_t)end_lbn);
#endif
	blkno = newblk;
	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (i == ssize) {
			bap = ebap;
			soff = -i;
		}
#ifdef DIAGNOSTIC
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 2");
		if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
			panic("ffs_reallocblks: alloc mismatch");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %d,", *bap);
#endif
		if (DOINGSOFTDEP(vp)) {
			if (sbap == &ip->i_din1->di_db[0] && i < ssize)
				softdep_setup_allocdirect(ip, start_lbn + i,
				    blkno, *bap, fs->fs_bsize, fs->fs_bsize,
				    buflist->bs_children[i]);
			else
				softdep_setup_allocindir_page(ip, start_lbn + i,
				    i < ssize ? sbp : ebp, soff + i, blkno,
				    *bap, buflist->bs_children[i]);
		}
		*bap++ = blkno;
	}
	/*
	 * Next we must write out the modified inode and indirect blocks.
	 * For strict correctness, the writes should be synchronous since
	 * the old block values may have been written to disk. In practice
	 * they are almost never written, but if we are concerned about
	 * strict correctness, the `doasyncfree' flag should be set to zero.
	 *
	 * The test on `doasyncfree' should be changed to test a flag
	 * that shows whether the associated buffers and inodes have
	 * been written. The flag should be set when the cluster is
	 * started and cleared whenever the buffer or inode is flushed.
	 * We can then check below to see if it is set, and do the
	 * synchronous write only when it has been cleared.
	 */
	if (sbap != &ip->i_din1->di_db[0]) {
		if (doasyncfree)
			bdwrite(sbp);
		else
			bwrite(sbp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (!doasyncfree)
			UFS_UPDATE(vp, 1);
	}
	if (ssize < len) {
		if (doasyncfree)
			bdwrite(ebp);
		else
			bwrite(ebp);
	}
	/*
	 * Last, free the old blocks and assign the new blocks to the buffers.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("\n\tnew:");
#endif
	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (!DOINGSOFTDEP(vp))
			ffs_blkfree(fs, ip->i_devvp,
			    dbtofsb(fs, buflist->bs_children[i]->b_blkno),
			    fs->fs_bsize, ip->i_number);
		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef DIAGNOSTIC
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 3");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %d,", blkno);
#endif
	}
#ifdef DEBUG
	if (prtrealloc) {
		prtrealloc--;
		printf("\n");
	}
#endif
	return (0);

fail:
	if (ssize < len)
		brelse(ebp);
	if (sbap != &ip->i_din1->di_db[0])
		brelse(sbp);
	return (ENOSPC);
}

static int
ffs_reallocblks_ufs2(ap)
	struct vop_reallocblks_args /* {
		struct vnode *a_vp;
		struct cluster_save *a_buflist;
	} */ *ap;
{
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp;
	struct buf *sbp, *ebp;
	ufs2_daddr_t *bap, *sbap, *ebap = 0;
	struct cluster_save *buflist;
	ufs_lbn_t start_lbn, end_lbn;
	ufs2_daddr_t soff, newblk, blkno, pref;
	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
	int i, len, start_lvl, end_lvl, ssize;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;
	if (fs->fs_contigsumsize <= 0)
		return (ENOSPC);
	buflist = ap->a_buflist;
	len = buflist->bs_nchildren;
	start_lbn = buflist->bs_children[0]->b_lblkno;
	end_lbn = start_lbn + len - 1;
#ifdef DIAGNOSTIC
	for (i = 0; i < len; i++)
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 1");
	for (i = 1; i < len; i++)
		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
			panic("ffs_reallocblks: non-logical cluster");
	blkno = buflist->bs_children[0]->b_blkno;
	ssize = fsbtodb(fs, fs->fs_frag);
	for (i = 1; i < len - 1; i++)
		if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
			panic("ffs_reallocblks: non-physical cluster %d", i);
#endif
	/*
	 * If the latest allocation is in a new cylinder group, assume that
	 * the filesystem has decided to move and do not force it back to
	 * the previous cylinder group.
	 */
	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
		return (ENOSPC);
	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
		return (ENOSPC);
	/*
	 * Get the starting offset and block map for the first block.
	 */
	if (start_lvl == 0) {
		sbap = &ip->i_din2->di_db[0];
		soff = start_lbn;
	} else {
		idp = &start_ap[start_lvl - 1];
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
			brelse(sbp);
			return (ENOSPC);
		}
		sbap = (ufs2_daddr_t *)sbp->b_data;
		soff = idp->in_off;
	}
	/*
	 * Find the preferred location for the cluster.
	 */
	pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap);
	/*
	 * If the block range spans two block maps, get the second map.
	 */
	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
		ssize = len;
	} else {
#ifdef DIAGNOSTIC
		if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
			panic("ffs_reallocblk: start == end");
#endif
		ssize = len - (idp->in_off + 1);
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
			goto fail;
		ebap = (ufs2_daddr_t *)ebp->b_data;
	}
	/*
	 * Search the block map looking for an allocation of the desired size.
	 */
	if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
	    len, ffs_clusteralloc)) == 0)
		goto fail;
	/*
	 * We have found a new contiguous block.
	 *
	 * First we have to replace the old block pointers with the new
	 * block pointers in the inode and indirect blocks associated
	 * with the file.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
		    (intmax_t)start_lbn, (intmax_t)end_lbn);
#endif
	blkno = newblk;
	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (i == ssize) {
			bap = ebap;
			soff = -i;
		}
#ifdef DIAGNOSTIC
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 2");
		if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
			panic("ffs_reallocblks: alloc mismatch");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %jd,", (intmax_t)*bap);
#endif
		if (DOINGSOFTDEP(vp)) {
			if (sbap == &ip->i_din2->di_db[0] && i < ssize)
				softdep_setup_allocdirect(ip, start_lbn + i,
				    blkno, *bap, fs->fs_bsize, fs->fs_bsize,
				    buflist->bs_children[i]);
			else
				softdep_setup_allocindir_page(ip, start_lbn + i,
				    i < ssize ? sbp : ebp, soff + i, blkno,
				    *bap, buflist->bs_children[i]);
		}
		*bap++ = blkno;
	}
	/*
	 * Next we must write out the modified inode and indirect blocks.
	 * For strict correctness, the writes should be synchronous since
	 * the old block values may have been written to disk. In practice
	 * they are almost never written, but if we are concerned about
	 * strict correctness, the `doasyncfree' flag should be set to zero.
	 *
	 * The test on `doasyncfree' should be changed to test a flag
	 * that shows whether the associated buffers and inodes have
	 * been written. The flag should be set when the cluster is
	 * started and cleared whenever the buffer or inode is flushed.
	 * We can then check below to see if it is set, and do the
	 * synchronous write only when it has been cleared.
	 */
	if (sbap != &ip->i_din2->di_db[0]) {
		if (doasyncfree)
			bdwrite(sbp);
		else
			bwrite(sbp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (!doasyncfree)
			UFS_UPDATE(vp, 1);
	}
	if (ssize < len) {
		if (doasyncfree)
			bdwrite(ebp);
		else
			bwrite(ebp);
	}
	/*
	 * Last, free the old blocks and assign the new blocks to the buffers.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("\n\tnew:");
#endif
	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (!DOINGSOFTDEP(vp))
			ffs_blkfree(fs, ip->i_devvp,
			    dbtofsb(fs, buflist->bs_children[i]->b_blkno),
			    fs->fs_bsize, ip->i_number);
		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef DIAGNOSTIC
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 3");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %jd,", (intmax_t)blkno);
#endif
	}
#ifdef DEBUG
	if (prtrealloc) {
		prtrealloc--;
		printf("\n");
	}
#endif
	return (0);

fail:
	if (ssize < len)
		brelse(ebp);
	if (sbap != &ip->i_din2->di_db[0])
		brelse(sbp);
	return (ENOSPC);
}

/*
 * Allocate an inode in the filesystem.
 *
 * If allocating a directory, use ffs_dirpref to select the inode.
 * If allocating in a directory, the following hierarchy is followed:
 *   1) allocate the preferred inode.
 *   2) allocate an inode in the same cylinder group.
 *   3) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 *   1) allocate an inode in cylinder group 0.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 */
int
ffs_valloc(pvp, mode, cred, vpp)
	struct vnode *pvp;
	int mode;
	struct ucred *cred;
	struct vnode **vpp;
{
	struct inode *pip;
	struct fs *fs;
	struct inode *ip;
	struct timespec ts;
	ino_t ino, ipref;
	int cg, error;

	*vpp = NULL;
	pip = VTOI(pvp);
	fs = pip->i_fs;
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;

	if ((mode & IFMT) == IFDIR)
		ipref = ffs_dirpref(pip);
	else
		ipref = pip->i_number;
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = ino_to_cg(fs, ipref);
	/*
	 * Track the number of dirs created one after another
	 * in the same cg without intervening allocation of files.
	 */
	if ((mode & IFMT) == IFDIR) {
		if (fs->fs_contigdirs[cg] < 255)
			fs->fs_contigdirs[cg]++;
	} else {
		if (fs->fs_contigdirs[cg] > 0)
			fs->fs_contigdirs[cg]--;
	}
	ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode,
					(allocfcn_t *)ffs_nodealloccg);
	if (ino == 0)
		goto noinodes;
	error = VFS_VGET(pvp->v_mount, ino, LK_EXCLUSIVE, vpp);
	if (error) {
		UFS_VFREE(pvp, ino, mode);
		return (error);
	}
	ip = VTOI(*vpp);
	if (ip->i_mode) {
		printf("mode = 0%o, inum = %lu, fs = %s\n",
		    ip->i_mode, (u_long)ip->i_number, fs->fs_fsmnt);
		panic("ffs_valloc: dup alloc");
	}
	if (DIP(ip, i_blocks) && (fs->fs_flags & FS_UNCLEAN) == 0) {  /* XXX */
		printf("free inode %s/%lu had %ld blocks\n",
		    fs->fs_fsmnt, (u_long)ino, (long)DIP(ip, i_blocks));
		DIP(ip, i_blocks) = 0;
	}
	ip->i_flags = 0;
	DIP(ip, i_flags) = 0;
	/*
	 * Set up a new generation number for this inode.
	 */
	if (ip->i_gen == 0 || ++ip->i_gen == 0)
		ip->i_gen = arc4random() / 2 + 1;
	DIP(ip, i_gen) = ip->i_gen;
	if (fs->fs_magic == FS_UFS2_MAGIC) {
		vfs_timestamp(&ts);
		ip->i_din2->di_birthtime = ts.tv_sec;
		ip->i_din2->di_birthnsec = ts.tv_nsec;
	}
	return (0);
noinodes:
	ffs_fserr(fs, pip->i_number, "out of inodes");
	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
	return (ENOSPC);
}

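/*
 * Illustrative sketch, not compiled into the kernel: the generation
 * number policy above.  i_gen is embedded in NFS file handles to detect
 * stale handles, so a freshly allocated inode should not expose a zero
 * generation; arc4random() / 2 + 1 yields a value in [1, 2^31].  The
 * name example_newgen is ours, for illustration only.
 */
#if 0
static u_int32_t
example_newgen(u_int32_t gen)
{

	if (gen == 0 || ++gen == 0)	/* never hand out generation 0 */
		gen = arc4random() / 2 + 1;
	return (gen);
}
#endif
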
/*
 * Find a cylinder group to place a directory.
 *
 * The policy implemented by this algorithm is to allocate a
 * directory inode in the same cylinder group as its parent
 * directory, but also to reserve space for its files' inodes
 * and data. Restrict the number of directories which may be
 * allocated one after another in the same cylinder group
 * without intervening allocation of files.
 *
 * If we allocate a first level directory then force allocation
 * in another cylinder group.
 */
static ino_t
ffs_dirpref(pip)
	struct inode *pip;
{
	struct fs *fs;
	int cg, prefcg, dirsize, cgsize;
	int avgifree, avgbfree, avgndir, curdirsize;
	int minifree, minbfree, maxndir;
	int mincg, minndir;
	int maxcontigdirs;

	fs = pip->i_fs;

	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
	avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;

	/*
	 * Force allocation in another cg if creating a first level dir.
	 */
	ASSERT_VOP_LOCKED(ITOV(pip), "ffs_dirpref");
	if (ITOV(pip)->v_vflag & VV_ROOT) {
		prefcg = arc4random() % fs->fs_ncg;
		mincg = prefcg;
		minndir = fs->fs_ipg;
		for (cg = prefcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		for (cg = 0; cg < prefcg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		return ((ino_t)(fs->fs_ipg * mincg));
	}

	/*
	 * Compute various limits which are used for
	 * optimal allocation of a directory inode.
	 */
	maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
	minifree = avgifree - fs->fs_ipg / 4;
	if (minifree < 0)
		minifree = 0;
	minbfree = avgbfree - fs->fs_fpg / fs->fs_frag / 4;
	if (minbfree < 0)
		minbfree = 0;
	cgsize = fs->fs_fsize * fs->fs_fpg;
	dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir;
	curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0;
	if (dirsize < curdirsize)
		dirsize = curdirsize;
	maxcontigdirs = min(cgsize / dirsize, 255);
	if (fs->fs_avgfpdir > 0)
		maxcontigdirs = min(maxcontigdirs,
				    fs->fs_ipg / fs->fs_avgfpdir);
	if (maxcontigdirs == 0)
		maxcontigdirs = 1;

	/*
	 * Limit number of dirs in one cg and reserve space for
	 * regular files, but only if we have no deficit in
	 * inodes or space.
	 */
	prefcg = ino_to_cg(fs, pip->i_number);
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				return ((ino_t)(fs->fs_ipg * cg));
		}
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				return ((ino_t)(fs->fs_ipg * cg));
		}
	/*
	 * This is a backstop for when we have a deficit of space.
	 */
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			return ((ino_t)(fs->fs_ipg * cg));
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			break;
	return ((ino_t)(fs->fs_ipg * cg));
}

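/*
 * Illustrative sketch, not compiled into the kernel: how the
 * maxcontigdirs limit above works out for a hypothetical filesystem.
 * All values are made up for the example; none are read from a real
 * superblock.
 */
#if 0
static void
example_maxcontigdirs(void)
{
	int fsize = 2048, fpg = 47680, ipg = 11840;
	int avgfilesize = 16384, avgfpdir = 64;
	int cgsize = fsize * fpg;		/* bytes per cylinder group */
	int dirsize = avgfilesize * avgfpdir;	/* expected bytes per dir */
	int maxcontigdirs = cgsize / dirsize;	/* dirs that fit in one cg */

	if (maxcontigdirs > 255)
		maxcontigdirs = 255;	/* fs_contigdirs[] entries are 8-bit */
	if (avgfpdir > 0 && maxcontigdirs > ipg / avgfpdir)
		maxcontigdirs = ipg / avgfpdir;
	if (maxcontigdirs == 0)
		maxcontigdirs = 1;
	printf("allow %d consecutive directories per cg\n", maxcontigdirs);
}
#endif
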
/*
 * Select the desired position for the next block in a file.  The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks.  An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks. The end of one of these
 * contiguous blocks and the beginning of the next is laid out
 * contiguously if possible.
 */
ufs2_daddr_t
ffs_blkpref_ufs1(ip, lbn, indx, bap)
	struct inode *ip;
	ufs_lbn_t lbn;
	int indx;
	ufs1_daddr_t *bap;
{
	struct fs *fs;
	int cg;
	int avgbfree, startcg;

	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR + NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (fs->fs_fpg * cg + fs->fs_frag);
		}
		/*
		 * Find a cylinder group with a greater than average
		 * number of unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		return (0);
	}
	/*
	 * We just always try to lay things out contiguously.
	 */
	return (bap[indx - 1] + fs->fs_frag);
}

/*
 * Same as above, but for UFS2
 */
ufs2_daddr_t
ffs_blkpref_ufs2(ip, lbn, indx, bap)
	struct inode *ip;
	ufs_lbn_t lbn;
	int indx;
	ufs2_daddr_t *bap;
{
	struct fs *fs;
	int cg;
	int avgbfree, startcg;

	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR + NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (fs->fs_fpg * cg + fs->fs_frag);
		}
		/*
		 * Find a cylinder group with a greater than average
		 * number of unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		return (0);
	}
	/*
	 * We just always try to lay things out contiguously.
	 */
	return (bap[indx - 1] + fs->fs_frag);
}

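/*
 * Illustrative sketch, not compiled into the kernel: the core of the
 * preference policy above.  A file is divided into sections of
 * fs_maxbpg blocks; the first block of a section gets no preference
 * (forcing a sweep for a new cylinder group), while every other block
 * asks for the fragment right after its predecessor.  The name
 * example_blkpref is ours, for illustration only.
 */
#if 0
static int64_t
example_blkpref(int64_t prevblk, int indx, int maxbpg, int frag)
{

	if (indx % maxbpg == 0 || prevblk == 0)
		return (0);		/* new section: let the rotor pick */
	return (prevblk + frag);	/* otherwise stay contiguous */
}
#endif
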
/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 */
/*VARARGS5*/
static ufs2_daddr_t
ffs_hashalloc(ip, cg, pref, size, allocator)
	struct inode *ip;
	int cg;
	ufs2_daddr_t pref;
	int size;	/* size for data blocks, mode for inodes */
	allocfcn_t *allocator;
{
	struct fs *fs;
	ufs2_daddr_t result;
	int i, icg = cg;

#ifdef DIAGNOSTIC
	if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
		panic("ffs_hashalloc: allocation on suspended filesystem");
#endif
	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (0);
}

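/*
 * Illustrative sketch, not compiled into the kernel: the order in which
 * ffs_hashalloc() above probes cylinder groups.  For a starting cg of 0
 * in a 16-group filesystem it prints 0, then 1 3 7 15 (quadratic
 * rehash), then 2 3 ... 15 (brute force).  The name example_hash_order
 * is ours, for illustration only.
 */
#if 0
static void
example_hash_order(int icg, int ncg)
{
	int i, cg;

	printf("%d", icg);			/* 1: preferred group */
	for (cg = icg, i = 1; i < ncg; i *= 2) {
		cg = (cg + i) % ncg;		/* 2: quadratic rehash */
		printf(" %d", cg);
	}
	for (cg = (icg + 2) % ncg, i = 2; i < ncg; i++) {
		printf(" %d", cg);		/* 3: brute force sweep */
		cg = (cg + 1) % ncg;
	}
	printf("\n");
}
#endif
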
/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 */
static ufs2_daddr_t
ffs_fragextend(ip, cg, bprev, osize, nsize)
	struct inode *ip;
	int cg;
	ufs2_daddr_t bprev;
	int osize, nsize;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	long bno;
	int frags, bbase;
	int i, error;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
		return (0);
	frags = numfrags(fs, nsize);
	bbase = fragnum(fs, bprev);
	if (bbase > fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (0);
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (0);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (0);
	}
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_old_time = cgp->cg_time = time_second;
	bno = dtogd(fs, bprev);
	blksfree = cg_blksfree(cgp);
	for (i = numfrags(fs, osize); i < frags; i++)
		if (isclr(blksfree, bno + i)) {
			brelse(bp);
			return (0);
		}
	/*
	 * The current fragment can be extended:
	 * deduct the count on the fragment being extended into,
	 * increase the count on the remaining fragment (if any),
	 * and allocate the extended piece.
	 */
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(blksfree, bno + i))
			break;
	cgp->cg_frsum[i - numfrags(fs, osize)]--;
	if (i != frags)
		cgp->cg_frsum[i - frags]++;
	for (i = numfrags(fs, osize); i < frags; i++) {
		clrbit(blksfree, bno + i);
		cgp->cg_cs.cs_nffree--;
		fs->fs_cstotal.cs_nffree--;
		fs->fs_cs(fs, cg).cs_nffree--;
	}
	fs->fs_fmod = 1;
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, fs, bprev);
	if (fs->fs_active != 0)
		atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg));
	bdwrite(bp);
	return (bprev);
}

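/*
 * Illustrative sketch, not compiled into the kernel: the block-boundary
 * test above reduced to its essentials.  A fragment can only grow in
 * place while the new size stays inside its block: with fs_frag == 8, a
 * fragment starting at offset 6 within its block can occupy at most two
 * fragments.  The name example_can_extend is ours, for illustration.
 */
#if 0
static int
example_can_extend(int fragoff, int ofrags, int nfrags, int fs_frag)
{

	/* fragoff is fragnum(fs, bprev) in the code above. */
	return (nfrags >= ofrags && fragoff + nfrags <= fs_frag);
}
#endif
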
/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
static ufs2_daddr_t
ffs_alloccg(ip, cg, bpref, size)
	struct inode *ip;
	int cg;
	ufs2_daddr_t bpref;
	int size;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	ufs1_daddr_t bno;
	ufs2_daddr_t blkno;
	int i, allocsiz, error, frags;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (0);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (0);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
		brelse(bp);
		return (0);
	}
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_old_time = cgp->cg_time = time_second;
	if (size == fs->fs_bsize) {
		blkno = ffs_alloccgblk(ip, bp, bpref);
		if (fs->fs_active != 0)
			atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg));
		bdwrite(bp);
		return (blkno);
	}
	/*
	 * Check to see if any fragments are already available.
	 * allocsiz is the size which will be allocated; it is hacked
	 * down to a smaller size if necessary.
	 */
	blksfree = cg_blksfree(cgp);
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
		if (cgp->cg_cs.cs_nbfree == 0) {
			brelse(bp);
			return (0);
		}
		blkno = ffs_alloccgblk(ip, bp, bpref);
		bno = dtogd(fs, blkno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(blksfree, bno + i);
		i = fs->fs_frag - frags;
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod = 1;
		cgp->cg_frsum[i]++;
		if (fs->fs_active != 0)
			atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg));
		bdwrite(bp);
		return (blkno);
	}
	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
	if (bno < 0) {
		brelse(bp);
		return (0);
	}
	for (i = 0; i < frags; i++)
		clrbit(blksfree, bno + i);
	cgp->cg_cs.cs_nffree -= frags;
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod = 1;
	cgp->cg_frsum[allocsiz]--;
	if (frags != allocsiz)
		cgp->cg_frsum[allocsiz - frags]++;
	blkno = cg * fs->fs_fpg + bno;
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, fs, blkno);
	if (fs->fs_active != 0)
		atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg));
	bdwrite(bp);
	return (blkno);
}

/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate the next available block on the block rotor for the
 *      specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 */
static ufs2_daddr_t
ffs_alloccgblk(ip, bp, bpref)
	struct inode *ip;
	struct buf *bp;
	ufs2_daddr_t bpref;
{
	struct fs *fs;
	struct cg *cgp;
	ufs1_daddr_t bno;
	ufs2_daddr_t blkno;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	cgp = (struct cg *)bp->b_data;
	blksfree = cg_blksfree(cgp);
	if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
		bpref = cgp->cg_rotor;
	} else {
		bpref = blknum(fs, bpref);
		bno = dtogd(fs, bpref);
		/*
		 * if the requested block is available, use it
		 */
		if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
			goto gotit;
	}
	/*
	 * Take the next available block in this cylinder group.
	 */
	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (0);
	cgp->cg_rotor = bno;
gotit:
	blkno = fragstoblks(fs, bno);
	ffs_clrblock(fs, blksfree, (long)blkno);
	ffs_clusteracct(fs, cgp, blkno, -1);
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
	fs->fs_fmod = 1;
	blkno = cgp->cg_cgx * fs->fs_fpg + bno;
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, fs, blkno);
	return (blkno);
}

/*
 * Determine whether a cluster can be allocated.
 *
 * We do not currently check for optimal rotational layout if there
 * are multiple choices in the same cylinder group. Instead we just
 * take the first one that we find following bpref.
 */
static ufs2_daddr_t
ffs_clusteralloc(ip, cg, bpref, len)
	struct inode *ip;
	int cg;
	ufs2_daddr_t bpref;
	int len;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	int i, run, bit, map, got;
	ufs2_daddr_t bno;
	u_char *mapp;
	int32_t *lp;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	if (fs->fs_maxcluster[cg] < len)
		return (0);
	if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
	    NOCRED, &bp))
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp))
		goto fail;
	bp->b_xflags |= BX_BKGRDWRITE;
	/*
	 * Check to see if a cluster of the needed size (or bigger) is
	 * available in this cylinder group.
	 */
	lp = &cg_clustersum(cgp)[len];
	for (i = len; i <= fs->fs_contigsumsize; i++)
		if (*lp++ > 0)
			break;
	if (i > fs->fs_contigsumsize) {
		/*
		 * This is the first time looking for a cluster in this
		 * cylinder group. Update the cluster summary information
		 * to reflect the true maximum sized cluster so that
		 * future cluster allocation requests can avoid reading
		 * the cylinder group map only to find no clusters.
		 */
		lp = &cg_clustersum(cgp)[len - 1];
		for (i = len - 1; i > 0; i--)
			if (*lp-- > 0)
				break;
		fs->fs_maxcluster[cg] = i;
		goto fail;
	}
	/*
	 * Search the cluster map to find a big enough cluster.
	 * We take the first one that we find, even if it is larger
	 * than we need as we prefer to get one close to the previous
	 * block allocation. We do not search before the current
	 * preference point as we do not want to allocate a block
	 * that is allocated before the previous one (as we will
	 * then have to wait for another pass of the elevator
	 * algorithm before it will be read). We prefer to fail and
	 * be recalled to try an allocation in the next cylinder group.
	 */
	if (dtog(fs, bpref) != cg)
		bpref = 0;
	else
		bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
	mapp = &cg_clustersfree(cgp)[bpref / NBBY];
	map = *mapp++;
	bit = 1 << (bpref % NBBY);
	for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) {
		if ((map & bit) == 0) {
			run = 0;
		} else {
			run++;
			if (run == len)
				break;
		}
		if ((got & (NBBY - 1)) != (NBBY - 1)) {
			bit <<= 1;
		} else {
			map = *mapp++;
			bit = 1;
		}
	}
	if (got >= cgp->cg_nclusterblks)
		goto fail;
	/*
	 * Allocate the cluster that we have found.
	 */
	blksfree = cg_blksfree(cgp);
	for (i = 1; i <= len; i++)
		if (!ffs_isblock(fs, blksfree, got - run + i))
			panic("ffs_clusteralloc: map mismatch");
	bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1);
	if (dtog(fs, bno) != cg)
		panic("ffs_clusteralloc: allocated out of group");
	len = blkstofrags(fs, len);
	for (i = 0; i < len; i += fs->fs_frag)
		if (ffs_alloccgblk(ip, bp, bno + i) != bno + i)
			panic("ffs_clusteralloc: lost block");
	if (fs->fs_active != 0)
		atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg));
	bdwrite(bp);
	return (bno);

fail:
	brelse(bp);
	return (0);
}

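/*
 * Illustrative sketch, not compiled into the kernel: the run search in
 * ffs_clusteralloc() above, reduced to a plain bitmap scan.  It returns
 * the index of the last block in the first run of `len' free blocks at
 * or after `start', or -1 if none exists.  The name example_findrun is
 * ours, for illustration only.
 */
#if 0
static int
example_findrun(u_char *map, int nblks, int start, int len)
{
	int got, run;

	for (run = 0, got = start; got < nblks; got++) {
		if (isset(map, got)) {		/* set bit == free block */
			if (++run == len)
				return (got);
		} else
			run = 0;
	}
	return (-1);
}
#endif
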
/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it using the following policy:
 *   1) allocate the requested inode.
 *   2) allocate the next available inode after the requested
 *      inode in the specified cylinder group.
 */
static ufs2_daddr_t
ffs_nodealloccg(ip, cg, ipref, mode)
	struct inode *ip;
	int cg;
	ufs2_daddr_t ipref;
	int mode;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp, *ibp;
	u_int8_t *inosused;
	struct ufs2_dinode *dp2;
	int error, start, len, loc, map, i;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nifree == 0)
		return (0);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (0);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
		brelse(bp);
		return (0);
	}
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_old_time = cgp->cg_time = time_second;
	inosused = cg_inosused(cgp);
	if (ipref) {
		ipref %= fs->fs_ipg;
		if (isclr(inosused, ipref))
			goto gotit;
	}
	start = cgp->cg_irotor / NBBY;
	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
	loc = skpc(0xff, len, &inosused[start]);
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = skpc(0xff, len, &inosused[0]);
		if (loc == 0) {
			printf("cg = %d, irotor = %ld, fs = %s\n",
			    cg, (long)cgp->cg_irotor, fs->fs_fsmnt);
			panic("ffs_nodealloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	i = start + len - loc;
	map = inosused[i];
	ipref = i * NBBY;
	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
		if ((map & i) == 0) {
			cgp->cg_irotor = ipref;
			goto gotit;
		}
	}
	printf("fs = %s\n", fs->fs_fsmnt);
	panic("ffs_nodealloccg: block not in map");
	/* NOTREACHED */
gotit:
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);
	setbit(inosused, ipref);
	cgp->cg_cs.cs_nifree--;
	fs->fs_cstotal.cs_nifree--;
	fs->fs_cs(fs, cg).cs_nifree--;
	fs->fs_fmod = 1;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir++;
		fs->fs_cstotal.cs_ndir++;
		fs->fs_cs(fs, cg).cs_ndir++;
	}
	/*
	 * Check to see if we need to initialize more inodes.
	 */
	if (fs->fs_magic == FS_UFS2_MAGIC &&
	    ipref + INOPB(fs) > cgp->cg_initediblk &&
	    cgp->cg_initediblk < cgp->cg_niblk) {
		ibp = getblk(ip->i_devvp, fsbtodb(fs,
		    ino_to_fsba(fs, cg * fs->fs_ipg + cgp->cg_initediblk)),
		    (int)fs->fs_bsize, 0, 0, 0);
		bzero(ibp->b_data, (int)fs->fs_bsize);
		dp2 = (struct ufs2_dinode *)(ibp->b_data);
		for (i = 0; i < INOPB(fs); i++) {
			dp2->di_gen = arc4random() / 2 + 1;
			dp2++;
		}
		bawrite(ibp);
		cgp->cg_initediblk += INOPB(fs);
	}
	if (fs->fs_active != 0)
		atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg));
	bdwrite(bp);
	return (cg * fs->fs_ipg + ipref);
}

/*
 * check if a block is free
 */
static int
ffs_isfreeblock(struct fs *fs, u_char *cp, ufs1_daddr_t h)
{

	switch ((int)fs->fs_frag) {
	case 8:
		return (cp[h] == 0);
	case 4:
		return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0);
	case 2:
		return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0);
	case 1:
		return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0);
	default:
		panic("ffs_isfreeblock");
	}
	return (0);
}

/*
 * Free a block or fragment.
 *
 * The specified block or fragment is placed back in the
 * free map. If a fragment is deallocated, a possible
 * block reassembly is checked.
 */
void
ffs_blkfree(fs, devvp, bno, size, inum)
	struct fs *fs;
	struct vnode *devvp;
	ufs2_daddr_t bno;
	long size;
	ino_t inum;
{
	struct cg *cgp;
	struct buf *bp;
	ufs1_daddr_t fragno, cgbno;
	ufs2_daddr_t cgblkno;
	int i, error, cg, blk, frags, bbase;
	u_int8_t *blksfree;
	dev_t dev;

	cg = dtog(fs, bno);
	if (devvp->v_type != VCHR) {
		/* devvp is a snapshot */
		dev = VTOI(devvp)->i_devvp->v_rdev;
		cgblkno = fragstoblks(fs, cgtod(fs, cg));
	} else {
		/* devvp is a normal disk device */
		dev = devvp->v_rdev;
		cgblkno = fsbtodb(fs, cgtod(fs, cg));
		ASSERT_VOP_LOCKED(devvp, "ffs_blkfree");
		if ((devvp->v_vflag & VV_COPYONWRITE) &&
		    ffs_snapblkfree(fs, devvp, bno, size, inum))
			return;
		VOP_FREEBLKS(devvp, fsbtodb(fs, bno), size);
	}
#ifdef DIAGNOSTIC
	if (dev->si_mountpoint &&
	    (dev->si_mountpoint->mnt_kern_flag & MNTK_SUSPENDED))
		panic("ffs_blkfree: deallocation on suspended filesystem");
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
	    fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
		printf("dev=%s, bno = %jd, bsize = %ld, size = %ld, fs = %s\n",
		    devtoname(dev), (intmax_t)bno, (long)fs->fs_bsize,
		    size, fs->fs_fsmnt);
		panic("ffs_blkfree: bad size");
	}
#endif
	if ((u_int)bno >= fs->fs_size) {
		printf("bad block %jd, ino %lu\n", (intmax_t)bno,
		    (u_long)inum);
		ffs_fserr(fs, inum, "bad block");
		return;
	}
	if ((error = bread(devvp, cgblkno, (int)fs->fs_cgsize, NOCRED, &bp))) {
		brelse(bp);
		return;
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_old_time = cgp->cg_time = time_second;
	cgbno = dtogd(fs, bno);
	blksfree = cg_blksfree(cgp);
	if (size == fs->fs_bsize) {
		fragno = fragstoblks(fs, cgbno);
		if (!ffs_isfreeblock(fs, blksfree, fragno)) {
			if (devvp->v_type != VCHR) {
				/* devvp is a snapshot */
				brelse(bp);
				return;
			}
			printf("dev = %s, block = %jd, fs = %s\n",
			    devtoname(dev), (intmax_t)bno, fs->fs_fsmnt);
			panic("ffs_blkfree: freeing free block");
		}
		ffs_setblock(fs, blksfree, fragno);
		ffs_clusteracct(fs, cgp, fragno, 1);
		cgp->cg_cs.cs_nbfree++;
		fs->fs_cstotal.cs_nbfree++;
		fs->fs_cs(fs, cg).cs_nbfree++;
	} else {
		bbase = cgbno - fragnum(fs, cgbno);
		/*
		 * decrement the counts associated with the old frags
		 */
		blk = blkmap(fs, blksfree, bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
		/*
		 * deallocate the fragment
		 */
		frags = numfrags(fs, size);
		for (i = 0; i < frags; i++) {
			if (isset(blksfree, cgbno + i)) {
				printf("dev = %s, block = %jd, fs = %s\n",
				    devtoname(dev), (intmax_t)(bno + i),
				    fs->fs_fsmnt);
				panic("ffs_blkfree: freeing free frag");
			}
			setbit(blksfree, cgbno + i);
		}
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		/*
		 * add back in counts associated with the new frags
		 */
		blk = blkmap(fs, blksfree, bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
		/*
		 * if a complete block has been reassembled, account for it
		 */
		fragno = fragstoblks(fs, bbase);
		if (ffs_isblock(fs, blksfree, fragno)) {
			cgp->cg_cs.cs_nffree -= fs->fs_frag;
			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
			ffs_clusteracct(fs, cgp, fragno, 1);
			cgp->cg_cs.cs_nbfree++;
			fs->fs_cstotal.cs_nbfree++;
			fs->fs_cs(fs, cg).cs_nbfree++;
		}
	}
	fs->fs_fmod = 1;
	if (fs->fs_active != 0)
		atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg));
	bdwrite(bp);
}

1800 #ifdef DIAGNOSTIC
1801 /*
1802  * Verify allocation of a block or fragment. Returns true if block or
1803  * fragment is allocated, false if it is free.
1804  */
1805 static int
1806 ffs_checkblk(ip, bno, size)
1807 	struct inode *ip;
1808 	ufs2_daddr_t bno;
1809 	long size;
1810 {
1811 	struct fs *fs;
1812 	struct cg *cgp;
1813 	struct buf *bp;
1814 	ufs1_daddr_t cgbno;
1815 	int i, error, frags, free;
1816 	u_int8_t *blksfree;
1817 
1818 	fs = ip->i_fs;
1819 	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
1820 		printf("bsize = %ld, size = %ld, fs = %s\n",
1821 		    (long)fs->fs_bsize, size, fs->fs_fsmnt);
1822 		panic("ffs_checkblk: bad size");
1823 	}
1824 	if ((u_int)bno >= fs->fs_size)
1825 		panic("ffs_checkblk: bad block %jd", (intmax_t)bno);
1826 	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
1827 		(int)fs->fs_cgsize, NOCRED, &bp);
1828 	if (error)
1829 		panic("ffs_checkblk: cg bread failed");
1830 	cgp = (struct cg *)bp->b_data;
1831 	if (!cg_chkmagic(cgp))
1832 		panic("ffs_checkblk: cg magic mismatch");
1833 	bp->b_xflags |= BX_BKGRDWRITE;
1834 	blksfree = cg_blksfree(cgp);
1835 	cgbno = dtogd(fs, bno);
1836 	if (size == fs->fs_bsize) {
1837 		free = ffs_isblock(fs, blksfree, fragstoblks(fs, cgbno));
1838 	} else {
1839 		frags = numfrags(fs, size);
1840 		for (free = 0, i = 0; i < frags; i++)
1841 			if (isset(blksfree, cgbno + i))
1842 				free++;
1843 		if (free != 0 && free != frags)
1844 			panic("ffs_checkblk: partially free fragment");
1845 	}
1846 	brelse(bp);
1847 	return (!free);
1848 }
1849 #endif /* DIAGNOSTIC */
1850 
1851 /*
1852  * Free an inode.
1853  */
1854 int
1855 ffs_vfree(pvp, ino, mode)
1856 	struct vnode *pvp;
1857 	ino_t ino;
1858 	int mode;
1859 {
1860 	if (DOINGSOFTDEP(pvp)) {
1861 		softdep_freefile(pvp, ino, mode);
1862 		return (0);
1863 	}
1864 	return (ffs_freefile(VTOI(pvp)->i_fs, VTOI(pvp)->i_devvp, ino, mode));
1865 }
1866 
1867 /*
1868  * Do the actual free operation.
1869  * The specified inode is placed back in the free map.
1870  */
1871 int
1872 ffs_freefile(fs, devvp, ino, mode)
1873 	struct fs *fs;
1874 	struct vnode *devvp;
1875 	ino_t ino;
1876 	int mode;
1877 {
1878 	struct cg *cgp;
1879 	struct buf *bp;
1880 	ufs2_daddr_t cgbno;
1881 	int error, cg;
1882 	u_int8_t *inosused;
1883 	dev_t dev;
1884 
1885 	cg = ino_to_cg(fs, ino);
1886 	if (devvp->v_type != VCHR) {
1887 		/* devvp is a snapshot */
1888 		dev = VTOI(devvp)->i_devvp->v_rdev;
1889 		cgbno = fragstoblks(fs, cgtod(fs, cg));
1890 	} else {
1891 		/* devvp is a normal disk device */
1892 		dev = devvp->v_rdev;
1893 		cgbno = fsbtodb(fs, cgtod(fs, cg));
1894 	}
1895 	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
1896 		panic("ffs_freefile: range: dev = %s, ino = %lu, fs = %s",
1897 		    devtoname(dev), (u_long)ino, fs->fs_fsmnt);
1898 	if ((error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp))) {
1899 		brelse(bp);
1900 		return (error);
1901 	}
1902 	cgp = (struct cg *)bp->b_data;
1903 	if (!cg_chkmagic(cgp)) {
1904 		brelse(bp);
1905 		return (0);
1906 	}
1907 	bp->b_xflags |= BX_BKGRDWRITE;
1908 	cgp->cg_old_time = cgp->cg_time = time_second;
1909 	inosused = cg_inosused(cgp);
1910 	ino %= fs->fs_ipg;
1911 	if (isclr(inosused, ino)) {
1912 		printf("dev = %s, ino = %lu, fs = %s\n", devtoname(dev),
1913 		    (u_long)ino + cg * fs->fs_ipg, fs->fs_fsmnt);
1914 		if (fs->fs_ronly == 0)
1915 			panic("ffs_freefile: freeing free inode");
1916 	}
1917 	clrbit(inosused, ino);
1918 	if (ino < cgp->cg_irotor)
1919 		cgp->cg_irotor = ino;
1920 	cgp->cg_cs.cs_nifree++;
1921 	fs->fs_cstotal.cs_nifree++;
1922 	fs->fs_cs(fs, cg).cs_nifree++;
1923 	if ((mode & IFMT) == IFDIR) {
1924 		cgp->cg_cs.cs_ndir--;
1925 		fs->fs_cstotal.cs_ndir--;
1926 		fs->fs_cs(fs, cg).cs_ndir--;
1927 	}
1928 	fs->fs_fmod = 1;
1929 	if (fs->fs_active != 0)
1930 		atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg));
1931 	bdwrite(bp);
1932 	return (0);
1933 }
1934 
1935 /*
1936  * Check to see if a file is free.
1937  */
1938 int
1939 ffs_checkfreefile(fs, devvp, ino)
1940 	struct fs *fs;
1941 	struct vnode *devvp;
1942 	ino_t ino;
1943 {
1944 	struct cg *cgp;
1945 	struct buf *bp;
1946 	ufs2_daddr_t cgbno;
1947 	int error, ret, cg;
1948 	u_int8_t *inosused;
1949 
1950 	cg = ino_to_cg(fs, ino);
1951 	if (devvp->v_type != VCHR) {
1952 		/* devvp is a snapshot */
1953 		cgbno = fragstoblks(fs, cgtod(fs, cg));
1954 	} else {
1955 		/* devvp is a normal disk device */
1956 		cgbno = fsbtodb(fs, cgtod(fs, cg));
1957 	}
1958 	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
1959 		return (1);
1960 	if ((error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp))) {
1961 		brelse(bp);
1962 		return (1);
1963 	}
1964 	cgp = (struct cg *)bp->b_data;
1965 	if (!cg_chkmagic(cgp)) {
1966 		brelse(bp);
1967 		return (1);
1968 	}
1969 	inosused = cg_inosused(cgp);
1970 	ino %= fs->fs_ipg;
1971 	ret = isclr(inosused, ino);
1972 	brelse(bp);
1973 	return (ret);
1974 }
1975 
1976 /*
1977  * Find a block of the specified size in the specified cylinder group.
1978  *
1979  * It is a panic if a request is made to find a block when none are
1980  * available.
1981  */
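/*
 * Explanatory sketch (not in the original source): the scanc() calls
 * below consult fragtbl[fs->fs_frag], a table indexed by free-map byte
 * whose entries record which free-fragment run lengths occur within
 * that byte; the (fs->fs_frag % NBBY) shift picks the matching bit
 * position when several blocks share one map byte. Bytes that cannot
 * hold allocsiz contiguous free fragments are skipped wholesale before
 * the per-bit sifting loop runs.
 */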
1982 static ufs1_daddr_t
1983 ffs_mapsearch(fs, cgp, bpref, allocsiz)
1984 	struct fs *fs;
1985 	struct cg *cgp;
1986 	ufs2_daddr_t bpref;
1987 	int allocsiz;
1988 {
1989 	ufs1_daddr_t bno;
1990 	int start, len, loc, i;
1991 	int blk, field, subfield, pos;
1992 	u_int8_t *blksfree;
1993 
1994 	/*
1995 	 * find the fragment by searching through the free block
1996 	 * map for an appropriate bit pattern
1997 	 */
1998 	if (bpref)
1999 		start = dtogd(fs, bpref) / NBBY;
2000 	else
2001 		start = cgp->cg_frotor / NBBY;
2002 	blksfree = cg_blksfree(cgp);
2003 	len = howmany(fs->fs_fpg, NBBY) - start;
2004 	loc = scanc((u_int)len, (u_char *)&blksfree[start],
2005 		(u_char *)fragtbl[fs->fs_frag],
2006 		(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
2007 	if (loc == 0) {
2008 		len = start + 1;
2009 		start = 0;
2010 		loc = scanc((u_int)len, (u_char *)&blksfree[0],
2011 			(u_char *)fragtbl[fs->fs_frag],
2012 			(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
2013 		if (loc == 0) {
2014 			printf("start = %d, len = %d, fs = %s\n",
2015 			    start, len, fs->fs_fsmnt);
2016 			panic("ffs_mapsearch: map corrupted");
2017 			/* NOTREACHED */
2018 		}
2019 	}
2020 	bno = (start + len - loc) * NBBY;
2021 	cgp->cg_frotor = bno;
2022 	/*
2023 	 * found the byte in the map
2024 	 * sift through the bits to find the selected frag
2025 	 */
2026 	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
2027 		blk = blkmap(fs, blksfree, bno);
2028 		blk <<= 1;
2029 		field = around[allocsiz];
2030 		subfield = inside[allocsiz];
2031 		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
2032 			if ((blk & field) == subfield)
2033 				return (bno + pos);
2034 			field <<= 1;
2035 			subfield <<= 1;
2036 		}
2037 	}
2038 	printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt);
2039 	panic("ffs_mapsearch: block not in map");
2040 	return (-1);
2041 }
2042 
2043 /*
2044  * Update the cluster map because of an allocation or free.
2045  *
2046  * Cnt == 1 means free; cnt == -1 means allocating.
2047  */
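/*
 * Example (illustrative, assuming fs_contigsumsize >= 6): freeing a
 * block that bridges a two-block free run behind it and a three-block
 * free run ahead of it forms a single six-block cluster, so sump[6]
 * is incremented while the old sump[2] and sump[3] entries are each
 * decremented.
 */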
2048 void
2049 ffs_clusteracct(fs, cgp, blkno, cnt)
2050 	struct fs *fs;
2051 	struct cg *cgp;
2052 	ufs1_daddr_t blkno;
2053 	int cnt;
2054 {
2055 	int32_t *sump;
2056 	int32_t *lp;
2057 	u_char *freemapp, *mapp;
2058 	int i, start, end, forw, back, map, bit;
2059 
2060 	if (fs->fs_contigsumsize <= 0)
2061 		return;
2062 	freemapp = cg_clustersfree(cgp);
2063 	sump = cg_clustersum(cgp);
2064 	/*
2065 	 * Allocate or clear the actual block.
2066 	 */
2067 	if (cnt > 0)
2068 		setbit(freemapp, blkno);
2069 	else
2070 		clrbit(freemapp, blkno);
2071 	/*
2072 	 * Find the size of the cluster going forward.
2073 	 */
2074 	start = blkno + 1;
2075 	end = start + fs->fs_contigsumsize;
2076 	if (end >= cgp->cg_nclusterblks)
2077 		end = cgp->cg_nclusterblks;
2078 	mapp = &freemapp[start / NBBY];
2079 	map = *mapp++;
2080 	bit = 1 << (start % NBBY);
2081 	for (i = start; i < end; i++) {
2082 		if ((map & bit) == 0)
2083 			break;
2084 		if ((i & (NBBY - 1)) != (NBBY - 1)) {
2085 			bit <<= 1;
2086 		} else {
2087 			map = *mapp++;
2088 			bit = 1;
2089 		}
2090 	}
2091 	forw = i - start;
2092 	/*
2093 	 * Find the size of the cluster going backward.
2094 	 */
2095 	start = blkno - 1;
2096 	end = start - fs->fs_contigsumsize;
2097 	if (end < 0)
2098 		end = -1;
2099 	mapp = &freemapp[start / NBBY];
2100 	map = *mapp--;
2101 	bit = 1 << (start % NBBY);
2102 	for (i = start; i > end; i--) {
2103 		if ((map & bit) == 0)
2104 			break;
2105 		if ((i & (NBBY - 1)) != 0) {
2106 			bit >>= 1;
2107 		} else {
2108 			map = *mapp--;
2109 			bit = 1 << (NBBY - 1);
2110 		}
2111 	}
2112 	back = start - i;
2113 	/*
2114 	 * Account for old cluster and the possibly new forward and
2115 	 * back clusters.
2116 	 */
2117 	i = back + forw + 1;
2118 	if (i > fs->fs_contigsumsize)
2119 		i = fs->fs_contigsumsize;
2120 	sump[i] += cnt;
2121 	if (back > 0)
2122 		sump[back] -= cnt;
2123 	if (forw > 0)
2124 		sump[forw] -= cnt;
2125 	/*
2126 	 * Update cluster summary information.
2127 	 */
2128 	lp = &sump[fs->fs_contigsumsize];
2129 	for (i = fs->fs_contigsumsize; i > 0; i--)
2130 		if (*lp-- > 0)
2131 			break;
2132 	fs->fs_maxcluster[cgp->cg_cgx] = i;
2133 }
2134 
2135 /*
2136  * Fserr prints the name of a filesystem with an error diagnostic.
2137  *
2138  * The form of the error message is:
2139  *	fs: error message
2140  */
2141 static void
2142 ffs_fserr(fs, inum, cp)
2143 	struct fs *fs;
2144 	ino_t inum;
2145 	char *cp;
2146 {
2147 	struct thread *td = curthread;	/* XXX */
2148 	struct proc *p = td->td_proc;
2149 
2150 	log(LOG_ERR, "pid %d (%s), uid %d inumber %lu on %s: %s\n", p->p_pid,
2151 	    p->p_comm, td->td_ucred->cr_uid, (u_long)inum, fs->fs_fsmnt, cp);
2152 }
2153 
2154 /*
2155  * This function provides the capability for the fsck program to
2156  * update an active filesystem. Six operations are provided:
2157  *
2158  * adjrefcnt(inode, amt) - adjusts the reference count on the
2159  *	specified inode by the specified amount. Under normal
2160  *	operation the count should always go down. Decrementing
2161  *	the count to zero will cause the inode to be freed.
2162  * adjblkcnt(inode, amt) - adjust the number of blocks used by the
2163  *	specified inode by the specified amount.
2164  * freedirs(inode, count) - directory inodes [inode..inode + count - 1]
2165  *	are marked as free. Inodes should never have to be marked
2166  *	as in use.
2167  * freefiles(inode, count) - file inodes [inode..inode + count - 1]
2168  *	are marked as free. Inodes should never have to be marked
2169  *	as in use.
2170  * freeblks(blockno, size) - blocks [blockno..blockno + size - 1]
2171  *	are marked as free. Blocks should never have to be marked
2172  *	as in use.
2173  * setflags(flags, set/clear) - the fs_flags field has the specified
2174  *	flags set (second parameter +1) or cleared (second parameter -1).
2175  */
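
/*
 * Illustrative sketch (not part of the original file): a userland
 * fsck-style tool could drive one of these operations through
 * sysctl(3) roughly as follows, given the fsck_cmd layout from
 * <ufs/ffs/fs.h> and "fd" open on a file inside the filesystem:
 *
 *	struct fsck_cmd cmd;
 *
 *	bzero(&cmd, sizeof(cmd));
 *	cmd.version = FFS_CMD_VERSION;
 *	cmd.handle = fd;
 *	cmd.value = ino;		(inode to adjust)
 *	cmd.size = -1;			(drop its reference count by one)
 *	if (sysctlbyname("vfs.ffs.adjrefcnt", 0, 0, &cmd,
 *	    sizeof(cmd)) == -1)
 *		err(1, "adjrefcnt");
 */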
2176 
2177 static int sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS);
2178 
2179 SYSCTL_PROC(_vfs_ffs, FFS_ADJ_REFCNT, adjrefcnt, CTLFLAG_WR|CTLTYPE_STRUCT,
2180 	0, 0, sysctl_ffs_fsck, "S,fsck", "Adjust Inode Reference Count");
2181 
2182 SYSCTL_NODE(_vfs_ffs, FFS_ADJ_BLKCNT, adjblkcnt, CTLFLAG_WR,
2183 	sysctl_ffs_fsck, "Adjust Inode Used Blocks Count");
2184 
2185 SYSCTL_NODE(_vfs_ffs, FFS_DIR_FREE, freedirs, CTLFLAG_WR,
2186 	sysctl_ffs_fsck, "Free Range of Directory Inodes");
2187 
2188 SYSCTL_NODE(_vfs_ffs, FFS_FILE_FREE, freefiles, CTLFLAG_WR,
2189 	sysctl_ffs_fsck, "Free Range of File Inodes");
2190 
2191 SYSCTL_NODE(_vfs_ffs, FFS_BLK_FREE, freeblks, CTLFLAG_WR,
2192 	sysctl_ffs_fsck, "Free Range of Blocks");
2193 
2194 SYSCTL_NODE(_vfs_ffs, FFS_SET_FLAGS, setflags, CTLFLAG_WR,
2195 	sysctl_ffs_fsck, "Change Filesystem Flags");
2196 
2197 #ifdef DEBUG
2198 static int fsckcmds = 0;
2199 SYSCTL_INT(_debug, OID_AUTO, fsckcmds, CTLFLAG_RW, &fsckcmds, 0, "");
2200 #endif /* DEBUG */
2201 
2202 static int
2203 sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
2204 {
2205 	struct fsck_cmd cmd;
2206 	struct ufsmount *ump;
2207 	struct vnode *vp;
2208 	struct inode *ip;
2209 	struct mount *mp;
2210 	struct fs *fs;
2211 	ufs2_daddr_t blkno;
2212 	long blkcnt, blksize;
2213 	struct file *fp;
2214 	int filetype, error;
2215 
2216 	if (req->newlen > sizeof cmd)
2217 		return (EBADRPC);
2218 	if ((error = SYSCTL_IN(req, &cmd, sizeof cmd)) != 0)
2219 		return (error);
2220 	if (cmd.version != FFS_CMD_VERSION)
2221 		return (ERPCMISMATCH);
2222 	if ((error = getvnode(curproc->p_fd, cmd.handle, &fp)) != 0)
2223 		return (error);
2224 	vn_start_write(fp->f_data, &mp, V_WAIT);
2225 	if (mp == 0 || strncmp(mp->mnt_stat.f_fstypename, "ufs", MFSNAMELEN)) {
2226 		vn_finished_write(mp);
2227 		fdrop(fp, curthread);
2228 		return (EINVAL);
2229 	}
2230 	if (mp->mnt_flag & MNT_RDONLY) {
2231 		vn_finished_write(mp);
2232 		fdrop(fp, curthread);
2233 		return (EROFS);
2234 	}
2235 	ump = VFSTOUFS(mp);
2236 	fs = ump->um_fs;
2237 	filetype = IFREG;
2238 
2239 	switch (oidp->oid_number) {
2240 
2241 	case FFS_SET_FLAGS:
2242 #ifdef DEBUG
2243 		if (fsckcmds)
2244 			printf("%s: %s flags\n", mp->mnt_stat.f_mntonname,
2245 			    cmd.size > 0 ? "set" : "clear");
2246 #endif /* DEBUG */
2247 		if (cmd.size > 0)
2248 			fs->fs_flags |= (long)cmd.value;
2249 		else
2250 			fs->fs_flags &= ~(long)cmd.value;
2251 		break;
2252 
2253 	case FFS_ADJ_REFCNT:
2254 #ifdef DEBUG
2255 		if (fsckcmds) {
2256 			printf("%s: adjust inode %jd count by %jd\n",
2257 			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value,
2258 			    (intmax_t)cmd.size);
2259 		}
2260 #endif /* DEBUG */
2261 		if ((error = VFS_VGET(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp)))
2262 			break;
2263 		ip = VTOI(vp);
2264 		ip->i_nlink += cmd.size;
2265 		DIP(ip, i_nlink) = ip->i_nlink;
2266 		ip->i_effnlink += cmd.size;
2267 		ip->i_flag |= IN_CHANGE;
2268 		if (DOINGSOFTDEP(vp))
2269 			softdep_change_linkcnt(ip);
2270 		vput(vp);
2271 		break;
2272 
2273 	case FFS_ADJ_BLKCNT:
2274 #ifdef DEBUG
2275 		if (fsckcmds) {
2276 			printf("%s: adjust inode %jd block count by %jd\n",
2277 			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value,
2278 			    (intmax_t)cmd.size);
2279 		}
2280 #endif /* DEBUG */
2281 		if ((error = VFS_VGET(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp)))
2282 			break;
2283 		ip = VTOI(vp);
2284 		DIP(ip, i_blocks) += cmd.size;
2285 		ip->i_flag |= IN_CHANGE;
2286 		vput(vp);
2287 		break;
2288 
2289 	case FFS_DIR_FREE:
2290 		filetype = IFDIR;
2291 		/* fall through */
2292 
2293 	case FFS_FILE_FREE:
2294 #ifdef DEBUG
2295 		if (fsckcmds) {
2296 			if (cmd.size == 1)
2297 				printf("%s: free %s inode %jd\n",
2298 				    mp->mnt_stat.f_mntonname,
2299 				    filetype == IFDIR ? "directory" : "file",
2300 				    (intmax_t)cmd.value);
2301 			else
2302 				printf("%s: free %s inodes %jd-%jd\n",
2303 				    mp->mnt_stat.f_mntonname,
2304 				    filetype == IFDIR ? "directory" : "file",
2305 				    (intmax_t)cmd.value,
2306 				    (intmax_t)(cmd.value + cmd.size - 1));
2307 		}
2308 #endif /* DEBUG */
2309 		while (cmd.size > 0) {
2310 			if ((error = ffs_freefile(fs, ump->um_devvp, cmd.value,
2311 			    filetype)))
2312 				break;
2313 			cmd.size -= 1;
2314 			cmd.value += 1;
2315 		}
2316 		break;
2317 
2318 	case FFS_BLK_FREE:
2319 #ifdef DEBUG
2320 		if (fsckcmds) {
2321 			if (cmd.size == 1)
2322 				printf("%s: free block %jd\n",
2323 				    mp->mnt_stat.f_mntonname,
2324 				    (intmax_t)cmd.value);
2325 			else
2326 				printf("%s: free blocks %jd-%jd\n",
2327 				    mp->mnt_stat.f_mntonname,
2328 				    (intmax_t)cmd.value,
2329 				    (intmax_t)cmd.value + cmd.size - 1);
2330 		}
2331 #endif /* DEBUG */
2332 		blkno = cmd.value;
2333 		blkcnt = cmd.size;
2334 		blksize = fs->fs_frag - (blkno % fs->fs_frag);
2335 		while (blkcnt > 0) {
2336 			if (blksize > blkcnt)
2337 				blksize = blkcnt;
2338 			ffs_blkfree(fs, ump->um_devvp, blkno,
2339 			    blksize * fs->fs_fsize, ROOTINO);
2340 			blkno += blksize;
2341 			blkcnt -= blksize;
2342 			blksize = fs->fs_frag;
2343 		}
2344 		break;
2345 
2346 	default:
2347 #ifdef DEBUG
2348 		if (fsckcmds) {
2349 			printf("Invalid request %d from fsck\n",
2350 			    oidp->oid_number);
2351 		}
2352 #endif /* DEBUG */
2353 		error = EINVAL;
2354 		break;
2355 
2356 	}
2357 	fdrop(fp, curthread);
2358 	vn_finished_write(mp);
2359 	return (error);
2360 }
2361