xref: /linux/fs/ext2/balloc.c (revision f3d9478b2ce468c3115b02ecae7e975990697f15)
1 /*
2  *  linux/fs/ext2/balloc.c
3  *
4  * Copyright (C) 1992, 1993, 1994, 1995
5  * Remy Card (card@masi.ibp.fr)
6  * Laboratoire MASI - Institut Blaise Pascal
7  * Universite Pierre et Marie Curie (Paris VI)
8  *
9  *  Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
10  *  Big-endian to little-endian byte-swapping/bitmaps by
11  *        David S. Miller (davem@caip.rutgers.edu), 1995
12  */
13 
14 #include <linux/config.h>
15 #include "ext2.h"
16 #include <linux/quotaops.h>
17 #include <linux/sched.h>
18 #include <linux/buffer_head.h>
19 #include <linux/capability.h>
20 
21 /*
22  * balloc.c contains the blocks allocation and deallocation routines
23  */
24 
/*
 * The free blocks are managed by bitmaps.  A file system contains several
 * block groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
 * block for inodes, N blocks for the inode table and data blocks.
 *
 * The file system contains group descriptors which are located after the
 * super block.  Each descriptor contains the number of the bitmap block and
 * the free blocks count of the group.  The descriptors are loaded in memory
 * when a file system is mounted (see ext2_read_super).
 */
35 
36 
/*
 * in_range(b, first, len) is true when b lies in the closed interval
 * [first, first + len - 1], i.e. among the len values starting at first.
 * Note that b and first are each evaluated twice.
 */
#define in_range(b, first, len) \
	(((b) >= (first)) && ((b) <= ((first) + (len) - 1)))
38 
39 struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
40 					     unsigned int block_group,
41 					     struct buffer_head ** bh)
42 {
43 	unsigned long group_desc;
44 	unsigned long offset;
45 	struct ext2_group_desc * desc;
46 	struct ext2_sb_info *sbi = EXT2_SB(sb);
47 
48 	if (block_group >= sbi->s_groups_count) {
49 		ext2_error (sb, "ext2_get_group_desc",
50 			    "block_group >= groups_count - "
51 			    "block_group = %d, groups_count = %lu",
52 			    block_group, sbi->s_groups_count);
53 
54 		return NULL;
55 	}
56 
57 	group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb);
58 	offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1);
59 	if (!sbi->s_group_desc[group_desc]) {
60 		ext2_error (sb, "ext2_get_group_desc",
61 			    "Group descriptor not loaded - "
62 			    "block_group = %d, group_desc = %lu, desc = %lu",
63 			     block_group, group_desc, offset);
64 		return NULL;
65 	}
66 
67 	desc = (struct ext2_group_desc *) sbi->s_group_desc[group_desc]->b_data;
68 	if (bh)
69 		*bh = sbi->s_group_desc[group_desc];
70 	return desc + offset;
71 }
72 
73 /*
74  * Read the bitmap for a given block_group, reading into the specified
75  * slot in the superblock's bitmap cache.
76  *
77  * Return buffer_head on success or NULL in case of failure.
78  */
79 static struct buffer_head *
80 read_block_bitmap(struct super_block *sb, unsigned int block_group)
81 {
82 	struct ext2_group_desc * desc;
83 	struct buffer_head * bh = NULL;
84 
85 	desc = ext2_get_group_desc (sb, block_group, NULL);
86 	if (!desc)
87 		goto error_out;
88 	bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
89 	if (!bh)
90 		ext2_error (sb, "read_block_bitmap",
91 			    "Cannot read block bitmap - "
92 			    "block_group = %d, block_bitmap = %u",
93 			    block_group, le32_to_cpu(desc->bg_block_bitmap));
94 error_out:
95 	return bh;
96 }
97 
98 /*
99  * Set sb->s_dirt here because the superblock was "logically" altered.  We
100  * need to recalculate its free blocks count and flush it out.
101  */
102 static int reserve_blocks(struct super_block *sb, int count)
103 {
104 	struct ext2_sb_info *sbi = EXT2_SB(sb);
105 	struct ext2_super_block *es = sbi->s_es;
106 	unsigned free_blocks;
107 	unsigned root_blocks;
108 
109 	free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
110 	root_blocks = le32_to_cpu(es->s_r_blocks_count);
111 
112 	if (free_blocks < count)
113 		count = free_blocks;
114 
115 	if (free_blocks < root_blocks + count && !capable(CAP_SYS_RESOURCE) &&
116 	    sbi->s_resuid != current->fsuid &&
117 	    (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
118 		/*
119 		 * We are too close to reserve and we are not privileged.
120 		 * Can we allocate anything at all?
121 		 */
122 		if (free_blocks > root_blocks)
123 			count = free_blocks - root_blocks;
124 		else
125 			return 0;
126 	}
127 
128 	percpu_counter_mod(&sbi->s_freeblocks_counter, -count);
129 	sb->s_dirt = 1;
130 	return count;
131 }
132 
133 static void release_blocks(struct super_block *sb, int count)
134 {
135 	if (count) {
136 		struct ext2_sb_info *sbi = EXT2_SB(sb);
137 
138 		percpu_counter_mod(&sbi->s_freeblocks_counter, count);
139 		sb->s_dirt = 1;
140 	}
141 }
142 
143 static int group_reserve_blocks(struct ext2_sb_info *sbi, int group_no,
144 	struct ext2_group_desc *desc, struct buffer_head *bh, int count)
145 {
146 	unsigned free_blocks;
147 
148 	if (!desc->bg_free_blocks_count)
149 		return 0;
150 
151 	spin_lock(sb_bgl_lock(sbi, group_no));
152 	free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
153 	if (free_blocks < count)
154 		count = free_blocks;
155 	desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count);
156 	spin_unlock(sb_bgl_lock(sbi, group_no));
157 	mark_buffer_dirty(bh);
158 	return count;
159 }
160 
161 static void group_release_blocks(struct super_block *sb, int group_no,
162 	struct ext2_group_desc *desc, struct buffer_head *bh, int count)
163 {
164 	if (count) {
165 		struct ext2_sb_info *sbi = EXT2_SB(sb);
166 		unsigned free_blocks;
167 
168 		spin_lock(sb_bgl_lock(sbi, group_no));
169 		free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
170 		desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count);
171 		spin_unlock(sb_bgl_lock(sbi, group_no));
172 		sb->s_dirt = 1;
173 		mark_buffer_dirty(bh);
174 	}
175 }
176 
177 /* Free given blocks, update quota and i_blocks field */
178 void ext2_free_blocks (struct inode * inode, unsigned long block,
179 		       unsigned long count)
180 {
181 	struct buffer_head *bitmap_bh = NULL;
182 	struct buffer_head * bh2;
183 	unsigned long block_group;
184 	unsigned long bit;
185 	unsigned long i;
186 	unsigned long overflow;
187 	struct super_block * sb = inode->i_sb;
188 	struct ext2_sb_info * sbi = EXT2_SB(sb);
189 	struct ext2_group_desc * desc;
190 	struct ext2_super_block * es = sbi->s_es;
191 	unsigned freed = 0, group_freed;
192 
193 	if (block < le32_to_cpu(es->s_first_data_block) ||
194 	    block + count < block ||
195 	    block + count > le32_to_cpu(es->s_blocks_count)) {
196 		ext2_error (sb, "ext2_free_blocks",
197 			    "Freeing blocks not in datazone - "
198 			    "block = %lu, count = %lu", block, count);
199 		goto error_return;
200 	}
201 
202 	ext2_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);
203 
204 do_more:
205 	overflow = 0;
206 	block_group = (block - le32_to_cpu(es->s_first_data_block)) /
207 		      EXT2_BLOCKS_PER_GROUP(sb);
208 	bit = (block - le32_to_cpu(es->s_first_data_block)) %
209 		      EXT2_BLOCKS_PER_GROUP(sb);
210 	/*
211 	 * Check to see if we are freeing blocks across a group
212 	 * boundary.
213 	 */
214 	if (bit + count > EXT2_BLOCKS_PER_GROUP(sb)) {
215 		overflow = bit + count - EXT2_BLOCKS_PER_GROUP(sb);
216 		count -= overflow;
217 	}
218 	brelse(bitmap_bh);
219 	bitmap_bh = read_block_bitmap(sb, block_group);
220 	if (!bitmap_bh)
221 		goto error_return;
222 
223 	desc = ext2_get_group_desc (sb, block_group, &bh2);
224 	if (!desc)
225 		goto error_return;
226 
227 	if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
228 	    in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
229 	    in_range (block, le32_to_cpu(desc->bg_inode_table),
230 		      sbi->s_itb_per_group) ||
231 	    in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
232 		      sbi->s_itb_per_group))
233 		ext2_error (sb, "ext2_free_blocks",
234 			    "Freeing blocks in system zones - "
235 			    "Block = %lu, count = %lu",
236 			    block, count);
237 
238 	for (i = 0, group_freed = 0; i < count; i++) {
239 		if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
240 						bit + i, bitmap_bh->b_data)) {
241 			ext2_error(sb, __FUNCTION__,
242 				"bit already cleared for block %lu", block + i);
243 		} else {
244 			group_freed++;
245 		}
246 	}
247 
248 	mark_buffer_dirty(bitmap_bh);
249 	if (sb->s_flags & MS_SYNCHRONOUS)
250 		sync_dirty_buffer(bitmap_bh);
251 
252 	group_release_blocks(sb, block_group, desc, bh2, group_freed);
253 	freed += group_freed;
254 
255 	if (overflow) {
256 		block += count;
257 		count = overflow;
258 		goto do_more;
259 	}
260 error_return:
261 	brelse(bitmap_bh);
262 	release_blocks(sb, freed);
263 	DQUOT_FREE_BLOCK(inode, freed);
264 }
265 
/*
 * Find a clear bit in the bitmap map (size bits long), starting the search
 * at bit goal, and set it atomically under lock.
 *
 * Search order:
 *   1. the goal bit itself;
 *   2. if goal is non-zero, the bits from goal up to the next 64-bit
 *      boundary;
 *   3. a completely free byte from goal's byte onwards, backing up to the
 *      first bit of that free run;
 *   4. any free bit from goal onwards.
 *
 * If ext2_set_bit_atomic() reports that the chosen bit was already set,
 * the search is restarted from that bit.
 *
 * Returns the index of the bit that was set, or -1 if no free bit was
 * found.
 */
static int grab_block(spinlock_t *lock, char *map, unsigned size, int goal)
{
	int k;
	char *p, *r;

	if (!ext2_test_bit(goal, map))
		goto got_it;

repeat:
	if (goal) {
		/*
		 * The goal was occupied; search forward for a free
		 * block within the next XX blocks.
		 *
		 * end_goal is more or less random, but it has to be
		 * less than EXT2_BLOCKS_PER_GROUP. Aligning up to the
		 * next 64-bit boundary is simple..
		 */
		k = (goal + 63) & ~63;
		goal = ext2_find_next_zero_bit(map, k, goal);
		if (goal < k)
			goto got_it;
		/*
		 * Search in the remainder of the current group.
		 */
	}

	/* Byte-wise scan starting at the byte that contains bit goal. */
	p = map + (goal >> 3);
	r = memscan(p, 0, (size - goal + 7) >> 3);
	/* Convert the byte position found back into a bit index. */
	k = (r - map) << 3;
	if (k < size) {
		/*
		 * We have succeeded in finding a free byte in the block
		 * bitmap.  Now search backwards to find the start of this
		 * group of free blocks - won't take more than 7 iterations.
		 */
		for (goal = k; goal && !ext2_test_bit (goal - 1, map); goal--)
			;
		goto got_it;
	}

	/* No free byte: fall back to looking for any single free bit. */
	k = ext2_find_next_zero_bit ((u32 *)map, size, goal);
	if (k < size) {
		goal = k;
		goto got_it;
	}
	return -1;
got_it:
	/* A non-zero return means the bit was already set; search again. */
	if (ext2_set_bit_atomic(lock, goal, (void *) map))
		goto repeat;
	return goal;
}
318 
/*
 * ext2_new_block uses a goal block to assist allocation.  The group that
 * contains the goal is tried first, searching from the goal's bit (see
 * grab_block for the search order inside a group).  If that fails, the
 * remaining groups are scanned in order, wrapping around, and the first
 * group that still reports free blocks is searched from bit 0.
 *
 * inode:          inode the block is allocated for
 * goal:           preferred block number; a value outside
 *                 [s_first_data_block, s_blocks_count) is replaced by
 *                 s_first_data_block
 * prealloc_count: may be NULL.  If it is NULL or *prealloc_count is
 *                 non-zero, no preallocation is attempted.  Otherwise it
 *                 receives the number of blocks preallocated directly
 *                 after the returned block.
 * prealloc_block: receives the first preallocated block number when
 *                 preallocation is performed
 * err:            set to 0 on success, or to -EDQUOT, -ENOSPC or -EIO
 *
 * Returns the allocated block number, or 0 on failure.
 *
 * Quota is charged up front for the block plus the preallocation target;
 * whatever is not used is handed back before returning.
 */
int ext2_new_block(struct inode *inode, unsigned long goal,
			u32 *prealloc_count, u32 *prealloc_block, int *err)
{
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *gdp_bh;	/* bh2 */
	struct ext2_group_desc *desc;
	int group_no;			/* i */
	int ret_block;			/* j */
	int group_idx;			/* k */
	int target_block;		/* tmp */
	int block = 0;
	struct super_block *sb = inode->i_sb;
	struct ext2_sb_info *sbi = EXT2_SB(sb);
	struct ext2_super_block *es = sbi->s_es;
	unsigned group_size = EXT2_BLOCKS_PER_GROUP(sb);
	unsigned prealloc_goal = es->s_prealloc_blocks;
	unsigned group_alloc = 0, es_alloc, dq_alloc;
	int nr_scanned_groups;

	/*
	 * prealloc_goal counts the blocks wanted in addition to the one
	 * being allocated; a zero s_prealloc_blocks selects the default.
	 */
	if (!prealloc_goal--)
		prealloc_goal = EXT2_DEFAULT_PREALLOC_BLOCKS - 1;
	if (!prealloc_count || *prealloc_count)
		prealloc_goal = 0;

	if (DQUOT_ALLOC_BLOCK(inode, 1)) {
		*err = -EDQUOT;
		goto out;
	}

	/* Shrink the preallocation target until the quota accepts it. */
	while (prealloc_goal && DQUOT_PREALLOC_BLOCK(inode, prealloc_goal))
		prealloc_goal--;

	/*
	 * dq_alloc, es_alloc and group_alloc track what is currently
	 * reserved against the quota, the filesystem-wide counter and the
	 * group counter respectively; the unused remainder of each is
	 * returned on the way out.
	 */
	dq_alloc = prealloc_goal + 1;
	es_alloc = reserve_blocks(sb, dq_alloc);
	if (!es_alloc) {
		*err = -ENOSPC;
		goto out_dquot;
	}

	ext2_debug ("goal=%lu.\n", goal);

	if (goal < le32_to_cpu(es->s_first_data_block) ||
	    goal >= le32_to_cpu(es->s_blocks_count))
		goal = le32_to_cpu(es->s_first_data_block);
	group_no = (goal - le32_to_cpu(es->s_first_data_block)) / group_size;
	desc = ext2_get_group_desc (sb, group_no, &gdp_bh);
	if (!desc) {
		/*
		 * gdp_bh may still be uninitialised.  But group_release_blocks
		 * will not touch it because group_alloc is zero.
		 */
		goto io_error;
	}

	group_alloc = group_reserve_blocks(sbi, group_no, desc,
					gdp_bh, es_alloc);
	if (group_alloc) {
		ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
					group_size);
		brelse(bitmap_bh);
		bitmap_bh = read_block_bitmap(sb, group_no);
		if (!bitmap_bh)
			goto io_error;

		ext2_debug("goal is at %d:%d.\n", group_no, ret_block);

		ret_block = grab_block(sb_bgl_lock(sbi, group_no),
				bitmap_bh->b_data, group_size, ret_block);
		if (ret_block >= 0)
			goto got_block;
		group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
		group_alloc = 0;
	}

	ext2_debug ("Bit not found in block group %d.\n", group_no);

	/*
	 * Now search the rest of the groups.  We assume that
	 * group_no and desc correctly point to the last group visited.
	 */
	nr_scanned_groups = 0;
retry:
	for (group_idx = 0; !group_alloc &&
			group_idx < sbi->s_groups_count; group_idx++) {
		group_no++;
		if (group_no >= sbi->s_groups_count)
			group_no = 0;
		desc = ext2_get_group_desc(sb, group_no, &gdp_bh);
		if (!desc)
			goto io_error;
		group_alloc = group_reserve_blocks(sbi, group_no, desc,
						gdp_bh, es_alloc);
	}
	if (!group_alloc) {
		*err = -ENOSPC;
		goto out_release;
	}
	brelse(bitmap_bh);
	bitmap_bh = read_block_bitmap(sb, group_no);
	if (!bitmap_bh)
		goto io_error;

	ret_block = grab_block(sb_bgl_lock(sbi, group_no), bitmap_bh->b_data,
				group_size, 0);
	if (ret_block < 0) {
		/*
		 * If a free block counter is corrupted we can loop infinitely.
		 * Detect that here.
		 */
		nr_scanned_groups++;
		if (nr_scanned_groups > 2 * sbi->s_groups_count) {
			ext2_error(sb, "ext2_new_block",
				"corrupted free blocks counters");
			goto io_error;
		}
		/*
		 * Someone else grabbed the last free block in this blockgroup
		 * before us.  Retry the scan.
		 */
		group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
		group_alloc = 0;
		goto retry;
	}

got_block:
	ext2_debug("using block group %d(%d)\n",
		group_no, desc->bg_free_blocks_count);

	/* Turn the bit index inside the group into a filesystem block number. */
	target_block = ret_block + group_no * group_size +
			le32_to_cpu(es->s_first_data_block);

	/* Reported only; the allocation still proceeds with this block. */
	if (target_block == le32_to_cpu(desc->bg_block_bitmap) ||
	    target_block == le32_to_cpu(desc->bg_inode_bitmap) ||
	    in_range(target_block, le32_to_cpu(desc->bg_inode_table),
		      sbi->s_itb_per_group))
		ext2_error (sb, "ext2_new_block",
			    "Allocating block in system zone - "
			    "block = %u", target_block);

	if (target_block >= le32_to_cpu(es->s_blocks_count)) {
		ext2_error (sb, "ext2_new_block",
			    "block(%d) >= blocks count(%d) - "
			    "block_group = %d, es == %p ", ret_block,
			le32_to_cpu(es->s_blocks_count), group_no, es);
		goto io_error;
	}
	block = target_block;

	/* OK, we _had_ allocated something */
	ext2_debug("found bit %d\n", ret_block);

	/* One unit of every reservation is now consumed by this block. */
	dq_alloc--;
	es_alloc--;
	group_alloc--;

	/*
	 * Do block preallocation now if required.
	 */
	write_lock(&EXT2_I(inode)->i_meta_lock);
	if (group_alloc && !*prealloc_count) {
		unsigned n;

		/*
		 * Claim the bits directly after the allocated one, stopping
		 * at the first bit already in use or at the end of the group.
		 */
		for (n = 0; n < group_alloc && ++ret_block < group_size; n++) {
			if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group_no),
						ret_block,
						(void*) bitmap_bh->b_data))
 				break;
		}
		*prealloc_block = block + 1;
		*prealloc_count = n;
		es_alloc -= n;
		dq_alloc -= n;
		group_alloc -= n;
	}
	write_unlock(&EXT2_I(inode)->i_meta_lock);

	mark_buffer_dirty(bitmap_bh);
	if (sb->s_flags & MS_SYNCHRONOUS)
		sync_dirty_buffer(bitmap_bh);

	ext2_debug ("allocating block %d. ", block);

	*err = 0;
out_release:
	/* Hand back whatever was reserved but not used. */
	group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
	release_blocks(sb, es_alloc);
out_dquot:
	DQUOT_FREE_BLOCK(inode, dq_alloc);
out:
	brelse(bitmap_bh);
	return block;

io_error:
	*err = -EIO;
	goto out_release;
}
523 
524 unsigned long ext2_count_free_blocks (struct super_block * sb)
525 {
526 	struct ext2_group_desc * desc;
527 	unsigned long desc_count = 0;
528 	int i;
529 #ifdef EXT2FS_DEBUG
530 	unsigned long bitmap_count, x;
531 	struct ext2_super_block *es;
532 
533 	lock_super (sb);
534 	es = EXT2_SB(sb)->s_es;
535 	desc_count = 0;
536 	bitmap_count = 0;
537 	desc = NULL;
538 	for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
539 		struct buffer_head *bitmap_bh;
540 		desc = ext2_get_group_desc (sb, i, NULL);
541 		if (!desc)
542 			continue;
543 		desc_count += le16_to_cpu(desc->bg_free_blocks_count);
544 		bitmap_bh = read_block_bitmap(sb, i);
545 		if (!bitmap_bh)
546 			continue;
547 
548 		x = ext2_count_free(bitmap_bh, sb->s_blocksize);
549 		printk ("group %d: stored = %d, counted = %lu\n",
550 			i, le16_to_cpu(desc->bg_free_blocks_count), x);
551 		bitmap_count += x;
552 		brelse(bitmap_bh);
553 	}
554 	printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
555 		(long)le32_to_cpu(es->s_free_blocks_count),
556 		desc_count, bitmap_count);
557 	unlock_super (sb);
558 	return bitmap_count;
559 #else
560         for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
561                 desc = ext2_get_group_desc (sb, i, NULL);
562                 if (!desc)
563                         continue;
564                 desc_count += le16_to_cpu(desc->bg_free_blocks_count);
565 	}
566 	return desc_count;
567 #endif
568 }
569 
570 static inline int
571 block_in_use(unsigned long block, struct super_block *sb, unsigned char *map)
572 {
573 	return ext2_test_bit ((block -
574 		le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block)) %
575 			 EXT2_BLOCKS_PER_GROUP(sb), map);
576 }
577 
/*
 * test_root - check whether a is a positive integral power of b.
 *
 * Returns 1 if a == b^n for some n >= 1, and 0 otherwise.
 *
 * The check divides a down instead of multiplying b up, so it cannot
 * overflow for large a.  For b < 2 no higher power can grow towards a,
 * so only the first power (a == b) is accepted; this also keeps the
 * function from looping forever on b == 0 or b == 1.
 */
static inline int test_root(int a, int b)
{
	if (b < 2)
		return a == b;

	for (;;) {
		if (a < b)
			return 0;
		if (a == b)
			return 1;
		if (a % b != 0)
			return 0;
		a /= b;
	}
}
586 
/*
 * Return 1 if group is 0, 1, or a power of 3, 5 or 7 (as decided by
 * test_root), and 0 otherwise.
 */
static int ext2_group_sparse(int group)
{
	if (group <= 1)
		return 1;
	if (test_root(group, 3))
		return 1;
	if (test_root(group, 5))
		return 1;
	if (test_root(group, 7))
		return 1;
	return 0;
}
594 
595 /**
596  *	ext2_bg_has_super - number of blocks used by the superblock in group
597  *	@sb: superblock for filesystem
598  *	@group: group number to check
599  *
600  *	Return the number of blocks used by the superblock (primary or backup)
601  *	in this group.  Currently this will be only 0 or 1.
602  */
603 int ext2_bg_has_super(struct super_block *sb, int group)
604 {
605 	if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
606 	    !ext2_group_sparse(group))
607 		return 0;
608 	return 1;
609 }
610 
611 /**
612  *	ext2_bg_num_gdb - number of blocks used by the group table in group
613  *	@sb: superblock for filesystem
614  *	@group: group number to check
615  *
616  *	Return the number of blocks used by the group descriptor table
617  *	(primary or backup) in this group.  In the future there may be a
618  *	different number of descriptor blocks in each group.
619  */
620 unsigned long ext2_bg_num_gdb(struct super_block *sb, int group)
621 {
622 	if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
623 	    !ext2_group_sparse(group))
624 		return 0;
625 	return EXT2_SB(sb)->s_gdb_count;
626 }
627 
628