/*
 * linux/fs/ext4/balloc.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
 * Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 */

#include <linux/time.h>
#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include "ext4.h"
#include "ext4_jbd2.h"
#include "group.h"

/*
 * balloc.c contains the block allocation and deallocation routines
 */

/*
 * Calculate the block group number and offset, given a block number
 */
void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
		ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	ext4_grpblk_t offset;

	blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
	offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb));
	if (offsetp)
		*offsetp = offset;
	if (blockgrpp)
		*blockgrpp = blocknr;
}

static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
			ext4_group_t block_group)
{
	ext4_group_t actual_group;
	ext4_get_group_no_and_offset(sb, block, &actual_group, NULL);
	if (actual_group == block_group)
		return 1;
	return 0;
}

static int ext4_group_used_meta_blocks(struct super_block *sb,
				ext4_group_t block_group)
{
	ext4_fsblk_t tmp;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	/* block bitmap, inode bitmap, and inode table blocks */
	int used_blocks = sbi->s_itb_per_group + 2;

	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
		struct ext4_group_desc *gdp;
		struct buffer_head *bh;

		gdp = ext4_get_group_desc(sb, block_group, &bh);
		if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp),
					block_group))
			used_blocks--;

		if (!ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp),
					block_group))
			used_blocks--;

		tmp = ext4_inode_table(sb, gdp);
		for (; tmp < ext4_inode_table(sb, gdp) +
				sbi->s_itb_per_group; tmp++) {
			if (!ext4_block_in_group(sb, tmp, block_group))
				used_blocks -= 1;
		}
	}
	return used_blocks;
}

/* Initializes an uninitialized block bitmap if given, and returns the
 * number of blocks free in the group. */
unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
		ext4_group_t block_group, struct ext4_group_desc *gdp)
{
	int bit, bit_max;
	unsigned free_blocks, group_blocks;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (bh) {
		J_ASSERT_BH(bh, buffer_locked(bh));

		/* If the checksum is bad, mark all blocks used to prevent
		 * allocation, essentially implementing a per-group
		 * read-only flag. */
		if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
			ext4_error(sb, __func__,
				  "Checksum bad for group %lu\n", block_group);
			gdp->bg_free_blocks_count = 0;
			gdp->bg_free_inodes_count = 0;
			gdp->bg_itable_unused = 0;
			memset(bh->b_data, 0xff, sb->s_blocksize);
			return 0;
		}
		memset(bh->b_data, 0, sb->s_blocksize);
	}

	/* Check for superblock and gdt backups in this group */
	bit_max = ext4_bg_has_super(sb, block_group);

	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
	    block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
			  sbi->s_desc_per_block) {
		if (bit_max) {
			bit_max += ext4_bg_num_gdb(sb, block_group);
			bit_max +=
				le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
		}
	} else { /* For META_BG_BLOCK_GROUPS */
		bit_max += ext4_bg_num_gdb(sb, block_group);
	}

	if (block_group == sbi->s_groups_count - 1) {
		/*
		 * Even though mke2fs always initializes the first and last
		 * group, if some other tool enabled EXT4_BG_BLOCK_UNINIT we
		 * need to make sure we calculate the right free blocks
		 */
		group_blocks = ext4_blocks_count(sbi->s_es) -
			le32_to_cpu(sbi->s_es->s_first_data_block) -
			(EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1));
	} else {
		group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
	}

	free_blocks = group_blocks - bit_max;

	if (bh) {
		ext4_fsblk_t start, tmp;
		int flex_bg = 0;

		for (bit = 0; bit < bit_max; bit++)
			ext4_set_bit(bit, bh->b_data);

		start = ext4_group_first_block_no(sb, block_group);

		if (EXT4_HAS_INCOMPAT_FEATURE(sb,
					      EXT4_FEATURE_INCOMPAT_FLEX_BG))
			flex_bg = 1;

		/* Set bits for block and inode bitmaps, and inode table */
		tmp = ext4_block_bitmap(sb, gdp);
		if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
			ext4_set_bit(tmp - start, bh->b_data);

		tmp = ext4_inode_bitmap(sb, gdp);
		if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
			ext4_set_bit(tmp - start, bh->b_data);

		tmp = ext4_inode_table(sb, gdp);
		for (; tmp < ext4_inode_table(sb, gdp) +
				sbi->s_itb_per_group; tmp++) {
			if (!flex_bg ||
				ext4_block_in_group(sb, tmp, block_group))
				ext4_set_bit(tmp - start, bh->b_data);
		}
		/*
		 * Also, if the number of blocks within the group is less
		 * than blocksize * 8 (the size of the bitmap), set the rest
		 * of the block bitmap to 1
		 */
		mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
	}
	return free_blocks - ext4_group_used_meta_blocks(sb, block_group);
}


/*
 * The free blocks are managed by bitmaps.  A file system contains several
 * block groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
 * block for inodes, N blocks for the inode table and data blocks.
 *
 * The file system contains group descriptors which are located after the
 * super block.  Each descriptor contains the number of the bitmap block and
 * the free blocks count in the group.  The descriptors are loaded in memory
 * when a file system is mounted (see ext4_fill_super).
 */

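/*
 * Illustrative example (the exact numbers depend on the filesystem
 * geometry): with s_first_data_block == 1 and
 * EXT4_BLOCKS_PER_GROUP(sb) == 32768, ext4_get_group_no_and_offset()
 * above maps block 100000 to group (100000 - 1) / 32768 = 3 with
 * offset (100000 - 1) % 32768 = 1695 inside that group.
 */
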
#define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)

/**
 * ext4_get_group_desc() -- load group descriptor from disk
 * @sb:			super block
 * @block_group:	given block group
 * @bh:			pointer to the buffer head to store the block
 *			group descriptor
 */
struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
					     ext4_group_t block_group,
					     struct buffer_head **bh)
{
	unsigned long group_desc;
	unsigned long offset;
	struct ext4_group_desc *desc;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (block_group >= sbi->s_groups_count) {
		ext4_error(sb, "ext4_get_group_desc",
			   "block_group >= groups_count - "
			   "block_group = %lu, groups_count = %lu",
			   block_group, sbi->s_groups_count);

		return NULL;
	}
	smp_rmb();

	group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
	offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
	if (!sbi->s_group_desc[group_desc]) {
		ext4_error(sb, "ext4_get_group_desc",
			   "Group descriptor not loaded - "
			   "block_group = %lu, group_desc = %lu, desc = %lu",
			   block_group, group_desc, offset);
		return NULL;
	}

	desc = (struct ext4_group_desc *)(
		(__u8 *)sbi->s_group_desc[group_desc]->b_data +
		offset * EXT4_DESC_SIZE(sb));
	if (bh)
		*bh = sbi->s_group_desc[group_desc];
	return desc;
}

static int ext4_valid_block_bitmap(struct super_block *sb,
					struct ext4_group_desc *desc,
					unsigned int block_group,
					struct buffer_head *bh)
{
	ext4_grpblk_t offset;
	ext4_grpblk_t next_zero_bit;
	ext4_fsblk_t bitmap_blk;
	ext4_fsblk_t group_first_block;

	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
		/* with FLEX_BG, the inode/block bitmaps and itable
		 * blocks may not be in the group at all, so the bitmap
		 * validation is skipped for those groups; otherwise we
		 * would also have to read the block group where the
		 * bitmaps are located to verify they are set.
		 */
		return 1;
	}
	group_first_block = ext4_group_first_block_no(sb, block_group);

	/* check whether the block bitmap block number is set */
	bitmap_blk = ext4_block_bitmap(sb, desc);
	offset = bitmap_blk - group_first_block;
	if (!ext4_test_bit(offset, bh->b_data))
		/* bad block bitmap */
		goto err_out;

	/* check whether the inode bitmap block number is set */
	bitmap_blk = ext4_inode_bitmap(sb, desc);
	offset = bitmap_blk - group_first_block;
	if (!ext4_test_bit(offset, bh->b_data))
		/* bad block bitmap */
		goto err_out;

	/* check whether the inode table block numbers are set */
	bitmap_blk = ext4_inode_table(sb, desc);
	offset = bitmap_blk - group_first_block;
	next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
				offset + EXT4_SB(sb)->s_itb_per_group,
				offset);
	if (next_zero_bit >= offset + EXT4_SB(sb)->s_itb_per_group)
		/* good bitmap for inode tables */
		return 1;

err_out:
	ext4_error(sb, __func__,
			"Invalid block bitmap - "
			"block_group = %d, block = %llu",
			block_group, bitmap_blk);
	return 0;
}

/**
 * ext4_read_block_bitmap()
 * @sb:			super block
 * @block_group:	given block group
 *
 * Read the bitmap for a given block_group, and validate that the bits
 * for the block/inode bitmaps and inode table are set in the bitmap.
 *
 * Return buffer_head on success or NULL in case of failure.
 */
struct buffer_head *
ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
{
	struct ext4_group_desc *desc;
	struct buffer_head *bh = NULL;
	ext4_fsblk_t bitmap_blk;

	desc = ext4_get_group_desc(sb, block_group, NULL);
	if (!desc)
		return NULL;
	bitmap_blk = ext4_block_bitmap(sb, desc);
	bh = sb_getblk(sb, bitmap_blk);
	if (unlikely(!bh)) {
		ext4_error(sb, __func__,
			    "Cannot read block bitmap - "
			    "block_group = %d, block_bitmap = %llu",
			    (int)block_group, (unsigned long long)bitmap_blk);
		return NULL;
	}
	if (bh_uptodate_or_lock(bh))
		return bh;

	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
		ext4_init_block_bitmap(sb, bh, block_group, desc);
		set_buffer_uptodate(bh);
		unlock_buffer(bh);
		return bh;
	}
	if (bh_submit_read(bh) < 0) {
		put_bh(bh);
		ext4_error(sb, __func__,
			    "Cannot read block bitmap - "
			    "block_group = %d, block_bitmap = %llu",
			    (int)block_group, (unsigned long long)bitmap_blk);
		return NULL;
	}
	ext4_valid_block_bitmap(sb, desc, block_group, bh);
	/*
	 * filesystem is mounted not to panic on error,
	 * continue with the corrupt bitmap
	 */
	return bh;
}

/*
 * The reservation window structure operations
 * --------------------------------------------
 * Operations include:
 * dump, find, add, remove, is_empty, find_next_reservable_window, etc.
 *
 * We use a red-black tree to represent per-filesystem reservation
 * windows.
 *
 */

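/*
 * A reservation window node covers the filesystem-wide block range
 * [rsv_start, rsv_end].  The tree is keyed by rsv_start, windows are kept
 * non-overlapping (ext4_rsv_window_add() BUG()s if an overlap would be
 * created), and a window that is not currently allocated has its end set
 * to EXT4_RESERVE_WINDOW_NOT_ALLOCATED (see rsv_is_empty() below).
 */
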
/**
 * __rsv_window_dump() -- Dump the filesystem block allocation reservation map
 * @rb_root:		root of per-filesystem reservation rb tree
 * @verbose:		verbose mode
 * @fn:			function which wishes to dump the reservation map
 *
 * If verbose is turned on, it will print the whole block reservation
 * windows (start, end).  Otherwise, it will only print out the "bad" windows,
 * those windows that overlap with their immediate neighbors.
 */
#if 1
static void __rsv_window_dump(struct rb_root *root, int verbose,
			      const char *fn)
{
	struct rb_node *n;
	struct ext4_reserve_window_node *rsv, *prev;
	int bad;

restart:
	n = rb_first(root);
	bad = 0;
	prev = NULL;

	printk("Block Allocation Reservation Windows Map (%s):\n", fn);
	while (n) {
		rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
		if (verbose)
			printk("reservation window 0x%p "
			       "start: %llu, end: %llu\n",
			       rsv, rsv->rsv_start, rsv->rsv_end);
		if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) {
			printk("Bad reservation %p (start >= end)\n",
			       rsv);
			bad = 1;
		}
		if (prev && prev->rsv_end >= rsv->rsv_start) {
			printk("Bad reservation %p (prev->end >= start)\n",
			       rsv);
			bad = 1;
		}
		if (bad) {
			if (!verbose) {
				printk("Restarting reservation walk in verbose mode\n");
				verbose = 1;
				goto restart;
			}
		}
		n = rb_next(n);
		prev = rsv;
	}
	printk("Window map complete.\n");
	BUG_ON(bad);
}
#define rsv_window_dump(root, verbose) \
	__rsv_window_dump((root), (verbose), __func__)
#else
#define rsv_window_dump(root, verbose) do {} while (0)
#endif

/**
 * goal_in_my_reservation()
 * @rsv:		inode's reservation window
 * @grp_goal:		given goal block relative to the allocation block group
 * @group:		the current allocation block group
 * @sb:			filesystem super block
 *
 * Test if the given goal block (group relative) is within the file's
 * own block reservation window range.
 *
 * If the reservation window is outside the goal allocation group, return 0;
 * grp_goal (given goal block) could be -1, which means no specific
 * goal block.  In this case, always return 1.
 * If the goal block is within the reservation window, return 1;
 * otherwise, return 0.
 */
static int
goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal,
			ext4_group_t group, struct super_block *sb)
{
	ext4_fsblk_t group_first_block, group_last_block;

	group_first_block = ext4_group_first_block_no(sb, group);
	group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);

	if ((rsv->_rsv_start > group_last_block) ||
	    (rsv->_rsv_end < group_first_block))
		return 0;
	if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start)
		|| (grp_goal + group_first_block > rsv->_rsv_end)))
		return 0;
	return 1;
}

/**
 * search_reserve_window()
 * @rb_root:		root of reservation tree
 * @goal:		target allocation block
 *
 * Find the reserved window which includes the goal, or the previous one
 * if the goal is not in any window.
 * Returns NULL if there are no windows or if all windows start after the goal.
 */
static struct ext4_reserve_window_node *
search_reserve_window(struct rb_root *root, ext4_fsblk_t goal)
{
	struct rb_node *n = root->rb_node;
	struct ext4_reserve_window_node *rsv;

	if (!n)
		return NULL;

	do {
		rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);

		if (goal < rsv->rsv_start)
			n = n->rb_left;
		else if (goal > rsv->rsv_end)
			n = n->rb_right;
		else
			return rsv;
	} while (n);
	/*
	 * We've fallen off the end of the tree: the goal wasn't inside
	 * any particular node.  OK, the previous node must be to one
	 * side of the interval containing the goal.  If it's the RHS,
	 * we need to back up one.
	 */
	if (rsv->rsv_start > goal) {
		n = rb_prev(&rsv->rsv_node);
		rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
	}
	return rsv;
}

/**
 * ext4_rsv_window_add() -- Insert a window to the block reservation rb tree.
 * @sb:			super block
 * @rsv:		reservation window to add
 *
 * Must be called with rsv_lock held.
 */
void ext4_rsv_window_add(struct super_block *sb,
			 struct ext4_reserve_window_node *rsv)
{
	struct rb_root *root = &EXT4_SB(sb)->s_rsv_window_root;
	struct rb_node *node = &rsv->rsv_node;
	ext4_fsblk_t start = rsv->rsv_start;

	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct ext4_reserve_window_node *this;

	while (*p) {
		parent = *p;
		this = rb_entry(parent, struct ext4_reserve_window_node, rsv_node);

		if (start < this->rsv_start)
			p = &(*p)->rb_left;
		else if (start > this->rsv_end)
			p = &(*p)->rb_right;
		else {
			rsv_window_dump(root, 1);
			BUG();
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
}

/**
 * rsv_window_remove() -- unlink a window from the reservation rb tree
 * @sb:			super block
 * @rsv:		reservation window to remove
 *
 * Mark the block reservation window as not allocated, and unlink it
 * from the filesystem reservation window rb tree.  Must be called with
 * rsv_lock held.
 */
static void rsv_window_remove(struct super_block *sb,
			      struct ext4_reserve_window_node *rsv)
{
	rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
	rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
	rsv->rsv_alloc_hit = 0;
	rb_erase(&rsv->rsv_node, &EXT4_SB(sb)->s_rsv_window_root);
}

/*
 * rsv_is_empty() -- Check if the reservation window is unallocated.
 * @rsv:		given reservation window to check
 *
 * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED.
 */
static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
{
	/* a valid reservation end block could not be 0 */
	return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
}

/**
 * ext4_init_block_alloc_info()
 * @inode:		file inode structure
 *
 * Allocate and initialize the reservation window structure, and
 * finally link the window to the ext4 inode structure.
 *
 * The reservation window structure is only dynamically allocated
 * and linked to the ext4 inode the first time the open file
 * needs a new block.  So, before every ext4_new_block(s) call, for
 * regular files, we should check whether the reservation window
 * structure exists or not.  In the latter case, this function is called.
 * Failing to do so will result in block reservation being turned off
 * for that open file.
 *
 * This function is called from ext4_get_blocks_handle(), and is also
 * called when setting the reservation window size through ioctl before
 * the file is open for write (needs block allocation).
 *
 * Needs down_write(i_data_sem) protection prior to calling this function.
 */
void ext4_init_block_alloc_info(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info;
	struct super_block *sb = inode->i_sb;

	block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
	if (block_i) {
		struct ext4_reserve_window_node *rsv = &block_i->rsv_window_node;

		rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
		rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;

		/*
		 * if the filesystem is mounted with NORESERVATION, the goal
		 * reservation window size is set to zero to indicate
		 * block reservation is off
		 */
		if (!test_opt(sb, RESERVATION))
			rsv->rsv_goal_size = 0;
		else
			rsv->rsv_goal_size = EXT4_DEFAULT_RESERVE_BLOCKS;
		rsv->rsv_alloc_hit = 0;
		block_i->last_alloc_logical_block = 0;
		block_i->last_alloc_physical_block = 0;
	}
	ei->i_block_alloc_info = block_i;
}

/**
 * ext4_discard_reservation()
 * @inode:		inode
 *
 * Discard (free) the block reservation window on last file close, on
 * truncate, or at last iput().
 *
 * It is called in three cases:
 *	ext4_release_file(): last writer closes the file
 *	ext4_clear_inode(): last iput(), when nobody links to this file.
 *	ext4_truncate(): when the block indirect map is about to change.
 *
 */
void ext4_discard_reservation(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info;
	struct ext4_reserve_window_node *rsv;
	spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock;

	ext4_mb_discard_inode_preallocations(inode);

	if (!block_i)
		return;

	rsv = &block_i->rsv_window_node;
	if (!rsv_is_empty(&rsv->rsv_window)) {
		spin_lock(rsv_lock);
		if (!rsv_is_empty(&rsv->rsv_window))
			rsv_window_remove(inode->i_sb, rsv);
		spin_unlock(rsv_lock);
	}
}

/**
 * ext4_free_blocks_sb() -- Free given blocks and update quota
 * @handle:			handle to this transaction
 * @sb:				super block
 * @block:			start physical block to free
 * @count:			number of blocks to free
 * @pdquot_freed_blocks:	pointer to quota
 */
void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
			 ext4_fsblk_t block, unsigned long count,
			 unsigned long *pdquot_freed_blocks)
{
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *gd_bh;
	ext4_group_t block_group;
	ext4_grpblk_t bit;
	unsigned long i;
	unsigned long overflow;
	struct ext4_group_desc *desc;
	struct ext4_super_block *es;
	struct ext4_sb_info *sbi;
	int err = 0, ret;
	ext4_grpblk_t group_freed;

	*pdquot_freed_blocks = 0;
	sbi = EXT4_SB(sb);
	es = sbi->s_es;
	if (block < le32_to_cpu(es->s_first_data_block) ||
	    block + count < block ||
	    block + count > ext4_blocks_count(es)) {
		ext4_error(sb, "ext4_free_blocks",
			   "Freeing blocks not in datazone - "
			   "block = %llu, count = %lu", block, count);
		goto error_return;
	}

	ext4_debug("freeing block(s) %llu-%llu\n", block, block + count - 1);

do_more:
	overflow = 0;
	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
	/*
	 * Check to see if we are freeing blocks across a group
	 * boundary.
	 */
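	/*
	 * For example, if bit + count runs past EXT4_BLOCKS_PER_GROUP(sb),
	 * only the blocks that fall inside this group are freed in this
	 * pass; the remaining 'overflow' blocks are handled by looping back
	 * to do_more with 'block' advanced into the next group.
	 */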
	if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
		overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
		count -= overflow;
	}
	brelse(bitmap_bh);
	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
	if (!bitmap_bh)
		goto error_return;
	desc = ext4_get_group_desc(sb, block_group, &gd_bh);
	if (!desc)
		goto error_return;

	if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
	    in_range(ext4_inode_bitmap(sb, desc), block, count) ||
	    in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
	    in_range(block + count - 1, ext4_inode_table(sb, desc),
		     sbi->s_itb_per_group)) {
		ext4_error(sb, "ext4_free_blocks",
			   "Freeing blocks in system zones - "
			   "Block = %llu, count = %lu",
			   block, count);
		goto error_return;
	}

	/*
	 * We are about to start releasing blocks in the bitmap,
	 * so we need undo access.
	 */
	/* @@@ check errors */
	BUFFER_TRACE(bitmap_bh, "getting undo access");
	err = ext4_journal_get_undo_access(handle, bitmap_bh);
	if (err)
		goto error_return;

	/*
	 * We are about to modify some metadata.  Call the journal APIs
	 * to unshare ->b_data if a currently-committing transaction is
	 * using it
	 */
	BUFFER_TRACE(gd_bh, "get_write_access");
	err = ext4_journal_get_write_access(handle, gd_bh);
	if (err)
		goto error_return;

	jbd_lock_bh_state(bitmap_bh);

	for (i = 0, group_freed = 0; i < count; i++) {
		/*
		 * An HJ special.  This is expensive...
		 */
#ifdef CONFIG_JBD2_DEBUG
		jbd_unlock_bh_state(bitmap_bh);
		{
			struct buffer_head *debug_bh;
			debug_bh = sb_find_get_block(sb, block + i);
			if (debug_bh) {
				BUFFER_TRACE(debug_bh, "Deleted!");
				if (!bh2jh(bitmap_bh)->b_committed_data)
					BUFFER_TRACE(debug_bh,
						"No committed data in bitmap");
				BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
				__brelse(debug_bh);
			}
		}
		jbd_lock_bh_state(bitmap_bh);
#endif
		if (need_resched()) {
			jbd_unlock_bh_state(bitmap_bh);
			cond_resched();
			jbd_lock_bh_state(bitmap_bh);
		}
		/* @@@ This prevents newly-allocated data from being
		 * freed and then reallocated within the same
		 * transaction.
		 *
		 * Ideally we would want to allow that to happen, but to
		 * do so requires making jbd2_journal_forget() capable of
		 * revoking the queued write of a data block, which
		 * implies blocking on the journal lock.  *forget()
		 * cannot block due to truncate races.
		 *
		 * Eventually we can fix this by making jbd2_journal_forget()
		 * return a status indicating whether or not it was able
		 * to revoke the buffer.  On successful revoke, it is
		 * safe not to set the allocation bit in the committed
		 * bitmap, because we know that there is no outstanding
		 * activity on the buffer any more and so it is safe to
		 * reallocate it.
		 */
		BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
		J_ASSERT_BH(bitmap_bh,
				bh2jh(bitmap_bh)->b_committed_data != NULL);
		ext4_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i,
				bh2jh(bitmap_bh)->b_committed_data);

		/*
		 * We clear the bit in the bitmap after setting the committed
		 * data bit, because this is the reverse order to that which
		 * the allocator uses.
		 */
		BUFFER_TRACE(bitmap_bh, "clear bit");
		if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
						bit + i, bitmap_bh->b_data)) {
			jbd_unlock_bh_state(bitmap_bh);
			ext4_error(sb, __func__,
				   "bit already cleared for block %llu",
				   (ext4_fsblk_t)(block + i));
			jbd_lock_bh_state(bitmap_bh);
			BUFFER_TRACE(bitmap_bh, "bit already cleared");
		} else {
			group_freed++;
		}
	}
	jbd_unlock_bh_state(bitmap_bh);

	spin_lock(sb_bgl_lock(sbi, block_group));
	le16_add_cpu(&desc->bg_free_blocks_count, group_freed);
	desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
	spin_unlock(sb_bgl_lock(sbi, block_group));
	percpu_counter_add(&sbi->s_freeblocks_counter, count);

	if (sbi->s_log_groups_per_flex) {
		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
		spin_lock(sb_bgl_lock(sbi, flex_group));
		sbi->s_flex_groups[flex_group].free_blocks += count;
		spin_unlock(sb_bgl_lock(sbi, flex_group));
	}

	/* We dirtied the bitmap block */
	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
	err = ext4_journal_dirty_metadata(handle, bitmap_bh);

	/* And the group descriptor block */
	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
	ret = ext4_journal_dirty_metadata(handle, gd_bh);
	if (!err) err = ret;
	*pdquot_freed_blocks += group_freed;

	if (overflow && !err) {
		block += count;
		count = overflow;
		goto do_more;
	}
	sb->s_dirt = 1;
error_return:
	brelse(bitmap_bh);
	ext4_std_error(sb, err);
	return;
}

/**
 * ext4_free_blocks() -- Free given blocks and update quota
 * @handle:		handle for this transaction
 * @inode:		inode
 * @block:		start physical block to free
 * @count:		number of blocks to free
 * @metadata:		are these metadata blocks
 */
void ext4_free_blocks(handle_t *handle, struct inode *inode,
			ext4_fsblk_t block, unsigned long count,
			int metadata)
{
	struct super_block *sb;
	unsigned long dquot_freed_blocks;

	/* this isn't the right place to decide whether a block is metadata;
	 * inode.c/extents.c knows better, but for safety ... */
	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
			ext4_should_journal_data(inode))
		metadata = 1;

	sb = inode->i_sb;

	if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info)
		ext4_free_blocks_sb(handle, sb, block, count,
						&dquot_freed_blocks);
	else
		ext4_mb_free_blocks(handle, inode, block, count,
						metadata, &dquot_freed_blocks);
	if (dquot_freed_blocks)
		DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
	return;
}

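/*
 * Illustrative (hypothetical) call site: to release a single data block
 * 'blk' belonging to 'inode' inside the running transaction 'handle', a
 * caller would do something like
 *
 *	ext4_free_blocks(handle, inode, blk, 1, 0);
 *
 * passing metadata == 0 because the block holds file data.
 */
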
/**
 * ext4_test_allocatable()
 * @nr:			given allocation block (group relative)
 * @bh:			bufferhead containing the bitmap of the given block group
 *
 * For ext4 allocations, we must not reuse any blocks which are
 * allocated in the bitmap buffer's "last committed data" copy.  This
 * prevents deletes from freeing up the page for reuse until we have
 * committed the delete transaction.
 *
 * If we didn't do this, then deleting something and reallocating it as
 * data would allow the old block to be overwritten before the
 * transaction committed (because we force data to disk before commit).
 * This would lead to corruption if we crashed between overwriting the
 * data and committing the delete.
 *
 * @@@ We may want to make this allocation behaviour conditional on
 * data-writes at some point, and disable it for metadata allocations or
 * sync-data inodes.
 */
static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh)
{
	int ret;
	struct journal_head *jh = bh2jh(bh);

	if (ext4_test_bit(nr, bh->b_data))
		return 0;

	jbd_lock_bh_state(bh);
	if (!jh->b_committed_data)
		ret = 1;
	else
		ret = !ext4_test_bit(nr, jh->b_committed_data);
	jbd_unlock_bh_state(bh);
	return ret;
}

/**
 * bitmap_search_next_usable_block()
 * @start:		the starting block (group relative) of the search
 * @bh:			bufferhead containing the block group bitmap
 * @maxblocks:		the ending block (group relative) of the reservation
 *
 * The bitmap search --- search forward alternately through the actual
 * bitmap on disk and the last-committed copy in the journal, until we
 * find a bit free in both bitmaps.
 */
static ext4_grpblk_t
bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
					ext4_grpblk_t maxblocks)
{
	ext4_grpblk_t next;
	struct journal_head *jh = bh2jh(bh);

	while (start < maxblocks) {
		next = ext4_find_next_zero_bit(bh->b_data, maxblocks, start);
		if (next >= maxblocks)
			return -1;
		if (ext4_test_allocatable(next, bh))
			return next;
		jbd_lock_bh_state(bh);
		if (jh->b_committed_data)
			start = ext4_find_next_zero_bit(jh->b_committed_data,
							maxblocks, next);
		jbd_unlock_bh_state(bh);
	}
	return -1;
}

/**
 * find_next_usable_block()
 * @start:		the starting block (group relative) to find the next
 *			allocatable block in the bitmap.
 * @bh:			bufferhead containing the block group bitmap
 * @maxblocks:		the ending block (group relative) for the search
 *
 * Find an allocatable block in a bitmap.  We honor both the bitmap and
 * its last-committed copy (if that exists), and perform the "most
 * appropriate allocation" algorithm of looking for a free block near
 * the initial goal; then for a free byte somewhere in the bitmap; then
 * for any free bit in the bitmap.
 */
static ext4_grpblk_t
find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
			ext4_grpblk_t maxblocks)
{
	ext4_grpblk_t here, next;
	char *p, *r;

	if (start > 0) {
		/*
		 * The goal was occupied; search forward for a free
		 * block within the next XX blocks.
		 *
		 * end_goal is more or less random, but it has to be
		 * less than EXT4_BLOCKS_PER_GROUP.  Aligning up to the
		 * next 64-bit boundary is simple.
		 */
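		/*
		 * For example, (start + 63) & ~63 below rounds the goal up
		 * to the next multiple of 64: a goal of 70 gives an
		 * end_goal of 128, while an already-aligned goal such as
		 * 64 is left unchanged.
		 */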
		ext4_grpblk_t end_goal = (start + 63) & ~63;
		if (end_goal > maxblocks)
			end_goal = maxblocks;
		here = ext4_find_next_zero_bit(bh->b_data, end_goal, start);
		if (here < end_goal && ext4_test_allocatable(here, bh))
			return here;
		ext4_debug("Bit not found near goal\n");
	}

	here = start;
	if (here < 0)
		here = 0;

	p = ((char *)bh->b_data) + (here >> 3);
	r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3));
	next = (r - ((char *)bh->b_data)) << 3;

	if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh))
		return next;

	/*
	 * The bitmap search --- search forward alternately through the actual
	 * bitmap and the last-committed copy until we find a bit free in
	 * both
	 */
	here = bitmap_search_next_usable_block(here, bh, maxblocks);
	return here;
}

/**
 * claim_block()
 * @block:		the free block (group relative) to allocate
 * @bh:			the bufferhead containing the block group bitmap
 *
 * We think we can allocate this block in this bitmap.  Try to set the bit.
 * If that succeeds then check that nobody has allocated and then freed the
 * block since we saw that it was not marked in b_committed_data.  If it _was_
 * allocated and freed then clear the bit in the bitmap again and return
 * zero (failure).
 */
static inline int
claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh)
{
	struct journal_head *jh = bh2jh(bh);
	int ret;

	if (ext4_set_bit_atomic(lock, block, bh->b_data))
		return 0;
	jbd_lock_bh_state(bh);
	if (jh->b_committed_data && ext4_test_bit(block, jh->b_committed_data)) {
		ext4_clear_bit_atomic(lock, block, bh->b_data);
		ret = 0;
	} else {
		ret = 1;
	}
	jbd_unlock_bh_state(bh);
	return ret;
}

/**
 * ext4_try_to_allocate()
 * @sb:			superblock
 * @handle:		handle to this transaction
 * @group:		given allocation block group
 * @bitmap_bh:		bufferhead holding the block bitmap
 * @grp_goal:		given target block within the group
 * @count:		target number of blocks to allocate
 * @my_rsv:		reservation window
 *
 * Attempt to allocate blocks within a given range.  Set the range of the
 * allocation first, then find the first free bit(s) in the bitmap (within
 * the range), and finally allocate the blocks by claiming the found free
 * bit(s) as allocated.
 *
 * To set the range of this allocation:
 *	if there is a reservation window, only try to allocate block(s) from
 *	the file's own reservation window;
 *	otherwise, the allocation range starts from the given goal block and
 *	ends at the block group's last block.
 *
 * If we failed to allocate the desired block then we may end up crossing to a
 * new bitmap.  In that case we must release write access to the old one via
 * ext4_journal_release_buffer(), else we'll run out of credits.
 */
static ext4_grpblk_t
ext4_try_to_allocate(struct super_block *sb, handle_t *handle,
			ext4_group_t group, struct buffer_head *bitmap_bh,
			ext4_grpblk_t grp_goal, unsigned long *count,
			struct ext4_reserve_window *my_rsv)
{
	ext4_fsblk_t group_first_block;
	ext4_grpblk_t start, end;
	unsigned long num = 0;

	/* we do allocation within the reservation window if we have a window */
	if (my_rsv) {
		group_first_block = ext4_group_first_block_no(sb, group);
		if (my_rsv->_rsv_start >= group_first_block)
			start = my_rsv->_rsv_start - group_first_block;
		else
			/* reservation window crosses group boundary */
			start = 0;
		end = my_rsv->_rsv_end - group_first_block + 1;
		if (end > EXT4_BLOCKS_PER_GROUP(sb))
			/* reservation window crosses group boundary */
			end = EXT4_BLOCKS_PER_GROUP(sb);
		if ((start <= grp_goal) && (grp_goal < end))
			start = grp_goal;
		else
			grp_goal = -1;
	} else {
		if (grp_goal > 0)
			start = grp_goal;
		else
			start = 0;
		end = EXT4_BLOCKS_PER_GROUP(sb);
	}

	BUG_ON(start > EXT4_BLOCKS_PER_GROUP(sb));

repeat:
	if (grp_goal < 0 || !ext4_test_allocatable(grp_goal, bitmap_bh)) {
		grp_goal = find_next_usable_block(start, bitmap_bh, end);
		if (grp_goal < 0)
			goto fail_access;
		if (!my_rsv) {
			int i;

			for (i = 0; i < 7 && grp_goal > start &&
					ext4_test_allocatable(grp_goal - 1,
								bitmap_bh);
					i++, grp_goal--)
				;
		}
	}
	start = grp_goal;

	if (!claim_block(sb_bgl_lock(EXT4_SB(sb), group),
		grp_goal, bitmap_bh)) {
		/*
		 * The block was allocated by another thread, or it was
		 * allocated and then freed by another thread
		 */
		start++;
		grp_goal++;
		if (start >= end)
			goto fail_access;
		goto repeat;
	}
	num++;
	grp_goal++;
	while (num < *count && grp_goal < end
		&& ext4_test_allocatable(grp_goal, bitmap_bh)
		&& claim_block(sb_bgl_lock(EXT4_SB(sb), group),
				grp_goal, bitmap_bh)) {
		num++;
		grp_goal++;
	}
	*count = num;
	return grp_goal - num;
fail_access:
	*count = num;
	return -1;
}

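/*
 * Note: on success ext4_try_to_allocate() returns the group-relative
 * number of the first block it claimed and updates *count to the number
 * of contiguous blocks actually allocated (which may be fewer than
 * requested); on failure it returns -1.
 */
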
/**
 * find_next_reservable_window():
 *	find a reservable space within the given range.
 *	It does not allocate the reservation window for now:
 *	alloc_new_reservation() will do the work later.
 *
 * @search_head: the head of the searching list;
 *	This is not necessarily the list head of the whole filesystem.
 *
 *	We have both head and start_block to assist the search
 *	for the reservable space.  The list starts from head,
 *	but we will shift to the place where start_block is,
 *	then start from there, when looking for a reservable space.
 *
 * @size: the target new reservation window size
 *
 * @group_first_block: the first block we consider to start
 *	the real search from
 *
 * @last_block:
 *	the maximum block number that our goal reservable space
 *	could start from.  This is normally the last block in this
 *	group.  The search ends when we find that the start of the
 *	next possible reservable space is beyond this boundary.
 *	This handles the case of a reservation window that crosses
 *	a group boundary.
 *
 *	Basically we search the given range (start_block, last_block),
 *	rather than the whole reservation doubly linked list, to find
 *	a free region of the requested size that has not been reserved.
 *
 */
static int find_next_reservable_window(
				struct ext4_reserve_window_node *search_head,
				struct ext4_reserve_window_node *my_rsv,
				struct super_block *sb,
				ext4_fsblk_t start_block,
				ext4_fsblk_t last_block)
{
	struct rb_node *next;
	struct ext4_reserve_window_node *rsv, *prev;
	ext4_fsblk_t cur;
	int size = my_rsv->rsv_goal_size;

	/* TODO: make the start of the reservation window byte-aligned */
	/* cur = *start_block & ~7;*/
	cur = start_block;
	rsv = search_head;
	if (!rsv)
		return -1;

	while (1) {
		if (cur <= rsv->rsv_end)
			cur = rsv->rsv_end + 1;

		/* TODO?
		 * if we cannot find a reservable space of the expected
		 * size, during the re-search we could remember the largest
		 * reservable space we found and return that one.
		 *
		 * For now it will fail if we could not find a reservable
		 * space of the expected size (or more)...
		 */
		if (cur > last_block)
			return -1;		/* fail */

		prev = rsv;
		next = rb_next(&rsv->rsv_node);
		rsv = rb_entry(next, struct ext4_reserve_window_node, rsv_node);

		/*
		 * Reached the last reservation, we can just append to the
		 * previous one.
		 */
		if (!next)
			break;

		if (cur + size <= rsv->rsv_start) {
			/*
			 * Found a reservable space big enough.  We could
			 * have a reservation across the group boundary here
			 */
			break;
		}
	}
	/*
	 * We come here either when we reach the end of the whole list,
	 * and there is empty reservable space after the last entry in
	 * the list (the new window is appended at the end of the list),
	 * or when we have found a reservable space in the middle of the
	 * list (the new window goes right before the reservation node we
	 * stopped at).  Either way we succeed.
	 */

	if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window)))
		rsv_window_remove(sb, my_rsv);

	/*
	 * Let's book the whole available window for now.  We will check the
	 * disk bitmap later and then, if there are free blocks, we adjust
	 * the window size if it's larger than requested.
	 * Otherwise, we will remove this node from the tree the next time
	 * find_next_reservable_window() is called.
	 */
	my_rsv->rsv_start = cur;
	my_rsv->rsv_end = cur + size - 1;
	my_rsv->rsv_alloc_hit = 0;

	if (prev != my_rsv)
		ext4_rsv_window_add(sb, my_rsv);

	return 0;
}

/**
 * alloc_new_reservation()--allocate a new reservation window
 *
 * To make a new reservation, we search part of the filesystem
 * reservation list (the part inside the group).  We try to
 * allocate a new reservation window near the allocation goal,
 * or the beginning of the group if there is no goal.
 *
 * We first find a reservable space after the goal, then from
 * there, we check the bitmap for the first free block after
 * it.  If there is no free block until the end of the group, then the
 * whole group is full and we fail.  Otherwise, we check if the free
 * block is inside the expected reservable space; if so, we
 * succeed.
 * If the first free block is outside the reservable space, then we
 * start from the first free block, search for the next available
 * space, and go on.
 *
 * On success, a new reservation will be found and inserted into the
 * list; it contains at least one free block, and it does not overlap
 * with other reservation windows.
 *
 * On failure, we failed to find a reservation window in this group.
 *
 * @rsv: the reservation window
 *
 * @grp_goal: The goal (group-relative).  It is where the search for a
 *	free reservable space should start from.
 *	If we have a goal (grp_goal >= 0), then start from there;
 *	with no goal (grp_goal = -1), we start from the first block
 *	of the group.
 *
 * @sb: the super block
 * @group: the group we are trying to allocate in
 * @bitmap_bh: the block group block bitmap
 *
 */
static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
		ext4_grpblk_t grp_goal, struct super_block *sb,
		ext4_group_t group, struct buffer_head *bitmap_bh)
{
	struct ext4_reserve_window_node *search_head;
	ext4_fsblk_t group_first_block, group_end_block, start_block;
	ext4_grpblk_t first_free_block;
	struct rb_root *fs_rsv_root = &EXT4_SB(sb)->s_rsv_window_root;
	unsigned long size;
	int ret;
	spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock;

	group_first_block = ext4_group_first_block_no(sb, group);
	group_end_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);

	if (grp_goal < 0)
		start_block = group_first_block;
	else
		start_block = grp_goal + group_first_block;

	size = my_rsv->rsv_goal_size;

	if (!rsv_is_empty(&my_rsv->rsv_window)) {
		/*
		 * if the old reservation crosses the group boundary
		 * and the goal is inside the old reservation window,
		 * we will come here when we just failed to allocate from
		 * the first part of the window.  We still have another part
		 * that belongs to the next group.  In this case, there is no
		 * point in discarding our window and trying to allocate a
		 * new one in this group (which will fail); we should keep
		 * the reservation window and simply move on.
		 *
		 * Maybe we could shift the start block of the reservation
		 * window to the first block of the next group.
		 */
		if ((my_rsv->rsv_start <= group_end_block) &&
				(my_rsv->rsv_end > group_end_block) &&
				(start_block >= my_rsv->rsv_start))
			return -1;

		if ((my_rsv->rsv_alloc_hit >
		     (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) {
			/*
			 * if the previous allocation hit ratio is
			 * greater than 1/2, then we double the size of
			 * the reservation window the next time,
			 * otherwise we keep the same size window
			 */
			size = size * 2;
			if (size > EXT4_MAX_RESERVE_BLOCKS)
				size = EXT4_MAX_RESERVE_BLOCKS;
			my_rsv->rsv_goal_size = size;
		}
	}

	spin_lock(rsv_lock);
	/*
	 * shift the search start to the window near the goal block
	 */
	search_head = search_reserve_window(fs_rsv_root, start_block);

	/*
	 * find_next_reservable_window() simply finds a reservable window
	 * inside the given range (start_block, group_end_block).
	 *
	 * To make sure the reservation window has a free bit inside it, we
	 * need to check the bitmap after we find a reservable window.
	 */
retry:
	ret = find_next_reservable_window(search_head, my_rsv, sb,
					  start_block, group_end_block);

	if (ret == -1) {
		if (!rsv_is_empty(&my_rsv->rsv_window))
			rsv_window_remove(sb, my_rsv);
		spin_unlock(rsv_lock);
		return -1;
	}

	/*
	 * On success, find_next_reservable_window() has booked a
	 * reservable space for us (my_rsv).  Before we keep this
	 * reservable space, we need to make sure there is at least one
	 * free block inside this region.
	 *
	 * We search for the first free bit alternately on the block bitmap
	 * and in the copy of the last committed bitmap, until we find an
	 * allocatable block.  The search starts from the start block of the
	 * reservable space we just found.
	 */
	spin_unlock(rsv_lock);
	first_free_block = bitmap_search_next_usable_block(
			my_rsv->rsv_start - group_first_block,
			bitmap_bh, group_end_block - group_first_block + 1);

	if (first_free_block < 0) {
		/*
		 * no free block left in the bitmap, so there is no point
		 * in reserving the space; return failure.
		 */
		spin_lock(rsv_lock);
		if (!rsv_is_empty(&my_rsv->rsv_window))
			rsv_window_remove(sb, my_rsv);
		spin_unlock(rsv_lock);
		return -1;		/* failed */
	}

	start_block = first_free_block + group_first_block;
	/*
	 * check if the first free block is within the
	 * free space we just reserved
	 */
	if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
		return 0;		/* success */
	/*
	 * if the first free bit we found is out of the reservable space,
	 * continue the search for the next reservable space, starting from
	 * where the free block is; we also shift the list head to where we
	 * stopped last time
	 */
	search_head = my_rsv;
	spin_lock(rsv_lock);
	goto retry;
}

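/*
 * Illustrative note: alloc_new_reservation() above grows rsv_goal_size
 * adaptively.  If more than half of the previous window was actually used
 * (rsv_alloc_hit greater than half the window size), the goal size is
 * doubled for the next window, capped at EXT4_MAX_RESERVE_BLOCKS; e.g. a
 * file that keeps hitting its reservation would see 8 -> 16 -> 32 blocks.
 */
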
/**
 * try_to_extend_reservation()
 * @my_rsv:		given reservation window
 * @sb:			super block
 * @size:		the delta to extend
 *
 * Attempt to expand the reservation window large enough to hold the
 * required number of free blocks.
 *
 * Since ext4_try_to_allocate() will always allocate blocks within
 * the reservation window range, if the window size is too small,
 * a multiple-block allocation has to stop at the end of the reservation
 * window.  To make this more efficient, given the total number of
 * blocks needed and the current size of the window, we try to
 * expand the reservation window size if necessary on a best-effort
 * basis before ext4_new_blocks() tries to allocate blocks.
 */
static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
			struct super_block *sb, int size)
{
	struct ext4_reserve_window_node *next_rsv;
	struct rb_node *next;
	spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock;

	if (!spin_trylock(rsv_lock))
		return;

	next = rb_next(&my_rsv->rsv_node);

	if (!next)
		my_rsv->rsv_end += size;
	else {
		next_rsv = rb_entry(next, struct ext4_reserve_window_node, rsv_node);

		if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
			my_rsv->rsv_end += size;
		else
			my_rsv->rsv_end = next_rsv->rsv_start - 1;
	}
	spin_unlock(rsv_lock);
}

/**
 * ext4_try_to_allocate_with_rsv()
 * @sb:			superblock
 * @handle:		handle to this transaction
 * @group:		given allocation block group
 * @bitmap_bh:		bufferhead holding the block bitmap
 * @grp_goal:		given target block within the group
 * @count:		target number of blocks to allocate
 * @my_rsv:		reservation window
 * @errp:		pointer to store the error code
 *
 * This is the main function used to allocate a new block and its reservation
 * window.
 *
 * Each time a new block allocation is needed, we first try to allocate from
 * the file's own reservation.  If it does not have a reservation window,
 * instead of first looking for a free bit in the bitmap and then looking up
 * the reservation list to see whether that bit falls inside somebody else's
 * reservation window, we try to allocate a reservation window for the file
 * starting from the goal, and then do the block allocation within that
 * reservation window.
 *
 * This avoids repeatedly searching the reservation list when somebody is
 * looking for a free block (without a reservation) and there are lots of
 * free blocks, but they are all being reserved.
 *
 * We use a red-black tree for the per-filesystem reservation list.
 *
 */
static ext4_grpblk_t
ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
			ext4_group_t group, struct buffer_head *bitmap_bh,
			ext4_grpblk_t grp_goal,
			struct ext4_reserve_window_node *my_rsv,
			unsigned long *count, int *errp)
{
	ext4_fsblk_t group_first_block, group_last_block;
	ext4_grpblk_t ret = 0;
	int fatal;
	unsigned long num = *count;

	*errp = 0;

	/*
	 * Make sure we use undo access for the bitmap, because it is critical
	 * that we do the frozen_data COW on bitmap buffers in all cases even
	 * if the buffer is in BJ_Forget state in the committing transaction.
	 */
	BUFFER_TRACE(bitmap_bh, "get undo access for new block");
	fatal = ext4_journal_get_undo_access(handle, bitmap_bh);
	if (fatal) {
		*errp = fatal;
		return -1;
	}

	/*
	 * we don't deal with reservation when
	 * the filesystem is mounted without reservation,
	 * or the file is not a regular file,
	 * or the last attempt to allocate a block with reservation turned
	 * on failed
	 */
	if (my_rsv == NULL) {
		ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh,
						grp_goal, count, NULL);
		goto out;
	}
	/*
	 * grp_goal is a group relative block number (if there is a goal)
	 * 0 <= grp_goal < EXT4_BLOCKS_PER_GROUP(sb)
	 * group_first_block is a filesystem-wide block number; it is the
	 * block number of the first block in this group
	 */
	group_first_block = ext4_group_first_block_no(sb, group);
	group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);

	/*
	 * Basically we will allocate a new block from the inode's reservation
	 * window.
	 *
	 * We need to allocate a new reservation window, if:
	 * a) the inode does not have a reservation window; or
	 * b) the last attempt to allocate a block from the existing
	 *    reservation failed; or
	 * c) we come here with a goal and the goal is outside the existing
	 *    reservation window.
	 *
	 * We do not need to allocate a new reservation window if we come here
	 * at the beginning with a goal and the goal is inside the window, or
	 * we don't have a goal but already have a reservation window.
	 * In those cases we can allocate from the reservation window directly.
	 */
	while (1) {
		if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
			!goal_in_my_reservation(&my_rsv->rsv_window,
						grp_goal, group, sb)) {
			if (my_rsv->rsv_goal_size < *count)
				my_rsv->rsv_goal_size = *count;
			ret = alloc_new_reservation(my_rsv, grp_goal, sb,
							group, bitmap_bh);
			if (ret < 0)
				break;			/* failed */

			if (!goal_in_my_reservation(&my_rsv->rsv_window,
							grp_goal, group, sb))
				grp_goal = -1;
		} else if (grp_goal >= 0) {
			int curr = my_rsv->rsv_end -
					(grp_goal + group_first_block) + 1;

			if (curr < *count)
				try_to_extend_reservation(my_rsv, sb,
							*count - curr);
		}

		if ((my_rsv->rsv_start > group_last_block) ||
				(my_rsv->rsv_end < group_first_block)) {
			rsv_window_dump(&EXT4_SB(sb)->s_rsv_window_root, 1);
			BUG();
		}
		ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh,
					   grp_goal, &num, &my_rsv->rsv_window);
		if (ret >= 0) {
			my_rsv->rsv_alloc_hit += num;
			*count = num;
			break;				/* succeed */
		}
		num = *count;
	}
out:
	if (ret >= 0) {
		BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for "
					"bitmap block");
		fatal = ext4_journal_dirty_metadata(handle, bitmap_bh);
		if (fatal) {
			*errp = fatal;
			return -1;
		}
		return ret;
	}

	BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
	ext4_journal_release_buffer(handle, bitmap_bh);
	return ret;
}

/**
 * ext4_has_free_blocks()
 * @sbi:	in-core super block structure.
 * @nblocks:	number of needed blocks
 *
 * Check if the filesystem has free blocks available for allocation.
 * Return the number of blocks available for allocation for this request.
 * On success, return nblocks.
 */
ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
						ext4_fsblk_t nblocks)
{
	ext4_fsblk_t free_blocks;
	ext4_fsblk_t root_blocks = 0;

	free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);

	if (!capable(CAP_SYS_RESOURCE) &&
		sbi->s_resuid != current->fsuid &&
		(sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
		root_blocks = ext4_r_blocks_count(sbi->s_es);
#ifdef CONFIG_SMP
	if (free_blocks - root_blocks < FBC_BATCH)
		free_blocks =
			percpu_counter_sum_and_set(&sbi->s_freeblocks_counter);
#endif
	if (free_blocks - root_blocks < nblocks)
		return free_blocks - root_blocks;
	return nblocks;
}


/**
 * ext4_should_retry_alloc()
 * @sb:			super block
 * @retries:		number of attempts that have been made
 *
 * ext4_should_retry_alloc() is called when ENOSPC is returned, and if
 * it is profitable to retry the operation, this function will wait
 * for the current or committing transaction to complete, and then
 * return TRUE.
 *
 * If the total number of retries exceeds three, return FALSE.
 */
int ext4_should_retry_alloc(struct super_block *sb, int *retries)
{
	if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || (*retries)++ > 3)
		return 0;

	jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);

	return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
}

/**
 * ext4_old_new_blocks() -- core block bitmap based block allocation function
 *
 * @handle:		handle to this transaction
 * @inode:		file inode
 * @goal:		given target block (filesystem wide)
 * @count:		target number of blocks to allocate
 * @errp:		error code
 *
 * ext4_old_new_blocks() uses a goal block to assist allocation and looks up
 * the block bitmap directly to do block allocation.  It first tries to
 * allocate block(s) from the block group that contains the goal block.  If
 * that fails, it will try to allocate block(s) from other block groups
 * without any specific goal block.
 *
 * This function is called when the -o nomballoc mount option is enabled.
 *
 */
ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
			ext4_fsblk_t goal, unsigned long *count, int *errp)
{
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *gdp_bh;
	ext4_group_t group_no;
	ext4_group_t goal_group;
	ext4_grpblk_t grp_target_blk;	/* blockgroup relative goal block */
	ext4_grpblk_t grp_alloc_blk;	/* blockgroup-relative allocated block*/
	ext4_fsblk_t ret_block;		/* filesystem-wide allocated block */
	ext4_group_t bgi;		/* blockgroup iteration index */
	int fatal = 0, err;
	int performed_allocation = 0;
	ext4_grpblk_t free_blocks;	/* number of free blocks in a group */
	struct super_block *sb;
	struct ext4_group_desc *gdp;
	struct ext4_super_block *es;
	struct ext4_sb_info *sbi;
	struct ext4_reserve_window_node *my_rsv = NULL;
	struct ext4_block_alloc_info *block_i;
	unsigned short windowsz = 0;
	ext4_group_t ngroups;
	unsigned long num = *count;

	sb = inode->i_sb;
	if (!sb) {
		*errp = -ENODEV;
		printk("ext4_new_block: nonexistent device");
		return 0;
	}

	sbi = EXT4_SB(sb);
	if (!EXT4_I(inode)->i_delalloc_reserved_flag) {
		/*
		 * With delalloc we already reserved the blocks
		 */
		*count = ext4_has_free_blocks(sbi, *count);
	}
	if (*count == 0) {
		*errp = -ENOSPC;
		return 0;	/* return with ENOSPC error */
	}
	num = *count;

	/*
	 * Check quota for allocation of this block.
	 */
	if (DQUOT_ALLOC_BLOCK(inode, num)) {
		*errp = -EDQUOT;
		return 0;
	}

	sbi = EXT4_SB(sb);
	es = EXT4_SB(sb)->s_es;
	ext4_debug("goal=%llu.\n", goal);
	/*
	 * Allocate a block from the reservation only when the
	 * filesystem is mounted with reservation (default, -o reservation),
	 * it's a regular file, and
	 * the desired window size is greater than 0 (one could use the ioctl
	 * command EXT4_IOC_SETRSVSZ to set the window size to 0 to turn off
	 * reservation on that particular file)
	 */
	block_i = EXT4_I(inode)->i_block_alloc_info;
	if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
		my_rsv = &block_i->rsv_window_node;

	/*
	 * First, test whether the goal block is free.
	 */
	if (goal < le32_to_cpu(es->s_first_data_block) ||
	    goal >= ext4_blocks_count(es))
		goal = le32_to_cpu(es->s_first_data_block);
	ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk);
	goal_group = group_no;
retry_alloc:
	gdp = ext4_get_group_desc(sb, group_no, &gdp_bh);
	if (!gdp)
		goto io_error;

	free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
	/*
	 * if there are not enough free blocks to make a new reservation,
	 * turn off reservation for this allocation
	 */
	if (my_rsv && (free_blocks < windowsz)
		&& (rsv_is_empty(&my_rsv->rsv_window)))
		my_rsv = NULL;

	if (free_blocks > 0) {
		bitmap_bh = ext4_read_block_bitmap(sb, group_no);
		if (!bitmap_bh)
			goto io_error;
		grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle,
					group_no, bitmap_bh, grp_target_blk,
					my_rsv, &num, &fatal);
		if (fatal)
			goto out;
		if (grp_alloc_blk >= 0)
			goto allocated;
	}

	ngroups = EXT4_SB(sb)->s_groups_count;
	smp_rmb();

	/*
	 * Now search the rest of the groups.  We assume that
We assume that 1779 * group_no and gdp correctly point to the last group visited. 1780 */ 1781 for (bgi = 0; bgi < ngroups; bgi++) { 1782 group_no++; 1783 if (group_no >= ngroups) 1784 group_no = 0; 1785 gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); 1786 if (!gdp) 1787 goto io_error; 1788 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); 1789 /* 1790 * skip this group if the number of 1791 * free blocks is less than half of the reservation 1792 * window size. 1793 */ 1794 if (free_blocks <= (windowsz/2)) 1795 continue; 1796 1797 brelse(bitmap_bh); 1798 bitmap_bh = ext4_read_block_bitmap(sb, group_no); 1799 if (!bitmap_bh) 1800 goto io_error; 1801 /* 1802 * try to allocate block(s) from this group, without a goal (-1). 1803 */ 1804 grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, 1805 group_no, bitmap_bh, -1, my_rsv, 1806 &num, &fatal); 1807 if (fatal) 1808 goto out; 1809 if (grp_alloc_blk >= 0) 1810 goto allocated; 1811 } 1812 /* 1813 * We may end up with a bogus earlier ENOSPC error because the 1814 * filesystem is "full" of reservations, but there may indeed be 1815 * free blocks available on disk. In this case, we just forget 1816 * about the reservations and do block allocation as if there 1817 * were no reservations. 1818 */ 1819 if (my_rsv) { 1820 my_rsv = NULL; 1821 windowsz = 0; 1822 group_no = goal_group; 1823 goto retry_alloc; 1824 } 1825 /* No space left on the device */ 1826 *errp = -ENOSPC; 1827 goto out; 1828 1829 allocated: 1830 1831 ext4_debug("using block group %lu(%d)\n", 1832 group_no, gdp->bg_free_blocks_count); 1833 1834 BUFFER_TRACE(gdp_bh, "get_write_access"); 1835 fatal = ext4_journal_get_write_access(handle, gdp_bh); 1836 if (fatal) 1837 goto out; 1838 1839 ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no); 1840 1841 if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || 1842 in_range(ext4_inode_bitmap(sb, gdp), ret_block, num) || 1843 in_range(ret_block, ext4_inode_table(sb, gdp), 1844 EXT4_SB(sb)->s_itb_per_group) || 1845 in_range(ret_block + num - 1, ext4_inode_table(sb, gdp), 1846 EXT4_SB(sb)->s_itb_per_group)) { 1847 ext4_error(sb, "ext4_new_block", 1848 "Allocating block in system zone - " 1849 "blocks from %llu, length %lu", 1850 ret_block, num); 1851 /* 1852 * claim_block marked the blocks we allocated 1853 * as in use.
So we may want to selectively 1854 * mark some of the blocks as free 1855 */ 1856 goto retry_alloc; 1857 } 1858 1859 performed_allocation = 1; 1860 1861 #ifdef CONFIG_JBD2_DEBUG 1862 { 1863 struct buffer_head *debug_bh; 1864 1865 /* Record bitmap buffer state in the newly allocated block */ 1866 debug_bh = sb_find_get_block(sb, ret_block); 1867 if (debug_bh) { 1868 BUFFER_TRACE(debug_bh, "state when allocated"); 1869 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state"); 1870 brelse(debug_bh); 1871 } 1872 } 1873 jbd_lock_bh_state(bitmap_bh); 1874 spin_lock(sb_bgl_lock(sbi, group_no)); 1875 if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) { 1876 int i; 1877 1878 for (i = 0; i < num; i++) { 1879 if (ext4_test_bit(grp_alloc_blk+i, 1880 bh2jh(bitmap_bh)->b_committed_data)) { 1881 printk("%s: block was unexpectedly set in " 1882 "b_committed_data\n", __func__); 1883 } 1884 } 1885 } 1886 ext4_debug("found bit %d\n", grp_alloc_blk); 1887 spin_unlock(sb_bgl_lock(sbi, group_no)); 1888 jbd_unlock_bh_state(bitmap_bh); 1889 #endif 1890 1891 if (ret_block + num - 1 >= ext4_blocks_count(es)) { 1892 ext4_error(sb, "ext4_new_block", 1893 "block(%llu) >= blocks count(%llu) - " 1894 "block_group = %lu, es == %p ", ret_block, 1895 ext4_blocks_count(es), group_no, es); 1896 goto out; 1897 } 1898 1899 /* 1900 * It is up to the caller to add the new buffer to a journal 1901 * list of some description. We don't know in advance whether 1902 * the caller wants to use it as metadata or data. 1903 */ 1904 spin_lock(sb_bgl_lock(sbi, group_no)); 1905 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) 1906 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 1907 le16_add_cpu(&gdp->bg_free_blocks_count, -num); 1908 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); 1909 spin_unlock(sb_bgl_lock(sbi, group_no)); 1910 if (!EXT4_I(inode)->i_delalloc_reserved_flag) 1911 percpu_counter_sub(&sbi->s_freeblocks_counter, num); 1912 1913 if (sbi->s_log_groups_per_flex) { 1914 ext4_group_t flex_group = ext4_flex_group(sbi, group_no); 1915 spin_lock(sb_bgl_lock(sbi, flex_group)); 1916 sbi->s_flex_groups[flex_group].free_blocks -= num; 1917 spin_unlock(sb_bgl_lock(sbi, flex_group)); 1918 } 1919 1920 BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); 1921 err = ext4_journal_dirty_metadata(handle, gdp_bh); 1922 if (!fatal) 1923 fatal = err; 1924 1925 sb->s_dirt = 1; 1926 if (fatal) 1927 goto out; 1928 1929 *errp = 0; 1930 brelse(bitmap_bh); 1931 DQUOT_FREE_BLOCK(inode, *count-num); 1932 *count = num; 1933 return ret_block; 1934 1935 io_error: 1936 *errp = -EIO; 1937 out: 1938 if (fatal) { 1939 *errp = fatal; 1940 ext4_std_error(sb, fatal); 1941 } 1942 /* 1943 * Undo the block allocation 1944 */ 1945 if (!performed_allocation) 1946 DQUOT_FREE_BLOCK(inode, *count); 1947 brelse(bitmap_bh); 1948 return 0; 1949 } 1950 1951 #define EXT4_META_BLOCK 0x1 1952 1953 static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, 1954 ext4_lblk_t iblock, ext4_fsblk_t goal, 1955 unsigned long *count, int *errp, int flags) 1956 { 1957 struct ext4_allocation_request ar; 1958 ext4_fsblk_t ret; 1959 1960 if (!test_opt(inode->i_sb, MBALLOC)) { 1961 return ext4_old_new_blocks(handle, inode, goal, count, errp); 1962 } 1963 1964 memset(&ar, 0, sizeof(ar)); 1965 /* Fill with neighbour allocated blocks */ 1966 1967 ar.inode = inode; 1968 ar.goal = goal; 1969 ar.len = *count; 1970 ar.logical = iblock; 1971 1972 if (S_ISREG(inode->i_mode) && !(flags & EXT4_META_BLOCK)) 1973 /* enable in-core preallocation 
for data block allocation */ 1974 ar.flags = EXT4_MB_HINT_DATA; 1975 else 1976 /* disable in-core preallocation for non-regular files */ 1977 ar.flags = 0; 1978 1979 ret = ext4_mb_new_blocks(handle, &ar, errp); 1980 *count = ar.len; 1981 return ret; 1982 } 1983 1984 /* 1985 * ext4_new_meta_blocks() -- allocate blocks for meta data (indexing) blocks 1986 * 1987 * @handle: handle to this transaction 1988 * @inode: file inode 1989 * @goal: given target block (filesystem wide) 1990 * @count: total number of blocks needed 1991 * @errp: error code 1992 * 1993 * Return 1st allocated block number on success; *count stores the total 1994 * number of blocks allocated, and any error is stored in the errp pointer 1995 */ 1996 ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, 1997 ext4_fsblk_t goal, unsigned long *count, int *errp) 1998 { 1999 ext4_fsblk_t ret; 2000 ret = do_blk_alloc(handle, inode, 0, goal, 2001 count, errp, EXT4_META_BLOCK); 2002 /* 2003 * Account for the allocated meta blocks 2004 */ 2005 if (!(*errp)) { 2006 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 2007 EXT4_I(inode)->i_allocated_meta_blocks += *count; 2008 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 2009 } 2010 return ret; 2011 } 2012 2013 /* 2014 * ext4_new_meta_block() -- allocate a single block for meta data (indexing) 2015 * 2016 * @handle: handle to this transaction 2017 * @inode: file inode 2018 * @goal: given target block (filesystem wide) 2019 * @errp: error code 2020 * 2021 * Return allocated block number on success 2022 */ 2023 ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode, 2024 ext4_fsblk_t goal, int *errp) 2025 { 2026 unsigned long count = 1; 2027 return ext4_new_meta_blocks(handle, inode, goal, &count, errp); 2028 } 2029 2030 /* 2031 * ext4_new_blocks() -- allocate data blocks 2032 * 2033 * @handle: handle to this transaction 2034 * @inode: file inode 2035 * @goal: given target block (filesystem wide) 2036 * @count: total number of blocks needed 2037 * @errp: error code 2038 * 2039 * Return 1st allocated block number on success; *count stores the total 2040 * number of blocks allocated, and any error is stored in the errp pointer 2041 */ 2042 2043 ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, 2044 ext4_lblk_t iblock, ext4_fsblk_t goal, 2045 unsigned long *count, int *errp) 2046 { 2047 return do_blk_alloc(handle, inode, iblock, goal, count, errp, 0); 2048 } 2049 2050 /** 2051 * ext4_count_free_blocks() -- count filesystem free blocks 2052 * @sb: superblock 2053 * 2054 * Adds up the number of free blocks from each block group.
2055 */ 2056 ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) 2057 { 2058 ext4_fsblk_t desc_count; 2059 struct ext4_group_desc *gdp; 2060 ext4_group_t i; 2061 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 2062 #ifdef EXT4FS_DEBUG 2063 struct ext4_super_block *es; 2064 ext4_fsblk_t bitmap_count; 2065 unsigned long x; 2066 struct buffer_head *bitmap_bh = NULL; 2067 2068 es = EXT4_SB(sb)->s_es; 2069 desc_count = 0; 2070 bitmap_count = 0; 2071 gdp = NULL; 2072 2073 smp_rmb(); 2074 for (i = 0; i < ngroups; i++) { 2075 gdp = ext4_get_group_desc(sb, i, NULL); 2076 if (!gdp) 2077 continue; 2078 desc_count += le16_to_cpu(gdp->bg_free_blocks_count); 2079 brelse(bitmap_bh); 2080 bitmap_bh = ext4_read_block_bitmap(sb, i); 2081 if (bitmap_bh == NULL) 2082 continue; 2083 2084 x = ext4_count_free(bitmap_bh, sb->s_blocksize); 2085 printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", 2086 i, le16_to_cpu(gdp->bg_free_blocks_count), x); 2087 bitmap_count += x; 2088 } 2089 brelse(bitmap_bh); 2090 printk("ext4_count_free_blocks: stored = %llu" 2091 ", computed = %llu, %llu\n", 2092 ext4_free_blocks_count(es), 2093 desc_count, bitmap_count); 2094 return bitmap_count; 2095 #else 2096 desc_count = 0; 2097 smp_rmb(); 2098 for (i = 0; i < ngroups; i++) { 2099 gdp = ext4_get_group_desc(sb, i, NULL); 2100 if (!gdp) 2101 continue; 2102 desc_count += le16_to_cpu(gdp->bg_free_blocks_count); 2103 } 2104 2105 return desc_count; 2106 #endif 2107 } 2108 2109 static inline int test_root(ext4_group_t a, int b) 2110 { 2111 int num = b; 2112 2113 while (a > num) 2114 num *= b; 2115 return num == a; 2116 } 2117 2118 static int ext4_group_sparse(ext4_group_t group) 2119 { 2120 if (group <= 1) 2121 return 1; 2122 if (!(group & 1)) 2123 return 0; 2124 return (test_root(group, 7) || test_root(group, 5) || 2125 test_root(group, 3)); 2126 } 2127 2128 /** 2129 * ext4_bg_has_super - number of blocks used by the superblock in group 2130 * @sb: superblock for filesystem 2131 * @group: group number to check 2132 * 2133 * Return the number of blocks used by the superblock (primary or backup) 2134 * in this group. Currently this will be only 0 or 1. 2135 */ 2136 int ext4_bg_has_super(struct super_block *sb, ext4_group_t group) 2137 { 2138 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 2139 EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && 2140 !ext4_group_sparse(group)) 2141 return 0; 2142 return 1; 2143 } 2144 2145 static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, 2146 ext4_group_t group) 2147 { 2148 unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb); 2149 ext4_group_t first = metagroup * EXT4_DESC_PER_BLOCK(sb); 2150 ext4_group_t last = first + EXT4_DESC_PER_BLOCK(sb) - 1; 2151 2152 if (group == first || group == first + 1 || group == last) 2153 return 1; 2154 return 0; 2155 } 2156 2157 static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, 2158 ext4_group_t group) 2159 { 2160 return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0; 2161 } 2162 2163 /** 2164 * ext4_bg_num_gdb - number of blocks used by the group table in group 2165 * @sb: superblock for filesystem 2166 * @group: group number to check 2167 * 2168 * Return the number of blocks used by the group descriptor table 2169 * (primary or backup) in this group. In the future there may be a 2170 * different number of descriptor blocks in each group. 
2171 */ 2172 unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) 2173 { 2174 unsigned long first_meta_bg = 2175 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); 2176 unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb); 2177 2178 if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || 2179 metagroup < first_meta_bg) 2180 return ext4_bg_num_gdb_nometa(sb,group); 2181 2182 return ext4_bg_num_gdb_meta(sb,group); 2183 2184 } 2185
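/*
 * Illustrative sketch (not part of the original balloc.c): a hypothetical
 * caller showing the ENOSPC retry pattern that ext4_should_retry_alloc()
 * is meant for.  ext4_new_meta_block() returns 0 and sets *errp on failure;
 * when the error is -ENOSPC, ext4_should_retry_alloc() decides whether
 * forcing a transaction commit makes a retry worthwhile.  The function
 * name below is hypothetical and exists only to demonstrate the calling
 * convention.
 */
static ext4_fsblk_t example_alloc_meta_block_retry(handle_t *handle,
						   struct inode *inode,
						   ext4_fsblk_t goal,
						   int *errp)
{
	ext4_fsblk_t block;
	int retries = 0;

retry:
	block = ext4_new_meta_block(handle, inode, goal, errp);
	if (!block && *errp == -ENOSPC &&
	    ext4_should_retry_alloc(inode->i_sb, &retries))
		goto retry;	/* a forced commit may have freed blocks */
	return block;		/* 0 on failure, *errp holds the error */
}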
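/*
 * Illustrative sketch (not part of the original balloc.c): combining
 * ext4_bg_has_super() and ext4_bg_num_gdb() to count how many blocks in
 * a group are taken by the (primary or backup) superblock plus the group
 * descriptor table copies, mirroring the accounting performed in
 * ext4_init_block_bitmap().  Note this does not include the reserved GDT
 * blocks (s_reserved_gdt_blocks), which are added separately in the
 * non-META_BG case.  The helper name is hypothetical.
 */
static unsigned long example_bg_sb_and_gdt_blocks(struct super_block *sb,
						  ext4_group_t group)
{
	/* one block for the superblock copy, if this group carries one */
	unsigned long n = ext4_bg_has_super(sb, group);

	/* plus the group descriptor table blocks stored in this group */
	n += ext4_bg_num_gdb(sb, group);

	return n;
}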