1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * suballoc.c 5 * 6 * metadata alloc and free 7 * Inspired by ext3 block groups. 8 * 9 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public 13 * License as published by the Free Software Foundation; either 14 * version 2 of the License, or (at your option) any later version. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public 22 * License along with this program; if not, write to the 23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 24 * Boston, MA 021110-1307, USA. 25 */ 26 27 #include <linux/fs.h> 28 #include <linux/types.h> 29 #include <linux/slab.h> 30 #include <linux/highmem.h> 31 32 #define MLOG_MASK_PREFIX ML_DISK_ALLOC 33 #include <cluster/masklog.h> 34 35 #include "ocfs2.h" 36 37 #include "alloc.h" 38 #include "dlmglue.h" 39 #include "inode.h" 40 #include "journal.h" 41 #include "localalloc.h" 42 #include "suballoc.h" 43 #include "super.h" 44 #include "sysfile.h" 45 #include "uptodate.h" 46 47 #include "buffer_head_io.h" 48 49 static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); 50 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); 51 static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); 52 static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle, 53 struct inode *alloc_inode, 54 struct buffer_head *bg_bh, 55 u64 group_blkno, 56 u16 my_chain, 57 struct ocfs2_chain_list *cl); 58 static int ocfs2_block_group_alloc(struct ocfs2_super *osb, 59 struct inode *alloc_inode, 60 struct buffer_head *bh); 61 62 static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, 63 struct ocfs2_alloc_context *ac); 64 65 static int ocfs2_cluster_group_search(struct inode *inode, 66 struct buffer_head *group_bh, 67 u32 bits_wanted, u32 min_bits, 68 u16 *bit_off, u16 *bits_found); 69 static int ocfs2_block_group_search(struct inode *inode, 70 struct buffer_head *group_bh, 71 u32 bits_wanted, u32 min_bits, 72 u16 *bit_off, u16 *bits_found); 73 static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, 74 u32 bits_wanted, 75 u32 min_bits, 76 u16 *bit_off, 77 unsigned int *num_bits, 78 u64 *bg_blkno); 79 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, 80 struct ocfs2_alloc_context *ac, 81 u32 bits_wanted, 82 u32 min_bits, 83 u16 *bit_off, 84 unsigned int *num_bits, 85 u64 *bg_blkno); 86 static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, 87 int nr); 88 static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, 89 struct buffer_head *bg_bh, 90 unsigned int bits_wanted, 91 u16 *bit_off, 92 u16 *bits_found); 93 static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle, 94 struct inode *alloc_inode, 95 struct ocfs2_group_desc *bg, 96 struct buffer_head *group_bh, 97 unsigned int bit_off, 98 unsigned int num_bits); 99 static inline int ocfs2_block_group_clear_bits(struct ocfs2_journal_handle *handle, 100 struct inode *alloc_inode, 101 struct ocfs2_group_desc *bg, 102 struct buffer_head *group_bh, 103 unsigned int bit_off, 104 unsigned int num_bits); 105 106 static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle, 107 struct inode *alloc_inode, 108 struct buffer_head *fe_bh, 109 struct buffer_head *bg_bh, 110 struct buffer_head *prev_bg_bh, 111 u16 chain); 112 static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, 113 u32 wanted); 114 static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle, 115 struct inode *alloc_inode, 116 struct buffer_head *alloc_bh, 117 unsigned int start_bit, 118 u64 bg_blkno, 119 unsigned int count); 120 static inline u64 ocfs2_which_suballoc_group(u64 block, 121 unsigned int bit); 122 static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, 123 u64 bg_blkno, 124 u16 bg_bit_off); 125 static inline u64 ocfs2_which_cluster_group(struct inode *inode, 126 u32 cluster); 127 static inline void ocfs2_block_to_cluster_group(struct inode *inode, 128 u64 data_blkno, 129 u64 *bg_blkno, 130 u16 *bg_bit_off); 131 132 void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) 133 { 134 if (ac->ac_inode) 135 iput(ac->ac_inode); 136 if (ac->ac_bh) 137 brelse(ac->ac_bh); 138 kfree(ac); 139 } 140 141 static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) 142 { 143 return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); 144 } 145 146 static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle, 147 struct inode *alloc_inode, 148 struct buffer_head *bg_bh, 149 u64 group_blkno, 150 u16 my_chain, 151 struct ocfs2_chain_list *cl) 152 { 153 int status = 0; 154 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 155 struct super_block * sb = alloc_inode->i_sb; 156 157 mlog_entry_void(); 158 159 if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) { 160 ocfs2_error(alloc_inode->i_sb, "group block (%llu) != " 161 "b_blocknr (%llu)", 162 (unsigned long long)group_blkno, 163 (unsigned long long) bg_bh->b_blocknr); 164 status = -EIO; 165 goto bail; 166 } 167 168 status = ocfs2_journal_access(handle, 169 alloc_inode, 170 bg_bh, 171 OCFS2_JOURNAL_ACCESS_CREATE); 172 if (status < 0) { 173 mlog_errno(status); 174 goto bail; 175 } 176 177 memset(bg, 0, sb->s_blocksize); 178 strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE); 179 bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); 180 bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb)); 181 bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl)); 182 bg->bg_chain = cpu_to_le16(my_chain); 183 bg->bg_next_group = cl->cl_recs[my_chain].c_blkno; 184 bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno); 185 bg->bg_blkno = cpu_to_le64(group_blkno); 186 /* set the 1st bit in the bitmap to account for the descriptor block */ 187 ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap); 188 bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1); 189 190 status = ocfs2_journal_dirty(handle, bg_bh); 191 if (status < 0) 192 mlog_errno(status); 193 194 /* There is no need to zero out or otherwise initialize the 195 * other blocks in a group - All valid FS metadata in a block 196 * group stores the superblock fs_generation value at 197 * allocation time. */ 198 199 bail: 200 mlog_exit(status); 201 return status; 202 } 203 204 static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl) 205 { 206 u16 curr, best; 207 208 best = curr = 0; 209 while (curr < le16_to_cpu(cl->cl_count)) { 210 if (le32_to_cpu(cl->cl_recs[best].c_total) > 211 le32_to_cpu(cl->cl_recs[curr].c_total)) 212 best = curr; 213 curr++; 214 } 215 return best; 216 } 217 218 /* 219 * We expect the block group allocator to already be locked. 220 */ 221 static int ocfs2_block_group_alloc(struct ocfs2_super *osb, 222 struct inode *alloc_inode, 223 struct buffer_head *bh) 224 { 225 int status, credits; 226 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; 227 struct ocfs2_chain_list *cl; 228 struct ocfs2_alloc_context *ac = NULL; 229 struct ocfs2_journal_handle *handle = NULL; 230 u32 bit_off, num_bits; 231 u16 alloc_rec; 232 u64 bg_blkno; 233 struct buffer_head *bg_bh = NULL; 234 struct ocfs2_group_desc *bg; 235 236 BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode)); 237 238 mlog_entry_void(); 239 240 handle = ocfs2_alloc_handle(osb); 241 if (!handle) { 242 status = -ENOMEM; 243 mlog_errno(status); 244 goto bail; 245 } 246 247 cl = &fe->id2.i_chain; 248 status = ocfs2_reserve_clusters(osb, 249 handle, 250 le16_to_cpu(cl->cl_cpg), 251 &ac); 252 if (status < 0) { 253 if (status != -ENOSPC) 254 mlog_errno(status); 255 goto bail; 256 } 257 258 credits = ocfs2_calc_group_alloc_credits(osb->sb, 259 le16_to_cpu(cl->cl_cpg)); 260 handle = ocfs2_start_trans(osb, handle, credits); 261 if (IS_ERR(handle)) { 262 status = PTR_ERR(handle); 263 handle = NULL; 264 mlog_errno(status); 265 goto bail; 266 } 267 268 status = ocfs2_claim_clusters(osb, 269 handle, 270 ac, 271 le16_to_cpu(cl->cl_cpg), 272 &bit_off, 273 &num_bits); 274 if (status < 0) { 275 if (status != -ENOSPC) 276 mlog_errno(status); 277 goto bail; 278 } 279 280 alloc_rec = ocfs2_find_smallest_chain(cl); 281 282 /* setup the group */ 283 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off); 284 mlog(0, "new descriptor, record %u, at block %llu\n", 285 alloc_rec, (unsigned long long)bg_blkno); 286 287 bg_bh = sb_getblk(osb->sb, bg_blkno); 288 if (!bg_bh) { 289 status = -EIO; 290 mlog_errno(status); 291 goto bail; 292 } 293 ocfs2_set_new_buffer_uptodate(alloc_inode, bg_bh); 294 295 status = ocfs2_block_group_fill(handle, 296 alloc_inode, 297 bg_bh, 298 bg_blkno, 299 alloc_rec, 300 cl); 301 if (status < 0) { 302 mlog_errno(status); 303 goto bail; 304 } 305 306 bg = (struct ocfs2_group_desc *) bg_bh->b_data; 307 308 status = ocfs2_journal_access(handle, alloc_inode, 309 bh, OCFS2_JOURNAL_ACCESS_WRITE); 310 if (status < 0) { 311 mlog_errno(status); 312 goto bail; 313 } 314 315 le32_add_cpu(&cl->cl_recs[alloc_rec].c_free, 316 le16_to_cpu(bg->bg_free_bits_count)); 317 le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, le16_to_cpu(bg->bg_bits)); 318 cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg_blkno); 319 if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count)) 320 le16_add_cpu(&cl->cl_next_free_rec, 1); 321 322 le32_add_cpu(&fe->id1.bitmap1.i_used, le16_to_cpu(bg->bg_bits) - 323 le16_to_cpu(bg->bg_free_bits_count)); 324 le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits)); 325 le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg)); 326 327 status = ocfs2_journal_dirty(handle, bh); 328 if (status < 0) { 329 mlog_errno(status); 330 goto bail; 331 } 332 333 spin_lock(&OCFS2_I(alloc_inode)->ip_lock); 334 OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 335 fe->i_size = cpu_to_le64(ocfs2_clusters_to_bytes(alloc_inode->i_sb, 336 le32_to_cpu(fe->i_clusters))); 337 spin_unlock(&OCFS2_I(alloc_inode)->ip_lock); 338 i_size_write(alloc_inode, le64_to_cpu(fe->i_size)); 339 alloc_inode->i_blocks = 340 ocfs2_align_bytes_to_sectors(i_size_read(alloc_inode)); 341 342 status = 0; 343 bail: 344 if (handle) 345 ocfs2_commit_trans(handle); 346 347 if (ac) 348 ocfs2_free_alloc_context(ac); 349 350 if (bg_bh) 351 brelse(bg_bh); 352 353 mlog_exit(status); 354 return status; 355 } 356 357 static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, 358 struct ocfs2_alloc_context *ac) 359 { 360 int status; 361 u32 bits_wanted = ac->ac_bits_wanted; 362 struct inode *alloc_inode = ac->ac_inode; 363 struct buffer_head *bh = NULL; 364 struct ocfs2_journal_handle *handle = ac->ac_handle; 365 struct ocfs2_dinode *fe; 366 u32 free_bits; 367 368 mlog_entry_void(); 369 370 BUG_ON(handle->flags & OCFS2_HANDLE_STARTED); 371 372 ocfs2_handle_add_inode(handle, alloc_inode); 373 status = ocfs2_meta_lock(alloc_inode, handle, &bh, 1); 374 if (status < 0) { 375 mlog_errno(status); 376 goto bail; 377 } 378 379 fe = (struct ocfs2_dinode *) bh->b_data; 380 if (!OCFS2_IS_VALID_DINODE(fe)) { 381 OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); 382 status = -EIO; 383 goto bail; 384 } 385 if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) { 386 ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu", 387 (unsigned long long)le64_to_cpu(fe->i_blkno)); 388 status = -EIO; 389 goto bail; 390 } 391 392 free_bits = le32_to_cpu(fe->id1.bitmap1.i_total) - 393 le32_to_cpu(fe->id1.bitmap1.i_used); 394 395 if (bits_wanted > free_bits) { 396 /* cluster bitmap never grows */ 397 if (ocfs2_is_cluster_bitmap(alloc_inode)) { 398 mlog(0, "Disk Full: wanted=%u, free_bits=%u\n", 399 bits_wanted, free_bits); 400 status = -ENOSPC; 401 goto bail; 402 } 403 404 status = ocfs2_block_group_alloc(osb, alloc_inode, bh); 405 if (status < 0) { 406 if (status != -ENOSPC) 407 mlog_errno(status); 408 goto bail; 409 } 410 atomic_inc(&osb->alloc_stats.bg_extends); 411 412 /* You should never ask for this much metadata */ 413 BUG_ON(bits_wanted > 414 (le32_to_cpu(fe->id1.bitmap1.i_total) 415 - le32_to_cpu(fe->id1.bitmap1.i_used))); 416 } 417 418 get_bh(bh); 419 ac->ac_bh = bh; 420 bail: 421 if (bh) 422 brelse(bh); 423 424 mlog_exit(status); 425 return status; 426 } 427 428 int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, 429 struct ocfs2_journal_handle *handle, 430 struct ocfs2_dinode *fe, 431 struct ocfs2_alloc_context **ac) 432 { 433 int status; 434 struct inode *alloc_inode = NULL; 435 436 *ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 437 if (!(*ac)) { 438 status = -ENOMEM; 439 mlog_errno(status); 440 goto bail; 441 } 442 443 (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe); 444 (*ac)->ac_handle = handle; 445 (*ac)->ac_which = OCFS2_AC_USE_META; 446 447 #ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS 448 alloc_inode = ocfs2_get_system_file_inode(osb, 449 EXTENT_ALLOC_SYSTEM_INODE, 450 0); 451 #else 452 alloc_inode = ocfs2_get_system_file_inode(osb, 453 EXTENT_ALLOC_SYSTEM_INODE, 454 osb->slot_num); 455 #endif 456 if (!alloc_inode) { 457 status = -ENOMEM; 458 mlog_errno(status); 459 goto bail; 460 } 461 462 (*ac)->ac_inode = igrab(alloc_inode); 463 (*ac)->ac_group_search = ocfs2_block_group_search; 464 465 status = ocfs2_reserve_suballoc_bits(osb, (*ac)); 466 if (status < 0) { 467 if (status != -ENOSPC) 468 mlog_errno(status); 469 goto bail; 470 } 471 472 status = 0; 473 bail: 474 if ((status < 0) && *ac) { 475 ocfs2_free_alloc_context(*ac); 476 *ac = NULL; 477 } 478 479 if (alloc_inode) 480 iput(alloc_inode); 481 482 mlog_exit(status); 483 return status; 484 } 485 486 int ocfs2_reserve_new_inode(struct ocfs2_super *osb, 487 struct ocfs2_journal_handle *handle, 488 struct ocfs2_alloc_context **ac) 489 { 490 int status; 491 struct inode *alloc_inode = NULL; 492 493 *ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 494 if (!(*ac)) { 495 status = -ENOMEM; 496 mlog_errno(status); 497 goto bail; 498 } 499 500 (*ac)->ac_bits_wanted = 1; 501 (*ac)->ac_handle = handle; 502 (*ac)->ac_which = OCFS2_AC_USE_INODE; 503 504 alloc_inode = ocfs2_get_system_file_inode(osb, 505 INODE_ALLOC_SYSTEM_INODE, 506 osb->slot_num); 507 if (!alloc_inode) { 508 status = -ENOMEM; 509 mlog_errno(status); 510 goto bail; 511 } 512 513 (*ac)->ac_inode = igrab(alloc_inode); 514 (*ac)->ac_group_search = ocfs2_block_group_search; 515 516 status = ocfs2_reserve_suballoc_bits(osb, *ac); 517 if (status < 0) { 518 if (status != -ENOSPC) 519 mlog_errno(status); 520 goto bail; 521 } 522 523 status = 0; 524 bail: 525 if ((status < 0) && *ac) { 526 ocfs2_free_alloc_context(*ac); 527 *ac = NULL; 528 } 529 530 if (alloc_inode) 531 iput(alloc_inode); 532 533 mlog_exit(status); 534 return status; 535 } 536 537 /* local alloc code has to do the same thing, so rather than do this 538 * twice.. */ 539 int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, 540 struct ocfs2_alloc_context *ac) 541 { 542 int status; 543 544 ac->ac_inode = ocfs2_get_system_file_inode(osb, 545 GLOBAL_BITMAP_SYSTEM_INODE, 546 OCFS2_INVALID_SLOT); 547 if (!ac->ac_inode) { 548 status = -EINVAL; 549 mlog(ML_ERROR, "Could not get bitmap inode!\n"); 550 goto bail; 551 } 552 ac->ac_which = OCFS2_AC_USE_MAIN; 553 ac->ac_group_search = ocfs2_cluster_group_search; 554 555 status = ocfs2_reserve_suballoc_bits(osb, ac); 556 if (status < 0 && status != -ENOSPC) 557 mlog_errno(status); 558 bail: 559 return status; 560 } 561 562 /* Callers don't need to care which bitmap (local alloc or main) to 563 * use so we figure it out for them, but unfortunately this clutters 564 * things a bit. */ 565 int ocfs2_reserve_clusters(struct ocfs2_super *osb, 566 struct ocfs2_journal_handle *handle, 567 u32 bits_wanted, 568 struct ocfs2_alloc_context **ac) 569 { 570 int status; 571 572 mlog_entry_void(); 573 574 BUG_ON(!handle); 575 576 *ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 577 if (!(*ac)) { 578 status = -ENOMEM; 579 mlog_errno(status); 580 goto bail; 581 } 582 583 (*ac)->ac_bits_wanted = bits_wanted; 584 (*ac)->ac_handle = handle; 585 586 status = -ENOSPC; 587 if (ocfs2_alloc_should_use_local(osb, bits_wanted)) { 588 status = ocfs2_reserve_local_alloc_bits(osb, 589 handle, 590 bits_wanted, 591 *ac); 592 if ((status < 0) && (status != -ENOSPC)) { 593 mlog_errno(status); 594 goto bail; 595 } else if (status == -ENOSPC) { 596 /* reserve_local_bits will return enospc with 597 * the local alloc inode still locked, so we 598 * can change this safely here. */ 599 mlog(0, "Disabling local alloc\n"); 600 /* We set to OCFS2_LA_DISABLED so that umount 601 * can clean up what's left of the local 602 * allocation */ 603 osb->local_alloc_state = OCFS2_LA_DISABLED; 604 } 605 } 606 607 if (status == -ENOSPC) { 608 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); 609 if (status < 0) { 610 if (status != -ENOSPC) 611 mlog_errno(status); 612 goto bail; 613 } 614 } 615 616 status = 0; 617 bail: 618 if ((status < 0) && *ac) { 619 ocfs2_free_alloc_context(*ac); 620 *ac = NULL; 621 } 622 623 mlog_exit(status); 624 return status; 625 } 626 627 /* 628 * More or less lifted from ext3. I'll leave their description below: 629 * 630 * "For ext3 allocations, we must not reuse any blocks which are 631 * allocated in the bitmap buffer's "last committed data" copy. This 632 * prevents deletes from freeing up the page for reuse until we have 633 * committed the delete transaction. 634 * 635 * If we didn't do this, then deleting something and reallocating it as 636 * data would allow the old block to be overwritten before the 637 * transaction committed (because we force data to disk before commit). 638 * This would lead to corruption if we crashed between overwriting the 639 * data and committing the delete. 640 * 641 * @@@ We may want to make this allocation behaviour conditional on 642 * data-writes at some point, and disable it for metadata allocations or 643 * sync-data inodes." 644 * 645 * Note: OCFS2 already does this differently for metadata vs data 646 * allocations, as those bitmaps are seperate and undo access is never 647 * called on a metadata group descriptor. 648 */ 649 static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, 650 int nr) 651 { 652 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 653 654 if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) 655 return 0; 656 if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data) 657 return 1; 658 659 bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data; 660 return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); 661 } 662 663 static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, 664 struct buffer_head *bg_bh, 665 unsigned int bits_wanted, 666 u16 *bit_off, 667 u16 *bits_found) 668 { 669 void *bitmap; 670 u16 best_offset, best_size; 671 int offset, start, found, status = 0; 672 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 673 674 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 675 OCFS2_RO_ON_INVALID_GROUP_DESC(osb->sb, bg); 676 return -EIO; 677 } 678 679 found = start = best_offset = best_size = 0; 680 bitmap = bg->bg_bitmap; 681 682 while((offset = ocfs2_find_next_zero_bit(bitmap, 683 le16_to_cpu(bg->bg_bits), 684 start)) != -1) { 685 if (offset == le16_to_cpu(bg->bg_bits)) 686 break; 687 688 if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) { 689 /* We found a zero, but we can't use it as it 690 * hasn't been put to disk yet! */ 691 found = 0; 692 start = offset + 1; 693 } else if (offset == start) { 694 /* we found a zero */ 695 found++; 696 /* move start to the next bit to test */ 697 start++; 698 } else { 699 /* got a zero after some ones */ 700 found = 1; 701 start = offset + 1; 702 } 703 if (found > best_size) { 704 best_size = found; 705 best_offset = start - found; 706 } 707 /* we got everything we needed */ 708 if (found == bits_wanted) { 709 /* mlog(0, "Found it all!\n"); */ 710 break; 711 } 712 } 713 714 /* XXX: I think the first clause is equivalent to the second 715 * - jlbec */ 716 if (found == bits_wanted) { 717 *bit_off = start - found; 718 *bits_found = found; 719 } else if (best_size) { 720 *bit_off = best_offset; 721 *bits_found = best_size; 722 } else { 723 status = -ENOSPC; 724 /* No error log here -- see the comment above 725 * ocfs2_test_bg_bit_allocatable */ 726 } 727 728 return status; 729 } 730 731 static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle, 732 struct inode *alloc_inode, 733 struct ocfs2_group_desc *bg, 734 struct buffer_head *group_bh, 735 unsigned int bit_off, 736 unsigned int num_bits) 737 { 738 int status; 739 void *bitmap = bg->bg_bitmap; 740 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; 741 742 mlog_entry_void(); 743 744 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 745 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 746 status = -EIO; 747 goto bail; 748 } 749 BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); 750 751 mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off, 752 num_bits); 753 754 if (ocfs2_is_cluster_bitmap(alloc_inode)) 755 journal_type = OCFS2_JOURNAL_ACCESS_UNDO; 756 757 status = ocfs2_journal_access(handle, 758 alloc_inode, 759 group_bh, 760 journal_type); 761 if (status < 0) { 762 mlog_errno(status); 763 goto bail; 764 } 765 766 le16_add_cpu(&bg->bg_free_bits_count, -num_bits); 767 768 while(num_bits--) 769 ocfs2_set_bit(bit_off++, bitmap); 770 771 status = ocfs2_journal_dirty(handle, 772 group_bh); 773 if (status < 0) { 774 mlog_errno(status); 775 goto bail; 776 } 777 778 bail: 779 mlog_exit(status); 780 return status; 781 } 782 783 /* find the one with the most empty bits */ 784 static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl) 785 { 786 u16 curr, best; 787 788 BUG_ON(!cl->cl_next_free_rec); 789 790 best = curr = 0; 791 while (curr < le16_to_cpu(cl->cl_next_free_rec)) { 792 if (le32_to_cpu(cl->cl_recs[curr].c_free) > 793 le32_to_cpu(cl->cl_recs[best].c_free)) 794 best = curr; 795 curr++; 796 } 797 798 BUG_ON(best >= le16_to_cpu(cl->cl_next_free_rec)); 799 return best; 800 } 801 802 static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle, 803 struct inode *alloc_inode, 804 struct buffer_head *fe_bh, 805 struct buffer_head *bg_bh, 806 struct buffer_head *prev_bg_bh, 807 u16 chain) 808 { 809 int status; 810 /* there is a really tiny chance the journal calls could fail, 811 * but we wouldn't want inconsistent blocks in *any* case. */ 812 u64 fe_ptr, bg_ptr, prev_bg_ptr; 813 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; 814 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 815 struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; 816 817 if (!OCFS2_IS_VALID_DINODE(fe)) { 818 OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); 819 status = -EIO; 820 goto out; 821 } 822 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 823 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 824 status = -EIO; 825 goto out; 826 } 827 if (!OCFS2_IS_VALID_GROUP_DESC(prev_bg)) { 828 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, prev_bg); 829 status = -EIO; 830 goto out; 831 } 832 833 mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n", 834 (unsigned long long)fe->i_blkno, chain, 835 (unsigned long long)bg->bg_blkno, 836 (unsigned long long)prev_bg->bg_blkno); 837 838 fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno); 839 bg_ptr = le64_to_cpu(bg->bg_next_group); 840 prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); 841 842 status = ocfs2_journal_access(handle, alloc_inode, prev_bg_bh, 843 OCFS2_JOURNAL_ACCESS_WRITE); 844 if (status < 0) { 845 mlog_errno(status); 846 goto out_rollback; 847 } 848 849 prev_bg->bg_next_group = bg->bg_next_group; 850 851 status = ocfs2_journal_dirty(handle, prev_bg_bh); 852 if (status < 0) { 853 mlog_errno(status); 854 goto out_rollback; 855 } 856 857 status = ocfs2_journal_access(handle, alloc_inode, bg_bh, 858 OCFS2_JOURNAL_ACCESS_WRITE); 859 if (status < 0) { 860 mlog_errno(status); 861 goto out_rollback; 862 } 863 864 bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; 865 866 status = ocfs2_journal_dirty(handle, bg_bh); 867 if (status < 0) { 868 mlog_errno(status); 869 goto out_rollback; 870 } 871 872 status = ocfs2_journal_access(handle, alloc_inode, fe_bh, 873 OCFS2_JOURNAL_ACCESS_WRITE); 874 if (status < 0) { 875 mlog_errno(status); 876 goto out_rollback; 877 } 878 879 fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; 880 881 status = ocfs2_journal_dirty(handle, fe_bh); 882 if (status < 0) { 883 mlog_errno(status); 884 goto out_rollback; 885 } 886 887 status = 0; 888 out_rollback: 889 if (status < 0) { 890 fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr); 891 bg->bg_next_group = cpu_to_le64(bg_ptr); 892 prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); 893 } 894 out: 895 mlog_exit(status); 896 return status; 897 } 898 899 static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, 900 u32 wanted) 901 { 902 return le16_to_cpu(bg->bg_free_bits_count) > wanted; 903 } 904 905 /* return 0 on success, -ENOSPC to keep searching and any other < 0 906 * value on error. */ 907 static int ocfs2_cluster_group_search(struct inode *inode, 908 struct buffer_head *group_bh, 909 u32 bits_wanted, u32 min_bits, 910 u16 *bit_off, u16 *bits_found) 911 { 912 int search = -ENOSPC; 913 int ret; 914 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; 915 u16 tmp_off, tmp_found; 916 917 BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 918 919 if (bg->bg_free_bits_count) { 920 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 921 group_bh, bits_wanted, 922 &tmp_off, &tmp_found); 923 if (ret) 924 return ret; 925 926 /* ocfs2_block_group_find_clear_bits() might 927 * return success, but we still want to return 928 * -ENOSPC unless it found the minimum number 929 * of bits. */ 930 if (min_bits <= tmp_found) { 931 *bit_off = tmp_off; 932 *bits_found = tmp_found; 933 search = 0; /* success */ 934 } 935 } 936 937 return search; 938 } 939 940 static int ocfs2_block_group_search(struct inode *inode, 941 struct buffer_head *group_bh, 942 u32 bits_wanted, u32 min_bits, 943 u16 *bit_off, u16 *bits_found) 944 { 945 int ret = -ENOSPC; 946 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; 947 948 BUG_ON(min_bits != 1); 949 BUG_ON(ocfs2_is_cluster_bitmap(inode)); 950 951 if (bg->bg_free_bits_count) 952 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 953 group_bh, bits_wanted, 954 bit_off, bits_found); 955 956 return ret; 957 } 958 959 static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, 960 u32 bits_wanted, 961 u32 min_bits, 962 u16 *bit_off, 963 unsigned int *num_bits, 964 u64 *bg_blkno) 965 { 966 int status; 967 u16 chain, tmp_bits; 968 u32 tmp_used; 969 u64 next_group; 970 struct ocfs2_journal_handle *handle = ac->ac_handle; 971 struct inode *alloc_inode = ac->ac_inode; 972 struct buffer_head *group_bh = NULL; 973 struct buffer_head *prev_group_bh = NULL; 974 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) ac->ac_bh->b_data; 975 struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; 976 struct ocfs2_group_desc *bg; 977 978 chain = ac->ac_chain; 979 mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n", 980 bits_wanted, chain, 981 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); 982 983 status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), 984 le64_to_cpu(cl->cl_recs[chain].c_blkno), 985 &group_bh, OCFS2_BH_CACHED, alloc_inode); 986 if (status < 0) { 987 mlog_errno(status); 988 goto bail; 989 } 990 bg = (struct ocfs2_group_desc *) group_bh->b_data; 991 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 992 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 993 status = -EIO; 994 goto bail; 995 } 996 997 status = -ENOSPC; 998 /* for now, the chain search is a bit simplistic. We just use 999 * the 1st group with any empty bits. */ 1000 while ((status = ac->ac_group_search(alloc_inode, group_bh, 1001 bits_wanted, min_bits, bit_off, 1002 &tmp_bits)) == -ENOSPC) { 1003 if (!bg->bg_next_group) 1004 break; 1005 1006 if (prev_group_bh) { 1007 brelse(prev_group_bh); 1008 prev_group_bh = NULL; 1009 } 1010 next_group = le64_to_cpu(bg->bg_next_group); 1011 prev_group_bh = group_bh; 1012 group_bh = NULL; 1013 status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), 1014 next_group, &group_bh, 1015 OCFS2_BH_CACHED, alloc_inode); 1016 if (status < 0) { 1017 mlog_errno(status); 1018 goto bail; 1019 } 1020 bg = (struct ocfs2_group_desc *) group_bh->b_data; 1021 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 1022 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 1023 status = -EIO; 1024 goto bail; 1025 } 1026 } 1027 if (status < 0) { 1028 if (status != -ENOSPC) 1029 mlog_errno(status); 1030 goto bail; 1031 } 1032 1033 mlog(0, "alloc succeeds: we give %u bits from block group %llu\n", 1034 tmp_bits, (unsigned long long)bg->bg_blkno); 1035 1036 *num_bits = tmp_bits; 1037 1038 BUG_ON(*num_bits == 0); 1039 1040 /* 1041 * Keep track of previous block descriptor read. When 1042 * we find a target, if we have read more than X 1043 * number of descriptors, and the target is reasonably 1044 * empty, relink him to top of his chain. 1045 * 1046 * We've read 0 extra blocks and only send one more to 1047 * the transaction, yet the next guy to search has a 1048 * much easier time. 1049 * 1050 * Do this *after* figuring out how many bits we're taking out 1051 * of our target group. 1052 */ 1053 if (ac->ac_allow_chain_relink && 1054 (prev_group_bh) && 1055 (ocfs2_block_group_reasonably_empty(bg, *num_bits))) { 1056 status = ocfs2_relink_block_group(handle, alloc_inode, 1057 ac->ac_bh, group_bh, 1058 prev_group_bh, chain); 1059 if (status < 0) { 1060 mlog_errno(status); 1061 goto bail; 1062 } 1063 } 1064 1065 /* Ok, claim our bits now: set the info on dinode, chainlist 1066 * and then the group */ 1067 status = ocfs2_journal_access(handle, 1068 alloc_inode, 1069 ac->ac_bh, 1070 OCFS2_JOURNAL_ACCESS_WRITE); 1071 if (status < 0) { 1072 mlog_errno(status); 1073 goto bail; 1074 } 1075 1076 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); 1077 fe->id1.bitmap1.i_used = cpu_to_le32(*num_bits + tmp_used); 1078 le32_add_cpu(&cl->cl_recs[chain].c_free, -(*num_bits)); 1079 1080 status = ocfs2_journal_dirty(handle, 1081 ac->ac_bh); 1082 if (status < 0) { 1083 mlog_errno(status); 1084 goto bail; 1085 } 1086 1087 status = ocfs2_block_group_set_bits(handle, 1088 alloc_inode, 1089 bg, 1090 group_bh, 1091 *bit_off, 1092 *num_bits); 1093 if (status < 0) { 1094 mlog_errno(status); 1095 goto bail; 1096 } 1097 1098 mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits, 1099 (unsigned long long)fe->i_blkno); 1100 1101 *bg_blkno = le64_to_cpu(bg->bg_blkno); 1102 bail: 1103 if (group_bh) 1104 brelse(group_bh); 1105 if (prev_group_bh) 1106 brelse(prev_group_bh); 1107 1108 mlog_exit(status); 1109 return status; 1110 } 1111 1112 /* will give out up to bits_wanted contiguous bits. */ 1113 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, 1114 struct ocfs2_alloc_context *ac, 1115 u32 bits_wanted, 1116 u32 min_bits, 1117 u16 *bit_off, 1118 unsigned int *num_bits, 1119 u64 *bg_blkno) 1120 { 1121 int status; 1122 u16 victim, i; 1123 struct ocfs2_chain_list *cl; 1124 struct ocfs2_dinode *fe; 1125 1126 mlog_entry_void(); 1127 1128 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); 1129 BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given)); 1130 BUG_ON(!ac->ac_bh); 1131 1132 fe = (struct ocfs2_dinode *) ac->ac_bh->b_data; 1133 if (!OCFS2_IS_VALID_DINODE(fe)) { 1134 OCFS2_RO_ON_INVALID_DINODE(osb->sb, fe); 1135 status = -EIO; 1136 goto bail; 1137 } 1138 if (le32_to_cpu(fe->id1.bitmap1.i_used) >= 1139 le32_to_cpu(fe->id1.bitmap1.i_total)) { 1140 ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used " 1141 "bits but only %u total.", 1142 (unsigned long long)le64_to_cpu(fe->i_blkno), 1143 le32_to_cpu(fe->id1.bitmap1.i_used), 1144 le32_to_cpu(fe->id1.bitmap1.i_total)); 1145 status = -EIO; 1146 goto bail; 1147 } 1148 1149 cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; 1150 1151 victim = ocfs2_find_victim_chain(cl); 1152 ac->ac_chain = victim; 1153 ac->ac_allow_chain_relink = 1; 1154 1155 status = ocfs2_search_chain(ac, bits_wanted, min_bits, bit_off, 1156 num_bits, bg_blkno); 1157 if (!status) 1158 goto bail; 1159 if (status < 0 && status != -ENOSPC) { 1160 mlog_errno(status); 1161 goto bail; 1162 } 1163 1164 mlog(0, "Search of victim chain %u came up with nothing, " 1165 "trying all chains now.\n", victim); 1166 1167 /* If we didn't pick a good victim, then just default to 1168 * searching each chain in order. Don't allow chain relinking 1169 * because we only calculate enough journal credits for one 1170 * relink per alloc. */ 1171 ac->ac_allow_chain_relink = 0; 1172 for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) { 1173 if (i == victim) 1174 continue; 1175 if (!cl->cl_recs[i].c_free) 1176 continue; 1177 1178 ac->ac_chain = i; 1179 status = ocfs2_search_chain(ac, bits_wanted, min_bits, 1180 bit_off, num_bits, 1181 bg_blkno); 1182 if (!status) 1183 break; 1184 if (status < 0 && status != -ENOSPC) { 1185 mlog_errno(status); 1186 goto bail; 1187 } 1188 } 1189 bail: 1190 1191 mlog_exit(status); 1192 return status; 1193 } 1194 1195 int ocfs2_claim_metadata(struct ocfs2_super *osb, 1196 struct ocfs2_journal_handle *handle, 1197 struct ocfs2_alloc_context *ac, 1198 u32 bits_wanted, 1199 u16 *suballoc_bit_start, 1200 unsigned int *num_bits, 1201 u64 *blkno_start) 1202 { 1203 int status; 1204 u64 bg_blkno; 1205 1206 BUG_ON(!ac); 1207 BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted)); 1208 BUG_ON(ac->ac_which != OCFS2_AC_USE_META); 1209 BUG_ON(ac->ac_handle != handle); 1210 1211 status = ocfs2_claim_suballoc_bits(osb, 1212 ac, 1213 bits_wanted, 1214 1, 1215 suballoc_bit_start, 1216 num_bits, 1217 &bg_blkno); 1218 if (status < 0) { 1219 mlog_errno(status); 1220 goto bail; 1221 } 1222 atomic_inc(&osb->alloc_stats.bg_allocs); 1223 1224 *blkno_start = bg_blkno + (u64) *suballoc_bit_start; 1225 ac->ac_bits_given += (*num_bits); 1226 status = 0; 1227 bail: 1228 mlog_exit(status); 1229 return status; 1230 } 1231 1232 int ocfs2_claim_new_inode(struct ocfs2_super *osb, 1233 struct ocfs2_journal_handle *handle, 1234 struct ocfs2_alloc_context *ac, 1235 u16 *suballoc_bit, 1236 u64 *fe_blkno) 1237 { 1238 int status; 1239 unsigned int num_bits; 1240 u64 bg_blkno; 1241 1242 mlog_entry_void(); 1243 1244 BUG_ON(!ac); 1245 BUG_ON(ac->ac_bits_given != 0); 1246 BUG_ON(ac->ac_bits_wanted != 1); 1247 BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE); 1248 BUG_ON(ac->ac_handle != handle); 1249 1250 status = ocfs2_claim_suballoc_bits(osb, 1251 ac, 1252 1, 1253 1, 1254 suballoc_bit, 1255 &num_bits, 1256 &bg_blkno); 1257 if (status < 0) { 1258 mlog_errno(status); 1259 goto bail; 1260 } 1261 atomic_inc(&osb->alloc_stats.bg_allocs); 1262 1263 BUG_ON(num_bits != 1); 1264 1265 *fe_blkno = bg_blkno + (u64) (*suballoc_bit); 1266 ac->ac_bits_given++; 1267 status = 0; 1268 bail: 1269 mlog_exit(status); 1270 return status; 1271 } 1272 1273 /* translate a group desc. blkno and it's bitmap offset into 1274 * disk cluster offset. */ 1275 static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, 1276 u64 bg_blkno, 1277 u16 bg_bit_off) 1278 { 1279 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1280 u32 cluster = 0; 1281 1282 BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 1283 1284 if (bg_blkno != osb->first_cluster_group_blkno) 1285 cluster = ocfs2_blocks_to_clusters(inode->i_sb, bg_blkno); 1286 cluster += (u32) bg_bit_off; 1287 return cluster; 1288 } 1289 1290 /* given a cluster offset, calculate which block group it belongs to 1291 * and return that block offset. */ 1292 static inline u64 ocfs2_which_cluster_group(struct inode *inode, 1293 u32 cluster) 1294 { 1295 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1296 u32 group_no; 1297 1298 BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 1299 1300 group_no = cluster / osb->bitmap_cpg; 1301 if (!group_no) 1302 return osb->first_cluster_group_blkno; 1303 return ocfs2_clusters_to_blocks(inode->i_sb, 1304 group_no * osb->bitmap_cpg); 1305 } 1306 1307 /* given the block number of a cluster start, calculate which cluster 1308 * group and descriptor bitmap offset that corresponds to. */ 1309 static inline void ocfs2_block_to_cluster_group(struct inode *inode, 1310 u64 data_blkno, 1311 u64 *bg_blkno, 1312 u16 *bg_bit_off) 1313 { 1314 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1315 u32 data_cluster = ocfs2_blocks_to_clusters(osb->sb, data_blkno); 1316 1317 BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 1318 1319 *bg_blkno = ocfs2_which_cluster_group(inode, 1320 data_cluster); 1321 1322 if (*bg_blkno == osb->first_cluster_group_blkno) 1323 *bg_bit_off = (u16) data_cluster; 1324 else 1325 *bg_bit_off = (u16) ocfs2_blocks_to_clusters(osb->sb, 1326 data_blkno - *bg_blkno); 1327 } 1328 1329 /* 1330 * min_bits - minimum contiguous chunk from this total allocation we 1331 * can handle. set to what we asked for originally for a full 1332 * contig. allocation, set to '1' to indicate we can deal with extents 1333 * of any size. 1334 */ 1335 int ocfs2_claim_clusters(struct ocfs2_super *osb, 1336 struct ocfs2_journal_handle *handle, 1337 struct ocfs2_alloc_context *ac, 1338 u32 min_clusters, 1339 u32 *cluster_start, 1340 u32 *num_clusters) 1341 { 1342 int status; 1343 unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; 1344 u64 bg_blkno; 1345 u16 bg_bit_off; 1346 1347 mlog_entry_void(); 1348 1349 BUG_ON(!ac); 1350 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); 1351 1352 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL 1353 && ac->ac_which != OCFS2_AC_USE_MAIN); 1354 BUG_ON(ac->ac_handle != handle); 1355 1356 if (ac->ac_which == OCFS2_AC_USE_LOCAL) { 1357 status = ocfs2_claim_local_alloc_bits(osb, 1358 handle, 1359 ac, 1360 bits_wanted, 1361 cluster_start, 1362 num_clusters); 1363 if (!status) 1364 atomic_inc(&osb->alloc_stats.local_data); 1365 } else { 1366 if (min_clusters > (osb->bitmap_cpg - 1)) { 1367 /* The only paths asking for contiguousness 1368 * should know about this already. */ 1369 mlog(ML_ERROR, "minimum allocation requested exceeds " 1370 "group bitmap size!"); 1371 status = -ENOSPC; 1372 goto bail; 1373 } 1374 /* clamp the current request down to a realistic size. */ 1375 if (bits_wanted > (osb->bitmap_cpg - 1)) 1376 bits_wanted = osb->bitmap_cpg - 1; 1377 1378 status = ocfs2_claim_suballoc_bits(osb, 1379 ac, 1380 bits_wanted, 1381 min_clusters, 1382 &bg_bit_off, 1383 num_clusters, 1384 &bg_blkno); 1385 if (!status) { 1386 *cluster_start = 1387 ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode, 1388 bg_blkno, 1389 bg_bit_off); 1390 atomic_inc(&osb->alloc_stats.bitmap_data); 1391 } 1392 } 1393 if (status < 0) { 1394 if (status != -ENOSPC) 1395 mlog_errno(status); 1396 goto bail; 1397 } 1398 1399 ac->ac_bits_given += *num_clusters; 1400 1401 bail: 1402 mlog_exit(status); 1403 return status; 1404 } 1405 1406 static inline int ocfs2_block_group_clear_bits(struct ocfs2_journal_handle *handle, 1407 struct inode *alloc_inode, 1408 struct ocfs2_group_desc *bg, 1409 struct buffer_head *group_bh, 1410 unsigned int bit_off, 1411 unsigned int num_bits) 1412 { 1413 int status; 1414 unsigned int tmp; 1415 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; 1416 struct ocfs2_group_desc *undo_bg = NULL; 1417 1418 mlog_entry_void(); 1419 1420 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 1421 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 1422 status = -EIO; 1423 goto bail; 1424 } 1425 1426 mlog(0, "off = %u, num = %u\n", bit_off, num_bits); 1427 1428 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1429 journal_type = OCFS2_JOURNAL_ACCESS_UNDO; 1430 1431 status = ocfs2_journal_access(handle, alloc_inode, group_bh, 1432 journal_type); 1433 if (status < 0) { 1434 mlog_errno(status); 1435 goto bail; 1436 } 1437 1438 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1439 undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data; 1440 1441 tmp = num_bits; 1442 while(tmp--) { 1443 ocfs2_clear_bit((bit_off + tmp), 1444 (unsigned long *) bg->bg_bitmap); 1445 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1446 ocfs2_set_bit(bit_off + tmp, 1447 (unsigned long *) undo_bg->bg_bitmap); 1448 } 1449 le16_add_cpu(&bg->bg_free_bits_count, num_bits); 1450 1451 status = ocfs2_journal_dirty(handle, group_bh); 1452 if (status < 0) 1453 mlog_errno(status); 1454 bail: 1455 return status; 1456 } 1457 1458 /* 1459 * expects the suballoc inode to already be locked. 1460 */ 1461 static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle, 1462 struct inode *alloc_inode, 1463 struct buffer_head *alloc_bh, 1464 unsigned int start_bit, 1465 u64 bg_blkno, 1466 unsigned int count) 1467 { 1468 int status = 0; 1469 u32 tmp_used; 1470 struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb); 1471 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data; 1472 struct ocfs2_chain_list *cl = &fe->id2.i_chain; 1473 struct buffer_head *group_bh = NULL; 1474 struct ocfs2_group_desc *group; 1475 1476 mlog_entry_void(); 1477 1478 if (!OCFS2_IS_VALID_DINODE(fe)) { 1479 OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); 1480 status = -EIO; 1481 goto bail; 1482 } 1483 BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); 1484 1485 mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n", 1486 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, 1487 (unsigned long long)bg_blkno, start_bit); 1488 1489 status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED, 1490 alloc_inode); 1491 if (status < 0) { 1492 mlog_errno(status); 1493 goto bail; 1494 } 1495 1496 group = (struct ocfs2_group_desc *) group_bh->b_data; 1497 if (!OCFS2_IS_VALID_GROUP_DESC(group)) { 1498 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, group); 1499 status = -EIO; 1500 goto bail; 1501 } 1502 BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); 1503 1504 status = ocfs2_block_group_clear_bits(handle, alloc_inode, 1505 group, group_bh, 1506 start_bit, count); 1507 if (status < 0) { 1508 mlog_errno(status); 1509 goto bail; 1510 } 1511 1512 status = ocfs2_journal_access(handle, alloc_inode, alloc_bh, 1513 OCFS2_JOURNAL_ACCESS_WRITE); 1514 if (status < 0) { 1515 mlog_errno(status); 1516 goto bail; 1517 } 1518 1519 le32_add_cpu(&cl->cl_recs[le16_to_cpu(group->bg_chain)].c_free, 1520 count); 1521 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); 1522 fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count); 1523 1524 status = ocfs2_journal_dirty(handle, alloc_bh); 1525 if (status < 0) { 1526 mlog_errno(status); 1527 goto bail; 1528 } 1529 1530 bail: 1531 if (group_bh) 1532 brelse(group_bh); 1533 1534 mlog_exit(status); 1535 return status; 1536 } 1537 1538 static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) 1539 { 1540 u64 group = block - (u64) bit; 1541 1542 return group; 1543 } 1544 1545 int ocfs2_free_dinode(struct ocfs2_journal_handle *handle, 1546 struct inode *inode_alloc_inode, 1547 struct buffer_head *inode_alloc_bh, 1548 struct ocfs2_dinode *di) 1549 { 1550 u64 blk = le64_to_cpu(di->i_blkno); 1551 u16 bit = le16_to_cpu(di->i_suballoc_bit); 1552 u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 1553 1554 return ocfs2_free_suballoc_bits(handle, inode_alloc_inode, 1555 inode_alloc_bh, bit, bg_blkno, 1); 1556 } 1557 1558 int ocfs2_free_extent_block(struct ocfs2_journal_handle *handle, 1559 struct inode *eb_alloc_inode, 1560 struct buffer_head *eb_alloc_bh, 1561 struct ocfs2_extent_block *eb) 1562 { 1563 u64 blk = le64_to_cpu(eb->h_blkno); 1564 u16 bit = le16_to_cpu(eb->h_suballoc_bit); 1565 u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 1566 1567 return ocfs2_free_suballoc_bits(handle, eb_alloc_inode, eb_alloc_bh, 1568 bit, bg_blkno, 1); 1569 } 1570 1571 int ocfs2_free_clusters(struct ocfs2_journal_handle *handle, 1572 struct inode *bitmap_inode, 1573 struct buffer_head *bitmap_bh, 1574 u64 start_blk, 1575 unsigned int num_clusters) 1576 { 1577 int status; 1578 u16 bg_start_bit; 1579 u64 bg_blkno; 1580 struct ocfs2_dinode *fe; 1581 1582 /* You can't ever have a contiguous set of clusters 1583 * bigger than a block group bitmap so we never have to worry 1584 * about looping on them. */ 1585 1586 mlog_entry_void(); 1587 1588 /* This is expensive. We can safely remove once this stuff has 1589 * gotten tested really well. */ 1590 BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, ocfs2_blocks_to_clusters(bitmap_inode->i_sb, start_blk))); 1591 1592 fe = (struct ocfs2_dinode *) bitmap_bh->b_data; 1593 1594 ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno, 1595 &bg_start_bit); 1596 1597 mlog(0, "want to free %u clusters starting at block %llu\n", 1598 num_clusters, (unsigned long long)start_blk); 1599 mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n", 1600 (unsigned long long)bg_blkno, bg_start_bit); 1601 1602 status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, 1603 bg_start_bit, bg_blkno, 1604 num_clusters); 1605 if (status < 0) 1606 mlog_errno(status); 1607 1608 mlog_exit(status); 1609 return status; 1610 } 1611 1612 static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg) 1613 { 1614 printk("Block Group:\n"); 1615 printk("bg_signature: %s\n", bg->bg_signature); 1616 printk("bg_size: %u\n", bg->bg_size); 1617 printk("bg_bits: %u\n", bg->bg_bits); 1618 printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count); 1619 printk("bg_chain: %u\n", bg->bg_chain); 1620 printk("bg_generation: %u\n", le32_to_cpu(bg->bg_generation)); 1621 printk("bg_next_group: %llu\n", 1622 (unsigned long long)bg->bg_next_group); 1623 printk("bg_parent_dinode: %llu\n", 1624 (unsigned long long)bg->bg_parent_dinode); 1625 printk("bg_blkno: %llu\n", 1626 (unsigned long long)bg->bg_blkno); 1627 } 1628 1629 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe) 1630 { 1631 int i; 1632 1633 printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno); 1634 printk("i_signature: %s\n", fe->i_signature); 1635 printk("i_size: %llu\n", 1636 (unsigned long long)fe->i_size); 1637 printk("i_clusters: %u\n", fe->i_clusters); 1638 printk("i_generation: %u\n", 1639 le32_to_cpu(fe->i_generation)); 1640 printk("id1.bitmap1.i_used: %u\n", 1641 le32_to_cpu(fe->id1.bitmap1.i_used)); 1642 printk("id1.bitmap1.i_total: %u\n", 1643 le32_to_cpu(fe->id1.bitmap1.i_total)); 1644 printk("id2.i_chain.cl_cpg: %u\n", fe->id2.i_chain.cl_cpg); 1645 printk("id2.i_chain.cl_bpc: %u\n", fe->id2.i_chain.cl_bpc); 1646 printk("id2.i_chain.cl_count: %u\n", fe->id2.i_chain.cl_count); 1647 printk("id2.i_chain.cl_next_free_rec: %u\n", 1648 fe->id2.i_chain.cl_next_free_rec); 1649 for(i = 0; i < fe->id2.i_chain.cl_next_free_rec; i++) { 1650 printk("fe->id2.i_chain.cl_recs[%d].c_free: %u\n", i, 1651 fe->id2.i_chain.cl_recs[i].c_free); 1652 printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i, 1653 fe->id2.i_chain.cl_recs[i].c_total); 1654 printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i, 1655 (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno); 1656 } 1657 } 1658