1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * localalloc.c 5 * 6 * Node local data allocation 7 * 8 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public 12 * License as published by the Free Software Foundation; either 13 * version 2 of the License, or (at your option) any later version. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * General Public License for more details. 19 * 20 * You should have received a copy of the GNU General Public 21 * License along with this program; if not, write to the 22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 23 * Boston, MA 021110-1307, USA. 24 */ 25 26 #include <linux/fs.h> 27 #include <linux/types.h> 28 #include <linux/slab.h> 29 #include <linux/highmem.h> 30 #include <linux/bitops.h> 31 32 #define MLOG_MASK_PREFIX ML_DISK_ALLOC 33 #include <cluster/masklog.h> 34 35 #include "ocfs2.h" 36 37 #include "alloc.h" 38 #include "blockcheck.h" 39 #include "dlmglue.h" 40 #include "inode.h" 41 #include "journal.h" 42 #include "localalloc.h" 43 #include "suballoc.h" 44 #include "super.h" 45 #include "sysfile.h" 46 47 #include "buffer_head_io.h" 48 49 #define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab)) 50 51 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); 52 53 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 54 struct ocfs2_dinode *alloc, 55 u32 numbits); 56 57 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc); 58 59 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, 60 handle_t *handle, 61 struct ocfs2_dinode *alloc, 62 struct inode *main_bm_inode, 63 struct buffer_head *main_bm_bh); 64 65 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, 66 struct ocfs2_alloc_context **ac, 67 struct inode **bitmap_inode, 68 struct buffer_head **bitmap_bh); 69 70 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, 71 handle_t *handle, 72 struct ocfs2_alloc_context *ac); 73 74 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 75 struct inode *local_alloc_inode); 76 77 static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) 78 { 79 return (osb->local_alloc_state == OCFS2_LA_THROTTLED || 80 osb->local_alloc_state == OCFS2_LA_ENABLED); 81 } 82 83 void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, 84 unsigned int num_clusters) 85 { 86 spin_lock(&osb->osb_lock); 87 if (osb->local_alloc_state == OCFS2_LA_DISABLED || 88 osb->local_alloc_state == OCFS2_LA_THROTTLED) 89 if (num_clusters >= osb->local_alloc_default_bits) { 90 cancel_delayed_work(&osb->la_enable_wq); 91 osb->local_alloc_state = OCFS2_LA_ENABLED; 92 } 93 spin_unlock(&osb->osb_lock); 94 } 95 96 void ocfs2_la_enable_worker(struct work_struct *work) 97 { 98 struct ocfs2_super *osb = 99 container_of(work, struct ocfs2_super, 100 la_enable_wq.work); 101 spin_lock(&osb->osb_lock); 102 osb->local_alloc_state = OCFS2_LA_ENABLED; 103 spin_unlock(&osb->osb_lock); 104 } 105 106 /* 107 * Tell us whether a given allocation should use the local alloc 108 * file. Otherwise, it has to go to the main bitmap. 109 * 110 * This function does semi-dirty reads of local alloc size and state! 111 * This is ok however, as the values are re-checked once under mutex. 112 */ 113 int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) 114 { 115 int ret = 0; 116 int la_bits; 117 118 spin_lock(&osb->osb_lock); 119 la_bits = osb->local_alloc_bits; 120 121 if (!ocfs2_la_state_enabled(osb)) 122 goto bail; 123 124 /* la_bits should be at least twice the size (in clusters) of 125 * a new block group. We want to be sure block group 126 * allocations go through the local alloc, so allow an 127 * allocation to take up to half the bitmap. */ 128 if (bits > (la_bits / 2)) 129 goto bail; 130 131 ret = 1; 132 bail: 133 mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", 134 osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); 135 spin_unlock(&osb->osb_lock); 136 return ret; 137 } 138 139 int ocfs2_load_local_alloc(struct ocfs2_super *osb) 140 { 141 int status = 0; 142 struct ocfs2_dinode *alloc = NULL; 143 struct buffer_head *alloc_bh = NULL; 144 u32 num_used; 145 struct inode *inode = NULL; 146 struct ocfs2_local_alloc *la; 147 148 mlog_entry_void(); 149 150 if (osb->local_alloc_bits == 0) 151 goto bail; 152 153 if (osb->local_alloc_bits >= osb->bitmap_cpg) { 154 mlog(ML_NOTICE, "Requested local alloc window %d is larger " 155 "than max possible %u. Using defaults.\n", 156 osb->local_alloc_bits, (osb->bitmap_cpg - 1)); 157 osb->local_alloc_bits = 158 ocfs2_megabytes_to_clusters(osb->sb, 159 OCFS2_DEFAULT_LOCAL_ALLOC_SIZE); 160 } 161 162 /* read the alloc off disk */ 163 inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, 164 osb->slot_num); 165 if (!inode) { 166 status = -EINVAL; 167 mlog_errno(status); 168 goto bail; 169 } 170 171 status = ocfs2_read_inode_block_full(inode, &alloc_bh, 172 OCFS2_BH_IGNORE_CACHE); 173 if (status < 0) { 174 mlog_errno(status); 175 goto bail; 176 } 177 178 alloc = (struct ocfs2_dinode *) alloc_bh->b_data; 179 la = OCFS2_LOCAL_ALLOC(alloc); 180 181 if (!(le32_to_cpu(alloc->i_flags) & 182 (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) { 183 mlog(ML_ERROR, "Invalid local alloc inode, %llu\n", 184 (unsigned long long)OCFS2_I(inode)->ip_blkno); 185 status = -EINVAL; 186 goto bail; 187 } 188 189 if ((la->la_size == 0) || 190 (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) { 191 mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n", 192 le16_to_cpu(la->la_size)); 193 status = -EINVAL; 194 goto bail; 195 } 196 197 /* do a little verification. */ 198 num_used = ocfs2_local_alloc_count_bits(alloc); 199 200 /* hopefully the local alloc has always been recovered before 201 * we load it. */ 202 if (num_used 203 || alloc->id1.bitmap1.i_used 204 || alloc->id1.bitmap1.i_total 205 || la->la_bm_off) 206 mlog(ML_ERROR, "Local alloc hasn't been recovered!\n" 207 "found = %u, set = %u, taken = %u, off = %u\n", 208 num_used, le32_to_cpu(alloc->id1.bitmap1.i_used), 209 le32_to_cpu(alloc->id1.bitmap1.i_total), 210 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); 211 212 osb->local_alloc_bh = alloc_bh; 213 osb->local_alloc_state = OCFS2_LA_ENABLED; 214 215 bail: 216 if (status < 0) 217 brelse(alloc_bh); 218 if (inode) 219 iput(inode); 220 221 mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits); 222 223 mlog_exit(status); 224 return status; 225 } 226 227 /* 228 * return any unused bits to the bitmap and write out a clean 229 * local_alloc. 230 * 231 * local_alloc_bh is optional. If not passed, we will simply use the 232 * one off osb. If you do pass it however, be warned that it *will* be 233 * returned brelse'd and NULL'd out.*/ 234 void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) 235 { 236 int status; 237 handle_t *handle; 238 struct inode *local_alloc_inode = NULL; 239 struct buffer_head *bh = NULL; 240 struct buffer_head *main_bm_bh = NULL; 241 struct inode *main_bm_inode = NULL; 242 struct ocfs2_dinode *alloc_copy = NULL; 243 struct ocfs2_dinode *alloc = NULL; 244 245 mlog_entry_void(); 246 247 cancel_delayed_work(&osb->la_enable_wq); 248 flush_workqueue(ocfs2_wq); 249 250 if (osb->local_alloc_state == OCFS2_LA_UNUSED) 251 goto out; 252 253 local_alloc_inode = 254 ocfs2_get_system_file_inode(osb, 255 LOCAL_ALLOC_SYSTEM_INODE, 256 osb->slot_num); 257 if (!local_alloc_inode) { 258 status = -ENOENT; 259 mlog_errno(status); 260 goto out; 261 } 262 263 osb->local_alloc_state = OCFS2_LA_DISABLED; 264 265 main_bm_inode = ocfs2_get_system_file_inode(osb, 266 GLOBAL_BITMAP_SYSTEM_INODE, 267 OCFS2_INVALID_SLOT); 268 if (!main_bm_inode) { 269 status = -EINVAL; 270 mlog_errno(status); 271 goto out; 272 } 273 274 mutex_lock(&main_bm_inode->i_mutex); 275 276 status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); 277 if (status < 0) { 278 mlog_errno(status); 279 goto out_mutex; 280 } 281 282 /* WINDOW_MOVE_CREDITS is a bit heavy... */ 283 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 284 if (IS_ERR(handle)) { 285 mlog_errno(PTR_ERR(handle)); 286 handle = NULL; 287 goto out_unlock; 288 } 289 290 bh = osb->local_alloc_bh; 291 alloc = (struct ocfs2_dinode *) bh->b_data; 292 293 alloc_copy = kmalloc(bh->b_size, GFP_NOFS); 294 if (!alloc_copy) { 295 status = -ENOMEM; 296 goto out_commit; 297 } 298 memcpy(alloc_copy, alloc, bh->b_size); 299 300 status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode), 301 bh, OCFS2_JOURNAL_ACCESS_WRITE); 302 if (status < 0) { 303 mlog_errno(status); 304 goto out_commit; 305 } 306 307 ocfs2_clear_local_alloc(alloc); 308 309 status = ocfs2_journal_dirty(handle, bh); 310 if (status < 0) { 311 mlog_errno(status); 312 goto out_commit; 313 } 314 315 brelse(bh); 316 osb->local_alloc_bh = NULL; 317 osb->local_alloc_state = OCFS2_LA_UNUSED; 318 319 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 320 main_bm_inode, main_bm_bh); 321 if (status < 0) 322 mlog_errno(status); 323 324 out_commit: 325 ocfs2_commit_trans(osb, handle); 326 327 out_unlock: 328 brelse(main_bm_bh); 329 330 ocfs2_inode_unlock(main_bm_inode, 1); 331 332 out_mutex: 333 mutex_unlock(&main_bm_inode->i_mutex); 334 iput(main_bm_inode); 335 336 out: 337 if (local_alloc_inode) 338 iput(local_alloc_inode); 339 340 if (alloc_copy) 341 kfree(alloc_copy); 342 343 mlog_exit_void(); 344 } 345 346 /* 347 * We want to free the bitmap bits outside of any recovery context as 348 * we'll need a cluster lock to do so, but we must clear the local 349 * alloc before giving up the recovered nodes journal. To solve this, 350 * we kmalloc a copy of the local alloc before it's change for the 351 * caller to process with ocfs2_complete_local_alloc_recovery 352 */ 353 int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, 354 int slot_num, 355 struct ocfs2_dinode **alloc_copy) 356 { 357 int status = 0; 358 struct buffer_head *alloc_bh = NULL; 359 struct inode *inode = NULL; 360 struct ocfs2_dinode *alloc; 361 362 mlog_entry("(slot_num = %d)\n", slot_num); 363 364 *alloc_copy = NULL; 365 366 inode = ocfs2_get_system_file_inode(osb, 367 LOCAL_ALLOC_SYSTEM_INODE, 368 slot_num); 369 if (!inode) { 370 status = -EINVAL; 371 mlog_errno(status); 372 goto bail; 373 } 374 375 mutex_lock(&inode->i_mutex); 376 377 status = ocfs2_read_inode_block_full(inode, &alloc_bh, 378 OCFS2_BH_IGNORE_CACHE); 379 if (status < 0) { 380 mlog_errno(status); 381 goto bail; 382 } 383 384 *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL); 385 if (!(*alloc_copy)) { 386 status = -ENOMEM; 387 goto bail; 388 } 389 memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size); 390 391 alloc = (struct ocfs2_dinode *) alloc_bh->b_data; 392 ocfs2_clear_local_alloc(alloc); 393 394 ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check); 395 status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode)); 396 if (status < 0) 397 mlog_errno(status); 398 399 bail: 400 if ((status < 0) && (*alloc_copy)) { 401 kfree(*alloc_copy); 402 *alloc_copy = NULL; 403 } 404 405 brelse(alloc_bh); 406 407 if (inode) { 408 mutex_unlock(&inode->i_mutex); 409 iput(inode); 410 } 411 412 mlog_exit(status); 413 return status; 414 } 415 416 /* 417 * Step 2: By now, we've completed the journal recovery, we've stamped 418 * a clean local alloc on disk and dropped the node out of the 419 * recovery map. Dlm locks will no longer stall, so lets clear out the 420 * main bitmap. 421 */ 422 int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb, 423 struct ocfs2_dinode *alloc) 424 { 425 int status; 426 handle_t *handle; 427 struct buffer_head *main_bm_bh = NULL; 428 struct inode *main_bm_inode; 429 430 mlog_entry_void(); 431 432 main_bm_inode = ocfs2_get_system_file_inode(osb, 433 GLOBAL_BITMAP_SYSTEM_INODE, 434 OCFS2_INVALID_SLOT); 435 if (!main_bm_inode) { 436 status = -EINVAL; 437 mlog_errno(status); 438 goto out; 439 } 440 441 mutex_lock(&main_bm_inode->i_mutex); 442 443 status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); 444 if (status < 0) { 445 mlog_errno(status); 446 goto out_mutex; 447 } 448 449 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 450 if (IS_ERR(handle)) { 451 status = PTR_ERR(handle); 452 handle = NULL; 453 mlog_errno(status); 454 goto out_unlock; 455 } 456 457 /* we want the bitmap change to be recorded on disk asap */ 458 handle->h_sync = 1; 459 460 status = ocfs2_sync_local_to_main(osb, handle, alloc, 461 main_bm_inode, main_bm_bh); 462 if (status < 0) 463 mlog_errno(status); 464 465 ocfs2_commit_trans(osb, handle); 466 467 out_unlock: 468 ocfs2_inode_unlock(main_bm_inode, 1); 469 470 out_mutex: 471 mutex_unlock(&main_bm_inode->i_mutex); 472 473 brelse(main_bm_bh); 474 475 iput(main_bm_inode); 476 477 out: 478 if (!status) 479 ocfs2_init_steal_slots(osb); 480 mlog_exit(status); 481 return status; 482 } 483 484 /* Check to see if the local alloc window is within ac->ac_max_block */ 485 static int ocfs2_local_alloc_in_range(struct inode *inode, 486 struct ocfs2_alloc_context *ac, 487 u32 bits_wanted) 488 { 489 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 490 struct ocfs2_dinode *alloc; 491 struct ocfs2_local_alloc *la; 492 int start; 493 u64 block_off; 494 495 if (!ac->ac_max_block) 496 return 1; 497 498 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 499 la = OCFS2_LOCAL_ALLOC(alloc); 500 501 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); 502 if (start == -1) { 503 mlog_errno(-ENOSPC); 504 return 0; 505 } 506 507 /* 508 * Converting (bm_off + start + bits_wanted) to blocks gives us 509 * the blkno just past our actual allocation. This is perfect 510 * to compare with ac_max_block. 511 */ 512 block_off = ocfs2_clusters_to_blocks(inode->i_sb, 513 le32_to_cpu(la->la_bm_off) + 514 start + bits_wanted); 515 mlog(0, "Checking %llu against %llu\n", 516 (unsigned long long)block_off, 517 (unsigned long long)ac->ac_max_block); 518 if (block_off > ac->ac_max_block) 519 return 0; 520 521 return 1; 522 } 523 524 /* 525 * make sure we've got at least bits_wanted contiguous bits in the 526 * local alloc. You lose them when you drop i_mutex. 527 * 528 * We will add ourselves to the transaction passed in, but may start 529 * our own in order to shift windows. 530 */ 531 int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, 532 u32 bits_wanted, 533 struct ocfs2_alloc_context *ac) 534 { 535 int status; 536 struct ocfs2_dinode *alloc; 537 struct inode *local_alloc_inode; 538 unsigned int free_bits; 539 540 mlog_entry_void(); 541 542 BUG_ON(!ac); 543 544 local_alloc_inode = 545 ocfs2_get_system_file_inode(osb, 546 LOCAL_ALLOC_SYSTEM_INODE, 547 osb->slot_num); 548 if (!local_alloc_inode) { 549 status = -ENOENT; 550 mlog_errno(status); 551 goto bail; 552 } 553 554 mutex_lock(&local_alloc_inode->i_mutex); 555 556 /* 557 * We must double check state and allocator bits because 558 * another process may have changed them while holding i_mutex. 559 */ 560 spin_lock(&osb->osb_lock); 561 if (!ocfs2_la_state_enabled(osb) || 562 (bits_wanted > osb->local_alloc_bits)) { 563 spin_unlock(&osb->osb_lock); 564 status = -ENOSPC; 565 goto bail; 566 } 567 spin_unlock(&osb->osb_lock); 568 569 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 570 571 #ifdef CONFIG_OCFS2_DEBUG_FS 572 if (le32_to_cpu(alloc->id1.bitmap1.i_used) != 573 ocfs2_local_alloc_count_bits(alloc)) { 574 ocfs2_error(osb->sb, "local alloc inode %llu says it has " 575 "%u free bits, but a count shows %u", 576 (unsigned long long)le64_to_cpu(alloc->i_blkno), 577 le32_to_cpu(alloc->id1.bitmap1.i_used), 578 ocfs2_local_alloc_count_bits(alloc)); 579 status = -EIO; 580 goto bail; 581 } 582 #endif 583 584 free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - 585 le32_to_cpu(alloc->id1.bitmap1.i_used); 586 if (bits_wanted > free_bits) { 587 /* uhoh, window change time. */ 588 status = 589 ocfs2_local_alloc_slide_window(osb, local_alloc_inode); 590 if (status < 0) { 591 if (status != -ENOSPC) 592 mlog_errno(status); 593 goto bail; 594 } 595 596 /* 597 * Under certain conditions, the window slide code 598 * might have reduced the number of bits available or 599 * disabled the the local alloc entirely. Re-check 600 * here and return -ENOSPC if necessary. 601 */ 602 status = -ENOSPC; 603 if (!ocfs2_la_state_enabled(osb)) 604 goto bail; 605 606 free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - 607 le32_to_cpu(alloc->id1.bitmap1.i_used); 608 if (bits_wanted > free_bits) 609 goto bail; 610 } 611 612 if (ac->ac_max_block) 613 mlog(0, "Calling in_range for max block %llu\n", 614 (unsigned long long)ac->ac_max_block); 615 616 if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac, 617 bits_wanted)) { 618 /* 619 * The window is outside ac->ac_max_block. 620 * This errno tells the caller to keep localalloc enabled 621 * but to get the allocation from the main bitmap. 622 */ 623 status = -EFBIG; 624 goto bail; 625 } 626 627 ac->ac_inode = local_alloc_inode; 628 /* We should never use localalloc from another slot */ 629 ac->ac_alloc_slot = osb->slot_num; 630 ac->ac_which = OCFS2_AC_USE_LOCAL; 631 get_bh(osb->local_alloc_bh); 632 ac->ac_bh = osb->local_alloc_bh; 633 status = 0; 634 bail: 635 if (status < 0 && local_alloc_inode) { 636 mutex_unlock(&local_alloc_inode->i_mutex); 637 iput(local_alloc_inode); 638 } 639 640 mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num, 641 status); 642 643 mlog_exit(status); 644 return status; 645 } 646 647 int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, 648 handle_t *handle, 649 struct ocfs2_alloc_context *ac, 650 u32 bits_wanted, 651 u32 *bit_off, 652 u32 *num_bits) 653 { 654 int status, start; 655 struct inode *local_alloc_inode; 656 void *bitmap; 657 struct ocfs2_dinode *alloc; 658 struct ocfs2_local_alloc *la; 659 660 mlog_entry_void(); 661 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); 662 663 local_alloc_inode = ac->ac_inode; 664 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 665 la = OCFS2_LOCAL_ALLOC(alloc); 666 667 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); 668 if (start == -1) { 669 /* TODO: Shouldn't we just BUG here? */ 670 status = -ENOSPC; 671 mlog_errno(status); 672 goto bail; 673 } 674 675 bitmap = la->la_bitmap; 676 *bit_off = le32_to_cpu(la->la_bm_off) + start; 677 /* local alloc is always contiguous by nature -- we never 678 * delete bits from it! */ 679 *num_bits = bits_wanted; 680 681 status = ocfs2_journal_access_di(handle, 682 INODE_CACHE(local_alloc_inode), 683 osb->local_alloc_bh, 684 OCFS2_JOURNAL_ACCESS_WRITE); 685 if (status < 0) { 686 mlog_errno(status); 687 goto bail; 688 } 689 690 while(bits_wanted--) 691 ocfs2_set_bit(start++, bitmap); 692 693 le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits); 694 695 status = ocfs2_journal_dirty(handle, osb->local_alloc_bh); 696 if (status < 0) { 697 mlog_errno(status); 698 goto bail; 699 } 700 701 status = 0; 702 bail: 703 mlog_exit(status); 704 return status; 705 } 706 707 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc) 708 { 709 int i; 710 u8 *buffer; 711 u32 count = 0; 712 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 713 714 mlog_entry_void(); 715 716 buffer = la->la_bitmap; 717 for (i = 0; i < le16_to_cpu(la->la_size); i++) 718 count += hweight8(buffer[i]); 719 720 mlog_exit(count); 721 return count; 722 } 723 724 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 725 struct ocfs2_dinode *alloc, 726 u32 numbits) 727 { 728 int numfound, bitoff, left, startoff, lastzero; 729 void *bitmap = NULL; 730 731 mlog_entry("(numbits wanted = %u)\n", numbits); 732 733 if (!alloc->id1.bitmap1.i_total) { 734 mlog(0, "No bits in my window!\n"); 735 bitoff = -1; 736 goto bail; 737 } 738 739 bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap; 740 741 numfound = bitoff = startoff = 0; 742 lastzero = -1; 743 left = le32_to_cpu(alloc->id1.bitmap1.i_total); 744 while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) { 745 if (bitoff == left) { 746 /* mlog(0, "bitoff (%d) == left", bitoff); */ 747 break; 748 } 749 /* mlog(0, "Found a zero: bitoff = %d, startoff = %d, " 750 "numfound = %d\n", bitoff, startoff, numfound);*/ 751 752 /* Ok, we found a zero bit... is it contig. or do we 753 * start over?*/ 754 if (bitoff == startoff) { 755 /* we found a zero */ 756 numfound++; 757 startoff++; 758 } else { 759 /* got a zero after some ones */ 760 numfound = 1; 761 startoff = bitoff+1; 762 } 763 /* we got everything we needed */ 764 if (numfound == numbits) { 765 /* mlog(0, "Found it all!\n"); */ 766 break; 767 } 768 } 769 770 mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff, 771 numfound); 772 773 if (numfound == numbits) 774 bitoff = startoff - numfound; 775 else 776 bitoff = -1; 777 778 bail: 779 mlog_exit(bitoff); 780 return bitoff; 781 } 782 783 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc) 784 { 785 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 786 int i; 787 mlog_entry_void(); 788 789 alloc->id1.bitmap1.i_total = 0; 790 alloc->id1.bitmap1.i_used = 0; 791 la->la_bm_off = 0; 792 for(i = 0; i < le16_to_cpu(la->la_size); i++) 793 la->la_bitmap[i] = 0; 794 795 mlog_exit_void(); 796 } 797 798 #if 0 799 /* turn this on and uncomment below to aid debugging window shifts. */ 800 static void ocfs2_verify_zero_bits(unsigned long *bitmap, 801 unsigned int start, 802 unsigned int count) 803 { 804 unsigned int tmp = count; 805 while(tmp--) { 806 if (ocfs2_test_bit(start + tmp, bitmap)) { 807 printk("ocfs2_verify_zero_bits: start = %u, count = " 808 "%u\n", start, count); 809 printk("ocfs2_verify_zero_bits: bit %u is set!", 810 start + tmp); 811 BUG(); 812 } 813 } 814 } 815 #endif 816 817 /* 818 * sync the local alloc to main bitmap. 819 * 820 * assumes you've already locked the main bitmap -- the bitmap inode 821 * passed is used for caching. 822 */ 823 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, 824 handle_t *handle, 825 struct ocfs2_dinode *alloc, 826 struct inode *main_bm_inode, 827 struct buffer_head *main_bm_bh) 828 { 829 int status = 0; 830 int bit_off, left, count, start; 831 u64 la_start_blk; 832 u64 blkno; 833 void *bitmap; 834 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 835 836 mlog_entry("total = %u, used = %u\n", 837 le32_to_cpu(alloc->id1.bitmap1.i_total), 838 le32_to_cpu(alloc->id1.bitmap1.i_used)); 839 840 if (!alloc->id1.bitmap1.i_total) { 841 mlog(0, "nothing to sync!\n"); 842 goto bail; 843 } 844 845 if (le32_to_cpu(alloc->id1.bitmap1.i_used) == 846 le32_to_cpu(alloc->id1.bitmap1.i_total)) { 847 mlog(0, "all bits were taken!\n"); 848 goto bail; 849 } 850 851 la_start_blk = ocfs2_clusters_to_blocks(osb->sb, 852 le32_to_cpu(la->la_bm_off)); 853 bitmap = la->la_bitmap; 854 start = count = bit_off = 0; 855 left = le32_to_cpu(alloc->id1.bitmap1.i_total); 856 857 while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start)) 858 != -1) { 859 if ((bit_off < left) && (bit_off == start)) { 860 count++; 861 start++; 862 continue; 863 } 864 if (count) { 865 blkno = la_start_blk + 866 ocfs2_clusters_to_blocks(osb->sb, 867 start - count); 868 869 mlog(0, "freeing %u bits starting at local alloc bit " 870 "%u (la_start_blk = %llu, blkno = %llu)\n", 871 count, start - count, 872 (unsigned long long)la_start_blk, 873 (unsigned long long)blkno); 874 875 status = ocfs2_release_clusters(handle, 876 main_bm_inode, 877 main_bm_bh, blkno, 878 count); 879 if (status < 0) { 880 mlog_errno(status); 881 goto bail; 882 } 883 } 884 if (bit_off >= left) 885 break; 886 count = 1; 887 start = bit_off + 1; 888 } 889 890 bail: 891 mlog_exit(status); 892 return status; 893 } 894 895 enum ocfs2_la_event { 896 OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */ 897 OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has 898 * enough bits theoretically 899 * free, but a contiguous 900 * allocation could not be 901 * found. */ 902 OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have 903 * enough bits free to satisfy 904 * our request. */ 905 }; 906 #define OCFS2_LA_ENABLE_INTERVAL (30 * HZ) 907 /* 908 * Given an event, calculate the size of our next local alloc window. 909 * 910 * This should always be called under i_mutex of the local alloc inode 911 * so that local alloc disabling doesn't race with processes trying to 912 * use the allocator. 913 * 914 * Returns the state which the local alloc was left in. This value can 915 * be ignored by some paths. 916 */ 917 static int ocfs2_recalc_la_window(struct ocfs2_super *osb, 918 enum ocfs2_la_event event) 919 { 920 unsigned int bits; 921 int state; 922 923 spin_lock(&osb->osb_lock); 924 if (osb->local_alloc_state == OCFS2_LA_DISABLED) { 925 WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED); 926 goto out_unlock; 927 } 928 929 /* 930 * ENOSPC and fragmentation are treated similarly for now. 931 */ 932 if (event == OCFS2_LA_EVENT_ENOSPC || 933 event == OCFS2_LA_EVENT_FRAGMENTED) { 934 /* 935 * We ran out of contiguous space in the primary 936 * bitmap. Drastically reduce the number of bits used 937 * by local alloc until we have to disable it. 938 */ 939 bits = osb->local_alloc_bits >> 1; 940 if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) { 941 /* 942 * By setting state to THROTTLED, we'll keep 943 * the number of local alloc bits used down 944 * until an event occurs which would give us 945 * reason to assume the bitmap situation might 946 * have changed. 947 */ 948 osb->local_alloc_state = OCFS2_LA_THROTTLED; 949 osb->local_alloc_bits = bits; 950 } else { 951 osb->local_alloc_state = OCFS2_LA_DISABLED; 952 } 953 queue_delayed_work(ocfs2_wq, &osb->la_enable_wq, 954 OCFS2_LA_ENABLE_INTERVAL); 955 goto out_unlock; 956 } 957 958 /* 959 * Don't increase the size of the local alloc window until we 960 * know we might be able to fulfill the request. Otherwise, we 961 * risk bouncing around the global bitmap during periods of 962 * low space. 963 */ 964 if (osb->local_alloc_state != OCFS2_LA_THROTTLED) 965 osb->local_alloc_bits = osb->local_alloc_default_bits; 966 967 out_unlock: 968 state = osb->local_alloc_state; 969 spin_unlock(&osb->osb_lock); 970 971 return state; 972 } 973 974 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, 975 struct ocfs2_alloc_context **ac, 976 struct inode **bitmap_inode, 977 struct buffer_head **bitmap_bh) 978 { 979 int status; 980 981 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 982 if (!(*ac)) { 983 status = -ENOMEM; 984 mlog_errno(status); 985 goto bail; 986 } 987 988 retry_enospc: 989 (*ac)->ac_bits_wanted = osb->local_alloc_default_bits; 990 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); 991 if (status == -ENOSPC) { 992 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == 993 OCFS2_LA_DISABLED) 994 goto bail; 995 996 ocfs2_free_ac_resource(*ac); 997 memset(*ac, 0, sizeof(struct ocfs2_alloc_context)); 998 goto retry_enospc; 999 } 1000 if (status < 0) { 1001 mlog_errno(status); 1002 goto bail; 1003 } 1004 1005 *bitmap_inode = (*ac)->ac_inode; 1006 igrab(*bitmap_inode); 1007 *bitmap_bh = (*ac)->ac_bh; 1008 get_bh(*bitmap_bh); 1009 status = 0; 1010 bail: 1011 if ((status < 0) && *ac) { 1012 ocfs2_free_alloc_context(*ac); 1013 *ac = NULL; 1014 } 1015 1016 mlog_exit(status); 1017 return status; 1018 } 1019 1020 /* 1021 * pass it the bitmap lock in lock_bh if you have it. 1022 */ 1023 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, 1024 handle_t *handle, 1025 struct ocfs2_alloc_context *ac) 1026 { 1027 int status = 0; 1028 u32 cluster_off, cluster_count; 1029 struct ocfs2_dinode *alloc = NULL; 1030 struct ocfs2_local_alloc *la; 1031 1032 mlog_entry_void(); 1033 1034 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 1035 la = OCFS2_LOCAL_ALLOC(alloc); 1036 1037 if (alloc->id1.bitmap1.i_total) 1038 mlog(0, "asking me to alloc a new window over a non-empty " 1039 "one\n"); 1040 1041 mlog(0, "Allocating %u clusters for a new window.\n", 1042 osb->local_alloc_bits); 1043 1044 /* Instruct the allocation code to try the most recently used 1045 * cluster group. We'll re-record the group used this pass 1046 * below. */ 1047 ac->ac_last_group = osb->la_last_gd; 1048 1049 /* we used the generic suballoc reserve function, but we set 1050 * everything up nicely, so there's no reason why we can't use 1051 * the more specific cluster api to claim bits. */ 1052 status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits, 1053 &cluster_off, &cluster_count); 1054 if (status == -ENOSPC) { 1055 retry_enospc: 1056 /* 1057 * Note: We could also try syncing the journal here to 1058 * allow use of any free bits which the current 1059 * transaction can't give us access to. --Mark 1060 */ 1061 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) == 1062 OCFS2_LA_DISABLED) 1063 goto bail; 1064 1065 ac->ac_bits_wanted = osb->local_alloc_default_bits; 1066 status = ocfs2_claim_clusters(osb, handle, ac, 1067 osb->local_alloc_bits, 1068 &cluster_off, 1069 &cluster_count); 1070 if (status == -ENOSPC) 1071 goto retry_enospc; 1072 /* 1073 * We only shrunk the *minimum* number of in our 1074 * request - it's entirely possible that the allocator 1075 * might give us more than we asked for. 1076 */ 1077 if (status == 0) { 1078 spin_lock(&osb->osb_lock); 1079 osb->local_alloc_bits = cluster_count; 1080 spin_unlock(&osb->osb_lock); 1081 } 1082 } 1083 if (status < 0) { 1084 if (status != -ENOSPC) 1085 mlog_errno(status); 1086 goto bail; 1087 } 1088 1089 osb->la_last_gd = ac->ac_last_group; 1090 1091 la->la_bm_off = cpu_to_le32(cluster_off); 1092 alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count); 1093 /* just in case... In the future when we find space ourselves, 1094 * we don't have to get all contiguous -- but we'll have to 1095 * set all previously used bits in bitmap and update 1096 * la_bits_set before setting the bits in the main bitmap. */ 1097 alloc->id1.bitmap1.i_used = 0; 1098 memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0, 1099 le16_to_cpu(la->la_size)); 1100 1101 mlog(0, "New window allocated:\n"); 1102 mlog(0, "window la_bm_off = %u\n", 1103 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); 1104 mlog(0, "window bits = %u\n", le32_to_cpu(alloc->id1.bitmap1.i_total)); 1105 1106 bail: 1107 mlog_exit(status); 1108 return status; 1109 } 1110 1111 /* Note that we do *NOT* lock the local alloc inode here as 1112 * it's been locked already for us. */ 1113 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 1114 struct inode *local_alloc_inode) 1115 { 1116 int status = 0; 1117 struct buffer_head *main_bm_bh = NULL; 1118 struct inode *main_bm_inode = NULL; 1119 handle_t *handle = NULL; 1120 struct ocfs2_dinode *alloc; 1121 struct ocfs2_dinode *alloc_copy = NULL; 1122 struct ocfs2_alloc_context *ac = NULL; 1123 1124 mlog_entry_void(); 1125 1126 ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE); 1127 1128 /* This will lock the main bitmap for us. */ 1129 status = ocfs2_local_alloc_reserve_for_window(osb, 1130 &ac, 1131 &main_bm_inode, 1132 &main_bm_bh); 1133 if (status < 0) { 1134 if (status != -ENOSPC) 1135 mlog_errno(status); 1136 goto bail; 1137 } 1138 1139 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 1140 if (IS_ERR(handle)) { 1141 status = PTR_ERR(handle); 1142 handle = NULL; 1143 mlog_errno(status); 1144 goto bail; 1145 } 1146 1147 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 1148 1149 /* We want to clear the local alloc before doing anything 1150 * else, so that if we error later during this operation, 1151 * local alloc shutdown won't try to double free main bitmap 1152 * bits. Make a copy so the sync function knows which bits to 1153 * free. */ 1154 alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS); 1155 if (!alloc_copy) { 1156 status = -ENOMEM; 1157 mlog_errno(status); 1158 goto bail; 1159 } 1160 memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size); 1161 1162 status = ocfs2_journal_access_di(handle, 1163 INODE_CACHE(local_alloc_inode), 1164 osb->local_alloc_bh, 1165 OCFS2_JOURNAL_ACCESS_WRITE); 1166 if (status < 0) { 1167 mlog_errno(status); 1168 goto bail; 1169 } 1170 1171 ocfs2_clear_local_alloc(alloc); 1172 1173 status = ocfs2_journal_dirty(handle, osb->local_alloc_bh); 1174 if (status < 0) { 1175 mlog_errno(status); 1176 goto bail; 1177 } 1178 1179 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 1180 main_bm_inode, main_bm_bh); 1181 if (status < 0) { 1182 mlog_errno(status); 1183 goto bail; 1184 } 1185 1186 status = ocfs2_local_alloc_new_window(osb, handle, ac); 1187 if (status < 0) { 1188 if (status != -ENOSPC) 1189 mlog_errno(status); 1190 goto bail; 1191 } 1192 1193 atomic_inc(&osb->alloc_stats.moves); 1194 1195 status = 0; 1196 bail: 1197 if (handle) 1198 ocfs2_commit_trans(osb, handle); 1199 1200 brelse(main_bm_bh); 1201 1202 if (main_bm_inode) 1203 iput(main_bm_inode); 1204 1205 if (alloc_copy) 1206 kfree(alloc_copy); 1207 1208 if (ac) 1209 ocfs2_free_alloc_context(ac); 1210 1211 mlog_exit(status); 1212 return status; 1213 } 1214 1215