1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * localalloc.c 5 * 6 * Node local data allocation 7 * 8 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public 12 * License as published by the Free Software Foundation; either 13 * version 2 of the License, or (at your option) any later version. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * General Public License for more details. 19 * 20 * You should have received a copy of the GNU General Public 21 * License along with this program; if not, write to the 22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 23 * Boston, MA 021110-1307, USA. 24 */ 25 26 #include <linux/fs.h> 27 #include <linux/types.h> 28 #include <linux/slab.h> 29 #include <linux/highmem.h> 30 #include <linux/bitops.h> 31 #include <linux/debugfs.h> 32 33 #define MLOG_MASK_PREFIX ML_DISK_ALLOC 34 #include <cluster/masklog.h> 35 36 #include "ocfs2.h" 37 38 #include "alloc.h" 39 #include "blockcheck.h" 40 #include "dlmglue.h" 41 #include "inode.h" 42 #include "journal.h" 43 #include "localalloc.h" 44 #include "suballoc.h" 45 #include "super.h" 46 #include "sysfile.h" 47 48 #include "buffer_head_io.h" 49 50 #define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab)) 51 52 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); 53 54 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 55 struct ocfs2_dinode *alloc, 56 u32 numbits); 57 58 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc); 59 60 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, 61 handle_t *handle, 62 struct ocfs2_dinode *alloc, 63 struct inode *main_bm_inode, 64 struct buffer_head *main_bm_bh); 65 66 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, 67 struct ocfs2_alloc_context **ac, 68 struct inode **bitmap_inode, 69 struct buffer_head **bitmap_bh); 70 71 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, 72 handle_t *handle, 73 struct ocfs2_alloc_context *ac); 74 75 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 76 struct inode *local_alloc_inode); 77 78 #ifdef CONFIG_OCFS2_FS_STATS 79 80 static int ocfs2_la_debug_open(struct inode *inode, struct file *file) 81 { 82 file->private_data = inode->i_private; 83 return 0; 84 } 85 86 #define LA_DEBUG_BUF_SZ PAGE_CACHE_SIZE 87 #define LA_DEBUG_VER 1 88 static ssize_t ocfs2_la_debug_read(struct file *file, char __user *userbuf, 89 size_t count, loff_t *ppos) 90 { 91 static DEFINE_MUTEX(la_debug_mutex); 92 struct ocfs2_super *osb = file->private_data; 93 int written, ret; 94 char *buf = osb->local_alloc_debug_buf; 95 96 mutex_lock(&la_debug_mutex); 97 memset(buf, 0, LA_DEBUG_BUF_SZ); 98 99 written = snprintf(buf, LA_DEBUG_BUF_SZ, 100 "0x%x\t0x%llx\t%u\t%u\t0x%x\n", 101 LA_DEBUG_VER, 102 (unsigned long long)osb->la_last_gd, 103 osb->local_alloc_default_bits, 104 osb->local_alloc_bits, osb->local_alloc_state); 105 106 ret = simple_read_from_buffer(userbuf, count, ppos, buf, written); 107 108 mutex_unlock(&la_debug_mutex); 109 return ret; 110 } 111 112 static const struct file_operations ocfs2_la_debug_fops = { 113 .open = ocfs2_la_debug_open, 114 .read = ocfs2_la_debug_read, 115 }; 116 117 static void ocfs2_init_la_debug(struct ocfs2_super *osb) 118 { 119 osb->local_alloc_debug_buf = kmalloc(LA_DEBUG_BUF_SZ, GFP_NOFS); 120 if (!osb->local_alloc_debug_buf) 121 return; 122 123 osb->local_alloc_debug = debugfs_create_file("local_alloc_stats", 124 S_IFREG|S_IRUSR, 125 osb->osb_debug_root, 126 osb, 127 &ocfs2_la_debug_fops); 128 if (!osb->local_alloc_debug) { 129 kfree(osb->local_alloc_debug_buf); 130 osb->local_alloc_debug_buf = NULL; 131 } 132 } 133 134 static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb) 135 { 136 if (osb->local_alloc_debug) 137 debugfs_remove(osb->local_alloc_debug); 138 139 if (osb->local_alloc_debug_buf) 140 kfree(osb->local_alloc_debug_buf); 141 142 osb->local_alloc_debug_buf = NULL; 143 osb->local_alloc_debug = NULL; 144 } 145 #else /* CONFIG_OCFS2_FS_STATS */ 146 static void ocfs2_init_la_debug(struct ocfs2_super *osb) 147 { 148 return; 149 } 150 static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb) 151 { 152 return; 153 } 154 #endif 155 156 static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) 157 { 158 return (osb->local_alloc_state == OCFS2_LA_THROTTLED || 159 osb->local_alloc_state == OCFS2_LA_ENABLED); 160 } 161 162 void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, 163 unsigned int num_clusters) 164 { 165 spin_lock(&osb->osb_lock); 166 if (osb->local_alloc_state == OCFS2_LA_DISABLED || 167 osb->local_alloc_state == OCFS2_LA_THROTTLED) 168 if (num_clusters >= osb->local_alloc_default_bits) { 169 cancel_delayed_work(&osb->la_enable_wq); 170 osb->local_alloc_state = OCFS2_LA_ENABLED; 171 } 172 spin_unlock(&osb->osb_lock); 173 } 174 175 void ocfs2_la_enable_worker(struct work_struct *work) 176 { 177 struct ocfs2_super *osb = 178 container_of(work, struct ocfs2_super, 179 la_enable_wq.work); 180 spin_lock(&osb->osb_lock); 181 osb->local_alloc_state = OCFS2_LA_ENABLED; 182 spin_unlock(&osb->osb_lock); 183 } 184 185 /* 186 * Tell us whether a given allocation should use the local alloc 187 * file. Otherwise, it has to go to the main bitmap. 188 * 189 * This function does semi-dirty reads of local alloc size and state! 190 * This is ok however, as the values are re-checked once under mutex. 191 */ 192 int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) 193 { 194 int ret = 0; 195 int la_bits; 196 197 spin_lock(&osb->osb_lock); 198 la_bits = osb->local_alloc_bits; 199 200 if (!ocfs2_la_state_enabled(osb)) 201 goto bail; 202 203 /* la_bits should be at least twice the size (in clusters) of 204 * a new block group. We want to be sure block group 205 * allocations go through the local alloc, so allow an 206 * allocation to take up to half the bitmap. */ 207 if (bits > (la_bits / 2)) 208 goto bail; 209 210 ret = 1; 211 bail: 212 mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", 213 osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); 214 spin_unlock(&osb->osb_lock); 215 return ret; 216 } 217 218 int ocfs2_load_local_alloc(struct ocfs2_super *osb) 219 { 220 int status = 0; 221 struct ocfs2_dinode *alloc = NULL; 222 struct buffer_head *alloc_bh = NULL; 223 u32 num_used; 224 struct inode *inode = NULL; 225 struct ocfs2_local_alloc *la; 226 227 mlog_entry_void(); 228 229 ocfs2_init_la_debug(osb); 230 231 if (osb->local_alloc_bits == 0) 232 goto bail; 233 234 if (osb->local_alloc_bits >= osb->bitmap_cpg) { 235 mlog(ML_NOTICE, "Requested local alloc window %d is larger " 236 "than max possible %u. Using defaults.\n", 237 osb->local_alloc_bits, (osb->bitmap_cpg - 1)); 238 osb->local_alloc_bits = 239 ocfs2_megabytes_to_clusters(osb->sb, 240 OCFS2_DEFAULT_LOCAL_ALLOC_SIZE); 241 } 242 243 /* read the alloc off disk */ 244 inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, 245 osb->slot_num); 246 if (!inode) { 247 status = -EINVAL; 248 mlog_errno(status); 249 goto bail; 250 } 251 252 status = ocfs2_read_inode_block_full(inode, &alloc_bh, 253 OCFS2_BH_IGNORE_CACHE); 254 if (status < 0) { 255 mlog_errno(status); 256 goto bail; 257 } 258 259 alloc = (struct ocfs2_dinode *) alloc_bh->b_data; 260 la = OCFS2_LOCAL_ALLOC(alloc); 261 262 if (!(le32_to_cpu(alloc->i_flags) & 263 (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) { 264 mlog(ML_ERROR, "Invalid local alloc inode, %llu\n", 265 (unsigned long long)OCFS2_I(inode)->ip_blkno); 266 status = -EINVAL; 267 goto bail; 268 } 269 270 if ((la->la_size == 0) || 271 (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) { 272 mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n", 273 le16_to_cpu(la->la_size)); 274 status = -EINVAL; 275 goto bail; 276 } 277 278 /* do a little verification. */ 279 num_used = ocfs2_local_alloc_count_bits(alloc); 280 281 /* hopefully the local alloc has always been recovered before 282 * we load it. */ 283 if (num_used 284 || alloc->id1.bitmap1.i_used 285 || alloc->id1.bitmap1.i_total 286 || la->la_bm_off) 287 mlog(ML_ERROR, "Local alloc hasn't been recovered!\n" 288 "found = %u, set = %u, taken = %u, off = %u\n", 289 num_used, le32_to_cpu(alloc->id1.bitmap1.i_used), 290 le32_to_cpu(alloc->id1.bitmap1.i_total), 291 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); 292 293 osb->local_alloc_bh = alloc_bh; 294 osb->local_alloc_state = OCFS2_LA_ENABLED; 295 296 bail: 297 if (status < 0) 298 brelse(alloc_bh); 299 if (inode) 300 iput(inode); 301 302 if (status < 0) 303 ocfs2_shutdown_la_debug(osb); 304 305 mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits); 306 307 mlog_exit(status); 308 return status; 309 } 310 311 /* 312 * return any unused bits to the bitmap and write out a clean 313 * local_alloc. 314 * 315 * local_alloc_bh is optional. If not passed, we will simply use the 316 * one off osb. If you do pass it however, be warned that it *will* be 317 * returned brelse'd and NULL'd out.*/ 318 void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) 319 { 320 int status; 321 handle_t *handle; 322 struct inode *local_alloc_inode = NULL; 323 struct buffer_head *bh = NULL; 324 struct buffer_head *main_bm_bh = NULL; 325 struct inode *main_bm_inode = NULL; 326 struct ocfs2_dinode *alloc_copy = NULL; 327 struct ocfs2_dinode *alloc = NULL; 328 329 mlog_entry_void(); 330 331 cancel_delayed_work(&osb->la_enable_wq); 332 flush_workqueue(ocfs2_wq); 333 334 ocfs2_shutdown_la_debug(osb); 335 336 if (osb->local_alloc_state == OCFS2_LA_UNUSED) 337 goto out; 338 339 local_alloc_inode = 340 ocfs2_get_system_file_inode(osb, 341 LOCAL_ALLOC_SYSTEM_INODE, 342 osb->slot_num); 343 if (!local_alloc_inode) { 344 status = -ENOENT; 345 mlog_errno(status); 346 goto out; 347 } 348 349 osb->local_alloc_state = OCFS2_LA_DISABLED; 350 351 main_bm_inode = ocfs2_get_system_file_inode(osb, 352 GLOBAL_BITMAP_SYSTEM_INODE, 353 OCFS2_INVALID_SLOT); 354 if (!main_bm_inode) { 355 status = -EINVAL; 356 mlog_errno(status); 357 goto out; 358 } 359 360 mutex_lock(&main_bm_inode->i_mutex); 361 362 status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); 363 if (status < 0) { 364 mlog_errno(status); 365 goto out_mutex; 366 } 367 368 /* WINDOW_MOVE_CREDITS is a bit heavy... */ 369 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 370 if (IS_ERR(handle)) { 371 mlog_errno(PTR_ERR(handle)); 372 handle = NULL; 373 goto out_unlock; 374 } 375 376 bh = osb->local_alloc_bh; 377 alloc = (struct ocfs2_dinode *) bh->b_data; 378 379 alloc_copy = kmalloc(bh->b_size, GFP_NOFS); 380 if (!alloc_copy) { 381 status = -ENOMEM; 382 goto out_commit; 383 } 384 memcpy(alloc_copy, alloc, bh->b_size); 385 386 status = ocfs2_journal_access_di(handle, local_alloc_inode, bh, 387 OCFS2_JOURNAL_ACCESS_WRITE); 388 if (status < 0) { 389 mlog_errno(status); 390 goto out_commit; 391 } 392 393 ocfs2_clear_local_alloc(alloc); 394 395 status = ocfs2_journal_dirty(handle, bh); 396 if (status < 0) { 397 mlog_errno(status); 398 goto out_commit; 399 } 400 401 brelse(bh); 402 osb->local_alloc_bh = NULL; 403 osb->local_alloc_state = OCFS2_LA_UNUSED; 404 405 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 406 main_bm_inode, main_bm_bh); 407 if (status < 0) 408 mlog_errno(status); 409 410 out_commit: 411 ocfs2_commit_trans(osb, handle); 412 413 out_unlock: 414 brelse(main_bm_bh); 415 416 ocfs2_inode_unlock(main_bm_inode, 1); 417 418 out_mutex: 419 mutex_unlock(&main_bm_inode->i_mutex); 420 iput(main_bm_inode); 421 422 out: 423 if (local_alloc_inode) 424 iput(local_alloc_inode); 425 426 if (alloc_copy) 427 kfree(alloc_copy); 428 429 mlog_exit_void(); 430 } 431 432 /* 433 * We want to free the bitmap bits outside of any recovery context as 434 * we'll need a cluster lock to do so, but we must clear the local 435 * alloc before giving up the recovered nodes journal. To solve this, 436 * we kmalloc a copy of the local alloc before it's change for the 437 * caller to process with ocfs2_complete_local_alloc_recovery 438 */ 439 int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, 440 int slot_num, 441 struct ocfs2_dinode **alloc_copy) 442 { 443 int status = 0; 444 struct buffer_head *alloc_bh = NULL; 445 struct inode *inode = NULL; 446 struct ocfs2_dinode *alloc; 447 448 mlog_entry("(slot_num = %d)\n", slot_num); 449 450 *alloc_copy = NULL; 451 452 inode = ocfs2_get_system_file_inode(osb, 453 LOCAL_ALLOC_SYSTEM_INODE, 454 slot_num); 455 if (!inode) { 456 status = -EINVAL; 457 mlog_errno(status); 458 goto bail; 459 } 460 461 mutex_lock(&inode->i_mutex); 462 463 status = ocfs2_read_inode_block_full(inode, &alloc_bh, 464 OCFS2_BH_IGNORE_CACHE); 465 if (status < 0) { 466 mlog_errno(status); 467 goto bail; 468 } 469 470 *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL); 471 if (!(*alloc_copy)) { 472 status = -ENOMEM; 473 goto bail; 474 } 475 memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size); 476 477 alloc = (struct ocfs2_dinode *) alloc_bh->b_data; 478 ocfs2_clear_local_alloc(alloc); 479 480 ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check); 481 status = ocfs2_write_block(osb, alloc_bh, inode); 482 if (status < 0) 483 mlog_errno(status); 484 485 bail: 486 if ((status < 0) && (*alloc_copy)) { 487 kfree(*alloc_copy); 488 *alloc_copy = NULL; 489 } 490 491 brelse(alloc_bh); 492 493 if (inode) { 494 mutex_unlock(&inode->i_mutex); 495 iput(inode); 496 } 497 498 mlog_exit(status); 499 return status; 500 } 501 502 /* 503 * Step 2: By now, we've completed the journal recovery, we've stamped 504 * a clean local alloc on disk and dropped the node out of the 505 * recovery map. Dlm locks will no longer stall, so lets clear out the 506 * main bitmap. 507 */ 508 int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb, 509 struct ocfs2_dinode *alloc) 510 { 511 int status; 512 handle_t *handle; 513 struct buffer_head *main_bm_bh = NULL; 514 struct inode *main_bm_inode; 515 516 mlog_entry_void(); 517 518 main_bm_inode = ocfs2_get_system_file_inode(osb, 519 GLOBAL_BITMAP_SYSTEM_INODE, 520 OCFS2_INVALID_SLOT); 521 if (!main_bm_inode) { 522 status = -EINVAL; 523 mlog_errno(status); 524 goto out; 525 } 526 527 mutex_lock(&main_bm_inode->i_mutex); 528 529 status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); 530 if (status < 0) { 531 mlog_errno(status); 532 goto out_mutex; 533 } 534 535 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 536 if (IS_ERR(handle)) { 537 status = PTR_ERR(handle); 538 handle = NULL; 539 mlog_errno(status); 540 goto out_unlock; 541 } 542 543 /* we want the bitmap change to be recorded on disk asap */ 544 handle->h_sync = 1; 545 546 status = ocfs2_sync_local_to_main(osb, handle, alloc, 547 main_bm_inode, main_bm_bh); 548 if (status < 0) 549 mlog_errno(status); 550 551 ocfs2_commit_trans(osb, handle); 552 553 out_unlock: 554 ocfs2_inode_unlock(main_bm_inode, 1); 555 556 out_mutex: 557 mutex_unlock(&main_bm_inode->i_mutex); 558 559 brelse(main_bm_bh); 560 561 iput(main_bm_inode); 562 563 out: 564 if (!status) 565 ocfs2_init_inode_steal_slot(osb); 566 mlog_exit(status); 567 return status; 568 } 569 570 /* Check to see if the local alloc window is within ac->ac_max_block */ 571 static int ocfs2_local_alloc_in_range(struct inode *inode, 572 struct ocfs2_alloc_context *ac, 573 u32 bits_wanted) 574 { 575 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 576 struct ocfs2_dinode *alloc; 577 struct ocfs2_local_alloc *la; 578 int start; 579 u64 block_off; 580 581 if (!ac->ac_max_block) 582 return 1; 583 584 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 585 la = OCFS2_LOCAL_ALLOC(alloc); 586 587 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); 588 if (start == -1) { 589 mlog_errno(-ENOSPC); 590 return 0; 591 } 592 593 /* 594 * Converting (bm_off + start + bits_wanted) to blocks gives us 595 * the blkno just past our actual allocation. This is perfect 596 * to compare with ac_max_block. 597 */ 598 block_off = ocfs2_clusters_to_blocks(inode->i_sb, 599 le32_to_cpu(la->la_bm_off) + 600 start + bits_wanted); 601 mlog(0, "Checking %llu against %llu\n", 602 (unsigned long long)block_off, 603 (unsigned long long)ac->ac_max_block); 604 if (block_off > ac->ac_max_block) 605 return 0; 606 607 return 1; 608 } 609 610 /* 611 * make sure we've got at least bits_wanted contiguous bits in the 612 * local alloc. You lose them when you drop i_mutex. 613 * 614 * We will add ourselves to the transaction passed in, but may start 615 * our own in order to shift windows. 616 */ 617 int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, 618 u32 bits_wanted, 619 struct ocfs2_alloc_context *ac) 620 { 621 int status; 622 struct ocfs2_dinode *alloc; 623 struct inode *local_alloc_inode; 624 unsigned int free_bits; 625 626 mlog_entry_void(); 627 628 BUG_ON(!ac); 629 630 local_alloc_inode = 631 ocfs2_get_system_file_inode(osb, 632 LOCAL_ALLOC_SYSTEM_INODE, 633 osb->slot_num); 634 if (!local_alloc_inode) { 635 status = -ENOENT; 636 mlog_errno(status); 637 goto bail; 638 } 639 640 mutex_lock(&local_alloc_inode->i_mutex); 641 642 /* 643 * We must double check state and allocator bits because 644 * another process may have changed them while holding i_mutex. 645 */ 646 spin_lock(&osb->osb_lock); 647 if (!ocfs2_la_state_enabled(osb) || 648 (bits_wanted > osb->local_alloc_bits)) { 649 spin_unlock(&osb->osb_lock); 650 status = -ENOSPC; 651 goto bail; 652 } 653 spin_unlock(&osb->osb_lock); 654 655 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 656 657 #ifdef CONFIG_OCFS2_DEBUG_FS 658 if (le32_to_cpu(alloc->id1.bitmap1.i_used) != 659 ocfs2_local_alloc_count_bits(alloc)) { 660 ocfs2_error(osb->sb, "local alloc inode %llu says it has " 661 "%u free bits, but a count shows %u", 662 (unsigned long long)le64_to_cpu(alloc->i_blkno), 663 le32_to_cpu(alloc->id1.bitmap1.i_used), 664 ocfs2_local_alloc_count_bits(alloc)); 665 status = -EIO; 666 goto bail; 667 } 668 #endif 669 670 free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - 671 le32_to_cpu(alloc->id1.bitmap1.i_used); 672 if (bits_wanted > free_bits) { 673 /* uhoh, window change time. */ 674 status = 675 ocfs2_local_alloc_slide_window(osb, local_alloc_inode); 676 if (status < 0) { 677 if (status != -ENOSPC) 678 mlog_errno(status); 679 goto bail; 680 } 681 682 /* 683 * Under certain conditions, the window slide code 684 * might have reduced the number of bits available or 685 * disabled the the local alloc entirely. Re-check 686 * here and return -ENOSPC if necessary. 687 */ 688 status = -ENOSPC; 689 if (!ocfs2_la_state_enabled(osb)) 690 goto bail; 691 692 free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - 693 le32_to_cpu(alloc->id1.bitmap1.i_used); 694 if (bits_wanted > free_bits) 695 goto bail; 696 } 697 698 if (ac->ac_max_block) 699 mlog(0, "Calling in_range for max block %llu\n", 700 (unsigned long long)ac->ac_max_block); 701 702 if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac, 703 bits_wanted)) { 704 /* 705 * The window is outside ac->ac_max_block. 706 * This errno tells the caller to keep localalloc enabled 707 * but to get the allocation from the main bitmap. 708 */ 709 status = -EFBIG; 710 goto bail; 711 } 712 713 ac->ac_inode = local_alloc_inode; 714 /* We should never use localalloc from another slot */ 715 ac->ac_alloc_slot = osb->slot_num; 716 ac->ac_which = OCFS2_AC_USE_LOCAL; 717 get_bh(osb->local_alloc_bh); 718 ac->ac_bh = osb->local_alloc_bh; 719 status = 0; 720 bail: 721 if (status < 0 && local_alloc_inode) { 722 mutex_unlock(&local_alloc_inode->i_mutex); 723 iput(local_alloc_inode); 724 } 725 726 mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num, 727 status); 728 729 mlog_exit(status); 730 return status; 731 } 732 733 int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, 734 handle_t *handle, 735 struct ocfs2_alloc_context *ac, 736 u32 bits_wanted, 737 u32 *bit_off, 738 u32 *num_bits) 739 { 740 int status, start; 741 struct inode *local_alloc_inode; 742 void *bitmap; 743 struct ocfs2_dinode *alloc; 744 struct ocfs2_local_alloc *la; 745 746 mlog_entry_void(); 747 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); 748 749 local_alloc_inode = ac->ac_inode; 750 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 751 la = OCFS2_LOCAL_ALLOC(alloc); 752 753 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); 754 if (start == -1) { 755 /* TODO: Shouldn't we just BUG here? */ 756 status = -ENOSPC; 757 mlog_errno(status); 758 goto bail; 759 } 760 761 bitmap = la->la_bitmap; 762 *bit_off = le32_to_cpu(la->la_bm_off) + start; 763 /* local alloc is always contiguous by nature -- we never 764 * delete bits from it! */ 765 *num_bits = bits_wanted; 766 767 status = ocfs2_journal_access_di(handle, local_alloc_inode, 768 osb->local_alloc_bh, 769 OCFS2_JOURNAL_ACCESS_WRITE); 770 if (status < 0) { 771 mlog_errno(status); 772 goto bail; 773 } 774 775 while(bits_wanted--) 776 ocfs2_set_bit(start++, bitmap); 777 778 le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits); 779 780 status = ocfs2_journal_dirty(handle, osb->local_alloc_bh); 781 if (status < 0) { 782 mlog_errno(status); 783 goto bail; 784 } 785 786 status = 0; 787 bail: 788 mlog_exit(status); 789 return status; 790 } 791 792 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc) 793 { 794 int i; 795 u8 *buffer; 796 u32 count = 0; 797 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 798 799 mlog_entry_void(); 800 801 buffer = la->la_bitmap; 802 for (i = 0; i < le16_to_cpu(la->la_size); i++) 803 count += hweight8(buffer[i]); 804 805 mlog_exit(count); 806 return count; 807 } 808 809 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 810 struct ocfs2_dinode *alloc, 811 u32 numbits) 812 { 813 int numfound, bitoff, left, startoff, lastzero; 814 void *bitmap = NULL; 815 816 mlog_entry("(numbits wanted = %u)\n", numbits); 817 818 if (!alloc->id1.bitmap1.i_total) { 819 mlog(0, "No bits in my window!\n"); 820 bitoff = -1; 821 goto bail; 822 } 823 824 bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap; 825 826 numfound = bitoff = startoff = 0; 827 lastzero = -1; 828 left = le32_to_cpu(alloc->id1.bitmap1.i_total); 829 while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) { 830 if (bitoff == left) { 831 /* mlog(0, "bitoff (%d) == left", bitoff); */ 832 break; 833 } 834 /* mlog(0, "Found a zero: bitoff = %d, startoff = %d, " 835 "numfound = %d\n", bitoff, startoff, numfound);*/ 836 837 /* Ok, we found a zero bit... is it contig. or do we 838 * start over?*/ 839 if (bitoff == startoff) { 840 /* we found a zero */ 841 numfound++; 842 startoff++; 843 } else { 844 /* got a zero after some ones */ 845 numfound = 1; 846 startoff = bitoff+1; 847 } 848 /* we got everything we needed */ 849 if (numfound == numbits) { 850 /* mlog(0, "Found it all!\n"); */ 851 break; 852 } 853 } 854 855 mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff, 856 numfound); 857 858 if (numfound == numbits) 859 bitoff = startoff - numfound; 860 else 861 bitoff = -1; 862 863 bail: 864 mlog_exit(bitoff); 865 return bitoff; 866 } 867 868 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc) 869 { 870 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 871 int i; 872 mlog_entry_void(); 873 874 alloc->id1.bitmap1.i_total = 0; 875 alloc->id1.bitmap1.i_used = 0; 876 la->la_bm_off = 0; 877 for(i = 0; i < le16_to_cpu(la->la_size); i++) 878 la->la_bitmap[i] = 0; 879 880 mlog_exit_void(); 881 } 882 883 #if 0 884 /* turn this on and uncomment below to aid debugging window shifts. */ 885 static void ocfs2_verify_zero_bits(unsigned long *bitmap, 886 unsigned int start, 887 unsigned int count) 888 { 889 unsigned int tmp = count; 890 while(tmp--) { 891 if (ocfs2_test_bit(start + tmp, bitmap)) { 892 printk("ocfs2_verify_zero_bits: start = %u, count = " 893 "%u\n", start, count); 894 printk("ocfs2_verify_zero_bits: bit %u is set!", 895 start + tmp); 896 BUG(); 897 } 898 } 899 } 900 #endif 901 902 /* 903 * sync the local alloc to main bitmap. 904 * 905 * assumes you've already locked the main bitmap -- the bitmap inode 906 * passed is used for caching. 907 */ 908 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, 909 handle_t *handle, 910 struct ocfs2_dinode *alloc, 911 struct inode *main_bm_inode, 912 struct buffer_head *main_bm_bh) 913 { 914 int status = 0; 915 int bit_off, left, count, start; 916 u64 la_start_blk; 917 u64 blkno; 918 void *bitmap; 919 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 920 921 mlog_entry("total = %u, used = %u\n", 922 le32_to_cpu(alloc->id1.bitmap1.i_total), 923 le32_to_cpu(alloc->id1.bitmap1.i_used)); 924 925 if (!alloc->id1.bitmap1.i_total) { 926 mlog(0, "nothing to sync!\n"); 927 goto bail; 928 } 929 930 if (le32_to_cpu(alloc->id1.bitmap1.i_used) == 931 le32_to_cpu(alloc->id1.bitmap1.i_total)) { 932 mlog(0, "all bits were taken!\n"); 933 goto bail; 934 } 935 936 la_start_blk = ocfs2_clusters_to_blocks(osb->sb, 937 le32_to_cpu(la->la_bm_off)); 938 bitmap = la->la_bitmap; 939 start = count = bit_off = 0; 940 left = le32_to_cpu(alloc->id1.bitmap1.i_total); 941 942 while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start)) 943 != -1) { 944 if ((bit_off < left) && (bit_off == start)) { 945 count++; 946 start++; 947 continue; 948 } 949 if (count) { 950 blkno = la_start_blk + 951 ocfs2_clusters_to_blocks(osb->sb, 952 start - count); 953 954 mlog(0, "freeing %u bits starting at local alloc bit " 955 "%u (la_start_blk = %llu, blkno = %llu)\n", 956 count, start - count, 957 (unsigned long long)la_start_blk, 958 (unsigned long long)blkno); 959 960 status = ocfs2_free_clusters(handle, main_bm_inode, 961 main_bm_bh, blkno, count); 962 if (status < 0) { 963 mlog_errno(status); 964 goto bail; 965 } 966 } 967 if (bit_off >= left) 968 break; 969 count = 1; 970 start = bit_off + 1; 971 } 972 973 bail: 974 mlog_exit(status); 975 return status; 976 } 977 978 enum ocfs2_la_event { 979 OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */ 980 OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has 981 * enough bits theoretically 982 * free, but a contiguous 983 * allocation could not be 984 * found. */ 985 OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have 986 * enough bits free to satisfy 987 * our request. */ 988 }; 989 #define OCFS2_LA_ENABLE_INTERVAL (30 * HZ) 990 /* 991 * Given an event, calculate the size of our next local alloc window. 992 * 993 * This should always be called under i_mutex of the local alloc inode 994 * so that local alloc disabling doesn't race with processes trying to 995 * use the allocator. 996 * 997 * Returns the state which the local alloc was left in. This value can 998 * be ignored by some paths. 999 */ 1000 static int ocfs2_recalc_la_window(struct ocfs2_super *osb, 1001 enum ocfs2_la_event event) 1002 { 1003 unsigned int bits; 1004 int state; 1005 1006 spin_lock(&osb->osb_lock); 1007 if (osb->local_alloc_state == OCFS2_LA_DISABLED) { 1008 WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED); 1009 goto out_unlock; 1010 } 1011 1012 /* 1013 * ENOSPC and fragmentation are treated similarly for now. 1014 */ 1015 if (event == OCFS2_LA_EVENT_ENOSPC || 1016 event == OCFS2_LA_EVENT_FRAGMENTED) { 1017 /* 1018 * We ran out of contiguous space in the primary 1019 * bitmap. Drastically reduce the number of bits used 1020 * by local alloc until we have to disable it. 1021 */ 1022 bits = osb->local_alloc_bits >> 1; 1023 if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) { 1024 /* 1025 * By setting state to THROTTLED, we'll keep 1026 * the number of local alloc bits used down 1027 * until an event occurs which would give us 1028 * reason to assume the bitmap situation might 1029 * have changed. 1030 */ 1031 osb->local_alloc_state = OCFS2_LA_THROTTLED; 1032 osb->local_alloc_bits = bits; 1033 } else { 1034 osb->local_alloc_state = OCFS2_LA_DISABLED; 1035 } 1036 queue_delayed_work(ocfs2_wq, &osb->la_enable_wq, 1037 OCFS2_LA_ENABLE_INTERVAL); 1038 goto out_unlock; 1039 } 1040 1041 /* 1042 * Don't increase the size of the local alloc window until we 1043 * know we might be able to fulfill the request. Otherwise, we 1044 * risk bouncing around the global bitmap during periods of 1045 * low space. 1046 */ 1047 if (osb->local_alloc_state != OCFS2_LA_THROTTLED) 1048 osb->local_alloc_bits = osb->local_alloc_default_bits; 1049 1050 out_unlock: 1051 state = osb->local_alloc_state; 1052 spin_unlock(&osb->osb_lock); 1053 1054 return state; 1055 } 1056 1057 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, 1058 struct ocfs2_alloc_context **ac, 1059 struct inode **bitmap_inode, 1060 struct buffer_head **bitmap_bh) 1061 { 1062 int status; 1063 1064 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 1065 if (!(*ac)) { 1066 status = -ENOMEM; 1067 mlog_errno(status); 1068 goto bail; 1069 } 1070 1071 retry_enospc: 1072 (*ac)->ac_bits_wanted = osb->local_alloc_bits; 1073 1074 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); 1075 if (status == -ENOSPC) { 1076 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == 1077 OCFS2_LA_DISABLED) 1078 goto bail; 1079 1080 ocfs2_free_ac_resource(*ac); 1081 memset(*ac, 0, sizeof(struct ocfs2_alloc_context)); 1082 goto retry_enospc; 1083 } 1084 if (status < 0) { 1085 mlog_errno(status); 1086 goto bail; 1087 } 1088 1089 *bitmap_inode = (*ac)->ac_inode; 1090 igrab(*bitmap_inode); 1091 *bitmap_bh = (*ac)->ac_bh; 1092 get_bh(*bitmap_bh); 1093 status = 0; 1094 bail: 1095 if ((status < 0) && *ac) { 1096 ocfs2_free_alloc_context(*ac); 1097 *ac = NULL; 1098 } 1099 1100 mlog_exit(status); 1101 return status; 1102 } 1103 1104 /* 1105 * pass it the bitmap lock in lock_bh if you have it. 1106 */ 1107 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, 1108 handle_t *handle, 1109 struct ocfs2_alloc_context *ac) 1110 { 1111 int status = 0; 1112 u32 cluster_off, cluster_count; 1113 struct ocfs2_dinode *alloc = NULL; 1114 struct ocfs2_local_alloc *la; 1115 1116 mlog_entry_void(); 1117 1118 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 1119 la = OCFS2_LOCAL_ALLOC(alloc); 1120 1121 if (alloc->id1.bitmap1.i_total) 1122 mlog(0, "asking me to alloc a new window over a non-empty " 1123 "one\n"); 1124 1125 mlog(0, "Allocating %u clusters for a new window.\n", 1126 osb->local_alloc_bits); 1127 1128 /* Instruct the allocation code to try the most recently used 1129 * cluster group. We'll re-record the group used this pass 1130 * below. */ 1131 ac->ac_last_group = osb->la_last_gd; 1132 1133 /* we used the generic suballoc reserve function, but we set 1134 * everything up nicely, so there's no reason why we can't use 1135 * the more specific cluster api to claim bits. */ 1136 status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits, 1137 &cluster_off, &cluster_count); 1138 if (status == -ENOSPC) { 1139 retry_enospc: 1140 /* 1141 * Note: We could also try syncing the journal here to 1142 * allow use of any free bits which the current 1143 * transaction can't give us access to. --Mark 1144 */ 1145 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) == 1146 OCFS2_LA_DISABLED) 1147 goto bail; 1148 1149 status = ocfs2_claim_clusters(osb, handle, ac, 1150 osb->local_alloc_bits, 1151 &cluster_off, 1152 &cluster_count); 1153 if (status == -ENOSPC) 1154 goto retry_enospc; 1155 /* 1156 * We only shrunk the *minimum* number of in our 1157 * request - it's entirely possible that the allocator 1158 * might give us more than we asked for. 1159 */ 1160 if (status == 0) { 1161 spin_lock(&osb->osb_lock); 1162 osb->local_alloc_bits = cluster_count; 1163 spin_unlock(&osb->osb_lock); 1164 } 1165 } 1166 if (status < 0) { 1167 if (status != -ENOSPC) 1168 mlog_errno(status); 1169 goto bail; 1170 } 1171 1172 osb->la_last_gd = ac->ac_last_group; 1173 1174 la->la_bm_off = cpu_to_le32(cluster_off); 1175 alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count); 1176 /* just in case... In the future when we find space ourselves, 1177 * we don't have to get all contiguous -- but we'll have to 1178 * set all previously used bits in bitmap and update 1179 * la_bits_set before setting the bits in the main bitmap. */ 1180 alloc->id1.bitmap1.i_used = 0; 1181 memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0, 1182 le16_to_cpu(la->la_size)); 1183 1184 mlog(0, "New window allocated:\n"); 1185 mlog(0, "window la_bm_off = %u\n", 1186 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); 1187 mlog(0, "window bits = %u\n", le32_to_cpu(alloc->id1.bitmap1.i_total)); 1188 1189 bail: 1190 mlog_exit(status); 1191 return status; 1192 } 1193 1194 /* Note that we do *NOT* lock the local alloc inode here as 1195 * it's been locked already for us. */ 1196 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 1197 struct inode *local_alloc_inode) 1198 { 1199 int status = 0; 1200 struct buffer_head *main_bm_bh = NULL; 1201 struct inode *main_bm_inode = NULL; 1202 handle_t *handle = NULL; 1203 struct ocfs2_dinode *alloc; 1204 struct ocfs2_dinode *alloc_copy = NULL; 1205 struct ocfs2_alloc_context *ac = NULL; 1206 1207 mlog_entry_void(); 1208 1209 ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE); 1210 1211 /* This will lock the main bitmap for us. */ 1212 status = ocfs2_local_alloc_reserve_for_window(osb, 1213 &ac, 1214 &main_bm_inode, 1215 &main_bm_bh); 1216 if (status < 0) { 1217 if (status != -ENOSPC) 1218 mlog_errno(status); 1219 goto bail; 1220 } 1221 1222 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 1223 if (IS_ERR(handle)) { 1224 status = PTR_ERR(handle); 1225 handle = NULL; 1226 mlog_errno(status); 1227 goto bail; 1228 } 1229 1230 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 1231 1232 /* We want to clear the local alloc before doing anything 1233 * else, so that if we error later during this operation, 1234 * local alloc shutdown won't try to double free main bitmap 1235 * bits. Make a copy so the sync function knows which bits to 1236 * free. */ 1237 alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS); 1238 if (!alloc_copy) { 1239 status = -ENOMEM; 1240 mlog_errno(status); 1241 goto bail; 1242 } 1243 memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size); 1244 1245 status = ocfs2_journal_access_di(handle, local_alloc_inode, 1246 osb->local_alloc_bh, 1247 OCFS2_JOURNAL_ACCESS_WRITE); 1248 if (status < 0) { 1249 mlog_errno(status); 1250 goto bail; 1251 } 1252 1253 ocfs2_clear_local_alloc(alloc); 1254 1255 status = ocfs2_journal_dirty(handle, osb->local_alloc_bh); 1256 if (status < 0) { 1257 mlog_errno(status); 1258 goto bail; 1259 } 1260 1261 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 1262 main_bm_inode, main_bm_bh); 1263 if (status < 0) { 1264 mlog_errno(status); 1265 goto bail; 1266 } 1267 1268 status = ocfs2_local_alloc_new_window(osb, handle, ac); 1269 if (status < 0) { 1270 if (status != -ENOSPC) 1271 mlog_errno(status); 1272 goto bail; 1273 } 1274 1275 atomic_inc(&osb->alloc_stats.moves); 1276 1277 status = 0; 1278 bail: 1279 if (handle) 1280 ocfs2_commit_trans(osb, handle); 1281 1282 brelse(main_bm_bh); 1283 1284 if (main_bm_inode) 1285 iput(main_bm_inode); 1286 1287 if (alloc_copy) 1288 kfree(alloc_copy); 1289 1290 if (ac) 1291 ocfs2_free_alloc_context(ac); 1292 1293 mlog_exit(status); 1294 return status; 1295 } 1296 1297