1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * localalloc.c 4 * 5 * Node local data allocation 6 * 7 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 8 */ 9 10 #include <linux/fs.h> 11 #include <linux/types.h> 12 #include <linux/slab.h> 13 #include <linux/highmem.h> 14 #include <linux/bitops.h> 15 16 #include <cluster/masklog.h> 17 18 #include "ocfs2.h" 19 20 #include "alloc.h" 21 #include "blockcheck.h" 22 #include "dlmglue.h" 23 #include "inode.h" 24 #include "journal.h" 25 #include "localalloc.h" 26 #include "suballoc.h" 27 #include "super.h" 28 #include "sysfile.h" 29 #include "ocfs2_trace.h" 30 31 #include "buffer_head_io.h" 32 33 #define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab)) 34 35 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); 36 37 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 38 struct ocfs2_dinode *alloc, 39 u32 *numbits, 40 struct ocfs2_alloc_reservation *resv); 41 42 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc); 43 44 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, 45 handle_t *handle, 46 struct ocfs2_dinode *alloc, 47 struct inode *main_bm_inode, 48 struct buffer_head *main_bm_bh); 49 50 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, 51 struct ocfs2_alloc_context **ac, 52 struct inode **bitmap_inode, 53 struct buffer_head **bitmap_bh); 54 55 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, 56 handle_t *handle, 57 struct ocfs2_alloc_context *ac); 58 59 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 60 struct inode *local_alloc_inode); 61 62 /* 63 * ocfs2_la_default_mb() - determine a default size, in megabytes of 64 * the local alloc. 65 * 66 * Generally, we'd like to pick as large a local alloc as 67 * possible. Performance on large workloads tends to scale 68 * proportionally to la size. 
In addition to that, the reservations 69 * code functions more efficiently as it can reserve more windows for 70 * write. 71 * 72 * Some things work against us when trying to choose a large local alloc: 73 * 74 * - We need to ensure our sizing is picked to leave enough space in 75 * group descriptors for other allocations (such as block groups, 76 * etc). Picking default sizes which are a multiple of 4 could help 77 * - block groups are allocated in 2mb and 4mb chunks. 78 * 79 * - Likewise, we don't want to starve other nodes of bits on small 80 * file systems. This can easily be taken care of by limiting our 81 * default to a reasonable size (256M) on larger cluster sizes. 82 * 83 * - Some file systems can't support very large sizes - 4k and 8k in 84 * particular are limited to less than 128 and 256 megabytes respectively. 85 * 86 * The following reference table shows group descriptor and local 87 * alloc maximums at various cluster sizes (4k blocksize) 88 * 89 * csize: 4K group: 126M la: 121M 90 * csize: 8K group: 252M la: 243M 91 * csize: 16K group: 504M la: 486M 92 * csize: 32K group: 1008M la: 972M 93 * csize: 64K group: 2016M la: 1944M 94 * csize: 128K group: 4032M la: 3888M 95 * csize: 256K group: 8064M la: 7776M 96 * csize: 512K group: 16128M la: 15552M 97 * csize: 1024K group: 32256M la: 31104M 98 */ 99 #define OCFS2_LA_MAX_DEFAULT_MB 256 100 #define OCFS2_LA_OLD_DEFAULT 8 101 unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb) 102 { 103 unsigned int la_mb; 104 unsigned int gd_mb; 105 unsigned int la_max_mb; 106 unsigned int megs_per_slot; 107 struct super_block *sb = osb->sb; 108 109 gd_mb = ocfs2_clusters_to_megabytes(osb->sb, 110 8 * ocfs2_group_bitmap_size(sb, 0, osb->s_feature_incompat)); 111 112 /* 113 * This takes care of files systems with very small group 114 * descriptors - 512 byte blocksize at cluster sizes lower 115 * than 16K and also 1k blocksize with 4k cluster size. 
116 */ 117 if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192) 118 || (sb->s_blocksize == 1024 && osb->s_clustersize == 4096)) 119 return OCFS2_LA_OLD_DEFAULT; 120 121 /* 122 * Leave enough room for some block groups and make the final 123 * value we work from a multiple of 4. 124 */ 125 gd_mb -= 16; 126 gd_mb &= 0xFFFFFFFB; 127 128 la_mb = gd_mb; 129 130 /* 131 * Keep window sizes down to a reasonable default 132 */ 133 if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) { 134 /* 135 * Some clustersize / blocksize combinations will have 136 * given us a larger than OCFS2_LA_MAX_DEFAULT_MB 137 * default size, but get poor distribution when 138 * limited to exactly 256 megabytes. 139 * 140 * As an example, 16K clustersize at 4K blocksize 141 * gives us a cluster group size of 504M. Paring the 142 * local alloc size down to 256 however, would give us 143 * only one window and around 200MB left in the 144 * cluster group. Instead, find the first size below 145 * 256 which would give us an even distribution. 146 * 147 * Larger cluster group sizes actually work out pretty 148 * well when pared to 256, so we don't have to do this 149 * for any group that fits more than two 150 * OCFS2_LA_MAX_DEFAULT_MB windows. 151 */ 152 if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB)) 153 la_mb = 256; 154 else { 155 unsigned int gd_mult = gd_mb; 156 157 while (gd_mult > 256) 158 gd_mult = gd_mult >> 1; 159 160 la_mb = gd_mult; 161 } 162 } 163 164 megs_per_slot = osb->osb_clusters_at_boot / osb->max_slots; 165 megs_per_slot = ocfs2_clusters_to_megabytes(osb->sb, megs_per_slot); 166 /* Too many nodes, too few disk clusters. */ 167 if (megs_per_slot < la_mb) 168 la_mb = megs_per_slot; 169 170 /* We can't store more bits than we can in a block. 
*/ 171 la_max_mb = ocfs2_clusters_to_megabytes(osb->sb, 172 ocfs2_local_alloc_size(sb) * 8); 173 if (la_mb > la_max_mb) 174 la_mb = la_max_mb; 175 176 return la_mb; 177 } 178 179 void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb) 180 { 181 struct super_block *sb = osb->sb; 182 unsigned int la_default_mb = ocfs2_la_default_mb(osb); 183 unsigned int la_max_mb; 184 185 la_max_mb = ocfs2_clusters_to_megabytes(sb, 186 ocfs2_local_alloc_size(sb) * 8); 187 188 trace_ocfs2_la_set_sizes(requested_mb, la_max_mb, la_default_mb); 189 190 if (requested_mb == -1) { 191 /* No user request - use defaults */ 192 osb->local_alloc_default_bits = 193 ocfs2_megabytes_to_clusters(sb, la_default_mb); 194 } else if (requested_mb > la_max_mb) { 195 /* Request is too big, we give the maximum available */ 196 osb->local_alloc_default_bits = 197 ocfs2_megabytes_to_clusters(sb, la_max_mb); 198 } else { 199 osb->local_alloc_default_bits = 200 ocfs2_megabytes_to_clusters(sb, requested_mb); 201 } 202 203 osb->local_alloc_bits = osb->local_alloc_default_bits; 204 } 205 206 static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) 207 { 208 return (osb->local_alloc_state == OCFS2_LA_THROTTLED || 209 osb->local_alloc_state == OCFS2_LA_ENABLED); 210 } 211 212 void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, 213 unsigned int num_clusters) 214 { 215 if (num_clusters >= osb->local_alloc_default_bits) { 216 spin_lock(&osb->osb_lock); 217 if (osb->local_alloc_state == OCFS2_LA_DISABLED || 218 osb->local_alloc_state == OCFS2_LA_THROTTLED) { 219 cancel_delayed_work(&osb->la_enable_wq); 220 osb->local_alloc_state = OCFS2_LA_ENABLED; 221 } 222 spin_unlock(&osb->osb_lock); 223 } 224 } 225 226 void ocfs2_la_enable_worker(struct work_struct *work) 227 { 228 struct ocfs2_super *osb = 229 container_of(work, struct ocfs2_super, 230 la_enable_wq.work); 231 spin_lock(&osb->osb_lock); 232 osb->local_alloc_state = OCFS2_LA_ENABLED; 233 spin_unlock(&osb->osb_lock); 234 } 235 236 /* 
 * Tell us whether a given allocation should use the local alloc
 * file. Otherwise, it has to go to the main bitmap.
 *
 * This function does semi-dirty reads of local alloc size and state!
 * This is ok however, as the values are re-checked once under mutex.
 *
 * Returns 1 if an allocation of 'bits' clusters should be served from
 * the local alloc, 0 if not.
 */
int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
{
	int ret = 0;
	int la_bits;

	spin_lock(&osb->osb_lock);
	la_bits = osb->local_alloc_bits;

	if (!ocfs2_la_state_enabled(osb))
		goto bail;

	/* la_bits should be at least twice the size (in clusters) of
	 * a new block group. We want to be sure block group
	 * allocations go through the local alloc, so allow an
	 * allocation to take up to half the bitmap. */
	if (bits > (la_bits / 2))
		goto bail;

	ret = 1;
bail:
	trace_ocfs2_alloc_should_use_local(
	     (unsigned long long)bits, osb->local_alloc_state, la_bits, ret);
	spin_unlock(&osb->osb_lock);
	return ret;
}

/*
 * Read this slot's local alloc inode from disk, sanity check it and,
 * if it is clean, install it as osb->local_alloc_bh and mark the local
 * alloc OCFS2_LA_ENABLED.
 *
 * Does nothing (and returns 0) when osb->local_alloc_bits is 0.
 * Returns 0 on success or a negative errno; on failure the buffer head
 * is released and the osb state is left untouched.
 */
int ocfs2_load_local_alloc(struct ocfs2_super *osb)
{
	int status = 0;
	struct ocfs2_dinode *alloc = NULL;
	struct buffer_head *alloc_bh = NULL;
	u32 num_used;
	struct inode *inode = NULL;
	struct ocfs2_local_alloc *la;

	if (osb->local_alloc_bits == 0)
		goto bail;

	/* A window must be strictly smaller than osb->bitmap_cpg;
	 * otherwise fall back to the computed default size. */
	if (osb->local_alloc_bits >= osb->bitmap_cpg) {
		mlog(ML_NOTICE, "Requested local alloc window %d is larger "
		     "than max possible %u. Using defaults.\n",
		     osb->local_alloc_bits, (osb->bitmap_cpg - 1));
		osb->local_alloc_bits =
			ocfs2_megabytes_to_clusters(osb->sb,
						    ocfs2_la_default_mb(osb));
	}

	/* read the alloc off disk */
	inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE,
					    osb->slot_num);
	if (!inode) {
		status = -EINVAL;
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_read_inode_block_full(inode, &alloc_bh,
					     OCFS2_BH_IGNORE_CACHE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
	la = OCFS2_LOCAL_ALLOC(alloc);

	/* NOTE(review): this accepts the inode when *either* flag is set;
	 * confirm whether both flags were meant to be required. */
	if (!(le32_to_cpu(alloc->i_flags) &
	    (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) {
		mlog(ML_ERROR, "Invalid local alloc inode, %llu\n",
		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
		status = -EINVAL;
		goto bail;
	}

	/* The on-disk bitmap size must be non-zero and fit in the block. */
	if ((la->la_size == 0) ||
	    (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) {
		mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n",
		     le16_to_cpu(la->la_size));
		status = -EINVAL;
		goto bail;
	}

	/* do a little verification. */
	num_used = ocfs2_local_alloc_count_bits(alloc);

	/* hopefully the local alloc has always been recovered before
	 * we load it. Any set bit, non-zero counter or non-zero window
	 * offset means it was not, and we refuse to use it. */
	if (num_used
	    || alloc->id1.bitmap1.i_used
	    || alloc->id1.bitmap1.i_total
	    || la->la_bm_off) {
		mlog(ML_ERROR, "inconsistent detected, clean journal with"
		     " unrecovered local alloc, please run fsck.ocfs2!\n"
		     "found = %u, set = %u, taken = %u, off = %u\n",
		     num_used, le32_to_cpu(alloc->id1.bitmap1.i_used),
		     le32_to_cpu(alloc->id1.bitmap1.i_total),
		     le32_to_cpu(OCFS2_LOCAL_ALLOC(alloc)->la_bm_off));

		status = -EINVAL;
		goto bail;
	}

	/* Success: the osb now owns the reference on alloc_bh. */
	osb->local_alloc_bh = alloc_bh;
	osb->local_alloc_state = OCFS2_LA_ENABLED;

bail:
	if (status < 0)
		brelse(alloc_bh);
	iput(inode);

	trace_ocfs2_load_local_alloc(osb->local_alloc_bits);

	if (status)
		mlog_errno(status);
	return status;
}

/*
 * return any unused bits to the bitmap and write out a clean
 * local_alloc.
 *
 * This always operates on osb->local_alloc_bh. Once the local alloc
 * has been cleared in the journal, that buffer is brelse'd,
 * osb->local_alloc_bh is set to NULL and the state becomes
 * OCFS2_LA_UNUSED. If an earlier step fails, the state is left at
 * OCFS2_LA_DISABLED and the buffer stays attached to the osb.
 */
void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
{
	int status;
	handle_t *handle;
	struct inode *local_alloc_inode = NULL;
	struct buffer_head *bh = NULL;
	struct buffer_head *main_bm_bh = NULL;
	struct inode *main_bm_inode = NULL;
	struct ocfs2_dinode *alloc_copy = NULL;
	struct ocfs2_dinode *alloc = NULL;

	/* Stop the delayed re-enable work and drain the workqueue before
	 * touching the local alloc state. */
	cancel_delayed_work(&osb->la_enable_wq);
	if (osb->ocfs2_wq)
		flush_workqueue(osb->ocfs2_wq);

	/* Nothing was ever loaded - nothing to give back. */
	if (osb->local_alloc_state == OCFS2_LA_UNUSED)
		goto out;

	local_alloc_inode =
		ocfs2_get_system_file_inode(osb,
					    LOCAL_ALLOC_SYSTEM_INODE,
					    osb->slot_num);
	if (!local_alloc_inode) {
		status = -ENOENT;
		mlog_errno(status);
		goto out;
	}

	osb->local_alloc_state = OCFS2_LA_DISABLED;

	ocfs2_resmap_uninit(&osb->osb_la_resmap);

	main_bm_inode = ocfs2_get_system_file_inode(osb,
						    GLOBAL_BITMAP_SYSTEM_INODE,
						    OCFS2_INVALID_SLOT);
	if (!main_bm_inode) {
		status = -EINVAL;
		mlog_errno(status);
		goto out;
	}

	inode_lock(main_bm_inode);

	status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
	if (status < 0) {
		mlog_errno(status);
		goto out_mutex;
	}

	/* WINDOW_MOVE_CREDITS is a bit heavy... */
	handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
	if (IS_ERR(handle)) {
		mlog_errno(PTR_ERR(handle));
		handle = NULL;
		goto out_unlock;
	}

	bh = osb->local_alloc_bh;
	alloc = (struct ocfs2_dinode *) bh->b_data;

	/* Keep a snapshot of the window: the live copy is cleared first
	 * and the snapshot then tells the sync which bits to free. */
	alloc_copy = kmemdup(alloc, bh->b_size, GFP_NOFS);
	if (!alloc_copy) {
		status = -ENOMEM;
		goto out_commit;
	}

	status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode),
					 bh, OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto out_commit;
	}

	ocfs2_clear_local_alloc(alloc);
	ocfs2_journal_dirty(handle, bh);

	/* Drop the osb's reference; the local alloc is now unused. */
	brelse(bh);
	osb->local_alloc_bh = NULL;
	osb->local_alloc_state = OCFS2_LA_UNUSED;

	status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
					  main_bm_inode, main_bm_bh);
	if (status < 0)
		mlog_errno(status);

out_commit:
	ocfs2_commit_trans(osb, handle);

out_unlock:
	brelse(main_bm_bh);

	ocfs2_inode_unlock(main_bm_inode, 1);

out_mutex:
	inode_unlock(main_bm_inode);
	iput(main_bm_inode);

out:
	iput(local_alloc_inode);

	kfree(alloc_copy);
}

/*
 * We want to free the bitmap bits outside of any recovery context as
 * we'll need a cluster lock to do so, but we must clear the local
 * alloc before giving up the recovered nodes journal.
To solve this, 474 * we kmalloc a copy of the local alloc before it's change for the 475 * caller to process with ocfs2_complete_local_alloc_recovery 476 */ 477 int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, 478 int slot_num, 479 struct ocfs2_dinode **alloc_copy) 480 { 481 int status = 0; 482 struct buffer_head *alloc_bh = NULL; 483 struct inode *inode = NULL; 484 struct ocfs2_dinode *alloc; 485 486 trace_ocfs2_begin_local_alloc_recovery(slot_num); 487 488 *alloc_copy = NULL; 489 490 inode = ocfs2_get_system_file_inode(osb, 491 LOCAL_ALLOC_SYSTEM_INODE, 492 slot_num); 493 if (!inode) { 494 status = -EINVAL; 495 mlog_errno(status); 496 goto bail; 497 } 498 499 inode_lock(inode); 500 501 status = ocfs2_read_inode_block_full(inode, &alloc_bh, 502 OCFS2_BH_IGNORE_CACHE); 503 if (status < 0) { 504 mlog_errno(status); 505 goto bail; 506 } 507 508 *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL); 509 if (!(*alloc_copy)) { 510 status = -ENOMEM; 511 goto bail; 512 } 513 memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size); 514 515 alloc = (struct ocfs2_dinode *) alloc_bh->b_data; 516 ocfs2_clear_local_alloc(alloc); 517 518 ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check); 519 status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode)); 520 if (status < 0) 521 mlog_errno(status); 522 523 bail: 524 if (status < 0) { 525 kfree(*alloc_copy); 526 *alloc_copy = NULL; 527 } 528 529 brelse(alloc_bh); 530 531 if (inode) { 532 inode_unlock(inode); 533 iput(inode); 534 } 535 536 if (status) 537 mlog_errno(status); 538 return status; 539 } 540 541 /* 542 * Step 2: By now, we've completed the journal recovery, we've stamped 543 * a clean local alloc on disk and dropped the node out of the 544 * recovery map. Dlm locks will no longer stall, so lets clear out the 545 * main bitmap. 
546 */ 547 int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb, 548 struct ocfs2_dinode *alloc) 549 { 550 int status; 551 handle_t *handle; 552 struct buffer_head *main_bm_bh = NULL; 553 struct inode *main_bm_inode; 554 555 main_bm_inode = ocfs2_get_system_file_inode(osb, 556 GLOBAL_BITMAP_SYSTEM_INODE, 557 OCFS2_INVALID_SLOT); 558 if (!main_bm_inode) { 559 status = -EINVAL; 560 mlog_errno(status); 561 goto out; 562 } 563 564 inode_lock(main_bm_inode); 565 566 status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); 567 if (status < 0) { 568 mlog_errno(status); 569 goto out_mutex; 570 } 571 572 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 573 if (IS_ERR(handle)) { 574 status = PTR_ERR(handle); 575 handle = NULL; 576 mlog_errno(status); 577 goto out_unlock; 578 } 579 580 /* we want the bitmap change to be recorded on disk asap */ 581 handle->h_sync = 1; 582 583 status = ocfs2_sync_local_to_main(osb, handle, alloc, 584 main_bm_inode, main_bm_bh); 585 if (status < 0) 586 mlog_errno(status); 587 588 ocfs2_commit_trans(osb, handle); 589 590 out_unlock: 591 ocfs2_inode_unlock(main_bm_inode, 1); 592 593 out_mutex: 594 inode_unlock(main_bm_inode); 595 596 brelse(main_bm_bh); 597 598 iput(main_bm_inode); 599 600 out: 601 if (!status) 602 ocfs2_init_steal_slots(osb); 603 if (status) 604 mlog_errno(status); 605 return status; 606 } 607 608 /* 609 * make sure we've got at least bits_wanted contiguous bits in the 610 * local alloc. You lose them when you drop i_rwsem. 611 * 612 * We will add ourselves to the transaction passed in, but may start 613 * our own in order to shift windows. 
614 */ 615 int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, 616 u32 bits_wanted, 617 struct ocfs2_alloc_context *ac) 618 { 619 int status; 620 struct ocfs2_dinode *alloc; 621 struct inode *local_alloc_inode; 622 unsigned int free_bits; 623 624 BUG_ON(!ac); 625 626 local_alloc_inode = 627 ocfs2_get_system_file_inode(osb, 628 LOCAL_ALLOC_SYSTEM_INODE, 629 osb->slot_num); 630 if (!local_alloc_inode) { 631 status = -ENOENT; 632 mlog_errno(status); 633 goto bail; 634 } 635 636 inode_lock(local_alloc_inode); 637 638 /* 639 * We must double check state and allocator bits because 640 * another process may have changed them while holding i_rwsem. 641 */ 642 spin_lock(&osb->osb_lock); 643 if (!ocfs2_la_state_enabled(osb) || 644 (bits_wanted > osb->local_alloc_bits)) { 645 spin_unlock(&osb->osb_lock); 646 status = -ENOSPC; 647 goto bail; 648 } 649 spin_unlock(&osb->osb_lock); 650 651 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 652 653 #ifdef CONFIG_OCFS2_DEBUG_FS 654 if (le32_to_cpu(alloc->id1.bitmap1.i_used) != 655 ocfs2_local_alloc_count_bits(alloc)) { 656 status = ocfs2_error(osb->sb, "local alloc inode %llu says it has %u used bits, but a count shows %u\n", 657 (unsigned long long)le64_to_cpu(alloc->i_blkno), 658 le32_to_cpu(alloc->id1.bitmap1.i_used), 659 ocfs2_local_alloc_count_bits(alloc)); 660 goto bail; 661 } 662 #endif 663 664 free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - 665 le32_to_cpu(alloc->id1.bitmap1.i_used); 666 if (bits_wanted > free_bits) { 667 /* uhoh, window change time. */ 668 status = 669 ocfs2_local_alloc_slide_window(osb, local_alloc_inode); 670 if (status < 0) { 671 if (status != -ENOSPC) 672 mlog_errno(status); 673 goto bail; 674 } 675 676 /* 677 * Under certain conditions, the window slide code 678 * might have reduced the number of bits available or 679 * disabled the local alloc entirely. Re-check 680 * here and return -ENOSPC if necessary. 
681 */ 682 status = -ENOSPC; 683 if (!ocfs2_la_state_enabled(osb)) 684 goto bail; 685 686 free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - 687 le32_to_cpu(alloc->id1.bitmap1.i_used); 688 if (bits_wanted > free_bits) 689 goto bail; 690 } 691 692 ac->ac_inode = local_alloc_inode; 693 /* We should never use localalloc from another slot */ 694 ac->ac_alloc_slot = osb->slot_num; 695 ac->ac_which = OCFS2_AC_USE_LOCAL; 696 get_bh(osb->local_alloc_bh); 697 ac->ac_bh = osb->local_alloc_bh; 698 status = 0; 699 bail: 700 if (status < 0 && local_alloc_inode) { 701 inode_unlock(local_alloc_inode); 702 iput(local_alloc_inode); 703 } 704 705 trace_ocfs2_reserve_local_alloc_bits( 706 (unsigned long long)ac->ac_max_block, 707 bits_wanted, osb->slot_num, status); 708 709 if (status) 710 mlog_errno(status); 711 return status; 712 } 713 714 int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, 715 handle_t *handle, 716 struct ocfs2_alloc_context *ac, 717 u32 bits_wanted, 718 u32 *bit_off, 719 u32 *num_bits) 720 { 721 int status, start; 722 struct inode *local_alloc_inode; 723 void *bitmap; 724 struct ocfs2_dinode *alloc; 725 struct ocfs2_local_alloc *la; 726 727 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); 728 729 local_alloc_inode = ac->ac_inode; 730 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 731 la = OCFS2_LOCAL_ALLOC(alloc); 732 733 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted, 734 ac->ac_resv); 735 if (start == -1) { 736 /* TODO: Shouldn't we just BUG here? 
*/ 737 status = -ENOSPC; 738 mlog_errno(status); 739 goto bail; 740 } 741 742 bitmap = la->la_bitmap; 743 *bit_off = le32_to_cpu(la->la_bm_off) + start; 744 *num_bits = bits_wanted; 745 746 status = ocfs2_journal_access_di(handle, 747 INODE_CACHE(local_alloc_inode), 748 osb->local_alloc_bh, 749 OCFS2_JOURNAL_ACCESS_WRITE); 750 if (status < 0) { 751 mlog_errno(status); 752 goto bail; 753 } 754 755 ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start, 756 bits_wanted); 757 758 while(bits_wanted--) 759 ocfs2_set_bit(start++, bitmap); 760 761 le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits); 762 ocfs2_journal_dirty(handle, osb->local_alloc_bh); 763 764 bail: 765 if (status) 766 mlog_errno(status); 767 return status; 768 } 769 770 int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb, 771 handle_t *handle, 772 struct ocfs2_alloc_context *ac, 773 u32 bit_off, 774 u32 num_bits) 775 { 776 int status, start; 777 u32 clear_bits; 778 struct inode *local_alloc_inode; 779 void *bitmap; 780 struct ocfs2_dinode *alloc; 781 struct ocfs2_local_alloc *la; 782 783 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); 784 785 local_alloc_inode = ac->ac_inode; 786 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 787 la = OCFS2_LOCAL_ALLOC(alloc); 788 789 bitmap = la->la_bitmap; 790 start = bit_off - le32_to_cpu(la->la_bm_off); 791 clear_bits = num_bits; 792 793 status = ocfs2_journal_access_di(handle, 794 INODE_CACHE(local_alloc_inode), 795 osb->local_alloc_bh, 796 OCFS2_JOURNAL_ACCESS_WRITE); 797 if (status < 0) { 798 mlog_errno(status); 799 goto bail; 800 } 801 802 while (clear_bits--) 803 ocfs2_clear_bit(start++, bitmap); 804 805 le32_add_cpu(&alloc->id1.bitmap1.i_used, -num_bits); 806 ocfs2_journal_dirty(handle, osb->local_alloc_bh); 807 808 bail: 809 return status; 810 } 811 812 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc) 813 { 814 u32 count; 815 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 816 817 count = 
memweight(la->la_bitmap, le16_to_cpu(la->la_size)); 818 819 trace_ocfs2_local_alloc_count_bits(count); 820 return count; 821 } 822 823 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 824 struct ocfs2_dinode *alloc, 825 u32 *numbits, 826 struct ocfs2_alloc_reservation *resv) 827 { 828 int numfound = 0, bitoff, left, startoff; 829 int local_resv = 0; 830 struct ocfs2_alloc_reservation r; 831 void *bitmap = NULL; 832 struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap; 833 834 if (!alloc->id1.bitmap1.i_total) { 835 bitoff = -1; 836 goto bail; 837 } 838 839 if (!resv) { 840 local_resv = 1; 841 ocfs2_resv_init_once(&r); 842 ocfs2_resv_set_type(&r, OCFS2_RESV_FLAG_TMP); 843 resv = &r; 844 } 845 846 numfound = *numbits; 847 if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) { 848 if (numfound < *numbits) 849 *numbits = numfound; 850 goto bail; 851 } 852 853 /* 854 * Code error. While reservations are enabled, local 855 * allocation should _always_ go through them. 856 */ 857 BUG_ON(osb->osb_resv_level != 0); 858 859 /* 860 * Reservations are disabled. Handle this the old way. 861 */ 862 863 bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap; 864 865 numfound = bitoff = startoff = 0; 866 left = le32_to_cpu(alloc->id1.bitmap1.i_total); 867 while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) < 868 left) { 869 /* Ok, we found a zero bit... is it contig. 
or do we 870 * start over?*/ 871 if (bitoff == startoff) { 872 /* we found a zero */ 873 numfound++; 874 startoff++; 875 } else { 876 /* got a zero after some ones */ 877 numfound = 1; 878 startoff = bitoff+1; 879 } 880 /* we got everything we needed */ 881 if (numfound == *numbits) { 882 /* mlog(0, "Found it all!\n"); */ 883 break; 884 } 885 } 886 887 trace_ocfs2_local_alloc_find_clear_bits_search_bitmap(bitoff, numfound); 888 889 if (numfound == *numbits) 890 bitoff = startoff - numfound; 891 else 892 bitoff = -1; 893 894 bail: 895 if (local_resv) 896 ocfs2_resv_discard(resmap, resv); 897 898 trace_ocfs2_local_alloc_find_clear_bits(*numbits, 899 le32_to_cpu(alloc->id1.bitmap1.i_total), 900 bitoff, numfound); 901 902 return bitoff; 903 } 904 905 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc) 906 { 907 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 908 int i; 909 910 alloc->id1.bitmap1.i_total = 0; 911 alloc->id1.bitmap1.i_used = 0; 912 la->la_bm_off = 0; 913 for(i = 0; i < le16_to_cpu(la->la_size); i++) 914 la->la_bitmap[i] = 0; 915 } 916 917 #if 0 918 /* turn this on and uncomment below to aid debugging window shifts. */ 919 static void ocfs2_verify_zero_bits(unsigned long *bitmap, 920 unsigned int start, 921 unsigned int count) 922 { 923 unsigned int tmp = count; 924 while(tmp--) { 925 if (ocfs2_test_bit(start + tmp, bitmap)) { 926 printk("ocfs2_verify_zero_bits: start = %u, count = " 927 "%u\n", start, count); 928 printk("ocfs2_verify_zero_bits: bit %u is set!", 929 start + tmp); 930 BUG(); 931 } 932 } 933 } 934 #endif 935 936 /* 937 * sync the local alloc to main bitmap. 938 * 939 * assumes you've already locked the main bitmap -- the bitmap inode 940 * passed is used for caching. 
941 */ 942 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, 943 handle_t *handle, 944 struct ocfs2_dinode *alloc, 945 struct inode *main_bm_inode, 946 struct buffer_head *main_bm_bh) 947 { 948 int status = 0; 949 int bit_off, left, count, start; 950 u64 la_start_blk; 951 u64 blkno; 952 void *bitmap; 953 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 954 955 trace_ocfs2_sync_local_to_main( 956 le32_to_cpu(alloc->id1.bitmap1.i_total), 957 le32_to_cpu(alloc->id1.bitmap1.i_used)); 958 959 if (!alloc->id1.bitmap1.i_total) { 960 goto bail; 961 } 962 963 if (le32_to_cpu(alloc->id1.bitmap1.i_used) == 964 le32_to_cpu(alloc->id1.bitmap1.i_total)) { 965 goto bail; 966 } 967 968 la_start_blk = ocfs2_clusters_to_blocks(osb->sb, 969 le32_to_cpu(la->la_bm_off)); 970 bitmap = la->la_bitmap; 971 start = count = 0; 972 left = le32_to_cpu(alloc->id1.bitmap1.i_total); 973 974 while (1) { 975 bit_off = ocfs2_find_next_zero_bit(bitmap, left, start); 976 if ((bit_off < left) && (bit_off == start)) { 977 count++; 978 start++; 979 continue; 980 } 981 if (count) { 982 blkno = la_start_blk + 983 ocfs2_clusters_to_blocks(osb->sb, 984 start - count); 985 986 trace_ocfs2_sync_local_to_main_free( 987 count, start - count, 988 (unsigned long long)la_start_blk, 989 (unsigned long long)blkno); 990 991 status = ocfs2_release_clusters(handle, 992 main_bm_inode, 993 main_bm_bh, blkno, 994 count); 995 if (status < 0) { 996 mlog_errno(status); 997 goto bail; 998 } 999 } 1000 1001 if (bit_off >= left) 1002 break; 1003 count = 1; 1004 start = bit_off + 1; 1005 } 1006 1007 bail: 1008 if (status) 1009 mlog_errno(status); 1010 return status; 1011 } 1012 1013 enum ocfs2_la_event { 1014 OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */ 1015 OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has 1016 * enough bits theoretically 1017 * free, but a contiguous 1018 * allocation could not be 1019 * found. 
*/ 1020 OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have 1021 * enough bits free to satisfy 1022 * our request. */ 1023 }; 1024 #define OCFS2_LA_ENABLE_INTERVAL (30 * HZ) 1025 /* 1026 * Given an event, calculate the size of our next local alloc window. 1027 * 1028 * This should always be called under i_rwsem of the local alloc inode 1029 * so that local alloc disabling doesn't race with processes trying to 1030 * use the allocator. 1031 * 1032 * Returns the state which the local alloc was left in. This value can 1033 * be ignored by some paths. 1034 */ 1035 static int ocfs2_recalc_la_window(struct ocfs2_super *osb, 1036 enum ocfs2_la_event event) 1037 { 1038 unsigned int bits; 1039 int state; 1040 1041 spin_lock(&osb->osb_lock); 1042 if (osb->local_alloc_state == OCFS2_LA_DISABLED) { 1043 WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED); 1044 goto out_unlock; 1045 } 1046 1047 /* 1048 * ENOSPC and fragmentation are treated similarly for now. 1049 */ 1050 if (event == OCFS2_LA_EVENT_ENOSPC || 1051 event == OCFS2_LA_EVENT_FRAGMENTED) { 1052 /* 1053 * We ran out of contiguous space in the primary 1054 * bitmap. Drastically reduce the number of bits used 1055 * by local alloc until we have to disable it. 1056 */ 1057 bits = osb->local_alloc_bits >> 1; 1058 if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) { 1059 /* 1060 * By setting state to THROTTLED, we'll keep 1061 * the number of local alloc bits used down 1062 * until an event occurs which would give us 1063 * reason to assume the bitmap situation might 1064 * have changed. 1065 */ 1066 osb->local_alloc_state = OCFS2_LA_THROTTLED; 1067 osb->local_alloc_bits = bits; 1068 } else { 1069 osb->local_alloc_state = OCFS2_LA_DISABLED; 1070 } 1071 queue_delayed_work(osb->ocfs2_wq, &osb->la_enable_wq, 1072 OCFS2_LA_ENABLE_INTERVAL); 1073 goto out_unlock; 1074 } 1075 1076 /* 1077 * Don't increase the size of the local alloc window until we 1078 * know we might be able to fulfill the request. 
Otherwise, we 1079 * risk bouncing around the global bitmap during periods of 1080 * low space. 1081 */ 1082 if (osb->local_alloc_state != OCFS2_LA_THROTTLED) 1083 osb->local_alloc_bits = osb->local_alloc_default_bits; 1084 1085 out_unlock: 1086 state = osb->local_alloc_state; 1087 spin_unlock(&osb->osb_lock); 1088 1089 return state; 1090 } 1091 1092 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, 1093 struct ocfs2_alloc_context **ac, 1094 struct inode **bitmap_inode, 1095 struct buffer_head **bitmap_bh) 1096 { 1097 int status; 1098 1099 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 1100 if (!(*ac)) { 1101 status = -ENOMEM; 1102 mlog_errno(status); 1103 goto bail; 1104 } 1105 1106 retry_enospc: 1107 (*ac)->ac_bits_wanted = osb->local_alloc_bits; 1108 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); 1109 if (status == -ENOSPC) { 1110 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == 1111 OCFS2_LA_DISABLED) 1112 goto bail; 1113 1114 ocfs2_free_ac_resource(*ac); 1115 memset(*ac, 0, sizeof(struct ocfs2_alloc_context)); 1116 goto retry_enospc; 1117 } 1118 if (status < 0) { 1119 mlog_errno(status); 1120 goto bail; 1121 } 1122 1123 *bitmap_inode = (*ac)->ac_inode; 1124 igrab(*bitmap_inode); 1125 *bitmap_bh = (*ac)->ac_bh; 1126 get_bh(*bitmap_bh); 1127 status = 0; 1128 bail: 1129 if ((status < 0) && *ac) { 1130 ocfs2_free_alloc_context(*ac); 1131 *ac = NULL; 1132 } 1133 1134 if (status) 1135 mlog_errno(status); 1136 return status; 1137 } 1138 1139 /* 1140 * pass it the bitmap lock in lock_bh if you have it. 
 *
 * If the claim fails with -ENOSPC the window is shrunk through
 * ocfs2_recalc_la_window() and the claim is retried until it either
 * succeeds or the local alloc ends up disabled. Returns 0 or a
 * negative errno.
 */
static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
					handle_t *handle,
					struct ocfs2_alloc_context *ac)
{
	int status = 0;
	u32 cluster_off, cluster_count;
	struct ocfs2_dinode *alloc = NULL;
	struct ocfs2_local_alloc *la;

	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
	la = OCFS2_LOCAL_ALLOC(alloc);

	trace_ocfs2_local_alloc_new_window(
		le32_to_cpu(alloc->id1.bitmap1.i_total),
		osb->local_alloc_bits);

	/* Instruct the allocation code to try the most recently used
	 * cluster group. We'll re-record the group used this pass
	 * below. */
	ac->ac_last_group = osb->la_last_gd;

	/* we used the generic suballoc reserve function, but we set
	 * everything up nicely, so there's no reason why we can't use
	 * the more specific cluster api to claim bits. */
	status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits,
				      &cluster_off, &cluster_count);
	if (status == -ENOSPC) {
retry_enospc:
		/*
		 * Note: We could also try syncing the journal here to
		 * allow use of any free bits which the current
		 * transaction can't give us access to. --Mark
		 */
		/* status is still -ENOSPC if we bail out here. */
		if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) ==
		    OCFS2_LA_DISABLED)
			goto bail;

		ac->ac_bits_wanted = osb->local_alloc_bits;
		status = ocfs2_claim_clusters(handle, ac,
					      osb->local_alloc_bits,
					      &cluster_off,
					      &cluster_count);
		if (status == -ENOSPC)
			goto retry_enospc;
		/*
		 * We only shrunk the *minimum* number of bits in our
		 * request - it's entirely possible that the allocator
		 * might give us more than we asked for.
		 */
		if (status == 0) {
			spin_lock(&osb->osb_lock);
			osb->local_alloc_bits = cluster_count;
			spin_unlock(&osb->osb_lock);
		}
	}
	if (status < 0) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto bail;
	}

	/* Remember the group so the next window tries it first. */
	osb->la_last_gd = ac->ac_last_group;

	la->la_bm_off = cpu_to_le32(cluster_off);
	alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count);
	/* just in case... In the future when we find space ourselves,
	 * we don't have to get all contiguous -- but we'll have to
	 * set all previously used bits in bitmap and update
	 * la_bits_set before setting the bits in the main bitmap. */
	alloc->id1.bitmap1.i_used = 0;
	memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
	       le16_to_cpu(la->la_size));

	/* Restart the reservation map over the new window. */
	ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count,
			     OCFS2_LOCAL_ALLOC(alloc)->la_bitmap);

	trace_ocfs2_local_alloc_new_window_result(
		le32_to_cpu(OCFS2_LOCAL_ALLOC(alloc)->la_bm_off),
		le32_to_cpu(alloc->id1.bitmap1.i_total));

bail:
	if (status)
		mlog_errno(status);
	return status;
}

/* Note that we do *NOT* lock the local alloc inode here as
 * it's been locked already for us.
 *
 * Moves the local alloc to a new window: reserve space in the main
 * bitmap, clear the local alloc in the journal, return the old
 * window's unused bits to the main bitmap, then claim the new window.
 * Returns 0 or a negative errno. */
static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
					  struct inode *local_alloc_inode)
{
	int status = 0;
	struct buffer_head *main_bm_bh = NULL;
	struct inode *main_bm_inode = NULL;
	handle_t *handle = NULL;
	struct ocfs2_dinode *alloc;
	struct ocfs2_dinode *alloc_copy = NULL;
	struct ocfs2_alloc_context *ac = NULL;

	/* Return value deliberately ignored: this call only refreshes
	 * the window size for a normal slide. */
	ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);

	/* This will lock the main bitmap for us. */
	status = ocfs2_local_alloc_reserve_for_window(osb,
						      &ac,
						      &main_bm_inode,
						      &main_bm_bh);
	if (status < 0) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto bail;
	}

	handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		/* NULL it so the cleanup below skips the commit. */
		handle = NULL;
		mlog_errno(status);
		goto bail;
	}

	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;

	/* We want to clear the local alloc before doing anything
	 * else, so that if we error later during this operation,
	 * local alloc shutdown won't try to double free main bitmap
	 * bits. Make a copy so the sync function knows which bits to
	 * free. */
	alloc_copy = kmemdup(alloc, osb->local_alloc_bh->b_size, GFP_NOFS);
	if (!alloc_copy) {
		status = -ENOMEM;
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_journal_access_di(handle,
					 INODE_CACHE(local_alloc_inode),
					 osb->local_alloc_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	ocfs2_clear_local_alloc(alloc);
	ocfs2_journal_dirty(handle, osb->local_alloc_bh);

	/* Hand the old window's unused bits back, using the snapshot. */
	status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
					  main_bm_inode, main_bm_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_local_alloc_new_window(osb, handle, ac);
	if (status < 0) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto bail;
	}

	atomic_inc(&osb->alloc_stats.moves);

bail:
	if (handle)
		ocfs2_commit_trans(osb, handle);

	/* Drop the extra references taken by the reserve helper. */
	brelse(main_bm_bh);

	iput(main_bm_inode);
	kfree(alloc_copy);

	if (ac)
		ocfs2_free_alloc_context(ac);

	if (status)
		mlog_errno(status);
	return status;
}
