1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * localalloc.c 4 * 5 * Node local data allocation 6 * 7 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 8 */ 9 10 #include <linux/fs.h> 11 #include <linux/types.h> 12 #include <linux/slab.h> 13 #include <linux/highmem.h> 14 #include <linux/bitops.h> 15 16 #include <cluster/masklog.h> 17 18 #include "ocfs2.h" 19 20 #include "alloc.h" 21 #include "blockcheck.h" 22 #include "dlmglue.h" 23 #include "inode.h" 24 #include "journal.h" 25 #include "localalloc.h" 26 #include "suballoc.h" 27 #include "super.h" 28 #include "sysfile.h" 29 #include "ocfs2_trace.h" 30 31 #include "buffer_head_io.h" 32 33 #define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab)) 34 35 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); 36 37 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 38 struct ocfs2_dinode *alloc, 39 u32 *numbits, 40 struct ocfs2_alloc_reservation *resv); 41 42 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc); 43 44 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, 45 handle_t *handle, 46 struct ocfs2_dinode *alloc, 47 struct inode *main_bm_inode, 48 struct buffer_head *main_bm_bh); 49 50 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, 51 struct ocfs2_alloc_context **ac, 52 struct inode **bitmap_inode, 53 struct buffer_head **bitmap_bh); 54 55 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, 56 handle_t *handle, 57 struct ocfs2_alloc_context *ac); 58 59 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 60 struct inode *local_alloc_inode); 61 62 /* 63 * ocfs2_la_default_mb() - determine a default size, in megabytes of 64 * the local alloc. 65 * 66 * Generally, we'd like to pick as large a local alloc as 67 * possible. Performance on large workloads tends to scale 68 * proportionally to la size. In addition to that, the reservations 69 * code functions more efficiently as it can reserve more windows for 70 * write. 71 * 72 * Some things work against us when trying to choose a large local alloc: 73 * 74 * - We need to ensure our sizing is picked to leave enough space in 75 * group descriptors for other allocations (such as block groups, 76 * etc). Picking default sizes which are a multiple of 4 could help 77 * - block groups are allocated in 2mb and 4mb chunks. 78 * 79 * - Likewise, we don't want to starve other nodes of bits on small 80 * file systems. This can easily be taken care of by limiting our 81 * default to a reasonable size (256M) on larger cluster sizes. 82 * 83 * - Some file systems can't support very large sizes - 4k and 8k in 84 * particular are limited to less than 128 and 256 megabytes respectively. 85 * 86 * The following reference table shows group descriptor and local 87 * alloc maximums at various cluster sizes (4k blocksize) 88 * 89 * csize: 4K group: 126M la: 121M 90 * csize: 8K group: 252M la: 243M 91 * csize: 16K group: 504M la: 486M 92 * csize: 32K group: 1008M la: 972M 93 * csize: 64K group: 2016M la: 1944M 94 * csize: 128K group: 4032M la: 3888M 95 * csize: 256K group: 8064M la: 7776M 96 * csize: 512K group: 16128M la: 15552M 97 * csize: 1024K group: 32256M la: 31104M 98 */ 99 #define OCFS2_LA_MAX_DEFAULT_MB 256 100 #define OCFS2_LA_OLD_DEFAULT 8 101 unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb) 102 { 103 unsigned int la_mb; 104 unsigned int gd_mb; 105 unsigned int la_max_mb; 106 unsigned int megs_per_slot; 107 struct super_block *sb = osb->sb; 108 109 gd_mb = ocfs2_clusters_to_megabytes(osb->sb, 110 8 * ocfs2_group_bitmap_size(sb, 0, osb->s_feature_incompat)); 111 112 /* 113 * This takes care of files systems with very small group 114 * descriptors - 512 byte blocksize at cluster sizes lower 115 * than 16K and also 1k blocksize with 4k cluster size. 116 */ 117 if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192) 118 || (sb->s_blocksize == 1024 && osb->s_clustersize == 4096)) 119 return OCFS2_LA_OLD_DEFAULT; 120 121 /* 122 * Leave enough room for some block groups and make the final 123 * value we work from a multiple of 4. 124 */ 125 gd_mb -= 16; 126 gd_mb &= 0xFFFFFFFB; 127 128 la_mb = gd_mb; 129 130 /* 131 * Keep window sizes down to a reasonable default 132 */ 133 if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) { 134 /* 135 * Some clustersize / blocksize combinations will have 136 * given us a larger than OCFS2_LA_MAX_DEFAULT_MB 137 * default size, but get poor distribution when 138 * limited to exactly 256 megabytes. 139 * 140 * As an example, 16K clustersize at 4K blocksize 141 * gives us a cluster group size of 504M. Paring the 142 * local alloc size down to 256 however, would give us 143 * only one window and around 200MB left in the 144 * cluster group. Instead, find the first size below 145 * 256 which would give us an even distribution. 146 * 147 * Larger cluster group sizes actually work out pretty 148 * well when pared to 256, so we don't have to do this 149 * for any group that fits more than two 150 * OCFS2_LA_MAX_DEFAULT_MB windows. 151 */ 152 if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB)) 153 la_mb = 256; 154 else { 155 unsigned int gd_mult = gd_mb; 156 157 while (gd_mult > 256) 158 gd_mult = gd_mult >> 1; 159 160 la_mb = gd_mult; 161 } 162 } 163 164 megs_per_slot = osb->osb_clusters_at_boot / osb->max_slots; 165 megs_per_slot = ocfs2_clusters_to_megabytes(osb->sb, megs_per_slot); 166 /* Too many nodes, too few disk clusters. */ 167 if (megs_per_slot < la_mb) 168 la_mb = megs_per_slot; 169 170 /* We can't store more bits than we can in a block. */ 171 la_max_mb = ocfs2_clusters_to_megabytes(osb->sb, 172 ocfs2_local_alloc_size(sb) * 8); 173 if (la_mb > la_max_mb) 174 la_mb = la_max_mb; 175 176 return la_mb; 177 } 178 179 void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb) 180 { 181 struct super_block *sb = osb->sb; 182 unsigned int la_default_mb = ocfs2_la_default_mb(osb); 183 unsigned int la_max_mb; 184 185 la_max_mb = ocfs2_clusters_to_megabytes(sb, 186 ocfs2_local_alloc_size(sb) * 8); 187 188 trace_ocfs2_la_set_sizes(requested_mb, la_max_mb, la_default_mb); 189 190 if (requested_mb == -1) { 191 /* No user request - use defaults */ 192 osb->local_alloc_default_bits = 193 ocfs2_megabytes_to_clusters(sb, la_default_mb); 194 } else if (requested_mb > la_max_mb) { 195 /* Request is too big, we give the maximum available */ 196 osb->local_alloc_default_bits = 197 ocfs2_megabytes_to_clusters(sb, la_max_mb); 198 } else { 199 osb->local_alloc_default_bits = 200 ocfs2_megabytes_to_clusters(sb, requested_mb); 201 } 202 203 osb->local_alloc_bits = osb->local_alloc_default_bits; 204 } 205 206 static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) 207 { 208 return (osb->local_alloc_state == OCFS2_LA_THROTTLED || 209 osb->local_alloc_state == OCFS2_LA_ENABLED); 210 } 211 212 void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, 213 unsigned int num_clusters) 214 { 215 if (num_clusters >= osb->local_alloc_default_bits) { 216 spin_lock(&osb->osb_lock); 217 if (osb->local_alloc_state == OCFS2_LA_DISABLED || 218 osb->local_alloc_state == OCFS2_LA_THROTTLED) { 219 cancel_delayed_work(&osb->la_enable_wq); 220 osb->local_alloc_state = OCFS2_LA_ENABLED; 221 } 222 spin_unlock(&osb->osb_lock); 223 } 224 } 225 226 void ocfs2_la_enable_worker(struct work_struct *work) 227 { 228 struct ocfs2_super *osb = 229 container_of(work, struct ocfs2_super, 230 la_enable_wq.work); 231 spin_lock(&osb->osb_lock); 232 osb->local_alloc_state = OCFS2_LA_ENABLED; 233 spin_unlock(&osb->osb_lock); 234 } 235 236 /* 237 * Tell us whether a given allocation should use the local alloc 238 * file. Otherwise, it has to go to the main bitmap. 239 * 240 * This function does semi-dirty reads of local alloc size and state! 241 * This is ok however, as the values are re-checked once under mutex. 242 */ 243 int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) 244 { 245 int ret = 0; 246 int la_bits; 247 248 spin_lock(&osb->osb_lock); 249 la_bits = osb->local_alloc_bits; 250 251 if (!ocfs2_la_state_enabled(osb)) 252 goto bail; 253 254 /* la_bits should be at least twice the size (in clusters) of 255 * a new block group. We want to be sure block group 256 * allocations go through the local alloc, so allow an 257 * allocation to take up to half the bitmap. */ 258 if (bits > (la_bits / 2)) 259 goto bail; 260 261 ret = 1; 262 bail: 263 trace_ocfs2_alloc_should_use_local( 264 (unsigned long long)bits, osb->local_alloc_state, la_bits, ret); 265 spin_unlock(&osb->osb_lock); 266 return ret; 267 } 268 269 int ocfs2_load_local_alloc(struct ocfs2_super *osb) 270 { 271 int status = 0; 272 struct ocfs2_dinode *alloc = NULL; 273 struct buffer_head *alloc_bh = NULL; 274 u32 num_used; 275 struct inode *inode = NULL; 276 struct ocfs2_local_alloc *la; 277 278 if (osb->local_alloc_bits == 0) 279 goto bail; 280 281 if (osb->local_alloc_bits >= osb->bitmap_cpg) { 282 mlog(ML_NOTICE, "Requested local alloc window %d is larger " 283 "than max possible %u. Using defaults.\n", 284 osb->local_alloc_bits, (osb->bitmap_cpg - 1)); 285 osb->local_alloc_bits = 286 ocfs2_megabytes_to_clusters(osb->sb, 287 ocfs2_la_default_mb(osb)); 288 } 289 290 /* read the alloc off disk */ 291 inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, 292 osb->slot_num); 293 if (!inode) { 294 status = -EINVAL; 295 mlog_errno(status); 296 goto bail; 297 } 298 299 status = ocfs2_read_inode_block_full(inode, &alloc_bh, 300 OCFS2_BH_IGNORE_CACHE); 301 if (status < 0) { 302 mlog_errno(status); 303 goto bail; 304 } 305 306 alloc = (struct ocfs2_dinode *) alloc_bh->b_data; 307 la = OCFS2_LOCAL_ALLOC(alloc); 308 309 if (!(le32_to_cpu(alloc->i_flags) & 310 (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) { 311 mlog(ML_ERROR, "Invalid local alloc inode, %llu\n", 312 (unsigned long long)OCFS2_I(inode)->ip_blkno); 313 status = -EINVAL; 314 goto bail; 315 } 316 317 if ((la->la_size == 0) || 318 (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) { 319 mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n", 320 le16_to_cpu(la->la_size)); 321 status = -EINVAL; 322 goto bail; 323 } 324 325 /* do a little verification. */ 326 num_used = ocfs2_local_alloc_count_bits(alloc); 327 328 /* hopefully the local alloc has always been recovered before 329 * we load it. */ 330 if (num_used 331 || alloc->id1.bitmap1.i_used 332 || alloc->id1.bitmap1.i_total 333 || la->la_bm_off) { 334 mlog(ML_ERROR, "inconsistent detected, clean journal with" 335 " unrecovered local alloc, please run fsck.ocfs2!\n" 336 "found = %u, set = %u, taken = %u, off = %u\n", 337 num_used, le32_to_cpu(alloc->id1.bitmap1.i_used), 338 le32_to_cpu(alloc->id1.bitmap1.i_total), 339 le32_to_cpu(OCFS2_LOCAL_ALLOC(alloc)->la_bm_off)); 340 341 status = -EINVAL; 342 goto bail; 343 } 344 345 osb->local_alloc_bh = alloc_bh; 346 osb->local_alloc_state = OCFS2_LA_ENABLED; 347 348 bail: 349 if (status < 0) 350 brelse(alloc_bh); 351 iput(inode); 352 353 trace_ocfs2_load_local_alloc(osb->local_alloc_bits); 354 355 if (status) 356 mlog_errno(status); 357 return status; 358 } 359 360 /* 361 * return any unused bits to the bitmap and write out a clean 362 * local_alloc. 363 * 364 * local_alloc_bh is optional. If not passed, we will simply use the 365 * one off osb. If you do pass it however, be warned that it *will* be 366 * returned brelse'd and NULL'd out.*/ 367 void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) 368 { 369 int status; 370 handle_t *handle; 371 struct inode *local_alloc_inode = NULL; 372 struct buffer_head *bh = NULL; 373 struct buffer_head *main_bm_bh = NULL; 374 struct inode *main_bm_inode = NULL; 375 struct ocfs2_dinode *alloc_copy = NULL; 376 struct ocfs2_dinode *alloc = NULL; 377 378 cancel_delayed_work(&osb->la_enable_wq); 379 if (osb->ocfs2_wq) 380 flush_workqueue(osb->ocfs2_wq); 381 382 if (osb->local_alloc_state == OCFS2_LA_UNUSED) 383 goto out; 384 385 local_alloc_inode = 386 ocfs2_get_system_file_inode(osb, 387 LOCAL_ALLOC_SYSTEM_INODE, 388 osb->slot_num); 389 if (!local_alloc_inode) { 390 status = -ENOENT; 391 mlog_errno(status); 392 goto out; 393 } 394 395 osb->local_alloc_state = OCFS2_LA_DISABLED; 396 397 ocfs2_resmap_uninit(&osb->osb_la_resmap); 398 399 main_bm_inode = ocfs2_get_system_file_inode(osb, 400 GLOBAL_BITMAP_SYSTEM_INODE, 401 OCFS2_INVALID_SLOT); 402 if (!main_bm_inode) { 403 status = -EINVAL; 404 mlog_errno(status); 405 goto out; 406 } 407 408 inode_lock(main_bm_inode); 409 410 status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); 411 if (status < 0) { 412 mlog_errno(status); 413 goto out_mutex; 414 } 415 416 /* WINDOW_MOVE_CREDITS is a bit heavy... */ 417 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 418 if (IS_ERR(handle)) { 419 mlog_errno(PTR_ERR(handle)); 420 handle = NULL; 421 goto out_unlock; 422 } 423 424 bh = osb->local_alloc_bh; 425 alloc = (struct ocfs2_dinode *) bh->b_data; 426 427 alloc_copy = kmemdup(alloc, bh->b_size, GFP_NOFS); 428 if (!alloc_copy) { 429 status = -ENOMEM; 430 goto out_commit; 431 } 432 433 status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode), 434 bh, OCFS2_JOURNAL_ACCESS_WRITE); 435 if (status < 0) { 436 mlog_errno(status); 437 goto out_commit; 438 } 439 440 ocfs2_clear_local_alloc(alloc); 441 ocfs2_journal_dirty(handle, bh); 442 443 brelse(bh); 444 osb->local_alloc_bh = NULL; 445 osb->local_alloc_state = OCFS2_LA_UNUSED; 446 447 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 448 main_bm_inode, main_bm_bh); 449 if (status < 0) 450 mlog_errno(status); 451 452 out_commit: 453 ocfs2_commit_trans(osb, handle); 454 455 out_unlock: 456 brelse(main_bm_bh); 457 458 ocfs2_inode_unlock(main_bm_inode, 1); 459 460 out_mutex: 461 inode_unlock(main_bm_inode); 462 iput(main_bm_inode); 463 464 out: 465 iput(local_alloc_inode); 466 467 kfree(alloc_copy); 468 } 469 470 /* 471 * We want to free the bitmap bits outside of any recovery context as 472 * we'll need a cluster lock to do so, but we must clear the local 473 * alloc before giving up the recovered nodes journal. To solve this, 474 * we kmalloc a copy of the local alloc before it's change for the 475 * caller to process with ocfs2_complete_local_alloc_recovery 476 */ 477 int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, 478 int slot_num, 479 struct ocfs2_dinode **alloc_copy) 480 { 481 int status = 0; 482 struct buffer_head *alloc_bh = NULL; 483 struct inode *inode = NULL; 484 struct ocfs2_dinode *alloc; 485 486 trace_ocfs2_begin_local_alloc_recovery(slot_num); 487 488 *alloc_copy = NULL; 489 490 inode = ocfs2_get_system_file_inode(osb, 491 LOCAL_ALLOC_SYSTEM_INODE, 492 slot_num); 493 if (!inode) { 494 status = -EINVAL; 495 mlog_errno(status); 496 goto bail; 497 } 498 499 inode_lock(inode); 500 501 status = ocfs2_read_inode_block_full(inode, &alloc_bh, 502 OCFS2_BH_IGNORE_CACHE); 503 if (status < 0) { 504 mlog_errno(status); 505 goto bail; 506 } 507 508 *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL); 509 if (!(*alloc_copy)) { 510 status = -ENOMEM; 511 goto bail; 512 } 513 memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size); 514 515 alloc = (struct ocfs2_dinode *) alloc_bh->b_data; 516 ocfs2_clear_local_alloc(alloc); 517 518 ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check); 519 status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode)); 520 if (status < 0) 521 mlog_errno(status); 522 523 bail: 524 if (status < 0) { 525 kfree(*alloc_copy); 526 *alloc_copy = NULL; 527 } 528 529 brelse(alloc_bh); 530 531 if (inode) { 532 inode_unlock(inode); 533 iput(inode); 534 } 535 536 if (status) 537 mlog_errno(status); 538 return status; 539 } 540 541 /* 542 * Step 2: By now, we've completed the journal recovery, we've stamped 543 * a clean local alloc on disk and dropped the node out of the 544 * recovery map. Dlm locks will no longer stall, so lets clear out the 545 * main bitmap. 546 */ 547 int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb, 548 struct ocfs2_dinode *alloc) 549 { 550 int status; 551 handle_t *handle; 552 struct buffer_head *main_bm_bh = NULL; 553 struct inode *main_bm_inode; 554 555 main_bm_inode = ocfs2_get_system_file_inode(osb, 556 GLOBAL_BITMAP_SYSTEM_INODE, 557 OCFS2_INVALID_SLOT); 558 if (!main_bm_inode) { 559 status = -EINVAL; 560 mlog_errno(status); 561 goto out; 562 } 563 564 inode_lock(main_bm_inode); 565 566 status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); 567 if (status < 0) { 568 mlog_errno(status); 569 goto out_mutex; 570 } 571 572 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 573 if (IS_ERR(handle)) { 574 status = PTR_ERR(handle); 575 handle = NULL; 576 mlog_errno(status); 577 goto out_unlock; 578 } 579 580 /* we want the bitmap change to be recorded on disk asap */ 581 handle->h_sync = 1; 582 583 status = ocfs2_sync_local_to_main(osb, handle, alloc, 584 main_bm_inode, main_bm_bh); 585 if (status < 0) 586 mlog_errno(status); 587 588 ocfs2_commit_trans(osb, handle); 589 590 out_unlock: 591 ocfs2_inode_unlock(main_bm_inode, 1); 592 593 out_mutex: 594 inode_unlock(main_bm_inode); 595 596 brelse(main_bm_bh); 597 598 iput(main_bm_inode); 599 600 out: 601 if (!status) 602 ocfs2_init_steal_slots(osb); 603 if (status) 604 mlog_errno(status); 605 return status; 606 } 607 608 /* 609 * make sure we've got at least bits_wanted contiguous bits in the 610 * local alloc. You lose them when you drop i_rwsem. 611 * 612 * We will add ourselves to the transaction passed in, but may start 613 * our own in order to shift windows. 614 */ 615 int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, 616 u32 bits_wanted, 617 struct ocfs2_alloc_context *ac) 618 { 619 int status; 620 struct ocfs2_dinode *alloc; 621 struct inode *local_alloc_inode; 622 unsigned int free_bits; 623 624 BUG_ON(!ac); 625 626 local_alloc_inode = 627 ocfs2_get_system_file_inode(osb, 628 LOCAL_ALLOC_SYSTEM_INODE, 629 osb->slot_num); 630 if (!local_alloc_inode) { 631 status = -ENOENT; 632 mlog_errno(status); 633 goto bail; 634 } 635 636 inode_lock(local_alloc_inode); 637 638 /* 639 * We must double check state and allocator bits because 640 * another process may have changed them while holding i_rwsem. 641 */ 642 spin_lock(&osb->osb_lock); 643 if (!ocfs2_la_state_enabled(osb) || 644 (bits_wanted > osb->local_alloc_bits)) { 645 spin_unlock(&osb->osb_lock); 646 status = -ENOSPC; 647 goto bail; 648 } 649 spin_unlock(&osb->osb_lock); 650 651 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 652 653 #ifdef CONFIG_OCFS2_DEBUG_FS 654 if (le32_to_cpu(alloc->id1.bitmap1.i_used) != 655 ocfs2_local_alloc_count_bits(alloc)) { 656 status = ocfs2_error(osb->sb, "local alloc inode %llu says it has %u used bits, but a count shows %u\n", 657 (unsigned long long)le64_to_cpu(alloc->i_blkno), 658 le32_to_cpu(alloc->id1.bitmap1.i_used), 659 ocfs2_local_alloc_count_bits(alloc)); 660 goto bail; 661 } 662 #endif 663 664 free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - 665 le32_to_cpu(alloc->id1.bitmap1.i_used); 666 if (bits_wanted > free_bits) { 667 /* uhoh, window change time. */ 668 status = 669 ocfs2_local_alloc_slide_window(osb, local_alloc_inode); 670 if (status < 0) { 671 if (status != -ENOSPC) 672 mlog_errno(status); 673 goto bail; 674 } 675 676 /* 677 * Under certain conditions, the window slide code 678 * might have reduced the number of bits available or 679 * disabled the local alloc entirely. Re-check 680 * here and return -ENOSPC if necessary. 681 */ 682 status = -ENOSPC; 683 if (!ocfs2_la_state_enabled(osb)) 684 goto bail; 685 686 free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - 687 le32_to_cpu(alloc->id1.bitmap1.i_used); 688 if (bits_wanted > free_bits) 689 goto bail; 690 } 691 692 ac->ac_inode = local_alloc_inode; 693 /* We should never use localalloc from another slot */ 694 ac->ac_alloc_slot = osb->slot_num; 695 ac->ac_which = OCFS2_AC_USE_LOCAL; 696 get_bh(osb->local_alloc_bh); 697 ac->ac_bh = osb->local_alloc_bh; 698 status = 0; 699 bail: 700 if (status < 0 && local_alloc_inode) { 701 inode_unlock(local_alloc_inode); 702 iput(local_alloc_inode); 703 } 704 705 trace_ocfs2_reserve_local_alloc_bits( 706 (unsigned long long)ac->ac_max_block, 707 bits_wanted, osb->slot_num, status); 708 709 if (status) 710 mlog_errno(status); 711 return status; 712 } 713 714 int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, 715 handle_t *handle, 716 struct ocfs2_alloc_context *ac, 717 u32 bits_wanted, 718 u32 *bit_off, 719 u32 *num_bits) 720 { 721 int status, start; 722 struct inode *local_alloc_inode; 723 void *bitmap; 724 struct ocfs2_dinode *alloc; 725 struct ocfs2_local_alloc *la; 726 727 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); 728 729 local_alloc_inode = ac->ac_inode; 730 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 731 la = OCFS2_LOCAL_ALLOC(alloc); 732 733 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted, 734 ac->ac_resv); 735 if (start == -1) { 736 /* TODO: Shouldn't we just BUG here? */ 737 status = -ENOSPC; 738 mlog_errno(status); 739 goto bail; 740 } 741 742 bitmap = la->la_bitmap; 743 *bit_off = le32_to_cpu(la->la_bm_off) + start; 744 *num_bits = bits_wanted; 745 746 status = ocfs2_journal_access_di(handle, 747 INODE_CACHE(local_alloc_inode), 748 osb->local_alloc_bh, 749 OCFS2_JOURNAL_ACCESS_WRITE); 750 if (status < 0) { 751 mlog_errno(status); 752 goto bail; 753 } 754 755 ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start, 756 bits_wanted); 757 758 while(bits_wanted--) 759 ocfs2_set_bit(start++, bitmap); 760 761 le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits); 762 ocfs2_journal_dirty(handle, osb->local_alloc_bh); 763 764 bail: 765 if (status) 766 mlog_errno(status); 767 return status; 768 } 769 770 int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb, 771 handle_t *handle, 772 struct ocfs2_alloc_context *ac, 773 u32 bit_off, 774 u32 num_bits) 775 { 776 int status, start; 777 u32 clear_bits; 778 struct inode *local_alloc_inode; 779 void *bitmap; 780 struct ocfs2_dinode *alloc; 781 struct ocfs2_local_alloc *la; 782 783 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); 784 785 local_alloc_inode = ac->ac_inode; 786 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 787 la = OCFS2_LOCAL_ALLOC(alloc); 788 789 bitmap = la->la_bitmap; 790 start = bit_off - le32_to_cpu(la->la_bm_off); 791 clear_bits = num_bits; 792 793 status = ocfs2_journal_access_di(handle, 794 INODE_CACHE(local_alloc_inode), 795 osb->local_alloc_bh, 796 OCFS2_JOURNAL_ACCESS_WRITE); 797 if (status < 0) { 798 mlog_errno(status); 799 goto bail; 800 } 801 802 while (clear_bits--) 803 ocfs2_clear_bit(start++, bitmap); 804 805 le32_add_cpu(&alloc->id1.bitmap1.i_used, -num_bits); 806 ocfs2_journal_dirty(handle, osb->local_alloc_bh); 807 808 bail: 809 return status; 810 } 811 812 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc) 813 { 814 u32 count; 815 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 816 817 count = memweight(la->la_bitmap, le16_to_cpu(la->la_size)); 818 819 trace_ocfs2_local_alloc_count_bits(count); 820 return count; 821 } 822 823 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 824 struct ocfs2_dinode *alloc, 825 u32 *numbits, 826 struct ocfs2_alloc_reservation *resv) 827 { 828 int numfound = 0, bitoff, left, startoff; 829 int local_resv = 0; 830 struct ocfs2_alloc_reservation r; 831 void *bitmap = NULL; 832 struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap; 833 834 if (!alloc->id1.bitmap1.i_total) { 835 bitoff = -1; 836 goto bail; 837 } 838 839 if (!resv) { 840 local_resv = 1; 841 ocfs2_resv_init_once(&r); 842 ocfs2_resv_set_type(&r, OCFS2_RESV_FLAG_TMP); 843 resv = &r; 844 } 845 846 numfound = *numbits; 847 if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) { 848 if (numfound < *numbits) 849 *numbits = numfound; 850 goto bail; 851 } 852 853 /* 854 * Code error. While reservations are enabled, local 855 * allocation should _always_ go through them. 856 */ 857 BUG_ON(osb->osb_resv_level != 0); 858 859 /* 860 * Reservations are disabled. Handle this the old way. 861 */ 862 863 bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap; 864 865 numfound = bitoff = startoff = 0; 866 left = le32_to_cpu(alloc->id1.bitmap1.i_total); 867 while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) < 868 left) { 869 /* Ok, we found a zero bit... is it contig. or do we 870 * start over?*/ 871 if (bitoff == startoff) { 872 /* we found a zero */ 873 numfound++; 874 startoff++; 875 } else { 876 /* got a zero after some ones */ 877 numfound = 1; 878 startoff = bitoff+1; 879 } 880 /* we got everything we needed */ 881 if (numfound == *numbits) { 882 /* mlog(0, "Found it all!\n"); */ 883 break; 884 } 885 } 886 887 trace_ocfs2_local_alloc_find_clear_bits_search_bitmap(bitoff, numfound); 888 889 if (numfound == *numbits) 890 bitoff = startoff - numfound; 891 else 892 bitoff = -1; 893 894 bail: 895 if (local_resv) 896 ocfs2_resv_discard(resmap, resv); 897 898 trace_ocfs2_local_alloc_find_clear_bits(*numbits, 899 le32_to_cpu(alloc->id1.bitmap1.i_total), 900 bitoff, numfound); 901 902 return bitoff; 903 } 904 905 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc) 906 { 907 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 908 int i; 909 910 alloc->id1.bitmap1.i_total = 0; 911 alloc->id1.bitmap1.i_used = 0; 912 la->la_bm_off = 0; 913 for(i = 0; i < le16_to_cpu(la->la_size); i++) 914 la->la_bitmap[i] = 0; 915 } 916 917 #if 0 918 /* turn this on and uncomment below to aid debugging window shifts. */ 919 static void ocfs2_verify_zero_bits(unsigned long *bitmap, 920 unsigned int start, 921 unsigned int count) 922 { 923 unsigned int tmp = count; 924 while(tmp--) { 925 if (ocfs2_test_bit(start + tmp, bitmap)) { 926 printk("ocfs2_verify_zero_bits: start = %u, count = " 927 "%u\n", start, count); 928 printk("ocfs2_verify_zero_bits: bit %u is set!", 929 start + tmp); 930 BUG(); 931 } 932 } 933 } 934 #endif 935 936 /* 937 * sync the local alloc to main bitmap. 938 * 939 * assumes you've already locked the main bitmap -- the bitmap inode 940 * passed is used for caching. 941 */ 942 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, 943 handle_t *handle, 944 struct ocfs2_dinode *alloc, 945 struct inode *main_bm_inode, 946 struct buffer_head *main_bm_bh) 947 { 948 int status = 0; 949 int bit_off, left, count, start; 950 u64 la_start_blk; 951 u64 blkno; 952 void *bitmap; 953 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 954 955 trace_ocfs2_sync_local_to_main( 956 le32_to_cpu(alloc->id1.bitmap1.i_total), 957 le32_to_cpu(alloc->id1.bitmap1.i_used)); 958 959 if (!alloc->id1.bitmap1.i_total) { 960 goto bail; 961 } 962 963 if (le32_to_cpu(alloc->id1.bitmap1.i_used) == 964 le32_to_cpu(alloc->id1.bitmap1.i_total)) { 965 goto bail; 966 } 967 968 la_start_blk = ocfs2_clusters_to_blocks(osb->sb, 969 le32_to_cpu(la->la_bm_off)); 970 bitmap = la->la_bitmap; 971 start = count = 0; 972 left = le32_to_cpu(alloc->id1.bitmap1.i_total); 973 974 while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start)) < 975 left) { 976 if (bit_off == start) { 977 count++; 978 start++; 979 continue; 980 } 981 if (count) { 982 blkno = la_start_blk + 983 ocfs2_clusters_to_blocks(osb->sb, 984 start - count); 985 986 trace_ocfs2_sync_local_to_main_free( 987 count, start - count, 988 (unsigned long long)la_start_blk, 989 (unsigned long long)blkno); 990 991 status = ocfs2_release_clusters(handle, 992 main_bm_inode, 993 main_bm_bh, blkno, 994 count); 995 if (status < 0) { 996 mlog_errno(status); 997 goto bail; 998 } 999 } 1000 1001 count = 1; 1002 start = bit_off + 1; 1003 } 1004 1005 bail: 1006 if (status) 1007 mlog_errno(status); 1008 return status; 1009 } 1010 1011 enum ocfs2_la_event { 1012 OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */ 1013 OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has 1014 * enough bits theoretically 1015 * free, but a contiguous 1016 * allocation could not be 1017 * found. */ 1018 OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have 1019 * enough bits free to satisfy 1020 * our request. */ 1021 }; 1022 #define OCFS2_LA_ENABLE_INTERVAL (30 * HZ) 1023 /* 1024 * Given an event, calculate the size of our next local alloc window. 1025 * 1026 * This should always be called under i_rwsem of the local alloc inode 1027 * so that local alloc disabling doesn't race with processes trying to 1028 * use the allocator. 1029 * 1030 * Returns the state which the local alloc was left in. This value can 1031 * be ignored by some paths. 1032 */ 1033 static int ocfs2_recalc_la_window(struct ocfs2_super *osb, 1034 enum ocfs2_la_event event) 1035 { 1036 unsigned int bits; 1037 int state; 1038 1039 spin_lock(&osb->osb_lock); 1040 if (osb->local_alloc_state == OCFS2_LA_DISABLED) { 1041 WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED); 1042 goto out_unlock; 1043 } 1044 1045 /* 1046 * ENOSPC and fragmentation are treated similarly for now. 1047 */ 1048 if (event == OCFS2_LA_EVENT_ENOSPC || 1049 event == OCFS2_LA_EVENT_FRAGMENTED) { 1050 /* 1051 * We ran out of contiguous space in the primary 1052 * bitmap. Drastically reduce the number of bits used 1053 * by local alloc until we have to disable it. 1054 */ 1055 bits = osb->local_alloc_bits >> 1; 1056 if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) { 1057 /* 1058 * By setting state to THROTTLED, we'll keep 1059 * the number of local alloc bits used down 1060 * until an event occurs which would give us 1061 * reason to assume the bitmap situation might 1062 * have changed. 1063 */ 1064 osb->local_alloc_state = OCFS2_LA_THROTTLED; 1065 osb->local_alloc_bits = bits; 1066 } else { 1067 osb->local_alloc_state = OCFS2_LA_DISABLED; 1068 } 1069 queue_delayed_work(osb->ocfs2_wq, &osb->la_enable_wq, 1070 OCFS2_LA_ENABLE_INTERVAL); 1071 goto out_unlock; 1072 } 1073 1074 /* 1075 * Don't increase the size of the local alloc window until we 1076 * know we might be able to fulfill the request. Otherwise, we 1077 * risk bouncing around the global bitmap during periods of 1078 * low space. 1079 */ 1080 if (osb->local_alloc_state != OCFS2_LA_THROTTLED) 1081 osb->local_alloc_bits = osb->local_alloc_default_bits; 1082 1083 out_unlock: 1084 state = osb->local_alloc_state; 1085 spin_unlock(&osb->osb_lock); 1086 1087 return state; 1088 } 1089 1090 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, 1091 struct ocfs2_alloc_context **ac, 1092 struct inode **bitmap_inode, 1093 struct buffer_head **bitmap_bh) 1094 { 1095 int status; 1096 1097 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 1098 if (!(*ac)) { 1099 status = -ENOMEM; 1100 mlog_errno(status); 1101 goto bail; 1102 } 1103 1104 retry_enospc: 1105 (*ac)->ac_bits_wanted = osb->local_alloc_bits; 1106 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); 1107 if (status == -ENOSPC) { 1108 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == 1109 OCFS2_LA_DISABLED) 1110 goto bail; 1111 1112 ocfs2_free_ac_resource(*ac); 1113 memset(*ac, 0, sizeof(struct ocfs2_alloc_context)); 1114 goto retry_enospc; 1115 } 1116 if (status < 0) { 1117 mlog_errno(status); 1118 goto bail; 1119 } 1120 1121 *bitmap_inode = (*ac)->ac_inode; 1122 igrab(*bitmap_inode); 1123 *bitmap_bh = (*ac)->ac_bh; 1124 get_bh(*bitmap_bh); 1125 status = 0; 1126 bail: 1127 if ((status < 0) && *ac) { 1128 ocfs2_free_alloc_context(*ac); 1129 *ac = NULL; 1130 } 1131 1132 if (status) 1133 mlog_errno(status); 1134 return status; 1135 } 1136 1137 /* 1138 * pass it the bitmap lock in lock_bh if you have it. 1139 */ 1140 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, 1141 handle_t *handle, 1142 struct ocfs2_alloc_context *ac) 1143 { 1144 int status = 0; 1145 u32 cluster_off, cluster_count; 1146 struct ocfs2_dinode *alloc = NULL; 1147 struct ocfs2_local_alloc *la; 1148 1149 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 1150 la = OCFS2_LOCAL_ALLOC(alloc); 1151 1152 trace_ocfs2_local_alloc_new_window( 1153 le32_to_cpu(alloc->id1.bitmap1.i_total), 1154 osb->local_alloc_bits); 1155 1156 /* Instruct the allocation code to try the most recently used 1157 * cluster group. We'll re-record the group used this pass 1158 * below. */ 1159 ac->ac_last_group = osb->la_last_gd; 1160 1161 /* we used the generic suballoc reserve function, but we set 1162 * everything up nicely, so there's no reason why we can't use 1163 * the more specific cluster api to claim bits. */ 1164 status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits, 1165 &cluster_off, &cluster_count); 1166 if (status == -ENOSPC) { 1167 retry_enospc: 1168 /* 1169 * Note: We could also try syncing the journal here to 1170 * allow use of any free bits which the current 1171 * transaction can't give us access to. --Mark 1172 */ 1173 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) == 1174 OCFS2_LA_DISABLED) 1175 goto bail; 1176 1177 ac->ac_bits_wanted = osb->local_alloc_bits; 1178 status = ocfs2_claim_clusters(handle, ac, 1179 osb->local_alloc_bits, 1180 &cluster_off, 1181 &cluster_count); 1182 if (status == -ENOSPC) 1183 goto retry_enospc; 1184 /* 1185 * We only shrunk the *minimum* number of in our 1186 * request - it's entirely possible that the allocator 1187 * might give us more than we asked for. 1188 */ 1189 if (status == 0) { 1190 spin_lock(&osb->osb_lock); 1191 osb->local_alloc_bits = cluster_count; 1192 spin_unlock(&osb->osb_lock); 1193 } 1194 } 1195 if (status < 0) { 1196 if (status != -ENOSPC) 1197 mlog_errno(status); 1198 goto bail; 1199 } 1200 1201 osb->la_last_gd = ac->ac_last_group; 1202 1203 la->la_bm_off = cpu_to_le32(cluster_off); 1204 alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count); 1205 /* just in case... In the future when we find space ourselves, 1206 * we don't have to get all contiguous -- but we'll have to 1207 * set all previously used bits in bitmap and update 1208 * la_bits_set before setting the bits in the main bitmap. */ 1209 alloc->id1.bitmap1.i_used = 0; 1210 memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0, 1211 le16_to_cpu(la->la_size)); 1212 1213 ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count, 1214 OCFS2_LOCAL_ALLOC(alloc)->la_bitmap); 1215 1216 trace_ocfs2_local_alloc_new_window_result( 1217 le32_to_cpu(OCFS2_LOCAL_ALLOC(alloc)->la_bm_off), 1218 le32_to_cpu(alloc->id1.bitmap1.i_total)); 1219 1220 bail: 1221 if (status) 1222 mlog_errno(status); 1223 return status; 1224 } 1225 1226 /* Note that we do *NOT* lock the local alloc inode here as 1227 * it's been locked already for us. */ 1228 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 1229 struct inode *local_alloc_inode) 1230 { 1231 int status = 0; 1232 struct buffer_head *main_bm_bh = NULL; 1233 struct inode *main_bm_inode = NULL; 1234 handle_t *handle = NULL; 1235 struct ocfs2_dinode *alloc; 1236 struct ocfs2_dinode *alloc_copy = NULL; 1237 struct ocfs2_alloc_context *ac = NULL; 1238 1239 ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE); 1240 1241 /* This will lock the main bitmap for us. */ 1242 status = ocfs2_local_alloc_reserve_for_window(osb, 1243 &ac, 1244 &main_bm_inode, 1245 &main_bm_bh); 1246 if (status < 0) { 1247 if (status != -ENOSPC) 1248 mlog_errno(status); 1249 goto bail; 1250 } 1251 1252 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 1253 if (IS_ERR(handle)) { 1254 status = PTR_ERR(handle); 1255 handle = NULL; 1256 mlog_errno(status); 1257 goto bail; 1258 } 1259 1260 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 1261 1262 /* We want to clear the local alloc before doing anything 1263 * else, so that if we error later during this operation, 1264 * local alloc shutdown won't try to double free main bitmap 1265 * bits. Make a copy so the sync function knows which bits to 1266 * free. */ 1267 alloc_copy = kmemdup(alloc, osb->local_alloc_bh->b_size, GFP_NOFS); 1268 if (!alloc_copy) { 1269 status = -ENOMEM; 1270 mlog_errno(status); 1271 goto bail; 1272 } 1273 1274 status = ocfs2_journal_access_di(handle, 1275 INODE_CACHE(local_alloc_inode), 1276 osb->local_alloc_bh, 1277 OCFS2_JOURNAL_ACCESS_WRITE); 1278 if (status < 0) { 1279 mlog_errno(status); 1280 goto bail; 1281 } 1282 1283 ocfs2_clear_local_alloc(alloc); 1284 ocfs2_journal_dirty(handle, osb->local_alloc_bh); 1285 1286 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 1287 main_bm_inode, main_bm_bh); 1288 if (status < 0) { 1289 mlog_errno(status); 1290 goto bail; 1291 } 1292 1293 status = ocfs2_local_alloc_new_window(osb, handle, ac); 1294 if (status < 0) { 1295 if (status != -ENOSPC) 1296 mlog_errno(status); 1297 goto bail; 1298 } 1299 1300 atomic_inc(&osb->alloc_stats.moves); 1301 1302 bail: 1303 if (handle) 1304 ocfs2_commit_trans(osb, handle); 1305 1306 brelse(main_bm_bh); 1307 1308 iput(main_bm_inode); 1309 kfree(alloc_copy); 1310 1311 if (ac) 1312 ocfs2_free_alloc_context(ac); 1313 1314 if (status) 1315 mlog_errno(status); 1316 return status; 1317 } 1318 1319