// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/timer.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/file.h>
#include <linux/seq_file.h>
#include <trace/events/block.h>

#include "md.h"
#include "md-bitmap.h"

/*
 * #### Background
 *
 * Redundant data is used to enhance data fault tolerance, and the storage
 * method for redundant data varies depending on the RAID level. It's
 * important to maintain the consistency of redundant data.
 *
 * The bitmap is used to record which data blocks have been synchronized and
 * which ones need to be resynchronized or recovered. Each bit in the bitmap
 * represents a segment of data in the array. When a bit is set, it indicates
 * that the multiple redundant copies of that data segment may not be
 * consistent. Data synchronization can be performed based on the bitmap after
 * a power failure or after re-adding a disk. Without a bitmap, a full disk
 * synchronization is required.
 *
 * #### Key Features
 *
 * - The IO fastpath is lockless; if the user issues lots of write IO to the
 *   same bitmap bit in a short time, only the first write has the additional
 *   overhead of updating the bitmap bit, and there is no additional overhead
 *   for the following writes;
 * - Support resync or recovery of written data only, meaning that when
 *   creating a new array or replacing with a new disk, there is no need to
 *   do a full disk resync/recovery;
 *
 * #### Key Concepts
 *
 * ##### State Machine
 *
 * Each bit is one byte and can be in 6 different states, see llbitmap_state.
 * There are 8 different actions in total, see llbitmap_action, that can
 * change the state:
 *
 * llbitmap state machine: transitions between states
 *
 * |           | Startwrite | Startsync | Endsync | Abortsync |
 * | --------- | ---------- | --------- | ------- | --------- |
 * | Unwritten | Dirty      | x         | x       | x         |
 * | Clean     | Dirty      | x         | x       | x         |
 * | Dirty     | x          | x         | x       | x         |
 * | NeedSync  | x          | Syncing   | x       | x         |
 * | Syncing   | x          | Syncing   | Dirty   | NeedSync  |
 *
 * |           | Reload   | Daemon | Discard   | Stale     |
 * | --------- | -------- | ------ | --------- | --------- |
 * | Unwritten | x        | x      | x         | x         |
 * | Clean     | x        | x      | Unwritten | NeedSync  |
 * | Dirty     | NeedSync | Clean  | Unwritten | NeedSync  |
 * | NeedSync  | x        | x      | Unwritten | x         |
 * | Syncing   | NeedSync | x      | Unwritten | NeedSync  |
 *
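 * For illustration, the tables above map directly onto the state_machine[][]
 * array defined below; an 'x' corresponds to BitNone, meaning no state
 * change. A minimal sketch of how transitions are resolved:
 *
 *	state_machine[BitClean][BitmapActionStartwrite] == BitDirty
 *	state_machine[BitDirty][BitmapActionDaemon]     == BitClean
 *	state_machine[BitDirty][BitmapActionReload]     == BitNeedSync
 *	state_machine[BitSyncing][BitmapActionEndsync]  == BitDirty
 *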
 * Typical scenarios:
 *
 * 1) Create a new array
 * All bits will be set to Unwritten by default; if --assume-clean is set,
 * all bits will be set to Clean instead.
 *
 * 2) Write data. raid1/raid10 have full copies of the data, while raid456
 * doesn't and relies on xor data.
 *
 * 2.1) write new data to raid1/raid10:
 * Unwritten --StartWrite--> Dirty
 *
 * 2.2) write new data to raid456:
 * Unwritten --StartWrite--> NeedSync
 *
 * Because the initial recovery for raid456 is skipped, the xor data is not
 * built yet; the bit must be set to NeedSync first, and after the lazy
 * initial recovery is finished, the bit will finally be set to Dirty
 * (see 5.1 and 5.4);
 *
 * 2.3) overwrite existing data:
 * Clean --StartWrite--> Dirty
 *
 * 3) Daemon, if the array is not degraded:
 * Dirty --Daemon--> Clean
 *
 * 4) Discard:
 * {Clean, Dirty, NeedSync, Syncing} --Discard--> Unwritten
 *
 * 5) Resync and recovery:
 *
 * 5.1) common process
 * NeedSync --Startsync--> Syncing --Endsync--> Dirty --Daemon--> Clean
 *
 * 5.2) resync after power failure
 * Dirty --Reload--> NeedSync
 *
 * 5.3) recovery while replacing with a new disk
 * By default, the old bitmap framework will recover all data, and llbitmap
 * implements this with a new helper, see llbitmap_skip_sync_blocks:
 *
 * skip recovery for bits other than dirty or clean;
 *
 * 5.4) lazy initial recovery for raid5:
 * By default, the old bitmap framework only allows new recovery when there
 * are spares (new disks); a new recovery flag MD_RECOVERY_LAZY_RECOVER is
 * added to perform raid456 lazy recovery for set bits (from 2.2).
 *
 * 6) Special handling for degraded arrays:
 *
 * - Dirty bits will never be cleared, and the daemon will just do nothing,
 *   so that if a disk is re-added, recovery can be skipped for Clean bits;
 * - Dirty bits will convert to Syncing from start write, to do data recovery
 *   for newly added disks;
 * - New writes will convert bits to NeedSync directly;
 *
 * ##### Bitmap IO
 *
 * ##### Chunksize
 *
 * The default bitmap size is 128k, including the 1k bitmap super block, and
 * the default size of the data segment covered by each bit (the chunksize)
 * is 64k. The chunksize is doubled repeatedly until the total number of bits
 * fits into the bitmap, i.e. no more than 127k bits (see llbitmap_init).
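 *
 * For illustration, a rough worked example (the array size here is
 * hypothetical): a 10TiB array with 64k chunks would need ~160M bits, far
 * more than the 127k bits that fit, so llbitmap_init() keeps doubling the
 * chunksize until the bit count fits:
 *
 *	chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize);
 *	while (chunks > space) {	// space: one byte per bit, 127k total
 *		chunksize <<= 1;
 *		chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize);
 *	}
 *
 * For 10TiB this settles at a 128M chunksize (10TiB / 128M = 80k bits).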
 *
 * ##### READ
 *
 * When creating the bitmap, all pages will be allocated and read for the
 * llbitmap; there will be no further reads afterwards.
 *
 * ##### WRITE
 *
 * WRITE IO is divided into units of the logical_block_size of the array, and
 * the dirty state of each block is tracked independently. For example:
 *
 * each page is 4k and contains 8 blocks; each block is 512 bytes and
 * contains 512 bits;
 *
 * | page0 | page1 | ... | page 31 |
 * |       |
 * |        \-----------------------\
 * |                                |
 * | block0 | block1 | ... | block7 |
 * |        |
 * |         \-----------------\
 * |                           |
 * | bit0 | bit1 | ... | bit511 |
 *
 * In the IO path, if one bit is changed to Dirty or NeedSync, the
 * corresponding subpage will be marked dirty, and such blocks must be
 * written out before the data IO is issued. This behaviour will affect IO
 * performance; to reduce the impact, if multiple bits are changed in the
 * same block in a short time, all bits in this block will be changed to
 * Dirty/NeedSync, so that there won't be any additional overhead until the
 * daemon clears the dirty bits.
 *
 * ##### Dirty Bits synchronization
 *
 * The IO fast path will set bits to dirty, and those dirty bits will be
 * cleared by the daemon after the IO is done. llbitmap_page_ctl is used to
 * synchronize between the IO path and the daemon;
 *
 * IO path:
 * 1) try to grab a reference; if this succeeds, set the expire time after 5s
 *    and return;
 * 2) if grabbing a reference fails, wait for the daemon to finish clearing
 *    dirty bits;
 *
 * Daemon (the daemon will be woken up every daemon_sleep seconds):
 * For each page:
 * 1) check if the page has expired; if not, skip this page; for expired
 *    pages:
 * 2) suspend the page and wait for inflight write IO to be done;
 * 3) change dirty pages to clean;
 * 4) resume the page;
 */

#define BITMAP_DATA_OFFSET 1024

/* 64k is the max IO size of sync IO for raid1/raid10 */
#define MIN_CHUNK_SIZE (64 * 2)

/* By default, daemon will be woken up every 30s */
#define DEFAULT_DAEMON_SLEEP 30

/*
 * Dirtied bits that have not been accessed for more than 5s will be cleared
 * by the daemon.
 */
#define DEFAULT_BARRIER_IDLE 5

enum llbitmap_state {
	/* No valid data, init state after assembling the array */
	BitUnwritten = 0,
	/* data is consistent */
	BitClean,
	/* data will be consistent after IO is done, set directly for writes */
	BitDirty,
	/*
	 * data needs to be resynchronized:
	 * 1) set directly for writes if the array is degraded, to prevent
	 *    full disk synchronization after re-adding a disk;
	 * 2) the array is reassembled after a power failure, and dirty bits
	 *    are found while reloading the bitmap;
	 * 3) set for the first write for raid5, to build initial xor data
	 *    lazily
	 */
	BitNeedSync,
	/* data is synchronizing */
	BitSyncing,
	BitStateCount,
	BitNone = 0xff,
};

enum llbitmap_action {
	/* User writes new data, this is the only action from the IO fast path */
	BitmapActionStartwrite = 0,
	/* Start recovery */
	BitmapActionStartsync,
	/* Finish recovery */
	BitmapActionEndsync,
	/* Failed recovery */
	BitmapActionAbortsync,
	/* Reassemble the array */
	BitmapActionReload,
	/* Daemon thread is trying to clear dirty bits */
	BitmapActionDaemon,
	/* Data is deleted */
	BitmapActionDiscard,
	/*
	 * Bitmap is stale, mark all bits other than BitUnwritten as
	 * BitNeedSync.
	 */
	BitmapActionStale,
	BitmapActionCount,
	/* Init state is BitUnwritten */
	BitmapActionInit,
};

enum llbitmap_page_state {
	LLPageFlush = 0,
	LLPageDirty,
};

struct llbitmap_page_ctl {
	char *state;
	struct page *page;
	unsigned long expire;
	unsigned long flags;
	wait_queue_head_t wait;
	struct percpu_ref active;
	/* Per block size dirty state, maximum 64k page / 1 sector = 128 */
	unsigned long dirty[];
};
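
/*
 * For illustration, the sizing of dirty[] above: with a hypothetical 64k
 * page and a 512-byte logical block size, one page holds 64k / 512 = 128
 * blocks, the maximum noted in the comment, so dirty[] needs
 * BITS_TO_LONGS(128) longs; with the more common 4k page only 8 dirty bits
 * are used.
 */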

struct llbitmap {
	struct mddev *mddev;
	struct llbitmap_page_ctl **pctl;

	unsigned int nr_pages;
	unsigned int io_size;
	unsigned int blocks_per_page;

	/* shift of one chunk */
	unsigned long chunkshift;
	/* size of one chunk in sectors */
	unsigned long chunksize;
	/* total number of chunks */
	unsigned long chunks;
	unsigned long last_end_sync;
	/*
	 * time in seconds after which dirty bits will be cleared if the page
	 * is not accessed.
	 */
	unsigned long barrier_idle;
	/* fires on first BitDirty state */
	struct timer_list pending_timer;
	struct work_struct daemon_work;

	unsigned long flags;
	__u64 events_cleared;

	/* for slow disks */
	atomic_t behind_writes;
	wait_queue_head_t behind_wait;
};

struct llbitmap_unplug_work {
	struct work_struct work;
	struct llbitmap *llbitmap;
	struct completion *done;
};

static struct workqueue_struct *md_llbitmap_io_wq;
static struct workqueue_struct *md_llbitmap_unplug_wq;

static char state_machine[BitStateCount][BitmapActionCount] = {
	[BitUnwritten] = {
		[BitmapActionStartwrite] = BitDirty,
		[BitmapActionStartsync] = BitNone,
		[BitmapActionEndsync] = BitNone,
		[BitmapActionAbortsync] = BitNone,
		[BitmapActionReload] = BitNone,
		[BitmapActionDaemon] = BitNone,
		[BitmapActionDiscard] = BitNone,
		[BitmapActionStale] = BitNone,
	},
	[BitClean] = {
		[BitmapActionStartwrite] = BitDirty,
		[BitmapActionStartsync] = BitNone,
		[BitmapActionEndsync] = BitNone,
		[BitmapActionAbortsync] = BitNone,
		[BitmapActionReload] = BitNone,
		[BitmapActionDaemon] = BitNone,
		[BitmapActionDiscard] = BitUnwritten,
		[BitmapActionStale] = BitNeedSync,
	},
	[BitDirty] = {
		[BitmapActionStartwrite] = BitNone,
		[BitmapActionStartsync] = BitNone,
		[BitmapActionEndsync] = BitNone,
		[BitmapActionAbortsync] = BitNone,
		[BitmapActionReload] = BitNeedSync,
		[BitmapActionDaemon] = BitClean,
		[BitmapActionDiscard] = BitUnwritten,
		[BitmapActionStale] = BitNeedSync,
	},
	[BitNeedSync] = {
		[BitmapActionStartwrite] = BitNone,
		[BitmapActionStartsync] = BitSyncing,
		[BitmapActionEndsync] = BitNone,
		[BitmapActionAbortsync] = BitNone,
		[BitmapActionReload] = BitNone,
		[BitmapActionDaemon] = BitNone,
		[BitmapActionDiscard] = BitUnwritten,
		[BitmapActionStale] = BitNone,
	},
	[BitSyncing] = {
		[BitmapActionStartwrite] = BitNone,
		[BitmapActionStartsync] = BitSyncing,
		[BitmapActionEndsync] = BitDirty,
		[BitmapActionAbortsync] = BitNeedSync,
		[BitmapActionReload] = BitNeedSync,
		[BitmapActionDaemon] = BitNone,
		[BitmapActionDiscard] = BitUnwritten,
		[BitmapActionStale] = BitNeedSync,
	},
};

static void __llbitmap_flush(struct mddev *mddev);

static enum llbitmap_state llbitmap_read(struct llbitmap *llbitmap, loff_t pos)
{
	unsigned int idx;
	unsigned int offset;

	pos += BITMAP_DATA_OFFSET;
	idx = pos >> PAGE_SHIFT;
	offset = offset_in_page(pos);

	return llbitmap->pctl[idx]->state[offset];
}

/* set all the bits in the subpage as dirty */
static void llbitmap_infect_dirty_bits(struct llbitmap *llbitmap,
				       struct llbitmap_page_ctl *pctl,
				       unsigned int block)
{
	bool level_456 = raid_is_456(llbitmap->mddev);
	unsigned int io_size = llbitmap->io_size;
	int pos;

	for (pos = block * io_size; pos < (block + 1) * io_size; pos++) {
		switch (pctl->state[pos]) {
		case BitUnwritten:
			pctl->state[pos] = level_456 ? BitNeedSync : BitDirty;
			break;
		case BitClean:
			pctl->state[pos] = BitDirty;
			break;
		}
	}
}
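
/*
 * Called when the bit at @offset in page @idx transitions to BitDirty or
 * BitNeedSync: mark the subpage (one logical block) containing @offset dirty
 * so that llbitmap_write_page() will write it back, and escalate to
 * llbitmap_infect_dirty_bits() once the block shows repeated dirtying.
 */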
static void llbitmap_set_page_dirty(struct llbitmap *llbitmap, int idx,
				    int offset)
{
	struct llbitmap_page_ctl *pctl = llbitmap->pctl[idx];
	unsigned int io_size = llbitmap->io_size;
	int block = offset / io_size;
	int pos;

	if (!test_bit(LLPageDirty, &pctl->flags))
		set_bit(LLPageDirty, &pctl->flags);

	/*
	 * For a degraded array, dirty bits will never be cleared, and we
	 * must resync all the dirty bits; hence skip infecting new dirty
	 * bits to prevent resyncing unnecessary data.
	 */
	if (llbitmap->mddev->degraded) {
		set_bit(block, pctl->dirty);
		return;
	}

	/*
	 * The subpage usually contains a total of 512 bits. If any single
	 * bit within the subpage is marked as dirty, the entire sector will
	 * be written. To avoid impacting write performance, when multiple
	 * bits within the same sector are modified within
	 * llbitmap->barrier_idle, all bits in the sector will be
	 * collectively marked as dirty at once.
	 */
	if (test_and_set_bit(block, pctl->dirty)) {
		llbitmap_infect_dirty_bits(llbitmap, pctl, block);
		return;
	}

	for (pos = block * io_size; pos < (block + 1) * io_size; pos++) {
		if (pos == offset)
			continue;
		if (pctl->state[pos] == BitDirty ||
		    pctl->state[pos] == BitNeedSync) {
			llbitmap_infect_dirty_bits(llbitmap, pctl, block);
			return;
		}
	}
}

static void llbitmap_write(struct llbitmap *llbitmap, enum llbitmap_state state,
			   loff_t pos)
{
	unsigned int idx;
	unsigned int bit;

	pos += BITMAP_DATA_OFFSET;
	idx = pos >> PAGE_SHIFT;
	bit = offset_in_page(pos);

	llbitmap->pctl[idx]->state[bit] = state;
	if (state == BitDirty || state == BitNeedSync)
		llbitmap_set_page_dirty(llbitmap, idx, bit);
}

static struct page *llbitmap_read_page(struct llbitmap *llbitmap, int idx)
{
	struct mddev *mddev = llbitmap->mddev;
	struct page *page = NULL;
	struct md_rdev *rdev;

	if (llbitmap->pctl && llbitmap->pctl[idx])
		page = llbitmap->pctl[idx]->page;
	if (page)
		return page;

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page)
		return ERR_PTR(-ENOMEM);

	rdev_for_each(rdev, mddev) {
		sector_t sector;

		if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags) ||
		    !test_bit(In_sync, &rdev->flags))
			continue;

		sector = mddev->bitmap_info.offset +
			 (idx << PAGE_SECTORS_SHIFT);

		if (sync_page_io(rdev, sector, PAGE_SIZE, page, REQ_OP_READ,
				 true))
			return page;

		md_error(mddev, rdev);
	}

	__free_page(page);
	return ERR_PTR(-EIO);
}

static void llbitmap_write_page(struct llbitmap *llbitmap, int idx)
{
	struct page *page = llbitmap->pctl[idx]->page;
	struct mddev *mddev = llbitmap->mddev;
	struct md_rdev *rdev;
	int block;

	for (block = 0; block < llbitmap->blocks_per_page; block++) {
		struct llbitmap_page_ctl *pctl = llbitmap->pctl[idx];

		if (!test_and_clear_bit(block, pctl->dirty))
			continue;

		rdev_for_each(rdev, mddev) {
			sector_t sector;
			sector_t bit_sector = llbitmap->io_size >> SECTOR_SHIFT;

			if (rdev->raid_disk < 0 ||
			    test_bit(Faulty, &rdev->flags))
				continue;

			sector = mddev->bitmap_info.offset + rdev->sb_start +
				 (idx << PAGE_SECTORS_SHIFT) +
				 block * bit_sector;
			md_write_metadata(mddev, rdev, sector,
					  llbitmap->io_size, page,
					  block * llbitmap->io_size);
		}
	}
}
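
/*
 * For illustration, a sketch of the on-disk math in llbitmap_write_page()
 * above, assuming hypothetical values of io_size = 512, 4k pages and
 * bitmap_info.offset = 8: block 3 of page 2 is written at
 *
 *	rdev->sb_start + 8 + (2 << PAGE_SECTORS_SHIFT) + 3 * (512 >> 9)
 *
 * i.e. 27 sectors past the superblock start, and only that one logical
 * block is written, not the whole page.
 */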

static void active_release(struct percpu_ref *ref)
{
	struct llbitmap_page_ctl *pctl =
		container_of(ref, struct llbitmap_page_ctl, active);

	wake_up(&pctl->wait);
}

static void llbitmap_free_pages(struct llbitmap *llbitmap)
{
	int i;

	if (!llbitmap->pctl)
		return;

	for (i = 0; i < llbitmap->nr_pages; i++) {
		struct llbitmap_page_ctl *pctl = llbitmap->pctl[i];

		if (!pctl || !pctl->page)
			break;

		__free_page(pctl->page);
		percpu_ref_exit(&pctl->active);
	}

	kfree(llbitmap->pctl[0]);
	kfree(llbitmap->pctl);
	llbitmap->pctl = NULL;
}

static int llbitmap_cache_pages(struct llbitmap *llbitmap)
{
	struct llbitmap_page_ctl *pctl;
	unsigned int nr_pages = DIV_ROUND_UP(llbitmap->chunks +
					     BITMAP_DATA_OFFSET, PAGE_SIZE);
	unsigned int size = struct_size(pctl, dirty, BITS_TO_LONGS(
						llbitmap->blocks_per_page));
	int i;

	llbitmap->pctl = kmalloc_array(nr_pages, sizeof(void *),
				       GFP_KERNEL | __GFP_ZERO);
	if (!llbitmap->pctl)
		return -ENOMEM;

	size = round_up(size, cache_line_size());
	pctl = kmalloc_array(nr_pages, size, GFP_KERNEL | __GFP_ZERO);
	if (!pctl) {
		kfree(llbitmap->pctl);
		return -ENOMEM;
	}

	llbitmap->nr_pages = nr_pages;

	for (i = 0; i < nr_pages; i++, pctl = (void *)pctl + size) {
		struct page *page = llbitmap_read_page(llbitmap, i);

		llbitmap->pctl[i] = pctl;

		if (IS_ERR(page)) {
			llbitmap_free_pages(llbitmap);
			return PTR_ERR(page);
		}

		if (percpu_ref_init(&pctl->active, active_release,
				    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
			__free_page(page);
			llbitmap_free_pages(llbitmap);
			return -ENOMEM;
		}

		pctl->page = page;
		pctl->state = page_address(page);
		init_waitqueue_head(&pctl->wait);
	}

	return 0;
}

static void llbitmap_init_state(struct llbitmap *llbitmap)
{
	enum llbitmap_state state = BitUnwritten;
	unsigned long i;

	if (test_and_clear_bit(BITMAP_CLEAN, &llbitmap->flags))
		state = BitClean;

	for (i = 0; i < llbitmap->chunks; i++)
		llbitmap_write(llbitmap, state, i);
}

/* The return value is only used from resync, where @start == @end. */
static enum llbitmap_state llbitmap_state_machine(struct llbitmap *llbitmap,
						  unsigned long start,
						  unsigned long end,
						  enum llbitmap_action action)
{
	struct mddev *mddev = llbitmap->mddev;
	enum llbitmap_state state = BitNone;
	bool level_456 = raid_is_456(llbitmap->mddev);
	bool need_resync = false;
	bool need_recovery = false;

	if (test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags))
		return BitNone;

	if (action == BitmapActionInit) {
		llbitmap_init_state(llbitmap);
		return BitNone;
	}

	while (start <= end) {
		enum llbitmap_state c = llbitmap_read(llbitmap, start);

		if (c < 0 || c >= BitStateCount) {
			pr_err("%s: invalid bit %lu state %d action %d, forcing resync\n",
			       __func__, start, c, action);
			state = BitNeedSync;
			goto write_bitmap;
		}

		if (c == BitNeedSync)
			need_resync = !mddev->degraded;

		state = state_machine[c][action];

write_bitmap:
		if (unlikely(mddev->degraded)) {
			/* For a degraded array, mark new data as needing sync. */
			if (state == BitDirty &&
			    action == BitmapActionStartwrite)
				state = BitNeedSync;
			/*
			 * For a degraded array, resync dirty data as well.
			 * Note that if the array is still degraded after
			 * resync is done, all new data will still be dirty
			 * until the array is clean.
			 */
			else if (c == BitDirty &&
				 action == BitmapActionStartsync)
				state = BitSyncing;
		} else if (c == BitUnwritten && state == BitDirty &&
			   action == BitmapActionStartwrite && level_456) {
			/* Delay raid456 initial recovery to first write. */
			state = BitNeedSync;
		}

		if (state == BitNone) {
			start++;
			continue;
		}

		llbitmap_write(llbitmap, state, start);

		if (state == BitNeedSync)
			need_resync = !mddev->degraded;
		else if (state == BitDirty &&
			 !timer_pending(&llbitmap->pending_timer))
			mod_timer(&llbitmap->pending_timer,
				  jiffies + mddev->bitmap_info.daemon_sleep * HZ);

		start++;
	}

	if (need_resync && level_456)
		need_recovery = true;

	if (need_recovery) {
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		set_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
	} else if (need_resync) {
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
	}

	return state;
}
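
/*
 * Page barriers keep the daemon from clearing bits while writes are in
 * flight. A rough sketch of the expected pairing in the IO path (see
 * llbitmap_start_write() and llbitmap_end_write() below):
 *
 *	llbitmap_raise_barrier(llbitmap, idx);
 *	llbitmap_state_machine(llbitmap, start, end, BitmapActionStartwrite);
 *	...				// issue and complete the data IO
 *	llbitmap_release_barrier(llbitmap, idx);
 *
 * The daemon side pairs llbitmap_suspend_timeout() with llbitmap_resume(),
 * killing and resurrecting the percpu_ref around clearing dirty bits.
 */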
static void llbitmap_raise_barrier(struct llbitmap *llbitmap, int page_idx)
{
	struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx];

retry:
	if (likely(percpu_ref_tryget_live(&pctl->active))) {
		WRITE_ONCE(pctl->expire, jiffies + llbitmap->barrier_idle * HZ);
		return;
	}

	wait_event(pctl->wait, !percpu_ref_is_dying(&pctl->active));
	goto retry;
}

static void llbitmap_release_barrier(struct llbitmap *llbitmap, int page_idx)
{
	struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx];

	percpu_ref_put(&pctl->active);
}

static int llbitmap_suspend_timeout(struct llbitmap *llbitmap, int page_idx)
{
	struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx];

	percpu_ref_kill(&pctl->active);

	if (!wait_event_timeout(pctl->wait, percpu_ref_is_zero(&pctl->active),
				llbitmap->mddev->bitmap_info.daemon_sleep * HZ)) {
		percpu_ref_resurrect(&pctl->active);
		return -ETIMEDOUT;
	}

	return 0;
}

static void llbitmap_resume(struct llbitmap *llbitmap, int page_idx)
{
	struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx];

	pctl->expire = LONG_MAX;
	percpu_ref_resurrect(&pctl->active);
	wake_up(&pctl->wait);
}

static int llbitmap_check_support(struct mddev *mddev)
{
	if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
		pr_notice("md/llbitmap: %s: array with journal cannot have bitmap\n",
			  mdname(mddev));
		return -EBUSY;
	}

	if (mddev->bitmap_info.space == 0) {
		if (mddev->bitmap_info.default_space == 0) {
			pr_notice("md/llbitmap: %s: no space for bitmap\n",
				  mdname(mddev));
			return -ENOSPC;
		}
	}

	if (!mddev->persistent) {
		pr_notice("md/llbitmap: %s: array must be persistent\n",
			  mdname(mddev));
		return -EOPNOTSUPP;
	}

	if (mddev->bitmap_info.file) {
		pr_notice("md/llbitmap: %s: doesn't support bitmap file\n",
			  mdname(mddev));
		return -EOPNOTSUPP;
	}

	if (mddev->bitmap_info.external) {
		pr_notice("md/llbitmap: %s: doesn't support external metadata\n",
			  mdname(mddev));
		return -EOPNOTSUPP;
	}

	if (mddev_is_dm(mddev)) {
		pr_notice("md/llbitmap: %s: doesn't support dm-raid\n",
			  mdname(mddev));
		return -EOPNOTSUPP;
	}

	return 0;
}

static int llbitmap_init(struct llbitmap *llbitmap)
{
	struct mddev *mddev = llbitmap->mddev;
	sector_t blocks = mddev->resync_max_sectors;
	unsigned long chunksize = MIN_CHUNK_SIZE;
	unsigned long chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize);
	unsigned long space = mddev->bitmap_info.space << SECTOR_SHIFT;
	int ret;

	while (chunks > space) {
		chunksize = chunksize << 1;
		chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize);
	}

	llbitmap->barrier_idle = DEFAULT_BARRIER_IDLE;
	llbitmap->chunkshift = ffz(~chunksize);
	llbitmap->chunksize = chunksize;
	llbitmap->chunks = chunks;
	mddev->bitmap_info.daemon_sleep = DEFAULT_DAEMON_SLEEP;

	ret = llbitmap_cache_pages(llbitmap);
	if (ret)
		return ret;

	llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1,
			       BitmapActionInit);
	/* flush initial llbitmap to disk */
	__llbitmap_flush(mddev);

	return 0;
}

static int llbitmap_read_sb(struct llbitmap *llbitmap)
{
	struct mddev *mddev = llbitmap->mddev;
	unsigned long daemon_sleep;
	unsigned long chunksize;
	unsigned long events;
	struct page *sb_page;
	bitmap_super_t *sb;
	int ret = -EINVAL;

	if (!mddev->bitmap_info.offset) {
		pr_err("md/llbitmap: %s: no super block found\n",
		       mdname(mddev));
		return -EINVAL;
	}

	sb_page = llbitmap_read_page(llbitmap, 0);
	if (IS_ERR(sb_page)) {
		pr_err("md/llbitmap: %s: read super block failed\n",
		       mdname(mddev));
		return -EIO;
	}

	sb = kmap_local_page(sb_page);
	if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) {
		pr_err("md/llbitmap: %s: invalid super block magic number\n",
		       mdname(mddev));
		goto out_put_page;
	}

	if (sb->version != cpu_to_le32(BITMAP_MAJOR_LOCKLESS)) {
		pr_err("md/llbitmap: %s: invalid super block version\n",
		       mdname(mddev));
		goto out_put_page;
	}

	if (memcmp(sb->uuid, mddev->uuid, 16)) {
		pr_err("md/llbitmap: %s: bitmap superblock UUID mismatch\n",
		       mdname(mddev));
		goto out_put_page;
	}

	if (mddev->bitmap_info.space == 0) {
		int room = le32_to_cpu(sb->sectors_reserved);

		if (room)
			mddev->bitmap_info.space = room;
		else
			mddev->bitmap_info.space = mddev->bitmap_info.default_space;
	}

	llbitmap->flags = le32_to_cpu(sb->state);
	if (test_and_clear_bit(BITMAP_FIRST_USE, &llbitmap->flags)) {
		ret = llbitmap_init(llbitmap);
		goto out_put_page;
	}

	chunksize = le32_to_cpu(sb->chunksize);
	if (!is_power_of_2(chunksize)) {
		pr_err("md/llbitmap: %s: chunksize not a power of 2\n",
		       mdname(mddev));
		goto out_put_page;
	}

	if (chunksize < DIV_ROUND_UP_SECTOR_T(mddev->resync_max_sectors,
					      mddev->bitmap_info.space << SECTOR_SHIFT)) {
		pr_err("md/llbitmap: %s: chunksize too small %lu < %llu / %lu\n",
		       mdname(mddev), chunksize, mddev->resync_max_sectors,
		       mddev->bitmap_info.space);
		goto out_put_page;
	}

	daemon_sleep = le32_to_cpu(sb->daemon_sleep);
	if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT / HZ) {
		pr_err("md/llbitmap: %s: daemon sleep %lu period out of range\n",
		       mdname(mddev), daemon_sleep);
		goto out_put_page;
	}

	events = le64_to_cpu(sb->events);
	if (events < mddev->events) {
		pr_warn("md/llbitmap: %s: bitmap file is out of date (%lu < %llu) -- forcing full recovery\n",
			mdname(mddev), events, mddev->events);
		set_bit(BITMAP_STALE, &llbitmap->flags);
	}

	sb->sync_size = cpu_to_le64(mddev->resync_max_sectors);
	mddev->bitmap_info.chunksize = chunksize;
	mddev->bitmap_info.daemon_sleep = daemon_sleep;

	llbitmap->barrier_idle = DEFAULT_BARRIER_IDLE;
	llbitmap->chunksize = chunksize;
	llbitmap->chunks = DIV_ROUND_UP_SECTOR_T(mddev->resync_max_sectors, chunksize);
	llbitmap->chunkshift = ffz(~chunksize);
	ret = llbitmap_cache_pages(llbitmap);

out_put_page:
	kunmap_local(sb);
	__free_page(sb_page);
	return ret;
}
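
/*
 * Timer callback that kicks the daemon. A rough illustrative timeline with
 * the default settings (daemon_sleep = 30s, barrier_idle = 5s):
 *
 *	t = 0s:  a first bit goes BitDirty, pending_timer is armed
 *	t = 30s: the timer fires and daemon_work is queued
 *	t = 30s: pages that have not been pinned for more than barrier_idle
 *		 seconds are suspended, their BitDirty bits become BitClean,
 *		 then the pages are resumed
 *
 * If the previous daemon_work is still running, only flag BITMAP_DAEMON_BUSY
 * so that the daemon retries by itself (see md_llbitmap_daemon_fn()).
 */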
static void llbitmap_pending_timer_fn(struct timer_list *pending_timer)
{
	struct llbitmap *llbitmap =
		container_of(pending_timer, struct llbitmap, pending_timer);

	if (work_busy(&llbitmap->daemon_work)) {
		pr_warn("md/llbitmap: %s: daemon_work not finished in %lu seconds\n",
			mdname(llbitmap->mddev),
			llbitmap->mddev->bitmap_info.daemon_sleep);
		set_bit(BITMAP_DAEMON_BUSY, &llbitmap->flags);
		return;
	}

	queue_work(md_llbitmap_io_wq, &llbitmap->daemon_work);
}

static void md_llbitmap_daemon_fn(struct work_struct *work)
{
	struct llbitmap *llbitmap =
		container_of(work, struct llbitmap, daemon_work);
	unsigned long start;
	unsigned long end;
	bool restart;
	int idx;

	if (llbitmap->mddev->degraded)
		return;

retry:
	start = 0;
	end = min(llbitmap->chunks, PAGE_SIZE - BITMAP_DATA_OFFSET) - 1;
	restart = false;

	for (idx = 0; idx < llbitmap->nr_pages; idx++) {
		struct llbitmap_page_ctl *pctl = llbitmap->pctl[idx];

		if (idx > 0) {
			start = end + 1;
			end = min(end + PAGE_SIZE, llbitmap->chunks - 1);
		}

		if (!test_bit(LLPageFlush, &pctl->flags) &&
		    time_before(jiffies, pctl->expire)) {
			restart = true;
			continue;
		}

		if (llbitmap_suspend_timeout(llbitmap, idx) < 0) {
			pr_warn("md/llbitmap: %s: %s waiting for page %d timeout\n",
				mdname(llbitmap->mddev), __func__, idx);
			continue;
		}

		llbitmap_state_machine(llbitmap, start, end, BitmapActionDaemon);
		llbitmap_resume(llbitmap, idx);
	}

	/*
	 * If the daemon took a long time to finish, retry to avoid missing
	 * the clearing of dirty bits.
	 */
	if (test_and_clear_bit(BITMAP_DAEMON_BUSY, &llbitmap->flags))
		goto retry;

	/* If some page is dirty but not expired, set up the timer again */
	if (restart)
		mod_timer(&llbitmap->pending_timer,
			  jiffies + llbitmap->mddev->bitmap_info.daemon_sleep * HZ);
}

static int llbitmap_create(struct mddev *mddev)
{
	struct llbitmap *llbitmap;
	int ret;

	ret = llbitmap_check_support(mddev);
	if (ret)
		return ret;

	llbitmap = kzalloc(sizeof(*llbitmap), GFP_KERNEL);
	if (!llbitmap)
		return -ENOMEM;

	llbitmap->mddev = mddev;
	llbitmap->io_size = bdev_logical_block_size(mddev->gendisk->part0);
	llbitmap->blocks_per_page = PAGE_SIZE / llbitmap->io_size;

	timer_setup(&llbitmap->pending_timer, llbitmap_pending_timer_fn, 0);
	INIT_WORK(&llbitmap->daemon_work, md_llbitmap_daemon_fn);
	atomic_set(&llbitmap->behind_writes, 0);
	init_waitqueue_head(&llbitmap->behind_wait);

	mutex_lock(&mddev->bitmap_info.mutex);
	mddev->bitmap = llbitmap;
	ret = llbitmap_read_sb(llbitmap);
	mutex_unlock(&mddev->bitmap_info.mutex);
	if (ret) {
		kfree(llbitmap);
		mddev->bitmap = NULL;
	}

	return ret;
}

static int llbitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long chunks;

	if (chunksize == 0)
		chunksize = llbitmap->chunksize;

	/* If there is enough space, leave the chunksize unchanged. */
	chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize);
	while (chunks > mddev->bitmap_info.space << SECTOR_SHIFT) {
		chunksize = chunksize << 1;
		chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize);
	}

	llbitmap->chunkshift = ffz(~chunksize);
	llbitmap->chunksize = chunksize;
	llbitmap->chunks = chunks;

	return 0;
}

static int llbitmap_load(struct mddev *mddev)
{
	enum llbitmap_action action = BitmapActionReload;
	struct llbitmap *llbitmap = mddev->bitmap;

	if (test_and_clear_bit(BITMAP_STALE, &llbitmap->flags))
		action = BitmapActionStale;

	llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1, action);
	return 0;
}

static void llbitmap_destroy(struct mddev *mddev)
{
	struct llbitmap *llbitmap = mddev->bitmap;

	if (!llbitmap)
		return;

	mutex_lock(&mddev->bitmap_info.mutex);

	timer_delete_sync(&llbitmap->pending_timer);
	flush_workqueue(md_llbitmap_io_wq);
	flush_workqueue(md_llbitmap_unplug_wq);

	mddev->bitmap = NULL;
	llbitmap_free_pages(llbitmap);
	kfree(llbitmap);
	mutex_unlock(&mddev->bitmap_info.mutex);
}

static void llbitmap_start_write(struct mddev *mddev, sector_t offset,
				 unsigned long sectors)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long start = offset >> llbitmap->chunkshift;
	unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
	int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
	int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;

	while (page_start <= page_end) {
		llbitmap_raise_barrier(llbitmap, page_start);
		page_start++;
	}

	llbitmap_state_machine(llbitmap, start, end, BitmapActionStartwrite);
}

static void llbitmap_end_write(struct mddev *mddev, sector_t offset,
			       unsigned long sectors)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long start = offset >> llbitmap->chunkshift;
	unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
	int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
	int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;

	while (page_start <= page_end) {
		llbitmap_release_barrier(llbitmap, page_start);
		page_start++;
	}
}
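
/*
 * Unlike writes, the start of a discard range is rounded up to a whole
 * chunk: a chunk whose head is only partially discarded still holds valid
 * data and must keep its state. For illustration, with a hypothetical 64k
 * chunksize, a discard starting at offset 32k begins at the chunk at 64k.
 */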
static void llbitmap_start_discard(struct mddev *mddev, sector_t offset,
				   unsigned long sectors)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long start = DIV_ROUND_UP_SECTOR_T(offset, llbitmap->chunksize);
	unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
	int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
	int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;

	while (page_start <= page_end) {
		llbitmap_raise_barrier(llbitmap, page_start);
		page_start++;
	}

	llbitmap_state_machine(llbitmap, start, end, BitmapActionDiscard);
}

static void llbitmap_end_discard(struct mddev *mddev, sector_t offset,
				 unsigned long sectors)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long start = DIV_ROUND_UP_SECTOR_T(offset, llbitmap->chunksize);
	unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
	int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
	int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;

	while (page_start <= page_end) {
		llbitmap_release_barrier(llbitmap, page_start);
		page_start++;
	}
}

static void llbitmap_unplug_fn(struct work_struct *work)
{
	struct llbitmap_unplug_work *unplug_work =
		container_of(work, struct llbitmap_unplug_work, work);
	struct llbitmap *llbitmap = unplug_work->llbitmap;
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);

	for (i = 0; i < llbitmap->nr_pages; i++) {
		if (!test_bit(LLPageDirty, &llbitmap->pctl[i]->flags) ||
		    !test_and_clear_bit(LLPageDirty, &llbitmap->pctl[i]->flags))
			continue;

		llbitmap_write_page(llbitmap, i);
	}

	blk_finish_plug(&plug);
	md_super_wait(llbitmap->mddev);
	complete(unplug_work->done);
}

static bool llbitmap_dirty(struct llbitmap *llbitmap)
{
	int i;

	for (i = 0; i < llbitmap->nr_pages; i++)
		if (test_bit(LLPageDirty, &llbitmap->pctl[i]->flags))
			return true;

	return false;
}

static void llbitmap_unplug(struct mddev *mddev, bool sync)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct llbitmap *llbitmap = mddev->bitmap;
	struct llbitmap_unplug_work unplug_work = {
		.llbitmap = llbitmap,
		.done = &done,
	};

	if (!llbitmap_dirty(llbitmap))
		return;

	/*
	 * Issuing new bitmap IO under the submit_bio() context would
	 * deadlock:
	 * - the bio will wait for the bitmap bio to be done, before it can
	 *   be issued;
	 * - the bitmap bio will be added to current->bio_list and wait for
	 *   this bio to be issued;
	 */
	INIT_WORK_ONSTACK(&unplug_work.work, llbitmap_unplug_fn);
	queue_work(md_llbitmap_unplug_wq, &unplug_work.work);
	wait_for_completion(&done);
	destroy_work_on_stack(&unplug_work.work);
}

/*
 * Force all bitmap pages to be written to disk. Called when stopping the
 * array, or every daemon_sleep seconds while the sync_thread is running.
 */
static void __llbitmap_flush(struct mddev *mddev)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);
	for (i = 0; i < llbitmap->nr_pages; i++) {
		struct llbitmap_page_ctl *pctl = llbitmap->pctl[i];

		/* mark all blocks as dirty */
		set_bit(LLPageDirty, &pctl->flags);
		bitmap_fill(pctl->dirty, llbitmap->blocks_per_page);
		llbitmap_write_page(llbitmap, i);
	}
	blk_finish_plug(&plug);
	md_super_wait(llbitmap->mddev);
}

static void llbitmap_flush(struct mddev *mddev)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	int i;

	for (i = 0; i < llbitmap->nr_pages; i++)
		set_bit(LLPageFlush, &llbitmap->pctl[i]->flags);

	timer_delete_sync(&llbitmap->pending_timer);
	queue_work(md_llbitmap_io_wq, &llbitmap->daemon_work);
	flush_work(&llbitmap->daemon_work);

	__llbitmap_flush(mddev);
}

/* This is used for raid5 lazy initial recovery */
static bool llbitmap_blocks_synced(struct mddev *mddev, sector_t offset)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long p = offset >> llbitmap->chunkshift;
	enum llbitmap_state c = llbitmap_read(llbitmap, p);

	return c == BitClean || c == BitDirty;
}

static sector_t llbitmap_skip_sync_blocks(struct mddev *mddev, sector_t offset)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long p = offset >> llbitmap->chunkshift;
	int blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1));
	enum llbitmap_state c = llbitmap_read(llbitmap, p);

	/* always skip unwritten blocks */
	if (c == BitUnwritten)
		return blocks;

	/* For a degraded array, don't skip */
	if (mddev->degraded)
		return 0;

	/* For resync, also skip clean/dirty blocks */
	if ((c == BitClean || c == BitDirty) &&
	    test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
		return blocks;

	return 0;
}

static bool llbitmap_start_sync(struct mddev *mddev, sector_t offset,
				sector_t *blocks, bool degraded)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long p = offset >> llbitmap->chunkshift;

	/*
	 * Handle one bit at a time, this is much simpler. And it doesn't
	 * matter if md_do_sync() loops a few more times.
	 */
	*blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1));
	return llbitmap_state_machine(llbitmap, p, p,
				      BitmapActionStartsync) == BitSyncing;
}
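
/*
 * A rough sketch of how the sync_thread drives the helpers around here, per
 * the state machine above (the loop shape is illustrative, not the exact md
 * core code):
 *
 *	while (sectors left) {
 *		if (llbitmap_start_sync(mddev, offset, &blocks, degraded))
 *			...	// chunk is BitSyncing now, sync it
 *		offset += blocks;
 *	}
 *	llbitmap_close_sync(mddev);	// Endsync: Syncing -> Dirty
 *
 * llbitmap_end_sync() is only called when something goes wrong, aborting the
 * chunks from @offset onwards back to BitNeedSync.
 */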

/* Something went wrong, the sync_thread stopped at @offset */
static void llbitmap_end_sync(struct mddev *mddev, sector_t offset,
			      sector_t *blocks)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long p = offset >> llbitmap->chunkshift;

	*blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1));
	llbitmap_state_machine(llbitmap, p, llbitmap->chunks - 1,
			       BitmapActionAbortsync);
}

/* A full sync_thread is finished */
static void llbitmap_close_sync(struct mddev *mddev)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	int i;

	for (i = 0; i < llbitmap->nr_pages; i++) {
		struct llbitmap_page_ctl *pctl = llbitmap->pctl[i];

		/* let daemon_fn clear dirty bits immediately */
		WRITE_ONCE(pctl->expire, jiffies);
	}

	llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1,
			       BitmapActionEndsync);
}

/*
 * The sync_thread has reached @sector; update the metadata every
 * daemon_sleep seconds, just in case the sync_thread has to restart after a
 * power failure.
 */
static void llbitmap_cond_end_sync(struct mddev *mddev, sector_t sector,
				   bool force)
{
	struct llbitmap *llbitmap = mddev->bitmap;

	if (sector == 0) {
		llbitmap->last_end_sync = jiffies;
		return;
	}

	if (time_before(jiffies, llbitmap->last_end_sync +
				 HZ * mddev->bitmap_info.daemon_sleep))
		return;

	wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));

	mddev->curr_resync_completed = sector;
	set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
	llbitmap_state_machine(llbitmap, 0, sector >> llbitmap->chunkshift,
			       BitmapActionEndsync);
	__llbitmap_flush(mddev);

	llbitmap->last_end_sync = jiffies;
	sysfs_notify_dirent_safe(mddev->sysfs_completed);
}

static bool llbitmap_enabled(void *data, bool flush)
{
	struct llbitmap *llbitmap = data;

	return llbitmap && !test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags);
}

static void llbitmap_dirty_bits(struct mddev *mddev, unsigned long s,
				unsigned long e)
{
	llbitmap_state_machine(mddev->bitmap, s, e, BitmapActionStartwrite);
}

static void llbitmap_write_sb(struct llbitmap *llbitmap)
{
	int nr_blocks = DIV_ROUND_UP(BITMAP_DATA_OFFSET, llbitmap->io_size);

	bitmap_fill(llbitmap->pctl[0]->dirty, nr_blocks);
	llbitmap_write_page(llbitmap, 0);
	md_super_wait(llbitmap->mddev);
}

static void llbitmap_update_sb(void *data)
{
	struct llbitmap *llbitmap = data;
	struct mddev *mddev = llbitmap->mddev;
	struct page *sb_page;
	bitmap_super_t *sb;

	if (test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags))
		return;

	sb_page = llbitmap_read_page(llbitmap, 0);
	if (IS_ERR(sb_page)) {
		pr_err("%s: %s: read super block failed\n", __func__,
		       mdname(mddev));
		set_bit(BITMAP_WRITE_ERROR, &llbitmap->flags);
		return;
	}

	if (mddev->events < llbitmap->events_cleared)
		llbitmap->events_cleared = mddev->events;

	sb = kmap_local_page(sb_page);
	sb->events = cpu_to_le64(mddev->events);
	sb->state = cpu_to_le32(llbitmap->flags);
	sb->chunksize = cpu_to_le32(llbitmap->chunksize);
	sb->sync_size = cpu_to_le64(mddev->resync_max_sectors);
	sb->events_cleared = cpu_to_le64(llbitmap->events_cleared);
	sb->sectors_reserved = cpu_to_le32(mddev->bitmap_info.space);
	sb->daemon_sleep = cpu_to_le32(mddev->bitmap_info.daemon_sleep);

	kunmap_local(sb);
	llbitmap_write_sb(llbitmap);
}

static int llbitmap_get_stats(void *data, struct md_bitmap_stats *stats)
{
	struct llbitmap *llbitmap = data;

	memset(stats, 0, sizeof(*stats));

	stats->missing_pages = 0;
	stats->pages = llbitmap->nr_pages;
	stats->file_pages = llbitmap->nr_pages;

	stats->behind_writes = atomic_read(&llbitmap->behind_writes);
	stats->behind_wait = wq_has_sleeper(&llbitmap->behind_wait);
	stats->events_cleared = llbitmap->events_cleared;

	return 0;
}

/* just flag all pages as needing to be written */
static void llbitmap_write_all(struct mddev *mddev)
{
	int i;
	struct llbitmap *llbitmap = mddev->bitmap;

	for (i = 0; i < llbitmap->nr_pages; i++) {
		struct llbitmap_page_ctl *pctl = llbitmap->pctl[i];

		set_bit(LLPageDirty, &pctl->flags);
		bitmap_fill(pctl->dirty, llbitmap->blocks_per_page);
	}
}

static void llbitmap_start_behind_write(struct mddev *mddev)
{
	struct llbitmap *llbitmap = mddev->bitmap;

	atomic_inc(&llbitmap->behind_writes);
}

static void llbitmap_end_behind_write(struct mddev *mddev)
{
	struct llbitmap *llbitmap = mddev->bitmap;

	if (atomic_dec_and_test(&llbitmap->behind_writes))
		wake_up(&llbitmap->behind_wait);
}

static void llbitmap_wait_behind_writes(struct mddev *mddev)
{
	struct llbitmap *llbitmap = mddev->bitmap;

	if (!llbitmap)
		return;

	wait_event(llbitmap->behind_wait,
		   atomic_read(&llbitmap->behind_writes) == 0);
}
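
/*
 * sysfs interface, exposed under the array's "llbitmap" attribute group. As
 * an illustrative example (the device name and the counts are hypothetical):
 *
 *	$ cat /sys/block/md0/md/llbitmap/bits
 *	unwritten 1024
 *	clean 128
 *	dirty 2
 *	need sync 0
 *	syncing 0
 */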
static ssize_t bits_show(struct mddev *mddev, char *page)
{
	struct llbitmap *llbitmap;
	int bits[BitStateCount] = {0};
	loff_t start = 0;

	mutex_lock(&mddev->bitmap_info.mutex);
	llbitmap = mddev->bitmap;
	if (!llbitmap || !llbitmap->pctl) {
		mutex_unlock(&mddev->bitmap_info.mutex);
		return sprintf(page, "no bitmap\n");
	}

	if (test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags)) {
		mutex_unlock(&mddev->bitmap_info.mutex);
		return sprintf(page, "bitmap io error\n");
	}

	while (start < llbitmap->chunks) {
		enum llbitmap_state c = llbitmap_read(llbitmap, start);

		if (c < 0 || c >= BitStateCount)
			pr_err("%s: invalid bit %llu state %d\n",
			       __func__, start, c);
		else
			bits[c]++;
		start++;
	}

	mutex_unlock(&mddev->bitmap_info.mutex);
	return sprintf(page, "unwritten %d\nclean %d\ndirty %d\nneed sync %d\nsyncing %d\n",
		       bits[BitUnwritten], bits[BitClean], bits[BitDirty],
		       bits[BitNeedSync], bits[BitSyncing]);
}

static struct md_sysfs_entry llbitmap_bits = __ATTR_RO(bits);

static ssize_t metadata_show(struct mddev *mddev, char *page)
{
	struct llbitmap *llbitmap;
	ssize_t ret;

	mutex_lock(&mddev->bitmap_info.mutex);
	llbitmap = mddev->bitmap;
	if (!llbitmap) {
		mutex_unlock(&mddev->bitmap_info.mutex);
		return sprintf(page, "no bitmap\n");
	}

	ret = sprintf(page, "chunksize %lu\nchunkshift %lu\nchunks %lu\noffset %llu\ndaemon_sleep %lu\n",
		      llbitmap->chunksize, llbitmap->chunkshift,
		      llbitmap->chunks, mddev->bitmap_info.offset,
		      llbitmap->mddev->bitmap_info.daemon_sleep);
	mutex_unlock(&mddev->bitmap_info.mutex);

	return ret;
}

static struct md_sysfs_entry llbitmap_metadata = __ATTR_RO(metadata);

static ssize_t
daemon_sleep_show(struct mddev *mddev, char *page)
{
	return sprintf(page, "%lu\n", mddev->bitmap_info.daemon_sleep);
}

static ssize_t
daemon_sleep_store(struct mddev *mddev, const char *buf, size_t len)
{
	unsigned long timeout;
	int rv = kstrtoul(buf, 10, &timeout);

	if (rv)
		return rv;

	mddev->bitmap_info.daemon_sleep = timeout;
	return len;
}

static struct md_sysfs_entry llbitmap_daemon_sleep = __ATTR_RW(daemon_sleep);

static ssize_t
barrier_idle_show(struct mddev *mddev, char *page)
{
	struct llbitmap *llbitmap = mddev->bitmap;

	return sprintf(page, "%lu\n", llbitmap->barrier_idle);
}

static ssize_t
barrier_idle_store(struct mddev *mddev, const char *buf, size_t len)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long timeout;
	int rv = kstrtoul(buf, 10, &timeout);

	if (rv)
		return rv;

	llbitmap->barrier_idle = timeout;
	return len;
}

static struct md_sysfs_entry llbitmap_barrier_idle = __ATTR_RW(barrier_idle);

static struct attribute *md_llbitmap_attrs[] = {
	&llbitmap_bits.attr,
	&llbitmap_metadata.attr,
	&llbitmap_daemon_sleep.attr,
	&llbitmap_barrier_idle.attr,
	NULL
};

static struct attribute_group md_llbitmap_group = {
	.name = "llbitmap",
	.attrs = md_llbitmap_attrs,
};

static struct bitmap_operations llbitmap_ops = {
	.head = {
		.type = MD_BITMAP,
		.id = ID_LLBITMAP,
		.name = "llbitmap",
	},

	.enabled = llbitmap_enabled,
	.create = llbitmap_create,
	.resize = llbitmap_resize,
	.load = llbitmap_load,
	.destroy = llbitmap_destroy,

	.start_write = llbitmap_start_write,
	.end_write = llbitmap_end_write,
	.start_discard = llbitmap_start_discard,
	.end_discard = llbitmap_end_discard,
	.unplug = llbitmap_unplug,
	.flush = llbitmap_flush,

	.start_behind_write = llbitmap_start_behind_write,
	.end_behind_write = llbitmap_end_behind_write,
	.wait_behind_writes = llbitmap_wait_behind_writes,

	.blocks_synced = llbitmap_blocks_synced,
	.skip_sync_blocks = llbitmap_skip_sync_blocks,
	.start_sync = llbitmap_start_sync,
	.end_sync = llbitmap_end_sync,
	.close_sync = llbitmap_close_sync,
	.cond_end_sync = llbitmap_cond_end_sync,

	.update_sb = llbitmap_update_sb,
	.get_stats = llbitmap_get_stats,
	.dirty_bits = llbitmap_dirty_bits,
	.write_all = llbitmap_write_all,

	.group = &md_llbitmap_group,
};

int md_llbitmap_init(void)
{
	md_llbitmap_io_wq = alloc_workqueue("md_llbitmap_io",
					    WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
	if (!md_llbitmap_io_wq)
		return -ENOMEM;

	md_llbitmap_unplug_wq = alloc_workqueue("md_llbitmap_unplug",
						WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
	if (!md_llbitmap_unplug_wq) {
		destroy_workqueue(md_llbitmap_io_wq);
		md_llbitmap_io_wq = NULL;
		return -ENOMEM;
	}

	return register_md_submodule(&llbitmap_ops.head);
}

void md_llbitmap_exit(void)
{
	destroy_workqueue(md_llbitmap_io_wq);
	md_llbitmap_io_wq = NULL;
	destroy_workqueue(md_llbitmap_unplug_wq);
	md_llbitmap_unplug_wq = NULL;
	unregister_md_submodule(&llbitmap_ops.head);
}