1 // SPDX-License-Identifier: GPL-2.0-or-later 2 3 #include <linux/blkdev.h> 4 #include <linux/module.h> 5 #include <linux/errno.h> 6 #include <linux/slab.h> 7 #include <linux/init.h> 8 #include <linux/timer.h> 9 #include <linux/sched.h> 10 #include <linux/list.h> 11 #include <linux/file.h> 12 #include <linux/seq_file.h> 13 #include <trace/events/block.h> 14 15 #include "md.h" 16 #include "md-bitmap.h" 17 18 /* 19 * #### Background 20 * 21 * Redundant data is used to enhance data fault tolerance, and the storage 22 * methods for redundant data vary depending on the RAID levels. And it's 23 * important to maintain the consistency of redundant data. 24 * 25 * Bitmap is used to record which data blocks have been synchronized and which 26 * ones need to be resynchronized or recovered. Each bit in the bitmap 27 * represents a segment of data in the array. When a bit is set, it indicates 28 * that the multiple redundant copies of that data segment may not be 29 * consistent. Data synchronization can be performed based on the bitmap after 30 * power failure or readding a disk. If there is no bitmap, a full disk 31 * synchronization is required. 32 * 33 * #### Key Features 34 * 35 * - IO fastpath is lockless, if user issues lots of write IO to the same 36 * bitmap bit in a short time, only the first write has additional overhead 37 * to update bitmap bit, no additional overhead for the following writes; 38 * - support only resync or recover written data, means in the case creating 39 * new array or replacing with a new disk, there is no need to do a full disk 40 * resync/recovery; 41 * 42 * #### Key Concept 43 * 44 * ##### State Machine 45 * 46 * Each bit is one byte, contain 6 different states, see llbitmap_state. And 47 * there are total 8 different actions, see llbitmap_action, can change state: 48 * 49 * llbitmap state machine: transitions between states 50 * 51 * | | Startwrite | Startsync | Endsync | Abortsync| 52 * | --------- | ---------- | --------- | ------- | ------- | 53 * | Unwritten | Dirty | x | x | x | 54 * | Clean | Dirty | x | x | x | 55 * | Dirty | x | x | x | x | 56 * | NeedSync | x | Syncing | x | x | 57 * | Syncing | x | Syncing | Dirty | NeedSync | 58 * 59 * | | Reload | Daemon | Discard | Stale | 60 * | --------- | -------- | ------ | --------- | --------- | 61 * | Unwritten | x | x | x | x | 62 * | Clean | x | x | Unwritten | NeedSync | 63 * | Dirty | NeedSync | Clean | Unwritten | NeedSync | 64 * | NeedSync | x | x | Unwritten | x | 65 * | Syncing | NeedSync | x | Unwritten | NeedSync | 66 * 67 * Typical scenarios: 68 * 69 * 1) Create new array 70 * All bits will be set to Unwritten by default, if --assume-clean is set, 71 * all bits will be set to Clean instead. 72 * 73 * 2) write data, raid1/raid10 have full copy of data, while raid456 doesn't and 74 * rely on xor data 75 * 76 * 2.1) write new data to raid1/raid10: 77 * Unwritten --StartWrite--> Dirty 78 * 79 * 2.2) write new data to raid456: 80 * Unwritten --StartWrite--> NeedSync 81 * 82 * Because the initial recover for raid456 is skipped, the xor data is not built 83 * yet, the bit must be set to NeedSync first and after lazy initial recover is 84 * finished, the bit will finally set to Dirty(see 5.1 and 5.4); 85 * 86 * 2.3) cover write 87 * Clean --StartWrite--> Dirty 88 * 89 * 3) daemon, if the array is not degraded: 90 * Dirty --Daemon--> Clean 91 * 92 * 4) discard 93 * {Clean, Dirty, NeedSync, Syncing} --Discard--> Unwritten 94 * 95 * 5) resync and recover 96 * 97 * 5.1) common process 98 * NeedSync --Startsync--> Syncing --Endsync--> Dirty --Daemon--> Clean 99 * 100 * 5.2) resync after power failure 101 * Dirty --Reload--> NeedSync 102 * 103 * 5.3) recover while replacing with a new disk 104 * By default, the old bitmap framework will recover all data, and llbitmap 105 * implements this by a new helper, see llbitmap_skip_sync_blocks: 106 * 107 * skip recover for bits other than dirty or clean; 108 * 109 * 5.4) lazy initial recover for raid5: 110 * By default, the old bitmap framework will only allow new recover when there 111 * are spares(new disk), a new recovery flag MD_RECOVERY_LAZY_RECOVER is added 112 * to perform raid456 lazy recover for set bits(from 2.2). 113 * 114 * 6. special handling for degraded array: 115 * 116 * - Dirty bits will never be cleared, daemon will just do nothing, so that if 117 * a disk is readded, Clean bits can be skipped with recovery; 118 * - Dirty bits will convert to Syncing from start write, to do data recovery 119 * for new added disks; 120 * - New write will convert bits to NeedSync directly; 121 * 122 * ##### Bitmap IO 123 * 124 * ##### Chunksize 125 * 126 * The default bitmap size is 128k, incluing 1k bitmap super block, and 127 * the default size of segment of data in the array each bit(chunksize) is 64k, 128 * and chunksize will adjust to twice the old size each time if the total number 129 * bits is not less than 127k.(see llbitmap_init) 130 * 131 * ##### READ 132 * 133 * While creating bitmap, all pages will be allocated and read for llbitmap, 134 * there won't be read afterwards 135 * 136 * ##### WRITE 137 * 138 * WRITE IO is divided into logical_block_size of the array, the dirty state 139 * of each block is tracked independently, for example: 140 * 141 * each page is 4k, contain 8 blocks; each block is 512 bytes contain 512 bit; 142 * 143 * | page0 | page1 | ... | page 31 | 144 * | | 145 * | \-----------------------\ 146 * | | 147 * | block0 | block1 | ... | block 8| 148 * | | 149 * | \-----------------\ 150 * | | 151 * | bit0 | bit1 | ... | bit511 | 152 * 153 * From IO path, if one bit is changed to Dirty or NeedSync, the corresponding 154 * subpage will be marked dirty, such block must write first before the IO is 155 * issued. This behaviour will affect IO performance, to reduce the impact, if 156 * multiple bits are changed in the same block in a short time, all bits in this 157 * block will be changed to Dirty/NeedSync, so that there won't be any overhead 158 * until daemon clears dirty bits. 159 * 160 * ##### Dirty Bits synchronization 161 * 162 * IO fast path will set bits to dirty, and those dirty bits will be cleared 163 * by daemon after IO is done. llbitmap_page_ctl is used to synchronize between 164 * IO path and daemon; 165 * 166 * IO path: 167 * 1) try to grab a reference, if succeed, set expire time after 5s and return; 168 * 2) if failed to grab a reference, wait for daemon to finish clearing dirty 169 * bits; 170 * 171 * Daemon (Daemon will be woken up every daemon_sleep seconds): 172 * For each page: 173 * 1) check if page expired, if not skip this page; for expired page: 174 * 2) suspend the page and wait for inflight write IO to be done; 175 * 3) change dirty page to clean; 176 * 4) resume the page; 177 */ 178 179 #define BITMAP_DATA_OFFSET 1024 180 181 /* 64k is the max IO size of sync IO for raid1/raid10 */ 182 #define MIN_CHUNK_SIZE (64 * 2) 183 184 /* By default, daemon will be woken up every 30s */ 185 #define DEFAULT_DAEMON_SLEEP 30 186 187 /* 188 * Dirtied bits that have not been accessed for more than 5s will be cleared 189 * by daemon. 190 */ 191 #define DEFAULT_BARRIER_IDLE 5 192 193 enum llbitmap_state { 194 /* No valid data, init state after assemble the array */ 195 BitUnwritten = 0, 196 /* data is consistent */ 197 BitClean, 198 /* data will be consistent after IO is done, set directly for writes */ 199 BitDirty, 200 /* 201 * data need to be resynchronized: 202 * 1) set directly for writes if array is degraded, prevent full disk 203 * synchronization after readding a disk; 204 * 2) reassemble the array after power failure, and dirty bits are 205 * found after reloading the bitmap; 206 * 3) set for first write for raid5, to build initial xor data lazily 207 */ 208 BitNeedSync, 209 /* data is synchronizing */ 210 BitSyncing, 211 BitStateCount, 212 BitNone = 0xff, 213 }; 214 215 enum llbitmap_action { 216 /* User write new data, this is the only action from IO fast path */ 217 BitmapActionStartwrite = 0, 218 /* Start recovery */ 219 BitmapActionStartsync, 220 /* Finish recovery */ 221 BitmapActionEndsync, 222 /* Failed recovery */ 223 BitmapActionAbortsync, 224 /* Reassemble the array */ 225 BitmapActionReload, 226 /* Daemon thread is trying to clear dirty bits */ 227 BitmapActionDaemon, 228 /* Data is deleted */ 229 BitmapActionDiscard, 230 /* 231 * Bitmap is stale, mark all bits in addition to BitUnwritten to 232 * BitNeedSync. 233 */ 234 BitmapActionStale, 235 BitmapActionCount, 236 /* Init state is BitUnwritten */ 237 BitmapActionInit, 238 }; 239 240 enum llbitmap_page_state { 241 LLPageFlush = 0, 242 LLPageDirty, 243 }; 244 245 struct llbitmap_page_ctl { 246 char *state; 247 struct page *page; 248 unsigned long expire; 249 unsigned long flags; 250 wait_queue_head_t wait; 251 struct percpu_ref active; 252 /* Per block size dirty state, maximum 64k page / 1 sector = 128 */ 253 unsigned long dirty[]; 254 }; 255 256 struct llbitmap { 257 struct mddev *mddev; 258 struct llbitmap_page_ctl **pctl; 259 260 unsigned int nr_pages; 261 unsigned int io_size; 262 unsigned int blocks_per_page; 263 264 /* shift of one chunk */ 265 unsigned long chunkshift; 266 /* size of one chunk in sector */ 267 unsigned long chunksize; 268 /* total number of chunks */ 269 unsigned long chunks; 270 unsigned long last_end_sync; 271 /* 272 * time in seconds that dirty bits will be cleared if the page is not 273 * accessed. 274 */ 275 unsigned long barrier_idle; 276 /* fires on first BitDirty state */ 277 struct timer_list pending_timer; 278 struct work_struct daemon_work; 279 280 unsigned long flags; 281 __u64 events_cleared; 282 283 /* for slow disks */ 284 atomic_t behind_writes; 285 wait_queue_head_t behind_wait; 286 }; 287 288 struct llbitmap_unplug_work { 289 struct work_struct work; 290 struct llbitmap *llbitmap; 291 struct completion *done; 292 }; 293 294 static struct workqueue_struct *md_llbitmap_io_wq; 295 static struct workqueue_struct *md_llbitmap_unplug_wq; 296 297 static char state_machine[BitStateCount][BitmapActionCount] = { 298 [BitUnwritten] = { 299 [BitmapActionStartwrite] = BitDirty, 300 [BitmapActionStartsync] = BitNone, 301 [BitmapActionEndsync] = BitNone, 302 [BitmapActionAbortsync] = BitNone, 303 [BitmapActionReload] = BitNone, 304 [BitmapActionDaemon] = BitNone, 305 [BitmapActionDiscard] = BitNone, 306 [BitmapActionStale] = BitNone, 307 }, 308 [BitClean] = { 309 [BitmapActionStartwrite] = BitDirty, 310 [BitmapActionStartsync] = BitNone, 311 [BitmapActionEndsync] = BitNone, 312 [BitmapActionAbortsync] = BitNone, 313 [BitmapActionReload] = BitNone, 314 [BitmapActionDaemon] = BitNone, 315 [BitmapActionDiscard] = BitUnwritten, 316 [BitmapActionStale] = BitNeedSync, 317 }, 318 [BitDirty] = { 319 [BitmapActionStartwrite] = BitNone, 320 [BitmapActionStartsync] = BitNone, 321 [BitmapActionEndsync] = BitNone, 322 [BitmapActionAbortsync] = BitNone, 323 [BitmapActionReload] = BitNeedSync, 324 [BitmapActionDaemon] = BitClean, 325 [BitmapActionDiscard] = BitUnwritten, 326 [BitmapActionStale] = BitNeedSync, 327 }, 328 [BitNeedSync] = { 329 [BitmapActionStartwrite] = BitNone, 330 [BitmapActionStartsync] = BitSyncing, 331 [BitmapActionEndsync] = BitNone, 332 [BitmapActionAbortsync] = BitNone, 333 [BitmapActionReload] = BitNone, 334 [BitmapActionDaemon] = BitNone, 335 [BitmapActionDiscard] = BitUnwritten, 336 [BitmapActionStale] = BitNone, 337 }, 338 [BitSyncing] = { 339 [BitmapActionStartwrite] = BitNone, 340 [BitmapActionStartsync] = BitSyncing, 341 [BitmapActionEndsync] = BitDirty, 342 [BitmapActionAbortsync] = BitNeedSync, 343 [BitmapActionReload] = BitNeedSync, 344 [BitmapActionDaemon] = BitNone, 345 [BitmapActionDiscard] = BitUnwritten, 346 [BitmapActionStale] = BitNeedSync, 347 }, 348 }; 349 350 static void __llbitmap_flush(struct mddev *mddev); 351 352 static enum llbitmap_state llbitmap_read(struct llbitmap *llbitmap, loff_t pos) 353 { 354 unsigned int idx; 355 unsigned int offset; 356 357 pos += BITMAP_DATA_OFFSET; 358 idx = pos >> PAGE_SHIFT; 359 offset = offset_in_page(pos); 360 361 return llbitmap->pctl[idx]->state[offset]; 362 } 363 364 /* set all the bits in the subpage as dirty */ 365 static void llbitmap_infect_dirty_bits(struct llbitmap *llbitmap, 366 struct llbitmap_page_ctl *pctl, 367 unsigned int block) 368 { 369 bool level_456 = raid_is_456(llbitmap->mddev); 370 unsigned int io_size = llbitmap->io_size; 371 int pos; 372 373 for (pos = block * io_size; pos < (block + 1) * io_size; pos++) { 374 switch (pctl->state[pos]) { 375 case BitUnwritten: 376 pctl->state[pos] = level_456 ? BitNeedSync : BitDirty; 377 break; 378 case BitClean: 379 pctl->state[pos] = BitDirty; 380 break; 381 }; 382 } 383 } 384 385 static void llbitmap_set_page_dirty(struct llbitmap *llbitmap, int idx, 386 int offset) 387 { 388 struct llbitmap_page_ctl *pctl = llbitmap->pctl[idx]; 389 unsigned int io_size = llbitmap->io_size; 390 int block = offset / io_size; 391 int pos; 392 393 if (!test_bit(LLPageDirty, &pctl->flags)) 394 set_bit(LLPageDirty, &pctl->flags); 395 396 /* 397 * For degraded array, dirty bits will never be cleared, and we must 398 * resync all the dirty bits, hence skip infect new dirty bits to 399 * prevent resync unnecessary data. 400 */ 401 if (llbitmap->mddev->degraded) { 402 set_bit(block, pctl->dirty); 403 return; 404 } 405 406 /* 407 * The subpage usually contains a total of 512 bits. If any single bit 408 * within the subpage is marked as dirty, the entire sector will be 409 * written. To avoid impacting write performance, when multiple bits 410 * within the same sector are modified within llbitmap->barrier_idle, 411 * all bits in the sector will be collectively marked as dirty at once. 412 */ 413 if (test_and_set_bit(block, pctl->dirty)) { 414 llbitmap_infect_dirty_bits(llbitmap, pctl, block); 415 return; 416 } 417 418 for (pos = block * io_size; pos < (block + 1) * io_size; pos++) { 419 if (pos == offset) 420 continue; 421 if (pctl->state[pos] == BitDirty || 422 pctl->state[pos] == BitNeedSync) { 423 llbitmap_infect_dirty_bits(llbitmap, pctl, block); 424 return; 425 } 426 } 427 } 428 429 static void llbitmap_write(struct llbitmap *llbitmap, enum llbitmap_state state, 430 loff_t pos) 431 { 432 unsigned int idx; 433 unsigned int bit; 434 435 pos += BITMAP_DATA_OFFSET; 436 idx = pos >> PAGE_SHIFT; 437 bit = offset_in_page(pos); 438 439 llbitmap->pctl[idx]->state[bit] = state; 440 if (state == BitDirty || state == BitNeedSync) 441 llbitmap_set_page_dirty(llbitmap, idx, bit); 442 } 443 444 static struct page *llbitmap_read_page(struct llbitmap *llbitmap, int idx) 445 { 446 struct mddev *mddev = llbitmap->mddev; 447 struct page *page = NULL; 448 struct md_rdev *rdev; 449 450 if (llbitmap->pctl && llbitmap->pctl[idx]) 451 page = llbitmap->pctl[idx]->page; 452 if (page) 453 return page; 454 455 page = alloc_page(GFP_KERNEL | __GFP_ZERO); 456 if (!page) 457 return ERR_PTR(-ENOMEM); 458 459 rdev_for_each(rdev, mddev) { 460 sector_t sector; 461 462 if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags)) 463 continue; 464 465 sector = mddev->bitmap_info.offset + 466 (idx << PAGE_SECTORS_SHIFT); 467 468 if (sync_page_io(rdev, sector, PAGE_SIZE, page, REQ_OP_READ, 469 true)) 470 return page; 471 472 md_error(mddev, rdev); 473 } 474 475 __free_page(page); 476 return ERR_PTR(-EIO); 477 } 478 479 static void llbitmap_write_page(struct llbitmap *llbitmap, int idx) 480 { 481 struct page *page = llbitmap->pctl[idx]->page; 482 struct mddev *mddev = llbitmap->mddev; 483 struct md_rdev *rdev; 484 int block; 485 486 for (block = 0; block < llbitmap->blocks_per_page; block++) { 487 struct llbitmap_page_ctl *pctl = llbitmap->pctl[idx]; 488 489 if (!test_and_clear_bit(block, pctl->dirty)) 490 continue; 491 492 rdev_for_each(rdev, mddev) { 493 sector_t sector; 494 sector_t bit_sector = llbitmap->io_size >> SECTOR_SHIFT; 495 496 if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags)) 497 continue; 498 499 sector = mddev->bitmap_info.offset + rdev->sb_start + 500 (idx << PAGE_SECTORS_SHIFT) + 501 block * bit_sector; 502 md_write_metadata(mddev, rdev, sector, 503 llbitmap->io_size, page, 504 block * llbitmap->io_size); 505 } 506 } 507 } 508 509 static void active_release(struct percpu_ref *ref) 510 { 511 struct llbitmap_page_ctl *pctl = 512 container_of(ref, struct llbitmap_page_ctl, active); 513 514 wake_up(&pctl->wait); 515 } 516 517 static void llbitmap_free_pages(struct llbitmap *llbitmap) 518 { 519 int i; 520 521 if (!llbitmap->pctl) 522 return; 523 524 for (i = 0; i < llbitmap->nr_pages; i++) { 525 struct llbitmap_page_ctl *pctl = llbitmap->pctl[i]; 526 527 if (!pctl || !pctl->page) 528 break; 529 530 __free_page(pctl->page); 531 percpu_ref_exit(&pctl->active); 532 } 533 534 kfree(llbitmap->pctl[0]); 535 kfree(llbitmap->pctl); 536 llbitmap->pctl = NULL; 537 } 538 539 static int llbitmap_cache_pages(struct llbitmap *llbitmap) 540 { 541 struct llbitmap_page_ctl *pctl; 542 unsigned int nr_pages = DIV_ROUND_UP(llbitmap->chunks + 543 BITMAP_DATA_OFFSET, PAGE_SIZE); 544 unsigned int size = struct_size(pctl, dirty, BITS_TO_LONGS( 545 llbitmap->blocks_per_page)); 546 int i; 547 548 llbitmap->pctl = kmalloc_array(nr_pages, sizeof(void *), 549 GFP_KERNEL | __GFP_ZERO); 550 if (!llbitmap->pctl) 551 return -ENOMEM; 552 553 size = round_up(size, cache_line_size()); 554 pctl = kmalloc_array(nr_pages, size, GFP_KERNEL | __GFP_ZERO); 555 if (!pctl) { 556 kfree(llbitmap->pctl); 557 return -ENOMEM; 558 } 559 560 llbitmap->nr_pages = nr_pages; 561 562 for (i = 0; i < nr_pages; i++, pctl = (void *)pctl + size) { 563 struct page *page = llbitmap_read_page(llbitmap, i); 564 565 llbitmap->pctl[i] = pctl; 566 567 if (IS_ERR(page)) { 568 llbitmap_free_pages(llbitmap); 569 return PTR_ERR(page); 570 } 571 572 if (percpu_ref_init(&pctl->active, active_release, 573 PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) { 574 __free_page(page); 575 llbitmap_free_pages(llbitmap); 576 return -ENOMEM; 577 } 578 579 pctl->page = page; 580 pctl->state = page_address(page); 581 init_waitqueue_head(&pctl->wait); 582 } 583 584 return 0; 585 } 586 587 static void llbitmap_init_state(struct llbitmap *llbitmap) 588 { 589 enum llbitmap_state state = BitUnwritten; 590 unsigned long i; 591 592 if (test_and_clear_bit(BITMAP_CLEAN, &llbitmap->flags)) 593 state = BitClean; 594 595 for (i = 0; i < llbitmap->chunks; i++) 596 llbitmap_write(llbitmap, state, i); 597 } 598 599 /* The return value is only used from resync, where @start == @end. */ 600 static enum llbitmap_state llbitmap_state_machine(struct llbitmap *llbitmap, 601 unsigned long start, 602 unsigned long end, 603 enum llbitmap_action action) 604 { 605 struct mddev *mddev = llbitmap->mddev; 606 enum llbitmap_state state = BitNone; 607 bool level_456 = raid_is_456(llbitmap->mddev); 608 bool need_resync = false; 609 bool need_recovery = false; 610 611 if (test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags)) 612 return BitNone; 613 614 if (action == BitmapActionInit) { 615 llbitmap_init_state(llbitmap); 616 return BitNone; 617 } 618 619 while (start <= end) { 620 enum llbitmap_state c = llbitmap_read(llbitmap, start); 621 622 if (c < 0 || c >= BitStateCount) { 623 pr_err("%s: invalid bit %lu state %d action %d, forcing resync\n", 624 __func__, start, c, action); 625 state = BitNeedSync; 626 goto write_bitmap; 627 } 628 629 if (c == BitNeedSync) 630 need_resync = !mddev->degraded; 631 632 state = state_machine[c][action]; 633 634 write_bitmap: 635 if (unlikely(mddev->degraded)) { 636 /* For degraded array, mark new data as need sync. */ 637 if (state == BitDirty && 638 action == BitmapActionStartwrite) 639 state = BitNeedSync; 640 /* 641 * For degraded array, resync dirty data as well, noted 642 * if array is still degraded after resync is done, all 643 * new data will still be dirty until array is clean. 644 */ 645 else if (c == BitDirty && 646 action == BitmapActionStartsync) 647 state = BitSyncing; 648 } else if (c == BitUnwritten && state == BitDirty && 649 action == BitmapActionStartwrite && level_456) { 650 /* Delay raid456 initial recovery to first write. */ 651 state = BitNeedSync; 652 } 653 654 if (state == BitNone) { 655 start++; 656 continue; 657 } 658 659 llbitmap_write(llbitmap, state, start); 660 661 if (state == BitNeedSync) 662 need_resync = !mddev->degraded; 663 else if (state == BitDirty && 664 !timer_pending(&llbitmap->pending_timer)) 665 mod_timer(&llbitmap->pending_timer, 666 jiffies + mddev->bitmap_info.daemon_sleep * HZ); 667 668 start++; 669 } 670 671 if (need_resync && level_456) 672 need_recovery = true; 673 674 if (need_recovery) { 675 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 676 set_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery); 677 md_wakeup_thread(mddev->thread); 678 } else if (need_resync) { 679 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 680 set_bit(MD_RECOVERY_SYNC, &mddev->recovery); 681 md_wakeup_thread(mddev->thread); 682 } 683 684 return state; 685 } 686 687 static void llbitmap_raise_barrier(struct llbitmap *llbitmap, int page_idx) 688 { 689 struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx]; 690 691 retry: 692 if (likely(percpu_ref_tryget_live(&pctl->active))) { 693 WRITE_ONCE(pctl->expire, jiffies + llbitmap->barrier_idle * HZ); 694 return; 695 } 696 697 wait_event(pctl->wait, !percpu_ref_is_dying(&pctl->active)); 698 goto retry; 699 } 700 701 static void llbitmap_release_barrier(struct llbitmap *llbitmap, int page_idx) 702 { 703 struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx]; 704 705 percpu_ref_put(&pctl->active); 706 } 707 708 static int llbitmap_suspend_timeout(struct llbitmap *llbitmap, int page_idx) 709 { 710 struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx]; 711 712 percpu_ref_kill(&pctl->active); 713 714 if (!wait_event_timeout(pctl->wait, percpu_ref_is_zero(&pctl->active), 715 llbitmap->mddev->bitmap_info.daemon_sleep * HZ)) 716 return -ETIMEDOUT; 717 718 return 0; 719 } 720 721 static void llbitmap_resume(struct llbitmap *llbitmap, int page_idx) 722 { 723 struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx]; 724 725 pctl->expire = LONG_MAX; 726 percpu_ref_resurrect(&pctl->active); 727 wake_up(&pctl->wait); 728 } 729 730 static int llbitmap_check_support(struct mddev *mddev) 731 { 732 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { 733 pr_notice("md/llbitmap: %s: array with journal cannot have bitmap\n", 734 mdname(mddev)); 735 return -EBUSY; 736 } 737 738 if (mddev->bitmap_info.space == 0) { 739 if (mddev->bitmap_info.default_space == 0) { 740 pr_notice("md/llbitmap: %s: no space for bitmap\n", 741 mdname(mddev)); 742 return -ENOSPC; 743 } 744 } 745 746 if (!mddev->persistent) { 747 pr_notice("md/llbitmap: %s: array must be persistent\n", 748 mdname(mddev)); 749 return -EOPNOTSUPP; 750 } 751 752 if (mddev->bitmap_info.file) { 753 pr_notice("md/llbitmap: %s: doesn't support bitmap file\n", 754 mdname(mddev)); 755 return -EOPNOTSUPP; 756 } 757 758 if (mddev->bitmap_info.external) { 759 pr_notice("md/llbitmap: %s: doesn't support external metadata\n", 760 mdname(mddev)); 761 return -EOPNOTSUPP; 762 } 763 764 if (mddev_is_dm(mddev)) { 765 pr_notice("md/llbitmap: %s: doesn't support dm-raid\n", 766 mdname(mddev)); 767 return -EOPNOTSUPP; 768 } 769 770 return 0; 771 } 772 773 static int llbitmap_init(struct llbitmap *llbitmap) 774 { 775 struct mddev *mddev = llbitmap->mddev; 776 sector_t blocks = mddev->resync_max_sectors; 777 unsigned long chunksize = MIN_CHUNK_SIZE; 778 unsigned long chunks = DIV_ROUND_UP(blocks, chunksize); 779 unsigned long space = mddev->bitmap_info.space << SECTOR_SHIFT; 780 int ret; 781 782 while (chunks > space) { 783 chunksize = chunksize << 1; 784 chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize); 785 } 786 787 llbitmap->barrier_idle = DEFAULT_BARRIER_IDLE; 788 llbitmap->chunkshift = ffz(~chunksize); 789 llbitmap->chunksize = chunksize; 790 llbitmap->chunks = chunks; 791 mddev->bitmap_info.daemon_sleep = DEFAULT_DAEMON_SLEEP; 792 793 ret = llbitmap_cache_pages(llbitmap); 794 if (ret) 795 return ret; 796 797 llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1, 798 BitmapActionInit); 799 /* flush initial llbitmap to disk */ 800 __llbitmap_flush(mddev); 801 802 return 0; 803 } 804 805 static int llbitmap_read_sb(struct llbitmap *llbitmap) 806 { 807 struct mddev *mddev = llbitmap->mddev; 808 unsigned long daemon_sleep; 809 unsigned long chunksize; 810 unsigned long events; 811 struct page *sb_page; 812 bitmap_super_t *sb; 813 int ret = -EINVAL; 814 815 if (!mddev->bitmap_info.offset) { 816 pr_err("md/llbitmap: %s: no super block found", mdname(mddev)); 817 return -EINVAL; 818 } 819 820 sb_page = llbitmap_read_page(llbitmap, 0); 821 if (IS_ERR(sb_page)) { 822 pr_err("md/llbitmap: %s: read super block failed", 823 mdname(mddev)); 824 return -EIO; 825 } 826 827 sb = kmap_local_page(sb_page); 828 if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) { 829 pr_err("md/llbitmap: %s: invalid super block magic number", 830 mdname(mddev)); 831 goto out_put_page; 832 } 833 834 if (sb->version != cpu_to_le32(BITMAP_MAJOR_LOCKLESS)) { 835 pr_err("md/llbitmap: %s: invalid super block version", 836 mdname(mddev)); 837 goto out_put_page; 838 } 839 840 if (memcmp(sb->uuid, mddev->uuid, 16)) { 841 pr_err("md/llbitmap: %s: bitmap superblock UUID mismatch\n", 842 mdname(mddev)); 843 goto out_put_page; 844 } 845 846 if (mddev->bitmap_info.space == 0) { 847 int room = le32_to_cpu(sb->sectors_reserved); 848 849 if (room) 850 mddev->bitmap_info.space = room; 851 else 852 mddev->bitmap_info.space = mddev->bitmap_info.default_space; 853 } 854 llbitmap->flags = le32_to_cpu(sb->state); 855 if (test_and_clear_bit(BITMAP_FIRST_USE, &llbitmap->flags)) { 856 ret = llbitmap_init(llbitmap); 857 goto out_put_page; 858 } 859 860 chunksize = le32_to_cpu(sb->chunksize); 861 if (!is_power_of_2(chunksize)) { 862 pr_err("md/llbitmap: %s: chunksize not a power of 2", 863 mdname(mddev)); 864 goto out_put_page; 865 } 866 867 if (chunksize < DIV_ROUND_UP_SECTOR_T(mddev->resync_max_sectors, 868 mddev->bitmap_info.space << SECTOR_SHIFT)) { 869 pr_err("md/llbitmap: %s: chunksize too small %lu < %llu / %lu", 870 mdname(mddev), chunksize, mddev->resync_max_sectors, 871 mddev->bitmap_info.space); 872 goto out_put_page; 873 } 874 875 daemon_sleep = le32_to_cpu(sb->daemon_sleep); 876 if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT / HZ) { 877 pr_err("md/llbitmap: %s: daemon sleep %lu period out of range", 878 mdname(mddev), daemon_sleep); 879 goto out_put_page; 880 } 881 882 events = le64_to_cpu(sb->events); 883 if (events < mddev->events) { 884 pr_warn("md/llbitmap :%s: bitmap file is out of date (%lu < %llu) -- forcing full recovery", 885 mdname(mddev), events, mddev->events); 886 set_bit(BITMAP_STALE, &llbitmap->flags); 887 } 888 889 sb->sync_size = cpu_to_le64(mddev->resync_max_sectors); 890 mddev->bitmap_info.chunksize = chunksize; 891 mddev->bitmap_info.daemon_sleep = daemon_sleep; 892 893 llbitmap->barrier_idle = DEFAULT_BARRIER_IDLE; 894 llbitmap->chunksize = chunksize; 895 llbitmap->chunks = DIV_ROUND_UP_SECTOR_T(mddev->resync_max_sectors, chunksize); 896 llbitmap->chunkshift = ffz(~chunksize); 897 ret = llbitmap_cache_pages(llbitmap); 898 899 out_put_page: 900 __free_page(sb_page); 901 kunmap_local(sb); 902 return ret; 903 } 904 905 static void llbitmap_pending_timer_fn(struct timer_list *pending_timer) 906 { 907 struct llbitmap *llbitmap = 908 container_of(pending_timer, struct llbitmap, pending_timer); 909 910 if (work_busy(&llbitmap->daemon_work)) { 911 pr_warn("md/llbitmap: %s daemon_work not finished in %lu seconds\n", 912 mdname(llbitmap->mddev), 913 llbitmap->mddev->bitmap_info.daemon_sleep); 914 set_bit(BITMAP_DAEMON_BUSY, &llbitmap->flags); 915 return; 916 } 917 918 queue_work(md_llbitmap_io_wq, &llbitmap->daemon_work); 919 } 920 921 static void md_llbitmap_daemon_fn(struct work_struct *work) 922 { 923 struct llbitmap *llbitmap = 924 container_of(work, struct llbitmap, daemon_work); 925 unsigned long start; 926 unsigned long end; 927 bool restart; 928 int idx; 929 930 if (llbitmap->mddev->degraded) 931 return; 932 retry: 933 start = 0; 934 end = min(llbitmap->chunks, PAGE_SIZE - BITMAP_DATA_OFFSET) - 1; 935 restart = false; 936 937 for (idx = 0; idx < llbitmap->nr_pages; idx++) { 938 struct llbitmap_page_ctl *pctl = llbitmap->pctl[idx]; 939 940 if (idx > 0) { 941 start = end + 1; 942 end = min(end + PAGE_SIZE, llbitmap->chunks - 1); 943 } 944 945 if (!test_bit(LLPageFlush, &pctl->flags) && 946 time_before(jiffies, pctl->expire)) { 947 restart = true; 948 continue; 949 } 950 951 if (llbitmap_suspend_timeout(llbitmap, idx) < 0) { 952 pr_warn("md/llbitmap: %s: %s waiting for page %d timeout\n", 953 mdname(llbitmap->mddev), __func__, idx); 954 continue; 955 } 956 957 llbitmap_state_machine(llbitmap, start, end, BitmapActionDaemon); 958 llbitmap_resume(llbitmap, idx); 959 } 960 961 /* 962 * If the daemon took a long time to finish, retry to prevent missing 963 * clearing dirty bits. 964 */ 965 if (test_and_clear_bit(BITMAP_DAEMON_BUSY, &llbitmap->flags)) 966 goto retry; 967 968 /* If some page is dirty but not expired, setup timer again */ 969 if (restart) 970 mod_timer(&llbitmap->pending_timer, 971 jiffies + llbitmap->mddev->bitmap_info.daemon_sleep * HZ); 972 } 973 974 static int llbitmap_create(struct mddev *mddev) 975 { 976 struct llbitmap *llbitmap; 977 int ret; 978 979 ret = llbitmap_check_support(mddev); 980 if (ret) 981 return ret; 982 983 llbitmap = kzalloc(sizeof(*llbitmap), GFP_KERNEL); 984 if (!llbitmap) 985 return -ENOMEM; 986 987 llbitmap->mddev = mddev; 988 llbitmap->io_size = bdev_logical_block_size(mddev->gendisk->part0); 989 llbitmap->blocks_per_page = PAGE_SIZE / llbitmap->io_size; 990 991 timer_setup(&llbitmap->pending_timer, llbitmap_pending_timer_fn, 0); 992 INIT_WORK(&llbitmap->daemon_work, md_llbitmap_daemon_fn); 993 atomic_set(&llbitmap->behind_writes, 0); 994 init_waitqueue_head(&llbitmap->behind_wait); 995 996 mutex_lock(&mddev->bitmap_info.mutex); 997 mddev->bitmap = llbitmap; 998 ret = llbitmap_read_sb(llbitmap); 999 mutex_unlock(&mddev->bitmap_info.mutex); 1000 if (ret) { 1001 kfree(llbitmap); 1002 mddev->bitmap = NULL; 1003 } 1004 1005 return ret; 1006 } 1007 1008 static int llbitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize) 1009 { 1010 struct llbitmap *llbitmap = mddev->bitmap; 1011 unsigned long chunks; 1012 1013 if (chunksize == 0) 1014 chunksize = llbitmap->chunksize; 1015 1016 /* If there is enough space, leave the chunksize unchanged. */ 1017 chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize); 1018 while (chunks > mddev->bitmap_info.space << SECTOR_SHIFT) { 1019 chunksize = chunksize << 1; 1020 chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize); 1021 } 1022 1023 llbitmap->chunkshift = ffz(~chunksize); 1024 llbitmap->chunksize = chunksize; 1025 llbitmap->chunks = chunks; 1026 1027 return 0; 1028 } 1029 1030 static int llbitmap_load(struct mddev *mddev) 1031 { 1032 enum llbitmap_action action = BitmapActionReload; 1033 struct llbitmap *llbitmap = mddev->bitmap; 1034 1035 if (test_and_clear_bit(BITMAP_STALE, &llbitmap->flags)) 1036 action = BitmapActionStale; 1037 1038 llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1, action); 1039 return 0; 1040 } 1041 1042 static void llbitmap_destroy(struct mddev *mddev) 1043 { 1044 struct llbitmap *llbitmap = mddev->bitmap; 1045 1046 if (!llbitmap) 1047 return; 1048 1049 mutex_lock(&mddev->bitmap_info.mutex); 1050 1051 timer_delete_sync(&llbitmap->pending_timer); 1052 flush_workqueue(md_llbitmap_io_wq); 1053 flush_workqueue(md_llbitmap_unplug_wq); 1054 1055 mddev->bitmap = NULL; 1056 llbitmap_free_pages(llbitmap); 1057 kfree(llbitmap); 1058 mutex_unlock(&mddev->bitmap_info.mutex); 1059 } 1060 1061 static void llbitmap_start_write(struct mddev *mddev, sector_t offset, 1062 unsigned long sectors) 1063 { 1064 struct llbitmap *llbitmap = mddev->bitmap; 1065 unsigned long start = offset >> llbitmap->chunkshift; 1066 unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift; 1067 int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT; 1068 int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT; 1069 1070 llbitmap_state_machine(llbitmap, start, end, BitmapActionStartwrite); 1071 1072 while (page_start <= page_end) { 1073 llbitmap_raise_barrier(llbitmap, page_start); 1074 page_start++; 1075 } 1076 } 1077 1078 static void llbitmap_end_write(struct mddev *mddev, sector_t offset, 1079 unsigned long sectors) 1080 { 1081 struct llbitmap *llbitmap = mddev->bitmap; 1082 unsigned long start = offset >> llbitmap->chunkshift; 1083 unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift; 1084 int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT; 1085 int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT; 1086 1087 while (page_start <= page_end) { 1088 llbitmap_release_barrier(llbitmap, page_start); 1089 page_start++; 1090 } 1091 } 1092 1093 static void llbitmap_start_discard(struct mddev *mddev, sector_t offset, 1094 unsigned long sectors) 1095 { 1096 struct llbitmap *llbitmap = mddev->bitmap; 1097 unsigned long start = DIV_ROUND_UP_SECTOR_T(offset, llbitmap->chunksize); 1098 unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift; 1099 int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT; 1100 int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT; 1101 1102 llbitmap_state_machine(llbitmap, start, end, BitmapActionDiscard); 1103 1104 while (page_start <= page_end) { 1105 llbitmap_raise_barrier(llbitmap, page_start); 1106 page_start++; 1107 } 1108 } 1109 1110 static void llbitmap_end_discard(struct mddev *mddev, sector_t offset, 1111 unsigned long sectors) 1112 { 1113 struct llbitmap *llbitmap = mddev->bitmap; 1114 unsigned long start = DIV_ROUND_UP_SECTOR_T(offset, llbitmap->chunksize); 1115 unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift; 1116 int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT; 1117 int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT; 1118 1119 while (page_start <= page_end) { 1120 llbitmap_release_barrier(llbitmap, page_start); 1121 page_start++; 1122 } 1123 } 1124 1125 static void llbitmap_unplug_fn(struct work_struct *work) 1126 { 1127 struct llbitmap_unplug_work *unplug_work = 1128 container_of(work, struct llbitmap_unplug_work, work); 1129 struct llbitmap *llbitmap = unplug_work->llbitmap; 1130 struct blk_plug plug; 1131 int i; 1132 1133 blk_start_plug(&plug); 1134 1135 for (i = 0; i < llbitmap->nr_pages; i++) { 1136 if (!test_bit(LLPageDirty, &llbitmap->pctl[i]->flags) || 1137 !test_and_clear_bit(LLPageDirty, &llbitmap->pctl[i]->flags)) 1138 continue; 1139 1140 llbitmap_write_page(llbitmap, i); 1141 } 1142 1143 blk_finish_plug(&plug); 1144 md_super_wait(llbitmap->mddev); 1145 complete(unplug_work->done); 1146 } 1147 1148 static bool llbitmap_dirty(struct llbitmap *llbitmap) 1149 { 1150 int i; 1151 1152 for (i = 0; i < llbitmap->nr_pages; i++) 1153 if (test_bit(LLPageDirty, &llbitmap->pctl[i]->flags)) 1154 return true; 1155 1156 return false; 1157 } 1158 1159 static void llbitmap_unplug(struct mddev *mddev, bool sync) 1160 { 1161 DECLARE_COMPLETION_ONSTACK(done); 1162 struct llbitmap *llbitmap = mddev->bitmap; 1163 struct llbitmap_unplug_work unplug_work = { 1164 .llbitmap = llbitmap, 1165 .done = &done, 1166 }; 1167 1168 if (!llbitmap_dirty(llbitmap)) 1169 return; 1170 1171 /* 1172 * Issue new bitmap IO under submit_bio() context will deadlock: 1173 * - the bio will wait for bitmap bio to be done, before it can be 1174 * issued; 1175 * - bitmap bio will be added to current->bio_list and wait for this 1176 * bio to be issued; 1177 */ 1178 INIT_WORK_ONSTACK(&unplug_work.work, llbitmap_unplug_fn); 1179 queue_work(md_llbitmap_unplug_wq, &unplug_work.work); 1180 wait_for_completion(&done); 1181 destroy_work_on_stack(&unplug_work.work); 1182 } 1183 1184 /* 1185 * Force to write all bitmap pages to disk, called when stopping the array, or 1186 * every daemon_sleep seconds when sync_thread is running. 1187 */ 1188 static void __llbitmap_flush(struct mddev *mddev) 1189 { 1190 struct llbitmap *llbitmap = mddev->bitmap; 1191 struct blk_plug plug; 1192 int i; 1193 1194 blk_start_plug(&plug); 1195 for (i = 0; i < llbitmap->nr_pages; i++) { 1196 struct llbitmap_page_ctl *pctl = llbitmap->pctl[i]; 1197 1198 /* mark all blocks as dirty */ 1199 set_bit(LLPageDirty, &pctl->flags); 1200 bitmap_fill(pctl->dirty, llbitmap->blocks_per_page); 1201 llbitmap_write_page(llbitmap, i); 1202 } 1203 blk_finish_plug(&plug); 1204 md_super_wait(llbitmap->mddev); 1205 } 1206 1207 static void llbitmap_flush(struct mddev *mddev) 1208 { 1209 struct llbitmap *llbitmap = mddev->bitmap; 1210 int i; 1211 1212 for (i = 0; i < llbitmap->nr_pages; i++) 1213 set_bit(LLPageFlush, &llbitmap->pctl[i]->flags); 1214 1215 timer_delete_sync(&llbitmap->pending_timer); 1216 queue_work(md_llbitmap_io_wq, &llbitmap->daemon_work); 1217 flush_work(&llbitmap->daemon_work); 1218 1219 __llbitmap_flush(mddev); 1220 } 1221 1222 /* This is used for raid5 lazy initial recovery */ 1223 static bool llbitmap_blocks_synced(struct mddev *mddev, sector_t offset) 1224 { 1225 struct llbitmap *llbitmap = mddev->bitmap; 1226 unsigned long p = offset >> llbitmap->chunkshift; 1227 enum llbitmap_state c = llbitmap_read(llbitmap, p); 1228 1229 return c == BitClean || c == BitDirty; 1230 } 1231 1232 static sector_t llbitmap_skip_sync_blocks(struct mddev *mddev, sector_t offset) 1233 { 1234 struct llbitmap *llbitmap = mddev->bitmap; 1235 unsigned long p = offset >> llbitmap->chunkshift; 1236 int blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1)); 1237 enum llbitmap_state c = llbitmap_read(llbitmap, p); 1238 1239 /* always skip unwritten blocks */ 1240 if (c == BitUnwritten) 1241 return blocks; 1242 1243 /* For degraded array, don't skip */ 1244 if (mddev->degraded) 1245 return 0; 1246 1247 /* For resync also skip clean/dirty blocks */ 1248 if ((c == BitClean || c == BitDirty) && 1249 test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && 1250 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) 1251 return blocks; 1252 1253 return 0; 1254 } 1255 1256 static bool llbitmap_start_sync(struct mddev *mddev, sector_t offset, 1257 sector_t *blocks, bool degraded) 1258 { 1259 struct llbitmap *llbitmap = mddev->bitmap; 1260 unsigned long p = offset >> llbitmap->chunkshift; 1261 1262 /* 1263 * Handle one bit at a time, this is much simpler. And it doesn't matter 1264 * if md_do_sync() loop more times. 1265 */ 1266 *blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1)); 1267 return llbitmap_state_machine(llbitmap, p, p, 1268 BitmapActionStartsync) == BitSyncing; 1269 } 1270 1271 /* Something is wrong, sync_thread stop at @offset */ 1272 static void llbitmap_end_sync(struct mddev *mddev, sector_t offset, 1273 sector_t *blocks) 1274 { 1275 struct llbitmap *llbitmap = mddev->bitmap; 1276 unsigned long p = offset >> llbitmap->chunkshift; 1277 1278 *blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1)); 1279 llbitmap_state_machine(llbitmap, p, llbitmap->chunks - 1, 1280 BitmapActionAbortsync); 1281 } 1282 1283 /* A full sync_thread is finished */ 1284 static void llbitmap_close_sync(struct mddev *mddev) 1285 { 1286 struct llbitmap *llbitmap = mddev->bitmap; 1287 int i; 1288 1289 for (i = 0; i < llbitmap->nr_pages; i++) { 1290 struct llbitmap_page_ctl *pctl = llbitmap->pctl[i]; 1291 1292 /* let daemon_fn clear dirty bits immediately */ 1293 WRITE_ONCE(pctl->expire, jiffies); 1294 } 1295 1296 llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1, 1297 BitmapActionEndsync); 1298 } 1299 1300 /* 1301 * sync_thread have reached @sector, update metadata every daemon_sleep seconds, 1302 * just in case sync_thread have to restart after power failure. 1303 */ 1304 static void llbitmap_cond_end_sync(struct mddev *mddev, sector_t sector, 1305 bool force) 1306 { 1307 struct llbitmap *llbitmap = mddev->bitmap; 1308 1309 if (sector == 0) { 1310 llbitmap->last_end_sync = jiffies; 1311 return; 1312 } 1313 1314 if (time_before(jiffies, llbitmap->last_end_sync + 1315 HZ * mddev->bitmap_info.daemon_sleep)) 1316 return; 1317 1318 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); 1319 1320 mddev->curr_resync_completed = sector; 1321 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags); 1322 llbitmap_state_machine(llbitmap, 0, sector >> llbitmap->chunkshift, 1323 BitmapActionEndsync); 1324 __llbitmap_flush(mddev); 1325 1326 llbitmap->last_end_sync = jiffies; 1327 sysfs_notify_dirent_safe(mddev->sysfs_completed); 1328 } 1329 1330 static bool llbitmap_enabled(void *data, bool flush) 1331 { 1332 struct llbitmap *llbitmap = data; 1333 1334 return llbitmap && !test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags); 1335 } 1336 1337 static void llbitmap_dirty_bits(struct mddev *mddev, unsigned long s, 1338 unsigned long e) 1339 { 1340 llbitmap_state_machine(mddev->bitmap, s, e, BitmapActionStartwrite); 1341 } 1342 1343 static void llbitmap_write_sb(struct llbitmap *llbitmap) 1344 { 1345 int nr_blocks = DIV_ROUND_UP(BITMAP_DATA_OFFSET, llbitmap->io_size); 1346 1347 bitmap_fill(llbitmap->pctl[0]->dirty, nr_blocks); 1348 llbitmap_write_page(llbitmap, 0); 1349 md_super_wait(llbitmap->mddev); 1350 } 1351 1352 static void llbitmap_update_sb(void *data) 1353 { 1354 struct llbitmap *llbitmap = data; 1355 struct mddev *mddev = llbitmap->mddev; 1356 struct page *sb_page; 1357 bitmap_super_t *sb; 1358 1359 if (test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags)) 1360 return; 1361 1362 sb_page = llbitmap_read_page(llbitmap, 0); 1363 if (IS_ERR(sb_page)) { 1364 pr_err("%s: %s: read super block failed", __func__, 1365 mdname(mddev)); 1366 set_bit(BITMAP_WRITE_ERROR, &llbitmap->flags); 1367 return; 1368 } 1369 1370 if (mddev->events < llbitmap->events_cleared) 1371 llbitmap->events_cleared = mddev->events; 1372 1373 sb = kmap_local_page(sb_page); 1374 sb->events = cpu_to_le64(mddev->events); 1375 sb->state = cpu_to_le32(llbitmap->flags); 1376 sb->chunksize = cpu_to_le32(llbitmap->chunksize); 1377 sb->sync_size = cpu_to_le64(mddev->resync_max_sectors); 1378 sb->events_cleared = cpu_to_le64(llbitmap->events_cleared); 1379 sb->sectors_reserved = cpu_to_le32(mddev->bitmap_info.space); 1380 sb->daemon_sleep = cpu_to_le32(mddev->bitmap_info.daemon_sleep); 1381 1382 kunmap_local(sb); 1383 llbitmap_write_sb(llbitmap); 1384 } 1385 1386 static int llbitmap_get_stats(void *data, struct md_bitmap_stats *stats) 1387 { 1388 struct llbitmap *llbitmap = data; 1389 1390 memset(stats, 0, sizeof(*stats)); 1391 1392 stats->missing_pages = 0; 1393 stats->pages = llbitmap->nr_pages; 1394 stats->file_pages = llbitmap->nr_pages; 1395 1396 stats->behind_writes = atomic_read(&llbitmap->behind_writes); 1397 stats->behind_wait = wq_has_sleeper(&llbitmap->behind_wait); 1398 stats->events_cleared = llbitmap->events_cleared; 1399 1400 return 0; 1401 } 1402 1403 /* just flag all pages as needing to be written */ 1404 static void llbitmap_write_all(struct mddev *mddev) 1405 { 1406 int i; 1407 struct llbitmap *llbitmap = mddev->bitmap; 1408 1409 for (i = 0; i < llbitmap->nr_pages; i++) { 1410 struct llbitmap_page_ctl *pctl = llbitmap->pctl[i]; 1411 1412 set_bit(LLPageDirty, &pctl->flags); 1413 bitmap_fill(pctl->dirty, llbitmap->blocks_per_page); 1414 } 1415 } 1416 1417 static void llbitmap_start_behind_write(struct mddev *mddev) 1418 { 1419 struct llbitmap *llbitmap = mddev->bitmap; 1420 1421 atomic_inc(&llbitmap->behind_writes); 1422 } 1423 1424 static void llbitmap_end_behind_write(struct mddev *mddev) 1425 { 1426 struct llbitmap *llbitmap = mddev->bitmap; 1427 1428 if (atomic_dec_and_test(&llbitmap->behind_writes)) 1429 wake_up(&llbitmap->behind_wait); 1430 } 1431 1432 static void llbitmap_wait_behind_writes(struct mddev *mddev) 1433 { 1434 struct llbitmap *llbitmap = mddev->bitmap; 1435 1436 if (!llbitmap) 1437 return; 1438 1439 wait_event(llbitmap->behind_wait, 1440 atomic_read(&llbitmap->behind_writes) == 0); 1441 1442 } 1443 1444 static ssize_t bits_show(struct mddev *mddev, char *page) 1445 { 1446 struct llbitmap *llbitmap; 1447 int bits[BitStateCount] = {0}; 1448 loff_t start = 0; 1449 1450 mutex_lock(&mddev->bitmap_info.mutex); 1451 llbitmap = mddev->bitmap; 1452 if (!llbitmap || !llbitmap->pctl) { 1453 mutex_unlock(&mddev->bitmap_info.mutex); 1454 return sprintf(page, "no bitmap\n"); 1455 } 1456 1457 if (test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags)) { 1458 mutex_unlock(&mddev->bitmap_info.mutex); 1459 return sprintf(page, "bitmap io error\n"); 1460 } 1461 1462 while (start < llbitmap->chunks) { 1463 enum llbitmap_state c = llbitmap_read(llbitmap, start); 1464 1465 if (c < 0 || c >= BitStateCount) 1466 pr_err("%s: invalid bit %llu state %d\n", 1467 __func__, start, c); 1468 else 1469 bits[c]++; 1470 start++; 1471 } 1472 1473 mutex_unlock(&mddev->bitmap_info.mutex); 1474 return sprintf(page, "unwritten %d\nclean %d\ndirty %d\nneed sync %d\nsyncing %d\n", 1475 bits[BitUnwritten], bits[BitClean], bits[BitDirty], 1476 bits[BitNeedSync], bits[BitSyncing]); 1477 } 1478 1479 static struct md_sysfs_entry llbitmap_bits = __ATTR_RO(bits); 1480 1481 static ssize_t metadata_show(struct mddev *mddev, char *page) 1482 { 1483 struct llbitmap *llbitmap; 1484 ssize_t ret; 1485 1486 mutex_lock(&mddev->bitmap_info.mutex); 1487 llbitmap = mddev->bitmap; 1488 if (!llbitmap) { 1489 mutex_unlock(&mddev->bitmap_info.mutex); 1490 return sprintf(page, "no bitmap\n"); 1491 } 1492 1493 ret = sprintf(page, "chunksize %lu\nchunkshift %lu\nchunks %lu\noffset %llu\ndaemon_sleep %lu\n", 1494 llbitmap->chunksize, llbitmap->chunkshift, 1495 llbitmap->chunks, mddev->bitmap_info.offset, 1496 llbitmap->mddev->bitmap_info.daemon_sleep); 1497 mutex_unlock(&mddev->bitmap_info.mutex); 1498 1499 return ret; 1500 } 1501 1502 static struct md_sysfs_entry llbitmap_metadata = __ATTR_RO(metadata); 1503 1504 static ssize_t 1505 daemon_sleep_show(struct mddev *mddev, char *page) 1506 { 1507 return sprintf(page, "%lu\n", mddev->bitmap_info.daemon_sleep); 1508 } 1509 1510 static ssize_t 1511 daemon_sleep_store(struct mddev *mddev, const char *buf, size_t len) 1512 { 1513 unsigned long timeout; 1514 int rv = kstrtoul(buf, 10, &timeout); 1515 1516 if (rv) 1517 return rv; 1518 1519 mddev->bitmap_info.daemon_sleep = timeout; 1520 return len; 1521 } 1522 1523 static struct md_sysfs_entry llbitmap_daemon_sleep = __ATTR_RW(daemon_sleep); 1524 1525 static ssize_t 1526 barrier_idle_show(struct mddev *mddev, char *page) 1527 { 1528 struct llbitmap *llbitmap = mddev->bitmap; 1529 1530 return sprintf(page, "%lu\n", llbitmap->barrier_idle); 1531 } 1532 1533 static ssize_t 1534 barrier_idle_store(struct mddev *mddev, const char *buf, size_t len) 1535 { 1536 struct llbitmap *llbitmap = mddev->bitmap; 1537 unsigned long timeout; 1538 int rv = kstrtoul(buf, 10, &timeout); 1539 1540 if (rv) 1541 return rv; 1542 1543 llbitmap->barrier_idle = timeout; 1544 return len; 1545 } 1546 1547 static struct md_sysfs_entry llbitmap_barrier_idle = __ATTR_RW(barrier_idle); 1548 1549 static struct attribute *md_llbitmap_attrs[] = { 1550 &llbitmap_bits.attr, 1551 &llbitmap_metadata.attr, 1552 &llbitmap_daemon_sleep.attr, 1553 &llbitmap_barrier_idle.attr, 1554 NULL 1555 }; 1556 1557 static struct attribute_group md_llbitmap_group = { 1558 .name = "llbitmap", 1559 .attrs = md_llbitmap_attrs, 1560 }; 1561 1562 static struct bitmap_operations llbitmap_ops = { 1563 .head = { 1564 .type = MD_BITMAP, 1565 .id = ID_LLBITMAP, 1566 .name = "llbitmap", 1567 }, 1568 1569 .enabled = llbitmap_enabled, 1570 .create = llbitmap_create, 1571 .resize = llbitmap_resize, 1572 .load = llbitmap_load, 1573 .destroy = llbitmap_destroy, 1574 1575 .start_write = llbitmap_start_write, 1576 .end_write = llbitmap_end_write, 1577 .start_discard = llbitmap_start_discard, 1578 .end_discard = llbitmap_end_discard, 1579 .unplug = llbitmap_unplug, 1580 .flush = llbitmap_flush, 1581 1582 .start_behind_write = llbitmap_start_behind_write, 1583 .end_behind_write = llbitmap_end_behind_write, 1584 .wait_behind_writes = llbitmap_wait_behind_writes, 1585 1586 .blocks_synced = llbitmap_blocks_synced, 1587 .skip_sync_blocks = llbitmap_skip_sync_blocks, 1588 .start_sync = llbitmap_start_sync, 1589 .end_sync = llbitmap_end_sync, 1590 .close_sync = llbitmap_close_sync, 1591 .cond_end_sync = llbitmap_cond_end_sync, 1592 1593 .update_sb = llbitmap_update_sb, 1594 .get_stats = llbitmap_get_stats, 1595 .dirty_bits = llbitmap_dirty_bits, 1596 .write_all = llbitmap_write_all, 1597 1598 .group = &md_llbitmap_group, 1599 }; 1600 1601 int md_llbitmap_init(void) 1602 { 1603 md_llbitmap_io_wq = alloc_workqueue("md_llbitmap_io", 1604 WQ_MEM_RECLAIM | WQ_UNBOUND, 0); 1605 if (!md_llbitmap_io_wq) 1606 return -ENOMEM; 1607 1608 md_llbitmap_unplug_wq = alloc_workqueue("md_llbitmap_unplug", 1609 WQ_MEM_RECLAIM | WQ_UNBOUND, 0); 1610 if (!md_llbitmap_unplug_wq) { 1611 destroy_workqueue(md_llbitmap_io_wq); 1612 md_llbitmap_io_wq = NULL; 1613 return -ENOMEM; 1614 } 1615 1616 return register_md_submodule(&llbitmap_ops.head); 1617 } 1618 1619 void md_llbitmap_exit(void) 1620 { 1621 destroy_workqueue(md_llbitmap_io_wq); 1622 md_llbitmap_io_wq = NULL; 1623 destroy_workqueue(md_llbitmap_unplug_wq); 1624 md_llbitmap_unplug_wq = NULL; 1625 unregister_md_submodule(&llbitmap_ops.head); 1626 } 1627