// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/timer.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/file.h>
#include <linux/seq_file.h>
#include <trace/events/block.h>

#include "md.h"
#include "md-bitmap.h"

/*
 * #### Background
 *
 * Redundant data is used to enhance data fault tolerance, and the storage
 * method for redundant data varies depending on the RAID level, so it is
 * important to maintain the consistency of redundant data.
 *
 * The bitmap records which data blocks have been synchronized and which
 * ones need to be resynchronized or recovered. Each bit in the bitmap
 * represents a segment of data in the array. When a bit is set, it indicates
 * that the multiple redundant copies of that data segment may not be
 * consistent. Data synchronization can be performed based on the bitmap after
 * a power failure or after readding a disk. If there is no bitmap, a full disk
 * synchronization is required.
 *
 * #### Key Features
 *
 * - The IO fastpath is lockless; if the user issues lots of write IO to the
 *   same bitmap bit in a short time, only the first write has the additional
 *   overhead of updating the bitmap bit, and there is no additional overhead
 *   for the following writes;
 * - Only written data is resynced or recovered, meaning that when creating a
 *   new array or replacing a disk with a new one, there is no need to do a
 *   full disk resync/recovery;
 *
 * #### Key Concepts
 *
 * ##### State Machine
 *
 * Each bit is one byte and holds one of 6 different states, see
 * llbitmap_state. There are 8 different actions, see llbitmap_action, that
 * can change the state:
 *
 * llbitmap state machine: transitions between states
 *
 * |           | Startwrite | Startsync | Endsync | Abortsync |
 * | --------- | ---------- | --------- | ------- | --------- |
 * | Unwritten | Dirty      | x         | x       | x         |
 * | Clean     | Dirty      | x         | x       | x         |
 * | Dirty     | x          | x         | x       | x         |
 * | NeedSync  | x          | Syncing   | x       | x         |
 * | Syncing   | x          | Syncing   | Dirty   | NeedSync  |
 *
 * |           | Reload   | Daemon | Discard   | Stale     |
 * | --------- | -------- | ------ | --------- | --------- |
 * | Unwritten | x        | x      | x         | x         |
 * | Clean     | x        | x      | Unwritten | NeedSync  |
 * | Dirty     | NeedSync | Clean  | Unwritten | NeedSync  |
 * | NeedSync  | x        | x      | Unwritten | x         |
 * | Syncing   | NeedSync | x      | Unwritten | NeedSync  |
 *
 * Typical scenarios:
 *
 * 1) Create a new array
 * All bits will be set to Unwritten by default; if --assume-clean is set,
 * all bits will be set to Clean instead.
 *
 * 2) write data; raid1/raid10 have a full copy of the data, while raid456
 * doesn't and relies on xor data
 *
 * 2.1) write new data to raid1/raid10:
 * Unwritten --StartWrite--> Dirty
 *
 * 2.2) write new data to raid456:
 * Unwritten --StartWrite--> NeedSync
 *
 * Because the initial recovery for raid456 is skipped, the xor data is not
 * built yet; the bit must be set to NeedSync first, and after the lazy
 * initial recovery is finished, the bit will finally be set to Dirty
 * (see 5.1 and 5.4);
 *
 * 2.3) overwrite existing data
 * Clean --StartWrite--> Dirty
 *
 * 3) daemon, if the array is not degraded:
 * Dirty --Daemon--> Clean
 *
 * 4) discard
 * {Clean, Dirty, NeedSync, Syncing} --Discard--> Unwritten
 *
 * 5) resync and recover
 *
 * 5.1) common process
 * NeedSync --Startsync--> Syncing --Endsync--> Dirty --Daemon--> Clean
 *
 * 5.2) resync after power failure
 * Dirty --Reload--> NeedSync
 *
 * 5.3) recover while replacing with a new disk
 * By default, the old bitmap framework will recover all data; llbitmap
 * avoids this with a new helper, see llbitmap_skip_sync_blocks:
 *
 * skip recovery for bits other than Dirty or Clean;
 *
 * 5.4) lazy initial recovery for raid5:
 * By default, the old bitmap framework will only allow a new recovery when
 * there are spares (new disks); a new recovery flag MD_RECOVERY_LAZY_RECOVER
 * is added to perform raid456 lazy recovery for set bits (from 2.2).
 *
 * 6) special handling for a degraded array:
 *
 * - Dirty bits will never be cleared, the daemon will just do nothing, so
 *   that if a disk is readded, Clean bits can be skipped during recovery;
 * - Dirty bits will convert to Syncing on Startsync, to do data recovery
 *   for newly added disks;
 * - New writes will convert bits to NeedSync directly;
 *
 * ##### Bitmap IO
 *
 * ##### Chunksize
 *
 * The default bitmap size is 128k, including the 1k bitmap super block, and
 * the default size of the data segment each bit represents (the chunksize)
 * is 64k. The chunksize is doubled repeatedly while the total number of bits
 * exceeds 127k (see llbitmap_init). A worked example follows.
 *
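 * For example (illustrative numbers, 512-byte sectors, 127k taken as the
 * bit-count limit): for a 1TiB array (2^31 sectors), a 64k chunk (128
 * sectors) would need 16M bits; doubling repeatedly gives 128k bits at an
 * 8M chunk (still too many) and finally 64k bits at a 16M chunk, which fits.
 *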
 * ##### READ
 *
 * While creating the bitmap, all pages will be allocated and read for
 * llbitmap; there won't be any further reads afterwards.
 *
 * ##### WRITE
 *
 * Bitmap write IO is divided into blocks of the array's logical_block_size,
 * and the dirty state of each block is tracked independently. For example:
 *
 * each page is 4k and contains 8 blocks; each block is 512 bytes and
 * contains 512 bits;
 *
 * | page0 | page1 | ... | page 31 |
 * |       |
 * |        \-----------------------\
 * |                                |
 * | block0 | block1 | ... | block 7|
 * |        |
 * |         \-----------------\
 * |                           |
 * | bit0 | bit1 | ... | bit511 |
 *
 * In the IO path, if one bit is changed to Dirty or NeedSync, the
 * corresponding subpage will be marked dirty, and such a block must be
 * written first before the data IO is issued. This behaviour will affect IO
 * performance; to reduce the impact, if multiple bits are changed in the
 * same block in a short time, all bits in this block will be changed to
 * Dirty/NeedSync, so that there won't be any additional overhead until the
 * daemon clears the dirty bits.
 *
 * ##### Dirty Bits Synchronization
 *
 * The IO fast path will set bits to Dirty, and those dirty bits will be
 * cleared by the daemon after the IO is done. llbitmap_page_ctl is used to
 * synchronize between the IO path and the daemon;
 *
 * IO path:
 * 1) try to grab a reference; if this succeeds, set the expire time to 5s
 *    from now and return;
 * 2) if grabbing a reference fails, wait for the daemon to finish clearing
 *    dirty bits;
 *
 * Daemon (the daemon will be woken up every daemon_sleep seconds):
 * For each page:
 * 1) check if the page expired; if not, skip this page; for an expired page:
 * 2) suspend the page and wait for inflight write IO to be done;
 * 3) change the dirty page to clean;
 * 4) resume the page;
 */

#define BITMAP_DATA_OFFSET 1024

/* 64k is the max IO size of sync IO for raid1/raid10 */
#define MIN_CHUNK_SIZE (64 * 2)

/* By default, the daemon will be woken up every 30s */
#define DEFAULT_DAEMON_SLEEP 30

/*
 * Dirtied bits that have not been accessed for more than 5s will be cleared
 * by the daemon.
 */
#define DEFAULT_BARRIER_IDLE 5

enum llbitmap_state {
        /* No valid data, init state after assembling the array */
        BitUnwritten = 0,
        /* data is consistent */
        BitClean,
        /* data will be consistent after IO is done, set directly for writes */
        BitDirty,
        /*
         * data needs to be resynchronized:
         * 1) set directly for writes if the array is degraded, to prevent a
         *    full disk synchronization after readding a disk;
         * 2) reassemble the array after power failure, and dirty bits are
         *    found after reloading the bitmap;
         * 3) set on the first write for raid5, to build the initial xor data
         *    lazily
         */
        BitNeedSync,
        /* data is synchronizing */
        BitSyncing,
        BitStateCount,
        BitNone = 0xff,
};

enum llbitmap_action {
        /* User writes new data, this is the only action from the IO fast path */
        BitmapActionStartwrite = 0,
        /* Start recovery */
        BitmapActionStartsync,
        /* Finish recovery */
        BitmapActionEndsync,
        /* Failed recovery */
        BitmapActionAbortsync,
        /* Reassemble the array */
        BitmapActionReload,
        /* Daemon thread is trying to clear dirty bits */
        BitmapActionDaemon,
        /* Data is deleted */
        BitmapActionDiscard,
        /*
         * Bitmap is stale, mark all bits except BitUnwritten as
         * BitNeedSync.
         */
        BitmapActionStale,
        BitmapActionCount,
        /* Init state is BitUnwritten */
        BitmapActionInit,
};

enum llbitmap_page_state {
        LLPageFlush = 0,
        LLPageDirty,
};

struct llbitmap_page_ctl {
        char *state;
        struct page *page;
        unsigned long expire;
        unsigned long flags;
        wait_queue_head_t wait;
        struct percpu_ref active;
        /* Per block size dirty state, maximum 64k page / 1 sector = 128 */
        unsigned long dirty[];
};

struct llbitmap {
        struct mddev *mddev;
        struct llbitmap_page_ctl **pctl;

        unsigned int nr_pages;
        unsigned int io_size;
        unsigned int blocks_per_page;

        /* shift of one chunk */
        unsigned long chunkshift;
        /* size of one chunk in sectors */
        unsigned long chunksize;
        /* total number of chunks */
        unsigned long chunks;
        unsigned long last_end_sync;
        /*
         * time in seconds after which dirty bits will be cleared if the page
         * is not accessed.
         */
        unsigned long barrier_idle;
        /* fires on the first BitDirty state */
        struct timer_list pending_timer;
        struct work_struct daemon_work;

        unsigned long flags;
        __u64 events_cleared;

        /* for slow disks */
        atomic_t behind_writes;
        wait_queue_head_t behind_wait;
};

struct llbitmap_unplug_work {
        struct work_struct work;
        struct llbitmap *llbitmap;
        struct completion *done;
};

static struct workqueue_struct *md_llbitmap_io_wq;
static struct workqueue_struct *md_llbitmap_unplug_wq;

static char state_machine[BitStateCount][BitmapActionCount] = {
        [BitUnwritten] = {
                [BitmapActionStartwrite] = BitDirty,
                [BitmapActionStartsync] = BitNone,
                [BitmapActionEndsync] = BitNone,
                [BitmapActionAbortsync] = BitNone,
                [BitmapActionReload] = BitNone,
                [BitmapActionDaemon] = BitNone,
                [BitmapActionDiscard] = BitNone,
                [BitmapActionStale] = BitNone,
        },
        [BitClean] = {
                [BitmapActionStartwrite] = BitDirty,
                [BitmapActionStartsync] = BitNone,
                [BitmapActionEndsync] = BitNone,
                [BitmapActionAbortsync] = BitNone,
                [BitmapActionReload] = BitNone,
                [BitmapActionDaemon] = BitNone,
                [BitmapActionDiscard] = BitUnwritten,
                [BitmapActionStale] = BitNeedSync,
        },
        [BitDirty] = {
                [BitmapActionStartwrite] = BitNone,
                [BitmapActionStartsync] = BitNone,
                [BitmapActionEndsync] = BitNone,
                [BitmapActionAbortsync] = BitNone,
                [BitmapActionReload] = BitNeedSync,
                [BitmapActionDaemon] = BitClean,
                [BitmapActionDiscard] = BitUnwritten,
                [BitmapActionStale] = BitNeedSync,
        },
        [BitNeedSync] = {
                [BitmapActionStartwrite] = BitNone,
                [BitmapActionStartsync] = BitSyncing,
                [BitmapActionEndsync] = BitNone,
                [BitmapActionAbortsync] = BitNone,
                [BitmapActionReload] = BitNone,
                [BitmapActionDaemon] = BitNone,
                [BitmapActionDiscard] = BitUnwritten,
                [BitmapActionStale] = BitNone,
        },
        [BitSyncing] = {
                [BitmapActionStartwrite] = BitNone,
                [BitmapActionStartsync] = BitSyncing,
                [BitmapActionEndsync] = BitDirty,
                [BitmapActionAbortsync] = BitNeedSync,
                [BitmapActionReload] = BitNeedSync,
                [BitmapActionDaemon] = BitNone,
                [BitmapActionDiscard] = BitUnwritten,
                [BitmapActionStale] = BitNeedSync,
        },
};
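/*
 * Illustrative sketch only (hypothetical helper, not used by the driver):
 * how one action is resolved against the transition table above. A BitNone
 * result means "no transition, leave the bit alone"; llbitmap_state_machine()
 * below is the real consumer of this table.
 */
static inline enum llbitmap_state __maybe_unused
llbitmap_demo_transition(enum llbitmap_state c, enum llbitmap_action action)
{
        /* Out-of-range states/actions (e.g. BitmapActionInit) never transition */
        if (c >= BitStateCount || action >= BitmapActionCount)
                return BitNone;

        return state_machine[c][action];
}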
static void __llbitmap_flush(struct mddev *mddev);

static enum llbitmap_state llbitmap_read(struct llbitmap *llbitmap, loff_t pos)
{
        unsigned int idx;
        unsigned int offset;

        pos += BITMAP_DATA_OFFSET;
        idx = pos >> PAGE_SHIFT;
        offset = offset_in_page(pos);

        return llbitmap->pctl[idx]->state[offset];
}

/* set all the bits in the subpage as dirty */
static void llbitmap_infect_dirty_bits(struct llbitmap *llbitmap,
                                       struct llbitmap_page_ctl *pctl,
                                       unsigned int block)
{
        bool level_456 = raid_is_456(llbitmap->mddev);
        unsigned int io_size = llbitmap->io_size;
        int pos;

        for (pos = block * io_size; pos < (block + 1) * io_size; pos++) {
                switch (pctl->state[pos]) {
                case BitUnwritten:
                        pctl->state[pos] = level_456 ? BitNeedSync : BitDirty;
                        break;
                case BitClean:
                        pctl->state[pos] = BitDirty;
                        break;
                }
        }
}

static void llbitmap_set_page_dirty(struct llbitmap *llbitmap, int idx,
                                    int offset)
{
        struct llbitmap_page_ctl *pctl = llbitmap->pctl[idx];
        unsigned int io_size = llbitmap->io_size;
        int block = offset / io_size;
        int pos;

        if (!test_bit(LLPageDirty, &pctl->flags))
                set_bit(LLPageDirty, &pctl->flags);

        /*
         * For a degraded array, dirty bits will never be cleared, and we must
         * resync all the dirty bits; hence skip infecting new dirty bits to
         * prevent resyncing unnecessary data.
         */
        if (llbitmap->mddev->degraded) {
                set_bit(block, pctl->dirty);
                return;
        }

        /*
         * The subpage usually contains a total of 512 bits. If any single bit
         * within the subpage is marked as dirty, the entire sector will be
         * written. To avoid impacting write performance, when multiple bits
         * within the same sector are modified within llbitmap->barrier_idle,
         * all bits in the sector will be collectively marked as dirty at once.
         */
        if (test_and_set_bit(block, pctl->dirty)) {
                llbitmap_infect_dirty_bits(llbitmap, pctl, block);
                return;
        }

        for (pos = block * io_size; pos < (block + 1) * io_size; pos++) {
                if (pos == offset)
                        continue;
                if (pctl->state[pos] == BitDirty ||
                    pctl->state[pos] == BitNeedSync) {
                        llbitmap_infect_dirty_bits(llbitmap, pctl, block);
                        return;
                }
        }
}

static void llbitmap_write(struct llbitmap *llbitmap, enum llbitmap_state state,
                           loff_t pos)
{
        unsigned int idx;
        unsigned int bit;

        pos += BITMAP_DATA_OFFSET;
        idx = pos >> PAGE_SHIFT;
        bit = offset_in_page(pos);

        llbitmap->pctl[idx]->state[bit] = state;
        if (state == BitDirty || state == BitNeedSync)
                llbitmap_set_page_dirty(llbitmap, idx, bit);
}
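/*
 * Illustrative sketch only (hypothetical helper, not used by the driver):
 * where a chunk's state byte lives, the same math used by llbitmap_read()
 * and llbitmap_write() above. With 4k pages and the 1k super block offset,
 * chunk 5000 maps to byte 6024, i.e. page 1, offset 1928.
 */
static inline void __maybe_unused
llbitmap_demo_locate(loff_t pos, unsigned int *idx, unsigned int *offset)
{
        pos += BITMAP_DATA_OFFSET;
        *idx = pos >> PAGE_SHIFT;
        *offset = offset_in_page(pos);
}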
static struct page *llbitmap_read_page(struct llbitmap *llbitmap, int idx)
{
        struct mddev *mddev = llbitmap->mddev;
        struct page *page = NULL;
        struct md_rdev *rdev;

        if (llbitmap->pctl && llbitmap->pctl[idx])
                page = llbitmap->pctl[idx]->page;
        if (page)
                return page;

        page = alloc_page(GFP_KERNEL | __GFP_ZERO);
        if (!page)
                return ERR_PTR(-ENOMEM);

        rdev_for_each(rdev, mddev) {
                sector_t sector;

                if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags))
                        continue;

                sector = mddev->bitmap_info.offset +
                         (idx << PAGE_SECTORS_SHIFT);

                if (sync_page_io(rdev, sector, PAGE_SIZE, page, REQ_OP_READ,
                                 true))
                        return page;

                md_error(mddev, rdev);
        }

        __free_page(page);
        return ERR_PTR(-EIO);
}

static void llbitmap_write_page(struct llbitmap *llbitmap, int idx)
{
        struct page *page = llbitmap->pctl[idx]->page;
        struct mddev *mddev = llbitmap->mddev;
        struct md_rdev *rdev;
        int block;

        for (block = 0; block < llbitmap->blocks_per_page; block++) {
                struct llbitmap_page_ctl *pctl = llbitmap->pctl[idx];

                if (!test_and_clear_bit(block, pctl->dirty))
                        continue;

                rdev_for_each(rdev, mddev) {
                        sector_t sector;
                        sector_t bit_sector = llbitmap->io_size >> SECTOR_SHIFT;

                        if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags))
                                continue;

                        sector = mddev->bitmap_info.offset + rdev->sb_start +
                                 (idx << PAGE_SECTORS_SHIFT) +
                                 block * bit_sector;
                        md_write_metadata(mddev, rdev, sector,
                                          llbitmap->io_size, page,
                                          block * llbitmap->io_size);
                }
        }
}

static void active_release(struct percpu_ref *ref)
{
        struct llbitmap_page_ctl *pctl =
                container_of(ref, struct llbitmap_page_ctl, active);

        wake_up(&pctl->wait);
}

static void llbitmap_free_pages(struct llbitmap *llbitmap)
{
        int i;

        if (!llbitmap->pctl)
                return;

        for (i = 0; i < llbitmap->nr_pages; i++) {
                struct llbitmap_page_ctl *pctl = llbitmap->pctl[i];

                if (!pctl || !pctl->page)
                        break;

                __free_page(pctl->page);
                percpu_ref_exit(&pctl->active);
        }

        kfree(llbitmap->pctl[0]);
        kfree(llbitmap->pctl);
        llbitmap->pctl = NULL;
}

static int llbitmap_cache_pages(struct llbitmap *llbitmap)
{
        struct llbitmap_page_ctl *pctl;
        unsigned int nr_pages = DIV_ROUND_UP(llbitmap->chunks +
                                             BITMAP_DATA_OFFSET, PAGE_SIZE);
        unsigned int size = struct_size(pctl, dirty, BITS_TO_LONGS(
                                        llbitmap->blocks_per_page));
        int i;

        llbitmap->pctl = kmalloc_array(nr_pages, sizeof(void *),
                                       GFP_KERNEL | __GFP_ZERO);
        if (!llbitmap->pctl)
                return -ENOMEM;

        size = round_up(size, cache_line_size());
        pctl = kmalloc_array(nr_pages, size, GFP_KERNEL | __GFP_ZERO);
        if (!pctl) {
                kfree(llbitmap->pctl);
                return -ENOMEM;
        }

        llbitmap->nr_pages = nr_pages;

        for (i = 0; i < nr_pages; i++, pctl = (void *)pctl + size) {
                struct page *page = llbitmap_read_page(llbitmap, i);

                llbitmap->pctl[i] = pctl;

                if (IS_ERR(page)) {
                        llbitmap_free_pages(llbitmap);
                        return PTR_ERR(page);
                }

                if (percpu_ref_init(&pctl->active, active_release,
                                    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
                        __free_page(page);
                        llbitmap_free_pages(llbitmap);
                        return -ENOMEM;
                }

                pctl->page = page;
                pctl->state = page_address(page);
                init_waitqueue_head(&pctl->wait);
        }

        return 0;
}

static void llbitmap_init_state(struct llbitmap *llbitmap)
{
        enum llbitmap_state state = BitUnwritten;
        unsigned long i;

        if (test_and_clear_bit(BITMAP_CLEAN, &llbitmap->flags))
                state = BitClean;

        for (i = 0; i < llbitmap->chunks; i++)
                llbitmap_write(llbitmap, state, i);
}

/* The return value is only used from resync, where @start == @end. */
static enum llbitmap_state llbitmap_state_machine(struct llbitmap *llbitmap,
                                                  unsigned long start,
                                                  unsigned long end,
                                                  enum llbitmap_action action)
{
        struct mddev *mddev = llbitmap->mddev;
        enum llbitmap_state state = BitNone;
        bool level_456 = raid_is_456(llbitmap->mddev);
        bool need_resync = false;
        bool need_recovery = false;

        if (test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags))
                return BitNone;

        if (action == BitmapActionInit) {
                llbitmap_init_state(llbitmap);
                return BitNone;
        }

        while (start <= end) {
                enum llbitmap_state c = llbitmap_read(llbitmap, start);

                if (c < 0 || c >= BitStateCount) {
                        pr_err("%s: invalid bit %lu state %d action %d, forcing resync\n",
                               __func__, start, c, action);
                        state = BitNeedSync;
                        goto write_bitmap;
                }

                if (c == BitNeedSync)
                        need_resync = !mddev->degraded;

                state = state_machine[c][action];

write_bitmap:
                if (unlikely(mddev->degraded)) {
                        /* For a degraded array, mark new data as needing sync. */
                        if (state == BitDirty &&
                            action == BitmapActionStartwrite)
                                state = BitNeedSync;
                        /*
                         * For a degraded array, resync dirty data as well; note
                         * that if the array is still degraded after resync is
                         * done, all new data will still be dirty until the
                         * array is clean.
                         */
                        else if (c == BitDirty &&
                                 action == BitmapActionStartsync)
                                state = BitSyncing;
                } else if (c == BitUnwritten && state == BitDirty &&
                           action == BitmapActionStartwrite && level_456) {
                        /* Delay raid456 initial recovery to the first write. */
                        state = BitNeedSync;
                }

                if (state == BitNone) {
                        start++;
                        continue;
                }

                llbitmap_write(llbitmap, state, start);

                if (state == BitNeedSync)
                        need_resync = !mddev->degraded;
                else if (state == BitDirty &&
                         !timer_pending(&llbitmap->pending_timer))
                        mod_timer(&llbitmap->pending_timer,
                                  jiffies + mddev->bitmap_info.daemon_sleep * HZ);

                start++;
        }

        if (need_resync && level_456)
                need_recovery = true;

        if (need_recovery) {
                set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
                set_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
                md_wakeup_thread(mddev->thread);
        } else if (need_resync) {
                set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
                set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
                md_wakeup_thread(mddev->thread);
        }

        return state;
}
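/*
 * Illustrative sketch only (hypothetical helper, not used by the driver):
 * the common resync flow of scenario 5.1 in the header comment, expressed as
 * calls into llbitmap_state_machine(). The final Dirty --Daemon--> Clean
 * step is performed later by daemon_work.
 */
static void __maybe_unused llbitmap_demo_resync_flow(struct llbitmap *llbitmap,
                                                     unsigned long bit)
{
        /* NeedSync --Startsync--> Syncing */
        llbitmap_state_machine(llbitmap, bit, bit, BitmapActionStartsync);

        /* ... sync IO for this chunk completes ... */

        /* Syncing --Endsync--> Dirty */
        llbitmap_state_machine(llbitmap, bit, bit, BitmapActionEndsync);
}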
static void llbitmap_raise_barrier(struct llbitmap *llbitmap, int page_idx)
{
        struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx];

retry:
        if (likely(percpu_ref_tryget_live(&pctl->active))) {
                WRITE_ONCE(pctl->expire, jiffies + llbitmap->barrier_idle * HZ);
                return;
        }

        wait_event(pctl->wait, !percpu_ref_is_dying(&pctl->active));
        goto retry;
}

static void llbitmap_release_barrier(struct llbitmap *llbitmap, int page_idx)
{
        struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx];

        percpu_ref_put(&pctl->active);
}

static int llbitmap_suspend_timeout(struct llbitmap *llbitmap, int page_idx)
{
        struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx];

        percpu_ref_kill(&pctl->active);

        if (!wait_event_timeout(pctl->wait, percpu_ref_is_zero(&pctl->active),
                                llbitmap->mddev->bitmap_info.daemon_sleep * HZ)) {
                percpu_ref_resurrect(&pctl->active);
                return -ETIMEDOUT;
        }

        return 0;
}

static void llbitmap_resume(struct llbitmap *llbitmap, int page_idx)
{
        struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx];

        pctl->expire = LONG_MAX;
        percpu_ref_resurrect(&pctl->active);
        wake_up(&pctl->wait);
}
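/*
 * Illustrative sketch only (hypothetical function, not part of the driver):
 * the page barrier protocol as seen from a write in the IO fast path; this
 * is the pattern used by llbitmap_start_write()/llbitmap_end_write() below.
 */
static void __maybe_unused llbitmap_demo_write_window(struct llbitmap *llbitmap,
                                                      int page_idx)
{
        /*
         * Grab a reference and refresh the expiry; this blocks while the
         * daemon has the page suspended for clearing dirty bits.
         */
        llbitmap_raise_barrier(llbitmap, page_idx);

        /* ... issue the data IO covered by this bitmap page ... */

        llbitmap_release_barrier(llbitmap, page_idx);
}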
static int llbitmap_check_support(struct mddev *mddev)
{
        if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
                pr_notice("md/llbitmap: %s: array with journal cannot have bitmap\n",
                          mdname(mddev));
                return -EBUSY;
        }

        if (mddev->bitmap_info.space == 0) {
                if (mddev->bitmap_info.default_space == 0) {
                        pr_notice("md/llbitmap: %s: no space for bitmap\n",
                                  mdname(mddev));
                        return -ENOSPC;
                }
        }

        if (!mddev->persistent) {
                pr_notice("md/llbitmap: %s: array must be persistent\n",
                          mdname(mddev));
                return -EOPNOTSUPP;
        }

        if (mddev->bitmap_info.file) {
                pr_notice("md/llbitmap: %s: doesn't support bitmap file\n",
                          mdname(mddev));
                return -EOPNOTSUPP;
        }

        if (mddev->bitmap_info.external) {
                pr_notice("md/llbitmap: %s: doesn't support external metadata\n",
                          mdname(mddev));
                return -EOPNOTSUPP;
        }

        if (mddev_is_dm(mddev)) {
                pr_notice("md/llbitmap: %s: doesn't support dm-raid\n",
                          mdname(mddev));
                return -EOPNOTSUPP;
        }

        return 0;
}

static int llbitmap_init(struct llbitmap *llbitmap)
{
        struct mddev *mddev = llbitmap->mddev;
        sector_t blocks = mddev->resync_max_sectors;
        unsigned long chunksize = MIN_CHUNK_SIZE;
        unsigned long chunks = DIV_ROUND_UP(blocks, chunksize);
        unsigned long space = mddev->bitmap_info.space << SECTOR_SHIFT;
        int ret;

        while (chunks > space) {
                chunksize = chunksize << 1;
                chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize);
        }

        llbitmap->barrier_idle = DEFAULT_BARRIER_IDLE;
        llbitmap->chunkshift = ffz(~chunksize);
        llbitmap->chunksize = chunksize;
        llbitmap->chunks = chunks;
        mddev->bitmap_info.daemon_sleep = DEFAULT_DAEMON_SLEEP;

        ret = llbitmap_cache_pages(llbitmap);
        if (ret)
                return ret;

        llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1,
                               BitmapActionInit);
        /* flush the initial llbitmap to disk */
        __llbitmap_flush(mddev);

        return 0;
}
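/*
 * Note (illustrative sketch, hypothetical helper not used by the driver):
 * for a power-of-two chunksize, ffz(~chunksize) as used above is simply
 * log2(chunksize), e.g. ffz(~128) == 7, so "offset >> chunkshift" maps a
 * sector to its chunk.
 */
static inline unsigned long __maybe_unused
llbitmap_demo_sector_to_chunk(struct llbitmap *llbitmap, sector_t offset)
{
        return offset >> llbitmap->chunkshift;
}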
static int llbitmap_read_sb(struct llbitmap *llbitmap)
{
        struct mddev *mddev = llbitmap->mddev;
        unsigned long daemon_sleep;
        unsigned long chunksize;
        unsigned long events;
        struct page *sb_page;
        bitmap_super_t *sb;
        int ret = -EINVAL;

        if (!mddev->bitmap_info.offset) {
                pr_err("md/llbitmap: %s: no super block found\n", mdname(mddev));
                return -EINVAL;
        }

        sb_page = llbitmap_read_page(llbitmap, 0);
        if (IS_ERR(sb_page)) {
                pr_err("md/llbitmap: %s: read super block failed\n",
                       mdname(mddev));
                return -EIO;
        }

        sb = kmap_local_page(sb_page);
        if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) {
                pr_err("md/llbitmap: %s: invalid super block magic number\n",
                       mdname(mddev));
                goto out_put_page;
        }

        if (sb->version != cpu_to_le32(BITMAP_MAJOR_LOCKLESS)) {
                pr_err("md/llbitmap: %s: invalid super block version\n",
                       mdname(mddev));
                goto out_put_page;
        }

        if (memcmp(sb->uuid, mddev->uuid, 16)) {
                pr_err("md/llbitmap: %s: bitmap superblock UUID mismatch\n",
                       mdname(mddev));
                goto out_put_page;
        }

        if (mddev->bitmap_info.space == 0) {
                int room = le32_to_cpu(sb->sectors_reserved);

                if (room)
                        mddev->bitmap_info.space = room;
                else
                        mddev->bitmap_info.space = mddev->bitmap_info.default_space;
        }

        llbitmap->flags = le32_to_cpu(sb->state);
        if (test_and_clear_bit(BITMAP_FIRST_USE, &llbitmap->flags)) {
                ret = llbitmap_init(llbitmap);
                goto out_put_page;
        }

        chunksize = le32_to_cpu(sb->chunksize);
        if (!is_power_of_2(chunksize)) {
                pr_err("md/llbitmap: %s: chunksize not a power of 2\n",
                       mdname(mddev));
                goto out_put_page;
        }

        if (chunksize < DIV_ROUND_UP_SECTOR_T(mddev->resync_max_sectors,
                                              mddev->bitmap_info.space << SECTOR_SHIFT)) {
                pr_err("md/llbitmap: %s: chunksize too small %lu < %llu / %lu\n",
                       mdname(mddev), chunksize, mddev->resync_max_sectors,
                       mddev->bitmap_info.space);
                goto out_put_page;
        }

        daemon_sleep = le32_to_cpu(sb->daemon_sleep);
        if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT / HZ) {
                pr_err("md/llbitmap: %s: daemon sleep period %lu out of range\n",
                       mdname(mddev), daemon_sleep);
                goto out_put_page;
        }

        events = le64_to_cpu(sb->events);
        if (events < mddev->events) {
                pr_warn("md/llbitmap: %s: bitmap file is out of date (%lu < %llu) -- forcing full recovery\n",
                        mdname(mddev), events, mddev->events);
                set_bit(BITMAP_STALE, &llbitmap->flags);
        }

        sb->sync_size = cpu_to_le64(mddev->resync_max_sectors);
        mddev->bitmap_info.chunksize = chunksize;
        mddev->bitmap_info.daemon_sleep = daemon_sleep;

        llbitmap->barrier_idle = DEFAULT_BARRIER_IDLE;
        llbitmap->chunksize = chunksize;
        llbitmap->chunks = DIV_ROUND_UP_SECTOR_T(mddev->resync_max_sectors, chunksize);
        llbitmap->chunkshift = ffz(~chunksize);
        ret = llbitmap_cache_pages(llbitmap);

out_put_page:
        kunmap_local(sb);
        __free_page(sb_page);
        return ret;
}

static void llbitmap_pending_timer_fn(struct timer_list *pending_timer)
{
        struct llbitmap *llbitmap =
                container_of(pending_timer, struct llbitmap, pending_timer);

        if (work_busy(&llbitmap->daemon_work)) {
                pr_warn("md/llbitmap: %s: daemon_work not finished in %lu seconds\n",
                        mdname(llbitmap->mddev),
                        llbitmap->mddev->bitmap_info.daemon_sleep);
                set_bit(BITMAP_DAEMON_BUSY, &llbitmap->flags);
                return;
        }

        queue_work(md_llbitmap_io_wq, &llbitmap->daemon_work);
}

static void md_llbitmap_daemon_fn(struct work_struct *work)
{
        struct llbitmap *llbitmap =
                container_of(work, struct llbitmap, daemon_work);
        unsigned long start;
        unsigned long end;
        bool restart;
        int idx;

        if (llbitmap->mddev->degraded)
                return;

retry:
        start = 0;
        end = min(llbitmap->chunks, PAGE_SIZE - BITMAP_DATA_OFFSET) - 1;
        restart = false;

        for (idx = 0; idx < llbitmap->nr_pages; idx++) {
                struct llbitmap_page_ctl *pctl = llbitmap->pctl[idx];

                if (idx > 0) {
                        start = end + 1;
                        end = min(end + PAGE_SIZE, llbitmap->chunks - 1);
                }

                if (!test_bit(LLPageFlush, &pctl->flags) &&
                    time_before(jiffies, pctl->expire)) {
                        restart = true;
                        continue;
                }

                if (llbitmap_suspend_timeout(llbitmap, idx) < 0) {
                        pr_warn("md/llbitmap: %s: %s waiting for page %d timeout\n",
                                mdname(llbitmap->mddev), __func__, idx);
                        continue;
                }

                llbitmap_state_machine(llbitmap, start, end, BitmapActionDaemon);
                llbitmap_resume(llbitmap, idx);
        }

        /*
         * If the daemon took a long time to finish, retry to prevent missing
         * the clearing of dirty bits.
         */
        if (test_and_clear_bit(BITMAP_DAEMON_BUSY, &llbitmap->flags))
                goto retry;

        /* If some page is dirty but not expired, set up the timer again */
        if (restart)
                mod_timer(&llbitmap->pending_timer,
                          jiffies + llbitmap->mddev->bitmap_info.daemon_sleep * HZ);
}
static int llbitmap_create(struct mddev *mddev)
{
        struct llbitmap *llbitmap;
        int ret;

        ret = llbitmap_check_support(mddev);
        if (ret)
                return ret;

        llbitmap = kzalloc(sizeof(*llbitmap), GFP_KERNEL);
        if (!llbitmap)
                return -ENOMEM;

        llbitmap->mddev = mddev;
        llbitmap->io_size = bdev_logical_block_size(mddev->gendisk->part0);
        llbitmap->blocks_per_page = PAGE_SIZE / llbitmap->io_size;

        timer_setup(&llbitmap->pending_timer, llbitmap_pending_timer_fn, 0);
        INIT_WORK(&llbitmap->daemon_work, md_llbitmap_daemon_fn);
        atomic_set(&llbitmap->behind_writes, 0);
        init_waitqueue_head(&llbitmap->behind_wait);

        mutex_lock(&mddev->bitmap_info.mutex);
        mddev->bitmap = llbitmap;
        ret = llbitmap_read_sb(llbitmap);
        mutex_unlock(&mddev->bitmap_info.mutex);
        if (ret) {
                kfree(llbitmap);
                mddev->bitmap = NULL;
        }

        return ret;
}

static int llbitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize)
{
        struct llbitmap *llbitmap = mddev->bitmap;
        unsigned long chunks;

        if (chunksize == 0)
                chunksize = llbitmap->chunksize;

        /* If there is enough space, leave the chunksize unchanged. */
        chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize);
        while (chunks > mddev->bitmap_info.space << SECTOR_SHIFT) {
                chunksize = chunksize << 1;
                chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize);
        }

        llbitmap->chunkshift = ffz(~chunksize);
        llbitmap->chunksize = chunksize;
        llbitmap->chunks = chunks;

        return 0;
}

static int llbitmap_load(struct mddev *mddev)
{
        enum llbitmap_action action = BitmapActionReload;
        struct llbitmap *llbitmap = mddev->bitmap;

        if (test_and_clear_bit(BITMAP_STALE, &llbitmap->flags))
                action = BitmapActionStale;

        llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1, action);
        return 0;
}

static void llbitmap_destroy(struct mddev *mddev)
{
        struct llbitmap *llbitmap = mddev->bitmap;

        if (!llbitmap)
                return;

        mutex_lock(&mddev->bitmap_info.mutex);

        timer_delete_sync(&llbitmap->pending_timer);
        flush_workqueue(md_llbitmap_io_wq);
        flush_workqueue(md_llbitmap_unplug_wq);

        mddev->bitmap = NULL;
        llbitmap_free_pages(llbitmap);
        kfree(llbitmap);
        mutex_unlock(&mddev->bitmap_info.mutex);
}

static void llbitmap_start_write(struct mddev *mddev, sector_t offset,
                                 unsigned long sectors)
{
        struct llbitmap *llbitmap = mddev->bitmap;
        unsigned long start = offset >> llbitmap->chunkshift;
        unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
        int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
        int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;

        llbitmap_state_machine(llbitmap, start, end, BitmapActionStartwrite);

        while (page_start <= page_end) {
                llbitmap_raise_barrier(llbitmap, page_start);
                page_start++;
        }
}
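/*
 * Worked example for llbitmap_start_write() above (illustrative numbers:
 * 4k pages and a 64k chunk, i.e. chunkshift == 7): a 4k write at sector
 * 2048 gives start = end = 2048 >> 7 = 16, and the covering bitmap page is
 * (16 + 1024) >> PAGE_SHIFT = 0, so exactly one page barrier is raised.
 */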
static void llbitmap_end_write(struct mddev *mddev, sector_t offset,
                               unsigned long sectors)
{
        struct llbitmap *llbitmap = mddev->bitmap;
        unsigned long start = offset >> llbitmap->chunkshift;
        unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
        int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
        int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;

        while (page_start <= page_end) {
                llbitmap_release_barrier(llbitmap, page_start);
                page_start++;
        }
}

static void llbitmap_start_discard(struct mddev *mddev, sector_t offset,
                                   unsigned long sectors)
{
        struct llbitmap *llbitmap = mddev->bitmap;
        unsigned long start = DIV_ROUND_UP_SECTOR_T(offset, llbitmap->chunksize);
        unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
        int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
        int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;

        llbitmap_state_machine(llbitmap, start, end, BitmapActionDiscard);

        while (page_start <= page_end) {
                llbitmap_raise_barrier(llbitmap, page_start);
                page_start++;
        }
}

static void llbitmap_end_discard(struct mddev *mddev, sector_t offset,
                                 unsigned long sectors)
{
        struct llbitmap *llbitmap = mddev->bitmap;
        unsigned long start = DIV_ROUND_UP_SECTOR_T(offset, llbitmap->chunksize);
        unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
        int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
        int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;

        while (page_start <= page_end) {
                llbitmap_release_barrier(llbitmap, page_start);
                page_start++;
        }
}

static void llbitmap_unplug_fn(struct work_struct *work)
{
        struct llbitmap_unplug_work *unplug_work =
                container_of(work, struct llbitmap_unplug_work, work);
        struct llbitmap *llbitmap = unplug_work->llbitmap;
        struct blk_plug plug;
        int i;

        blk_start_plug(&plug);

        for (i = 0; i < llbitmap->nr_pages; i++) {
                if (!test_bit(LLPageDirty, &llbitmap->pctl[i]->flags) ||
                    !test_and_clear_bit(LLPageDirty, &llbitmap->pctl[i]->flags))
                        continue;

                llbitmap_write_page(llbitmap, i);
        }

        blk_finish_plug(&plug);
        md_super_wait(llbitmap->mddev);
        complete(unplug_work->done);
}

static bool llbitmap_dirty(struct llbitmap *llbitmap)
{
        int i;

        for (i = 0; i < llbitmap->nr_pages; i++)
                if (test_bit(LLPageDirty, &llbitmap->pctl[i]->flags))
                        return true;

        return false;
}

static void llbitmap_unplug(struct mddev *mddev, bool sync)
{
        DECLARE_COMPLETION_ONSTACK(done);
        struct llbitmap *llbitmap = mddev->bitmap;
        struct llbitmap_unplug_work unplug_work = {
                .llbitmap = llbitmap,
                .done = &done,
        };

        if (!llbitmap_dirty(llbitmap))
                return;

        /*
         * Issuing new bitmap IO under the submit_bio() context will deadlock:
         * - the bio will wait for the bitmap bio to be done, before it can
         *   be issued;
         * - the bitmap bio will be added to current->bio_list and wait for
         *   this bio to be issued;
         */
        INIT_WORK_ONSTACK(&unplug_work.work, llbitmap_unplug_fn);
        queue_work(md_llbitmap_unplug_wq, &unplug_work.work);
        wait_for_completion(&done);
        destroy_work_on_stack(&unplug_work.work);
}
/*
 * Force writing all bitmap pages to disk; called when stopping the array, or
 * every daemon_sleep seconds while sync_thread is running.
 */
static void __llbitmap_flush(struct mddev *mddev)
{
        struct llbitmap *llbitmap = mddev->bitmap;
        struct blk_plug plug;
        int i;

        blk_start_plug(&plug);
        for (i = 0; i < llbitmap->nr_pages; i++) {
                struct llbitmap_page_ctl *pctl = llbitmap->pctl[i];

                /* mark all blocks as dirty */
                set_bit(LLPageDirty, &pctl->flags);
                bitmap_fill(pctl->dirty, llbitmap->blocks_per_page);
                llbitmap_write_page(llbitmap, i);
        }
        blk_finish_plug(&plug);
        md_super_wait(llbitmap->mddev);
}

static void llbitmap_flush(struct mddev *mddev)
{
        struct llbitmap *llbitmap = mddev->bitmap;
        int i;

        for (i = 0; i < llbitmap->nr_pages; i++)
                set_bit(LLPageFlush, &llbitmap->pctl[i]->flags);

        timer_delete_sync(&llbitmap->pending_timer);
        queue_work(md_llbitmap_io_wq, &llbitmap->daemon_work);
        flush_work(&llbitmap->daemon_work);

        __llbitmap_flush(mddev);
}

/* This is used for raid5 lazy initial recovery */
static bool llbitmap_blocks_synced(struct mddev *mddev, sector_t offset)
{
        struct llbitmap *llbitmap = mddev->bitmap;
        unsigned long p = offset >> llbitmap->chunkshift;
        enum llbitmap_state c = llbitmap_read(llbitmap, p);

        return c == BitClean || c == BitDirty;
}

static sector_t llbitmap_skip_sync_blocks(struct mddev *mddev, sector_t offset)
{
        struct llbitmap *llbitmap = mddev->bitmap;
        unsigned long p = offset >> llbitmap->chunkshift;
        int blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1));
        enum llbitmap_state c = llbitmap_read(llbitmap, p);

        /* always skip unwritten blocks */
        if (c == BitUnwritten)
                return blocks;

        /* For a degraded array, don't skip */
        if (mddev->degraded)
                return 0;

        /* For resync, also skip clean/dirty blocks */
        if ((c == BitClean || c == BitDirty) &&
            test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
            !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
                return blocks;

        return 0;
}
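/*
 * Note: "chunksize - (offset & (chunksize - 1))" above is the number of
 * sectors from @offset to the end of its chunk, e.g. with a 128-sector
 * chunk, offset 2050 yields 126 (illustrative numbers). The same expression
 * is used by llbitmap_start_sync() and llbitmap_end_sync() below.
 */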
static bool llbitmap_start_sync(struct mddev *mddev, sector_t offset,
                                sector_t *blocks, bool degraded)
{
        struct llbitmap *llbitmap = mddev->bitmap;
        unsigned long p = offset >> llbitmap->chunkshift;

        /*
         * Handle one bit at a time, this is much simpler. And it doesn't
         * matter if md_do_sync() loops more times.
         */
        *blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1));
        return llbitmap_state_machine(llbitmap, p, p,
                                      BitmapActionStartsync) == BitSyncing;
}

/* Something is wrong, sync_thread stops at @offset */
static void llbitmap_end_sync(struct mddev *mddev, sector_t offset,
                              sector_t *blocks)
{
        struct llbitmap *llbitmap = mddev->bitmap;
        unsigned long p = offset >> llbitmap->chunkshift;

        *blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1));
        llbitmap_state_machine(llbitmap, p, llbitmap->chunks - 1,
                               BitmapActionAbortsync);
}

/* A full sync_thread is finished */
static void llbitmap_close_sync(struct mddev *mddev)
{
        struct llbitmap *llbitmap = mddev->bitmap;
        int i;

        for (i = 0; i < llbitmap->nr_pages; i++) {
                struct llbitmap_page_ctl *pctl = llbitmap->pctl[i];

                /* let daemon_fn clear dirty bits immediately */
                WRITE_ONCE(pctl->expire, jiffies);
        }

        llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1,
                               BitmapActionEndsync);
}

/*
 * sync_thread has reached @sector; update metadata every daemon_sleep
 * seconds, just in case sync_thread has to restart after a power failure.
 */
static void llbitmap_cond_end_sync(struct mddev *mddev, sector_t sector,
                                   bool force)
{
        struct llbitmap *llbitmap = mddev->bitmap;

        if (sector == 0) {
                llbitmap->last_end_sync = jiffies;
                return;
        }

        if (time_before(jiffies, llbitmap->last_end_sync +
                                 HZ * mddev->bitmap_info.daemon_sleep))
                return;

        wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));

        mddev->curr_resync_completed = sector;
        set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
        llbitmap_state_machine(llbitmap, 0, sector >> llbitmap->chunkshift,
                               BitmapActionEndsync);
        __llbitmap_flush(mddev);

        llbitmap->last_end_sync = jiffies;
        sysfs_notify_dirent_safe(mddev->sysfs_completed);
}

static bool llbitmap_enabled(void *data, bool flush)
{
        struct llbitmap *llbitmap = data;

        return llbitmap && !test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags);
}

static void llbitmap_dirty_bits(struct mddev *mddev, unsigned long s,
                                unsigned long e)
{
        llbitmap_state_machine(mddev->bitmap, s, e, BitmapActionStartwrite);
}

static void llbitmap_write_sb(struct llbitmap *llbitmap)
{
        int nr_blocks = DIV_ROUND_UP(BITMAP_DATA_OFFSET, llbitmap->io_size);

        bitmap_fill(llbitmap->pctl[0]->dirty, nr_blocks);
        llbitmap_write_page(llbitmap, 0);
        md_super_wait(llbitmap->mddev);
}

static void llbitmap_update_sb(void *data)
{
        struct llbitmap *llbitmap = data;
        struct mddev *mddev = llbitmap->mddev;
        struct page *sb_page;
        bitmap_super_t *sb;

        if (test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags))
                return;

        sb_page = llbitmap_read_page(llbitmap, 0);
        if (IS_ERR(sb_page)) {
                pr_err("%s: %s: read super block failed\n", __func__,
                       mdname(mddev));
                set_bit(BITMAP_WRITE_ERROR, &llbitmap->flags);
                return;
        }

        if (mddev->events < llbitmap->events_cleared)
                llbitmap->events_cleared = mddev->events;

        sb = kmap_local_page(sb_page);
        sb->events = cpu_to_le64(mddev->events);
        sb->state = cpu_to_le32(llbitmap->flags);
        sb->chunksize = cpu_to_le32(llbitmap->chunksize);
        sb->sync_size = cpu_to_le64(mddev->resync_max_sectors);
        sb->events_cleared = cpu_to_le64(llbitmap->events_cleared);
        sb->sectors_reserved = cpu_to_le32(mddev->bitmap_info.space);
        sb->daemon_sleep = cpu_to_le32(mddev->bitmap_info.daemon_sleep);

        kunmap_local(sb);
        llbitmap_write_sb(llbitmap);
}

static int llbitmap_get_stats(void *data, struct md_bitmap_stats *stats)
{
        struct llbitmap *llbitmap = data;

        memset(stats, 0, sizeof(*stats));

        stats->missing_pages = 0;
        stats->pages = llbitmap->nr_pages;
        stats->file_pages = llbitmap->nr_pages;

        stats->behind_writes = atomic_read(&llbitmap->behind_writes);
        stats->behind_wait = wq_has_sleeper(&llbitmap->behind_wait);
        stats->events_cleared = llbitmap->events_cleared;

        return 0;
}

/* just flag all pages as needing to be written */
static void llbitmap_write_all(struct mddev *mddev)
{
        int i;
        struct llbitmap *llbitmap = mddev->bitmap;

        for (i = 0; i < llbitmap->nr_pages; i++) {
                struct llbitmap_page_ctl *pctl = llbitmap->pctl[i];

                set_bit(LLPageDirty, &pctl->flags);
                bitmap_fill(pctl->dirty, llbitmap->blocks_per_page);
        }
}

static void llbitmap_start_behind_write(struct mddev *mddev)
{
        struct llbitmap *llbitmap = mddev->bitmap;

        atomic_inc(&llbitmap->behind_writes);
}

static void llbitmap_end_behind_write(struct mddev *mddev)
{
        struct llbitmap *llbitmap = mddev->bitmap;

        if (atomic_dec_and_test(&llbitmap->behind_writes))
                wake_up(&llbitmap->behind_wait);
}

static void llbitmap_wait_behind_writes(struct mddev *mddev)
{
        struct llbitmap *llbitmap = mddev->bitmap;

        if (!llbitmap)
                return;

        wait_event(llbitmap->behind_wait,
                   atomic_read(&llbitmap->behind_writes) == 0);
}

static ssize_t bits_show(struct mddev *mddev, char *page)
{
        struct llbitmap *llbitmap;
        int bits[BitStateCount] = {0};
        loff_t start = 0;

        mutex_lock(&mddev->bitmap_info.mutex);
        llbitmap = mddev->bitmap;
        if (!llbitmap || !llbitmap->pctl) {
                mutex_unlock(&mddev->bitmap_info.mutex);
                return sprintf(page, "no bitmap\n");
        }

        if (test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags)) {
                mutex_unlock(&mddev->bitmap_info.mutex);
                return sprintf(page, "bitmap io error\n");
        }

        while (start < llbitmap->chunks) {
                enum llbitmap_state c = llbitmap_read(llbitmap, start);

                if (c < 0 || c >= BitStateCount)
                        pr_err("%s: invalid bit %llu state %d\n",
                               __func__, start, c);
                else
                        bits[c]++;
                start++;
        }

        mutex_unlock(&mddev->bitmap_info.mutex);
        return sprintf(page, "unwritten %d\nclean %d\ndirty %d\nneed sync %d\nsyncing %d\n",
                       bits[BitUnwritten], bits[BitClean], bits[BitDirty],
                       bits[BitNeedSync], bits[BitSyncing]);
}

static struct md_sysfs_entry llbitmap_bits = __ATTR_RO(bits);

static ssize_t metadata_show(struct mddev *mddev, char *page)
{
        struct llbitmap *llbitmap;
        ssize_t ret;

        mutex_lock(&mddev->bitmap_info.mutex);
        llbitmap = mddev->bitmap;
        if (!llbitmap) {
                mutex_unlock(&mddev->bitmap_info.mutex);
                return sprintf(page, "no bitmap\n");
        }

        ret = sprintf(page, "chunksize %lu\nchunkshift %lu\nchunks %lu\noffset %llu\ndaemon_sleep %lu\n",
                      llbitmap->chunksize, llbitmap->chunkshift,
                      llbitmap->chunks, mddev->bitmap_info.offset,
                      llbitmap->mddev->bitmap_info.daemon_sleep);
        mutex_unlock(&mddev->bitmap_info.mutex);

        return ret;
}

static struct md_sysfs_entry llbitmap_metadata = __ATTR_RO(metadata);

static ssize_t
daemon_sleep_show(struct mddev *mddev, char *page)
{
        return sprintf(page, "%lu\n", mddev->bitmap_info.daemon_sleep);
}

static ssize_t
daemon_sleep_store(struct mddev *mddev, const char *buf, size_t len)
{
        unsigned long timeout;
        int rv = kstrtoul(buf, 10, &timeout);

        if (rv)
                return rv;

        mddev->bitmap_info.daemon_sleep = timeout;
        return len;
}

static struct md_sysfs_entry llbitmap_daemon_sleep = __ATTR_RW(daemon_sleep);

static ssize_t
barrier_idle_show(struct mddev *mddev, char *page)
{
        struct llbitmap *llbitmap = mddev->bitmap;

        return sprintf(page, "%lu\n", llbitmap->barrier_idle);
}

static ssize_t
barrier_idle_store(struct mddev *mddev, const char *buf, size_t len)
{
        struct llbitmap *llbitmap = mddev->bitmap;
        unsigned long timeout;
        int rv = kstrtoul(buf, 10, &timeout);

        if (rv)
                return rv;

        llbitmap->barrier_idle = timeout;
        return len;
}

static struct md_sysfs_entry llbitmap_barrier_idle = __ATTR_RW(barrier_idle);

static struct attribute *md_llbitmap_attrs[] = {
        &llbitmap_bits.attr,
        &llbitmap_metadata.attr,
        &llbitmap_daemon_sleep.attr,
        &llbitmap_barrier_idle.attr,
        NULL
};

static struct attribute_group md_llbitmap_group = {
        .name = "llbitmap",
        .attrs = md_llbitmap_attrs,
};

static struct bitmap_operations llbitmap_ops = {
        .head = {
                .type = MD_BITMAP,
                .id = ID_LLBITMAP,
                .name = "llbitmap",
        },

        .enabled = llbitmap_enabled,
        .create = llbitmap_create,
        .resize = llbitmap_resize,
        .load = llbitmap_load,
        .destroy = llbitmap_destroy,

        .start_write = llbitmap_start_write,
        .end_write = llbitmap_end_write,
        .start_discard = llbitmap_start_discard,
        .end_discard = llbitmap_end_discard,
        .unplug = llbitmap_unplug,
        .flush = llbitmap_flush,

        .start_behind_write = llbitmap_start_behind_write,
        .end_behind_write = llbitmap_end_behind_write,
        .wait_behind_writes = llbitmap_wait_behind_writes,

        .blocks_synced = llbitmap_blocks_synced,
        .skip_sync_blocks = llbitmap_skip_sync_blocks,
        .start_sync = llbitmap_start_sync,
        .end_sync = llbitmap_end_sync,
        .close_sync = llbitmap_close_sync,
        .cond_end_sync = llbitmap_cond_end_sync,

        .update_sb = llbitmap_update_sb,
        .get_stats = llbitmap_get_stats,
        .dirty_bits = llbitmap_dirty_bits,
        .write_all = llbitmap_write_all,

        .group = &md_llbitmap_group,
};

int md_llbitmap_init(void)
{
        md_llbitmap_io_wq = alloc_workqueue("md_llbitmap_io",
                                            WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
        if (!md_llbitmap_io_wq)
                return -ENOMEM;

        md_llbitmap_unplug_wq = alloc_workqueue("md_llbitmap_unplug",
                                                WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
        if (!md_llbitmap_unplug_wq) {
                destroy_workqueue(md_llbitmap_io_wq);
                md_llbitmap_io_wq = NULL;
                return -ENOMEM;
        }

        return register_md_submodule(&llbitmap_ops.head);
}

void md_llbitmap_exit(void)
{
        destroy_workqueue(md_llbitmap_io_wq);
        md_llbitmap_io_wq = NULL;
        destroy_workqueue(md_llbitmap_unplug_wq);
        md_llbitmap_unplug_wq = NULL;
        unregister_md_submodule(&llbitmap_ops.head);
}