1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003 4 * 5 * bitmap_create - sets up the bitmap structure 6 * bitmap_destroy - destroys the bitmap structure 7 * 8 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.: 9 * - added disk storage for bitmap 10 * - changes to allow various bitmap chunk sizes 11 */ 12 13 /* 14 * Still to do: 15 * 16 * flush after percent set rather than just time based. (maybe both). 17 */ 18 19 #include <linux/blkdev.h> 20 #include <linux/module.h> 21 #include <linux/errno.h> 22 #include <linux/slab.h> 23 #include <linux/init.h> 24 #include <linux/timer.h> 25 #include <linux/sched.h> 26 #include <linux/list.h> 27 #include <linux/file.h> 28 #include <linux/mount.h> 29 #include <linux/buffer_head.h> 30 #include <linux/seq_file.h> 31 #include <trace/events/block.h> 32 33 #include "md.h" 34 #include "md-bitmap.h" 35 #include "md-cluster.h" 36 37 #define BITMAP_MAJOR_LO 3 38 /* version 4 insists the bitmap is in little-endian order 39 * with version 3, it is host-endian which is non-portable 40 * Version 5 is currently set only for clustered devices 41 */ 42 #define BITMAP_MAJOR_HI 4 43 #define BITMAP_MAJOR_CLUSTERED 5 44 #define BITMAP_MAJOR_HOSTENDIAN 3 45 46 /* 47 * in-memory bitmap: 48 * 49 * Use 16 bit block counters to track pending writes to each "chunk". 50 * The 2 high order bits are special-purpose, the first is a flag indicating 51 * whether a resync is needed. The second is a flag indicating whether a 52 * resync is active. 53 * This means that the counter is actually 14 bits: 54 * 55 * +--------+--------+------------------------------------------------+ 56 * | resync | resync | counter | 57 * | needed | active | | 58 * | (0-1) | (0-1) | (0-16383) | 59 * +--------+--------+------------------------------------------------+ 60 * 61 * The "resync needed" bit is set when: 62 * a '1' bit is read from storage at startup. 63 * a write request fails on some drives 64 * a resync is aborted on a chunk with 'resync active' set 65 * It is cleared (and resync-active set) when a resync starts across all drives 66 * of the chunk. 67 * 68 * 69 * The "resync active" bit is set when: 70 * a resync is started on all drives, and resync_needed is set. 71 * resync_needed will be cleared (as long as resync_active wasn't already set). 72 * It is cleared when a resync completes. 73 * 74 * The counter counts pending write requests, plus the on-disk bit. 75 * When the counter is '1' and the resync bits are clear, the on-disk 76 * bit can be cleared as well, thus setting the counter to 0. 77 * When we set a bit, or in the counter (to start a write), if the fields is 78 * 0, we first set the disk bit and set the counter to 1. 79 * 80 * If the counter is 0, the on-disk bit is clear and the stripe is clean 81 * Anything that dirties the stripe pushes the counter to 2 (at least) 82 * and sets the on-disk bit (lazily). 83 * If a periodic sweep find the counter at 2, it is decremented to 1. 84 * If the sweep find the counter at 1, the on-disk bit is cleared and the 85 * counter goes to zero. 86 * 87 * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block 88 * counters as a fallback when "page" memory cannot be allocated: 89 * 90 * Normal case (page memory allocated): 91 * 92 * page pointer (32-bit) 93 * 94 * [ ] ------+ 95 * | 96 * +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters) 97 * c1 c2 c2048 98 * 99 * Hijacked case (page memory allocation failed): 100 * 101 * hijacked page pointer (32-bit) 102 * 103 * [ ][ ] (no page memory allocated) 104 * counter #1 (16-bit) counter #2 (16-bit) 105 * 106 */ 107 108 #define PAGE_BITS (PAGE_SIZE << 3) 109 #define PAGE_BIT_SHIFT (PAGE_SHIFT + 3) 110 111 #define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK) 112 #define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK) 113 #define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX) 114 115 /* how many counters per page? */ 116 #define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS) 117 /* same, except a shift value for more efficient bitops */ 118 #define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT) 119 /* same, except a mask value for more efficient bitops */ 120 #define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1) 121 122 #define BITMAP_BLOCK_SHIFT 9 123 124 /* 125 * bitmap structures: 126 */ 127 128 /* the in-memory bitmap is represented by bitmap_pages */ 129 struct bitmap_page { 130 /* 131 * map points to the actual memory page 132 */ 133 char *map; 134 /* 135 * in emergencies (when map cannot be alloced), hijack the map 136 * pointer and use it as two counters itself 137 */ 138 unsigned int hijacked:1; 139 /* 140 * If any counter in this page is '1' or '2' - and so could be 141 * cleared then that page is marked as 'pending' 142 */ 143 unsigned int pending:1; 144 /* 145 * count of dirty bits on the page 146 */ 147 unsigned int count:30; 148 }; 149 150 /* the main bitmap structure - one per mddev */ 151 struct bitmap { 152 153 struct bitmap_counts { 154 spinlock_t lock; 155 struct bitmap_page *bp; 156 /* total number of pages in the bitmap */ 157 unsigned long pages; 158 /* number of pages not yet allocated */ 159 unsigned long missing_pages; 160 /* chunksize = 2^chunkshift (for bitops) */ 161 unsigned long chunkshift; 162 /* total number of data chunks for the array */ 163 unsigned long chunks; 164 } counts; 165 166 struct mddev *mddev; /* the md device that the bitmap is for */ 167 168 __u64 events_cleared; 169 int need_sync; 170 171 struct bitmap_storage { 172 /* backing disk file */ 173 struct file *file; 174 /* cached copy of the bitmap file superblock */ 175 struct page *sb_page; 176 unsigned long sb_index; 177 /* list of cache pages for the file */ 178 struct page **filemap; 179 /* attributes associated filemap pages */ 180 unsigned long *filemap_attr; 181 /* number of pages in the file */ 182 unsigned long file_pages; 183 /* total bytes in the bitmap */ 184 unsigned long bytes; 185 } storage; 186 187 unsigned long flags; 188 189 int allclean; 190 191 atomic_t behind_writes; 192 /* highest actual value at runtime */ 193 unsigned long behind_writes_used; 194 195 /* 196 * the bitmap daemon - periodically wakes up and sweeps the bitmap 197 * file, cleaning up bits and flushing out pages to disk as necessary 198 */ 199 unsigned long daemon_lastrun; /* jiffies of last run */ 200 /* 201 * when we lasted called end_sync to update bitmap with resync 202 * progress. 203 */ 204 unsigned long last_end_sync; 205 206 /* pending writes to the bitmap file */ 207 atomic_t pending_writes; 208 wait_queue_head_t write_wait; 209 wait_queue_head_t overflow_wait; 210 wait_queue_head_t behind_wait; 211 212 struct kernfs_node *sysfs_can_clear; 213 /* slot offset for clustered env */ 214 int cluster_slot; 215 }; 216 217 static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks, 218 int chunksize, bool init); 219 220 static inline char *bmname(struct bitmap *bitmap) 221 { 222 return bitmap->mddev ? mdname(bitmap->mddev) : "mdX"; 223 } 224 225 static bool __bitmap_enabled(struct bitmap *bitmap) 226 { 227 return bitmap->storage.filemap && 228 !test_bit(BITMAP_STALE, &bitmap->flags); 229 } 230 231 static bool bitmap_enabled(struct mddev *mddev) 232 { 233 struct bitmap *bitmap = mddev->bitmap; 234 235 if (!bitmap) 236 return false; 237 238 return __bitmap_enabled(bitmap); 239 } 240 241 /* 242 * check a page and, if necessary, allocate it (or hijack it if the alloc fails) 243 * 244 * 1) check to see if this page is allocated, if it's not then try to alloc 245 * 2) if the alloc fails, set the page's hijacked flag so we'll use the 246 * page pointer directly as a counter 247 * 248 * if we find our page, we increment the page's refcount so that it stays 249 * allocated while we're using it 250 */ 251 static int md_bitmap_checkpage(struct bitmap_counts *bitmap, 252 unsigned long page, int create, int no_hijack) 253 __releases(bitmap->lock) 254 __acquires(bitmap->lock) 255 { 256 unsigned char *mappage; 257 258 WARN_ON_ONCE(page >= bitmap->pages); 259 if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ 260 return 0; 261 262 if (bitmap->bp[page].map) /* page is already allocated, just return */ 263 return 0; 264 265 if (!create) 266 return -ENOENT; 267 268 /* this page has not been allocated yet */ 269 270 spin_unlock_irq(&bitmap->lock); 271 /* It is possible that this is being called inside a 272 * prepare_to_wait/finish_wait loop from raid5c:make_request(). 273 * In general it is not permitted to sleep in that context as it 274 * can cause the loop to spin freely. 275 * That doesn't apply here as we can only reach this point 276 * once with any loop. 277 * When this function completes, either bp[page].map or 278 * bp[page].hijacked. In either case, this function will 279 * abort before getting to this point again. So there is 280 * no risk of a free-spin, and so it is safe to assert 281 * that sleeping here is allowed. 282 */ 283 sched_annotate_sleep(); 284 mappage = kzalloc(PAGE_SIZE, GFP_NOIO); 285 spin_lock_irq(&bitmap->lock); 286 287 if (mappage == NULL) { 288 pr_debug("md/bitmap: map page allocation failed, hijacking\n"); 289 /* We don't support hijack for cluster raid */ 290 if (no_hijack) 291 return -ENOMEM; 292 /* failed - set the hijacked flag so that we can use the 293 * pointer as a counter */ 294 if (!bitmap->bp[page].map) 295 bitmap->bp[page].hijacked = 1; 296 } else if (bitmap->bp[page].map || 297 bitmap->bp[page].hijacked) { 298 /* somebody beat us to getting the page */ 299 kfree(mappage); 300 } else { 301 302 /* no page was in place and we have one, so install it */ 303 304 bitmap->bp[page].map = mappage; 305 bitmap->missing_pages--; 306 } 307 return 0; 308 } 309 310 /* if page is completely empty, put it back on the free list, or dealloc it */ 311 /* if page was hijacked, unmark the flag so it might get alloced next time */ 312 /* Note: lock should be held when calling this */ 313 static void md_bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page) 314 { 315 char *ptr; 316 317 if (bitmap->bp[page].count) /* page is still busy */ 318 return; 319 320 /* page is no longer in use, it can be released */ 321 322 if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */ 323 bitmap->bp[page].hijacked = 0; 324 bitmap->bp[page].map = NULL; 325 } else { 326 /* normal case, free the page */ 327 ptr = bitmap->bp[page].map; 328 bitmap->bp[page].map = NULL; 329 bitmap->missing_pages++; 330 kfree(ptr); 331 } 332 } 333 334 /* 335 * bitmap file handling - read and write the bitmap file and its superblock 336 */ 337 338 /* 339 * basic page I/O operations 340 */ 341 342 /* IO operations when bitmap is stored near all superblocks */ 343 344 /* choose a good rdev and read the page from there */ 345 static int read_sb_page(struct mddev *mddev, loff_t offset, 346 struct page *page, unsigned long index, int size) 347 { 348 349 sector_t sector = mddev->bitmap_info.offset + offset + 350 index * (PAGE_SIZE / SECTOR_SIZE); 351 struct md_rdev *rdev; 352 353 rdev_for_each(rdev, mddev) { 354 u32 iosize = roundup(size, bdev_logical_block_size(rdev->bdev)); 355 356 if (!test_bit(In_sync, &rdev->flags) || 357 test_bit(Faulty, &rdev->flags) || 358 test_bit(Bitmap_sync, &rdev->flags)) 359 continue; 360 361 if (sync_page_io(rdev, sector, iosize, page, REQ_OP_READ, true)) 362 return 0; 363 } 364 return -EIO; 365 } 366 367 static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev) 368 { 369 /* Iterate the disks of an mddev, using rcu to protect access to the 370 * linked list, and raising the refcount of devices we return to ensure 371 * they don't disappear while in use. 372 * As devices are only added or removed when raid_disk is < 0 and 373 * nr_pending is 0 and In_sync is clear, the entries we return will 374 * still be in the same position on the list when we re-enter 375 * list_for_each_entry_continue_rcu. 376 * 377 * Note that if entered with 'rdev == NULL' to start at the 378 * beginning, we temporarily assign 'rdev' to an address which 379 * isn't really an rdev, but which can be used by 380 * list_for_each_entry_continue_rcu() to find the first entry. 381 */ 382 rcu_read_lock(); 383 if (rdev == NULL) 384 /* start at the beginning */ 385 rdev = list_entry(&mddev->disks, struct md_rdev, same_set); 386 else { 387 /* release the previous rdev and start from there. */ 388 rdev_dec_pending(rdev, mddev); 389 } 390 list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) { 391 if (rdev->raid_disk >= 0 && 392 !test_bit(Faulty, &rdev->flags)) { 393 /* this is a usable devices */ 394 atomic_inc(&rdev->nr_pending); 395 rcu_read_unlock(); 396 return rdev; 397 } 398 } 399 rcu_read_unlock(); 400 return NULL; 401 } 402 403 static unsigned int optimal_io_size(struct block_device *bdev, 404 unsigned int last_page_size, 405 unsigned int io_size) 406 { 407 if (bdev_io_opt(bdev) > bdev_logical_block_size(bdev)) 408 return roundup(last_page_size, bdev_io_opt(bdev)); 409 return io_size; 410 } 411 412 static unsigned int bitmap_io_size(unsigned int io_size, unsigned int opt_size, 413 loff_t start, loff_t boundary) 414 { 415 if (io_size != opt_size && 416 start + opt_size / SECTOR_SIZE <= boundary) 417 return opt_size; 418 if (start + io_size / SECTOR_SIZE <= boundary) 419 return io_size; 420 421 /* Overflows boundary */ 422 return 0; 423 } 424 425 static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap, 426 unsigned long pg_index, struct page *page) 427 { 428 struct block_device *bdev; 429 struct mddev *mddev = bitmap->mddev; 430 struct bitmap_storage *store = &bitmap->storage; 431 unsigned long num_pages = bitmap->storage.file_pages; 432 unsigned int bitmap_limit = (num_pages - pg_index % num_pages) << PAGE_SHIFT; 433 loff_t sboff, offset = mddev->bitmap_info.offset; 434 sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE; 435 unsigned int size = PAGE_SIZE; 436 unsigned int opt_size = PAGE_SIZE; 437 sector_t doff; 438 439 bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev; 440 /* we compare length (page numbers), not page offset. */ 441 if ((pg_index - store->sb_index) == num_pages - 1) { 442 unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1); 443 444 if (last_page_size == 0) 445 last_page_size = PAGE_SIZE; 446 size = roundup(last_page_size, bdev_logical_block_size(bdev)); 447 opt_size = optimal_io_size(bdev, last_page_size, size); 448 } 449 450 sboff = rdev->sb_start + offset; 451 doff = rdev->data_offset; 452 453 /* Just make sure we aren't corrupting data or metadata */ 454 if (mddev->external) { 455 /* Bitmap could be anywhere. */ 456 if (sboff + ps > doff && 457 sboff < (doff + mddev->dev_sectors + PAGE_SIZE / SECTOR_SIZE)) 458 return -EINVAL; 459 } else if (offset < 0) { 460 /* DATA BITMAP METADATA */ 461 size = bitmap_io_size(size, opt_size, offset + ps, 0); 462 if (size == 0) 463 /* bitmap runs in to metadata */ 464 return -EINVAL; 465 466 if (doff + mddev->dev_sectors > sboff) 467 /* data runs in to bitmap */ 468 return -EINVAL; 469 } else if (rdev->sb_start < rdev->data_offset) { 470 /* METADATA BITMAP DATA */ 471 size = bitmap_io_size(size, opt_size, sboff + ps, doff); 472 if (size == 0) 473 /* bitmap runs in to data */ 474 return -EINVAL; 475 } 476 477 md_super_write(mddev, rdev, sboff + ps, (int)min(size, bitmap_limit), page); 478 return 0; 479 } 480 481 static void write_sb_page(struct bitmap *bitmap, unsigned long pg_index, 482 struct page *page, bool wait) 483 { 484 struct mddev *mddev = bitmap->mddev; 485 486 do { 487 struct md_rdev *rdev = NULL; 488 489 while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { 490 if (__write_sb_page(rdev, bitmap, pg_index, page) < 0) { 491 set_bit(BITMAP_WRITE_ERROR, &bitmap->flags); 492 return; 493 } 494 } 495 } while (wait && md_super_wait(mddev) < 0); 496 } 497 498 static void md_bitmap_file_kick(struct bitmap *bitmap); 499 500 #ifdef CONFIG_MD_BITMAP_FILE 501 static void write_file_page(struct bitmap *bitmap, struct page *page, int wait) 502 { 503 struct buffer_head *bh = page_buffers(page); 504 505 while (bh && bh->b_blocknr) { 506 atomic_inc(&bitmap->pending_writes); 507 set_buffer_locked(bh); 508 set_buffer_mapped(bh); 509 submit_bh(REQ_OP_WRITE | REQ_SYNC, bh); 510 bh = bh->b_this_page; 511 } 512 513 if (wait) 514 wait_event(bitmap->write_wait, 515 atomic_read(&bitmap->pending_writes) == 0); 516 } 517 518 static void end_bitmap_write(struct buffer_head *bh, int uptodate) 519 { 520 struct bitmap *bitmap = bh->b_private; 521 522 if (!uptodate) 523 set_bit(BITMAP_WRITE_ERROR, &bitmap->flags); 524 if (atomic_dec_and_test(&bitmap->pending_writes)) 525 wake_up(&bitmap->write_wait); 526 } 527 528 static void free_buffers(struct page *page) 529 { 530 struct buffer_head *bh; 531 532 if (!PagePrivate(page)) 533 return; 534 535 bh = page_buffers(page); 536 while (bh) { 537 struct buffer_head *next = bh->b_this_page; 538 free_buffer_head(bh); 539 bh = next; 540 } 541 detach_page_private(page); 542 put_page(page); 543 } 544 545 /* read a page from a file. 546 * We both read the page, and attach buffers to the page to record the 547 * address of each block (using bmap). These addresses will be used 548 * to write the block later, completely bypassing the filesystem. 549 * This usage is similar to how swap files are handled, and allows us 550 * to write to a file with no concerns of memory allocation failing. 551 */ 552 static int read_file_page(struct file *file, unsigned long index, 553 struct bitmap *bitmap, unsigned long count, struct page *page) 554 { 555 int ret = 0; 556 struct inode *inode = file_inode(file); 557 struct buffer_head *bh; 558 sector_t block, blk_cur; 559 unsigned long blocksize = i_blocksize(inode); 560 561 pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE, 562 (unsigned long long)index << PAGE_SHIFT); 563 564 bh = alloc_page_buffers(page, blocksize); 565 if (!bh) { 566 ret = -ENOMEM; 567 goto out; 568 } 569 attach_page_private(page, bh); 570 blk_cur = index << (PAGE_SHIFT - inode->i_blkbits); 571 while (bh) { 572 block = blk_cur; 573 574 if (count == 0) 575 bh->b_blocknr = 0; 576 else { 577 ret = bmap(inode, &block); 578 if (ret || !block) { 579 ret = -EINVAL; 580 bh->b_blocknr = 0; 581 goto out; 582 } 583 584 bh->b_blocknr = block; 585 bh->b_bdev = inode->i_sb->s_bdev; 586 if (count < blocksize) 587 count = 0; 588 else 589 count -= blocksize; 590 591 bh->b_end_io = end_bitmap_write; 592 bh->b_private = bitmap; 593 atomic_inc(&bitmap->pending_writes); 594 set_buffer_locked(bh); 595 set_buffer_mapped(bh); 596 submit_bh(REQ_OP_READ, bh); 597 } 598 blk_cur++; 599 bh = bh->b_this_page; 600 } 601 602 wait_event(bitmap->write_wait, 603 atomic_read(&bitmap->pending_writes)==0); 604 if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) 605 ret = -EIO; 606 out: 607 if (ret) 608 pr_err("md: bitmap read error: (%dB @ %llu): %d\n", 609 (int)PAGE_SIZE, 610 (unsigned long long)index << PAGE_SHIFT, 611 ret); 612 return ret; 613 } 614 #else /* CONFIG_MD_BITMAP_FILE */ 615 static void write_file_page(struct bitmap *bitmap, struct page *page, int wait) 616 { 617 } 618 static int read_file_page(struct file *file, unsigned long index, 619 struct bitmap *bitmap, unsigned long count, struct page *page) 620 { 621 return -EIO; 622 } 623 static void free_buffers(struct page *page) 624 { 625 put_page(page); 626 } 627 #endif /* CONFIG_MD_BITMAP_FILE */ 628 629 /* 630 * bitmap file superblock operations 631 */ 632 633 /* 634 * write out a page to a file 635 */ 636 static void filemap_write_page(struct bitmap *bitmap, unsigned long pg_index, 637 bool wait) 638 { 639 struct bitmap_storage *store = &bitmap->storage; 640 struct page *page = store->filemap[pg_index]; 641 642 if (mddev_is_clustered(bitmap->mddev)) { 643 /* go to node bitmap area starting point */ 644 pg_index += store->sb_index; 645 } 646 647 if (store->file) 648 write_file_page(bitmap, page, wait); 649 else 650 write_sb_page(bitmap, pg_index, page, wait); 651 } 652 653 /* 654 * md_bitmap_wait_writes() should be called before writing any bitmap 655 * blocks, to ensure previous writes, particularly from 656 * md_bitmap_daemon_work(), have completed. 657 */ 658 static void md_bitmap_wait_writes(struct bitmap *bitmap) 659 { 660 if (bitmap->storage.file) 661 wait_event(bitmap->write_wait, 662 atomic_read(&bitmap->pending_writes)==0); 663 else 664 /* Note that we ignore the return value. The writes 665 * might have failed, but that would just mean that 666 * some bits which should be cleared haven't been, 667 * which is safe. The relevant bitmap blocks will 668 * probably get written again, but there is no great 669 * loss if they aren't. 670 */ 671 md_super_wait(bitmap->mddev); 672 } 673 674 675 /* update the event counter and sync the superblock to disk */ 676 static void bitmap_update_sb(void *data) 677 { 678 bitmap_super_t *sb; 679 struct bitmap *bitmap = data; 680 681 if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ 682 return; 683 if (bitmap->mddev->bitmap_info.external) 684 return; 685 if (!bitmap->storage.sb_page) /* no superblock */ 686 return; 687 sb = kmap_local_page(bitmap->storage.sb_page); 688 sb->events = cpu_to_le64(bitmap->mddev->events); 689 if (bitmap->mddev->events < bitmap->events_cleared) 690 /* rocking back to read-only */ 691 bitmap->events_cleared = bitmap->mddev->events; 692 sb->events_cleared = cpu_to_le64(bitmap->events_cleared); 693 /* 694 * clear BITMAP_WRITE_ERROR bit to protect against the case that 695 * a bitmap write error occurred but the later writes succeeded. 696 */ 697 sb->state = cpu_to_le32(bitmap->flags & ~BIT(BITMAP_WRITE_ERROR)); 698 /* Just in case these have been changed via sysfs: */ 699 sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ); 700 sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind); 701 /* This might have been changed by a reshape */ 702 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); 703 sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize); 704 sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes); 705 sb->sectors_reserved = cpu_to_le32(bitmap->mddev-> 706 bitmap_info.space); 707 kunmap_local(sb); 708 709 if (bitmap->storage.file) 710 write_file_page(bitmap, bitmap->storage.sb_page, 1); 711 else 712 write_sb_page(bitmap, bitmap->storage.sb_index, 713 bitmap->storage.sb_page, 1); 714 } 715 716 static void bitmap_print_sb(struct bitmap *bitmap) 717 { 718 bitmap_super_t *sb; 719 720 if (!bitmap || !bitmap->storage.sb_page) 721 return; 722 sb = kmap_local_page(bitmap->storage.sb_page); 723 pr_debug("%s: bitmap file superblock:\n", bmname(bitmap)); 724 pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic)); 725 pr_debug(" version: %u\n", le32_to_cpu(sb->version)); 726 pr_debug(" uuid: %08x.%08x.%08x.%08x\n", 727 le32_to_cpu(*(__le32 *)(sb->uuid+0)), 728 le32_to_cpu(*(__le32 *)(sb->uuid+4)), 729 le32_to_cpu(*(__le32 *)(sb->uuid+8)), 730 le32_to_cpu(*(__le32 *)(sb->uuid+12))); 731 pr_debug(" events: %llu\n", 732 (unsigned long long) le64_to_cpu(sb->events)); 733 pr_debug("events cleared: %llu\n", 734 (unsigned long long) le64_to_cpu(sb->events_cleared)); 735 pr_debug(" state: %08x\n", le32_to_cpu(sb->state)); 736 pr_debug(" chunksize: %u B\n", le32_to_cpu(sb->chunksize)); 737 pr_debug(" daemon sleep: %us\n", le32_to_cpu(sb->daemon_sleep)); 738 pr_debug(" sync size: %llu KB\n", 739 (unsigned long long)le64_to_cpu(sb->sync_size)/2); 740 pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind)); 741 kunmap_local(sb); 742 } 743 744 /* 745 * bitmap_new_disk_sb 746 * @bitmap 747 * 748 * This function is somewhat the reverse of bitmap_read_sb. bitmap_read_sb 749 * reads and verifies the on-disk bitmap superblock and populates bitmap_info. 750 * This function verifies 'bitmap_info' and populates the on-disk bitmap 751 * structure, which is to be written to disk. 752 * 753 * Returns: 0 on success, -Exxx on error 754 */ 755 static int md_bitmap_new_disk_sb(struct bitmap *bitmap) 756 { 757 bitmap_super_t *sb; 758 unsigned long chunksize, daemon_sleep, write_behind; 759 760 bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO); 761 if (bitmap->storage.sb_page == NULL) 762 return -ENOMEM; 763 bitmap->storage.sb_index = 0; 764 765 sb = kmap_local_page(bitmap->storage.sb_page); 766 767 sb->magic = cpu_to_le32(BITMAP_MAGIC); 768 sb->version = cpu_to_le32(BITMAP_MAJOR_HI); 769 770 chunksize = bitmap->mddev->bitmap_info.chunksize; 771 BUG_ON(!chunksize); 772 if (!is_power_of_2(chunksize)) { 773 kunmap_local(sb); 774 pr_warn("bitmap chunksize not a power of 2\n"); 775 return -EINVAL; 776 } 777 sb->chunksize = cpu_to_le32(chunksize); 778 779 daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep; 780 if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) { 781 pr_debug("Choosing daemon_sleep default (5 sec)\n"); 782 daemon_sleep = 5 * HZ; 783 } 784 sb->daemon_sleep = cpu_to_le32(daemon_sleep); 785 bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; 786 787 /* 788 * FIXME: write_behind for RAID1. If not specified, what 789 * is a good choice? We choose COUNTER_MAX / 2 arbitrarily. 790 */ 791 write_behind = bitmap->mddev->bitmap_info.max_write_behind; 792 if (write_behind > COUNTER_MAX) 793 write_behind = COUNTER_MAX / 2; 794 sb->write_behind = cpu_to_le32(write_behind); 795 bitmap->mddev->bitmap_info.max_write_behind = write_behind; 796 797 /* keep the array size field of the bitmap superblock up to date */ 798 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); 799 800 memcpy(sb->uuid, bitmap->mddev->uuid, 16); 801 802 set_bit(BITMAP_STALE, &bitmap->flags); 803 sb->state = cpu_to_le32(bitmap->flags); 804 bitmap->events_cleared = bitmap->mddev->events; 805 sb->events_cleared = cpu_to_le64(bitmap->mddev->events); 806 bitmap->mddev->bitmap_info.nodes = 0; 807 808 kunmap_local(sb); 809 810 return 0; 811 } 812 813 /* read the superblock from the bitmap file and initialize some bitmap fields */ 814 static int md_bitmap_read_sb(struct bitmap *bitmap) 815 { 816 char *reason = NULL; 817 bitmap_super_t *sb; 818 unsigned long chunksize, daemon_sleep, write_behind; 819 unsigned long long events; 820 int nodes = 0; 821 unsigned long sectors_reserved = 0; 822 int err = -EINVAL; 823 struct page *sb_page; 824 loff_t offset = 0; 825 826 if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) { 827 chunksize = 128 * 1024 * 1024; 828 daemon_sleep = 5 * HZ; 829 write_behind = 0; 830 set_bit(BITMAP_STALE, &bitmap->flags); 831 err = 0; 832 goto out_no_sb; 833 } 834 /* page 0 is the superblock, read it... */ 835 sb_page = alloc_page(GFP_KERNEL); 836 if (!sb_page) 837 return -ENOMEM; 838 bitmap->storage.sb_page = sb_page; 839 840 re_read: 841 /* If cluster_slot is set, the cluster is setup */ 842 if (bitmap->cluster_slot >= 0) { 843 sector_t bm_blocks = bitmap->mddev->resync_max_sectors; 844 845 bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 846 (bitmap->mddev->bitmap_info.chunksize >> 9)); 847 /* bits to bytes */ 848 bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t); 849 /* to 4k blocks */ 850 bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096); 851 offset = bitmap->cluster_slot * (bm_blocks << 3); 852 pr_debug("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__, 853 bitmap->cluster_slot, offset); 854 } 855 856 if (bitmap->storage.file) { 857 loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host); 858 int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize; 859 860 err = read_file_page(bitmap->storage.file, 0, 861 bitmap, bytes, sb_page); 862 } else { 863 err = read_sb_page(bitmap->mddev, offset, sb_page, 0, 864 sizeof(bitmap_super_t)); 865 } 866 if (err) 867 return err; 868 869 err = -EINVAL; 870 sb = kmap_local_page(sb_page); 871 872 chunksize = le32_to_cpu(sb->chunksize); 873 daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ; 874 write_behind = le32_to_cpu(sb->write_behind); 875 sectors_reserved = le32_to_cpu(sb->sectors_reserved); 876 877 /* verify that the bitmap-specific fields are valid */ 878 if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) 879 reason = "bad magic"; 880 else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO || 881 le32_to_cpu(sb->version) > BITMAP_MAJOR_CLUSTERED) 882 reason = "unrecognized superblock version"; 883 else if (chunksize < 512) 884 reason = "bitmap chunksize too small"; 885 else if (!is_power_of_2(chunksize)) 886 reason = "bitmap chunksize not a power of 2"; 887 else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT) 888 reason = "daemon sleep period out of range"; 889 else if (write_behind > COUNTER_MAX) 890 reason = "write-behind limit out of range (0 - 16383)"; 891 if (reason) { 892 pr_warn("%s: invalid bitmap file superblock: %s\n", 893 bmname(bitmap), reason); 894 goto out; 895 } 896 897 /* 898 * Setup nodes/clustername only if bitmap version is 899 * cluster-compatible 900 */ 901 if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) { 902 nodes = le32_to_cpu(sb->nodes); 903 strscpy(bitmap->mddev->bitmap_info.cluster_name, 904 sb->cluster_name, 64); 905 } 906 907 /* keep the array size field of the bitmap superblock up to date */ 908 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); 909 910 if (bitmap->mddev->persistent) { 911 /* 912 * We have a persistent array superblock, so compare the 913 * bitmap's UUID and event counter to the mddev's 914 */ 915 if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) { 916 pr_warn("%s: bitmap superblock UUID mismatch\n", 917 bmname(bitmap)); 918 goto out; 919 } 920 events = le64_to_cpu(sb->events); 921 if (!nodes && (events < bitmap->mddev->events)) { 922 pr_warn("%s: bitmap file is out of date (%llu < %llu) -- forcing full recovery\n", 923 bmname(bitmap), events, 924 (unsigned long long) bitmap->mddev->events); 925 set_bit(BITMAP_STALE, &bitmap->flags); 926 } 927 } 928 929 /* assign fields using values from superblock */ 930 bitmap->flags |= le32_to_cpu(sb->state); 931 if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN) 932 set_bit(BITMAP_HOSTENDIAN, &bitmap->flags); 933 bitmap->events_cleared = le64_to_cpu(sb->events_cleared); 934 err = 0; 935 936 out: 937 kunmap_local(sb); 938 if (err == 0 && nodes && (bitmap->cluster_slot < 0)) { 939 /* Assigning chunksize is required for "re_read" */ 940 bitmap->mddev->bitmap_info.chunksize = chunksize; 941 err = md_setup_cluster(bitmap->mddev, nodes); 942 if (err) { 943 pr_warn("%s: Could not setup cluster service (%d)\n", 944 bmname(bitmap), err); 945 goto out_no_sb; 946 } 947 bitmap->cluster_slot = bitmap->mddev->cluster_ops->slot_number(bitmap->mddev); 948 goto re_read; 949 } 950 951 out_no_sb: 952 if (err == 0) { 953 if (test_bit(BITMAP_STALE, &bitmap->flags)) 954 bitmap->events_cleared = bitmap->mddev->events; 955 bitmap->mddev->bitmap_info.chunksize = chunksize; 956 bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; 957 bitmap->mddev->bitmap_info.max_write_behind = write_behind; 958 bitmap->mddev->bitmap_info.nodes = nodes; 959 if (bitmap->mddev->bitmap_info.space == 0 || 960 bitmap->mddev->bitmap_info.space > sectors_reserved) 961 bitmap->mddev->bitmap_info.space = sectors_reserved; 962 } else { 963 bitmap_print_sb(bitmap); 964 if (bitmap->cluster_slot < 0) 965 md_cluster_stop(bitmap->mddev); 966 } 967 return err; 968 } 969 970 /* 971 * general bitmap file operations 972 */ 973 974 /* 975 * on-disk bitmap: 976 * 977 * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap 978 * file a page at a time. There's a superblock at the start of the file. 979 */ 980 /* calculate the index of the page that contains this bit */ 981 static inline unsigned long file_page_index(struct bitmap_storage *store, 982 unsigned long chunk) 983 { 984 if (store->sb_page) 985 chunk += sizeof(bitmap_super_t) << 3; 986 return chunk >> PAGE_BIT_SHIFT; 987 } 988 989 /* calculate the (bit) offset of this bit within a page */ 990 static inline unsigned long file_page_offset(struct bitmap_storage *store, 991 unsigned long chunk) 992 { 993 if (store->sb_page) 994 chunk += sizeof(bitmap_super_t) << 3; 995 return chunk & (PAGE_BITS - 1); 996 } 997 998 /* 999 * return a pointer to the page in the filemap that contains the given bit 1000 * 1001 */ 1002 static inline struct page *filemap_get_page(struct bitmap_storage *store, 1003 unsigned long chunk) 1004 { 1005 if (file_page_index(store, chunk) >= store->file_pages) 1006 return NULL; 1007 return store->filemap[file_page_index(store, chunk)]; 1008 } 1009 1010 static int md_bitmap_storage_alloc(struct bitmap_storage *store, 1011 unsigned long chunks, int with_super, 1012 int slot_number) 1013 { 1014 int pnum, offset = 0; 1015 unsigned long num_pages; 1016 unsigned long bytes; 1017 1018 bytes = DIV_ROUND_UP(chunks, 8); 1019 if (with_super) 1020 bytes += sizeof(bitmap_super_t); 1021 1022 num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE); 1023 offset = slot_number * num_pages; 1024 1025 store->filemap = kmalloc_array(num_pages, sizeof(struct page *), 1026 GFP_KERNEL); 1027 if (!store->filemap) 1028 return -ENOMEM; 1029 1030 if (with_super && !store->sb_page) { 1031 store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO); 1032 if (store->sb_page == NULL) 1033 return -ENOMEM; 1034 } 1035 1036 pnum = 0; 1037 if (store->sb_page) { 1038 store->filemap[0] = store->sb_page; 1039 pnum = 1; 1040 store->sb_index = offset; 1041 } 1042 1043 for ( ; pnum < num_pages; pnum++) { 1044 store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO); 1045 if (!store->filemap[pnum]) { 1046 store->file_pages = pnum; 1047 return -ENOMEM; 1048 } 1049 } 1050 store->file_pages = pnum; 1051 1052 /* We need 4 bits per page, rounded up to a multiple 1053 * of sizeof(unsigned long) */ 1054 store->filemap_attr = kzalloc( 1055 roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), 1056 GFP_KERNEL); 1057 if (!store->filemap_attr) 1058 return -ENOMEM; 1059 1060 store->bytes = bytes; 1061 1062 return 0; 1063 } 1064 1065 static void md_bitmap_file_unmap(struct bitmap_storage *store) 1066 { 1067 struct file *file = store->file; 1068 struct page *sb_page = store->sb_page; 1069 struct page **map = store->filemap; 1070 int pages = store->file_pages; 1071 1072 while (pages--) 1073 if (map[pages] != sb_page) /* 0 is sb_page, release it below */ 1074 free_buffers(map[pages]); 1075 kfree(map); 1076 kfree(store->filemap_attr); 1077 1078 if (sb_page) 1079 free_buffers(sb_page); 1080 1081 if (file) { 1082 struct inode *inode = file_inode(file); 1083 invalidate_mapping_pages(inode->i_mapping, 0, -1); 1084 fput(file); 1085 } 1086 } 1087 1088 /* 1089 * bitmap_file_kick - if an error occurs while manipulating the bitmap file 1090 * then it is no longer reliable, so we stop using it and we mark the file 1091 * as failed in the superblock 1092 */ 1093 static void md_bitmap_file_kick(struct bitmap *bitmap) 1094 { 1095 if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) { 1096 bitmap_update_sb(bitmap); 1097 1098 if (bitmap->storage.file) { 1099 pr_warn("%s: kicking failed bitmap file %pD4 from array!\n", 1100 bmname(bitmap), bitmap->storage.file); 1101 1102 } else 1103 pr_warn("%s: disabling internal bitmap due to errors\n", 1104 bmname(bitmap)); 1105 } 1106 } 1107 1108 enum bitmap_page_attr { 1109 BITMAP_PAGE_DIRTY = 0, /* there are set bits that need to be synced */ 1110 BITMAP_PAGE_PENDING = 1, /* there are bits that are being cleaned. 1111 * i.e. counter is 1 or 2. */ 1112 BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */ 1113 }; 1114 1115 static inline void set_page_attr(struct bitmap *bitmap, int pnum, 1116 enum bitmap_page_attr attr) 1117 { 1118 set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); 1119 } 1120 1121 static inline void clear_page_attr(struct bitmap *bitmap, int pnum, 1122 enum bitmap_page_attr attr) 1123 { 1124 clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); 1125 } 1126 1127 static inline int test_page_attr(struct bitmap *bitmap, int pnum, 1128 enum bitmap_page_attr attr) 1129 { 1130 return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); 1131 } 1132 1133 static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum, 1134 enum bitmap_page_attr attr) 1135 { 1136 return test_and_clear_bit((pnum<<2) + attr, 1137 bitmap->storage.filemap_attr); 1138 } 1139 /* 1140 * bitmap_file_set_bit -- called before performing a write to the md device 1141 * to set (and eventually sync) a particular bit in the bitmap file 1142 * 1143 * we set the bit immediately, then we record the page number so that 1144 * when an unplug occurs, we can flush the dirty pages out to disk 1145 */ 1146 static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) 1147 { 1148 unsigned long bit; 1149 struct page *page; 1150 void *kaddr; 1151 unsigned long chunk = block >> bitmap->counts.chunkshift; 1152 struct bitmap_storage *store = &bitmap->storage; 1153 unsigned long index = file_page_index(store, chunk); 1154 unsigned long node_offset = 0; 1155 1156 index += store->sb_index; 1157 if (mddev_is_clustered(bitmap->mddev)) 1158 node_offset = bitmap->cluster_slot * store->file_pages; 1159 1160 page = filemap_get_page(&bitmap->storage, chunk); 1161 if (!page) 1162 return; 1163 bit = file_page_offset(&bitmap->storage, chunk); 1164 1165 /* set the bit */ 1166 kaddr = kmap_local_page(page); 1167 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 1168 set_bit(bit, kaddr); 1169 else 1170 set_bit_le(bit, kaddr); 1171 kunmap_local(kaddr); 1172 pr_debug("set file bit %lu page %lu\n", bit, index); 1173 /* record page number so it gets flushed to disk when unplug occurs */ 1174 set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_DIRTY); 1175 } 1176 1177 static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block) 1178 { 1179 unsigned long bit; 1180 struct page *page; 1181 void *paddr; 1182 unsigned long chunk = block >> bitmap->counts.chunkshift; 1183 struct bitmap_storage *store = &bitmap->storage; 1184 unsigned long index = file_page_index(store, chunk); 1185 unsigned long node_offset = 0; 1186 1187 index += store->sb_index; 1188 if (mddev_is_clustered(bitmap->mddev)) 1189 node_offset = bitmap->cluster_slot * store->file_pages; 1190 1191 page = filemap_get_page(&bitmap->storage, chunk); 1192 if (!page) 1193 return; 1194 bit = file_page_offset(&bitmap->storage, chunk); 1195 paddr = kmap_local_page(page); 1196 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 1197 clear_bit(bit, paddr); 1198 else 1199 clear_bit_le(bit, paddr); 1200 kunmap_local(paddr); 1201 if (!test_page_attr(bitmap, index - node_offset, BITMAP_PAGE_NEEDWRITE)) { 1202 set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_PENDING); 1203 bitmap->allclean = 0; 1204 } 1205 } 1206 1207 static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block) 1208 { 1209 unsigned long bit; 1210 struct page *page; 1211 void *paddr; 1212 unsigned long chunk = block >> bitmap->counts.chunkshift; 1213 int set = 0; 1214 1215 page = filemap_get_page(&bitmap->storage, chunk); 1216 if (!page) 1217 return -EINVAL; 1218 bit = file_page_offset(&bitmap->storage, chunk); 1219 paddr = kmap_local_page(page); 1220 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 1221 set = test_bit(bit, paddr); 1222 else 1223 set = test_bit_le(bit, paddr); 1224 kunmap_local(paddr); 1225 return set; 1226 } 1227 1228 /* this gets called when the md device is ready to unplug its underlying 1229 * (slave) device queues -- before we let any writes go down, we need to 1230 * sync the dirty pages of the bitmap file to disk */ 1231 static void __bitmap_unplug(struct bitmap *bitmap) 1232 { 1233 unsigned long i; 1234 int dirty, need_write; 1235 int writing = 0; 1236 1237 if (!__bitmap_enabled(bitmap)) 1238 return; 1239 1240 /* look at each page to see if there are any set bits that need to be 1241 * flushed out to disk */ 1242 for (i = 0; i < bitmap->storage.file_pages; i++) { 1243 dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY); 1244 need_write = test_and_clear_page_attr(bitmap, i, 1245 BITMAP_PAGE_NEEDWRITE); 1246 if (dirty || need_write) { 1247 if (!writing) { 1248 md_bitmap_wait_writes(bitmap); 1249 mddev_add_trace_msg(bitmap->mddev, 1250 "md bitmap_unplug"); 1251 } 1252 clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING); 1253 filemap_write_page(bitmap, i, false); 1254 writing = 1; 1255 } 1256 } 1257 if (writing) 1258 md_bitmap_wait_writes(bitmap); 1259 1260 if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) 1261 md_bitmap_file_kick(bitmap); 1262 } 1263 1264 struct bitmap_unplug_work { 1265 struct work_struct work; 1266 struct bitmap *bitmap; 1267 struct completion *done; 1268 }; 1269 1270 static void md_bitmap_unplug_fn(struct work_struct *work) 1271 { 1272 struct bitmap_unplug_work *unplug_work = 1273 container_of(work, struct bitmap_unplug_work, work); 1274 1275 __bitmap_unplug(unplug_work->bitmap); 1276 complete(unplug_work->done); 1277 } 1278 1279 static void bitmap_unplug_async(struct bitmap *bitmap) 1280 { 1281 DECLARE_COMPLETION_ONSTACK(done); 1282 struct bitmap_unplug_work unplug_work; 1283 1284 INIT_WORK_ONSTACK(&unplug_work.work, md_bitmap_unplug_fn); 1285 unplug_work.bitmap = bitmap; 1286 unplug_work.done = &done; 1287 1288 queue_work(md_bitmap_wq, &unplug_work.work); 1289 wait_for_completion(&done); 1290 destroy_work_on_stack(&unplug_work.work); 1291 } 1292 1293 static void bitmap_unplug(struct mddev *mddev, bool sync) 1294 { 1295 struct bitmap *bitmap = mddev->bitmap; 1296 1297 if (!bitmap) 1298 return; 1299 1300 if (sync) 1301 __bitmap_unplug(bitmap); 1302 else 1303 bitmap_unplug_async(bitmap); 1304 } 1305 1306 static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); 1307 1308 /* 1309 * Initialize the in-memory bitmap from the on-disk bitmap and set up the memory 1310 * mapping of the bitmap file. 1311 * 1312 * Special case: If there's no bitmap file, or if the bitmap file had been 1313 * previously kicked from the array, we mark all the bits as 1's in order to 1314 * cause a full resync. 1315 * 1316 * We ignore all bits for sectors that end earlier than 'start'. 1317 * This is used when reading an out-of-date bitmap. 1318 */ 1319 static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) 1320 { 1321 bool outofdate = test_bit(BITMAP_STALE, &bitmap->flags); 1322 struct mddev *mddev = bitmap->mddev; 1323 unsigned long chunks = bitmap->counts.chunks; 1324 struct bitmap_storage *store = &bitmap->storage; 1325 struct file *file = store->file; 1326 unsigned long node_offset = 0; 1327 unsigned long bit_cnt = 0; 1328 unsigned long i; 1329 int ret; 1330 1331 if (!file && !mddev->bitmap_info.offset) { 1332 /* No permanent bitmap - fill with '1s'. */ 1333 store->filemap = NULL; 1334 store->file_pages = 0; 1335 for (i = 0; i < chunks ; i++) { 1336 /* if the disk bit is set, set the memory bit */ 1337 int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift) 1338 >= start); 1339 md_bitmap_set_memory_bits(bitmap, 1340 (sector_t)i << bitmap->counts.chunkshift, 1341 needed); 1342 } 1343 return 0; 1344 } 1345 1346 if (file && i_size_read(file->f_mapping->host) < store->bytes) { 1347 pr_warn("%s: bitmap file too short %lu < %lu\n", 1348 bmname(bitmap), 1349 (unsigned long) i_size_read(file->f_mapping->host), 1350 store->bytes); 1351 ret = -ENOSPC; 1352 goto err; 1353 } 1354 1355 if (mddev_is_clustered(mddev)) 1356 node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE)); 1357 1358 for (i = 0; i < store->file_pages; i++) { 1359 struct page *page = store->filemap[i]; 1360 int count; 1361 1362 /* unmap the old page, we're done with it */ 1363 if (i == store->file_pages - 1) 1364 count = store->bytes - i * PAGE_SIZE; 1365 else 1366 count = PAGE_SIZE; 1367 1368 if (file) 1369 ret = read_file_page(file, i, bitmap, count, page); 1370 else 1371 ret = read_sb_page(mddev, 0, page, i + node_offset, 1372 count); 1373 if (ret) 1374 goto err; 1375 } 1376 1377 if (outofdate) { 1378 pr_warn("%s: bitmap file is out of date, doing full recovery\n", 1379 bmname(bitmap)); 1380 1381 for (i = 0; i < store->file_pages; i++) { 1382 struct page *page = store->filemap[i]; 1383 unsigned long offset = 0; 1384 void *paddr; 1385 1386 if (i == 0 && !mddev->bitmap_info.external) 1387 offset = sizeof(bitmap_super_t); 1388 1389 /* 1390 * If the bitmap is out of date, dirty the whole page 1391 * and write it out 1392 */ 1393 paddr = kmap_local_page(page); 1394 memset(paddr + offset, 0xff, PAGE_SIZE - offset); 1395 kunmap_local(paddr); 1396 1397 filemap_write_page(bitmap, i, true); 1398 if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) { 1399 ret = -EIO; 1400 goto err; 1401 } 1402 } 1403 } 1404 1405 for (i = 0; i < chunks; i++) { 1406 struct page *page = filemap_get_page(&bitmap->storage, i); 1407 unsigned long bit = file_page_offset(&bitmap->storage, i); 1408 void *paddr; 1409 bool was_set; 1410 1411 paddr = kmap_local_page(page); 1412 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 1413 was_set = test_bit(bit, paddr); 1414 else 1415 was_set = test_bit_le(bit, paddr); 1416 kunmap_local(paddr); 1417 1418 if (was_set) { 1419 /* if the disk bit is set, set the memory bit */ 1420 int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift 1421 >= start); 1422 md_bitmap_set_memory_bits(bitmap, 1423 (sector_t)i << bitmap->counts.chunkshift, 1424 needed); 1425 bit_cnt++; 1426 } 1427 } 1428 1429 pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n", 1430 bmname(bitmap), store->file_pages, 1431 bit_cnt, chunks); 1432 1433 return 0; 1434 1435 err: 1436 pr_warn("%s: bitmap initialisation failed: %d\n", 1437 bmname(bitmap), ret); 1438 return ret; 1439 } 1440 1441 /* just flag bitmap pages as needing to be written. */ 1442 static void bitmap_write_all(struct mddev *mddev) 1443 { 1444 int i; 1445 struct bitmap *bitmap = mddev->bitmap; 1446 1447 if (!bitmap || !bitmap->storage.filemap) 1448 return; 1449 1450 /* Only one copy, so nothing needed */ 1451 if (bitmap->storage.file) 1452 return; 1453 1454 for (i = 0; i < bitmap->storage.file_pages; i++) 1455 set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE); 1456 bitmap->allclean = 0; 1457 } 1458 1459 static void md_bitmap_count_page(struct bitmap_counts *bitmap, 1460 sector_t offset, int inc) 1461 { 1462 sector_t chunk = offset >> bitmap->chunkshift; 1463 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1464 bitmap->bp[page].count += inc; 1465 md_bitmap_checkfree(bitmap, page); 1466 } 1467 1468 static void md_bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset) 1469 { 1470 sector_t chunk = offset >> bitmap->chunkshift; 1471 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1472 struct bitmap_page *bp = &bitmap->bp[page]; 1473 1474 if (!bp->pending) 1475 bp->pending = 1; 1476 } 1477 1478 static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap, 1479 sector_t offset, sector_t *blocks, 1480 int create); 1481 1482 static void mddev_set_timeout(struct mddev *mddev, unsigned long timeout, 1483 bool force) 1484 { 1485 struct md_thread *thread; 1486 1487 rcu_read_lock(); 1488 thread = rcu_dereference(mddev->thread); 1489 1490 if (!thread) 1491 goto out; 1492 1493 if (force || thread->timeout < MAX_SCHEDULE_TIMEOUT) 1494 thread->timeout = timeout; 1495 1496 out: 1497 rcu_read_unlock(); 1498 } 1499 1500 /* 1501 * bitmap daemon -- periodically wakes up to clean bits and flush pages 1502 * out to disk 1503 */ 1504 static void bitmap_daemon_work(struct mddev *mddev) 1505 { 1506 struct bitmap *bitmap; 1507 unsigned long j; 1508 unsigned long nextpage; 1509 sector_t blocks; 1510 struct bitmap_counts *counts; 1511 1512 /* Use a mutex to guard daemon_work against 1513 * bitmap_destroy. 1514 */ 1515 mutex_lock(&mddev->bitmap_info.mutex); 1516 bitmap = mddev->bitmap; 1517 if (bitmap == NULL) { 1518 mutex_unlock(&mddev->bitmap_info.mutex); 1519 return; 1520 } 1521 if (time_before(jiffies, bitmap->daemon_lastrun 1522 + mddev->bitmap_info.daemon_sleep)) 1523 goto done; 1524 1525 bitmap->daemon_lastrun = jiffies; 1526 if (bitmap->allclean) { 1527 mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true); 1528 goto done; 1529 } 1530 bitmap->allclean = 1; 1531 1532 mddev_add_trace_msg(bitmap->mddev, "md bitmap_daemon_work"); 1533 1534 /* Any file-page which is PENDING now needs to be written. 1535 * So set NEEDWRITE now, then after we make any last-minute changes 1536 * we will write it. 1537 */ 1538 for (j = 0; j < bitmap->storage.file_pages; j++) 1539 if (test_and_clear_page_attr(bitmap, j, 1540 BITMAP_PAGE_PENDING)) 1541 set_page_attr(bitmap, j, 1542 BITMAP_PAGE_NEEDWRITE); 1543 1544 if (bitmap->need_sync && 1545 mddev->bitmap_info.external == 0) { 1546 /* Arrange for superblock update as well as 1547 * other changes */ 1548 bitmap_super_t *sb; 1549 bitmap->need_sync = 0; 1550 if (bitmap->storage.filemap) { 1551 sb = kmap_local_page(bitmap->storage.sb_page); 1552 sb->events_cleared = 1553 cpu_to_le64(bitmap->events_cleared); 1554 kunmap_local(sb); 1555 set_page_attr(bitmap, 0, 1556 BITMAP_PAGE_NEEDWRITE); 1557 } 1558 } 1559 /* Now look at the bitmap counters and if any are '2' or '1', 1560 * decrement and handle accordingly. 1561 */ 1562 counts = &bitmap->counts; 1563 spin_lock_irq(&counts->lock); 1564 nextpage = 0; 1565 for (j = 0; j < counts->chunks; j++) { 1566 bitmap_counter_t *bmc; 1567 sector_t block = (sector_t)j << counts->chunkshift; 1568 1569 if (j == nextpage) { 1570 nextpage += PAGE_COUNTER_RATIO; 1571 if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) { 1572 j |= PAGE_COUNTER_MASK; 1573 continue; 1574 } 1575 counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0; 1576 } 1577 1578 bmc = md_bitmap_get_counter(counts, block, &blocks, 0); 1579 if (!bmc) { 1580 j |= PAGE_COUNTER_MASK; 1581 continue; 1582 } 1583 if (*bmc == 1 && !bitmap->need_sync) { 1584 /* We can clear the bit */ 1585 *bmc = 0; 1586 md_bitmap_count_page(counts, block, -1); 1587 md_bitmap_file_clear_bit(bitmap, block); 1588 } else if (*bmc && *bmc <= 2) { 1589 *bmc = 1; 1590 md_bitmap_set_pending(counts, block); 1591 bitmap->allclean = 0; 1592 } 1593 } 1594 spin_unlock_irq(&counts->lock); 1595 1596 md_bitmap_wait_writes(bitmap); 1597 /* Now start writeout on any page in NEEDWRITE that isn't DIRTY. 1598 * DIRTY pages need to be written by bitmap_unplug so it can wait 1599 * for them. 1600 * If we find any DIRTY page we stop there and let bitmap_unplug 1601 * handle all the rest. This is important in the case where 1602 * the first blocking holds the superblock and it has been updated. 1603 * We mustn't write any other blocks before the superblock. 1604 */ 1605 for (j = 0; 1606 j < bitmap->storage.file_pages 1607 && !test_bit(BITMAP_STALE, &bitmap->flags); 1608 j++) { 1609 if (test_page_attr(bitmap, j, 1610 BITMAP_PAGE_DIRTY)) 1611 /* bitmap_unplug will handle the rest */ 1612 break; 1613 if (bitmap->storage.filemap && 1614 test_and_clear_page_attr(bitmap, j, 1615 BITMAP_PAGE_NEEDWRITE)) 1616 filemap_write_page(bitmap, j, false); 1617 } 1618 1619 done: 1620 if (bitmap->allclean == 0) 1621 mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true); 1622 mutex_unlock(&mddev->bitmap_info.mutex); 1623 } 1624 1625 static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap, 1626 sector_t offset, sector_t *blocks, 1627 int create) 1628 __releases(bitmap->lock) 1629 __acquires(bitmap->lock) 1630 { 1631 /* If 'create', we might release the lock and reclaim it. 1632 * The lock must have been taken with interrupts enabled. 1633 * If !create, we don't release the lock. 1634 */ 1635 sector_t chunk = offset >> bitmap->chunkshift; 1636 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1637 unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; 1638 sector_t csize = ((sector_t)1) << bitmap->chunkshift; 1639 int err; 1640 1641 if (page >= bitmap->pages) { 1642 /* 1643 * This can happen if bitmap_start_sync goes beyond 1644 * End-of-device while looking for a whole page or 1645 * user set a huge number to sysfs bitmap_set_bits. 1646 */ 1647 *blocks = csize - (offset & (csize - 1)); 1648 return NULL; 1649 } 1650 err = md_bitmap_checkpage(bitmap, page, create, 0); 1651 1652 if (bitmap->bp[page].hijacked || 1653 bitmap->bp[page].map == NULL) 1654 csize = ((sector_t)1) << (bitmap->chunkshift + 1655 PAGE_COUNTER_SHIFT); 1656 1657 *blocks = csize - (offset & (csize - 1)); 1658 1659 if (err < 0) 1660 return NULL; 1661 1662 /* now locked ... */ 1663 1664 if (bitmap->bp[page].hijacked) { /* hijacked pointer */ 1665 /* should we use the first or second counter field 1666 * of the hijacked pointer? */ 1667 int hi = (pageoff > PAGE_COUNTER_MASK); 1668 return &((bitmap_counter_t *) 1669 &bitmap->bp[page].map)[hi]; 1670 } else /* page is allocated */ 1671 return (bitmap_counter_t *) 1672 &(bitmap->bp[page].map[pageoff]); 1673 } 1674 1675 static int bitmap_startwrite(struct mddev *mddev, sector_t offset, 1676 unsigned long sectors) 1677 { 1678 struct bitmap *bitmap = mddev->bitmap; 1679 1680 if (!bitmap) 1681 return 0; 1682 1683 while (sectors) { 1684 sector_t blocks; 1685 bitmap_counter_t *bmc; 1686 1687 spin_lock_irq(&bitmap->counts.lock); 1688 bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 1); 1689 if (!bmc) { 1690 spin_unlock_irq(&bitmap->counts.lock); 1691 return 0; 1692 } 1693 1694 if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) { 1695 DEFINE_WAIT(__wait); 1696 /* note that it is safe to do the prepare_to_wait 1697 * after the test as long as we do it before dropping 1698 * the spinlock. 1699 */ 1700 prepare_to_wait(&bitmap->overflow_wait, &__wait, 1701 TASK_UNINTERRUPTIBLE); 1702 spin_unlock_irq(&bitmap->counts.lock); 1703 schedule(); 1704 finish_wait(&bitmap->overflow_wait, &__wait); 1705 continue; 1706 } 1707 1708 switch (*bmc) { 1709 case 0: 1710 md_bitmap_file_set_bit(bitmap, offset); 1711 md_bitmap_count_page(&bitmap->counts, offset, 1); 1712 fallthrough; 1713 case 1: 1714 *bmc = 2; 1715 } 1716 1717 (*bmc)++; 1718 1719 spin_unlock_irq(&bitmap->counts.lock); 1720 1721 offset += blocks; 1722 if (sectors > blocks) 1723 sectors -= blocks; 1724 else 1725 sectors = 0; 1726 } 1727 return 0; 1728 } 1729 1730 static void bitmap_endwrite(struct mddev *mddev, sector_t offset, 1731 unsigned long sectors) 1732 { 1733 struct bitmap *bitmap = mddev->bitmap; 1734 1735 if (!bitmap) 1736 return; 1737 1738 while (sectors) { 1739 sector_t blocks; 1740 unsigned long flags; 1741 bitmap_counter_t *bmc; 1742 1743 spin_lock_irqsave(&bitmap->counts.lock, flags); 1744 bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 0); 1745 if (!bmc) { 1746 spin_unlock_irqrestore(&bitmap->counts.lock, flags); 1747 return; 1748 } 1749 1750 if (!bitmap->mddev->degraded) { 1751 if (bitmap->events_cleared < bitmap->mddev->events) { 1752 bitmap->events_cleared = bitmap->mddev->events; 1753 bitmap->need_sync = 1; 1754 sysfs_notify_dirent_safe( 1755 bitmap->sysfs_can_clear); 1756 } 1757 } else if (!NEEDED(*bmc)) { 1758 *bmc |= NEEDED_MASK; 1759 } 1760 1761 if (COUNTER(*bmc) == COUNTER_MAX) 1762 wake_up(&bitmap->overflow_wait); 1763 1764 (*bmc)--; 1765 if (*bmc <= 2) { 1766 md_bitmap_set_pending(&bitmap->counts, offset); 1767 bitmap->allclean = 0; 1768 } 1769 spin_unlock_irqrestore(&bitmap->counts.lock, flags); 1770 offset += blocks; 1771 if (sectors > blocks) 1772 sectors -= blocks; 1773 else 1774 sectors = 0; 1775 } 1776 } 1777 1778 static bool __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, 1779 sector_t *blocks, bool degraded) 1780 { 1781 bitmap_counter_t *bmc; 1782 bool rv; 1783 1784 if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */ 1785 *blocks = 1024; 1786 return true; /* always resync if no bitmap */ 1787 } 1788 spin_lock_irq(&bitmap->counts.lock); 1789 1790 rv = false; 1791 bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0); 1792 if (bmc) { 1793 /* locked */ 1794 if (RESYNC(*bmc)) { 1795 rv = true; 1796 } else if (NEEDED(*bmc)) { 1797 rv = true; 1798 if (!degraded) { /* don't set/clear bits if degraded */ 1799 *bmc |= RESYNC_MASK; 1800 *bmc &= ~NEEDED_MASK; 1801 } 1802 } 1803 } 1804 spin_unlock_irq(&bitmap->counts.lock); 1805 1806 return rv; 1807 } 1808 1809 static bool bitmap_start_sync(struct mddev *mddev, sector_t offset, 1810 sector_t *blocks, bool degraded) 1811 { 1812 /* bitmap_start_sync must always report on multiples of whole 1813 * pages, otherwise resync (which is very PAGE_SIZE based) will 1814 * get confused. 1815 * So call __bitmap_start_sync repeatedly (if needed) until 1816 * At least PAGE_SIZE>>9 blocks are covered. 1817 * Return the 'or' of the result. 1818 */ 1819 bool rv = false; 1820 sector_t blocks1; 1821 1822 *blocks = 0; 1823 while (*blocks < (PAGE_SIZE>>9)) { 1824 rv |= __bitmap_start_sync(mddev->bitmap, offset, 1825 &blocks1, degraded); 1826 offset += blocks1; 1827 *blocks += blocks1; 1828 } 1829 1830 return rv; 1831 } 1832 1833 static void __bitmap_end_sync(struct bitmap *bitmap, sector_t offset, 1834 sector_t *blocks, bool aborted) 1835 { 1836 bitmap_counter_t *bmc; 1837 unsigned long flags; 1838 1839 if (bitmap == NULL) { 1840 *blocks = 1024; 1841 return; 1842 } 1843 spin_lock_irqsave(&bitmap->counts.lock, flags); 1844 bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0); 1845 if (bmc == NULL) 1846 goto unlock; 1847 /* locked */ 1848 if (RESYNC(*bmc)) { 1849 *bmc &= ~RESYNC_MASK; 1850 1851 if (!NEEDED(*bmc) && aborted) 1852 *bmc |= NEEDED_MASK; 1853 else { 1854 if (*bmc <= 2) { 1855 md_bitmap_set_pending(&bitmap->counts, offset); 1856 bitmap->allclean = 0; 1857 } 1858 } 1859 } 1860 unlock: 1861 spin_unlock_irqrestore(&bitmap->counts.lock, flags); 1862 } 1863 1864 static void bitmap_end_sync(struct mddev *mddev, sector_t offset, 1865 sector_t *blocks) 1866 { 1867 __bitmap_end_sync(mddev->bitmap, offset, blocks, true); 1868 } 1869 1870 static void bitmap_close_sync(struct mddev *mddev) 1871 { 1872 /* Sync has finished, and any bitmap chunks that weren't synced 1873 * properly have been aborted. It remains to us to clear the 1874 * RESYNC bit wherever it is still on 1875 */ 1876 sector_t sector = 0; 1877 sector_t blocks; 1878 struct bitmap *bitmap = mddev->bitmap; 1879 1880 if (!bitmap) 1881 return; 1882 1883 while (sector < bitmap->mddev->resync_max_sectors) { 1884 __bitmap_end_sync(bitmap, sector, &blocks, false); 1885 sector += blocks; 1886 } 1887 } 1888 1889 static void bitmap_cond_end_sync(struct mddev *mddev, sector_t sector, 1890 bool force) 1891 { 1892 sector_t s = 0; 1893 sector_t blocks; 1894 struct bitmap *bitmap = mddev->bitmap; 1895 1896 if (!bitmap) 1897 return; 1898 if (sector == 0) { 1899 bitmap->last_end_sync = jiffies; 1900 return; 1901 } 1902 if (!force && time_before(jiffies, (bitmap->last_end_sync 1903 + bitmap->mddev->bitmap_info.daemon_sleep))) 1904 return; 1905 wait_event(bitmap->mddev->recovery_wait, 1906 atomic_read(&bitmap->mddev->recovery_active) == 0); 1907 1908 bitmap->mddev->curr_resync_completed = sector; 1909 set_bit(MD_SB_CHANGE_CLEAN, &bitmap->mddev->sb_flags); 1910 sector &= ~((1ULL << bitmap->counts.chunkshift) - 1); 1911 s = 0; 1912 while (s < sector && s < bitmap->mddev->resync_max_sectors) { 1913 __bitmap_end_sync(bitmap, s, &blocks, false); 1914 s += blocks; 1915 } 1916 bitmap->last_end_sync = jiffies; 1917 sysfs_notify_dirent_safe(bitmap->mddev->sysfs_completed); 1918 } 1919 1920 static void bitmap_sync_with_cluster(struct mddev *mddev, 1921 sector_t old_lo, sector_t old_hi, 1922 sector_t new_lo, sector_t new_hi) 1923 { 1924 struct bitmap *bitmap = mddev->bitmap; 1925 sector_t sector, blocks = 0; 1926 1927 for (sector = old_lo; sector < new_lo; ) { 1928 __bitmap_end_sync(bitmap, sector, &blocks, false); 1929 sector += blocks; 1930 } 1931 WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n"); 1932 1933 for (sector = old_hi; sector < new_hi; ) { 1934 bitmap_start_sync(mddev, sector, &blocks, false); 1935 sector += blocks; 1936 } 1937 WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n"); 1938 } 1939 1940 static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) 1941 { 1942 /* For each chunk covered by any of these sectors, set the 1943 * counter to 2 and possibly set resync_needed. They should all 1944 * be 0 at this point 1945 */ 1946 1947 sector_t secs; 1948 bitmap_counter_t *bmc; 1949 spin_lock_irq(&bitmap->counts.lock); 1950 bmc = md_bitmap_get_counter(&bitmap->counts, offset, &secs, 1); 1951 if (!bmc) { 1952 spin_unlock_irq(&bitmap->counts.lock); 1953 return; 1954 } 1955 if (!*bmc) { 1956 *bmc = 2; 1957 md_bitmap_count_page(&bitmap->counts, offset, 1); 1958 md_bitmap_set_pending(&bitmap->counts, offset); 1959 bitmap->allclean = 0; 1960 } 1961 if (needed) 1962 *bmc |= NEEDED_MASK; 1963 spin_unlock_irq(&bitmap->counts.lock); 1964 } 1965 1966 /* dirty the memory and file bits for bitmap chunks "s" to "e" */ 1967 static void bitmap_dirty_bits(struct mddev *mddev, unsigned long s, 1968 unsigned long e) 1969 { 1970 unsigned long chunk; 1971 struct bitmap *bitmap = mddev->bitmap; 1972 1973 if (!bitmap) 1974 return; 1975 1976 for (chunk = s; chunk <= e; chunk++) { 1977 sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift; 1978 1979 md_bitmap_set_memory_bits(bitmap, sec, 1); 1980 md_bitmap_file_set_bit(bitmap, sec); 1981 if (sec < bitmap->mddev->recovery_cp) 1982 /* We are asserting that the array is dirty, 1983 * so move the recovery_cp address back so 1984 * that it is obvious that it is dirty 1985 */ 1986 bitmap->mddev->recovery_cp = sec; 1987 } 1988 } 1989 1990 static void bitmap_flush(struct mddev *mddev) 1991 { 1992 struct bitmap *bitmap = mddev->bitmap; 1993 long sleep; 1994 1995 if (!bitmap) /* there was no bitmap */ 1996 return; 1997 1998 /* run the daemon_work three time to ensure everything is flushed 1999 * that can be 2000 */ 2001 sleep = mddev->bitmap_info.daemon_sleep * 2; 2002 bitmap->daemon_lastrun -= sleep; 2003 bitmap_daemon_work(mddev); 2004 bitmap->daemon_lastrun -= sleep; 2005 bitmap_daemon_work(mddev); 2006 bitmap->daemon_lastrun -= sleep; 2007 bitmap_daemon_work(mddev); 2008 if (mddev->bitmap_info.external) 2009 md_super_wait(mddev); 2010 bitmap_update_sb(bitmap); 2011 } 2012 2013 static void md_bitmap_free(void *data) 2014 { 2015 unsigned long k, pages; 2016 struct bitmap_page *bp; 2017 struct bitmap *bitmap = data; 2018 2019 if (!bitmap) /* there was no bitmap */ 2020 return; 2021 2022 if (bitmap->sysfs_can_clear) 2023 sysfs_put(bitmap->sysfs_can_clear); 2024 2025 if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info && 2026 bitmap->cluster_slot == bitmap->mddev->cluster_ops->slot_number(bitmap->mddev)) 2027 md_cluster_stop(bitmap->mddev); 2028 2029 /* Shouldn't be needed - but just in case.... */ 2030 wait_event(bitmap->write_wait, 2031 atomic_read(&bitmap->pending_writes) == 0); 2032 2033 /* release the bitmap file */ 2034 md_bitmap_file_unmap(&bitmap->storage); 2035 2036 bp = bitmap->counts.bp; 2037 pages = bitmap->counts.pages; 2038 2039 /* free all allocated memory */ 2040 2041 if (bp) /* deallocate the page memory */ 2042 for (k = 0; k < pages; k++) 2043 if (bp[k].map && !bp[k].hijacked) 2044 kfree(bp[k].map); 2045 kfree(bp); 2046 kfree(bitmap); 2047 } 2048 2049 static void bitmap_start_behind_write(struct mddev *mddev) 2050 { 2051 struct bitmap *bitmap = mddev->bitmap; 2052 int bw; 2053 2054 if (!bitmap) 2055 return; 2056 2057 atomic_inc(&bitmap->behind_writes); 2058 bw = atomic_read(&bitmap->behind_writes); 2059 if (bw > bitmap->behind_writes_used) 2060 bitmap->behind_writes_used = bw; 2061 2062 pr_debug("inc write-behind count %d/%lu\n", 2063 bw, bitmap->mddev->bitmap_info.max_write_behind); 2064 } 2065 2066 static void bitmap_end_behind_write(struct mddev *mddev) 2067 { 2068 struct bitmap *bitmap = mddev->bitmap; 2069 2070 if (!bitmap) 2071 return; 2072 2073 if (atomic_dec_and_test(&bitmap->behind_writes)) 2074 wake_up(&bitmap->behind_wait); 2075 pr_debug("dec write-behind count %d/%lu\n", 2076 atomic_read(&bitmap->behind_writes), 2077 bitmap->mddev->bitmap_info.max_write_behind); 2078 } 2079 2080 static void bitmap_wait_behind_writes(struct mddev *mddev) 2081 { 2082 struct bitmap *bitmap = mddev->bitmap; 2083 2084 /* wait for behind writes to complete */ 2085 if (bitmap && atomic_read(&bitmap->behind_writes) > 0) { 2086 pr_debug("md:%s: behind writes in progress - waiting to stop.\n", 2087 mdname(mddev)); 2088 /* need to kick something here to make sure I/O goes? */ 2089 wait_event(bitmap->behind_wait, 2090 atomic_read(&bitmap->behind_writes) == 0); 2091 } 2092 } 2093 2094 static void bitmap_destroy(struct mddev *mddev) 2095 { 2096 struct bitmap *bitmap = mddev->bitmap; 2097 2098 if (!bitmap) /* there was no bitmap */ 2099 return; 2100 2101 bitmap_wait_behind_writes(mddev); 2102 if (!mddev->serialize_policy) 2103 mddev_destroy_serial_pool(mddev, NULL); 2104 2105 mutex_lock(&mddev->bitmap_info.mutex); 2106 spin_lock(&mddev->lock); 2107 mddev->bitmap = NULL; /* disconnect from the md device */ 2108 spin_unlock(&mddev->lock); 2109 mutex_unlock(&mddev->bitmap_info.mutex); 2110 mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true); 2111 2112 md_bitmap_free(bitmap); 2113 } 2114 2115 /* 2116 * initialize the bitmap structure 2117 * if this returns an error, bitmap_destroy must be called to do clean up 2118 * once mddev->bitmap is set 2119 */ 2120 static struct bitmap *__bitmap_create(struct mddev *mddev, int slot) 2121 { 2122 struct bitmap *bitmap; 2123 sector_t blocks = mddev->resync_max_sectors; 2124 struct file *file = mddev->bitmap_info.file; 2125 int err; 2126 struct kernfs_node *bm = NULL; 2127 2128 BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); 2129 2130 BUG_ON(file && mddev->bitmap_info.offset); 2131 2132 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { 2133 pr_notice("md/raid:%s: array with journal cannot have bitmap\n", 2134 mdname(mddev)); 2135 return ERR_PTR(-EBUSY); 2136 } 2137 2138 bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); 2139 if (!bitmap) 2140 return ERR_PTR(-ENOMEM); 2141 2142 spin_lock_init(&bitmap->counts.lock); 2143 atomic_set(&bitmap->pending_writes, 0); 2144 init_waitqueue_head(&bitmap->write_wait); 2145 init_waitqueue_head(&bitmap->overflow_wait); 2146 init_waitqueue_head(&bitmap->behind_wait); 2147 2148 bitmap->mddev = mddev; 2149 bitmap->cluster_slot = slot; 2150 2151 if (mddev->kobj.sd) 2152 bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap"); 2153 if (bm) { 2154 bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear"); 2155 sysfs_put(bm); 2156 } else 2157 bitmap->sysfs_can_clear = NULL; 2158 2159 bitmap->storage.file = file; 2160 if (file) { 2161 get_file(file); 2162 /* As future accesses to this file will use bmap, 2163 * and bypass the page cache, we must sync the file 2164 * first. 2165 */ 2166 vfs_fsync(file, 1); 2167 } 2168 /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */ 2169 if (!mddev->bitmap_info.external) { 2170 /* 2171 * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is 2172 * instructing us to create a new on-disk bitmap instance. 2173 */ 2174 if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags)) 2175 err = md_bitmap_new_disk_sb(bitmap); 2176 else 2177 err = md_bitmap_read_sb(bitmap); 2178 } else { 2179 err = 0; 2180 if (mddev->bitmap_info.chunksize == 0 || 2181 mddev->bitmap_info.daemon_sleep == 0) 2182 /* chunksize and time_base need to be 2183 * set first. */ 2184 err = -EINVAL; 2185 } 2186 if (err) 2187 goto error; 2188 2189 bitmap->daemon_lastrun = jiffies; 2190 err = __bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 2191 true); 2192 if (err) 2193 goto error; 2194 2195 pr_debug("created bitmap (%lu pages) for device %s\n", 2196 bitmap->counts.pages, bmname(bitmap)); 2197 2198 err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0; 2199 if (err) 2200 goto error; 2201 2202 return bitmap; 2203 error: 2204 md_bitmap_free(bitmap); 2205 return ERR_PTR(err); 2206 } 2207 2208 static int bitmap_create(struct mddev *mddev, int slot) 2209 { 2210 struct bitmap *bitmap = __bitmap_create(mddev, slot); 2211 2212 if (IS_ERR(bitmap)) 2213 return PTR_ERR(bitmap); 2214 2215 mddev->bitmap = bitmap; 2216 return 0; 2217 } 2218 2219 static int bitmap_load(struct mddev *mddev) 2220 { 2221 int err = 0; 2222 sector_t start = 0; 2223 sector_t sector = 0; 2224 struct bitmap *bitmap = mddev->bitmap; 2225 struct md_rdev *rdev; 2226 2227 if (!bitmap) 2228 goto out; 2229 2230 rdev_for_each(rdev, mddev) 2231 mddev_create_serial_pool(mddev, rdev); 2232 2233 if (mddev_is_clustered(mddev)) 2234 mddev->cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes); 2235 2236 /* Clear out old bitmap info first: Either there is none, or we 2237 * are resuming after someone else has possibly changed things, 2238 * so we should forget old cached info. 2239 * All chunks should be clean, but some might need_sync. 2240 */ 2241 while (sector < mddev->resync_max_sectors) { 2242 sector_t blocks; 2243 bitmap_start_sync(mddev, sector, &blocks, false); 2244 sector += blocks; 2245 } 2246 bitmap_close_sync(mddev); 2247 2248 if (mddev->degraded == 0 2249 || bitmap->events_cleared == mddev->events) 2250 /* no need to keep dirty bits to optimise a 2251 * re-add of a missing device */ 2252 start = mddev->recovery_cp; 2253 2254 mutex_lock(&mddev->bitmap_info.mutex); 2255 err = md_bitmap_init_from_disk(bitmap, start); 2256 mutex_unlock(&mddev->bitmap_info.mutex); 2257 2258 if (err) 2259 goto out; 2260 clear_bit(BITMAP_STALE, &bitmap->flags); 2261 2262 /* Kick recovery in case any bits were set */ 2263 set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery); 2264 2265 mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true); 2266 md_wakeup_thread(mddev->thread); 2267 2268 bitmap_update_sb(bitmap); 2269 2270 if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) 2271 err = -EIO; 2272 out: 2273 return err; 2274 } 2275 2276 /* caller need to free returned bitmap with md_bitmap_free() */ 2277 static void *bitmap_get_from_slot(struct mddev *mddev, int slot) 2278 { 2279 int rv = 0; 2280 struct bitmap *bitmap; 2281 2282 bitmap = __bitmap_create(mddev, slot); 2283 if (IS_ERR(bitmap)) { 2284 rv = PTR_ERR(bitmap); 2285 return ERR_PTR(rv); 2286 } 2287 2288 rv = md_bitmap_init_from_disk(bitmap, 0); 2289 if (rv) { 2290 md_bitmap_free(bitmap); 2291 return ERR_PTR(rv); 2292 } 2293 2294 return bitmap; 2295 } 2296 2297 /* Loads the bitmap associated with slot and copies the resync information 2298 * to our bitmap 2299 */ 2300 static int bitmap_copy_from_slot(struct mddev *mddev, int slot, sector_t *low, 2301 sector_t *high, bool clear_bits) 2302 { 2303 int rv = 0, i, j; 2304 sector_t block, lo = 0, hi = 0; 2305 struct bitmap_counts *counts; 2306 struct bitmap *bitmap; 2307 2308 bitmap = bitmap_get_from_slot(mddev, slot); 2309 if (IS_ERR(bitmap)) { 2310 pr_err("%s can't get bitmap from slot %d\n", __func__, slot); 2311 return -1; 2312 } 2313 2314 counts = &bitmap->counts; 2315 for (j = 0; j < counts->chunks; j++) { 2316 block = (sector_t)j << counts->chunkshift; 2317 if (md_bitmap_file_test_bit(bitmap, block)) { 2318 if (!lo) 2319 lo = block; 2320 hi = block; 2321 md_bitmap_file_clear_bit(bitmap, block); 2322 md_bitmap_set_memory_bits(mddev->bitmap, block, 1); 2323 md_bitmap_file_set_bit(mddev->bitmap, block); 2324 } 2325 } 2326 2327 if (clear_bits) { 2328 bitmap_update_sb(bitmap); 2329 /* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs 2330 * BITMAP_PAGE_DIRTY or _NEEDWRITE to write ... */ 2331 for (i = 0; i < bitmap->storage.file_pages; i++) 2332 if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING)) 2333 set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE); 2334 __bitmap_unplug(bitmap); 2335 } 2336 __bitmap_unplug(mddev->bitmap); 2337 *low = lo; 2338 *high = hi; 2339 md_bitmap_free(bitmap); 2340 2341 return rv; 2342 } 2343 2344 static void bitmap_set_pages(void *data, unsigned long pages) 2345 { 2346 struct bitmap *bitmap = data; 2347 2348 bitmap->counts.pages = pages; 2349 } 2350 2351 static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats) 2352 { 2353 struct bitmap_storage *storage; 2354 struct bitmap_counts *counts; 2355 struct bitmap *bitmap = data; 2356 bitmap_super_t *sb; 2357 2358 if (!bitmap) 2359 return -ENOENT; 2360 if (bitmap->mddev->bitmap_info.external) 2361 return -ENOENT; 2362 if (!bitmap->storage.sb_page) /* no superblock */ 2363 return -EINVAL; 2364 sb = kmap_local_page(bitmap->storage.sb_page); 2365 stats->sync_size = le64_to_cpu(sb->sync_size); 2366 kunmap_local(sb); 2367 2368 counts = &bitmap->counts; 2369 stats->missing_pages = counts->missing_pages; 2370 stats->pages = counts->pages; 2371 2372 storage = &bitmap->storage; 2373 stats->file_pages = storage->file_pages; 2374 stats->file = storage->file; 2375 2376 stats->behind_writes = atomic_read(&bitmap->behind_writes); 2377 stats->behind_wait = wq_has_sleeper(&bitmap->behind_wait); 2378 stats->events_cleared = bitmap->events_cleared; 2379 return 0; 2380 } 2381 2382 static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks, 2383 int chunksize, bool init) 2384 { 2385 /* If chunk_size is 0, choose an appropriate chunk size. 2386 * Then possibly allocate new storage space. 2387 * Then quiesce, copy bits, replace bitmap, and re-start 2388 * 2389 * This function is called both to set up the initial bitmap 2390 * and to resize the bitmap while the array is active. 2391 * If this happens as a result of the array being resized, 2392 * chunksize will be zero, and we need to choose a suitable 2393 * chunksize, otherwise we use what we are given. 2394 */ 2395 struct bitmap_storage store; 2396 struct bitmap_counts old_counts; 2397 unsigned long chunks; 2398 sector_t block; 2399 sector_t old_blocks, new_blocks; 2400 int chunkshift; 2401 int ret = 0; 2402 long pages; 2403 struct bitmap_page *new_bp; 2404 2405 if (bitmap->storage.file && !init) { 2406 pr_info("md: cannot resize file-based bitmap\n"); 2407 return -EINVAL; 2408 } 2409 2410 if (chunksize == 0) { 2411 /* If there is enough space, leave the chunk size unchanged, 2412 * else increase by factor of two until there is enough space. 2413 */ 2414 long bytes; 2415 long space = bitmap->mddev->bitmap_info.space; 2416 2417 if (space == 0) { 2418 /* We don't know how much space there is, so limit 2419 * to current size - in sectors. 2420 */ 2421 bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8); 2422 if (!bitmap->mddev->bitmap_info.external) 2423 bytes += sizeof(bitmap_super_t); 2424 space = DIV_ROUND_UP(bytes, 512); 2425 bitmap->mddev->bitmap_info.space = space; 2426 } 2427 chunkshift = bitmap->counts.chunkshift; 2428 chunkshift--; 2429 do { 2430 /* 'chunkshift' is shift from block size to chunk size */ 2431 chunkshift++; 2432 chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift); 2433 bytes = DIV_ROUND_UP(chunks, 8); 2434 if (!bitmap->mddev->bitmap_info.external) 2435 bytes += sizeof(bitmap_super_t); 2436 } while (bytes > (space << 9) && (chunkshift + BITMAP_BLOCK_SHIFT) < 2437 (BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize) - 1)); 2438 } else 2439 chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT; 2440 2441 chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift); 2442 memset(&store, 0, sizeof(store)); 2443 if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file) 2444 ret = md_bitmap_storage_alloc(&store, chunks, 2445 !bitmap->mddev->bitmap_info.external, 2446 mddev_is_clustered(bitmap->mddev) 2447 ? bitmap->cluster_slot : 0); 2448 if (ret) { 2449 md_bitmap_file_unmap(&store); 2450 goto err; 2451 } 2452 2453 pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO); 2454 2455 new_bp = kcalloc(pages, sizeof(*new_bp), GFP_KERNEL); 2456 ret = -ENOMEM; 2457 if (!new_bp) { 2458 md_bitmap_file_unmap(&store); 2459 goto err; 2460 } 2461 2462 if (!init) 2463 bitmap->mddev->pers->quiesce(bitmap->mddev, 1); 2464 2465 store.file = bitmap->storage.file; 2466 bitmap->storage.file = NULL; 2467 2468 if (store.sb_page && bitmap->storage.sb_page) 2469 memcpy(page_address(store.sb_page), 2470 page_address(bitmap->storage.sb_page), 2471 sizeof(bitmap_super_t)); 2472 spin_lock_irq(&bitmap->counts.lock); 2473 md_bitmap_file_unmap(&bitmap->storage); 2474 bitmap->storage = store; 2475 2476 old_counts = bitmap->counts; 2477 bitmap->counts.bp = new_bp; 2478 bitmap->counts.pages = pages; 2479 bitmap->counts.missing_pages = pages; 2480 bitmap->counts.chunkshift = chunkshift; 2481 bitmap->counts.chunks = chunks; 2482 bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift + 2483 BITMAP_BLOCK_SHIFT); 2484 2485 blocks = min(old_counts.chunks << old_counts.chunkshift, 2486 chunks << chunkshift); 2487 2488 /* For cluster raid, need to pre-allocate bitmap */ 2489 if (mddev_is_clustered(bitmap->mddev)) { 2490 unsigned long page; 2491 for (page = 0; page < pages; page++) { 2492 ret = md_bitmap_checkpage(&bitmap->counts, page, 1, 1); 2493 if (ret) { 2494 unsigned long k; 2495 2496 /* deallocate the page memory */ 2497 for (k = 0; k < page; k++) { 2498 kfree(new_bp[k].map); 2499 } 2500 kfree(new_bp); 2501 2502 /* restore some fields from old_counts */ 2503 bitmap->counts.bp = old_counts.bp; 2504 bitmap->counts.pages = old_counts.pages; 2505 bitmap->counts.missing_pages = old_counts.pages; 2506 bitmap->counts.chunkshift = old_counts.chunkshift; 2507 bitmap->counts.chunks = old_counts.chunks; 2508 bitmap->mddev->bitmap_info.chunksize = 2509 1UL << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT); 2510 blocks = old_counts.chunks << old_counts.chunkshift; 2511 pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n"); 2512 break; 2513 } else 2514 bitmap->counts.bp[page].count += 1; 2515 } 2516 } 2517 2518 for (block = 0; block < blocks; ) { 2519 bitmap_counter_t *bmc_old, *bmc_new; 2520 int set; 2521 2522 bmc_old = md_bitmap_get_counter(&old_counts, block, &old_blocks, 0); 2523 set = bmc_old && NEEDED(*bmc_old); 2524 2525 if (set) { 2526 bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1); 2527 if (bmc_new) { 2528 if (*bmc_new == 0) { 2529 /* need to set on-disk bits too. */ 2530 sector_t end = block + new_blocks; 2531 sector_t start = block >> chunkshift; 2532 2533 start <<= chunkshift; 2534 while (start < end) { 2535 md_bitmap_file_set_bit(bitmap, block); 2536 start += 1 << chunkshift; 2537 } 2538 *bmc_new = 2; 2539 md_bitmap_count_page(&bitmap->counts, block, 1); 2540 md_bitmap_set_pending(&bitmap->counts, block); 2541 } 2542 *bmc_new |= NEEDED_MASK; 2543 } 2544 if (new_blocks < old_blocks) 2545 old_blocks = new_blocks; 2546 } 2547 block += old_blocks; 2548 } 2549 2550 if (bitmap->counts.bp != old_counts.bp) { 2551 unsigned long k; 2552 for (k = 0; k < old_counts.pages; k++) 2553 if (!old_counts.bp[k].hijacked) 2554 kfree(old_counts.bp[k].map); 2555 kfree(old_counts.bp); 2556 } 2557 2558 if (!init) { 2559 int i; 2560 while (block < (chunks << chunkshift)) { 2561 bitmap_counter_t *bmc; 2562 bmc = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1); 2563 if (bmc) { 2564 /* new space. It needs to be resynced, so 2565 * we set NEEDED_MASK. 2566 */ 2567 if (*bmc == 0) { 2568 *bmc = NEEDED_MASK | 2; 2569 md_bitmap_count_page(&bitmap->counts, block, 1); 2570 md_bitmap_set_pending(&bitmap->counts, block); 2571 } 2572 } 2573 block += new_blocks; 2574 } 2575 for (i = 0; i < bitmap->storage.file_pages; i++) 2576 set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY); 2577 } 2578 spin_unlock_irq(&bitmap->counts.lock); 2579 2580 if (!init) { 2581 __bitmap_unplug(bitmap); 2582 bitmap->mddev->pers->quiesce(bitmap->mddev, 0); 2583 } 2584 ret = 0; 2585 err: 2586 return ret; 2587 } 2588 2589 static int bitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize, 2590 bool init) 2591 { 2592 struct bitmap *bitmap = mddev->bitmap; 2593 2594 if (!bitmap) 2595 return 0; 2596 2597 return __bitmap_resize(bitmap, blocks, chunksize, init); 2598 } 2599 2600 static ssize_t 2601 location_show(struct mddev *mddev, char *page) 2602 { 2603 ssize_t len; 2604 if (mddev->bitmap_info.file) 2605 len = sprintf(page, "file"); 2606 else if (mddev->bitmap_info.offset) 2607 len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset); 2608 else 2609 len = sprintf(page, "none"); 2610 len += sprintf(page+len, "\n"); 2611 return len; 2612 } 2613 2614 static ssize_t 2615 location_store(struct mddev *mddev, const char *buf, size_t len) 2616 { 2617 int rv; 2618 2619 rv = mddev_suspend_and_lock(mddev); 2620 if (rv) 2621 return rv; 2622 2623 if (mddev->pers) { 2624 if (mddev->recovery || mddev->sync_thread) { 2625 rv = -EBUSY; 2626 goto out; 2627 } 2628 } 2629 2630 if (mddev->bitmap || mddev->bitmap_info.file || 2631 mddev->bitmap_info.offset) { 2632 /* bitmap already configured. Only option is to clear it */ 2633 if (strncmp(buf, "none", 4) != 0) { 2634 rv = -EBUSY; 2635 goto out; 2636 } 2637 2638 bitmap_destroy(mddev); 2639 mddev->bitmap_info.offset = 0; 2640 if (mddev->bitmap_info.file) { 2641 struct file *f = mddev->bitmap_info.file; 2642 mddev->bitmap_info.file = NULL; 2643 fput(f); 2644 } 2645 } else { 2646 /* No bitmap, OK to set a location */ 2647 long long offset; 2648 2649 if (strncmp(buf, "none", 4) == 0) 2650 /* nothing to be done */; 2651 else if (strncmp(buf, "file:", 5) == 0) { 2652 /* Not supported yet */ 2653 rv = -EINVAL; 2654 goto out; 2655 } else { 2656 if (buf[0] == '+') 2657 rv = kstrtoll(buf+1, 10, &offset); 2658 else 2659 rv = kstrtoll(buf, 10, &offset); 2660 if (rv) 2661 goto out; 2662 if (offset == 0) { 2663 rv = -EINVAL; 2664 goto out; 2665 } 2666 if (mddev->bitmap_info.external == 0 && 2667 mddev->major_version == 0 && 2668 offset != mddev->bitmap_info.default_offset) { 2669 rv = -EINVAL; 2670 goto out; 2671 } 2672 2673 mddev->bitmap_info.offset = offset; 2674 rv = bitmap_create(mddev, -1); 2675 if (rv) 2676 goto out; 2677 2678 rv = bitmap_load(mddev); 2679 if (rv) { 2680 mddev->bitmap_info.offset = 0; 2681 bitmap_destroy(mddev); 2682 goto out; 2683 } 2684 } 2685 } 2686 if (!mddev->external) { 2687 /* Ensure new bitmap info is stored in 2688 * metadata promptly. 2689 */ 2690 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); 2691 md_wakeup_thread(mddev->thread); 2692 } 2693 rv = 0; 2694 out: 2695 mddev_unlock_and_resume(mddev); 2696 if (rv) 2697 return rv; 2698 return len; 2699 } 2700 2701 static struct md_sysfs_entry bitmap_location = 2702 __ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store); 2703 2704 /* 'bitmap/space' is the space available at 'location' for the 2705 * bitmap. This allows the kernel to know when it is safe to 2706 * resize the bitmap to match a resized array. 2707 */ 2708 static ssize_t 2709 space_show(struct mddev *mddev, char *page) 2710 { 2711 return sprintf(page, "%lu\n", mddev->bitmap_info.space); 2712 } 2713 2714 static ssize_t 2715 space_store(struct mddev *mddev, const char *buf, size_t len) 2716 { 2717 struct bitmap *bitmap; 2718 unsigned long sectors; 2719 int rv; 2720 2721 rv = kstrtoul(buf, 10, §ors); 2722 if (rv) 2723 return rv; 2724 2725 if (sectors == 0) 2726 return -EINVAL; 2727 2728 bitmap = mddev->bitmap; 2729 if (bitmap && sectors < (bitmap->storage.bytes + 511) >> 9) 2730 return -EFBIG; /* Bitmap is too big for this small space */ 2731 2732 /* could make sure it isn't too big, but that isn't really 2733 * needed - user-space should be careful. 2734 */ 2735 mddev->bitmap_info.space = sectors; 2736 return len; 2737 } 2738 2739 static struct md_sysfs_entry bitmap_space = 2740 __ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store); 2741 2742 static ssize_t 2743 timeout_show(struct mddev *mddev, char *page) 2744 { 2745 ssize_t len; 2746 unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ; 2747 unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ; 2748 2749 len = sprintf(page, "%lu", secs); 2750 if (jifs) 2751 len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs)); 2752 len += sprintf(page+len, "\n"); 2753 return len; 2754 } 2755 2756 static ssize_t 2757 timeout_store(struct mddev *mddev, const char *buf, size_t len) 2758 { 2759 /* timeout can be set at any time */ 2760 unsigned long timeout; 2761 int rv = strict_strtoul_scaled(buf, &timeout, 4); 2762 if (rv) 2763 return rv; 2764 2765 /* just to make sure we don't overflow... */ 2766 if (timeout >= LONG_MAX / HZ) 2767 return -EINVAL; 2768 2769 timeout = timeout * HZ / 10000; 2770 2771 if (timeout >= MAX_SCHEDULE_TIMEOUT) 2772 timeout = MAX_SCHEDULE_TIMEOUT-1; 2773 if (timeout < 1) 2774 timeout = 1; 2775 2776 mddev->bitmap_info.daemon_sleep = timeout; 2777 mddev_set_timeout(mddev, timeout, false); 2778 md_wakeup_thread(mddev->thread); 2779 2780 return len; 2781 } 2782 2783 static struct md_sysfs_entry bitmap_timeout = 2784 __ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store); 2785 2786 static ssize_t 2787 backlog_show(struct mddev *mddev, char *page) 2788 { 2789 return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind); 2790 } 2791 2792 static ssize_t 2793 backlog_store(struct mddev *mddev, const char *buf, size_t len) 2794 { 2795 unsigned long backlog; 2796 unsigned long old_mwb = mddev->bitmap_info.max_write_behind; 2797 struct md_rdev *rdev; 2798 bool has_write_mostly = false; 2799 int rv = kstrtoul(buf, 10, &backlog); 2800 if (rv) 2801 return rv; 2802 if (backlog > COUNTER_MAX) 2803 return -EINVAL; 2804 2805 rv = mddev_suspend_and_lock(mddev); 2806 if (rv) 2807 return rv; 2808 2809 /* 2810 * Without write mostly device, it doesn't make sense to set 2811 * backlog for max_write_behind. 2812 */ 2813 rdev_for_each(rdev, mddev) { 2814 if (test_bit(WriteMostly, &rdev->flags)) { 2815 has_write_mostly = true; 2816 break; 2817 } 2818 } 2819 if (!has_write_mostly) { 2820 pr_warn_ratelimited("%s: can't set backlog, no write mostly device available\n", 2821 mdname(mddev)); 2822 mddev_unlock(mddev); 2823 return -EINVAL; 2824 } 2825 2826 mddev->bitmap_info.max_write_behind = backlog; 2827 if (!backlog && mddev->serial_info_pool) { 2828 /* serial_info_pool is not needed if backlog is zero */ 2829 if (!mddev->serialize_policy) 2830 mddev_destroy_serial_pool(mddev, NULL); 2831 } else if (backlog && !mddev->serial_info_pool) { 2832 /* serial_info_pool is needed since backlog is not zero */ 2833 rdev_for_each(rdev, mddev) 2834 mddev_create_serial_pool(mddev, rdev); 2835 } 2836 if (old_mwb != backlog) 2837 bitmap_update_sb(mddev->bitmap); 2838 2839 mddev_unlock_and_resume(mddev); 2840 return len; 2841 } 2842 2843 static struct md_sysfs_entry bitmap_backlog = 2844 __ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store); 2845 2846 static ssize_t 2847 chunksize_show(struct mddev *mddev, char *page) 2848 { 2849 return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize); 2850 } 2851 2852 static ssize_t 2853 chunksize_store(struct mddev *mddev, const char *buf, size_t len) 2854 { 2855 /* Can only be changed when no bitmap is active */ 2856 int rv; 2857 unsigned long csize; 2858 if (mddev->bitmap) 2859 return -EBUSY; 2860 rv = kstrtoul(buf, 10, &csize); 2861 if (rv) 2862 return rv; 2863 if (csize < 512 || 2864 !is_power_of_2(csize)) 2865 return -EINVAL; 2866 if (BITS_PER_LONG > 32 && csize >= (1ULL << (BITS_PER_BYTE * 2867 sizeof(((bitmap_super_t *)0)->chunksize)))) 2868 return -EOVERFLOW; 2869 mddev->bitmap_info.chunksize = csize; 2870 return len; 2871 } 2872 2873 static struct md_sysfs_entry bitmap_chunksize = 2874 __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store); 2875 2876 static ssize_t metadata_show(struct mddev *mddev, char *page) 2877 { 2878 if (mddev_is_clustered(mddev)) 2879 return sprintf(page, "clustered\n"); 2880 return sprintf(page, "%s\n", (mddev->bitmap_info.external 2881 ? "external" : "internal")); 2882 } 2883 2884 static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len) 2885 { 2886 if (mddev->bitmap || 2887 mddev->bitmap_info.file || 2888 mddev->bitmap_info.offset) 2889 return -EBUSY; 2890 if (strncmp(buf, "external", 8) == 0) 2891 mddev->bitmap_info.external = 1; 2892 else if ((strncmp(buf, "internal", 8) == 0) || 2893 (strncmp(buf, "clustered", 9) == 0)) 2894 mddev->bitmap_info.external = 0; 2895 else 2896 return -EINVAL; 2897 return len; 2898 } 2899 2900 static struct md_sysfs_entry bitmap_metadata = 2901 __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store); 2902 2903 static ssize_t can_clear_show(struct mddev *mddev, char *page) 2904 { 2905 int len; 2906 struct bitmap *bitmap; 2907 2908 spin_lock(&mddev->lock); 2909 bitmap = mddev->bitmap; 2910 if (bitmap) 2911 len = sprintf(page, "%s\n", (bitmap->need_sync ? "false" : 2912 "true")); 2913 else 2914 len = sprintf(page, "\n"); 2915 spin_unlock(&mddev->lock); 2916 return len; 2917 } 2918 2919 static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len) 2920 { 2921 struct bitmap *bitmap = mddev->bitmap; 2922 2923 if (!bitmap) 2924 return -ENOENT; 2925 2926 if (strncmp(buf, "false", 5) == 0) { 2927 bitmap->need_sync = 1; 2928 return len; 2929 } 2930 2931 if (strncmp(buf, "true", 4) == 0) { 2932 if (mddev->degraded) 2933 return -EBUSY; 2934 bitmap->need_sync = 0; 2935 return len; 2936 } 2937 2938 return -EINVAL; 2939 } 2940 2941 static struct md_sysfs_entry bitmap_can_clear = 2942 __ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store); 2943 2944 static ssize_t 2945 behind_writes_used_show(struct mddev *mddev, char *page) 2946 { 2947 ssize_t ret; 2948 struct bitmap *bitmap; 2949 2950 spin_lock(&mddev->lock); 2951 bitmap = mddev->bitmap; 2952 if (!bitmap) 2953 ret = sprintf(page, "0\n"); 2954 else 2955 ret = sprintf(page, "%lu\n", bitmap->behind_writes_used); 2956 spin_unlock(&mddev->lock); 2957 2958 return ret; 2959 } 2960 2961 static ssize_t 2962 behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len) 2963 { 2964 struct bitmap *bitmap = mddev->bitmap; 2965 2966 if (bitmap) 2967 bitmap->behind_writes_used = 0; 2968 return len; 2969 } 2970 2971 static struct md_sysfs_entry max_backlog_used = 2972 __ATTR(max_backlog_used, S_IRUGO | S_IWUSR, 2973 behind_writes_used_show, behind_writes_used_reset); 2974 2975 static struct attribute *md_bitmap_attrs[] = { 2976 &bitmap_location.attr, 2977 &bitmap_space.attr, 2978 &bitmap_timeout.attr, 2979 &bitmap_backlog.attr, 2980 &bitmap_chunksize.attr, 2981 &bitmap_metadata.attr, 2982 &bitmap_can_clear.attr, 2983 &max_backlog_used.attr, 2984 NULL 2985 }; 2986 const struct attribute_group md_bitmap_group = { 2987 .name = "bitmap", 2988 .attrs = md_bitmap_attrs, 2989 }; 2990 2991 static struct bitmap_operations bitmap_ops = { 2992 .enabled = bitmap_enabled, 2993 .create = bitmap_create, 2994 .resize = bitmap_resize, 2995 .load = bitmap_load, 2996 .destroy = bitmap_destroy, 2997 .flush = bitmap_flush, 2998 .write_all = bitmap_write_all, 2999 .dirty_bits = bitmap_dirty_bits, 3000 .unplug = bitmap_unplug, 3001 .daemon_work = bitmap_daemon_work, 3002 3003 .start_behind_write = bitmap_start_behind_write, 3004 .end_behind_write = bitmap_end_behind_write, 3005 .wait_behind_writes = bitmap_wait_behind_writes, 3006 3007 .startwrite = bitmap_startwrite, 3008 .endwrite = bitmap_endwrite, 3009 .start_sync = bitmap_start_sync, 3010 .end_sync = bitmap_end_sync, 3011 .cond_end_sync = bitmap_cond_end_sync, 3012 .close_sync = bitmap_close_sync, 3013 3014 .update_sb = bitmap_update_sb, 3015 .get_stats = bitmap_get_stats, 3016 3017 .sync_with_cluster = bitmap_sync_with_cluster, 3018 .get_from_slot = bitmap_get_from_slot, 3019 .copy_from_slot = bitmap_copy_from_slot, 3020 .set_pages = bitmap_set_pages, 3021 .free = md_bitmap_free, 3022 }; 3023 3024 void mddev_set_bitmap_ops(struct mddev *mddev) 3025 { 3026 mddev->bitmap_ops = &bitmap_ops; 3027 } 3028