1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003 4 * 5 * bitmap_create - sets up the bitmap structure 6 * bitmap_destroy - destroys the bitmap structure 7 * 8 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.: 9 * - added disk storage for bitmap 10 * - changes to allow various bitmap chunk sizes 11 */ 12 13 /* 14 * Still to do: 15 * 16 * flush after percent set rather than just time based. (maybe both). 17 */ 18 19 #include <linux/blkdev.h> 20 #include <linux/module.h> 21 #include <linux/errno.h> 22 #include <linux/slab.h> 23 #include <linux/init.h> 24 #include <linux/timer.h> 25 #include <linux/sched.h> 26 #include <linux/list.h> 27 #include <linux/file.h> 28 #include <linux/mount.h> 29 #include <linux/buffer_head.h> 30 #include <linux/seq_file.h> 31 #include <trace/events/block.h> 32 33 #include "md.h" 34 #include "md-bitmap.h" 35 #include "md-cluster.h" 36 37 /* 38 * in-memory bitmap: 39 * 40 * Use 16 bit block counters to track pending writes to each "chunk". 41 * The 2 high order bits are special-purpose, the first is a flag indicating 42 * whether a resync is needed. The second is a flag indicating whether a 43 * resync is active. 44 * This means that the counter is actually 14 bits: 45 * 46 * +--------+--------+------------------------------------------------+ 47 * | resync | resync | counter | 48 * | needed | active | | 49 * | (0-1) | (0-1) | (0-16383) | 50 * +--------+--------+------------------------------------------------+ 51 * 52 * The "resync needed" bit is set when: 53 * a '1' bit is read from storage at startup. 54 * a write request fails on some drives 55 * a resync is aborted on a chunk with 'resync active' set 56 * It is cleared (and resync-active set) when a resync starts across all drives 57 * of the chunk. 58 * 59 * 60 * The "resync active" bit is set when: 61 * a resync is started on all drives, and resync_needed is set. 62 * resync_needed will be cleared (as long as resync_active wasn't already set). 63 * It is cleared when a resync completes. 64 * 65 * The counter counts pending write requests, plus the on-disk bit. 66 * When the counter is '1' and the resync bits are clear, the on-disk 67 * bit can be cleared as well, thus setting the counter to 0. 68 * When we set a bit, or in the counter (to start a write), if the fields is 69 * 0, we first set the disk bit and set the counter to 1. 70 * 71 * If the counter is 0, the on-disk bit is clear and the stripe is clean 72 * Anything that dirties the stripe pushes the counter to 2 (at least) 73 * and sets the on-disk bit (lazily). 74 * If a periodic sweep find the counter at 2, it is decremented to 1. 75 * If the sweep find the counter at 1, the on-disk bit is cleared and the 76 * counter goes to zero. 77 * 78 * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block 79 * counters as a fallback when "page" memory cannot be allocated: 80 * 81 * Normal case (page memory allocated): 82 * 83 * page pointer (32-bit) 84 * 85 * [ ] ------+ 86 * | 87 * +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters) 88 * c1 c2 c2048 89 * 90 * Hijacked case (page memory allocation failed): 91 * 92 * hijacked page pointer (32-bit) 93 * 94 * [ ][ ] (no page memory allocated) 95 * counter #1 (16-bit) counter #2 (16-bit) 96 * 97 */ 98 99 typedef __u16 bitmap_counter_t; 100 101 #define PAGE_BITS (PAGE_SIZE << 3) 102 #define PAGE_BIT_SHIFT (PAGE_SHIFT + 3) 103 104 #define COUNTER_BITS 16 105 #define COUNTER_BIT_SHIFT 4 106 #define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3) 107 108 #define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1))) 109 #define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2))) 110 #define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1) 111 112 #define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK) 113 #define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK) 114 #define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX) 115 116 /* how many counters per page? */ 117 #define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS) 118 /* same, except a shift value for more efficient bitops */ 119 #define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT) 120 /* same, except a mask value for more efficient bitops */ 121 #define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1) 122 123 #define BITMAP_BLOCK_SHIFT 9 124 125 /* 126 * bitmap structures: 127 */ 128 129 /* the in-memory bitmap is represented by bitmap_pages */ 130 struct bitmap_page { 131 /* 132 * map points to the actual memory page 133 */ 134 char *map; 135 /* 136 * in emergencies (when map cannot be alloced), hijack the map 137 * pointer and use it as two counters itself 138 */ 139 unsigned int hijacked:1; 140 /* 141 * If any counter in this page is '1' or '2' - and so could be 142 * cleared then that page is marked as 'pending' 143 */ 144 unsigned int pending:1; 145 /* 146 * count of dirty bits on the page 147 */ 148 unsigned int count:30; 149 }; 150 151 /* the main bitmap structure - one per mddev */ 152 struct bitmap { 153 154 struct bitmap_counts { 155 spinlock_t lock; 156 struct bitmap_page *bp; 157 /* total number of pages in the bitmap */ 158 unsigned long pages; 159 /* number of pages not yet allocated */ 160 unsigned long missing_pages; 161 /* chunksize = 2^chunkshift (for bitops) */ 162 unsigned long chunkshift; 163 /* total number of data chunks for the array */ 164 unsigned long chunks; 165 } counts; 166 167 struct mddev *mddev; /* the md device that the bitmap is for */ 168 169 __u64 events_cleared; 170 int need_sync; 171 172 struct bitmap_storage { 173 /* backing disk file */ 174 struct file *file; 175 /* cached copy of the bitmap file superblock */ 176 struct page *sb_page; 177 unsigned long sb_index; 178 /* list of cache pages for the file */ 179 struct page **filemap; 180 /* attributes associated filemap pages */ 181 unsigned long *filemap_attr; 182 /* number of pages in the file */ 183 unsigned long file_pages; 184 /* total bytes in the bitmap */ 185 unsigned long bytes; 186 } storage; 187 188 unsigned long flags; 189 190 int allclean; 191 192 atomic_t behind_writes; 193 /* highest actual value at runtime */ 194 unsigned long behind_writes_used; 195 196 /* 197 * the bitmap daemon - periodically wakes up and sweeps the bitmap 198 * file, cleaning up bits and flushing out pages to disk as necessary 199 */ 200 unsigned long daemon_lastrun; /* jiffies of last run */ 201 /* 202 * when we lasted called end_sync to update bitmap with resync 203 * progress. 204 */ 205 unsigned long last_end_sync; 206 207 /* pending writes to the bitmap file */ 208 atomic_t pending_writes; 209 wait_queue_head_t write_wait; 210 wait_queue_head_t overflow_wait; 211 wait_queue_head_t behind_wait; 212 213 struct kernfs_node *sysfs_can_clear; 214 /* slot offset for clustered env */ 215 int cluster_slot; 216 }; 217 218 static struct workqueue_struct *md_bitmap_wq; 219 static struct attribute_group md_bitmap_internal_group; 220 221 static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks, 222 int chunksize, bool init); 223 224 static inline char *bmname(struct bitmap *bitmap) 225 { 226 return bitmap->mddev ? mdname(bitmap->mddev) : "mdX"; 227 } 228 229 static bool bitmap_enabled(void *data, bool flush) 230 { 231 struct bitmap *bitmap = data; 232 233 if (!flush) 234 return true; 235 236 /* 237 * If caller want to flush bitmap pages to underlying disks, check if 238 * there are cached pages in filemap. 239 */ 240 return !test_bit(BITMAP_STALE, &bitmap->flags) && 241 bitmap->storage.filemap != NULL; 242 } 243 244 /* 245 * check a page and, if necessary, allocate it (or hijack it if the alloc fails) 246 * 247 * 1) check to see if this page is allocated, if it's not then try to alloc 248 * 2) if the alloc fails, set the page's hijacked flag so we'll use the 249 * page pointer directly as a counter 250 * 251 * if we find our page, we increment the page's refcount so that it stays 252 * allocated while we're using it 253 */ 254 static int md_bitmap_checkpage(struct bitmap_counts *bitmap, 255 unsigned long page, int create, int no_hijack) 256 __releases(bitmap->lock) 257 __acquires(bitmap->lock) 258 { 259 unsigned char *mappage; 260 261 WARN_ON_ONCE(page >= bitmap->pages); 262 if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ 263 return 0; 264 265 if (bitmap->bp[page].map) /* page is already allocated, just return */ 266 return 0; 267 268 if (!create) 269 return -ENOENT; 270 271 /* this page has not been allocated yet */ 272 273 spin_unlock_irq(&bitmap->lock); 274 /* It is possible that this is being called inside a 275 * prepare_to_wait/finish_wait loop from raid5c:make_request(). 276 * In general it is not permitted to sleep in that context as it 277 * can cause the loop to spin freely. 278 * That doesn't apply here as we can only reach this point 279 * once with any loop. 280 * When this function completes, either bp[page].map or 281 * bp[page].hijacked. In either case, this function will 282 * abort before getting to this point again. So there is 283 * no risk of a free-spin, and so it is safe to assert 284 * that sleeping here is allowed. 285 */ 286 sched_annotate_sleep(); 287 mappage = kzalloc(PAGE_SIZE, GFP_NOIO); 288 spin_lock_irq(&bitmap->lock); 289 290 if (mappage == NULL) { 291 pr_debug("md/bitmap: map page allocation failed, hijacking\n"); 292 /* We don't support hijack for cluster raid */ 293 if (no_hijack) 294 return -ENOMEM; 295 /* failed - set the hijacked flag so that we can use the 296 * pointer as a counter */ 297 if (!bitmap->bp[page].map) 298 bitmap->bp[page].hijacked = 1; 299 } else if (bitmap->bp[page].map || 300 bitmap->bp[page].hijacked) { 301 /* somebody beat us to getting the page */ 302 kfree(mappage); 303 } else { 304 305 /* no page was in place and we have one, so install it */ 306 307 bitmap->bp[page].map = mappage; 308 bitmap->missing_pages--; 309 } 310 return 0; 311 } 312 313 /* if page is completely empty, put it back on the free list, or dealloc it */ 314 /* if page was hijacked, unmark the flag so it might get alloced next time */ 315 /* Note: lock should be held when calling this */ 316 static void md_bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page) 317 { 318 char *ptr; 319 320 if (bitmap->bp[page].count) /* page is still busy */ 321 return; 322 323 /* page is no longer in use, it can be released */ 324 325 if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */ 326 bitmap->bp[page].hijacked = 0; 327 bitmap->bp[page].map = NULL; 328 } else { 329 /* normal case, free the page */ 330 ptr = bitmap->bp[page].map; 331 bitmap->bp[page].map = NULL; 332 bitmap->missing_pages++; 333 kfree(ptr); 334 } 335 } 336 337 /* 338 * bitmap file handling - read and write the bitmap file and its superblock 339 */ 340 341 /* 342 * basic page I/O operations 343 */ 344 345 /* IO operations when bitmap is stored near all superblocks */ 346 347 /* choose a good rdev and read the page from there */ 348 static int read_sb_page(struct mddev *mddev, loff_t offset, 349 struct page *page, unsigned long index, int size) 350 { 351 352 sector_t sector = mddev->bitmap_info.offset + offset + 353 index * (PAGE_SIZE / SECTOR_SIZE); 354 struct md_rdev *rdev; 355 356 rdev_for_each(rdev, mddev) { 357 u32 iosize = roundup(size, bdev_logical_block_size(rdev->bdev)); 358 359 if (!test_bit(In_sync, &rdev->flags) || 360 test_bit(Faulty, &rdev->flags) || 361 test_bit(Bitmap_sync, &rdev->flags)) 362 continue; 363 364 if (sync_page_io(rdev, sector, iosize, page, REQ_OP_READ, true)) 365 return 0; 366 } 367 return -EIO; 368 } 369 370 static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev) 371 { 372 /* Iterate the disks of an mddev, using rcu to protect access to the 373 * linked list, and raising the refcount of devices we return to ensure 374 * they don't disappear while in use. 375 * As devices are only added or removed when raid_disk is < 0 and 376 * nr_pending is 0 and In_sync is clear, the entries we return will 377 * still be in the same position on the list when we re-enter 378 * list_for_each_entry_continue_rcu. 379 * 380 * Note that if entered with 'rdev == NULL' to start at the 381 * beginning, we temporarily assign 'rdev' to an address which 382 * isn't really an rdev, but which can be used by 383 * list_for_each_entry_continue_rcu() to find the first entry. 384 */ 385 rcu_read_lock(); 386 if (rdev == NULL) 387 /* start at the beginning */ 388 rdev = list_entry(&mddev->disks, struct md_rdev, same_set); 389 else { 390 /* release the previous rdev and start from there. */ 391 rdev_dec_pending(rdev, mddev); 392 } 393 list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) { 394 if (rdev->raid_disk >= 0 && 395 !test_bit(Faulty, &rdev->flags)) { 396 /* this is a usable devices */ 397 atomic_inc(&rdev->nr_pending); 398 rcu_read_unlock(); 399 return rdev; 400 } 401 } 402 rcu_read_unlock(); 403 return NULL; 404 } 405 406 static unsigned int optimal_io_size(struct block_device *bdev, 407 unsigned int last_page_size, 408 unsigned int io_size) 409 { 410 if (bdev_io_opt(bdev) > bdev_logical_block_size(bdev)) 411 return roundup(last_page_size, bdev_io_opt(bdev)); 412 return io_size; 413 } 414 415 static unsigned int bitmap_io_size(unsigned int io_size, unsigned int opt_size, 416 loff_t start, loff_t boundary) 417 { 418 if (io_size != opt_size && 419 start + opt_size / SECTOR_SIZE <= boundary) 420 return opt_size; 421 if (start + io_size / SECTOR_SIZE <= boundary) 422 return io_size; 423 424 /* Overflows boundary */ 425 return 0; 426 } 427 428 static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap, 429 unsigned long pg_index, struct page *page) 430 { 431 struct block_device *bdev; 432 struct mddev *mddev = bitmap->mddev; 433 struct bitmap_storage *store = &bitmap->storage; 434 unsigned long num_pages = bitmap->storage.file_pages; 435 unsigned int bitmap_limit = (num_pages - pg_index % num_pages) << PAGE_SHIFT; 436 loff_t sboff, offset = mddev->bitmap_info.offset; 437 sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE; 438 unsigned int size = PAGE_SIZE; 439 unsigned int opt_size = PAGE_SIZE; 440 sector_t doff; 441 442 bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev; 443 /* we compare length (page numbers), not page offset. */ 444 if ((pg_index - store->sb_index) == num_pages - 1) { 445 unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1); 446 447 if (last_page_size == 0) 448 last_page_size = PAGE_SIZE; 449 size = roundup(last_page_size, bdev_logical_block_size(bdev)); 450 opt_size = optimal_io_size(bdev, last_page_size, size); 451 } 452 453 sboff = rdev->sb_start + offset; 454 doff = rdev->data_offset; 455 456 /* Just make sure we aren't corrupting data or metadata */ 457 if (mddev->external) { 458 /* Bitmap could be anywhere. */ 459 if (sboff + ps > doff && 460 sboff < (doff + mddev->dev_sectors + PAGE_SIZE / SECTOR_SIZE)) 461 return -EINVAL; 462 } else if (offset < 0) { 463 /* DATA BITMAP METADATA */ 464 size = bitmap_io_size(size, opt_size, offset + ps, 0); 465 if (size == 0) 466 /* bitmap runs in to metadata */ 467 return -EINVAL; 468 469 if (doff + mddev->dev_sectors > sboff) 470 /* data runs in to bitmap */ 471 return -EINVAL; 472 } else if (rdev->sb_start < rdev->data_offset) { 473 /* METADATA BITMAP DATA */ 474 size = bitmap_io_size(size, opt_size, sboff + ps, doff); 475 if (size == 0) 476 /* bitmap runs in to data */ 477 return -EINVAL; 478 } 479 480 md_write_metadata(mddev, rdev, sboff + ps, (int)min(size, bitmap_limit), 481 page, 0); 482 return 0; 483 } 484 485 static void write_sb_page(struct bitmap *bitmap, unsigned long pg_index, 486 struct page *page, bool wait) 487 { 488 struct mddev *mddev = bitmap->mddev; 489 490 do { 491 struct md_rdev *rdev = NULL; 492 493 while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { 494 if (__write_sb_page(rdev, bitmap, pg_index, page) < 0) { 495 set_bit(BITMAP_WRITE_ERROR, &bitmap->flags); 496 return; 497 } 498 } 499 } while (wait && md_super_wait(mddev) < 0); 500 } 501 502 static void md_bitmap_file_kick(struct bitmap *bitmap); 503 504 #ifdef CONFIG_MD_BITMAP_FILE 505 static void write_file_page(struct bitmap *bitmap, struct page *page, int wait) 506 { 507 struct buffer_head *bh = page_buffers(page); 508 509 while (bh && bh->b_blocknr) { 510 atomic_inc(&bitmap->pending_writes); 511 set_buffer_locked(bh); 512 set_buffer_mapped(bh); 513 submit_bh(REQ_OP_WRITE | REQ_SYNC, bh); 514 bh = bh->b_this_page; 515 } 516 517 if (wait) 518 wait_event(bitmap->write_wait, 519 atomic_read(&bitmap->pending_writes) == 0); 520 } 521 522 static void end_bitmap_write(struct buffer_head *bh, int uptodate) 523 { 524 struct bitmap *bitmap = bh->b_private; 525 526 if (!uptodate) 527 set_bit(BITMAP_WRITE_ERROR, &bitmap->flags); 528 if (atomic_dec_and_test(&bitmap->pending_writes)) 529 wake_up(&bitmap->write_wait); 530 } 531 532 static void free_buffers(struct page *page) 533 { 534 struct buffer_head *bh; 535 536 if (!PagePrivate(page)) 537 return; 538 539 bh = page_buffers(page); 540 while (bh) { 541 struct buffer_head *next = bh->b_this_page; 542 free_buffer_head(bh); 543 bh = next; 544 } 545 detach_page_private(page); 546 put_page(page); 547 } 548 549 /* read a page from a file. 550 * We both read the page, and attach buffers to the page to record the 551 * address of each block (using bmap). These addresses will be used 552 * to write the block later, completely bypassing the filesystem. 553 * This usage is similar to how swap files are handled, and allows us 554 * to write to a file with no concerns of memory allocation failing. 555 */ 556 static int read_file_page(struct file *file, unsigned long index, 557 struct bitmap *bitmap, unsigned long count, struct page *page) 558 { 559 int ret = 0; 560 struct inode *inode = file_inode(file); 561 struct buffer_head *bh; 562 sector_t block, blk_cur; 563 unsigned long blocksize = i_blocksize(inode); 564 565 pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE, 566 (unsigned long long)index << PAGE_SHIFT); 567 568 bh = alloc_page_buffers(page, blocksize); 569 if (!bh) { 570 ret = -ENOMEM; 571 goto out; 572 } 573 attach_page_private(page, bh); 574 blk_cur = index << (PAGE_SHIFT - inode->i_blkbits); 575 while (bh) { 576 block = blk_cur; 577 578 if (count == 0) 579 bh->b_blocknr = 0; 580 else { 581 ret = bmap(inode, &block); 582 if (ret || !block) { 583 ret = -EINVAL; 584 bh->b_blocknr = 0; 585 goto out; 586 } 587 588 bh->b_blocknr = block; 589 bh->b_bdev = inode->i_sb->s_bdev; 590 if (count < blocksize) 591 count = 0; 592 else 593 count -= blocksize; 594 595 bh->b_end_io = end_bitmap_write; 596 bh->b_private = bitmap; 597 atomic_inc(&bitmap->pending_writes); 598 set_buffer_locked(bh); 599 set_buffer_mapped(bh); 600 submit_bh(REQ_OP_READ, bh); 601 } 602 blk_cur++; 603 bh = bh->b_this_page; 604 } 605 606 wait_event(bitmap->write_wait, 607 atomic_read(&bitmap->pending_writes)==0); 608 if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) 609 ret = -EIO; 610 out: 611 if (ret) 612 pr_err("md: bitmap read error: (%dB @ %llu): %d\n", 613 (int)PAGE_SIZE, 614 (unsigned long long)index << PAGE_SHIFT, 615 ret); 616 return ret; 617 } 618 #else /* CONFIG_MD_BITMAP_FILE */ 619 static void write_file_page(struct bitmap *bitmap, struct page *page, int wait) 620 { 621 } 622 static int read_file_page(struct file *file, unsigned long index, 623 struct bitmap *bitmap, unsigned long count, struct page *page) 624 { 625 return -EIO; 626 } 627 static void free_buffers(struct page *page) 628 { 629 put_page(page); 630 } 631 #endif /* CONFIG_MD_BITMAP_FILE */ 632 633 /* 634 * bitmap file superblock operations 635 */ 636 637 /* 638 * write out a page to a file 639 */ 640 static void filemap_write_page(struct bitmap *bitmap, unsigned long pg_index, 641 bool wait) 642 { 643 struct bitmap_storage *store = &bitmap->storage; 644 struct page *page = store->filemap[pg_index]; 645 646 if (mddev_is_clustered(bitmap->mddev)) { 647 /* go to node bitmap area starting point */ 648 pg_index += store->sb_index; 649 } 650 651 if (store->file) 652 write_file_page(bitmap, page, wait); 653 else 654 write_sb_page(bitmap, pg_index, page, wait); 655 } 656 657 /* 658 * md_bitmap_wait_writes() should be called before writing any bitmap 659 * blocks, to ensure previous writes, particularly from 660 * md_bitmap_daemon_work(), have completed. 661 */ 662 static void md_bitmap_wait_writes(struct bitmap *bitmap) 663 { 664 if (bitmap->storage.file) 665 wait_event(bitmap->write_wait, 666 atomic_read(&bitmap->pending_writes)==0); 667 else 668 /* Note that we ignore the return value. The writes 669 * might have failed, but that would just mean that 670 * some bits which should be cleared haven't been, 671 * which is safe. The relevant bitmap blocks will 672 * probably get written again, but there is no great 673 * loss if they aren't. 674 */ 675 md_super_wait(bitmap->mddev); 676 } 677 678 679 /* update the event counter and sync the superblock to disk */ 680 static void bitmap_update_sb(void *data) 681 { 682 bitmap_super_t *sb; 683 struct bitmap *bitmap = data; 684 685 if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ 686 return; 687 if (bitmap->mddev->bitmap_info.external) 688 return; 689 if (!bitmap->storage.sb_page) /* no superblock */ 690 return; 691 sb = kmap_local_page(bitmap->storage.sb_page); 692 sb->events = cpu_to_le64(bitmap->mddev->events); 693 if (bitmap->mddev->events < bitmap->events_cleared) 694 /* rocking back to read-only */ 695 bitmap->events_cleared = bitmap->mddev->events; 696 sb->events_cleared = cpu_to_le64(bitmap->events_cleared); 697 /* 698 * clear BITMAP_WRITE_ERROR bit to protect against the case that 699 * a bitmap write error occurred but the later writes succeeded. 700 */ 701 sb->state = cpu_to_le32(bitmap->flags & ~BIT(BITMAP_WRITE_ERROR)); 702 /* Just in case these have been changed via sysfs: */ 703 sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ); 704 sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind); 705 /* This might have been changed by a reshape */ 706 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); 707 sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize); 708 sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes); 709 sb->sectors_reserved = cpu_to_le32(bitmap->mddev-> 710 bitmap_info.space); 711 kunmap_local(sb); 712 713 if (bitmap->storage.file) 714 write_file_page(bitmap, bitmap->storage.sb_page, 1); 715 else 716 write_sb_page(bitmap, bitmap->storage.sb_index, 717 bitmap->storage.sb_page, 1); 718 } 719 720 static void bitmap_print_sb(struct bitmap *bitmap) 721 { 722 bitmap_super_t *sb; 723 724 if (!bitmap || !bitmap->storage.sb_page) 725 return; 726 sb = kmap_local_page(bitmap->storage.sb_page); 727 pr_debug("%s: bitmap file superblock:\n", bmname(bitmap)); 728 pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic)); 729 pr_debug(" version: %u\n", le32_to_cpu(sb->version)); 730 pr_debug(" uuid: %08x.%08x.%08x.%08x\n", 731 le32_to_cpu(*(__le32 *)(sb->uuid+0)), 732 le32_to_cpu(*(__le32 *)(sb->uuid+4)), 733 le32_to_cpu(*(__le32 *)(sb->uuid+8)), 734 le32_to_cpu(*(__le32 *)(sb->uuid+12))); 735 pr_debug(" events: %llu\n", 736 (unsigned long long) le64_to_cpu(sb->events)); 737 pr_debug("events cleared: %llu\n", 738 (unsigned long long) le64_to_cpu(sb->events_cleared)); 739 pr_debug(" state: %08x\n", le32_to_cpu(sb->state)); 740 pr_debug(" chunksize: %u B\n", le32_to_cpu(sb->chunksize)); 741 pr_debug(" daemon sleep: %us\n", le32_to_cpu(sb->daemon_sleep)); 742 pr_debug(" sync size: %llu KB\n", 743 (unsigned long long)le64_to_cpu(sb->sync_size)/2); 744 pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind)); 745 kunmap_local(sb); 746 } 747 748 /* 749 * bitmap_new_disk_sb 750 * @bitmap 751 * 752 * This function is somewhat the reverse of bitmap_read_sb. bitmap_read_sb 753 * reads and verifies the on-disk bitmap superblock and populates bitmap_info. 754 * This function verifies 'bitmap_info' and populates the on-disk bitmap 755 * structure, which is to be written to disk. 756 * 757 * Returns: 0 on success, -Exxx on error 758 */ 759 static int md_bitmap_new_disk_sb(struct bitmap *bitmap) 760 { 761 bitmap_super_t *sb; 762 unsigned long chunksize, daemon_sleep, write_behind; 763 764 bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO); 765 if (bitmap->storage.sb_page == NULL) 766 return -ENOMEM; 767 bitmap->storage.sb_index = 0; 768 769 sb = kmap_local_page(bitmap->storage.sb_page); 770 771 sb->magic = cpu_to_le32(BITMAP_MAGIC); 772 sb->version = cpu_to_le32(BITMAP_MAJOR_HI); 773 774 chunksize = bitmap->mddev->bitmap_info.chunksize; 775 BUG_ON(!chunksize); 776 if (!is_power_of_2(chunksize)) { 777 kunmap_local(sb); 778 pr_warn("bitmap chunksize not a power of 2\n"); 779 return -EINVAL; 780 } 781 sb->chunksize = cpu_to_le32(chunksize); 782 783 daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep; 784 if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) { 785 pr_debug("Choosing daemon_sleep default (5 sec)\n"); 786 daemon_sleep = 5 * HZ; 787 } 788 sb->daemon_sleep = cpu_to_le32(daemon_sleep); 789 bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; 790 791 /* 792 * FIXME: write_behind for RAID1. If not specified, what 793 * is a good choice? We choose COUNTER_MAX / 2 arbitrarily. 794 */ 795 write_behind = bitmap->mddev->bitmap_info.max_write_behind; 796 if (write_behind > COUNTER_MAX / 2) 797 write_behind = COUNTER_MAX / 2; 798 sb->write_behind = cpu_to_le32(write_behind); 799 bitmap->mddev->bitmap_info.max_write_behind = write_behind; 800 801 /* keep the array size field of the bitmap superblock up to date */ 802 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); 803 804 memcpy(sb->uuid, bitmap->mddev->uuid, 16); 805 806 set_bit(BITMAP_STALE, &bitmap->flags); 807 sb->state = cpu_to_le32(bitmap->flags); 808 bitmap->events_cleared = bitmap->mddev->events; 809 sb->events_cleared = cpu_to_le64(bitmap->mddev->events); 810 bitmap->mddev->bitmap_info.nodes = 0; 811 812 kunmap_local(sb); 813 814 return 0; 815 } 816 817 /* read the superblock from the bitmap file and initialize some bitmap fields */ 818 static int md_bitmap_read_sb(struct bitmap *bitmap) 819 { 820 char *reason = NULL; 821 bitmap_super_t *sb; 822 unsigned long chunksize, daemon_sleep, write_behind; 823 unsigned long long events; 824 int nodes = 0; 825 unsigned long sectors_reserved = 0; 826 int err = -EINVAL; 827 struct page *sb_page; 828 loff_t offset = 0; 829 830 if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) { 831 chunksize = 128 * 1024 * 1024; 832 daemon_sleep = 5 * HZ; 833 write_behind = 0; 834 set_bit(BITMAP_STALE, &bitmap->flags); 835 err = 0; 836 goto out_no_sb; 837 } 838 /* page 0 is the superblock, read it... */ 839 sb_page = alloc_page(GFP_KERNEL); 840 if (!sb_page) 841 return -ENOMEM; 842 bitmap->storage.sb_page = sb_page; 843 844 re_read: 845 /* If cluster_slot is set, the cluster is setup */ 846 if (bitmap->cluster_slot >= 0) { 847 sector_t bm_blocks = bitmap->mddev->resync_max_sectors; 848 849 bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 850 (bitmap->mddev->bitmap_info.chunksize >> 9)); 851 /* bits to bytes */ 852 bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t); 853 /* to 4k blocks */ 854 bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096); 855 offset = bitmap->cluster_slot * (bm_blocks << 3); 856 pr_debug("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__, 857 bitmap->cluster_slot, offset); 858 } 859 860 if (bitmap->storage.file) { 861 loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host); 862 int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize; 863 864 err = read_file_page(bitmap->storage.file, 0, 865 bitmap, bytes, sb_page); 866 } else { 867 err = read_sb_page(bitmap->mddev, offset, sb_page, 0, 868 sizeof(bitmap_super_t)); 869 } 870 if (err) 871 return err; 872 873 err = -EINVAL; 874 sb = kmap_local_page(sb_page); 875 876 chunksize = le32_to_cpu(sb->chunksize); 877 daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ; 878 write_behind = le32_to_cpu(sb->write_behind); 879 sectors_reserved = le32_to_cpu(sb->sectors_reserved); 880 881 /* verify that the bitmap-specific fields are valid */ 882 if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) 883 reason = "bad magic"; 884 else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO || 885 le32_to_cpu(sb->version) > BITMAP_MAJOR_CLUSTERED) 886 reason = "unrecognized superblock version"; 887 else if (chunksize < 512) 888 reason = "bitmap chunksize too small"; 889 else if (!is_power_of_2(chunksize)) 890 reason = "bitmap chunksize not a power of 2"; 891 else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT) 892 reason = "daemon sleep period out of range"; 893 else if (write_behind > COUNTER_MAX) 894 reason = "write-behind limit out of range (0 - 16383)"; 895 if (reason) { 896 pr_warn("%s: invalid bitmap file superblock: %s\n", 897 bmname(bitmap), reason); 898 goto out; 899 } 900 901 /* 902 * Setup nodes/clustername only if bitmap version is 903 * cluster-compatible 904 */ 905 if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) { 906 nodes = le32_to_cpu(sb->nodes); 907 strscpy(bitmap->mddev->bitmap_info.cluster_name, 908 sb->cluster_name, 64); 909 } 910 911 /* keep the array size field of the bitmap superblock up to date */ 912 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); 913 914 if (bitmap->mddev->persistent) { 915 /* 916 * We have a persistent array superblock, so compare the 917 * bitmap's UUID and event counter to the mddev's 918 */ 919 if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) { 920 pr_warn("%s: bitmap superblock UUID mismatch\n", 921 bmname(bitmap)); 922 goto out; 923 } 924 events = le64_to_cpu(sb->events); 925 if (!nodes && (events < bitmap->mddev->events)) { 926 pr_warn("%s: bitmap file is out of date (%llu < %llu) -- forcing full recovery\n", 927 bmname(bitmap), events, 928 (unsigned long long) bitmap->mddev->events); 929 set_bit(BITMAP_STALE, &bitmap->flags); 930 } 931 } 932 933 /* assign fields using values from superblock */ 934 bitmap->flags |= le32_to_cpu(sb->state); 935 if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN) 936 set_bit(BITMAP_HOSTENDIAN, &bitmap->flags); 937 bitmap->events_cleared = le64_to_cpu(sb->events_cleared); 938 err = 0; 939 940 out: 941 kunmap_local(sb); 942 if (err == 0 && nodes && (bitmap->cluster_slot < 0)) { 943 /* Assigning chunksize is required for "re_read" */ 944 bitmap->mddev->bitmap_info.chunksize = chunksize; 945 err = md_setup_cluster(bitmap->mddev, nodes); 946 if (err) { 947 pr_warn("%s: Could not setup cluster service (%d)\n", 948 bmname(bitmap), err); 949 goto out_no_sb; 950 } 951 bitmap->cluster_slot = bitmap->mddev->cluster_ops->slot_number(bitmap->mddev); 952 goto re_read; 953 } 954 955 out_no_sb: 956 if (err == 0) { 957 if (test_bit(BITMAP_STALE, &bitmap->flags)) 958 bitmap->events_cleared = bitmap->mddev->events; 959 bitmap->mddev->bitmap_info.chunksize = chunksize; 960 bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; 961 bitmap->mddev->bitmap_info.max_write_behind = write_behind; 962 bitmap->mddev->bitmap_info.nodes = nodes; 963 if (bitmap->mddev->bitmap_info.space == 0 || 964 bitmap->mddev->bitmap_info.space > sectors_reserved) 965 bitmap->mddev->bitmap_info.space = sectors_reserved; 966 } else { 967 bitmap_print_sb(bitmap); 968 if (bitmap->cluster_slot < 0) 969 md_cluster_stop(bitmap->mddev); 970 } 971 return err; 972 } 973 974 /* 975 * general bitmap file operations 976 */ 977 978 /* 979 * on-disk bitmap: 980 * 981 * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap 982 * file a page at a time. There's a superblock at the start of the file. 983 */ 984 /* calculate the index of the page that contains this bit */ 985 static inline unsigned long file_page_index(struct bitmap_storage *store, 986 unsigned long chunk) 987 { 988 if (store->sb_page) 989 chunk += sizeof(bitmap_super_t) << 3; 990 return chunk >> PAGE_BIT_SHIFT; 991 } 992 993 /* calculate the (bit) offset of this bit within a page */ 994 static inline unsigned long file_page_offset(struct bitmap_storage *store, 995 unsigned long chunk) 996 { 997 if (store->sb_page) 998 chunk += sizeof(bitmap_super_t) << 3; 999 return chunk & (PAGE_BITS - 1); 1000 } 1001 1002 /* 1003 * return a pointer to the page in the filemap that contains the given bit 1004 * 1005 */ 1006 static inline struct page *filemap_get_page(struct bitmap_storage *store, 1007 unsigned long chunk) 1008 { 1009 if (file_page_index(store, chunk) >= store->file_pages) 1010 return NULL; 1011 return store->filemap[file_page_index(store, chunk)]; 1012 } 1013 1014 static int md_bitmap_storage_alloc(struct bitmap_storage *store, 1015 unsigned long chunks, int with_super, 1016 int slot_number) 1017 { 1018 int pnum, offset = 0; 1019 unsigned long num_pages; 1020 unsigned long bytes; 1021 1022 bytes = DIV_ROUND_UP(chunks, 8); 1023 if (with_super) 1024 bytes += sizeof(bitmap_super_t); 1025 1026 num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE); 1027 offset = slot_number * num_pages; 1028 1029 store->filemap = kmalloc_objs(struct page *, num_pages); 1030 if (!store->filemap) 1031 return -ENOMEM; 1032 1033 if (with_super && !store->sb_page) { 1034 store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO); 1035 if (store->sb_page == NULL) 1036 return -ENOMEM; 1037 } 1038 1039 pnum = 0; 1040 if (store->sb_page) { 1041 store->filemap[0] = store->sb_page; 1042 pnum = 1; 1043 store->sb_index = offset; 1044 } 1045 1046 for ( ; pnum < num_pages; pnum++) { 1047 store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO); 1048 if (!store->filemap[pnum]) { 1049 store->file_pages = pnum; 1050 return -ENOMEM; 1051 } 1052 } 1053 store->file_pages = pnum; 1054 1055 /* We need 4 bits per page, rounded up to a multiple 1056 * of sizeof(unsigned long) */ 1057 store->filemap_attr = kzalloc( 1058 roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), 1059 GFP_KERNEL); 1060 if (!store->filemap_attr) 1061 return -ENOMEM; 1062 1063 store->bytes = bytes; 1064 1065 return 0; 1066 } 1067 1068 static void md_bitmap_file_unmap(struct bitmap_storage *store) 1069 { 1070 struct file *file = store->file; 1071 struct page *sb_page = store->sb_page; 1072 struct page **map = store->filemap; 1073 int pages = store->file_pages; 1074 1075 while (pages--) 1076 if (map[pages] != sb_page) /* 0 is sb_page, release it below */ 1077 free_buffers(map[pages]); 1078 kfree(map); 1079 kfree(store->filemap_attr); 1080 1081 if (sb_page) 1082 free_buffers(sb_page); 1083 1084 if (file) { 1085 struct inode *inode = file_inode(file); 1086 invalidate_mapping_pages(inode->i_mapping, 0, -1); 1087 fput(file); 1088 } 1089 } 1090 1091 /* 1092 * bitmap_file_kick - if an error occurs while manipulating the bitmap file 1093 * then it is no longer reliable, so we stop using it and we mark the file 1094 * as failed in the superblock 1095 */ 1096 static void md_bitmap_file_kick(struct bitmap *bitmap) 1097 { 1098 if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) { 1099 bitmap_update_sb(bitmap); 1100 1101 if (bitmap->storage.file) { 1102 pr_warn("%s: kicking failed bitmap file %pD4 from array!\n", 1103 bmname(bitmap), bitmap->storage.file); 1104 1105 } else 1106 pr_warn("%s: disabling internal bitmap due to errors\n", 1107 bmname(bitmap)); 1108 } 1109 } 1110 1111 enum bitmap_page_attr { 1112 BITMAP_PAGE_DIRTY = 0, /* there are set bits that need to be synced */ 1113 BITMAP_PAGE_PENDING = 1, /* there are bits that are being cleaned. 1114 * i.e. counter is 1 or 2. */ 1115 BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */ 1116 }; 1117 1118 static inline void set_page_attr(struct bitmap *bitmap, int pnum, 1119 enum bitmap_page_attr attr) 1120 { 1121 set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); 1122 } 1123 1124 static inline void clear_page_attr(struct bitmap *bitmap, int pnum, 1125 enum bitmap_page_attr attr) 1126 { 1127 clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); 1128 } 1129 1130 static inline int test_page_attr(struct bitmap *bitmap, int pnum, 1131 enum bitmap_page_attr attr) 1132 { 1133 return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); 1134 } 1135 1136 static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum, 1137 enum bitmap_page_attr attr) 1138 { 1139 return test_and_clear_bit((pnum<<2) + attr, 1140 bitmap->storage.filemap_attr); 1141 } 1142 /* 1143 * bitmap_file_set_bit -- called before performing a write to the md device 1144 * to set (and eventually sync) a particular bit in the bitmap file 1145 * 1146 * we set the bit immediately, then we record the page number so that 1147 * when an unplug occurs, we can flush the dirty pages out to disk 1148 */ 1149 static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) 1150 { 1151 unsigned long bit; 1152 struct page *page; 1153 void *kaddr; 1154 unsigned long chunk = block >> bitmap->counts.chunkshift; 1155 struct bitmap_storage *store = &bitmap->storage; 1156 unsigned long index = file_page_index(store, chunk); 1157 unsigned long node_offset = 0; 1158 1159 index += store->sb_index; 1160 if (mddev_is_clustered(bitmap->mddev)) 1161 node_offset = bitmap->cluster_slot * store->file_pages; 1162 1163 page = filemap_get_page(&bitmap->storage, chunk); 1164 if (!page) 1165 return; 1166 bit = file_page_offset(&bitmap->storage, chunk); 1167 1168 /* set the bit */ 1169 kaddr = kmap_local_page(page); 1170 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 1171 set_bit(bit, kaddr); 1172 else 1173 set_bit_le(bit, kaddr); 1174 kunmap_local(kaddr); 1175 pr_debug("set file bit %lu page %lu\n", bit, index); 1176 /* record page number so it gets flushed to disk when unplug occurs */ 1177 set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_DIRTY); 1178 } 1179 1180 static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block) 1181 { 1182 unsigned long bit; 1183 struct page *page; 1184 void *paddr; 1185 unsigned long chunk = block >> bitmap->counts.chunkshift; 1186 struct bitmap_storage *store = &bitmap->storage; 1187 unsigned long index = file_page_index(store, chunk); 1188 unsigned long node_offset = 0; 1189 1190 index += store->sb_index; 1191 if (mddev_is_clustered(bitmap->mddev)) 1192 node_offset = bitmap->cluster_slot * store->file_pages; 1193 1194 page = filemap_get_page(&bitmap->storage, chunk); 1195 if (!page) 1196 return; 1197 bit = file_page_offset(&bitmap->storage, chunk); 1198 paddr = kmap_local_page(page); 1199 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 1200 clear_bit(bit, paddr); 1201 else 1202 clear_bit_le(bit, paddr); 1203 kunmap_local(paddr); 1204 if (!test_page_attr(bitmap, index - node_offset, BITMAP_PAGE_NEEDWRITE)) { 1205 set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_PENDING); 1206 bitmap->allclean = 0; 1207 } 1208 } 1209 1210 static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block) 1211 { 1212 unsigned long bit; 1213 struct page *page; 1214 void *paddr; 1215 unsigned long chunk = block >> bitmap->counts.chunkshift; 1216 int set = 0; 1217 1218 page = filemap_get_page(&bitmap->storage, chunk); 1219 if (!page) 1220 return -EINVAL; 1221 bit = file_page_offset(&bitmap->storage, chunk); 1222 paddr = kmap_local_page(page); 1223 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 1224 set = test_bit(bit, paddr); 1225 else 1226 set = test_bit_le(bit, paddr); 1227 kunmap_local(paddr); 1228 return set; 1229 } 1230 1231 /* this gets called when the md device is ready to unplug its underlying 1232 * (slave) device queues -- before we let any writes go down, we need to 1233 * sync the dirty pages of the bitmap file to disk */ 1234 static void __bitmap_unplug(struct bitmap *bitmap) 1235 { 1236 unsigned long i; 1237 int dirty, need_write; 1238 int writing = 0; 1239 1240 if (!bitmap_enabled(bitmap, true)) 1241 return; 1242 1243 /* look at each page to see if there are any set bits that need to be 1244 * flushed out to disk */ 1245 for (i = 0; i < bitmap->storage.file_pages; i++) { 1246 dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY); 1247 need_write = test_and_clear_page_attr(bitmap, i, 1248 BITMAP_PAGE_NEEDWRITE); 1249 if (dirty || need_write) { 1250 if (!writing) { 1251 md_bitmap_wait_writes(bitmap); 1252 mddev_add_trace_msg(bitmap->mddev, 1253 "md bitmap_unplug"); 1254 } 1255 clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING); 1256 filemap_write_page(bitmap, i, false); 1257 writing = 1; 1258 } 1259 } 1260 if (writing) 1261 md_bitmap_wait_writes(bitmap); 1262 1263 if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) 1264 md_bitmap_file_kick(bitmap); 1265 } 1266 1267 struct bitmap_unplug_work { 1268 struct work_struct work; 1269 struct bitmap *bitmap; 1270 struct completion *done; 1271 }; 1272 1273 static void md_bitmap_unplug_fn(struct work_struct *work) 1274 { 1275 struct bitmap_unplug_work *unplug_work = 1276 container_of(work, struct bitmap_unplug_work, work); 1277 1278 __bitmap_unplug(unplug_work->bitmap); 1279 complete(unplug_work->done); 1280 } 1281 1282 static void bitmap_unplug_async(struct bitmap *bitmap) 1283 { 1284 DECLARE_COMPLETION_ONSTACK(done); 1285 struct bitmap_unplug_work unplug_work; 1286 1287 INIT_WORK_ONSTACK(&unplug_work.work, md_bitmap_unplug_fn); 1288 unplug_work.bitmap = bitmap; 1289 unplug_work.done = &done; 1290 1291 queue_work(md_bitmap_wq, &unplug_work.work); 1292 wait_for_completion(&done); 1293 destroy_work_on_stack(&unplug_work.work); 1294 } 1295 1296 static void bitmap_unplug(struct mddev *mddev, bool sync) 1297 { 1298 struct bitmap *bitmap = mddev->bitmap; 1299 1300 if (!bitmap) 1301 return; 1302 1303 if (sync) 1304 __bitmap_unplug(bitmap); 1305 else 1306 bitmap_unplug_async(bitmap); 1307 } 1308 1309 static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); 1310 1311 /* 1312 * Initialize the in-memory bitmap from the on-disk bitmap and set up the memory 1313 * mapping of the bitmap file. 1314 * 1315 * Special case: If there's no bitmap file, or if the bitmap file had been 1316 * previously kicked from the array, we mark all the bits as 1's in order to 1317 * cause a full resync. 1318 * 1319 * We ignore all bits for sectors that end earlier than 'start'. 1320 * This is used when reading an out-of-date bitmap. 1321 */ 1322 static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) 1323 { 1324 bool outofdate = test_bit(BITMAP_STALE, &bitmap->flags); 1325 struct mddev *mddev = bitmap->mddev; 1326 unsigned long chunks = bitmap->counts.chunks; 1327 struct bitmap_storage *store = &bitmap->storage; 1328 struct file *file = store->file; 1329 unsigned long node_offset = 0; 1330 unsigned long bit_cnt = 0; 1331 unsigned long i; 1332 int ret; 1333 1334 if (!file && !mddev->bitmap_info.offset) { 1335 /* No permanent bitmap - fill with '1s'. */ 1336 store->filemap = NULL; 1337 store->file_pages = 0; 1338 for (i = 0; i < chunks ; i++) { 1339 /* if the disk bit is set, set the memory bit */ 1340 int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift) 1341 >= start); 1342 md_bitmap_set_memory_bits(bitmap, 1343 (sector_t)i << bitmap->counts.chunkshift, 1344 needed); 1345 } 1346 return 0; 1347 } 1348 1349 if (file && i_size_read(file->f_mapping->host) < store->bytes) { 1350 pr_warn("%s: bitmap file too short %lu < %lu\n", 1351 bmname(bitmap), 1352 (unsigned long) i_size_read(file->f_mapping->host), 1353 store->bytes); 1354 ret = -ENOSPC; 1355 goto err; 1356 } 1357 1358 if (mddev_is_clustered(mddev)) 1359 node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE)); 1360 1361 for (i = 0; i < store->file_pages; i++) { 1362 struct page *page = store->filemap[i]; 1363 int count; 1364 1365 /* unmap the old page, we're done with it */ 1366 if (i == store->file_pages - 1) 1367 count = store->bytes - i * PAGE_SIZE; 1368 else 1369 count = PAGE_SIZE; 1370 1371 if (file) 1372 ret = read_file_page(file, i, bitmap, count, page); 1373 else 1374 ret = read_sb_page(mddev, 0, page, i + node_offset, 1375 count); 1376 if (ret) 1377 goto err; 1378 } 1379 1380 if (outofdate) { 1381 pr_warn("%s: bitmap file is out of date, doing full recovery\n", 1382 bmname(bitmap)); 1383 1384 for (i = 0; i < store->file_pages; i++) { 1385 struct page *page = store->filemap[i]; 1386 unsigned long offset = 0; 1387 void *paddr; 1388 1389 if (i == 0 && !mddev->bitmap_info.external) 1390 offset = sizeof(bitmap_super_t); 1391 1392 /* 1393 * If the bitmap is out of date, dirty the whole page 1394 * and write it out 1395 */ 1396 paddr = kmap_local_page(page); 1397 memset(paddr + offset, 0xff, PAGE_SIZE - offset); 1398 kunmap_local(paddr); 1399 1400 filemap_write_page(bitmap, i, true); 1401 if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) { 1402 ret = -EIO; 1403 goto err; 1404 } 1405 } 1406 } 1407 1408 for (i = 0; i < chunks; i++) { 1409 struct page *page = filemap_get_page(&bitmap->storage, i); 1410 unsigned long bit = file_page_offset(&bitmap->storage, i); 1411 void *paddr; 1412 bool was_set; 1413 1414 paddr = kmap_local_page(page); 1415 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 1416 was_set = test_bit(bit, paddr); 1417 else 1418 was_set = test_bit_le(bit, paddr); 1419 kunmap_local(paddr); 1420 1421 if (was_set) { 1422 /* if the disk bit is set, set the memory bit */ 1423 int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift 1424 >= start); 1425 md_bitmap_set_memory_bits(bitmap, 1426 (sector_t)i << bitmap->counts.chunkshift, 1427 needed); 1428 bit_cnt++; 1429 } 1430 } 1431 1432 pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n", 1433 bmname(bitmap), store->file_pages, 1434 bit_cnt, chunks); 1435 1436 return 0; 1437 1438 err: 1439 pr_warn("%s: bitmap initialisation failed: %d\n", 1440 bmname(bitmap), ret); 1441 return ret; 1442 } 1443 1444 /* just flag bitmap pages as needing to be written. */ 1445 static void bitmap_write_all(struct mddev *mddev) 1446 { 1447 int i; 1448 struct bitmap *bitmap = mddev->bitmap; 1449 1450 if (!bitmap || !bitmap->storage.filemap) 1451 return; 1452 1453 /* Only one copy, so nothing needed */ 1454 if (bitmap->storage.file) 1455 return; 1456 1457 for (i = 0; i < bitmap->storage.file_pages; i++) 1458 set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE); 1459 bitmap->allclean = 0; 1460 } 1461 1462 static void md_bitmap_count_page(struct bitmap_counts *bitmap, 1463 sector_t offset, int inc) 1464 { 1465 sector_t chunk = offset >> bitmap->chunkshift; 1466 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1467 bitmap->bp[page].count += inc; 1468 md_bitmap_checkfree(bitmap, page); 1469 } 1470 1471 static void md_bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset) 1472 { 1473 sector_t chunk = offset >> bitmap->chunkshift; 1474 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1475 struct bitmap_page *bp = &bitmap->bp[page]; 1476 1477 if (!bp->pending) 1478 bp->pending = 1; 1479 } 1480 1481 static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap, 1482 sector_t offset, sector_t *blocks, 1483 int create); 1484 1485 static void mddev_set_timeout(struct mddev *mddev, unsigned long timeout, 1486 bool force) 1487 { 1488 struct md_thread *thread; 1489 1490 rcu_read_lock(); 1491 thread = rcu_dereference(mddev->thread); 1492 1493 if (!thread) 1494 goto out; 1495 1496 if (force || thread->timeout < MAX_SCHEDULE_TIMEOUT) 1497 thread->timeout = timeout; 1498 1499 out: 1500 rcu_read_unlock(); 1501 } 1502 1503 /* 1504 * bitmap daemon -- periodically wakes up to clean bits and flush pages 1505 * out to disk 1506 */ 1507 static void bitmap_daemon_work(struct mddev *mddev) 1508 { 1509 struct bitmap *bitmap; 1510 unsigned long j; 1511 unsigned long nextpage; 1512 sector_t blocks; 1513 struct bitmap_counts *counts; 1514 1515 /* Use a mutex to guard daemon_work against 1516 * bitmap_destroy. 1517 */ 1518 mutex_lock(&mddev->bitmap_info.mutex); 1519 bitmap = mddev->bitmap; 1520 if (bitmap == NULL) { 1521 mutex_unlock(&mddev->bitmap_info.mutex); 1522 return; 1523 } 1524 if (time_before(jiffies, bitmap->daemon_lastrun 1525 + mddev->bitmap_info.daemon_sleep)) 1526 goto done; 1527 1528 bitmap->daemon_lastrun = jiffies; 1529 if (bitmap->allclean) { 1530 mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true); 1531 goto done; 1532 } 1533 bitmap->allclean = 1; 1534 1535 mddev_add_trace_msg(bitmap->mddev, "md bitmap_daemon_work"); 1536 1537 /* Any file-page which is PENDING now needs to be written. 1538 * So set NEEDWRITE now, then after we make any last-minute changes 1539 * we will write it. 1540 */ 1541 for (j = 0; j < bitmap->storage.file_pages; j++) 1542 if (test_and_clear_page_attr(bitmap, j, 1543 BITMAP_PAGE_PENDING)) 1544 set_page_attr(bitmap, j, 1545 BITMAP_PAGE_NEEDWRITE); 1546 1547 if (bitmap->need_sync && 1548 mddev->bitmap_info.external == 0) { 1549 /* Arrange for superblock update as well as 1550 * other changes */ 1551 bitmap_super_t *sb; 1552 bitmap->need_sync = 0; 1553 if (bitmap->storage.filemap) { 1554 sb = kmap_local_page(bitmap->storage.sb_page); 1555 sb->events_cleared = 1556 cpu_to_le64(bitmap->events_cleared); 1557 kunmap_local(sb); 1558 set_page_attr(bitmap, 0, 1559 BITMAP_PAGE_NEEDWRITE); 1560 } 1561 } 1562 /* Now look at the bitmap counters and if any are '2' or '1', 1563 * decrement and handle accordingly. 1564 */ 1565 counts = &bitmap->counts; 1566 spin_lock_irq(&counts->lock); 1567 nextpage = 0; 1568 for (j = 0; j < counts->chunks; j++) { 1569 bitmap_counter_t *bmc; 1570 sector_t block = (sector_t)j << counts->chunkshift; 1571 1572 if (j == nextpage) { 1573 nextpage += PAGE_COUNTER_RATIO; 1574 if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) { 1575 j |= PAGE_COUNTER_MASK; 1576 continue; 1577 } 1578 counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0; 1579 } 1580 1581 bmc = md_bitmap_get_counter(counts, block, &blocks, 0); 1582 if (!bmc) { 1583 j |= PAGE_COUNTER_MASK; 1584 continue; 1585 } 1586 if (*bmc == 1 && !bitmap->need_sync) { 1587 /* We can clear the bit */ 1588 *bmc = 0; 1589 md_bitmap_count_page(counts, block, -1); 1590 md_bitmap_file_clear_bit(bitmap, block); 1591 } else if (*bmc && *bmc <= 2) { 1592 *bmc = 1; 1593 md_bitmap_set_pending(counts, block); 1594 bitmap->allclean = 0; 1595 } 1596 } 1597 spin_unlock_irq(&counts->lock); 1598 1599 md_bitmap_wait_writes(bitmap); 1600 /* Now start writeout on any page in NEEDWRITE that isn't DIRTY. 1601 * DIRTY pages need to be written by bitmap_unplug so it can wait 1602 * for them. 1603 * If we find any DIRTY page we stop there and let bitmap_unplug 1604 * handle all the rest. This is important in the case where 1605 * the first blocking holds the superblock and it has been updated. 1606 * We mustn't write any other blocks before the superblock. 1607 */ 1608 for (j = 0; 1609 j < bitmap->storage.file_pages 1610 && !test_bit(BITMAP_STALE, &bitmap->flags); 1611 j++) { 1612 if (test_page_attr(bitmap, j, 1613 BITMAP_PAGE_DIRTY)) 1614 /* bitmap_unplug will handle the rest */ 1615 break; 1616 if (bitmap->storage.filemap && 1617 test_and_clear_page_attr(bitmap, j, 1618 BITMAP_PAGE_NEEDWRITE)) 1619 filemap_write_page(bitmap, j, false); 1620 } 1621 1622 done: 1623 if (bitmap->allclean == 0) 1624 mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true); 1625 mutex_unlock(&mddev->bitmap_info.mutex); 1626 } 1627 1628 static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap, 1629 sector_t offset, sector_t *blocks, 1630 int create) 1631 __releases(bitmap->lock) 1632 __acquires(bitmap->lock) 1633 { 1634 /* If 'create', we might release the lock and reclaim it. 1635 * The lock must have been taken with interrupts enabled. 1636 * If !create, we don't release the lock. 1637 */ 1638 sector_t chunk = offset >> bitmap->chunkshift; 1639 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1640 unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; 1641 sector_t csize = ((sector_t)1) << bitmap->chunkshift; 1642 int err; 1643 1644 if (page >= bitmap->pages) { 1645 /* 1646 * This can happen if bitmap_start_sync goes beyond 1647 * End-of-device while looking for a whole page or 1648 * user set a huge number to sysfs bitmap_set_bits. 1649 */ 1650 *blocks = csize - (offset & (csize - 1)); 1651 return NULL; 1652 } 1653 err = md_bitmap_checkpage(bitmap, page, create, 0); 1654 1655 if (bitmap->bp[page].hijacked || 1656 bitmap->bp[page].map == NULL) 1657 csize = ((sector_t)1) << (bitmap->chunkshift + 1658 PAGE_COUNTER_SHIFT); 1659 1660 *blocks = csize - (offset & (csize - 1)); 1661 1662 if (err < 0) 1663 return NULL; 1664 1665 /* now locked ... */ 1666 1667 if (bitmap->bp[page].hijacked) { /* hijacked pointer */ 1668 /* should we use the first or second counter field 1669 * of the hijacked pointer? */ 1670 int hi = (pageoff > PAGE_COUNTER_MASK); 1671 return &((bitmap_counter_t *) 1672 &bitmap->bp[page].map)[hi]; 1673 } else /* page is allocated */ 1674 return (bitmap_counter_t *) 1675 &(bitmap->bp[page].map[pageoff]); 1676 } 1677 1678 static void bitmap_start_write(struct mddev *mddev, sector_t offset, 1679 unsigned long sectors) 1680 { 1681 struct bitmap *bitmap = mddev->bitmap; 1682 1683 if (!bitmap) 1684 return; 1685 1686 while (sectors) { 1687 sector_t blocks; 1688 bitmap_counter_t *bmc; 1689 1690 spin_lock_irq(&bitmap->counts.lock); 1691 bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 1); 1692 if (!bmc) { 1693 spin_unlock_irq(&bitmap->counts.lock); 1694 return; 1695 } 1696 1697 if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) { 1698 DEFINE_WAIT(__wait); 1699 /* note that it is safe to do the prepare_to_wait 1700 * after the test as long as we do it before dropping 1701 * the spinlock. 1702 */ 1703 prepare_to_wait(&bitmap->overflow_wait, &__wait, 1704 TASK_UNINTERRUPTIBLE); 1705 spin_unlock_irq(&bitmap->counts.lock); 1706 schedule(); 1707 finish_wait(&bitmap->overflow_wait, &__wait); 1708 continue; 1709 } 1710 1711 switch (*bmc) { 1712 case 0: 1713 md_bitmap_file_set_bit(bitmap, offset); 1714 md_bitmap_count_page(&bitmap->counts, offset, 1); 1715 fallthrough; 1716 case 1: 1717 *bmc = 2; 1718 } 1719 1720 (*bmc)++; 1721 1722 spin_unlock_irq(&bitmap->counts.lock); 1723 1724 offset += blocks; 1725 if (sectors > blocks) 1726 sectors -= blocks; 1727 else 1728 sectors = 0; 1729 } 1730 } 1731 1732 static void bitmap_end_write(struct mddev *mddev, sector_t offset, 1733 unsigned long sectors) 1734 { 1735 struct bitmap *bitmap = mddev->bitmap; 1736 1737 if (!bitmap) 1738 return; 1739 1740 while (sectors) { 1741 sector_t blocks; 1742 unsigned long flags; 1743 bitmap_counter_t *bmc; 1744 1745 spin_lock_irqsave(&bitmap->counts.lock, flags); 1746 bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 0); 1747 if (!bmc) { 1748 spin_unlock_irqrestore(&bitmap->counts.lock, flags); 1749 return; 1750 } 1751 1752 if (!bitmap->mddev->degraded) { 1753 if (bitmap->events_cleared < bitmap->mddev->events) { 1754 bitmap->events_cleared = bitmap->mddev->events; 1755 bitmap->need_sync = 1; 1756 sysfs_notify_dirent_safe( 1757 bitmap->sysfs_can_clear); 1758 } 1759 } else if (!NEEDED(*bmc)) { 1760 *bmc |= NEEDED_MASK; 1761 } 1762 1763 if (COUNTER(*bmc) == COUNTER_MAX) 1764 wake_up(&bitmap->overflow_wait); 1765 1766 (*bmc)--; 1767 if (*bmc <= 2) { 1768 md_bitmap_set_pending(&bitmap->counts, offset); 1769 bitmap->allclean = 0; 1770 } 1771 spin_unlock_irqrestore(&bitmap->counts.lock, flags); 1772 offset += blocks; 1773 if (sectors > blocks) 1774 sectors -= blocks; 1775 else 1776 sectors = 0; 1777 } 1778 } 1779 1780 static bool __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, 1781 sector_t *blocks, bool degraded) 1782 { 1783 bitmap_counter_t *bmc; 1784 bool rv = false; 1785 1786 spin_lock_irq(&bitmap->counts.lock); 1787 bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0); 1788 if (bmc) { 1789 /* locked */ 1790 if (RESYNC(*bmc)) { 1791 rv = true; 1792 } else if (NEEDED(*bmc)) { 1793 rv = true; 1794 if (!degraded) { /* don't set/clear bits if degraded */ 1795 *bmc |= RESYNC_MASK; 1796 *bmc &= ~NEEDED_MASK; 1797 } 1798 } 1799 } 1800 spin_unlock_irq(&bitmap->counts.lock); 1801 1802 return rv; 1803 } 1804 1805 static bool bitmap_start_sync(struct mddev *mddev, sector_t offset, 1806 sector_t *blocks, bool degraded) 1807 { 1808 /* bitmap_start_sync must always report on multiples of whole 1809 * pages, otherwise resync (which is very PAGE_SIZE based) will 1810 * get confused. 1811 * So call __bitmap_start_sync repeatedly (if needed) until 1812 * At least PAGE_SIZE>>9 blocks are covered. 1813 * Return the 'or' of the result. 1814 */ 1815 bool rv = false; 1816 sector_t blocks1; 1817 1818 *blocks = 0; 1819 while (*blocks < (PAGE_SIZE>>9)) { 1820 rv |= __bitmap_start_sync(mddev->bitmap, offset, 1821 &blocks1, degraded); 1822 offset += blocks1; 1823 *blocks += blocks1; 1824 } 1825 1826 return rv; 1827 } 1828 1829 static void __bitmap_end_sync(struct bitmap *bitmap, sector_t offset, 1830 sector_t *blocks, bool aborted) 1831 { 1832 bitmap_counter_t *bmc; 1833 unsigned long flags; 1834 1835 spin_lock_irqsave(&bitmap->counts.lock, flags); 1836 bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0); 1837 if (bmc == NULL) 1838 goto unlock; 1839 /* locked */ 1840 if (RESYNC(*bmc)) { 1841 *bmc &= ~RESYNC_MASK; 1842 1843 if (!NEEDED(*bmc) && aborted) 1844 *bmc |= NEEDED_MASK; 1845 else { 1846 if (*bmc <= 2) { 1847 md_bitmap_set_pending(&bitmap->counts, offset); 1848 bitmap->allclean = 0; 1849 } 1850 } 1851 } 1852 unlock: 1853 spin_unlock_irqrestore(&bitmap->counts.lock, flags); 1854 } 1855 1856 static void bitmap_end_sync(struct mddev *mddev, sector_t offset, 1857 sector_t *blocks) 1858 { 1859 __bitmap_end_sync(mddev->bitmap, offset, blocks, true); 1860 } 1861 1862 static void bitmap_close_sync(struct mddev *mddev) 1863 { 1864 /* Sync has finished, and any bitmap chunks that weren't synced 1865 * properly have been aborted. It remains to us to clear the 1866 * RESYNC bit wherever it is still on 1867 */ 1868 sector_t sector = 0; 1869 sector_t blocks; 1870 struct bitmap *bitmap = mddev->bitmap; 1871 1872 if (!bitmap) 1873 return; 1874 1875 while (sector < bitmap->mddev->resync_max_sectors) { 1876 __bitmap_end_sync(bitmap, sector, &blocks, false); 1877 sector += blocks; 1878 } 1879 } 1880 1881 static void bitmap_cond_end_sync(struct mddev *mddev, sector_t sector, 1882 bool force) 1883 { 1884 sector_t s = 0; 1885 sector_t blocks; 1886 struct bitmap *bitmap = mddev->bitmap; 1887 1888 if (!bitmap) 1889 return; 1890 if (sector == 0) { 1891 bitmap->last_end_sync = jiffies; 1892 return; 1893 } 1894 if (!force && time_before(jiffies, (bitmap->last_end_sync 1895 + bitmap->mddev->bitmap_info.daemon_sleep))) 1896 return; 1897 wait_event(bitmap->mddev->recovery_wait, 1898 atomic_read(&bitmap->mddev->recovery_active) == 0); 1899 1900 bitmap->mddev->curr_resync_completed = sector; 1901 set_bit(MD_SB_CHANGE_CLEAN, &bitmap->mddev->sb_flags); 1902 sector &= ~((1ULL << bitmap->counts.chunkshift) - 1); 1903 s = 0; 1904 while (s < sector && s < bitmap->mddev->resync_max_sectors) { 1905 __bitmap_end_sync(bitmap, s, &blocks, false); 1906 s += blocks; 1907 } 1908 bitmap->last_end_sync = jiffies; 1909 sysfs_notify_dirent_safe(bitmap->mddev->sysfs_completed); 1910 } 1911 1912 static void bitmap_sync_with_cluster(struct mddev *mddev, 1913 sector_t old_lo, sector_t old_hi, 1914 sector_t new_lo, sector_t new_hi) 1915 { 1916 struct bitmap *bitmap = mddev->bitmap; 1917 sector_t sector, blocks = 0; 1918 1919 for (sector = old_lo; sector < new_lo; ) { 1920 __bitmap_end_sync(bitmap, sector, &blocks, false); 1921 sector += blocks; 1922 } 1923 WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n"); 1924 1925 for (sector = old_hi; sector < new_hi; ) { 1926 bitmap_start_sync(mddev, sector, &blocks, false); 1927 sector += blocks; 1928 } 1929 WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n"); 1930 } 1931 1932 static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) 1933 { 1934 /* For each chunk covered by any of these sectors, set the 1935 * counter to 2 and possibly set resync_needed. They should all 1936 * be 0 at this point 1937 */ 1938 1939 sector_t secs; 1940 bitmap_counter_t *bmc; 1941 spin_lock_irq(&bitmap->counts.lock); 1942 bmc = md_bitmap_get_counter(&bitmap->counts, offset, &secs, 1); 1943 if (!bmc) { 1944 spin_unlock_irq(&bitmap->counts.lock); 1945 return; 1946 } 1947 if (!*bmc) { 1948 *bmc = 2; 1949 md_bitmap_count_page(&bitmap->counts, offset, 1); 1950 md_bitmap_set_pending(&bitmap->counts, offset); 1951 bitmap->allclean = 0; 1952 } 1953 if (needed) 1954 *bmc |= NEEDED_MASK; 1955 spin_unlock_irq(&bitmap->counts.lock); 1956 } 1957 1958 /* dirty the memory and file bits for bitmap chunks "s" to "e" */ 1959 static void bitmap_dirty_bits(struct mddev *mddev, unsigned long s, 1960 unsigned long e) 1961 { 1962 unsigned long chunk; 1963 struct bitmap *bitmap = mddev->bitmap; 1964 1965 if (!bitmap) 1966 return; 1967 1968 for (chunk = s; chunk <= e; chunk++) { 1969 sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift; 1970 1971 md_bitmap_set_memory_bits(bitmap, sec, 1); 1972 md_bitmap_file_set_bit(bitmap, sec); 1973 if (sec < bitmap->mddev->resync_offset) 1974 /* We are asserting that the array is dirty, 1975 * so move the resync_offset address back so 1976 * that it is obvious that it is dirty 1977 */ 1978 bitmap->mddev->resync_offset = sec; 1979 } 1980 } 1981 1982 static void bitmap_flush(struct mddev *mddev) 1983 { 1984 struct bitmap *bitmap = mddev->bitmap; 1985 long sleep; 1986 1987 if (!bitmap) /* there was no bitmap */ 1988 return; 1989 1990 /* run the daemon_work three time to ensure everything is flushed 1991 * that can be 1992 */ 1993 sleep = mddev->bitmap_info.daemon_sleep * 2; 1994 bitmap->daemon_lastrun -= sleep; 1995 bitmap_daemon_work(mddev); 1996 bitmap->daemon_lastrun -= sleep; 1997 bitmap_daemon_work(mddev); 1998 bitmap->daemon_lastrun -= sleep; 1999 bitmap_daemon_work(mddev); 2000 if (mddev->bitmap_info.external) 2001 md_super_wait(mddev); 2002 bitmap_update_sb(bitmap); 2003 } 2004 2005 static void md_bitmap_free(void *data) 2006 { 2007 unsigned long k, pages; 2008 struct bitmap_page *bp; 2009 struct bitmap *bitmap = data; 2010 2011 if (!bitmap) /* there was no bitmap */ 2012 return; 2013 2014 if (bitmap->sysfs_can_clear) 2015 sysfs_put(bitmap->sysfs_can_clear); 2016 2017 if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info && 2018 bitmap->cluster_slot == bitmap->mddev->cluster_ops->slot_number(bitmap->mddev)) 2019 md_cluster_stop(bitmap->mddev); 2020 2021 /* Shouldn't be needed - but just in case.... */ 2022 wait_event(bitmap->write_wait, 2023 atomic_read(&bitmap->pending_writes) == 0); 2024 2025 /* release the bitmap file */ 2026 md_bitmap_file_unmap(&bitmap->storage); 2027 2028 bp = bitmap->counts.bp; 2029 pages = bitmap->counts.pages; 2030 2031 /* free all allocated memory */ 2032 2033 if (bp) /* deallocate the page memory */ 2034 for (k = 0; k < pages; k++) 2035 if (bp[k].map && !bp[k].hijacked) 2036 kfree(bp[k].map); 2037 kfree(bp); 2038 kfree(bitmap); 2039 } 2040 2041 static void bitmap_start_behind_write(struct mddev *mddev) 2042 { 2043 struct bitmap *bitmap = mddev->bitmap; 2044 int bw; 2045 2046 atomic_inc(&bitmap->behind_writes); 2047 bw = atomic_read(&bitmap->behind_writes); 2048 if (bw > bitmap->behind_writes_used) 2049 bitmap->behind_writes_used = bw; 2050 2051 pr_debug("inc write-behind count %d/%lu\n", 2052 bw, bitmap->mddev->bitmap_info.max_write_behind); 2053 } 2054 2055 static void bitmap_end_behind_write(struct mddev *mddev) 2056 { 2057 struct bitmap *bitmap = mddev->bitmap; 2058 2059 if (atomic_dec_and_test(&bitmap->behind_writes)) 2060 wake_up(&bitmap->behind_wait); 2061 pr_debug("dec write-behind count %d/%lu\n", 2062 atomic_read(&bitmap->behind_writes), 2063 bitmap->mddev->bitmap_info.max_write_behind); 2064 } 2065 2066 static void bitmap_wait_behind_writes(struct mddev *mddev) 2067 { 2068 struct bitmap *bitmap = mddev->bitmap; 2069 2070 /* wait for behind writes to complete */ 2071 if (bitmap && atomic_read(&bitmap->behind_writes) > 0) { 2072 pr_debug("md:%s: behind writes in progress - waiting to stop.\n", 2073 mdname(mddev)); 2074 /* need to kick something here to make sure I/O goes? */ 2075 wait_event(bitmap->behind_wait, 2076 atomic_read(&bitmap->behind_writes) == 0); 2077 } 2078 } 2079 2080 static void bitmap_destroy(struct mddev *mddev) 2081 { 2082 struct bitmap *bitmap = mddev->bitmap; 2083 2084 if (!bitmap) /* there was no bitmap */ 2085 return; 2086 2087 bitmap_wait_behind_writes(mddev); 2088 if (!test_bit(MD_SERIALIZE_POLICY, &mddev->flags)) 2089 mddev_destroy_serial_pool(mddev, NULL); 2090 2091 mutex_lock(&mddev->bitmap_info.mutex); 2092 spin_lock(&mddev->lock); 2093 mddev->bitmap = NULL; /* disconnect from the md device */ 2094 spin_unlock(&mddev->lock); 2095 mutex_unlock(&mddev->bitmap_info.mutex); 2096 mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true); 2097 2098 md_bitmap_free(bitmap); 2099 } 2100 2101 /* 2102 * initialize the bitmap structure 2103 * if this returns an error, bitmap_destroy must be called to do clean up 2104 * once mddev->bitmap is set 2105 */ 2106 static struct bitmap *__bitmap_create(struct mddev *mddev, int slot) 2107 { 2108 struct bitmap *bitmap; 2109 sector_t blocks = mddev->resync_max_sectors; 2110 struct file *file = mddev->bitmap_info.file; 2111 int err; 2112 struct kernfs_node *bm = NULL; 2113 2114 BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); 2115 2116 BUG_ON(file && mddev->bitmap_info.offset); 2117 2118 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { 2119 pr_notice("md/raid:%s: array with journal cannot have bitmap\n", 2120 mdname(mddev)); 2121 return ERR_PTR(-EBUSY); 2122 } 2123 2124 bitmap = kzalloc_obj(*bitmap); 2125 if (!bitmap) 2126 return ERR_PTR(-ENOMEM); 2127 2128 spin_lock_init(&bitmap->counts.lock); 2129 atomic_set(&bitmap->pending_writes, 0); 2130 init_waitqueue_head(&bitmap->write_wait); 2131 init_waitqueue_head(&bitmap->overflow_wait); 2132 init_waitqueue_head(&bitmap->behind_wait); 2133 2134 bitmap->mddev = mddev; 2135 bitmap->cluster_slot = slot; 2136 2137 if (mddev->kobj.sd) 2138 bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap"); 2139 if (bm) { 2140 bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear"); 2141 sysfs_put(bm); 2142 } else 2143 bitmap->sysfs_can_clear = NULL; 2144 2145 bitmap->storage.file = file; 2146 if (file) { 2147 get_file(file); 2148 /* As future accesses to this file will use bmap, 2149 * and bypass the page cache, we must sync the file 2150 * first. 2151 */ 2152 vfs_fsync(file, 1); 2153 } 2154 /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */ 2155 if (!mddev->bitmap_info.external) { 2156 /* 2157 * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is 2158 * instructing us to create a new on-disk bitmap instance. 2159 */ 2160 if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags)) 2161 err = md_bitmap_new_disk_sb(bitmap); 2162 else 2163 err = md_bitmap_read_sb(bitmap); 2164 } else { 2165 err = 0; 2166 if (mddev->bitmap_info.chunksize == 0 || 2167 mddev->bitmap_info.daemon_sleep == 0) 2168 /* chunksize and time_base need to be 2169 * set first. */ 2170 err = -EINVAL; 2171 } 2172 if (err) 2173 goto error; 2174 2175 bitmap->daemon_lastrun = jiffies; 2176 err = __bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 2177 true); 2178 if (err) 2179 goto error; 2180 2181 pr_debug("created bitmap (%lu pages) for device %s\n", 2182 bitmap->counts.pages, bmname(bitmap)); 2183 2184 err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0; 2185 if (err) 2186 goto error; 2187 2188 return bitmap; 2189 error: 2190 md_bitmap_free(bitmap); 2191 return ERR_PTR(err); 2192 } 2193 2194 static int bitmap_create(struct mddev *mddev) 2195 { 2196 struct bitmap *bitmap = __bitmap_create(mddev, -1); 2197 2198 if (IS_ERR(bitmap)) 2199 return PTR_ERR(bitmap); 2200 2201 mddev->bitmap = bitmap; 2202 return 0; 2203 } 2204 2205 static int bitmap_load(struct mddev *mddev) 2206 { 2207 int err = 0; 2208 sector_t start = 0; 2209 sector_t sector = 0; 2210 struct bitmap *bitmap = mddev->bitmap; 2211 struct md_rdev *rdev; 2212 2213 if (!bitmap) 2214 goto out; 2215 2216 rdev_for_each(rdev, mddev) 2217 mddev_create_serial_pool(mddev, rdev); 2218 2219 if (mddev_is_clustered(mddev)) 2220 mddev->cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes); 2221 2222 /* Clear out old bitmap info first: Either there is none, or we 2223 * are resuming after someone else has possibly changed things, 2224 * so we should forget old cached info. 2225 * All chunks should be clean, but some might need_sync. 2226 */ 2227 while (sector < mddev->resync_max_sectors) { 2228 sector_t blocks; 2229 bitmap_start_sync(mddev, sector, &blocks, false); 2230 sector += blocks; 2231 } 2232 bitmap_close_sync(mddev); 2233 2234 if (mddev->degraded == 0 2235 || bitmap->events_cleared == mddev->events) 2236 /* no need to keep dirty bits to optimise a 2237 * re-add of a missing device */ 2238 start = mddev->resync_offset; 2239 2240 mutex_lock(&mddev->bitmap_info.mutex); 2241 err = md_bitmap_init_from_disk(bitmap, start); 2242 mutex_unlock(&mddev->bitmap_info.mutex); 2243 2244 if (err) 2245 goto out; 2246 clear_bit(BITMAP_STALE, &bitmap->flags); 2247 2248 /* Kick recovery in case any bits were set */ 2249 set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery); 2250 2251 mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true); 2252 md_wakeup_thread(mddev->thread); 2253 2254 bitmap_update_sb(bitmap); 2255 2256 if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) 2257 err = -EIO; 2258 out: 2259 return err; 2260 } 2261 2262 /* caller need to free returned bitmap with md_bitmap_free() */ 2263 static void *bitmap_get_from_slot(struct mddev *mddev, int slot) 2264 { 2265 int rv = 0; 2266 struct bitmap *bitmap; 2267 2268 bitmap = __bitmap_create(mddev, slot); 2269 if (IS_ERR(bitmap)) { 2270 rv = PTR_ERR(bitmap); 2271 return ERR_PTR(rv); 2272 } 2273 2274 rv = md_bitmap_init_from_disk(bitmap, 0); 2275 if (rv) { 2276 md_bitmap_free(bitmap); 2277 return ERR_PTR(rv); 2278 } 2279 2280 return bitmap; 2281 } 2282 2283 /* Loads the bitmap associated with slot and copies the resync information 2284 * to our bitmap 2285 */ 2286 static int bitmap_copy_from_slot(struct mddev *mddev, int slot, sector_t *low, 2287 sector_t *high, bool clear_bits) 2288 { 2289 int rv = 0, i, j; 2290 sector_t block, lo = 0, hi = 0; 2291 struct bitmap_counts *counts; 2292 struct bitmap *bitmap; 2293 2294 bitmap = bitmap_get_from_slot(mddev, slot); 2295 if (IS_ERR(bitmap)) { 2296 pr_err("%s can't get bitmap from slot %d\n", __func__, slot); 2297 return -1; 2298 } 2299 2300 counts = &bitmap->counts; 2301 for (j = 0; j < counts->chunks; j++) { 2302 block = (sector_t)j << counts->chunkshift; 2303 if (md_bitmap_file_test_bit(bitmap, block)) { 2304 if (!lo) 2305 lo = block; 2306 hi = block; 2307 md_bitmap_file_clear_bit(bitmap, block); 2308 md_bitmap_set_memory_bits(mddev->bitmap, block, 1); 2309 md_bitmap_file_set_bit(mddev->bitmap, block); 2310 } 2311 } 2312 2313 if (clear_bits) { 2314 bitmap_update_sb(bitmap); 2315 /* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs 2316 * BITMAP_PAGE_DIRTY or _NEEDWRITE to write ... */ 2317 for (i = 0; i < bitmap->storage.file_pages; i++) 2318 if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING)) 2319 set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE); 2320 __bitmap_unplug(bitmap); 2321 } 2322 __bitmap_unplug(mddev->bitmap); 2323 *low = lo; 2324 *high = hi; 2325 md_bitmap_free(bitmap); 2326 2327 return rv; 2328 } 2329 2330 static void bitmap_set_pages(void *data, unsigned long pages) 2331 { 2332 struct bitmap *bitmap = data; 2333 2334 bitmap->counts.pages = pages; 2335 } 2336 2337 static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats) 2338 { 2339 struct bitmap_storage *storage; 2340 struct bitmap_counts *counts; 2341 struct bitmap *bitmap = data; 2342 bitmap_super_t *sb; 2343 2344 if (!bitmap) 2345 return -ENOENT; 2346 if (!bitmap->storage.sb_page) 2347 return -EINVAL; 2348 sb = kmap_local_page(bitmap->storage.sb_page); 2349 stats->sync_size = le64_to_cpu(sb->sync_size); 2350 kunmap_local(sb); 2351 2352 counts = &bitmap->counts; 2353 stats->missing_pages = counts->missing_pages; 2354 stats->pages = counts->pages; 2355 2356 storage = &bitmap->storage; 2357 stats->file_pages = storage->file_pages; 2358 stats->file = storage->file; 2359 2360 stats->behind_writes = atomic_read(&bitmap->behind_writes); 2361 stats->behind_wait = wq_has_sleeper(&bitmap->behind_wait); 2362 stats->events_cleared = bitmap->events_cleared; 2363 return 0; 2364 } 2365 2366 static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks, 2367 int chunksize, bool init) 2368 { 2369 /* If chunk_size is 0, choose an appropriate chunk size. 2370 * Then possibly allocate new storage space. 2371 * Then quiesce, copy bits, replace bitmap, and re-start 2372 * 2373 * This function is called both to set up the initial bitmap 2374 * and to resize the bitmap while the array is active. 2375 * If this happens as a result of the array being resized, 2376 * chunksize will be zero, and we need to choose a suitable 2377 * chunksize, otherwise we use what we are given. 2378 */ 2379 struct bitmap_storage store; 2380 struct bitmap_counts old_counts; 2381 unsigned long chunks; 2382 sector_t block; 2383 sector_t old_blocks, new_blocks; 2384 int chunkshift; 2385 int ret = 0; 2386 long pages; 2387 struct bitmap_page *new_bp; 2388 2389 if (bitmap->storage.file && !init) { 2390 pr_info("md: cannot resize file-based bitmap\n"); 2391 return -EINVAL; 2392 } 2393 2394 if (chunksize == 0) { 2395 /* If there is enough space, leave the chunk size unchanged, 2396 * else increase by factor of two until there is enough space. 2397 */ 2398 long bytes; 2399 long space = bitmap->mddev->bitmap_info.space; 2400 2401 if (space == 0) { 2402 /* We don't know how much space there is, so limit 2403 * to current size - in sectors. 2404 */ 2405 bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8); 2406 if (!bitmap->mddev->bitmap_info.external) 2407 bytes += sizeof(bitmap_super_t); 2408 space = DIV_ROUND_UP(bytes, 512); 2409 bitmap->mddev->bitmap_info.space = space; 2410 } 2411 chunkshift = bitmap->counts.chunkshift; 2412 chunkshift--; 2413 do { 2414 /* 'chunkshift' is shift from block size to chunk size */ 2415 chunkshift++; 2416 chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift); 2417 bytes = DIV_ROUND_UP(chunks, 8); 2418 if (!bitmap->mddev->bitmap_info.external) 2419 bytes += sizeof(bitmap_super_t); 2420 } while (bytes > (space << 9) && (chunkshift + BITMAP_BLOCK_SHIFT) < 2421 (BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize) - 1)); 2422 } else 2423 chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT; 2424 2425 chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift); 2426 memset(&store, 0, sizeof(store)); 2427 if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file) 2428 ret = md_bitmap_storage_alloc(&store, chunks, 2429 !bitmap->mddev->bitmap_info.external, 2430 mddev_is_clustered(bitmap->mddev) 2431 ? bitmap->cluster_slot : 0); 2432 if (ret) { 2433 md_bitmap_file_unmap(&store); 2434 goto err; 2435 } 2436 2437 pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO); 2438 2439 new_bp = kzalloc_objs(*new_bp, pages); 2440 ret = -ENOMEM; 2441 if (!new_bp) { 2442 md_bitmap_file_unmap(&store); 2443 goto err; 2444 } 2445 2446 if (!init) 2447 bitmap->mddev->pers->quiesce(bitmap->mddev, 1); 2448 2449 store.file = bitmap->storage.file; 2450 bitmap->storage.file = NULL; 2451 2452 if (store.sb_page && bitmap->storage.sb_page) 2453 memcpy(page_address(store.sb_page), 2454 page_address(bitmap->storage.sb_page), 2455 sizeof(bitmap_super_t)); 2456 mutex_lock(&bitmap->mddev->bitmap_info.mutex); 2457 spin_lock_irq(&bitmap->counts.lock); 2458 md_bitmap_file_unmap(&bitmap->storage); 2459 bitmap->storage = store; 2460 2461 old_counts = bitmap->counts; 2462 bitmap->counts.bp = new_bp; 2463 bitmap->counts.pages = pages; 2464 bitmap->counts.missing_pages = pages; 2465 bitmap->counts.chunkshift = chunkshift; 2466 bitmap->counts.chunks = chunks; 2467 bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift + 2468 BITMAP_BLOCK_SHIFT); 2469 2470 blocks = min(old_counts.chunks << old_counts.chunkshift, 2471 chunks << chunkshift); 2472 2473 /* For cluster raid, need to pre-allocate bitmap */ 2474 if (mddev_is_clustered(bitmap->mddev)) { 2475 unsigned long page; 2476 for (page = 0; page < pages; page++) { 2477 ret = md_bitmap_checkpage(&bitmap->counts, page, 1, 1); 2478 if (ret) { 2479 unsigned long k; 2480 2481 /* deallocate the page memory */ 2482 for (k = 0; k < page; k++) { 2483 kfree(new_bp[k].map); 2484 } 2485 kfree(new_bp); 2486 2487 /* restore some fields from old_counts */ 2488 bitmap->counts.bp = old_counts.bp; 2489 bitmap->counts.pages = old_counts.pages; 2490 bitmap->counts.missing_pages = old_counts.pages; 2491 bitmap->counts.chunkshift = old_counts.chunkshift; 2492 bitmap->counts.chunks = old_counts.chunks; 2493 bitmap->mddev->bitmap_info.chunksize = 2494 1UL << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT); 2495 blocks = old_counts.chunks << old_counts.chunkshift; 2496 pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n"); 2497 break; 2498 } else 2499 bitmap->counts.bp[page].count += 1; 2500 } 2501 } 2502 2503 for (block = 0; block < blocks; ) { 2504 bitmap_counter_t *bmc_old, *bmc_new; 2505 int set; 2506 2507 bmc_old = md_bitmap_get_counter(&old_counts, block, &old_blocks, 0); 2508 set = bmc_old && NEEDED(*bmc_old); 2509 2510 if (set) { 2511 bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1); 2512 if (bmc_new) { 2513 if (*bmc_new == 0) { 2514 /* need to set on-disk bits too. */ 2515 sector_t end = block + new_blocks; 2516 sector_t start = block >> chunkshift; 2517 2518 start <<= chunkshift; 2519 while (start < end) { 2520 md_bitmap_file_set_bit(bitmap, block); 2521 start += 1 << chunkshift; 2522 } 2523 *bmc_new = 2; 2524 md_bitmap_count_page(&bitmap->counts, block, 1); 2525 md_bitmap_set_pending(&bitmap->counts, block); 2526 } 2527 *bmc_new |= NEEDED_MASK; 2528 } 2529 if (new_blocks < old_blocks) 2530 old_blocks = new_blocks; 2531 } 2532 block += old_blocks; 2533 } 2534 2535 if (bitmap->counts.bp != old_counts.bp) { 2536 unsigned long k; 2537 for (k = 0; k < old_counts.pages; k++) 2538 if (!old_counts.bp[k].hijacked) 2539 kfree(old_counts.bp[k].map); 2540 kfree(old_counts.bp); 2541 } 2542 2543 if (!init) { 2544 int i; 2545 while (block < (chunks << chunkshift)) { 2546 bitmap_counter_t *bmc; 2547 bmc = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1); 2548 if (bmc) { 2549 /* new space. It needs to be resynced, so 2550 * we set NEEDED_MASK. 2551 */ 2552 if (*bmc == 0) { 2553 *bmc = NEEDED_MASK | 2; 2554 md_bitmap_count_page(&bitmap->counts, block, 1); 2555 md_bitmap_set_pending(&bitmap->counts, block); 2556 } 2557 } 2558 block += new_blocks; 2559 } 2560 for (i = 0; i < bitmap->storage.file_pages; i++) 2561 set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY); 2562 } 2563 spin_unlock_irq(&bitmap->counts.lock); 2564 mutex_unlock(&bitmap->mddev->bitmap_info.mutex); 2565 if (!init) { 2566 __bitmap_unplug(bitmap); 2567 bitmap->mddev->pers->quiesce(bitmap->mddev, 0); 2568 } 2569 ret = 0; 2570 err: 2571 return ret; 2572 } 2573 2574 static int bitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize) 2575 { 2576 struct bitmap *bitmap = mddev->bitmap; 2577 2578 if (!bitmap) 2579 return 0; 2580 2581 return __bitmap_resize(bitmap, blocks, chunksize, false); 2582 } 2583 2584 static bool bitmap_none_enabled(void *data, bool flush) 2585 { 2586 return false; 2587 } 2588 2589 static int bitmap_none_create(struct mddev *mddev) 2590 { 2591 return 0; 2592 } 2593 2594 static int bitmap_none_load(struct mddev *mddev) 2595 { 2596 return 0; 2597 } 2598 2599 static void bitmap_none_destroy(struct mddev *mddev) 2600 { 2601 } 2602 2603 static int bitmap_none_get_stats(void *data, struct md_bitmap_stats *stats) 2604 { 2605 return -ENOENT; 2606 } 2607 2608 static ssize_t 2609 location_show(struct mddev *mddev, char *page) 2610 { 2611 ssize_t len; 2612 if (mddev->bitmap_info.file) 2613 len = sprintf(page, "file"); 2614 else if (mddev->bitmap_info.offset) 2615 len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset); 2616 else 2617 len = sprintf(page, "none"); 2618 len += sprintf(page+len, "\n"); 2619 return len; 2620 } 2621 2622 static ssize_t 2623 location_store(struct mddev *mddev, const char *buf, size_t len) 2624 { 2625 int rv; 2626 2627 rv = mddev_suspend_and_lock(mddev); 2628 if (rv) 2629 return rv; 2630 2631 if (mddev->pers) { 2632 if (mddev->recovery || mddev->sync_thread) { 2633 rv = -EBUSY; 2634 goto out; 2635 } 2636 } 2637 2638 if (mddev->bitmap || mddev->bitmap_info.file || 2639 mddev->bitmap_info.offset) { 2640 /* bitmap already configured. Only option is to clear it */ 2641 if (strncmp(buf, "none", 4) != 0) { 2642 rv = -EBUSY; 2643 goto out; 2644 } 2645 2646 sysfs_unmerge_group(&mddev->kobj, &md_bitmap_internal_group); 2647 md_bitmap_destroy_nosysfs(mddev); 2648 mddev->bitmap_id = ID_BITMAP_NONE; 2649 if (!mddev_set_bitmap_ops_nosysfs(mddev)) 2650 goto none_err; 2651 mddev->bitmap_info.offset = 0; 2652 if (mddev->bitmap_info.file) { 2653 struct file *f = mddev->bitmap_info.file; 2654 mddev->bitmap_info.file = NULL; 2655 fput(f); 2656 } 2657 } else { 2658 /* No bitmap, OK to set a location */ 2659 long long offset; 2660 2661 if (strncmp(buf, "none", 4) == 0) 2662 /* nothing to be done */; 2663 else if (strncmp(buf, "file:", 5) == 0) { 2664 /* Not supported yet */ 2665 rv = -EINVAL; 2666 goto out; 2667 } else { 2668 if (buf[0] == '+') 2669 rv = kstrtoll(buf+1, 10, &offset); 2670 else 2671 rv = kstrtoll(buf, 10, &offset); 2672 if (rv) 2673 goto out; 2674 if (offset == 0) { 2675 rv = -EINVAL; 2676 goto out; 2677 } 2678 if (mddev->bitmap_info.external == 0 && 2679 mddev->major_version == 0 && 2680 offset != mddev->bitmap_info.default_offset) { 2681 rv = -EINVAL; 2682 goto out; 2683 } 2684 2685 mddev->bitmap_info.offset = offset; 2686 md_bitmap_destroy_nosysfs(mddev); 2687 mddev->bitmap_id = ID_BITMAP; 2688 if (!mddev_set_bitmap_ops_nosysfs(mddev)) 2689 goto bitmap_err; 2690 2691 rv = md_bitmap_create_nosysfs(mddev); 2692 if (rv) 2693 goto create_err; 2694 2695 rv = mddev->bitmap_ops->load(mddev); 2696 if (rv) { 2697 mddev->bitmap_info.offset = 0; 2698 goto load_err; 2699 } 2700 2701 rv = sysfs_merge_group(&mddev->kobj, 2702 &md_bitmap_internal_group); 2703 if (rv) 2704 goto merge_err; 2705 } 2706 } 2707 if (!mddev->external) { 2708 /* Ensure new bitmap info is stored in 2709 * metadata promptly. 2710 */ 2711 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); 2712 md_wakeup_thread(mddev->thread); 2713 } 2714 rv = 0; 2715 out: 2716 mddev_unlock_and_resume(mddev); 2717 if (rv) 2718 return rv; 2719 return len; 2720 2721 merge_err: 2722 mddev->bitmap_info.offset = 0; 2723 load_err: 2724 md_bitmap_destroy_nosysfs(mddev); 2725 create_err: 2726 mddev->bitmap_info.offset = 0; 2727 mddev->bitmap_id = ID_BITMAP_NONE; 2728 if (!mddev_set_bitmap_ops_nosysfs(mddev)) 2729 rv = -ENOENT; 2730 goto out; 2731 bitmap_err: 2732 rv = -ENOENT; 2733 none_err: 2734 mddev->bitmap_info.offset = 0; 2735 goto out; 2736 } 2737 2738 static struct md_sysfs_entry bitmap_location = 2739 __ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store); 2740 2741 /* 'bitmap/space' is the space available at 'location' for the 2742 * bitmap. This allows the kernel to know when it is safe to 2743 * resize the bitmap to match a resized array. 2744 */ 2745 static ssize_t 2746 space_show(struct mddev *mddev, char *page) 2747 { 2748 return sprintf(page, "%lu\n", mddev->bitmap_info.space); 2749 } 2750 2751 static ssize_t 2752 space_store(struct mddev *mddev, const char *buf, size_t len) 2753 { 2754 struct bitmap *bitmap; 2755 unsigned long sectors; 2756 int rv; 2757 2758 rv = kstrtoul(buf, 10, §ors); 2759 if (rv) 2760 return rv; 2761 2762 if (sectors == 0) 2763 return -EINVAL; 2764 2765 bitmap = mddev->bitmap; 2766 if (bitmap && sectors < (bitmap->storage.bytes + 511) >> 9) 2767 return -EFBIG; /* Bitmap is too big for this small space */ 2768 2769 /* could make sure it isn't too big, but that isn't really 2770 * needed - user-space should be careful. 2771 */ 2772 mddev->bitmap_info.space = sectors; 2773 return len; 2774 } 2775 2776 static struct md_sysfs_entry bitmap_space = 2777 __ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store); 2778 2779 static ssize_t 2780 timeout_show(struct mddev *mddev, char *page) 2781 { 2782 ssize_t len; 2783 unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ; 2784 unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ; 2785 2786 len = sprintf(page, "%lu", secs); 2787 if (jifs) 2788 len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs)); 2789 len += sprintf(page+len, "\n"); 2790 return len; 2791 } 2792 2793 static ssize_t 2794 timeout_store(struct mddev *mddev, const char *buf, size_t len) 2795 { 2796 /* timeout can be set at any time */ 2797 unsigned long timeout; 2798 int rv = strict_strtoul_scaled(buf, &timeout, 4); 2799 if (rv) 2800 return rv; 2801 2802 /* just to make sure we don't overflow... */ 2803 if (timeout >= LONG_MAX / HZ) 2804 return -EINVAL; 2805 2806 timeout = timeout * HZ / 10000; 2807 2808 if (timeout >= MAX_SCHEDULE_TIMEOUT) 2809 timeout = MAX_SCHEDULE_TIMEOUT-1; 2810 if (timeout < 1) 2811 timeout = 1; 2812 2813 mddev->bitmap_info.daemon_sleep = timeout; 2814 mddev_set_timeout(mddev, timeout, false); 2815 md_wakeup_thread(mddev->thread); 2816 2817 return len; 2818 } 2819 2820 static struct md_sysfs_entry bitmap_timeout = 2821 __ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store); 2822 2823 static ssize_t 2824 backlog_show(struct mddev *mddev, char *page) 2825 { 2826 return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind); 2827 } 2828 2829 static ssize_t 2830 backlog_store(struct mddev *mddev, const char *buf, size_t len) 2831 { 2832 unsigned long backlog; 2833 unsigned long old_mwb = mddev->bitmap_info.max_write_behind; 2834 struct md_rdev *rdev; 2835 bool has_write_mostly = false; 2836 int rv = kstrtoul(buf, 10, &backlog); 2837 if (rv) 2838 return rv; 2839 if (backlog > COUNTER_MAX) 2840 return -EINVAL; 2841 2842 rv = mddev_suspend_and_lock(mddev); 2843 if (rv) 2844 return rv; 2845 2846 /* 2847 * Without write mostly device, it doesn't make sense to set 2848 * backlog for max_write_behind. 2849 */ 2850 rdev_for_each(rdev, mddev) { 2851 if (test_bit(WriteMostly, &rdev->flags)) { 2852 has_write_mostly = true; 2853 break; 2854 } 2855 } 2856 if (!has_write_mostly) { 2857 pr_warn_ratelimited("%s: can't set backlog, no write mostly device available\n", 2858 mdname(mddev)); 2859 mddev_unlock(mddev); 2860 return -EINVAL; 2861 } 2862 2863 mddev->bitmap_info.max_write_behind = backlog; 2864 if (!backlog && mddev->serial_info_pool) { 2865 /* serial_info_pool is not needed if backlog is zero */ 2866 if (!test_bit(MD_SERIALIZE_POLICY, &mddev->flags)) 2867 mddev_destroy_serial_pool(mddev, NULL); 2868 } else if (backlog && !mddev->serial_info_pool) { 2869 /* serial_info_pool is needed since backlog is not zero */ 2870 rdev_for_each(rdev, mddev) 2871 mddev_create_serial_pool(mddev, rdev); 2872 } 2873 if (old_mwb != backlog) 2874 bitmap_update_sb(mddev->bitmap); 2875 2876 mddev_unlock_and_resume(mddev); 2877 return len; 2878 } 2879 2880 static struct md_sysfs_entry bitmap_backlog = 2881 __ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store); 2882 2883 static ssize_t 2884 chunksize_show(struct mddev *mddev, char *page) 2885 { 2886 return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize); 2887 } 2888 2889 static ssize_t 2890 chunksize_store(struct mddev *mddev, const char *buf, size_t len) 2891 { 2892 /* Can only be changed when no bitmap is active */ 2893 int rv; 2894 unsigned long csize; 2895 if (mddev->bitmap) 2896 return -EBUSY; 2897 rv = kstrtoul(buf, 10, &csize); 2898 if (rv) 2899 return rv; 2900 if (csize < 512 || 2901 !is_power_of_2(csize)) 2902 return -EINVAL; 2903 if (BITS_PER_LONG > 32 && csize >= (1ULL << (BITS_PER_BYTE * 2904 sizeof(((bitmap_super_t *)0)->chunksize)))) 2905 return -EOVERFLOW; 2906 mddev->bitmap_info.chunksize = csize; 2907 return len; 2908 } 2909 2910 static struct md_sysfs_entry bitmap_chunksize = 2911 __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store); 2912 2913 static ssize_t metadata_show(struct mddev *mddev, char *page) 2914 { 2915 if (mddev_is_clustered(mddev)) 2916 return sprintf(page, "clustered\n"); 2917 return sprintf(page, "%s\n", (mddev->bitmap_info.external 2918 ? "external" : "internal")); 2919 } 2920 2921 static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len) 2922 { 2923 if (mddev->bitmap || 2924 mddev->bitmap_info.file || 2925 mddev->bitmap_info.offset) 2926 return -EBUSY; 2927 if (strncmp(buf, "external", 8) == 0) 2928 mddev->bitmap_info.external = 1; 2929 else if ((strncmp(buf, "internal", 8) == 0) || 2930 (strncmp(buf, "clustered", 9) == 0)) 2931 mddev->bitmap_info.external = 0; 2932 else 2933 return -EINVAL; 2934 return len; 2935 } 2936 2937 static struct md_sysfs_entry bitmap_metadata = 2938 __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store); 2939 2940 static ssize_t can_clear_show(struct mddev *mddev, char *page) 2941 { 2942 int len; 2943 struct bitmap *bitmap; 2944 2945 spin_lock(&mddev->lock); 2946 bitmap = mddev->bitmap; 2947 if (bitmap) 2948 len = sprintf(page, "%s\n", (bitmap->need_sync ? "false" : 2949 "true")); 2950 else 2951 len = sprintf(page, "\n"); 2952 spin_unlock(&mddev->lock); 2953 return len; 2954 } 2955 2956 static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len) 2957 { 2958 struct bitmap *bitmap = mddev->bitmap; 2959 2960 if (!bitmap) 2961 return -ENOENT; 2962 2963 if (strncmp(buf, "false", 5) == 0) { 2964 bitmap->need_sync = 1; 2965 return len; 2966 } 2967 2968 if (strncmp(buf, "true", 4) == 0) { 2969 if (mddev->degraded) 2970 return -EBUSY; 2971 bitmap->need_sync = 0; 2972 return len; 2973 } 2974 2975 return -EINVAL; 2976 } 2977 2978 static struct md_sysfs_entry bitmap_can_clear = 2979 __ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store); 2980 2981 static ssize_t 2982 behind_writes_used_show(struct mddev *mddev, char *page) 2983 { 2984 ssize_t ret; 2985 struct bitmap *bitmap; 2986 2987 spin_lock(&mddev->lock); 2988 bitmap = mddev->bitmap; 2989 if (!bitmap) 2990 ret = sprintf(page, "0\n"); 2991 else 2992 ret = sprintf(page, "%lu\n", bitmap->behind_writes_used); 2993 spin_unlock(&mddev->lock); 2994 2995 return ret; 2996 } 2997 2998 static ssize_t 2999 behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len) 3000 { 3001 struct bitmap *bitmap = mddev->bitmap; 3002 3003 if (bitmap) 3004 bitmap->behind_writes_used = 0; 3005 return len; 3006 } 3007 3008 static struct md_sysfs_entry max_backlog_used = 3009 __ATTR(max_backlog_used, S_IRUGO | S_IWUSR, 3010 behind_writes_used_show, behind_writes_used_reset); 3011 3012 static struct attribute *md_bitmap_common_attrs[] = { 3013 &bitmap_location.attr, 3014 NULL 3015 }; 3016 3017 static struct attribute *md_bitmap_internal_attrs[] = { 3018 &bitmap_space.attr, 3019 &bitmap_timeout.attr, 3020 &bitmap_backlog.attr, 3021 &bitmap_chunksize.attr, 3022 &bitmap_metadata.attr, 3023 &bitmap_can_clear.attr, 3024 &max_backlog_used.attr, 3025 NULL 3026 }; 3027 3028 static struct attribute_group md_bitmap_common_group = { 3029 .name = "bitmap", 3030 .attrs = md_bitmap_common_attrs, 3031 }; 3032 3033 static struct attribute_group md_bitmap_internal_group = { 3034 .name = "bitmap", 3035 .attrs = md_bitmap_internal_attrs, 3036 }; 3037 3038 static const struct attribute_group *bitmap_groups[] = { 3039 &md_bitmap_common_group, 3040 &md_bitmap_internal_group, 3041 NULL, 3042 }; 3043 3044 static const struct attribute_group *bitmap_none_groups[] = { 3045 &md_bitmap_common_group, 3046 NULL, 3047 }; 3048 3049 static struct bitmap_operations bitmap_none_ops = { 3050 .head = { 3051 .type = MD_BITMAP, 3052 .id = ID_BITMAP_NONE, 3053 .name = "none", 3054 }, 3055 3056 .enabled = bitmap_none_enabled, 3057 .create = bitmap_none_create, 3058 .load = bitmap_none_load, 3059 .destroy = bitmap_none_destroy, 3060 .get_stats = bitmap_none_get_stats, 3061 3062 .groups = bitmap_none_groups, 3063 }; 3064 3065 static struct bitmap_operations bitmap_ops = { 3066 .head = { 3067 .type = MD_BITMAP, 3068 .id = ID_BITMAP, 3069 .name = "bitmap", 3070 }, 3071 3072 .enabled = bitmap_enabled, 3073 .create = bitmap_create, 3074 .resize = bitmap_resize, 3075 .load = bitmap_load, 3076 .destroy = bitmap_destroy, 3077 .flush = bitmap_flush, 3078 .write_all = bitmap_write_all, 3079 .dirty_bits = bitmap_dirty_bits, 3080 .unplug = bitmap_unplug, 3081 .daemon_work = bitmap_daemon_work, 3082 3083 .start_behind_write = bitmap_start_behind_write, 3084 .end_behind_write = bitmap_end_behind_write, 3085 .wait_behind_writes = bitmap_wait_behind_writes, 3086 3087 .start_write = bitmap_start_write, 3088 .end_write = bitmap_end_write, 3089 .start_discard = bitmap_start_write, 3090 .end_discard = bitmap_end_write, 3091 3092 .start_sync = bitmap_start_sync, 3093 .end_sync = bitmap_end_sync, 3094 .cond_end_sync = bitmap_cond_end_sync, 3095 .close_sync = bitmap_close_sync, 3096 3097 .update_sb = bitmap_update_sb, 3098 .get_stats = bitmap_get_stats, 3099 3100 .sync_with_cluster = bitmap_sync_with_cluster, 3101 .get_from_slot = bitmap_get_from_slot, 3102 .copy_from_slot = bitmap_copy_from_slot, 3103 .set_pages = bitmap_set_pages, 3104 .free = md_bitmap_free, 3105 3106 .groups = bitmap_groups, 3107 }; 3108 3109 int md_bitmap_init(void) 3110 { 3111 int err; 3112 3113 md_bitmap_wq = alloc_workqueue("md_bitmap", WQ_MEM_RECLAIM | WQ_UNBOUND, 3114 0); 3115 if (!md_bitmap_wq) 3116 return -ENOMEM; 3117 3118 err = register_md_submodule(&bitmap_none_ops.head); 3119 if (err) 3120 goto err_wq; 3121 3122 err = register_md_submodule(&bitmap_ops.head); 3123 if (err) 3124 goto err_none; 3125 3126 return 0; 3127 3128 err_none: 3129 unregister_md_submodule(&bitmap_none_ops.head); 3130 err_wq: 3131 destroy_workqueue(md_bitmap_wq); 3132 return err; 3133 } 3134 3135 void md_bitmap_exit(void) 3136 { 3137 unregister_md_submodule(&bitmap_ops.head); 3138 unregister_md_submodule(&bitmap_none_ops.head); 3139 destroy_workqueue(md_bitmap_wq); 3140 } 3141