// SPDX-License-Identifier: GPL-2.0-only
/*
 * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
 *
 * bitmap_create  - sets up the bitmap structure
 * bitmap_destroy - destroys the bitmap structure
 *
 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
 * - added disk storage for bitmap
 * - changes to allow various bitmap chunk sizes
 */

/*
 * Still to do:
 *
 * flush after percent set rather than just time based. (maybe both).
 */

#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/timer.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/buffer_head.h>
#include <linux/seq_file.h>
#include <trace/events/block.h>
#include "md.h"
#include "md-bitmap.h"

static inline char *bmname(struct bitmap *bitmap)
{
	return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
}

/*
 * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
 *
 * 1) check to see if this page is allocated, if it's not then try to alloc
 * 2) if the alloc fails, set the page's hijacked flag so we'll use the
 *    page pointer directly as a counter
 *
 * if we find our page, we increment the page's refcount so that it stays
 * allocated while we're using it
 */
static int md_bitmap_checkpage(struct bitmap_counts *bitmap,
			       unsigned long page, int create, int no_hijack)
__releases(bitmap->lock)
__acquires(bitmap->lock)
{
	unsigned char *mappage;

	WARN_ON_ONCE(page >= bitmap->pages);
	if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
		return 0;

	if (bitmap->bp[page].map) /* page is already allocated, just return */
		return 0;

	if (!create)
		return -ENOENT;

	/* this page has not been allocated yet */

	spin_unlock_irq(&bitmap->lock);
	/* It is possible that this is being called inside a
	 * prepare_to_wait/finish_wait loop from raid5.c:make_request().
	 * In general it is not permitted to sleep in that context as it
	 * can cause the loop to spin freely.
	 * That doesn't apply here as we can only reach this point
	 * once with any loop.
	 * When this function completes, either bp[page].map or
	 * bp[page].hijacked will be set.  In either case, this function
	 * will abort before getting to this point again.  So there is
	 * no risk of a free-spin, and so it is safe to assert
	 * that sleeping here is allowed.
	 */
	sched_annotate_sleep();
	mappage = kzalloc(PAGE_SIZE, GFP_NOIO);
	spin_lock_irq(&bitmap->lock);

	if (mappage == NULL) {
		pr_debug("md/bitmap: map page allocation failed, hijacking\n");
		/* We don't support hijack for cluster raid */
		if (no_hijack)
			return -ENOMEM;
		/* failed - set the hijacked flag so that we can use the
		 * pointer as a counter */
		if (!bitmap->bp[page].map)
			bitmap->bp[page].hijacked = 1;
	} else if (bitmap->bp[page].map ||
		   bitmap->bp[page].hijacked) {
		/* somebody beat us to getting the page */
		kfree(mappage);
	} else {

		/* no page was in place and we have one, so install it */

		bitmap->bp[page].map = mappage;
		bitmap->missing_pages--;
	}
	return 0;
}

/* if page is completely empty, put it back on the free list, or dealloc it */
/* if page was hijacked, unmark the flag so it might get alloced next time */
/* Note: lock should be held when calling this */
static void md_bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page)
{
	char *ptr;

	if (bitmap->bp[page].count) /* page is still busy */
		return;

	/* page is no longer in use, it can be released */

	if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
		bitmap->bp[page].hijacked = 0;
		bitmap->bp[page].map = NULL;
	} else {
		/* normal case, free the page */
		ptr = bitmap->bp[page].map;
		bitmap->bp[page].map = NULL;
		bitmap->missing_pages++;
		kfree(ptr);
	}
}
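/*
 * Note on the hijack trick above: bp[page].map is a pointer, and a
 * pointer is at least twice the size of a 16-bit bitmap_counter_t, so a
 * hijacked entry stores two counters directly in the pointer field (see
 * md_bitmap_get_counter()).  The page's whole range is then tracked at
 * much coarser granularity, trading resolution for surviving the
 * allocation failure.
 */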
/*
 * bitmap file handling - read and write the bitmap file and its superblock
 */

/*
 * basic page I/O operations
 */

/* IO operations when bitmap is stored near all superblocks */
static int read_sb_page(struct mddev *mddev, loff_t offset,
			struct page *page,
			unsigned long index, int size)
{
	/* choose a good rdev and read the page from there */

	struct md_rdev *rdev;
	sector_t target;

	rdev_for_each(rdev, mddev) {
		if (!test_bit(In_sync, &rdev->flags)
		    || test_bit(Faulty, &rdev->flags)
		    || test_bit(Bitmap_sync, &rdev->flags))
			continue;

		target = offset + index * (PAGE_SIZE/512);

		if (sync_page_io(rdev, target,
				 roundup(size, bdev_logical_block_size(rdev->bdev)),
				 page, REQ_OP_READ, true)) {
			page->index = index;
			return 0;
		}
	}
	return -EIO;
}

static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
	/* Iterate the disks of an mddev, using rcu to protect access to the
	 * linked list, and raising the refcount of devices we return to ensure
	 * they don't disappear while in use.
	 * As devices are only added or removed when raid_disk is < 0 and
	 * nr_pending is 0 and In_sync is clear, the entries we return will
	 * still be in the same position on the list when we re-enter
	 * list_for_each_entry_continue_rcu.
	 *
	 * Note that if entered with 'rdev == NULL' to start at the
	 * beginning, we temporarily assign 'rdev' to an address which
	 * isn't really an rdev, but which can be used by
	 * list_for_each_entry_continue_rcu() to find the first entry.
	 */
	rcu_read_lock();
	if (rdev == NULL)
		/* start at the beginning */
		rdev = list_entry(&mddev->disks, struct md_rdev, same_set);
	else {
		/* release the previous rdev and start from there. */
		rdev_dec_pending(rdev, mddev);
	}
	list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) {
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags)) {
			/* this is a usable device */
			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			return rdev;
		}
	}
	rcu_read_unlock();
	return NULL;
}

static unsigned int optimal_io_size(struct block_device *bdev,
				    unsigned int last_page_size,
				    unsigned int io_size)
{
	if (bdev_io_opt(bdev) > bdev_logical_block_size(bdev))
		return roundup(last_page_size, bdev_io_opt(bdev));
	return io_size;
}

static unsigned int bitmap_io_size(unsigned int io_size, unsigned int opt_size,
				   loff_t start, loff_t boundary)
{
	if (io_size != opt_size &&
	    start + opt_size / SECTOR_SIZE <= boundary)
		return opt_size;
	if (start + io_size / SECTOR_SIZE <= boundary)
		return io_size;

	/* Overflows boundary */
	return 0;
}
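/*
 * Worked example of the two helpers above (illustrative, assuming a 4K
 * page and a device advertising a 64K optimal I/O size): the last,
 * possibly partial, bitmap page is rounded up to 64K by
 * optimal_io_size(), and bitmap_io_size() keeps that opt_size only if
 * it still ends on or before 'boundary'; otherwise it falls back to the
 * logical-block-rounded io_size, and returns 0 if even that would cross
 * into neighbouring data or metadata.
 */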
static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
			   struct page *page)
{
	struct block_device *bdev;
	struct mddev *mddev = bitmap->mddev;
	struct bitmap_storage *store = &bitmap->storage;
	loff_t sboff, offset = mddev->bitmap_info.offset;
	sector_t ps, doff;
	unsigned int size = PAGE_SIZE;
	unsigned int opt_size = PAGE_SIZE;

	bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
	if (page->index == store->file_pages - 1) {
		unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1);

		if (last_page_size == 0)
			last_page_size = PAGE_SIZE;
		size = roundup(last_page_size, bdev_logical_block_size(bdev));
		opt_size = optimal_io_size(bdev, last_page_size, size);
	}

	ps = page->index * PAGE_SIZE / SECTOR_SIZE;
	sboff = rdev->sb_start + offset;
	doff = rdev->data_offset;

	/* Just make sure we aren't corrupting data or metadata */
	if (mddev->external) {
		/* Bitmap could be anywhere. */
		if (sboff + ps > doff &&
		    sboff < (doff + mddev->dev_sectors + PAGE_SIZE / SECTOR_SIZE))
			return -EINVAL;
	} else if (offset < 0) {
		/* DATA  BITMAP METADATA  */
		size = bitmap_io_size(size, opt_size, offset + ps, 0);
		if (size == 0)
			/* bitmap runs into metadata */
			return -EINVAL;

		if (doff + mddev->dev_sectors > sboff)
			/* data runs into bitmap */
			return -EINVAL;
	} else if (rdev->sb_start < rdev->data_offset) {
		/* METADATA BITMAP DATA */
		size = bitmap_io_size(size, opt_size, sboff + ps, doff);
		if (size == 0)
			/* bitmap runs into data */
			return -EINVAL;
	} else {
		/* DATA METADATA BITMAP - no problems */
	}

	md_super_write(mddev, rdev, sboff + ps, (int) size, page);
	return 0;
}

static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
{
	struct md_rdev *rdev;
	struct mddev *mddev = bitmap->mddev;
	int ret;

	do {
		rdev = NULL;
		while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
			ret = __write_sb_page(rdev, bitmap, page);
			if (ret)
				return ret;
		}
	} while (wait && md_super_wait(mddev) < 0);

	return 0;
}

static void md_bitmap_file_kick(struct bitmap *bitmap);
/*
 * write out a page to a file
 */
static void write_page(struct bitmap *bitmap, struct page *page, int wait)
{
	struct buffer_head *bh;

	if (bitmap->storage.file == NULL) {
		switch (write_sb_page(bitmap, page, wait)) {
		case -EINVAL:
			set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
		}
	} else {

		bh = page_buffers(page);

		while (bh && bh->b_blocknr) {
			atomic_inc(&bitmap->pending_writes);
			set_buffer_locked(bh);
			set_buffer_mapped(bh);
			submit_bh(REQ_OP_WRITE | REQ_SYNC, bh);
			bh = bh->b_this_page;
		}

		if (wait)
			wait_event(bitmap->write_wait,
				   atomic_read(&bitmap->pending_writes) == 0);
	}
	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
		md_bitmap_file_kick(bitmap);
}

static void end_bitmap_write(struct buffer_head *bh, int uptodate)
{
	struct bitmap *bitmap = bh->b_private;

	if (!uptodate)
		set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
	if (atomic_dec_and_test(&bitmap->pending_writes))
		wake_up(&bitmap->write_wait);
}

static void free_buffers(struct page *page)
{
	struct buffer_head *bh;

	if (!PagePrivate(page))
		return;

	bh = page_buffers(page);
	while (bh) {
		struct buffer_head *next = bh->b_this_page;
		free_buffer_head(bh);
		bh = next;
	}
	detach_page_private(page);
	put_page(page);
}
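/*
 * Summary of the two write paths above: an internal bitmap (no backing
 * file) goes through write_sb_page() -> md_super_write() to a fixed
 * offset on every active rdev, while a file-backed bitmap submits the
 * buffer_heads attached by read_page() below, bypassing the filesystem
 * entirely; end_bitmap_write() is the completion for both the file
 * reads and the later direct writes.
 */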
/* read a page from a file.
 * We both read the page, and attach buffers to the page to record the
 * address of each block (using bmap).  These addresses will be used
 * to write the block later, completely bypassing the filesystem.
 * This usage is similar to how swap files are handled, and allows us
 * to write to a file with no concerns of memory allocation failing.
 */
static int read_page(struct file *file, unsigned long index,
		     struct bitmap *bitmap,
		     unsigned long count,
		     struct page *page)
{
	int ret = 0;
	struct inode *inode = file_inode(file);
	struct buffer_head *bh;
	sector_t block, blk_cur;
	unsigned long blocksize = i_blocksize(inode);

	pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
		 (unsigned long long)index << PAGE_SHIFT);

	bh = alloc_page_buffers(page, blocksize, false);
	if (!bh) {
		ret = -ENOMEM;
		goto out;
	}
	attach_page_private(page, bh);
	blk_cur = index << (PAGE_SHIFT - inode->i_blkbits);
	while (bh) {
		block = blk_cur;

		if (count == 0)
			bh->b_blocknr = 0;
		else {
			ret = bmap(inode, &block);
			if (ret || !block) {
				ret = -EINVAL;
				bh->b_blocknr = 0;
				goto out;
			}

			bh->b_blocknr = block;
			bh->b_bdev = inode->i_sb->s_bdev;
			if (count < blocksize)
				count = 0;
			else
				count -= blocksize;

			bh->b_end_io = end_bitmap_write;
			bh->b_private = bitmap;
			atomic_inc(&bitmap->pending_writes);
			set_buffer_locked(bh);
			set_buffer_mapped(bh);
			submit_bh(REQ_OP_READ, bh);
		}
		blk_cur++;
		bh = bh->b_this_page;
	}
	page->index = index;

	wait_event(bitmap->write_wait,
		   atomic_read(&bitmap->pending_writes) == 0);
	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
		ret = -EIO;
out:
	if (ret)
		pr_err("md: bitmap read error: (%dB @ %llu): %d\n",
		       (int)PAGE_SIZE,
		       (unsigned long long)index << PAGE_SHIFT,
		       ret);
	return ret;
}
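/*
 * For example (illustrative, assuming a 4K page over a filesystem with
 * 1K blocks): alloc_page_buffers() hangs four buffer_heads off each
 * page, blk_cur starts at index << (12 - 10), i.e. advances four file
 * blocks per bitmap page, and each bh caches the disk block that bmap()
 * resolved, so later writeout needs no filesystem calls and no memory
 * allocation.
 */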
/*
 * bitmap file superblock operations
 */

/*
 * md_bitmap_wait_writes() should be called before writing any bitmap
 * blocks, to ensure previous writes, particularly from
 * md_bitmap_daemon_work(), have completed.
 */
static void md_bitmap_wait_writes(struct bitmap *bitmap)
{
	if (bitmap->storage.file)
		wait_event(bitmap->write_wait,
			   atomic_read(&bitmap->pending_writes) == 0);
	else
		/* Note that we ignore the return value.  The writes
		 * might have failed, but that would just mean that
		 * some bits which should be cleared haven't been,
		 * which is safe.  The relevant bitmap blocks will
		 * probably get written again, but there is no great
		 * loss if they aren't.
		 */
		md_super_wait(bitmap->mddev);
}


/* update the event counter and sync the superblock to disk */
void md_bitmap_update_sb(struct bitmap *bitmap)
{
	bitmap_super_t *sb;

	if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
		return;
	if (bitmap->mddev->bitmap_info.external)
		return;
	if (!bitmap->storage.sb_page) /* no superblock */
		return;
	sb = kmap_atomic(bitmap->storage.sb_page);
	sb->events = cpu_to_le64(bitmap->mddev->events);
	if (bitmap->mddev->events < bitmap->events_cleared)
		/* rocking back to read-only */
		bitmap->events_cleared = bitmap->mddev->events;
	sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
	/*
	 * clear BITMAP_WRITE_ERROR bit to protect against the case that
	 * a bitmap write error occurred but the later writes succeeded.
	 */
	sb->state = cpu_to_le32(bitmap->flags & ~BIT(BITMAP_WRITE_ERROR));
	/* Just in case these have been changed via sysfs: */
	sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
	sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
	/* This might have been changed by a reshape */
	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
	sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize);
	sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes);
	sb->sectors_reserved = cpu_to_le32(bitmap->mddev->bitmap_info.space);
	kunmap_atomic(sb);
	write_page(bitmap, bitmap->storage.sb_page, 1);
}
EXPORT_SYMBOL(md_bitmap_update_sb);

/* print out the bitmap file superblock */
void md_bitmap_print_sb(struct bitmap *bitmap)
{
	bitmap_super_t *sb;

	if (!bitmap || !bitmap->storage.sb_page)
		return;
	sb = kmap_atomic(bitmap->storage.sb_page);
	pr_debug("%s: bitmap file superblock:\n", bmname(bitmap));
	pr_debug("         magic: %08x\n", le32_to_cpu(sb->magic));
	pr_debug("       version: %u\n", le32_to_cpu(sb->version));
	pr_debug("          uuid: %08x.%08x.%08x.%08x\n",
		 le32_to_cpu(*(__le32 *)(sb->uuid+0)),
		 le32_to_cpu(*(__le32 *)(sb->uuid+4)),
		 le32_to_cpu(*(__le32 *)(sb->uuid+8)),
		 le32_to_cpu(*(__le32 *)(sb->uuid+12)));
	pr_debug("        events: %llu\n",
		 (unsigned long long) le64_to_cpu(sb->events));
	pr_debug("events cleared: %llu\n",
		 (unsigned long long) le64_to_cpu(sb->events_cleared));
	pr_debug("         state: %08x\n", le32_to_cpu(sb->state));
	pr_debug("     chunksize: %u B\n", le32_to_cpu(sb->chunksize));
	pr_debug("  daemon sleep: %us\n", le32_to_cpu(sb->daemon_sleep));
	pr_debug("     sync size: %llu KB\n",
		 (unsigned long long)le64_to_cpu(sb->sync_size)/2);
	pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind));
	kunmap_atomic(sb);
}
/*
 * bitmap_new_disk_sb
 * @bitmap
 *
 * This function is somewhat the reverse of bitmap_read_sb.  bitmap_read_sb
 * reads and verifies the on-disk bitmap superblock and populates bitmap_info.
 * This function verifies 'bitmap_info' and populates the on-disk bitmap
 * structure, which is to be written to disk.
 *
 * Returns: 0 on success, -Exxx on error
 */
static int md_bitmap_new_disk_sb(struct bitmap *bitmap)
{
	bitmap_super_t *sb;
	unsigned long chunksize, daemon_sleep, write_behind;

	bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (bitmap->storage.sb_page == NULL)
		return -ENOMEM;
	bitmap->storage.sb_page->index = 0;

	sb = kmap_atomic(bitmap->storage.sb_page);

	sb->magic = cpu_to_le32(BITMAP_MAGIC);
	sb->version = cpu_to_le32(BITMAP_MAJOR_HI);

	chunksize = bitmap->mddev->bitmap_info.chunksize;
	BUG_ON(!chunksize);
	if (!is_power_of_2(chunksize)) {
		kunmap_atomic(sb);
		pr_warn("bitmap chunksize not a power of 2\n");
		return -EINVAL;
	}
	sb->chunksize = cpu_to_le32(chunksize);

	daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
	if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
		pr_debug("Choosing daemon_sleep default (5 sec)\n");
		daemon_sleep = 5 * HZ;
	}
	sb->daemon_sleep = cpu_to_le32(daemon_sleep);
	bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;

	/*
	 * FIXME: write_behind for RAID1.  If not specified, what
	 * is a good choice?  We choose COUNTER_MAX / 2 arbitrarily.
	 */
	write_behind = bitmap->mddev->bitmap_info.max_write_behind;
	if (write_behind > COUNTER_MAX)
		write_behind = COUNTER_MAX / 2;
	sb->write_behind = cpu_to_le32(write_behind);
	bitmap->mddev->bitmap_info.max_write_behind = write_behind;

	/* keep the array size field of the bitmap superblock up to date */
	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);

	memcpy(sb->uuid, bitmap->mddev->uuid, 16);

	set_bit(BITMAP_STALE, &bitmap->flags);
	sb->state = cpu_to_le32(bitmap->flags);
	bitmap->events_cleared = bitmap->mddev->events;
	sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
	bitmap->mddev->bitmap_info.nodes = 0;

	kunmap_atomic(sb);

	return 0;
}

/* read the superblock from the bitmap file and initialize some bitmap fields */
static int md_bitmap_read_sb(struct bitmap *bitmap)
{
	char *reason = NULL;
	bitmap_super_t *sb;
	unsigned long chunksize, daemon_sleep, write_behind;
	unsigned long long events;
	int nodes = 0;
	unsigned long sectors_reserved = 0;
	int err = -EINVAL;
	struct page *sb_page;
	loff_t offset = bitmap->mddev->bitmap_info.offset;

	if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) {
		chunksize = 128 * 1024 * 1024;
		daemon_sleep = 5 * HZ;
		write_behind = 0;
		set_bit(BITMAP_STALE, &bitmap->flags);
		err = 0;
		goto out_no_sb;
	}
	/* page 0 is the superblock, read it... */
	sb_page = alloc_page(GFP_KERNEL);
	if (!sb_page)
		return -ENOMEM;
	bitmap->storage.sb_page = sb_page;

re_read:
	/* If cluster_slot is set, the cluster is setup */
	if (bitmap->cluster_slot >= 0) {
		sector_t bm_blocks = bitmap->mddev->resync_max_sectors;

		bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks,
						  (bitmap->mddev->bitmap_info.chunksize >> 9));
		/* bits to bytes */
		bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t);
		/* to 4k blocks */
		bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
		offset = bitmap->mddev->bitmap_info.offset + (bitmap->cluster_slot * (bm_blocks << 3));
		pr_debug("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
			 bitmap->cluster_slot, offset);
	}

	if (bitmap->storage.file) {
		loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host);
		int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;

		err = read_page(bitmap->storage.file, 0,
				bitmap, bytes, sb_page);
	} else {
		err = read_sb_page(bitmap->mddev,
				   offset,
				   sb_page,
				   0, sizeof(bitmap_super_t));
	}
	if (err)
		return err;

	err = -EINVAL;
	sb = kmap_atomic(sb_page);

	chunksize = le32_to_cpu(sb->chunksize);
	daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
	write_behind = le32_to_cpu(sb->write_behind);
	sectors_reserved = le32_to_cpu(sb->sectors_reserved);

	/* verify that the bitmap-specific fields are valid */
	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
		reason = "bad magic";
	else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
		 le32_to_cpu(sb->version) > BITMAP_MAJOR_CLUSTERED)
		reason = "unrecognized superblock version";
	else if (chunksize < 512)
		reason = "bitmap chunksize too small";
	else if (!is_power_of_2(chunksize))
		reason = "bitmap chunksize not a power of 2";
	else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT)
		reason = "daemon sleep period out of range";
	else if (write_behind > COUNTER_MAX)
		reason = "write-behind limit out of range (0 - 16383)";
	if (reason) {
		pr_warn("%s: invalid bitmap file superblock: %s\n",
			bmname(bitmap), reason);
		goto out;
	}

	/*
	 * Setup nodes/clustername only if bitmap version is
	 * cluster-compatible
	 */
	if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
		nodes = le32_to_cpu(sb->nodes);
		strscpy(bitmap->mddev->bitmap_info.cluster_name,
			sb->cluster_name, 64);
	}

	/* keep the array size field of the bitmap superblock up to date */
	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);

	if (bitmap->mddev->persistent) {
		/*
		 * We have a persistent array superblock, so compare the
		 * bitmap's UUID and event counter to the mddev's
		 */
		if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
			pr_warn("%s: bitmap superblock UUID mismatch\n",
				bmname(bitmap));
			goto out;
		}
		events = le64_to_cpu(sb->events);
		if (!nodes && (events < bitmap->mddev->events)) {
			pr_warn("%s: bitmap file is out of date (%llu < %llu) -- forcing full recovery\n",
				bmname(bitmap), events,
				(unsigned long long) bitmap->mddev->events);
			set_bit(BITMAP_STALE, &bitmap->flags);
		}
	}

	/* assign fields using values from superblock */
	bitmap->flags |= le32_to_cpu(sb->state);
	if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
		set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
	bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
	err = 0;

out:
	kunmap_atomic(sb);
	if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
		/* Assigning chunksize is required for "re_read" */
		bitmap->mddev->bitmap_info.chunksize = chunksize;
		err = md_setup_cluster(bitmap->mddev, nodes);
		if (err) {
			pr_warn("%s: Could not setup cluster service (%d)\n",
				bmname(bitmap), err);
			goto out_no_sb;
		}
		bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev);
		goto re_read;
	}

out_no_sb:
	if (err == 0) {
		if (test_bit(BITMAP_STALE, &bitmap->flags))
			bitmap->events_cleared = bitmap->mddev->events;
		bitmap->mddev->bitmap_info.chunksize = chunksize;
		bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
		bitmap->mddev->bitmap_info.max_write_behind = write_behind;
		bitmap->mddev->bitmap_info.nodes = nodes;
		if (bitmap->mddev->bitmap_info.space == 0 ||
		    bitmap->mddev->bitmap_info.space > sectors_reserved)
			bitmap->mddev->bitmap_info.space = sectors_reserved;
	} else {
		md_bitmap_print_sb(bitmap);
		if (bitmap->cluster_slot < 0)
			md_cluster_stop(bitmap->mddev);
	}
	return err;
}

/*
 * general bitmap file operations
 */

/*
 * on-disk bitmap:
 *
 * Use one bit per "chunk" (block set).  We do the disk I/O on the bitmap
 * file a page at a time.  There's a superblock at the start of the file.
 */
/* calculate the index of the page that contains this bit */
static inline unsigned long file_page_index(struct bitmap_storage *store,
					    unsigned long chunk)
{
	if (store->sb_page)
		chunk += sizeof(bitmap_super_t) << 3;
	return chunk >> PAGE_BIT_SHIFT;
}

/* calculate the (bit) offset of this bit within a page */
static inline unsigned long file_page_offset(struct bitmap_storage *store,
					     unsigned long chunk)
{
	if (store->sb_page)
		chunk += sizeof(bitmap_super_t) << 3;
	return chunk & (PAGE_BITS - 1);
}

/*
 * return a pointer to the page in the filemap that contains the given bit
 *
 */
static inline struct page *filemap_get_page(struct bitmap_storage *store,
					    unsigned long chunk)
{
	if (file_page_index(store, chunk) >= store->file_pages)
		return NULL;
	return store->filemap[file_page_index(store, chunk)];
}
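/*
 * Worked example of the mapping above (illustrative, assuming 4K pages,
 * so PAGE_BITS == 32768): an in-file superblock shifts every chunk bit
 * by sizeof(bitmap_super_t) << 3 == 2048 positions, so chunk 40000 maps
 * to absolute bit 42048, which is file page 1 (42048 >> 15) at bit
 * offset 9280 (42048 & (PAGE_BITS - 1)).
 */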
static int md_bitmap_storage_alloc(struct bitmap_storage *store,
				   unsigned long chunks, int with_super,
				   int slot_number)
{
	int pnum, offset = 0;
	unsigned long num_pages;
	unsigned long bytes;

	bytes = DIV_ROUND_UP(chunks, 8);
	if (with_super)
		bytes += sizeof(bitmap_super_t);

	num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
	offset = slot_number * num_pages;

	store->filemap = kmalloc_array(num_pages, sizeof(struct page *),
				       GFP_KERNEL);
	if (!store->filemap)
		return -ENOMEM;

	if (with_super && !store->sb_page) {
		store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
		if (store->sb_page == NULL)
			return -ENOMEM;
	}

	pnum = 0;
	if (store->sb_page) {
		store->filemap[0] = store->sb_page;
		pnum = 1;
		store->sb_page->index = offset;
	}

	for ( ; pnum < num_pages; pnum++) {
		store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO);
		if (!store->filemap[pnum]) {
			store->file_pages = pnum;
			return -ENOMEM;
		}
		store->filemap[pnum]->index = pnum + offset;
	}
	store->file_pages = pnum;

	/* We need 4 bits per page, rounded up to a multiple
	 * of sizeof(unsigned long) */
	store->filemap_attr = kzalloc(
		roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
		GFP_KERNEL);
	if (!store->filemap_attr)
		return -ENOMEM;

	store->bytes = bytes;

	return 0;
}

static void md_bitmap_file_unmap(struct bitmap_storage *store)
{
	struct page **map, *sb_page;
	int pages;
	struct file *file;

	file = store->file;
	map = store->filemap;
	pages = store->file_pages;
	sb_page = store->sb_page;

	while (pages--)
		if (map[pages] != sb_page) /* 0 is sb_page, release it below */
			free_buffers(map[pages]);
	kfree(map);
	kfree(store->filemap_attr);

	if (sb_page)
		free_buffers(sb_page);

	if (file) {
		struct inode *inode = file_inode(file);
		invalidate_mapping_pages(inode->i_mapping, 0, -1);
		fput(file);
	}
}

/*
 * bitmap_file_kick - if an error occurs while manipulating the bitmap file
 * then it is no longer reliable, so we stop using it and we mark the file
 * as failed in the superblock
 */
static void md_bitmap_file_kick(struct bitmap *bitmap)
{
	char *path, *ptr = NULL;

	if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) {
		md_bitmap_update_sb(bitmap);

		if (bitmap->storage.file) {
			path = kmalloc(PAGE_SIZE, GFP_KERNEL);
			if (path)
				ptr = file_path(bitmap->storage.file,
						path, PAGE_SIZE);

			pr_warn("%s: kicking failed bitmap file %s from array!\n",
				bmname(bitmap), IS_ERR(ptr) ? "" : ptr);

			kfree(path);
		} else
			pr_warn("%s: disabling internal bitmap due to errors\n",
				bmname(bitmap));
	}
}

enum bitmap_page_attr {
	BITMAP_PAGE_DIRTY = 0,     /* there are set bits that need to be synced */
	BITMAP_PAGE_PENDING = 1,   /* there are bits that are being cleaned.
				    * i.e. counter is 1 or 2. */
	BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */
};

static inline void set_page_attr(struct bitmap *bitmap, int pnum,
				 enum bitmap_page_attr attr)
{
	set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
}

static inline void clear_page_attr(struct bitmap *bitmap, int pnum,
				   enum bitmap_page_attr attr)
{
	clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
}

static inline int test_page_attr(struct bitmap *bitmap, int pnum,
				 enum bitmap_page_attr attr)
{
	return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
}

static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum,
					   enum bitmap_page_attr attr)
{
	return test_and_clear_bit((pnum<<2) + attr,
				  bitmap->storage.filemap_attr);
}
/*
 * bitmap_file_set_bit -- called before performing a write to the md device
 * to set (and eventually sync) a particular bit in the bitmap file
 *
 * we set the bit immediately, then we record the page number so that
 * when an unplug occurs, we can flush the dirty pages out to disk
 */
static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
{
	unsigned long bit;
	struct page *page;
	void *kaddr;
	unsigned long chunk = block >> bitmap->counts.chunkshift;
	struct bitmap_storage *store = &bitmap->storage;
	unsigned long node_offset = 0;

	if (mddev_is_clustered(bitmap->mddev))
		node_offset = bitmap->cluster_slot * store->file_pages;

	page = filemap_get_page(&bitmap->storage, chunk);
	if (!page)
		return;
	bit = file_page_offset(&bitmap->storage, chunk);

	/* set the bit */
	kaddr = kmap_atomic(page);
	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
		set_bit(bit, kaddr);
	else
		set_bit_le(bit, kaddr);
	kunmap_atomic(kaddr);
	pr_debug("set file bit %lu page %lu\n", bit, page->index);
	/* record page number so it gets flushed to disk when unplug occurs */
	set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_DIRTY);
}
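/*
 * Life cycle of the page attributes used above and below: setting a bit
 * marks its page BITMAP_PAGE_DIRTY, and md_bitmap_unplug() must flush
 * DIRTY pages before the data write is allowed to proceed.  Clearing a
 * bit only marks the page BITMAP_PAGE_PENDING; md_bitmap_daemon_work()
 * later promotes PENDING to BITMAP_PAGE_NEEDWRITE, whose writeout may
 * safely lag the array.
 */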
static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
{
	unsigned long bit;
	struct page *page;
	void *paddr;
	unsigned long chunk = block >> bitmap->counts.chunkshift;
	struct bitmap_storage *store = &bitmap->storage;
	unsigned long node_offset = 0;

	if (mddev_is_clustered(bitmap->mddev))
		node_offset = bitmap->cluster_slot * store->file_pages;

	page = filemap_get_page(&bitmap->storage, chunk);
	if (!page)
		return;
	bit = file_page_offset(&bitmap->storage, chunk);
	paddr = kmap_atomic(page);
	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
		clear_bit(bit, paddr);
	else
		clear_bit_le(bit, paddr);
	kunmap_atomic(paddr);
	if (!test_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_NEEDWRITE)) {
		set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_PENDING);
		bitmap->allclean = 0;
	}
}

static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
{
	unsigned long bit;
	struct page *page;
	void *paddr;
	unsigned long chunk = block >> bitmap->counts.chunkshift;
	int set = 0;

	page = filemap_get_page(&bitmap->storage, chunk);
	if (!page)
		return -EINVAL;
	bit = file_page_offset(&bitmap->storage, chunk);
	paddr = kmap_atomic(page);
	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
		set = test_bit(bit, paddr);
	else
		set = test_bit_le(bit, paddr);
	kunmap_atomic(paddr);
	return set;
}

/* this gets called when the md device is ready to unplug its underlying
 * (slave) device queues -- before we let any writes go down, we need to
 * sync the dirty pages of the bitmap file to disk */
void md_bitmap_unplug(struct bitmap *bitmap)
{
	unsigned long i;
	int dirty, need_write;
	int writing = 0;

	if (!md_bitmap_enabled(bitmap))
		return;

	/* look at each page to see if there are any set bits that need to be
	 * flushed out to disk */
	for (i = 0; i < bitmap->storage.file_pages; i++) {
		dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
		need_write = test_and_clear_page_attr(bitmap, i,
						      BITMAP_PAGE_NEEDWRITE);
		if (dirty || need_write) {
			if (!writing) {
				md_bitmap_wait_writes(bitmap);
				if (bitmap->mddev->queue)
					blk_add_trace_msg(bitmap->mddev->queue,
							  "md bitmap_unplug");
			}
			clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
			write_page(bitmap, bitmap->storage.filemap[i], 0);
			writing = 1;
		}
	}
	if (writing)
		md_bitmap_wait_writes(bitmap);

	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
		md_bitmap_file_kick(bitmap);
}
EXPORT_SYMBOL(md_bitmap_unplug);

struct bitmap_unplug_work {
	struct work_struct work;
	struct bitmap *bitmap;
	struct completion *done;
};

static void md_bitmap_unplug_fn(struct work_struct *work)
{
	struct bitmap_unplug_work *unplug_work =
		container_of(work, struct bitmap_unplug_work, work);

	md_bitmap_unplug(unplug_work->bitmap);
	complete(unplug_work->done);
}

void md_bitmap_unplug_async(struct bitmap *bitmap)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct bitmap_unplug_work unplug_work;

	INIT_WORK_ONSTACK(&unplug_work.work, md_bitmap_unplug_fn);
	unplug_work.bitmap = bitmap;
	unplug_work.done = &done;

	queue_work(md_bitmap_wq, &unplug_work.work);
	wait_for_completion(&done);
}
EXPORT_SYMBOL(md_bitmap_unplug_async);
static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);

/*
 * bitmap_init_from_disk -- called at bitmap_create time to initialize
 * the in-memory bitmap from the on-disk bitmap -- also, sets up the
 * memory mapping of the bitmap file
 * Special cases:
 *   if there's no bitmap file, or if the bitmap file had been
 *   previously kicked from the array, we mark all the bits as
 *   1's in order to cause a full resync.
 *
 * We ignore all bits for sectors that end earlier than 'start'.
 * This is used when reading an out-of-date bitmap...
 */
static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
{
	unsigned long i, chunks, index, oldindex, bit, node_offset = 0;
	struct page *page = NULL;
	unsigned long bit_cnt = 0;
	struct file *file;
	unsigned long offset;
	int outofdate;
	int ret = -ENOSPC;
	void *paddr;
	struct bitmap_storage *store = &bitmap->storage;

	chunks = bitmap->counts.chunks;
	file = store->file;

	if (!file && !bitmap->mddev->bitmap_info.offset) {
		/* No permanent bitmap - fill with '1s'. */
		store->filemap = NULL;
		store->file_pages = 0;
		for (i = 0; i < chunks; i++) {
			/* if the disk bit is set, set the memory bit */
			int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift)
				      >= start);
			md_bitmap_set_memory_bits(bitmap,
						  (sector_t)i << bitmap->counts.chunkshift,
						  needed);
		}
		return 0;
	}

	outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
	if (outofdate)
		pr_warn("%s: bitmap file is out of date, doing full recovery\n", bmname(bitmap));

	if (file && i_size_read(file->f_mapping->host) < store->bytes) {
		pr_warn("%s: bitmap file too short %lu < %lu\n",
			bmname(bitmap),
			(unsigned long) i_size_read(file->f_mapping->host),
			store->bytes);
		goto err;
	}

	oldindex = ~0L;
	offset = 0;
	if (!bitmap->mddev->bitmap_info.external)
		offset = sizeof(bitmap_super_t);

	if (mddev_is_clustered(bitmap->mddev))
		node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE));

	for (i = 0; i < chunks; i++) {
		int b;
		index = file_page_index(&bitmap->storage, i);
		bit = file_page_offset(&bitmap->storage, i);
		if (index != oldindex) { /* this is a new page, read it in */
			int count;
			/* unmap the old page, we're done with it */
			if (index == store->file_pages-1)
				count = store->bytes - index * PAGE_SIZE;
			else
				count = PAGE_SIZE;
			page = store->filemap[index];
			if (file)
				ret = read_page(file, index, bitmap,
						count, page);
			else
				ret = read_sb_page(
					bitmap->mddev,
					bitmap->mddev->bitmap_info.offset,
					page,
					index + node_offset, count);

			if (ret)
				goto err;

			oldindex = index;

			if (outofdate) {
				/*
				 * if bitmap is out of date, dirty the
				 * whole page and write it out
				 */
				paddr = kmap_atomic(page);
				memset(paddr + offset, 0xff,
				       PAGE_SIZE - offset);
				kunmap_atomic(paddr);
				write_page(bitmap, page, 1);

				ret = -EIO;
				if (test_bit(BITMAP_WRITE_ERROR,
					     &bitmap->flags))
					goto err;
			}
		}
		paddr = kmap_atomic(page);
		if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
			b = test_bit(bit, paddr);
		else
			b = test_bit_le(bit, paddr);
		kunmap_atomic(paddr);
		if (b) {
			/* if the disk bit is set, set the memory bit */
			int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift
				      >= start);
			md_bitmap_set_memory_bits(bitmap,
						  (sector_t)i << bitmap->counts.chunkshift,
						  needed);
			bit_cnt++;
		}
		offset = 0;
	}

	pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n",
		 bmname(bitmap), store->file_pages,
		 bit_cnt, chunks);

	return 0;

err:
	pr_warn("%s: bitmap initialisation failed: %d\n",
		bmname(bitmap), ret);
	return ret;
}

void md_bitmap_write_all(struct bitmap *bitmap)
{
	/* We don't actually write all bitmap blocks here,
	 * just flag them as needing to be written
	 */
	int i;

	if (!bitmap || !bitmap->storage.filemap)
		return;
	if (bitmap->storage.file)
		/* Only one copy, so nothing needed */
		return;

	for (i = 0; i < bitmap->storage.file_pages; i++)
		set_page_attr(bitmap, i,
			      BITMAP_PAGE_NEEDWRITE);
	bitmap->allclean = 0;
}

static void md_bitmap_count_page(struct bitmap_counts *bitmap,
				 sector_t offset, int inc)
{
	sector_t chunk = offset >> bitmap->chunkshift;
	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
	bitmap->bp[page].count += inc;
	md_bitmap_checkfree(bitmap, page);
}

static void md_bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset)
{
	sector_t chunk = offset >> bitmap->chunkshift;
	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
	struct bitmap_page *bp = &bitmap->bp[page];

	if (!bp->pending)
		bp->pending = 1;
}

static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap,
					       sector_t offset, sector_t *blocks,
					       int create);

static void mddev_set_timeout(struct mddev *mddev, unsigned long timeout,
			      bool force)
{
	struct md_thread *thread;

	rcu_read_lock();
	thread = rcu_dereference(mddev->thread);

	if (!thread)
		goto out;

	if (force || thread->timeout < MAX_SCHEDULE_TIMEOUT)
		thread->timeout = timeout;

out:
	rcu_read_unlock();
}
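/*
 * Counter life cycle for a chunk, as implemented by the daemon below
 * and by md_bitmap_startwrite()/md_bitmap_endwrite(): the counter holds
 * the number of in-flight writes biased by 2, so an active chunk sits
 * at 2 + n.  Once writes drain back to 2, successive daemon runs step
 * it 2 -> 1 -> 0, and only on reaching 0 is the on-disk bit cleared;
 * a bit therefore stays set for roughly two daemon periods after the
 * last write to its chunk completes.
 */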
/*
 * bitmap daemon -- periodically wakes up to clean bits and flush pages
 * out to disk
 */
void md_bitmap_daemon_work(struct mddev *mddev)
{
	struct bitmap *bitmap;
	unsigned long j;
	unsigned long nextpage;
	sector_t blocks;
	struct bitmap_counts *counts;

	/* Use a mutex to guard daemon_work against
	 * bitmap_destroy.
	 */
	mutex_lock(&mddev->bitmap_info.mutex);
	bitmap = mddev->bitmap;
	if (bitmap == NULL) {
		mutex_unlock(&mddev->bitmap_info.mutex);
		return;
	}
	if (time_before(jiffies, bitmap->daemon_lastrun
			+ mddev->bitmap_info.daemon_sleep))
		goto done;

	bitmap->daemon_lastrun = jiffies;
	if (bitmap->allclean) {
		mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);
		goto done;
	}
	bitmap->allclean = 1;

	if (bitmap->mddev->queue)
		blk_add_trace_msg(bitmap->mddev->queue,
				  "md bitmap_daemon_work");

	/* Any file-page which is PENDING now needs to be written.
	 * So set NEEDWRITE now, then after we make any last-minute changes
	 * we will write it.
	 */
	for (j = 0; j < bitmap->storage.file_pages; j++)
		if (test_and_clear_page_attr(bitmap, j,
					     BITMAP_PAGE_PENDING))
			set_page_attr(bitmap, j,
				      BITMAP_PAGE_NEEDWRITE);

	if (bitmap->need_sync &&
	    mddev->bitmap_info.external == 0) {
		/* Arrange for superblock update as well as
		 * other changes */
		bitmap_super_t *sb;
		bitmap->need_sync = 0;
		if (bitmap->storage.filemap) {
			sb = kmap_atomic(bitmap->storage.sb_page);
			sb->events_cleared =
				cpu_to_le64(bitmap->events_cleared);
			kunmap_atomic(sb);
			set_page_attr(bitmap, 0,
				      BITMAP_PAGE_NEEDWRITE);
		}
	}
	/* Now look at the bitmap counters and if any are '2' or '1',
	 * decrement and handle accordingly.
	 */
	counts = &bitmap->counts;
	spin_lock_irq(&counts->lock);
	nextpage = 0;
	for (j = 0; j < counts->chunks; j++) {
		bitmap_counter_t *bmc;
		sector_t block = (sector_t)j << counts->chunkshift;

		if (j == nextpage) {
			nextpage += PAGE_COUNTER_RATIO;
			if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) {
				j |= PAGE_COUNTER_MASK;
				continue;
			}
			counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0;
		}

		bmc = md_bitmap_get_counter(counts, block, &blocks, 0);
		if (!bmc) {
			j |= PAGE_COUNTER_MASK;
			continue;
		}
		if (*bmc == 1 && !bitmap->need_sync) {
			/* We can clear the bit */
			*bmc = 0;
			md_bitmap_count_page(counts, block, -1);
			md_bitmap_file_clear_bit(bitmap, block);
		} else if (*bmc && *bmc <= 2) {
			*bmc = 1;
			md_bitmap_set_pending(counts, block);
			bitmap->allclean = 0;
		}
	}
	spin_unlock_irq(&counts->lock);

	md_bitmap_wait_writes(bitmap);
	/* Now start writeout on any page in NEEDWRITE that isn't DIRTY.
	 * DIRTY pages need to be written by bitmap_unplug so it can wait
	 * for them.
	 * If we find any DIRTY page we stop there and let bitmap_unplug
	 * handle all the rest.  This is important in the case where
	 * the first block holds the superblock and it has been updated.
	 * We mustn't write any other blocks before the superblock.
	 */
	for (j = 0;
	     j < bitmap->storage.file_pages
		     && !test_bit(BITMAP_STALE, &bitmap->flags);
	     j++) {
		if (test_page_attr(bitmap, j,
				   BITMAP_PAGE_DIRTY))
			/* bitmap_unplug will handle the rest */
			break;
		if (bitmap->storage.filemap &&
		    test_and_clear_page_attr(bitmap, j,
					     BITMAP_PAGE_NEEDWRITE)) {
			write_page(bitmap, bitmap->storage.filemap[j], 0);
		}
	}

done:
	if (bitmap->allclean == 0)
		mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
	mutex_unlock(&mddev->bitmap_info.mutex);
}
static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap,
					       sector_t offset, sector_t *blocks,
					       int create)
__releases(bitmap->lock)
__acquires(bitmap->lock)
{
	/* If 'create', we might release the lock and reclaim it.
	 * The lock must have been taken with interrupts enabled.
	 * If !create, we don't release the lock.
	 */
	sector_t chunk = offset >> bitmap->chunkshift;
	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
	unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
	sector_t csize;
	int err;

	if (page >= bitmap->pages) {
		/*
		 * This can happen if bitmap_start_sync goes beyond
		 * End-of-device while looking for a whole page or
		 * user set a huge number to sysfs bitmap_set_bits.
		 */
		return NULL;
	}
	err = md_bitmap_checkpage(bitmap, page, create, 0);

	if (bitmap->bp[page].hijacked ||
	    bitmap->bp[page].map == NULL)
		csize = ((sector_t)1) << (bitmap->chunkshift +
					  PAGE_COUNTER_SHIFT);
	else
		csize = ((sector_t)1) << bitmap->chunkshift;
	*blocks = csize - (offset & (csize - 1));

	if (err < 0)
		return NULL;

	/* now locked ... */

	if (bitmap->bp[page].hijacked) { /* hijacked pointer */
		/* should we use the first or second counter field
		 * of the hijacked pointer? */
		int hi = (pageoff > PAGE_COUNTER_MASK);
		return &((bitmap_counter_t *)
			 &bitmap->bp[page].map)[hi];
	} else /* page is allocated */
		return (bitmap_counter_t *)
			&(bitmap->bp[page].map[pageoff]);
}
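/*
 * Note on the csize calculation above: if the counter page is missing
 * or hijacked, the extent returned in *blocks covers every chunk the
 * page would have held (chunkshift + PAGE_COUNTER_SHIFT bits), letting
 * callers step over the whole page at once; a normally allocated page
 * yields a single-chunk extent.
 */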
int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset,
			 unsigned long sectors, int behind)
{
	if (!bitmap)
		return 0;

	if (behind) {
		int bw;
		atomic_inc(&bitmap->behind_writes);
		bw = atomic_read(&bitmap->behind_writes);
		if (bw > bitmap->behind_writes_used)
			bitmap->behind_writes_used = bw;

		pr_debug("inc write-behind count %d/%lu\n",
			 bw, bitmap->mddev->bitmap_info.max_write_behind);
	}

	while (sectors) {
		sector_t blocks;
		bitmap_counter_t *bmc;

		spin_lock_irq(&bitmap->counts.lock);
		bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 1);
		if (!bmc) {
			spin_unlock_irq(&bitmap->counts.lock);
			return 0;
		}

		if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) {
			DEFINE_WAIT(__wait);
			/* note that it is safe to do the prepare_to_wait
			 * after the test as long as we do it before dropping
			 * the spinlock.
			 */
			prepare_to_wait(&bitmap->overflow_wait, &__wait,
					TASK_UNINTERRUPTIBLE);
			spin_unlock_irq(&bitmap->counts.lock);
			schedule();
			finish_wait(&bitmap->overflow_wait, &__wait);
			continue;
		}

		switch (*bmc) {
		case 0:
			md_bitmap_file_set_bit(bitmap, offset);
			md_bitmap_count_page(&bitmap->counts, offset, 1);
			fallthrough;
		case 1:
			*bmc = 2;
		}

		(*bmc)++;

		spin_unlock_irq(&bitmap->counts.lock);

		offset += blocks;
		if (sectors > blocks)
			sectors -= blocks;
		else
			sectors = 0;
	}
	return 0;
}
EXPORT_SYMBOL(md_bitmap_startwrite);

void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
			unsigned long sectors, int success, int behind)
{
	if (!bitmap)
		return;
	if (behind) {
		if (atomic_dec_and_test(&bitmap->behind_writes))
			wake_up(&bitmap->behind_wait);
		pr_debug("dec write-behind count %d/%lu\n",
			 atomic_read(&bitmap->behind_writes),
			 bitmap->mddev->bitmap_info.max_write_behind);
	}

	while (sectors) {
		sector_t blocks;
		unsigned long flags;
		bitmap_counter_t *bmc;

		spin_lock_irqsave(&bitmap->counts.lock, flags);
		bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 0);
		if (!bmc) {
			spin_unlock_irqrestore(&bitmap->counts.lock, flags);
			return;
		}

		if (success && !bitmap->mddev->degraded &&
		    bitmap->events_cleared < bitmap->mddev->events) {
			bitmap->events_cleared = bitmap->mddev->events;
			bitmap->need_sync = 1;
			sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
		}

		if (!success && !NEEDED(*bmc))
			*bmc |= NEEDED_MASK;

		if (COUNTER(*bmc) == COUNTER_MAX)
			wake_up(&bitmap->overflow_wait);

		(*bmc)--;
		if (*bmc <= 2) {
			md_bitmap_set_pending(&bitmap->counts, offset);
			bitmap->allclean = 0;
		}
		spin_unlock_irqrestore(&bitmap->counts.lock, flags);
		offset += blocks;
		if (sectors > blocks)
			sectors -= blocks;
		else
			sectors = 0;
	}
}
EXPORT_SYMBOL(md_bitmap_endwrite);

static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
			       int degraded)
{
	bitmap_counter_t *bmc;
	int rv;
	if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */
		*blocks = 1024;
		return 1; /* always resync if no bitmap */
	}
	spin_lock_irq(&bitmap->counts.lock);
	bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
	rv = 0;
	if (bmc) {
		/* locked */
		if (RESYNC(*bmc))
			rv = 1;
		else if (NEEDED(*bmc)) {
			rv = 1;
			if (!degraded) { /* don't set/clear bits if degraded */
				*bmc |= RESYNC_MASK;
				*bmc &= ~NEEDED_MASK;
			}
		}
	}
	spin_unlock_irq(&bitmap->counts.lock);
	return rv;
}
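/*
 * NEEDED and RESYNC, tested above, are the top two bits of each 16-bit
 * counter (COUNTER_MAX bounds the low 14 bits): NEEDED marks a chunk
 * that still requires resync, and __bitmap_start_sync() converts NEEDED
 * into RESYNC (unless degraded) so md_bitmap_end_sync() can distinguish
 * a region being resynced from one still waiting its turn.
 */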
int md_bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
			 int degraded)
{
	/* bitmap_start_sync must always report on multiples of whole
	 * pages, otherwise resync (which is very PAGE_SIZE based) will
	 * get confused.
	 * So call __bitmap_start_sync repeatedly (if needed) until
	 * at least PAGE_SIZE>>9 blocks are covered.
	 * Return the 'or' of the result.
	 */
	int rv = 0;
	sector_t blocks1;

	*blocks = 0;
	while (*blocks < (PAGE_SIZE>>9)) {
		rv |= __bitmap_start_sync(bitmap, offset,
					  &blocks1, degraded);
		offset += blocks1;
		*blocks += blocks1;
	}
	return rv;
}
EXPORT_SYMBOL(md_bitmap_start_sync);

void md_bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted)
{
	bitmap_counter_t *bmc;
	unsigned long flags;

	if (bitmap == NULL) {
		*blocks = 1024;
		return;
	}
	spin_lock_irqsave(&bitmap->counts.lock, flags);
	bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
	if (bmc == NULL)
		goto unlock;
	/* locked */
	if (RESYNC(*bmc)) {
		*bmc &= ~RESYNC_MASK;

		if (!NEEDED(*bmc) && aborted)
			*bmc |= NEEDED_MASK;
		else {
			if (*bmc <= 2) {
				md_bitmap_set_pending(&bitmap->counts, offset);
				bitmap->allclean = 0;
			}
		}
	}
unlock:
	spin_unlock_irqrestore(&bitmap->counts.lock, flags);
}
EXPORT_SYMBOL(md_bitmap_end_sync);

void md_bitmap_close_sync(struct bitmap *bitmap)
{
	/* Sync has finished, and any bitmap chunks that weren't synced
	 * properly have been aborted.  It remains to us to clear the
	 * RESYNC bit wherever it is still on
	 */
	sector_t sector = 0;
	sector_t blocks;
	if (!bitmap)
		return;
	while (sector < bitmap->mddev->resync_max_sectors) {
		md_bitmap_end_sync(bitmap, sector, &blocks, 0);
		sector += blocks;
	}
}
EXPORT_SYMBOL(md_bitmap_close_sync);

void md_bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
{
	sector_t s = 0;
	sector_t blocks;

	if (!bitmap)
		return;
	if (sector == 0) {
		bitmap->last_end_sync = jiffies;
		return;
	}
	if (!force && time_before(jiffies, (bitmap->last_end_sync
					    + bitmap->mddev->bitmap_info.daemon_sleep)))
		return;
	wait_event(bitmap->mddev->recovery_wait,
		   atomic_read(&bitmap->mddev->recovery_active) == 0);

	bitmap->mddev->curr_resync_completed = sector;
	set_bit(MD_SB_CHANGE_CLEAN, &bitmap->mddev->sb_flags);
	sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);
	s = 0;
	while (s < sector && s < bitmap->mddev->resync_max_sectors) {
		md_bitmap_end_sync(bitmap, s, &blocks, 0);
		s += blocks;
	}
	bitmap->last_end_sync = jiffies;
	sysfs_notify_dirent_safe(bitmap->mddev->sysfs_completed);
}
EXPORT_SYMBOL(md_bitmap_cond_end_sync);

void md_bitmap_sync_with_cluster(struct mddev *mddev,
				 sector_t old_lo, sector_t old_hi,
				 sector_t new_lo, sector_t new_hi)
{
	struct bitmap *bitmap = mddev->bitmap;
	sector_t sector, blocks = 0;

	for (sector = old_lo; sector < new_lo; ) {
		md_bitmap_end_sync(bitmap, sector, &blocks, 0);
		sector += blocks;
	}
	WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n");

	for (sector = old_hi; sector < new_hi; ) {
		md_bitmap_start_sync(bitmap, sector, &blocks, 0);
		sector += blocks;
	}
	WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n");
}
EXPORT_SYMBOL(md_bitmap_sync_with_cluster);
static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
{
	/* For each chunk covered by any of these sectors, set the
	 * counter to 2 and possibly set resync_needed.  They should all
	 * be 0 at this point
	 */

	sector_t secs;
	bitmap_counter_t *bmc;
	spin_lock_irq(&bitmap->counts.lock);
	bmc = md_bitmap_get_counter(&bitmap->counts, offset, &secs, 1);
	if (!bmc) {
		spin_unlock_irq(&bitmap->counts.lock);
		return;
	}
	if (!*bmc) {
		*bmc = 2;
		md_bitmap_count_page(&bitmap->counts, offset, 1);
		md_bitmap_set_pending(&bitmap->counts, offset);
		bitmap->allclean = 0;
	}
	if (needed)
		*bmc |= NEEDED_MASK;
	spin_unlock_irq(&bitmap->counts.lock);
}

/* dirty the memory and file bits for bitmap chunks "s" to "e" */
void md_bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
{
	unsigned long chunk;

	for (chunk = s; chunk <= e; chunk++) {
		sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift;
		md_bitmap_set_memory_bits(bitmap, sec, 1);
		md_bitmap_file_set_bit(bitmap, sec);
		if (sec < bitmap->mddev->recovery_cp)
			/* We are asserting that the array is dirty,
			 * so move the recovery_cp address back so
			 * that it is obvious that it is dirty
			 */
			bitmap->mddev->recovery_cp = sec;
	}
}

/*
 * flush out any pending updates
 */
void md_bitmap_flush(struct mddev *mddev)
{
	struct bitmap *bitmap = mddev->bitmap;
	long sleep;

	if (!bitmap) /* there was no bitmap */
		return;

	/* run the daemon_work three times to ensure everything is flushed
	 * that can be
	 */
	sleep = mddev->bitmap_info.daemon_sleep * 2;
	bitmap->daemon_lastrun -= sleep;
	md_bitmap_daemon_work(mddev);
	bitmap->daemon_lastrun -= sleep;
	md_bitmap_daemon_work(mddev);
	bitmap->daemon_lastrun -= sleep;
	md_bitmap_daemon_work(mddev);
	if (mddev->bitmap_info.external)
		md_super_wait(mddev);
	md_bitmap_update_sb(bitmap);
}
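/*
 * Why three passes: each daemon run advances state by only one step.
 * A counter decays 2 -> 1 on one pass and 1 -> 0 (clearing the file
 * bit, marking its page PENDING) on the next, and a further pass is
 * needed to promote PENDING to NEEDWRITE and issue the write, so three
 * runs flush everything that can be flushed.
 */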
/*
 * free memory that was allocated
 */
void md_bitmap_free(struct bitmap *bitmap)
{
	unsigned long k, pages;
	struct bitmap_page *bp;

	if (!bitmap) /* there was no bitmap */
		return;

	if (bitmap->sysfs_can_clear)
		sysfs_put(bitmap->sysfs_can_clear);

	if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info &&
	    bitmap->cluster_slot == md_cluster_ops->slot_number(bitmap->mddev))
		md_cluster_stop(bitmap->mddev);

	/* Shouldn't be needed - but just in case.... */
	wait_event(bitmap->write_wait,
		   atomic_read(&bitmap->pending_writes) == 0);

	/* release the bitmap file */
	md_bitmap_file_unmap(&bitmap->storage);

	bp = bitmap->counts.bp;
	pages = bitmap->counts.pages;

	/* free all allocated memory */

	if (bp) /* deallocate the page memory */
		for (k = 0; k < pages; k++)
			if (bp[k].map && !bp[k].hijacked)
				kfree(bp[k].map);
	kfree(bp);
	kfree(bitmap);
}
EXPORT_SYMBOL(md_bitmap_free);

void md_bitmap_wait_behind_writes(struct mddev *mddev)
{
	struct bitmap *bitmap = mddev->bitmap;

	/* wait for behind writes to complete */
	if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
		pr_debug("md:%s: behind writes in progress - waiting to stop.\n",
			 mdname(mddev));
		/* need to kick something here to make sure I/O goes? */
		wait_event(bitmap->behind_wait,
			   atomic_read(&bitmap->behind_writes) == 0);
	}
}

void md_bitmap_destroy(struct mddev *mddev)
{
	struct bitmap *bitmap = mddev->bitmap;

	if (!bitmap) /* there was no bitmap */
		return;

	md_bitmap_wait_behind_writes(mddev);
	if (!mddev->serialize_policy)
		mddev_destroy_serial_pool(mddev, NULL, true);

	mutex_lock(&mddev->bitmap_info.mutex);
	spin_lock(&mddev->lock);
	mddev->bitmap = NULL; /* disconnect from the md device */
	spin_unlock(&mddev->lock);
	mutex_unlock(&mddev->bitmap_info.mutex);
	mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);

	md_bitmap_free(bitmap);
}

/*
 * initialize the bitmap structure
 * if this returns an error, bitmap_destroy must be called to do clean up
 * once mddev->bitmap is set
 */
struct bitmap *md_bitmap_create(struct mddev *mddev, int slot)
{
	struct bitmap *bitmap;
	sector_t blocks = mddev->resync_max_sectors;
	struct file *file = mddev->bitmap_info.file;
	int err;
	struct kernfs_node *bm = NULL;

	BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);

	BUG_ON(file && mddev->bitmap_info.offset);

	if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
		pr_notice("md/raid:%s: array with journal cannot have bitmap\n",
			  mdname(mddev));
		return ERR_PTR(-EBUSY);
	}

	bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
	if (!bitmap)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&bitmap->counts.lock);
	atomic_set(&bitmap->pending_writes, 0);
	init_waitqueue_head(&bitmap->write_wait);
	init_waitqueue_head(&bitmap->overflow_wait);
	init_waitqueue_head(&bitmap->behind_wait);

	bitmap->mddev = mddev;
	bitmap->cluster_slot = slot;

	if (mddev->kobj.sd)
		bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
	if (bm) {
		bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
		sysfs_put(bm);
	} else
		bitmap->sysfs_can_clear = NULL;

	bitmap->storage.file = file;
	if (file) {
		get_file(file);
		/* As future accesses to this file will use bmap,
		 * and bypass the page cache, we must sync the file
		 * first.
		 */
		vfs_fsync(file, 1);
	}
	/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
	if (!mddev->bitmap_info.external) {
		/*
		 * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is
		 * instructing us to create a new on-disk bitmap instance.
		 */
		if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags))
			err = md_bitmap_new_disk_sb(bitmap);
		else
			err = md_bitmap_read_sb(bitmap);
	} else {
		err = 0;
		if (mddev->bitmap_info.chunksize == 0 ||
		    mddev->bitmap_info.daemon_sleep == 0)
			/* chunksize and time_base need to be
			 * set first. */
			err = -EINVAL;
	}
	if (err)
		goto error;

	bitmap->daemon_lastrun = jiffies;
	err = md_bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 1);
	if (err)
		goto error;

	pr_debug("created bitmap (%lu pages) for device %s\n",
		 bitmap->counts.pages, bmname(bitmap));

	err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;

int md_bitmap_load(struct mddev *mddev)
{
	int err = 0;
	sector_t start = 0;
	sector_t sector = 0;
	struct bitmap *bitmap = mddev->bitmap;
	struct md_rdev *rdev;

	if (!bitmap)
		goto out;

	rdev_for_each(rdev, mddev)
		mddev_create_serial_pool(mddev, rdev, true);

	if (mddev_is_clustered(mddev))
		md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);

	/* Clear out old bitmap info first: Either there is none, or we
	 * are resuming after someone else has possibly changed things,
	 * so we should forget old cached info.
	 * All chunks should be clean, but some might need_sync.
	 */
	while (sector < mddev->resync_max_sectors) {
		sector_t blocks;
		md_bitmap_start_sync(bitmap, sector, &blocks, 0);
		sector += blocks;
	}
	md_bitmap_close_sync(bitmap);

	if (mddev->degraded == 0
	    || bitmap->events_cleared == mddev->events)
		/* no need to keep dirty bits to optimise a
		 * re-add of a missing device */
		start = mddev->recovery_cp;

	mutex_lock(&mddev->bitmap_info.mutex);
	err = md_bitmap_init_from_disk(bitmap, start);
	mutex_unlock(&mddev->bitmap_info.mutex);

	if (err)
		goto out;
	clear_bit(BITMAP_STALE, &bitmap->flags);

	/* Kick recovery in case any bits were set */
	set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);

	mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
	md_wakeup_thread(mddev->thread);

	md_bitmap_update_sb(bitmap);

	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
		err = -EIO;
out:
	return err;
}
EXPORT_SYMBOL_GPL(md_bitmap_load);

/* caller needs to free the returned bitmap with md_bitmap_free() */
struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot)
{
	int rv = 0;
	struct bitmap *bitmap;

	bitmap = md_bitmap_create(mddev, slot);
	if (IS_ERR(bitmap)) {
		rv = PTR_ERR(bitmap);
		return ERR_PTR(rv);
	}

	rv = md_bitmap_init_from_disk(bitmap, 0);
	if (rv) {
		md_bitmap_free(bitmap);
		return ERR_PTR(rv);
	}

	return bitmap;
}
EXPORT_SYMBOL(get_bitmap_from_slot);

/* Loads the bitmap associated with slot and copies the resync information
 * to our bitmap
 */
int md_bitmap_copy_from_slot(struct mddev *mddev, int slot,
			     sector_t *low, sector_t *high, bool clear_bits)
{
	int rv = 0, i, j;
	sector_t block, lo = 0, hi = 0;
	struct bitmap_counts *counts;
	struct bitmap *bitmap;

	bitmap = get_bitmap_from_slot(mddev, slot);
	if (IS_ERR(bitmap)) {
		pr_err("%s can't get bitmap from slot %d\n", __func__, slot);
		return -1;
	}

	counts = &bitmap->counts;
	for (j = 0; j < counts->chunks; j++) {
		block = (sector_t)j << counts->chunkshift;
		if (md_bitmap_file_test_bit(bitmap, block)) {
			if (!lo)
				lo = block;
			hi = block;
			md_bitmap_file_clear_bit(bitmap, block);
			md_bitmap_set_memory_bits(mddev->bitmap, block, 1);
			md_bitmap_file_set_bit(mddev->bitmap, block);
		}
	}

	if (clear_bits) {
		md_bitmap_update_sb(bitmap);
		/* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs
		 * BITMAP_PAGE_DIRTY or _NEEDWRITE to write ...
		 */
		for (i = 0; i < bitmap->storage.file_pages; i++)
			if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING))
				set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE);
		md_bitmap_unplug(bitmap);
	}
	md_bitmap_unplug(mddev->bitmap);
	*low = lo;
	*high = hi;
	md_bitmap_free(bitmap);

	return rv;
}
EXPORT_SYMBOL_GPL(md_bitmap_copy_from_slot);
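
/*
 * Illustration of the md_bitmap_copy_from_slot() results above, with
 * hypothetical numbers: if slot 1's bitmap has chunks 5 and 9 set and
 * chunkshift is 17, the call returns *low = 5 << 17 = 655360 and
 * *high = 9 << 17 = 1179648, and both chunks are now also set in
 * mddev->bitmap, so the caller can resync just that sector range.
 */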

void md_bitmap_status(struct seq_file *seq, struct bitmap *bitmap)
{
	unsigned long chunk_kb;
	struct bitmap_counts *counts;

	if (!bitmap)
		return;

	counts = &bitmap->counts;

	chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10;
	seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
		   "%lu%s chunk",
		   counts->pages - counts->missing_pages,
		   counts->pages,
		   (counts->pages - counts->missing_pages)
		   << (PAGE_SHIFT - 10),
		   chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize,
		   chunk_kb ? "KB" : "B");
	if (bitmap->storage.file) {
		seq_printf(seq, ", file: ");
		seq_file_path(seq, bitmap->storage.file, " \t\n");
	}

	seq_printf(seq, "\n");
}
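
/*
 * Example of the md_bitmap_status() output as it appears in /proc/mdstat
 * (all numbers and the file path are hypothetical; 4K pages are assumed,
 * so each allocated counter page is reported as 4KB):
 *
 *	bitmap: 5/117 pages [20KB], 65536KB chunk, file: /var/md0.bitmap
 */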

int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
		     int chunksize, int init)
{
	/* If chunk_size is 0, choose an appropriate chunk size.
	 * Then possibly allocate new storage space.
	 * Then quiesce, copy bits, replace bitmap, and re-start
	 *
	 * This function is called both to set up the initial bitmap
	 * and to resize the bitmap while the array is active.
	 * If this happens as a result of the array being resized,
	 * chunksize will be zero, and we need to choose a suitable
	 * chunksize, otherwise we use what we are given.
	 */
	struct bitmap_storage store;
	struct bitmap_counts old_counts;
	unsigned long chunks;
	sector_t block;
	sector_t old_blocks, new_blocks;
	int chunkshift;
	int ret = 0;
	long pages;
	struct bitmap_page *new_bp;

	if (bitmap->storage.file && !init) {
		pr_info("md: cannot resize file-based bitmap\n");
		return -EINVAL;
	}

	if (chunksize == 0) {
		/* If there is enough space, leave the chunk size unchanged,
		 * else increase by factor of two until there is enough space.
		 */
		long bytes;
		long space = bitmap->mddev->bitmap_info.space;

		if (space == 0) {
			/* We don't know how much space there is, so limit
			 * to current size - in sectors.
			 */
			bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8);
			if (!bitmap->mddev->bitmap_info.external)
				bytes += sizeof(bitmap_super_t);
			space = DIV_ROUND_UP(bytes, 512);
			bitmap->mddev->bitmap_info.space = space;
		}
		chunkshift = bitmap->counts.chunkshift;
		chunkshift--;
		do {
			/* 'chunkshift' is shift from block size to chunk size */
			chunkshift++;
			chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
			bytes = DIV_ROUND_UP(chunks, 8);
			if (!bitmap->mddev->bitmap_info.external)
				bytes += sizeof(bitmap_super_t);
		} while (bytes > (space << 9) && (chunkshift + BITMAP_BLOCK_SHIFT) <
			(BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize) - 1));
	} else
		chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT;
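
	/*
	 * Worked example of the sizing loop above, with hypothetical
	 * numbers: blocks = 2^32 sectors (a 2TiB array), space = 64
	 * sectors (32768 bytes), an internal bitmap, and a current
	 * chunkshift of 14.  At chunkshift 14, chunks = 2^18 and
	 * bytes = 2^18/8 + 256 = 33024 > 32768, so the loop doubles the
	 * chunk size; at chunkshift 15, chunks = 2^17 and bytes =
	 * 16384 + 256 = 16640 <= 32768, so it stops there, giving a
	 * chunk size of 1 << (15 + BITMAP_BLOCK_SHIFT) = 16MiB.
	 */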

	chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
	memset(&store, 0, sizeof(store));
	if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
		ret = md_bitmap_storage_alloc(&store, chunks,
					      !bitmap->mddev->bitmap_info.external,
					      mddev_is_clustered(bitmap->mddev)
					      ? bitmap->cluster_slot : 0);
	if (ret) {
		md_bitmap_file_unmap(&store);
		goto err;
	}

	pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO);

	new_bp = kcalloc(pages, sizeof(*new_bp), GFP_KERNEL);
	ret = -ENOMEM;
	if (!new_bp) {
		md_bitmap_file_unmap(&store);
		goto err;
	}

	if (!init)
		bitmap->mddev->pers->quiesce(bitmap->mddev, 1);

	store.file = bitmap->storage.file;
	bitmap->storage.file = NULL;

	if (store.sb_page && bitmap->storage.sb_page)
		memcpy(page_address(store.sb_page),
		       page_address(bitmap->storage.sb_page),
		       sizeof(bitmap_super_t));
	spin_lock_irq(&bitmap->counts.lock);
	md_bitmap_file_unmap(&bitmap->storage);
	bitmap->storage = store;

	old_counts = bitmap->counts;
	bitmap->counts.bp = new_bp;
	bitmap->counts.pages = pages;
	bitmap->counts.missing_pages = pages;
	bitmap->counts.chunkshift = chunkshift;
	bitmap->counts.chunks = chunks;
	bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift +
						       BITMAP_BLOCK_SHIFT);

	blocks = min(old_counts.chunks << old_counts.chunkshift,
		     chunks << chunkshift);

	/* For cluster raid, need to pre-allocate bitmap */
	if (mddev_is_clustered(bitmap->mddev)) {
		unsigned long page;
		for (page = 0; page < pages; page++) {
			ret = md_bitmap_checkpage(&bitmap->counts, page, 1, 1);
			if (ret) {
				unsigned long k;

				/* deallocate the page memory */
				for (k = 0; k < page; k++) {
					kfree(new_bp[k].map);
				}
				kfree(new_bp);

				/* restore some fields from old_counts */
				bitmap->counts.bp = old_counts.bp;
				bitmap->counts.pages = old_counts.pages;
				bitmap->counts.missing_pages = old_counts.pages;
				bitmap->counts.chunkshift = old_counts.chunkshift;
				bitmap->counts.chunks = old_counts.chunks;
				bitmap->mddev->bitmap_info.chunksize =
					1UL << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT);
				blocks = old_counts.chunks << old_counts.chunkshift;
				pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n");
				break;
			} else
				bitmap->counts.bp[page].count += 1;
		}
	}

	for (block = 0; block < blocks; ) {
		bitmap_counter_t *bmc_old, *bmc_new;
		int set;

		bmc_old = md_bitmap_get_counter(&old_counts, block, &old_blocks, 0);
		set = bmc_old && NEEDED(*bmc_old);

		if (set) {
			bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1);
			if (bmc_new) {
				if (*bmc_new == 0) {
					/* need to set on-disk bits too. */
					sector_t end = block + new_blocks;
					sector_t start = block >> chunkshift;

					start <<= chunkshift;
					while (start < end) {
						md_bitmap_file_set_bit(bitmap, block);
						start += 1 << chunkshift;
					}
					*bmc_new = 2;
					md_bitmap_count_page(&bitmap->counts, block, 1);
					md_bitmap_set_pending(&bitmap->counts, block);
				}
				*bmc_new |= NEEDED_MASK;
			}
			if (new_blocks < old_blocks)
				old_blocks = new_blocks;
		}
		block += old_blocks;
	}

	if (bitmap->counts.bp != old_counts.bp) {
		unsigned long k;
		for (k = 0; k < old_counts.pages; k++)
			if (!old_counts.bp[k].hijacked)
				kfree(old_counts.bp[k].map);
		kfree(old_counts.bp);
	}

	if (!init) {
		int i;
		while (block < (chunks << chunkshift)) {
			bitmap_counter_t *bmc;
			bmc = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1);
			if (bmc) {
				/* new space.  It needs to be resynced, so
				 * we set NEEDED_MASK.
				 */
				if (*bmc == 0) {
					*bmc = NEEDED_MASK | 2;
					md_bitmap_count_page(&bitmap->counts, block, 1);
					md_bitmap_set_pending(&bitmap->counts, block);
				}
			}
			block += new_blocks;
		}
		for (i = 0; i < bitmap->storage.file_pages; i++)
			set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
	}
	spin_unlock_irq(&bitmap->counts.lock);

	if (!init) {
		md_bitmap_unplug(bitmap);
		bitmap->mddev->pers->quiesce(bitmap->mddev, 0);
	}
	ret = 0;
err:
	return ret;
}
EXPORT_SYMBOL_GPL(md_bitmap_resize);

static ssize_t
location_show(struct mddev *mddev, char *page)
{
	ssize_t len;
	if (mddev->bitmap_info.file)
		len = sprintf(page, "file");
	else if (mddev->bitmap_info.offset)
		len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset);
	else
		len = sprintf(page, "none");
	len += sprintf(page+len, "\n");
	return len;
}
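
/*
 * Per the sprintf's above, reading "location" yields one of:
 *
 *	none	- no bitmap is configured
 *	file	- the bitmap lives in a file
 *	+8	- e.g. an internal bitmap stored at an 8-sector offset
 *		  (the offset is printed with an explicit sign)
 */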

static ssize_t
location_store(struct mddev *mddev, const char *buf, size_t len)
{
	int rv;

	rv = mddev_lock(mddev);
	if (rv)
		return rv;
	if (mddev->pers) {
		if (!mddev->pers->quiesce) {
			rv = -EBUSY;
			goto out;
		}
		if (mddev->recovery || mddev->sync_thread) {
			rv = -EBUSY;
			goto out;
		}
	}

	if (mddev->bitmap || mddev->bitmap_info.file ||
	    mddev->bitmap_info.offset) {
		/* bitmap already configured.  Only option is to clear it */
		if (strncmp(buf, "none", 4) != 0) {
			rv = -EBUSY;
			goto out;
		}
		if (mddev->pers) {
			mddev_suspend(mddev);
			md_bitmap_destroy(mddev);
			mddev_resume(mddev);
		}
		mddev->bitmap_info.offset = 0;
		if (mddev->bitmap_info.file) {
			struct file *f = mddev->bitmap_info.file;
			mddev->bitmap_info.file = NULL;
			fput(f);
		}
	} else {
		/* No bitmap, OK to set a location */
		long long offset;
		if (strncmp(buf, "none", 4) == 0)
			/* nothing to be done */;
		else if (strncmp(buf, "file:", 5) == 0) {
			/* Not supported yet */
			rv = -EINVAL;
			goto out;
		} else {
			if (buf[0] == '+')
				rv = kstrtoll(buf+1, 10, &offset);
			else
				rv = kstrtoll(buf, 10, &offset);
			if (rv)
				goto out;
			if (offset == 0) {
				rv = -EINVAL;
				goto out;
			}
			if (mddev->bitmap_info.external == 0 &&
			    mddev->major_version == 0 &&
			    offset != mddev->bitmap_info.default_offset) {
				rv = -EINVAL;
				goto out;
			}
			mddev->bitmap_info.offset = offset;
			if (mddev->pers) {
				struct bitmap *bitmap;
				bitmap = md_bitmap_create(mddev, -1);
				mddev_suspend(mddev);
				if (IS_ERR(bitmap))
					rv = PTR_ERR(bitmap);
				else {
					mddev->bitmap = bitmap;
					rv = md_bitmap_load(mddev);
					if (rv)
						mddev->bitmap_info.offset = 0;
				}
				if (rv) {
					md_bitmap_destroy(mddev);
					mddev_resume(mddev);
					goto out;
				}
				mddev_resume(mddev);
			}
		}
	}
	if (!mddev->external) {
		/* Ensure new bitmap info is stored in
		 * metadata promptly.
		 */
		set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
		md_wakeup_thread(mddev->thread);
	}
	rv = 0;
out:
	mddev_unlock(mddev);
	if (rv)
		return rv;
	return len;
}

static struct md_sysfs_entry bitmap_location =
__ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store);
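
/*
 * Illustrative sysfs usage (the device name is hypothetical).  Writing a
 * location to an active array creates and loads the bitmap on the spot;
 * writing "none" tears an existing bitmap down:
 *
 *	echo +8 > /sys/block/md0/md/bitmap/location
 *	echo none > /sys/block/md0/md/bitmap/location
 */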

/* 'bitmap/space' is the space available at 'location' for the
 * bitmap.  This allows the kernel to know when it is safe to
 * resize the bitmap to match a resized array.
 */
static ssize_t
space_show(struct mddev *mddev, char *page)
{
	return sprintf(page, "%lu\n", mddev->bitmap_info.space);
}

static ssize_t
space_store(struct mddev *mddev, const char *buf, size_t len)
{
	unsigned long sectors;
	int rv;

	rv = kstrtoul(buf, 10, &sectors);
	if (rv)
		return rv;

	if (sectors == 0)
		return -EINVAL;

	if (mddev->bitmap &&
	    sectors < (mddev->bitmap->storage.bytes + 511) >> 9)
		return -EFBIG; /* Bitmap is too big for this small space */

	/* could make sure it isn't too big, but that isn't really
	 * needed - user-space should be careful.
	 */
	mddev->bitmap_info.space = sectors;
	return len;
}

static struct md_sysfs_entry bitmap_space =
__ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store);

static ssize_t
timeout_show(struct mddev *mddev, char *page)
{
	ssize_t len;
	unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ;
	unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ;

	len = sprintf(page, "%lu", secs);
	if (jifs)
		len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs));
	len += sprintf(page+len, "\n");
	return len;
}

static ssize_t
timeout_store(struct mddev *mddev, const char *buf, size_t len)
{
	/* timeout can be set at any time */
	unsigned long timeout;
	int rv = strict_strtoul_scaled(buf, &timeout, 4);
	if (rv)
		return rv;

	/* just to make sure we don't overflow... */
	if (timeout >= LONG_MAX / HZ)
		return -EINVAL;

	timeout = timeout * HZ / 10000;

	if (timeout >= MAX_SCHEDULE_TIMEOUT)
		timeout = MAX_SCHEDULE_TIMEOUT-1;
	if (timeout < 1)
		timeout = 1;

	mddev->bitmap_info.daemon_sleep = timeout;
	mddev_set_timeout(mddev, timeout, false);
	md_wakeup_thread(mddev->thread);

	return len;
}

static struct md_sysfs_entry bitmap_timeout =
__ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store);
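
/*
 * The "time_base" attribute accepts seconds with up to four decimal
 * places (see the strict_strtoul_scaled(buf, &timeout, 4) call above).
 * Worked example with a hypothetical device: writing "5.5" parses to
 * timeout = 55000, and 55000 * HZ / 10000 = 5.5 * HZ jiffies becomes
 * the new daemon_sleep:
 *
 *	echo 5.5 > /sys/block/md0/md/bitmap/time_base
 */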

static ssize_t
backlog_show(struct mddev *mddev, char *page)
{
	return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind);
}

static ssize_t
backlog_store(struct mddev *mddev, const char *buf, size_t len)
{
	unsigned long backlog;
	unsigned long old_mwb = mddev->bitmap_info.max_write_behind;
	struct md_rdev *rdev;
	bool has_write_mostly = false;
	int rv = kstrtoul(buf, 10, &backlog);
	if (rv)
		return rv;
	if (backlog > COUNTER_MAX)
		return -EINVAL;

	/*
	 * Without a write-mostly device, it doesn't make sense to set
	 * backlog for max_write_behind.
	 */
	rdev_for_each(rdev, mddev) {
		if (test_bit(WriteMostly, &rdev->flags)) {
			has_write_mostly = true;
			break;
		}
	}
	if (!has_write_mostly) {
		pr_warn_ratelimited("%s: can't set backlog, no write mostly device available\n",
				    mdname(mddev));
		return -EINVAL;
	}

	mddev->bitmap_info.max_write_behind = backlog;
	if (!backlog && mddev->serial_info_pool) {
		/* serial_info_pool is not needed if backlog is zero */
		if (!mddev->serialize_policy)
			mddev_destroy_serial_pool(mddev, NULL, false);
	} else if (backlog && !mddev->serial_info_pool) {
		/* serial_info_pool is needed since backlog is not zero */
		struct md_rdev *rdev;

		rdev_for_each(rdev, mddev)
			mddev_create_serial_pool(mddev, rdev, false);
	}
	if (old_mwb != backlog)
		md_bitmap_update_sb(mddev->bitmap);
	return len;
}

static struct md_sysfs_entry bitmap_backlog =
__ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store);

static ssize_t
chunksize_show(struct mddev *mddev, char *page)
{
	return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize);
}

static ssize_t
chunksize_store(struct mddev *mddev, const char *buf, size_t len)
{
	/* Can only be changed when no bitmap is active */
	int rv;
	unsigned long csize;
	if (mddev->bitmap)
		return -EBUSY;
	rv = kstrtoul(buf, 10, &csize);
	if (rv)
		return rv;
	if (csize < 512 ||
	    !is_power_of_2(csize))
		return -EINVAL;
	if (BITS_PER_LONG > 32 && csize >= (1ULL << (BITS_PER_BYTE *
		sizeof(((bitmap_super_t *)0)->chunksize))))
		return -EOVERFLOW;
	mddev->bitmap_info.chunksize = csize;
	return len;
}

static struct md_sysfs_entry bitmap_chunksize =
__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
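
/*
 * The "chunksize" attribute is in bytes, must be a power of two of at
 * least 512, and can only be set while no bitmap is active.  Examples
 * (hypothetical device name):
 *
 *	echo 131072 > /sys/block/md0/md/bitmap/chunksize	# 128KiB, accepted
 *	echo 100000 > /sys/block/md0/md/bitmap/chunksize	# -EINVAL, not a power of two
 */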

static ssize_t metadata_show(struct mddev *mddev, char *page)
{
	if (mddev_is_clustered(mddev))
		return sprintf(page, "clustered\n");
	return sprintf(page, "%s\n", (mddev->bitmap_info.external
				      ? "external" : "internal"));
}

static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len)
{
	if (mddev->bitmap ||
	    mddev->bitmap_info.file ||
	    mddev->bitmap_info.offset)
		return -EBUSY;
	if (strncmp(buf, "external", 8) == 0)
		mddev->bitmap_info.external = 1;
	else if ((strncmp(buf, "internal", 8) == 0) ||
		 (strncmp(buf, "clustered", 9) == 0))
		mddev->bitmap_info.external = 0;
	else
		return -EINVAL;
	return len;
}

static struct md_sysfs_entry bitmap_metadata =
__ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);

static ssize_t can_clear_show(struct mddev *mddev, char *page)
{
	int len;
	spin_lock(&mddev->lock);
	if (mddev->bitmap)
		len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
					     "false" : "true"));
	else
		len = sprintf(page, "\n");
	spin_unlock(&mddev->lock);
	return len;
}

static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len)
{
	if (mddev->bitmap == NULL)
		return -ENOENT;
	if (strncmp(buf, "false", 5) == 0)
		mddev->bitmap->need_sync = 1;
	else if (strncmp(buf, "true", 4) == 0) {
		if (mddev->degraded)
			return -EBUSY;
		mddev->bitmap->need_sync = 0;
	} else
		return -EINVAL;
	return len;
}

static struct md_sysfs_entry bitmap_can_clear =
__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);

static ssize_t
behind_writes_used_show(struct mddev *mddev, char *page)
{
	ssize_t ret;
	spin_lock(&mddev->lock);
	if (mddev->bitmap == NULL)
		ret = sprintf(page, "0\n");
	else
		ret = sprintf(page, "%lu\n",
			      mddev->bitmap->behind_writes_used);
	spin_unlock(&mddev->lock);
	return ret;
}

static ssize_t
behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len)
{
	if (mddev->bitmap)
		mddev->bitmap->behind_writes_used = 0;
	return len;
}

static struct md_sysfs_entry max_backlog_used =
__ATTR(max_backlog_used, S_IRUGO | S_IWUSR,
       behind_writes_used_show, behind_writes_used_reset);

static struct attribute *md_bitmap_attrs[] = {
	&bitmap_location.attr,
	&bitmap_space.attr,
	&bitmap_timeout.attr,
	&bitmap_backlog.attr,
	&bitmap_chunksize.attr,
	&bitmap_metadata.attr,
	&bitmap_can_clear.attr,
	&max_backlog_used.attr,
	NULL
};
const struct attribute_group md_bitmap_group = {
	.name = "bitmap",
	.attrs = md_bitmap_attrs,
};
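
/*
 * With this group registered, the attributes above typically appear as
 * (array name hypothetical):
 *
 *	/sys/block/md0/md/bitmap/{location,space,time_base,backlog,
 *		chunksize,metadata,can_clear,max_backlog_used}
 */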