1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003 4 * 5 * bitmap_create - sets up the bitmap structure 6 * bitmap_destroy - destroys the bitmap structure 7 * 8 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.: 9 * - added disk storage for bitmap 10 * - changes to allow various bitmap chunk sizes 11 */ 12 13 /* 14 * Still to do: 15 * 16 * flush after percent set rather than just time based. (maybe both). 17 */ 18 19 #include <linux/blkdev.h> 20 #include <linux/module.h> 21 #include <linux/errno.h> 22 #include <linux/slab.h> 23 #include <linux/init.h> 24 #include <linux/timer.h> 25 #include <linux/sched.h> 26 #include <linux/list.h> 27 #include <linux/file.h> 28 #include <linux/mount.h> 29 #include <linux/buffer_head.h> 30 #include <linux/seq_file.h> 31 #include <trace/events/block.h> 32 #include "md.h" 33 #include "md-bitmap.h" 34 35 static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks, 36 int chunksize, bool init); 37 38 static inline char *bmname(struct bitmap *bitmap) 39 { 40 return bitmap->mddev ? mdname(bitmap->mddev) : "mdX"; 41 } 42 43 /* 44 * check a page and, if necessary, allocate it (or hijack it if the alloc fails) 45 * 46 * 1) check to see if this page is allocated, if it's not then try to alloc 47 * 2) if the alloc fails, set the page's hijacked flag so we'll use the 48 * page pointer directly as a counter 49 * 50 * if we find our page, we increment the page's refcount so that it stays 51 * allocated while we're using it 52 */ 53 static int md_bitmap_checkpage(struct bitmap_counts *bitmap, 54 unsigned long page, int create, int no_hijack) 55 __releases(bitmap->lock) 56 __acquires(bitmap->lock) 57 { 58 unsigned char *mappage; 59 60 WARN_ON_ONCE(page >= bitmap->pages); 61 if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ 62 return 0; 63 64 if (bitmap->bp[page].map) /* page is already allocated, just return */ 65 return 0; 66 67 if (!create) 68 return -ENOENT; 69 70 /* this page has not been allocated yet */ 71 72 spin_unlock_irq(&bitmap->lock); 73 /* It is possible that this is being called inside a 74 * prepare_to_wait/finish_wait loop from raid5c:make_request(). 75 * In general it is not permitted to sleep in that context as it 76 * can cause the loop to spin freely. 77 * That doesn't apply here as we can only reach this point 78 * once with any loop. 79 * When this function completes, either bp[page].map or 80 * bp[page].hijacked. In either case, this function will 81 * abort before getting to this point again. So there is 82 * no risk of a free-spin, and so it is safe to assert 83 * that sleeping here is allowed. 84 */ 85 sched_annotate_sleep(); 86 mappage = kzalloc(PAGE_SIZE, GFP_NOIO); 87 spin_lock_irq(&bitmap->lock); 88 89 if (mappage == NULL) { 90 pr_debug("md/bitmap: map page allocation failed, hijacking\n"); 91 /* We don't support hijack for cluster raid */ 92 if (no_hijack) 93 return -ENOMEM; 94 /* failed - set the hijacked flag so that we can use the 95 * pointer as a counter */ 96 if (!bitmap->bp[page].map) 97 bitmap->bp[page].hijacked = 1; 98 } else if (bitmap->bp[page].map || 99 bitmap->bp[page].hijacked) { 100 /* somebody beat us to getting the page */ 101 kfree(mappage); 102 } else { 103 104 /* no page was in place and we have one, so install it */ 105 106 bitmap->bp[page].map = mappage; 107 bitmap->missing_pages--; 108 } 109 return 0; 110 } 111 112 /* if page is completely empty, put it back on the free list, or dealloc it */ 113 /* if page was hijacked, unmark the flag so it might get alloced next time */ 114 /* Note: lock should be held when calling this */ 115 static void md_bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page) 116 { 117 char *ptr; 118 119 if (bitmap->bp[page].count) /* page is still busy */ 120 return; 121 122 /* page is no longer in use, it can be released */ 123 124 if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */ 125 bitmap->bp[page].hijacked = 0; 126 bitmap->bp[page].map = NULL; 127 } else { 128 /* normal case, free the page */ 129 ptr = bitmap->bp[page].map; 130 bitmap->bp[page].map = NULL; 131 bitmap->missing_pages++; 132 kfree(ptr); 133 } 134 } 135 136 /* 137 * bitmap file handling - read and write the bitmap file and its superblock 138 */ 139 140 /* 141 * basic page I/O operations 142 */ 143 144 /* IO operations when bitmap is stored near all superblocks */ 145 146 /* choose a good rdev and read the page from there */ 147 static int read_sb_page(struct mddev *mddev, loff_t offset, 148 struct page *page, unsigned long index, int size) 149 { 150 151 sector_t sector = mddev->bitmap_info.offset + offset + 152 index * (PAGE_SIZE / SECTOR_SIZE); 153 struct md_rdev *rdev; 154 155 rdev_for_each(rdev, mddev) { 156 u32 iosize = roundup(size, bdev_logical_block_size(rdev->bdev)); 157 158 if (!test_bit(In_sync, &rdev->flags) || 159 test_bit(Faulty, &rdev->flags) || 160 test_bit(Bitmap_sync, &rdev->flags)) 161 continue; 162 163 if (sync_page_io(rdev, sector, iosize, page, REQ_OP_READ, true)) 164 return 0; 165 } 166 return -EIO; 167 } 168 169 static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev) 170 { 171 /* Iterate the disks of an mddev, using rcu to protect access to the 172 * linked list, and raising the refcount of devices we return to ensure 173 * they don't disappear while in use. 174 * As devices are only added or removed when raid_disk is < 0 and 175 * nr_pending is 0 and In_sync is clear, the entries we return will 176 * still be in the same position on the list when we re-enter 177 * list_for_each_entry_continue_rcu. 178 * 179 * Note that if entered with 'rdev == NULL' to start at the 180 * beginning, we temporarily assign 'rdev' to an address which 181 * isn't really an rdev, but which can be used by 182 * list_for_each_entry_continue_rcu() to find the first entry. 183 */ 184 rcu_read_lock(); 185 if (rdev == NULL) 186 /* start at the beginning */ 187 rdev = list_entry(&mddev->disks, struct md_rdev, same_set); 188 else { 189 /* release the previous rdev and start from there. */ 190 rdev_dec_pending(rdev, mddev); 191 } 192 list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) { 193 if (rdev->raid_disk >= 0 && 194 !test_bit(Faulty, &rdev->flags)) { 195 /* this is a usable devices */ 196 atomic_inc(&rdev->nr_pending); 197 rcu_read_unlock(); 198 return rdev; 199 } 200 } 201 rcu_read_unlock(); 202 return NULL; 203 } 204 205 static unsigned int optimal_io_size(struct block_device *bdev, 206 unsigned int last_page_size, 207 unsigned int io_size) 208 { 209 if (bdev_io_opt(bdev) > bdev_logical_block_size(bdev)) 210 return roundup(last_page_size, bdev_io_opt(bdev)); 211 return io_size; 212 } 213 214 static unsigned int bitmap_io_size(unsigned int io_size, unsigned int opt_size, 215 loff_t start, loff_t boundary) 216 { 217 if (io_size != opt_size && 218 start + opt_size / SECTOR_SIZE <= boundary) 219 return opt_size; 220 if (start + io_size / SECTOR_SIZE <= boundary) 221 return io_size; 222 223 /* Overflows boundary */ 224 return 0; 225 } 226 227 static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap, 228 unsigned long pg_index, struct page *page) 229 { 230 struct block_device *bdev; 231 struct mddev *mddev = bitmap->mddev; 232 struct bitmap_storage *store = &bitmap->storage; 233 unsigned int bitmap_limit = (bitmap->storage.file_pages - pg_index) << 234 PAGE_SHIFT; 235 loff_t sboff, offset = mddev->bitmap_info.offset; 236 sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE; 237 unsigned int size = PAGE_SIZE; 238 unsigned int opt_size = PAGE_SIZE; 239 sector_t doff; 240 241 bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev; 242 /* we compare length (page numbers), not page offset. */ 243 if ((pg_index - store->sb_index) == store->file_pages - 1) { 244 unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1); 245 246 if (last_page_size == 0) 247 last_page_size = PAGE_SIZE; 248 size = roundup(last_page_size, bdev_logical_block_size(bdev)); 249 opt_size = optimal_io_size(bdev, last_page_size, size); 250 } 251 252 sboff = rdev->sb_start + offset; 253 doff = rdev->data_offset; 254 255 /* Just make sure we aren't corrupting data or metadata */ 256 if (mddev->external) { 257 /* Bitmap could be anywhere. */ 258 if (sboff + ps > doff && 259 sboff < (doff + mddev->dev_sectors + PAGE_SIZE / SECTOR_SIZE)) 260 return -EINVAL; 261 } else if (offset < 0) { 262 /* DATA BITMAP METADATA */ 263 size = bitmap_io_size(size, opt_size, offset + ps, 0); 264 if (size == 0) 265 /* bitmap runs in to metadata */ 266 return -EINVAL; 267 268 if (doff + mddev->dev_sectors > sboff) 269 /* data runs in to bitmap */ 270 return -EINVAL; 271 } else if (rdev->sb_start < rdev->data_offset) { 272 /* METADATA BITMAP DATA */ 273 size = bitmap_io_size(size, opt_size, sboff + ps, doff); 274 if (size == 0) 275 /* bitmap runs in to data */ 276 return -EINVAL; 277 } 278 279 md_super_write(mddev, rdev, sboff + ps, (int)min(size, bitmap_limit), page); 280 return 0; 281 } 282 283 static void write_sb_page(struct bitmap *bitmap, unsigned long pg_index, 284 struct page *page, bool wait) 285 { 286 struct mddev *mddev = bitmap->mddev; 287 288 do { 289 struct md_rdev *rdev = NULL; 290 291 while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { 292 if (__write_sb_page(rdev, bitmap, pg_index, page) < 0) { 293 set_bit(BITMAP_WRITE_ERROR, &bitmap->flags); 294 return; 295 } 296 } 297 } while (wait && md_super_wait(mddev) < 0); 298 } 299 300 static void md_bitmap_file_kick(struct bitmap *bitmap); 301 302 #ifdef CONFIG_MD_BITMAP_FILE 303 static void write_file_page(struct bitmap *bitmap, struct page *page, int wait) 304 { 305 struct buffer_head *bh = page_buffers(page); 306 307 while (bh && bh->b_blocknr) { 308 atomic_inc(&bitmap->pending_writes); 309 set_buffer_locked(bh); 310 set_buffer_mapped(bh); 311 submit_bh(REQ_OP_WRITE | REQ_SYNC, bh); 312 bh = bh->b_this_page; 313 } 314 315 if (wait) 316 wait_event(bitmap->write_wait, 317 atomic_read(&bitmap->pending_writes) == 0); 318 } 319 320 static void end_bitmap_write(struct buffer_head *bh, int uptodate) 321 { 322 struct bitmap *bitmap = bh->b_private; 323 324 if (!uptodate) 325 set_bit(BITMAP_WRITE_ERROR, &bitmap->flags); 326 if (atomic_dec_and_test(&bitmap->pending_writes)) 327 wake_up(&bitmap->write_wait); 328 } 329 330 static void free_buffers(struct page *page) 331 { 332 struct buffer_head *bh; 333 334 if (!PagePrivate(page)) 335 return; 336 337 bh = page_buffers(page); 338 while (bh) { 339 struct buffer_head *next = bh->b_this_page; 340 free_buffer_head(bh); 341 bh = next; 342 } 343 detach_page_private(page); 344 put_page(page); 345 } 346 347 /* read a page from a file. 348 * We both read the page, and attach buffers to the page to record the 349 * address of each block (using bmap). These addresses will be used 350 * to write the block later, completely bypassing the filesystem. 351 * This usage is similar to how swap files are handled, and allows us 352 * to write to a file with no concerns of memory allocation failing. 353 */ 354 static int read_file_page(struct file *file, unsigned long index, 355 struct bitmap *bitmap, unsigned long count, struct page *page) 356 { 357 int ret = 0; 358 struct inode *inode = file_inode(file); 359 struct buffer_head *bh; 360 sector_t block, blk_cur; 361 unsigned long blocksize = i_blocksize(inode); 362 363 pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE, 364 (unsigned long long)index << PAGE_SHIFT); 365 366 bh = alloc_page_buffers(page, blocksize, false); 367 if (!bh) { 368 ret = -ENOMEM; 369 goto out; 370 } 371 attach_page_private(page, bh); 372 blk_cur = index << (PAGE_SHIFT - inode->i_blkbits); 373 while (bh) { 374 block = blk_cur; 375 376 if (count == 0) 377 bh->b_blocknr = 0; 378 else { 379 ret = bmap(inode, &block); 380 if (ret || !block) { 381 ret = -EINVAL; 382 bh->b_blocknr = 0; 383 goto out; 384 } 385 386 bh->b_blocknr = block; 387 bh->b_bdev = inode->i_sb->s_bdev; 388 if (count < blocksize) 389 count = 0; 390 else 391 count -= blocksize; 392 393 bh->b_end_io = end_bitmap_write; 394 bh->b_private = bitmap; 395 atomic_inc(&bitmap->pending_writes); 396 set_buffer_locked(bh); 397 set_buffer_mapped(bh); 398 submit_bh(REQ_OP_READ, bh); 399 } 400 blk_cur++; 401 bh = bh->b_this_page; 402 } 403 404 wait_event(bitmap->write_wait, 405 atomic_read(&bitmap->pending_writes)==0); 406 if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) 407 ret = -EIO; 408 out: 409 if (ret) 410 pr_err("md: bitmap read error: (%dB @ %llu): %d\n", 411 (int)PAGE_SIZE, 412 (unsigned long long)index << PAGE_SHIFT, 413 ret); 414 return ret; 415 } 416 #else /* CONFIG_MD_BITMAP_FILE */ 417 static void write_file_page(struct bitmap *bitmap, struct page *page, int wait) 418 { 419 } 420 static int read_file_page(struct file *file, unsigned long index, 421 struct bitmap *bitmap, unsigned long count, struct page *page) 422 { 423 return -EIO; 424 } 425 static void free_buffers(struct page *page) 426 { 427 put_page(page); 428 } 429 #endif /* CONFIG_MD_BITMAP_FILE */ 430 431 /* 432 * bitmap file superblock operations 433 */ 434 435 /* 436 * write out a page to a file 437 */ 438 static void filemap_write_page(struct bitmap *bitmap, unsigned long pg_index, 439 bool wait) 440 { 441 struct bitmap_storage *store = &bitmap->storage; 442 struct page *page = store->filemap[pg_index]; 443 444 if (mddev_is_clustered(bitmap->mddev)) { 445 /* go to node bitmap area starting point */ 446 pg_index += store->sb_index; 447 } 448 449 if (store->file) 450 write_file_page(bitmap, page, wait); 451 else 452 write_sb_page(bitmap, pg_index, page, wait); 453 } 454 455 /* 456 * md_bitmap_wait_writes() should be called before writing any bitmap 457 * blocks, to ensure previous writes, particularly from 458 * md_bitmap_daemon_work(), have completed. 459 */ 460 static void md_bitmap_wait_writes(struct bitmap *bitmap) 461 { 462 if (bitmap->storage.file) 463 wait_event(bitmap->write_wait, 464 atomic_read(&bitmap->pending_writes)==0); 465 else 466 /* Note that we ignore the return value. The writes 467 * might have failed, but that would just mean that 468 * some bits which should be cleared haven't been, 469 * which is safe. The relevant bitmap blocks will 470 * probably get written again, but there is no great 471 * loss if they aren't. 472 */ 473 md_super_wait(bitmap->mddev); 474 } 475 476 477 /* update the event counter and sync the superblock to disk */ 478 static void bitmap_update_sb(struct bitmap *bitmap) 479 { 480 bitmap_super_t *sb; 481 482 if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ 483 return; 484 if (bitmap->mddev->bitmap_info.external) 485 return; 486 if (!bitmap->storage.sb_page) /* no superblock */ 487 return; 488 sb = kmap_atomic(bitmap->storage.sb_page); 489 sb->events = cpu_to_le64(bitmap->mddev->events); 490 if (bitmap->mddev->events < bitmap->events_cleared) 491 /* rocking back to read-only */ 492 bitmap->events_cleared = bitmap->mddev->events; 493 sb->events_cleared = cpu_to_le64(bitmap->events_cleared); 494 /* 495 * clear BITMAP_WRITE_ERROR bit to protect against the case that 496 * a bitmap write error occurred but the later writes succeeded. 497 */ 498 sb->state = cpu_to_le32(bitmap->flags & ~BIT(BITMAP_WRITE_ERROR)); 499 /* Just in case these have been changed via sysfs: */ 500 sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ); 501 sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind); 502 /* This might have been changed by a reshape */ 503 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); 504 sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize); 505 sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes); 506 sb->sectors_reserved = cpu_to_le32(bitmap->mddev-> 507 bitmap_info.space); 508 kunmap_atomic(sb); 509 510 if (bitmap->storage.file) 511 write_file_page(bitmap, bitmap->storage.sb_page, 1); 512 else 513 write_sb_page(bitmap, bitmap->storage.sb_index, 514 bitmap->storage.sb_page, 1); 515 } 516 517 static void bitmap_print_sb(struct bitmap *bitmap) 518 { 519 bitmap_super_t *sb; 520 521 if (!bitmap || !bitmap->storage.sb_page) 522 return; 523 sb = kmap_atomic(bitmap->storage.sb_page); 524 pr_debug("%s: bitmap file superblock:\n", bmname(bitmap)); 525 pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic)); 526 pr_debug(" version: %u\n", le32_to_cpu(sb->version)); 527 pr_debug(" uuid: %08x.%08x.%08x.%08x\n", 528 le32_to_cpu(*(__le32 *)(sb->uuid+0)), 529 le32_to_cpu(*(__le32 *)(sb->uuid+4)), 530 le32_to_cpu(*(__le32 *)(sb->uuid+8)), 531 le32_to_cpu(*(__le32 *)(sb->uuid+12))); 532 pr_debug(" events: %llu\n", 533 (unsigned long long) le64_to_cpu(sb->events)); 534 pr_debug("events cleared: %llu\n", 535 (unsigned long long) le64_to_cpu(sb->events_cleared)); 536 pr_debug(" state: %08x\n", le32_to_cpu(sb->state)); 537 pr_debug(" chunksize: %u B\n", le32_to_cpu(sb->chunksize)); 538 pr_debug(" daemon sleep: %us\n", le32_to_cpu(sb->daemon_sleep)); 539 pr_debug(" sync size: %llu KB\n", 540 (unsigned long long)le64_to_cpu(sb->sync_size)/2); 541 pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind)); 542 kunmap_atomic(sb); 543 } 544 545 /* 546 * bitmap_new_disk_sb 547 * @bitmap 548 * 549 * This function is somewhat the reverse of bitmap_read_sb. bitmap_read_sb 550 * reads and verifies the on-disk bitmap superblock and populates bitmap_info. 551 * This function verifies 'bitmap_info' and populates the on-disk bitmap 552 * structure, which is to be written to disk. 553 * 554 * Returns: 0 on success, -Exxx on error 555 */ 556 static int md_bitmap_new_disk_sb(struct bitmap *bitmap) 557 { 558 bitmap_super_t *sb; 559 unsigned long chunksize, daemon_sleep, write_behind; 560 561 bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO); 562 if (bitmap->storage.sb_page == NULL) 563 return -ENOMEM; 564 bitmap->storage.sb_index = 0; 565 566 sb = kmap_atomic(bitmap->storage.sb_page); 567 568 sb->magic = cpu_to_le32(BITMAP_MAGIC); 569 sb->version = cpu_to_le32(BITMAP_MAJOR_HI); 570 571 chunksize = bitmap->mddev->bitmap_info.chunksize; 572 BUG_ON(!chunksize); 573 if (!is_power_of_2(chunksize)) { 574 kunmap_atomic(sb); 575 pr_warn("bitmap chunksize not a power of 2\n"); 576 return -EINVAL; 577 } 578 sb->chunksize = cpu_to_le32(chunksize); 579 580 daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep; 581 if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) { 582 pr_debug("Choosing daemon_sleep default (5 sec)\n"); 583 daemon_sleep = 5 * HZ; 584 } 585 sb->daemon_sleep = cpu_to_le32(daemon_sleep); 586 bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; 587 588 /* 589 * FIXME: write_behind for RAID1. If not specified, what 590 * is a good choice? We choose COUNTER_MAX / 2 arbitrarily. 591 */ 592 write_behind = bitmap->mddev->bitmap_info.max_write_behind; 593 if (write_behind > COUNTER_MAX) 594 write_behind = COUNTER_MAX / 2; 595 sb->write_behind = cpu_to_le32(write_behind); 596 bitmap->mddev->bitmap_info.max_write_behind = write_behind; 597 598 /* keep the array size field of the bitmap superblock up to date */ 599 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); 600 601 memcpy(sb->uuid, bitmap->mddev->uuid, 16); 602 603 set_bit(BITMAP_STALE, &bitmap->flags); 604 sb->state = cpu_to_le32(bitmap->flags); 605 bitmap->events_cleared = bitmap->mddev->events; 606 sb->events_cleared = cpu_to_le64(bitmap->mddev->events); 607 bitmap->mddev->bitmap_info.nodes = 0; 608 609 kunmap_atomic(sb); 610 611 return 0; 612 } 613 614 /* read the superblock from the bitmap file and initialize some bitmap fields */ 615 static int md_bitmap_read_sb(struct bitmap *bitmap) 616 { 617 char *reason = NULL; 618 bitmap_super_t *sb; 619 unsigned long chunksize, daemon_sleep, write_behind; 620 unsigned long long events; 621 int nodes = 0; 622 unsigned long sectors_reserved = 0; 623 int err = -EINVAL; 624 struct page *sb_page; 625 loff_t offset = 0; 626 627 if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) { 628 chunksize = 128 * 1024 * 1024; 629 daemon_sleep = 5 * HZ; 630 write_behind = 0; 631 set_bit(BITMAP_STALE, &bitmap->flags); 632 err = 0; 633 goto out_no_sb; 634 } 635 /* page 0 is the superblock, read it... */ 636 sb_page = alloc_page(GFP_KERNEL); 637 if (!sb_page) 638 return -ENOMEM; 639 bitmap->storage.sb_page = sb_page; 640 641 re_read: 642 /* If cluster_slot is set, the cluster is setup */ 643 if (bitmap->cluster_slot >= 0) { 644 sector_t bm_blocks = bitmap->mddev->resync_max_sectors; 645 646 bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 647 (bitmap->mddev->bitmap_info.chunksize >> 9)); 648 /* bits to bytes */ 649 bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t); 650 /* to 4k blocks */ 651 bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096); 652 offset = bitmap->cluster_slot * (bm_blocks << 3); 653 pr_debug("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__, 654 bitmap->cluster_slot, offset); 655 } 656 657 if (bitmap->storage.file) { 658 loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host); 659 int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize; 660 661 err = read_file_page(bitmap->storage.file, 0, 662 bitmap, bytes, sb_page); 663 } else { 664 err = read_sb_page(bitmap->mddev, offset, sb_page, 0, 665 sizeof(bitmap_super_t)); 666 } 667 if (err) 668 return err; 669 670 err = -EINVAL; 671 sb = kmap_atomic(sb_page); 672 673 chunksize = le32_to_cpu(sb->chunksize); 674 daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ; 675 write_behind = le32_to_cpu(sb->write_behind); 676 sectors_reserved = le32_to_cpu(sb->sectors_reserved); 677 678 /* verify that the bitmap-specific fields are valid */ 679 if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) 680 reason = "bad magic"; 681 else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO || 682 le32_to_cpu(sb->version) > BITMAP_MAJOR_CLUSTERED) 683 reason = "unrecognized superblock version"; 684 else if (chunksize < 512) 685 reason = "bitmap chunksize too small"; 686 else if (!is_power_of_2(chunksize)) 687 reason = "bitmap chunksize not a power of 2"; 688 else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT) 689 reason = "daemon sleep period out of range"; 690 else if (write_behind > COUNTER_MAX) 691 reason = "write-behind limit out of range (0 - 16383)"; 692 if (reason) { 693 pr_warn("%s: invalid bitmap file superblock: %s\n", 694 bmname(bitmap), reason); 695 goto out; 696 } 697 698 /* 699 * Setup nodes/clustername only if bitmap version is 700 * cluster-compatible 701 */ 702 if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) { 703 nodes = le32_to_cpu(sb->nodes); 704 strscpy(bitmap->mddev->bitmap_info.cluster_name, 705 sb->cluster_name, 64); 706 } 707 708 /* keep the array size field of the bitmap superblock up to date */ 709 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); 710 711 if (bitmap->mddev->persistent) { 712 /* 713 * We have a persistent array superblock, so compare the 714 * bitmap's UUID and event counter to the mddev's 715 */ 716 if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) { 717 pr_warn("%s: bitmap superblock UUID mismatch\n", 718 bmname(bitmap)); 719 goto out; 720 } 721 events = le64_to_cpu(sb->events); 722 if (!nodes && (events < bitmap->mddev->events)) { 723 pr_warn("%s: bitmap file is out of date (%llu < %llu) -- forcing full recovery\n", 724 bmname(bitmap), events, 725 (unsigned long long) bitmap->mddev->events); 726 set_bit(BITMAP_STALE, &bitmap->flags); 727 } 728 } 729 730 /* assign fields using values from superblock */ 731 bitmap->flags |= le32_to_cpu(sb->state); 732 if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN) 733 set_bit(BITMAP_HOSTENDIAN, &bitmap->flags); 734 bitmap->events_cleared = le64_to_cpu(sb->events_cleared); 735 err = 0; 736 737 out: 738 kunmap_atomic(sb); 739 if (err == 0 && nodes && (bitmap->cluster_slot < 0)) { 740 /* Assigning chunksize is required for "re_read" */ 741 bitmap->mddev->bitmap_info.chunksize = chunksize; 742 err = md_setup_cluster(bitmap->mddev, nodes); 743 if (err) { 744 pr_warn("%s: Could not setup cluster service (%d)\n", 745 bmname(bitmap), err); 746 goto out_no_sb; 747 } 748 bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev); 749 goto re_read; 750 } 751 752 out_no_sb: 753 if (err == 0) { 754 if (test_bit(BITMAP_STALE, &bitmap->flags)) 755 bitmap->events_cleared = bitmap->mddev->events; 756 bitmap->mddev->bitmap_info.chunksize = chunksize; 757 bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; 758 bitmap->mddev->bitmap_info.max_write_behind = write_behind; 759 bitmap->mddev->bitmap_info.nodes = nodes; 760 if (bitmap->mddev->bitmap_info.space == 0 || 761 bitmap->mddev->bitmap_info.space > sectors_reserved) 762 bitmap->mddev->bitmap_info.space = sectors_reserved; 763 } else { 764 bitmap_print_sb(bitmap); 765 if (bitmap->cluster_slot < 0) 766 md_cluster_stop(bitmap->mddev); 767 } 768 return err; 769 } 770 771 /* 772 * general bitmap file operations 773 */ 774 775 /* 776 * on-disk bitmap: 777 * 778 * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap 779 * file a page at a time. There's a superblock at the start of the file. 780 */ 781 /* calculate the index of the page that contains this bit */ 782 static inline unsigned long file_page_index(struct bitmap_storage *store, 783 unsigned long chunk) 784 { 785 if (store->sb_page) 786 chunk += sizeof(bitmap_super_t) << 3; 787 return chunk >> PAGE_BIT_SHIFT; 788 } 789 790 /* calculate the (bit) offset of this bit within a page */ 791 static inline unsigned long file_page_offset(struct bitmap_storage *store, 792 unsigned long chunk) 793 { 794 if (store->sb_page) 795 chunk += sizeof(bitmap_super_t) << 3; 796 return chunk & (PAGE_BITS - 1); 797 } 798 799 /* 800 * return a pointer to the page in the filemap that contains the given bit 801 * 802 */ 803 static inline struct page *filemap_get_page(struct bitmap_storage *store, 804 unsigned long chunk) 805 { 806 if (file_page_index(store, chunk) >= store->file_pages) 807 return NULL; 808 return store->filemap[file_page_index(store, chunk)]; 809 } 810 811 static int md_bitmap_storage_alloc(struct bitmap_storage *store, 812 unsigned long chunks, int with_super, 813 int slot_number) 814 { 815 int pnum, offset = 0; 816 unsigned long num_pages; 817 unsigned long bytes; 818 819 bytes = DIV_ROUND_UP(chunks, 8); 820 if (with_super) 821 bytes += sizeof(bitmap_super_t); 822 823 num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE); 824 offset = slot_number * num_pages; 825 826 store->filemap = kmalloc_array(num_pages, sizeof(struct page *), 827 GFP_KERNEL); 828 if (!store->filemap) 829 return -ENOMEM; 830 831 if (with_super && !store->sb_page) { 832 store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO); 833 if (store->sb_page == NULL) 834 return -ENOMEM; 835 } 836 837 pnum = 0; 838 if (store->sb_page) { 839 store->filemap[0] = store->sb_page; 840 pnum = 1; 841 store->sb_index = offset; 842 } 843 844 for ( ; pnum < num_pages; pnum++) { 845 store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO); 846 if (!store->filemap[pnum]) { 847 store->file_pages = pnum; 848 return -ENOMEM; 849 } 850 } 851 store->file_pages = pnum; 852 853 /* We need 4 bits per page, rounded up to a multiple 854 * of sizeof(unsigned long) */ 855 store->filemap_attr = kzalloc( 856 roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), 857 GFP_KERNEL); 858 if (!store->filemap_attr) 859 return -ENOMEM; 860 861 store->bytes = bytes; 862 863 return 0; 864 } 865 866 static void md_bitmap_file_unmap(struct bitmap_storage *store) 867 { 868 struct file *file = store->file; 869 struct page *sb_page = store->sb_page; 870 struct page **map = store->filemap; 871 int pages = store->file_pages; 872 873 while (pages--) 874 if (map[pages] != sb_page) /* 0 is sb_page, release it below */ 875 free_buffers(map[pages]); 876 kfree(map); 877 kfree(store->filemap_attr); 878 879 if (sb_page) 880 free_buffers(sb_page); 881 882 if (file) { 883 struct inode *inode = file_inode(file); 884 invalidate_mapping_pages(inode->i_mapping, 0, -1); 885 fput(file); 886 } 887 } 888 889 /* 890 * bitmap_file_kick - if an error occurs while manipulating the bitmap file 891 * then it is no longer reliable, so we stop using it and we mark the file 892 * as failed in the superblock 893 */ 894 static void md_bitmap_file_kick(struct bitmap *bitmap) 895 { 896 if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) { 897 bitmap_update_sb(bitmap); 898 899 if (bitmap->storage.file) { 900 pr_warn("%s: kicking failed bitmap file %pD4 from array!\n", 901 bmname(bitmap), bitmap->storage.file); 902 903 } else 904 pr_warn("%s: disabling internal bitmap due to errors\n", 905 bmname(bitmap)); 906 } 907 } 908 909 enum bitmap_page_attr { 910 BITMAP_PAGE_DIRTY = 0, /* there are set bits that need to be synced */ 911 BITMAP_PAGE_PENDING = 1, /* there are bits that are being cleaned. 912 * i.e. counter is 1 or 2. */ 913 BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */ 914 }; 915 916 static inline void set_page_attr(struct bitmap *bitmap, int pnum, 917 enum bitmap_page_attr attr) 918 { 919 set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); 920 } 921 922 static inline void clear_page_attr(struct bitmap *bitmap, int pnum, 923 enum bitmap_page_attr attr) 924 { 925 clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); 926 } 927 928 static inline int test_page_attr(struct bitmap *bitmap, int pnum, 929 enum bitmap_page_attr attr) 930 { 931 return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); 932 } 933 934 static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum, 935 enum bitmap_page_attr attr) 936 { 937 return test_and_clear_bit((pnum<<2) + attr, 938 bitmap->storage.filemap_attr); 939 } 940 /* 941 * bitmap_file_set_bit -- called before performing a write to the md device 942 * to set (and eventually sync) a particular bit in the bitmap file 943 * 944 * we set the bit immediately, then we record the page number so that 945 * when an unplug occurs, we can flush the dirty pages out to disk 946 */ 947 static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) 948 { 949 unsigned long bit; 950 struct page *page; 951 void *kaddr; 952 unsigned long chunk = block >> bitmap->counts.chunkshift; 953 struct bitmap_storage *store = &bitmap->storage; 954 unsigned long index = file_page_index(store, chunk); 955 unsigned long node_offset = 0; 956 957 index += store->sb_index; 958 if (mddev_is_clustered(bitmap->mddev)) 959 node_offset = bitmap->cluster_slot * store->file_pages; 960 961 page = filemap_get_page(&bitmap->storage, chunk); 962 if (!page) 963 return; 964 bit = file_page_offset(&bitmap->storage, chunk); 965 966 /* set the bit */ 967 kaddr = kmap_atomic(page); 968 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 969 set_bit(bit, kaddr); 970 else 971 set_bit_le(bit, kaddr); 972 kunmap_atomic(kaddr); 973 pr_debug("set file bit %lu page %lu\n", bit, index); 974 /* record page number so it gets flushed to disk when unplug occurs */ 975 set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_DIRTY); 976 } 977 978 static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block) 979 { 980 unsigned long bit; 981 struct page *page; 982 void *paddr; 983 unsigned long chunk = block >> bitmap->counts.chunkshift; 984 struct bitmap_storage *store = &bitmap->storage; 985 unsigned long index = file_page_index(store, chunk); 986 unsigned long node_offset = 0; 987 988 index += store->sb_index; 989 if (mddev_is_clustered(bitmap->mddev)) 990 node_offset = bitmap->cluster_slot * store->file_pages; 991 992 page = filemap_get_page(&bitmap->storage, chunk); 993 if (!page) 994 return; 995 bit = file_page_offset(&bitmap->storage, chunk); 996 paddr = kmap_atomic(page); 997 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 998 clear_bit(bit, paddr); 999 else 1000 clear_bit_le(bit, paddr); 1001 kunmap_atomic(paddr); 1002 if (!test_page_attr(bitmap, index - node_offset, BITMAP_PAGE_NEEDWRITE)) { 1003 set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_PENDING); 1004 bitmap->allclean = 0; 1005 } 1006 } 1007 1008 static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block) 1009 { 1010 unsigned long bit; 1011 struct page *page; 1012 void *paddr; 1013 unsigned long chunk = block >> bitmap->counts.chunkshift; 1014 int set = 0; 1015 1016 page = filemap_get_page(&bitmap->storage, chunk); 1017 if (!page) 1018 return -EINVAL; 1019 bit = file_page_offset(&bitmap->storage, chunk); 1020 paddr = kmap_atomic(page); 1021 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 1022 set = test_bit(bit, paddr); 1023 else 1024 set = test_bit_le(bit, paddr); 1025 kunmap_atomic(paddr); 1026 return set; 1027 } 1028 1029 /* this gets called when the md device is ready to unplug its underlying 1030 * (slave) device queues -- before we let any writes go down, we need to 1031 * sync the dirty pages of the bitmap file to disk */ 1032 static void __bitmap_unplug(struct bitmap *bitmap) 1033 { 1034 unsigned long i; 1035 int dirty, need_write; 1036 int writing = 0; 1037 1038 if (!md_bitmap_enabled(bitmap)) 1039 return; 1040 1041 /* look at each page to see if there are any set bits that need to be 1042 * flushed out to disk */ 1043 for (i = 0; i < bitmap->storage.file_pages; i++) { 1044 dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY); 1045 need_write = test_and_clear_page_attr(bitmap, i, 1046 BITMAP_PAGE_NEEDWRITE); 1047 if (dirty || need_write) { 1048 if (!writing) { 1049 md_bitmap_wait_writes(bitmap); 1050 mddev_add_trace_msg(bitmap->mddev, 1051 "md bitmap_unplug"); 1052 } 1053 clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING); 1054 filemap_write_page(bitmap, i, false); 1055 writing = 1; 1056 } 1057 } 1058 if (writing) 1059 md_bitmap_wait_writes(bitmap); 1060 1061 if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) 1062 md_bitmap_file_kick(bitmap); 1063 } 1064 1065 struct bitmap_unplug_work { 1066 struct work_struct work; 1067 struct bitmap *bitmap; 1068 struct completion *done; 1069 }; 1070 1071 static void md_bitmap_unplug_fn(struct work_struct *work) 1072 { 1073 struct bitmap_unplug_work *unplug_work = 1074 container_of(work, struct bitmap_unplug_work, work); 1075 1076 __bitmap_unplug(unplug_work->bitmap); 1077 complete(unplug_work->done); 1078 } 1079 1080 static void bitmap_unplug_async(struct bitmap *bitmap) 1081 { 1082 DECLARE_COMPLETION_ONSTACK(done); 1083 struct bitmap_unplug_work unplug_work; 1084 1085 INIT_WORK_ONSTACK(&unplug_work.work, md_bitmap_unplug_fn); 1086 unplug_work.bitmap = bitmap; 1087 unplug_work.done = &done; 1088 1089 queue_work(md_bitmap_wq, &unplug_work.work); 1090 wait_for_completion(&done); 1091 } 1092 1093 static void bitmap_unplug(struct mddev *mddev, bool sync) 1094 { 1095 struct bitmap *bitmap = mddev->bitmap; 1096 1097 if (!bitmap) 1098 return; 1099 1100 if (sync) 1101 __bitmap_unplug(bitmap); 1102 else 1103 bitmap_unplug_async(bitmap); 1104 } 1105 1106 static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); 1107 1108 /* 1109 * Initialize the in-memory bitmap from the on-disk bitmap and set up the memory 1110 * mapping of the bitmap file. 1111 * 1112 * Special case: If there's no bitmap file, or if the bitmap file had been 1113 * previously kicked from the array, we mark all the bits as 1's in order to 1114 * cause a full resync. 1115 * 1116 * We ignore all bits for sectors that end earlier than 'start'. 1117 * This is used when reading an out-of-date bitmap. 1118 */ 1119 static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) 1120 { 1121 bool outofdate = test_bit(BITMAP_STALE, &bitmap->flags); 1122 struct mddev *mddev = bitmap->mddev; 1123 unsigned long chunks = bitmap->counts.chunks; 1124 struct bitmap_storage *store = &bitmap->storage; 1125 struct file *file = store->file; 1126 unsigned long node_offset = 0; 1127 unsigned long bit_cnt = 0; 1128 unsigned long i; 1129 int ret; 1130 1131 if (!file && !mddev->bitmap_info.offset) { 1132 /* No permanent bitmap - fill with '1s'. */ 1133 store->filemap = NULL; 1134 store->file_pages = 0; 1135 for (i = 0; i < chunks ; i++) { 1136 /* if the disk bit is set, set the memory bit */ 1137 int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift) 1138 >= start); 1139 md_bitmap_set_memory_bits(bitmap, 1140 (sector_t)i << bitmap->counts.chunkshift, 1141 needed); 1142 } 1143 return 0; 1144 } 1145 1146 if (file && i_size_read(file->f_mapping->host) < store->bytes) { 1147 pr_warn("%s: bitmap file too short %lu < %lu\n", 1148 bmname(bitmap), 1149 (unsigned long) i_size_read(file->f_mapping->host), 1150 store->bytes); 1151 ret = -ENOSPC; 1152 goto err; 1153 } 1154 1155 if (mddev_is_clustered(mddev)) 1156 node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE)); 1157 1158 for (i = 0; i < store->file_pages; i++) { 1159 struct page *page = store->filemap[i]; 1160 int count; 1161 1162 /* unmap the old page, we're done with it */ 1163 if (i == store->file_pages - 1) 1164 count = store->bytes - i * PAGE_SIZE; 1165 else 1166 count = PAGE_SIZE; 1167 1168 if (file) 1169 ret = read_file_page(file, i, bitmap, count, page); 1170 else 1171 ret = read_sb_page(mddev, 0, page, i + node_offset, 1172 count); 1173 if (ret) 1174 goto err; 1175 } 1176 1177 if (outofdate) { 1178 pr_warn("%s: bitmap file is out of date, doing full recovery\n", 1179 bmname(bitmap)); 1180 1181 for (i = 0; i < store->file_pages; i++) { 1182 struct page *page = store->filemap[i]; 1183 unsigned long offset = 0; 1184 void *paddr; 1185 1186 if (i == 0 && !mddev->bitmap_info.external) 1187 offset = sizeof(bitmap_super_t); 1188 1189 /* 1190 * If the bitmap is out of date, dirty the whole page 1191 * and write it out 1192 */ 1193 paddr = kmap_atomic(page); 1194 memset(paddr + offset, 0xff, PAGE_SIZE - offset); 1195 kunmap_atomic(paddr); 1196 1197 filemap_write_page(bitmap, i, true); 1198 if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) { 1199 ret = -EIO; 1200 goto err; 1201 } 1202 } 1203 } 1204 1205 for (i = 0; i < chunks; i++) { 1206 struct page *page = filemap_get_page(&bitmap->storage, i); 1207 unsigned long bit = file_page_offset(&bitmap->storage, i); 1208 void *paddr; 1209 bool was_set; 1210 1211 paddr = kmap_atomic(page); 1212 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 1213 was_set = test_bit(bit, paddr); 1214 else 1215 was_set = test_bit_le(bit, paddr); 1216 kunmap_atomic(paddr); 1217 1218 if (was_set) { 1219 /* if the disk bit is set, set the memory bit */ 1220 int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift 1221 >= start); 1222 md_bitmap_set_memory_bits(bitmap, 1223 (sector_t)i << bitmap->counts.chunkshift, 1224 needed); 1225 bit_cnt++; 1226 } 1227 } 1228 1229 pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n", 1230 bmname(bitmap), store->file_pages, 1231 bit_cnt, chunks); 1232 1233 return 0; 1234 1235 err: 1236 pr_warn("%s: bitmap initialisation failed: %d\n", 1237 bmname(bitmap), ret); 1238 return ret; 1239 } 1240 1241 /* just flag bitmap pages as needing to be written. */ 1242 static void bitmap_write_all(struct mddev *mddev) 1243 { 1244 int i; 1245 struct bitmap *bitmap = mddev->bitmap; 1246 1247 if (!bitmap || !bitmap->storage.filemap) 1248 return; 1249 1250 /* Only one copy, so nothing needed */ 1251 if (bitmap->storage.file) 1252 return; 1253 1254 for (i = 0; i < bitmap->storage.file_pages; i++) 1255 set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE); 1256 bitmap->allclean = 0; 1257 } 1258 1259 static void md_bitmap_count_page(struct bitmap_counts *bitmap, 1260 sector_t offset, int inc) 1261 { 1262 sector_t chunk = offset >> bitmap->chunkshift; 1263 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1264 bitmap->bp[page].count += inc; 1265 md_bitmap_checkfree(bitmap, page); 1266 } 1267 1268 static void md_bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset) 1269 { 1270 sector_t chunk = offset >> bitmap->chunkshift; 1271 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1272 struct bitmap_page *bp = &bitmap->bp[page]; 1273 1274 if (!bp->pending) 1275 bp->pending = 1; 1276 } 1277 1278 static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap, 1279 sector_t offset, sector_t *blocks, 1280 int create); 1281 1282 static void mddev_set_timeout(struct mddev *mddev, unsigned long timeout, 1283 bool force) 1284 { 1285 struct md_thread *thread; 1286 1287 rcu_read_lock(); 1288 thread = rcu_dereference(mddev->thread); 1289 1290 if (!thread) 1291 goto out; 1292 1293 if (force || thread->timeout < MAX_SCHEDULE_TIMEOUT) 1294 thread->timeout = timeout; 1295 1296 out: 1297 rcu_read_unlock(); 1298 } 1299 1300 /* 1301 * bitmap daemon -- periodically wakes up to clean bits and flush pages 1302 * out to disk 1303 */ 1304 static void bitmap_daemon_work(struct mddev *mddev) 1305 { 1306 struct bitmap *bitmap; 1307 unsigned long j; 1308 unsigned long nextpage; 1309 sector_t blocks; 1310 struct bitmap_counts *counts; 1311 1312 /* Use a mutex to guard daemon_work against 1313 * bitmap_destroy. 1314 */ 1315 mutex_lock(&mddev->bitmap_info.mutex); 1316 bitmap = mddev->bitmap; 1317 if (bitmap == NULL) { 1318 mutex_unlock(&mddev->bitmap_info.mutex); 1319 return; 1320 } 1321 if (time_before(jiffies, bitmap->daemon_lastrun 1322 + mddev->bitmap_info.daemon_sleep)) 1323 goto done; 1324 1325 bitmap->daemon_lastrun = jiffies; 1326 if (bitmap->allclean) { 1327 mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true); 1328 goto done; 1329 } 1330 bitmap->allclean = 1; 1331 1332 mddev_add_trace_msg(bitmap->mddev, "md bitmap_daemon_work"); 1333 1334 /* Any file-page which is PENDING now needs to be written. 1335 * So set NEEDWRITE now, then after we make any last-minute changes 1336 * we will write it. 1337 */ 1338 for (j = 0; j < bitmap->storage.file_pages; j++) 1339 if (test_and_clear_page_attr(bitmap, j, 1340 BITMAP_PAGE_PENDING)) 1341 set_page_attr(bitmap, j, 1342 BITMAP_PAGE_NEEDWRITE); 1343 1344 if (bitmap->need_sync && 1345 mddev->bitmap_info.external == 0) { 1346 /* Arrange for superblock update as well as 1347 * other changes */ 1348 bitmap_super_t *sb; 1349 bitmap->need_sync = 0; 1350 if (bitmap->storage.filemap) { 1351 sb = kmap_atomic(bitmap->storage.sb_page); 1352 sb->events_cleared = 1353 cpu_to_le64(bitmap->events_cleared); 1354 kunmap_atomic(sb); 1355 set_page_attr(bitmap, 0, 1356 BITMAP_PAGE_NEEDWRITE); 1357 } 1358 } 1359 /* Now look at the bitmap counters and if any are '2' or '1', 1360 * decrement and handle accordingly. 1361 */ 1362 counts = &bitmap->counts; 1363 spin_lock_irq(&counts->lock); 1364 nextpage = 0; 1365 for (j = 0; j < counts->chunks; j++) { 1366 bitmap_counter_t *bmc; 1367 sector_t block = (sector_t)j << counts->chunkshift; 1368 1369 if (j == nextpage) { 1370 nextpage += PAGE_COUNTER_RATIO; 1371 if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) { 1372 j |= PAGE_COUNTER_MASK; 1373 continue; 1374 } 1375 counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0; 1376 } 1377 1378 bmc = md_bitmap_get_counter(counts, block, &blocks, 0); 1379 if (!bmc) { 1380 j |= PAGE_COUNTER_MASK; 1381 continue; 1382 } 1383 if (*bmc == 1 && !bitmap->need_sync) { 1384 /* We can clear the bit */ 1385 *bmc = 0; 1386 md_bitmap_count_page(counts, block, -1); 1387 md_bitmap_file_clear_bit(bitmap, block); 1388 } else if (*bmc && *bmc <= 2) { 1389 *bmc = 1; 1390 md_bitmap_set_pending(counts, block); 1391 bitmap->allclean = 0; 1392 } 1393 } 1394 spin_unlock_irq(&counts->lock); 1395 1396 md_bitmap_wait_writes(bitmap); 1397 /* Now start writeout on any page in NEEDWRITE that isn't DIRTY. 1398 * DIRTY pages need to be written by bitmap_unplug so it can wait 1399 * for them. 1400 * If we find any DIRTY page we stop there and let bitmap_unplug 1401 * handle all the rest. This is important in the case where 1402 * the first blocking holds the superblock and it has been updated. 1403 * We mustn't write any other blocks before the superblock. 1404 */ 1405 for (j = 0; 1406 j < bitmap->storage.file_pages 1407 && !test_bit(BITMAP_STALE, &bitmap->flags); 1408 j++) { 1409 if (test_page_attr(bitmap, j, 1410 BITMAP_PAGE_DIRTY)) 1411 /* bitmap_unplug will handle the rest */ 1412 break; 1413 if (bitmap->storage.filemap && 1414 test_and_clear_page_attr(bitmap, j, 1415 BITMAP_PAGE_NEEDWRITE)) 1416 filemap_write_page(bitmap, j, false); 1417 } 1418 1419 done: 1420 if (bitmap->allclean == 0) 1421 mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true); 1422 mutex_unlock(&mddev->bitmap_info.mutex); 1423 } 1424 1425 static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap, 1426 sector_t offset, sector_t *blocks, 1427 int create) 1428 __releases(bitmap->lock) 1429 __acquires(bitmap->lock) 1430 { 1431 /* If 'create', we might release the lock and reclaim it. 1432 * The lock must have been taken with interrupts enabled. 1433 * If !create, we don't release the lock. 1434 */ 1435 sector_t chunk = offset >> bitmap->chunkshift; 1436 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1437 unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; 1438 sector_t csize = ((sector_t)1) << bitmap->chunkshift; 1439 int err; 1440 1441 if (page >= bitmap->pages) { 1442 /* 1443 * This can happen if bitmap_start_sync goes beyond 1444 * End-of-device while looking for a whole page or 1445 * user set a huge number to sysfs bitmap_set_bits. 1446 */ 1447 *blocks = csize - (offset & (csize - 1)); 1448 return NULL; 1449 } 1450 err = md_bitmap_checkpage(bitmap, page, create, 0); 1451 1452 if (bitmap->bp[page].hijacked || 1453 bitmap->bp[page].map == NULL) 1454 csize = ((sector_t)1) << (bitmap->chunkshift + 1455 PAGE_COUNTER_SHIFT); 1456 1457 *blocks = csize - (offset & (csize - 1)); 1458 1459 if (err < 0) 1460 return NULL; 1461 1462 /* now locked ... */ 1463 1464 if (bitmap->bp[page].hijacked) { /* hijacked pointer */ 1465 /* should we use the first or second counter field 1466 * of the hijacked pointer? */ 1467 int hi = (pageoff > PAGE_COUNTER_MASK); 1468 return &((bitmap_counter_t *) 1469 &bitmap->bp[page].map)[hi]; 1470 } else /* page is allocated */ 1471 return (bitmap_counter_t *) 1472 &(bitmap->bp[page].map[pageoff]); 1473 } 1474 1475 static int bitmap_startwrite(struct mddev *mddev, sector_t offset, 1476 unsigned long sectors, bool behind) 1477 { 1478 struct bitmap *bitmap = mddev->bitmap; 1479 1480 if (!bitmap) 1481 return 0; 1482 1483 if (behind) { 1484 int bw; 1485 atomic_inc(&bitmap->behind_writes); 1486 bw = atomic_read(&bitmap->behind_writes); 1487 if (bw > bitmap->behind_writes_used) 1488 bitmap->behind_writes_used = bw; 1489 1490 pr_debug("inc write-behind count %d/%lu\n", 1491 bw, bitmap->mddev->bitmap_info.max_write_behind); 1492 } 1493 1494 while (sectors) { 1495 sector_t blocks; 1496 bitmap_counter_t *bmc; 1497 1498 spin_lock_irq(&bitmap->counts.lock); 1499 bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 1); 1500 if (!bmc) { 1501 spin_unlock_irq(&bitmap->counts.lock); 1502 return 0; 1503 } 1504 1505 if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) { 1506 DEFINE_WAIT(__wait); 1507 /* note that it is safe to do the prepare_to_wait 1508 * after the test as long as we do it before dropping 1509 * the spinlock. 1510 */ 1511 prepare_to_wait(&bitmap->overflow_wait, &__wait, 1512 TASK_UNINTERRUPTIBLE); 1513 spin_unlock_irq(&bitmap->counts.lock); 1514 schedule(); 1515 finish_wait(&bitmap->overflow_wait, &__wait); 1516 continue; 1517 } 1518 1519 switch (*bmc) { 1520 case 0: 1521 md_bitmap_file_set_bit(bitmap, offset); 1522 md_bitmap_count_page(&bitmap->counts, offset, 1); 1523 fallthrough; 1524 case 1: 1525 *bmc = 2; 1526 } 1527 1528 (*bmc)++; 1529 1530 spin_unlock_irq(&bitmap->counts.lock); 1531 1532 offset += blocks; 1533 if (sectors > blocks) 1534 sectors -= blocks; 1535 else 1536 sectors = 0; 1537 } 1538 return 0; 1539 } 1540 1541 static void bitmap_endwrite(struct mddev *mddev, sector_t offset, 1542 unsigned long sectors, bool success, bool behind) 1543 { 1544 struct bitmap *bitmap = mddev->bitmap; 1545 1546 if (!bitmap) 1547 return; 1548 1549 if (behind) { 1550 if (atomic_dec_and_test(&bitmap->behind_writes)) 1551 wake_up(&bitmap->behind_wait); 1552 pr_debug("dec write-behind count %d/%lu\n", 1553 atomic_read(&bitmap->behind_writes), 1554 bitmap->mddev->bitmap_info.max_write_behind); 1555 } 1556 1557 while (sectors) { 1558 sector_t blocks; 1559 unsigned long flags; 1560 bitmap_counter_t *bmc; 1561 1562 spin_lock_irqsave(&bitmap->counts.lock, flags); 1563 bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 0); 1564 if (!bmc) { 1565 spin_unlock_irqrestore(&bitmap->counts.lock, flags); 1566 return; 1567 } 1568 1569 if (success && !bitmap->mddev->degraded && 1570 bitmap->events_cleared < bitmap->mddev->events) { 1571 bitmap->events_cleared = bitmap->mddev->events; 1572 bitmap->need_sync = 1; 1573 sysfs_notify_dirent_safe(bitmap->sysfs_can_clear); 1574 } 1575 1576 if (!success && !NEEDED(*bmc)) 1577 *bmc |= NEEDED_MASK; 1578 1579 if (COUNTER(*bmc) == COUNTER_MAX) 1580 wake_up(&bitmap->overflow_wait); 1581 1582 (*bmc)--; 1583 if (*bmc <= 2) { 1584 md_bitmap_set_pending(&bitmap->counts, offset); 1585 bitmap->allclean = 0; 1586 } 1587 spin_unlock_irqrestore(&bitmap->counts.lock, flags); 1588 offset += blocks; 1589 if (sectors > blocks) 1590 sectors -= blocks; 1591 else 1592 sectors = 0; 1593 } 1594 } 1595 1596 static bool __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, 1597 sector_t *blocks, bool degraded) 1598 { 1599 bitmap_counter_t *bmc; 1600 bool rv; 1601 1602 if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */ 1603 *blocks = 1024; 1604 return true; /* always resync if no bitmap */ 1605 } 1606 spin_lock_irq(&bitmap->counts.lock); 1607 1608 rv = false; 1609 bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0); 1610 if (bmc) { 1611 /* locked */ 1612 if (RESYNC(*bmc)) { 1613 rv = true; 1614 } else if (NEEDED(*bmc)) { 1615 rv = true; 1616 if (!degraded) { /* don't set/clear bits if degraded */ 1617 *bmc |= RESYNC_MASK; 1618 *bmc &= ~NEEDED_MASK; 1619 } 1620 } 1621 } 1622 spin_unlock_irq(&bitmap->counts.lock); 1623 1624 return rv; 1625 } 1626 1627 static bool bitmap_start_sync(struct mddev *mddev, sector_t offset, 1628 sector_t *blocks, bool degraded) 1629 { 1630 /* bitmap_start_sync must always report on multiples of whole 1631 * pages, otherwise resync (which is very PAGE_SIZE based) will 1632 * get confused. 1633 * So call __bitmap_start_sync repeatedly (if needed) until 1634 * At least PAGE_SIZE>>9 blocks are covered. 1635 * Return the 'or' of the result. 1636 */ 1637 bool rv = false; 1638 sector_t blocks1; 1639 1640 *blocks = 0; 1641 while (*blocks < (PAGE_SIZE>>9)) { 1642 rv |= __bitmap_start_sync(mddev->bitmap, offset, 1643 &blocks1, degraded); 1644 offset += blocks1; 1645 *blocks += blocks1; 1646 } 1647 1648 return rv; 1649 } 1650 1651 static void __bitmap_end_sync(struct bitmap *bitmap, sector_t offset, 1652 sector_t *blocks, bool aborted) 1653 { 1654 bitmap_counter_t *bmc; 1655 unsigned long flags; 1656 1657 if (bitmap == NULL) { 1658 *blocks = 1024; 1659 return; 1660 } 1661 spin_lock_irqsave(&bitmap->counts.lock, flags); 1662 bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0); 1663 if (bmc == NULL) 1664 goto unlock; 1665 /* locked */ 1666 if (RESYNC(*bmc)) { 1667 *bmc &= ~RESYNC_MASK; 1668 1669 if (!NEEDED(*bmc) && aborted) 1670 *bmc |= NEEDED_MASK; 1671 else { 1672 if (*bmc <= 2) { 1673 md_bitmap_set_pending(&bitmap->counts, offset); 1674 bitmap->allclean = 0; 1675 } 1676 } 1677 } 1678 unlock: 1679 spin_unlock_irqrestore(&bitmap->counts.lock, flags); 1680 } 1681 1682 static void bitmap_end_sync(struct mddev *mddev, sector_t offset, 1683 sector_t *blocks) 1684 { 1685 __bitmap_end_sync(mddev->bitmap, offset, blocks, true); 1686 } 1687 1688 static void bitmap_close_sync(struct mddev *mddev) 1689 { 1690 /* Sync has finished, and any bitmap chunks that weren't synced 1691 * properly have been aborted. It remains to us to clear the 1692 * RESYNC bit wherever it is still on 1693 */ 1694 sector_t sector = 0; 1695 sector_t blocks; 1696 struct bitmap *bitmap = mddev->bitmap; 1697 1698 if (!bitmap) 1699 return; 1700 1701 while (sector < bitmap->mddev->resync_max_sectors) { 1702 __bitmap_end_sync(bitmap, sector, &blocks, false); 1703 sector += blocks; 1704 } 1705 } 1706 1707 static void bitmap_cond_end_sync(struct mddev *mddev, sector_t sector, 1708 bool force) 1709 { 1710 sector_t s = 0; 1711 sector_t blocks; 1712 struct bitmap *bitmap = mddev->bitmap; 1713 1714 if (!bitmap) 1715 return; 1716 if (sector == 0) { 1717 bitmap->last_end_sync = jiffies; 1718 return; 1719 } 1720 if (!force && time_before(jiffies, (bitmap->last_end_sync 1721 + bitmap->mddev->bitmap_info.daemon_sleep))) 1722 return; 1723 wait_event(bitmap->mddev->recovery_wait, 1724 atomic_read(&bitmap->mddev->recovery_active) == 0); 1725 1726 bitmap->mddev->curr_resync_completed = sector; 1727 set_bit(MD_SB_CHANGE_CLEAN, &bitmap->mddev->sb_flags); 1728 sector &= ~((1ULL << bitmap->counts.chunkshift) - 1); 1729 s = 0; 1730 while (s < sector && s < bitmap->mddev->resync_max_sectors) { 1731 __bitmap_end_sync(bitmap, s, &blocks, false); 1732 s += blocks; 1733 } 1734 bitmap->last_end_sync = jiffies; 1735 sysfs_notify_dirent_safe(bitmap->mddev->sysfs_completed); 1736 } 1737 1738 static void bitmap_sync_with_cluster(struct mddev *mddev, 1739 sector_t old_lo, sector_t old_hi, 1740 sector_t new_lo, sector_t new_hi) 1741 { 1742 struct bitmap *bitmap = mddev->bitmap; 1743 sector_t sector, blocks = 0; 1744 1745 for (sector = old_lo; sector < new_lo; ) { 1746 __bitmap_end_sync(bitmap, sector, &blocks, false); 1747 sector += blocks; 1748 } 1749 WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n"); 1750 1751 for (sector = old_hi; sector < new_hi; ) { 1752 bitmap_start_sync(mddev, sector, &blocks, false); 1753 sector += blocks; 1754 } 1755 WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n"); 1756 } 1757 1758 static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) 1759 { 1760 /* For each chunk covered by any of these sectors, set the 1761 * counter to 2 and possibly set resync_needed. They should all 1762 * be 0 at this point 1763 */ 1764 1765 sector_t secs; 1766 bitmap_counter_t *bmc; 1767 spin_lock_irq(&bitmap->counts.lock); 1768 bmc = md_bitmap_get_counter(&bitmap->counts, offset, &secs, 1); 1769 if (!bmc) { 1770 spin_unlock_irq(&bitmap->counts.lock); 1771 return; 1772 } 1773 if (!*bmc) { 1774 *bmc = 2; 1775 md_bitmap_count_page(&bitmap->counts, offset, 1); 1776 md_bitmap_set_pending(&bitmap->counts, offset); 1777 bitmap->allclean = 0; 1778 } 1779 if (needed) 1780 *bmc |= NEEDED_MASK; 1781 spin_unlock_irq(&bitmap->counts.lock); 1782 } 1783 1784 /* dirty the memory and file bits for bitmap chunks "s" to "e" */ 1785 static void bitmap_dirty_bits(struct mddev *mddev, unsigned long s, 1786 unsigned long e) 1787 { 1788 unsigned long chunk; 1789 struct bitmap *bitmap = mddev->bitmap; 1790 1791 if (!bitmap) 1792 return; 1793 1794 for (chunk = s; chunk <= e; chunk++) { 1795 sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift; 1796 1797 md_bitmap_set_memory_bits(bitmap, sec, 1); 1798 md_bitmap_file_set_bit(bitmap, sec); 1799 if (sec < bitmap->mddev->recovery_cp) 1800 /* We are asserting that the array is dirty, 1801 * so move the recovery_cp address back so 1802 * that it is obvious that it is dirty 1803 */ 1804 bitmap->mddev->recovery_cp = sec; 1805 } 1806 } 1807 1808 static void bitmap_flush(struct mddev *mddev) 1809 { 1810 struct bitmap *bitmap = mddev->bitmap; 1811 long sleep; 1812 1813 if (!bitmap) /* there was no bitmap */ 1814 return; 1815 1816 /* run the daemon_work three time to ensure everything is flushed 1817 * that can be 1818 */ 1819 sleep = mddev->bitmap_info.daemon_sleep * 2; 1820 bitmap->daemon_lastrun -= sleep; 1821 bitmap_daemon_work(mddev); 1822 bitmap->daemon_lastrun -= sleep; 1823 bitmap_daemon_work(mddev); 1824 bitmap->daemon_lastrun -= sleep; 1825 bitmap_daemon_work(mddev); 1826 if (mddev->bitmap_info.external) 1827 md_super_wait(mddev); 1828 bitmap_update_sb(bitmap); 1829 } 1830 1831 /* 1832 * free memory that was allocated 1833 */ 1834 void md_bitmap_free(struct bitmap *bitmap) 1835 { 1836 unsigned long k, pages; 1837 struct bitmap_page *bp; 1838 1839 if (!bitmap) /* there was no bitmap */ 1840 return; 1841 1842 if (bitmap->sysfs_can_clear) 1843 sysfs_put(bitmap->sysfs_can_clear); 1844 1845 if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info && 1846 bitmap->cluster_slot == md_cluster_ops->slot_number(bitmap->mddev)) 1847 md_cluster_stop(bitmap->mddev); 1848 1849 /* Shouldn't be needed - but just in case.... */ 1850 wait_event(bitmap->write_wait, 1851 atomic_read(&bitmap->pending_writes) == 0); 1852 1853 /* release the bitmap file */ 1854 md_bitmap_file_unmap(&bitmap->storage); 1855 1856 bp = bitmap->counts.bp; 1857 pages = bitmap->counts.pages; 1858 1859 /* free all allocated memory */ 1860 1861 if (bp) /* deallocate the page memory */ 1862 for (k = 0; k < pages; k++) 1863 if (bp[k].map && !bp[k].hijacked) 1864 kfree(bp[k].map); 1865 kfree(bp); 1866 kfree(bitmap); 1867 } 1868 EXPORT_SYMBOL(md_bitmap_free); 1869 1870 void md_bitmap_wait_behind_writes(struct mddev *mddev) 1871 { 1872 struct bitmap *bitmap = mddev->bitmap; 1873 1874 /* wait for behind writes to complete */ 1875 if (bitmap && atomic_read(&bitmap->behind_writes) > 0) { 1876 pr_debug("md:%s: behind writes in progress - waiting to stop.\n", 1877 mdname(mddev)); 1878 /* need to kick something here to make sure I/O goes? */ 1879 wait_event(bitmap->behind_wait, 1880 atomic_read(&bitmap->behind_writes) == 0); 1881 } 1882 } 1883 EXPORT_SYMBOL_GPL(md_bitmap_wait_behind_writes); 1884 1885 static void bitmap_destroy(struct mddev *mddev) 1886 { 1887 struct bitmap *bitmap = mddev->bitmap; 1888 1889 if (!bitmap) /* there was no bitmap */ 1890 return; 1891 1892 md_bitmap_wait_behind_writes(mddev); 1893 if (!mddev->serialize_policy) 1894 mddev_destroy_serial_pool(mddev, NULL); 1895 1896 mutex_lock(&mddev->bitmap_info.mutex); 1897 spin_lock(&mddev->lock); 1898 mddev->bitmap = NULL; /* disconnect from the md device */ 1899 spin_unlock(&mddev->lock); 1900 mutex_unlock(&mddev->bitmap_info.mutex); 1901 mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true); 1902 1903 md_bitmap_free(bitmap); 1904 } 1905 1906 /* 1907 * initialize the bitmap structure 1908 * if this returns an error, bitmap_destroy must be called to do clean up 1909 * once mddev->bitmap is set 1910 */ 1911 static struct bitmap *__bitmap_create(struct mddev *mddev, int slot) 1912 { 1913 struct bitmap *bitmap; 1914 sector_t blocks = mddev->resync_max_sectors; 1915 struct file *file = mddev->bitmap_info.file; 1916 int err; 1917 struct kernfs_node *bm = NULL; 1918 1919 BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); 1920 1921 BUG_ON(file && mddev->bitmap_info.offset); 1922 1923 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { 1924 pr_notice("md/raid:%s: array with journal cannot have bitmap\n", 1925 mdname(mddev)); 1926 return ERR_PTR(-EBUSY); 1927 } 1928 1929 bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); 1930 if (!bitmap) 1931 return ERR_PTR(-ENOMEM); 1932 1933 spin_lock_init(&bitmap->counts.lock); 1934 atomic_set(&bitmap->pending_writes, 0); 1935 init_waitqueue_head(&bitmap->write_wait); 1936 init_waitqueue_head(&bitmap->overflow_wait); 1937 init_waitqueue_head(&bitmap->behind_wait); 1938 1939 bitmap->mddev = mddev; 1940 bitmap->cluster_slot = slot; 1941 1942 if (mddev->kobj.sd) 1943 bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap"); 1944 if (bm) { 1945 bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear"); 1946 sysfs_put(bm); 1947 } else 1948 bitmap->sysfs_can_clear = NULL; 1949 1950 bitmap->storage.file = file; 1951 if (file) { 1952 get_file(file); 1953 /* As future accesses to this file will use bmap, 1954 * and bypass the page cache, we must sync the file 1955 * first. 1956 */ 1957 vfs_fsync(file, 1); 1958 } 1959 /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */ 1960 if (!mddev->bitmap_info.external) { 1961 /* 1962 * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is 1963 * instructing us to create a new on-disk bitmap instance. 1964 */ 1965 if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags)) 1966 err = md_bitmap_new_disk_sb(bitmap); 1967 else 1968 err = md_bitmap_read_sb(bitmap); 1969 } else { 1970 err = 0; 1971 if (mddev->bitmap_info.chunksize == 0 || 1972 mddev->bitmap_info.daemon_sleep == 0) 1973 /* chunksize and time_base need to be 1974 * set first. */ 1975 err = -EINVAL; 1976 } 1977 if (err) 1978 goto error; 1979 1980 bitmap->daemon_lastrun = jiffies; 1981 err = __bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 1982 true); 1983 if (err) 1984 goto error; 1985 1986 pr_debug("created bitmap (%lu pages) for device %s\n", 1987 bitmap->counts.pages, bmname(bitmap)); 1988 1989 err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0; 1990 if (err) 1991 goto error; 1992 1993 return bitmap; 1994 error: 1995 md_bitmap_free(bitmap); 1996 return ERR_PTR(err); 1997 } 1998 1999 static int bitmap_create(struct mddev *mddev, int slot) 2000 { 2001 struct bitmap *bitmap = __bitmap_create(mddev, slot); 2002 2003 if (IS_ERR(bitmap)) 2004 return PTR_ERR(bitmap); 2005 2006 mddev->bitmap = bitmap; 2007 return 0; 2008 } 2009 2010 static int bitmap_load(struct mddev *mddev) 2011 { 2012 int err = 0; 2013 sector_t start = 0; 2014 sector_t sector = 0; 2015 struct bitmap *bitmap = mddev->bitmap; 2016 struct md_rdev *rdev; 2017 2018 if (!bitmap) 2019 goto out; 2020 2021 rdev_for_each(rdev, mddev) 2022 mddev_create_serial_pool(mddev, rdev); 2023 2024 if (mddev_is_clustered(mddev)) 2025 md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes); 2026 2027 /* Clear out old bitmap info first: Either there is none, or we 2028 * are resuming after someone else has possibly changed things, 2029 * so we should forget old cached info. 2030 * All chunks should be clean, but some might need_sync. 2031 */ 2032 while (sector < mddev->resync_max_sectors) { 2033 sector_t blocks; 2034 bitmap_start_sync(mddev, sector, &blocks, false); 2035 sector += blocks; 2036 } 2037 bitmap_close_sync(mddev); 2038 2039 if (mddev->degraded == 0 2040 || bitmap->events_cleared == mddev->events) 2041 /* no need to keep dirty bits to optimise a 2042 * re-add of a missing device */ 2043 start = mddev->recovery_cp; 2044 2045 mutex_lock(&mddev->bitmap_info.mutex); 2046 err = md_bitmap_init_from_disk(bitmap, start); 2047 mutex_unlock(&mddev->bitmap_info.mutex); 2048 2049 if (err) 2050 goto out; 2051 clear_bit(BITMAP_STALE, &bitmap->flags); 2052 2053 /* Kick recovery in case any bits were set */ 2054 set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery); 2055 2056 mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true); 2057 md_wakeup_thread(mddev->thread); 2058 2059 bitmap_update_sb(bitmap); 2060 2061 if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) 2062 err = -EIO; 2063 out: 2064 return err; 2065 } 2066 2067 /* caller need to free returned bitmap with md_bitmap_free() */ 2068 static struct bitmap *bitmap_get_from_slot(struct mddev *mddev, int slot) 2069 { 2070 int rv = 0; 2071 struct bitmap *bitmap; 2072 2073 bitmap = __bitmap_create(mddev, slot); 2074 if (IS_ERR(bitmap)) { 2075 rv = PTR_ERR(bitmap); 2076 return ERR_PTR(rv); 2077 } 2078 2079 rv = md_bitmap_init_from_disk(bitmap, 0); 2080 if (rv) { 2081 md_bitmap_free(bitmap); 2082 return ERR_PTR(rv); 2083 } 2084 2085 return bitmap; 2086 } 2087 2088 /* Loads the bitmap associated with slot and copies the resync information 2089 * to our bitmap 2090 */ 2091 static int bitmap_copy_from_slot(struct mddev *mddev, int slot, sector_t *low, 2092 sector_t *high, bool clear_bits) 2093 { 2094 int rv = 0, i, j; 2095 sector_t block, lo = 0, hi = 0; 2096 struct bitmap_counts *counts; 2097 struct bitmap *bitmap; 2098 2099 bitmap = bitmap_get_from_slot(mddev, slot); 2100 if (IS_ERR(bitmap)) { 2101 pr_err("%s can't get bitmap from slot %d\n", __func__, slot); 2102 return -1; 2103 } 2104 2105 counts = &bitmap->counts; 2106 for (j = 0; j < counts->chunks; j++) { 2107 block = (sector_t)j << counts->chunkshift; 2108 if (md_bitmap_file_test_bit(bitmap, block)) { 2109 if (!lo) 2110 lo = block; 2111 hi = block; 2112 md_bitmap_file_clear_bit(bitmap, block); 2113 md_bitmap_set_memory_bits(mddev->bitmap, block, 1); 2114 md_bitmap_file_set_bit(mddev->bitmap, block); 2115 } 2116 } 2117 2118 if (clear_bits) { 2119 bitmap_update_sb(bitmap); 2120 /* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs 2121 * BITMAP_PAGE_DIRTY or _NEEDWRITE to write ... */ 2122 for (i = 0; i < bitmap->storage.file_pages; i++) 2123 if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING)) 2124 set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE); 2125 __bitmap_unplug(bitmap); 2126 } 2127 __bitmap_unplug(mddev->bitmap); 2128 *low = lo; 2129 *high = hi; 2130 md_bitmap_free(bitmap); 2131 2132 return rv; 2133 } 2134 2135 static void bitmap_set_pages(struct bitmap *bitmap, unsigned long pages) 2136 { 2137 bitmap->counts.pages = pages; 2138 } 2139 2140 static int bitmap_get_stats(struct bitmap *bitmap, struct md_bitmap_stats *stats) 2141 { 2142 struct bitmap_storage *storage; 2143 struct bitmap_counts *counts; 2144 bitmap_super_t *sb; 2145 2146 if (!bitmap) 2147 return -ENOENT; 2148 2149 sb = kmap_local_page(bitmap->storage.sb_page); 2150 stats->sync_size = le64_to_cpu(sb->sync_size); 2151 kunmap_local(sb); 2152 2153 counts = &bitmap->counts; 2154 stats->missing_pages = counts->missing_pages; 2155 stats->pages = counts->pages; 2156 2157 storage = &bitmap->storage; 2158 stats->file_pages = storage->file_pages; 2159 stats->file = storage->file; 2160 2161 stats->behind_writes = atomic_read(&bitmap->behind_writes); 2162 stats->behind_wait = wq_has_sleeper(&bitmap->behind_wait); 2163 stats->events_cleared = bitmap->events_cleared; 2164 return 0; 2165 } 2166 2167 static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks, 2168 int chunksize, bool init) 2169 { 2170 /* If chunk_size is 0, choose an appropriate chunk size. 2171 * Then possibly allocate new storage space. 2172 * Then quiesce, copy bits, replace bitmap, and re-start 2173 * 2174 * This function is called both to set up the initial bitmap 2175 * and to resize the bitmap while the array is active. 2176 * If this happens as a result of the array being resized, 2177 * chunksize will be zero, and we need to choose a suitable 2178 * chunksize, otherwise we use what we are given. 2179 */ 2180 struct bitmap_storage store; 2181 struct bitmap_counts old_counts; 2182 unsigned long chunks; 2183 sector_t block; 2184 sector_t old_blocks, new_blocks; 2185 int chunkshift; 2186 int ret = 0; 2187 long pages; 2188 struct bitmap_page *new_bp; 2189 2190 if (bitmap->storage.file && !init) { 2191 pr_info("md: cannot resize file-based bitmap\n"); 2192 return -EINVAL; 2193 } 2194 2195 if (chunksize == 0) { 2196 /* If there is enough space, leave the chunk size unchanged, 2197 * else increase by factor of two until there is enough space. 2198 */ 2199 long bytes; 2200 long space = bitmap->mddev->bitmap_info.space; 2201 2202 if (space == 0) { 2203 /* We don't know how much space there is, so limit 2204 * to current size - in sectors. 2205 */ 2206 bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8); 2207 if (!bitmap->mddev->bitmap_info.external) 2208 bytes += sizeof(bitmap_super_t); 2209 space = DIV_ROUND_UP(bytes, 512); 2210 bitmap->mddev->bitmap_info.space = space; 2211 } 2212 chunkshift = bitmap->counts.chunkshift; 2213 chunkshift--; 2214 do { 2215 /* 'chunkshift' is shift from block size to chunk size */ 2216 chunkshift++; 2217 chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift); 2218 bytes = DIV_ROUND_UP(chunks, 8); 2219 if (!bitmap->mddev->bitmap_info.external) 2220 bytes += sizeof(bitmap_super_t); 2221 } while (bytes > (space << 9) && (chunkshift + BITMAP_BLOCK_SHIFT) < 2222 (BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize) - 1)); 2223 } else 2224 chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT; 2225 2226 chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift); 2227 memset(&store, 0, sizeof(store)); 2228 if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file) 2229 ret = md_bitmap_storage_alloc(&store, chunks, 2230 !bitmap->mddev->bitmap_info.external, 2231 mddev_is_clustered(bitmap->mddev) 2232 ? bitmap->cluster_slot : 0); 2233 if (ret) { 2234 md_bitmap_file_unmap(&store); 2235 goto err; 2236 } 2237 2238 pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO); 2239 2240 new_bp = kcalloc(pages, sizeof(*new_bp), GFP_KERNEL); 2241 ret = -ENOMEM; 2242 if (!new_bp) { 2243 md_bitmap_file_unmap(&store); 2244 goto err; 2245 } 2246 2247 if (!init) 2248 bitmap->mddev->pers->quiesce(bitmap->mddev, 1); 2249 2250 store.file = bitmap->storage.file; 2251 bitmap->storage.file = NULL; 2252 2253 if (store.sb_page && bitmap->storage.sb_page) 2254 memcpy(page_address(store.sb_page), 2255 page_address(bitmap->storage.sb_page), 2256 sizeof(bitmap_super_t)); 2257 spin_lock_irq(&bitmap->counts.lock); 2258 md_bitmap_file_unmap(&bitmap->storage); 2259 bitmap->storage = store; 2260 2261 old_counts = bitmap->counts; 2262 bitmap->counts.bp = new_bp; 2263 bitmap->counts.pages = pages; 2264 bitmap->counts.missing_pages = pages; 2265 bitmap->counts.chunkshift = chunkshift; 2266 bitmap->counts.chunks = chunks; 2267 bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift + 2268 BITMAP_BLOCK_SHIFT); 2269 2270 blocks = min(old_counts.chunks << old_counts.chunkshift, 2271 chunks << chunkshift); 2272 2273 /* For cluster raid, need to pre-allocate bitmap */ 2274 if (mddev_is_clustered(bitmap->mddev)) { 2275 unsigned long page; 2276 for (page = 0; page < pages; page++) { 2277 ret = md_bitmap_checkpage(&bitmap->counts, page, 1, 1); 2278 if (ret) { 2279 unsigned long k; 2280 2281 /* deallocate the page memory */ 2282 for (k = 0; k < page; k++) { 2283 kfree(new_bp[k].map); 2284 } 2285 kfree(new_bp); 2286 2287 /* restore some fields from old_counts */ 2288 bitmap->counts.bp = old_counts.bp; 2289 bitmap->counts.pages = old_counts.pages; 2290 bitmap->counts.missing_pages = old_counts.pages; 2291 bitmap->counts.chunkshift = old_counts.chunkshift; 2292 bitmap->counts.chunks = old_counts.chunks; 2293 bitmap->mddev->bitmap_info.chunksize = 2294 1UL << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT); 2295 blocks = old_counts.chunks << old_counts.chunkshift; 2296 pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n"); 2297 break; 2298 } else 2299 bitmap->counts.bp[page].count += 1; 2300 } 2301 } 2302 2303 for (block = 0; block < blocks; ) { 2304 bitmap_counter_t *bmc_old, *bmc_new; 2305 int set; 2306 2307 bmc_old = md_bitmap_get_counter(&old_counts, block, &old_blocks, 0); 2308 set = bmc_old && NEEDED(*bmc_old); 2309 2310 if (set) { 2311 bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1); 2312 if (bmc_new) { 2313 if (*bmc_new == 0) { 2314 /* need to set on-disk bits too. */ 2315 sector_t end = block + new_blocks; 2316 sector_t start = block >> chunkshift; 2317 2318 start <<= chunkshift; 2319 while (start < end) { 2320 md_bitmap_file_set_bit(bitmap, block); 2321 start += 1 << chunkshift; 2322 } 2323 *bmc_new = 2; 2324 md_bitmap_count_page(&bitmap->counts, block, 1); 2325 md_bitmap_set_pending(&bitmap->counts, block); 2326 } 2327 *bmc_new |= NEEDED_MASK; 2328 } 2329 if (new_blocks < old_blocks) 2330 old_blocks = new_blocks; 2331 } 2332 block += old_blocks; 2333 } 2334 2335 if (bitmap->counts.bp != old_counts.bp) { 2336 unsigned long k; 2337 for (k = 0; k < old_counts.pages; k++) 2338 if (!old_counts.bp[k].hijacked) 2339 kfree(old_counts.bp[k].map); 2340 kfree(old_counts.bp); 2341 } 2342 2343 if (!init) { 2344 int i; 2345 while (block < (chunks << chunkshift)) { 2346 bitmap_counter_t *bmc; 2347 bmc = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1); 2348 if (bmc) { 2349 /* new space. It needs to be resynced, so 2350 * we set NEEDED_MASK. 2351 */ 2352 if (*bmc == 0) { 2353 *bmc = NEEDED_MASK | 2; 2354 md_bitmap_count_page(&bitmap->counts, block, 1); 2355 md_bitmap_set_pending(&bitmap->counts, block); 2356 } 2357 } 2358 block += new_blocks; 2359 } 2360 for (i = 0; i < bitmap->storage.file_pages; i++) 2361 set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY); 2362 } 2363 spin_unlock_irq(&bitmap->counts.lock); 2364 2365 if (!init) { 2366 __bitmap_unplug(bitmap); 2367 bitmap->mddev->pers->quiesce(bitmap->mddev, 0); 2368 } 2369 ret = 0; 2370 err: 2371 return ret; 2372 } 2373 2374 static int bitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize, 2375 bool init) 2376 { 2377 struct bitmap *bitmap = mddev->bitmap; 2378 2379 if (!bitmap) 2380 return 0; 2381 2382 return __bitmap_resize(bitmap, blocks, chunksize, init); 2383 } 2384 2385 static ssize_t 2386 location_show(struct mddev *mddev, char *page) 2387 { 2388 ssize_t len; 2389 if (mddev->bitmap_info.file) 2390 len = sprintf(page, "file"); 2391 else if (mddev->bitmap_info.offset) 2392 len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset); 2393 else 2394 len = sprintf(page, "none"); 2395 len += sprintf(page+len, "\n"); 2396 return len; 2397 } 2398 2399 static ssize_t 2400 location_store(struct mddev *mddev, const char *buf, size_t len) 2401 { 2402 int rv; 2403 2404 rv = mddev_suspend_and_lock(mddev); 2405 if (rv) 2406 return rv; 2407 2408 if (mddev->pers) { 2409 if (mddev->recovery || mddev->sync_thread) { 2410 rv = -EBUSY; 2411 goto out; 2412 } 2413 } 2414 2415 if (mddev->bitmap || mddev->bitmap_info.file || 2416 mddev->bitmap_info.offset) { 2417 /* bitmap already configured. Only option is to clear it */ 2418 if (strncmp(buf, "none", 4) != 0) { 2419 rv = -EBUSY; 2420 goto out; 2421 } 2422 2423 bitmap_destroy(mddev); 2424 mddev->bitmap_info.offset = 0; 2425 if (mddev->bitmap_info.file) { 2426 struct file *f = mddev->bitmap_info.file; 2427 mddev->bitmap_info.file = NULL; 2428 fput(f); 2429 } 2430 } else { 2431 /* No bitmap, OK to set a location */ 2432 long long offset; 2433 2434 if (strncmp(buf, "none", 4) == 0) 2435 /* nothing to be done */; 2436 else if (strncmp(buf, "file:", 5) == 0) { 2437 /* Not supported yet */ 2438 rv = -EINVAL; 2439 goto out; 2440 } else { 2441 if (buf[0] == '+') 2442 rv = kstrtoll(buf+1, 10, &offset); 2443 else 2444 rv = kstrtoll(buf, 10, &offset); 2445 if (rv) 2446 goto out; 2447 if (offset == 0) { 2448 rv = -EINVAL; 2449 goto out; 2450 } 2451 if (mddev->bitmap_info.external == 0 && 2452 mddev->major_version == 0 && 2453 offset != mddev->bitmap_info.default_offset) { 2454 rv = -EINVAL; 2455 goto out; 2456 } 2457 2458 mddev->bitmap_info.offset = offset; 2459 rv = bitmap_create(mddev, -1); 2460 if (rv) 2461 goto out; 2462 2463 rv = bitmap_load(mddev); 2464 if (rv) { 2465 mddev->bitmap_info.offset = 0; 2466 bitmap_destroy(mddev); 2467 goto out; 2468 } 2469 } 2470 } 2471 if (!mddev->external) { 2472 /* Ensure new bitmap info is stored in 2473 * metadata promptly. 2474 */ 2475 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); 2476 md_wakeup_thread(mddev->thread); 2477 } 2478 rv = 0; 2479 out: 2480 mddev_unlock_and_resume(mddev); 2481 if (rv) 2482 return rv; 2483 return len; 2484 } 2485 2486 static struct md_sysfs_entry bitmap_location = 2487 __ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store); 2488 2489 /* 'bitmap/space' is the space available at 'location' for the 2490 * bitmap. This allows the kernel to know when it is safe to 2491 * resize the bitmap to match a resized array. 2492 */ 2493 static ssize_t 2494 space_show(struct mddev *mddev, char *page) 2495 { 2496 return sprintf(page, "%lu\n", mddev->bitmap_info.space); 2497 } 2498 2499 static ssize_t 2500 space_store(struct mddev *mddev, const char *buf, size_t len) 2501 { 2502 unsigned long sectors; 2503 int rv; 2504 2505 rv = kstrtoul(buf, 10, §ors); 2506 if (rv) 2507 return rv; 2508 2509 if (sectors == 0) 2510 return -EINVAL; 2511 2512 if (mddev->bitmap && 2513 sectors < (mddev->bitmap->storage.bytes + 511) >> 9) 2514 return -EFBIG; /* Bitmap is too big for this small space */ 2515 2516 /* could make sure it isn't too big, but that isn't really 2517 * needed - user-space should be careful. 2518 */ 2519 mddev->bitmap_info.space = sectors; 2520 return len; 2521 } 2522 2523 static struct md_sysfs_entry bitmap_space = 2524 __ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store); 2525 2526 static ssize_t 2527 timeout_show(struct mddev *mddev, char *page) 2528 { 2529 ssize_t len; 2530 unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ; 2531 unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ; 2532 2533 len = sprintf(page, "%lu", secs); 2534 if (jifs) 2535 len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs)); 2536 len += sprintf(page+len, "\n"); 2537 return len; 2538 } 2539 2540 static ssize_t 2541 timeout_store(struct mddev *mddev, const char *buf, size_t len) 2542 { 2543 /* timeout can be set at any time */ 2544 unsigned long timeout; 2545 int rv = strict_strtoul_scaled(buf, &timeout, 4); 2546 if (rv) 2547 return rv; 2548 2549 /* just to make sure we don't overflow... */ 2550 if (timeout >= LONG_MAX / HZ) 2551 return -EINVAL; 2552 2553 timeout = timeout * HZ / 10000; 2554 2555 if (timeout >= MAX_SCHEDULE_TIMEOUT) 2556 timeout = MAX_SCHEDULE_TIMEOUT-1; 2557 if (timeout < 1) 2558 timeout = 1; 2559 2560 mddev->bitmap_info.daemon_sleep = timeout; 2561 mddev_set_timeout(mddev, timeout, false); 2562 md_wakeup_thread(mddev->thread); 2563 2564 return len; 2565 } 2566 2567 static struct md_sysfs_entry bitmap_timeout = 2568 __ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store); 2569 2570 static ssize_t 2571 backlog_show(struct mddev *mddev, char *page) 2572 { 2573 return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind); 2574 } 2575 2576 static ssize_t 2577 backlog_store(struct mddev *mddev, const char *buf, size_t len) 2578 { 2579 unsigned long backlog; 2580 unsigned long old_mwb = mddev->bitmap_info.max_write_behind; 2581 struct md_rdev *rdev; 2582 bool has_write_mostly = false; 2583 int rv = kstrtoul(buf, 10, &backlog); 2584 if (rv) 2585 return rv; 2586 if (backlog > COUNTER_MAX) 2587 return -EINVAL; 2588 2589 rv = mddev_suspend_and_lock(mddev); 2590 if (rv) 2591 return rv; 2592 2593 /* 2594 * Without write mostly device, it doesn't make sense to set 2595 * backlog for max_write_behind. 2596 */ 2597 rdev_for_each(rdev, mddev) { 2598 if (test_bit(WriteMostly, &rdev->flags)) { 2599 has_write_mostly = true; 2600 break; 2601 } 2602 } 2603 if (!has_write_mostly) { 2604 pr_warn_ratelimited("%s: can't set backlog, no write mostly device available\n", 2605 mdname(mddev)); 2606 mddev_unlock(mddev); 2607 return -EINVAL; 2608 } 2609 2610 mddev->bitmap_info.max_write_behind = backlog; 2611 if (!backlog && mddev->serial_info_pool) { 2612 /* serial_info_pool is not needed if backlog is zero */ 2613 if (!mddev->serialize_policy) 2614 mddev_destroy_serial_pool(mddev, NULL); 2615 } else if (backlog && !mddev->serial_info_pool) { 2616 /* serial_info_pool is needed since backlog is not zero */ 2617 rdev_for_each(rdev, mddev) 2618 mddev_create_serial_pool(mddev, rdev); 2619 } 2620 if (old_mwb != backlog) 2621 bitmap_update_sb(mddev->bitmap); 2622 2623 mddev_unlock_and_resume(mddev); 2624 return len; 2625 } 2626 2627 static struct md_sysfs_entry bitmap_backlog = 2628 __ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store); 2629 2630 static ssize_t 2631 chunksize_show(struct mddev *mddev, char *page) 2632 { 2633 return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize); 2634 } 2635 2636 static ssize_t 2637 chunksize_store(struct mddev *mddev, const char *buf, size_t len) 2638 { 2639 /* Can only be changed when no bitmap is active */ 2640 int rv; 2641 unsigned long csize; 2642 if (mddev->bitmap) 2643 return -EBUSY; 2644 rv = kstrtoul(buf, 10, &csize); 2645 if (rv) 2646 return rv; 2647 if (csize < 512 || 2648 !is_power_of_2(csize)) 2649 return -EINVAL; 2650 if (BITS_PER_LONG > 32 && csize >= (1ULL << (BITS_PER_BYTE * 2651 sizeof(((bitmap_super_t *)0)->chunksize)))) 2652 return -EOVERFLOW; 2653 mddev->bitmap_info.chunksize = csize; 2654 return len; 2655 } 2656 2657 static struct md_sysfs_entry bitmap_chunksize = 2658 __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store); 2659 2660 static ssize_t metadata_show(struct mddev *mddev, char *page) 2661 { 2662 if (mddev_is_clustered(mddev)) 2663 return sprintf(page, "clustered\n"); 2664 return sprintf(page, "%s\n", (mddev->bitmap_info.external 2665 ? "external" : "internal")); 2666 } 2667 2668 static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len) 2669 { 2670 if (mddev->bitmap || 2671 mddev->bitmap_info.file || 2672 mddev->bitmap_info.offset) 2673 return -EBUSY; 2674 if (strncmp(buf, "external", 8) == 0) 2675 mddev->bitmap_info.external = 1; 2676 else if ((strncmp(buf, "internal", 8) == 0) || 2677 (strncmp(buf, "clustered", 9) == 0)) 2678 mddev->bitmap_info.external = 0; 2679 else 2680 return -EINVAL; 2681 return len; 2682 } 2683 2684 static struct md_sysfs_entry bitmap_metadata = 2685 __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store); 2686 2687 static ssize_t can_clear_show(struct mddev *mddev, char *page) 2688 { 2689 int len; 2690 spin_lock(&mddev->lock); 2691 if (mddev->bitmap) 2692 len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ? 2693 "false" : "true")); 2694 else 2695 len = sprintf(page, "\n"); 2696 spin_unlock(&mddev->lock); 2697 return len; 2698 } 2699 2700 static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len) 2701 { 2702 if (mddev->bitmap == NULL) 2703 return -ENOENT; 2704 if (strncmp(buf, "false", 5) == 0) 2705 mddev->bitmap->need_sync = 1; 2706 else if (strncmp(buf, "true", 4) == 0) { 2707 if (mddev->degraded) 2708 return -EBUSY; 2709 mddev->bitmap->need_sync = 0; 2710 } else 2711 return -EINVAL; 2712 return len; 2713 } 2714 2715 static struct md_sysfs_entry bitmap_can_clear = 2716 __ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store); 2717 2718 static ssize_t 2719 behind_writes_used_show(struct mddev *mddev, char *page) 2720 { 2721 ssize_t ret; 2722 spin_lock(&mddev->lock); 2723 if (mddev->bitmap == NULL) 2724 ret = sprintf(page, "0\n"); 2725 else 2726 ret = sprintf(page, "%lu\n", 2727 mddev->bitmap->behind_writes_used); 2728 spin_unlock(&mddev->lock); 2729 return ret; 2730 } 2731 2732 static ssize_t 2733 behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len) 2734 { 2735 if (mddev->bitmap) 2736 mddev->bitmap->behind_writes_used = 0; 2737 return len; 2738 } 2739 2740 static struct md_sysfs_entry max_backlog_used = 2741 __ATTR(max_backlog_used, S_IRUGO | S_IWUSR, 2742 behind_writes_used_show, behind_writes_used_reset); 2743 2744 static struct attribute *md_bitmap_attrs[] = { 2745 &bitmap_location.attr, 2746 &bitmap_space.attr, 2747 &bitmap_timeout.attr, 2748 &bitmap_backlog.attr, 2749 &bitmap_chunksize.attr, 2750 &bitmap_metadata.attr, 2751 &bitmap_can_clear.attr, 2752 &max_backlog_used.attr, 2753 NULL 2754 }; 2755 const struct attribute_group md_bitmap_group = { 2756 .name = "bitmap", 2757 .attrs = md_bitmap_attrs, 2758 }; 2759 2760 static struct bitmap_operations bitmap_ops = { 2761 .create = bitmap_create, 2762 .resize = bitmap_resize, 2763 .load = bitmap_load, 2764 .destroy = bitmap_destroy, 2765 .flush = bitmap_flush, 2766 .write_all = bitmap_write_all, 2767 .dirty_bits = bitmap_dirty_bits, 2768 .unplug = bitmap_unplug, 2769 .daemon_work = bitmap_daemon_work, 2770 2771 .startwrite = bitmap_startwrite, 2772 .endwrite = bitmap_endwrite, 2773 .start_sync = bitmap_start_sync, 2774 .end_sync = bitmap_end_sync, 2775 .cond_end_sync = bitmap_cond_end_sync, 2776 .close_sync = bitmap_close_sync, 2777 2778 .update_sb = bitmap_update_sb, 2779 .get_stats = bitmap_get_stats, 2780 2781 .sync_with_cluster = bitmap_sync_with_cluster, 2782 .get_from_slot = bitmap_get_from_slot, 2783 .copy_from_slot = bitmap_copy_from_slot, 2784 .set_pages = bitmap_set_pages, 2785 }; 2786 2787 void mddev_set_bitmap_ops(struct mddev *mddev) 2788 { 2789 mddev->bitmap_ops = &bitmap_ops; 2790 } 2791