xref: /linux/drivers/md/md-bitmap.c (revision c8ed3a15a749246ddfedb84aab9cf0316c7b9b8a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
4  *
5  * bitmap_create  - sets up the bitmap structure
6  * bitmap_destroy - destroys the bitmap structure
7  *
8  * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
9  * - added disk storage for bitmap
10  * - changes to allow various bitmap chunk sizes
11  */
12 
13 /*
14  * Still to do:
15  *
16  * flush after percent set rather than just time based. (maybe both).
17  */
18 
19 #include <linux/blkdev.h>
20 #include <linux/module.h>
21 #include <linux/errno.h>
22 #include <linux/slab.h>
23 #include <linux/init.h>
24 #include <linux/timer.h>
25 #include <linux/sched.h>
26 #include <linux/list.h>
27 #include <linux/file.h>
28 #include <linux/mount.h>
29 #include <linux/buffer_head.h>
30 #include <linux/seq_file.h>
31 #include <trace/events/block.h>
32 
33 #include "md.h"
34 #include "md-bitmap.h"
35 #include "md-cluster.h"
36 
37 /*
38  * in-memory bitmap:
39  *
40  * Use 16 bit block counters to track pending writes to each "chunk".
41  * The 2 high order bits are special-purpose, the first is a flag indicating
42  * whether a resync is needed.  The second is a flag indicating whether a
43  * resync is active.
44  * This means that the counter is actually 14 bits:
45  *
46  * +--------+--------+------------------------------------------------+
47  * | resync | resync |               counter                          |
48  * | needed | active |                                                |
49  * |  (0-1) |  (0-1) |              (0-16383)                         |
50  * +--------+--------+------------------------------------------------+
51  *
52  * The "resync needed" bit is set when:
53  *    a '1' bit is read from storage at startup.
54  *    a write request fails on some drives
55  *    a resync is aborted on a chunk with 'resync active' set
56  * It is cleared (and resync-active set) when a resync starts across all drives
57  * of the chunk.
58  *
59  *
60  * The "resync active" bit is set when:
61  *    a resync is started on all drives, and resync_needed is set.
62  *       resync_needed will be cleared (as long as resync_active wasn't already set).
63  * It is cleared when a resync completes.
64  *
65  * The counter counts pending write requests, plus the on-disk bit.
66  * When the counter is '1' and the resync bits are clear, the on-disk
67  * bit can be cleared as well, thus setting the counter to 0.
 * When we set a bit, or increment the counter (to start a write), if the field
 * is 0, we first set the disk bit and set the counter to 1.
70  *
71  * If the counter is 0, the on-disk bit is clear and the stripe is clean
72  * Anything that dirties the stripe pushes the counter to 2 (at least)
73  * and sets the on-disk bit (lazily).
 * If a periodic sweep finds the counter at 2, it is decremented to 1.
 * If the sweep finds the counter at 1, the on-disk bit is cleared and the
 * counter goes to zero.
77  *
78  * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block
79  * counters as a fallback when "page" memory cannot be allocated:
80  *
81  * Normal case (page memory allocated):
82  *
83  *     page pointer (32-bit)
84  *
85  *     [ ] ------+
86  *               |
87  *               +-------> [   ][   ]..[   ] (4096 byte page == 2048 counters)
88  *                          c1   c2    c2048
89  *
90  * Hijacked case (page memory allocation failed):
91  *
92  *     hijacked page pointer (32-bit)
93  *
94  *     [		  ][		  ] (no page memory allocated)
95  *      counter #1 (16-bit) counter #2 (16-bit)
96  *
97  */
98 
99 typedef __u16 bitmap_counter_t;
100 
/* number of bits held by one page, and the same as a shift */
#define PAGE_BITS (PAGE_SIZE << 3)
#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3)

/* a counter is 16 bits (1 << 4) wide, i.e. 2 bytes (1 << 1) */
#define COUNTER_BITS 16
#define COUNTER_BIT_SHIFT 4
#define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3)

/*
 * Layout of a counter: the top bit is "resync needed", the next bit is
 * "resync active", and the remaining 14 bits count pending writes.
 */
#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1)))
#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2)))
#define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1)

/*
 * Field extractors.  The argument is parenthesized before it is cast so
 * that a compound expression (e.g. "v >> 16") is evaluated as a whole
 * instead of having the cast bind to its first operand only.
 */
#define NEEDED(x) (((bitmap_counter_t) (x)) & NEEDED_MASK)
#define RESYNC(x) (((bitmap_counter_t) (x)) & RESYNC_MASK)
#define COUNTER(x) (((bitmap_counter_t) (x)) & COUNTER_MAX)

/* how many counters per page? */
#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS)
/* same, except a shift value for more efficient bitops */
#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT)
/* same, except a mask value for more efficient bitops */
#define PAGE_COUNTER_MASK  (PAGE_COUNTER_RATIO - 1)

#define BITMAP_BLOCK_SHIFT 9
124 
125 /*
126  * bitmap structures:
127  */
128 
/*
 * The in-memory bitmap is an array of bitmap_page entries, each one
 * describing a single page worth of counters.
 */
struct bitmap_page {
	/* the memory page that actually holds the counters */
	char *map;
	/*
	 * Emergency mode: the page could not be allocated, so the map
	 * pointer itself is hijacked and used as two counters.
	 */
	unsigned int hijacked:1;
	/*
	 * Marked 'pending' when any counter in this page is '1' or '2'
	 * and so could be cleared.
	 */
	unsigned int pending:1;
	/* number of dirty bits on the page */
	unsigned int  count:30;
};
150 
151 /* the main bitmap structure - one per mddev */
152 struct bitmap {
153 
154 	struct bitmap_counts {
155 		spinlock_t lock;
156 		struct bitmap_page *bp;
157 		/* total number of pages in the bitmap */
158 		unsigned long pages;
159 		/* number of pages not yet allocated */
160 		unsigned long missing_pages;
161 		/* chunksize = 2^chunkshift (for bitops) */
162 		unsigned long chunkshift;
163 		/* total number of data chunks for the array */
164 		unsigned long chunks;
165 	} counts;
166 
167 	struct mddev *mddev; /* the md device that the bitmap is for */
168 
169 	__u64	events_cleared;
170 	int need_sync;
171 
172 	struct bitmap_storage {
173 		/* backing disk file */
174 		struct file *file;
175 		/* cached copy of the bitmap file superblock */
176 		struct page *sb_page;
177 		unsigned long sb_index;
178 		/* list of cache pages for the file */
179 		struct page **filemap;
180 		/* attributes associated filemap pages */
181 		unsigned long *filemap_attr;
182 		/* number of pages in the file */
183 		unsigned long file_pages;
184 		/* total bytes in the bitmap */
185 		unsigned long bytes;
186 	} storage;
187 
188 	unsigned long flags;
189 
190 	int allclean;
191 
192 	atomic_t behind_writes;
193 	/* highest actual value at runtime */
194 	unsigned long behind_writes_used;
195 
196 	/*
197 	 * the bitmap daemon - periodically wakes up and sweeps the bitmap
198 	 * file, cleaning up bits and flushing out pages to disk as necessary
199 	 */
200 	unsigned long daemon_lastrun; /* jiffies of last run */
201 	/*
202 	 * when we lasted called end_sync to update bitmap with resync
203 	 * progress.
204 	 */
205 	unsigned long last_end_sync;
206 
207 	/* pending writes to the bitmap file */
208 	atomic_t pending_writes;
209 	wait_queue_head_t write_wait;
210 	wait_queue_head_t overflow_wait;
211 	wait_queue_head_t behind_wait;
212 
213 	struct kernfs_node *sysfs_can_clear;
214 	/* slot offset for clustered env */
215 	int cluster_slot;
216 };
217 
218 static struct workqueue_struct *md_bitmap_wq;
219 static struct attribute_group md_bitmap_internal_group;
220 
221 static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks,
222 			   int chunksize, bool init);
223 
224 static inline char *bmname(struct bitmap *bitmap)
225 {
226 	return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
227 }
228 
229 static bool bitmap_enabled(void *data, bool flush)
230 {
231 	struct bitmap *bitmap = data;
232 
233 	if (!flush)
234 		return true;
235 
236 	/*
237 	 * If caller want to flush bitmap pages to underlying disks, check if
238 	 * there are cached pages in filemap.
239 	 */
240 	return !test_bit(BITMAP_STALE, &bitmap->flags) &&
241 	       bitmap->storage.filemap != NULL;
242 }
243 
244 /*
245  * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
246  *
247  * 1) check to see if this page is allocated, if it's not then try to alloc
248  * 2) if the alloc fails, set the page's hijacked flag so we'll use the
249  *    page pointer directly as a counter
250  *
251  * if we find our page, we increment the page's refcount so that it stays
252  * allocated while we're using it
253  */
254 static int md_bitmap_checkpage(struct bitmap_counts *bitmap,
255 			       unsigned long page, int create, int no_hijack)
256 __releases(bitmap->lock)
257 __acquires(bitmap->lock)
258 {
259 	unsigned char *mappage;
260 
261 	WARN_ON_ONCE(page >= bitmap->pages);
262 	if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
263 		return 0;
264 
265 	if (bitmap->bp[page].map) /* page is already allocated, just return */
266 		return 0;
267 
268 	if (!create)
269 		return -ENOENT;
270 
271 	/* this page has not been allocated yet */
272 
273 	spin_unlock_irq(&bitmap->lock);
274 	/* It is possible that this is being called inside a
275 	 * prepare_to_wait/finish_wait loop from raid5c:make_request().
276 	 * In general it is not permitted to sleep in that context as it
277 	 * can cause the loop to spin freely.
278 	 * That doesn't apply here as we can only reach this point
279 	 * once with any loop.
280 	 * When this function completes, either bp[page].map or
281 	 * bp[page].hijacked.  In either case, this function will
282 	 * abort before getting to this point again.  So there is
283 	 * no risk of a free-spin, and so it is safe to assert
284 	 * that sleeping here is allowed.
285 	 */
286 	sched_annotate_sleep();
287 	mappage = kzalloc(PAGE_SIZE, GFP_NOIO);
288 	spin_lock_irq(&bitmap->lock);
289 
290 	if (mappage == NULL) {
291 		pr_debug("md/bitmap: map page allocation failed, hijacking\n");
292 		/* We don't support hijack for cluster raid */
293 		if (no_hijack)
294 			return -ENOMEM;
295 		/* failed - set the hijacked flag so that we can use the
296 		 * pointer as a counter */
297 		if (!bitmap->bp[page].map)
298 			bitmap->bp[page].hijacked = 1;
299 	} else if (bitmap->bp[page].map ||
300 		   bitmap->bp[page].hijacked) {
301 		/* somebody beat us to getting the page */
302 		kfree(mappage);
303 	} else {
304 
305 		/* no page was in place and we have one, so install it */
306 
307 		bitmap->bp[page].map = mappage;
308 		bitmap->missing_pages--;
309 	}
310 	return 0;
311 }
312 
313 /* if page is completely empty, put it back on the free list, or dealloc it */
314 /* if page was hijacked, unmark the flag so it might get alloced next time */
315 /* Note: lock should be held when calling this */
316 static void md_bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page)
317 {
318 	char *ptr;
319 
320 	if (bitmap->bp[page].count) /* page is still busy */
321 		return;
322 
323 	/* page is no longer in use, it can be released */
324 
325 	if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
326 		bitmap->bp[page].hijacked = 0;
327 		bitmap->bp[page].map = NULL;
328 	} else {
329 		/* normal case, free the page */
330 		ptr = bitmap->bp[page].map;
331 		bitmap->bp[page].map = NULL;
332 		bitmap->missing_pages++;
333 		kfree(ptr);
334 	}
335 }
336 
337 /*
338  * bitmap file handling - read and write the bitmap file and its superblock
339  */
340 
341 /*
342  * basic page I/O operations
343  */
344 
345 /* IO operations when bitmap is stored near all superblocks */
346 
347 /* choose a good rdev and read the page from there */
348 static int read_sb_page(struct mddev *mddev, loff_t offset,
349 		struct page *page, unsigned long index, int size)
350 {
351 
352 	sector_t sector = mddev->bitmap_info.offset + offset +
353 		index * (PAGE_SIZE / SECTOR_SIZE);
354 	struct md_rdev *rdev;
355 
356 	rdev_for_each(rdev, mddev) {
357 		u32 iosize = roundup(size, bdev_logical_block_size(rdev->bdev));
358 
359 		if (!test_bit(In_sync, &rdev->flags) ||
360 		    test_bit(Faulty, &rdev->flags) ||
361 		    test_bit(Bitmap_sync, &rdev->flags))
362 			continue;
363 
364 		if (sync_page_io(rdev, sector, iosize, page, REQ_OP_READ, true))
365 			return 0;
366 	}
367 	return -EIO;
368 }
369 
370 static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev)
371 {
372 	/* Iterate the disks of an mddev, using rcu to protect access to the
373 	 * linked list, and raising the refcount of devices we return to ensure
374 	 * they don't disappear while in use.
375 	 * As devices are only added or removed when raid_disk is < 0 and
376 	 * nr_pending is 0 and In_sync is clear, the entries we return will
377 	 * still be in the same position on the list when we re-enter
378 	 * list_for_each_entry_continue_rcu.
379 	 *
380 	 * Note that if entered with 'rdev == NULL' to start at the
381 	 * beginning, we temporarily assign 'rdev' to an address which
382 	 * isn't really an rdev, but which can be used by
383 	 * list_for_each_entry_continue_rcu() to find the first entry.
384 	 */
385 	rcu_read_lock();
386 	if (rdev == NULL)
387 		/* start at the beginning */
388 		rdev = list_entry(&mddev->disks, struct md_rdev, same_set);
389 	else {
390 		/* release the previous rdev and start from there. */
391 		rdev_dec_pending(rdev, mddev);
392 	}
393 	list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) {
394 		if (rdev->raid_disk >= 0 &&
395 		    !test_bit(Faulty, &rdev->flags)) {
396 			/* this is a usable devices */
397 			atomic_inc(&rdev->nr_pending);
398 			rcu_read_unlock();
399 			return rdev;
400 		}
401 	}
402 	rcu_read_unlock();
403 	return NULL;
404 }
405 
/*
 * Pick the preferred write size for the last bitmap page: @last_page_size
 * rounded up to the device's optimal I/O size when that is larger than
 * the logical block size, @io_size otherwise.
 */
static unsigned int optimal_io_size(struct block_device *bdev,
				    unsigned int last_page_size,
				    unsigned int io_size)
{
	if (bdev_io_opt(bdev) <= bdev_logical_block_size(bdev))
		return io_size;

	return roundup(last_page_size, bdev_io_opt(bdev));
}
414 
415 static unsigned int bitmap_io_size(unsigned int io_size, unsigned int opt_size,
416 				   loff_t start, loff_t boundary)
417 {
418 	if (io_size != opt_size &&
419 	    start + opt_size / SECTOR_SIZE <= boundary)
420 		return opt_size;
421 	if (start + io_size / SECTOR_SIZE <= boundary)
422 		return io_size;
423 
424 	/* Overflows boundary */
425 	return 0;
426 }
427 
428 static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
429 			   unsigned long pg_index, struct page *page)
430 {
431 	struct block_device *bdev;
432 	struct mddev *mddev = bitmap->mddev;
433 	struct bitmap_storage *store = &bitmap->storage;
434 	unsigned long num_pages = bitmap->storage.file_pages;
435 	unsigned int bitmap_limit = (num_pages - pg_index % num_pages) << PAGE_SHIFT;
436 	loff_t sboff, offset = mddev->bitmap_info.offset;
437 	sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE;
438 	unsigned int size = PAGE_SIZE;
439 	unsigned int opt_size = PAGE_SIZE;
440 	sector_t doff;
441 
442 	bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
443 	/* we compare length (page numbers), not page offset. */
444 	if ((pg_index - store->sb_index) == num_pages - 1) {
445 		unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1);
446 
447 		if (last_page_size == 0)
448 			last_page_size = PAGE_SIZE;
449 		size = roundup(last_page_size, bdev_logical_block_size(bdev));
450 		opt_size = optimal_io_size(bdev, last_page_size, size);
451 	}
452 
453 	sboff = rdev->sb_start + offset;
454 	doff = rdev->data_offset;
455 
456 	/* Just make sure we aren't corrupting data or metadata */
457 	if (mddev->external) {
458 		/* Bitmap could be anywhere. */
459 		if (sboff + ps > doff &&
460 		    sboff < (doff + mddev->dev_sectors + PAGE_SIZE / SECTOR_SIZE))
461 			return -EINVAL;
462 	} else if (offset < 0) {
463 		/* DATA  BITMAP METADATA  */
464 		size = bitmap_io_size(size, opt_size, offset + ps, 0);
465 		if (size == 0)
466 			/* bitmap runs in to metadata */
467 			return -EINVAL;
468 
469 		if (doff + mddev->dev_sectors > sboff)
470 			/* data runs in to bitmap */
471 			return -EINVAL;
472 	} else if (rdev->sb_start < rdev->data_offset) {
473 		/* METADATA BITMAP DATA */
474 		size = bitmap_io_size(size, opt_size, sboff + ps, doff);
475 		if (size == 0)
476 			/* bitmap runs in to data */
477 			return -EINVAL;
478 	}
479 
480 	md_write_metadata(mddev, rdev, sboff + ps, (int)min(size, bitmap_limit),
481 			  page, 0);
482 	return 0;
483 }
484 
485 static void write_sb_page(struct bitmap *bitmap, unsigned long pg_index,
486 			  struct page *page, bool wait)
487 {
488 	struct mddev *mddev = bitmap->mddev;
489 
490 	do {
491 		struct md_rdev *rdev = NULL;
492 
493 		while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
494 			if (__write_sb_page(rdev, bitmap, pg_index, page) < 0) {
495 				set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
496 				return;
497 			}
498 		}
499 	} while (wait && md_super_wait(mddev) < 0);
500 }
501 
/* forward declaration, the definition is not in this part of the file */
static void md_bitmap_file_kick(struct bitmap *bitmap);
503 
504 #ifdef CONFIG_MD_BITMAP_FILE
505 static void end_bitmap_write(struct bio *bio)
506 {
507 	struct buffer_head *bh;
508 	bool uptodate = bio_endio_bh(bio, &bh);
509 	struct bitmap *bitmap = bh->b_private;
510 
511 	if (!uptodate)
512 		set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
513 	if (atomic_dec_and_test(&bitmap->pending_writes))
514 		wake_up(&bitmap->write_wait);
515 }
516 
517 static void write_file_page(struct bitmap *bitmap, struct page *page, int wait)
518 {
519 	struct buffer_head *bh = page_buffers(page);
520 
521 	while (bh && bh->b_blocknr) {
522 		atomic_inc(&bitmap->pending_writes);
523 		set_buffer_locked(bh);
524 		set_buffer_mapped(bh);
525 		bh_submit(bh, REQ_OP_WRITE | REQ_SYNC, end_bitmap_write);
526 		bh = bh->b_this_page;
527 	}
528 
529 	if (wait)
530 		wait_event(bitmap->write_wait,
531 			   atomic_read(&bitmap->pending_writes) == 0);
532 }
533 
534 static void free_buffers(struct page *page)
535 {
536 	struct buffer_head *bh;
537 
538 	if (!PagePrivate(page))
539 		return;
540 
541 	bh = page_buffers(page);
542 	while (bh) {
543 		struct buffer_head *next = bh->b_this_page;
544 		free_buffer_head(bh);
545 		bh = next;
546 	}
547 	detach_page_private(page);
548 	put_page(page);
549 }
550 
551 /* read a page from a file.
552  * We both read the page, and attach buffers to the page to record the
553  * address of each block (using bmap).  These addresses will be used
554  * to write the block later, completely bypassing the filesystem.
555  * This usage is similar to how swap files are handled, and allows us
556  * to write to a file with no concerns of memory allocation failing.
557  */
558 static int read_file_page(struct file *file, unsigned long index,
559 		struct bitmap *bitmap, unsigned long count, struct page *page)
560 {
561 	int ret = 0;
562 	struct inode *inode = file_inode(file);
563 	struct buffer_head *bh;
564 	sector_t block, blk_cur;
565 	unsigned long blocksize = i_blocksize(inode);
566 
567 	pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
568 		 (unsigned long long)index << PAGE_SHIFT);
569 
570 	bh = alloc_page_buffers(page, blocksize);
571 	if (!bh) {
572 		ret = -ENOMEM;
573 		goto out;
574 	}
575 	attach_page_private(page, bh);
576 	blk_cur = index << (PAGE_SHIFT - inode->i_blkbits);
577 	while (bh) {
578 		block = blk_cur;
579 
580 		if (count == 0)
581 			bh->b_blocknr = 0;
582 		else {
583 			ret = bmap(inode, &block);
584 			if (ret || !block) {
585 				ret = -EINVAL;
586 				bh->b_blocknr = 0;
587 				goto out;
588 			}
589 
590 			bh->b_blocknr = block;
591 			bh->b_bdev = inode->i_sb->s_bdev;
592 			if (count < blocksize)
593 				count = 0;
594 			else
595 				count -= blocksize;
596 
597 			bh->b_private = bitmap;
598 			atomic_inc(&bitmap->pending_writes);
599 			set_buffer_locked(bh);
600 			set_buffer_mapped(bh);
601 			bh_submit(bh, REQ_OP_READ, end_bitmap_write);
602 		}
603 		blk_cur++;
604 		bh = bh->b_this_page;
605 	}
606 
607 	wait_event(bitmap->write_wait,
608 		   atomic_read(&bitmap->pending_writes)==0);
609 	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
610 		ret = -EIO;
611 out:
612 	if (ret)
613 		pr_err("md: bitmap read error: (%dB @ %llu): %d\n",
614 		       (int)PAGE_SIZE,
615 		       (unsigned long long)index << PAGE_SHIFT,
616 		       ret);
617 	return ret;
618 }
619 #else /* CONFIG_MD_BITMAP_FILE */
/* without CONFIG_MD_BITMAP_FILE there is no bitmap file: nothing to write */
static void write_file_page(struct bitmap *bitmap, struct page *page, int wait)
{
}
623 static int read_file_page(struct file *file, unsigned long index,
624 		struct bitmap *bitmap, unsigned long count, struct page *page)
625 {
626 	return -EIO;
627 }
/* without CONFIG_MD_BITMAP_FILE no buffers are attached: just drop the page */
static void free_buffers(struct page *page)
{
	put_page(page);
}
632 #endif /* CONFIG_MD_BITMAP_FILE */
633 
634 /*
635  * bitmap file superblock operations
636  */
637 
638 /*
639  * write out a page to a file
640  */
641 static void filemap_write_page(struct bitmap *bitmap, unsigned long pg_index,
642 			       bool wait)
643 {
644 	struct bitmap_storage *store = &bitmap->storage;
645 	struct page *page = store->filemap[pg_index];
646 
647 	if (mddev_is_clustered(bitmap->mddev)) {
648 		/* go to node bitmap area starting point */
649 		pg_index += store->sb_index;
650 	}
651 
652 	if (store->file)
653 		write_file_page(bitmap, page, wait);
654 	else
655 		write_sb_page(bitmap, pg_index, page, wait);
656 }
657 
658 /*
659  * md_bitmap_wait_writes() should be called before writing any bitmap
660  * blocks, to ensure previous writes, particularly from
661  * md_bitmap_daemon_work(), have completed.
662  */
663 static void md_bitmap_wait_writes(struct bitmap *bitmap)
664 {
665 	if (bitmap->storage.file)
666 		wait_event(bitmap->write_wait,
667 			   atomic_read(&bitmap->pending_writes)==0);
668 	else
669 		/* Note that we ignore the return value.  The writes
670 		 * might have failed, but that would just mean that
671 		 * some bits which should be cleared haven't been,
672 		 * which is safe.  The relevant bitmap blocks will
673 		 * probably get written again, but there is no great
674 		 * loss if they aren't.
675 		 */
676 		md_super_wait(bitmap->mddev);
677 }
678 
679 
680 /* update the event counter and sync the superblock to disk */
681 static void bitmap_update_sb(void *data)
682 {
683 	bitmap_super_t *sb;
684 	struct bitmap *bitmap = data;
685 
686 	if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
687 		return;
688 	if (bitmap->mddev->bitmap_info.external)
689 		return;
690 	if (!bitmap->storage.sb_page) /* no superblock */
691 		return;
692 	sb = kmap_local_page(bitmap->storage.sb_page);
693 	sb->events = cpu_to_le64(bitmap->mddev->events);
694 	if (bitmap->mddev->events < bitmap->events_cleared)
695 		/* rocking back to read-only */
696 		bitmap->events_cleared = bitmap->mddev->events;
697 	sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
698 	/*
699 	 * clear BITMAP_WRITE_ERROR bit to protect against the case that
700 	 * a bitmap write error occurred but the later writes succeeded.
701 	 */
702 	sb->state = cpu_to_le32(bitmap->flags & ~BIT(BITMAP_WRITE_ERROR));
703 	/* Just in case these have been changed via sysfs: */
704 	sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
705 	sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
706 	/* This might have been changed by a reshape */
707 	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
708 	sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize);
709 	sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes);
710 	sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
711 					   bitmap_info.space);
712 	kunmap_local(sb);
713 
714 	if (bitmap->storage.file)
715 		write_file_page(bitmap, bitmap->storage.sb_page, 1);
716 	else
717 		write_sb_page(bitmap, bitmap->storage.sb_index,
718 			      bitmap->storage.sb_page, 1);
719 }
720 
721 static void bitmap_print_sb(struct bitmap *bitmap)
722 {
723 	bitmap_super_t *sb;
724 
725 	if (!bitmap || !bitmap->storage.sb_page)
726 		return;
727 	sb = kmap_local_page(bitmap->storage.sb_page);
728 	pr_debug("%s: bitmap file superblock:\n", bmname(bitmap));
729 	pr_debug("         magic: %08x\n", le32_to_cpu(sb->magic));
730 	pr_debug("       version: %u\n", le32_to_cpu(sb->version));
731 	pr_debug("          uuid: %08x.%08x.%08x.%08x\n",
732 		 le32_to_cpu(*(__le32 *)(sb->uuid+0)),
733 		 le32_to_cpu(*(__le32 *)(sb->uuid+4)),
734 		 le32_to_cpu(*(__le32 *)(sb->uuid+8)),
735 		 le32_to_cpu(*(__le32 *)(sb->uuid+12)));
736 	pr_debug("        events: %llu\n",
737 		 (unsigned long long) le64_to_cpu(sb->events));
738 	pr_debug("events cleared: %llu\n",
739 		 (unsigned long long) le64_to_cpu(sb->events_cleared));
740 	pr_debug("         state: %08x\n", le32_to_cpu(sb->state));
741 	pr_debug("     chunksize: %u B\n", le32_to_cpu(sb->chunksize));
742 	pr_debug("  daemon sleep: %us\n", le32_to_cpu(sb->daemon_sleep));
743 	pr_debug("     sync size: %llu KB\n",
744 		 (unsigned long long)le64_to_cpu(sb->sync_size)/2);
745 	pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind));
746 	kunmap_local(sb);
747 }
748 
749 /*
750  * bitmap_new_disk_sb
751  * @bitmap
752  *
753  * This function is somewhat the reverse of bitmap_read_sb.  bitmap_read_sb
754  * reads and verifies the on-disk bitmap superblock and populates bitmap_info.
755  * This function verifies 'bitmap_info' and populates the on-disk bitmap
756  * structure, which is to be written to disk.
757  *
758  * Returns: 0 on success, -Exxx on error
759  */
760 static int md_bitmap_new_disk_sb(struct bitmap *bitmap)
761 {
762 	bitmap_super_t *sb;
763 	unsigned long chunksize, daemon_sleep, write_behind;
764 
765 	bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
766 	if (bitmap->storage.sb_page == NULL)
767 		return -ENOMEM;
768 	bitmap->storage.sb_index = 0;
769 
770 	sb = kmap_local_page(bitmap->storage.sb_page);
771 
772 	sb->magic = cpu_to_le32(BITMAP_MAGIC);
773 	sb->version = cpu_to_le32(BITMAP_MAJOR_HI);
774 
775 	chunksize = bitmap->mddev->bitmap_info.chunksize;
776 	BUG_ON(!chunksize);
777 	if (!is_power_of_2(chunksize)) {
778 		kunmap_local(sb);
779 		pr_warn("bitmap chunksize not a power of 2\n");
780 		return -EINVAL;
781 	}
782 	sb->chunksize = cpu_to_le32(chunksize);
783 
784 	daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
785 	if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
786 		pr_debug("Choosing daemon_sleep default (5 sec)\n");
787 		daemon_sleep = 5 * HZ;
788 	}
789 	sb->daemon_sleep = cpu_to_le32(daemon_sleep);
790 	bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
791 
792 	/*
793 	 * FIXME: write_behind for RAID1.  If not specified, what
794 	 * is a good choice?  We choose COUNTER_MAX / 2 arbitrarily.
795 	 */
796 	write_behind = bitmap->mddev->bitmap_info.max_write_behind;
797 	if (write_behind > COUNTER_MAX / 2)
798 		write_behind = COUNTER_MAX / 2;
799 	sb->write_behind = cpu_to_le32(write_behind);
800 	bitmap->mddev->bitmap_info.max_write_behind = write_behind;
801 
802 	/* keep the array size field of the bitmap superblock up to date */
803 	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
804 
805 	memcpy(sb->uuid, bitmap->mddev->uuid, 16);
806 
807 	set_bit(BITMAP_STALE, &bitmap->flags);
808 	sb->state = cpu_to_le32(bitmap->flags);
809 	bitmap->events_cleared = bitmap->mddev->events;
810 	sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
811 	bitmap->mddev->bitmap_info.nodes = 0;
812 
813 	kunmap_local(sb);
814 
815 	return 0;
816 }
817 
818 /* read the superblock from the bitmap file and initialize some bitmap fields */
819 static int md_bitmap_read_sb(struct bitmap *bitmap)
820 {
821 	char *reason = NULL;
822 	bitmap_super_t *sb;
823 	unsigned long chunksize, daemon_sleep, write_behind;
824 	unsigned long long events;
825 	int nodes = 0;
826 	unsigned long sectors_reserved = 0;
827 	int err = -EINVAL;
828 	struct page *sb_page;
829 	loff_t offset = 0;
830 
831 	if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) {
832 		chunksize = 128 * 1024 * 1024;
833 		daemon_sleep = 5 * HZ;
834 		write_behind = 0;
835 		set_bit(BITMAP_STALE, &bitmap->flags);
836 		err = 0;
837 		goto out_no_sb;
838 	}
839 	/* page 0 is the superblock, read it... */
840 	sb_page = alloc_page(GFP_KERNEL);
841 	if (!sb_page)
842 		return -ENOMEM;
843 	bitmap->storage.sb_page = sb_page;
844 
845 re_read:
846 	/* If cluster_slot is set, the cluster is setup */
847 	if (bitmap->cluster_slot >= 0) {
848 		sector_t bm_blocks = bitmap->mddev->resync_max_sectors;
849 
850 		bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks,
851 			   (bitmap->mddev->bitmap_info.chunksize >> 9));
852 		/* bits to bytes */
853 		bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t);
854 		/* to 4k blocks */
855 		bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
856 		offset = bitmap->cluster_slot * (bm_blocks << 3);
857 		pr_debug("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
858 			bitmap->cluster_slot, offset);
859 	}
860 
861 	if (bitmap->storage.file) {
862 		loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host);
863 		int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;
864 
865 		err = read_file_page(bitmap->storage.file, 0,
866 				bitmap, bytes, sb_page);
867 	} else {
868 		err = read_sb_page(bitmap->mddev, offset, sb_page, 0,
869 				   sizeof(bitmap_super_t));
870 	}
871 	if (err)
872 		return err;
873 
874 	err = -EINVAL;
875 	sb = kmap_local_page(sb_page);
876 
877 	chunksize = le32_to_cpu(sb->chunksize);
878 	daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
879 	write_behind = le32_to_cpu(sb->write_behind);
880 	sectors_reserved = le32_to_cpu(sb->sectors_reserved);
881 
882 	/* verify that the bitmap-specific fields are valid */
883 	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
884 		reason = "bad magic";
885 	else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
886 		 le32_to_cpu(sb->version) > BITMAP_MAJOR_CLUSTERED)
887 		reason = "unrecognized superblock version";
888 	else if (chunksize < 512)
889 		reason = "bitmap chunksize too small";
890 	else if (!is_power_of_2(chunksize))
891 		reason = "bitmap chunksize not a power of 2";
892 	else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT)
893 		reason = "daemon sleep period out of range";
894 	else if (write_behind > COUNTER_MAX)
895 		reason = "write-behind limit out of range (0 - 16383)";
896 	if (reason) {
897 		pr_warn("%s: invalid bitmap file superblock: %s\n",
898 			bmname(bitmap), reason);
899 		goto out;
900 	}
901 
902 	/*
903 	 * Setup nodes/clustername only if bitmap version is
904 	 * cluster-compatible
905 	 */
906 	if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
907 		nodes = le32_to_cpu(sb->nodes);
908 		strscpy(bitmap->mddev->bitmap_info.cluster_name,
909 				sb->cluster_name, 64);
910 	}
911 
912 	/* keep the array size field of the bitmap superblock up to date */
913 	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
914 
915 	if (bitmap->mddev->persistent) {
916 		/*
917 		 * We have a persistent array superblock, so compare the
918 		 * bitmap's UUID and event counter to the mddev's
919 		 */
920 		if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
921 			pr_warn("%s: bitmap superblock UUID mismatch\n",
922 				bmname(bitmap));
923 			goto out;
924 		}
925 		events = le64_to_cpu(sb->events);
926 		if (!nodes && (events < bitmap->mddev->events)) {
927 			pr_warn("%s: bitmap file is out of date (%llu < %llu) -- forcing full recovery\n",
928 				bmname(bitmap), events,
929 				(unsigned long long) bitmap->mddev->events);
930 			set_bit(BITMAP_STALE, &bitmap->flags);
931 		}
932 	}
933 
934 	/* assign fields using values from superblock */
935 	bitmap->flags |= le32_to_cpu(sb->state);
936 	if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
937 		set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
938 	bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
939 	err = 0;
940 
941 out:
942 	kunmap_local(sb);
943 	if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
944 		/* Assigning chunksize is required for "re_read" */
945 		bitmap->mddev->bitmap_info.chunksize = chunksize;
946 		err = md_setup_cluster(bitmap->mddev, nodes);
947 		if (err) {
948 			pr_warn("%s: Could not setup cluster service (%d)\n",
949 				bmname(bitmap), err);
950 			goto out_no_sb;
951 		}
952 		bitmap->cluster_slot = bitmap->mddev->cluster_ops->slot_number(bitmap->mddev);
953 		goto re_read;
954 	}
955 
956 out_no_sb:
957 	if (err == 0) {
958 		if (test_bit(BITMAP_STALE, &bitmap->flags))
959 			bitmap->events_cleared = bitmap->mddev->events;
960 		bitmap->mddev->bitmap_info.chunksize = chunksize;
961 		bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
962 		bitmap->mddev->bitmap_info.max_write_behind = write_behind;
963 		bitmap->mddev->bitmap_info.nodes = nodes;
964 		if (bitmap->mddev->bitmap_info.space == 0 ||
965 			bitmap->mddev->bitmap_info.space > sectors_reserved)
966 			bitmap->mddev->bitmap_info.space = sectors_reserved;
967 	} else {
968 		bitmap_print_sb(bitmap);
969 		if (bitmap->cluster_slot < 0)
970 			md_cluster_stop(bitmap->mddev);
971 	}
972 	return err;
973 }
974 
975 /*
976  * general bitmap file operations
977  */
978 
979 /*
980  * on-disk bitmap:
981  *
982  * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
983  * file a page at a time. There's a superblock at the start of the file.
984  */
985 /* calculate the index of the page that contains this bit */
986 static inline unsigned long file_page_index(struct bitmap_storage *store,
987 					    unsigned long chunk)
988 {
989 	if (store->sb_page)
990 		chunk += sizeof(bitmap_super_t) << 3;
991 	return chunk >> PAGE_BIT_SHIFT;
992 }
993 
994 /* calculate the (bit) offset of this bit within a page */
995 static inline unsigned long file_page_offset(struct bitmap_storage *store,
996 					     unsigned long chunk)
997 {
998 	if (store->sb_page)
999 		chunk += sizeof(bitmap_super_t) << 3;
1000 	return chunk & (PAGE_BITS - 1);
1001 }
1002 
1003 /*
1004  * return a pointer to the page in the filemap that contains the given bit
1005  *
1006  */
1007 static inline struct page *filemap_get_page(struct bitmap_storage *store,
1008 					    unsigned long chunk)
1009 {
1010 	if (file_page_index(store, chunk) >= store->file_pages)
1011 		return NULL;
1012 	return store->filemap[file_page_index(store, chunk)];
1013 }
1014 
1015 static int md_bitmap_storage_alloc(struct bitmap_storage *store,
1016 				   unsigned long chunks, int with_super,
1017 				   int slot_number)
1018 {
1019 	int pnum, offset = 0;
1020 	unsigned long num_pages;
1021 	unsigned long bytes;
1022 
1023 	bytes = DIV_ROUND_UP(chunks, 8);
1024 	if (with_super)
1025 		bytes += sizeof(bitmap_super_t);
1026 
1027 	num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
1028 	offset = slot_number * num_pages;
1029 
1030 	store->filemap = kmalloc_objs(struct page *, num_pages);
1031 	if (!store->filemap)
1032 		return -ENOMEM;
1033 
1034 	if (with_super && !store->sb_page) {
1035 		store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
1036 		if (store->sb_page == NULL)
1037 			return -ENOMEM;
1038 	}
1039 
1040 	pnum = 0;
1041 	if (store->sb_page) {
1042 		store->filemap[0] = store->sb_page;
1043 		pnum = 1;
1044 		store->sb_index = offset;
1045 	}
1046 
1047 	for ( ; pnum < num_pages; pnum++) {
1048 		store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO);
1049 		if (!store->filemap[pnum]) {
1050 			store->file_pages = pnum;
1051 			return -ENOMEM;
1052 		}
1053 	}
1054 	store->file_pages = pnum;
1055 
1056 	/* We need 4 bits per page, rounded up to a multiple
1057 	 * of sizeof(unsigned long) */
1058 	store->filemap_attr = kzalloc(
1059 		roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
1060 		GFP_KERNEL);
1061 	if (!store->filemap_attr)
1062 		return -ENOMEM;
1063 
1064 	store->bytes = bytes;
1065 
1066 	return 0;
1067 }
1068 
1069 static void md_bitmap_file_unmap(struct bitmap_storage *store)
1070 {
1071 	struct file *file = store->file;
1072 	struct page *sb_page = store->sb_page;
1073 	struct page **map = store->filemap;
1074 	int pages = store->file_pages;
1075 
1076 	while (pages--)
1077 		if (map[pages] != sb_page) /* 0 is sb_page, release it below */
1078 			free_buffers(map[pages]);
1079 	kfree(map);
1080 	kfree(store->filemap_attr);
1081 
1082 	if (sb_page)
1083 		free_buffers(sb_page);
1084 
1085 	if (file) {
1086 		struct inode *inode = file_inode(file);
1087 		invalidate_mapping_pages(inode->i_mapping, 0, -1);
1088 		fput(file);
1089 	}
1090 }
1091 
1092 /*
1093  * bitmap_file_kick - if an error occurs while manipulating the bitmap file
1094  * then it is no longer reliable, so we stop using it and we mark the file
1095  * as failed in the superblock
1096  */
1097 static void md_bitmap_file_kick(struct bitmap *bitmap)
1098 {
1099 	if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) {
1100 		bitmap_update_sb(bitmap);
1101 
1102 		if (bitmap->storage.file) {
1103 			pr_warn("%s: kicking failed bitmap file %pD4 from array!\n",
1104 				bmname(bitmap), bitmap->storage.file);
1105 
1106 		} else
1107 			pr_warn("%s: disabling internal bitmap due to errors\n",
1108 				bmname(bitmap));
1109 	}
1110 }
1111 
/* Per-page attribute bits kept in bitmap_storage.filemap_attr. */
enum bitmap_page_attr {
	/* there are set bits that need to be synced */
	BITMAP_PAGE_DIRTY = 0,
	/* there are bits that are being cleaned, i.e. counter is 1 or 2 */
	BITMAP_PAGE_PENDING = 1,
	/* there are cleared bits that need to be synced */
	BITMAP_PAGE_NEEDWRITE = 2,
};
1118 
1119 static inline void set_page_attr(struct bitmap *bitmap, int pnum,
1120 				 enum bitmap_page_attr attr)
1121 {
1122 	set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
1123 }
1124 
1125 static inline void clear_page_attr(struct bitmap *bitmap, int pnum,
1126 				   enum bitmap_page_attr attr)
1127 {
1128 	clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
1129 }
1130 
1131 static inline int test_page_attr(struct bitmap *bitmap, int pnum,
1132 				 enum bitmap_page_attr attr)
1133 {
1134 	return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
1135 }
1136 
1137 static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum,
1138 					   enum bitmap_page_attr attr)
1139 {
1140 	return test_and_clear_bit((pnum<<2) + attr,
1141 				  bitmap->storage.filemap_attr);
1142 }
1143 /*
1144  * bitmap_file_set_bit -- called before performing a write to the md device
1145  * to set (and eventually sync) a particular bit in the bitmap file
1146  *
1147  * we set the bit immediately, then we record the page number so that
1148  * when an unplug occurs, we can flush the dirty pages out to disk
1149  */
1150 static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
1151 {
1152 	unsigned long bit;
1153 	struct page *page;
1154 	void *kaddr;
1155 	unsigned long chunk = block >> bitmap->counts.chunkshift;
1156 	struct bitmap_storage *store = &bitmap->storage;
1157 	unsigned long index = file_page_index(store, chunk);
1158 	unsigned long node_offset = 0;
1159 
1160 	index += store->sb_index;
1161 	if (mddev_is_clustered(bitmap->mddev))
1162 		node_offset = bitmap->cluster_slot * store->file_pages;
1163 
1164 	page = filemap_get_page(&bitmap->storage, chunk);
1165 	if (!page)
1166 		return;
1167 	bit = file_page_offset(&bitmap->storage, chunk);
1168 
1169 	/* set the bit */
1170 	kaddr = kmap_local_page(page);
1171 	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
1172 		set_bit(bit, kaddr);
1173 	else
1174 		set_bit_le(bit, kaddr);
1175 	kunmap_local(kaddr);
1176 	pr_debug("set file bit %lu page %lu\n", bit, index);
1177 	/* record page number so it gets flushed to disk when unplug occurs */
1178 	set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_DIRTY);
1179 }
1180 
1181 static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
1182 {
1183 	unsigned long bit;
1184 	struct page *page;
1185 	void *paddr;
1186 	unsigned long chunk = block >> bitmap->counts.chunkshift;
1187 	struct bitmap_storage *store = &bitmap->storage;
1188 	unsigned long index = file_page_index(store, chunk);
1189 	unsigned long node_offset = 0;
1190 
1191 	index += store->sb_index;
1192 	if (mddev_is_clustered(bitmap->mddev))
1193 		node_offset = bitmap->cluster_slot * store->file_pages;
1194 
1195 	page = filemap_get_page(&bitmap->storage, chunk);
1196 	if (!page)
1197 		return;
1198 	bit = file_page_offset(&bitmap->storage, chunk);
1199 	paddr = kmap_local_page(page);
1200 	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
1201 		clear_bit(bit, paddr);
1202 	else
1203 		clear_bit_le(bit, paddr);
1204 	kunmap_local(paddr);
1205 	if (!test_page_attr(bitmap, index - node_offset, BITMAP_PAGE_NEEDWRITE)) {
1206 		set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_PENDING);
1207 		bitmap->allclean = 0;
1208 	}
1209 }
1210 
1211 static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
1212 {
1213 	unsigned long bit;
1214 	struct page *page;
1215 	void *paddr;
1216 	unsigned long chunk = block >> bitmap->counts.chunkshift;
1217 	int set = 0;
1218 
1219 	page = filemap_get_page(&bitmap->storage, chunk);
1220 	if (!page)
1221 		return -EINVAL;
1222 	bit = file_page_offset(&bitmap->storage, chunk);
1223 	paddr = kmap_local_page(page);
1224 	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
1225 		set = test_bit(bit, paddr);
1226 	else
1227 		set = test_bit_le(bit, paddr);
1228 	kunmap_local(paddr);
1229 	return set;
1230 }
1231 
1232 /* this gets called when the md device is ready to unplug its underlying
1233  * (slave) device queues -- before we let any writes go down, we need to
1234  * sync the dirty pages of the bitmap file to disk */
1235 static void __bitmap_unplug(struct bitmap *bitmap)
1236 {
1237 	unsigned long i;
1238 	int dirty, need_write;
1239 	int writing = 0;
1240 
1241 	if (!bitmap_enabled(bitmap, true))
1242 		return;
1243 
1244 	/* look at each page to see if there are any set bits that need to be
1245 	 * flushed out to disk */
1246 	for (i = 0; i < bitmap->storage.file_pages; i++) {
1247 		dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
1248 		need_write = test_and_clear_page_attr(bitmap, i,
1249 						      BITMAP_PAGE_NEEDWRITE);
1250 		if (dirty || need_write) {
1251 			if (!writing) {
1252 				md_bitmap_wait_writes(bitmap);
1253 				mddev_add_trace_msg(bitmap->mddev,
1254 					"md bitmap_unplug");
1255 			}
1256 			clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
1257 			filemap_write_page(bitmap, i, false);
1258 			writing = 1;
1259 		}
1260 	}
1261 	if (writing)
1262 		md_bitmap_wait_writes(bitmap);
1263 
1264 	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
1265 		md_bitmap_file_kick(bitmap);
1266 }
1267 
1268 struct bitmap_unplug_work {
1269 	struct work_struct work;
1270 	struct bitmap *bitmap;
1271 	struct completion *done;
1272 };
1273 
1274 static void md_bitmap_unplug_fn(struct work_struct *work)
1275 {
1276 	struct bitmap_unplug_work *unplug_work =
1277 		container_of(work, struct bitmap_unplug_work, work);
1278 
1279 	__bitmap_unplug(unplug_work->bitmap);
1280 	complete(unplug_work->done);
1281 }
1282 
1283 static void bitmap_unplug_async(struct bitmap *bitmap)
1284 {
1285 	DECLARE_COMPLETION_ONSTACK(done);
1286 	struct bitmap_unplug_work unplug_work;
1287 
1288 	INIT_WORK_ONSTACK(&unplug_work.work, md_bitmap_unplug_fn);
1289 	unplug_work.bitmap = bitmap;
1290 	unplug_work.done = &done;
1291 
1292 	queue_work(md_bitmap_wq, &unplug_work.work);
1293 	wait_for_completion(&done);
1294 	destroy_work_on_stack(&unplug_work.work);
1295 }
1296 
1297 static void bitmap_unplug(struct mddev *mddev, bool sync)
1298 {
1299 	struct bitmap *bitmap = mddev->bitmap;
1300 
1301 	if (!bitmap)
1302 		return;
1303 
1304 	if (sync)
1305 		__bitmap_unplug(bitmap);
1306 	else
1307 		bitmap_unplug_async(bitmap);
1308 }
1309 
1310 static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
1311 
1312 /*
1313  * Initialize the in-memory bitmap from the on-disk bitmap and set up the memory
1314  * mapping of the bitmap file.
1315  *
1316  * Special case: If there's no bitmap file, or if the bitmap file had been
1317  * previously kicked from the array, we mark all the bits as 1's in order to
1318  * cause a full resync.
1319  *
1320  * We ignore all bits for sectors that end earlier than 'start'.
1321  * This is used when reading an out-of-date bitmap.
1322  */
1323 static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
1324 {
1325 	bool outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
1326 	struct mddev *mddev = bitmap->mddev;
1327 	unsigned long chunks = bitmap->counts.chunks;
1328 	struct bitmap_storage *store = &bitmap->storage;
1329 	struct file *file = store->file;
1330 	unsigned long node_offset = 0;
1331 	unsigned long bit_cnt = 0;
1332 	unsigned long i;
1333 	int ret;
1334 
1335 	if (!file && !mddev->bitmap_info.offset) {
1336 		/* No permanent bitmap - fill with '1s'. */
1337 		store->filemap = NULL;
1338 		store->file_pages = 0;
1339 		for (i = 0; i < chunks ; i++) {
1340 			/* if the disk bit is set, set the memory bit */
1341 			int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift)
1342 				      >= start);
1343 			md_bitmap_set_memory_bits(bitmap,
1344 						  (sector_t)i << bitmap->counts.chunkshift,
1345 						  needed);
1346 		}
1347 		return 0;
1348 	}
1349 
1350 	if (file && i_size_read(file->f_mapping->host) < store->bytes) {
1351 		pr_warn("%s: bitmap file too short %lu < %lu\n",
1352 			bmname(bitmap),
1353 			(unsigned long) i_size_read(file->f_mapping->host),
1354 			store->bytes);
1355 		ret = -ENOSPC;
1356 		goto err;
1357 	}
1358 
1359 	if (mddev_is_clustered(mddev))
1360 		node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE));
1361 
1362 	for (i = 0; i < store->file_pages; i++) {
1363 		struct page *page = store->filemap[i];
1364 		int count;
1365 
1366 		/* unmap the old page, we're done with it */
1367 		if (i == store->file_pages - 1)
1368 			count = store->bytes - i * PAGE_SIZE;
1369 		else
1370 			count = PAGE_SIZE;
1371 
1372 		if (file)
1373 			ret = read_file_page(file, i, bitmap, count, page);
1374 		else
1375 			ret = read_sb_page(mddev, 0, page, i + node_offset,
1376 					   count);
1377 		if (ret)
1378 			goto err;
1379 	}
1380 
1381 	if (outofdate) {
1382 		pr_warn("%s: bitmap file is out of date, doing full recovery\n",
1383 			bmname(bitmap));
1384 
1385 		for (i = 0; i < store->file_pages; i++) {
1386 			struct page *page = store->filemap[i];
1387 			unsigned long offset = 0;
1388 			void *paddr;
1389 
1390 			if (i == 0 && !mddev->bitmap_info.external)
1391 				offset = sizeof(bitmap_super_t);
1392 
1393 			/*
1394 			 * If the bitmap is out of date, dirty the whole page
1395 			 * and write it out
1396 			 */
1397 			paddr = kmap_local_page(page);
1398 			memset(paddr + offset, 0xff, PAGE_SIZE - offset);
1399 			kunmap_local(paddr);
1400 
1401 			filemap_write_page(bitmap, i, true);
1402 			if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) {
1403 				ret = -EIO;
1404 				goto err;
1405 			}
1406 		}
1407 	}
1408 
1409 	for (i = 0; i < chunks; i++) {
1410 		struct page *page = filemap_get_page(&bitmap->storage, i);
1411 		unsigned long bit = file_page_offset(&bitmap->storage, i);
1412 		void *paddr;
1413 		bool was_set;
1414 
1415 		paddr = kmap_local_page(page);
1416 		if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
1417 			was_set = test_bit(bit, paddr);
1418 		else
1419 			was_set = test_bit_le(bit, paddr);
1420 		kunmap_local(paddr);
1421 
1422 		if (was_set) {
1423 			/* if the disk bit is set, set the memory bit */
1424 			int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift
1425 				      >= start);
1426 			md_bitmap_set_memory_bits(bitmap,
1427 						  (sector_t)i << bitmap->counts.chunkshift,
1428 						  needed);
1429 			bit_cnt++;
1430 		}
1431 	}
1432 
1433 	pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n",
1434 		 bmname(bitmap), store->file_pages,
1435 		 bit_cnt, chunks);
1436 
1437 	return 0;
1438 
1439  err:
1440 	pr_warn("%s: bitmap initialisation failed: %d\n",
1441 		bmname(bitmap), ret);
1442 	return ret;
1443 }
1444 
1445 /* just flag bitmap pages as needing to be written. */
1446 static void bitmap_write_all(struct mddev *mddev)
1447 {
1448 	int i;
1449 	struct bitmap *bitmap = mddev->bitmap;
1450 
1451 	if (!bitmap || !bitmap->storage.filemap)
1452 		return;
1453 
1454 	/* Only one copy, so nothing needed */
1455 	if (bitmap->storage.file)
1456 		return;
1457 
1458 	for (i = 0; i < bitmap->storage.file_pages; i++)
1459 		set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE);
1460 	bitmap->allclean = 0;
1461 }
1462 
1463 static void md_bitmap_count_page(struct bitmap_counts *bitmap,
1464 				 sector_t offset, int inc)
1465 {
1466 	sector_t chunk = offset >> bitmap->chunkshift;
1467 	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1468 	bitmap->bp[page].count += inc;
1469 	md_bitmap_checkfree(bitmap, page);
1470 }
1471 
1472 static void md_bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset)
1473 {
1474 	sector_t chunk = offset >> bitmap->chunkshift;
1475 	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1476 	struct bitmap_page *bp = &bitmap->bp[page];
1477 
1478 	if (!bp->pending)
1479 		bp->pending = 1;
1480 }
1481 
1482 static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap,
1483 					       sector_t offset, sector_t *blocks,
1484 					       int create);
1485 
1486 static void mddev_set_timeout(struct mddev *mddev, unsigned long timeout,
1487 			      bool force)
1488 {
1489 	struct md_thread *thread;
1490 
1491 	rcu_read_lock();
1492 	thread = rcu_dereference(mddev->thread);
1493 
1494 	if (!thread)
1495 		goto out;
1496 
1497 	if (force || thread->timeout < MAX_SCHEDULE_TIMEOUT)
1498 		thread->timeout = timeout;
1499 
1500 out:
1501 	rcu_read_unlock();
1502 }
1503 
1504 /*
1505  * bitmap daemon -- periodically wakes up to clean bits and flush pages
1506  *			out to disk
1507  */
1508 static void bitmap_daemon_work(struct mddev *mddev)
1509 {
1510 	struct bitmap *bitmap;
1511 	unsigned long j;
1512 	unsigned long nextpage;
1513 	sector_t blocks;
1514 	struct bitmap_counts *counts;
1515 
1516 	/* Use a mutex to guard daemon_work against
1517 	 * bitmap_destroy.
1518 	 */
1519 	mutex_lock(&mddev->bitmap_info.mutex);
1520 	bitmap = mddev->bitmap;
1521 	if (bitmap == NULL) {
1522 		mutex_unlock(&mddev->bitmap_info.mutex);
1523 		return;
1524 	}
1525 	if (time_before(jiffies, bitmap->daemon_lastrun
1526 			+ mddev->bitmap_info.daemon_sleep))
1527 		goto done;
1528 
1529 	bitmap->daemon_lastrun = jiffies;
1530 	if (bitmap->allclean) {
1531 		mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);
1532 		goto done;
1533 	}
1534 	bitmap->allclean = 1;
1535 
1536 	mddev_add_trace_msg(bitmap->mddev, "md bitmap_daemon_work");
1537 
1538 	/* Any file-page which is PENDING now needs to be written.
1539 	 * So set NEEDWRITE now, then after we make any last-minute changes
1540 	 * we will write it.
1541 	 */
1542 	for (j = 0; j < bitmap->storage.file_pages; j++)
1543 		if (test_and_clear_page_attr(bitmap, j,
1544 					     BITMAP_PAGE_PENDING))
1545 			set_page_attr(bitmap, j,
1546 				      BITMAP_PAGE_NEEDWRITE);
1547 
1548 	if (bitmap->need_sync &&
1549 	    mddev->bitmap_info.external == 0) {
1550 		/* Arrange for superblock update as well as
1551 		 * other changes */
1552 		bitmap_super_t *sb;
1553 		bitmap->need_sync = 0;
1554 		if (bitmap->storage.filemap) {
1555 			sb = kmap_local_page(bitmap->storage.sb_page);
1556 			sb->events_cleared =
1557 				cpu_to_le64(bitmap->events_cleared);
1558 			kunmap_local(sb);
1559 			set_page_attr(bitmap, 0,
1560 				      BITMAP_PAGE_NEEDWRITE);
1561 		}
1562 	}
1563 	/* Now look at the bitmap counters and if any are '2' or '1',
1564 	 * decrement and handle accordingly.
1565 	 */
1566 	counts = &bitmap->counts;
1567 	spin_lock_irq(&counts->lock);
1568 	nextpage = 0;
1569 	for (j = 0; j < counts->chunks; j++) {
1570 		bitmap_counter_t *bmc;
1571 		sector_t  block = (sector_t)j << counts->chunkshift;
1572 
1573 		if (j == nextpage) {
1574 			nextpage += PAGE_COUNTER_RATIO;
1575 			if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) {
1576 				j |= PAGE_COUNTER_MASK;
1577 				continue;
1578 			}
1579 			counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0;
1580 		}
1581 
1582 		bmc = md_bitmap_get_counter(counts, block, &blocks, 0);
1583 		if (!bmc) {
1584 			j |= PAGE_COUNTER_MASK;
1585 			continue;
1586 		}
1587 		if (*bmc == 1 && !bitmap->need_sync) {
1588 			/* We can clear the bit */
1589 			*bmc = 0;
1590 			md_bitmap_count_page(counts, block, -1);
1591 			md_bitmap_file_clear_bit(bitmap, block);
1592 		} else if (*bmc && *bmc <= 2) {
1593 			*bmc = 1;
1594 			md_bitmap_set_pending(counts, block);
1595 			bitmap->allclean = 0;
1596 		}
1597 	}
1598 	spin_unlock_irq(&counts->lock);
1599 
1600 	md_bitmap_wait_writes(bitmap);
1601 	/* Now start writeout on any page in NEEDWRITE that isn't DIRTY.
1602 	 * DIRTY pages need to be written by bitmap_unplug so it can wait
1603 	 * for them.
1604 	 * If we find any DIRTY page we stop there and let bitmap_unplug
1605 	 * handle all the rest.  This is important in the case where
1606 	 * the first blocking holds the superblock and it has been updated.
1607 	 * We mustn't write any other blocks before the superblock.
1608 	 */
1609 	for (j = 0;
1610 	     j < bitmap->storage.file_pages
1611 		     && !test_bit(BITMAP_STALE, &bitmap->flags);
1612 	     j++) {
1613 		if (test_page_attr(bitmap, j,
1614 				   BITMAP_PAGE_DIRTY))
1615 			/* bitmap_unplug will handle the rest */
1616 			break;
1617 		if (bitmap->storage.filemap &&
1618 		    test_and_clear_page_attr(bitmap, j,
1619 					     BITMAP_PAGE_NEEDWRITE))
1620 			filemap_write_page(bitmap, j, false);
1621 	}
1622 
1623  done:
1624 	if (bitmap->allclean == 0)
1625 		mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
1626 	mutex_unlock(&mddev->bitmap_info.mutex);
1627 }
1628 
1629 static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap,
1630 					       sector_t offset, sector_t *blocks,
1631 					       int create)
1632 __releases(bitmap->lock)
1633 __acquires(bitmap->lock)
1634 {
1635 	/* If 'create', we might release the lock and reclaim it.
1636 	 * The lock must have been taken with interrupts enabled.
1637 	 * If !create, we don't release the lock.
1638 	 */
1639 	sector_t chunk = offset >> bitmap->chunkshift;
1640 	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1641 	unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
1642 	sector_t csize = ((sector_t)1) << bitmap->chunkshift;
1643 	int err;
1644 
1645 	if (page >= bitmap->pages) {
1646 		/*
1647 		 * This can happen if bitmap_start_sync goes beyond
1648 		 * End-of-device while looking for a whole page or
1649 		 * user set a huge number to sysfs bitmap_set_bits.
1650 		 */
1651 		*blocks = csize - (offset & (csize - 1));
1652 		return NULL;
1653 	}
1654 	err = md_bitmap_checkpage(bitmap, page, create, 0);
1655 
1656 	if (bitmap->bp[page].hijacked ||
1657 	    bitmap->bp[page].map == NULL)
1658 		csize = ((sector_t)1) << (bitmap->chunkshift +
1659 					  PAGE_COUNTER_SHIFT);
1660 
1661 	*blocks = csize - (offset & (csize - 1));
1662 
1663 	if (err < 0)
1664 		return NULL;
1665 
1666 	/* now locked ... */
1667 
1668 	if (bitmap->bp[page].hijacked) { /* hijacked pointer */
1669 		/* should we use the first or second counter field
1670 		 * of the hijacked pointer? */
1671 		int hi = (pageoff > PAGE_COUNTER_MASK);
1672 		return  &((bitmap_counter_t *)
1673 			  &bitmap->bp[page].map)[hi];
1674 	} else /* page is allocated */
1675 		return (bitmap_counter_t *)
1676 			&(bitmap->bp[page].map[pageoff]);
1677 }
1678 
1679 static void bitmap_start_write(struct mddev *mddev, sector_t offset,
1680 			       unsigned long sectors)
1681 {
1682 	struct bitmap *bitmap = mddev->bitmap;
1683 
1684 	if (!bitmap)
1685 		return;
1686 
1687 	while (sectors) {
1688 		sector_t blocks;
1689 		bitmap_counter_t *bmc;
1690 
1691 		spin_lock_irq(&bitmap->counts.lock);
1692 		bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 1);
1693 		if (!bmc) {
1694 			spin_unlock_irq(&bitmap->counts.lock);
1695 			return;
1696 		}
1697 
1698 		if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) {
1699 			DEFINE_WAIT(__wait);
1700 			/* note that it is safe to do the prepare_to_wait
1701 			 * after the test as long as we do it before dropping
1702 			 * the spinlock.
1703 			 */
1704 			prepare_to_wait(&bitmap->overflow_wait, &__wait,
1705 					TASK_UNINTERRUPTIBLE);
1706 			spin_unlock_irq(&bitmap->counts.lock);
1707 			schedule();
1708 			finish_wait(&bitmap->overflow_wait, &__wait);
1709 			continue;
1710 		}
1711 
1712 		switch (*bmc) {
1713 		case 0:
1714 			md_bitmap_file_set_bit(bitmap, offset);
1715 			md_bitmap_count_page(&bitmap->counts, offset, 1);
1716 			fallthrough;
1717 		case 1:
1718 			*bmc = 2;
1719 		}
1720 
1721 		(*bmc)++;
1722 
1723 		spin_unlock_irq(&bitmap->counts.lock);
1724 
1725 		offset += blocks;
1726 		if (sectors > blocks)
1727 			sectors -= blocks;
1728 		else
1729 			sectors = 0;
1730 	}
1731 }
1732 
1733 static void bitmap_end_write(struct mddev *mddev, sector_t offset,
1734 			     unsigned long sectors)
1735 {
1736 	struct bitmap *bitmap = mddev->bitmap;
1737 
1738 	if (!bitmap)
1739 		return;
1740 
1741 	while (sectors) {
1742 		sector_t blocks;
1743 		unsigned long flags;
1744 		bitmap_counter_t *bmc;
1745 
1746 		spin_lock_irqsave(&bitmap->counts.lock, flags);
1747 		bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 0);
1748 		if (!bmc) {
1749 			spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1750 			return;
1751 		}
1752 
1753 		if (!bitmap->mddev->degraded) {
1754 			if (bitmap->events_cleared < bitmap->mddev->events) {
1755 				bitmap->events_cleared = bitmap->mddev->events;
1756 				bitmap->need_sync = 1;
1757 				sysfs_notify_dirent_safe(
1758 						bitmap->sysfs_can_clear);
1759 			}
1760 		} else if (!NEEDED(*bmc)) {
1761 			*bmc |= NEEDED_MASK;
1762 		}
1763 
1764 		if (COUNTER(*bmc) == COUNTER_MAX)
1765 			wake_up(&bitmap->overflow_wait);
1766 
1767 		(*bmc)--;
1768 		if (*bmc <= 2) {
1769 			md_bitmap_set_pending(&bitmap->counts, offset);
1770 			bitmap->allclean = 0;
1771 		}
1772 		spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1773 		offset += blocks;
1774 		if (sectors > blocks)
1775 			sectors -= blocks;
1776 		else
1777 			sectors = 0;
1778 	}
1779 }
1780 
1781 static bool __bitmap_start_sync(struct bitmap *bitmap, sector_t offset,
1782 				sector_t *blocks, bool degraded)
1783 {
1784 	bitmap_counter_t *bmc;
1785 	bool rv = false;
1786 
1787 	spin_lock_irq(&bitmap->counts.lock);
1788 	bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
1789 	if (bmc) {
1790 		/* locked */
1791 		if (RESYNC(*bmc)) {
1792 			rv = true;
1793 		} else if (NEEDED(*bmc)) {
1794 			rv = true;
1795 			if (!degraded) { /* don't set/clear bits if degraded */
1796 				*bmc |= RESYNC_MASK;
1797 				*bmc &= ~NEEDED_MASK;
1798 			}
1799 		}
1800 	}
1801 	spin_unlock_irq(&bitmap->counts.lock);
1802 
1803 	return rv;
1804 }
1805 
1806 static bool bitmap_start_sync(struct mddev *mddev, sector_t offset,
1807 			      sector_t *blocks, bool degraded)
1808 {
1809 	/* bitmap_start_sync must always report on multiples of whole
1810 	 * pages, otherwise resync (which is very PAGE_SIZE based) will
1811 	 * get confused.
1812 	 * So call __bitmap_start_sync repeatedly (if needed) until
1813 	 * At least PAGE_SIZE>>9 blocks are covered.
1814 	 * Return the 'or' of the result.
1815 	 */
1816 	bool rv = false;
1817 	sector_t blocks1;
1818 
1819 	*blocks = 0;
1820 	while (*blocks < (PAGE_SIZE>>9)) {
1821 		rv |= __bitmap_start_sync(mddev->bitmap, offset,
1822 					  &blocks1, degraded);
1823 		offset += blocks1;
1824 		*blocks += blocks1;
1825 	}
1826 
1827 	return rv;
1828 }
1829 
1830 static void __bitmap_end_sync(struct bitmap *bitmap, sector_t offset,
1831 			      sector_t *blocks, bool aborted)
1832 {
1833 	bitmap_counter_t *bmc;
1834 	unsigned long flags;
1835 
1836 	spin_lock_irqsave(&bitmap->counts.lock, flags);
1837 	bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
1838 	if (bmc == NULL)
1839 		goto unlock;
1840 	/* locked */
1841 	if (RESYNC(*bmc)) {
1842 		*bmc &= ~RESYNC_MASK;
1843 
1844 		if (!NEEDED(*bmc) && aborted)
1845 			*bmc |= NEEDED_MASK;
1846 		else {
1847 			if (*bmc <= 2) {
1848 				md_bitmap_set_pending(&bitmap->counts, offset);
1849 				bitmap->allclean = 0;
1850 			}
1851 		}
1852 	}
1853  unlock:
1854 	spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1855 }
1856 
1857 static void bitmap_end_sync(struct mddev *mddev, sector_t offset,
1858 			    sector_t *blocks)
1859 {
1860 	__bitmap_end_sync(mddev->bitmap, offset, blocks, true);
1861 }
1862 
1863 static void bitmap_close_sync(struct mddev *mddev)
1864 {
1865 	/* Sync has finished, and any bitmap chunks that weren't synced
1866 	 * properly have been aborted.  It remains to us to clear the
1867 	 * RESYNC bit wherever it is still on
1868 	 */
1869 	sector_t sector = 0;
1870 	sector_t blocks;
1871 	struct bitmap *bitmap = mddev->bitmap;
1872 
1873 	if (!bitmap)
1874 		return;
1875 
1876 	while (sector < bitmap->mddev->resync_max_sectors) {
1877 		__bitmap_end_sync(bitmap, sector, &blocks, false);
1878 		sector += blocks;
1879 	}
1880 }
1881 
1882 static void bitmap_cond_end_sync(struct mddev *mddev, sector_t sector,
1883 				 bool force)
1884 {
1885 	sector_t s = 0;
1886 	sector_t blocks;
1887 	struct bitmap *bitmap = mddev->bitmap;
1888 
1889 	if (!bitmap)
1890 		return;
1891 	if (sector == 0) {
1892 		bitmap->last_end_sync = jiffies;
1893 		return;
1894 	}
1895 	if (!force && time_before(jiffies, (bitmap->last_end_sync
1896 				  + bitmap->mddev->bitmap_info.daemon_sleep)))
1897 		return;
1898 	wait_event(bitmap->mddev->recovery_wait,
1899 		   atomic_read(&bitmap->mddev->recovery_active) == 0);
1900 
1901 	bitmap->mddev->curr_resync_completed = sector;
1902 	set_bit(MD_SB_CHANGE_CLEAN, &bitmap->mddev->sb_flags);
1903 	sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);
1904 	s = 0;
1905 	while (s < sector && s < bitmap->mddev->resync_max_sectors) {
1906 		__bitmap_end_sync(bitmap, s, &blocks, false);
1907 		s += blocks;
1908 	}
1909 	bitmap->last_end_sync = jiffies;
1910 	sysfs_notify_dirent_safe(bitmap->mddev->sysfs_completed);
1911 }
1912 
1913 static void bitmap_sync_with_cluster(struct mddev *mddev,
1914 				     sector_t old_lo, sector_t old_hi,
1915 				     sector_t new_lo, sector_t new_hi)
1916 {
1917 	struct bitmap *bitmap = mddev->bitmap;
1918 	sector_t sector, blocks = 0;
1919 
1920 	for (sector = old_lo; sector < new_lo; ) {
1921 		__bitmap_end_sync(bitmap, sector, &blocks, false);
1922 		sector += blocks;
1923 	}
1924 	WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n");
1925 
1926 	for (sector = old_hi; sector < new_hi; ) {
1927 		bitmap_start_sync(mddev, sector, &blocks, false);
1928 		sector += blocks;
1929 	}
1930 	WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n");
1931 }
1932 
1933 static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
1934 {
1935 	/* For each chunk covered by any of these sectors, set the
1936 	 * counter to 2 and possibly set resync_needed.  They should all
1937 	 * be 0 at this point
1938 	 */
1939 
1940 	sector_t secs;
1941 	bitmap_counter_t *bmc;
1942 	spin_lock_irq(&bitmap->counts.lock);
1943 	bmc = md_bitmap_get_counter(&bitmap->counts, offset, &secs, 1);
1944 	if (!bmc) {
1945 		spin_unlock_irq(&bitmap->counts.lock);
1946 		return;
1947 	}
1948 	if (!*bmc) {
1949 		*bmc = 2;
1950 		md_bitmap_count_page(&bitmap->counts, offset, 1);
1951 		md_bitmap_set_pending(&bitmap->counts, offset);
1952 		bitmap->allclean = 0;
1953 	}
1954 	if (needed)
1955 		*bmc |= NEEDED_MASK;
1956 	spin_unlock_irq(&bitmap->counts.lock);
1957 }
1958 
1959 /* dirty the memory and file bits for bitmap chunks "s" to "e" */
1960 static void bitmap_dirty_bits(struct mddev *mddev, unsigned long s,
1961 			      unsigned long e)
1962 {
1963 	unsigned long chunk;
1964 	struct bitmap *bitmap = mddev->bitmap;
1965 
1966 	if (!bitmap)
1967 		return;
1968 
1969 	for (chunk = s; chunk <= e; chunk++) {
1970 		sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift;
1971 
1972 		md_bitmap_set_memory_bits(bitmap, sec, 1);
1973 		md_bitmap_file_set_bit(bitmap, sec);
1974 		if (sec < bitmap->mddev->resync_offset)
1975 			/* We are asserting that the array is dirty,
1976 			 * so move the resync_offset address back so
1977 			 * that it is obvious that it is dirty
1978 			 */
1979 			bitmap->mddev->resync_offset = sec;
1980 	}
1981 }
1982 
1983 static void bitmap_flush(struct mddev *mddev)
1984 {
1985 	struct bitmap *bitmap = mddev->bitmap;
1986 	long sleep;
1987 
1988 	if (!bitmap) /* there was no bitmap */
1989 		return;
1990 
1991 	/* run the daemon_work three time to ensure everything is flushed
1992 	 * that can be
1993 	 */
1994 	sleep = mddev->bitmap_info.daemon_sleep * 2;
1995 	bitmap->daemon_lastrun -= sleep;
1996 	bitmap_daemon_work(mddev);
1997 	bitmap->daemon_lastrun -= sleep;
1998 	bitmap_daemon_work(mddev);
1999 	bitmap->daemon_lastrun -= sleep;
2000 	bitmap_daemon_work(mddev);
2001 	if (mddev->bitmap_info.external)
2002 		md_super_wait(mddev);
2003 	bitmap_update_sb(bitmap);
2004 }
2005 
2006 static void md_bitmap_free(void *data)
2007 {
2008 	unsigned long k, pages;
2009 	struct bitmap_page *bp;
2010 	struct bitmap *bitmap = data;
2011 
2012 	if (!bitmap) /* there was no bitmap */
2013 		return;
2014 
2015 	if (bitmap->sysfs_can_clear)
2016 		sysfs_put(bitmap->sysfs_can_clear);
2017 
2018 	if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info &&
2019 		bitmap->cluster_slot == bitmap->mddev->cluster_ops->slot_number(bitmap->mddev))
2020 		md_cluster_stop(bitmap->mddev);
2021 
2022 	/* Shouldn't be needed - but just in case.... */
2023 	wait_event(bitmap->write_wait,
2024 		   atomic_read(&bitmap->pending_writes) == 0);
2025 
2026 	/* release the bitmap file  */
2027 	md_bitmap_file_unmap(&bitmap->storage);
2028 
2029 	bp = bitmap->counts.bp;
2030 	pages = bitmap->counts.pages;
2031 
2032 	/* free all allocated memory */
2033 
2034 	if (bp) /* deallocate the page memory */
2035 		for (k = 0; k < pages; k++)
2036 			if (bp[k].map && !bp[k].hijacked)
2037 				kfree(bp[k].map);
2038 	kfree(bp);
2039 	kfree(bitmap);
2040 }
2041 
2042 static void bitmap_start_behind_write(struct mddev *mddev)
2043 {
2044 	struct bitmap *bitmap = mddev->bitmap;
2045 	int bw;
2046 
2047 	atomic_inc(&bitmap->behind_writes);
2048 	bw = atomic_read(&bitmap->behind_writes);
2049 	if (bw > bitmap->behind_writes_used)
2050 		bitmap->behind_writes_used = bw;
2051 
2052 	pr_debug("inc write-behind count %d/%lu\n",
2053 		 bw, bitmap->mddev->bitmap_info.max_write_behind);
2054 }
2055 
2056 static void bitmap_end_behind_write(struct mddev *mddev)
2057 {
2058 	struct bitmap *bitmap = mddev->bitmap;
2059 
2060 	if (atomic_dec_and_test(&bitmap->behind_writes))
2061 		wake_up(&bitmap->behind_wait);
2062 	pr_debug("dec write-behind count %d/%lu\n",
2063 		 atomic_read(&bitmap->behind_writes),
2064 		 bitmap->mddev->bitmap_info.max_write_behind);
2065 }
2066 
2067 static void bitmap_wait_behind_writes(struct mddev *mddev)
2068 {
2069 	struct bitmap *bitmap = mddev->bitmap;
2070 
2071 	/* wait for behind writes to complete */
2072 	if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
2073 		pr_debug("md:%s: behind writes in progress - waiting to stop.\n",
2074 			 mdname(mddev));
2075 		/* need to kick something here to make sure I/O goes? */
2076 		wait_event(bitmap->behind_wait,
2077 			   atomic_read(&bitmap->behind_writes) == 0);
2078 	}
2079 }
2080 
2081 static void bitmap_destroy(struct mddev *mddev)
2082 {
2083 	struct bitmap *bitmap = mddev->bitmap;
2084 
2085 	if (!bitmap) /* there was no bitmap */
2086 		return;
2087 
2088 	bitmap_wait_behind_writes(mddev);
2089 	if (!test_bit(MD_SERIALIZE_POLICY, &mddev->flags))
2090 		mddev_destroy_serial_pool(mddev, NULL);
2091 
2092 	mutex_lock(&mddev->bitmap_info.mutex);
2093 	spin_lock(&mddev->lock);
2094 	mddev->bitmap = NULL; /* disconnect from the md device */
2095 	spin_unlock(&mddev->lock);
2096 	mutex_unlock(&mddev->bitmap_info.mutex);
2097 	mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);
2098 
2099 	md_bitmap_free(bitmap);
2100 }
2101 
2102 /*
2103  * initialize the bitmap structure
2104  * if this returns an error, bitmap_destroy must be called to do clean up
2105  * once mddev->bitmap is set
2106  */
2107 static struct bitmap *__bitmap_create(struct mddev *mddev, int slot)
2108 {
2109 	struct bitmap *bitmap;
2110 	sector_t blocks = mddev->resync_max_sectors;
2111 	struct file *file = mddev->bitmap_info.file;
2112 	int err;
2113 	struct kernfs_node *bm = NULL;
2114 
2115 	BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
2116 
2117 	BUG_ON(file && mddev->bitmap_info.offset);
2118 
2119 	if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
2120 		pr_notice("md/raid:%s: array with journal cannot have bitmap\n",
2121 			  mdname(mddev));
2122 		return ERR_PTR(-EBUSY);
2123 	}
2124 
2125 	bitmap = kzalloc_obj(*bitmap);
2126 	if (!bitmap)
2127 		return ERR_PTR(-ENOMEM);
2128 
2129 	spin_lock_init(&bitmap->counts.lock);
2130 	atomic_set(&bitmap->pending_writes, 0);
2131 	init_waitqueue_head(&bitmap->write_wait);
2132 	init_waitqueue_head(&bitmap->overflow_wait);
2133 	init_waitqueue_head(&bitmap->behind_wait);
2134 
2135 	bitmap->mddev = mddev;
2136 	bitmap->cluster_slot = slot;
2137 
2138 	if (mddev->kobj.sd)
2139 		bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
2140 	if (bm) {
2141 		bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
2142 		sysfs_put(bm);
2143 	} else
2144 		bitmap->sysfs_can_clear = NULL;
2145 
2146 	bitmap->storage.file = file;
2147 	if (file) {
2148 		get_file(file);
2149 		/* As future accesses to this file will use bmap,
2150 		 * and bypass the page cache, we must sync the file
2151 		 * first.
2152 		 */
2153 		vfs_fsync(file, 1);
2154 	}
2155 	/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
2156 	if (!mddev->bitmap_info.external) {
2157 		/*
2158 		 * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is
2159 		 * instructing us to create a new on-disk bitmap instance.
2160 		 */
2161 		if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags))
2162 			err = md_bitmap_new_disk_sb(bitmap);
2163 		else
2164 			err = md_bitmap_read_sb(bitmap);
2165 	} else {
2166 		err = 0;
2167 		if (mddev->bitmap_info.chunksize == 0 ||
2168 		    mddev->bitmap_info.daemon_sleep == 0)
2169 			/* chunksize and time_base need to be
2170 			 * set first. */
2171 			err = -EINVAL;
2172 	}
2173 	if (err)
2174 		goto error;
2175 
2176 	bitmap->daemon_lastrun = jiffies;
2177 	err = __bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize,
2178 			      true);
2179 	if (err)
2180 		goto error;
2181 
2182 	pr_debug("created bitmap (%lu pages) for device %s\n",
2183 		 bitmap->counts.pages, bmname(bitmap));
2184 
2185 	err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
2186 	if (err)
2187 		goto error;
2188 
2189 	return bitmap;
2190  error:
2191 	md_bitmap_free(bitmap);
2192 	return ERR_PTR(err);
2193 }
2194 
2195 static int bitmap_create(struct mddev *mddev)
2196 {
2197 	struct bitmap *bitmap = __bitmap_create(mddev, -1);
2198 
2199 	if (IS_ERR(bitmap))
2200 		return PTR_ERR(bitmap);
2201 
2202 	mddev->bitmap = bitmap;
2203 	return 0;
2204 }
2205 
2206 static int bitmap_load(struct mddev *mddev)
2207 {
2208 	int err = 0;
2209 	sector_t start = 0;
2210 	sector_t sector = 0;
2211 	struct bitmap *bitmap = mddev->bitmap;
2212 	struct md_rdev *rdev;
2213 
2214 	if (!bitmap)
2215 		goto out;
2216 
2217 	rdev_for_each(rdev, mddev)
2218 		mddev_create_serial_pool(mddev, rdev);
2219 
2220 	if (mddev_is_clustered(mddev))
2221 		mddev->cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
2222 
2223 	/* Clear out old bitmap info first:  Either there is none, or we
2224 	 * are resuming after someone else has possibly changed things,
2225 	 * so we should forget old cached info.
2226 	 * All chunks should be clean, but some might need_sync.
2227 	 */
2228 	while (sector < mddev->resync_max_sectors) {
2229 		sector_t blocks;
2230 		bitmap_start_sync(mddev, sector, &blocks, false);
2231 		sector += blocks;
2232 	}
2233 	bitmap_close_sync(mddev);
2234 
2235 	if (mddev->degraded == 0
2236 	    || bitmap->events_cleared == mddev->events)
2237 		/* no need to keep dirty bits to optimise a
2238 		 * re-add of a missing device */
2239 		start = mddev->resync_offset;
2240 
2241 	mutex_lock(&mddev->bitmap_info.mutex);
2242 	err = md_bitmap_init_from_disk(bitmap, start);
2243 	mutex_unlock(&mddev->bitmap_info.mutex);
2244 
2245 	if (err)
2246 		goto out;
2247 	clear_bit(BITMAP_STALE, &bitmap->flags);
2248 
2249 	/* Kick recovery in case any bits were set */
2250 	set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
2251 
2252 	mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
2253 	md_wakeup_thread(mddev->thread);
2254 
2255 	bitmap_update_sb(bitmap);
2256 
2257 	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
2258 		err = -EIO;
2259 out:
2260 	return err;
2261 }
2262 
/*
 * Build a bitmap for cluster slot @slot and populate it from disk.
 * Returns the bitmap or an ERR_PTR(); the caller needs to free a returned
 * bitmap with md_bitmap_free().
 */
static void *bitmap_get_from_slot(struct mddev *mddev, int slot)
{
	struct bitmap *loaded;
	int err;

	loaded = __bitmap_create(mddev, slot);
	if (IS_ERR(loaded)) {
		err = PTR_ERR(loaded);
		return ERR_PTR(err);
	}

	err = md_bitmap_init_from_disk(loaded, 0);
	if (!err)
		return loaded;

	md_bitmap_free(loaded);
	return ERR_PTR(err);
}
2283 
2284 /* Loads the bitmap associated with slot and copies the resync information
2285  * to our bitmap
2286  */
2287 static int bitmap_copy_from_slot(struct mddev *mddev, int slot, sector_t *low,
2288 				 sector_t *high, bool clear_bits)
2289 {
2290 	int rv = 0, i, j;
2291 	sector_t block, lo = 0, hi = 0;
2292 	struct bitmap_counts *counts;
2293 	struct bitmap *bitmap;
2294 
2295 	bitmap = bitmap_get_from_slot(mddev, slot);
2296 	if (IS_ERR(bitmap)) {
2297 		pr_err("%s can't get bitmap from slot %d\n", __func__, slot);
2298 		return -1;
2299 	}
2300 
2301 	counts = &bitmap->counts;
2302 	for (j = 0; j < counts->chunks; j++) {
2303 		block = (sector_t)j << counts->chunkshift;
2304 		if (md_bitmap_file_test_bit(bitmap, block)) {
2305 			if (!lo)
2306 				lo = block;
2307 			hi = block;
2308 			md_bitmap_file_clear_bit(bitmap, block);
2309 			md_bitmap_set_memory_bits(mddev->bitmap, block, 1);
2310 			md_bitmap_file_set_bit(mddev->bitmap, block);
2311 		}
2312 	}
2313 
2314 	if (clear_bits) {
2315 		bitmap_update_sb(bitmap);
2316 		/* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs
2317 		 * BITMAP_PAGE_DIRTY or _NEEDWRITE to write ... */
2318 		for (i = 0; i < bitmap->storage.file_pages; i++)
2319 			if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING))
2320 				set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE);
2321 		__bitmap_unplug(bitmap);
2322 	}
2323 	__bitmap_unplug(mddev->bitmap);
2324 	*low = lo;
2325 	*high = hi;
2326 	md_bitmap_free(bitmap);
2327 
2328 	return rv;
2329 }
2330 
2331 static void bitmap_set_pages(void *data, unsigned long pages)
2332 {
2333 	struct bitmap *bitmap = data;
2334 
2335 	bitmap->counts.pages = pages;
2336 }
2337 
2338 static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats)
2339 {
2340 	struct bitmap_storage *storage;
2341 	struct bitmap_counts *counts;
2342 	struct bitmap *bitmap = data;
2343 	bitmap_super_t *sb;
2344 
2345 	if (!bitmap)
2346 		return -ENOENT;
2347 	if (!bitmap->storage.sb_page)
2348 		return -EINVAL;
2349 	sb = kmap_local_page(bitmap->storage.sb_page);
2350 	stats->sync_size = le64_to_cpu(sb->sync_size);
2351 	kunmap_local(sb);
2352 
2353 	counts = &bitmap->counts;
2354 	stats->missing_pages = counts->missing_pages;
2355 	stats->pages = counts->pages;
2356 
2357 	storage = &bitmap->storage;
2358 	stats->file_pages = storage->file_pages;
2359 	stats->file = storage->file;
2360 
2361 	stats->behind_writes = atomic_read(&bitmap->behind_writes);
2362 	stats->behind_wait = wq_has_sleeper(&bitmap->behind_wait);
2363 	stats->events_cleared = bitmap->events_cleared;
2364 	return 0;
2365 }
2366 
/*
 * (Re)build the in-memory counters and backing storage of @bitmap so that it
 * covers @blocks sectors.
 *
 * @chunksize: chunk size to use, or 0 to derive one from the current chunk
 *             shift and the space available in bitmap_info.space
 * @init:      true for initial setup; false when resizing a live array, in
 *             which case the personality is quiesced around the switch-over
 *
 * Returns 0 on success, -EINVAL when asked to resize a file-based bitmap,
 * -ENOMEM or the md_bitmap_storage_alloc() error on allocation failure.
 */
static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks,
			   int chunksize, bool init)
{
	/* If chunk_size is 0, choose an appropriate chunk size.
	 * Then possibly allocate new storage space.
	 * Then quiesce, copy bits, replace bitmap, and re-start
	 *
	 * This function is called both to set up the initial bitmap
	 * and to resize the bitmap while the array is active.
	 * If this happens as a result of the array being resized,
	 * chunksize will be zero, and we need to choose a suitable
	 * chunksize, otherwise we use what we are given.
	 */
	struct bitmap_storage store;
	struct bitmap_counts old_counts;
	unsigned long chunks;
	sector_t block;
	sector_t old_blocks, new_blocks;
	int chunkshift;
	int ret = 0;
	long pages;
	struct bitmap_page *new_bp;

	if (bitmap->storage.file && !init) {
		pr_info("md: cannot resize file-based bitmap\n");
		return -EINVAL;
	}

	if (chunksize == 0) {
		/* If there is enough space, leave the chunk size unchanged,
		 * else increase by factor of two until there is enough space.
		 */
		long bytes;
		long space = bitmap->mddev->bitmap_info.space;

		if (space == 0) {
			/* We don't know how much space there is, so limit
			 * to current size - in sectors.
			 */
			bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8);
			if (!bitmap->mddev->bitmap_info.external)
				bytes += sizeof(bitmap_super_t);
			space = DIV_ROUND_UP(bytes, 512);
			bitmap->mddev->bitmap_info.space = space;
		}
		/* start one below the current shift; the loop increments first */
		chunkshift = bitmap->counts.chunkshift;
		chunkshift--;
		do {
			/* 'chunkshift' is shift from block size to chunk size */
			chunkshift++;
			chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
			bytes = DIV_ROUND_UP(chunks, 8);
			if (!bitmap->mddev->bitmap_info.external)
				bytes += sizeof(bitmap_super_t);
		} while (bytes > (space << 9) && (chunkshift + BITMAP_BLOCK_SHIFT) <
			(BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize) - 1));
	} else
		/* position of the lowest set bit of chunksize, minus the block shift */
		chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT;

	chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
	memset(&store, 0, sizeof(store));
	/* backing storage is only allocated when a location (offset or file) is set */
	if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
		ret = md_bitmap_storage_alloc(&store, chunks,
					      !bitmap->mddev->bitmap_info.external,
					      mddev_is_clustered(bitmap->mddev)
					      ? bitmap->cluster_slot : 0);
	if (ret) {
		md_bitmap_file_unmap(&store);
		goto err;
	}

	pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO);

	new_bp = kzalloc_objs(*new_bp, pages);
	ret = -ENOMEM;
	if (!new_bp) {
		md_bitmap_file_unmap(&store);
		goto err;
	}

	if (!init)
		bitmap->mddev->pers->quiesce(bitmap->mddev, 1);

	/* hand the file reference over to the new storage descriptor */
	store.file = bitmap->storage.file;
	bitmap->storage.file = NULL;

	/* carry the current superblock contents into the new superblock page */
	if (store.sb_page && bitmap->storage.sb_page)
		memcpy(page_address(store.sb_page),
		       page_address(bitmap->storage.sb_page),
		       sizeof(bitmap_super_t));
	mutex_lock(&bitmap->mddev->bitmap_info.mutex);
	spin_lock_irq(&bitmap->counts.lock);
	md_bitmap_file_unmap(&bitmap->storage);
	bitmap->storage = store;

	/* keep a copy of the old counters so their bits can be migrated below */
	old_counts = bitmap->counts;
	bitmap->counts.bp = new_bp;
	bitmap->counts.pages = pages;
	bitmap->counts.missing_pages = pages;
	bitmap->counts.chunkshift = chunkshift;
	bitmap->counts.chunks = chunks;
	bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift +
						     BITMAP_BLOCK_SHIFT);

	/* only the range covered by both old and new layouts is copied */
	blocks = min(old_counts.chunks << old_counts.chunkshift,
		     chunks << chunkshift);

	/* For cluster raid, need to pre-allocate bitmap */
	if (mddev_is_clustered(bitmap->mddev)) {
		unsigned long page;
		for (page = 0; page < pages; page++) {
			ret = md_bitmap_checkpage(&bitmap->counts, page, 1, 1);
			if (ret) {
				unsigned long k;

				/* deallocate the page memory */
				for (k = 0; k < page; k++) {
					kfree(new_bp[k].map);
				}
				kfree(new_bp);

				/* restore some fields from old_counts */
				/*
				 * NOTE(review): the storage was already swapped
				 * above and 'ret' is overwritten with 0 before
				 * returning, so this failure is not reported to
				 * the caller - confirm this is intended.
				 */
				bitmap->counts.bp = old_counts.bp;
				bitmap->counts.pages = old_counts.pages;
				bitmap->counts.missing_pages = old_counts.pages;
				bitmap->counts.chunkshift = old_counts.chunkshift;
				bitmap->counts.chunks = old_counts.chunks;
				bitmap->mddev->bitmap_info.chunksize =
					1UL << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT);
				blocks = old_counts.chunks << old_counts.chunkshift;
				pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n");
				break;
			} else
				bitmap->counts.bp[page].count += 1;
		}
	}

	/* migrate every chunk flagged NEEDED in the old counters to the new ones */
	for (block = 0; block < blocks; ) {
		bitmap_counter_t *bmc_old, *bmc_new;
		int set;

		bmc_old = md_bitmap_get_counter(&old_counts, block, &old_blocks, 0);
		set = bmc_old && NEEDED(*bmc_old);

		if (set) {
			bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1);
			if (bmc_new) {
				if (*bmc_new == 0) {
					/* need to set on-disk bits too. */
					sector_t end = block + new_blocks;
					sector_t start = block >> chunkshift;

					start <<= chunkshift;
					/*
					 * NOTE(review): the loop advances
					 * 'start' but passes 'block' on every
					 * iteration - confirm whether 'start'
					 * was meant.
					 */
					while (start < end) {
						md_bitmap_file_set_bit(bitmap, block);
						start += 1 << chunkshift;
					}
					*bmc_new = 2;
					md_bitmap_count_page(&bitmap->counts, block, 1);
					md_bitmap_set_pending(&bitmap->counts, block);
				}
				*bmc_new |= NEEDED_MASK;
			}
			/* advance by the smaller of the two reported spans */
			if (new_blocks < old_blocks)
				old_blocks = new_blocks;
		}
		block += old_blocks;
	}

	/* free the old counter pages unless the old table was restored above */
	if (bitmap->counts.bp != old_counts.bp) {
		unsigned long k;
		for (k = 0; k < old_counts.pages; k++)
			if (!old_counts.bp[k].hijacked)
				kfree(old_counts.bp[k].map);
		kfree(old_counts.bp);
	}

	if (!init) {
		int i;
		/* 'block' continues from where the copy loop stopped */
		while (block < (chunks << chunkshift)) {
			bitmap_counter_t *bmc;
			bmc = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1);
			if (bmc) {
				/* new space.  It needs to be resynced, so
				 * we set NEEDED_MASK.
				 */
				if (*bmc == 0) {
					*bmc = NEEDED_MASK | 2;
					md_bitmap_count_page(&bitmap->counts, block, 1);
					md_bitmap_set_pending(&bitmap->counts, block);
				}
			}
			block += new_blocks;
		}
		/* mark every storage page dirty so the unplug below writes it */
		for (i = 0; i < bitmap->storage.file_pages; i++)
			set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
	}
	spin_unlock_irq(&bitmap->counts.lock);
	mutex_unlock(&bitmap->mddev->bitmap_info.mutex);
	if (!init) {
		__bitmap_unplug(bitmap);
		bitmap->mddev->pers->quiesce(bitmap->mddev, 0);
	}
	ret = 0;
err:
	return ret;
}
2574 
2575 static int bitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize)
2576 {
2577 	struct bitmap *bitmap = mddev->bitmap;
2578 
2579 	if (!bitmap)
2580 		return 0;
2581 
2582 	return __bitmap_resize(bitmap, blocks, chunksize, false);
2583 }
2584 
/* "none" bitmap: never enabled; both arguments are ignored. */
static bool bitmap_none_enabled(void *data, bool flush)
{
	return false;
}
2589 
/* "none" bitmap: nothing to create, always succeeds. */
static int bitmap_none_create(struct mddev *mddev)
{
	return 0;
}
2594 
/* "none" bitmap: nothing to load, always succeeds. */
static int bitmap_none_load(struct mddev *mddev)
{
	return 0;
}
2599 
/* "none" bitmap: nothing to tear down. */
static void bitmap_none_destroy(struct mddev *mddev)
{
}
2603 
/* "none" bitmap: no statistics exist; always returns -ENOENT, @stats untouched. */
static int bitmap_none_get_stats(void *data, struct md_bitmap_stats *stats)
{
	return -ENOENT;
}
2608 
2609 static ssize_t
2610 location_show(struct mddev *mddev, char *page)
2611 {
2612 	ssize_t len;
2613 	if (mddev->bitmap_info.file)
2614 		len = sprintf(page, "file");
2615 	else if (mddev->bitmap_info.offset)
2616 		len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset);
2617 	else
2618 		len = sprintf(page, "none");
2619 	len += sprintf(page+len, "\n");
2620 	return len;
2621 }
2622 
/*
 * sysfs store for bitmap/location.
 *
 * With a bitmap already configured the only accepted input is "none", which
 * tears the bitmap down.  Without one, "none" is a no-op, "file:" is
 * rejected with -EINVAL, and a (optionally '+'-prefixed) decimal offset
 * creates and loads a bitmap at that offset.
 *
 * The array is suspended and locked for the duration.  Returns @len on
 * success or a negative errno.
 */
static ssize_t
location_store(struct mddev *mddev, const char *buf, size_t len)
{
	int rv;

	rv = mddev_suspend_and_lock(mddev);
	if (rv)
		return rv;

	/* refuse while a running array has recovery state or a sync thread */
	if (mddev->pers) {
		if (mddev->recovery || mddev->sync_thread) {
			rv = -EBUSY;
			goto out;
		}
	}

	if (mddev->bitmap || mddev->bitmap_info.file ||
	    mddev->bitmap_info.offset) {
		/* bitmap already configured.  Only option is to clear it */
		if (strncmp(buf, "none", 4) != 0) {
			rv = -EBUSY;
			goto out;
		}

		sysfs_unmerge_group(&mddev->kobj, &md_bitmap_internal_group);
		md_bitmap_destroy_nosysfs(mddev);
		mddev->bitmap_id = ID_BITMAP_NONE;
		/*
		 * NOTE(review): rv is still 0 here, so taking none_err from
		 * this path makes the write report success (len) - confirm
		 * whether an error code was intended.
		 */
		if (!mddev_set_bitmap_ops_nosysfs(mddev))
			goto none_err;
		mddev->bitmap_info.offset = 0;
		if (mddev->bitmap_info.file) {
			/* clear the pointer before dropping the file reference */
			struct file *f = mddev->bitmap_info.file;
			mddev->bitmap_info.file = NULL;
			fput(f);
		}
	} else {
		/* No bitmap, OK to set a location */
		long long offset;

		if (strncmp(buf, "none", 4) == 0)
			/* nothing to be done */;
		else if (strncmp(buf, "file:", 5) == 0) {
			/* Not supported yet */
			rv = -EINVAL;
			goto out;
		} else {
			/* a leading '+' is skipped before parsing the number */
			if (buf[0] == '+')
				rv = kstrtoll(buf+1, 10, &offset);
			else
				rv = kstrtoll(buf, 10, &offset);
			if (rv)
				goto out;
			if (offset == 0) {
				rv = -EINVAL;
				goto out;
			}
			/*
			 * internal bitmap with major_version 0: only the
			 * default offset is accepted
			 */
			if (mddev->bitmap_info.external == 0 &&
			    mddev->major_version == 0 &&
			    offset != mddev->bitmap_info.default_offset) {
				rv = -EINVAL;
				goto out;
			}

			mddev->bitmap_info.offset = offset;
			md_bitmap_destroy_nosysfs(mddev);
			mddev->bitmap_id = ID_BITMAP;
			if (!mddev_set_bitmap_ops_nosysfs(mddev))
				goto bitmap_err;

			rv = md_bitmap_create_nosysfs(mddev);
			if (rv)
				goto create_err;

			rv = mddev->bitmap_ops->load(mddev);
			if (rv) {
				mddev->bitmap_info.offset = 0;
				goto load_err;
			}

			rv = sysfs_merge_group(&mddev->kobj,
					       &md_bitmap_internal_group);
			if (rv)
				goto merge_err;
		}
	}
	if (!mddev->external) {
		/* Ensure new bitmap info is stored in
		 * metadata promptly.
		 */
		set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
		md_wakeup_thread(mddev->thread);
	}
	rv = 0;
out:
	mddev_unlock_and_resume(mddev);
	if (rv)
		return rv;
	return len;

	/*
	 * Error unwinding: each label undoes one more step and falls through
	 * to the next, ending by jumping back to 'out'.
	 */
merge_err:
	mddev->bitmap_info.offset = 0;
load_err:
	md_bitmap_destroy_nosysfs(mddev);
create_err:
	/* fall back to the "none" bitmap ops */
	mddev->bitmap_info.offset = 0;
	mddev->bitmap_id = ID_BITMAP_NONE;
	if (!mddev_set_bitmap_ops_nosysfs(mddev))
		rv = -ENOENT;
	goto out;
bitmap_err:
	rv = -ENOENT;
none_err:
	mddev->bitmap_info.offset = 0;
	goto out;
}
2738 
2739 static struct md_sysfs_entry bitmap_location =
2740 __ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store);
2741 
2742 /* 'bitmap/space' is the space available at 'location' for the
2743  * bitmap.  This allows the kernel to know when it is safe to
2744  * resize the bitmap to match a resized array.
2745  */
2746 static ssize_t
2747 space_show(struct mddev *mddev, char *page)
2748 {
2749 	return sprintf(page, "%lu\n", mddev->bitmap_info.space);
2750 }
2751 
2752 static ssize_t
2753 space_store(struct mddev *mddev, const char *buf, size_t len)
2754 {
2755 	struct bitmap *bitmap;
2756 	unsigned long sectors;
2757 	int rv;
2758 
2759 	rv = kstrtoul(buf, 10, &sectors);
2760 	if (rv)
2761 		return rv;
2762 
2763 	if (sectors == 0)
2764 		return -EINVAL;
2765 
2766 	bitmap = mddev->bitmap;
2767 	if (bitmap && sectors < (bitmap->storage.bytes + 511) >> 9)
2768 		return -EFBIG; /* Bitmap is too big for this small space */
2769 
2770 	/* could make sure it isn't too big, but that isn't really
2771 	 * needed - user-space should be careful.
2772 	 */
2773 	mddev->bitmap_info.space = sectors;
2774 	return len;
2775 }
2776 
2777 static struct md_sysfs_entry bitmap_space =
2778 __ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store);
2779 
2780 static ssize_t
2781 timeout_show(struct mddev *mddev, char *page)
2782 {
2783 	ssize_t len;
2784 	unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ;
2785 	unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ;
2786 
2787 	len = sprintf(page, "%lu", secs);
2788 	if (jifs)
2789 		len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs));
2790 	len += sprintf(page+len, "\n");
2791 	return len;
2792 }
2793 
2794 static ssize_t
2795 timeout_store(struct mddev *mddev, const char *buf, size_t len)
2796 {
2797 	/* timeout can be set at any time */
2798 	unsigned long timeout;
2799 	int rv = strict_strtoul_scaled(buf, &timeout, 4);
2800 	if (rv)
2801 		return rv;
2802 
2803 	/* just to make sure we don't overflow... */
2804 	if (timeout >= LONG_MAX / HZ)
2805 		return -EINVAL;
2806 
2807 	timeout = timeout * HZ / 10000;
2808 
2809 	if (timeout >= MAX_SCHEDULE_TIMEOUT)
2810 		timeout = MAX_SCHEDULE_TIMEOUT-1;
2811 	if (timeout < 1)
2812 		timeout = 1;
2813 
2814 	mddev->bitmap_info.daemon_sleep = timeout;
2815 	mddev_set_timeout(mddev, timeout, false);
2816 	md_wakeup_thread(mddev->thread);
2817 
2818 	return len;
2819 }
2820 
2821 static struct md_sysfs_entry bitmap_timeout =
2822 __ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store);
2823 
2824 static ssize_t
2825 backlog_show(struct mddev *mddev, char *page)
2826 {
2827 	return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind);
2828 }
2829 
2830 static ssize_t
2831 backlog_store(struct mddev *mddev, const char *buf, size_t len)
2832 {
2833 	unsigned long backlog;
2834 	unsigned long old_mwb = mddev->bitmap_info.max_write_behind;
2835 	struct md_rdev *rdev;
2836 	bool has_write_mostly = false;
2837 	int rv = kstrtoul(buf, 10, &backlog);
2838 	if (rv)
2839 		return rv;
2840 	if (backlog > COUNTER_MAX)
2841 		return -EINVAL;
2842 
2843 	rv = mddev_suspend_and_lock(mddev);
2844 	if (rv)
2845 		return rv;
2846 
2847 	/*
2848 	 * Without write mostly device, it doesn't make sense to set
2849 	 * backlog for max_write_behind.
2850 	 */
2851 	rdev_for_each(rdev, mddev) {
2852 		if (test_bit(WriteMostly, &rdev->flags)) {
2853 			has_write_mostly = true;
2854 			break;
2855 		}
2856 	}
2857 	if (!has_write_mostly) {
2858 		pr_warn_ratelimited("%s: can't set backlog, no write mostly device available\n",
2859 				    mdname(mddev));
2860 		mddev_unlock(mddev);
2861 		return -EINVAL;
2862 	}
2863 
2864 	mddev->bitmap_info.max_write_behind = backlog;
2865 	if (!backlog && mddev->serial_info_pool) {
2866 		/* serial_info_pool is not needed if backlog is zero */
2867 		if (!test_bit(MD_SERIALIZE_POLICY, &mddev->flags))
2868 			mddev_destroy_serial_pool(mddev, NULL);
2869 	} else if (backlog && !mddev->serial_info_pool) {
2870 		/* serial_info_pool is needed since backlog is not zero */
2871 		rdev_for_each(rdev, mddev)
2872 			mddev_create_serial_pool(mddev, rdev);
2873 	}
2874 	if (old_mwb != backlog)
2875 		bitmap_update_sb(mddev->bitmap);
2876 
2877 	mddev_unlock_and_resume(mddev);
2878 	return len;
2879 }
2880 
2881 static struct md_sysfs_entry bitmap_backlog =
2882 __ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store);
2883 
2884 static ssize_t
2885 chunksize_show(struct mddev *mddev, char *page)
2886 {
2887 	return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize);
2888 }
2889 
2890 static ssize_t
2891 chunksize_store(struct mddev *mddev, const char *buf, size_t len)
2892 {
2893 	/* Can only be changed when no bitmap is active */
2894 	int rv;
2895 	unsigned long csize;
2896 	if (mddev->bitmap)
2897 		return -EBUSY;
2898 	rv = kstrtoul(buf, 10, &csize);
2899 	if (rv)
2900 		return rv;
2901 	if (csize < 512 ||
2902 	    !is_power_of_2(csize))
2903 		return -EINVAL;
2904 	if (BITS_PER_LONG > 32 && csize >= (1ULL << (BITS_PER_BYTE *
2905 		sizeof(((bitmap_super_t *)0)->chunksize))))
2906 		return -EOVERFLOW;
2907 	mddev->bitmap_info.chunksize = csize;
2908 	return len;
2909 }
2910 
2911 static struct md_sysfs_entry bitmap_chunksize =
2912 __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
2913 
2914 static ssize_t metadata_show(struct mddev *mddev, char *page)
2915 {
2916 	if (mddev_is_clustered(mddev))
2917 		return sprintf(page, "clustered\n");
2918 	return sprintf(page, "%s\n", (mddev->bitmap_info.external
2919 				      ? "external" : "internal"));
2920 }
2921 
2922 static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len)
2923 {
2924 	if (mddev->bitmap ||
2925 	    mddev->bitmap_info.file ||
2926 	    mddev->bitmap_info.offset)
2927 		return -EBUSY;
2928 	if (strncmp(buf, "external", 8) == 0)
2929 		mddev->bitmap_info.external = 1;
2930 	else if ((strncmp(buf, "internal", 8) == 0) ||
2931 			(strncmp(buf, "clustered", 9) == 0))
2932 		mddev->bitmap_info.external = 0;
2933 	else
2934 		return -EINVAL;
2935 	return len;
2936 }
2937 
2938 static struct md_sysfs_entry bitmap_metadata =
2939 __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
2940 
2941 static ssize_t can_clear_show(struct mddev *mddev, char *page)
2942 {
2943 	int len;
2944 	struct bitmap *bitmap;
2945 
2946 	spin_lock(&mddev->lock);
2947 	bitmap = mddev->bitmap;
2948 	if (bitmap)
2949 		len = sprintf(page, "%s\n", (bitmap->need_sync ? "false" :
2950 								 "true"));
2951 	else
2952 		len = sprintf(page, "\n");
2953 	spin_unlock(&mddev->lock);
2954 	return len;
2955 }
2956 
2957 static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len)
2958 {
2959 	struct bitmap *bitmap = mddev->bitmap;
2960 
2961 	if (!bitmap)
2962 		return -ENOENT;
2963 
2964 	if (strncmp(buf, "false", 5) == 0) {
2965 		bitmap->need_sync = 1;
2966 		return len;
2967 	}
2968 
2969 	if (strncmp(buf, "true", 4) == 0) {
2970 		if (mddev->degraded)
2971 			return -EBUSY;
2972 		bitmap->need_sync = 0;
2973 		return len;
2974 	}
2975 
2976 	return -EINVAL;
2977 }
2978 
2979 static struct md_sysfs_entry bitmap_can_clear =
2980 __ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
2981 
2982 static ssize_t
2983 behind_writes_used_show(struct mddev *mddev, char *page)
2984 {
2985 	ssize_t ret;
2986 	struct bitmap *bitmap;
2987 
2988 	spin_lock(&mddev->lock);
2989 	bitmap = mddev->bitmap;
2990 	if (!bitmap)
2991 		ret = sprintf(page, "0\n");
2992 	else
2993 		ret = sprintf(page, "%lu\n", bitmap->behind_writes_used);
2994 	spin_unlock(&mddev->lock);
2995 
2996 	return ret;
2997 }
2998 
2999 static ssize_t
3000 behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len)
3001 {
3002 	struct bitmap *bitmap = mddev->bitmap;
3003 
3004 	if (bitmap)
3005 		bitmap->behind_writes_used = 0;
3006 	return len;
3007 }
3008 
3009 static struct md_sysfs_entry max_backlog_used =
3010 __ATTR(max_backlog_used, S_IRUGO | S_IWUSR,
3011        behind_writes_used_show, behind_writes_used_reset);
3012 
3013 static struct attribute *md_bitmap_common_attrs[] = {
3014 	&bitmap_location.attr,
3015 	NULL
3016 };
3017 
3018 static struct attribute *md_bitmap_internal_attrs[] = {
3019 	&bitmap_space.attr,
3020 	&bitmap_timeout.attr,
3021 	&bitmap_backlog.attr,
3022 	&bitmap_chunksize.attr,
3023 	&bitmap_metadata.attr,
3024 	&bitmap_can_clear.attr,
3025 	&max_backlog_used.attr,
3026 	NULL
3027 };
3028 
3029 static struct attribute_group md_bitmap_common_group = {
3030 	.name = "bitmap",
3031 	.attrs = md_bitmap_common_attrs,
3032 };
3033 
3034 static struct attribute_group md_bitmap_internal_group = {
3035 	.name = "bitmap",
3036 	.attrs = md_bitmap_internal_attrs,
3037 };
3038 
3039 static const struct attribute_group *bitmap_groups[] = {
3040 	&md_bitmap_common_group,
3041 	&md_bitmap_internal_group,
3042 	NULL,
3043 };
3044 
3045 static const struct attribute_group *bitmap_none_groups[] = {
3046 	&md_bitmap_common_group,
3047 	NULL,
3048 };
3049 
3050 static struct bitmap_operations bitmap_none_ops = {
3051 	.head = {
3052 		.type	= MD_BITMAP,
3053 		.id	= ID_BITMAP_NONE,
3054 		.name	= "none",
3055 	},
3056 
3057 	.enabled		= bitmap_none_enabled,
3058 	.create			= bitmap_none_create,
3059 	.load			= bitmap_none_load,
3060 	.destroy		= bitmap_none_destroy,
3061 	.get_stats		= bitmap_none_get_stats,
3062 
3063 	.groups			= bitmap_none_groups,
3064 };
3065 
3066 static struct bitmap_operations bitmap_ops = {
3067 	.head = {
3068 		.type	= MD_BITMAP,
3069 		.id	= ID_BITMAP,
3070 		.name	= "bitmap",
3071 	},
3072 
3073 	.enabled		= bitmap_enabled,
3074 	.create			= bitmap_create,
3075 	.resize			= bitmap_resize,
3076 	.load			= bitmap_load,
3077 	.destroy		= bitmap_destroy,
3078 	.flush			= bitmap_flush,
3079 	.write_all		= bitmap_write_all,
3080 	.dirty_bits		= bitmap_dirty_bits,
3081 	.unplug			= bitmap_unplug,
3082 	.daemon_work		= bitmap_daemon_work,
3083 
3084 	.start_behind_write	= bitmap_start_behind_write,
3085 	.end_behind_write	= bitmap_end_behind_write,
3086 	.wait_behind_writes	= bitmap_wait_behind_writes,
3087 
3088 	.start_write		= bitmap_start_write,
3089 	.end_write		= bitmap_end_write,
3090 	.start_discard		= bitmap_start_write,
3091 	.end_discard		= bitmap_end_write,
3092 
3093 	.start_sync		= bitmap_start_sync,
3094 	.end_sync		= bitmap_end_sync,
3095 	.cond_end_sync		= bitmap_cond_end_sync,
3096 	.close_sync		= bitmap_close_sync,
3097 
3098 	.update_sb		= bitmap_update_sb,
3099 	.get_stats		= bitmap_get_stats,
3100 
3101 	.sync_with_cluster	= bitmap_sync_with_cluster,
3102 	.get_from_slot		= bitmap_get_from_slot,
3103 	.copy_from_slot		= bitmap_copy_from_slot,
3104 	.set_pages		= bitmap_set_pages,
3105 	.free			= md_bitmap_free,
3106 
3107 	.groups			= bitmap_groups,
3108 };
3109 
3110 int md_bitmap_init(void)
3111 {
3112 	int err;
3113 
3114 	md_bitmap_wq = alloc_workqueue("md_bitmap", WQ_MEM_RECLAIM | WQ_UNBOUND,
3115 				       0);
3116 	if (!md_bitmap_wq)
3117 		return -ENOMEM;
3118 
3119 	err = register_md_submodule(&bitmap_none_ops.head);
3120 	if (err)
3121 		goto err_wq;
3122 
3123 	err = register_md_submodule(&bitmap_ops.head);
3124 	if (err)
3125 		goto err_none;
3126 
3127 	return 0;
3128 
3129 err_none:
3130 	unregister_md_submodule(&bitmap_none_ops.head);
3131 err_wq:
3132 	destroy_workqueue(md_bitmap_wq);
3133 	return err;
3134 }
3135 
3136 void md_bitmap_exit(void)
3137 {
3138 	unregister_md_submodule(&bitmap_ops.head);
3139 	unregister_md_submodule(&bitmap_none_ops.head);
3140 	destroy_workqueue(md_bitmap_wq);
3141 }
3142