xref: /linux/drivers/md/md-bitmap.c (revision f2926a533d03fe70d753b512b713e06a2aa174af)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
4  *
5  * bitmap_create  - sets up the bitmap structure
6  * bitmap_destroy - destroys the bitmap structure
7  *
8  * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
9  * - added disk storage for bitmap
10  * - changes to allow various bitmap chunk sizes
11  */
12 
13 /*
14  * Still to do:
15  *
16  * flush after percent set rather than just time based. (maybe both).
17  */
18 
19 #include <linux/blkdev.h>
20 #include <linux/module.h>
21 #include <linux/errno.h>
22 #include <linux/slab.h>
23 #include <linux/init.h>
24 #include <linux/timer.h>
25 #include <linux/sched.h>
26 #include <linux/list.h>
27 #include <linux/file.h>
28 #include <linux/mount.h>
29 #include <linux/buffer_head.h>
30 #include <linux/seq_file.h>
31 #include <trace/events/block.h>
32 
33 #include "md.h"
34 #include "md-bitmap.h"
35 #include "md-cluster.h"
36 
37 /*
38  * in-memory bitmap:
39  *
40  * Use 16 bit block counters to track pending writes to each "chunk".
41  * The 2 high order bits are special-purpose, the first is a flag indicating
42  * whether a resync is needed.  The second is a flag indicating whether a
43  * resync is active.
44  * This means that the counter is actually 14 bits:
45  *
46  * +--------+--------+------------------------------------------------+
47  * | resync | resync |               counter                          |
48  * | needed | active |                                                |
49  * |  (0-1) |  (0-1) |              (0-16383)                         |
50  * +--------+--------+------------------------------------------------+
51  *
52  * The "resync needed" bit is set when:
53  *    a '1' bit is read from storage at startup.
54  *    a write request fails on some drives
55  *    a resync is aborted on a chunk with 'resync active' set
56  * It is cleared (and resync-active set) when a resync starts across all drives
57  * of the chunk.
58  *
59  *
60  * The "resync active" bit is set when:
61  *    a resync is started on all drives, and resync_needed is set.
62  *       resync_needed will be cleared (as long as resync_active wasn't already set).
63  * It is cleared when a resync completes.
64  *
65  * The counter counts pending write requests, plus the on-disk bit.
66  * When the counter is '1' and the resync bits are clear, the on-disk
67  * bit can be cleared as well, thus setting the counter to 0.
68  * When we set a bit, or increment the counter (to start a write), if the
69  * field is 0, we first set the disk bit and set the counter to 1.
70  *
71  * If the counter is 0, the on-disk bit is clear and the stripe is clean
72  * Anything that dirties the stripe pushes the counter to 2 (at least)
73  * and sets the on-disk bit (lazily).
74  * If a periodic sweep finds the counter at 2, it is decremented to 1.
75  * If the sweep finds the counter at 1, the on-disk bit is cleared and the
76  * counter goes to zero.
77  *
78  * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block
79  * counters as a fallback when "page" memory cannot be allocated:
80  *
81  * Normal case (page memory allocated):
82  *
83  *     page pointer (32-bit)
84  *
85  *     [ ] ------+
86  *               |
87  *               +-------> [   ][   ]..[   ] (4096 byte page == 2048 counters)
88  *                          c1   c2    c2048
89  *
90  * Hijacked case (page memory allocation failed):
91  *
92  *     hijacked page pointer (32-bit)
93  *
94  *     [		  ][		  ] (no page memory allocated)
95  *      counter #1 (16-bit) counter #2 (16-bit)
96  *
97  */
98 
/* One in-memory counter per bitmap chunk: 2 flag bits + 14 counter bits. */
typedef __u16 bitmap_counter_t;

/* Number of bits in one page, and the equivalent shift. */
#define PAGE_BITS (PAGE_SIZE << 3)
#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3)

/* Width of one counter in bits, as a count and as shifts (bits / bytes). */
#define COUNTER_BITS 16
#define COUNTER_BIT_SHIFT 4
#define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3)

/* Top bit: resync needed.  Next bit: resync active.  The rest: the count. */
#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1)))
#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2)))
#define COUNTER_MAX ((bitmap_counter_t) (RESYNC_MASK - 1))

/*
 * Field extractors.  The argument is parenthesised before the cast so that
 * an expression argument (e.g. "a ? b : c") is converted as a whole rather
 * than having the cast bind to its first operand only.
 */
#define NEEDED(x) (((bitmap_counter_t) (x)) & NEEDED_MASK)
#define RESYNC(x) (((bitmap_counter_t) (x)) & RESYNC_MASK)
#define COUNTER(x) (((bitmap_counter_t) (x)) & COUNTER_MAX)

/* how many counters per page? */
#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS)
/* same, except a shift value for more efficient bitops */
#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT)
/* same, except a mask value for more efficient bitops */
#define PAGE_COUNTER_MASK  (PAGE_COUNTER_RATIO - 1)

#define BITMAP_BLOCK_SHIFT 9
124 
125 /*
126  * bitmap structures:
127  */
128 
/*
 * The in-memory bitmap is an array of bitmap_pages; each entry describes
 * one page worth of counters.
 */
struct bitmap_page {
	/* the actual memory page holding the counters */
	char *map;

	/*
	 * Emergency mode: the page could not be allocated, so the 'map'
	 * pointer itself is hijacked and used as two counters.
	 */
	unsigned int hijacked : 1;

	/*
	 * Set when some counter in this page is '1' or '2' and could
	 * therefore be cleared.
	 */
	unsigned int pending : 1;

	/* count of dirty bits on the page */
	unsigned int count : 30;
};
150 
151 /* the main bitmap structure - one per mddev */
152 struct bitmap {
153 
154 	struct bitmap_counts {
155 		spinlock_t lock;
156 		struct bitmap_page *bp;
157 		/* total number of pages in the bitmap */
158 		unsigned long pages;
159 		/* number of pages not yet allocated */
160 		unsigned long missing_pages;
161 		/* chunksize = 2^chunkshift (for bitops) */
162 		unsigned long chunkshift;
163 		/* total number of data chunks for the array */
164 		unsigned long chunks;
165 	} counts;
166 
167 	struct mddev *mddev; /* the md device that the bitmap is for */
168 
169 	__u64	events_cleared;
170 	int need_sync;
171 
172 	struct bitmap_storage {
173 		/* backing disk file */
174 		struct file *file;
175 		/* cached copy of the bitmap file superblock */
176 		struct page *sb_page;
177 		unsigned long sb_index;
178 		/* list of cache pages for the file */
179 		struct page **filemap;
180 		/* attributes associated filemap pages */
181 		unsigned long *filemap_attr;
182 		/* number of pages in the file */
183 		unsigned long file_pages;
184 		/* total bytes in the bitmap */
185 		unsigned long bytes;
186 	} storage;
187 
188 	unsigned long flags;
189 
190 	int allclean;
191 
192 	atomic_t behind_writes;
193 	/* highest actual value at runtime */
194 	unsigned long behind_writes_used;
195 
196 	/*
197 	 * the bitmap daemon - periodically wakes up and sweeps the bitmap
198 	 * file, cleaning up bits and flushing out pages to disk as necessary
199 	 */
200 	unsigned long daemon_lastrun; /* jiffies of last run */
201 	/*
202 	 * when we lasted called end_sync to update bitmap with resync
203 	 * progress.
204 	 */
205 	unsigned long last_end_sync;
206 
207 	/* pending writes to the bitmap file */
208 	atomic_t pending_writes;
209 	wait_queue_head_t write_wait;
210 	wait_queue_head_t overflow_wait;
211 	wait_queue_head_t behind_wait;
212 
213 	struct kernfs_node *sysfs_can_clear;
214 	/* slot offset for clustered env */
215 	int cluster_slot;
216 };
217 
218 static struct workqueue_struct *md_bitmap_wq;
219 static struct attribute_group md_bitmap_internal_group;
220 
221 static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks,
222 			   int chunksize, bool init);
223 
224 static inline char *bmname(struct bitmap *bitmap)
225 {
226 	return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
227 }
228 
229 static bool bitmap_enabled(void *data, bool flush)
230 {
231 	struct bitmap *bitmap = data;
232 
233 	if (!flush)
234 		return true;
235 
236 	/*
237 	 * If caller want to flush bitmap pages to underlying disks, check if
238 	 * there are cached pages in filemap.
239 	 */
240 	return !test_bit(BITMAP_STALE, &bitmap->flags) &&
241 	       bitmap->storage.filemap != NULL;
242 }
243 
244 /*
245  * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
246  *
247  * 1) check to see if this page is allocated, if it's not then try to alloc
248  * 2) if the alloc fails, set the page's hijacked flag so we'll use the
249  *    page pointer directly as a counter
250  *
251  * if we find our page, we increment the page's refcount so that it stays
252  * allocated while we're using it
253  */
254 static int md_bitmap_checkpage(struct bitmap_counts *bitmap,
255 			       unsigned long page, int create, int no_hijack)
256 __releases(bitmap->lock)
257 __acquires(bitmap->lock)
258 {
259 	unsigned char *mappage;
260 
261 	WARN_ON_ONCE(page >= bitmap->pages);
262 	if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
263 		return 0;
264 
265 	if (bitmap->bp[page].map) /* page is already allocated, just return */
266 		return 0;
267 
268 	if (!create)
269 		return -ENOENT;
270 
271 	/* this page has not been allocated yet */
272 
273 	spin_unlock_irq(&bitmap->lock);
274 	/* It is possible that this is being called inside a
275 	 * prepare_to_wait/finish_wait loop from raid5c:make_request().
276 	 * In general it is not permitted to sleep in that context as it
277 	 * can cause the loop to spin freely.
278 	 * That doesn't apply here as we can only reach this point
279 	 * once with any loop.
280 	 * When this function completes, either bp[page].map or
281 	 * bp[page].hijacked.  In either case, this function will
282 	 * abort before getting to this point again.  So there is
283 	 * no risk of a free-spin, and so it is safe to assert
284 	 * that sleeping here is allowed.
285 	 */
286 	sched_annotate_sleep();
287 	mappage = kzalloc(PAGE_SIZE, GFP_NOIO);
288 	spin_lock_irq(&bitmap->lock);
289 
290 	if (mappage == NULL) {
291 		pr_debug("md/bitmap: map page allocation failed, hijacking\n");
292 		/* We don't support hijack for cluster raid */
293 		if (no_hijack)
294 			return -ENOMEM;
295 		/* failed - set the hijacked flag so that we can use the
296 		 * pointer as a counter */
297 		if (!bitmap->bp[page].map)
298 			bitmap->bp[page].hijacked = 1;
299 	} else if (bitmap->bp[page].map ||
300 		   bitmap->bp[page].hijacked) {
301 		/* somebody beat us to getting the page */
302 		kfree(mappage);
303 	} else {
304 
305 		/* no page was in place and we have one, so install it */
306 
307 		bitmap->bp[page].map = mappage;
308 		bitmap->missing_pages--;
309 	}
310 	return 0;
311 }
312 
313 /* if page is completely empty, put it back on the free list, or dealloc it */
314 /* if page was hijacked, unmark the flag so it might get alloced next time */
315 /* Note: lock should be held when calling this */
316 static void md_bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page)
317 {
318 	char *ptr;
319 
320 	if (bitmap->bp[page].count) /* page is still busy */
321 		return;
322 
323 	/* page is no longer in use, it can be released */
324 
325 	if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
326 		bitmap->bp[page].hijacked = 0;
327 		bitmap->bp[page].map = NULL;
328 	} else {
329 		/* normal case, free the page */
330 		ptr = bitmap->bp[page].map;
331 		bitmap->bp[page].map = NULL;
332 		bitmap->missing_pages++;
333 		kfree(ptr);
334 	}
335 }
336 
337 /*
338  * bitmap file handling - read and write the bitmap file and its superblock
339  */
340 
341 /*
342  * basic page I/O operations
343  */
344 
345 /* IO operations when bitmap is stored near all superblocks */
346 
347 /* choose a good rdev and read the page from there */
348 static int read_sb_page(struct mddev *mddev, loff_t offset,
349 		struct page *page, unsigned long index, int size)
350 {
351 
352 	sector_t sector = mddev->bitmap_info.offset + offset +
353 		index * (PAGE_SIZE / SECTOR_SIZE);
354 	struct md_rdev *rdev;
355 
356 	rdev_for_each(rdev, mddev) {
357 		u32 iosize = roundup(size, bdev_logical_block_size(rdev->bdev));
358 
359 		if (!test_bit(In_sync, &rdev->flags) ||
360 		    test_bit(Faulty, &rdev->flags) ||
361 		    test_bit(Bitmap_sync, &rdev->flags))
362 			continue;
363 
364 		if (sync_page_io(rdev, sector, iosize, page, REQ_OP_READ, true))
365 			return 0;
366 	}
367 	return -EIO;
368 }
369 
370 static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev)
371 {
372 	/* Iterate the disks of an mddev, using rcu to protect access to the
373 	 * linked list, and raising the refcount of devices we return to ensure
374 	 * they don't disappear while in use.
375 	 * As devices are only added or removed when raid_disk is < 0 and
376 	 * nr_pending is 0 and In_sync is clear, the entries we return will
377 	 * still be in the same position on the list when we re-enter
378 	 * list_for_each_entry_continue_rcu.
379 	 *
380 	 * Note that if entered with 'rdev == NULL' to start at the
381 	 * beginning, we temporarily assign 'rdev' to an address which
382 	 * isn't really an rdev, but which can be used by
383 	 * list_for_each_entry_continue_rcu() to find the first entry.
384 	 */
385 	rcu_read_lock();
386 	if (rdev == NULL)
387 		/* start at the beginning */
388 		rdev = list_entry(&mddev->disks, struct md_rdev, same_set);
389 	else {
390 		/* release the previous rdev and start from there. */
391 		rdev_dec_pending(rdev, mddev);
392 	}
393 	list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) {
394 		if (rdev->raid_disk >= 0 &&
395 		    !test_bit(Faulty, &rdev->flags)) {
396 			/* this is a usable devices */
397 			atomic_inc(&rdev->nr_pending);
398 			rcu_read_unlock();
399 			return rdev;
400 		}
401 	}
402 	rcu_read_unlock();
403 	return NULL;
404 }
405 
/*
 * Pick the I/O size to use for the last bitmap page on @bdev: if the
 * device's optimal I/O size exceeds its logical block size, round
 * @last_page_size up to the optimal size; otherwise keep @io_size.
 */
static unsigned int optimal_io_size(struct block_device *bdev,
				    unsigned int last_page_size,
				    unsigned int io_size)
{
	if (bdev_io_opt(bdev) <= bdev_logical_block_size(bdev))
		return io_size;

	return roundup(last_page_size, bdev_io_opt(bdev));
}
414 
415 static unsigned int bitmap_io_size(unsigned int io_size, unsigned int opt_size,
416 				   loff_t start, loff_t boundary)
417 {
418 	if (io_size != opt_size &&
419 	    start + opt_size / SECTOR_SIZE <= boundary)
420 		return opt_size;
421 	if (start + io_size / SECTOR_SIZE <= boundary)
422 		return io_size;
423 
424 	/* Overflows boundary */
425 	return 0;
426 }
427 
428 static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
429 			   unsigned long pg_index, struct page *page)
430 {
431 	struct block_device *bdev;
432 	struct mddev *mddev = bitmap->mddev;
433 	struct bitmap_storage *store = &bitmap->storage;
434 	unsigned long num_pages = bitmap->storage.file_pages;
435 	unsigned int bitmap_limit = (num_pages - pg_index % num_pages) << PAGE_SHIFT;
436 	loff_t sboff, offset = mddev->bitmap_info.offset;
437 	sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE;
438 	unsigned int size = PAGE_SIZE;
439 	unsigned int opt_size = PAGE_SIZE;
440 	sector_t doff;
441 
442 	bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
443 	/* we compare length (page numbers), not page offset. */
444 	if ((pg_index - store->sb_index) == num_pages - 1) {
445 		unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1);
446 
447 		if (last_page_size == 0)
448 			last_page_size = PAGE_SIZE;
449 		size = roundup(last_page_size, bdev_logical_block_size(bdev));
450 		opt_size = optimal_io_size(bdev, last_page_size, size);
451 	}
452 
453 	sboff = rdev->sb_start + offset;
454 	doff = rdev->data_offset;
455 
456 	/* Just make sure we aren't corrupting data or metadata */
457 	if (mddev->external) {
458 		/* Bitmap could be anywhere. */
459 		if (sboff + ps > doff &&
460 		    sboff < (doff + mddev->dev_sectors + PAGE_SIZE / SECTOR_SIZE))
461 			return -EINVAL;
462 	} else if (offset < 0) {
463 		/* DATA  BITMAP METADATA  */
464 		size = bitmap_io_size(size, opt_size, offset + ps, 0);
465 		if (size == 0)
466 			/* bitmap runs in to metadata */
467 			return -EINVAL;
468 
469 		if (doff + mddev->dev_sectors > sboff)
470 			/* data runs in to bitmap */
471 			return -EINVAL;
472 	} else if (rdev->sb_start < rdev->data_offset) {
473 		/* METADATA BITMAP DATA */
474 		size = bitmap_io_size(size, opt_size, sboff + ps, doff);
475 		if (size == 0)
476 			/* bitmap runs in to data */
477 			return -EINVAL;
478 	}
479 
480 	md_write_metadata(mddev, rdev, sboff + ps, (int)min(size, bitmap_limit),
481 			  page, 0);
482 	return 0;
483 }
484 
485 static void write_sb_page(struct bitmap *bitmap, unsigned long pg_index,
486 			  struct page *page, bool wait)
487 {
488 	struct mddev *mddev = bitmap->mddev;
489 
490 	do {
491 		struct md_rdev *rdev = NULL;
492 
493 		while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
494 			if (__write_sb_page(rdev, bitmap, pg_index, page) < 0) {
495 				set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
496 				return;
497 			}
498 		}
499 	} while (wait && md_super_wait(mddev) < 0);
500 }
501 
502 static void md_bitmap_file_kick(struct bitmap *bitmap);
503 
504 #ifdef CONFIG_MD_BITMAP_FILE
505 static void write_file_page(struct bitmap *bitmap, struct page *page, int wait)
506 {
507 	struct buffer_head *bh = page_buffers(page);
508 
509 	while (bh && bh->b_blocknr) {
510 		atomic_inc(&bitmap->pending_writes);
511 		set_buffer_locked(bh);
512 		set_buffer_mapped(bh);
513 		submit_bh(REQ_OP_WRITE | REQ_SYNC, bh);
514 		bh = bh->b_this_page;
515 	}
516 
517 	if (wait)
518 		wait_event(bitmap->write_wait,
519 			   atomic_read(&bitmap->pending_writes) == 0);
520 }
521 
522 static void end_bitmap_write(struct buffer_head *bh, int uptodate)
523 {
524 	struct bitmap *bitmap = bh->b_private;
525 
526 	if (!uptodate)
527 		set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
528 	if (atomic_dec_and_test(&bitmap->pending_writes))
529 		wake_up(&bitmap->write_wait);
530 }
531 
532 static void free_buffers(struct page *page)
533 {
534 	struct buffer_head *bh;
535 
536 	if (!PagePrivate(page))
537 		return;
538 
539 	bh = page_buffers(page);
540 	while (bh) {
541 		struct buffer_head *next = bh->b_this_page;
542 		free_buffer_head(bh);
543 		bh = next;
544 	}
545 	detach_page_private(page);
546 	put_page(page);
547 }
548 
549 /* read a page from a file.
550  * We both read the page, and attach buffers to the page to record the
551  * address of each block (using bmap).  These addresses will be used
552  * to write the block later, completely bypassing the filesystem.
553  * This usage is similar to how swap files are handled, and allows us
554  * to write to a file with no concerns of memory allocation failing.
555  */
556 static int read_file_page(struct file *file, unsigned long index,
557 		struct bitmap *bitmap, unsigned long count, struct page *page)
558 {
559 	int ret = 0;
560 	struct inode *inode = file_inode(file);
561 	struct buffer_head *bh;
562 	sector_t block, blk_cur;
563 	unsigned long blocksize = i_blocksize(inode);
564 
565 	pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
566 		 (unsigned long long)index << PAGE_SHIFT);
567 
568 	bh = alloc_page_buffers(page, blocksize);
569 	if (!bh) {
570 		ret = -ENOMEM;
571 		goto out;
572 	}
573 	attach_page_private(page, bh);
574 	blk_cur = index << (PAGE_SHIFT - inode->i_blkbits);
575 	while (bh) {
576 		block = blk_cur;
577 
578 		if (count == 0)
579 			bh->b_blocknr = 0;
580 		else {
581 			ret = bmap(inode, &block);
582 			if (ret || !block) {
583 				ret = -EINVAL;
584 				bh->b_blocknr = 0;
585 				goto out;
586 			}
587 
588 			bh->b_blocknr = block;
589 			bh->b_bdev = inode->i_sb->s_bdev;
590 			if (count < blocksize)
591 				count = 0;
592 			else
593 				count -= blocksize;
594 
595 			bh->b_end_io = end_bitmap_write;
596 			bh->b_private = bitmap;
597 			atomic_inc(&bitmap->pending_writes);
598 			set_buffer_locked(bh);
599 			set_buffer_mapped(bh);
600 			submit_bh(REQ_OP_READ, bh);
601 		}
602 		blk_cur++;
603 		bh = bh->b_this_page;
604 	}
605 
606 	wait_event(bitmap->write_wait,
607 		   atomic_read(&bitmap->pending_writes)==0);
608 	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
609 		ret = -EIO;
610 out:
611 	if (ret)
612 		pr_err("md: bitmap read error: (%dB @ %llu): %d\n",
613 		       (int)PAGE_SIZE,
614 		       (unsigned long long)index << PAGE_SHIFT,
615 		       ret);
616 	return ret;
617 }
618 #else /* CONFIG_MD_BITMAP_FILE */
619 static void write_file_page(struct bitmap *bitmap, struct page *page, int wait)
620 {
621 }
622 static int read_file_page(struct file *file, unsigned long index,
623 		struct bitmap *bitmap, unsigned long count, struct page *page)
624 {
625 	return -EIO;
626 }
627 static void free_buffers(struct page *page)
628 {
629 	put_page(page);
630 }
631 #endif /* CONFIG_MD_BITMAP_FILE */
632 
633 /*
634  * bitmap file superblock operations
635  */
636 
637 /*
638  * write out a page to a file
639  */
640 static void filemap_write_page(struct bitmap *bitmap, unsigned long pg_index,
641 			       bool wait)
642 {
643 	struct bitmap_storage *store = &bitmap->storage;
644 	struct page *page = store->filemap[pg_index];
645 
646 	if (mddev_is_clustered(bitmap->mddev)) {
647 		/* go to node bitmap area starting point */
648 		pg_index += store->sb_index;
649 	}
650 
651 	if (store->file)
652 		write_file_page(bitmap, page, wait);
653 	else
654 		write_sb_page(bitmap, pg_index, page, wait);
655 }
656 
657 /*
658  * md_bitmap_wait_writes() should be called before writing any bitmap
659  * blocks, to ensure previous writes, particularly from
660  * md_bitmap_daemon_work(), have completed.
661  */
662 static void md_bitmap_wait_writes(struct bitmap *bitmap)
663 {
664 	if (bitmap->storage.file)
665 		wait_event(bitmap->write_wait,
666 			   atomic_read(&bitmap->pending_writes)==0);
667 	else
668 		/* Note that we ignore the return value.  The writes
669 		 * might have failed, but that would just mean that
670 		 * some bits which should be cleared haven't been,
671 		 * which is safe.  The relevant bitmap blocks will
672 		 * probably get written again, but there is no great
673 		 * loss if they aren't.
674 		 */
675 		md_super_wait(bitmap->mddev);
676 }
677 
678 
679 /* update the event counter and sync the superblock to disk */
680 static void bitmap_update_sb(void *data)
681 {
682 	bitmap_super_t *sb;
683 	struct bitmap *bitmap = data;
684 
685 	if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
686 		return;
687 	if (bitmap->mddev->bitmap_info.external)
688 		return;
689 	if (!bitmap->storage.sb_page) /* no superblock */
690 		return;
691 	sb = kmap_local_page(bitmap->storage.sb_page);
692 	sb->events = cpu_to_le64(bitmap->mddev->events);
693 	if (bitmap->mddev->events < bitmap->events_cleared)
694 		/* rocking back to read-only */
695 		bitmap->events_cleared = bitmap->mddev->events;
696 	sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
697 	/*
698 	 * clear BITMAP_WRITE_ERROR bit to protect against the case that
699 	 * a bitmap write error occurred but the later writes succeeded.
700 	 */
701 	sb->state = cpu_to_le32(bitmap->flags & ~BIT(BITMAP_WRITE_ERROR));
702 	/* Just in case these have been changed via sysfs: */
703 	sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
704 	sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
705 	/* This might have been changed by a reshape */
706 	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
707 	sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize);
708 	sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes);
709 	sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
710 					   bitmap_info.space);
711 	kunmap_local(sb);
712 
713 	if (bitmap->storage.file)
714 		write_file_page(bitmap, bitmap->storage.sb_page, 1);
715 	else
716 		write_sb_page(bitmap, bitmap->storage.sb_index,
717 			      bitmap->storage.sb_page, 1);
718 }
719 
720 static void bitmap_print_sb(struct bitmap *bitmap)
721 {
722 	bitmap_super_t *sb;
723 
724 	if (!bitmap || !bitmap->storage.sb_page)
725 		return;
726 	sb = kmap_local_page(bitmap->storage.sb_page);
727 	pr_debug("%s: bitmap file superblock:\n", bmname(bitmap));
728 	pr_debug("         magic: %08x\n", le32_to_cpu(sb->magic));
729 	pr_debug("       version: %u\n", le32_to_cpu(sb->version));
730 	pr_debug("          uuid: %08x.%08x.%08x.%08x\n",
731 		 le32_to_cpu(*(__le32 *)(sb->uuid+0)),
732 		 le32_to_cpu(*(__le32 *)(sb->uuid+4)),
733 		 le32_to_cpu(*(__le32 *)(sb->uuid+8)),
734 		 le32_to_cpu(*(__le32 *)(sb->uuid+12)));
735 	pr_debug("        events: %llu\n",
736 		 (unsigned long long) le64_to_cpu(sb->events));
737 	pr_debug("events cleared: %llu\n",
738 		 (unsigned long long) le64_to_cpu(sb->events_cleared));
739 	pr_debug("         state: %08x\n", le32_to_cpu(sb->state));
740 	pr_debug("     chunksize: %u B\n", le32_to_cpu(sb->chunksize));
741 	pr_debug("  daemon sleep: %us\n", le32_to_cpu(sb->daemon_sleep));
742 	pr_debug("     sync size: %llu KB\n",
743 		 (unsigned long long)le64_to_cpu(sb->sync_size)/2);
744 	pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind));
745 	kunmap_local(sb);
746 }
747 
748 /*
749  * bitmap_new_disk_sb
750  * @bitmap
751  *
752  * This function is somewhat the reverse of bitmap_read_sb.  bitmap_read_sb
753  * reads and verifies the on-disk bitmap superblock and populates bitmap_info.
754  * This function verifies 'bitmap_info' and populates the on-disk bitmap
755  * structure, which is to be written to disk.
756  *
757  * Returns: 0 on success, -Exxx on error
758  */
759 static int md_bitmap_new_disk_sb(struct bitmap *bitmap)
760 {
761 	bitmap_super_t *sb;
762 	unsigned long chunksize, daemon_sleep, write_behind;
763 
764 	bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
765 	if (bitmap->storage.sb_page == NULL)
766 		return -ENOMEM;
767 	bitmap->storage.sb_index = 0;
768 
769 	sb = kmap_local_page(bitmap->storage.sb_page);
770 
771 	sb->magic = cpu_to_le32(BITMAP_MAGIC);
772 	sb->version = cpu_to_le32(BITMAP_MAJOR_HI);
773 
774 	chunksize = bitmap->mddev->bitmap_info.chunksize;
775 	BUG_ON(!chunksize);
776 	if (!is_power_of_2(chunksize)) {
777 		kunmap_local(sb);
778 		pr_warn("bitmap chunksize not a power of 2\n");
779 		return -EINVAL;
780 	}
781 	sb->chunksize = cpu_to_le32(chunksize);
782 
783 	daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
784 	if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
785 		pr_debug("Choosing daemon_sleep default (5 sec)\n");
786 		daemon_sleep = 5 * HZ;
787 	}
788 	sb->daemon_sleep = cpu_to_le32(daemon_sleep);
789 	bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
790 
791 	/*
792 	 * FIXME: write_behind for RAID1.  If not specified, what
793 	 * is a good choice?  We choose COUNTER_MAX / 2 arbitrarily.
794 	 */
795 	write_behind = bitmap->mddev->bitmap_info.max_write_behind;
796 	if (write_behind > COUNTER_MAX / 2)
797 		write_behind = COUNTER_MAX / 2;
798 	sb->write_behind = cpu_to_le32(write_behind);
799 	bitmap->mddev->bitmap_info.max_write_behind = write_behind;
800 
801 	/* keep the array size field of the bitmap superblock up to date */
802 	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
803 
804 	memcpy(sb->uuid, bitmap->mddev->uuid, 16);
805 
806 	set_bit(BITMAP_STALE, &bitmap->flags);
807 	sb->state = cpu_to_le32(bitmap->flags);
808 	bitmap->events_cleared = bitmap->mddev->events;
809 	sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
810 	bitmap->mddev->bitmap_info.nodes = 0;
811 
812 	kunmap_local(sb);
813 
814 	return 0;
815 }
816 
817 /* read the superblock from the bitmap file and initialize some bitmap fields */
818 static int md_bitmap_read_sb(struct bitmap *bitmap)
819 {
820 	char *reason = NULL;
821 	bitmap_super_t *sb;
822 	unsigned long chunksize, daemon_sleep, write_behind;
823 	unsigned long long events;
824 	int nodes = 0;
825 	unsigned long sectors_reserved = 0;
826 	int err = -EINVAL;
827 	struct page *sb_page;
828 	loff_t offset = 0;
829 
830 	if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) {
831 		chunksize = 128 * 1024 * 1024;
832 		daemon_sleep = 5 * HZ;
833 		write_behind = 0;
834 		set_bit(BITMAP_STALE, &bitmap->flags);
835 		err = 0;
836 		goto out_no_sb;
837 	}
838 	/* page 0 is the superblock, read it... */
839 	sb_page = alloc_page(GFP_KERNEL);
840 	if (!sb_page)
841 		return -ENOMEM;
842 	bitmap->storage.sb_page = sb_page;
843 
844 re_read:
845 	/* If cluster_slot is set, the cluster is setup */
846 	if (bitmap->cluster_slot >= 0) {
847 		sector_t bm_blocks = bitmap->mddev->resync_max_sectors;
848 
849 		bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks,
850 			   (bitmap->mddev->bitmap_info.chunksize >> 9));
851 		/* bits to bytes */
852 		bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t);
853 		/* to 4k blocks */
854 		bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
855 		offset = bitmap->cluster_slot * (bm_blocks << 3);
856 		pr_debug("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
857 			bitmap->cluster_slot, offset);
858 	}
859 
860 	if (bitmap->storage.file) {
861 		loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host);
862 		int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;
863 
864 		err = read_file_page(bitmap->storage.file, 0,
865 				bitmap, bytes, sb_page);
866 	} else {
867 		err = read_sb_page(bitmap->mddev, offset, sb_page, 0,
868 				   sizeof(bitmap_super_t));
869 	}
870 	if (err)
871 		return err;
872 
873 	err = -EINVAL;
874 	sb = kmap_local_page(sb_page);
875 
876 	chunksize = le32_to_cpu(sb->chunksize);
877 	daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
878 	write_behind = le32_to_cpu(sb->write_behind);
879 	sectors_reserved = le32_to_cpu(sb->sectors_reserved);
880 
881 	/* verify that the bitmap-specific fields are valid */
882 	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
883 		reason = "bad magic";
884 	else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
885 		 le32_to_cpu(sb->version) > BITMAP_MAJOR_CLUSTERED)
886 		reason = "unrecognized superblock version";
887 	else if (chunksize < 512)
888 		reason = "bitmap chunksize too small";
889 	else if (!is_power_of_2(chunksize))
890 		reason = "bitmap chunksize not a power of 2";
891 	else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT)
892 		reason = "daemon sleep period out of range";
893 	else if (write_behind > COUNTER_MAX)
894 		reason = "write-behind limit out of range (0 - 16383)";
895 	if (reason) {
896 		pr_warn("%s: invalid bitmap file superblock: %s\n",
897 			bmname(bitmap), reason);
898 		goto out;
899 	}
900 
901 	/*
902 	 * Setup nodes/clustername only if bitmap version is
903 	 * cluster-compatible
904 	 */
905 	if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
906 		nodes = le32_to_cpu(sb->nodes);
907 		strscpy(bitmap->mddev->bitmap_info.cluster_name,
908 				sb->cluster_name, 64);
909 	}
910 
911 	/* keep the array size field of the bitmap superblock up to date */
912 	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
913 
914 	if (bitmap->mddev->persistent) {
915 		/*
916 		 * We have a persistent array superblock, so compare the
917 		 * bitmap's UUID and event counter to the mddev's
918 		 */
919 		if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
920 			pr_warn("%s: bitmap superblock UUID mismatch\n",
921 				bmname(bitmap));
922 			goto out;
923 		}
924 		events = le64_to_cpu(sb->events);
925 		if (!nodes && (events < bitmap->mddev->events)) {
926 			pr_warn("%s: bitmap file is out of date (%llu < %llu) -- forcing full recovery\n",
927 				bmname(bitmap), events,
928 				(unsigned long long) bitmap->mddev->events);
929 			set_bit(BITMAP_STALE, &bitmap->flags);
930 		}
931 	}
932 
933 	/* assign fields using values from superblock */
934 	bitmap->flags |= le32_to_cpu(sb->state);
935 	if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
936 		set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
937 	bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
938 	err = 0;
939 
940 out:
941 	kunmap_local(sb);
942 	if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
943 		/* Assigning chunksize is required for "re_read" */
944 		bitmap->mddev->bitmap_info.chunksize = chunksize;
945 		err = md_setup_cluster(bitmap->mddev, nodes);
946 		if (err) {
947 			pr_warn("%s: Could not setup cluster service (%d)\n",
948 				bmname(bitmap), err);
949 			goto out_no_sb;
950 		}
951 		bitmap->cluster_slot = bitmap->mddev->cluster_ops->slot_number(bitmap->mddev);
952 		goto re_read;
953 	}
954 
955 out_no_sb:
956 	if (err == 0) {
957 		if (test_bit(BITMAP_STALE, &bitmap->flags))
958 			bitmap->events_cleared = bitmap->mddev->events;
959 		bitmap->mddev->bitmap_info.chunksize = chunksize;
960 		bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
961 		bitmap->mddev->bitmap_info.max_write_behind = write_behind;
962 		bitmap->mddev->bitmap_info.nodes = nodes;
963 		if (bitmap->mddev->bitmap_info.space == 0 ||
964 			bitmap->mddev->bitmap_info.space > sectors_reserved)
965 			bitmap->mddev->bitmap_info.space = sectors_reserved;
966 	} else {
967 		bitmap_print_sb(bitmap);
968 		if (bitmap->cluster_slot < 0)
969 			md_cluster_stop(bitmap->mddev);
970 	}
971 	return err;
972 }
973 
974 /*
975  * general bitmap file operations
976  */
977 
978 /*
979  * on-disk bitmap:
980  *
981  * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
982  * file a page at a time. There's a superblock at the start of the file.
983  */
984 /* calculate the index of the page that contains this bit */
985 static inline unsigned long file_page_index(struct bitmap_storage *store,
986 					    unsigned long chunk)
987 {
988 	if (store->sb_page)
989 		chunk += sizeof(bitmap_super_t) << 3;
990 	return chunk >> PAGE_BIT_SHIFT;
991 }
992 
993 /* calculate the (bit) offset of this bit within a page */
994 static inline unsigned long file_page_offset(struct bitmap_storage *store,
995 					     unsigned long chunk)
996 {
997 	if (store->sb_page)
998 		chunk += sizeof(bitmap_super_t) << 3;
999 	return chunk & (PAGE_BITS - 1);
1000 }
1001 
1002 /*
1003  * return a pointer to the page in the filemap that contains the given bit
1004  *
1005  */
1006 static inline struct page *filemap_get_page(struct bitmap_storage *store,
1007 					    unsigned long chunk)
1008 {
1009 	if (file_page_index(store, chunk) >= store->file_pages)
1010 		return NULL;
1011 	return store->filemap[file_page_index(store, chunk)];
1012 }
1013 
1014 static int md_bitmap_storage_alloc(struct bitmap_storage *store,
1015 				   unsigned long chunks, int with_super,
1016 				   int slot_number)
1017 {
1018 	int pnum, offset = 0;
1019 	unsigned long num_pages;
1020 	unsigned long bytes;
1021 
1022 	bytes = DIV_ROUND_UP(chunks, 8);
1023 	if (with_super)
1024 		bytes += sizeof(bitmap_super_t);
1025 
1026 	num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
1027 	offset = slot_number * num_pages;
1028 
1029 	store->filemap = kmalloc_objs(struct page *, num_pages);
1030 	if (!store->filemap)
1031 		return -ENOMEM;
1032 
1033 	if (with_super && !store->sb_page) {
1034 		store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
1035 		if (store->sb_page == NULL)
1036 			return -ENOMEM;
1037 	}
1038 
1039 	pnum = 0;
1040 	if (store->sb_page) {
1041 		store->filemap[0] = store->sb_page;
1042 		pnum = 1;
1043 		store->sb_index = offset;
1044 	}
1045 
1046 	for ( ; pnum < num_pages; pnum++) {
1047 		store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO);
1048 		if (!store->filemap[pnum]) {
1049 			store->file_pages = pnum;
1050 			return -ENOMEM;
1051 		}
1052 	}
1053 	store->file_pages = pnum;
1054 
1055 	/* We need 4 bits per page, rounded up to a multiple
1056 	 * of sizeof(unsigned long) */
1057 	store->filemap_attr = kzalloc(
1058 		roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
1059 		GFP_KERNEL);
1060 	if (!store->filemap_attr)
1061 		return -ENOMEM;
1062 
1063 	store->bytes = bytes;
1064 
1065 	return 0;
1066 }
1067 
1068 static void md_bitmap_file_unmap(struct bitmap_storage *store)
1069 {
1070 	struct file *file = store->file;
1071 	struct page *sb_page = store->sb_page;
1072 	struct page **map = store->filemap;
1073 	int pages = store->file_pages;
1074 
1075 	while (pages--)
1076 		if (map[pages] != sb_page) /* 0 is sb_page, release it below */
1077 			free_buffers(map[pages]);
1078 	kfree(map);
1079 	kfree(store->filemap_attr);
1080 
1081 	if (sb_page)
1082 		free_buffers(sb_page);
1083 
1084 	if (file) {
1085 		struct inode *inode = file_inode(file);
1086 		invalidate_mapping_pages(inode->i_mapping, 0, -1);
1087 		fput(file);
1088 	}
1089 }
1090 
1091 /*
1092  * bitmap_file_kick - if an error occurs while manipulating the bitmap file
1093  * then it is no longer reliable, so we stop using it and we mark the file
1094  * as failed in the superblock
1095  */
1096 static void md_bitmap_file_kick(struct bitmap *bitmap)
1097 {
1098 	if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) {
1099 		bitmap_update_sb(bitmap);
1100 
1101 		if (bitmap->storage.file) {
1102 			pr_warn("%s: kicking failed bitmap file %pD4 from array!\n",
1103 				bmname(bitmap), bitmap->storage.file);
1104 
1105 		} else
1106 			pr_warn("%s: disabling internal bitmap due to errors\n",
1107 				bmname(bitmap));
1108 	}
1109 }
1110 
/*
 * Per-page attribute flags.  The value is the bit position of the flag
 * inside the group of attribute bits kept for each bitmap page.
 */
enum bitmap_page_attr {
	/* there are set bits that need to be synced */
	BITMAP_PAGE_DIRTY = 0,
	/* there are bits that are being cleaned, i.e. counter is 1 or 2 */
	BITMAP_PAGE_PENDING = 1,
	/* there are cleared bits that need to be synced */
	BITMAP_PAGE_NEEDWRITE = 2,
};
1117 
1118 static inline void set_page_attr(struct bitmap *bitmap, int pnum,
1119 				 enum bitmap_page_attr attr)
1120 {
1121 	set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
1122 }
1123 
1124 static inline void clear_page_attr(struct bitmap *bitmap, int pnum,
1125 				   enum bitmap_page_attr attr)
1126 {
1127 	clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
1128 }
1129 
1130 static inline int test_page_attr(struct bitmap *bitmap, int pnum,
1131 				 enum bitmap_page_attr attr)
1132 {
1133 	return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
1134 }
1135 
1136 static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum,
1137 					   enum bitmap_page_attr attr)
1138 {
1139 	return test_and_clear_bit((pnum<<2) + attr,
1140 				  bitmap->storage.filemap_attr);
1141 }
1142 /*
1143  * bitmap_file_set_bit -- called before performing a write to the md device
1144  * to set (and eventually sync) a particular bit in the bitmap file
1145  *
1146  * we set the bit immediately, then we record the page number so that
1147  * when an unplug occurs, we can flush the dirty pages out to disk
1148  */
1149 static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
1150 {
1151 	unsigned long bit;
1152 	struct page *page;
1153 	void *kaddr;
1154 	unsigned long chunk = block >> bitmap->counts.chunkshift;
1155 	struct bitmap_storage *store = &bitmap->storage;
1156 	unsigned long index = file_page_index(store, chunk);
1157 	unsigned long node_offset = 0;
1158 
1159 	index += store->sb_index;
1160 	if (mddev_is_clustered(bitmap->mddev))
1161 		node_offset = bitmap->cluster_slot * store->file_pages;
1162 
1163 	page = filemap_get_page(&bitmap->storage, chunk);
1164 	if (!page)
1165 		return;
1166 	bit = file_page_offset(&bitmap->storage, chunk);
1167 
1168 	/* set the bit */
1169 	kaddr = kmap_local_page(page);
1170 	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
1171 		set_bit(bit, kaddr);
1172 	else
1173 		set_bit_le(bit, kaddr);
1174 	kunmap_local(kaddr);
1175 	pr_debug("set file bit %lu page %lu\n", bit, index);
1176 	/* record page number so it gets flushed to disk when unplug occurs */
1177 	set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_DIRTY);
1178 }
1179 
1180 static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
1181 {
1182 	unsigned long bit;
1183 	struct page *page;
1184 	void *paddr;
1185 	unsigned long chunk = block >> bitmap->counts.chunkshift;
1186 	struct bitmap_storage *store = &bitmap->storage;
1187 	unsigned long index = file_page_index(store, chunk);
1188 	unsigned long node_offset = 0;
1189 
1190 	index += store->sb_index;
1191 	if (mddev_is_clustered(bitmap->mddev))
1192 		node_offset = bitmap->cluster_slot * store->file_pages;
1193 
1194 	page = filemap_get_page(&bitmap->storage, chunk);
1195 	if (!page)
1196 		return;
1197 	bit = file_page_offset(&bitmap->storage, chunk);
1198 	paddr = kmap_local_page(page);
1199 	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
1200 		clear_bit(bit, paddr);
1201 	else
1202 		clear_bit_le(bit, paddr);
1203 	kunmap_local(paddr);
1204 	if (!test_page_attr(bitmap, index - node_offset, BITMAP_PAGE_NEEDWRITE)) {
1205 		set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_PENDING);
1206 		bitmap->allclean = 0;
1207 	}
1208 }
1209 
1210 static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
1211 {
1212 	unsigned long bit;
1213 	struct page *page;
1214 	void *paddr;
1215 	unsigned long chunk = block >> bitmap->counts.chunkshift;
1216 	int set = 0;
1217 
1218 	page = filemap_get_page(&bitmap->storage, chunk);
1219 	if (!page)
1220 		return -EINVAL;
1221 	bit = file_page_offset(&bitmap->storage, chunk);
1222 	paddr = kmap_local_page(page);
1223 	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
1224 		set = test_bit(bit, paddr);
1225 	else
1226 		set = test_bit_le(bit, paddr);
1227 	kunmap_local(paddr);
1228 	return set;
1229 }
1230 
1231 /* this gets called when the md device is ready to unplug its underlying
1232  * (slave) device queues -- before we let any writes go down, we need to
1233  * sync the dirty pages of the bitmap file to disk */
1234 static void __bitmap_unplug(struct bitmap *bitmap)
1235 {
1236 	unsigned long i;
1237 	int dirty, need_write;
1238 	int writing = 0;
1239 
1240 	if (!bitmap_enabled(bitmap, true))
1241 		return;
1242 
1243 	/* look at each page to see if there are any set bits that need to be
1244 	 * flushed out to disk */
1245 	for (i = 0; i < bitmap->storage.file_pages; i++) {
1246 		dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
1247 		need_write = test_and_clear_page_attr(bitmap, i,
1248 						      BITMAP_PAGE_NEEDWRITE);
1249 		if (dirty || need_write) {
1250 			if (!writing) {
1251 				md_bitmap_wait_writes(bitmap);
1252 				mddev_add_trace_msg(bitmap->mddev,
1253 					"md bitmap_unplug");
1254 			}
1255 			clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
1256 			filemap_write_page(bitmap, i, false);
1257 			writing = 1;
1258 		}
1259 	}
1260 	if (writing)
1261 		md_bitmap_wait_writes(bitmap);
1262 
1263 	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
1264 		md_bitmap_file_kick(bitmap);
1265 }
1266 
/*
 * Context for running __bitmap_unplug() from a work item: the work
 * item itself, the bitmap to unplug, and the completion that is
 * signalled once the unplug has run.
 */
struct bitmap_unplug_work {
	struct work_struct work;
	struct bitmap *bitmap;
	struct completion *done;
};
1272 
1273 static void md_bitmap_unplug_fn(struct work_struct *work)
1274 {
1275 	struct bitmap_unplug_work *unplug_work =
1276 		container_of(work, struct bitmap_unplug_work, work);
1277 
1278 	__bitmap_unplug(unplug_work->bitmap);
1279 	complete(unplug_work->done);
1280 }
1281 
1282 static void bitmap_unplug_async(struct bitmap *bitmap)
1283 {
1284 	DECLARE_COMPLETION_ONSTACK(done);
1285 	struct bitmap_unplug_work unplug_work;
1286 
1287 	INIT_WORK_ONSTACK(&unplug_work.work, md_bitmap_unplug_fn);
1288 	unplug_work.bitmap = bitmap;
1289 	unplug_work.done = &done;
1290 
1291 	queue_work(md_bitmap_wq, &unplug_work.work);
1292 	wait_for_completion(&done);
1293 	destroy_work_on_stack(&unplug_work.work);
1294 }
1295 
1296 static void bitmap_unplug(struct mddev *mddev, bool sync)
1297 {
1298 	struct bitmap *bitmap = mddev->bitmap;
1299 
1300 	if (!bitmap)
1301 		return;
1302 
1303 	if (sync)
1304 		__bitmap_unplug(bitmap);
1305 	else
1306 		bitmap_unplug_async(bitmap);
1307 }
1308 
1309 static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
1310 
/*
 * Initialize the in-memory bitmap from the on-disk bitmap and set up the memory
 * mapping of the bitmap file.
 *
 * Special case: If there's no bitmap file, or if the bitmap file had been
 * previously kicked from the array, we mark all the bits as 1's in order to
 * cause a full resync.
 *
 * We ignore all bits for sectors that end earlier than 'start'.
 * This is used when reading an out-of-date bitmap.
 *
 * Returns 0 on success.  On failure the error is logged and returned:
 * -ENOSPC when the bitmap file is shorter than the store needs, -EIO
 * when writing back an out-of-date bitmap hits a write error, or
 * whatever the page read helper reported.
 */
static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
{
	bool outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
	struct mddev *mddev = bitmap->mddev;
	unsigned long chunks = bitmap->counts.chunks;
	struct bitmap_storage *store = &bitmap->storage;
	struct file *file = store->file;
	unsigned long node_offset = 0;
	unsigned long bit_cnt = 0;
	unsigned long i;
	int ret;

	if (!file && !mddev->bitmap_info.offset) {
		/* No permanent bitmap - fill with '1s'. */
		store->filemap = NULL;
		store->file_pages = 0;
		for (i = 0; i < chunks ; i++) {
			/*
			 * Every chunk gets its memory bits set; only chunks
			 * that end at or after 'start' are flagged as
			 * needing a resync.
			 */
			int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift)
				      >= start);
			md_bitmap_set_memory_bits(bitmap,
						  (sector_t)i << bitmap->counts.chunkshift,
						  needed);
		}
		return 0;
	}

	/* A file-backed bitmap must be at least as large as the store */
	if (file && i_size_read(file->f_mapping->host) < store->bytes) {
		pr_warn("%s: bitmap file too short %lu < %lu\n",
			bmname(bitmap),
			(unsigned long) i_size_read(file->f_mapping->host),
			store->bytes);
		ret = -ENOSPC;
		goto err;
	}

	/* In a cluster, this node's pages start at its slot's page offset */
	if (mddev_is_clustered(mddev))
		node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE));

	/* Read every page of the store from the file or from the array */
	for (i = 0; i < store->file_pages; i++) {
		struct page *page = store->filemap[i];
		int count;

		/* the last page may be only partially used */
		if (i == store->file_pages - 1)
			count = store->bytes - i * PAGE_SIZE;
		else
			count = PAGE_SIZE;

		if (file)
			ret = read_file_page(file, i, bitmap, count, page);
		else
			ret = read_sb_page(mddev, 0, page, i + node_offset,
					   count);
		if (ret)
			goto err;
	}

	if (outofdate) {
		pr_warn("%s: bitmap file is out of date, doing full recovery\n",
			bmname(bitmap));

		for (i = 0; i < store->file_pages; i++) {
			struct page *page = store->filemap[i];
			unsigned long offset = 0;
			void *paddr;

			/* keep the superblock at the start of page 0 intact */
			if (i == 0 && !mddev->bitmap_info.external)
				offset = sizeof(bitmap_super_t);

			/*
			 * If the bitmap is out of date, dirty the whole page
			 * and write it out
			 */
			paddr = kmap_local_page(page);
			memset(paddr + offset, 0xff, PAGE_SIZE - offset);
			kunmap_local(paddr);

			filemap_write_page(bitmap, i, true);
			if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) {
				ret = -EIO;
				goto err;
			}
		}
	}

	/* Mirror every set storage bit into the in-memory counters */
	for (i = 0; i < chunks; i++) {
		struct page *page = filemap_get_page(&bitmap->storage, i);
		unsigned long bit = file_page_offset(&bitmap->storage, i);
		void *paddr;
		bool was_set;

		paddr = kmap_local_page(page);
		if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
			was_set = test_bit(bit, paddr);
		else
			was_set = test_bit_le(bit, paddr);
		kunmap_local(paddr);

		if (was_set) {
			/* if the disk bit is set, set the memory bit */
			int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift
				      >= start);
			md_bitmap_set_memory_bits(bitmap,
						  (sector_t)i << bitmap->counts.chunkshift,
						  needed);
			bit_cnt++;
		}
	}

	pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n",
		 bmname(bitmap), store->file_pages,
		 bit_cnt, chunks);

	return 0;

 err:
	pr_warn("%s: bitmap initialisation failed: %d\n",
		bmname(bitmap), ret);
	return ret;
}
1443 
1444 /* just flag bitmap pages as needing to be written. */
1445 static void bitmap_write_all(struct mddev *mddev)
1446 {
1447 	int i;
1448 	struct bitmap *bitmap = mddev->bitmap;
1449 
1450 	if (!bitmap || !bitmap->storage.filemap)
1451 		return;
1452 
1453 	/* Only one copy, so nothing needed */
1454 	if (bitmap->storage.file)
1455 		return;
1456 
1457 	for (i = 0; i < bitmap->storage.file_pages; i++)
1458 		set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE);
1459 	bitmap->allclean = 0;
1460 }
1461 
1462 static void md_bitmap_count_page(struct bitmap_counts *bitmap,
1463 				 sector_t offset, int inc)
1464 {
1465 	sector_t chunk = offset >> bitmap->chunkshift;
1466 	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1467 	bitmap->bp[page].count += inc;
1468 	md_bitmap_checkfree(bitmap, page);
1469 }
1470 
1471 static void md_bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset)
1472 {
1473 	sector_t chunk = offset >> bitmap->chunkshift;
1474 	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1475 	struct bitmap_page *bp = &bitmap->bp[page];
1476 
1477 	if (!bp->pending)
1478 		bp->pending = 1;
1479 }
1480 
1481 static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap,
1482 					       sector_t offset, sector_t *blocks,
1483 					       int create);
1484 
1485 static void mddev_set_timeout(struct mddev *mddev, unsigned long timeout,
1486 			      bool force)
1487 {
1488 	struct md_thread *thread;
1489 
1490 	rcu_read_lock();
1491 	thread = rcu_dereference(mddev->thread);
1492 
1493 	if (!thread)
1494 		goto out;
1495 
1496 	if (force || thread->timeout < MAX_SCHEDULE_TIMEOUT)
1497 		thread->timeout = timeout;
1498 
1499 out:
1500 	rcu_read_unlock();
1501 }
1502 
/*
 * bitmap daemon -- periodically wakes up to clean bits and flush pages
 *			out to disk
 *
 * A pass does nothing until daemon_sleep jiffies have elapsed since the
 * previous one.  Otherwise it promotes PENDING pages to NEEDWRITE,
 * refreshes events_cleared in the superblock page when needed, ages the
 * chunk counters (2 -> 1 -> 0, clearing the storage bit on the last
 * step) and finally starts writeout of NEEDWRITE pages.
 */
static void bitmap_daemon_work(struct mddev *mddev)
{
	struct bitmap *bitmap;
	unsigned long j;
	unsigned long nextpage;
	sector_t blocks;
	struct bitmap_counts *counts;

	/* Use a mutex to guard daemon_work against
	 * bitmap_destroy.
	 */
	mutex_lock(&mddev->bitmap_info.mutex);
	bitmap = mddev->bitmap;
	if (bitmap == NULL) {
		mutex_unlock(&mddev->bitmap_info.mutex);
		return;
	}
	/* Too soon since the last pass: only re-arm the timeout if needed */
	if (time_before(jiffies, bitmap->daemon_lastrun
			+ mddev->bitmap_info.daemon_sleep))
		goto done;

	bitmap->daemon_lastrun = jiffies;
	if (bitmap->allclean) {
		/* Nothing to age: let the md thread sleep indefinitely */
		mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);
		goto done;
	}
	/* Assume clean; anything still in flight below resets this to 0 */
	bitmap->allclean = 1;

	mddev_add_trace_msg(bitmap->mddev, "md bitmap_daemon_work");

	/* Any file-page which is PENDING now needs to be written.
	 * So set NEEDWRITE now, then after we make any last-minute changes
	 * we will write it.
	 */
	for (j = 0; j < bitmap->storage.file_pages; j++)
		if (test_and_clear_page_attr(bitmap, j,
					     BITMAP_PAGE_PENDING))
			set_page_attr(bitmap, j,
				      BITMAP_PAGE_NEEDWRITE);

	if (bitmap->need_sync &&
	    mddev->bitmap_info.external == 0) {
		/* Arrange for superblock update as well as
		 * other changes */
		bitmap_super_t *sb;
		bitmap->need_sync = 0;
		if (bitmap->storage.filemap) {
			sb = kmap_local_page(bitmap->storage.sb_page);
			sb->events_cleared =
				cpu_to_le64(bitmap->events_cleared);
			kunmap_local(sb);
			set_page_attr(bitmap, 0,
				      BITMAP_PAGE_NEEDWRITE);
		}
	}
	/* Now look at the bitmap counters and if any are '2' or '1',
	 * decrement and handle accordingly.
	 */
	counts = &bitmap->counts;
	spin_lock_irq(&counts->lock);
	nextpage = 0;
	for (j = 0; j < counts->chunks; j++) {
		bitmap_counter_t *bmc;
		sector_t  block = (sector_t)j << counts->chunkshift;

		if (j == nextpage) {
			/*
			 * First chunk of a counter page: skip the whole page
			 * (by jumping j to its last chunk) unless the page is
			 * flagged pending, in which case consume the flag.
			 */
			nextpage += PAGE_COUNTER_RATIO;
			if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) {
				j |= PAGE_COUNTER_MASK;
				continue;
			}
			counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0;
		}

		bmc = md_bitmap_get_counter(counts, block, &blocks, 0);
		if (!bmc) {
			/* no counter available: skip the rest of this page */
			j |= PAGE_COUNTER_MASK;
			continue;
		}
		if (*bmc == 1 && !bitmap->need_sync) {
			/* We can clear the bit */
			*bmc = 0;
			md_bitmap_count_page(counts, block, -1);
			md_bitmap_file_clear_bit(bitmap, block);
		} else if (*bmc && *bmc <= 2) {
			/* age the counter to 1 and revisit it next pass */
			*bmc = 1;
			md_bitmap_set_pending(counts, block);
			bitmap->allclean = 0;
		}
	}
	spin_unlock_irq(&counts->lock);

	md_bitmap_wait_writes(bitmap);
	/* Now start writeout on any page in NEEDWRITE that isn't DIRTY.
	 * DIRTY pages need to be written by bitmap_unplug so it can wait
	 * for them.
	 * If we find any DIRTY page we stop there and let bitmap_unplug
	 * handle all the rest.  This is important in the case where
	 * the first block holds the superblock and it has been updated.
	 * We mustn't write any other blocks before the superblock.
	 */
	for (j = 0;
	     j < bitmap->storage.file_pages
		     && !test_bit(BITMAP_STALE, &bitmap->flags);
	     j++) {
		if (test_page_attr(bitmap, j,
				   BITMAP_PAGE_DIRTY))
			/* bitmap_unplug will handle the rest */
			break;
		if (bitmap->storage.filemap &&
		    test_and_clear_page_attr(bitmap, j,
					     BITMAP_PAGE_NEEDWRITE))
			filemap_write_page(bitmap, j, false);
	}

 done:
	/* Work remains: make sure the md thread wakes up again in time */
	if (bitmap->allclean == 0)
		mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
	mutex_unlock(&mddev->bitmap_info.mutex);
}
1627 
/*
 * Find the counter for the chunk that contains sector @offset.
 *
 * @bitmap: counter set to look in
 * @offset: sector whose chunk counter is wanted
 * @blocks: set to the number of sectors from @offset to the end of the
 *          region one counter covers: a single chunk normally, or a
 *          whole counter page's worth when the page is hijacked or has
 *          no map
 * @create: passed on to md_bitmap_checkpage()
 *
 * Returns a pointer to the counter, or NULL when @offset lies beyond
 * the last counter page or md_bitmap_checkpage() fails.  @blocks is
 * filled in on every path.
 */
static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap,
					       sector_t offset, sector_t *blocks,
					       int create)
__releases(bitmap->lock)
__acquires(bitmap->lock)
{
	/* If 'create', we might release the lock and reclaim it.
	 * The lock must have been taken with interrupts enabled.
	 * If !create, we don't release the lock.
	 */
	sector_t chunk = offset >> bitmap->chunkshift;
	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
	unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
	sector_t csize = ((sector_t)1) << bitmap->chunkshift;
	int err;

	if (page >= bitmap->pages) {
		/*
		 * This can happen if bitmap_start_sync goes beyond
		 * End-of-device while looking for a whole page or
		 * user set a huge number to sysfs bitmap_set_bits.
		 */
		*blocks = csize - (offset & (csize - 1));
		return NULL;
	}
	err = md_bitmap_checkpage(bitmap, page, create, 0);

	/* Without a real map, one counter stands for many chunks at once */
	if (bitmap->bp[page].hijacked ||
	    bitmap->bp[page].map == NULL)
		csize = ((sector_t)1) << (bitmap->chunkshift +
					  PAGE_COUNTER_SHIFT);

	*blocks = csize - (offset & (csize - 1));

	if (err < 0)
		return NULL;

	/* now locked ... */

	if (bitmap->bp[page].hijacked) { /* hijacked pointer */
		/* should we use the first or second counter field
		 * of the hijacked pointer? */
		int hi = (pageoff > PAGE_COUNTER_MASK);
		/* the map pointer's own storage holds the counters */
		return  &((bitmap_counter_t *)
			  &bitmap->bp[page].map)[hi];
	} else /* page is allocated */
		return (bitmap_counter_t *)
			&(bitmap->bp[page].map[pageoff]);
}
1677 
/*
 * Account for a write that is about to start on @sectors sectors
 * beginning at @offset.
 *
 * For each counter region touched, the counter is raised; a counter
 * that was 0 also gets its storage bit set and its page count bumped.
 * If a counter has reached COUNTER_MAX the caller sleeps on
 * overflow_wait and retries.  Returns early when there is no bitmap or
 * no counter can be obtained.
 */
static void bitmap_start_write(struct mddev *mddev, sector_t offset,
			       unsigned long sectors)
{
	struct bitmap *bitmap = mddev->bitmap;

	if (!bitmap)
		return;

	while (sectors) {
		sector_t blocks;
		bitmap_counter_t *bmc;

		spin_lock_irq(&bitmap->counts.lock);
		bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 1);
		if (!bmc) {
			spin_unlock_irq(&bitmap->counts.lock);
			return;
		}

		if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) {
			DEFINE_WAIT(__wait);
			/* note that it is safe to do the prepare_to_wait
			 * after the test as long as we do it before dropping
			 * the spinlock.
			 */
			prepare_to_wait(&bitmap->overflow_wait, &__wait,
					TASK_UNINTERRUPTIBLE);
			spin_unlock_irq(&bitmap->counts.lock);
			schedule();
			finish_wait(&bitmap->overflow_wait, &__wait);
			/* retry the same offset once woken up */
			continue;
		}

		/*
		 * A counter of 0 or 1 is first brought to 2; a counter
		 * of 0 additionally gets its storage bit set.
		 */
		switch (*bmc) {
		case 0:
			md_bitmap_file_set_bit(bitmap, offset);
			md_bitmap_count_page(&bitmap->counts, offset, 1);
			fallthrough;
		case 1:
			*bmc = 2;
		}

		/* one more write in flight on this counter */
		(*bmc)++;

		spin_unlock_irq(&bitmap->counts.lock);

		/* advance to the next counter region */
		offset += blocks;
		if (sectors > blocks)
			sectors -= blocks;
		else
			sectors = 0;
	}
}
1731 
/*
 * Account for the end of a write to @sectors sectors beginning at
 * @offset.
 *
 * For each counter region touched, the counter is lowered.  When the
 * array is not degraded, events_cleared is brought up to the array's
 * event count; when it is degraded, the chunk is flagged NEEDED.
 * Returns early when there is no bitmap or no counter is found.
 */
static void bitmap_end_write(struct mddev *mddev, sector_t offset,
			     unsigned long sectors)
{
	struct bitmap *bitmap = mddev->bitmap;

	if (!bitmap)
		return;

	while (sectors) {
		sector_t blocks;
		unsigned long flags;
		bitmap_counter_t *bmc;

		spin_lock_irqsave(&bitmap->counts.lock, flags);
		bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 0);
		if (!bmc) {
			spin_unlock_irqrestore(&bitmap->counts.lock, flags);
			return;
		}

		if (!bitmap->mddev->degraded) {
			/* record the newest event count and request a sync */
			if (bitmap->events_cleared < bitmap->mddev->events) {
				bitmap->events_cleared = bitmap->mddev->events;
				bitmap->need_sync = 1;
				sysfs_notify_dirent_safe(
						bitmap->sysfs_can_clear);
			}
		} else if (!NEEDED(*bmc)) {
			*bmc |= NEEDED_MASK;
		}

		/* the counter is about to leave its maximum: wake waiters */
		if (COUNTER(*bmc) == COUNTER_MAX)
			wake_up(&bitmap->overflow_wait);

		(*bmc)--;
		if (*bmc <= 2) {
			/* low enough for the daemon to start ageing it */
			md_bitmap_set_pending(&bitmap->counts, offset);
			bitmap->allclean = 0;
		}
		spin_unlock_irqrestore(&bitmap->counts.lock, flags);
		/* advance to the next counter region */
		offset += blocks;
		if (sectors > blocks)
			sectors -= blocks;
		else
			sectors = 0;
	}
}
1779 
1780 static bool __bitmap_start_sync(struct bitmap *bitmap, sector_t offset,
1781 				sector_t *blocks, bool degraded)
1782 {
1783 	bitmap_counter_t *bmc;
1784 	bool rv = false;
1785 
1786 	spin_lock_irq(&bitmap->counts.lock);
1787 	bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
1788 	if (bmc) {
1789 		/* locked */
1790 		if (RESYNC(*bmc)) {
1791 			rv = true;
1792 		} else if (NEEDED(*bmc)) {
1793 			rv = true;
1794 			if (!degraded) { /* don't set/clear bits if degraded */
1795 				*bmc |= RESYNC_MASK;
1796 				*bmc &= ~NEEDED_MASK;
1797 			}
1798 		}
1799 	}
1800 	spin_unlock_irq(&bitmap->counts.lock);
1801 
1802 	return rv;
1803 }
1804 
1805 static bool bitmap_start_sync(struct mddev *mddev, sector_t offset,
1806 			      sector_t *blocks, bool degraded)
1807 {
1808 	/* bitmap_start_sync must always report on multiples of whole
1809 	 * pages, otherwise resync (which is very PAGE_SIZE based) will
1810 	 * get confused.
1811 	 * So call __bitmap_start_sync repeatedly (if needed) until
1812 	 * At least PAGE_SIZE>>9 blocks are covered.
1813 	 * Return the 'or' of the result.
1814 	 */
1815 	bool rv = false;
1816 	sector_t blocks1;
1817 
1818 	*blocks = 0;
1819 	while (*blocks < (PAGE_SIZE>>9)) {
1820 		rv |= __bitmap_start_sync(mddev->bitmap, offset,
1821 					  &blocks1, degraded);
1822 		offset += blocks1;
1823 		*blocks += blocks1;
1824 	}
1825 
1826 	return rv;
1827 }
1828 
1829 static void __bitmap_end_sync(struct bitmap *bitmap, sector_t offset,
1830 			      sector_t *blocks, bool aborted)
1831 {
1832 	bitmap_counter_t *bmc;
1833 	unsigned long flags;
1834 
1835 	spin_lock_irqsave(&bitmap->counts.lock, flags);
1836 	bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
1837 	if (bmc == NULL)
1838 		goto unlock;
1839 	/* locked */
1840 	if (RESYNC(*bmc)) {
1841 		*bmc &= ~RESYNC_MASK;
1842 
1843 		if (!NEEDED(*bmc) && aborted)
1844 			*bmc |= NEEDED_MASK;
1845 		else {
1846 			if (*bmc <= 2) {
1847 				md_bitmap_set_pending(&bitmap->counts, offset);
1848 				bitmap->allclean = 0;
1849 			}
1850 		}
1851 	}
1852  unlock:
1853 	spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1854 }
1855 
1856 static void bitmap_end_sync(struct mddev *mddev, sector_t offset,
1857 			    sector_t *blocks)
1858 {
1859 	__bitmap_end_sync(mddev->bitmap, offset, blocks, true);
1860 }
1861 
1862 static void bitmap_close_sync(struct mddev *mddev)
1863 {
1864 	/* Sync has finished, and any bitmap chunks that weren't synced
1865 	 * properly have been aborted.  It remains to us to clear the
1866 	 * RESYNC bit wherever it is still on
1867 	 */
1868 	sector_t sector = 0;
1869 	sector_t blocks;
1870 	struct bitmap *bitmap = mddev->bitmap;
1871 
1872 	if (!bitmap)
1873 		return;
1874 
1875 	while (sector < bitmap->mddev->resync_max_sectors) {
1876 		__bitmap_end_sync(bitmap, sector, &blocks, false);
1877 		sector += blocks;
1878 	}
1879 }
1880 
1881 static void bitmap_cond_end_sync(struct mddev *mddev, sector_t sector,
1882 				 bool force)
1883 {
1884 	sector_t s = 0;
1885 	sector_t blocks;
1886 	struct bitmap *bitmap = mddev->bitmap;
1887 
1888 	if (!bitmap)
1889 		return;
1890 	if (sector == 0) {
1891 		bitmap->last_end_sync = jiffies;
1892 		return;
1893 	}
1894 	if (!force && time_before(jiffies, (bitmap->last_end_sync
1895 				  + bitmap->mddev->bitmap_info.daemon_sleep)))
1896 		return;
1897 	wait_event(bitmap->mddev->recovery_wait,
1898 		   atomic_read(&bitmap->mddev->recovery_active) == 0);
1899 
1900 	bitmap->mddev->curr_resync_completed = sector;
1901 	set_bit(MD_SB_CHANGE_CLEAN, &bitmap->mddev->sb_flags);
1902 	sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);
1903 	s = 0;
1904 	while (s < sector && s < bitmap->mddev->resync_max_sectors) {
1905 		__bitmap_end_sync(bitmap, s, &blocks, false);
1906 		s += blocks;
1907 	}
1908 	bitmap->last_end_sync = jiffies;
1909 	sysfs_notify_dirent_safe(bitmap->mddev->sysfs_completed);
1910 }
1911 
1912 static void bitmap_sync_with_cluster(struct mddev *mddev,
1913 				     sector_t old_lo, sector_t old_hi,
1914 				     sector_t new_lo, sector_t new_hi)
1915 {
1916 	struct bitmap *bitmap = mddev->bitmap;
1917 	sector_t sector, blocks = 0;
1918 
1919 	for (sector = old_lo; sector < new_lo; ) {
1920 		__bitmap_end_sync(bitmap, sector, &blocks, false);
1921 		sector += blocks;
1922 	}
1923 	WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n");
1924 
1925 	for (sector = old_hi; sector < new_hi; ) {
1926 		bitmap_start_sync(mddev, sector, &blocks, false);
1927 		sector += blocks;
1928 	}
1929 	WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n");
1930 }
1931 
1932 static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
1933 {
1934 	/* For each chunk covered by any of these sectors, set the
1935 	 * counter to 2 and possibly set resync_needed.  They should all
1936 	 * be 0 at this point
1937 	 */
1938 
1939 	sector_t secs;
1940 	bitmap_counter_t *bmc;
1941 	spin_lock_irq(&bitmap->counts.lock);
1942 	bmc = md_bitmap_get_counter(&bitmap->counts, offset, &secs, 1);
1943 	if (!bmc) {
1944 		spin_unlock_irq(&bitmap->counts.lock);
1945 		return;
1946 	}
1947 	if (!*bmc) {
1948 		*bmc = 2;
1949 		md_bitmap_count_page(&bitmap->counts, offset, 1);
1950 		md_bitmap_set_pending(&bitmap->counts, offset);
1951 		bitmap->allclean = 0;
1952 	}
1953 	if (needed)
1954 		*bmc |= NEEDED_MASK;
1955 	spin_unlock_irq(&bitmap->counts.lock);
1956 }
1957 
1958 /* dirty the memory and file bits for bitmap chunks "s" to "e" */
1959 static void bitmap_dirty_bits(struct mddev *mddev, unsigned long s,
1960 			      unsigned long e)
1961 {
1962 	unsigned long chunk;
1963 	struct bitmap *bitmap = mddev->bitmap;
1964 
1965 	if (!bitmap)
1966 		return;
1967 
1968 	for (chunk = s; chunk <= e; chunk++) {
1969 		sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift;
1970 
1971 		md_bitmap_set_memory_bits(bitmap, sec, 1);
1972 		md_bitmap_file_set_bit(bitmap, sec);
1973 		if (sec < bitmap->mddev->resync_offset)
1974 			/* We are asserting that the array is dirty,
1975 			 * so move the resync_offset address back so
1976 			 * that it is obvious that it is dirty
1977 			 */
1978 			bitmap->mddev->resync_offset = sec;
1979 	}
1980 }
1981 
1982 static void bitmap_flush(struct mddev *mddev)
1983 {
1984 	struct bitmap *bitmap = mddev->bitmap;
1985 	long sleep;
1986 
1987 	if (!bitmap) /* there was no bitmap */
1988 		return;
1989 
1990 	/* run the daemon_work three time to ensure everything is flushed
1991 	 * that can be
1992 	 */
1993 	sleep = mddev->bitmap_info.daemon_sleep * 2;
1994 	bitmap->daemon_lastrun -= sleep;
1995 	bitmap_daemon_work(mddev);
1996 	bitmap->daemon_lastrun -= sleep;
1997 	bitmap_daemon_work(mddev);
1998 	bitmap->daemon_lastrun -= sleep;
1999 	bitmap_daemon_work(mddev);
2000 	if (mddev->bitmap_info.external)
2001 		md_super_wait(mddev);
2002 	bitmap_update_sb(bitmap);
2003 }
2004 
2005 static void md_bitmap_free(void *data)
2006 {
2007 	unsigned long k, pages;
2008 	struct bitmap_page *bp;
2009 	struct bitmap *bitmap = data;
2010 
2011 	if (!bitmap) /* there was no bitmap */
2012 		return;
2013 
2014 	if (bitmap->sysfs_can_clear)
2015 		sysfs_put(bitmap->sysfs_can_clear);
2016 
2017 	if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info &&
2018 		bitmap->cluster_slot == bitmap->mddev->cluster_ops->slot_number(bitmap->mddev))
2019 		md_cluster_stop(bitmap->mddev);
2020 
2021 	/* Shouldn't be needed - but just in case.... */
2022 	wait_event(bitmap->write_wait,
2023 		   atomic_read(&bitmap->pending_writes) == 0);
2024 
2025 	/* release the bitmap file  */
2026 	md_bitmap_file_unmap(&bitmap->storage);
2027 
2028 	bp = bitmap->counts.bp;
2029 	pages = bitmap->counts.pages;
2030 
2031 	/* free all allocated memory */
2032 
2033 	if (bp) /* deallocate the page memory */
2034 		for (k = 0; k < pages; k++)
2035 			if (bp[k].map && !bp[k].hijacked)
2036 				kfree(bp[k].map);
2037 	kfree(bp);
2038 	kfree(bitmap);
2039 }
2040 
2041 static void bitmap_start_behind_write(struct mddev *mddev)
2042 {
2043 	struct bitmap *bitmap = mddev->bitmap;
2044 	int bw;
2045 
2046 	atomic_inc(&bitmap->behind_writes);
2047 	bw = atomic_read(&bitmap->behind_writes);
2048 	if (bw > bitmap->behind_writes_used)
2049 		bitmap->behind_writes_used = bw;
2050 
2051 	pr_debug("inc write-behind count %d/%lu\n",
2052 		 bw, bitmap->mddev->bitmap_info.max_write_behind);
2053 }
2054 
2055 static void bitmap_end_behind_write(struct mddev *mddev)
2056 {
2057 	struct bitmap *bitmap = mddev->bitmap;
2058 
2059 	if (atomic_dec_and_test(&bitmap->behind_writes))
2060 		wake_up(&bitmap->behind_wait);
2061 	pr_debug("dec write-behind count %d/%lu\n",
2062 		 atomic_read(&bitmap->behind_writes),
2063 		 bitmap->mddev->bitmap_info.max_write_behind);
2064 }
2065 
2066 static void bitmap_wait_behind_writes(struct mddev *mddev)
2067 {
2068 	struct bitmap *bitmap = mddev->bitmap;
2069 
2070 	/* wait for behind writes to complete */
2071 	if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
2072 		pr_debug("md:%s: behind writes in progress - waiting to stop.\n",
2073 			 mdname(mddev));
2074 		/* need to kick something here to make sure I/O goes? */
2075 		wait_event(bitmap->behind_wait,
2076 			   atomic_read(&bitmap->behind_writes) == 0);
2077 	}
2078 }
2079 
2080 static void bitmap_destroy(struct mddev *mddev)
2081 {
2082 	struct bitmap *bitmap = mddev->bitmap;
2083 
2084 	if (!bitmap) /* there was no bitmap */
2085 		return;
2086 
2087 	bitmap_wait_behind_writes(mddev);
2088 	if (!test_bit(MD_SERIALIZE_POLICY, &mddev->flags))
2089 		mddev_destroy_serial_pool(mddev, NULL);
2090 
2091 	mutex_lock(&mddev->bitmap_info.mutex);
2092 	spin_lock(&mddev->lock);
2093 	mddev->bitmap = NULL; /* disconnect from the md device */
2094 	spin_unlock(&mddev->lock);
2095 	mutex_unlock(&mddev->bitmap_info.mutex);
2096 	mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);
2097 
2098 	md_bitmap_free(bitmap);
2099 }
2100 
2101 /*
2102  * initialize the bitmap structure
2103  * if this returns an error, bitmap_destroy must be called to do clean up
2104  * once mddev->bitmap is set
2105  */
2106 static struct bitmap *__bitmap_create(struct mddev *mddev, int slot)
2107 {
2108 	struct bitmap *bitmap;
2109 	sector_t blocks = mddev->resync_max_sectors;
2110 	struct file *file = mddev->bitmap_info.file;
2111 	int err;
2112 	struct kernfs_node *bm = NULL;
2113 
2114 	BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
2115 
2116 	BUG_ON(file && mddev->bitmap_info.offset);
2117 
2118 	if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
2119 		pr_notice("md/raid:%s: array with journal cannot have bitmap\n",
2120 			  mdname(mddev));
2121 		return ERR_PTR(-EBUSY);
2122 	}
2123 
2124 	bitmap = kzalloc_obj(*bitmap);
2125 	if (!bitmap)
2126 		return ERR_PTR(-ENOMEM);
2127 
2128 	spin_lock_init(&bitmap->counts.lock);
2129 	atomic_set(&bitmap->pending_writes, 0);
2130 	init_waitqueue_head(&bitmap->write_wait);
2131 	init_waitqueue_head(&bitmap->overflow_wait);
2132 	init_waitqueue_head(&bitmap->behind_wait);
2133 
2134 	bitmap->mddev = mddev;
2135 	bitmap->cluster_slot = slot;
2136 
2137 	if (mddev->kobj.sd)
2138 		bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
2139 	if (bm) {
2140 		bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
2141 		sysfs_put(bm);
2142 	} else
2143 		bitmap->sysfs_can_clear = NULL;
2144 
2145 	bitmap->storage.file = file;
2146 	if (file) {
2147 		get_file(file);
2148 		/* As future accesses to this file will use bmap,
2149 		 * and bypass the page cache, we must sync the file
2150 		 * first.
2151 		 */
2152 		vfs_fsync(file, 1);
2153 	}
2154 	/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
2155 	if (!mddev->bitmap_info.external) {
2156 		/*
2157 		 * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is
2158 		 * instructing us to create a new on-disk bitmap instance.
2159 		 */
2160 		if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags))
2161 			err = md_bitmap_new_disk_sb(bitmap);
2162 		else
2163 			err = md_bitmap_read_sb(bitmap);
2164 	} else {
2165 		err = 0;
2166 		if (mddev->bitmap_info.chunksize == 0 ||
2167 		    mddev->bitmap_info.daemon_sleep == 0)
2168 			/* chunksize and time_base need to be
2169 			 * set first. */
2170 			err = -EINVAL;
2171 	}
2172 	if (err)
2173 		goto error;
2174 
2175 	bitmap->daemon_lastrun = jiffies;
2176 	err = __bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize,
2177 			      true);
2178 	if (err)
2179 		goto error;
2180 
2181 	pr_debug("created bitmap (%lu pages) for device %s\n",
2182 		 bitmap->counts.pages, bmname(bitmap));
2183 
2184 	err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
2185 	if (err)
2186 		goto error;
2187 
2188 	return bitmap;
2189  error:
2190 	md_bitmap_free(bitmap);
2191 	return ERR_PTR(err);
2192 }
2193 
2194 static int bitmap_create(struct mddev *mddev)
2195 {
2196 	struct bitmap *bitmap = __bitmap_create(mddev, -1);
2197 
2198 	if (IS_ERR(bitmap))
2199 		return PTR_ERR(bitmap);
2200 
2201 	mddev->bitmap = bitmap;
2202 	return 0;
2203 }
2204 
2205 static int bitmap_load(struct mddev *mddev)
2206 {
2207 	int err = 0;
2208 	sector_t start = 0;
2209 	sector_t sector = 0;
2210 	struct bitmap *bitmap = mddev->bitmap;
2211 	struct md_rdev *rdev;
2212 
2213 	if (!bitmap)
2214 		goto out;
2215 
2216 	rdev_for_each(rdev, mddev)
2217 		mddev_create_serial_pool(mddev, rdev);
2218 
2219 	if (mddev_is_clustered(mddev))
2220 		mddev->cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
2221 
2222 	/* Clear out old bitmap info first:  Either there is none, or we
2223 	 * are resuming after someone else has possibly changed things,
2224 	 * so we should forget old cached info.
2225 	 * All chunks should be clean, but some might need_sync.
2226 	 */
2227 	while (sector < mddev->resync_max_sectors) {
2228 		sector_t blocks;
2229 		bitmap_start_sync(mddev, sector, &blocks, false);
2230 		sector += blocks;
2231 	}
2232 	bitmap_close_sync(mddev);
2233 
2234 	if (mddev->degraded == 0
2235 	    || bitmap->events_cleared == mddev->events)
2236 		/* no need to keep dirty bits to optimise a
2237 		 * re-add of a missing device */
2238 		start = mddev->resync_offset;
2239 
2240 	mutex_lock(&mddev->bitmap_info.mutex);
2241 	err = md_bitmap_init_from_disk(bitmap, start);
2242 	mutex_unlock(&mddev->bitmap_info.mutex);
2243 
2244 	if (err)
2245 		goto out;
2246 	clear_bit(BITMAP_STALE, &bitmap->flags);
2247 
2248 	/* Kick recovery in case any bits were set */
2249 	set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
2250 
2251 	mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
2252 	md_wakeup_thread(mddev->thread);
2253 
2254 	bitmap_update_sb(bitmap);
2255 
2256 	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
2257 		err = -EIO;
2258 out:
2259 	return err;
2260 }
2261 
/*
 * Build a bitmap for cluster slot @slot and populate it from disk.
 *
 * Returns the bitmap or an ERR_PTR().  On success the caller owns the
 * result and must release it with md_bitmap_free().
 */
static void *bitmap_get_from_slot(struct mddev *mddev, int slot)
{
	struct bitmap *bitmap;
	int rv;

	bitmap = __bitmap_create(mddev, slot);
	if (IS_ERR(bitmap)) {
		rv = PTR_ERR(bitmap);
		return ERR_PTR(rv);
	}

	rv = md_bitmap_init_from_disk(bitmap, 0);
	if (!rv)
		return bitmap;

	/* loading failed: do not leak the freshly created bitmap */
	md_bitmap_free(bitmap);
	return ERR_PTR(rv);
}
2282 
2283 /* Loads the bitmap associated with slot and copies the resync information
2284  * to our bitmap
2285  */
2286 static int bitmap_copy_from_slot(struct mddev *mddev, int slot, sector_t *low,
2287 				 sector_t *high, bool clear_bits)
2288 {
2289 	int rv = 0, i, j;
2290 	sector_t block, lo = 0, hi = 0;
2291 	struct bitmap_counts *counts;
2292 	struct bitmap *bitmap;
2293 
2294 	bitmap = bitmap_get_from_slot(mddev, slot);
2295 	if (IS_ERR(bitmap)) {
2296 		pr_err("%s can't get bitmap from slot %d\n", __func__, slot);
2297 		return -1;
2298 	}
2299 
2300 	counts = &bitmap->counts;
2301 	for (j = 0; j < counts->chunks; j++) {
2302 		block = (sector_t)j << counts->chunkshift;
2303 		if (md_bitmap_file_test_bit(bitmap, block)) {
2304 			if (!lo)
2305 				lo = block;
2306 			hi = block;
2307 			md_bitmap_file_clear_bit(bitmap, block);
2308 			md_bitmap_set_memory_bits(mddev->bitmap, block, 1);
2309 			md_bitmap_file_set_bit(mddev->bitmap, block);
2310 		}
2311 	}
2312 
2313 	if (clear_bits) {
2314 		bitmap_update_sb(bitmap);
2315 		/* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs
2316 		 * BITMAP_PAGE_DIRTY or _NEEDWRITE to write ... */
2317 		for (i = 0; i < bitmap->storage.file_pages; i++)
2318 			if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING))
2319 				set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE);
2320 		__bitmap_unplug(bitmap);
2321 	}
2322 	__bitmap_unplug(mddev->bitmap);
2323 	*low = lo;
2324 	*high = hi;
2325 	md_bitmap_free(bitmap);
2326 
2327 	return rv;
2328 }
2329 
2330 static void bitmap_set_pages(void *data, unsigned long pages)
2331 {
2332 	struct bitmap *bitmap = data;
2333 
2334 	bitmap->counts.pages = pages;
2335 }
2336 
2337 static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats)
2338 {
2339 	struct bitmap_storage *storage;
2340 	struct bitmap_counts *counts;
2341 	struct bitmap *bitmap = data;
2342 	bitmap_super_t *sb;
2343 
2344 	if (!bitmap)
2345 		return -ENOENT;
2346 	if (!bitmap->storage.sb_page)
2347 		return -EINVAL;
2348 	sb = kmap_local_page(bitmap->storage.sb_page);
2349 	stats->sync_size = le64_to_cpu(sb->sync_size);
2350 	kunmap_local(sb);
2351 
2352 	counts = &bitmap->counts;
2353 	stats->missing_pages = counts->missing_pages;
2354 	stats->pages = counts->pages;
2355 
2356 	storage = &bitmap->storage;
2357 	stats->file_pages = storage->file_pages;
2358 	stats->file = storage->file;
2359 
2360 	stats->behind_writes = atomic_read(&bitmap->behind_writes);
2361 	stats->behind_wait = wq_has_sleeper(&bitmap->behind_wait);
2362 	stats->events_cleared = bitmap->events_cleared;
2363 	return 0;
2364 }
2365 
2366 static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks,
2367 			   int chunksize, bool init)
2368 {
2369 	/* If chunk_size is 0, choose an appropriate chunk size.
2370 	 * Then possibly allocate new storage space.
2371 	 * Then quiesce, copy bits, replace bitmap, and re-start
2372 	 *
2373 	 * This function is called both to set up the initial bitmap
2374 	 * and to resize the bitmap while the array is active.
2375 	 * If this happens as a result of the array being resized,
2376 	 * chunksize will be zero, and we need to choose a suitable
2377 	 * chunksize, otherwise we use what we are given.
2378 	 */
2379 	struct bitmap_storage store;
2380 	struct bitmap_counts old_counts;
2381 	unsigned long chunks;
2382 	sector_t block;
2383 	sector_t old_blocks, new_blocks;
2384 	int chunkshift;
2385 	int ret = 0;
2386 	long pages;
2387 	struct bitmap_page *new_bp;
2388 
2389 	if (bitmap->storage.file && !init) {
2390 		pr_info("md: cannot resize file-based bitmap\n");
2391 		return -EINVAL;
2392 	}
2393 
2394 	if (chunksize == 0) {
2395 		/* If there is enough space, leave the chunk size unchanged,
2396 		 * else increase by factor of two until there is enough space.
2397 		 */
2398 		long bytes;
2399 		long space = bitmap->mddev->bitmap_info.space;
2400 
2401 		if (space == 0) {
2402 			/* We don't know how much space there is, so limit
2403 			 * to current size - in sectors.
2404 			 */
2405 			bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8);
2406 			if (!bitmap->mddev->bitmap_info.external)
2407 				bytes += sizeof(bitmap_super_t);
2408 			space = DIV_ROUND_UP(bytes, 512);
2409 			bitmap->mddev->bitmap_info.space = space;
2410 		}
2411 		chunkshift = bitmap->counts.chunkshift;
2412 		chunkshift--;
2413 		do {
2414 			/* 'chunkshift' is shift from block size to chunk size */
2415 			chunkshift++;
2416 			chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
2417 			bytes = DIV_ROUND_UP(chunks, 8);
2418 			if (!bitmap->mddev->bitmap_info.external)
2419 				bytes += sizeof(bitmap_super_t);
2420 		} while (bytes > (space << 9) && (chunkshift + BITMAP_BLOCK_SHIFT) <
2421 			(BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize) - 1));
2422 	} else
2423 		chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT;
2424 
2425 	chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
2426 	memset(&store, 0, sizeof(store));
2427 	if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
2428 		ret = md_bitmap_storage_alloc(&store, chunks,
2429 					      !bitmap->mddev->bitmap_info.external,
2430 					      mddev_is_clustered(bitmap->mddev)
2431 					      ? bitmap->cluster_slot : 0);
2432 	if (ret) {
2433 		md_bitmap_file_unmap(&store);
2434 		goto err;
2435 	}
2436 
2437 	pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO);
2438 
2439 	new_bp = kzalloc_objs(*new_bp, pages);
2440 	ret = -ENOMEM;
2441 	if (!new_bp) {
2442 		md_bitmap_file_unmap(&store);
2443 		goto err;
2444 	}
2445 
2446 	if (!init)
2447 		bitmap->mddev->pers->quiesce(bitmap->mddev, 1);
2448 
2449 	store.file = bitmap->storage.file;
2450 	bitmap->storage.file = NULL;
2451 
2452 	if (store.sb_page && bitmap->storage.sb_page)
2453 		memcpy(page_address(store.sb_page),
2454 		       page_address(bitmap->storage.sb_page),
2455 		       sizeof(bitmap_super_t));
2456 	mutex_lock(&bitmap->mddev->bitmap_info.mutex);
2457 	spin_lock_irq(&bitmap->counts.lock);
2458 	md_bitmap_file_unmap(&bitmap->storage);
2459 	bitmap->storage = store;
2460 
2461 	old_counts = bitmap->counts;
2462 	bitmap->counts.bp = new_bp;
2463 	bitmap->counts.pages = pages;
2464 	bitmap->counts.missing_pages = pages;
2465 	bitmap->counts.chunkshift = chunkshift;
2466 	bitmap->counts.chunks = chunks;
2467 	bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift +
2468 						     BITMAP_BLOCK_SHIFT);
2469 
2470 	blocks = min(old_counts.chunks << old_counts.chunkshift,
2471 		     chunks << chunkshift);
2472 
2473 	/* For cluster raid, need to pre-allocate bitmap */
2474 	if (mddev_is_clustered(bitmap->mddev)) {
2475 		unsigned long page;
2476 		for (page = 0; page < pages; page++) {
2477 			ret = md_bitmap_checkpage(&bitmap->counts, page, 1, 1);
2478 			if (ret) {
2479 				unsigned long k;
2480 
2481 				/* deallocate the page memory */
2482 				for (k = 0; k < page; k++) {
2483 					kfree(new_bp[k].map);
2484 				}
2485 				kfree(new_bp);
2486 
2487 				/* restore some fields from old_counts */
2488 				bitmap->counts.bp = old_counts.bp;
2489 				bitmap->counts.pages = old_counts.pages;
2490 				bitmap->counts.missing_pages = old_counts.pages;
2491 				bitmap->counts.chunkshift = old_counts.chunkshift;
2492 				bitmap->counts.chunks = old_counts.chunks;
2493 				bitmap->mddev->bitmap_info.chunksize =
2494 					1UL << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT);
2495 				blocks = old_counts.chunks << old_counts.chunkshift;
2496 				pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n");
2497 				break;
2498 			} else
2499 				bitmap->counts.bp[page].count += 1;
2500 		}
2501 	}
2502 
2503 	for (block = 0; block < blocks; ) {
2504 		bitmap_counter_t *bmc_old, *bmc_new;
2505 		int set;
2506 
2507 		bmc_old = md_bitmap_get_counter(&old_counts, block, &old_blocks, 0);
2508 		set = bmc_old && NEEDED(*bmc_old);
2509 
2510 		if (set) {
2511 			bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1);
2512 			if (bmc_new) {
2513 				if (*bmc_new == 0) {
2514 					/* need to set on-disk bits too. */
2515 					sector_t end = block + new_blocks;
2516 					sector_t start = block >> chunkshift;
2517 
2518 					start <<= chunkshift;
2519 					while (start < end) {
2520 						md_bitmap_file_set_bit(bitmap, block);
2521 						start += 1 << chunkshift;
2522 					}
2523 					*bmc_new = 2;
2524 					md_bitmap_count_page(&bitmap->counts, block, 1);
2525 					md_bitmap_set_pending(&bitmap->counts, block);
2526 				}
2527 				*bmc_new |= NEEDED_MASK;
2528 			}
2529 			if (new_blocks < old_blocks)
2530 				old_blocks = new_blocks;
2531 		}
2532 		block += old_blocks;
2533 	}
2534 
2535 	if (bitmap->counts.bp != old_counts.bp) {
2536 		unsigned long k;
2537 		for (k = 0; k < old_counts.pages; k++)
2538 			if (!old_counts.bp[k].hijacked)
2539 				kfree(old_counts.bp[k].map);
2540 		kfree(old_counts.bp);
2541 	}
2542 
2543 	if (!init) {
2544 		int i;
2545 		while (block < (chunks << chunkshift)) {
2546 			bitmap_counter_t *bmc;
2547 			bmc = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1);
2548 			if (bmc) {
2549 				/* new space.  It needs to be resynced, so
2550 				 * we set NEEDED_MASK.
2551 				 */
2552 				if (*bmc == 0) {
2553 					*bmc = NEEDED_MASK | 2;
2554 					md_bitmap_count_page(&bitmap->counts, block, 1);
2555 					md_bitmap_set_pending(&bitmap->counts, block);
2556 				}
2557 			}
2558 			block += new_blocks;
2559 		}
2560 		for (i = 0; i < bitmap->storage.file_pages; i++)
2561 			set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
2562 	}
2563 	spin_unlock_irq(&bitmap->counts.lock);
2564 	mutex_unlock(&bitmap->mddev->bitmap_info.mutex);
2565 	if (!init) {
2566 		__bitmap_unplug(bitmap);
2567 		bitmap->mddev->pers->quiesce(bitmap->mddev, 0);
2568 	}
2569 	ret = 0;
2570 err:
2571 	return ret;
2572 }
2573 
2574 static int bitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize)
2575 {
2576 	struct bitmap *bitmap = mddev->bitmap;
2577 
2578 	if (!bitmap)
2579 		return 0;
2580 
2581 	return __bitmap_resize(bitmap, blocks, chunksize, false);
2582 }
2583 
/* "none" bitmap backend: a bitmap is never enabled; both arguments are ignored. */
static bool bitmap_none_enabled(void *data, bool flush)
{
	return false;
}
2588 
/* "none" bitmap backend: nothing to create, always reports success (0). */
static int bitmap_none_create(struct mddev *mddev)
{
	return 0;
}
2593 
/* "none" bitmap backend: nothing to load, always reports success (0). */
static int bitmap_none_load(struct mddev *mddev)
{
	return 0;
}
2598 
/* "none" bitmap backend: there is nothing to tear down. */
static void bitmap_none_destroy(struct mddev *mddev)
{
}
2602 
/* "none" bitmap backend: no statistics exist, so always fail with -ENOENT. */
static int bitmap_none_get_stats(void *data, struct md_bitmap_stats *stats)
{
	return -ENOENT;
}
2607 
2608 static ssize_t
2609 location_show(struct mddev *mddev, char *page)
2610 {
2611 	ssize_t len;
2612 	if (mddev->bitmap_info.file)
2613 		len = sprintf(page, "file");
2614 	else if (mddev->bitmap_info.offset)
2615 		len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset);
2616 	else
2617 		len = sprintf(page, "none");
2618 	len += sprintf(page+len, "\n");
2619 	return len;
2620 }
2621 
2622 static ssize_t
2623 location_store(struct mddev *mddev, const char *buf, size_t len)
2624 {
2625 	int rv;
2626 
2627 	rv = mddev_suspend_and_lock(mddev);
2628 	if (rv)
2629 		return rv;
2630 
2631 	if (mddev->pers) {
2632 		if (mddev->recovery || mddev->sync_thread) {
2633 			rv = -EBUSY;
2634 			goto out;
2635 		}
2636 	}
2637 
2638 	if (mddev->bitmap || mddev->bitmap_info.file ||
2639 	    mddev->bitmap_info.offset) {
2640 		/* bitmap already configured.  Only option is to clear it */
2641 		if (strncmp(buf, "none", 4) != 0) {
2642 			rv = -EBUSY;
2643 			goto out;
2644 		}
2645 
2646 		sysfs_unmerge_group(&mddev->kobj, &md_bitmap_internal_group);
2647 		md_bitmap_destroy_nosysfs(mddev);
2648 		mddev->bitmap_id = ID_BITMAP_NONE;
2649 		if (!mddev_set_bitmap_ops_nosysfs(mddev))
2650 			goto none_err;
2651 		mddev->bitmap_info.offset = 0;
2652 		if (mddev->bitmap_info.file) {
2653 			struct file *f = mddev->bitmap_info.file;
2654 			mddev->bitmap_info.file = NULL;
2655 			fput(f);
2656 		}
2657 	} else {
2658 		/* No bitmap, OK to set a location */
2659 		long long offset;
2660 
2661 		if (strncmp(buf, "none", 4) == 0)
2662 			/* nothing to be done */;
2663 		else if (strncmp(buf, "file:", 5) == 0) {
2664 			/* Not supported yet */
2665 			rv = -EINVAL;
2666 			goto out;
2667 		} else {
2668 			if (buf[0] == '+')
2669 				rv = kstrtoll(buf+1, 10, &offset);
2670 			else
2671 				rv = kstrtoll(buf, 10, &offset);
2672 			if (rv)
2673 				goto out;
2674 			if (offset == 0) {
2675 				rv = -EINVAL;
2676 				goto out;
2677 			}
2678 			if (mddev->bitmap_info.external == 0 &&
2679 			    mddev->major_version == 0 &&
2680 			    offset != mddev->bitmap_info.default_offset) {
2681 				rv = -EINVAL;
2682 				goto out;
2683 			}
2684 
2685 			mddev->bitmap_info.offset = offset;
2686 			md_bitmap_destroy_nosysfs(mddev);
2687 			mddev->bitmap_id = ID_BITMAP;
2688 			if (!mddev_set_bitmap_ops_nosysfs(mddev))
2689 				goto bitmap_err;
2690 
2691 			rv = md_bitmap_create_nosysfs(mddev);
2692 			if (rv)
2693 				goto create_err;
2694 
2695 			rv = mddev->bitmap_ops->load(mddev);
2696 			if (rv) {
2697 				mddev->bitmap_info.offset = 0;
2698 				goto load_err;
2699 			}
2700 
2701 			rv = sysfs_merge_group(&mddev->kobj,
2702 					       &md_bitmap_internal_group);
2703 			if (rv)
2704 				goto merge_err;
2705 		}
2706 	}
2707 	if (!mddev->external) {
2708 		/* Ensure new bitmap info is stored in
2709 		 * metadata promptly.
2710 		 */
2711 		set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2712 		md_wakeup_thread(mddev->thread);
2713 	}
2714 	rv = 0;
2715 out:
2716 	mddev_unlock_and_resume(mddev);
2717 	if (rv)
2718 		return rv;
2719 	return len;
2720 
2721 merge_err:
2722 	mddev->bitmap_info.offset = 0;
2723 load_err:
2724 	md_bitmap_destroy_nosysfs(mddev);
2725 create_err:
2726 	mddev->bitmap_info.offset = 0;
2727 	mddev->bitmap_id = ID_BITMAP_NONE;
2728 	if (!mddev_set_bitmap_ops_nosysfs(mddev))
2729 		rv = -ENOENT;
2730 	goto out;
2731 bitmap_err:
2732 	rv = -ENOENT;
2733 none_err:
2734 	mddev->bitmap_info.offset = 0;
2735 	goto out;
2736 }
2737 
2738 static struct md_sysfs_entry bitmap_location =
2739 __ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store);
2740 
2741 /* 'bitmap/space' is the space available at 'location' for the
2742  * bitmap.  This allows the kernel to know when it is safe to
2743  * resize the bitmap to match a resized array.
2744  */
2745 static ssize_t
2746 space_show(struct mddev *mddev, char *page)
2747 {
2748 	return sprintf(page, "%lu\n", mddev->bitmap_info.space);
2749 }
2750 
2751 static ssize_t
2752 space_store(struct mddev *mddev, const char *buf, size_t len)
2753 {
2754 	struct bitmap *bitmap;
2755 	unsigned long sectors;
2756 	int rv;
2757 
2758 	rv = kstrtoul(buf, 10, &sectors);
2759 	if (rv)
2760 		return rv;
2761 
2762 	if (sectors == 0)
2763 		return -EINVAL;
2764 
2765 	bitmap = mddev->bitmap;
2766 	if (bitmap && sectors < (bitmap->storage.bytes + 511) >> 9)
2767 		return -EFBIG; /* Bitmap is too big for this small space */
2768 
2769 	/* could make sure it isn't too big, but that isn't really
2770 	 * needed - user-space should be careful.
2771 	 */
2772 	mddev->bitmap_info.space = sectors;
2773 	return len;
2774 }
2775 
2776 static struct md_sysfs_entry bitmap_space =
2777 __ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store);
2778 
2779 static ssize_t
2780 timeout_show(struct mddev *mddev, char *page)
2781 {
2782 	ssize_t len;
2783 	unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ;
2784 	unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ;
2785 
2786 	len = sprintf(page, "%lu", secs);
2787 	if (jifs)
2788 		len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs));
2789 	len += sprintf(page+len, "\n");
2790 	return len;
2791 }
2792 
2793 static ssize_t
2794 timeout_store(struct mddev *mddev, const char *buf, size_t len)
2795 {
2796 	/* timeout can be set at any time */
2797 	unsigned long timeout;
2798 	int rv = strict_strtoul_scaled(buf, &timeout, 4);
2799 	if (rv)
2800 		return rv;
2801 
2802 	/* just to make sure we don't overflow... */
2803 	if (timeout >= LONG_MAX / HZ)
2804 		return -EINVAL;
2805 
2806 	timeout = timeout * HZ / 10000;
2807 
2808 	if (timeout >= MAX_SCHEDULE_TIMEOUT)
2809 		timeout = MAX_SCHEDULE_TIMEOUT-1;
2810 	if (timeout < 1)
2811 		timeout = 1;
2812 
2813 	mddev->bitmap_info.daemon_sleep = timeout;
2814 	mddev_set_timeout(mddev, timeout, false);
2815 	md_wakeup_thread(mddev->thread);
2816 
2817 	return len;
2818 }
2819 
2820 static struct md_sysfs_entry bitmap_timeout =
2821 __ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store);
2822 
2823 static ssize_t
2824 backlog_show(struct mddev *mddev, char *page)
2825 {
2826 	return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind);
2827 }
2828 
2829 static ssize_t
2830 backlog_store(struct mddev *mddev, const char *buf, size_t len)
2831 {
2832 	unsigned long backlog;
2833 	unsigned long old_mwb = mddev->bitmap_info.max_write_behind;
2834 	struct md_rdev *rdev;
2835 	bool has_write_mostly = false;
2836 	int rv = kstrtoul(buf, 10, &backlog);
2837 	if (rv)
2838 		return rv;
2839 	if (backlog > COUNTER_MAX)
2840 		return -EINVAL;
2841 
2842 	rv = mddev_suspend_and_lock(mddev);
2843 	if (rv)
2844 		return rv;
2845 
2846 	/*
2847 	 * Without write mostly device, it doesn't make sense to set
2848 	 * backlog for max_write_behind.
2849 	 */
2850 	rdev_for_each(rdev, mddev) {
2851 		if (test_bit(WriteMostly, &rdev->flags)) {
2852 			has_write_mostly = true;
2853 			break;
2854 		}
2855 	}
2856 	if (!has_write_mostly) {
2857 		pr_warn_ratelimited("%s: can't set backlog, no write mostly device available\n",
2858 				    mdname(mddev));
2859 		mddev_unlock(mddev);
2860 		return -EINVAL;
2861 	}
2862 
2863 	mddev->bitmap_info.max_write_behind = backlog;
2864 	if (!backlog && mddev->serial_info_pool) {
2865 		/* serial_info_pool is not needed if backlog is zero */
2866 		if (!test_bit(MD_SERIALIZE_POLICY, &mddev->flags))
2867 			mddev_destroy_serial_pool(mddev, NULL);
2868 	} else if (backlog && !mddev->serial_info_pool) {
2869 		/* serial_info_pool is needed since backlog is not zero */
2870 		rdev_for_each(rdev, mddev)
2871 			mddev_create_serial_pool(mddev, rdev);
2872 	}
2873 	if (old_mwb != backlog)
2874 		bitmap_update_sb(mddev->bitmap);
2875 
2876 	mddev_unlock_and_resume(mddev);
2877 	return len;
2878 }
2879 
2880 static struct md_sysfs_entry bitmap_backlog =
2881 __ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store);
2882 
2883 static ssize_t
2884 chunksize_show(struct mddev *mddev, char *page)
2885 {
2886 	return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize);
2887 }
2888 
2889 static ssize_t
2890 chunksize_store(struct mddev *mddev, const char *buf, size_t len)
2891 {
2892 	/* Can only be changed when no bitmap is active */
2893 	int rv;
2894 	unsigned long csize;
2895 	if (mddev->bitmap)
2896 		return -EBUSY;
2897 	rv = kstrtoul(buf, 10, &csize);
2898 	if (rv)
2899 		return rv;
2900 	if (csize < 512 ||
2901 	    !is_power_of_2(csize))
2902 		return -EINVAL;
2903 	if (BITS_PER_LONG > 32 && csize >= (1ULL << (BITS_PER_BYTE *
2904 		sizeof(((bitmap_super_t *)0)->chunksize))))
2905 		return -EOVERFLOW;
2906 	mddev->bitmap_info.chunksize = csize;
2907 	return len;
2908 }
2909 
2910 static struct md_sysfs_entry bitmap_chunksize =
2911 __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
2912 
2913 static ssize_t metadata_show(struct mddev *mddev, char *page)
2914 {
2915 	if (mddev_is_clustered(mddev))
2916 		return sprintf(page, "clustered\n");
2917 	return sprintf(page, "%s\n", (mddev->bitmap_info.external
2918 				      ? "external" : "internal"));
2919 }
2920 
2921 static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len)
2922 {
2923 	if (mddev->bitmap ||
2924 	    mddev->bitmap_info.file ||
2925 	    mddev->bitmap_info.offset)
2926 		return -EBUSY;
2927 	if (strncmp(buf, "external", 8) == 0)
2928 		mddev->bitmap_info.external = 1;
2929 	else if ((strncmp(buf, "internal", 8) == 0) ||
2930 			(strncmp(buf, "clustered", 9) == 0))
2931 		mddev->bitmap_info.external = 0;
2932 	else
2933 		return -EINVAL;
2934 	return len;
2935 }
2936 
2937 static struct md_sysfs_entry bitmap_metadata =
2938 __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
2939 
2940 static ssize_t can_clear_show(struct mddev *mddev, char *page)
2941 {
2942 	int len;
2943 	struct bitmap *bitmap;
2944 
2945 	spin_lock(&mddev->lock);
2946 	bitmap = mddev->bitmap;
2947 	if (bitmap)
2948 		len = sprintf(page, "%s\n", (bitmap->need_sync ? "false" :
2949 								 "true"));
2950 	else
2951 		len = sprintf(page, "\n");
2952 	spin_unlock(&mddev->lock);
2953 	return len;
2954 }
2955 
2956 static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len)
2957 {
2958 	struct bitmap *bitmap = mddev->bitmap;
2959 
2960 	if (!bitmap)
2961 		return -ENOENT;
2962 
2963 	if (strncmp(buf, "false", 5) == 0) {
2964 		bitmap->need_sync = 1;
2965 		return len;
2966 	}
2967 
2968 	if (strncmp(buf, "true", 4) == 0) {
2969 		if (mddev->degraded)
2970 			return -EBUSY;
2971 		bitmap->need_sync = 0;
2972 		return len;
2973 	}
2974 
2975 	return -EINVAL;
2976 }
2977 
2978 static struct md_sysfs_entry bitmap_can_clear =
2979 __ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
2980 
2981 static ssize_t
2982 behind_writes_used_show(struct mddev *mddev, char *page)
2983 {
2984 	ssize_t ret;
2985 	struct bitmap *bitmap;
2986 
2987 	spin_lock(&mddev->lock);
2988 	bitmap = mddev->bitmap;
2989 	if (!bitmap)
2990 		ret = sprintf(page, "0\n");
2991 	else
2992 		ret = sprintf(page, "%lu\n", bitmap->behind_writes_used);
2993 	spin_unlock(&mddev->lock);
2994 
2995 	return ret;
2996 }
2997 
2998 static ssize_t
2999 behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len)
3000 {
3001 	struct bitmap *bitmap = mddev->bitmap;
3002 
3003 	if (bitmap)
3004 		bitmap->behind_writes_used = 0;
3005 	return len;
3006 }
3007 
3008 static struct md_sysfs_entry max_backlog_used =
3009 __ATTR(max_backlog_used, S_IRUGO | S_IWUSR,
3010        behind_writes_used_show, behind_writes_used_reset);
3011 
3012 static struct attribute *md_bitmap_common_attrs[] = {
3013 	&bitmap_location.attr,
3014 	NULL
3015 };
3016 
3017 static struct attribute *md_bitmap_internal_attrs[] = {
3018 	&bitmap_space.attr,
3019 	&bitmap_timeout.attr,
3020 	&bitmap_backlog.attr,
3021 	&bitmap_chunksize.attr,
3022 	&bitmap_metadata.attr,
3023 	&bitmap_can_clear.attr,
3024 	&max_backlog_used.attr,
3025 	NULL
3026 };
3027 
3028 static struct attribute_group md_bitmap_common_group = {
3029 	.name = "bitmap",
3030 	.attrs = md_bitmap_common_attrs,
3031 };
3032 
3033 static struct attribute_group md_bitmap_internal_group = {
3034 	.name = "bitmap",
3035 	.attrs = md_bitmap_internal_attrs,
3036 };
3037 
3038 static const struct attribute_group *bitmap_groups[] = {
3039 	&md_bitmap_common_group,
3040 	&md_bitmap_internal_group,
3041 	NULL,
3042 };
3043 
3044 static const struct attribute_group *bitmap_none_groups[] = {
3045 	&md_bitmap_common_group,
3046 	NULL,
3047 };
3048 
3049 static struct bitmap_operations bitmap_none_ops = {
3050 	.head = {
3051 		.type	= MD_BITMAP,
3052 		.id	= ID_BITMAP_NONE,
3053 		.name	= "none",
3054 	},
3055 
3056 	.enabled		= bitmap_none_enabled,
3057 	.create			= bitmap_none_create,
3058 	.load			= bitmap_none_load,
3059 	.destroy		= bitmap_none_destroy,
3060 	.get_stats		= bitmap_none_get_stats,
3061 
3062 	.groups			= bitmap_none_groups,
3063 };
3064 
3065 static struct bitmap_operations bitmap_ops = {
3066 	.head = {
3067 		.type	= MD_BITMAP,
3068 		.id	= ID_BITMAP,
3069 		.name	= "bitmap",
3070 	},
3071 
3072 	.enabled		= bitmap_enabled,
3073 	.create			= bitmap_create,
3074 	.resize			= bitmap_resize,
3075 	.load			= bitmap_load,
3076 	.destroy		= bitmap_destroy,
3077 	.flush			= bitmap_flush,
3078 	.write_all		= bitmap_write_all,
3079 	.dirty_bits		= bitmap_dirty_bits,
3080 	.unplug			= bitmap_unplug,
3081 	.daemon_work		= bitmap_daemon_work,
3082 
3083 	.start_behind_write	= bitmap_start_behind_write,
3084 	.end_behind_write	= bitmap_end_behind_write,
3085 	.wait_behind_writes	= bitmap_wait_behind_writes,
3086 
3087 	.start_write		= bitmap_start_write,
3088 	.end_write		= bitmap_end_write,
3089 	.start_discard		= bitmap_start_write,
3090 	.end_discard		= bitmap_end_write,
3091 
3092 	.start_sync		= bitmap_start_sync,
3093 	.end_sync		= bitmap_end_sync,
3094 	.cond_end_sync		= bitmap_cond_end_sync,
3095 	.close_sync		= bitmap_close_sync,
3096 
3097 	.update_sb		= bitmap_update_sb,
3098 	.get_stats		= bitmap_get_stats,
3099 
3100 	.sync_with_cluster	= bitmap_sync_with_cluster,
3101 	.get_from_slot		= bitmap_get_from_slot,
3102 	.copy_from_slot		= bitmap_copy_from_slot,
3103 	.set_pages		= bitmap_set_pages,
3104 	.free			= md_bitmap_free,
3105 
3106 	.groups			= bitmap_groups,
3107 };
3108 
3109 int md_bitmap_init(void)
3110 {
3111 	int err;
3112 
3113 	md_bitmap_wq = alloc_workqueue("md_bitmap", WQ_MEM_RECLAIM | WQ_UNBOUND,
3114 				       0);
3115 	if (!md_bitmap_wq)
3116 		return -ENOMEM;
3117 
3118 	err = register_md_submodule(&bitmap_none_ops.head);
3119 	if (err)
3120 		goto err_wq;
3121 
3122 	err = register_md_submodule(&bitmap_ops.head);
3123 	if (err)
3124 		goto err_none;
3125 
3126 	return 0;
3127 
3128 err_none:
3129 	unregister_md_submodule(&bitmap_none_ops.head);
3130 err_wq:
3131 	destroy_workqueue(md_bitmap_wq);
3132 	return err;
3133 }
3134 
3135 void md_bitmap_exit(void)
3136 {
3137 	unregister_md_submodule(&bitmap_ops.head);
3138 	unregister_md_submodule(&bitmap_none_ops.head);
3139 	destroy_workqueue(md_bitmap_wq);
3140 }
3141