xref: /linux/drivers/md/md-bitmap.c (revision 9b960d8cd6f712cb2c03e2bdd4d5ca058238037f)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
4  *
5  * bitmap_create  - sets up the bitmap structure
6  * bitmap_destroy - destroys the bitmap structure
7  *
8  * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
9  * - added disk storage for bitmap
10  * - changes to allow various bitmap chunk sizes
11  */
12 
13 /*
14  * Still to do:
15  *
16  * flush after percent set rather than just time based. (maybe both).
17  */
18 
19 #include <linux/blkdev.h>
20 #include <linux/module.h>
21 #include <linux/errno.h>
22 #include <linux/slab.h>
23 #include <linux/init.h>
24 #include <linux/timer.h>
25 #include <linux/sched.h>
26 #include <linux/list.h>
27 #include <linux/file.h>
28 #include <linux/mount.h>
29 #include <linux/buffer_head.h>
30 #include <linux/seq_file.h>
31 #include <trace/events/block.h>
32 
33 #include "md.h"
34 #include "md-bitmap.h"
35 #include "md-cluster.h"
36 
37 #define BITMAP_MAJOR_LO 3
38 /* version 4 insists the bitmap is in little-endian order;
39  * with version 3, it is host-endian, which is non-portable.
40  * Version 5 is currently set only for clustered devices.
41  */
42 #define BITMAP_MAJOR_HI 4
43 #define BITMAP_MAJOR_CLUSTERED 5
44 #define	BITMAP_MAJOR_HOSTENDIAN 3
45 
46 /*
47  * in-memory bitmap:
48  *
49  * Use 16 bit block counters to track pending writes to each "chunk".
50  * The 2 high order bits are special-purpose, the first is a flag indicating
51  * whether a resync is needed.  The second is a flag indicating whether a
52  * resync is active.
53  * This means that the counter is actually 14 bits:
54  *
55  * +--------+--------+------------------------------------------------+
56  * | resync | resync |               counter                          |
57  * | needed | active |                                                |
58  * |  (0-1) |  (0-1) |              (0-16383)                         |
59  * +--------+--------+------------------------------------------------+
60  *
61  * The "resync needed" bit is set when:
62  *    a '1' bit is read from storage at startup,
63  *    a write request fails on some drives, or
64  *    a resync is aborted on a chunk with 'resync active' set.
65  * It is cleared (and resync-active set) when a resync starts across all drives
66  * of the chunk.
67  *
68  *
69  * The "resync active" bit is set when:
70  *    a resync is started on all drives, and resync_needed is set.
71  *       resync_needed will be cleared (as long as resync_active wasn't already set).
72  * It is cleared when a resync completes.
73  *
74  * The counter counts pending write requests, plus the on-disk bit.
75  * When the counter is '1' and the resync bits are clear, the on-disk
76  * bit can be cleared as well, thus setting the counter to 0.
77  * When we set a bit, or increment the counter (to start a write), if the
78  * field is 0, we first set the disk bit and set the counter to 1.
79  *
80  * If the counter is 0, the on-disk bit is clear and the stripe is clean
81  * Anything that dirties the stripe pushes the counter to 2 (at least)
82  * and sets the on-disk bit (lazily).
83  * If a periodic sweep finds the counter at 2, it is decremented to 1.
84  * If the sweep finds the counter at 1, the on-disk bit is cleared and the
85  * counter goes to zero.
86  *
87  * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block
88  * counters as a fallback when "page" memory cannot be allocated:
89  *
90  * Normal case (page memory allocated):
91  *
92  *     page pointer (32-bit)
93  *
94  *     [ ] ------+
95  *               |
96  *               +-------> [   ][   ]..[   ] (4096 byte page == 2048 counters)
97  *                          c1   c2    c2048
98  *
99  * Hijacked case (page memory allocation failed):
100  *
101  *     hijacked page pointer (32-bit)
102  *
103  *     [		  ][		  ] (no page memory allocated)
104  *      counter #1 (16-bit) counter #2 (16-bit)
105  *
106  */
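
/*
 * For example, decoding one counter word with the NEEDED/RESYNC/COUNTER
 * helpers defined below (a minimal sketch; the mask values shown mirror
 * NEEDED_MASK, RESYNC_MASK and COUNTER_MAX from md-bitmap.h, assuming
 * 16-bit counters):
 *
 *	bitmap_counter_t c = 0x8005;
 *
 *	NEEDED(c);	// 0x8000 - a resync of this chunk is needed
 *	RESYNC(c);	// 0      - no resync is currently active
 *	COUNTER(c);	// 5      - pending writes plus the on-disk bit
 */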
107 
108 #define PAGE_BITS (PAGE_SIZE << 3)
109 #define PAGE_BIT_SHIFT (PAGE_SHIFT + 3)
110 
111 #define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK)
112 #define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK)
113 #define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX)
114 
115 /* how many counters per page? */
116 #define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS)
117 /* same, except a shift value for more efficient bitops */
118 #define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT)
119 /* same, except a mask value for more efficient bitops */
120 #define PAGE_COUNTER_MASK  (PAGE_COUNTER_RATIO - 1)
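
/*
 * Worked example, assuming 4 KiB pages and 16-bit counters
 * (COUNTER_BITS == 16, COUNTER_BIT_SHIFT == 4): PAGE_BITS is
 * 4096 << 3 == 32768 and PAGE_BIT_SHIFT is 12 + 3 == 15, so
 * PAGE_COUNTER_RATIO == 2048 counters per page,
 * PAGE_COUNTER_SHIFT == 11 and PAGE_COUNTER_MASK == 2047.
 */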
121 
122 #define BITMAP_BLOCK_SHIFT 9
123 
124 /*
125  * bitmap structures:
126  */
127 
128 /* the in-memory bitmap is represented by bitmap_pages */
129 struct bitmap_page {
130 	/*
131 	 * map points to the actual memory page
132 	 */
133 	char *map;
134 	/*
135 	 * in emergencies (when map cannot be alloced), hijack the map
136 	 * pointer and use it as two counters itself
137 	 */
138 	unsigned int hijacked:1;
139 	/*
140 	 * If any counter in this page is '1' or '2' - and so could be
141 	 * cleared - then that page is marked as 'pending'.
142 	 */
143 	unsigned int pending:1;
144 	/*
145 	 * count of dirty bits on the page
146 	 */
147 	unsigned int  count:30;
148 };
149 
150 /* the main bitmap structure - one per mddev */
151 struct bitmap {
152 
153 	struct bitmap_counts {
154 		spinlock_t lock;
155 		struct bitmap_page *bp;
156 		/* total number of pages in the bitmap */
157 		unsigned long pages;
158 		/* number of pages not yet allocated */
159 		unsigned long missing_pages;
160 		/* chunksize = 2^chunkshift (for bitops) */
161 		unsigned long chunkshift;
162 		/* total number of data chunks for the array */
163 		unsigned long chunks;
164 	} counts;
165 
166 	struct mddev *mddev; /* the md device that the bitmap is for */
167 
168 	__u64	events_cleared;
169 	int need_sync;
170 
171 	struct bitmap_storage {
172 		/* backing disk file */
173 		struct file *file;
174 		/* cached copy of the bitmap file superblock */
175 		struct page *sb_page;
176 		unsigned long sb_index;
177 		/* list of cache pages for the file */
178 		struct page **filemap;
179 	/* attributes associated with filemap pages */
180 		unsigned long *filemap_attr;
181 		/* number of pages in the file */
182 		unsigned long file_pages;
183 		/* total bytes in the bitmap */
184 		unsigned long bytes;
185 	} storage;
186 
187 	unsigned long flags;
188 
189 	int allclean;
190 
191 	atomic_t behind_writes;
192 	/* highest actual value at runtime */
193 	unsigned long behind_writes_used;
194 
195 	/*
196 	 * the bitmap daemon - periodically wakes up and sweeps the bitmap
197 	 * file, cleaning up bits and flushing out pages to disk as necessary
198 	 */
199 	unsigned long daemon_lastrun; /* jiffies of last run */
200 	/*
201 	 * when we last called end_sync to update the bitmap with resync
202 	 * progress.
203 	 */
204 	unsigned long last_end_sync;
205 
206 	/* pending writes to the bitmap file */
207 	atomic_t pending_writes;
208 	wait_queue_head_t write_wait;
209 	wait_queue_head_t overflow_wait;
210 	wait_queue_head_t behind_wait;
211 
212 	struct kernfs_node *sysfs_can_clear;
213 	/* slot offset for clustered env */
214 	int cluster_slot;
215 };
216 
217 static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks,
218 			   int chunksize, bool init);
219 
220 static inline char *bmname(struct bitmap *bitmap)
221 {
222 	return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
223 }
224 
225 static bool __bitmap_enabled(struct bitmap *bitmap)
226 {
227 	return bitmap->storage.filemap &&
228 	       !test_bit(BITMAP_STALE, &bitmap->flags);
229 }
230 
231 static bool bitmap_enabled(struct mddev *mddev)
232 {
233 	struct bitmap *bitmap = mddev->bitmap;
234 
235 	if (!bitmap)
236 		return false;
237 
238 	return __bitmap_enabled(bitmap);
239 }
240 
241 /*
242  * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
243  *
244  * 1) check to see if this page is allocated, if it's not then try to alloc
245  * 2) if the alloc fails, set the page's hijacked flag so we'll use the
246  *    page pointer directly as a counter
247  *
248  * if we find our page, we increment the page's refcount so that it stays
249  * allocated while we're using it
250  */
251 static int md_bitmap_checkpage(struct bitmap_counts *bitmap,
252 			       unsigned long page, int create, int no_hijack)
253 __releases(bitmap->lock)
254 __acquires(bitmap->lock)
255 {
256 	unsigned char *mappage;
257 
258 	WARN_ON_ONCE(page >= bitmap->pages);
259 	if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
260 		return 0;
261 
262 	if (bitmap->bp[page].map) /* page is already allocated, just return */
263 		return 0;
264 
265 	if (!create)
266 		return -ENOENT;
267 
268 	/* this page has not been allocated yet */
269 
270 	spin_unlock_irq(&bitmap->lock);
271 	/* It is possible that this is being called inside a
272 	 * prepare_to_wait/finish_wait loop from raid5.c:make_request().
273 	 * In general it is not permitted to sleep in that context as it
274 	 * can cause the loop to spin freely.
275 	 * That doesn't apply here as we can only reach this point
276 	 * once with any loop.
277 	 * When this function completes, either bp[page].map or
278 	 * bp[page].hijacked will be set.  In either case, this function will
279 	 * abort before getting to this point again.  So there is
280 	 * no risk of a free-spin, and so it is safe to assert
281 	 * that sleeping here is allowed.
282 	 */
283 	sched_annotate_sleep();
284 	mappage = kzalloc(PAGE_SIZE, GFP_NOIO);
285 	spin_lock_irq(&bitmap->lock);
286 
287 	if (mappage == NULL) {
288 		pr_debug("md/bitmap: map page allocation failed, hijacking\n");
289 		/* We don't support hijack for cluster raid */
290 		if (no_hijack)
291 			return -ENOMEM;
292 		/* failed - set the hijacked flag so that we can use the
293 		 * pointer as a counter */
294 		if (!bitmap->bp[page].map)
295 			bitmap->bp[page].hijacked = 1;
296 	} else if (bitmap->bp[page].map ||
297 		   bitmap->bp[page].hijacked) {
298 		/* somebody beat us to getting the page */
299 		kfree(mappage);
300 	} else {
301 
302 		/* no page was in place and we have one, so install it */
303 
304 		bitmap->bp[page].map = mappage;
305 		bitmap->missing_pages--;
306 	}
307 	return 0;
308 }
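
/*
 * Once a page is hijacked, the two counters live in the pointer word
 * itself.  A minimal sketch of how such an entry is read (the same cast
 * that md_bitmap_get_counter() performs further down):
 *
 *	bitmap_counter_t *pair = (bitmap_counter_t *)&bitmap->bp[page].map;
 *
 *	pair[0];	// covers the first half of the page's chunk range
 *	pair[1];	// covers the second half
 */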
309 
310 /* if page is completely empty, put it back on the free list, or dealloc it */
311 /* if page was hijacked, unmark the flag so it might get alloced next time */
312 /* Note: lock should be held when calling this */
313 static void md_bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page)
314 {
315 	char *ptr;
316 
317 	if (bitmap->bp[page].count) /* page is still busy */
318 		return;
319 
320 	/* page is no longer in use, it can be released */
321 
322 	if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
323 		bitmap->bp[page].hijacked = 0;
324 		bitmap->bp[page].map = NULL;
325 	} else {
326 		/* normal case, free the page */
327 		ptr = bitmap->bp[page].map;
328 		bitmap->bp[page].map = NULL;
329 		bitmap->missing_pages++;
330 		kfree(ptr);
331 	}
332 }
333 
334 /*
335  * bitmap file handling - read and write the bitmap file and its superblock
336  */
337 
338 /*
339  * basic page I/O operations
340  */
341 
342 /* IO operations when bitmap is stored near all superblocks */
343 
344 /* choose a good rdev and read the page from there */
345 static int read_sb_page(struct mddev *mddev, loff_t offset,
346 		struct page *page, unsigned long index, int size)
347 {
348 
349 	sector_t sector = mddev->bitmap_info.offset + offset +
350 		index * (PAGE_SIZE / SECTOR_SIZE);
351 	struct md_rdev *rdev;
352 
353 	rdev_for_each(rdev, mddev) {
354 		u32 iosize = roundup(size, bdev_logical_block_size(rdev->bdev));
355 
356 		if (!test_bit(In_sync, &rdev->flags) ||
357 		    test_bit(Faulty, &rdev->flags) ||
358 		    test_bit(Bitmap_sync, &rdev->flags))
359 			continue;
360 
361 		if (sync_page_io(rdev, sector, iosize, page, REQ_OP_READ, true))
362 			return 0;
363 	}
364 	return -EIO;
365 }
366 
367 static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev)
368 {
369 	/* Iterate the disks of an mddev, using rcu to protect access to the
370 	 * linked list, and raising the refcount of devices we return to ensure
371 	 * they don't disappear while in use.
372 	 * As devices are only added or removed when raid_disk is < 0 and
373 	 * nr_pending is 0 and In_sync is clear, the entries we return will
374 	 * still be in the same position on the list when we re-enter
375 	 * list_for_each_entry_continue_rcu.
376 	 *
377 	 * Note that if entered with 'rdev == NULL' to start at the
378 	 * beginning, we temporarily assign 'rdev' to an address which
379 	 * isn't really an rdev, but which can be used by
380 	 * list_for_each_entry_continue_rcu() to find the first entry.
381 	 */
382 	rcu_read_lock();
383 	if (rdev == NULL)
384 		/* start at the beginning */
385 		rdev = list_entry(&mddev->disks, struct md_rdev, same_set);
386 	else {
387 		/* release the previous rdev and start from there. */
388 		rdev_dec_pending(rdev, mddev);
389 	}
390 	list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) {
391 		if (rdev->raid_disk >= 0 &&
392 		    !test_bit(Faulty, &rdev->flags)) {
393 			/* this is a usable device */
394 			atomic_inc(&rdev->nr_pending);
395 			rcu_read_unlock();
396 			return rdev;
397 		}
398 	}
399 	rcu_read_unlock();
400 	return NULL;
401 }
402 
403 static unsigned int optimal_io_size(struct block_device *bdev,
404 				    unsigned int last_page_size,
405 				    unsigned int io_size)
406 {
407 	if (bdev_io_opt(bdev) > bdev_logical_block_size(bdev))
408 		return roundup(last_page_size, bdev_io_opt(bdev));
409 	return io_size;
410 }
411 
412 static unsigned int bitmap_io_size(unsigned int io_size, unsigned int opt_size,
413 				   loff_t start, loff_t boundary)
414 {
415 	if (io_size != opt_size &&
416 	    start + opt_size / SECTOR_SIZE <= boundary)
417 		return opt_size;
418 	if (start + io_size / SECTOR_SIZE <= boundary)
419 		return io_size;
420 
421 	/* Overflows boundary */
422 	return 0;
423 }
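
/*
 * Worked example (a sketch, assuming a 4 KiB page and a 64 KiB optimal
 * I/O size): with io_size == 4096 and opt_size == 65536, the 64 KiB
 * write (128 sectors) is chosen only while start + 128 stays within
 * 'boundary'; otherwise the plain 4 KiB write (8 sectors) is tried, and
 * if even that would cross 'boundary' the function returns 0 and the
 * caller treats it as -EINVAL.
 */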
424 
425 static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
426 			   unsigned long pg_index, struct page *page)
427 {
428 	struct block_device *bdev;
429 	struct mddev *mddev = bitmap->mddev;
430 	struct bitmap_storage *store = &bitmap->storage;
431 	unsigned long num_pages = bitmap->storage.file_pages;
432 	unsigned int bitmap_limit = (num_pages - pg_index % num_pages) << PAGE_SHIFT;
433 	loff_t sboff, offset = mddev->bitmap_info.offset;
434 	sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE;
435 	unsigned int size = PAGE_SIZE;
436 	unsigned int opt_size = PAGE_SIZE;
437 	sector_t doff;
438 
439 	bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
440 	/* we compare length (page numbers), not page offset. */
441 	if ((pg_index - store->sb_index) == num_pages - 1) {
442 		unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1);
443 
444 		if (last_page_size == 0)
445 			last_page_size = PAGE_SIZE;
446 		size = roundup(last_page_size, bdev_logical_block_size(bdev));
447 		opt_size = optimal_io_size(bdev, last_page_size, size);
448 	}
449 
450 	sboff = rdev->sb_start + offset;
451 	doff = rdev->data_offset;
452 
453 	/* Just make sure we aren't corrupting data or metadata */
454 	if (mddev->external) {
455 		/* Bitmap could be anywhere. */
456 		if (sboff + ps > doff &&
457 		    sboff < (doff + mddev->dev_sectors + PAGE_SIZE / SECTOR_SIZE))
458 			return -EINVAL;
459 	} else if (offset < 0) {
460 		/* DATA  BITMAP METADATA  */
461 		size = bitmap_io_size(size, opt_size, offset + ps, 0);
462 		if (size == 0)
463 			/* bitmap runs into metadata */
464 			return -EINVAL;
465 
466 		if (doff + mddev->dev_sectors > sboff)
467 			/* data runs into bitmap */
468 			return -EINVAL;
469 	} else if (rdev->sb_start < rdev->data_offset) {
470 		/* METADATA BITMAP DATA */
471 		size = bitmap_io_size(size, opt_size, sboff + ps, doff);
472 		if (size == 0)
473 			/* bitmap runs into data */
474 			return -EINVAL;
475 	}
476 
477 	md_super_write(mddev, rdev, sboff + ps, (int)min(size, bitmap_limit), page);
478 	return 0;
479 }
480 
481 static void write_sb_page(struct bitmap *bitmap, unsigned long pg_index,
482 			  struct page *page, bool wait)
483 {
484 	struct mddev *mddev = bitmap->mddev;
485 
486 	do {
487 		struct md_rdev *rdev = NULL;
488 
489 		while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
490 			if (__write_sb_page(rdev, bitmap, pg_index, page) < 0) {
491 				set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
492 				return;
493 			}
494 		}
495 	} while (wait && md_super_wait(mddev) < 0);
496 }
497 
498 static void md_bitmap_file_kick(struct bitmap *bitmap);
499 
500 #ifdef CONFIG_MD_BITMAP_FILE
501 static void write_file_page(struct bitmap *bitmap, struct page *page, int wait)
502 {
503 	struct buffer_head *bh = page_buffers(page);
504 
505 	while (bh && bh->b_blocknr) {
506 		atomic_inc(&bitmap->pending_writes);
507 		set_buffer_locked(bh);
508 		set_buffer_mapped(bh);
509 		submit_bh(REQ_OP_WRITE | REQ_SYNC, bh);
510 		bh = bh->b_this_page;
511 	}
512 
513 	if (wait)
514 		wait_event(bitmap->write_wait,
515 			   atomic_read(&bitmap->pending_writes) == 0);
516 }
517 
518 static void end_bitmap_write(struct buffer_head *bh, int uptodate)
519 {
520 	struct bitmap *bitmap = bh->b_private;
521 
522 	if (!uptodate)
523 		set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
524 	if (atomic_dec_and_test(&bitmap->pending_writes))
525 		wake_up(&bitmap->write_wait);
526 }
527 
528 static void free_buffers(struct page *page)
529 {
530 	struct buffer_head *bh;
531 
532 	if (!PagePrivate(page))
533 		return;
534 
535 	bh = page_buffers(page);
536 	while (bh) {
537 		struct buffer_head *next = bh->b_this_page;
538 		free_buffer_head(bh);
539 		bh = next;
540 	}
541 	detach_page_private(page);
542 	put_page(page);
543 }
544 
545 /* read a page from a file.
546  * We both read the page, and attach buffers to the page to record the
547  * address of each block (using bmap).  These addresses will be used
548  * to write the block later, completely bypassing the filesystem.
549  * This usage is similar to how swap files are handled, and allows us
550  * to write to a file with no concerns of memory allocation failing.
551  */
552 static int read_file_page(struct file *file, unsigned long index,
553 		struct bitmap *bitmap, unsigned long count, struct page *page)
554 {
555 	int ret = 0;
556 	struct inode *inode = file_inode(file);
557 	struct buffer_head *bh;
558 	sector_t block, blk_cur;
559 	unsigned long blocksize = i_blocksize(inode);
560 
561 	pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
562 		 (unsigned long long)index << PAGE_SHIFT);
563 
564 	bh = alloc_page_buffers(page, blocksize);
565 	if (!bh) {
566 		ret = -ENOMEM;
567 		goto out;
568 	}
569 	attach_page_private(page, bh);
570 	blk_cur = index << (PAGE_SHIFT - inode->i_blkbits);
571 	while (bh) {
572 		block = blk_cur;
573 
574 		if (count == 0)
575 			bh->b_blocknr = 0;
576 		else {
577 			ret = bmap(inode, &block);
578 			if (ret || !block) {
579 				ret = -EINVAL;
580 				bh->b_blocknr = 0;
581 				goto out;
582 			}
583 
584 			bh->b_blocknr = block;
585 			bh->b_bdev = inode->i_sb->s_bdev;
586 			if (count < blocksize)
587 				count = 0;
588 			else
589 				count -= blocksize;
590 
591 			bh->b_end_io = end_bitmap_write;
592 			bh->b_private = bitmap;
593 			atomic_inc(&bitmap->pending_writes);
594 			set_buffer_locked(bh);
595 			set_buffer_mapped(bh);
596 			submit_bh(REQ_OP_READ, bh);
597 		}
598 		blk_cur++;
599 		bh = bh->b_this_page;
600 	}
601 
602 	wait_event(bitmap->write_wait,
603 		   atomic_read(&bitmap->pending_writes)==0);
604 	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
605 		ret = -EIO;
606 out:
607 	if (ret)
608 		pr_err("md: bitmap read error: (%dB @ %llu): %d\n",
609 		       (int)PAGE_SIZE,
610 		       (unsigned long long)index << PAGE_SHIFT,
611 		       ret);
612 	return ret;
613 }
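
/*
 * A worked example of the bmap() translation above (hypothetical numbers,
 * assuming 4 KiB pages and a 1 KiB filesystem block size): page index 1
 * covers file blocks 4..7, so blk_cur starts at 1 << (12 - 10) == 4 and
 * each buffer head is mapped to whatever physical block bmap() reports
 * for file blocks 4, 5, 6 and 7.  Later writes go straight to those
 * device blocks, bypassing the filesystem.
 */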
614 #else /* CONFIG_MD_BITMAP_FILE */
615 static void write_file_page(struct bitmap *bitmap, struct page *page, int wait)
616 {
617 }
618 static int read_file_page(struct file *file, unsigned long index,
619 		struct bitmap *bitmap, unsigned long count, struct page *page)
620 {
621 	return -EIO;
622 }
623 static void free_buffers(struct page *page)
624 {
625 	put_page(page);
626 }
627 #endif /* CONFIG_MD_BITMAP_FILE */
628 
629 /*
630  * bitmap file superblock operations
631  */
632 
633 /*
634  * write out a page to a file
635  */
636 static void filemap_write_page(struct bitmap *bitmap, unsigned long pg_index,
637 			       bool wait)
638 {
639 	struct bitmap_storage *store = &bitmap->storage;
640 	struct page *page = store->filemap[pg_index];
641 
642 	if (mddev_is_clustered(bitmap->mddev)) {
643 		/* go to node bitmap area starting point */
644 		pg_index += store->sb_index;
645 	}
646 
647 	if (store->file)
648 		write_file_page(bitmap, page, wait);
649 	else
650 		write_sb_page(bitmap, pg_index, page, wait);
651 }
652 
653 /*
654  * md_bitmap_wait_writes() should be called before writing any bitmap
655  * blocks, to ensure previous writes, particularly from
656  * md_bitmap_daemon_work(), have completed.
657  */
658 static void md_bitmap_wait_writes(struct bitmap *bitmap)
659 {
660 	if (bitmap->storage.file)
661 		wait_event(bitmap->write_wait,
662 			   atomic_read(&bitmap->pending_writes)==0);
663 	else
664 		/* Note that we ignore the return value.  The writes
665 		 * might have failed, but that would just mean that
666 		 * some bits which should be cleared haven't been,
667 		 * which is safe.  The relevant bitmap blocks will
668 		 * probably get written again, but there is no great
669 		 * loss if they aren't.
670 		 */
671 		md_super_wait(bitmap->mddev);
672 }
673 
674 
675 /* update the event counter and sync the superblock to disk */
676 static void bitmap_update_sb(void *data)
677 {
678 	bitmap_super_t *sb;
679 	struct bitmap *bitmap = data;
680 
681 	if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
682 		return;
683 	if (bitmap->mddev->bitmap_info.external)
684 		return;
685 	if (!bitmap->storage.sb_page) /* no superblock */
686 		return;
687 	sb = kmap_local_page(bitmap->storage.sb_page);
688 	sb->events = cpu_to_le64(bitmap->mddev->events);
689 	if (bitmap->mddev->events < bitmap->events_cleared)
690 		/* rocking back to read-only */
691 		bitmap->events_cleared = bitmap->mddev->events;
692 	sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
693 	/*
694 	 * clear BITMAP_WRITE_ERROR bit to protect against the case that
695 	 * a bitmap write error occurred but the later writes succeeded.
696 	 */
697 	sb->state = cpu_to_le32(bitmap->flags & ~BIT(BITMAP_WRITE_ERROR));
698 	/* Just in case these have been changed via sysfs: */
699 	sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
700 	sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
701 	/* This might have been changed by a reshape */
702 	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
703 	sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize);
704 	sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes);
705 	sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
706 					   bitmap_info.space);
707 	kunmap_local(sb);
708 
709 	if (bitmap->storage.file)
710 		write_file_page(bitmap, bitmap->storage.sb_page, 1);
711 	else
712 		write_sb_page(bitmap, bitmap->storage.sb_index,
713 			      bitmap->storage.sb_page, 1);
714 }
715 
716 static void bitmap_print_sb(struct bitmap *bitmap)
717 {
718 	bitmap_super_t *sb;
719 
720 	if (!bitmap || !bitmap->storage.sb_page)
721 		return;
722 	sb = kmap_local_page(bitmap->storage.sb_page);
723 	pr_debug("%s: bitmap file superblock:\n", bmname(bitmap));
724 	pr_debug("         magic: %08x\n", le32_to_cpu(sb->magic));
725 	pr_debug("       version: %u\n", le32_to_cpu(sb->version));
726 	pr_debug("          uuid: %08x.%08x.%08x.%08x\n",
727 		 le32_to_cpu(*(__le32 *)(sb->uuid+0)),
728 		 le32_to_cpu(*(__le32 *)(sb->uuid+4)),
729 		 le32_to_cpu(*(__le32 *)(sb->uuid+8)),
730 		 le32_to_cpu(*(__le32 *)(sb->uuid+12)));
731 	pr_debug("        events: %llu\n",
732 		 (unsigned long long) le64_to_cpu(sb->events));
733 	pr_debug("events cleared: %llu\n",
734 		 (unsigned long long) le64_to_cpu(sb->events_cleared));
735 	pr_debug("         state: %08x\n", le32_to_cpu(sb->state));
736 	pr_debug("     chunksize: %u B\n", le32_to_cpu(sb->chunksize));
737 	pr_debug("  daemon sleep: %us\n", le32_to_cpu(sb->daemon_sleep));
738 	pr_debug("     sync size: %llu KB\n",
739 		 (unsigned long long)le64_to_cpu(sb->sync_size)/2);
740 	pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind));
741 	kunmap_local(sb);
742 }
743 
744 /*
745  * bitmap_new_disk_sb
746  * @bitmap
747  *
748  * This function is somewhat the reverse of bitmap_read_sb.  bitmap_read_sb
749  * reads and verifies the on-disk bitmap superblock and populates bitmap_info.
750  * This function verifies 'bitmap_info' and populates the on-disk bitmap
751  * structure, which is to be written to disk.
752  *
753  * Returns: 0 on success, -Exxx on error
754  */
755 static int md_bitmap_new_disk_sb(struct bitmap *bitmap)
756 {
757 	bitmap_super_t *sb;
758 	unsigned long chunksize, daemon_sleep, write_behind;
759 
760 	bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
761 	if (bitmap->storage.sb_page == NULL)
762 		return -ENOMEM;
763 	bitmap->storage.sb_index = 0;
764 
765 	sb = kmap_local_page(bitmap->storage.sb_page);
766 
767 	sb->magic = cpu_to_le32(BITMAP_MAGIC);
768 	sb->version = cpu_to_le32(BITMAP_MAJOR_HI);
769 
770 	chunksize = bitmap->mddev->bitmap_info.chunksize;
771 	BUG_ON(!chunksize);
772 	if (!is_power_of_2(chunksize)) {
773 		kunmap_local(sb);
774 		pr_warn("bitmap chunksize not a power of 2\n");
775 		return -EINVAL;
776 	}
777 	sb->chunksize = cpu_to_le32(chunksize);
778 
779 	daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
780 	if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
781 		pr_debug("Choosing daemon_sleep default (5 sec)\n");
782 		daemon_sleep = 5 * HZ;
783 	}
784 	sb->daemon_sleep = cpu_to_le32(daemon_sleep);
785 	bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
786 
787 	/*
788 	 * FIXME: write_behind for RAID1.  If not specified, what
789 	 * is a good choice?  We choose COUNTER_MAX / 2 arbitrarily.
790 	 */
791 	write_behind = bitmap->mddev->bitmap_info.max_write_behind;
792 	if (write_behind > COUNTER_MAX)
793 		write_behind = COUNTER_MAX / 2;
794 	sb->write_behind = cpu_to_le32(write_behind);
795 	bitmap->mddev->bitmap_info.max_write_behind = write_behind;
796 
797 	/* keep the array size field of the bitmap superblock up to date */
798 	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
799 
800 	memcpy(sb->uuid, bitmap->mddev->uuid, 16);
801 
802 	set_bit(BITMAP_STALE, &bitmap->flags);
803 	sb->state = cpu_to_le32(bitmap->flags);
804 	bitmap->events_cleared = bitmap->mddev->events;
805 	sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
806 	bitmap->mddev->bitmap_info.nodes = 0;
807 
808 	kunmap_local(sb);
809 
810 	return 0;
811 }
812 
813 /* read the superblock from the bitmap file and initialize some bitmap fields */
814 static int md_bitmap_read_sb(struct bitmap *bitmap)
815 {
816 	char *reason = NULL;
817 	bitmap_super_t *sb;
818 	unsigned long chunksize, daemon_sleep, write_behind;
819 	unsigned long long events;
820 	int nodes = 0;
821 	unsigned long sectors_reserved = 0;
822 	int err = -EINVAL;
823 	struct page *sb_page;
824 	loff_t offset = 0;
825 
826 	if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) {
827 		chunksize = 128 * 1024 * 1024;
828 		daemon_sleep = 5 * HZ;
829 		write_behind = 0;
830 		set_bit(BITMAP_STALE, &bitmap->flags);
831 		err = 0;
832 		goto out_no_sb;
833 	}
834 	/* page 0 is the superblock, read it... */
835 	sb_page = alloc_page(GFP_KERNEL);
836 	if (!sb_page)
837 		return -ENOMEM;
838 	bitmap->storage.sb_page = sb_page;
839 
840 re_read:
841 	/* If cluster_slot is set, the cluster is setup */
842 	if (bitmap->cluster_slot >= 0) {
843 		sector_t bm_blocks = bitmap->mddev->resync_max_sectors;
844 
845 		bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks,
846 			   (bitmap->mddev->bitmap_info.chunksize >> 9));
847 		/* bits to bytes */
848 		bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t);
849 		/* to 4k blocks */
850 		bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
851 		offset = bitmap->cluster_slot * (bm_blocks << 3);
852 		pr_debug("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
853 			bitmap->cluster_slot, offset);
854 	}
855 
856 	if (bitmap->storage.file) {
857 		loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host);
858 		int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;
859 
860 		err = read_file_page(bitmap->storage.file, 0,
861 				bitmap, bytes, sb_page);
862 	} else {
863 		err = read_sb_page(bitmap->mddev, offset, sb_page, 0,
864 				   sizeof(bitmap_super_t));
865 	}
866 	if (err)
867 		return err;
868 
869 	err = -EINVAL;
870 	sb = kmap_local_page(sb_page);
871 
872 	chunksize = le32_to_cpu(sb->chunksize);
873 	daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
874 	write_behind = le32_to_cpu(sb->write_behind);
875 	sectors_reserved = le32_to_cpu(sb->sectors_reserved);
876 
877 	/* verify that the bitmap-specific fields are valid */
878 	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
879 		reason = "bad magic";
880 	else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
881 		 le32_to_cpu(sb->version) > BITMAP_MAJOR_CLUSTERED)
882 		reason = "unrecognized superblock version";
883 	else if (chunksize < 512)
884 		reason = "bitmap chunksize too small";
885 	else if (!is_power_of_2(chunksize))
886 		reason = "bitmap chunksize not a power of 2";
887 	else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT)
888 		reason = "daemon sleep period out of range";
889 	else if (write_behind > COUNTER_MAX)
890 		reason = "write-behind limit out of range (0 - 16383)";
891 	if (reason) {
892 		pr_warn("%s: invalid bitmap file superblock: %s\n",
893 			bmname(bitmap), reason);
894 		goto out;
895 	}
896 
897 	/*
898 	 * Setup nodes/clustername only if bitmap version is
899 	 * cluster-compatible
900 	 */
901 	if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
902 		nodes = le32_to_cpu(sb->nodes);
903 		strscpy(bitmap->mddev->bitmap_info.cluster_name,
904 				sb->cluster_name, 64);
905 	}
906 
907 	/* keep the array size field of the bitmap superblock up to date */
908 	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
909 
910 	if (bitmap->mddev->persistent) {
911 		/*
912 		 * We have a persistent array superblock, so compare the
913 		 * bitmap's UUID and event counter to the mddev's
914 		 */
915 		if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
916 			pr_warn("%s: bitmap superblock UUID mismatch\n",
917 				bmname(bitmap));
918 			goto out;
919 		}
920 		events = le64_to_cpu(sb->events);
921 		if (!nodes && (events < bitmap->mddev->events)) {
922 			pr_warn("%s: bitmap file is out of date (%llu < %llu) -- forcing full recovery\n",
923 				bmname(bitmap), events,
924 				(unsigned long long) bitmap->mddev->events);
925 			set_bit(BITMAP_STALE, &bitmap->flags);
926 		}
927 	}
928 
929 	/* assign fields using values from superblock */
930 	bitmap->flags |= le32_to_cpu(sb->state);
931 	if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
932 		set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
933 	bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
934 	err = 0;
935 
936 out:
937 	kunmap_local(sb);
938 	if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
939 		/* Assigning chunksize is required for "re_read" */
940 		bitmap->mddev->bitmap_info.chunksize = chunksize;
941 		err = md_setup_cluster(bitmap->mddev, nodes);
942 		if (err) {
943 			pr_warn("%s: Could not setup cluster service (%d)\n",
944 				bmname(bitmap), err);
945 			goto out_no_sb;
946 		}
947 		bitmap->cluster_slot = bitmap->mddev->cluster_ops->slot_number(bitmap->mddev);
948 		goto re_read;
949 	}
950 
951 out_no_sb:
952 	if (err == 0) {
953 		if (test_bit(BITMAP_STALE, &bitmap->flags))
954 			bitmap->events_cleared = bitmap->mddev->events;
955 		bitmap->mddev->bitmap_info.chunksize = chunksize;
956 		bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
957 		bitmap->mddev->bitmap_info.max_write_behind = write_behind;
958 		bitmap->mddev->bitmap_info.nodes = nodes;
959 		if (bitmap->mddev->bitmap_info.space == 0 ||
960 			bitmap->mddev->bitmap_info.space > sectors_reserved)
961 			bitmap->mddev->bitmap_info.space = sectors_reserved;
962 	} else {
963 		bitmap_print_sb(bitmap);
964 		if (bitmap->cluster_slot < 0)
965 			md_cluster_stop(bitmap->mddev);
966 	}
967 	return err;
968 }
969 
970 /*
971  * general bitmap file operations
972  */
973 
974 /*
975  * on-disk bitmap:
976  *
977  * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
978  * file a page at a time. There's a superblock at the start of the file.
979  */
980 /* calculate the index of the page that contains this bit */
981 static inline unsigned long file_page_index(struct bitmap_storage *store,
982 					    unsigned long chunk)
983 {
984 	if (store->sb_page)
985 		chunk += sizeof(bitmap_super_t) << 3;
986 	return chunk >> PAGE_BIT_SHIFT;
987 }
988 
989 /* calculate the (bit) offset of this bit within a page */
990 static inline unsigned long file_page_offset(struct bitmap_storage *store,
991 					     unsigned long chunk)
992 {
993 	if (store->sb_page)
994 		chunk += sizeof(bitmap_super_t) << 3;
995 	return chunk & (PAGE_BITS - 1);
996 }
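
/*
 * Worked example, assuming 4 KiB pages and an in-file superblock
 * (sizeof(bitmap_super_t) == 256 bytes, i.e. 2048 bits): chunk 0 lands
 * in page 0 at bit offset 2048, and chunk 30720 is the first bit of
 * page 1, since (30720 + 2048) >> 15 == 1 and (30720 + 2048) & 32767 == 0.
 */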
997 
998 /*
999  * return a pointer to the page in the filemap that contains the given bit
1000  *
1001  */
1002 static inline struct page *filemap_get_page(struct bitmap_storage *store,
1003 					    unsigned long chunk)
1004 {
1005 	if (file_page_index(store, chunk) >= store->file_pages)
1006 		return NULL;
1007 	return store->filemap[file_page_index(store, chunk)];
1008 }
1009 
1010 static int md_bitmap_storage_alloc(struct bitmap_storage *store,
1011 				   unsigned long chunks, int with_super,
1012 				   int slot_number)
1013 {
1014 	int pnum, offset = 0;
1015 	unsigned long num_pages;
1016 	unsigned long bytes;
1017 
1018 	bytes = DIV_ROUND_UP(chunks, 8);
1019 	if (with_super)
1020 		bytes += sizeof(bitmap_super_t);
1021 
1022 	num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
1023 	offset = slot_number * num_pages;
1024 
1025 	store->filemap = kmalloc_array(num_pages, sizeof(struct page *),
1026 				       GFP_KERNEL);
1027 	if (!store->filemap)
1028 		return -ENOMEM;
1029 
1030 	if (with_super && !store->sb_page) {
1031 		store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
1032 		if (store->sb_page == NULL)
1033 			return -ENOMEM;
1034 	}
1035 
1036 	pnum = 0;
1037 	if (store->sb_page) {
1038 		store->filemap[0] = store->sb_page;
1039 		pnum = 1;
1040 		store->sb_index = offset;
1041 	}
1042 
1043 	for ( ; pnum < num_pages; pnum++) {
1044 		store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO);
1045 		if (!store->filemap[pnum]) {
1046 			store->file_pages = pnum;
1047 			return -ENOMEM;
1048 		}
1049 	}
1050 	store->file_pages = pnum;
1051 
1052 	/* We need 4 bits per page, rounded up to a multiple
1053 	 * of sizeof(unsigned long) */
1054 	store->filemap_attr = kzalloc(
1055 		roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
1056 		GFP_KERNEL);
1057 	if (!store->filemap_attr)
1058 		return -ENOMEM;
1059 
1060 	store->bytes = bytes;
1061 
1062 	return 0;
1063 }
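
/*
 * Sizing example for the allocation above (hypothetical numbers, 4 KiB
 * pages): chunks == 100000 with a superblock gives
 * bytes == 12500 + 256 == 12756, hence num_pages == 4; filemap_attr then
 * needs 4 * 4 == 16 attribute bits, which round up to a single
 * unsigned long.
 */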
1064 
1065 static void md_bitmap_file_unmap(struct bitmap_storage *store)
1066 {
1067 	struct file *file = store->file;
1068 	struct page *sb_page = store->sb_page;
1069 	struct page **map = store->filemap;
1070 	int pages = store->file_pages;
1071 
1072 	while (pages--)
1073 		if (map[pages] != sb_page) /* 0 is sb_page, release it below */
1074 			free_buffers(map[pages]);
1075 	kfree(map);
1076 	kfree(store->filemap_attr);
1077 
1078 	if (sb_page)
1079 		free_buffers(sb_page);
1080 
1081 	if (file) {
1082 		struct inode *inode = file_inode(file);
1083 		invalidate_mapping_pages(inode->i_mapping, 0, -1);
1084 		fput(file);
1085 	}
1086 }
1087 
1088 /*
1089  * bitmap_file_kick - if an error occurs while manipulating the bitmap file
1090  * then it is no longer reliable, so we stop using it and we mark the file
1091  * as failed in the superblock
1092  */
1093 static void md_bitmap_file_kick(struct bitmap *bitmap)
1094 {
1095 	if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) {
1096 		bitmap_update_sb(bitmap);
1097 
1098 		if (bitmap->storage.file) {
1099 			pr_warn("%s: kicking failed bitmap file %pD4 from array!\n",
1100 				bmname(bitmap), bitmap->storage.file);
1101 
1102 		} else
1103 			pr_warn("%s: disabling internal bitmap due to errors\n",
1104 				bmname(bitmap));
1105 	}
1106 }
1107 
1108 enum bitmap_page_attr {
1109 	BITMAP_PAGE_DIRTY = 0,     /* there are set bits that need to be synced */
1110 	BITMAP_PAGE_PENDING = 1,   /* there are bits that are being cleaned.
1111 				    * i.e. counter is 1 or 2. */
1112 	BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */
1113 };
1114 
1115 static inline void set_page_attr(struct bitmap *bitmap, int pnum,
1116 				 enum bitmap_page_attr attr)
1117 {
1118 	set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
1119 }
1120 
1121 static inline void clear_page_attr(struct bitmap *bitmap, int pnum,
1122 				   enum bitmap_page_attr attr)
1123 {
1124 	clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
1125 }
1126 
1127 static inline int test_page_attr(struct bitmap *bitmap, int pnum,
1128 				 enum bitmap_page_attr attr)
1129 {
1130 	return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
1131 }
1132 
1133 static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum,
1134 					   enum bitmap_page_attr attr)
1135 {
1136 	return test_and_clear_bit((pnum<<2) + attr,
1137 				  bitmap->storage.filemap_attr);
1138 }
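
/*
 * Each file page owns four consecutive bits in filemap_attr, one per
 * enum bitmap_page_attr value, at bit index (pnum << 2) + attr.  For
 * example (a sketch), BITMAP_PAGE_NEEDWRITE for page 3 is bit
 * (3 << 2) + 2 == 14 of the attribute array.
 */
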
1139 /*
1140  * bitmap_file_set_bit -- called before performing a write to the md device
1141  * to set (and eventually sync) a particular bit in the bitmap file
1142  *
1143  * we set the bit immediately, then we record the page number so that
1144  * when an unplug occurs, we can flush the dirty pages out to disk
1145  */
1146 static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
1147 {
1148 	unsigned long bit;
1149 	struct page *page;
1150 	void *kaddr;
1151 	unsigned long chunk = block >> bitmap->counts.chunkshift;
1152 	struct bitmap_storage *store = &bitmap->storage;
1153 	unsigned long index = file_page_index(store, chunk);
1154 	unsigned long node_offset = 0;
1155 
1156 	index += store->sb_index;
1157 	if (mddev_is_clustered(bitmap->mddev))
1158 		node_offset = bitmap->cluster_slot * store->file_pages;
1159 
1160 	page = filemap_get_page(&bitmap->storage, chunk);
1161 	if (!page)
1162 		return;
1163 	bit = file_page_offset(&bitmap->storage, chunk);
1164 
1165 	/* set the bit */
1166 	kaddr = kmap_local_page(page);
1167 	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
1168 		set_bit(bit, kaddr);
1169 	else
1170 		set_bit_le(bit, kaddr);
1171 	kunmap_local(kaddr);
1172 	pr_debug("set file bit %lu page %lu\n", bit, index);
1173 	/* record page number so it gets flushed to disk when unplug occurs */
1174 	set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_DIRTY);
1175 }
1176 
1177 static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
1178 {
1179 	unsigned long bit;
1180 	struct page *page;
1181 	void *paddr;
1182 	unsigned long chunk = block >> bitmap->counts.chunkshift;
1183 	struct bitmap_storage *store = &bitmap->storage;
1184 	unsigned long index = file_page_index(store, chunk);
1185 	unsigned long node_offset = 0;
1186 
1187 	index += store->sb_index;
1188 	if (mddev_is_clustered(bitmap->mddev))
1189 		node_offset = bitmap->cluster_slot * store->file_pages;
1190 
1191 	page = filemap_get_page(&bitmap->storage, chunk);
1192 	if (!page)
1193 		return;
1194 	bit = file_page_offset(&bitmap->storage, chunk);
1195 	paddr = kmap_local_page(page);
1196 	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
1197 		clear_bit(bit, paddr);
1198 	else
1199 		clear_bit_le(bit, paddr);
1200 	kunmap_local(paddr);
1201 	if (!test_page_attr(bitmap, index - node_offset, BITMAP_PAGE_NEEDWRITE)) {
1202 		set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_PENDING);
1203 		bitmap->allclean = 0;
1204 	}
1205 }
1206 
1207 static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
1208 {
1209 	unsigned long bit;
1210 	struct page *page;
1211 	void *paddr;
1212 	unsigned long chunk = block >> bitmap->counts.chunkshift;
1213 	int set = 0;
1214 
1215 	page = filemap_get_page(&bitmap->storage, chunk);
1216 	if (!page)
1217 		return -EINVAL;
1218 	bit = file_page_offset(&bitmap->storage, chunk);
1219 	paddr = kmap_local_page(page);
1220 	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
1221 		set = test_bit(bit, paddr);
1222 	else
1223 		set = test_bit_le(bit, paddr);
1224 	kunmap_local(paddr);
1225 	return set;
1226 }
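
/*
 * Endianness example for the three helpers above (a sketch): for bit
 * 2053, the *_le variants always touch byte 2053 >> 3 == 256 with mask
 * 1 << (2053 & 7) == 0x20, giving the same on-disk layout on every
 * host.  The plain set_bit()/clear_bit()/test_bit() used for
 * BITMAP_HOSTENDIAN (version 3) bitmaps operate on native unsigned
 * longs instead, which is why version 3 is non-portable.
 */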
1227 
1228 /* this gets called when the md device is ready to unplug its underlying
1229  * (slave) device queues -- before we let any writes go down, we need to
1230  * sync the dirty pages of the bitmap file to disk */
1231 static void __bitmap_unplug(struct bitmap *bitmap)
1232 {
1233 	unsigned long i;
1234 	int dirty, need_write;
1235 	int writing = 0;
1236 
1237 	if (!__bitmap_enabled(bitmap))
1238 		return;
1239 
1240 	/* look at each page to see if there are any set bits that need to be
1241 	 * flushed out to disk */
1242 	for (i = 0; i < bitmap->storage.file_pages; i++) {
1243 		dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
1244 		need_write = test_and_clear_page_attr(bitmap, i,
1245 						      BITMAP_PAGE_NEEDWRITE);
1246 		if (dirty || need_write) {
1247 			if (!writing) {
1248 				md_bitmap_wait_writes(bitmap);
1249 				mddev_add_trace_msg(bitmap->mddev,
1250 					"md bitmap_unplug");
1251 			}
1252 			clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
1253 			filemap_write_page(bitmap, i, false);
1254 			writing = 1;
1255 		}
1256 	}
1257 	if (writing)
1258 		md_bitmap_wait_writes(bitmap);
1259 
1260 	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
1261 		md_bitmap_file_kick(bitmap);
1262 }
1263 
1264 struct bitmap_unplug_work {
1265 	struct work_struct work;
1266 	struct bitmap *bitmap;
1267 	struct completion *done;
1268 };
1269 
1270 static void md_bitmap_unplug_fn(struct work_struct *work)
1271 {
1272 	struct bitmap_unplug_work *unplug_work =
1273 		container_of(work, struct bitmap_unplug_work, work);
1274 
1275 	__bitmap_unplug(unplug_work->bitmap);
1276 	complete(unplug_work->done);
1277 }
1278 
1279 static void bitmap_unplug_async(struct bitmap *bitmap)
1280 {
1281 	DECLARE_COMPLETION_ONSTACK(done);
1282 	struct bitmap_unplug_work unplug_work;
1283 
1284 	INIT_WORK_ONSTACK(&unplug_work.work, md_bitmap_unplug_fn);
1285 	unplug_work.bitmap = bitmap;
1286 	unplug_work.done = &done;
1287 
1288 	queue_work(md_bitmap_wq, &unplug_work.work);
1289 	wait_for_completion(&done);
1290 	destroy_work_on_stack(&unplug_work.work);
1291 }
1292 
1293 static void bitmap_unplug(struct mddev *mddev, bool sync)
1294 {
1295 	struct bitmap *bitmap = mddev->bitmap;
1296 
1297 	if (!bitmap)
1298 		return;
1299 
1300 	if (sync)
1301 		__bitmap_unplug(bitmap);
1302 	else
1303 		bitmap_unplug_async(bitmap);
1304 }
1305 
1306 static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
1307 
1308 /*
1309  * Initialize the in-memory bitmap from the on-disk bitmap and set up the memory
1310  * mapping of the bitmap file.
1311  *
1312  * Special case: If there's no bitmap file, or if the bitmap file had been
1313  * previously kicked from the array, we mark all the bits as 1's in order to
1314  * cause a full resync.
1315  *
1316  * We ignore all bits for sectors that end earlier than 'start'.
1317  * This is used when reading an out-of-date bitmap.
1318  */
1319 static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
1320 {
1321 	bool outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
1322 	struct mddev *mddev = bitmap->mddev;
1323 	unsigned long chunks = bitmap->counts.chunks;
1324 	struct bitmap_storage *store = &bitmap->storage;
1325 	struct file *file = store->file;
1326 	unsigned long node_offset = 0;
1327 	unsigned long bit_cnt = 0;
1328 	unsigned long i;
1329 	int ret;
1330 
1331 	if (!file && !mddev->bitmap_info.offset) {
1332 		/* No permanent bitmap - fill with '1s'. */
1333 		store->filemap = NULL;
1334 		store->file_pages = 0;
1335 		for (i = 0; i < chunks ; i++) {
1336 			/* if the disk bit is set, set the memory bit */
1337 			int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift)
1338 				      >= start);
1339 			md_bitmap_set_memory_bits(bitmap,
1340 						  (sector_t)i << bitmap->counts.chunkshift,
1341 						  needed);
1342 		}
1343 		return 0;
1344 	}
1345 
1346 	if (file && i_size_read(file->f_mapping->host) < store->bytes) {
1347 		pr_warn("%s: bitmap file too short %lu < %lu\n",
1348 			bmname(bitmap),
1349 			(unsigned long) i_size_read(file->f_mapping->host),
1350 			store->bytes);
1351 		ret = -ENOSPC;
1352 		goto err;
1353 	}
1354 
1355 	if (mddev_is_clustered(mddev))
1356 		node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE));
1357 
1358 	for (i = 0; i < store->file_pages; i++) {
1359 		struct page *page = store->filemap[i];
1360 		int count;
1361 
1362 		/* read the page; the last page may be only partially used */
1363 		if (i == store->file_pages - 1)
1364 			count = store->bytes - i * PAGE_SIZE;
1365 		else
1366 			count = PAGE_SIZE;
1367 
1368 		if (file)
1369 			ret = read_file_page(file, i, bitmap, count, page);
1370 		else
1371 			ret = read_sb_page(mddev, 0, page, i + node_offset,
1372 					   count);
1373 		if (ret)
1374 			goto err;
1375 	}
1376 
1377 	if (outofdate) {
1378 		pr_warn("%s: bitmap file is out of date, doing full recovery\n",
1379 			bmname(bitmap));
1380 
1381 		for (i = 0; i < store->file_pages; i++) {
1382 			struct page *page = store->filemap[i];
1383 			unsigned long offset = 0;
1384 			void *paddr;
1385 
1386 			if (i == 0 && !mddev->bitmap_info.external)
1387 				offset = sizeof(bitmap_super_t);
1388 
1389 			/*
1390 			 * If the bitmap is out of date, dirty the whole page
1391 			 * and write it out
1392 			 */
1393 			paddr = kmap_local_page(page);
1394 			memset(paddr + offset, 0xff, PAGE_SIZE - offset);
1395 			kunmap_local(paddr);
1396 
1397 			filemap_write_page(bitmap, i, true);
1398 			if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) {
1399 				ret = -EIO;
1400 				goto err;
1401 			}
1402 		}
1403 	}
1404 
1405 	for (i = 0; i < chunks; i++) {
1406 		struct page *page = filemap_get_page(&bitmap->storage, i);
1407 		unsigned long bit = file_page_offset(&bitmap->storage, i);
1408 		void *paddr;
1409 		bool was_set;
1410 
1411 		paddr = kmap_local_page(page);
1412 		if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
1413 			was_set = test_bit(bit, paddr);
1414 		else
1415 			was_set = test_bit_le(bit, paddr);
1416 		kunmap_local(paddr);
1417 
1418 		if (was_set) {
1419 			/* if the disk bit is set, set the memory bit */
1420 			int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift
1421 				      >= start);
1422 			md_bitmap_set_memory_bits(bitmap,
1423 						  (sector_t)i << bitmap->counts.chunkshift,
1424 						  needed);
1425 			bit_cnt++;
1426 		}
1427 	}
1428 
1429 	pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n",
1430 		 bmname(bitmap), store->file_pages,
1431 		 bit_cnt, chunks);
1432 
1433 	return 0;
1434 
1435  err:
1436 	pr_warn("%s: bitmap initialisation failed: %d\n",
1437 		bmname(bitmap), ret);
1438 	return ret;
1439 }
1440 
1441 /* just flag bitmap pages as needing to be written. */
1442 static void bitmap_write_all(struct mddev *mddev)
1443 {
1444 	int i;
1445 	struct bitmap *bitmap = mddev->bitmap;
1446 
1447 	if (!bitmap || !bitmap->storage.filemap)
1448 		return;
1449 
1450 	/* Only one copy, so nothing needed */
1451 	if (bitmap->storage.file)
1452 		return;
1453 
1454 	for (i = 0; i < bitmap->storage.file_pages; i++)
1455 		set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE);
1456 	bitmap->allclean = 0;
1457 }
1458 
1459 static void md_bitmap_count_page(struct bitmap_counts *bitmap,
1460 				 sector_t offset, int inc)
1461 {
1462 	sector_t chunk = offset >> bitmap->chunkshift;
1463 	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1464 	bitmap->bp[page].count += inc;
1465 	md_bitmap_checkfree(bitmap, page);
1466 }
1467 
1468 static void md_bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset)
1469 {
1470 	sector_t chunk = offset >> bitmap->chunkshift;
1471 	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1472 	struct bitmap_page *bp = &bitmap->bp[page];
1473 
1474 	if (!bp->pending)
1475 		bp->pending = 1;
1476 }
1477 
1478 static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap,
1479 					       sector_t offset, sector_t *blocks,
1480 					       int create);
1481 
1482 static void mddev_set_timeout(struct mddev *mddev, unsigned long timeout,
1483 			      bool force)
1484 {
1485 	struct md_thread *thread;
1486 
1487 	rcu_read_lock();
1488 	thread = rcu_dereference(mddev->thread);
1489 
1490 	if (!thread)
1491 		goto out;
1492 
1493 	if (force || thread->timeout < MAX_SCHEDULE_TIMEOUT)
1494 		thread->timeout = timeout;
1495 
1496 out:
1497 	rcu_read_unlock();
1498 }
1499 
1500 /*
1501  * bitmap daemon -- periodically wakes up to clean bits and flush pages
1502  *			out to disk
1503  */
1504 static void bitmap_daemon_work(struct mddev *mddev)
1505 {
1506 	struct bitmap *bitmap;
1507 	unsigned long j;
1508 	unsigned long nextpage;
1509 	sector_t blocks;
1510 	struct bitmap_counts *counts;
1511 
1512 	/* Use a mutex to guard daemon_work against
1513 	 * bitmap_destroy.
1514 	 */
1515 	mutex_lock(&mddev->bitmap_info.mutex);
1516 	bitmap = mddev->bitmap;
1517 	if (bitmap == NULL) {
1518 		mutex_unlock(&mddev->bitmap_info.mutex);
1519 		return;
1520 	}
1521 	if (time_before(jiffies, bitmap->daemon_lastrun
1522 			+ mddev->bitmap_info.daemon_sleep))
1523 		goto done;
1524 
1525 	bitmap->daemon_lastrun = jiffies;
1526 	if (bitmap->allclean) {
1527 		mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);
1528 		goto done;
1529 	}
1530 	bitmap->allclean = 1;
1531 
1532 	mddev_add_trace_msg(bitmap->mddev, "md bitmap_daemon_work");
1533 
1534 	/* Any file-page which is PENDING now needs to be written.
1535 	 * So set NEEDWRITE now, then after we make any last-minute changes
1536 	 * we will write it.
1537 	 */
1538 	for (j = 0; j < bitmap->storage.file_pages; j++)
1539 		if (test_and_clear_page_attr(bitmap, j,
1540 					     BITMAP_PAGE_PENDING))
1541 			set_page_attr(bitmap, j,
1542 				      BITMAP_PAGE_NEEDWRITE);
1543 
1544 	if (bitmap->need_sync &&
1545 	    mddev->bitmap_info.external == 0) {
1546 		/* Arrange for superblock update as well as
1547 		 * other changes */
1548 		bitmap_super_t *sb;
1549 		bitmap->need_sync = 0;
1550 		if (bitmap->storage.filemap) {
1551 			sb = kmap_local_page(bitmap->storage.sb_page);
1552 			sb->events_cleared =
1553 				cpu_to_le64(bitmap->events_cleared);
1554 			kunmap_local(sb);
1555 			set_page_attr(bitmap, 0,
1556 				      BITMAP_PAGE_NEEDWRITE);
1557 		}
1558 	}
1559 	/* Now look at the bitmap counters and if any are '2' or '1',
1560 	 * decrement and handle accordingly.
1561 	 */
1562 	counts = &bitmap->counts;
1563 	spin_lock_irq(&counts->lock);
1564 	nextpage = 0;
1565 	for (j = 0; j < counts->chunks; j++) {
1566 		bitmap_counter_t *bmc;
1567 		sector_t  block = (sector_t)j << counts->chunkshift;
1568 
1569 		if (j == nextpage) {
1570 			nextpage += PAGE_COUNTER_RATIO;
1571 			if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) {
1572 				j |= PAGE_COUNTER_MASK;
1573 				continue;
1574 			}
1575 			counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0;
1576 		}
1577 
1578 		bmc = md_bitmap_get_counter(counts, block, &blocks, 0);
1579 		if (!bmc) {
1580 			j |= PAGE_COUNTER_MASK;
1581 			continue;
1582 		}
1583 		if (*bmc == 1 && !bitmap->need_sync) {
1584 			/* We can clear the bit */
1585 			*bmc = 0;
1586 			md_bitmap_count_page(counts, block, -1);
1587 			md_bitmap_file_clear_bit(bitmap, block);
1588 		} else if (*bmc && *bmc <= 2) {
1589 			*bmc = 1;
1590 			md_bitmap_set_pending(counts, block);
1591 			bitmap->allclean = 0;
1592 		}
1593 	}
1594 	spin_unlock_irq(&counts->lock);
1595 
1596 	md_bitmap_wait_writes(bitmap);
1597 	/* Now start writeout on any page in NEEDWRITE that isn't DIRTY.
1598 	 * DIRTY pages need to be written by bitmap_unplug so it can wait
1599 	 * for them.
1600 	 * If we find any DIRTY page we stop there and let bitmap_unplug
1601 	 * handle all the rest.  This is important in the case where
1602 	 * the first block holds the superblock and it has been updated.
1603 	 * We mustn't write any other blocks before the superblock.
1604 	 */
1605 	for (j = 0;
1606 	     j < bitmap->storage.file_pages
1607 		     && !test_bit(BITMAP_STALE, &bitmap->flags);
1608 	     j++) {
1609 		if (test_page_attr(bitmap, j,
1610 				   BITMAP_PAGE_DIRTY))
1611 			/* bitmap_unplug will handle the rest */
1612 			break;
1613 		if (bitmap->storage.filemap &&
1614 		    test_and_clear_page_attr(bitmap, j,
1615 					     BITMAP_PAGE_NEEDWRITE))
1616 			filemap_write_page(bitmap, j, false);
1617 	}
1618 
1619  done:
1620 	if (bitmap->allclean == 0)
1621 		mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
1622 	mutex_unlock(&mddev->bitmap_info.mutex);
1623 }
1624 
1625 static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap,
1626 					       sector_t offset, sector_t *blocks,
1627 					       int create)
1628 __releases(bitmap->lock)
1629 __acquires(bitmap->lock)
1630 {
1631 	/* If 'create', we might release the lock and reclaim it.
1632 	 * The lock must have been taken with interrupts enabled.
1633 	 * If !create, we don't release the lock.
1634 	 */
1635 	sector_t chunk = offset >> bitmap->chunkshift;
1636 	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1637 	unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
1638 	sector_t csize = ((sector_t)1) << bitmap->chunkshift;
1639 	int err;
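	/* Worked example of the arithmetic above, assuming 4K pages with
	 * 2-byte counters (PAGE_COUNTER_SHIFT == 11, PAGE_COUNTER_MASK ==
	 * 2047) and 64MiB chunks (chunkshift == 17):
	 *
	 *	offset  = 300000000 sectors
	 *	chunk   = 300000000 >> 17    = 2288
	 *	page    = 2288 >> 11         = 1
	 *	pageoff = (2288 & 2047) << 1 = 480
	 *
	 * i.e. the counter lives at byte offset 480 of the second page.
	 */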
1640 
1641 	if (page >= bitmap->pages) {
1642 		/*
1643 		 * This can happen if bitmap_start_sync goes beyond the end
1644 		 * of the device while looking for a whole page, or if the
1645 		 * user writes a huge number to the sysfs file bitmap_set_bits.
1646 		 */
1647 		*blocks = csize - (offset & (csize - 1));
1648 		return NULL;
1649 	}
1650 	err = md_bitmap_checkpage(bitmap, page, create, 0);
1651 
1652 	if (bitmap->bp[page].hijacked ||
1653 	    bitmap->bp[page].map == NULL)
1654 		csize = ((sector_t)1) << (bitmap->chunkshift +
1655 					  PAGE_COUNTER_SHIFT);
1656 
1657 	*blocks = csize - (offset & (csize - 1));
1658 
1659 	if (err < 0)
1660 		return NULL;
1661 
1662 	/* now locked ... */
1663 
1664 	if (bitmap->bp[page].hijacked) { /* hijacked pointer */
1665 		/* should we use the first or second counter field
1666 		 * of the hijacked pointer? */
1667 		int hi = (pageoff > PAGE_COUNTER_MASK);
1668 		return  &((bitmap_counter_t *)
1669 			  &bitmap->bp[page].map)[hi];
1670 	} else /* page is allocated */
1671 		return (bitmap_counter_t *)
1672 			&(bitmap->bp[page].map[pageoff]);
1673 }
1674 
1675 static int bitmap_startwrite(struct mddev *mddev, sector_t offset,
1676 			     unsigned long sectors)
1677 {
1678 	struct bitmap *bitmap = mddev->bitmap;
1679 
1680 	if (!bitmap)
1681 		return 0;
1682 
1683 	while (sectors) {
1684 		sector_t blocks;
1685 		bitmap_counter_t *bmc;
1686 
1687 		spin_lock_irq(&bitmap->counts.lock);
1688 		bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 1);
1689 		if (!bmc) {
1690 			spin_unlock_irq(&bitmap->counts.lock);
1691 			return 0;
1692 		}
1693 
1694 		if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) {
1695 			DEFINE_WAIT(__wait);
1696 			/* note that it is safe to do the prepare_to_wait
1697 			 * after the test as long as we do it before dropping
1698 			 * the spinlock.
1699 			 */
1700 			prepare_to_wait(&bitmap->overflow_wait, &__wait,
1701 					TASK_UNINTERRUPTIBLE);
1702 			spin_unlock_irq(&bitmap->counts.lock);
1703 			schedule();
1704 			finish_wait(&bitmap->overflow_wait, &__wait);
1705 			continue;
1706 		}
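		/* The matching wake_up() is in bitmap_endwrite(): it fires
		 * when a write completes on a chunk whose counter sits at
		 * COUNTER_MAX, so the retry above eventually finds room.
		 */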
1707 
1708 		switch (*bmc) {
1709 		case 0:
1710 			md_bitmap_file_set_bit(bitmap, offset);
1711 			md_bitmap_count_page(&bitmap->counts, offset, 1);
1712 			fallthrough;
1713 		case 1:
1714 			*bmc = 2;
1715 		}
1716 
1717 		(*bmc)++;
1718 
1719 		spin_unlock_irq(&bitmap->counts.lock);
1720 
1721 		offset += blocks;
1722 		if (sectors > blocks)
1723 			sectors -= blocks;
1724 		else
1725 			sectors = 0;
1726 	}
1727 	return 0;
1728 }
1729 
1730 static void bitmap_endwrite(struct mddev *mddev, sector_t offset,
1731 			    unsigned long sectors)
1732 {
1733 	struct bitmap *bitmap = mddev->bitmap;
1734 
1735 	if (!bitmap)
1736 		return;
1737 
1738 	while (sectors) {
1739 		sector_t blocks;
1740 		unsigned long flags;
1741 		bitmap_counter_t *bmc;
1742 
1743 		spin_lock_irqsave(&bitmap->counts.lock, flags);
1744 		bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 0);
1745 		if (!bmc) {
1746 			spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1747 			return;
1748 		}
1749 
1750 		if (!bitmap->mddev->degraded) {
1751 			if (bitmap->events_cleared < bitmap->mddev->events) {
1752 				bitmap->events_cleared = bitmap->mddev->events;
1753 				bitmap->need_sync = 1;
1754 				sysfs_notify_dirent_safe(
1755 						bitmap->sysfs_can_clear);
1756 			}
1757 		} else if (!NEEDED(*bmc)) {
1758 			*bmc |= NEEDED_MASK;
1759 		}
1760 
1761 		if (COUNTER(*bmc) == COUNTER_MAX)
1762 			wake_up(&bitmap->overflow_wait);
1763 
1764 		(*bmc)--;
1765 		if (*bmc <= 2) {
1766 			md_bitmap_set_pending(&bitmap->counts, offset);
1767 			bitmap->allclean = 0;
1768 		}
1769 		spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1770 		offset += blocks;
1771 		if (sectors > blocks)
1772 			sectors -= blocks;
1773 		else
1774 			sectors = 0;
1775 	}
1776 }
1777 
1778 static bool __bitmap_start_sync(struct bitmap *bitmap, sector_t offset,
1779 				sector_t *blocks, bool degraded)
1780 {
1781 	bitmap_counter_t *bmc;
1782 	bool rv;
1783 
1784 	if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */
1785 		*blocks = 1024;
1786 		return true; /* always resync if no bitmap */
1787 	}
1788 	spin_lock_irq(&bitmap->counts.lock);
1789 
1790 	rv = false;
1791 	bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
1792 	if (bmc) {
1793 		/* locked */
1794 		if (RESYNC(*bmc)) {
1795 			rv = true;
1796 		} else if (NEEDED(*bmc)) {
1797 			rv = true;
1798 			if (!degraded) { /* don't set/clear bits if degraded */
1799 				*bmc |= RESYNC_MASK;
1800 				*bmc &= ~NEEDED_MASK;
1801 			}
1802 		}
1803 	}
1804 	spin_unlock_irq(&bitmap->counts.lock);
1805 
1806 	return rv;
1807 }
1808 
1809 static bool bitmap_start_sync(struct mddev *mddev, sector_t offset,
1810 			      sector_t *blocks, bool degraded)
1811 {
1812 	/* bitmap_start_sync must always report on multiples of whole
1813 	 * pages, otherwise resync (which is very PAGE_SIZE based) will
1814 	 * get confused.
1815 	 * So call __bitmap_start_sync repeatedly (if needed) until
1816 	 * at least PAGE_SIZE>>9 blocks are covered.
1817 	 * Return the 'or' of the results.
1818 	 */
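	/* e.g. with 4K pages PAGE_SIZE>>9 == 8, so even when one
	 * __bitmap_start_sync() call covers fewer than 8 sectors (tiny
	 * chunks), we keep looping until a whole page worth is covered.
	 */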
1819 	bool rv = false;
1820 	sector_t blocks1;
1821 
1822 	*blocks = 0;
1823 	while (*blocks < (PAGE_SIZE>>9)) {
1824 		rv |= __bitmap_start_sync(mddev->bitmap, offset,
1825 					  &blocks1, degraded);
1826 		offset += blocks1;
1827 		*blocks += blocks1;
1828 	}
1829 
1830 	return rv;
1831 }
1832 
1833 static void __bitmap_end_sync(struct bitmap *bitmap, sector_t offset,
1834 			      sector_t *blocks, bool aborted)
1835 {
1836 	bitmap_counter_t *bmc;
1837 	unsigned long flags;
1838 
1839 	if (bitmap == NULL) {
1840 		*blocks = 1024;
1841 		return;
1842 	}
1843 	spin_lock_irqsave(&bitmap->counts.lock, flags);
1844 	bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
1845 	if (bmc == NULL)
1846 		goto unlock;
1847 	/* locked */
1848 	if (RESYNC(*bmc)) {
1849 		*bmc &= ~RESYNC_MASK;
1850 
1851 		if (!NEEDED(*bmc) && aborted)
1852 			*bmc |= NEEDED_MASK;
1853 		else {
1854 			if (*bmc <= 2) {
1855 				md_bitmap_set_pending(&bitmap->counts, offset);
1856 				bitmap->allclean = 0;
1857 			}
1858 		}
1859 	}
1860  unlock:
1861 	spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1862 }
1863 
1864 static void bitmap_end_sync(struct mddev *mddev, sector_t offset,
1865 			    sector_t *blocks)
1866 {
1867 	__bitmap_end_sync(mddev->bitmap, offset, blocks, true);
1868 }
1869 
1870 static void bitmap_close_sync(struct mddev *mddev)
1871 {
1872 	/* Sync has finished, and any bitmap chunks that weren't synced
1873 	 * properly have been aborted.  It remains for us to clear the
1874 	 * RESYNC bit wherever it is still on.
1875 	 */
1876 	sector_t sector = 0;
1877 	sector_t blocks;
1878 	struct bitmap *bitmap = mddev->bitmap;
1879 
1880 	if (!bitmap)
1881 		return;
1882 
1883 	while (sector < bitmap->mddev->resync_max_sectors) {
1884 		__bitmap_end_sync(bitmap, sector, &blocks, false);
1885 		sector += blocks;
1886 	}
1887 }
1888 
1889 static void bitmap_cond_end_sync(struct mddev *mddev, sector_t sector,
1890 				 bool force)
1891 {
1892 	sector_t s = 0;
1893 	sector_t blocks;
1894 	struct bitmap *bitmap = mddev->bitmap;
1895 
1896 	if (!bitmap)
1897 		return;
1898 	if (sector == 0) {
1899 		bitmap->last_end_sync = jiffies;
1900 		return;
1901 	}
1902 	if (!force && time_before(jiffies, (bitmap->last_end_sync
1903 				  + bitmap->mddev->bitmap_info.daemon_sleep)))
1904 		return;
1905 	wait_event(bitmap->mddev->recovery_wait,
1906 		   atomic_read(&bitmap->mddev->recovery_active) == 0);
1907 
1908 	bitmap->mddev->curr_resync_completed = sector;
1909 	set_bit(MD_SB_CHANGE_CLEAN, &bitmap->mddev->sb_flags);
1910 	sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);
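	/* e.g. with chunkshift == 17 (64MiB chunks) this aligns 'sector'
	 * down to a multiple of 131072 sectors.
	 */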
1911 	s = 0;
1912 	while (s < sector && s < bitmap->mddev->resync_max_sectors) {
1913 		__bitmap_end_sync(bitmap, s, &blocks, false);
1914 		s += blocks;
1915 	}
1916 	bitmap->last_end_sync = jiffies;
1917 	sysfs_notify_dirent_safe(bitmap->mddev->sysfs_completed);
1918 }
1919 
1920 static void bitmap_sync_with_cluster(struct mddev *mddev,
1921 				     sector_t old_lo, sector_t old_hi,
1922 				     sector_t new_lo, sector_t new_hi)
1923 {
1924 	struct bitmap *bitmap = mddev->bitmap;
1925 	sector_t sector, blocks = 0;
1926 
1927 	for (sector = old_lo; sector < new_lo; ) {
1928 		__bitmap_end_sync(bitmap, sector, &blocks, false);
1929 		sector += blocks;
1930 	}
1931 	WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n");
1932 
1933 	for (sector = old_hi; sector < new_hi; ) {
1934 		bitmap_start_sync(mddev, sector, &blocks, false);
1935 		sector += blocks;
1936 	}
1937 	WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n");
1938 }
1939 
1940 static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
1941 {
1942 	/* For each chunk covered by any of these sectors, set the
1943 	 * counter to 2 and possibly set resync_needed.  They should all
1944 	 * be 0 at this point
1945 	 * be 0 at this point.
1946 
1947 	sector_t secs;
1948 	bitmap_counter_t *bmc;
1949 	spin_lock_irq(&bitmap->counts.lock);
1950 	bmc = md_bitmap_get_counter(&bitmap->counts, offset, &secs, 1);
1951 	if (!bmc) {
1952 		spin_unlock_irq(&bitmap->counts.lock);
1953 		return;
1954 	}
1955 	if (!*bmc) {
1956 		*bmc = 2;
1957 		md_bitmap_count_page(&bitmap->counts, offset, 1);
1958 		md_bitmap_set_pending(&bitmap->counts, offset);
1959 		bitmap->allclean = 0;
1960 	}
1961 	if (needed)
1962 		*bmc |= NEEDED_MASK;
1963 	spin_unlock_irq(&bitmap->counts.lock);
1964 }
1965 
1966 /* dirty the memory and file bits for bitmap chunks "s" to "e" */
1967 static void bitmap_dirty_bits(struct mddev *mddev, unsigned long s,
1968 			      unsigned long e)
1969 {
1970 	unsigned long chunk;
1971 	struct bitmap *bitmap = mddev->bitmap;
1972 
1973 	if (!bitmap)
1974 		return;
1975 
1976 	for (chunk = s; chunk <= e; chunk++) {
1977 		sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift;
1978 
1979 		md_bitmap_set_memory_bits(bitmap, sec, 1);
1980 		md_bitmap_file_set_bit(bitmap, sec);
1981 		if (sec < bitmap->mddev->recovery_cp)
1982 			/* We are asserting that the array is dirty,
1983 			 * so move the recovery_cp address back so
1984 			 * that it is obvious that it is dirty
1985 			 */
1986 			bitmap->mddev->recovery_cp = sec;
1987 	}
1988 }
1989 
1990 static void bitmap_flush(struct mddev *mddev)
1991 {
1992 	struct bitmap *bitmap = mddev->bitmap;
1993 	long sleep;
1994 
1995 	if (!bitmap) /* there was no bitmap */
1996 		return;
1997 
1998 	/* run the daemon_work three times to ensure everything that
1999 	 * can be flushed gets flushed
2000 	 */
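	/* Sketch of why a single pass would not be enough: one pass
	 * moves counters from 2 to 1, the next drops them from 1 to 0
	 * and clears the on-disk bits, and a further pass writes out
	 * pages that became PENDING along the way.
	 */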
2001 	sleep = mddev->bitmap_info.daemon_sleep * 2;
2002 	bitmap->daemon_lastrun -= sleep;
2003 	bitmap_daemon_work(mddev);
2004 	bitmap->daemon_lastrun -= sleep;
2005 	bitmap_daemon_work(mddev);
2006 	bitmap->daemon_lastrun -= sleep;
2007 	bitmap_daemon_work(mddev);
2008 	if (mddev->bitmap_info.external)
2009 		md_super_wait(mddev);
2010 	bitmap_update_sb(bitmap);
2011 }
2012 
2013 static void md_bitmap_free(void *data)
2014 {
2015 	unsigned long k, pages;
2016 	struct bitmap_page *bp;
2017 	struct bitmap *bitmap = data;
2018 
2019 	if (!bitmap) /* there was no bitmap */
2020 		return;
2021 
2022 	if (bitmap->sysfs_can_clear)
2023 		sysfs_put(bitmap->sysfs_can_clear);
2024 
2025 	if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info &&
2026 		bitmap->cluster_slot == bitmap->mddev->cluster_ops->slot_number(bitmap->mddev))
2027 		md_cluster_stop(bitmap->mddev);
2028 
2029 	/* Shouldn't be needed - but just in case.... */
2030 	wait_event(bitmap->write_wait,
2031 		   atomic_read(&bitmap->pending_writes) == 0);
2032 
2033 	/* release the bitmap file  */
2034 	md_bitmap_file_unmap(&bitmap->storage);
2035 
2036 	bp = bitmap->counts.bp;
2037 	pages = bitmap->counts.pages;
2038 
2039 	/* free all allocated memory */
2040 
2041 	if (bp) /* deallocate the page memory */
2042 		for (k = 0; k < pages; k++)
2043 			if (bp[k].map && !bp[k].hijacked)
2044 				kfree(bp[k].map);
2045 	kfree(bp);
2046 	kfree(bitmap);
2047 }
2048 
2049 static void bitmap_start_behind_write(struct mddev *mddev)
2050 {
2051 	struct bitmap *bitmap = mddev->bitmap;
2052 	int bw;
2053 
2054 	if (!bitmap)
2055 		return;
2056 
2057 	atomic_inc(&bitmap->behind_writes);
2058 	bw = atomic_read(&bitmap->behind_writes);
2059 	if (bw > bitmap->behind_writes_used)
2060 		bitmap->behind_writes_used = bw;
2061 
2062 	pr_debug("inc write-behind count %d/%lu\n",
2063 		 bw, bitmap->mddev->bitmap_info.max_write_behind);
2064 }
2065 
2066 static void bitmap_end_behind_write(struct mddev *mddev)
2067 {
2068 	struct bitmap *bitmap = mddev->bitmap;
2069 
2070 	if (!bitmap)
2071 		return;
2072 
2073 	if (atomic_dec_and_test(&bitmap->behind_writes))
2074 		wake_up(&bitmap->behind_wait);
2075 	pr_debug("dec write-behind count %d/%lu\n",
2076 		 atomic_read(&bitmap->behind_writes),
2077 		 bitmap->mddev->bitmap_info.max_write_behind);
2078 }
2079 
2080 static void bitmap_wait_behind_writes(struct mddev *mddev)
2081 {
2082 	struct bitmap *bitmap = mddev->bitmap;
2083 
2084 	/* wait for behind writes to complete */
2085 	if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
2086 		pr_debug("md:%s: behind writes in progress - waiting to stop.\n",
2087 			 mdname(mddev));
2088 		/* need to kick something here to make sure I/O goes? */
2089 		wait_event(bitmap->behind_wait,
2090 			   atomic_read(&bitmap->behind_writes) == 0);
2091 	}
2092 }
2093 
2094 static void bitmap_destroy(struct mddev *mddev)
2095 {
2096 	struct bitmap *bitmap = mddev->bitmap;
2097 
2098 	if (!bitmap) /* there was no bitmap */
2099 		return;
2100 
2101 	bitmap_wait_behind_writes(mddev);
2102 	if (!mddev->serialize_policy)
2103 		mddev_destroy_serial_pool(mddev, NULL);
2104 
2105 	mutex_lock(&mddev->bitmap_info.mutex);
2106 	spin_lock(&mddev->lock);
2107 	mddev->bitmap = NULL; /* disconnect from the md device */
2108 	spin_unlock(&mddev->lock);
2109 	mutex_unlock(&mddev->bitmap_info.mutex);
2110 	mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);
2111 
2112 	md_bitmap_free(bitmap);
2113 }
2114 
2115 /*
2116  * initialize the bitmap structure
2117  * if this returns an error, bitmap_destroy must be called to clean up
2118  * once mddev->bitmap is set
2119  */
2120 static struct bitmap *__bitmap_create(struct mddev *mddev, int slot)
2121 {
2122 	struct bitmap *bitmap;
2123 	sector_t blocks = mddev->resync_max_sectors;
2124 	struct file *file = mddev->bitmap_info.file;
2125 	int err;
2126 	struct kernfs_node *bm = NULL;
2127 
2128 	BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
2129 
2130 	BUG_ON(file && mddev->bitmap_info.offset);
2131 
2132 	if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
2133 		pr_notice("md/raid:%s: array with journal cannot have bitmap\n",
2134 			  mdname(mddev));
2135 		return ERR_PTR(-EBUSY);
2136 	}
2137 
2138 	bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
2139 	if (!bitmap)
2140 		return ERR_PTR(-ENOMEM);
2141 
2142 	spin_lock_init(&bitmap->counts.lock);
2143 	atomic_set(&bitmap->pending_writes, 0);
2144 	init_waitqueue_head(&bitmap->write_wait);
2145 	init_waitqueue_head(&bitmap->overflow_wait);
2146 	init_waitqueue_head(&bitmap->behind_wait);
2147 
2148 	bitmap->mddev = mddev;
2149 	bitmap->cluster_slot = slot;
2150 
2151 	if (mddev->kobj.sd)
2152 		bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
2153 	if (bm) {
2154 		bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
2155 		sysfs_put(bm);
2156 	} else
2157 		bitmap->sysfs_can_clear = NULL;
2158 
2159 	bitmap->storage.file = file;
2160 	if (file) {
2161 		get_file(file);
2162 		/* As future accesses to this file will use bmap,
2163 		 * and bypass the page cache, we must sync the file
2164 		 * first.
2165 		 */
2166 		vfs_fsync(file, 1);
2167 	}
2168 	/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
2169 	if (!mddev->bitmap_info.external) {
2170 		/*
2171 		 * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is
2172 		 * instructing us to create a new on-disk bitmap instance.
2173 		 */
2174 		if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags))
2175 			err = md_bitmap_new_disk_sb(bitmap);
2176 		else
2177 			err = md_bitmap_read_sb(bitmap);
2178 	} else {
2179 		err = 0;
2180 		if (mddev->bitmap_info.chunksize == 0 ||
2181 		    mddev->bitmap_info.daemon_sleep == 0)
2182 			/* chunksize and time_base need to be
2183 			 * set first. */
2184 			err = -EINVAL;
2185 	}
2186 	if (err)
2187 		goto error;
2188 
2189 	bitmap->daemon_lastrun = jiffies;
2190 	err = __bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize,
2191 			      true);
2192 	if (err)
2193 		goto error;
2194 
2195 	pr_debug("created bitmap (%lu pages) for device %s\n",
2196 		 bitmap->counts.pages, bmname(bitmap));
2197 
2198 	err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
2199 	if (err)
2200 		goto error;
2201 
2202 	return bitmap;
2203  error:
2204 	md_bitmap_free(bitmap);
2205 	return ERR_PTR(err);
2206 }
2207 
2208 static int bitmap_create(struct mddev *mddev, int slot)
2209 {
2210 	struct bitmap *bitmap = __bitmap_create(mddev, slot);
2211 
2212 	if (IS_ERR(bitmap))
2213 		return PTR_ERR(bitmap);
2214 
2215 	mddev->bitmap = bitmap;
2216 	return 0;
2217 }
2218 
2219 static int bitmap_load(struct mddev *mddev)
2220 {
2221 	int err = 0;
2222 	sector_t start = 0;
2223 	sector_t sector = 0;
2224 	struct bitmap *bitmap = mddev->bitmap;
2225 	struct md_rdev *rdev;
2226 
2227 	if (!bitmap)
2228 		goto out;
2229 
2230 	rdev_for_each(rdev, mddev)
2231 		mddev_create_serial_pool(mddev, rdev);
2232 
2233 	if (mddev_is_clustered(mddev))
2234 		mddev->cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
2235 
2236 	/* Clear out old bitmap info first:  Either there is none, or we
2237 	 * are resuming after someone else has possibly changed things,
2238 	 * so we should forget old cached info.
2239 	 * All chunks should be clean, but some might need_sync.
2240 	 */
2241 	while (sector < mddev->resync_max_sectors) {
2242 		sector_t blocks;
2243 		bitmap_start_sync(mddev, sector, &blocks, false);
2244 		sector += blocks;
2245 	}
2246 	bitmap_close_sync(mddev);
2247 
2248 	if (mddev->degraded == 0
2249 	    || bitmap->events_cleared == mddev->events)
2250 		/* no need to keep dirty bits to optimise a
2251 		 * re-add of a missing device */
2252 		start = mddev->recovery_cp;
2253 
2254 	mutex_lock(&mddev->bitmap_info.mutex);
2255 	err = md_bitmap_init_from_disk(bitmap, start);
2256 	mutex_unlock(&mddev->bitmap_info.mutex);
2257 
2258 	if (err)
2259 		goto out;
2260 	clear_bit(BITMAP_STALE, &bitmap->flags);
2261 
2262 	/* Kick recovery in case any bits were set */
2263 	set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
2264 
2265 	mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
2266 	md_wakeup_thread(mddev->thread);
2267 
2268 	bitmap_update_sb(bitmap);
2269 
2270 	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
2271 		err = -EIO;
2272 out:
2273 	return err;
2274 }
2275 
2276 /* caller needs to free the returned bitmap with md_bitmap_free() */
2277 static void *bitmap_get_from_slot(struct mddev *mddev, int slot)
2278 {
2279 	int rv = 0;
2280 	struct bitmap *bitmap;
2281 
2282 	bitmap = __bitmap_create(mddev, slot);
2283 	if (IS_ERR(bitmap)) {
2284 		rv = PTR_ERR(bitmap);
2285 		return ERR_PTR(rv);
2286 	}
2287 
2288 	rv = md_bitmap_init_from_disk(bitmap, 0);
2289 	if (rv) {
2290 		md_bitmap_free(bitmap);
2291 		return ERR_PTR(rv);
2292 	}
2293 
2294 	return bitmap;
2295 }
2296 
2297 /* Loads the bitmap associated with slot and copies the resync information
2298  * to our bitmap
2299  */
2300 static int bitmap_copy_from_slot(struct mddev *mddev, int slot, sector_t *low,
2301 				 sector_t *high, bool clear_bits)
2302 {
2303 	int rv = 0, i, j;
2304 	sector_t block, lo = 0, hi = 0;
2305 	struct bitmap_counts *counts;
2306 	struct bitmap *bitmap;
2307 
2308 	bitmap = bitmap_get_from_slot(mddev, slot);
2309 	if (IS_ERR(bitmap)) {
2310 		pr_err("%s can't get bitmap from slot %d\n", __func__, slot);
2311 		return -1;
2312 	}
2313 
2314 	counts = &bitmap->counts;
2315 	for (j = 0; j < counts->chunks; j++) {
2316 		block = (sector_t)j << counts->chunkshift;
2317 		if (md_bitmap_file_test_bit(bitmap, block)) {
2318 			if (!lo)
2319 				lo = block;
2320 			hi = block;
2321 			md_bitmap_file_clear_bit(bitmap, block);
2322 			md_bitmap_set_memory_bits(mddev->bitmap, block, 1);
2323 			md_bitmap_file_set_bit(mddev->bitmap, block);
2324 		}
2325 	}
2326 
2327 	if (clear_bits) {
2328 		bitmap_update_sb(bitmap);
2329 		/* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs
2330 		 * BITMAP_PAGE_DIRTY or _NEEDWRITE to write ... */
2331 		for (i = 0; i < bitmap->storage.file_pages; i++)
2332 			if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING))
2333 				set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE);
2334 		__bitmap_unplug(bitmap);
2335 	}
2336 	__bitmap_unplug(mddev->bitmap);
2337 	*low = lo;
2338 	*high = hi;
2339 	md_bitmap_free(bitmap);
2340 
2341 	return rv;
2342 }
2343 
2344 static void bitmap_set_pages(void *data, unsigned long pages)
2345 {
2346 	struct bitmap *bitmap = data;
2347 
2348 	bitmap->counts.pages = pages;
2349 }
2350 
2351 static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats)
2352 {
2353 	struct bitmap_storage *storage;
2354 	struct bitmap_counts *counts;
2355 	struct bitmap *bitmap = data;
2356 	bitmap_super_t *sb;
2357 
2358 	if (!bitmap)
2359 		return -ENOENT;
2360 	if (bitmap->mddev->bitmap_info.external)
2361 		return -ENOENT;
2362 	if (!bitmap->storage.sb_page) /* no superblock */
2363 		return -EINVAL;
2364 	sb = kmap_local_page(bitmap->storage.sb_page);
2365 	stats->sync_size = le64_to_cpu(sb->sync_size);
2366 	kunmap_local(sb);
2367 
2368 	counts = &bitmap->counts;
2369 	stats->missing_pages = counts->missing_pages;
2370 	stats->pages = counts->pages;
2371 
2372 	storage = &bitmap->storage;
2373 	stats->file_pages = storage->file_pages;
2374 	stats->file = storage->file;
2375 
2376 	stats->behind_writes = atomic_read(&bitmap->behind_writes);
2377 	stats->behind_wait = wq_has_sleeper(&bitmap->behind_wait);
2378 	stats->events_cleared = bitmap->events_cleared;
2379 	return 0;
2380 }
2381 
2382 static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks,
2383 			   int chunksize, bool init)
2384 {
2385 	/* If chunksize is 0, choose an appropriate chunk size.
2386 	 * Then possibly allocate new storage space.
2387 	 * Then quiesce, copy bits, replace bitmap, and re-start
2388 	 *
2389 	 * This function is called both to set up the initial bitmap
2390 	 * and to resize the bitmap while the array is active.
2391 	 * If this happens as a result of the array being resized,
2392 	 * chunksize will be zero, and we need to choose a suitable
2393 	 * chunksize, otherwise we use what we are given.
2394 	 */
2395 	struct bitmap_storage store;
2396 	struct bitmap_counts old_counts;
2397 	unsigned long chunks;
2398 	sector_t block;
2399 	sector_t old_blocks, new_blocks;
2400 	int chunkshift;
2401 	int ret = 0;
2402 	long pages;
2403 	struct bitmap_page *new_bp;
2404 
2405 	if (bitmap->storage.file && !init) {
2406 		pr_info("md: cannot resize file-based bitmap\n");
2407 		return -EINVAL;
2408 	}
2409 
2410 	if (chunksize == 0) {
2411 		/* If there is enough space, leave the chunk size unchanged,
2412 		 * else increase it by a factor of two until there is enough space.
2413 		 */
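		/* Worked example, assuming an external bitmap (no
		 * superblock bytes), space == 8 sectors (4096 bytes)
		 * and blocks == 1 << 31:
		 *
		 *	chunkshift 15: chunks = 1<<16, bytes = 8192 (too big)
		 *	chunkshift 16: chunks = 1<<15, bytes = 4096 (fits)
		 *
		 * so the loop below settles on chunkshift 16, i.e.
		 * 1 << (16 + BITMAP_BLOCK_SHIFT) bytes == 32MiB chunks.
		 */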
2414 		long bytes;
2415 		long space = bitmap->mddev->bitmap_info.space;
2416 
2417 		if (space == 0) {
2418 			/* We don't know how much space there is, so limit
2419 			 * it to the current size - in sectors.
2420 			 */
2421 			bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8);
2422 			if (!bitmap->mddev->bitmap_info.external)
2423 				bytes += sizeof(bitmap_super_t);
2424 			space = DIV_ROUND_UP(bytes, 512);
2425 			bitmap->mddev->bitmap_info.space = space;
2426 		}
2427 		chunkshift = bitmap->counts.chunkshift;
2428 		chunkshift--;
2429 		do {
2430 			/* 'chunkshift' is shift from block size to chunk size */
2431 			chunkshift++;
2432 			chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
2433 			bytes = DIV_ROUND_UP(chunks, 8);
2434 			if (!bitmap->mddev->bitmap_info.external)
2435 				bytes += sizeof(bitmap_super_t);
2436 		} while (bytes > (space << 9) && (chunkshift + BITMAP_BLOCK_SHIFT) <
2437 			(BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize) - 1));
2438 	} else
2439 		chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT;
2440 
2441 	chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
2442 	memset(&store, 0, sizeof(store));
2443 	if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
2444 		ret = md_bitmap_storage_alloc(&store, chunks,
2445 					      !bitmap->mddev->bitmap_info.external,
2446 					      mddev_is_clustered(bitmap->mddev)
2447 					      ? bitmap->cluster_slot : 0);
2448 	if (ret) {
2449 		md_bitmap_file_unmap(&store);
2450 		goto err;
2451 	}
2452 
2453 	pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO);
2454 
2455 	new_bp = kcalloc(pages, sizeof(*new_bp), GFP_KERNEL);
2456 	ret = -ENOMEM;
2457 	if (!new_bp) {
2458 		md_bitmap_file_unmap(&store);
2459 		goto err;
2460 	}
2461 
2462 	if (!init)
2463 		bitmap->mddev->pers->quiesce(bitmap->mddev, 1);
2464 
2465 	store.file = bitmap->storage.file;
2466 	bitmap->storage.file = NULL;
2467 
2468 	if (store.sb_page && bitmap->storage.sb_page)
2469 		memcpy(page_address(store.sb_page),
2470 		       page_address(bitmap->storage.sb_page),
2471 		       sizeof(bitmap_super_t));
2472 	spin_lock_irq(&bitmap->counts.lock);
2473 	md_bitmap_file_unmap(&bitmap->storage);
2474 	bitmap->storage = store;
2475 
2476 	old_counts = bitmap->counts;
2477 	bitmap->counts.bp = new_bp;
2478 	bitmap->counts.pages = pages;
2479 	bitmap->counts.missing_pages = pages;
2480 	bitmap->counts.chunkshift = chunkshift;
2481 	bitmap->counts.chunks = chunks;
2482 	bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift +
2483 						     BITMAP_BLOCK_SHIFT);
2484 
2485 	blocks = min(old_counts.chunks << old_counts.chunkshift,
2486 		     chunks << chunkshift);
2487 
2488 	/* For clustered raid, we need to pre-allocate the bitmap */
2489 	if (mddev_is_clustered(bitmap->mddev)) {
2490 		unsigned long page;
2491 		for (page = 0; page < pages; page++) {
2492 			ret = md_bitmap_checkpage(&bitmap->counts, page, 1, 1);
2493 			if (ret) {
2494 				unsigned long k;
2495 
2496 				/* deallocate the page memory */
2497 				for (k = 0; k < page; k++) {
2498 					kfree(new_bp[k].map);
2499 				}
2500 				kfree(new_bp);
2501 
2502 				/* restore some fields from old_counts */
2503 				bitmap->counts.bp = old_counts.bp;
2504 				bitmap->counts.pages = old_counts.pages;
2505 				bitmap->counts.missing_pages = old_counts.pages;
2506 				bitmap->counts.chunkshift = old_counts.chunkshift;
2507 				bitmap->counts.chunks = old_counts.chunks;
2508 				bitmap->mddev->bitmap_info.chunksize =
2509 					1UL << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT);
2510 				blocks = old_counts.chunks << old_counts.chunkshift;
2511 				pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n");
2512 				break;
2513 			} else
2514 				bitmap->counts.bp[page].count += 1;
2515 		}
2516 	}
2517 
2518 	for (block = 0; block < blocks; ) {
2519 		bitmap_counter_t *bmc_old, *bmc_new;
2520 		int set;
2521 
2522 		bmc_old = md_bitmap_get_counter(&old_counts, block, &old_blocks, 0);
2523 		set = bmc_old && NEEDED(*bmc_old);
2524 
2525 		if (set) {
2526 			bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1);
2527 			if (bmc_new) {
2528 				if (*bmc_new == 0) {
2529 					/* need to set on-disk bits too. */
2530 					sector_t end = block + new_blocks;
2531 					sector_t start = block >> chunkshift;
2532 
2533 					start <<= chunkshift;
2534 					while (start < end) {
2535 						md_bitmap_file_set_bit(bitmap, block);
2536 						start += 1 << chunkshift;
2537 					}
2538 					*bmc_new = 2;
2539 					md_bitmap_count_page(&bitmap->counts, block, 1);
2540 					md_bitmap_set_pending(&bitmap->counts, block);
2541 				}
2542 				*bmc_new |= NEEDED_MASK;
2543 			}
2544 			if (new_blocks < old_blocks)
2545 				old_blocks = new_blocks;
2546 		}
2547 		block += old_blocks;
2548 	}
2549 
2550 	if (bitmap->counts.bp != old_counts.bp) {
2551 		unsigned long k;
2552 		for (k = 0; k < old_counts.pages; k++)
2553 			if (!old_counts.bp[k].hijacked)
2554 				kfree(old_counts.bp[k].map);
2555 		kfree(old_counts.bp);
2556 	}
2557 
2558 	if (!init) {
2559 		int i;
2560 		while (block < (chunks << chunkshift)) {
2561 			bitmap_counter_t *bmc;
2562 			bmc = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1);
2563 			if (bmc) {
2564 				/* new space.  It needs to be resynced, so
2565 				 * we set NEEDED_MASK.
2566 				 */
2567 				if (*bmc == 0) {
2568 					*bmc = NEEDED_MASK | 2;
2569 					md_bitmap_count_page(&bitmap->counts, block, 1);
2570 					md_bitmap_set_pending(&bitmap->counts, block);
2571 				}
2572 			}
2573 			block += new_blocks;
2574 		}
2575 		for (i = 0; i < bitmap->storage.file_pages; i++)
2576 			set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
2577 	}
2578 	spin_unlock_irq(&bitmap->counts.lock);
2579 
2580 	if (!init) {
2581 		__bitmap_unplug(bitmap);
2582 		bitmap->mddev->pers->quiesce(bitmap->mddev, 0);
2583 	}
2584 	ret = 0;
2585 err:
2586 	return ret;
2587 }
2588 
2589 static int bitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize,
2590 			 bool init)
2591 {
2592 	struct bitmap *bitmap = mddev->bitmap;
2593 
2594 	if (!bitmap)
2595 		return 0;
2596 
2597 	return __bitmap_resize(bitmap, blocks, chunksize, init);
2598 }
2599 
2600 static ssize_t
2601 location_show(struct mddev *mddev, char *page)
2602 {
2603 	ssize_t len;
2604 	if (mddev->bitmap_info.file)
2605 		len = sprintf(page, "file");
2606 	else if (mddev->bitmap_info.offset)
2607 		len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset);
2608 	else
2609 		len = sprintf(page, "none");
2610 	len += sprintf(page+len, "\n");
2611 	return len;
2612 }
2613 
2614 static ssize_t
2615 location_store(struct mddev *mddev, const char *buf, size_t len)
2616 {
2617 	int rv;
2618 
2619 	rv = mddev_suspend_and_lock(mddev);
2620 	if (rv)
2621 		return rv;
2622 
2623 	if (mddev->pers) {
2624 		if (mddev->recovery || mddev->sync_thread) {
2625 			rv = -EBUSY;
2626 			goto out;
2627 		}
2628 	}
2629 
2630 	if (mddev->bitmap || mddev->bitmap_info.file ||
2631 	    mddev->bitmap_info.offset) {
2632 		/* bitmap already configured.  Only option is to clear it */
2633 		if (strncmp(buf, "none", 4) != 0) {
2634 			rv = -EBUSY;
2635 			goto out;
2636 		}
2637 
2638 		bitmap_destroy(mddev);
2639 		mddev->bitmap_info.offset = 0;
2640 		if (mddev->bitmap_info.file) {
2641 			struct file *f = mddev->bitmap_info.file;
2642 			mddev->bitmap_info.file = NULL;
2643 			fput(f);
2644 		}
2645 	} else {
2646 		/* No bitmap, OK to set a location */
2647 		long long offset;
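		/* Example writes (sectors are relative to the superblock
		 * for an internal bitmap):
		 *
		 *	echo none > md/bitmap/location	(nothing to do)
		 *	echo +8 > md/bitmap/location	(8 sectors after sb)
		 *	echo -16 > md/bitmap/location	(16 sectors before sb)
		 *
		 * "file:..." is rejected below, and offset 0 is -EINVAL.
		 */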
2648 
2649 		if (strncmp(buf, "none", 4) == 0)
2650 			/* nothing to be done */;
2651 		else if (strncmp(buf, "file:", 5) == 0) {
2652 			/* Not supported yet */
2653 			rv = -EINVAL;
2654 			goto out;
2655 		} else {
2656 			if (buf[0] == '+')
2657 				rv = kstrtoll(buf+1, 10, &offset);
2658 			else
2659 				rv = kstrtoll(buf, 10, &offset);
2660 			if (rv)
2661 				goto out;
2662 			if (offset == 0) {
2663 				rv = -EINVAL;
2664 				goto out;
2665 			}
2666 			if (mddev->bitmap_info.external == 0 &&
2667 			    mddev->major_version == 0 &&
2668 			    offset != mddev->bitmap_info.default_offset) {
2669 				rv = -EINVAL;
2670 				goto out;
2671 			}
2672 
2673 			mddev->bitmap_info.offset = offset;
2674 			rv = bitmap_create(mddev, -1);
2675 			if (rv)
2676 				goto out;
2677 
2678 			rv = bitmap_load(mddev);
2679 			if (rv) {
2680 				mddev->bitmap_info.offset = 0;
2681 				bitmap_destroy(mddev);
2682 				goto out;
2683 			}
2684 		}
2685 	}
2686 	if (!mddev->external) {
2687 		/* Ensure new bitmap info is stored in
2688 		 * metadata promptly.
2689 		 */
2690 		set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2691 		md_wakeup_thread(mddev->thread);
2692 	}
2693 	rv = 0;
2694 out:
2695 	mddev_unlock_and_resume(mddev);
2696 	if (rv)
2697 		return rv;
2698 	return len;
2699 }
2700 
2701 static struct md_sysfs_entry bitmap_location =
2702 __ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store);
2703 
2704 /* 'bitmap/space' is the space available at 'location' for the
2705  * bitmap.  This allows the kernel to know when it is safe to
2706  * resize the bitmap to match a resized array.
2707  */
2708 static ssize_t
2709 space_show(struct mddev *mddev, char *page)
2710 {
2711 	return sprintf(page, "%lu\n", mddev->bitmap_info.space);
2712 }
2713 
2714 static ssize_t
2715 space_store(struct mddev *mddev, const char *buf, size_t len)
2716 {
2717 	struct bitmap *bitmap;
2718 	unsigned long sectors;
2719 	int rv;
2720 
2721 	rv = kstrtoul(buf, 10, &sectors);
2722 	if (rv)
2723 		return rv;
2724 
2725 	if (sectors == 0)
2726 		return -EINVAL;
2727 
2728 	bitmap = mddev->bitmap;
2729 	if (bitmap && sectors < (bitmap->storage.bytes + 511) >> 9)
2730 		return -EFBIG; /* Bitmap is too big for this small space */
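	/* e.g. storage.bytes == 3000 rounds up to (3000 + 511) >> 9 == 6
	 * sectors, so any value below 6 would be rejected above.
	 */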
2731 
2732 	/* could make sure it isn't too big, but that isn't really
2733 	 * needed - user-space should be careful.
2734 	 */
2735 	mddev->bitmap_info.space = sectors;
2736 	return len;
2737 }
2738 
2739 static struct md_sysfs_entry bitmap_space =
2740 __ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store);
2741 
2742 static ssize_t
2743 timeout_show(struct mddev *mddev, char *page)
2744 {
2745 	ssize_t len;
2746 	unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ;
2747 	unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ;
2748 
2749 	len = sprintf(page, "%lu", secs);
2750 	if (jifs)
2751 		len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs));
2752 	len += sprintf(page+len, "\n");
2753 	return len;
2754 }
2755 
2756 static ssize_t
2757 timeout_store(struct mddev *mddev, const char *buf, size_t len)
2758 {
2759 	/* timeout can be set at any time */
2760 	unsigned long timeout;
2761 	int rv = strict_strtoul_scaled(buf, &timeout, 4);
2762 	if (rv)
2763 		return rv;
2764 
2765 	/* just to make sure we don't overflow... */
2766 	if (timeout >= LONG_MAX / HZ)
2767 		return -EINVAL;
2768 
2769 	timeout = timeout * HZ / 10000;
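	/* e.g. writing "5.5" gives timeout == 55000 from
	 * strict_strtoul_scaled(..., 4) (units of 100us), which the line
	 * above converts to 5.5 * HZ jiffies.
	 */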
2770 
2771 	if (timeout >= MAX_SCHEDULE_TIMEOUT)
2772 		timeout = MAX_SCHEDULE_TIMEOUT-1;
2773 	if (timeout < 1)
2774 		timeout = 1;
2775 
2776 	mddev->bitmap_info.daemon_sleep = timeout;
2777 	mddev_set_timeout(mddev, timeout, false);
2778 	md_wakeup_thread(mddev->thread);
2779 
2780 	return len;
2781 }
2782 
2783 static struct md_sysfs_entry bitmap_timeout =
2784 __ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store);
2785 
2786 static ssize_t
2787 backlog_show(struct mddev *mddev, char *page)
2788 {
2789 	return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind);
2790 }
2791 
2792 static ssize_t
2793 backlog_store(struct mddev *mddev, const char *buf, size_t len)
2794 {
2795 	unsigned long backlog;
2796 	unsigned long old_mwb = mddev->bitmap_info.max_write_behind;
2797 	struct md_rdev *rdev;
2798 	bool has_write_mostly = false;
2799 	int rv = kstrtoul(buf, 10, &backlog);
2800 	if (rv)
2801 		return rv;
2802 	if (backlog > COUNTER_MAX)
2803 		return -EINVAL;
2804 
2805 	rv = mddev_suspend_and_lock(mddev);
2806 	if (rv)
2807 		return rv;
2808 
2809 	/*
2810 	 * Without a write-mostly device, it doesn't make sense to set
2811 	 * a backlog for max_write_behind.
2812 	 */
2813 	rdev_for_each(rdev, mddev) {
2814 		if (test_bit(WriteMostly, &rdev->flags)) {
2815 			has_write_mostly = true;
2816 			break;
2817 		}
2818 	}
2819 	if (!has_write_mostly) {
2820 		pr_warn_ratelimited("%s: can't set backlog, no write mostly device available\n",
2821 				    mdname(mddev));
2822 		mddev_unlock(mddev);
2823 		return -EINVAL;
2824 	}
2825 
2826 	mddev->bitmap_info.max_write_behind = backlog;
2827 	if (!backlog && mddev->serial_info_pool) {
2828 		/* serial_info_pool is not needed if backlog is zero */
2829 		if (!mddev->serialize_policy)
2830 			mddev_destroy_serial_pool(mddev, NULL);
2831 	} else if (backlog && !mddev->serial_info_pool) {
2832 		/* serial_info_pool is needed since backlog is not zero */
2833 		rdev_for_each(rdev, mddev)
2834 			mddev_create_serial_pool(mddev, rdev);
2835 	}
2836 	if (old_mwb != backlog)
2837 		bitmap_update_sb(mddev->bitmap);
2838 
2839 	mddev_unlock_and_resume(mddev);
2840 	return len;
2841 }
2842 
2843 static struct md_sysfs_entry bitmap_backlog =
2844 __ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store);
2845 
2846 static ssize_t
2847 chunksize_show(struct mddev *mddev, char *page)
2848 {
2849 	return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize);
2850 }
2851 
2852 static ssize_t
2853 chunksize_store(struct mddev *mddev, const char *buf, size_t len)
2854 {
2855 	/* Can only be changed when no bitmap is active */
2856 	int rv;
2857 	unsigned long csize;
2858 	if (mddev->bitmap)
2859 		return -EBUSY;
2860 	rv = kstrtoul(buf, 10, &csize);
2861 	if (rv)
2862 		return rv;
2863 	if (csize < 512 ||
2864 	    !is_power_of_2(csize))
2865 		return -EINVAL;
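	/* e.g. 512, 4096 and 67108864 (64MiB) are accepted here, while
	 * 300 (below 512) and 3072 (not a power of two) are rejected.
	 */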
2866 	if (BITS_PER_LONG > 32 && csize >= (1ULL << (BITS_PER_BYTE *
2867 		sizeof(((bitmap_super_t *)0)->chunksize))))
2868 		return -EOVERFLOW;
2869 	mddev->bitmap_info.chunksize = csize;
2870 	return len;
2871 }
2872 
2873 static struct md_sysfs_entry bitmap_chunksize =
2874 __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
2875 
2876 static ssize_t metadata_show(struct mddev *mddev, char *page)
2877 {
2878 	if (mddev_is_clustered(mddev))
2879 		return sprintf(page, "clustered\n");
2880 	return sprintf(page, "%s\n", (mddev->bitmap_info.external
2881 				      ? "external" : "internal"));
2882 }
2883 
2884 static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len)
2885 {
2886 	if (mddev->bitmap ||
2887 	    mddev->bitmap_info.file ||
2888 	    mddev->bitmap_info.offset)
2889 		return -EBUSY;
2890 	if (strncmp(buf, "external", 8) == 0)
2891 		mddev->bitmap_info.external = 1;
2892 	else if ((strncmp(buf, "internal", 8) == 0) ||
2893 			(strncmp(buf, "clustered", 9) == 0))
2894 		mddev->bitmap_info.external = 0;
2895 	else
2896 		return -EINVAL;
2897 	return len;
2898 }
2899 
2900 static struct md_sysfs_entry bitmap_metadata =
2901 __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
2902 
2903 static ssize_t can_clear_show(struct mddev *mddev, char *page)
2904 {
2905 	int len;
2906 	struct bitmap *bitmap;
2907 
2908 	spin_lock(&mddev->lock);
2909 	bitmap = mddev->bitmap;
2910 	if (bitmap)
2911 		len = sprintf(page, "%s\n", (bitmap->need_sync ? "false" :
2912 								 "true"));
2913 	else
2914 		len = sprintf(page, "\n");
2915 	spin_unlock(&mddev->lock);
2916 	return len;
2917 }
2918 
2919 static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len)
2920 {
2921 	struct bitmap *bitmap = mddev->bitmap;
2922 
2923 	if (!bitmap)
2924 		return -ENOENT;
2925 
2926 	if (strncmp(buf, "false", 5) == 0) {
2927 		bitmap->need_sync = 1;
2928 		return len;
2929 	}
2930 
2931 	if (strncmp(buf, "true", 4) == 0) {
2932 		if (mddev->degraded)
2933 			return -EBUSY;
2934 		bitmap->need_sync = 0;
2935 		return len;
2936 	}
2937 
2938 	return -EINVAL;
2939 }
2940 
2941 static struct md_sysfs_entry bitmap_can_clear =
2942 __ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
2943 
2944 static ssize_t
2945 behind_writes_used_show(struct mddev *mddev, char *page)
2946 {
2947 	ssize_t ret;
2948 	struct bitmap *bitmap;
2949 
2950 	spin_lock(&mddev->lock);
2951 	bitmap = mddev->bitmap;
2952 	if (!bitmap)
2953 		ret = sprintf(page, "0\n");
2954 	else
2955 		ret = sprintf(page, "%lu\n", bitmap->behind_writes_used);
2956 	spin_unlock(&mddev->lock);
2957 
2958 	return ret;
2959 }
2960 
2961 static ssize_t
2962 behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len)
2963 {
2964 	struct bitmap *bitmap = mddev->bitmap;
2965 
2966 	if (bitmap)
2967 		bitmap->behind_writes_used = 0;
2968 	return len;
2969 }
2970 
2971 static struct md_sysfs_entry max_backlog_used =
2972 __ATTR(max_backlog_used, S_IRUGO | S_IWUSR,
2973        behind_writes_used_show, behind_writes_used_reset);
2974 
2975 static struct attribute *md_bitmap_attrs[] = {
2976 	&bitmap_location.attr,
2977 	&bitmap_space.attr,
2978 	&bitmap_timeout.attr,
2979 	&bitmap_backlog.attr,
2980 	&bitmap_chunksize.attr,
2981 	&bitmap_metadata.attr,
2982 	&bitmap_can_clear.attr,
2983 	&max_backlog_used.attr,
2984 	NULL
2985 };
2986 const struct attribute_group md_bitmap_group = {
2987 	.name = "bitmap",
2988 	.attrs = md_bitmap_attrs,
2989 };
2990 
2991 static struct bitmap_operations bitmap_ops = {
2992 	.enabled		= bitmap_enabled,
2993 	.create			= bitmap_create,
2994 	.resize			= bitmap_resize,
2995 	.load			= bitmap_load,
2996 	.destroy		= bitmap_destroy,
2997 	.flush			= bitmap_flush,
2998 	.write_all		= bitmap_write_all,
2999 	.dirty_bits		= bitmap_dirty_bits,
3000 	.unplug			= bitmap_unplug,
3001 	.daemon_work		= bitmap_daemon_work,
3002 
3003 	.start_behind_write	= bitmap_start_behind_write,
3004 	.end_behind_write	= bitmap_end_behind_write,
3005 	.wait_behind_writes	= bitmap_wait_behind_writes,
3006 
3007 	.startwrite		= bitmap_startwrite,
3008 	.endwrite		= bitmap_endwrite,
3009 	.start_sync		= bitmap_start_sync,
3010 	.end_sync		= bitmap_end_sync,
3011 	.cond_end_sync		= bitmap_cond_end_sync,
3012 	.close_sync		= bitmap_close_sync,
3013 
3014 	.update_sb		= bitmap_update_sb,
3015 	.get_stats		= bitmap_get_stats,
3016 
3017 	.sync_with_cluster	= bitmap_sync_with_cluster,
3018 	.get_from_slot		= bitmap_get_from_slot,
3019 	.copy_from_slot		= bitmap_copy_from_slot,
3020 	.set_pages		= bitmap_set_pages,
3021 	.free			= md_bitmap_free,
3022 };
3023 
3024 void mddev_set_bitmap_ops(struct mddev *mddev)
3025 {
3026 	mddev->bitmap_ops = &bitmap_ops;
3027 }
3028