xref: /linux/drivers/md/persistent-data/dm-space-map-common.c (revision 5cfe477f6a3f9a4d9b2906d442964f2115b0403f)
1 /*
2  * Copyright (C) 2011 Red Hat, Inc.
3  *
4  * This file is released under the GPL.
5  */
6 
7 #include "dm-space-map-common.h"
8 #include "dm-transaction-manager.h"
9 #include "dm-btree-internal.h"
10 #include "dm-persistent-data-internal.h"
11 
12 #include <linux/bitops.h>
13 #include <linux/device-mapper.h>
14 
15 #define DM_MSG_PREFIX "space map common"
16 
17 /*----------------------------------------------------------------*/
18 
19 /*
20  * Index validator.
21  */
22 #define INDEX_CSUM_XOR 160478
23 
24 static void index_prepare_for_write(struct dm_block_validator *v,
25 				    struct dm_block *b,
26 				    size_t block_size)
27 {
28 	struct disk_metadata_index *mi_le = dm_block_data(b);
29 
30 	mi_le->blocknr = cpu_to_le64(dm_block_location(b));
31 	mi_le->csum = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
32 						 block_size - sizeof(__le32),
33 						 INDEX_CSUM_XOR));
34 }
35 
36 static int index_check(struct dm_block_validator *v,
37 		       struct dm_block *b,
38 		       size_t block_size)
39 {
40 	struct disk_metadata_index *mi_le = dm_block_data(b);
41 	__le32 csum_disk;
42 
43 	if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) {
44 		DMERR_LIMIT("index_check failed: blocknr %llu != wanted %llu",
45 			    le64_to_cpu(mi_le->blocknr), dm_block_location(b));
46 		return -ENOTBLK;
47 	}
48 
49 	csum_disk = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
50 					       block_size - sizeof(__le32),
51 					       INDEX_CSUM_XOR));
52 	if (csum_disk != mi_le->csum) {
53 		DMERR_LIMIT("index_check failed: csum %u != wanted %u",
54 			    le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum));
55 		return -EILSEQ;
56 	}
57 
58 	return 0;
59 }
60 
61 static struct dm_block_validator index_validator = {
62 	.name = "index",
63 	.prepare_for_write = index_prepare_for_write,
64 	.check = index_check
65 };
66 
67 /*----------------------------------------------------------------*/
68 
69 /*
70  * Bitmap validator
71  */
72 #define BITMAP_CSUM_XOR 240779
73 
74 static void dm_bitmap_prepare_for_write(struct dm_block_validator *v,
75 					struct dm_block *b,
76 					size_t block_size)
77 {
78 	struct disk_bitmap_header *disk_header = dm_block_data(b);
79 
80 	disk_header->blocknr = cpu_to_le64(dm_block_location(b));
81 	disk_header->csum = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
82 						       block_size - sizeof(__le32),
83 						       BITMAP_CSUM_XOR));
84 }
85 
86 static int dm_bitmap_check(struct dm_block_validator *v,
87 			   struct dm_block *b,
88 			   size_t block_size)
89 {
90 	struct disk_bitmap_header *disk_header = dm_block_data(b);
91 	__le32 csum_disk;
92 
93 	if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) {
94 		DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu",
95 			    le64_to_cpu(disk_header->blocknr), dm_block_location(b));
96 		return -ENOTBLK;
97 	}
98 
99 	csum_disk = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
100 					       block_size - sizeof(__le32),
101 					       BITMAP_CSUM_XOR));
102 	if (csum_disk != disk_header->csum) {
103 		DMERR_LIMIT("bitmap check failed: csum %u != wanted %u",
104 			    le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum));
105 		return -EILSEQ;
106 	}
107 
108 	return 0;
109 }
110 
111 static struct dm_block_validator dm_sm_bitmap_validator = {
112 	.name = "sm_bitmap",
113 	.prepare_for_write = dm_bitmap_prepare_for_write,
114 	.check = dm_bitmap_check,
115 };
116 
117 /*----------------------------------------------------------------*/
118 
119 #define ENTRIES_PER_WORD 32
120 #define ENTRIES_SHIFT	5
121 
122 static void *dm_bitmap_data(struct dm_block *b)
123 {
124 	return dm_block_data(b) + sizeof(struct disk_bitmap_header);
125 }
126 
127 #define WORD_MASK_HIGH 0xAAAAAAAAAAAAAAAAULL
128 
129 static unsigned dm_bitmap_word_used(void *addr, unsigned b)
130 {
131 	__le64 *words_le = addr;
132 	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
133 
134 	uint64_t bits = le64_to_cpu(*w_le);
135 	uint64_t mask = (bits + WORD_MASK_HIGH + 1) & WORD_MASK_HIGH;
136 
137 	return !(~bits & mask);
138 }
139 
140 static unsigned sm_lookup_bitmap(void *addr, unsigned b)
141 {
142 	__le64 *words_le = addr;
143 	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
144 	unsigned hi, lo;
145 
146 	b = (b & (ENTRIES_PER_WORD - 1)) << 1;
147 	hi = !!test_bit_le(b, (void *) w_le);
148 	lo = !!test_bit_le(b + 1, (void *) w_le);
149 	return (hi << 1) | lo;
150 }
151 
152 static void sm_set_bitmap(void *addr, unsigned b, unsigned val)
153 {
154 	__le64 *words_le = addr;
155 	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
156 
157 	b = (b & (ENTRIES_PER_WORD - 1)) << 1;
158 
159 	if (val & 2)
160 		__set_bit_le(b, (void *) w_le);
161 	else
162 		__clear_bit_le(b, (void *) w_le);
163 
164 	if (val & 1)
165 		__set_bit_le(b + 1, (void *) w_le);
166 	else
167 		__clear_bit_le(b + 1, (void *) w_le);
168 }
169 
170 static int sm_find_free(void *addr, unsigned begin, unsigned end,
171 			unsigned *result)
172 {
173 	while (begin < end) {
174 		if (!(begin & (ENTRIES_PER_WORD - 1)) &&
175 		    dm_bitmap_word_used(addr, begin)) {
176 			begin += ENTRIES_PER_WORD;
177 			continue;
178 		}
179 
180 		if (!sm_lookup_bitmap(addr, begin)) {
181 			*result = begin;
182 			return 0;
183 		}
184 
185 		begin++;
186 	}
187 
188 	return -ENOSPC;
189 }
190 
191 /*----------------------------------------------------------------*/
192 
193 static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm)
194 {
195 	memset(ll, 0, sizeof(struct ll_disk));
196 
197 	ll->tm = tm;
198 
199 	ll->bitmap_info.tm = tm;
200 	ll->bitmap_info.levels = 1;
201 
202 	/*
203 	 * Because the new bitmap blocks are created via a shadow
204 	 * operation, the old entry has already had its reference count
205 	 * decremented and we don't need the btree to do any bookkeeping.
206 	 */
207 	ll->bitmap_info.value_type.size = sizeof(struct disk_index_entry);
208 	ll->bitmap_info.value_type.inc = NULL;
209 	ll->bitmap_info.value_type.dec = NULL;
210 	ll->bitmap_info.value_type.equal = NULL;
211 
212 	ll->ref_count_info.tm = tm;
213 	ll->ref_count_info.levels = 1;
214 	ll->ref_count_info.value_type.size = sizeof(uint32_t);
215 	ll->ref_count_info.value_type.inc = NULL;
216 	ll->ref_count_info.value_type.dec = NULL;
217 	ll->ref_count_info.value_type.equal = NULL;
218 
219 	ll->block_size = dm_bm_block_size(dm_tm_get_bm(tm));
220 
221 	if (ll->block_size > (1 << 30)) {
222 		DMERR("block size too big to hold bitmaps");
223 		return -EINVAL;
224 	}
225 
226 	ll->entries_per_block = (ll->block_size - sizeof(struct disk_bitmap_header)) *
227 		ENTRIES_PER_BYTE;
228 	ll->nr_blocks = 0;
229 	ll->bitmap_root = 0;
230 	ll->ref_count_root = 0;
231 	ll->bitmap_index_changed = false;
232 
233 	return 0;
234 }
235 
236 int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
237 {
238 	int r;
239 	dm_block_t i, nr_blocks, nr_indexes;
240 	unsigned old_blocks, blocks;
241 
242 	nr_blocks = ll->nr_blocks + extra_blocks;
243 	old_blocks = dm_sector_div_up(ll->nr_blocks, ll->entries_per_block);
244 	blocks = dm_sector_div_up(nr_blocks, ll->entries_per_block);
245 
246 	nr_indexes = dm_sector_div_up(nr_blocks, ll->entries_per_block);
247 	if (nr_indexes > ll->max_entries(ll)) {
248 		DMERR("space map too large");
249 		return -EINVAL;
250 	}
251 
252 	/*
253 	 * We need to set this before the dm_tm_new_block() call below.
254 	 */
255 	ll->nr_blocks = nr_blocks;
256 	for (i = old_blocks; i < blocks; i++) {
257 		struct dm_block *b;
258 		struct disk_index_entry idx;
259 
260 		r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b);
261 		if (r < 0)
262 			return r;
263 
264 		idx.blocknr = cpu_to_le64(dm_block_location(b));
265 
266 		dm_tm_unlock(ll->tm, b);
267 
268 		idx.nr_free = cpu_to_le32(ll->entries_per_block);
269 		idx.none_free_before = 0;
270 
271 		r = ll->save_ie(ll, i, &idx);
272 		if (r < 0)
273 			return r;
274 	}
275 
276 	return 0;
277 }
278 
279 int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result)
280 {
281 	int r;
282 	dm_block_t index = b;
283 	struct disk_index_entry ie_disk;
284 	struct dm_block *blk;
285 
286 	if (b >= ll->nr_blocks) {
287 		DMERR_LIMIT("metadata block out of bounds");
288 		return -EINVAL;
289 	}
290 
291 	b = do_div(index, ll->entries_per_block);
292 	r = ll->load_ie(ll, index, &ie_disk);
293 	if (r < 0)
294 		return r;
295 
296 	r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
297 			    &dm_sm_bitmap_validator, &blk);
298 	if (r < 0)
299 		return r;
300 
301 	*result = sm_lookup_bitmap(dm_bitmap_data(blk), b);
302 
303 	dm_tm_unlock(ll->tm, blk);
304 
305 	return 0;
306 }
307 
308 static int sm_ll_lookup_big_ref_count(struct ll_disk *ll, dm_block_t b,
309 				      uint32_t *result)
310 {
311 	__le32 le_rc;
312 	int r;
313 
314 	r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc);
315 	if (r < 0)
316 		return r;
317 
318 	*result = le32_to_cpu(le_rc);
319 
320 	return r;
321 }
322 
323 int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
324 {
325 	int r = sm_ll_lookup_bitmap(ll, b, result);
326 
327 	if (r)
328 		return r;
329 
330 	if (*result != 3)
331 		return r;
332 
333 	return sm_ll_lookup_big_ref_count(ll, b, result);
334 }
335 
336 int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
337 			  dm_block_t end, dm_block_t *result)
338 {
339 	int r;
340 	struct disk_index_entry ie_disk;
341 	dm_block_t i, index_begin = begin;
342 	dm_block_t index_end = dm_sector_div_up(end, ll->entries_per_block);
343 
344 	/*
345 	 * FIXME: Use shifts
346 	 */
347 	begin = do_div(index_begin, ll->entries_per_block);
348 	end = do_div(end, ll->entries_per_block);
349 	if (end == 0)
350 		end = ll->entries_per_block;
351 
352 	for (i = index_begin; i < index_end; i++, begin = 0) {
353 		struct dm_block *blk;
354 		unsigned position;
355 		uint32_t bit_end;
356 
357 		r = ll->load_ie(ll, i, &ie_disk);
358 		if (r < 0)
359 			return r;
360 
361 		if (le32_to_cpu(ie_disk.nr_free) == 0)
362 			continue;
363 
364 		r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
365 				    &dm_sm_bitmap_validator, &blk);
366 		if (r < 0)
367 			return r;
368 
369 		bit_end = (i == index_end - 1) ?  end : ll->entries_per_block;
370 
371 		r = sm_find_free(dm_bitmap_data(blk),
372 				 max_t(unsigned, begin, le32_to_cpu(ie_disk.none_free_before)),
373 				 bit_end, &position);
374 		if (r == -ENOSPC) {
375 			/*
376 			 * This might happen because we started searching
377 			 * part way through the bitmap.
378 			 */
379 			dm_tm_unlock(ll->tm, blk);
380 			continue;
381 		}
382 
383 		dm_tm_unlock(ll->tm, blk);
384 
385 		*result = i * ll->entries_per_block + (dm_block_t) position;
386 		return 0;
387 	}
388 
389 	return -ENOSPC;
390 }
391 
392 int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
393 	                         dm_block_t begin, dm_block_t end, dm_block_t *b)
394 {
395 	int r;
396 	uint32_t count;
397 
398 	do {
399 		r = sm_ll_find_free_block(new_ll, begin, new_ll->nr_blocks, b);
400 		if (r)
401 			break;
402 
403 		/* double check this block wasn't used in the old transaction */
404 		if (*b >= old_ll->nr_blocks)
405 			count = 0;
406 		else {
407 			r = sm_ll_lookup(old_ll, *b, &count);
408 			if (r)
409 				break;
410 
411 			if (count)
412 				begin = *b + 1;
413 		}
414 	} while (count);
415 
416 	return r;
417 }
418 
419 /*----------------------------------------------------------------*/
420 
421 int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
422 		 uint32_t ref_count, int32_t *nr_allocations)
423 {
424 	int r;
425 	uint32_t bit, old;
426 	struct dm_block *nb;
427 	dm_block_t index = b;
428 	struct disk_index_entry ie_disk;
429 	void *bm_le;
430 	int inc;
431 
432 	bit = do_div(index, ll->entries_per_block);
433 	r = ll->load_ie(ll, index, &ie_disk);
434 	if (r < 0)
435 		return r;
436 
437 	r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ie_disk.blocknr),
438 			       &dm_sm_bitmap_validator, &nb, &inc);
439 	if (r < 0) {
440 		DMERR("dm_tm_shadow_block() failed");
441 		return r;
442 	}
443 	ie_disk.blocknr = cpu_to_le64(dm_block_location(nb));
444 	bm_le = dm_bitmap_data(nb);
445 
446 	old = sm_lookup_bitmap(bm_le, bit);
447 	if (old > 2) {
448 		r = sm_ll_lookup_big_ref_count(ll, b, &old);
449 		if (r < 0) {
450 			dm_tm_unlock(ll->tm, nb);
451 			return r;
452 		}
453 	}
454 
455 	if (r) {
456 		dm_tm_unlock(ll->tm, nb);
457 		return r;
458 	}
459 
460 	if (ref_count <= 2) {
461 		sm_set_bitmap(bm_le, bit, ref_count);
462 		dm_tm_unlock(ll->tm, nb);
463 
464 		if (old > 2) {
465 			r = dm_btree_remove(&ll->ref_count_info,
466 					    ll->ref_count_root,
467 					    &b, &ll->ref_count_root);
468 			if (r)
469 				return r;
470 		}
471 
472 	} else {
473 		__le32 le_rc = cpu_to_le32(ref_count);
474 
475 		sm_set_bitmap(bm_le, bit, 3);
476 		dm_tm_unlock(ll->tm, nb);
477 
478 		__dm_bless_for_disk(&le_rc);
479 		r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
480 				    &b, &le_rc, &ll->ref_count_root);
481 		if (r < 0) {
482 			DMERR("ref count insert failed");
483 			return r;
484 		}
485 	}
486 
487 	if (ref_count && !old) {
488 		*nr_allocations = 1;
489 		ll->nr_allocated++;
490 		le32_add_cpu(&ie_disk.nr_free, -1);
491 		if (le32_to_cpu(ie_disk.none_free_before) == bit)
492 			ie_disk.none_free_before = cpu_to_le32(bit + 1);
493 
494 	} else if (old && !ref_count) {
495 		*nr_allocations = -1;
496 		ll->nr_allocated--;
497 		le32_add_cpu(&ie_disk.nr_free, 1);
498 		ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
499 	} else
500 		*nr_allocations = 0;
501 
502 	return ll->save_ie(ll, index, &ie_disk);
503 }
504 
505 /*----------------------------------------------------------------*/
506 
507 /*
508  * Holds useful intermediate results for the range based inc and dec
509  * operations.
510  */
511 struct inc_context {
512 	struct disk_index_entry ie_disk;
513 	struct dm_block *bitmap_block;
514 	void *bitmap;
515 
516 	struct dm_block *overflow_leaf;
517 };
518 
519 static inline void init_inc_context(struct inc_context *ic)
520 {
521 	ic->bitmap_block = NULL;
522 	ic->bitmap = NULL;
523 	ic->overflow_leaf = NULL;
524 }
525 
526 static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic)
527 {
528 	if (ic->bitmap_block)
529 		dm_tm_unlock(ll->tm, ic->bitmap_block);
530 	if (ic->overflow_leaf)
531 		dm_tm_unlock(ll->tm, ic->overflow_leaf);
532 }
533 
534 static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic)
535 {
536 	exit_inc_context(ll, ic);
537 	init_inc_context(ic);
538 }
539 
540 /*
541  * Confirms a btree node contains a particular key at an index.
542  */
543 static bool contains_key(struct btree_node *n, uint64_t key, int index)
544 {
545 	return index >= 0 &&
546 		index < le32_to_cpu(n->header.nr_entries) &&
547 		le64_to_cpu(n->keys[index]) == key;
548 }
549 
550 static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
551 {
552 	int r;
553 	int index;
554 	struct btree_node *n;
555 	__le32 *v_ptr;
556 	uint32_t rc;
557 
558 	/*
559 	 * bitmap_block needs to be unlocked because getting the
560 	 * overflow_leaf may need to allocate, and thus use the space map.
561 	 */
562 	reset_inc_context(ll, ic);
563 
564 	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
565 				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
566 	if (r < 0)
567 		return r;
568 
569 	n = dm_block_data(ic->overflow_leaf);
570 
571 	if (!contains_key(n, b, index)) {
572 		DMERR("overflow btree is missing an entry");
573 		return -EINVAL;
574 	}
575 
576 	v_ptr = value_ptr(n, index);
577 	rc = le32_to_cpu(*v_ptr) + 1;
578 	*v_ptr = cpu_to_le32(rc);
579 
580 	return 0;
581 }
582 
583 static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
584 {
585 	int index;
586 	struct btree_node *n;
587 	__le32 *v_ptr;
588 	uint32_t rc;
589 
590 	/*
591 	 * Do we already have the correct overflow leaf?
592 	 */
593 	if (ic->overflow_leaf) {
594 		n = dm_block_data(ic->overflow_leaf);
595 		index = lower_bound(n, b);
596 		if (contains_key(n, b, index)) {
597 			v_ptr = value_ptr(n, index);
598 			rc = le32_to_cpu(*v_ptr) + 1;
599 			*v_ptr = cpu_to_le32(rc);
600 
601 			return 0;
602 		}
603 	}
604 
605 	return __sm_ll_inc_overflow(ll, b, ic);
606 }
607 
608 static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic)
609 {
610 	int r, inc;
611 	r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ic->ie_disk.blocknr),
612 			       &dm_sm_bitmap_validator, &ic->bitmap_block, &inc);
613 	if (r < 0) {
614 		DMERR("dm_tm_shadow_block() failed");
615 		return r;
616 	}
617 	ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block));
618 	ic->bitmap = dm_bitmap_data(ic->bitmap_block);
619 	return 0;
620 }
621 
622 /*
623  * Once shadow_bitmap has been called, which always happens at the start of inc/dec,
624  * we can reopen the bitmap with a simple write lock, rather than re calling
625  * dm_tm_shadow_block().
626  */
627 static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic)
628 {
629 	if (!ic->bitmap_block) {
630 		int r = dm_bm_write_lock(dm_tm_get_bm(ll->tm), le64_to_cpu(ic->ie_disk.blocknr),
631 					 &dm_sm_bitmap_validator, &ic->bitmap_block);
632 		if (r) {
633 			DMERR("unable to re-get write lock for bitmap");
634 			return r;
635 		}
636 		ic->bitmap = dm_bitmap_data(ic->bitmap_block);
637 	}
638 
639 	return 0;
640 }
641 
642 /*
643  * Loops round incrementing entries in a single bitmap.
644  */
645 static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b,
646 				   uint32_t bit, uint32_t bit_end,
647 				   int32_t *nr_allocations, dm_block_t *new_b,
648 				   struct inc_context *ic)
649 {
650 	int r;
651 	__le32 le_rc;
652 	uint32_t old;
653 
654 	for (; bit != bit_end; bit++, b++) {
655 		/*
656 		 * We only need to drop the bitmap if we need to find a new btree
657 		 * leaf for the overflow.  So if it was dropped last iteration,
658 		 * we now re-get it.
659 		 */
660 		r = ensure_bitmap(ll, ic);
661 		if (r)
662 			return r;
663 
664 		old = sm_lookup_bitmap(ic->bitmap, bit);
665 		switch (old) {
666 		case 0:
667 			/* inc bitmap, adjust nr_allocated */
668 			sm_set_bitmap(ic->bitmap, bit, 1);
669 			(*nr_allocations)++;
670 			ll->nr_allocated++;
671 			le32_add_cpu(&ic->ie_disk.nr_free, -1);
672 			if (le32_to_cpu(ic->ie_disk.none_free_before) == bit)
673 				ic->ie_disk.none_free_before = cpu_to_le32(bit + 1);
674 			break;
675 
676 		case 1:
677 			/* inc bitmap */
678 			sm_set_bitmap(ic->bitmap, bit, 2);
679 			break;
680 
681 		case 2:
682 			/* inc bitmap and insert into overflow */
683 			sm_set_bitmap(ic->bitmap, bit, 3);
684 			reset_inc_context(ll, ic);
685 
686 			le_rc = cpu_to_le32(3);
687 			__dm_bless_for_disk(&le_rc);
688 			r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
689 					    &b, &le_rc, &ll->ref_count_root);
690 			if (r < 0) {
691 				DMERR("ref count insert failed");
692 				return r;
693 			}
694 			break;
695 
696 		default:
697 			/*
698 			 * inc within the overflow tree only.
699 			 */
700 			r = sm_ll_inc_overflow(ll, b, ic);
701 			if (r < 0)
702 				return r;
703 		}
704 	}
705 
706 	*new_b = b;
707 	return 0;
708 }
709 
710 /*
711  * Finds a bitmap that contains entries in the block range, and increments
712  * them.
713  */
714 static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
715 		       int32_t *nr_allocations, dm_block_t *new_b)
716 {
717 	int r;
718 	struct inc_context ic;
719 	uint32_t bit, bit_end;
720 	dm_block_t index = b;
721 
722 	init_inc_context(&ic);
723 
724 	bit = do_div(index, ll->entries_per_block);
725 	r = ll->load_ie(ll, index, &ic.ie_disk);
726 	if (r < 0)
727 		return r;
728 
729 	r = shadow_bitmap(ll, &ic);
730 	if (r)
731 		return r;
732 
733 	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
734 	r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, &ic);
735 
736 	exit_inc_context(ll, &ic);
737 
738 	if (r)
739 		return r;
740 
741 	return ll->save_ie(ll, index, &ic.ie_disk);
742 }
743 
744 int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
745 	      int32_t *nr_allocations)
746 {
747 	*nr_allocations = 0;
748 	while (b != e) {
749 		int r = __sm_ll_inc(ll, b, e, nr_allocations, &b);
750 		if (r)
751 			return r;
752 	}
753 
754 	return 0;
755 }
756 
757 /*----------------------------------------------------------------*/
758 
759 static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b,
760 				struct inc_context *ic)
761 {
762 	reset_inc_context(ll, ic);
763 	return dm_btree_remove(&ll->ref_count_info, ll->ref_count_root,
764 			       &b, &ll->ref_count_root);
765 }
766 
767 static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
768 				struct inc_context *ic, uint32_t *old_rc)
769 {
770 	int r;
771 	int index = -1;
772 	struct btree_node *n;
773 	__le32 *v_ptr;
774 	uint32_t rc;
775 
776 	reset_inc_context(ll, ic);
777 	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
778 				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
779 	if (r < 0)
780 		return r;
781 
782 	n = dm_block_data(ic->overflow_leaf);
783 
784 	if (!contains_key(n, b, index)) {
785 		DMERR("overflow btree is missing an entry");
786 		return -EINVAL;
787 	}
788 
789 	v_ptr = value_ptr(n, index);
790 	rc = le32_to_cpu(*v_ptr);
791 	*old_rc = rc;
792 
793 	if (rc == 3) {
794 		return __sm_ll_del_overflow(ll, b, ic);
795 	} else {
796 		rc--;
797 		*v_ptr = cpu_to_le32(rc);
798 		return 0;
799 	}
800 }
801 
802 static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
803 			      struct inc_context *ic, uint32_t *old_rc)
804 {
805 	/*
806 	 * Do we already have the correct overflow leaf?
807 	 */
808 	if (ic->overflow_leaf) {
809 		int index;
810 		struct btree_node *n;
811 		__le32 *v_ptr;
812 		uint32_t rc;
813 
814 		n = dm_block_data(ic->overflow_leaf);
815 		index = lower_bound(n, b);
816 		if (contains_key(n, b, index)) {
817 			v_ptr = value_ptr(n, index);
818 			rc = le32_to_cpu(*v_ptr);
819 			*old_rc = rc;
820 
821 			if (rc > 3) {
822 				rc--;
823 				*v_ptr = cpu_to_le32(rc);
824 				return 0;
825 			} else {
826 				return __sm_ll_del_overflow(ll, b, ic);
827 			}
828 
829 		}
830 	}
831 
832 	return __sm_ll_dec_overflow(ll, b, ic, old_rc);
833 }
834 
835 /*
836  * Loops round incrementing entries in a single bitmap.
837  */
838 static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b,
839 				   uint32_t bit, uint32_t bit_end,
840 				   struct inc_context *ic,
841 				   int32_t *nr_allocations, dm_block_t *new_b)
842 {
843 	int r;
844 	uint32_t old;
845 
846 	for (; bit != bit_end; bit++, b++) {
847 		/*
848 		 * We only need to drop the bitmap if we need to find a new btree
849 		 * leaf for the overflow.  So if it was dropped last iteration,
850 		 * we now re-get it.
851 		 */
852 		r = ensure_bitmap(ll, ic);
853 		if (r)
854 			return r;
855 
856 		old = sm_lookup_bitmap(ic->bitmap, bit);
857 		switch (old) {
858 		case 0:
859 			DMERR("unable to decrement block");
860 			return -EINVAL;
861 
862 		case 1:
863 			/* dec bitmap */
864 			sm_set_bitmap(ic->bitmap, bit, 0);
865 			(*nr_allocations)--;
866 			ll->nr_allocated--;
867 			le32_add_cpu(&ic->ie_disk.nr_free, 1);
868 			ic->ie_disk.none_free_before =
869 				cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit));
870 			break;
871 
872 		case 2:
873 			/* dec bitmap and insert into overflow */
874 			sm_set_bitmap(ic->bitmap, bit, 1);
875 			break;
876 
877 		case 3:
878 			r = sm_ll_dec_overflow(ll, b, ic, &old);
879 			if (r < 0)
880 				return r;
881 
882 			if (old == 3) {
883 				r = ensure_bitmap(ll, ic);
884 				if (r)
885 					return r;
886 
887 				sm_set_bitmap(ic->bitmap, bit, 2);
888 			}
889 			break;
890 		}
891 	}
892 
893 	*new_b = b;
894 	return 0;
895 }
896 
897 static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
898 		       int32_t *nr_allocations, dm_block_t *new_b)
899 {
900 	int r;
901 	uint32_t bit, bit_end;
902 	struct inc_context ic;
903 	dm_block_t index = b;
904 
905 	init_inc_context(&ic);
906 
907 	bit = do_div(index, ll->entries_per_block);
908 	r = ll->load_ie(ll, index, &ic.ie_disk);
909 	if (r < 0)
910 		return r;
911 
912 	r = shadow_bitmap(ll, &ic);
913 	if (r)
914 		return r;
915 
916 	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
917 	r = sm_ll_dec_bitmap(ll, b, bit, bit_end, &ic, nr_allocations, new_b);
918 	exit_inc_context(ll, &ic);
919 
920 	if (r)
921 		return r;
922 
923 	return ll->save_ie(ll, index, &ic.ie_disk);
924 }
925 
926 int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
927 	      int32_t *nr_allocations)
928 {
929 	*nr_allocations = 0;
930 	while (b != e) {
931 		int r = __sm_ll_dec(ll, b, e, nr_allocations, &b);
932 		if (r)
933 			return r;
934 	}
935 
936 	return 0;
937 }
938 
939 /*----------------------------------------------------------------*/
940 
941 int sm_ll_commit(struct ll_disk *ll)
942 {
943 	int r = 0;
944 
945 	if (ll->bitmap_index_changed) {
946 		r = ll->commit(ll);
947 		if (!r)
948 			ll->bitmap_index_changed = false;
949 	}
950 
951 	return r;
952 }
953 
954 /*----------------------------------------------------------------*/
955 
956 static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index,
957 			       struct disk_index_entry *ie)
958 {
959 	memcpy(ie, ll->mi_le.index + index, sizeof(*ie));
960 	return 0;
961 }
962 
963 static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index,
964 			       struct disk_index_entry *ie)
965 {
966 	ll->bitmap_index_changed = true;
967 	memcpy(ll->mi_le.index + index, ie, sizeof(*ie));
968 	return 0;
969 }
970 
971 static int metadata_ll_init_index(struct ll_disk *ll)
972 {
973 	int r;
974 	struct dm_block *b;
975 
976 	r = dm_tm_new_block(ll->tm, &index_validator, &b);
977 	if (r < 0)
978 		return r;
979 
980 	ll->bitmap_root = dm_block_location(b);
981 
982 	dm_tm_unlock(ll->tm, b);
983 
984 	return 0;
985 }
986 
987 static int metadata_ll_open(struct ll_disk *ll)
988 {
989 	int r;
990 	struct dm_block *block;
991 
992 	r = dm_tm_read_lock(ll->tm, ll->bitmap_root,
993 			    &index_validator, &block);
994 	if (r)
995 		return r;
996 
997 	memcpy(&ll->mi_le, dm_block_data(block), sizeof(ll->mi_le));
998 	dm_tm_unlock(ll->tm, block);
999 
1000 	return 0;
1001 }
1002 
1003 static dm_block_t metadata_ll_max_entries(struct ll_disk *ll)
1004 {
1005 	return MAX_METADATA_BITMAPS;
1006 }
1007 
1008 static int metadata_ll_commit(struct ll_disk *ll)
1009 {
1010 	int r, inc;
1011 	struct dm_block *b;
1012 
1013 	r = dm_tm_shadow_block(ll->tm, ll->bitmap_root, &index_validator, &b, &inc);
1014 	if (r)
1015 		return r;
1016 
1017 	memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le));
1018 	ll->bitmap_root = dm_block_location(b);
1019 
1020 	dm_tm_unlock(ll->tm, b);
1021 
1022 	return 0;
1023 }
1024 
1025 int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm)
1026 {
1027 	int r;
1028 
1029 	r = sm_ll_init(ll, tm);
1030 	if (r < 0)
1031 		return r;
1032 
1033 	ll->load_ie = metadata_ll_load_ie;
1034 	ll->save_ie = metadata_ll_save_ie;
1035 	ll->init_index = metadata_ll_init_index;
1036 	ll->open_index = metadata_ll_open;
1037 	ll->max_entries = metadata_ll_max_entries;
1038 	ll->commit = metadata_ll_commit;
1039 
1040 	ll->nr_blocks = 0;
1041 	ll->nr_allocated = 0;
1042 
1043 	r = ll->init_index(ll);
1044 	if (r < 0)
1045 		return r;
1046 
1047 	r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
1048 	if (r < 0)
1049 		return r;
1050 
1051 	return 0;
1052 }
1053 
1054 int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm,
1055 			void *root_le, size_t len)
1056 {
1057 	int r;
1058 	struct disk_sm_root smr;
1059 
1060 	if (len < sizeof(struct disk_sm_root)) {
1061 		DMERR("sm_metadata root too small");
1062 		return -ENOMEM;
1063 	}
1064 
1065 	/*
1066 	 * We don't know the alignment of the root_le buffer, so need to
1067 	 * copy into a new structure.
1068 	 */
1069 	memcpy(&smr, root_le, sizeof(smr));
1070 
1071 	r = sm_ll_init(ll, tm);
1072 	if (r < 0)
1073 		return r;
1074 
1075 	ll->load_ie = metadata_ll_load_ie;
1076 	ll->save_ie = metadata_ll_save_ie;
1077 	ll->init_index = metadata_ll_init_index;
1078 	ll->open_index = metadata_ll_open;
1079 	ll->max_entries = metadata_ll_max_entries;
1080 	ll->commit = metadata_ll_commit;
1081 
1082 	ll->nr_blocks = le64_to_cpu(smr.nr_blocks);
1083 	ll->nr_allocated = le64_to_cpu(smr.nr_allocated);
1084 	ll->bitmap_root = le64_to_cpu(smr.bitmap_root);
1085 	ll->ref_count_root = le64_to_cpu(smr.ref_count_root);
1086 
1087 	return ll->open_index(ll);
1088 }
1089 
1090 /*----------------------------------------------------------------*/
1091 
1092 static inline int ie_cache_writeback(struct ll_disk *ll, struct ie_cache *iec)
1093 {
1094 	iec->dirty = false;
1095 	__dm_bless_for_disk(iec->ie);
1096 	return dm_btree_insert(&ll->bitmap_info, ll->bitmap_root,
1097 			       &iec->index, &iec->ie, &ll->bitmap_root);
1098 }
1099 
1100 static inline unsigned hash_index(dm_block_t index)
1101 {
1102 	return dm_hash_block(index, IE_CACHE_MASK);
1103 }
1104 
1105 static int disk_ll_load_ie(struct ll_disk *ll, dm_block_t index,
1106 			   struct disk_index_entry *ie)
1107 {
1108 	int r;
1109 	unsigned h = hash_index(index);
1110 	struct ie_cache *iec = ll->ie_cache + h;
1111 
1112 	if (iec->valid) {
1113 		if (iec->index == index) {
1114 			memcpy(ie, &iec->ie, sizeof(*ie));
1115 			return 0;
1116 		}
1117 
1118 		if (iec->dirty) {
1119 			r = ie_cache_writeback(ll, iec);
1120 			if (r)
1121 				return r;
1122 		}
1123 	}
1124 
1125 	r = dm_btree_lookup(&ll->bitmap_info, ll->bitmap_root, &index, ie);
1126 	if (!r) {
1127 		iec->valid = true;
1128 		iec->dirty = false;
1129 		iec->index = index;
1130 		memcpy(&iec->ie, ie, sizeof(*ie));
1131 	}
1132 
1133 	return r;
1134 }
1135 
1136 static int disk_ll_save_ie(struct ll_disk *ll, dm_block_t index,
1137 			   struct disk_index_entry *ie)
1138 {
1139 	int r;
1140 	unsigned h = hash_index(index);
1141 	struct ie_cache *iec = ll->ie_cache + h;
1142 
1143 	ll->bitmap_index_changed = true;
1144 	if (iec->valid) {
1145 		if (iec->index == index) {
1146 			memcpy(&iec->ie, ie, sizeof(*ie));
1147 			iec->dirty = true;
1148 			return 0;
1149 		}
1150 
1151 		if (iec->dirty) {
1152 			r = ie_cache_writeback(ll, iec);
1153 			if (r)
1154 				return r;
1155 		}
1156 	}
1157 
1158 	iec->valid = true;
1159 	iec->dirty = true;
1160 	iec->index = index;
1161 	memcpy(&iec->ie, ie, sizeof(*ie));
1162 	return 0;
1163 }
1164 
1165 static int disk_ll_init_index(struct ll_disk *ll)
1166 {
1167 	unsigned i;
1168 	for (i = 0; i < IE_CACHE_SIZE; i++) {
1169 		struct ie_cache *iec = ll->ie_cache + i;
1170 		iec->valid = false;
1171 		iec->dirty = false;
1172 	}
1173 	return dm_btree_empty(&ll->bitmap_info, &ll->bitmap_root);
1174 }
1175 
1176 static int disk_ll_open(struct ll_disk *ll)
1177 {
1178 	return 0;
1179 }
1180 
1181 static dm_block_t disk_ll_max_entries(struct ll_disk *ll)
1182 {
1183 	return -1ULL;
1184 }
1185 
1186 static int disk_ll_commit(struct ll_disk *ll)
1187 {
1188 	int r = 0;
1189 	unsigned i;
1190 
1191 	for (i = 0; i < IE_CACHE_SIZE; i++) {
1192 		struct ie_cache *iec = ll->ie_cache + i;
1193 		if (iec->valid && iec->dirty)
1194 			r = ie_cache_writeback(ll, iec);
1195 	}
1196 
1197 	return r;
1198 }
1199 
1200 int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm)
1201 {
1202 	int r;
1203 
1204 	r = sm_ll_init(ll, tm);
1205 	if (r < 0)
1206 		return r;
1207 
1208 	ll->load_ie = disk_ll_load_ie;
1209 	ll->save_ie = disk_ll_save_ie;
1210 	ll->init_index = disk_ll_init_index;
1211 	ll->open_index = disk_ll_open;
1212 	ll->max_entries = disk_ll_max_entries;
1213 	ll->commit = disk_ll_commit;
1214 
1215 	ll->nr_blocks = 0;
1216 	ll->nr_allocated = 0;
1217 
1218 	r = ll->init_index(ll);
1219 	if (r < 0)
1220 		return r;
1221 
1222 	r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
1223 	if (r < 0)
1224 		return r;
1225 
1226 	return 0;
1227 }
1228 
1229 int sm_ll_open_disk(struct ll_disk *ll, struct dm_transaction_manager *tm,
1230 		    void *root_le, size_t len)
1231 {
1232 	int r;
1233 	struct disk_sm_root *smr = root_le;
1234 
1235 	if (len < sizeof(struct disk_sm_root)) {
1236 		DMERR("sm_metadata root too small");
1237 		return -ENOMEM;
1238 	}
1239 
1240 	r = sm_ll_init(ll, tm);
1241 	if (r < 0)
1242 		return r;
1243 
1244 	ll->load_ie = disk_ll_load_ie;
1245 	ll->save_ie = disk_ll_save_ie;
1246 	ll->init_index = disk_ll_init_index;
1247 	ll->open_index = disk_ll_open;
1248 	ll->max_entries = disk_ll_max_entries;
1249 	ll->commit = disk_ll_commit;
1250 
1251 	ll->nr_blocks = le64_to_cpu(smr->nr_blocks);
1252 	ll->nr_allocated = le64_to_cpu(smr->nr_allocated);
1253 	ll->bitmap_root = le64_to_cpu(smr->bitmap_root);
1254 	ll->ref_count_root = le64_to_cpu(smr->ref_count_root);
1255 
1256 	return ll->open_index(ll);
1257 }
1258 
1259 /*----------------------------------------------------------------*/
1260