xref: /linux/drivers/md/persistent-data/dm-array.c (revision 34dc1baba215b826e454b8d19e4f24adbeb7d00d)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2012 Red Hat, Inc.
4  *
5  * This file is released under the GPL.
6  */
7 
8 #include "dm-array.h"
9 #include "dm-space-map.h"
10 #include "dm-transaction-manager.h"
11 
12 #include <linux/export.h>
13 #include <linux/device-mapper.h>
14 
15 #define DM_MSG_PREFIX "array"
16 
17 /*----------------------------------------------------------------*/
18 
19 /*
20  * The array is implemented as a fully populated btree, which points to
21  * blocks that contain the packed values.  This is more space efficient
22  * than just using a btree since we don't store 1 key per value.
23  */
24 struct array_block {
25 	__le32 csum;
26 	__le32 max_entries;
27 	__le32 nr_entries;
28 	__le32 value_size;
29 	__le64 blocknr; /* Block this node is supposed to live in. */
30 } __packed;
31 
32 /*----------------------------------------------------------------*/
33 
34 /*
35  * Validator methods.  As usual we calculate a checksum, and also write the
36  * block location into the header (paranoia about ssds remapping areas by
37  * mistake).
38  */
39 #define CSUM_XOR 595846735
40 
41 static void array_block_prepare_for_write(struct dm_block_validator *v,
42 					  struct dm_block *b,
43 					  size_t size_of_block)
44 {
45 	struct array_block *bh_le = dm_block_data(b);
46 
47 	bh_le->blocknr = cpu_to_le64(dm_block_location(b));
48 	bh_le->csum = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
49 						 size_of_block - sizeof(__le32),
50 						 CSUM_XOR));
51 }
52 
53 static int array_block_check(struct dm_block_validator *v,
54 			     struct dm_block *b,
55 			     size_t size_of_block)
56 {
57 	struct array_block *bh_le = dm_block_data(b);
58 	__le32 csum_disk;
59 
60 	if (dm_block_location(b) != le64_to_cpu(bh_le->blocknr)) {
61 		DMERR_LIMIT("%s failed: blocknr %llu != wanted %llu", __func__,
62 			    (unsigned long long) le64_to_cpu(bh_le->blocknr),
63 			    (unsigned long long) dm_block_location(b));
64 		return -ENOTBLK;
65 	}
66 
67 	csum_disk = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
68 					       size_of_block - sizeof(__le32),
69 					       CSUM_XOR));
70 	if (csum_disk != bh_le->csum) {
71 		DMERR_LIMIT("%s failed: csum %u != wanted %u", __func__,
72 			    (unsigned int) le32_to_cpu(csum_disk),
73 			    (unsigned int) le32_to_cpu(bh_le->csum));
74 		return -EILSEQ;
75 	}
76 
77 	return 0;
78 }
79 
80 static struct dm_block_validator array_validator = {
81 	.name = "array",
82 	.prepare_for_write = array_block_prepare_for_write,
83 	.check = array_block_check
84 };
85 
86 /*----------------------------------------------------------------*/
87 
88 /*
89  * Functions for manipulating the array blocks.
90  */
91 
92 /*
93  * Returns a pointer to a value within an array block.
94  *
95  * index - The index into _this_ specific block.
96  */
97 static void *element_at(struct dm_array_info *info, struct array_block *ab,
98 			unsigned int index)
99 {
100 	unsigned char *entry = (unsigned char *) (ab + 1);
101 
102 	entry += index * info->value_type.size;
103 
104 	return entry;
105 }
106 
107 /*
108  * Utility function that calls one of the value_type methods on every value
109  * in an array block.
110  */
111 static void on_entries(struct dm_array_info *info, struct array_block *ab,
112 		       void (*fn)(void *, const void *, unsigned int))
113 {
114 	unsigned int nr_entries = le32_to_cpu(ab->nr_entries);
115 
116 	fn(info->value_type.context, element_at(info, ab, 0), nr_entries);
117 }
118 
119 /*
120  * Increment every value in an array block.
121  */
122 static void inc_ablock_entries(struct dm_array_info *info, struct array_block *ab)
123 {
124 	struct dm_btree_value_type *vt = &info->value_type;
125 
126 	if (vt->inc)
127 		on_entries(info, ab, vt->inc);
128 }
129 
130 /*
131  * Decrement every value in an array block.
132  */
133 static void dec_ablock_entries(struct dm_array_info *info, struct array_block *ab)
134 {
135 	struct dm_btree_value_type *vt = &info->value_type;
136 
137 	if (vt->dec)
138 		on_entries(info, ab, vt->dec);
139 }
140 
141 /*
142  * Each array block can hold this many values.
143  */
144 static uint32_t calc_max_entries(size_t value_size, size_t size_of_block)
145 {
146 	return (size_of_block - sizeof(struct array_block)) / value_size;
147 }
148 
149 /*
150  * Allocate a new array block.  The caller will need to unlock block.
151  */
152 static int alloc_ablock(struct dm_array_info *info, size_t size_of_block,
153 			uint32_t max_entries,
154 			struct dm_block **block, struct array_block **ab)
155 {
156 	int r;
157 
158 	r = dm_tm_new_block(info->btree_info.tm, &array_validator, block);
159 	if (r)
160 		return r;
161 
162 	(*ab) = dm_block_data(*block);
163 	(*ab)->max_entries = cpu_to_le32(max_entries);
164 	(*ab)->nr_entries = cpu_to_le32(0);
165 	(*ab)->value_size = cpu_to_le32(info->value_type.size);
166 
167 	return 0;
168 }
169 
170 /*
171  * Pad an array block out with a particular value.  Every instance will
172  * cause an increment of the value_type.  new_nr must always be more than
173  * the current number of entries.
174  */
175 static void fill_ablock(struct dm_array_info *info, struct array_block *ab,
176 			const void *value, unsigned int new_nr)
177 {
178 	uint32_t nr_entries, delta, i;
179 	struct dm_btree_value_type *vt = &info->value_type;
180 
181 	BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
182 	BUG_ON(new_nr < le32_to_cpu(ab->nr_entries));
183 
184 	nr_entries = le32_to_cpu(ab->nr_entries);
185 	delta = new_nr - nr_entries;
186 	if (vt->inc)
187 		vt->inc(vt->context, value, delta);
188 	for (i = nr_entries; i < new_nr; i++)
189 		memcpy(element_at(info, ab, i), value, vt->size);
190 	ab->nr_entries = cpu_to_le32(new_nr);
191 }
192 
193 /*
194  * Remove some entries from the back of an array block.  Every value
195  * removed will be decremented.  new_nr must be <= the current number of
196  * entries.
197  */
198 static void trim_ablock(struct dm_array_info *info, struct array_block *ab,
199 			unsigned int new_nr)
200 {
201 	uint32_t nr_entries, delta;
202 	struct dm_btree_value_type *vt = &info->value_type;
203 
204 	BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
205 	BUG_ON(new_nr > le32_to_cpu(ab->nr_entries));
206 
207 	nr_entries = le32_to_cpu(ab->nr_entries);
208 	delta = nr_entries - new_nr;
209 	if (vt->dec)
210 		vt->dec(vt->context, element_at(info, ab, new_nr - 1), delta);
211 	ab->nr_entries = cpu_to_le32(new_nr);
212 }
213 
214 /*
215  * Read locks a block, and coerces it to an array block.  The caller must
216  * unlock 'block' when finished.
217  */
218 static int get_ablock(struct dm_array_info *info, dm_block_t b,
219 		      struct dm_block **block, struct array_block **ab)
220 {
221 	int r;
222 
223 	r = dm_tm_read_lock(info->btree_info.tm, b, &array_validator, block);
224 	if (r)
225 		return r;
226 
227 	*ab = dm_block_data(*block);
228 	return 0;
229 }
230 
231 /*
232  * Unlocks an array block.
233  */
234 static void unlock_ablock(struct dm_array_info *info, struct dm_block *block)
235 {
236 	dm_tm_unlock(info->btree_info.tm, block);
237 }
238 
239 /*----------------------------------------------------------------*/
240 
241 /*
242  * Btree manipulation.
243  */
244 
245 /*
246  * Looks up an array block in the btree, and then read locks it.
247  *
248  * index is the index of the index of the array_block, (ie. the array index
249  * / max_entries).
250  */
251 static int lookup_ablock(struct dm_array_info *info, dm_block_t root,
252 			 unsigned int index, struct dm_block **block,
253 			 struct array_block **ab)
254 {
255 	int r;
256 	uint64_t key = index;
257 	__le64 block_le;
258 
259 	r = dm_btree_lookup(&info->btree_info, root, &key, &block_le);
260 	if (r)
261 		return r;
262 
263 	return get_ablock(info, le64_to_cpu(block_le), block, ab);
264 }
265 
266 /*
267  * Insert an array block into the btree.  The block is _not_ unlocked.
268  */
269 static int insert_ablock(struct dm_array_info *info, uint64_t index,
270 			 struct dm_block *block, dm_block_t *root)
271 {
272 	__le64 block_le = cpu_to_le64(dm_block_location(block));
273 
274 	__dm_bless_for_disk(block_le);
275 	return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root);
276 }
277 
278 /*----------------------------------------------------------------*/
279 
280 static int __shadow_ablock(struct dm_array_info *info, dm_block_t b,
281 			   struct dm_block **block, struct array_block **ab)
282 {
283 	int inc;
284 	int r = dm_tm_shadow_block(info->btree_info.tm, b,
285 				   &array_validator, block, &inc);
286 	if (r)
287 		return r;
288 
289 	*ab = dm_block_data(*block);
290 	if (inc)
291 		inc_ablock_entries(info, *ab);
292 
293 	return 0;
294 }
295 
296 /*
297  * The shadow op will often be a noop.  Only insert if it really
298  * copied data.
299  */
300 static int __reinsert_ablock(struct dm_array_info *info, unsigned int index,
301 			     struct dm_block *block, dm_block_t b,
302 			     dm_block_t *root)
303 {
304 	int r = 0;
305 
306 	if (dm_block_location(block) != b) {
307 		/*
308 		 * dm_tm_shadow_block will have already decremented the old
309 		 * block, but it is still referenced by the btree.  We
310 		 * increment to stop the insert decrementing it below zero
311 		 * when overwriting the old value.
312 		 */
313 		dm_tm_inc(info->btree_info.tm, b);
314 		r = insert_ablock(info, index, block, root);
315 	}
316 
317 	return r;
318 }
319 
320 /*
321  * Looks up an array block in the btree.  Then shadows it, and updates the
322  * btree to point to this new shadow.  'root' is an input/output parameter
323  * for both the current root block, and the new one.
324  */
325 static int shadow_ablock(struct dm_array_info *info, dm_block_t *root,
326 			 unsigned int index, struct dm_block **block,
327 			 struct array_block **ab)
328 {
329 	int r;
330 	uint64_t key = index;
331 	dm_block_t b;
332 	__le64 block_le;
333 
334 	r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le);
335 	if (r)
336 		return r;
337 	b = le64_to_cpu(block_le);
338 
339 	r = __shadow_ablock(info, b, block, ab);
340 	if (r)
341 		return r;
342 
343 	return __reinsert_ablock(info, index, *block, b, root);
344 }
345 
346 /*
347  * Allocate an new array block, and fill it with some values.
348  */
349 static int insert_new_ablock(struct dm_array_info *info, size_t size_of_block,
350 			     uint32_t max_entries,
351 			     unsigned int block_index, uint32_t nr,
352 			     const void *value, dm_block_t *root)
353 {
354 	int r;
355 	struct dm_block *block;
356 	struct array_block *ab;
357 
358 	r = alloc_ablock(info, size_of_block, max_entries, &block, &ab);
359 	if (r)
360 		return r;
361 
362 	fill_ablock(info, ab, value, nr);
363 	r = insert_ablock(info, block_index, block, root);
364 	unlock_ablock(info, block);
365 
366 	return r;
367 }
368 
369 static int insert_full_ablocks(struct dm_array_info *info, size_t size_of_block,
370 			       unsigned int begin_block, unsigned int end_block,
371 			       unsigned int max_entries, const void *value,
372 			       dm_block_t *root)
373 {
374 	int r = 0;
375 
376 	for (; !r && begin_block != end_block; begin_block++)
377 		r = insert_new_ablock(info, size_of_block, max_entries, begin_block, max_entries, value, root);
378 
379 	return r;
380 }
381 
382 /*
383  * There are a bunch of functions involved with resizing an array.  This
384  * structure holds information that commonly needed by them.  Purely here
385  * to reduce parameter count.
386  */
387 struct resize {
388 	/*
389 	 * Describes the array.
390 	 */
391 	struct dm_array_info *info;
392 
393 	/*
394 	 * The current root of the array.  This gets updated.
395 	 */
396 	dm_block_t root;
397 
398 	/*
399 	 * Metadata block size.  Used to calculate the nr entries in an
400 	 * array block.
401 	 */
402 	size_t size_of_block;
403 
404 	/*
405 	 * Maximum nr entries in an array block.
406 	 */
407 	unsigned int max_entries;
408 
409 	/*
410 	 * nr of completely full blocks in the array.
411 	 *
412 	 * 'old' refers to before the resize, 'new' after.
413 	 */
414 	unsigned int old_nr_full_blocks, new_nr_full_blocks;
415 
416 	/*
417 	 * Number of entries in the final block.  0 iff only full blocks in
418 	 * the array.
419 	 */
420 	unsigned int old_nr_entries_in_last_block, new_nr_entries_in_last_block;
421 
422 	/*
423 	 * The default value used when growing the array.
424 	 */
425 	const void *value;
426 };
427 
428 /*
429  * Removes a consecutive set of array blocks from the btree.  The values
430  * in block are decremented as a side effect of the btree remove.
431  *
432  * begin_index - the index of the first array block to remove.
433  * end_index - the one-past-the-end value.  ie. this block is not removed.
434  */
435 static int drop_blocks(struct resize *resize, unsigned int begin_index,
436 		       unsigned int end_index)
437 {
438 	int r;
439 
440 	while (begin_index != end_index) {
441 		uint64_t key = begin_index++;
442 
443 		r = dm_btree_remove(&resize->info->btree_info, resize->root,
444 				    &key, &resize->root);
445 		if (r)
446 			return r;
447 	}
448 
449 	return 0;
450 }
451 
452 /*
453  * Calculates how many blocks are needed for the array.
454  */
455 static unsigned int total_nr_blocks_needed(unsigned int nr_full_blocks,
456 				       unsigned int nr_entries_in_last_block)
457 {
458 	return nr_full_blocks + (nr_entries_in_last_block ? 1 : 0);
459 }
460 
461 /*
462  * Shrink an array.
463  */
464 static int shrink(struct resize *resize)
465 {
466 	int r;
467 	unsigned int begin, end;
468 	struct dm_block *block;
469 	struct array_block *ab;
470 
471 	/*
472 	 * Lose some blocks from the back?
473 	 */
474 	if (resize->new_nr_full_blocks < resize->old_nr_full_blocks) {
475 		begin = total_nr_blocks_needed(resize->new_nr_full_blocks,
476 					       resize->new_nr_entries_in_last_block);
477 		end = total_nr_blocks_needed(resize->old_nr_full_blocks,
478 					     resize->old_nr_entries_in_last_block);
479 
480 		r = drop_blocks(resize, begin, end);
481 		if (r)
482 			return r;
483 	}
484 
485 	/*
486 	 * Trim the new tail block
487 	 */
488 	if (resize->new_nr_entries_in_last_block) {
489 		r = shadow_ablock(resize->info, &resize->root,
490 				  resize->new_nr_full_blocks, &block, &ab);
491 		if (r)
492 			return r;
493 
494 		trim_ablock(resize->info, ab, resize->new_nr_entries_in_last_block);
495 		unlock_ablock(resize->info, block);
496 	}
497 
498 	return 0;
499 }
500 
501 /*
502  * Grow an array.
503  */
504 static int grow_extend_tail_block(struct resize *resize, uint32_t new_nr_entries)
505 {
506 	int r;
507 	struct dm_block *block;
508 	struct array_block *ab;
509 
510 	r = shadow_ablock(resize->info, &resize->root,
511 			  resize->old_nr_full_blocks, &block, &ab);
512 	if (r)
513 		return r;
514 
515 	fill_ablock(resize->info, ab, resize->value, new_nr_entries);
516 	unlock_ablock(resize->info, block);
517 
518 	return r;
519 }
520 
521 static int grow_add_tail_block(struct resize *resize)
522 {
523 	return insert_new_ablock(resize->info, resize->size_of_block,
524 				 resize->max_entries,
525 				 resize->new_nr_full_blocks,
526 				 resize->new_nr_entries_in_last_block,
527 				 resize->value, &resize->root);
528 }
529 
530 static int grow_needs_more_blocks(struct resize *resize)
531 {
532 	int r;
533 	unsigned int old_nr_blocks = resize->old_nr_full_blocks;
534 
535 	if (resize->old_nr_entries_in_last_block > 0) {
536 		old_nr_blocks++;
537 
538 		r = grow_extend_tail_block(resize, resize->max_entries);
539 		if (r)
540 			return r;
541 	}
542 
543 	r = insert_full_ablocks(resize->info, resize->size_of_block,
544 				old_nr_blocks,
545 				resize->new_nr_full_blocks,
546 				resize->max_entries, resize->value,
547 				&resize->root);
548 	if (r)
549 		return r;
550 
551 	if (resize->new_nr_entries_in_last_block)
552 		r = grow_add_tail_block(resize);
553 
554 	return r;
555 }
556 
557 static int grow(struct resize *resize)
558 {
559 	if (resize->new_nr_full_blocks > resize->old_nr_full_blocks)
560 		return grow_needs_more_blocks(resize);
561 
562 	else if (resize->old_nr_entries_in_last_block)
563 		return grow_extend_tail_block(resize, resize->new_nr_entries_in_last_block);
564 
565 	else
566 		return grow_add_tail_block(resize);
567 }
568 
569 /*----------------------------------------------------------------*/
570 
571 /*
572  * These are the value_type functions for the btree elements, which point
573  * to array blocks.
574  */
575 static void block_inc(void *context, const void *value, unsigned int count)
576 {
577 	const __le64 *block_le = value;
578 	struct dm_array_info *info = context;
579 	unsigned int i;
580 
581 	for (i = 0; i < count; i++, block_le++)
582 		dm_tm_inc(info->btree_info.tm, le64_to_cpu(*block_le));
583 }
584 
585 static void __block_dec(void *context, const void *value)
586 {
587 	int r;
588 	uint64_t b;
589 	__le64 block_le;
590 	uint32_t ref_count;
591 	struct dm_block *block;
592 	struct array_block *ab;
593 	struct dm_array_info *info = context;
594 
595 	memcpy(&block_le, value, sizeof(block_le));
596 	b = le64_to_cpu(block_le);
597 
598 	r = dm_tm_ref(info->btree_info.tm, b, &ref_count);
599 	if (r) {
600 		DMERR_LIMIT("couldn't get reference count for block %llu",
601 			    (unsigned long long) b);
602 		return;
603 	}
604 
605 	if (ref_count == 1) {
606 		/*
607 		 * We're about to drop the last reference to this ablock.
608 		 * So we need to decrement the ref count of the contents.
609 		 */
610 		r = get_ablock(info, b, &block, &ab);
611 		if (r) {
612 			DMERR_LIMIT("couldn't get array block %llu",
613 				    (unsigned long long) b);
614 			return;
615 		}
616 
617 		dec_ablock_entries(info, ab);
618 		unlock_ablock(info, block);
619 	}
620 
621 	dm_tm_dec(info->btree_info.tm, b);
622 }
623 
624 static void block_dec(void *context, const void *value, unsigned int count)
625 {
626 	unsigned int i;
627 
628 	for (i = 0; i < count; i++, value += sizeof(__le64))
629 		__block_dec(context, value);
630 }
631 
632 static int block_equal(void *context, const void *value1, const void *value2)
633 {
634 	return !memcmp(value1, value2, sizeof(__le64));
635 }
636 
637 /*----------------------------------------------------------------*/
638 
639 void dm_array_info_init(struct dm_array_info *info,
640 			struct dm_transaction_manager *tm,
641 			struct dm_btree_value_type *vt)
642 {
643 	struct dm_btree_value_type *bvt = &info->btree_info.value_type;
644 
645 	memcpy(&info->value_type, vt, sizeof(info->value_type));
646 	info->btree_info.tm = tm;
647 	info->btree_info.levels = 1;
648 
649 	bvt->context = info;
650 	bvt->size = sizeof(__le64);
651 	bvt->inc = block_inc;
652 	bvt->dec = block_dec;
653 	bvt->equal = block_equal;
654 }
655 EXPORT_SYMBOL_GPL(dm_array_info_init);
656 
657 int dm_array_empty(struct dm_array_info *info, dm_block_t *root)
658 {
659 	return dm_btree_empty(&info->btree_info, root);
660 }
661 EXPORT_SYMBOL_GPL(dm_array_empty);
662 
663 static int array_resize(struct dm_array_info *info, dm_block_t root,
664 			uint32_t old_size, uint32_t new_size,
665 			const void *value, dm_block_t *new_root)
666 {
667 	int r;
668 	struct resize resize;
669 
670 	if (old_size == new_size) {
671 		*new_root = root;
672 		return 0;
673 	}
674 
675 	resize.info = info;
676 	resize.root = root;
677 	resize.size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
678 	resize.max_entries = calc_max_entries(info->value_type.size,
679 					      resize.size_of_block);
680 
681 	resize.old_nr_full_blocks = old_size / resize.max_entries;
682 	resize.old_nr_entries_in_last_block = old_size % resize.max_entries;
683 	resize.new_nr_full_blocks = new_size / resize.max_entries;
684 	resize.new_nr_entries_in_last_block = new_size % resize.max_entries;
685 	resize.value = value;
686 
687 	r = ((new_size > old_size) ? grow : shrink)(&resize);
688 	if (r)
689 		return r;
690 
691 	*new_root = resize.root;
692 	return 0;
693 }
694 
695 int dm_array_resize(struct dm_array_info *info, dm_block_t root,
696 		    uint32_t old_size, uint32_t new_size,
697 		    const void *value, dm_block_t *new_root)
698 	__dm_written_to_disk(value)
699 {
700 	int r = array_resize(info, root, old_size, new_size, value, new_root);
701 
702 	__dm_unbless_for_disk(value);
703 	return r;
704 }
705 EXPORT_SYMBOL_GPL(dm_array_resize);
706 
707 static int populate_ablock_with_values(struct dm_array_info *info, struct array_block *ab,
708 				       value_fn fn, void *context,
709 				       unsigned int base, unsigned int new_nr)
710 {
711 	int r;
712 	unsigned int i;
713 	struct dm_btree_value_type *vt = &info->value_type;
714 
715 	BUG_ON(le32_to_cpu(ab->nr_entries));
716 	BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
717 
718 	for (i = 0; i < new_nr; i++) {
719 		r = fn(base + i, element_at(info, ab, i), context);
720 		if (r)
721 			return r;
722 
723 		if (vt->inc)
724 			vt->inc(vt->context, element_at(info, ab, i), 1);
725 	}
726 
727 	ab->nr_entries = cpu_to_le32(new_nr);
728 	return 0;
729 }
730 
731 int dm_array_new(struct dm_array_info *info, dm_block_t *root,
732 		 uint32_t size, value_fn fn, void *context)
733 {
734 	int r;
735 	struct dm_block *block;
736 	struct array_block *ab;
737 	unsigned int block_index, end_block, size_of_block, max_entries;
738 
739 	r = dm_array_empty(info, root);
740 	if (r)
741 		return r;
742 
743 	size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
744 	max_entries = calc_max_entries(info->value_type.size, size_of_block);
745 	end_block = dm_div_up(size, max_entries);
746 
747 	for (block_index = 0; block_index != end_block; block_index++) {
748 		r = alloc_ablock(info, size_of_block, max_entries, &block, &ab);
749 		if (r)
750 			break;
751 
752 		r = populate_ablock_with_values(info, ab, fn, context,
753 						block_index * max_entries,
754 						min(max_entries, size));
755 		if (r) {
756 			unlock_ablock(info, block);
757 			break;
758 		}
759 
760 		r = insert_ablock(info, block_index, block, root);
761 		unlock_ablock(info, block);
762 		if (r)
763 			break;
764 
765 		size -= max_entries;
766 	}
767 
768 	return r;
769 }
770 EXPORT_SYMBOL_GPL(dm_array_new);
771 
772 int dm_array_del(struct dm_array_info *info, dm_block_t root)
773 {
774 	return dm_btree_del(&info->btree_info, root);
775 }
776 EXPORT_SYMBOL_GPL(dm_array_del);
777 
778 int dm_array_get_value(struct dm_array_info *info, dm_block_t root,
779 		       uint32_t index, void *value_le)
780 {
781 	int r;
782 	struct dm_block *block;
783 	struct array_block *ab;
784 	size_t size_of_block;
785 	unsigned int entry, max_entries;
786 
787 	size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
788 	max_entries = calc_max_entries(info->value_type.size, size_of_block);
789 
790 	r = lookup_ablock(info, root, index / max_entries, &block, &ab);
791 	if (r)
792 		return r;
793 
794 	entry = index % max_entries;
795 	if (entry >= le32_to_cpu(ab->nr_entries))
796 		r = -ENODATA;
797 	else
798 		memcpy(value_le, element_at(info, ab, entry),
799 		       info->value_type.size);
800 
801 	unlock_ablock(info, block);
802 	return r;
803 }
804 EXPORT_SYMBOL_GPL(dm_array_get_value);
805 
806 static int array_set_value(struct dm_array_info *info, dm_block_t root,
807 			   uint32_t index, const void *value, dm_block_t *new_root)
808 {
809 	int r;
810 	struct dm_block *block;
811 	struct array_block *ab;
812 	size_t size_of_block;
813 	unsigned int max_entries;
814 	unsigned int entry;
815 	void *old_value;
816 	struct dm_btree_value_type *vt = &info->value_type;
817 
818 	size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
819 	max_entries = calc_max_entries(info->value_type.size, size_of_block);
820 
821 	r = shadow_ablock(info, &root, index / max_entries, &block, &ab);
822 	if (r)
823 		return r;
824 	*new_root = root;
825 
826 	entry = index % max_entries;
827 	if (entry >= le32_to_cpu(ab->nr_entries)) {
828 		r = -ENODATA;
829 		goto out;
830 	}
831 
832 	old_value = element_at(info, ab, entry);
833 	if (vt->dec &&
834 	    (!vt->equal || !vt->equal(vt->context, old_value, value))) {
835 		vt->dec(vt->context, old_value, 1);
836 		if (vt->inc)
837 			vt->inc(vt->context, value, 1);
838 	}
839 
840 	memcpy(old_value, value, info->value_type.size);
841 
842 out:
843 	unlock_ablock(info, block);
844 	return r;
845 }
846 
847 int dm_array_set_value(struct dm_array_info *info, dm_block_t root,
848 		 uint32_t index, const void *value, dm_block_t *new_root)
849 	__dm_written_to_disk(value)
850 {
851 	int r;
852 
853 	r = array_set_value(info, root, index, value, new_root);
854 	__dm_unbless_for_disk(value);
855 	return r;
856 }
857 EXPORT_SYMBOL_GPL(dm_array_set_value);
858 
859 struct walk_info {
860 	struct dm_array_info *info;
861 	int (*fn)(void *context, uint64_t key, void *leaf);
862 	void *context;
863 };
864 
865 static int walk_ablock(void *context, uint64_t *keys, void *leaf)
866 {
867 	struct walk_info *wi = context;
868 
869 	int r;
870 	unsigned int i;
871 	__le64 block_le;
872 	unsigned int nr_entries, max_entries;
873 	struct dm_block *block;
874 	struct array_block *ab;
875 
876 	memcpy(&block_le, leaf, sizeof(block_le));
877 	r = get_ablock(wi->info, le64_to_cpu(block_le), &block, &ab);
878 	if (r)
879 		return r;
880 
881 	max_entries = le32_to_cpu(ab->max_entries);
882 	nr_entries = le32_to_cpu(ab->nr_entries);
883 	for (i = 0; i < nr_entries; i++) {
884 		r = wi->fn(wi->context, keys[0] * max_entries + i,
885 			   element_at(wi->info, ab, i));
886 
887 		if (r)
888 			break;
889 	}
890 
891 	unlock_ablock(wi->info, block);
892 	return r;
893 }
894 
895 int dm_array_walk(struct dm_array_info *info, dm_block_t root,
896 		  int (*fn)(void *, uint64_t key, void *leaf),
897 		  void *context)
898 {
899 	struct walk_info wi;
900 
901 	wi.info = info;
902 	wi.fn = fn;
903 	wi.context = context;
904 
905 	return dm_btree_walk(&info->btree_info, root, walk_ablock, &wi);
906 }
907 EXPORT_SYMBOL_GPL(dm_array_walk);
908 
909 /*----------------------------------------------------------------*/
910 
911 static int load_ablock(struct dm_array_cursor *c)
912 {
913 	int r;
914 	__le64 value_le;
915 	uint64_t key;
916 
917 	if (c->block)
918 		unlock_ablock(c->info, c->block);
919 
920 	c->block = NULL;
921 	c->ab = NULL;
922 	c->index = 0;
923 
924 	r = dm_btree_cursor_get_value(&c->cursor, &key, &value_le);
925 	if (r) {
926 		DMERR("dm_btree_cursor_get_value failed");
927 		dm_btree_cursor_end(&c->cursor);
928 
929 	} else {
930 		r = get_ablock(c->info, le64_to_cpu(value_le), &c->block, &c->ab);
931 		if (r) {
932 			DMERR("get_ablock failed");
933 			dm_btree_cursor_end(&c->cursor);
934 		}
935 	}
936 
937 	return r;
938 }
939 
940 int dm_array_cursor_begin(struct dm_array_info *info, dm_block_t root,
941 			  struct dm_array_cursor *c)
942 {
943 	int r;
944 
945 	memset(c, 0, sizeof(*c));
946 	c->info = info;
947 	r = dm_btree_cursor_begin(&info->btree_info, root, true, &c->cursor);
948 	if (r) {
949 		DMERR("couldn't create btree cursor");
950 		return r;
951 	}
952 
953 	return load_ablock(c);
954 }
955 EXPORT_SYMBOL_GPL(dm_array_cursor_begin);
956 
957 void dm_array_cursor_end(struct dm_array_cursor *c)
958 {
959 	if (c->block) {
960 		unlock_ablock(c->info, c->block);
961 		dm_btree_cursor_end(&c->cursor);
962 	}
963 }
964 EXPORT_SYMBOL_GPL(dm_array_cursor_end);
965 
966 int dm_array_cursor_next(struct dm_array_cursor *c)
967 {
968 	int r;
969 
970 	if (!c->block)
971 		return -ENODATA;
972 
973 	c->index++;
974 
975 	if (c->index >= le32_to_cpu(c->ab->nr_entries)) {
976 		r = dm_btree_cursor_next(&c->cursor);
977 		if (r)
978 			return r;
979 
980 		r = load_ablock(c);
981 		if (r)
982 			return r;
983 	}
984 
985 	return 0;
986 }
987 EXPORT_SYMBOL_GPL(dm_array_cursor_next);
988 
989 int dm_array_cursor_skip(struct dm_array_cursor *c, uint32_t count)
990 {
991 	int r;
992 
993 	do {
994 		uint32_t remaining = le32_to_cpu(c->ab->nr_entries) - c->index;
995 
996 		if (count < remaining) {
997 			c->index += count;
998 			return 0;
999 		}
1000 
1001 		count -= remaining;
1002 		r = dm_array_cursor_next(c);
1003 
1004 	} while (!r);
1005 
1006 	return r;
1007 }
1008 EXPORT_SYMBOL_GPL(dm_array_cursor_skip);
1009 
1010 void dm_array_cursor_get_value(struct dm_array_cursor *c, void **value_le)
1011 {
1012 	*value_le = element_at(c->info, c->ab, c->index);
1013 }
1014 EXPORT_SYMBOL_GPL(dm_array_cursor_get_value);
1015 
1016 /*----------------------------------------------------------------*/
1017