xref: /linux/drivers/md/dm-cache-metadata.c (revision c4ee0af3fa0dc65f690fc908f02b8355f9576ea0)
1 /*
2  * Copyright (C) 2012 Red Hat, Inc.
3  *
4  * This file is released under the GPL.
5  */
6 
7 #include "dm-cache-metadata.h"
8 
9 #include "persistent-data/dm-array.h"
10 #include "persistent-data/dm-bitset.h"
11 #include "persistent-data/dm-space-map.h"
12 #include "persistent-data/dm-space-map-disk.h"
13 #include "persistent-data/dm-transaction-manager.h"
14 
15 #include <linux/device-mapper.h>
16 
17 /*----------------------------------------------------------------*/
18 
/* Prefix used by the DMERR() messages emitted from this file. */
#define DM_MSG_PREFIX   "cache metadata"

/*
 * Value stored in, and expected from, the superblock's magic field.
 * Note the leading zero: in C this literal is octal.
 */
#define CACHE_SUPERBLOCK_MAGIC 06142003
/* Metadata block number at which the superblock is locked and read. */
#define CACHE_SUPERBLOCK_LOCATION 0

/*
 * Defines the range of metadata versions that this module can handle.
 */
#define MIN_CACHE_VERSION 1
#define MAX_CACHE_VERSION 1

/*
 * Passed as the cache size argument when the block manager is created.
 * NOTE(review): units are defined by dm_block_manager_create() - confirm.
 */
#define CACHE_METADATA_CACHE_SIZE 64

/*
 *  3 for btree insert +
 *  2 for btree lookup used within space map
 */
#define CACHE_MAX_CONCURRENT_LOCKS 5
/* Size in bytes of the superblock's metadata_space_map_root area. */
#define SPACE_MAP_ROOT_SIZE 128
38 
/*
 * Bit numbers (not masks) within the superblock's flags word; they are
 * used with set_bit()/clear_bit()/test_bit() in this file.
 */
enum superblock_flag_bits {
	/* for spotting crashes that would invalidate the dirty bitset */
	CLEAN_SHUTDOWN,
};
43 
/*
 * Each mapping from cache block -> origin block carries a set of flags.
 * These are bit masks that live in the low 16 bits of a packed mapping
 * value (see pack_value()/unpack_value()).
 */
enum mapping_bits {
	/*
	 * A valid mapping.  Because we're using an array we clear this
	 * flag for a non-existent mapping.
	 */
	M_VALID = 1,

	/*
	 * The data on the cache is different from that on the origin.
	 */
	M_DIRTY = 2
};
59 
/*
 * On-disk superblock layout.  All multi-byte integers are little-endian.
 * The structure is packed and must fit in 512 bytes (enforced by a
 * BUILD_BUG_ON in __commit_transaction()).  Field order is part of the
 * on-disk format and must not change.
 */
struct cache_disk_superblock {
	/* Checksum over the block starting at 'flags' (i.e. excluding csum). */
	__le32 csum;
	/* Bits are numbered by enum superblock_flag_bits. */
	__le32 flags;
	/* Location of this block; verified against the real location on read. */
	__le64 blocknr;

	/* Zeroed when the metadata is formatted; not otherwise used here. */
	__u8 uuid[16];
	/* Must equal CACHE_SUPERBLOCK_MAGIC. */
	__le64 magic;
	/* Must lie within [MIN_CACHE_VERSION, MAX_CACHE_VERSION]. */
	__le32 version;

	/* Name, hint size and (below) version of the policy that wrote hints. */
	__u8 policy_name[CACHE_POLICY_NAME_SIZE];
	__le32 policy_hint_size;

	/* Root of the metadata space map, copied in by dm_sm_copy_root(). */
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
	/* Root of the cblock -> packed (oblock, flags) mapping array. */
	__le64 mapping_root;
	/* Root of the per-cblock policy hint array. */
	__le64 hint_root;

	/* Root and geometry of the discard bitset. */
	__le64 discard_root;
	__le64 discard_block_size;
	__le64 discard_nr_blocks;

	__le32 data_block_size;
	__le32 metadata_block_size;
	/* Number of entries in the mapping array. */
	__le32 cache_blocks;

	/* Feature flags; checked in __check_incompat_features(). */
	__le32 compat_flags;
	__le32 compat_ro_flags;
	__le32 incompat_flags;

	/* Persisted copy of struct dm_cache_statistics. */
	__le32 read_hits;
	__le32 read_misses;
	__le32 write_hits;
	__le32 write_misses;

	__le32 policy_version[CACHE_POLICY_VERSION_SIZE];
} __packed;
95 
/*
 * In-core handle for an open cache metadata device.  Allocated by
 * dm_cache_metadata_open() and freed by dm_cache_metadata_close().
 */
struct dm_cache_metadata {
	/* Metadata device and the persistent-data objects layered on it. */
	struct block_device *bdev;
	struct dm_block_manager *bm;
	struct dm_space_map *metadata_sm;
	struct dm_transaction_manager *tm;

	/* Array descriptors for mappings and hints; bitset for discards. */
	struct dm_array_info info;
	struct dm_array_info hint_info;
	struct dm_disk_bitset discard_info;

	/*
	 * Taken (read or write) by the public entry points in this file
	 * around accesses to the roots and the cached fields below.
	 */
	struct rw_semaphore root_lock;
	dm_block_t root;
	dm_block_t hint_root;
	dm_block_t discard_root;

	sector_t discard_block_size;
	dm_dblock_t discard_nr_blocks;

	sector_t data_block_size;
	dm_cblock_t cache_blocks;
	/*
	 * Set when a mutation is made; reset to false whenever the
	 * superblock fields are re-read (see read_superblock_fields()).
	 */
	bool changed:1;
	/* Value of the CLEAN_SHUTDOWN superblock flag at open/format time. */
	bool clean_when_opened:1;

	/* Identity of the policy whose hints are stored in the hint array. */
	char policy_name[CACHE_POLICY_NAME_SIZE];
	unsigned policy_version[CACHE_POLICY_VERSION_SIZE];
	size_t policy_hint_size;
	struct dm_cache_statistics stats;
};
124 
125 /*-------------------------------------------------------------------
126  * superblock validator
127  *-----------------------------------------------------------------*/
128 
129 #define SUPERBLOCK_CSUM_XOR 9031977
130 
131 static void sb_prepare_for_write(struct dm_block_validator *v,
132 				 struct dm_block *b,
133 				 size_t sb_block_size)
134 {
135 	struct cache_disk_superblock *disk_super = dm_block_data(b);
136 
137 	disk_super->blocknr = cpu_to_le64(dm_block_location(b));
138 	disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
139 						      sb_block_size - sizeof(__le32),
140 						      SUPERBLOCK_CSUM_XOR));
141 }
142 
143 static int check_metadata_version(struct cache_disk_superblock *disk_super)
144 {
145 	uint32_t metadata_version = le32_to_cpu(disk_super->version);
146 	if (metadata_version < MIN_CACHE_VERSION || metadata_version > MAX_CACHE_VERSION) {
147 		DMERR("Cache metadata version %u found, but only versions between %u and %u supported.",
148 		      metadata_version, MIN_CACHE_VERSION, MAX_CACHE_VERSION);
149 		return -EINVAL;
150 	}
151 
152 	return 0;
153 }
154 
155 static int sb_check(struct dm_block_validator *v,
156 		    struct dm_block *b,
157 		    size_t sb_block_size)
158 {
159 	struct cache_disk_superblock *disk_super = dm_block_data(b);
160 	__le32 csum_le;
161 
162 	if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) {
163 		DMERR("sb_check failed: blocknr %llu: wanted %llu",
164 		      le64_to_cpu(disk_super->blocknr),
165 		      (unsigned long long)dm_block_location(b));
166 		return -ENOTBLK;
167 	}
168 
169 	if (le64_to_cpu(disk_super->magic) != CACHE_SUPERBLOCK_MAGIC) {
170 		DMERR("sb_check failed: magic %llu: wanted %llu",
171 		      le64_to_cpu(disk_super->magic),
172 		      (unsigned long long)CACHE_SUPERBLOCK_MAGIC);
173 		return -EILSEQ;
174 	}
175 
176 	csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
177 					     sb_block_size - sizeof(__le32),
178 					     SUPERBLOCK_CSUM_XOR));
179 	if (csum_le != disk_super->csum) {
180 		DMERR("sb_check failed: csum %u: wanted %u",
181 		      le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum));
182 		return -EILSEQ;
183 	}
184 
185 	return check_metadata_version(disk_super);
186 }
187 
/*
 * Validator passed to the block manager whenever the superblock is
 * locked through superblock_read_lock(), superblock_lock() or
 * superblock_lock_zero().
 */
static struct dm_block_validator sb_validator = {
	.name = "superblock",
	.prepare_for_write = sb_prepare_for_write,
	.check = sb_check
};
193 
194 /*----------------------------------------------------------------*/
195 
196 static int superblock_read_lock(struct dm_cache_metadata *cmd,
197 				struct dm_block **sblock)
198 {
199 	return dm_bm_read_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
200 			       &sb_validator, sblock);
201 }
202 
203 static int superblock_lock_zero(struct dm_cache_metadata *cmd,
204 				struct dm_block **sblock)
205 {
206 	return dm_bm_write_lock_zero(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
207 				     &sb_validator, sblock);
208 }
209 
210 static int superblock_lock(struct dm_cache_metadata *cmd,
211 			   struct dm_block **sblock)
212 {
213 	return dm_bm_write_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
214 				&sb_validator, sblock);
215 }
216 
217 /*----------------------------------------------------------------*/
218 
219 static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *result)
220 {
221 	int r;
222 	unsigned i;
223 	struct dm_block *b;
224 	__le64 *data_le, zero = cpu_to_le64(0);
225 	unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64);
226 
227 	/*
228 	 * We can't use a validator here - it may be all zeroes.
229 	 */
230 	r = dm_bm_read_lock(bm, CACHE_SUPERBLOCK_LOCATION, NULL, &b);
231 	if (r)
232 		return r;
233 
234 	data_le = dm_block_data(b);
235 	*result = true;
236 	for (i = 0; i < sb_block_size; i++) {
237 		if (data_le[i] != zero) {
238 			*result = false;
239 			break;
240 		}
241 	}
242 
243 	return dm_bm_unlock(b);
244 }
245 
246 static void __setup_mapping_info(struct dm_cache_metadata *cmd)
247 {
248 	struct dm_btree_value_type vt;
249 
250 	vt.context = NULL;
251 	vt.size = sizeof(__le64);
252 	vt.inc = NULL;
253 	vt.dec = NULL;
254 	vt.equal = NULL;
255 	dm_array_info_init(&cmd->info, cmd->tm, &vt);
256 
257 	if (cmd->policy_hint_size) {
258 		vt.size = sizeof(__le32);
259 		dm_array_info_init(&cmd->hint_info, cmd->tm, &vt);
260 	}
261 }
262 
263 static int __write_initial_superblock(struct dm_cache_metadata *cmd)
264 {
265 	int r;
266 	struct dm_block *sblock;
267 	size_t metadata_len;
268 	struct cache_disk_superblock *disk_super;
269 	sector_t bdev_size = i_size_read(cmd->bdev->bd_inode) >> SECTOR_SHIFT;
270 
271 	/* FIXME: see if we can lose the max sectors limit */
272 	if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS)
273 		bdev_size = DM_CACHE_METADATA_MAX_SECTORS;
274 
275 	r = dm_sm_root_size(cmd->metadata_sm, &metadata_len);
276 	if (r < 0)
277 		return r;
278 
279 	r = dm_tm_pre_commit(cmd->tm);
280 	if (r < 0)
281 		return r;
282 
283 	r = superblock_lock_zero(cmd, &sblock);
284 	if (r)
285 		return r;
286 
287 	disk_super = dm_block_data(sblock);
288 	disk_super->flags = 0;
289 	memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
290 	disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC);
291 	disk_super->version = cpu_to_le32(MAX_CACHE_VERSION);
292 	memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
293 	memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version));
294 	disk_super->policy_hint_size = 0;
295 
296 	r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root,
297 			    metadata_len);
298 	if (r < 0)
299 		goto bad_locked;
300 
301 	disk_super->mapping_root = cpu_to_le64(cmd->root);
302 	disk_super->hint_root = cpu_to_le64(cmd->hint_root);
303 	disk_super->discard_root = cpu_to_le64(cmd->discard_root);
304 	disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
305 	disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
306 	disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT);
307 	disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
308 	disk_super->cache_blocks = cpu_to_le32(0);
309 
310 	disk_super->read_hits = cpu_to_le32(0);
311 	disk_super->read_misses = cpu_to_le32(0);
312 	disk_super->write_hits = cpu_to_le32(0);
313 	disk_super->write_misses = cpu_to_le32(0);
314 
315 	return dm_tm_commit(cmd->tm, sblock);
316 
317 bad_locked:
318 	dm_bm_unlock(sblock);
319 	return r;
320 }
321 
322 static int __format_metadata(struct dm_cache_metadata *cmd)
323 {
324 	int r;
325 
326 	r = dm_tm_create_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
327 				 &cmd->tm, &cmd->metadata_sm);
328 	if (r < 0) {
329 		DMERR("tm_create_with_sm failed");
330 		return r;
331 	}
332 
333 	__setup_mapping_info(cmd);
334 
335 	r = dm_array_empty(&cmd->info, &cmd->root);
336 	if (r < 0)
337 		goto bad;
338 
339 	dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
340 
341 	r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root);
342 	if (r < 0)
343 		goto bad;
344 
345 	cmd->discard_block_size = 0;
346 	cmd->discard_nr_blocks = 0;
347 
348 	r = __write_initial_superblock(cmd);
349 	if (r)
350 		goto bad;
351 
352 	cmd->clean_when_opened = true;
353 	return 0;
354 
355 bad:
356 	dm_tm_destroy(cmd->tm);
357 	dm_sm_destroy(cmd->metadata_sm);
358 
359 	return r;
360 }
361 
362 static int __check_incompat_features(struct cache_disk_superblock *disk_super,
363 				     struct dm_cache_metadata *cmd)
364 {
365 	uint32_t features;
366 
367 	features = le32_to_cpu(disk_super->incompat_flags) & ~DM_CACHE_FEATURE_INCOMPAT_SUPP;
368 	if (features) {
369 		DMERR("could not access metadata due to unsupported optional features (%lx).",
370 		      (unsigned long)features);
371 		return -EINVAL;
372 	}
373 
374 	/*
375 	 * Check for read-only metadata to skip the following RDWR checks.
376 	 */
377 	if (get_disk_ro(cmd->bdev->bd_disk))
378 		return 0;
379 
380 	features = le32_to_cpu(disk_super->compat_ro_flags) & ~DM_CACHE_FEATURE_COMPAT_RO_SUPP;
381 	if (features) {
382 		DMERR("could not access metadata RDWR due to unsupported optional features (%lx).",
383 		      (unsigned long)features);
384 		return -EINVAL;
385 	}
386 
387 	return 0;
388 }
389 
390 static int __open_metadata(struct dm_cache_metadata *cmd)
391 {
392 	int r;
393 	struct dm_block *sblock;
394 	struct cache_disk_superblock *disk_super;
395 	unsigned long sb_flags;
396 
397 	r = superblock_read_lock(cmd, &sblock);
398 	if (r < 0) {
399 		DMERR("couldn't read lock superblock");
400 		return r;
401 	}
402 
403 	disk_super = dm_block_data(sblock);
404 
405 	r = __check_incompat_features(disk_super, cmd);
406 	if (r < 0)
407 		goto bad;
408 
409 	r = dm_tm_open_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
410 			       disk_super->metadata_space_map_root,
411 			       sizeof(disk_super->metadata_space_map_root),
412 			       &cmd->tm, &cmd->metadata_sm);
413 	if (r < 0) {
414 		DMERR("tm_open_with_sm failed");
415 		goto bad;
416 	}
417 
418 	__setup_mapping_info(cmd);
419 	dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
420 	sb_flags = le32_to_cpu(disk_super->flags);
421 	cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags);
422 	return dm_bm_unlock(sblock);
423 
424 bad:
425 	dm_bm_unlock(sblock);
426 	return r;
427 }
428 
429 static int __open_or_format_metadata(struct dm_cache_metadata *cmd,
430 				     bool format_device)
431 {
432 	int r;
433 	bool unformatted = false;
434 
435 	r = __superblock_all_zeroes(cmd->bm, &unformatted);
436 	if (r)
437 		return r;
438 
439 	if (unformatted)
440 		return format_device ? __format_metadata(cmd) : -EPERM;
441 
442 	return __open_metadata(cmd);
443 }
444 
445 static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
446 					    bool may_format_device)
447 {
448 	int r;
449 	cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE,
450 					  CACHE_METADATA_CACHE_SIZE,
451 					  CACHE_MAX_CONCURRENT_LOCKS);
452 	if (IS_ERR(cmd->bm)) {
453 		DMERR("could not create block manager");
454 		return PTR_ERR(cmd->bm);
455 	}
456 
457 	r = __open_or_format_metadata(cmd, may_format_device);
458 	if (r)
459 		dm_block_manager_destroy(cmd->bm);
460 
461 	return r;
462 }
463 
/*
 * Tear down the persistent-data objects held by @cmd: the metadata space
 * map, the transaction manager and finally the block manager.
 */
static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd)
{
	dm_sm_destroy(cmd->metadata_sm);
	dm_tm_destroy(cmd->tm);
	dm_block_manager_destroy(cmd->bm);
}
470 
471 typedef unsigned long (*flags_mutator)(unsigned long);
472 
473 static void update_flags(struct cache_disk_superblock *disk_super,
474 			 flags_mutator mutator)
475 {
476 	uint32_t sb_flags = mutator(le32_to_cpu(disk_super->flags));
477 	disk_super->flags = cpu_to_le32(sb_flags);
478 }
479 
480 static unsigned long set_clean_shutdown(unsigned long flags)
481 {
482 	set_bit(CLEAN_SHUTDOWN, &flags);
483 	return flags;
484 }
485 
486 static unsigned long clear_clean_shutdown(unsigned long flags)
487 {
488 	clear_bit(CLEAN_SHUTDOWN, &flags);
489 	return flags;
490 }
491 
492 static void read_superblock_fields(struct dm_cache_metadata *cmd,
493 				   struct cache_disk_superblock *disk_super)
494 {
495 	cmd->root = le64_to_cpu(disk_super->mapping_root);
496 	cmd->hint_root = le64_to_cpu(disk_super->hint_root);
497 	cmd->discard_root = le64_to_cpu(disk_super->discard_root);
498 	cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size);
499 	cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks));
500 	cmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
501 	cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks));
502 	strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
503 	cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]);
504 	cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]);
505 	cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]);
506 	cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size);
507 
508 	cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits);
509 	cmd->stats.read_misses = le32_to_cpu(disk_super->read_misses);
510 	cmd->stats.write_hits = le32_to_cpu(disk_super->write_hits);
511 	cmd->stats.write_misses = le32_to_cpu(disk_super->write_misses);
512 
513 	cmd->changed = false;
514 }
515 
516 /*
517  * The mutator updates the superblock flags.
518  */
519 static int __begin_transaction_flags(struct dm_cache_metadata *cmd,
520 				     flags_mutator mutator)
521 {
522 	int r;
523 	struct cache_disk_superblock *disk_super;
524 	struct dm_block *sblock;
525 
526 	r = superblock_lock(cmd, &sblock);
527 	if (r)
528 		return r;
529 
530 	disk_super = dm_block_data(sblock);
531 	update_flags(disk_super, mutator);
532 	read_superblock_fields(cmd, disk_super);
533 
534 	return dm_bm_flush_and_unlock(cmd->bm, sblock);
535 }
536 
/*
 * Begin a transaction by reloading the in-core fields from the
 * superblock.  Returns 0 on success or the read-lock error.
 */
static int __begin_transaction(struct dm_cache_metadata *cmd)
{
	int err;
	struct dm_block *sb_block;

	/*
	 * The superblock is re-read on every call; this should not really
	 * be necessary.
	 */
	err = superblock_read_lock(cmd, &sb_block);
	if (err)
		return err;

	read_superblock_fields(cmd, dm_block_data(sb_block));
	dm_bm_unlock(sb_block);

	return 0;
}
557 
558 static int __commit_transaction(struct dm_cache_metadata *cmd,
559 				flags_mutator mutator)
560 {
561 	int r;
562 	size_t metadata_len;
563 	struct cache_disk_superblock *disk_super;
564 	struct dm_block *sblock;
565 
566 	/*
567 	 * We need to know if the cache_disk_superblock exceeds a 512-byte sector.
568 	 */
569 	BUILD_BUG_ON(sizeof(struct cache_disk_superblock) > 512);
570 
571 	r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root,
572 			    &cmd->discard_root);
573 	if (r)
574 		return r;
575 
576 	r = dm_tm_pre_commit(cmd->tm);
577 	if (r < 0)
578 		return r;
579 
580 	r = dm_sm_root_size(cmd->metadata_sm, &metadata_len);
581 	if (r < 0)
582 		return r;
583 
584 	r = superblock_lock(cmd, &sblock);
585 	if (r)
586 		return r;
587 
588 	disk_super = dm_block_data(sblock);
589 
590 	if (mutator)
591 		update_flags(disk_super, mutator);
592 
593 	disk_super->mapping_root = cpu_to_le64(cmd->root);
594 	disk_super->hint_root = cpu_to_le64(cmd->hint_root);
595 	disk_super->discard_root = cpu_to_le64(cmd->discard_root);
596 	disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
597 	disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
598 	disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks));
599 	strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
600 	disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]);
601 	disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]);
602 	disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]);
603 
604 	disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits);
605 	disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses);
606 	disk_super->write_hits = cpu_to_le32(cmd->stats.write_hits);
607 	disk_super->write_misses = cpu_to_le32(cmd->stats.write_misses);
608 
609 	r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root,
610 			    metadata_len);
611 	if (r < 0) {
612 		dm_bm_unlock(sblock);
613 		return r;
614 	}
615 
616 	return dm_tm_commit(cmd->tm, sblock);
617 }
618 
619 /*----------------------------------------------------------------*/
620 
621 /*
622  * The mappings are held in a dm-array that has 64-bit values stored in
623  * little-endian format.  The index is the cblock, the high 48bits of the
624  * value are the oblock and the low 16 bit the flags.
625  */
626 #define FLAGS_MASK ((1 << 16) - 1)
627 
628 static __le64 pack_value(dm_oblock_t block, unsigned flags)
629 {
630 	uint64_t value = from_oblock(block);
631 	value <<= 16;
632 	value = value | (flags & FLAGS_MASK);
633 	return cpu_to_le64(value);
634 }
635 
636 static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags)
637 {
638 	uint64_t value = le64_to_cpu(value_le);
639 	uint64_t b = value >> 16;
640 	*block = to_oblock(b);
641 	*flags = value & FLAGS_MASK;
642 }
643 
644 /*----------------------------------------------------------------*/
645 
646 struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
647 						 sector_t data_block_size,
648 						 bool may_format_device,
649 						 size_t policy_hint_size)
650 {
651 	int r;
652 	struct dm_cache_metadata *cmd;
653 
654 	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
655 	if (!cmd) {
656 		DMERR("could not allocate metadata struct");
657 		return NULL;
658 	}
659 
660 	init_rwsem(&cmd->root_lock);
661 	cmd->bdev = bdev;
662 	cmd->data_block_size = data_block_size;
663 	cmd->cache_blocks = 0;
664 	cmd->policy_hint_size = policy_hint_size;
665 	cmd->changed = true;
666 
667 	r = __create_persistent_data_objects(cmd, may_format_device);
668 	if (r) {
669 		kfree(cmd);
670 		return ERR_PTR(r);
671 	}
672 
673 	r = __begin_transaction_flags(cmd, clear_clean_shutdown);
674 	if (r < 0) {
675 		dm_cache_metadata_close(cmd);
676 		return ERR_PTR(r);
677 	}
678 
679 	return cmd;
680 }
681 
/*
 * Release a handle obtained from dm_cache_metadata_open(): destroy the
 * persistent-data objects and free @cmd.  Nothing is committed here.
 */
void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
{
	__destroy_persistent_data_objects(cmd);
	kfree(cmd);
}
687 
688 /*
689  * Checks that the given cache block is either unmapped or clean.
690  */
691 static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b,
692 				   bool *result)
693 {
694 	int r;
695 	__le64 value;
696 	dm_oblock_t ob;
697 	unsigned flags;
698 
699 	r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(b), &value);
700 	if (r) {
701 		DMERR("block_unmapped_or_clean failed");
702 		return r;
703 	}
704 
705 	unpack_value(value, &ob, &flags);
706 	*result = !((flags & M_VALID) && (flags & M_DIRTY));
707 
708 	return 0;
709 }
710 
711 static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
712 					dm_cblock_t begin, dm_cblock_t end,
713 					bool *result)
714 {
715 	int r;
716 	*result = true;
717 
718 	while (begin != end) {
719 		r = block_unmapped_or_clean(cmd, begin, result);
720 		if (r)
721 			return r;
722 
723 		if (!*result) {
724 			DMERR("cache block %llu is dirty",
725 			      (unsigned long long) from_cblock(begin));
726 			return 0;
727 		}
728 
729 		begin = to_cblock(from_cblock(begin) + 1);
730 	}
731 
732 	return 0;
733 }
734 
/*
 * Resize the mapping array to @new_cache_size entries.
 *
 * When shrinking, the blocks that would be dropped
 * ([new_cache_size, current size)) must all be unmapped or clean;
 * otherwise -EINVAL is returned.  New entries are filled with a null
 * mapping (oblock 0, no flags).
 *
 * Takes root_lock for writing.  Returns 0 on success or a negative
 * errno.  Note that cmd->changed is set whenever dm_array_resize() is
 * attempted, even if it fails.
 */
int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
{
	int r;
	bool clean;
	__le64 null_mapping = pack_value(0, 0);

	down_write(&cmd->root_lock);
	__dm_bless_for_disk(&null_mapping);

	if (from_cblock(new_cache_size) < from_cblock(cmd->cache_blocks)) {
		r = blocks_are_unmapped_or_clean(cmd, new_cache_size, cmd->cache_blocks, &clean);
		if (r) {
			/* The value is not handed to dm_array_resize(): unbless it. */
			__dm_unbless_for_disk(&null_mapping);
			goto out;
		}

		if (!clean) {
			DMERR("unable to shrink cache due to dirty blocks");
			r = -EINVAL;
			__dm_unbless_for_disk(&null_mapping);
			goto out;
		}
	}

	r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks),
			    from_cblock(new_cache_size),
			    &null_mapping, &cmd->root);
	if (!r)
		cmd->cache_blocks = new_cache_size;
	cmd->changed = true;

out:
	up_write(&cmd->root_lock);

	return r;
}
771 
772 int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
773 				   sector_t discard_block_size,
774 				   dm_dblock_t new_nr_entries)
775 {
776 	int r;
777 
778 	down_write(&cmd->root_lock);
779 	r = dm_bitset_resize(&cmd->discard_info,
780 			     cmd->discard_root,
781 			     from_dblock(cmd->discard_nr_blocks),
782 			     from_dblock(new_nr_entries),
783 			     false, &cmd->discard_root);
784 	if (!r) {
785 		cmd->discard_block_size = discard_block_size;
786 		cmd->discard_nr_blocks = new_nr_entries;
787 	}
788 
789 	cmd->changed = true;
790 	up_write(&cmd->root_lock);
791 
792 	return r;
793 }
794 
795 static int __set_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
796 {
797 	return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root,
798 				 from_dblock(b), &cmd->discard_root);
799 }
800 
801 static int __clear_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
802 {
803 	return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root,
804 				   from_dblock(b), &cmd->discard_root);
805 }
806 
807 static int __is_discarded(struct dm_cache_metadata *cmd, dm_dblock_t b,
808 			  bool *is_discarded)
809 {
810 	return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root,
811 				  from_dblock(b), &cmd->discard_root,
812 				  is_discarded);
813 }
814 
815 static int __discard(struct dm_cache_metadata *cmd,
816 		     dm_dblock_t dblock, bool discard)
817 {
818 	int r;
819 
820 	r = (discard ? __set_discard : __clear_discard)(cmd, dblock);
821 	if (r)
822 		return r;
823 
824 	cmd->changed = true;
825 	return 0;
826 }
827 
828 int dm_cache_set_discard(struct dm_cache_metadata *cmd,
829 			 dm_dblock_t dblock, bool discard)
830 {
831 	int r;
832 
833 	down_write(&cmd->root_lock);
834 	r = __discard(cmd, dblock, discard);
835 	up_write(&cmd->root_lock);
836 
837 	return r;
838 }
839 
840 static int __load_discards(struct dm_cache_metadata *cmd,
841 			   load_discard_fn fn, void *context)
842 {
843 	int r = 0;
844 	dm_block_t b;
845 	bool discard;
846 
847 	for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) {
848 		dm_dblock_t dblock = to_dblock(b);
849 
850 		if (cmd->clean_when_opened) {
851 			r = __is_discarded(cmd, dblock, &discard);
852 			if (r)
853 				return r;
854 		} else
855 			discard = false;
856 
857 		r = fn(context, cmd->discard_block_size, dblock, discard);
858 		if (r)
859 			break;
860 	}
861 
862 	return r;
863 }
864 
865 int dm_cache_load_discards(struct dm_cache_metadata *cmd,
866 			   load_discard_fn fn, void *context)
867 {
868 	int r;
869 
870 	down_read(&cmd->root_lock);
871 	r = __load_discards(cmd, fn, context);
872 	up_read(&cmd->root_lock);
873 
874 	return r;
875 }
876 
877 dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd)
878 {
879 	dm_cblock_t r;
880 
881 	down_read(&cmd->root_lock);
882 	r = cmd->cache_blocks;
883 	up_read(&cmd->root_lock);
884 
885 	return r;
886 }
887 
888 static int __remove(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
889 {
890 	int r;
891 	__le64 value = pack_value(0, 0);
892 
893 	__dm_bless_for_disk(&value);
894 	r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
895 			       &value, &cmd->root);
896 	if (r)
897 		return r;
898 
899 	cmd->changed = true;
900 	return 0;
901 }
902 
903 int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
904 {
905 	int r;
906 
907 	down_write(&cmd->root_lock);
908 	r = __remove(cmd, cblock);
909 	up_write(&cmd->root_lock);
910 
911 	return r;
912 }
913 
914 static int __insert(struct dm_cache_metadata *cmd,
915 		    dm_cblock_t cblock, dm_oblock_t oblock)
916 {
917 	int r;
918 	__le64 value = pack_value(oblock, M_VALID);
919 	__dm_bless_for_disk(&value);
920 
921 	r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
922 			       &value, &cmd->root);
923 	if (r)
924 		return r;
925 
926 	cmd->changed = true;
927 	return 0;
928 }
929 
930 int dm_cache_insert_mapping(struct dm_cache_metadata *cmd,
931 			    dm_cblock_t cblock, dm_oblock_t oblock)
932 {
933 	int r;
934 
935 	down_write(&cmd->root_lock);
936 	r = __insert(cmd, cblock, oblock);
937 	up_write(&cmd->root_lock);
938 
939 	return r;
940 }
941 
/*
 * Context handed to __load_mapping() through dm_array_walk() when the
 * mappings are loaded.
 */
struct thunk {
	/* Caller's callback and its opaque argument. */
	load_mapping_fn fn;
	void *context;

	struct dm_cache_metadata *cmd;
	/* If false, every valid mapping is reported as dirty. */
	bool respect_dirty_flags;
	/* If true, hints are looked up in the hint array and passed on. */
	bool hints_valid;
};
950 
951 static bool policy_unchanged(struct dm_cache_metadata *cmd,
952 			     struct dm_cache_policy *policy)
953 {
954 	const char *policy_name = dm_cache_policy_get_name(policy);
955 	const unsigned *policy_version = dm_cache_policy_get_version(policy);
956 	size_t policy_hint_size = dm_cache_policy_get_hint_size(policy);
957 
958 	/*
959 	 * Ensure policy names match.
960 	 */
961 	if (strncmp(cmd->policy_name, policy_name, sizeof(cmd->policy_name)))
962 		return false;
963 
964 	/*
965 	 * Ensure policy major versions match.
966 	 */
967 	if (cmd->policy_version[0] != policy_version[0])
968 		return false;
969 
970 	/*
971 	 * Ensure policy hint sizes match.
972 	 */
973 	if (cmd->policy_hint_size != policy_hint_size)
974 		return false;
975 
976 	return true;
977 }
978 
979 static bool hints_array_initialized(struct dm_cache_metadata *cmd)
980 {
981 	return cmd->hint_root && cmd->policy_hint_size;
982 }
983 
984 static bool hints_array_available(struct dm_cache_metadata *cmd,
985 				  struct dm_cache_policy *policy)
986 {
987 	return cmd->clean_when_opened && policy_unchanged(cmd, policy) &&
988 		hints_array_initialized(cmd);
989 }
990 
991 static int __load_mapping(void *context, uint64_t cblock, void *leaf)
992 {
993 	int r = 0;
994 	bool dirty;
995 	__le64 value;
996 	__le32 hint_value = 0;
997 	dm_oblock_t oblock;
998 	unsigned flags;
999 	struct thunk *thunk = context;
1000 	struct dm_cache_metadata *cmd = thunk->cmd;
1001 
1002 	memcpy(&value, leaf, sizeof(value));
1003 	unpack_value(value, &oblock, &flags);
1004 
1005 	if (flags & M_VALID) {
1006 		if (thunk->hints_valid) {
1007 			r = dm_array_get_value(&cmd->hint_info, cmd->hint_root,
1008 					       cblock, &hint_value);
1009 			if (r && r != -ENODATA)
1010 				return r;
1011 		}
1012 
1013 		dirty = thunk->respect_dirty_flags ? (flags & M_DIRTY) : true;
1014 		r = thunk->fn(thunk->context, oblock, to_cblock(cblock),
1015 			      dirty, le32_to_cpu(hint_value), thunk->hints_valid);
1016 	}
1017 
1018 	return r;
1019 }
1020 
1021 static int __load_mappings(struct dm_cache_metadata *cmd,
1022 			   struct dm_cache_policy *policy,
1023 			   load_mapping_fn fn, void *context)
1024 {
1025 	struct thunk thunk;
1026 
1027 	thunk.fn = fn;
1028 	thunk.context = context;
1029 
1030 	thunk.cmd = cmd;
1031 	thunk.respect_dirty_flags = cmd->clean_when_opened;
1032 	thunk.hints_valid = hints_array_available(cmd, policy);
1033 
1034 	return dm_array_walk(&cmd->info, cmd->root, __load_mapping, &thunk);
1035 }
1036 
1037 int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
1038 			   struct dm_cache_policy *policy,
1039 			   load_mapping_fn fn, void *context)
1040 {
1041 	int r;
1042 
1043 	down_read(&cmd->root_lock);
1044 	r = __load_mappings(cmd, policy, fn, context);
1045 	up_read(&cmd->root_lock);
1046 
1047 	return r;
1048 }
1049 
1050 static int __dump_mapping(void *context, uint64_t cblock, void *leaf)
1051 {
1052 	int r = 0;
1053 	__le64 value;
1054 	dm_oblock_t oblock;
1055 	unsigned flags;
1056 
1057 	memcpy(&value, leaf, sizeof(value));
1058 	unpack_value(value, &oblock, &flags);
1059 
1060 	return r;
1061 }
1062 
1063 static int __dump_mappings(struct dm_cache_metadata *cmd)
1064 {
1065 	return dm_array_walk(&cmd->info, cmd->root, __dump_mapping, NULL);
1066 }
1067 
1068 void dm_cache_dump(struct dm_cache_metadata *cmd)
1069 {
1070 	down_read(&cmd->root_lock);
1071 	__dump_mappings(cmd);
1072 	up_read(&cmd->root_lock);
1073 }
1074 
1075 int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd)
1076 {
1077 	int r;
1078 
1079 	down_read(&cmd->root_lock);
1080 	r = cmd->changed;
1081 	up_read(&cmd->root_lock);
1082 
1083 	return r;
1084 }
1085 
1086 static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty)
1087 {
1088 	int r;
1089 	unsigned flags;
1090 	dm_oblock_t oblock;
1091 	__le64 value;
1092 
1093 	r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(cblock), &value);
1094 	if (r)
1095 		return r;
1096 
1097 	unpack_value(value, &oblock, &flags);
1098 
1099 	if (((flags & M_DIRTY) && dirty) || (!(flags & M_DIRTY) && !dirty))
1100 		/* nothing to be done */
1101 		return 0;
1102 
1103 	value = pack_value(oblock, (flags & ~M_DIRTY) | (dirty ? M_DIRTY : 0));
1104 	__dm_bless_for_disk(&value);
1105 
1106 	r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
1107 			       &value, &cmd->root);
1108 	if (r)
1109 		return r;
1110 
1111 	cmd->changed = true;
1112 	return 0;
1113 
1114 }
1115 
1116 int dm_cache_set_dirty(struct dm_cache_metadata *cmd,
1117 		       dm_cblock_t cblock, bool dirty)
1118 {
1119 	int r;
1120 
1121 	down_write(&cmd->root_lock);
1122 	r = __dirty(cmd, cblock, dirty);
1123 	up_write(&cmd->root_lock);
1124 
1125 	return r;
1126 }
1127 
1128 void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd,
1129 				 struct dm_cache_statistics *stats)
1130 {
1131 	down_read(&cmd->root_lock);
1132 	*stats = cmd->stats;
1133 	up_read(&cmd->root_lock);
1134 }
1135 
1136 void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd,
1137 				 struct dm_cache_statistics *stats)
1138 {
1139 	down_write(&cmd->root_lock);
1140 	cmd->stats = *stats;
1141 	up_write(&cmd->root_lock);
1142 }
1143 
1144 int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown)
1145 {
1146 	int r;
1147 	flags_mutator mutator = (clean_shutdown ? set_clean_shutdown :
1148 				 clear_clean_shutdown);
1149 
1150 	down_write(&cmd->root_lock);
1151 	r = __commit_transaction(cmd, mutator);
1152 	if (r)
1153 		goto out;
1154 
1155 	r = __begin_transaction(cmd);
1156 
1157 out:
1158 	up_write(&cmd->root_lock);
1159 	return r;
1160 }
1161 
1162 int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd,
1163 					   dm_block_t *result)
1164 {
1165 	int r = -EINVAL;
1166 
1167 	down_read(&cmd->root_lock);
1168 	r = dm_sm_get_nr_free(cmd->metadata_sm, result);
1169 	up_read(&cmd->root_lock);
1170 
1171 	return r;
1172 }
1173 
1174 int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd,
1175 				   dm_block_t *result)
1176 {
1177 	int r = -EINVAL;
1178 
1179 	down_read(&cmd->root_lock);
1180 	r = dm_sm_get_nr_blocks(cmd->metadata_sm, result);
1181 	up_read(&cmd->root_lock);
1182 
1183 	return r;
1184 }
1185 
1186 /*----------------------------------------------------------------*/
1187 
/*
 * Prepare the hint array for @policy.
 *
 * Rejects a policy whose name is empty or too long to fit in
 * cmd->policy_name with a terminator (-EINVAL).  If the policy matches
 * the one already recorded (see policy_unchanged()) nothing is done.
 * Otherwise the recorded name and version are replaced and, provided the
 * policy has a non-zero hint size, any existing hint array is deleted
 * and a fresh one is created with one zeroed entry per cache block.
 *
 * Returns 0 on success or a negative errno.
 */
static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
{
	int r;
	__le32 value;
	size_t hint_size;
	const char *policy_name = dm_cache_policy_get_name(policy);
	const unsigned *policy_version = dm_cache_policy_get_version(policy);

	if (!policy_name[0] ||
	    (strlen(policy_name) > sizeof(cmd->policy_name) - 1))
		return -EINVAL;

	if (!policy_unchanged(cmd, policy)) {
		strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
		memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));

		/*
		 * NOTE(review): when the new policy has no hints this returns
		 * before cmd->policy_hint_size is updated, so the previous
		 * value is retained - confirm this is intended.
		 */
		hint_size = dm_cache_policy_get_hint_size(policy);
		if (!hint_size)
			return 0; /* short-circuit hints initialization */
		cmd->policy_hint_size = hint_size;

		/* Drop the old policy's hints before building a new array. */
		if (cmd->hint_root) {
			r = dm_array_del(&cmd->hint_info, cmd->hint_root);
			if (r)
				return r;
		}

		r = dm_array_empty(&cmd->hint_info, &cmd->hint_root);
		if (r)
			return r;

		/* Grow from 0 to cache_blocks entries, each initialised to 0. */
		value = cpu_to_le32(0);
		__dm_bless_for_disk(&value);
		r = dm_array_resize(&cmd->hint_info, cmd->hint_root, 0,
				    from_cblock(cmd->cache_blocks),
				    &value, &cmd->hint_root);
		if (r)
			return r;
	}

	return 0;
}
1230 
1231 int dm_cache_begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
1232 {
1233 	int r;
1234 
1235 	down_write(&cmd->root_lock);
1236 	r = begin_hints(cmd, policy);
1237 	up_write(&cmd->root_lock);
1238 
1239 	return r;
1240 }
1241 
1242 static int save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock,
1243 		     uint32_t hint)
1244 {
1245 	int r;
1246 	__le32 value = cpu_to_le32(hint);
1247 	__dm_bless_for_disk(&value);
1248 
1249 	r = dm_array_set_value(&cmd->hint_info, cmd->hint_root,
1250 			       from_cblock(cblock), &value, &cmd->hint_root);
1251 	cmd->changed = true;
1252 
1253 	return r;
1254 }
1255 
1256 int dm_cache_save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock,
1257 		       uint32_t hint)
1258 {
1259 	int r;
1260 
1261 	if (!hints_array_initialized(cmd))
1262 		return 0;
1263 
1264 	down_write(&cmd->root_lock);
1265 	r = save_hint(cmd, cblock, hint);
1266 	up_write(&cmd->root_lock);
1267 
1268 	return r;
1269 }
1270 
1271 int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result)
1272 {
1273 	return blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result);
1274 }
1275