xref: /linux/drivers/md/dm-era-target.c (revision eec40579d84873dfb7021eb24c50360f073237c5)
1*eec40579SJoe Thornber #include "dm.h"
2*eec40579SJoe Thornber #include "persistent-data/dm-transaction-manager.h"
3*eec40579SJoe Thornber #include "persistent-data/dm-bitset.h"
4*eec40579SJoe Thornber #include "persistent-data/dm-space-map.h"
5*eec40579SJoe Thornber 
6*eec40579SJoe Thornber #include <linux/dm-io.h>
7*eec40579SJoe Thornber #include <linux/dm-kcopyd.h>
8*eec40579SJoe Thornber #include <linux/init.h>
9*eec40579SJoe Thornber #include <linux/mempool.h>
10*eec40579SJoe Thornber #include <linux/module.h>
11*eec40579SJoe Thornber #include <linux/slab.h>
12*eec40579SJoe Thornber #include <linux/vmalloc.h>
13*eec40579SJoe Thornber 
14*eec40579SJoe Thornber #define DM_MSG_PREFIX "era"
15*eec40579SJoe Thornber 
16*eec40579SJoe Thornber #define SUPERBLOCK_LOCATION 0
17*eec40579SJoe Thornber #define SUPERBLOCK_MAGIC 2126579579
18*eec40579SJoe Thornber #define SUPERBLOCK_CSUM_XOR 146538381
19*eec40579SJoe Thornber #define MIN_ERA_VERSION 1
20*eec40579SJoe Thornber #define MAX_ERA_VERSION 1
21*eec40579SJoe Thornber #define INVALID_WRITESET_ROOT SUPERBLOCK_LOCATION
22*eec40579SJoe Thornber #define MIN_BLOCK_SIZE 8
23*eec40579SJoe Thornber 
24*eec40579SJoe Thornber /*----------------------------------------------------------------
25*eec40579SJoe Thornber  * Writeset
26*eec40579SJoe Thornber  *--------------------------------------------------------------*/
/*
 * In core description of one on-disk writeset: the number of bits it
 * holds and the root block of the dm-bitset storing them.
 */
struct writeset_metadata {
	uint32_t nr_bits;	/* number of bits in the on disk bitset */
	dm_block_t root;	/* root block of the on disk bitset */
};
31*eec40579SJoe Thornber 
struct writeset {
	/* Location/size of the on disk copy of the bits. */
	struct writeset_metadata md;

	/*
	 * An in core copy of the bits to save constantly doing look ups on
	 * disk.
	 */
	unsigned long *bits;
};
41*eec40579SJoe Thornber 
42*eec40579SJoe Thornber /*
43*eec40579SJoe Thornber  * This does not free off the on disk bitset as this will normally be done
44*eec40579SJoe Thornber  * after digesting into the era array.
45*eec40579SJoe Thornber  */
46*eec40579SJoe Thornber static void writeset_free(struct writeset *ws)
47*eec40579SJoe Thornber {
48*eec40579SJoe Thornber 	vfree(ws->bits);
49*eec40579SJoe Thornber }
50*eec40579SJoe Thornber 
51*eec40579SJoe Thornber static int setup_on_disk_bitset(struct dm_disk_bitset *info,
52*eec40579SJoe Thornber 				unsigned nr_bits, dm_block_t *root)
53*eec40579SJoe Thornber {
54*eec40579SJoe Thornber 	int r;
55*eec40579SJoe Thornber 
56*eec40579SJoe Thornber 	r = dm_bitset_empty(info, root);
57*eec40579SJoe Thornber 	if (r)
58*eec40579SJoe Thornber 		return r;
59*eec40579SJoe Thornber 
60*eec40579SJoe Thornber 	return dm_bitset_resize(info, *root, 0, nr_bits, false, root);
61*eec40579SJoe Thornber }
62*eec40579SJoe Thornber 
63*eec40579SJoe Thornber static size_t bitset_size(unsigned nr_bits)
64*eec40579SJoe Thornber {
65*eec40579SJoe Thornber 	return sizeof(unsigned long) * dm_div_up(nr_bits, BITS_PER_LONG);
66*eec40579SJoe Thornber }
67*eec40579SJoe Thornber 
68*eec40579SJoe Thornber /*
69*eec40579SJoe Thornber  * Allocates memory for the in core bitset.
70*eec40579SJoe Thornber  */
71*eec40579SJoe Thornber static int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks)
72*eec40579SJoe Thornber {
73*eec40579SJoe Thornber 	ws->md.nr_bits = nr_blocks;
74*eec40579SJoe Thornber 	ws->md.root = INVALID_WRITESET_ROOT;
75*eec40579SJoe Thornber 	ws->bits = vzalloc(bitset_size(nr_blocks));
76*eec40579SJoe Thornber 	if (!ws->bits) {
77*eec40579SJoe Thornber 		DMERR("%s: couldn't allocate in memory bitset", __func__);
78*eec40579SJoe Thornber 		return -ENOMEM;
79*eec40579SJoe Thornber 	}
80*eec40579SJoe Thornber 
81*eec40579SJoe Thornber 	return 0;
82*eec40579SJoe Thornber }
83*eec40579SJoe Thornber 
84*eec40579SJoe Thornber /*
85*eec40579SJoe Thornber  * Wipes the in-core bitset, and creates a new on disk bitset.
86*eec40579SJoe Thornber  */
87*eec40579SJoe Thornber static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws)
88*eec40579SJoe Thornber {
89*eec40579SJoe Thornber 	int r;
90*eec40579SJoe Thornber 
91*eec40579SJoe Thornber 	memset(ws->bits, 0, bitset_size(ws->md.nr_bits));
92*eec40579SJoe Thornber 
93*eec40579SJoe Thornber 	r = setup_on_disk_bitset(info, ws->md.nr_bits, &ws->md.root);
94*eec40579SJoe Thornber 	if (r) {
95*eec40579SJoe Thornber 		DMERR("%s: setup_on_disk_bitset failed", __func__);
96*eec40579SJoe Thornber 		return r;
97*eec40579SJoe Thornber 	}
98*eec40579SJoe Thornber 
99*eec40579SJoe Thornber 	return 0;
100*eec40579SJoe Thornber }
101*eec40579SJoe Thornber 
102*eec40579SJoe Thornber static bool writeset_marked(struct writeset *ws, dm_block_t block)
103*eec40579SJoe Thornber {
104*eec40579SJoe Thornber 	return test_bit(block, ws->bits);
105*eec40579SJoe Thornber }
106*eec40579SJoe Thornber 
107*eec40579SJoe Thornber static int writeset_marked_on_disk(struct dm_disk_bitset *info,
108*eec40579SJoe Thornber 				   struct writeset_metadata *m, dm_block_t block,
109*eec40579SJoe Thornber 				   bool *result)
110*eec40579SJoe Thornber {
111*eec40579SJoe Thornber 	dm_block_t old = m->root;
112*eec40579SJoe Thornber 
113*eec40579SJoe Thornber 	/*
114*eec40579SJoe Thornber 	 * The bitset was flushed when it was archived, so we know there'll
115*eec40579SJoe Thornber 	 * be no change to the root.
116*eec40579SJoe Thornber 	 */
117*eec40579SJoe Thornber 	int r = dm_bitset_test_bit(info, m->root, block, &m->root, result);
118*eec40579SJoe Thornber 	if (r) {
119*eec40579SJoe Thornber 		DMERR("%s: dm_bitset_test_bit failed", __func__);
120*eec40579SJoe Thornber 		return r;
121*eec40579SJoe Thornber 	}
122*eec40579SJoe Thornber 
123*eec40579SJoe Thornber 	BUG_ON(m->root != old);
124*eec40579SJoe Thornber 
125*eec40579SJoe Thornber 	return r;
126*eec40579SJoe Thornber }
127*eec40579SJoe Thornber 
128*eec40579SJoe Thornber /*
129*eec40579SJoe Thornber  * Returns < 0 on error, 0 if the bit wasn't previously set, 1 if it was.
130*eec40579SJoe Thornber  */
131*eec40579SJoe Thornber static int writeset_test_and_set(struct dm_disk_bitset *info,
132*eec40579SJoe Thornber 				 struct writeset *ws, uint32_t block)
133*eec40579SJoe Thornber {
134*eec40579SJoe Thornber 	int r;
135*eec40579SJoe Thornber 
136*eec40579SJoe Thornber 	if (!test_and_set_bit(block, ws->bits)) {
137*eec40579SJoe Thornber 		r = dm_bitset_set_bit(info, ws->md.root, block, &ws->md.root);
138*eec40579SJoe Thornber 		if (r) {
139*eec40579SJoe Thornber 			/* FIXME: fail mode */
140*eec40579SJoe Thornber 			return r;
141*eec40579SJoe Thornber 		}
142*eec40579SJoe Thornber 
143*eec40579SJoe Thornber 		return 0;
144*eec40579SJoe Thornber 	}
145*eec40579SJoe Thornber 
146*eec40579SJoe Thornber 	return 1;
147*eec40579SJoe Thornber }
148*eec40579SJoe Thornber 
149*eec40579SJoe Thornber /*----------------------------------------------------------------
150*eec40579SJoe Thornber  * On disk metadata layout
151*eec40579SJoe Thornber  *--------------------------------------------------------------*/
152*eec40579SJoe Thornber #define SPACE_MAP_ROOT_SIZE 128
153*eec40579SJoe Thornber #define UUID_LEN 16
154*eec40579SJoe Thornber 
/* On disk representation of struct writeset_metadata, little endian. */
struct writeset_disk {
	__le32 nr_bits;
	__le64 root;
} __packed;
159*eec40579SJoe Thornber 
/*
 * On disk layout of the era superblock (block 0).  All multi-byte fields
 * are little endian.
 */
struct superblock_disk {
	__le32 csum;		/* checksum of everything from 'flags' onwards */
	__le32 flags;
	__le64 blocknr;		/* location this block was written to */

	__u8 uuid[UUID_LEN];	/* currently unused; zeroed on every write */
	__le64 magic;		/* SUPERBLOCK_MAGIC */
	__le32 version;

	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

	__le32 data_block_size;
	__le32 metadata_block_size;	/* in sectors */
	__le32 nr_blocks;

	__le32 current_era;
	struct writeset_disk current_writeset;

	/*
	 * Only these two fields are valid within the metadata snapshot.
	 */
	__le64 writeset_tree_root;	/* btree of archived writesets, keyed by era */
	__le64 era_array_root;		/* array of per-block era values */

	__le64 metadata_snap;
} __packed;
186*eec40579SJoe Thornber 
187*eec40579SJoe Thornber /*----------------------------------------------------------------
188*eec40579SJoe Thornber  * Superblock validation
189*eec40579SJoe Thornber  *--------------------------------------------------------------*/
190*eec40579SJoe Thornber static void sb_prepare_for_write(struct dm_block_validator *v,
191*eec40579SJoe Thornber 				 struct dm_block *b,
192*eec40579SJoe Thornber 				 size_t sb_block_size)
193*eec40579SJoe Thornber {
194*eec40579SJoe Thornber 	struct superblock_disk *disk = dm_block_data(b);
195*eec40579SJoe Thornber 
196*eec40579SJoe Thornber 	disk->blocknr = cpu_to_le64(dm_block_location(b));
197*eec40579SJoe Thornber 	disk->csum = cpu_to_le32(dm_bm_checksum(&disk->flags,
198*eec40579SJoe Thornber 						sb_block_size - sizeof(__le32),
199*eec40579SJoe Thornber 						SUPERBLOCK_CSUM_XOR));
200*eec40579SJoe Thornber }
201*eec40579SJoe Thornber 
202*eec40579SJoe Thornber static int check_metadata_version(struct superblock_disk *disk)
203*eec40579SJoe Thornber {
204*eec40579SJoe Thornber 	uint32_t metadata_version = le32_to_cpu(disk->version);
205*eec40579SJoe Thornber 	if (metadata_version < MIN_ERA_VERSION || metadata_version > MAX_ERA_VERSION) {
206*eec40579SJoe Thornber 		DMERR("Era metadata version %u found, but only versions between %u and %u supported.",
207*eec40579SJoe Thornber 		      metadata_version, MIN_ERA_VERSION, MAX_ERA_VERSION);
208*eec40579SJoe Thornber 		return -EINVAL;
209*eec40579SJoe Thornber 	}
210*eec40579SJoe Thornber 
211*eec40579SJoe Thornber 	return 0;
212*eec40579SJoe Thornber }
213*eec40579SJoe Thornber 
214*eec40579SJoe Thornber static int sb_check(struct dm_block_validator *v,
215*eec40579SJoe Thornber 		    struct dm_block *b,
216*eec40579SJoe Thornber 		    size_t sb_block_size)
217*eec40579SJoe Thornber {
218*eec40579SJoe Thornber 	struct superblock_disk *disk = dm_block_data(b);
219*eec40579SJoe Thornber 	__le32 csum_le;
220*eec40579SJoe Thornber 
221*eec40579SJoe Thornber 	if (dm_block_location(b) != le64_to_cpu(disk->blocknr)) {
222*eec40579SJoe Thornber 		DMERR("sb_check failed: blocknr %llu: wanted %llu",
223*eec40579SJoe Thornber 		      le64_to_cpu(disk->blocknr),
224*eec40579SJoe Thornber 		      (unsigned long long)dm_block_location(b));
225*eec40579SJoe Thornber 		return -ENOTBLK;
226*eec40579SJoe Thornber 	}
227*eec40579SJoe Thornber 
228*eec40579SJoe Thornber 	if (le64_to_cpu(disk->magic) != SUPERBLOCK_MAGIC) {
229*eec40579SJoe Thornber 		DMERR("sb_check failed: magic %llu: wanted %llu",
230*eec40579SJoe Thornber 		      le64_to_cpu(disk->magic),
231*eec40579SJoe Thornber 		      (unsigned long long) SUPERBLOCK_MAGIC);
232*eec40579SJoe Thornber 		return -EILSEQ;
233*eec40579SJoe Thornber 	}
234*eec40579SJoe Thornber 
235*eec40579SJoe Thornber 	csum_le = cpu_to_le32(dm_bm_checksum(&disk->flags,
236*eec40579SJoe Thornber 					     sb_block_size - sizeof(__le32),
237*eec40579SJoe Thornber 					     SUPERBLOCK_CSUM_XOR));
238*eec40579SJoe Thornber 	if (csum_le != disk->csum) {
239*eec40579SJoe Thornber 		DMERR("sb_check failed: csum %u: wanted %u",
240*eec40579SJoe Thornber 		      le32_to_cpu(csum_le), le32_to_cpu(disk->csum));
241*eec40579SJoe Thornber 		return -EILSEQ;
242*eec40579SJoe Thornber 	}
243*eec40579SJoe Thornber 
244*eec40579SJoe Thornber 	return check_metadata_version(disk);
245*eec40579SJoe Thornber }
246*eec40579SJoe Thornber 
/*
 * Validator the block manager runs whenever the superblock is read
 * (sb_check) or about to be written (sb_prepare_for_write).
 */
static struct dm_block_validator sb_validator = {
	.name = "superblock",
	.prepare_for_write = sb_prepare_for_write,
	.check = sb_check
};
252*eec40579SJoe Thornber 
253*eec40579SJoe Thornber /*----------------------------------------------------------------
254*eec40579SJoe Thornber  * Low level metadata handling
255*eec40579SJoe Thornber  *--------------------------------------------------------------*/
256*eec40579SJoe Thornber #define DM_ERA_METADATA_BLOCK_SIZE 4096
257*eec40579SJoe Thornber #define DM_ERA_METADATA_CACHE_SIZE 64
258*eec40579SJoe Thornber #define ERA_MAX_CONCURRENT_LOCKS 5
259*eec40579SJoe Thornber 
/* In core metadata state for an era device. */
struct era_metadata {
	struct block_device *bdev;	/* metadata device */
	struct dm_block_manager *bm;
	struct dm_space_map *sm;
	struct dm_transaction_manager *tm;

	dm_block_t block_size;	/* data block size */
	uint32_t nr_blocks;	/* number of data blocks */

	uint32_t current_era;

	/*
	 * We preallocate 2 writesets.  When an era rolls over we
	 * switch between them. This means the allocation is done at
	 * preresume time, rather than on the io path.
	 */
	struct writeset writesets[2];
	struct writeset *current_writeset;

	dm_block_t writeset_tree_root;	/* btree of archived writesets, keyed by era */
	dm_block_t era_array_root;	/* array of per-block era values */

	struct dm_disk_bitset bitset_info;
	struct dm_btree_info writeset_tree_info;
	struct dm_array_info era_array_info;

	dm_block_t metadata_snap;

	/*
	 * A flag that is set whenever a writeset has been archived.
	 */
	bool archived_writesets;
};
293*eec40579SJoe Thornber 
/* Read lock the superblock, validating it with sb_validator. */
static int superblock_read_lock(struct era_metadata *md,
				struct dm_block **sblock)
{
	return dm_bm_read_lock(md->bm, SUPERBLOCK_LOCATION,
			       &sb_validator, sblock);
}
300*eec40579SJoe Thornber 
/* Write lock the superblock, zeroing its contents (used when formatting). */
static int superblock_lock_zero(struct era_metadata *md,
				struct dm_block **sblock)
{
	return dm_bm_write_lock_zero(md->bm, SUPERBLOCK_LOCATION,
				     &sb_validator, sblock);
}
307*eec40579SJoe Thornber 
/* Write lock the superblock, preserving its current contents. */
static int superblock_lock(struct era_metadata *md,
			   struct dm_block **sblock)
{
	return dm_bm_write_lock(md->bm, SUPERBLOCK_LOCATION,
				&sb_validator, sblock);
}
314*eec40579SJoe Thornber 
315*eec40579SJoe Thornber /* FIXME: duplication with cache and thin */
316*eec40579SJoe Thornber static int superblock_all_zeroes(struct dm_block_manager *bm, bool *result)
317*eec40579SJoe Thornber {
318*eec40579SJoe Thornber 	int r;
319*eec40579SJoe Thornber 	unsigned i;
320*eec40579SJoe Thornber 	struct dm_block *b;
321*eec40579SJoe Thornber 	__le64 *data_le, zero = cpu_to_le64(0);
322*eec40579SJoe Thornber 	unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64);
323*eec40579SJoe Thornber 
324*eec40579SJoe Thornber 	/*
325*eec40579SJoe Thornber 	 * We can't use a validator here - it may be all zeroes.
326*eec40579SJoe Thornber 	 */
327*eec40579SJoe Thornber 	r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &b);
328*eec40579SJoe Thornber 	if (r)
329*eec40579SJoe Thornber 		return r;
330*eec40579SJoe Thornber 
331*eec40579SJoe Thornber 	data_le = dm_block_data(b);
332*eec40579SJoe Thornber 	*result = true;
333*eec40579SJoe Thornber 	for (i = 0; i < sb_block_size; i++) {
334*eec40579SJoe Thornber 		if (data_le[i] != zero) {
335*eec40579SJoe Thornber 			*result = false;
336*eec40579SJoe Thornber 			break;
337*eec40579SJoe Thornber 		}
338*eec40579SJoe Thornber 	}
339*eec40579SJoe Thornber 
340*eec40579SJoe Thornber 	return dm_bm_unlock(b);
341*eec40579SJoe Thornber }
342*eec40579SJoe Thornber 
343*eec40579SJoe Thornber /*----------------------------------------------------------------*/
344*eec40579SJoe Thornber 
/* Converts in core writeset metadata to its on disk (little endian) form. */
static void ws_pack(const struct writeset_metadata *core, struct writeset_disk *disk)
{
	disk->nr_bits = cpu_to_le32(core->nr_bits);
	disk->root = cpu_to_le64(core->root);
}
350*eec40579SJoe Thornber 
/* Converts on disk writeset metadata back to its in core form. */
static void ws_unpack(const struct writeset_disk *disk, struct writeset_metadata *core)
{
	core->nr_bits = le32_to_cpu(disk->nr_bits);
	core->root = le64_to_cpu(disk->root);
}
356*eec40579SJoe Thornber 
357*eec40579SJoe Thornber static void ws_inc(void *context, const void *value)
358*eec40579SJoe Thornber {
359*eec40579SJoe Thornber 	struct era_metadata *md = context;
360*eec40579SJoe Thornber 	struct writeset_disk ws_d;
361*eec40579SJoe Thornber 	dm_block_t b;
362*eec40579SJoe Thornber 
363*eec40579SJoe Thornber 	memcpy(&ws_d, value, sizeof(ws_d));
364*eec40579SJoe Thornber 	b = le64_to_cpu(ws_d.root);
365*eec40579SJoe Thornber 
366*eec40579SJoe Thornber 	dm_tm_inc(md->tm, b);
367*eec40579SJoe Thornber }
368*eec40579SJoe Thornber 
369*eec40579SJoe Thornber static void ws_dec(void *context, const void *value)
370*eec40579SJoe Thornber {
371*eec40579SJoe Thornber 	struct era_metadata *md = context;
372*eec40579SJoe Thornber 	struct writeset_disk ws_d;
373*eec40579SJoe Thornber 	dm_block_t b;
374*eec40579SJoe Thornber 
375*eec40579SJoe Thornber 	memcpy(&ws_d, value, sizeof(ws_d));
376*eec40579SJoe Thornber 	b = le64_to_cpu(ws_d.root);
377*eec40579SJoe Thornber 
378*eec40579SJoe Thornber 	dm_bitset_del(&md->bitset_info, b);
379*eec40579SJoe Thornber }
380*eec40579SJoe Thornber 
381*eec40579SJoe Thornber static int ws_eq(void *context, const void *value1, const void *value2)
382*eec40579SJoe Thornber {
383*eec40579SJoe Thornber 	return !memcmp(value1, value2, sizeof(struct writeset_metadata));
384*eec40579SJoe Thornber }
385*eec40579SJoe Thornber 
386*eec40579SJoe Thornber /*----------------------------------------------------------------*/
387*eec40579SJoe Thornber 
388*eec40579SJoe Thornber static void setup_writeset_tree_info(struct era_metadata *md)
389*eec40579SJoe Thornber {
390*eec40579SJoe Thornber 	struct dm_btree_value_type *vt = &md->writeset_tree_info.value_type;
391*eec40579SJoe Thornber 	md->writeset_tree_info.tm = md->tm;
392*eec40579SJoe Thornber 	md->writeset_tree_info.levels = 1;
393*eec40579SJoe Thornber 	vt->context = md;
394*eec40579SJoe Thornber 	vt->size = sizeof(struct writeset_disk);
395*eec40579SJoe Thornber 	vt->inc = ws_inc;
396*eec40579SJoe Thornber 	vt->dec = ws_dec;
397*eec40579SJoe Thornber 	vt->equal = ws_eq;
398*eec40579SJoe Thornber }
399*eec40579SJoe Thornber 
400*eec40579SJoe Thornber static void setup_era_array_info(struct era_metadata *md)
401*eec40579SJoe Thornber 
402*eec40579SJoe Thornber {
403*eec40579SJoe Thornber 	struct dm_btree_value_type vt;
404*eec40579SJoe Thornber 	vt.context = NULL;
405*eec40579SJoe Thornber 	vt.size = sizeof(__le32);
406*eec40579SJoe Thornber 	vt.inc = NULL;
407*eec40579SJoe Thornber 	vt.dec = NULL;
408*eec40579SJoe Thornber 	vt.equal = NULL;
409*eec40579SJoe Thornber 
410*eec40579SJoe Thornber 	dm_array_info_init(&md->era_array_info, md->tm, &vt);
411*eec40579SJoe Thornber }
412*eec40579SJoe Thornber 
/*
 * Sets up the bitset, writeset tree and era array infos.  md->tm must
 * already be created.
 */
static void setup_infos(struct era_metadata *md)
{
	dm_disk_bitset_init(md->tm, &md->bitset_info);
	setup_writeset_tree_info(md);
	setup_era_array_info(md);
}
419*eec40579SJoe Thornber 
420*eec40579SJoe Thornber /*----------------------------------------------------------------*/
421*eec40579SJoe Thornber 
422*eec40579SJoe Thornber static int create_fresh_metadata(struct era_metadata *md)
423*eec40579SJoe Thornber {
424*eec40579SJoe Thornber 	int r;
425*eec40579SJoe Thornber 
426*eec40579SJoe Thornber 	r = dm_tm_create_with_sm(md->bm, SUPERBLOCK_LOCATION,
427*eec40579SJoe Thornber 				 &md->tm, &md->sm);
428*eec40579SJoe Thornber 	if (r < 0) {
429*eec40579SJoe Thornber 		DMERR("dm_tm_create_with_sm failed");
430*eec40579SJoe Thornber 		return r;
431*eec40579SJoe Thornber 	}
432*eec40579SJoe Thornber 
433*eec40579SJoe Thornber 	setup_infos(md);
434*eec40579SJoe Thornber 
435*eec40579SJoe Thornber 	r = dm_btree_empty(&md->writeset_tree_info, &md->writeset_tree_root);
436*eec40579SJoe Thornber 	if (r) {
437*eec40579SJoe Thornber 		DMERR("couldn't create new writeset tree");
438*eec40579SJoe Thornber 		goto bad;
439*eec40579SJoe Thornber 	}
440*eec40579SJoe Thornber 
441*eec40579SJoe Thornber 	r = dm_array_empty(&md->era_array_info, &md->era_array_root);
442*eec40579SJoe Thornber 	if (r) {
443*eec40579SJoe Thornber 		DMERR("couldn't create era array");
444*eec40579SJoe Thornber 		goto bad;
445*eec40579SJoe Thornber 	}
446*eec40579SJoe Thornber 
447*eec40579SJoe Thornber 	return 0;
448*eec40579SJoe Thornber 
449*eec40579SJoe Thornber bad:
450*eec40579SJoe Thornber 	dm_sm_destroy(md->sm);
451*eec40579SJoe Thornber 	dm_tm_destroy(md->tm);
452*eec40579SJoe Thornber 
453*eec40579SJoe Thornber 	return r;
454*eec40579SJoe Thornber }
455*eec40579SJoe Thornber 
456*eec40579SJoe Thornber /*
457*eec40579SJoe Thornber  * Writes a superblock, including the static fields that don't get updated
458*eec40579SJoe Thornber  * with every commit (possible optimisation here).  'md' should be fully
459*eec40579SJoe Thornber  * constructed when this is called.
460*eec40579SJoe Thornber  */
461*eec40579SJoe Thornber static int prepare_superblock(struct era_metadata *md, struct superblock_disk *disk)
462*eec40579SJoe Thornber {
463*eec40579SJoe Thornber 	int r;
464*eec40579SJoe Thornber 	size_t metadata_len;
465*eec40579SJoe Thornber 
466*eec40579SJoe Thornber 	disk->magic = cpu_to_le64(SUPERBLOCK_MAGIC);
467*eec40579SJoe Thornber 	disk->flags = cpu_to_le32(0ul);
468*eec40579SJoe Thornber 
469*eec40579SJoe Thornber 	/* FIXME: can't keep blanking the uuid (uuid is currently unused though) */
470*eec40579SJoe Thornber 	memset(disk->uuid, 0, sizeof(disk->uuid));
471*eec40579SJoe Thornber 	disk->version = cpu_to_le32(MAX_ERA_VERSION);
472*eec40579SJoe Thornber 
473*eec40579SJoe Thornber 	r = dm_sm_root_size(md->sm, &metadata_len);
474*eec40579SJoe Thornber 	if (r < 0)
475*eec40579SJoe Thornber 		return r;
476*eec40579SJoe Thornber 
477*eec40579SJoe Thornber 	r = dm_sm_copy_root(md->sm, &disk->metadata_space_map_root,
478*eec40579SJoe Thornber 			    metadata_len);
479*eec40579SJoe Thornber 	if (r < 0)
480*eec40579SJoe Thornber 		return r;
481*eec40579SJoe Thornber 
482*eec40579SJoe Thornber 	disk->data_block_size = cpu_to_le32(md->block_size);
483*eec40579SJoe Thornber 	disk->metadata_block_size = cpu_to_le32(DM_ERA_METADATA_BLOCK_SIZE >> SECTOR_SHIFT);
484*eec40579SJoe Thornber 	disk->nr_blocks = cpu_to_le32(md->nr_blocks);
485*eec40579SJoe Thornber 	disk->current_era = cpu_to_le32(md->current_era);
486*eec40579SJoe Thornber 
487*eec40579SJoe Thornber 	ws_pack(&md->current_writeset->md, &disk->current_writeset);
488*eec40579SJoe Thornber 	disk->writeset_tree_root = cpu_to_le64(md->writeset_tree_root);
489*eec40579SJoe Thornber 	disk->era_array_root = cpu_to_le64(md->era_array_root);
490*eec40579SJoe Thornber 	disk->metadata_snap = cpu_to_le64(md->metadata_snap);
491*eec40579SJoe Thornber 
492*eec40579SJoe Thornber 	return 0;
493*eec40579SJoe Thornber }
494*eec40579SJoe Thornber 
495*eec40579SJoe Thornber static int write_superblock(struct era_metadata *md)
496*eec40579SJoe Thornber {
497*eec40579SJoe Thornber 	int r;
498*eec40579SJoe Thornber 	struct dm_block *sblock;
499*eec40579SJoe Thornber 	struct superblock_disk *disk;
500*eec40579SJoe Thornber 
501*eec40579SJoe Thornber 	r = superblock_lock_zero(md, &sblock);
502*eec40579SJoe Thornber 	if (r)
503*eec40579SJoe Thornber 		return r;
504*eec40579SJoe Thornber 
505*eec40579SJoe Thornber 	disk = dm_block_data(sblock);
506*eec40579SJoe Thornber 	r = prepare_superblock(md, disk);
507*eec40579SJoe Thornber 	if (r) {
508*eec40579SJoe Thornber 		DMERR("%s: prepare_superblock failed", __func__);
509*eec40579SJoe Thornber 		dm_bm_unlock(sblock); /* FIXME: does this commit? */
510*eec40579SJoe Thornber 		return r;
511*eec40579SJoe Thornber 	}
512*eec40579SJoe Thornber 
513*eec40579SJoe Thornber 	return dm_tm_commit(md->tm, sblock);
514*eec40579SJoe Thornber }
515*eec40579SJoe Thornber 
516*eec40579SJoe Thornber /*
517*eec40579SJoe Thornber  * Assumes block_size and the infos are set.
518*eec40579SJoe Thornber  */
519*eec40579SJoe Thornber static int format_metadata(struct era_metadata *md)
520*eec40579SJoe Thornber {
521*eec40579SJoe Thornber 	int r;
522*eec40579SJoe Thornber 
523*eec40579SJoe Thornber 	r = create_fresh_metadata(md);
524*eec40579SJoe Thornber 	if (r)
525*eec40579SJoe Thornber 		return r;
526*eec40579SJoe Thornber 
527*eec40579SJoe Thornber 	r = write_superblock(md);
528*eec40579SJoe Thornber 	if (r) {
529*eec40579SJoe Thornber 		dm_sm_destroy(md->sm);
530*eec40579SJoe Thornber 		dm_tm_destroy(md->tm);
531*eec40579SJoe Thornber 		return r;
532*eec40579SJoe Thornber 	}
533*eec40579SJoe Thornber 
534*eec40579SJoe Thornber 	return 0;
535*eec40579SJoe Thornber }
536*eec40579SJoe Thornber 
537*eec40579SJoe Thornber static int open_metadata(struct era_metadata *md)
538*eec40579SJoe Thornber {
539*eec40579SJoe Thornber 	int r;
540*eec40579SJoe Thornber 	struct dm_block *sblock;
541*eec40579SJoe Thornber 	struct superblock_disk *disk;
542*eec40579SJoe Thornber 
543*eec40579SJoe Thornber 	r = superblock_read_lock(md, &sblock);
544*eec40579SJoe Thornber 	if (r) {
545*eec40579SJoe Thornber 		DMERR("couldn't read_lock superblock");
546*eec40579SJoe Thornber 		return r;
547*eec40579SJoe Thornber 	}
548*eec40579SJoe Thornber 
549*eec40579SJoe Thornber 	disk = dm_block_data(sblock);
550*eec40579SJoe Thornber 	r = dm_tm_open_with_sm(md->bm, SUPERBLOCK_LOCATION,
551*eec40579SJoe Thornber 			       disk->metadata_space_map_root,
552*eec40579SJoe Thornber 			       sizeof(disk->metadata_space_map_root),
553*eec40579SJoe Thornber 			       &md->tm, &md->sm);
554*eec40579SJoe Thornber 	if (r) {
555*eec40579SJoe Thornber 		DMERR("dm_tm_open_with_sm failed");
556*eec40579SJoe Thornber 		goto bad;
557*eec40579SJoe Thornber 	}
558*eec40579SJoe Thornber 
559*eec40579SJoe Thornber 	setup_infos(md);
560*eec40579SJoe Thornber 
561*eec40579SJoe Thornber 	md->block_size = le32_to_cpu(disk->data_block_size);
562*eec40579SJoe Thornber 	md->nr_blocks = le32_to_cpu(disk->nr_blocks);
563*eec40579SJoe Thornber 	md->current_era = le32_to_cpu(disk->current_era);
564*eec40579SJoe Thornber 
565*eec40579SJoe Thornber 	md->writeset_tree_root = le64_to_cpu(disk->writeset_tree_root);
566*eec40579SJoe Thornber 	md->era_array_root = le64_to_cpu(disk->era_array_root);
567*eec40579SJoe Thornber 	md->metadata_snap = le64_to_cpu(disk->metadata_snap);
568*eec40579SJoe Thornber 	md->archived_writesets = true;
569*eec40579SJoe Thornber 
570*eec40579SJoe Thornber 	return dm_bm_unlock(sblock);
571*eec40579SJoe Thornber 
572*eec40579SJoe Thornber bad:
573*eec40579SJoe Thornber 	dm_bm_unlock(sblock);
574*eec40579SJoe Thornber 	return r;
575*eec40579SJoe Thornber }
576*eec40579SJoe Thornber 
577*eec40579SJoe Thornber static int open_or_format_metadata(struct era_metadata *md,
578*eec40579SJoe Thornber 				   bool may_format)
579*eec40579SJoe Thornber {
580*eec40579SJoe Thornber 	int r;
581*eec40579SJoe Thornber 	bool unformatted = false;
582*eec40579SJoe Thornber 
583*eec40579SJoe Thornber 	r = superblock_all_zeroes(md->bm, &unformatted);
584*eec40579SJoe Thornber 	if (r)
585*eec40579SJoe Thornber 		return r;
586*eec40579SJoe Thornber 
587*eec40579SJoe Thornber 	if (unformatted)
588*eec40579SJoe Thornber 		return may_format ? format_metadata(md) : -EPERM;
589*eec40579SJoe Thornber 
590*eec40579SJoe Thornber 	return open_metadata(md);
591*eec40579SJoe Thornber }
592*eec40579SJoe Thornber 
/*
 * Creates the block manager and opens (or formats) the metadata on top
 * of it.  On failure nothing is left allocated.
 */
static int create_persistent_data_objects(struct era_metadata *md,
					  bool may_format)
{
	int r;

	md->bm = dm_block_manager_create(md->bdev, DM_ERA_METADATA_BLOCK_SIZE,
					 DM_ERA_METADATA_CACHE_SIZE,
					 ERA_MAX_CONCURRENT_LOCKS);
	if (IS_ERR(md->bm)) {
		DMERR("could not create block manager");
		return PTR_ERR(md->bm);
	}

	r = open_or_format_metadata(md, may_format);
	if (r)
		dm_block_manager_destroy(md->bm);

	return r;
}
612*eec40579SJoe Thornber 
/* Tear down in reverse order of creation: space map, tm, block manager. */
static void destroy_persistent_data_objects(struct era_metadata *md)
{
	dm_sm_destroy(md->sm);
	dm_tm_destroy(md->tm);
	dm_block_manager_destroy(md->bm);
}
619*eec40579SJoe Thornber 
620*eec40579SJoe Thornber /*
621*eec40579SJoe Thornber  * This waits until all era_map threads have picked up the new filter.
622*eec40579SJoe Thornber  */
/*
 * This waits until all era_map threads have picked up the new filter.
 */
static void swap_writeset(struct era_metadata *md, struct writeset *new_writeset)
{
	/* Publish the new writeset, then wait out all current RCU readers. */
	rcu_assign_pointer(md->current_writeset, new_writeset);
	synchronize_rcu();
}
628*eec40579SJoe Thornber 
629*eec40579SJoe Thornber /*----------------------------------------------------------------
630*eec40579SJoe Thornber  * Writesets get 'digested' into the main era array.
631*eec40579SJoe Thornber  *
632*eec40579SJoe Thornber  * We're using a coroutine here so the worker thread can do the digestion,
633*eec40579SJoe Thornber  * thus avoiding synchronisation of the metadata.  Digesting a whole
634*eec40579SJoe Thornber  * writeset in one go would cause too much latency.
635*eec40579SJoe Thornber  *--------------------------------------------------------------*/
struct digest {
	uint32_t era;			/* era of the writeset being digested */
	unsigned nr_bits, current_bit;	/* progress through the writeset */
	struct writeset_metadata writeset;	/* in core copy of the writeset being digested */
	__le32 value;			/* value written into the era array */
	struct dm_disk_bitset info;

	/* Next step of the digest state machine. */
	int (*step)(struct era_metadata *, struct digest *);
};
645*eec40579SJoe Thornber 
646*eec40579SJoe Thornber static int metadata_digest_lookup_writeset(struct era_metadata *md,
647*eec40579SJoe Thornber 					   struct digest *d);
648*eec40579SJoe Thornber 
649*eec40579SJoe Thornber static int metadata_digest_remove_writeset(struct era_metadata *md,
650*eec40579SJoe Thornber 					   struct digest *d)
651*eec40579SJoe Thornber {
652*eec40579SJoe Thornber 	int r;
653*eec40579SJoe Thornber 	uint64_t key = d->era;
654*eec40579SJoe Thornber 
655*eec40579SJoe Thornber 	r = dm_btree_remove(&md->writeset_tree_info, md->writeset_tree_root,
656*eec40579SJoe Thornber 			    &key, &md->writeset_tree_root);
657*eec40579SJoe Thornber 	if (r) {
658*eec40579SJoe Thornber 		DMERR("%s: dm_btree_remove failed", __func__);
659*eec40579SJoe Thornber 		return r;
660*eec40579SJoe Thornber 	}
661*eec40579SJoe Thornber 
662*eec40579SJoe Thornber 	d->step = metadata_digest_lookup_writeset;
663*eec40579SJoe Thornber 	return 0;
664*eec40579SJoe Thornber }
665*eec40579SJoe Thornber 
/* Cap on era-array updates per digestion step, to bound worker latency. */
#define INSERTS_PER_STEP 100

/*
 * Digestion stage: copy up to INSERTS_PER_STEP marked bits from the
 * on-disk writeset into the era array, stamping d->era (as d->value)
 * on each marked block.
 */
static int metadata_digest_transcribe_writeset(struct era_metadata *md,
					       struct digest *d)
{
	int r;
	bool marked;
	unsigned b, e = min(d->current_bit + INSERTS_PER_STEP, d->nr_bits);

	for (b = d->current_bit; b < e; b++) {
		r = writeset_marked_on_disk(&d->info, &d->writeset, b, &marked);
		if (r) {
			DMERR("%s: writeset_marked_on_disk failed", __func__);
			return r;
		}

		/* Unmarked blocks keep whatever era they already have. */
		if (!marked)
			continue;

		__dm_bless_for_disk(&d->value);
		r = dm_array_set_value(&md->era_array_info, md->era_array_root,
				       b, &d->value, &md->era_array_root);
		if (r) {
			DMERR("%s: dm_array_set_value failed", __func__);
			return r;
		}
	}

	/* All bits done?  Move on to removing the writeset from the tree. */
	if (b == d->nr_bits)
		d->step = metadata_digest_remove_writeset;
	else
		d->current_bit = b;

	return 0;
}
701*eec40579SJoe Thornber 
702*eec40579SJoe Thornber static int metadata_digest_lookup_writeset(struct era_metadata *md,
703*eec40579SJoe Thornber 					   struct digest *d)
704*eec40579SJoe Thornber {
705*eec40579SJoe Thornber 	int r;
706*eec40579SJoe Thornber 	uint64_t key;
707*eec40579SJoe Thornber 	struct writeset_disk disk;
708*eec40579SJoe Thornber 
709*eec40579SJoe Thornber 	r = dm_btree_find_lowest_key(&md->writeset_tree_info,
710*eec40579SJoe Thornber 				     md->writeset_tree_root, &key);
711*eec40579SJoe Thornber 	if (r < 0)
712*eec40579SJoe Thornber 		return r;
713*eec40579SJoe Thornber 
714*eec40579SJoe Thornber 	d->era = key;
715*eec40579SJoe Thornber 
716*eec40579SJoe Thornber 	r = dm_btree_lookup(&md->writeset_tree_info,
717*eec40579SJoe Thornber 			    md->writeset_tree_root, &key, &disk);
718*eec40579SJoe Thornber 	if (r) {
719*eec40579SJoe Thornber 		if (r == -ENODATA) {
720*eec40579SJoe Thornber 			d->step = NULL;
721*eec40579SJoe Thornber 			return 0;
722*eec40579SJoe Thornber 		}
723*eec40579SJoe Thornber 
724*eec40579SJoe Thornber 		DMERR("%s: dm_btree_lookup failed", __func__);
725*eec40579SJoe Thornber 		return r;
726*eec40579SJoe Thornber 	}
727*eec40579SJoe Thornber 
728*eec40579SJoe Thornber 	ws_unpack(&disk, &d->writeset);
729*eec40579SJoe Thornber 	d->value = cpu_to_le32(key);
730*eec40579SJoe Thornber 
731*eec40579SJoe Thornber 	d->nr_bits = min(d->writeset.nr_bits, md->nr_blocks);
732*eec40579SJoe Thornber 	d->current_bit = 0;
733*eec40579SJoe Thornber 	d->step = metadata_digest_transcribe_writeset;
734*eec40579SJoe Thornber 
735*eec40579SJoe Thornber 	return 0;
736*eec40579SJoe Thornber }
737*eec40579SJoe Thornber 
738*eec40579SJoe Thornber static int metadata_digest_start(struct era_metadata *md, struct digest *d)
739*eec40579SJoe Thornber {
740*eec40579SJoe Thornber 	if (d->step)
741*eec40579SJoe Thornber 		return 0;
742*eec40579SJoe Thornber 
743*eec40579SJoe Thornber 	memset(d, 0, sizeof(*d));
744*eec40579SJoe Thornber 
745*eec40579SJoe Thornber 	/*
746*eec40579SJoe Thornber 	 * We initialise another bitset info to avoid any caching side
747*eec40579SJoe Thornber 	 * effects with the previous one.
748*eec40579SJoe Thornber 	 */
749*eec40579SJoe Thornber 	dm_disk_bitset_init(md->tm, &d->info);
750*eec40579SJoe Thornber 	d->step = metadata_digest_lookup_writeset;
751*eec40579SJoe Thornber 
752*eec40579SJoe Thornber 	return 0;
753*eec40579SJoe Thornber }
754*eec40579SJoe Thornber 
755*eec40579SJoe Thornber /*----------------------------------------------------------------
756*eec40579SJoe Thornber  * High level metadata interface.  Target methods should use these, and not
757*eec40579SJoe Thornber  * the lower level ones.
758*eec40579SJoe Thornber  *--------------------------------------------------------------*/
/*
 * Allocate an era_metadata and bring up its persistent-data objects.
 *
 * NOTE(review): returns NULL on allocation failure but ERR_PTR() on
 * later failures -- callers must check for both.
 */
static struct era_metadata *metadata_open(struct block_device *bdev,
					  sector_t block_size,
					  bool may_format)
{
	int r;
	struct era_metadata *md = kzalloc(sizeof(*md), GFP_KERNEL);

	if (!md)
		return NULL;

	md->bdev = bdev;
	md->block_size = block_size;

	/* Neither writeset has been written to disk yet. */
	md->writesets[0].md.root = INVALID_WRITESET_ROOT;
	md->writesets[1].md.root = INVALID_WRITESET_ROOT;
	md->current_writeset = &md->writesets[0];

	r = create_persistent_data_objects(md, may_format);
	if (r) {
		kfree(md);
		return ERR_PTR(r);
	}

	return md;
}
784*eec40579SJoe Thornber 
/* Undo metadata_open(): tear down pdata objects, then free the struct. */
static void metadata_close(struct era_metadata *md)
{
	destroy_persistent_data_objects(md);
	kfree(md);
}
790*eec40579SJoe Thornber 
791*eec40579SJoe Thornber static bool valid_nr_blocks(dm_block_t n)
792*eec40579SJoe Thornber {
793*eec40579SJoe Thornber 	/*
794*eec40579SJoe Thornber 	 * dm_bitset restricts us to 2^32.  test_bit & co. restrict us
795*eec40579SJoe Thornber 	 * further to 2^31 - 1
796*eec40579SJoe Thornber 	 */
797*eec40579SJoe Thornber 	return n < (1ull << 31);
798*eec40579SJoe Thornber }
799*eec40579SJoe Thornber 
/*
 * Resize the era array and both in-core writesets to *new_size origin
 * blocks.  (void *arg carries the new size so this fits the rpc fn1
 * callback signature.)
 */
static int metadata_resize(struct era_metadata *md, void *arg)
{
	int r;
	dm_block_t *new_size = arg;
	__le32 value;

	if (!valid_nr_blocks(*new_size)) {
		DMERR("Invalid number of origin blocks %llu",
		      (unsigned long long) *new_size);
		return -EINVAL;
	}

	/* Drop the old in-core bitmaps and reallocate at the new size. */
	writeset_free(&md->writesets[0]);
	writeset_free(&md->writesets[1]);

	r = writeset_alloc(&md->writesets[0], *new_size);
	if (r) {
		DMERR("%s: writeset_alloc failed for writeset 0", __func__);
		return r;
	}

	r = writeset_alloc(&md->writesets[1], *new_size);
	if (r) {
		DMERR("%s: writeset_alloc failed for writeset 1", __func__);
		return r;
	}

	/* Any newly added blocks start out in era 0. */
	value = cpu_to_le32(0u);
	__dm_bless_for_disk(&value);
	r = dm_array_resize(&md->era_array_info, md->era_array_root,
			    md->nr_blocks, *new_size,
			    &value, &md->era_array_root);
	if (r) {
		DMERR("%s: dm_array_resize failed", __func__);
		return r;
	}

	md->nr_blocks = *new_size;
	return 0;
}
840*eec40579SJoe Thornber 
/*
 * Archive the current writeset: flush its bitset to disk, pack it and
 * insert it into the writeset tree keyed by the current era.  The
 * in-core root is then invalidated so the writeset can be reused.
 */
static int metadata_era_archive(struct era_metadata *md)
{
	int r;
	uint64_t keys[1];
	struct writeset_disk value;

	r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root,
			    &md->current_writeset->md.root);
	if (r) {
		DMERR("%s: dm_bitset_flush failed", __func__);
		return r;
	}

	ws_pack(&md->current_writeset->md, &value);
	md->current_writeset->md.root = INVALID_WRITESET_ROOT;

	keys[0] = md->current_era;
	__dm_bless_for_disk(&value);
	r = dm_btree_insert(&md->writeset_tree_info, md->writeset_tree_root,
			    keys, &value, &md->writeset_tree_root);
	if (r) {
		DMERR("%s: couldn't insert writeset into btree", __func__);
		/* FIXME: fail mode */
		return r;
	}

	/* Tell the worker there is now something to digest. */
	md->archived_writesets = true;

	return 0;
}
871*eec40579SJoe Thornber 
872*eec40579SJoe Thornber static struct writeset *next_writeset(struct era_metadata *md)
873*eec40579SJoe Thornber {
874*eec40579SJoe Thornber 	return (md->current_writeset == &md->writesets[0]) ?
875*eec40579SJoe Thornber 		&md->writesets[1] : &md->writesets[0];
876*eec40579SJoe Thornber }
877*eec40579SJoe Thornber 
878*eec40579SJoe Thornber static int metadata_new_era(struct era_metadata *md)
879*eec40579SJoe Thornber {
880*eec40579SJoe Thornber 	int r;
881*eec40579SJoe Thornber 	struct writeset *new_writeset = next_writeset(md);
882*eec40579SJoe Thornber 
883*eec40579SJoe Thornber 	r = writeset_init(&md->bitset_info, new_writeset);
884*eec40579SJoe Thornber 	if (r) {
885*eec40579SJoe Thornber 		DMERR("%s: writeset_init failed", __func__);
886*eec40579SJoe Thornber 		return r;
887*eec40579SJoe Thornber 	}
888*eec40579SJoe Thornber 
889*eec40579SJoe Thornber 	swap_writeset(md, new_writeset);
890*eec40579SJoe Thornber 	md->current_era++;
891*eec40579SJoe Thornber 
892*eec40579SJoe Thornber 	return 0;
893*eec40579SJoe Thornber }
894*eec40579SJoe Thornber 
895*eec40579SJoe Thornber static int metadata_era_rollover(struct era_metadata *md)
896*eec40579SJoe Thornber {
897*eec40579SJoe Thornber 	int r;
898*eec40579SJoe Thornber 
899*eec40579SJoe Thornber 	if (md->current_writeset->md.root != INVALID_WRITESET_ROOT) {
900*eec40579SJoe Thornber 		r = metadata_era_archive(md);
901*eec40579SJoe Thornber 		if (r) {
902*eec40579SJoe Thornber 			DMERR("%s: metadata_archive_era failed", __func__);
903*eec40579SJoe Thornber 			/* FIXME: fail mode? */
904*eec40579SJoe Thornber 			return r;
905*eec40579SJoe Thornber 		}
906*eec40579SJoe Thornber 	}
907*eec40579SJoe Thornber 
908*eec40579SJoe Thornber 	r = metadata_new_era(md);
909*eec40579SJoe Thornber 	if (r) {
910*eec40579SJoe Thornber 		DMERR("%s: new era failed", __func__);
911*eec40579SJoe Thornber 		/* FIXME: fail mode */
912*eec40579SJoe Thornber 		return r;
913*eec40579SJoe Thornber 	}
914*eec40579SJoe Thornber 
915*eec40579SJoe Thornber 	return 0;
916*eec40579SJoe Thornber }
917*eec40579SJoe Thornber 
918*eec40579SJoe Thornber static bool metadata_current_marked(struct era_metadata *md, dm_block_t block)
919*eec40579SJoe Thornber {
920*eec40579SJoe Thornber 	bool r;
921*eec40579SJoe Thornber 	struct writeset *ws;
922*eec40579SJoe Thornber 
923*eec40579SJoe Thornber 	rcu_read_lock();
924*eec40579SJoe Thornber 	ws = rcu_dereference(md->current_writeset);
925*eec40579SJoe Thornber 	r = writeset_marked(ws, block);
926*eec40579SJoe Thornber 	rcu_read_unlock();
927*eec40579SJoe Thornber 
928*eec40579SJoe Thornber 	return r;
929*eec40579SJoe Thornber }
930*eec40579SJoe Thornber 
/*
 * Flush the current writeset's bitset, then commit the transaction by
 * rewriting the superblock.
 */
static int metadata_commit(struct era_metadata *md)
{
	int r;
	struct dm_block *sblock;

	/*
	 * SUPERBLOCK_LOCATION == INVALID_WRITESET_ROOT (both 0), so this
	 * skips the flush when the writeset has no on-disk root.
	 */
	if (md->current_writeset->md.root != SUPERBLOCK_LOCATION) {
		r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root,
				    &md->current_writeset->md.root);
		if (r) {
			DMERR("%s: bitset flush failed", __func__);
			return r;
		}
	}

	r = dm_tm_pre_commit(md->tm);
	if (r) {
		DMERR("%s: pre commit failed", __func__);
		return r;
	}

	r = superblock_lock(md, &sblock);
	if (r) {
		DMERR("%s: superblock lock failed", __func__);
		return r;
	}

	r = prepare_superblock(md, dm_block_data(sblock));
	if (r) {
		DMERR("%s: prepare_superblock failed", __func__);
		dm_bm_unlock(sblock); /* FIXME: does this commit? */
		return r;
	}

	/* dm_tm_commit() writes the superblock and unlocks sblock. */
	return dm_tm_commit(md->tm, sblock);
}
966*eec40579SJoe Thornber 
/* Persist the current writeset by archiving it and starting a new era. */
static int metadata_checkpoint(struct era_metadata *md)
{
	/*
	 * For now we just rollover, but later I want to put a check in to
	 * avoid this if the filter is still pretty fresh.
	 */
	return metadata_era_rollover(md);
}
975*eec40579SJoe Thornber 
/*
 * Metadata snapshots allow userland to access era data.
 *
 * Takes a snapshot by shadowing the superblock and pinning the trees it
 * references.  Only one snapshot may exist at a time.
 */
static int metadata_take_snap(struct era_metadata *md)
{
	int r, inc;
	struct dm_block *clone;

	if (md->metadata_snap != SUPERBLOCK_LOCATION) {
		DMERR("%s: metadata snapshot already exists", __func__);
		return -EINVAL;
	}

	/* Archive the live writeset so the snapshot is self contained. */
	r = metadata_era_rollover(md);
	if (r) {
		DMERR("%s: era rollover failed", __func__);
		return r;
	}

	r = metadata_commit(md);
	if (r) {
		DMERR("%s: pre commit failed", __func__);
		return r;
	}

	/*
	 * Take an extra ref on the superblock so the shadow operation is
	 * forced to copy it (the BUG_ON below asserts a copy was made).
	 */
	r = dm_sm_inc_block(md->sm, SUPERBLOCK_LOCATION);
	if (r) {
		DMERR("%s: couldn't increment superblock", __func__);
		return r;
	}

	r = dm_tm_shadow_block(md->tm, SUPERBLOCK_LOCATION,
			       &sb_validator, &clone, &inc);
	if (r) {
		DMERR("%s: couldn't shadow superblock", __func__);
		dm_sm_dec_block(md->sm, SUPERBLOCK_LOCATION);
		return r;
	}
	BUG_ON(!inc);

	/* The clone keeps both trees alive, so up their ref counts too. */
	r = dm_sm_inc_block(md->sm, md->writeset_tree_root);
	if (r) {
		DMERR("%s: couldn't inc writeset tree root", __func__);
		dm_tm_unlock(md->tm, clone);
		return r;
	}

	r = dm_sm_inc_block(md->sm, md->era_array_root);
	if (r) {
		DMERR("%s: couldn't inc era tree root", __func__);
		dm_sm_dec_block(md->sm, md->writeset_tree_root);
		dm_tm_unlock(md->tm, clone);
		return r;
	}

	md->metadata_snap = dm_block_location(clone);

	r = dm_tm_unlock(md->tm, clone);
	if (r) {
		DMERR("%s: couldn't unlock clone", __func__);
		md->metadata_snap = SUPERBLOCK_LOCATION;
		return r;
	}

	return 0;
}
1042*eec40579SJoe Thornber 
/*
 * Release the metadata snapshot: delete the cloned trees it pins, then
 * drop the reference on the cloned superblock itself.
 */
static int metadata_drop_snap(struct era_metadata *md)
{
	int r;
	dm_block_t location;
	struct dm_block *clone;
	struct superblock_disk *disk;

	if (md->metadata_snap == SUPERBLOCK_LOCATION) {
		DMERR("%s: no snap to drop", __func__);
		return -EINVAL;
	}

	r = dm_tm_read_lock(md->tm, md->metadata_snap, &sb_validator, &clone);
	if (r) {
		DMERR("%s: couldn't read lock superblock clone", __func__);
		return r;
	}

	/*
	 * Whatever happens now we'll commit with no record of the metadata
	 * snap.
	 */
	md->metadata_snap = SUPERBLOCK_LOCATION;

	disk = dm_block_data(clone);
	r = dm_btree_del(&md->writeset_tree_info,
			 le64_to_cpu(disk->writeset_tree_root));
	if (r) {
		DMERR("%s: error deleting writeset tree clone", __func__);
		dm_tm_unlock(md->tm, clone);
		return r;
	}

	r = dm_array_del(&md->era_array_info, le64_to_cpu(disk->era_array_root));
	if (r) {
		DMERR("%s: error deleting era array clone", __func__);
		dm_tm_unlock(md->tm, clone);
		return r;
	}

	/* Remember where the clone lives before unlocking it. */
	location = dm_block_location(clone);
	dm_tm_unlock(md->tm, clone);

	return dm_sm_dec_block(md->sm, location);
}
1088*eec40579SJoe Thornber 
/* Usage summary filled in by metadata_get_stats(). */
struct metadata_stats {
	dm_block_t used;	/* metadata blocks in use */
	dm_block_t total;	/* total metadata blocks */
	dm_block_t snap;	/* snapshot location, SUPERBLOCK_LOCATION if none */
	uint32_t era;		/* current era */
};
1095*eec40579SJoe Thornber 
1096*eec40579SJoe Thornber static int metadata_get_stats(struct era_metadata *md, void *ptr)
1097*eec40579SJoe Thornber {
1098*eec40579SJoe Thornber 	int r;
1099*eec40579SJoe Thornber 	struct metadata_stats *s = ptr;
1100*eec40579SJoe Thornber 	dm_block_t nr_free, nr_total;
1101*eec40579SJoe Thornber 
1102*eec40579SJoe Thornber 	r = dm_sm_get_nr_free(md->sm, &nr_free);
1103*eec40579SJoe Thornber 	if (r) {
1104*eec40579SJoe Thornber 		DMERR("dm_sm_get_nr_free returned %d", r);
1105*eec40579SJoe Thornber 		return r;
1106*eec40579SJoe Thornber 	}
1107*eec40579SJoe Thornber 
1108*eec40579SJoe Thornber 	r = dm_sm_get_nr_blocks(md->sm, &nr_total);
1109*eec40579SJoe Thornber 	if (r) {
1110*eec40579SJoe Thornber 		DMERR("dm_pool_get_metadata_dev_size returned %d", r);
1111*eec40579SJoe Thornber 		return r;
1112*eec40579SJoe Thornber 	}
1113*eec40579SJoe Thornber 
1114*eec40579SJoe Thornber 	s->used = nr_total - nr_free;
1115*eec40579SJoe Thornber 	s->total = nr_total;
1116*eec40579SJoe Thornber 	s->snap = md->metadata_snap;
1117*eec40579SJoe Thornber 	s->era = md->current_era;
1118*eec40579SJoe Thornber 
1119*eec40579SJoe Thornber 	return 0;
1120*eec40579SJoe Thornber }
1121*eec40579SJoe Thornber 
1122*eec40579SJoe Thornber /*----------------------------------------------------------------*/
1123*eec40579SJoe Thornber 
/* Per-target instance state. */
struct era {
	struct dm_target *ti;
	struct dm_target_callbacks callbacks;

	struct dm_dev *metadata_dev;	/* device holding the era metadata */
	struct dm_dev *origin_dev;	/* device bios are remapped to */

	dm_block_t nr_blocks;		/* size of the origin in blocks */
	uint32_t sectors_per_block;
	int sectors_per_block_shift;	/* < 0 if block size isn't a power of 2 */
	struct era_metadata *md;

	struct workqueue_struct *wq;
	struct work_struct worker;	/* runs do_work() */

	spinlock_t deferred_lock;	/* protects deferred_bios */
	struct bio_list deferred_bios;

	spinlock_t rpc_lock;		/* protects rpc_calls */
	struct list_head rpc_calls;

	struct digest digest;		/* writeset digestion state */
	atomic_t suspended;		/* non-zero stops wake_worker() queueing work */
};
1148*eec40579SJoe Thornber 
/*
 * A metadata operation to be executed on the worker thread.  Exactly one
 * of fn0/fn1 is set; the caller blocks on 'complete' and reads 'result'.
 */
struct rpc {
	struct list_head list;

	int (*fn0)(struct era_metadata *);
	int (*fn1)(struct era_metadata *, void *);
	void *arg;	/* passed to fn1 */
	int result;

	struct completion complete;
};
1159*eec40579SJoe Thornber 
1160*eec40579SJoe Thornber /*----------------------------------------------------------------
1161*eec40579SJoe Thornber  * Remapping.
1162*eec40579SJoe Thornber  *---------------------------------------------------------------*/
1163*eec40579SJoe Thornber static bool block_size_is_power_of_two(struct era *era)
1164*eec40579SJoe Thornber {
1165*eec40579SJoe Thornber 	return era->sectors_per_block_shift >= 0;
1166*eec40579SJoe Thornber }
1167*eec40579SJoe Thornber 
1168*eec40579SJoe Thornber static dm_block_t get_block(struct era *era, struct bio *bio)
1169*eec40579SJoe Thornber {
1170*eec40579SJoe Thornber 	sector_t block_nr = bio->bi_iter.bi_sector;
1171*eec40579SJoe Thornber 
1172*eec40579SJoe Thornber 	if (!block_size_is_power_of_two(era))
1173*eec40579SJoe Thornber 		(void) sector_div(block_nr, era->sectors_per_block);
1174*eec40579SJoe Thornber 	else
1175*eec40579SJoe Thornber 		block_nr >>= era->sectors_per_block_shift;
1176*eec40579SJoe Thornber 
1177*eec40579SJoe Thornber 	return block_nr;
1178*eec40579SJoe Thornber }
1179*eec40579SJoe Thornber 
/* Redirect @bio to the origin device; the sector is left untouched. */
static void remap_to_origin(struct era *era, struct bio *bio)
{
	bio->bi_bdev = era->origin_dev->bdev;
}
1184*eec40579SJoe Thornber 
1185*eec40579SJoe Thornber /*----------------------------------------------------------------
1186*eec40579SJoe Thornber  * Worker thread
1187*eec40579SJoe Thornber  *--------------------------------------------------------------*/
/* Queue the worker, unless the target is suspended. */
static void wake_worker(struct era *era)
{
	if (!atomic_read(&era->suspended))
		queue_work(era->wq, &era->worker);
}
1193*eec40579SJoe Thornber 
/*
 * Run one step of the digestion coroutine, re-queueing the worker if
 * more steps remain.
 */
static void process_old_eras(struct era *era)
{
	int r;

	/* NULL step means nothing is being digested. */
	if (!era->digest.step)
		return;

	r = era->digest.step(era->md, &era->digest);
	if (r < 0) {
		DMERR("%s: digest step failed, stopping digestion", __func__);
		era->digest.step = NULL;

	} else if (era->digest.step)
		wake_worker(era);
}
1209*eec40579SJoe Thornber 
/*
 * Mark each deferred bio's block in the current writeset, commit the
 * metadata if anything changed, then issue (or error) the bios.
 */
static void process_deferred_bios(struct era *era)
{
	int r;
	struct bio_list deferred_bios, marked_bios;
	struct bio *bio;
	bool commit_needed = false;
	bool failed = false;

	bio_list_init(&deferred_bios);
	bio_list_init(&marked_bios);

	/* Take ownership of the queued bios. */
	spin_lock(&era->deferred_lock);
	bio_list_merge(&deferred_bios, &era->deferred_bios);
	bio_list_init(&era->deferred_bios);
	spin_unlock(&era->deferred_lock);

	while ((bio = bio_list_pop(&deferred_bios))) {
		r = writeset_test_and_set(&era->md->bitset_info,
					  era->md->current_writeset,
					  get_block(era, bio));
		if (r < 0) {
			/*
			 * This is bad news, we need to rollback.
			 * FIXME: finish.
			 */
			failed = true;

		} else if (r == 0)
			/*
			 * NOTE(review): r == 0 appears to mean the block was
			 * newly marked, so the metadata must be committed
			 * before the bio is allowed through -- confirm
			 * against writeset_test_and_set().
			 */
			commit_needed = true;

		bio_list_add(&marked_bios, bio);
	}

	if (commit_needed) {
		r = metadata_commit(era->md);
		if (r)
			failed = true;
	}

	/* Bios are only issued once the marks are safely committed. */
	if (failed)
		while ((bio = bio_list_pop(&marked_bios)))
			bio_io_error(bio);
	else
		while ((bio = bio_list_pop(&marked_bios)))
			generic_make_request(bio);
}
1256*eec40579SJoe Thornber 
/*
 * Execute queued rpc calls, commit once for the whole batch, then wake
 * the waiting callers.
 */
static void process_rpc_calls(struct era *era)
{
	int r;
	bool need_commit = false;
	struct list_head calls;
	struct rpc *rpc, *tmp;

	/* Take ownership of the queued calls. */
	INIT_LIST_HEAD(&calls);
	spin_lock(&era->rpc_lock);
	list_splice_init(&era->rpc_calls, &calls);
	spin_unlock(&era->rpc_lock);

	list_for_each_entry_safe(rpc, tmp, &calls, list) {
		rpc->result = rpc->fn0 ? rpc->fn0(era->md) : rpc->fn1(era->md, rpc->arg);
		need_commit = true;
	}

	/* A failed commit overrides every individual result. */
	if (need_commit) {
		r = metadata_commit(era->md);
		if (r)
			list_for_each_entry_safe(rpc, tmp, &calls, list)
				rpc->result = r;
	}

	list_for_each_entry_safe(rpc, tmp, &calls, list)
		complete(&rpc->complete);
}
1284*eec40579SJoe Thornber 
1285*eec40579SJoe Thornber static void kick_off_digest(struct era *era)
1286*eec40579SJoe Thornber {
1287*eec40579SJoe Thornber 	if (era->md->archived_writesets) {
1288*eec40579SJoe Thornber 		era->md->archived_writesets = false;
1289*eec40579SJoe Thornber 		metadata_digest_start(era->md, &era->digest);
1290*eec40579SJoe Thornber 	}
1291*eec40579SJoe Thornber }
1292*eec40579SJoe Thornber 
/*
 * Worker entry point: digest archived writesets, issue deferred bios
 * and service rpc calls.
 */
static void do_work(struct work_struct *ws)
{
	struct era *era = container_of(ws, struct era, worker);

	kick_off_digest(era);
	process_old_eras(era);
	process_deferred_bios(era);
	process_rpc_calls(era);
}
1302*eec40579SJoe Thornber 
/* Queue a bio for the worker thread and kick it. */
static void defer_bio(struct era *era, struct bio *bio)
{
	spin_lock(&era->deferred_lock);
	bio_list_add(&era->deferred_bios, bio);
	spin_unlock(&era->deferred_lock);

	wake_worker(era);
}
1311*eec40579SJoe Thornber 
1312*eec40579SJoe Thornber /*
1313*eec40579SJoe Thornber  * Make an rpc call to the worker to change the metadata.
1314*eec40579SJoe Thornber  */
1315*eec40579SJoe Thornber static int perform_rpc(struct era *era, struct rpc *rpc)
1316*eec40579SJoe Thornber {
1317*eec40579SJoe Thornber 	rpc->result = 0;
1318*eec40579SJoe Thornber 	init_completion(&rpc->complete);
1319*eec40579SJoe Thornber 
1320*eec40579SJoe Thornber 	spin_lock(&era->rpc_lock);
1321*eec40579SJoe Thornber 	list_add(&rpc->list, &era->rpc_calls);
1322*eec40579SJoe Thornber 	spin_unlock(&era->rpc_lock);
1323*eec40579SJoe Thornber 
1324*eec40579SJoe Thornber 	wake_worker(era);
1325*eec40579SJoe Thornber 	wait_for_completion(&rpc->complete);
1326*eec40579SJoe Thornber 
1327*eec40579SJoe Thornber 	return rpc->result;
1328*eec40579SJoe Thornber }
1329*eec40579SJoe Thornber 
1330*eec40579SJoe Thornber static int in_worker0(struct era *era, int (*fn)(struct era_metadata *))
1331*eec40579SJoe Thornber {
1332*eec40579SJoe Thornber 	struct rpc rpc;
1333*eec40579SJoe Thornber 	rpc.fn0 = fn;
1334*eec40579SJoe Thornber 	rpc.fn1 = NULL;
1335*eec40579SJoe Thornber 
1336*eec40579SJoe Thornber 	return perform_rpc(era, &rpc);
1337*eec40579SJoe Thornber }
1338*eec40579SJoe Thornber 
1339*eec40579SJoe Thornber static int in_worker1(struct era *era,
1340*eec40579SJoe Thornber 		      int (*fn)(struct era_metadata *, void *), void *arg)
1341*eec40579SJoe Thornber {
1342*eec40579SJoe Thornber 	struct rpc rpc;
1343*eec40579SJoe Thornber 	rpc.fn0 = NULL;
1344*eec40579SJoe Thornber 	rpc.fn1 = fn;
1345*eec40579SJoe Thornber 	rpc.arg = arg;
1346*eec40579SJoe Thornber 
1347*eec40579SJoe Thornber 	return perform_rpc(era, &rpc);
1348*eec40579SJoe Thornber }
1349*eec40579SJoe Thornber 
1350*eec40579SJoe Thornber static void start_worker(struct era *era)
1351*eec40579SJoe Thornber {
1352*eec40579SJoe Thornber 	atomic_set(&era->suspended, 0);
1353*eec40579SJoe Thornber }
1354*eec40579SJoe Thornber 
1355*eec40579SJoe Thornber static void stop_worker(struct era *era)
1356*eec40579SJoe Thornber {
1357*eec40579SJoe Thornber 	atomic_set(&era->suspended, 1);
1358*eec40579SJoe Thornber 	flush_workqueue(era->wq);
1359*eec40579SJoe Thornber }
1360*eec40579SJoe Thornber 
1361*eec40579SJoe Thornber /*----------------------------------------------------------------
1362*eec40579SJoe Thornber  * Target methods
1363*eec40579SJoe Thornber  *--------------------------------------------------------------*/
1364*eec40579SJoe Thornber static int dev_is_congested(struct dm_dev *dev, int bdi_bits)
1365*eec40579SJoe Thornber {
1366*eec40579SJoe Thornber 	struct request_queue *q = bdev_get_queue(dev->bdev);
1367*eec40579SJoe Thornber 	return bdi_congested(&q->backing_dev_info, bdi_bits);
1368*eec40579SJoe Thornber }
1369*eec40579SJoe Thornber 
1370*eec40579SJoe Thornber static int era_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
1371*eec40579SJoe Thornber {
1372*eec40579SJoe Thornber 	struct era *era = container_of(cb, struct era, callbacks);
1373*eec40579SJoe Thornber 	return dev_is_congested(era->origin_dev, bdi_bits);
1374*eec40579SJoe Thornber }
1375*eec40579SJoe Thornber 
1376*eec40579SJoe Thornber static void era_destroy(struct era *era)
1377*eec40579SJoe Thornber {
1378*eec40579SJoe Thornber 	metadata_close(era->md);
1379*eec40579SJoe Thornber 
1380*eec40579SJoe Thornber 	if (era->wq)
1381*eec40579SJoe Thornber 		destroy_workqueue(era->wq);
1382*eec40579SJoe Thornber 
1383*eec40579SJoe Thornber 	if (era->origin_dev)
1384*eec40579SJoe Thornber 		dm_put_device(era->ti, era->origin_dev);
1385*eec40579SJoe Thornber 
1386*eec40579SJoe Thornber 	if (era->metadata_dev)
1387*eec40579SJoe Thornber 		dm_put_device(era->ti, era->metadata_dev);
1388*eec40579SJoe Thornber 
1389*eec40579SJoe Thornber 	kfree(era);
1390*eec40579SJoe Thornber }
1391*eec40579SJoe Thornber 
1392*eec40579SJoe Thornber static dm_block_t calc_nr_blocks(struct era *era)
1393*eec40579SJoe Thornber {
1394*eec40579SJoe Thornber 	return dm_sector_div_up(era->ti->len, era->sectors_per_block);
1395*eec40579SJoe Thornber }
1396*eec40579SJoe Thornber 
1397*eec40579SJoe Thornber static bool valid_block_size(dm_block_t block_size)
1398*eec40579SJoe Thornber {
1399*eec40579SJoe Thornber 	bool greater_than_zero = block_size > 0;
1400*eec40579SJoe Thornber 	bool multiple_of_min_block_size = (block_size & (MIN_BLOCK_SIZE - 1)) == 0;
1401*eec40579SJoe Thornber 
1402*eec40579SJoe Thornber 	return greater_than_zero && multiple_of_min_block_size;
1403*eec40579SJoe Thornber }
1404*eec40579SJoe Thornber 
1405*eec40579SJoe Thornber /*
1406*eec40579SJoe Thornber  * <metadata dev> <data dev> <data block size (sectors)>
1407*eec40579SJoe Thornber  */
1408*eec40579SJoe Thornber static int era_ctr(struct dm_target *ti, unsigned argc, char **argv)
1409*eec40579SJoe Thornber {
1410*eec40579SJoe Thornber 	int r;
1411*eec40579SJoe Thornber 	char dummy;
1412*eec40579SJoe Thornber 	struct era *era;
1413*eec40579SJoe Thornber 	struct era_metadata *md;
1414*eec40579SJoe Thornber 
1415*eec40579SJoe Thornber 	if (argc != 3) {
1416*eec40579SJoe Thornber 		ti->error = "Invalid argument count";
1417*eec40579SJoe Thornber 		return -EINVAL;
1418*eec40579SJoe Thornber 	}
1419*eec40579SJoe Thornber 
1420*eec40579SJoe Thornber 	era = kzalloc(sizeof(*era), GFP_KERNEL);
1421*eec40579SJoe Thornber 	if (!era) {
1422*eec40579SJoe Thornber 		ti->error = "Error allocating era structure";
1423*eec40579SJoe Thornber 		return -ENOMEM;
1424*eec40579SJoe Thornber 	}
1425*eec40579SJoe Thornber 
1426*eec40579SJoe Thornber 	era->ti = ti;
1427*eec40579SJoe Thornber 
1428*eec40579SJoe Thornber 	r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &era->metadata_dev);
1429*eec40579SJoe Thornber 	if (r) {
1430*eec40579SJoe Thornber 		ti->error = "Error opening metadata device";
1431*eec40579SJoe Thornber 		era_destroy(era);
1432*eec40579SJoe Thornber 		return -EINVAL;
1433*eec40579SJoe Thornber 	}
1434*eec40579SJoe Thornber 
1435*eec40579SJoe Thornber 	r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &era->origin_dev);
1436*eec40579SJoe Thornber 	if (r) {
1437*eec40579SJoe Thornber 		ti->error = "Error opening data device";
1438*eec40579SJoe Thornber 		era_destroy(era);
1439*eec40579SJoe Thornber 		return -EINVAL;
1440*eec40579SJoe Thornber 	}
1441*eec40579SJoe Thornber 
1442*eec40579SJoe Thornber 	r = sscanf(argv[2], "%u%c", &era->sectors_per_block, &dummy);
1443*eec40579SJoe Thornber 	if (r != 1) {
1444*eec40579SJoe Thornber 		ti->error = "Error parsing block size";
1445*eec40579SJoe Thornber 		era_destroy(era);
1446*eec40579SJoe Thornber 		return -EINVAL;
1447*eec40579SJoe Thornber 	}
1448*eec40579SJoe Thornber 
1449*eec40579SJoe Thornber 	r = dm_set_target_max_io_len(ti, era->sectors_per_block);
1450*eec40579SJoe Thornber 	if (r) {
1451*eec40579SJoe Thornber 		ti->error = "could not set max io len";
1452*eec40579SJoe Thornber 		era_destroy(era);
1453*eec40579SJoe Thornber 		return -EINVAL;
1454*eec40579SJoe Thornber 	}
1455*eec40579SJoe Thornber 
1456*eec40579SJoe Thornber 	if (!valid_block_size(era->sectors_per_block)) {
1457*eec40579SJoe Thornber 		ti->error = "Invalid block size";
1458*eec40579SJoe Thornber 		era_destroy(era);
1459*eec40579SJoe Thornber 		return -EINVAL;
1460*eec40579SJoe Thornber 	}
1461*eec40579SJoe Thornber 	if (era->sectors_per_block & (era->sectors_per_block - 1))
1462*eec40579SJoe Thornber 		era->sectors_per_block_shift = -1;
1463*eec40579SJoe Thornber 	else
1464*eec40579SJoe Thornber 		era->sectors_per_block_shift = __ffs(era->sectors_per_block);
1465*eec40579SJoe Thornber 
1466*eec40579SJoe Thornber 	md = metadata_open(era->metadata_dev->bdev, era->sectors_per_block, true);
1467*eec40579SJoe Thornber 	if (IS_ERR(md)) {
1468*eec40579SJoe Thornber 		ti->error = "Error reading metadata";
1469*eec40579SJoe Thornber 		era_destroy(era);
1470*eec40579SJoe Thornber 		return PTR_ERR(md);
1471*eec40579SJoe Thornber 	}
1472*eec40579SJoe Thornber 	era->md = md;
1473*eec40579SJoe Thornber 
1474*eec40579SJoe Thornber 	era->nr_blocks = calc_nr_blocks(era);
1475*eec40579SJoe Thornber 
1476*eec40579SJoe Thornber 	r = metadata_resize(era->md, &era->nr_blocks);
1477*eec40579SJoe Thornber 	if (r) {
1478*eec40579SJoe Thornber 		ti->error = "couldn't resize metadata";
1479*eec40579SJoe Thornber 		era_destroy(era);
1480*eec40579SJoe Thornber 		return -ENOMEM;
1481*eec40579SJoe Thornber 	}
1482*eec40579SJoe Thornber 
1483*eec40579SJoe Thornber 	era->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
1484*eec40579SJoe Thornber 	if (!era->wq) {
1485*eec40579SJoe Thornber 		ti->error = "could not create workqueue for metadata object";
1486*eec40579SJoe Thornber 		era_destroy(era);
1487*eec40579SJoe Thornber 		return -ENOMEM;
1488*eec40579SJoe Thornber 	}
1489*eec40579SJoe Thornber 	INIT_WORK(&era->worker, do_work);
1490*eec40579SJoe Thornber 
1491*eec40579SJoe Thornber 	spin_lock_init(&era->deferred_lock);
1492*eec40579SJoe Thornber 	bio_list_init(&era->deferred_bios);
1493*eec40579SJoe Thornber 
1494*eec40579SJoe Thornber 	spin_lock_init(&era->rpc_lock);
1495*eec40579SJoe Thornber 	INIT_LIST_HEAD(&era->rpc_calls);
1496*eec40579SJoe Thornber 
1497*eec40579SJoe Thornber 	ti->private = era;
1498*eec40579SJoe Thornber 	ti->num_flush_bios = 1;
1499*eec40579SJoe Thornber 	ti->flush_supported = true;
1500*eec40579SJoe Thornber 
1501*eec40579SJoe Thornber 	ti->num_discard_bios = 1;
1502*eec40579SJoe Thornber 	ti->discards_supported = true;
1503*eec40579SJoe Thornber 	era->callbacks.congested_fn = era_is_congested;
1504*eec40579SJoe Thornber 	dm_table_add_target_callbacks(ti->table, &era->callbacks);
1505*eec40579SJoe Thornber 
1506*eec40579SJoe Thornber 	return 0;
1507*eec40579SJoe Thornber }
1508*eec40579SJoe Thornber 
1509*eec40579SJoe Thornber static void era_dtr(struct dm_target *ti)
1510*eec40579SJoe Thornber {
1511*eec40579SJoe Thornber 	era_destroy(ti->private);
1512*eec40579SJoe Thornber }
1513*eec40579SJoe Thornber 
1514*eec40579SJoe Thornber static int era_map(struct dm_target *ti, struct bio *bio)
1515*eec40579SJoe Thornber {
1516*eec40579SJoe Thornber 	struct era *era = ti->private;
1517*eec40579SJoe Thornber 	dm_block_t block = get_block(era, bio);
1518*eec40579SJoe Thornber 
1519*eec40579SJoe Thornber 	/*
1520*eec40579SJoe Thornber 	 * All bios get remapped to the origin device.  We do this now, but
1521*eec40579SJoe Thornber 	 * it may not get issued until later.  Depending on whether the
1522*eec40579SJoe Thornber 	 * block is marked in this era.
1523*eec40579SJoe Thornber 	 */
1524*eec40579SJoe Thornber 	remap_to_origin(era, bio);
1525*eec40579SJoe Thornber 
1526*eec40579SJoe Thornber 	/*
1527*eec40579SJoe Thornber 	 * REQ_FLUSH bios carry no data, so we're not interested in them.
1528*eec40579SJoe Thornber 	 */
1529*eec40579SJoe Thornber 	if (!(bio->bi_rw & REQ_FLUSH) &&
1530*eec40579SJoe Thornber 	    (bio_data_dir(bio) == WRITE) &&
1531*eec40579SJoe Thornber 	    !metadata_current_marked(era->md, block)) {
1532*eec40579SJoe Thornber 		defer_bio(era, bio);
1533*eec40579SJoe Thornber 		return DM_MAPIO_SUBMITTED;
1534*eec40579SJoe Thornber 	}
1535*eec40579SJoe Thornber 
1536*eec40579SJoe Thornber 	return DM_MAPIO_REMAPPED;
1537*eec40579SJoe Thornber }
1538*eec40579SJoe Thornber 
1539*eec40579SJoe Thornber static void era_postsuspend(struct dm_target *ti)
1540*eec40579SJoe Thornber {
1541*eec40579SJoe Thornber 	int r;
1542*eec40579SJoe Thornber 	struct era *era = ti->private;
1543*eec40579SJoe Thornber 
1544*eec40579SJoe Thornber 	r = in_worker0(era, metadata_era_archive);
1545*eec40579SJoe Thornber 	if (r) {
1546*eec40579SJoe Thornber 		DMERR("%s: couldn't archive current era", __func__);
1547*eec40579SJoe Thornber 		/* FIXME: fail mode */
1548*eec40579SJoe Thornber 	}
1549*eec40579SJoe Thornber 
1550*eec40579SJoe Thornber 	stop_worker(era);
1551*eec40579SJoe Thornber }
1552*eec40579SJoe Thornber 
1553*eec40579SJoe Thornber static int era_preresume(struct dm_target *ti)
1554*eec40579SJoe Thornber {
1555*eec40579SJoe Thornber 	int r;
1556*eec40579SJoe Thornber 	struct era *era = ti->private;
1557*eec40579SJoe Thornber 	dm_block_t new_size = calc_nr_blocks(era);
1558*eec40579SJoe Thornber 
1559*eec40579SJoe Thornber 	if (era->nr_blocks != new_size) {
1560*eec40579SJoe Thornber 		r = in_worker1(era, metadata_resize, &new_size);
1561*eec40579SJoe Thornber 		if (r)
1562*eec40579SJoe Thornber 			return r;
1563*eec40579SJoe Thornber 
1564*eec40579SJoe Thornber 		era->nr_blocks = new_size;
1565*eec40579SJoe Thornber 	}
1566*eec40579SJoe Thornber 
1567*eec40579SJoe Thornber 	start_worker(era);
1568*eec40579SJoe Thornber 
1569*eec40579SJoe Thornber 	r = in_worker0(era, metadata_new_era);
1570*eec40579SJoe Thornber 	if (r) {
1571*eec40579SJoe Thornber 		DMERR("%s: metadata_era_rollover failed", __func__);
1572*eec40579SJoe Thornber 		return r;
1573*eec40579SJoe Thornber 	}
1574*eec40579SJoe Thornber 
1575*eec40579SJoe Thornber 	return 0;
1576*eec40579SJoe Thornber }
1577*eec40579SJoe Thornber 
1578*eec40579SJoe Thornber /*
1579*eec40579SJoe Thornber  * Status format:
1580*eec40579SJoe Thornber  *
1581*eec40579SJoe Thornber  * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
1582*eec40579SJoe Thornber  * <current era> <held metadata root | '-'>
1583*eec40579SJoe Thornber  */
1584*eec40579SJoe Thornber static void era_status(struct dm_target *ti, status_type_t type,
1585*eec40579SJoe Thornber 		       unsigned status_flags, char *result, unsigned maxlen)
1586*eec40579SJoe Thornber {
1587*eec40579SJoe Thornber 	int r;
1588*eec40579SJoe Thornber 	struct era *era = ti->private;
1589*eec40579SJoe Thornber 	ssize_t sz = 0;
1590*eec40579SJoe Thornber 	struct metadata_stats stats;
1591*eec40579SJoe Thornber 	char buf[BDEVNAME_SIZE];
1592*eec40579SJoe Thornber 
1593*eec40579SJoe Thornber 	switch (type) {
1594*eec40579SJoe Thornber 	case STATUSTYPE_INFO:
1595*eec40579SJoe Thornber 		r = in_worker1(era, metadata_get_stats, &stats);
1596*eec40579SJoe Thornber 		if (r)
1597*eec40579SJoe Thornber 			goto err;
1598*eec40579SJoe Thornber 
1599*eec40579SJoe Thornber 		DMEMIT("%u %llu/%llu %u",
1600*eec40579SJoe Thornber 		       (unsigned) (DM_ERA_METADATA_BLOCK_SIZE >> SECTOR_SHIFT),
1601*eec40579SJoe Thornber 		       (unsigned long long) stats.used,
1602*eec40579SJoe Thornber 		       (unsigned long long) stats.total,
1603*eec40579SJoe Thornber 		       (unsigned) stats.era);
1604*eec40579SJoe Thornber 
1605*eec40579SJoe Thornber 		if (stats.snap != SUPERBLOCK_LOCATION)
1606*eec40579SJoe Thornber 			DMEMIT(" %llu", stats.snap);
1607*eec40579SJoe Thornber 		else
1608*eec40579SJoe Thornber 			DMEMIT(" -");
1609*eec40579SJoe Thornber 		break;
1610*eec40579SJoe Thornber 
1611*eec40579SJoe Thornber 	case STATUSTYPE_TABLE:
1612*eec40579SJoe Thornber 		format_dev_t(buf, era->metadata_dev->bdev->bd_dev);
1613*eec40579SJoe Thornber 		DMEMIT("%s ", buf);
1614*eec40579SJoe Thornber 		format_dev_t(buf, era->origin_dev->bdev->bd_dev);
1615*eec40579SJoe Thornber 		DMEMIT("%s %u", buf, era->sectors_per_block);
1616*eec40579SJoe Thornber 		break;
1617*eec40579SJoe Thornber 	}
1618*eec40579SJoe Thornber 
1619*eec40579SJoe Thornber 	return;
1620*eec40579SJoe Thornber 
1621*eec40579SJoe Thornber err:
1622*eec40579SJoe Thornber 	DMEMIT("Error");
1623*eec40579SJoe Thornber }
1624*eec40579SJoe Thornber 
1625*eec40579SJoe Thornber static int era_message(struct dm_target *ti, unsigned argc, char **argv)
1626*eec40579SJoe Thornber {
1627*eec40579SJoe Thornber 	struct era *era = ti->private;
1628*eec40579SJoe Thornber 
1629*eec40579SJoe Thornber 	if (argc != 1) {
1630*eec40579SJoe Thornber 		DMERR("incorrect number of message arguments");
1631*eec40579SJoe Thornber 		return -EINVAL;
1632*eec40579SJoe Thornber 	}
1633*eec40579SJoe Thornber 
1634*eec40579SJoe Thornber 	if (!strcasecmp(argv[0], "checkpoint"))
1635*eec40579SJoe Thornber 		return in_worker0(era, metadata_checkpoint);
1636*eec40579SJoe Thornber 
1637*eec40579SJoe Thornber 	if (!strcasecmp(argv[0], "take_metadata_snap"))
1638*eec40579SJoe Thornber 		return in_worker0(era, metadata_take_snap);
1639*eec40579SJoe Thornber 
1640*eec40579SJoe Thornber 	if (!strcasecmp(argv[0], "drop_metadata_snap"))
1641*eec40579SJoe Thornber 		return in_worker0(era, metadata_drop_snap);
1642*eec40579SJoe Thornber 
1643*eec40579SJoe Thornber 	DMERR("unsupported message '%s'", argv[0]);
1644*eec40579SJoe Thornber 	return -EINVAL;
1645*eec40579SJoe Thornber }
1646*eec40579SJoe Thornber 
1647*eec40579SJoe Thornber static sector_t get_dev_size(struct dm_dev *dev)
1648*eec40579SJoe Thornber {
1649*eec40579SJoe Thornber 	return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
1650*eec40579SJoe Thornber }
1651*eec40579SJoe Thornber 
1652*eec40579SJoe Thornber static int era_iterate_devices(struct dm_target *ti,
1653*eec40579SJoe Thornber 			       iterate_devices_callout_fn fn, void *data)
1654*eec40579SJoe Thornber {
1655*eec40579SJoe Thornber 	struct era *era = ti->private;
1656*eec40579SJoe Thornber 	return fn(ti, era->origin_dev, 0, get_dev_size(era->origin_dev), data);
1657*eec40579SJoe Thornber }
1658*eec40579SJoe Thornber 
1659*eec40579SJoe Thornber static int era_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
1660*eec40579SJoe Thornber 		     struct bio_vec *biovec, int max_size)
1661*eec40579SJoe Thornber {
1662*eec40579SJoe Thornber 	struct era *era = ti->private;
1663*eec40579SJoe Thornber 	struct request_queue *q = bdev_get_queue(era->origin_dev->bdev);
1664*eec40579SJoe Thornber 
1665*eec40579SJoe Thornber 	if (!q->merge_bvec_fn)
1666*eec40579SJoe Thornber 		return max_size;
1667*eec40579SJoe Thornber 
1668*eec40579SJoe Thornber 	bvm->bi_bdev = era->origin_dev->bdev;
1669*eec40579SJoe Thornber 
1670*eec40579SJoe Thornber 	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
1671*eec40579SJoe Thornber }
1672*eec40579SJoe Thornber 
1673*eec40579SJoe Thornber static void era_io_hints(struct dm_target *ti, struct queue_limits *limits)
1674*eec40579SJoe Thornber {
1675*eec40579SJoe Thornber 	struct era *era = ti->private;
1676*eec40579SJoe Thornber 	uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
1677*eec40579SJoe Thornber 
1678*eec40579SJoe Thornber 	/*
1679*eec40579SJoe Thornber 	 * If the system-determined stacked limits are compatible with the
1680*eec40579SJoe Thornber 	 * era device's blocksize (io_opt is a factor) do not override them.
1681*eec40579SJoe Thornber 	 */
1682*eec40579SJoe Thornber 	if (io_opt_sectors < era->sectors_per_block ||
1683*eec40579SJoe Thornber 	    do_div(io_opt_sectors, era->sectors_per_block)) {
1684*eec40579SJoe Thornber 		blk_limits_io_min(limits, 0);
1685*eec40579SJoe Thornber 		blk_limits_io_opt(limits, era->sectors_per_block << SECTOR_SHIFT);
1686*eec40579SJoe Thornber 	}
1687*eec40579SJoe Thornber }
1688*eec40579SJoe Thornber 
1689*eec40579SJoe Thornber /*----------------------------------------------------------------*/
1690*eec40579SJoe Thornber 
1691*eec40579SJoe Thornber static struct target_type era_target = {
1692*eec40579SJoe Thornber 	.name = "era",
1693*eec40579SJoe Thornber 	.version = {1, 0, 0},
1694*eec40579SJoe Thornber 	.module = THIS_MODULE,
1695*eec40579SJoe Thornber 	.ctr = era_ctr,
1696*eec40579SJoe Thornber 	.dtr = era_dtr,
1697*eec40579SJoe Thornber 	.map = era_map,
1698*eec40579SJoe Thornber 	.postsuspend = era_postsuspend,
1699*eec40579SJoe Thornber 	.preresume = era_preresume,
1700*eec40579SJoe Thornber 	.status = era_status,
1701*eec40579SJoe Thornber 	.message = era_message,
1702*eec40579SJoe Thornber 	.iterate_devices = era_iterate_devices,
1703*eec40579SJoe Thornber 	.merge = era_merge,
1704*eec40579SJoe Thornber 	.io_hints = era_io_hints
1705*eec40579SJoe Thornber };
1706*eec40579SJoe Thornber 
1707*eec40579SJoe Thornber static int __init dm_era_init(void)
1708*eec40579SJoe Thornber {
1709*eec40579SJoe Thornber 	int r;
1710*eec40579SJoe Thornber 
1711*eec40579SJoe Thornber 	r = dm_register_target(&era_target);
1712*eec40579SJoe Thornber 	if (r) {
1713*eec40579SJoe Thornber 		DMERR("era target registration failed: %d", r);
1714*eec40579SJoe Thornber 		return r;
1715*eec40579SJoe Thornber 	}
1716*eec40579SJoe Thornber 
1717*eec40579SJoe Thornber 	return 0;
1718*eec40579SJoe Thornber }
1719*eec40579SJoe Thornber 
1720*eec40579SJoe Thornber static void __exit dm_era_exit(void)
1721*eec40579SJoe Thornber {
1722*eec40579SJoe Thornber 	dm_unregister_target(&era_target);
1723*eec40579SJoe Thornber }
1724*eec40579SJoe Thornber 
1725*eec40579SJoe Thornber module_init(dm_era_init);
1726*eec40579SJoe Thornber module_exit(dm_era_exit);
1727*eec40579SJoe Thornber 
1728*eec40579SJoe Thornber MODULE_DESCRIPTION(DM_NAME " era target");
1729*eec40579SJoe Thornber MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
1730*eec40579SJoe Thornber MODULE_LICENSE("GPL");
1731