1*eec40579SJoe Thornber #include "dm.h" 2*eec40579SJoe Thornber #include "persistent-data/dm-transaction-manager.h" 3*eec40579SJoe Thornber #include "persistent-data/dm-bitset.h" 4*eec40579SJoe Thornber #include "persistent-data/dm-space-map.h" 5*eec40579SJoe Thornber 6*eec40579SJoe Thornber #include <linux/dm-io.h> 7*eec40579SJoe Thornber #include <linux/dm-kcopyd.h> 8*eec40579SJoe Thornber #include <linux/init.h> 9*eec40579SJoe Thornber #include <linux/mempool.h> 10*eec40579SJoe Thornber #include <linux/module.h> 11*eec40579SJoe Thornber #include <linux/slab.h> 12*eec40579SJoe Thornber #include <linux/vmalloc.h> 13*eec40579SJoe Thornber 14*eec40579SJoe Thornber #define DM_MSG_PREFIX "era" 15*eec40579SJoe Thornber 16*eec40579SJoe Thornber #define SUPERBLOCK_LOCATION 0 17*eec40579SJoe Thornber #define SUPERBLOCK_MAGIC 2126579579 18*eec40579SJoe Thornber #define SUPERBLOCK_CSUM_XOR 146538381 19*eec40579SJoe Thornber #define MIN_ERA_VERSION 1 20*eec40579SJoe Thornber #define MAX_ERA_VERSION 1 21*eec40579SJoe Thornber #define INVALID_WRITESET_ROOT SUPERBLOCK_LOCATION 22*eec40579SJoe Thornber #define MIN_BLOCK_SIZE 8 23*eec40579SJoe Thornber 24*eec40579SJoe Thornber /*---------------------------------------------------------------- 25*eec40579SJoe Thornber * Writeset 26*eec40579SJoe Thornber *--------------------------------------------------------------*/ 27*eec40579SJoe Thornber struct writeset_metadata { 28*eec40579SJoe Thornber uint32_t nr_bits; 29*eec40579SJoe Thornber dm_block_t root; 30*eec40579SJoe Thornber }; 31*eec40579SJoe Thornber 32*eec40579SJoe Thornber struct writeset { 33*eec40579SJoe Thornber struct writeset_metadata md; 34*eec40579SJoe Thornber 35*eec40579SJoe Thornber /* 36*eec40579SJoe Thornber * An in core copy of the bits to save constantly doing look ups on 37*eec40579SJoe Thornber * disk. 
38*eec40579SJoe Thornber */ 39*eec40579SJoe Thornber unsigned long *bits; 40*eec40579SJoe Thornber }; 41*eec40579SJoe Thornber 42*eec40579SJoe Thornber /* 43*eec40579SJoe Thornber * This does not free off the on disk bitset as this will normally be done 44*eec40579SJoe Thornber * after digesting into the era array. 45*eec40579SJoe Thornber */ 46*eec40579SJoe Thornber static void writeset_free(struct writeset *ws) 47*eec40579SJoe Thornber { 48*eec40579SJoe Thornber vfree(ws->bits); 49*eec40579SJoe Thornber } 50*eec40579SJoe Thornber 51*eec40579SJoe Thornber static int setup_on_disk_bitset(struct dm_disk_bitset *info, 52*eec40579SJoe Thornber unsigned nr_bits, dm_block_t *root) 53*eec40579SJoe Thornber { 54*eec40579SJoe Thornber int r; 55*eec40579SJoe Thornber 56*eec40579SJoe Thornber r = dm_bitset_empty(info, root); 57*eec40579SJoe Thornber if (r) 58*eec40579SJoe Thornber return r; 59*eec40579SJoe Thornber 60*eec40579SJoe Thornber return dm_bitset_resize(info, *root, 0, nr_bits, false, root); 61*eec40579SJoe Thornber } 62*eec40579SJoe Thornber 63*eec40579SJoe Thornber static size_t bitset_size(unsigned nr_bits) 64*eec40579SJoe Thornber { 65*eec40579SJoe Thornber return sizeof(unsigned long) * dm_div_up(nr_bits, BITS_PER_LONG); 66*eec40579SJoe Thornber } 67*eec40579SJoe Thornber 68*eec40579SJoe Thornber /* 69*eec40579SJoe Thornber * Allocates memory for the in core bitset. 
70*eec40579SJoe Thornber */ 71*eec40579SJoe Thornber static int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks) 72*eec40579SJoe Thornber { 73*eec40579SJoe Thornber ws->md.nr_bits = nr_blocks; 74*eec40579SJoe Thornber ws->md.root = INVALID_WRITESET_ROOT; 75*eec40579SJoe Thornber ws->bits = vzalloc(bitset_size(nr_blocks)); 76*eec40579SJoe Thornber if (!ws->bits) { 77*eec40579SJoe Thornber DMERR("%s: couldn't allocate in memory bitset", __func__); 78*eec40579SJoe Thornber return -ENOMEM; 79*eec40579SJoe Thornber } 80*eec40579SJoe Thornber 81*eec40579SJoe Thornber return 0; 82*eec40579SJoe Thornber } 83*eec40579SJoe Thornber 84*eec40579SJoe Thornber /* 85*eec40579SJoe Thornber * Wipes the in-core bitset, and creates a new on disk bitset. 86*eec40579SJoe Thornber */ 87*eec40579SJoe Thornber static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws) 88*eec40579SJoe Thornber { 89*eec40579SJoe Thornber int r; 90*eec40579SJoe Thornber 91*eec40579SJoe Thornber memset(ws->bits, 0, bitset_size(ws->md.nr_bits)); 92*eec40579SJoe Thornber 93*eec40579SJoe Thornber r = setup_on_disk_bitset(info, ws->md.nr_bits, &ws->md.root); 94*eec40579SJoe Thornber if (r) { 95*eec40579SJoe Thornber DMERR("%s: setup_on_disk_bitset failed", __func__); 96*eec40579SJoe Thornber return r; 97*eec40579SJoe Thornber } 98*eec40579SJoe Thornber 99*eec40579SJoe Thornber return 0; 100*eec40579SJoe Thornber } 101*eec40579SJoe Thornber 102*eec40579SJoe Thornber static bool writeset_marked(struct writeset *ws, dm_block_t block) 103*eec40579SJoe Thornber { 104*eec40579SJoe Thornber return test_bit(block, ws->bits); 105*eec40579SJoe Thornber } 106*eec40579SJoe Thornber 107*eec40579SJoe Thornber static int writeset_marked_on_disk(struct dm_disk_bitset *info, 108*eec40579SJoe Thornber struct writeset_metadata *m, dm_block_t block, 109*eec40579SJoe Thornber bool *result) 110*eec40579SJoe Thornber { 111*eec40579SJoe Thornber dm_block_t old = m->root; 112*eec40579SJoe Thornber 
113*eec40579SJoe Thornber /* 114*eec40579SJoe Thornber * The bitset was flushed when it was archived, so we know there'll 115*eec40579SJoe Thornber * be no change to the root. 116*eec40579SJoe Thornber */ 117*eec40579SJoe Thornber int r = dm_bitset_test_bit(info, m->root, block, &m->root, result); 118*eec40579SJoe Thornber if (r) { 119*eec40579SJoe Thornber DMERR("%s: dm_bitset_test_bit failed", __func__); 120*eec40579SJoe Thornber return r; 121*eec40579SJoe Thornber } 122*eec40579SJoe Thornber 123*eec40579SJoe Thornber BUG_ON(m->root != old); 124*eec40579SJoe Thornber 125*eec40579SJoe Thornber return r; 126*eec40579SJoe Thornber } 127*eec40579SJoe Thornber 128*eec40579SJoe Thornber /* 129*eec40579SJoe Thornber * Returns < 0 on error, 0 if the bit wasn't previously set, 1 if it was. 130*eec40579SJoe Thornber */ 131*eec40579SJoe Thornber static int writeset_test_and_set(struct dm_disk_bitset *info, 132*eec40579SJoe Thornber struct writeset *ws, uint32_t block) 133*eec40579SJoe Thornber { 134*eec40579SJoe Thornber int r; 135*eec40579SJoe Thornber 136*eec40579SJoe Thornber if (!test_and_set_bit(block, ws->bits)) { 137*eec40579SJoe Thornber r = dm_bitset_set_bit(info, ws->md.root, block, &ws->md.root); 138*eec40579SJoe Thornber if (r) { 139*eec40579SJoe Thornber /* FIXME: fail mode */ 140*eec40579SJoe Thornber return r; 141*eec40579SJoe Thornber } 142*eec40579SJoe Thornber 143*eec40579SJoe Thornber return 0; 144*eec40579SJoe Thornber } 145*eec40579SJoe Thornber 146*eec40579SJoe Thornber return 1; 147*eec40579SJoe Thornber } 148*eec40579SJoe Thornber 149*eec40579SJoe Thornber /*---------------------------------------------------------------- 150*eec40579SJoe Thornber * On disk metadata layout 151*eec40579SJoe Thornber *--------------------------------------------------------------*/ 152*eec40579SJoe Thornber #define SPACE_MAP_ROOT_SIZE 128 153*eec40579SJoe Thornber #define UUID_LEN 16 154*eec40579SJoe Thornber 155*eec40579SJoe Thornber struct writeset_disk { 
156*eec40579SJoe Thornber __le32 nr_bits; 157*eec40579SJoe Thornber __le64 root; 158*eec40579SJoe Thornber } __packed; 159*eec40579SJoe Thornber 160*eec40579SJoe Thornber struct superblock_disk { 161*eec40579SJoe Thornber __le32 csum; 162*eec40579SJoe Thornber __le32 flags; 163*eec40579SJoe Thornber __le64 blocknr; 164*eec40579SJoe Thornber 165*eec40579SJoe Thornber __u8 uuid[UUID_LEN]; 166*eec40579SJoe Thornber __le64 magic; 167*eec40579SJoe Thornber __le32 version; 168*eec40579SJoe Thornber 169*eec40579SJoe Thornber __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; 170*eec40579SJoe Thornber 171*eec40579SJoe Thornber __le32 data_block_size; 172*eec40579SJoe Thornber __le32 metadata_block_size; 173*eec40579SJoe Thornber __le32 nr_blocks; 174*eec40579SJoe Thornber 175*eec40579SJoe Thornber __le32 current_era; 176*eec40579SJoe Thornber struct writeset_disk current_writeset; 177*eec40579SJoe Thornber 178*eec40579SJoe Thornber /* 179*eec40579SJoe Thornber * Only these two fields are valid within the metadata snapshot. 
180*eec40579SJoe Thornber */ 181*eec40579SJoe Thornber __le64 writeset_tree_root; 182*eec40579SJoe Thornber __le64 era_array_root; 183*eec40579SJoe Thornber 184*eec40579SJoe Thornber __le64 metadata_snap; 185*eec40579SJoe Thornber } __packed; 186*eec40579SJoe Thornber 187*eec40579SJoe Thornber /*---------------------------------------------------------------- 188*eec40579SJoe Thornber * Superblock validation 189*eec40579SJoe Thornber *--------------------------------------------------------------*/ 190*eec40579SJoe Thornber static void sb_prepare_for_write(struct dm_block_validator *v, 191*eec40579SJoe Thornber struct dm_block *b, 192*eec40579SJoe Thornber size_t sb_block_size) 193*eec40579SJoe Thornber { 194*eec40579SJoe Thornber struct superblock_disk *disk = dm_block_data(b); 195*eec40579SJoe Thornber 196*eec40579SJoe Thornber disk->blocknr = cpu_to_le64(dm_block_location(b)); 197*eec40579SJoe Thornber disk->csum = cpu_to_le32(dm_bm_checksum(&disk->flags, 198*eec40579SJoe Thornber sb_block_size - sizeof(__le32), 199*eec40579SJoe Thornber SUPERBLOCK_CSUM_XOR)); 200*eec40579SJoe Thornber } 201*eec40579SJoe Thornber 202*eec40579SJoe Thornber static int check_metadata_version(struct superblock_disk *disk) 203*eec40579SJoe Thornber { 204*eec40579SJoe Thornber uint32_t metadata_version = le32_to_cpu(disk->version); 205*eec40579SJoe Thornber if (metadata_version < MIN_ERA_VERSION || metadata_version > MAX_ERA_VERSION) { 206*eec40579SJoe Thornber DMERR("Era metadata version %u found, but only versions between %u and %u supported.", 207*eec40579SJoe Thornber metadata_version, MIN_ERA_VERSION, MAX_ERA_VERSION); 208*eec40579SJoe Thornber return -EINVAL; 209*eec40579SJoe Thornber } 210*eec40579SJoe Thornber 211*eec40579SJoe Thornber return 0; 212*eec40579SJoe Thornber } 213*eec40579SJoe Thornber 214*eec40579SJoe Thornber static int sb_check(struct dm_block_validator *v, 215*eec40579SJoe Thornber struct dm_block *b, 216*eec40579SJoe Thornber size_t sb_block_size) 
217*eec40579SJoe Thornber { 218*eec40579SJoe Thornber struct superblock_disk *disk = dm_block_data(b); 219*eec40579SJoe Thornber __le32 csum_le; 220*eec40579SJoe Thornber 221*eec40579SJoe Thornber if (dm_block_location(b) != le64_to_cpu(disk->blocknr)) { 222*eec40579SJoe Thornber DMERR("sb_check failed: blocknr %llu: wanted %llu", 223*eec40579SJoe Thornber le64_to_cpu(disk->blocknr), 224*eec40579SJoe Thornber (unsigned long long)dm_block_location(b)); 225*eec40579SJoe Thornber return -ENOTBLK; 226*eec40579SJoe Thornber } 227*eec40579SJoe Thornber 228*eec40579SJoe Thornber if (le64_to_cpu(disk->magic) != SUPERBLOCK_MAGIC) { 229*eec40579SJoe Thornber DMERR("sb_check failed: magic %llu: wanted %llu", 230*eec40579SJoe Thornber le64_to_cpu(disk->magic), 231*eec40579SJoe Thornber (unsigned long long) SUPERBLOCK_MAGIC); 232*eec40579SJoe Thornber return -EILSEQ; 233*eec40579SJoe Thornber } 234*eec40579SJoe Thornber 235*eec40579SJoe Thornber csum_le = cpu_to_le32(dm_bm_checksum(&disk->flags, 236*eec40579SJoe Thornber sb_block_size - sizeof(__le32), 237*eec40579SJoe Thornber SUPERBLOCK_CSUM_XOR)); 238*eec40579SJoe Thornber if (csum_le != disk->csum) { 239*eec40579SJoe Thornber DMERR("sb_check failed: csum %u: wanted %u", 240*eec40579SJoe Thornber le32_to_cpu(csum_le), le32_to_cpu(disk->csum)); 241*eec40579SJoe Thornber return -EILSEQ; 242*eec40579SJoe Thornber } 243*eec40579SJoe Thornber 244*eec40579SJoe Thornber return check_metadata_version(disk); 245*eec40579SJoe Thornber } 246*eec40579SJoe Thornber 247*eec40579SJoe Thornber static struct dm_block_validator sb_validator = { 248*eec40579SJoe Thornber .name = "superblock", 249*eec40579SJoe Thornber .prepare_for_write = sb_prepare_for_write, 250*eec40579SJoe Thornber .check = sb_check 251*eec40579SJoe Thornber }; 252*eec40579SJoe Thornber 253*eec40579SJoe Thornber /*---------------------------------------------------------------- 254*eec40579SJoe Thornber * Low level metadata handling 255*eec40579SJoe Thornber 
*--------------------------------------------------------------*/ 256*eec40579SJoe Thornber #define DM_ERA_METADATA_BLOCK_SIZE 4096 257*eec40579SJoe Thornber #define DM_ERA_METADATA_CACHE_SIZE 64 258*eec40579SJoe Thornber #define ERA_MAX_CONCURRENT_LOCKS 5 259*eec40579SJoe Thornber 260*eec40579SJoe Thornber struct era_metadata { 261*eec40579SJoe Thornber struct block_device *bdev; 262*eec40579SJoe Thornber struct dm_block_manager *bm; 263*eec40579SJoe Thornber struct dm_space_map *sm; 264*eec40579SJoe Thornber struct dm_transaction_manager *tm; 265*eec40579SJoe Thornber 266*eec40579SJoe Thornber dm_block_t block_size; 267*eec40579SJoe Thornber uint32_t nr_blocks; 268*eec40579SJoe Thornber 269*eec40579SJoe Thornber uint32_t current_era; 270*eec40579SJoe Thornber 271*eec40579SJoe Thornber /* 272*eec40579SJoe Thornber * We preallocate 2 writesets. When an era rolls over we 273*eec40579SJoe Thornber * switch between them. This means the allocation is done at 274*eec40579SJoe Thornber * preresume time, rather than on the io path. 275*eec40579SJoe Thornber */ 276*eec40579SJoe Thornber struct writeset writesets[2]; 277*eec40579SJoe Thornber struct writeset *current_writeset; 278*eec40579SJoe Thornber 279*eec40579SJoe Thornber dm_block_t writeset_tree_root; 280*eec40579SJoe Thornber dm_block_t era_array_root; 281*eec40579SJoe Thornber 282*eec40579SJoe Thornber struct dm_disk_bitset bitset_info; 283*eec40579SJoe Thornber struct dm_btree_info writeset_tree_info; 284*eec40579SJoe Thornber struct dm_array_info era_array_info; 285*eec40579SJoe Thornber 286*eec40579SJoe Thornber dm_block_t metadata_snap; 287*eec40579SJoe Thornber 288*eec40579SJoe Thornber /* 289*eec40579SJoe Thornber * A flag that is set whenever a writeset has been archived. 
290*eec40579SJoe Thornber */ 291*eec40579SJoe Thornber bool archived_writesets; 292*eec40579SJoe Thornber }; 293*eec40579SJoe Thornber 294*eec40579SJoe Thornber static int superblock_read_lock(struct era_metadata *md, 295*eec40579SJoe Thornber struct dm_block **sblock) 296*eec40579SJoe Thornber { 297*eec40579SJoe Thornber return dm_bm_read_lock(md->bm, SUPERBLOCK_LOCATION, 298*eec40579SJoe Thornber &sb_validator, sblock); 299*eec40579SJoe Thornber } 300*eec40579SJoe Thornber 301*eec40579SJoe Thornber static int superblock_lock_zero(struct era_metadata *md, 302*eec40579SJoe Thornber struct dm_block **sblock) 303*eec40579SJoe Thornber { 304*eec40579SJoe Thornber return dm_bm_write_lock_zero(md->bm, SUPERBLOCK_LOCATION, 305*eec40579SJoe Thornber &sb_validator, sblock); 306*eec40579SJoe Thornber } 307*eec40579SJoe Thornber 308*eec40579SJoe Thornber static int superblock_lock(struct era_metadata *md, 309*eec40579SJoe Thornber struct dm_block **sblock) 310*eec40579SJoe Thornber { 311*eec40579SJoe Thornber return dm_bm_write_lock(md->bm, SUPERBLOCK_LOCATION, 312*eec40579SJoe Thornber &sb_validator, sblock); 313*eec40579SJoe Thornber } 314*eec40579SJoe Thornber 315*eec40579SJoe Thornber /* FIXME: duplication with cache and thin */ 316*eec40579SJoe Thornber static int superblock_all_zeroes(struct dm_block_manager *bm, bool *result) 317*eec40579SJoe Thornber { 318*eec40579SJoe Thornber int r; 319*eec40579SJoe Thornber unsigned i; 320*eec40579SJoe Thornber struct dm_block *b; 321*eec40579SJoe Thornber __le64 *data_le, zero = cpu_to_le64(0); 322*eec40579SJoe Thornber unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64); 323*eec40579SJoe Thornber 324*eec40579SJoe Thornber /* 325*eec40579SJoe Thornber * We can't use a validator here - it may be all zeroes. 
326*eec40579SJoe Thornber */ 327*eec40579SJoe Thornber r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &b); 328*eec40579SJoe Thornber if (r) 329*eec40579SJoe Thornber return r; 330*eec40579SJoe Thornber 331*eec40579SJoe Thornber data_le = dm_block_data(b); 332*eec40579SJoe Thornber *result = true; 333*eec40579SJoe Thornber for (i = 0; i < sb_block_size; i++) { 334*eec40579SJoe Thornber if (data_le[i] != zero) { 335*eec40579SJoe Thornber *result = false; 336*eec40579SJoe Thornber break; 337*eec40579SJoe Thornber } 338*eec40579SJoe Thornber } 339*eec40579SJoe Thornber 340*eec40579SJoe Thornber return dm_bm_unlock(b); 341*eec40579SJoe Thornber } 342*eec40579SJoe Thornber 343*eec40579SJoe Thornber /*----------------------------------------------------------------*/ 344*eec40579SJoe Thornber 345*eec40579SJoe Thornber static void ws_pack(const struct writeset_metadata *core, struct writeset_disk *disk) 346*eec40579SJoe Thornber { 347*eec40579SJoe Thornber disk->nr_bits = cpu_to_le32(core->nr_bits); 348*eec40579SJoe Thornber disk->root = cpu_to_le64(core->root); 349*eec40579SJoe Thornber } 350*eec40579SJoe Thornber 351*eec40579SJoe Thornber static void ws_unpack(const struct writeset_disk *disk, struct writeset_metadata *core) 352*eec40579SJoe Thornber { 353*eec40579SJoe Thornber core->nr_bits = le32_to_cpu(disk->nr_bits); 354*eec40579SJoe Thornber core->root = le64_to_cpu(disk->root); 355*eec40579SJoe Thornber } 356*eec40579SJoe Thornber 357*eec40579SJoe Thornber static void ws_inc(void *context, const void *value) 358*eec40579SJoe Thornber { 359*eec40579SJoe Thornber struct era_metadata *md = context; 360*eec40579SJoe Thornber struct writeset_disk ws_d; 361*eec40579SJoe Thornber dm_block_t b; 362*eec40579SJoe Thornber 363*eec40579SJoe Thornber memcpy(&ws_d, value, sizeof(ws_d)); 364*eec40579SJoe Thornber b = le64_to_cpu(ws_d.root); 365*eec40579SJoe Thornber 366*eec40579SJoe Thornber dm_tm_inc(md->tm, b); 367*eec40579SJoe Thornber } 368*eec40579SJoe Thornber 
369*eec40579SJoe Thornber static void ws_dec(void *context, const void *value) 370*eec40579SJoe Thornber { 371*eec40579SJoe Thornber struct era_metadata *md = context; 372*eec40579SJoe Thornber struct writeset_disk ws_d; 373*eec40579SJoe Thornber dm_block_t b; 374*eec40579SJoe Thornber 375*eec40579SJoe Thornber memcpy(&ws_d, value, sizeof(ws_d)); 376*eec40579SJoe Thornber b = le64_to_cpu(ws_d.root); 377*eec40579SJoe Thornber 378*eec40579SJoe Thornber dm_bitset_del(&md->bitset_info, b); 379*eec40579SJoe Thornber } 380*eec40579SJoe Thornber 381*eec40579SJoe Thornber static int ws_eq(void *context, const void *value1, const void *value2) 382*eec40579SJoe Thornber { 383*eec40579SJoe Thornber return !memcmp(value1, value2, sizeof(struct writeset_metadata)); 384*eec40579SJoe Thornber } 385*eec40579SJoe Thornber 386*eec40579SJoe Thornber /*----------------------------------------------------------------*/ 387*eec40579SJoe Thornber 388*eec40579SJoe Thornber static void setup_writeset_tree_info(struct era_metadata *md) 389*eec40579SJoe Thornber { 390*eec40579SJoe Thornber struct dm_btree_value_type *vt = &md->writeset_tree_info.value_type; 391*eec40579SJoe Thornber md->writeset_tree_info.tm = md->tm; 392*eec40579SJoe Thornber md->writeset_tree_info.levels = 1; 393*eec40579SJoe Thornber vt->context = md; 394*eec40579SJoe Thornber vt->size = sizeof(struct writeset_disk); 395*eec40579SJoe Thornber vt->inc = ws_inc; 396*eec40579SJoe Thornber vt->dec = ws_dec; 397*eec40579SJoe Thornber vt->equal = ws_eq; 398*eec40579SJoe Thornber } 399*eec40579SJoe Thornber 400*eec40579SJoe Thornber static void setup_era_array_info(struct era_metadata *md) 401*eec40579SJoe Thornber 402*eec40579SJoe Thornber { 403*eec40579SJoe Thornber struct dm_btree_value_type vt; 404*eec40579SJoe Thornber vt.context = NULL; 405*eec40579SJoe Thornber vt.size = sizeof(__le32); 406*eec40579SJoe Thornber vt.inc = NULL; 407*eec40579SJoe Thornber vt.dec = NULL; 408*eec40579SJoe Thornber vt.equal = NULL; 
409*eec40579SJoe Thornber 410*eec40579SJoe Thornber dm_array_info_init(&md->era_array_info, md->tm, &vt); 411*eec40579SJoe Thornber } 412*eec40579SJoe Thornber 413*eec40579SJoe Thornber static void setup_infos(struct era_metadata *md) 414*eec40579SJoe Thornber { 415*eec40579SJoe Thornber dm_disk_bitset_init(md->tm, &md->bitset_info); 416*eec40579SJoe Thornber setup_writeset_tree_info(md); 417*eec40579SJoe Thornber setup_era_array_info(md); 418*eec40579SJoe Thornber } 419*eec40579SJoe Thornber 420*eec40579SJoe Thornber /*----------------------------------------------------------------*/ 421*eec40579SJoe Thornber 422*eec40579SJoe Thornber static int create_fresh_metadata(struct era_metadata *md) 423*eec40579SJoe Thornber { 424*eec40579SJoe Thornber int r; 425*eec40579SJoe Thornber 426*eec40579SJoe Thornber r = dm_tm_create_with_sm(md->bm, SUPERBLOCK_LOCATION, 427*eec40579SJoe Thornber &md->tm, &md->sm); 428*eec40579SJoe Thornber if (r < 0) { 429*eec40579SJoe Thornber DMERR("dm_tm_create_with_sm failed"); 430*eec40579SJoe Thornber return r; 431*eec40579SJoe Thornber } 432*eec40579SJoe Thornber 433*eec40579SJoe Thornber setup_infos(md); 434*eec40579SJoe Thornber 435*eec40579SJoe Thornber r = dm_btree_empty(&md->writeset_tree_info, &md->writeset_tree_root); 436*eec40579SJoe Thornber if (r) { 437*eec40579SJoe Thornber DMERR("couldn't create new writeset tree"); 438*eec40579SJoe Thornber goto bad; 439*eec40579SJoe Thornber } 440*eec40579SJoe Thornber 441*eec40579SJoe Thornber r = dm_array_empty(&md->era_array_info, &md->era_array_root); 442*eec40579SJoe Thornber if (r) { 443*eec40579SJoe Thornber DMERR("couldn't create era array"); 444*eec40579SJoe Thornber goto bad; 445*eec40579SJoe Thornber } 446*eec40579SJoe Thornber 447*eec40579SJoe Thornber return 0; 448*eec40579SJoe Thornber 449*eec40579SJoe Thornber bad: 450*eec40579SJoe Thornber dm_sm_destroy(md->sm); 451*eec40579SJoe Thornber dm_tm_destroy(md->tm); 452*eec40579SJoe Thornber 453*eec40579SJoe Thornber return r; 
454*eec40579SJoe Thornber } 455*eec40579SJoe Thornber 456*eec40579SJoe Thornber /* 457*eec40579SJoe Thornber * Writes a superblock, including the static fields that don't get updated 458*eec40579SJoe Thornber * with every commit (possible optimisation here). 'md' should be fully 459*eec40579SJoe Thornber * constructed when this is called. 460*eec40579SJoe Thornber */ 461*eec40579SJoe Thornber static int prepare_superblock(struct era_metadata *md, struct superblock_disk *disk) 462*eec40579SJoe Thornber { 463*eec40579SJoe Thornber int r; 464*eec40579SJoe Thornber size_t metadata_len; 465*eec40579SJoe Thornber 466*eec40579SJoe Thornber disk->magic = cpu_to_le64(SUPERBLOCK_MAGIC); 467*eec40579SJoe Thornber disk->flags = cpu_to_le32(0ul); 468*eec40579SJoe Thornber 469*eec40579SJoe Thornber /* FIXME: can't keep blanking the uuid (uuid is currently unused though) */ 470*eec40579SJoe Thornber memset(disk->uuid, 0, sizeof(disk->uuid)); 471*eec40579SJoe Thornber disk->version = cpu_to_le32(MAX_ERA_VERSION); 472*eec40579SJoe Thornber 473*eec40579SJoe Thornber r = dm_sm_root_size(md->sm, &metadata_len); 474*eec40579SJoe Thornber if (r < 0) 475*eec40579SJoe Thornber return r; 476*eec40579SJoe Thornber 477*eec40579SJoe Thornber r = dm_sm_copy_root(md->sm, &disk->metadata_space_map_root, 478*eec40579SJoe Thornber metadata_len); 479*eec40579SJoe Thornber if (r < 0) 480*eec40579SJoe Thornber return r; 481*eec40579SJoe Thornber 482*eec40579SJoe Thornber disk->data_block_size = cpu_to_le32(md->block_size); 483*eec40579SJoe Thornber disk->metadata_block_size = cpu_to_le32(DM_ERA_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); 484*eec40579SJoe Thornber disk->nr_blocks = cpu_to_le32(md->nr_blocks); 485*eec40579SJoe Thornber disk->current_era = cpu_to_le32(md->current_era); 486*eec40579SJoe Thornber 487*eec40579SJoe Thornber ws_pack(&md->current_writeset->md, &disk->current_writeset); 488*eec40579SJoe Thornber disk->writeset_tree_root = cpu_to_le64(md->writeset_tree_root); 489*eec40579SJoe 
Thornber disk->era_array_root = cpu_to_le64(md->era_array_root); 490*eec40579SJoe Thornber disk->metadata_snap = cpu_to_le64(md->metadata_snap); 491*eec40579SJoe Thornber 492*eec40579SJoe Thornber return 0; 493*eec40579SJoe Thornber } 494*eec40579SJoe Thornber 495*eec40579SJoe Thornber static int write_superblock(struct era_metadata *md) 496*eec40579SJoe Thornber { 497*eec40579SJoe Thornber int r; 498*eec40579SJoe Thornber struct dm_block *sblock; 499*eec40579SJoe Thornber struct superblock_disk *disk; 500*eec40579SJoe Thornber 501*eec40579SJoe Thornber r = superblock_lock_zero(md, &sblock); 502*eec40579SJoe Thornber if (r) 503*eec40579SJoe Thornber return r; 504*eec40579SJoe Thornber 505*eec40579SJoe Thornber disk = dm_block_data(sblock); 506*eec40579SJoe Thornber r = prepare_superblock(md, disk); 507*eec40579SJoe Thornber if (r) { 508*eec40579SJoe Thornber DMERR("%s: prepare_superblock failed", __func__); 509*eec40579SJoe Thornber dm_bm_unlock(sblock); /* FIXME: does this commit? */ 510*eec40579SJoe Thornber return r; 511*eec40579SJoe Thornber } 512*eec40579SJoe Thornber 513*eec40579SJoe Thornber return dm_tm_commit(md->tm, sblock); 514*eec40579SJoe Thornber } 515*eec40579SJoe Thornber 516*eec40579SJoe Thornber /* 517*eec40579SJoe Thornber * Assumes block_size and the infos are set. 
518*eec40579SJoe Thornber */ 519*eec40579SJoe Thornber static int format_metadata(struct era_metadata *md) 520*eec40579SJoe Thornber { 521*eec40579SJoe Thornber int r; 522*eec40579SJoe Thornber 523*eec40579SJoe Thornber r = create_fresh_metadata(md); 524*eec40579SJoe Thornber if (r) 525*eec40579SJoe Thornber return r; 526*eec40579SJoe Thornber 527*eec40579SJoe Thornber r = write_superblock(md); 528*eec40579SJoe Thornber if (r) { 529*eec40579SJoe Thornber dm_sm_destroy(md->sm); 530*eec40579SJoe Thornber dm_tm_destroy(md->tm); 531*eec40579SJoe Thornber return r; 532*eec40579SJoe Thornber } 533*eec40579SJoe Thornber 534*eec40579SJoe Thornber return 0; 535*eec40579SJoe Thornber } 536*eec40579SJoe Thornber 537*eec40579SJoe Thornber static int open_metadata(struct era_metadata *md) 538*eec40579SJoe Thornber { 539*eec40579SJoe Thornber int r; 540*eec40579SJoe Thornber struct dm_block *sblock; 541*eec40579SJoe Thornber struct superblock_disk *disk; 542*eec40579SJoe Thornber 543*eec40579SJoe Thornber r = superblock_read_lock(md, &sblock); 544*eec40579SJoe Thornber if (r) { 545*eec40579SJoe Thornber DMERR("couldn't read_lock superblock"); 546*eec40579SJoe Thornber return r; 547*eec40579SJoe Thornber } 548*eec40579SJoe Thornber 549*eec40579SJoe Thornber disk = dm_block_data(sblock); 550*eec40579SJoe Thornber r = dm_tm_open_with_sm(md->bm, SUPERBLOCK_LOCATION, 551*eec40579SJoe Thornber disk->metadata_space_map_root, 552*eec40579SJoe Thornber sizeof(disk->metadata_space_map_root), 553*eec40579SJoe Thornber &md->tm, &md->sm); 554*eec40579SJoe Thornber if (r) { 555*eec40579SJoe Thornber DMERR("dm_tm_open_with_sm failed"); 556*eec40579SJoe Thornber goto bad; 557*eec40579SJoe Thornber } 558*eec40579SJoe Thornber 559*eec40579SJoe Thornber setup_infos(md); 560*eec40579SJoe Thornber 561*eec40579SJoe Thornber md->block_size = le32_to_cpu(disk->data_block_size); 562*eec40579SJoe Thornber md->nr_blocks = le32_to_cpu(disk->nr_blocks); 563*eec40579SJoe Thornber md->current_era = 
le32_to_cpu(disk->current_era); 564*eec40579SJoe Thornber 565*eec40579SJoe Thornber md->writeset_tree_root = le64_to_cpu(disk->writeset_tree_root); 566*eec40579SJoe Thornber md->era_array_root = le64_to_cpu(disk->era_array_root); 567*eec40579SJoe Thornber md->metadata_snap = le64_to_cpu(disk->metadata_snap); 568*eec40579SJoe Thornber md->archived_writesets = true; 569*eec40579SJoe Thornber 570*eec40579SJoe Thornber return dm_bm_unlock(sblock); 571*eec40579SJoe Thornber 572*eec40579SJoe Thornber bad: 573*eec40579SJoe Thornber dm_bm_unlock(sblock); 574*eec40579SJoe Thornber return r; 575*eec40579SJoe Thornber } 576*eec40579SJoe Thornber 577*eec40579SJoe Thornber static int open_or_format_metadata(struct era_metadata *md, 578*eec40579SJoe Thornber bool may_format) 579*eec40579SJoe Thornber { 580*eec40579SJoe Thornber int r; 581*eec40579SJoe Thornber bool unformatted = false; 582*eec40579SJoe Thornber 583*eec40579SJoe Thornber r = superblock_all_zeroes(md->bm, &unformatted); 584*eec40579SJoe Thornber if (r) 585*eec40579SJoe Thornber return r; 586*eec40579SJoe Thornber 587*eec40579SJoe Thornber if (unformatted) 588*eec40579SJoe Thornber return may_format ? 
format_metadata(md) : -EPERM; 589*eec40579SJoe Thornber 590*eec40579SJoe Thornber return open_metadata(md); 591*eec40579SJoe Thornber } 592*eec40579SJoe Thornber 593*eec40579SJoe Thornber static int create_persistent_data_objects(struct era_metadata *md, 594*eec40579SJoe Thornber bool may_format) 595*eec40579SJoe Thornber { 596*eec40579SJoe Thornber int r; 597*eec40579SJoe Thornber 598*eec40579SJoe Thornber md->bm = dm_block_manager_create(md->bdev, DM_ERA_METADATA_BLOCK_SIZE, 599*eec40579SJoe Thornber DM_ERA_METADATA_CACHE_SIZE, 600*eec40579SJoe Thornber ERA_MAX_CONCURRENT_LOCKS); 601*eec40579SJoe Thornber if (IS_ERR(md->bm)) { 602*eec40579SJoe Thornber DMERR("could not create block manager"); 603*eec40579SJoe Thornber return PTR_ERR(md->bm); 604*eec40579SJoe Thornber } 605*eec40579SJoe Thornber 606*eec40579SJoe Thornber r = open_or_format_metadata(md, may_format); 607*eec40579SJoe Thornber if (r) 608*eec40579SJoe Thornber dm_block_manager_destroy(md->bm); 609*eec40579SJoe Thornber 610*eec40579SJoe Thornber return r; 611*eec40579SJoe Thornber } 612*eec40579SJoe Thornber 613*eec40579SJoe Thornber static void destroy_persistent_data_objects(struct era_metadata *md) 614*eec40579SJoe Thornber { 615*eec40579SJoe Thornber dm_sm_destroy(md->sm); 616*eec40579SJoe Thornber dm_tm_destroy(md->tm); 617*eec40579SJoe Thornber dm_block_manager_destroy(md->bm); 618*eec40579SJoe Thornber } 619*eec40579SJoe Thornber 620*eec40579SJoe Thornber /* 621*eec40579SJoe Thornber * This waits until all era_map threads have picked up the new filter. 
622*eec40579SJoe Thornber */ 623*eec40579SJoe Thornber static void swap_writeset(struct era_metadata *md, struct writeset *new_writeset) 624*eec40579SJoe Thornber { 625*eec40579SJoe Thornber rcu_assign_pointer(md->current_writeset, new_writeset); 626*eec40579SJoe Thornber synchronize_rcu(); 627*eec40579SJoe Thornber } 628*eec40579SJoe Thornber 629*eec40579SJoe Thornber /*---------------------------------------------------------------- 630*eec40579SJoe Thornber * Writesets get 'digested' into the main era array. 631*eec40579SJoe Thornber * 632*eec40579SJoe Thornber * We're using a coroutine here so the worker thread can do the digestion, 633*eec40579SJoe Thornber * thus avoiding synchronisation of the metadata. Digesting a whole 634*eec40579SJoe Thornber * writeset in one go would cause too much latency. 635*eec40579SJoe Thornber *--------------------------------------------------------------*/ 636*eec40579SJoe Thornber struct digest { 637*eec40579SJoe Thornber uint32_t era; 638*eec40579SJoe Thornber unsigned nr_bits, current_bit; 639*eec40579SJoe Thornber struct writeset_metadata writeset; 640*eec40579SJoe Thornber __le32 value; 641*eec40579SJoe Thornber struct dm_disk_bitset info; 642*eec40579SJoe Thornber 643*eec40579SJoe Thornber int (*step)(struct era_metadata *, struct digest *); 644*eec40579SJoe Thornber }; 645*eec40579SJoe Thornber 646*eec40579SJoe Thornber static int metadata_digest_lookup_writeset(struct era_metadata *md, 647*eec40579SJoe Thornber struct digest *d); 648*eec40579SJoe Thornber 649*eec40579SJoe Thornber static int metadata_digest_remove_writeset(struct era_metadata *md, 650*eec40579SJoe Thornber struct digest *d) 651*eec40579SJoe Thornber { 652*eec40579SJoe Thornber int r; 653*eec40579SJoe Thornber uint64_t key = d->era; 654*eec40579SJoe Thornber 655*eec40579SJoe Thornber r = dm_btree_remove(&md->writeset_tree_info, md->writeset_tree_root, 656*eec40579SJoe Thornber &key, &md->writeset_tree_root); 657*eec40579SJoe Thornber if (r) { 
658*eec40579SJoe Thornber DMERR("%s: dm_btree_remove failed", __func__); 659*eec40579SJoe Thornber return r; 660*eec40579SJoe Thornber } 661*eec40579SJoe Thornber 662*eec40579SJoe Thornber d->step = metadata_digest_lookup_writeset; 663*eec40579SJoe Thornber return 0; 664*eec40579SJoe Thornber } 665*eec40579SJoe Thornber 666*eec40579SJoe Thornber #define INSERTS_PER_STEP 100 667*eec40579SJoe Thornber 668*eec40579SJoe Thornber static int metadata_digest_transcribe_writeset(struct era_metadata *md, 669*eec40579SJoe Thornber struct digest *d) 670*eec40579SJoe Thornber { 671*eec40579SJoe Thornber int r; 672*eec40579SJoe Thornber bool marked; 673*eec40579SJoe Thornber unsigned b, e = min(d->current_bit + INSERTS_PER_STEP, d->nr_bits); 674*eec40579SJoe Thornber 675*eec40579SJoe Thornber for (b = d->current_bit; b < e; b++) { 676*eec40579SJoe Thornber r = writeset_marked_on_disk(&d->info, &d->writeset, b, &marked); 677*eec40579SJoe Thornber if (r) { 678*eec40579SJoe Thornber DMERR("%s: writeset_marked_on_disk failed", __func__); 679*eec40579SJoe Thornber return r; 680*eec40579SJoe Thornber } 681*eec40579SJoe Thornber 682*eec40579SJoe Thornber if (!marked) 683*eec40579SJoe Thornber continue; 684*eec40579SJoe Thornber 685*eec40579SJoe Thornber __dm_bless_for_disk(&d->value); 686*eec40579SJoe Thornber r = dm_array_set_value(&md->era_array_info, md->era_array_root, 687*eec40579SJoe Thornber b, &d->value, &md->era_array_root); 688*eec40579SJoe Thornber if (r) { 689*eec40579SJoe Thornber DMERR("%s: dm_array_set_value failed", __func__); 690*eec40579SJoe Thornber return r; 691*eec40579SJoe Thornber } 692*eec40579SJoe Thornber } 693*eec40579SJoe Thornber 694*eec40579SJoe Thornber if (b == d->nr_bits) 695*eec40579SJoe Thornber d->step = metadata_digest_remove_writeset; 696*eec40579SJoe Thornber else 697*eec40579SJoe Thornber d->current_bit = b; 698*eec40579SJoe Thornber 699*eec40579SJoe Thornber return 0; 700*eec40579SJoe Thornber } 701*eec40579SJoe Thornber 702*eec40579SJoe 
/*
 * First stage: find the oldest (lowest era) archived writeset and set up
 * the digest state to transcribe it.  -ENODATA from the lookup means the
 * tree is empty, which ends digestion by clearing d->step.
 */
static int metadata_digest_lookup_writeset(struct era_metadata *md,
					   struct digest *d)
{
	int r;
	uint64_t key;
	struct writeset_disk disk;

	r = dm_btree_find_lowest_key(&md->writeset_tree_info,
				     md->writeset_tree_root, &key);
	if (r < 0)
		return r;

	d->era = key;

	r = dm_btree_lookup(&md->writeset_tree_info,
			    md->writeset_tree_root, &key, &disk);
	if (r) {
		if (r == -ENODATA) {
			/* No archived writesets left; digestion is done. */
			d->step = NULL;
			return 0;
		}

		DMERR("%s: dm_btree_lookup failed", __func__);
		return r;
	}

	ws_unpack(&disk, &d->writeset);
	d->value = cpu_to_le32(key);

	/*
	 * Clamp in case the device was shrunk since the writeset was
	 * archived; bits beyond nr_blocks have no era array entry.
	 */
	d->nr_bits = min(d->writeset.nr_bits, md->nr_blocks);
	d->current_bit = 0;
	d->step = metadata_digest_transcribe_writeset;

	return 0;
}

/*
 * Kick off digestion of archived writesets, unless one is already in
 * progress (d->step non-NULL).
 */
static int metadata_digest_start(struct era_metadata *md, struct digest *d)
{
	if (d->step)
		return 0;

	memset(d, 0, sizeof(*d));

	/*
	 * We initialise another bitset info to avoid any caching side
	 * effects with the previous one.
	 */
	dm_disk_bitset_init(md->tm, &d->info);
	d->step = metadata_digest_lookup_writeset;

	return 0;
}

/*----------------------------------------------------------------
 * High level metadata interface.  Target methods should use these, and not
 * the lower level ones.
 *--------------------------------------------------------------*/
/*
 * Allocate the in-core metadata and open (or format, if @may_format) the
 * persistent data structures on @bdev.
 *
 * NOTE(review): returns NULL on allocation failure but ERR_PTR(r) on
 * other errors - callers must check for both forms.
 */
static struct era_metadata *metadata_open(struct block_device *bdev,
					  sector_t block_size,
					  bool may_format)
{
	int r;
	struct era_metadata *md = kzalloc(sizeof(*md), GFP_KERNEL);

	if (!md)
		return NULL;

	md->bdev = bdev;
	md->block_size = block_size;

	md->writesets[0].md.root = INVALID_WRITESET_ROOT;
	md->writesets[1].md.root = INVALID_WRITESET_ROOT;
	md->current_writeset = &md->writesets[0];

	r = create_persistent_data_objects(md, may_format);
	if (r) {
		kfree(md);
		return ERR_PTR(r);
	}

	return md;
}

/* Tear down the persistent-data objects and free the in-core metadata. */
static void metadata_close(struct era_metadata *md)
{
	destroy_persistent_data_objects(md);
	kfree(md);
}

static bool valid_nr_blocks(dm_block_t n)
{
	/*
	 * dm_bitset restricts us to 2^32.  test_bit & co. restrict us
	 * further to 2^31 - 1
	 */
	return n < (1ull << 31);
}

/*
 * Resize the era array and reallocate both in-core writesets for
 * *(dm_block_t *)arg origin blocks.  New era array entries start at era 0.
 *
 * NOTE(review): if the second writeset_alloc() fails, writeset 0 has
 * already been reallocated and writeset 1's freed 'bits' pointer is left
 * stale (writeset_free() does not clear it) - a retry would vfree() it
 * again.  TODO confirm against writeset_alloc()/caller behaviour.
 */
static int metadata_resize(struct era_metadata *md, void *arg)
{
	int r;
	dm_block_t *new_size = arg;
	__le32 value;

	if (!valid_nr_blocks(*new_size)) {
		DMERR("Invalid number of origin blocks %llu",
		      (unsigned long long) *new_size);
		return -EINVAL;
	}

	writeset_free(&md->writesets[0]);
	writeset_free(&md->writesets[1]);

	r = writeset_alloc(&md->writesets[0], *new_size);
	if (r) {
		DMERR("%s: writeset_alloc failed for writeset 0", __func__);
		return r;
	}

	r = writeset_alloc(&md->writesets[1], *new_size);
	if (r) {
		DMERR("%s: writeset_alloc failed for writeset 1", __func__);
		return r;
	}

	value = cpu_to_le32(0u);
	__dm_bless_for_disk(&value);
	r = dm_array_resize(&md->era_array_info, md->era_array_root,
			    md->nr_blocks, *new_size,
			    &value, &md->era_array_root);
	if (r) {
		DMERR("%s: dm_array_resize failed", __func__);
		return r;
	}

	md->nr_blocks = *new_size;
	return 0;
}

/*
 * Archive the current writeset: flush its bitset to disk and insert it
 * into the writeset tree keyed by the current era.  The in-core root is
 * then invalidated so rollover knows it has been archived.
 */
static int metadata_era_archive(struct era_metadata *md)
{
	int r;
	uint64_t keys[1];
	struct writeset_disk value;

	r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root,
			    &md->current_writeset->md.root);
	if (r) {
		DMERR("%s: dm_bitset_flush failed", __func__);
		return r;
	}

	ws_pack(&md->current_writeset->md, &value);
	md->current_writeset->md.root = INVALID_WRITESET_ROOT;

	keys[0] = md->current_era;
	__dm_bless_for_disk(&value);
	r = dm_btree_insert(&md->writeset_tree_info, md->writeset_tree_root,
			    keys, &value, &md->writeset_tree_root);
	if (r) {
		DMERR("%s: couldn't insert writeset into btree", __func__);
		/* FIXME: fail mode */
		return r;
	}

	/* Tells the worker there is digestion work to do. */
	md->archived_writesets = true;

	return 0;
}

/* The writeset that is NOT currently active (double buffering). */
static struct writeset *next_writeset(struct era_metadata *md)
{
	return (md->current_writeset == &md->writesets[0]) ?
		&md->writesets[1] : &md->writesets[0];
}

/*
 * Initialise the inactive writeset, publish it as current (RCU) and bump
 * the era counter.
 */
static int metadata_new_era(struct era_metadata *md)
{
	int r;
	struct writeset *new_writeset = next_writeset(md);

	r = writeset_init(&md->bitset_info, new_writeset);
	if (r) {
		DMERR("%s: writeset_init failed", __func__);
		return r;
	}

	swap_writeset(md, new_writeset);
	md->current_era++;

	return 0;
}

/*
 * Archive the current writeset (if it hasn't been already) and start a
 * new era.
 */
static int metadata_era_rollover(struct era_metadata *md)
{
	int r;

	if (md->current_writeset->md.root != INVALID_WRITESET_ROOT) {
		r = metadata_era_archive(md);
		if (r) {
			DMERR("%s: metadata_archive_era failed", __func__);
			/* FIXME: fail mode? */
			return r;
		}
	}

	r = metadata_new_era(md);
	if (r) {
		DMERR("%s: new era failed", __func__);
		/* FIXME: fail mode */
		return r;
	}

	return 0;
}

/*
 * Has @block been written in the current era?  Reads the in-core bitset
 * of the current writeset under RCU (pairs with swap_writeset()).
 */
static bool metadata_current_marked(struct era_metadata *md, dm_block_t block)
{
	bool r;
	struct writeset *ws;

	rcu_read_lock();
	ws = rcu_dereference(md->current_writeset);
	r = writeset_marked(ws, block);
	rcu_read_unlock();

	return r;
}

/*
 * Flush the current writeset's bitset, then commit the transaction via
 * the superblock.  Ordering matters: the bitset must be flushed before
 * dm_tm_pre_commit()/dm_tm_commit() so the committed superblock
 * references a consistent root.
 */
static int metadata_commit(struct era_metadata *md)
{
	int r;
	struct dm_block *sblock;

	if (md->current_writeset->md.root != SUPERBLOCK_LOCATION) {
		r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root,
				    &md->current_writeset->md.root);
		if (r) {
			DMERR("%s: bitset flush failed", __func__);
			return r;
		}
	}

	r = dm_tm_pre_commit(md->tm);
	if (r) {
		DMERR("%s: pre commit failed", __func__);
		return r;
	}

	r = superblock_lock(md, &sblock);
	if (r) {
		DMERR("%s: superblock lock failed", __func__);
		return r;
	}

	r = prepare_superblock(md, dm_block_data(sblock));
	if (r) {
		DMERR("%s: prepare_superblock failed", __func__);
		dm_bm_unlock(sblock); /* FIXME: does this commit? */
		return r;
	}

	/* dm_tm_commit() writes the superblock and unlocks it. */
	return dm_tm_commit(md->tm, sblock);
}

static int metadata_checkpoint(struct era_metadata *md)
{
	/*
	 * For now we just rollover, but later I want to put a check in to
	 * avoid this if the filter is still pretty fresh.
	 */
	return metadata_era_rollover(md);
}

/*
 * Metadata snapshots allow userland to access era data.
 */
/*
 * Take a snapshot: roll over and commit so on-disk state is current,
 * then shadow the superblock and take extra references on the trees it
 * points at so they survive future transactions.
 *
 * NOTE(review): on the two inc-failure paths below, the extra reference
 * taken on SUPERBLOCK_LOCATION is never dropped - looks like a leak on
 * error; TODO confirm.
 */
static int metadata_take_snap(struct era_metadata *md)
{
	int r, inc;
	struct dm_block *clone;

	if (md->metadata_snap != SUPERBLOCK_LOCATION) {
		DMERR("%s: metadata snapshot already exists", __func__);
		return -EINVAL;
	}

	r = metadata_era_rollover(md);
	if (r) {
		DMERR("%s: era rollover failed", __func__);
		return r;
	}

	r = metadata_commit(md);
	if (r) {
		DMERR("%s: pre commit failed", __func__);
		return r;
	}

	r = dm_sm_inc_block(md->sm, SUPERBLOCK_LOCATION);
	if (r) {
		DMERR("%s: couldn't increment superblock", __func__);
		return r;
	}

	r = dm_tm_shadow_block(md->tm, SUPERBLOCK_LOCATION,
			       &sb_validator, &clone, &inc);
	if (r) {
		DMERR("%s: couldn't shadow superblock", __func__);
		dm_sm_dec_block(md->sm, SUPERBLOCK_LOCATION);
		return r;
	}
	/* The extra ref above guarantees the shadow is a fresh copy. */
	BUG_ON(!inc);

	r = dm_sm_inc_block(md->sm, md->writeset_tree_root);
	if (r) {
		DMERR("%s: couldn't inc writeset tree root", __func__);
		dm_tm_unlock(md->tm, clone);
		return r;
	}

	r = dm_sm_inc_block(md->sm, md->era_array_root);
	if (r) {
		DMERR("%s: couldn't inc era tree root", __func__);
		dm_sm_dec_block(md->sm, md->writeset_tree_root);
		dm_tm_unlock(md->tm, clone);
		return r;
	}

	md->metadata_snap = dm_block_location(clone);

	r = dm_tm_unlock(md->tm, clone);
	if (r) {
		DMERR("%s: couldn't unlock clone", __func__);
		md->metadata_snap = SUPERBLOCK_LOCATION;
		return r;
	}

	return 0;
}

/*
 * Drop the metadata snapshot: release the snapshot's references to the
 * writeset tree and era array it captured, then drop the snapshot
 * superblock itself.
 */
static int metadata_drop_snap(struct era_metadata *md)
{
	int r;
	dm_block_t location;
	struct dm_block *clone;
	struct superblock_disk *disk;

	if (md->metadata_snap == SUPERBLOCK_LOCATION) {
		DMERR("%s: no snap to drop", __func__);
		return -EINVAL;
	}

	r = dm_tm_read_lock(md->tm, md->metadata_snap, &sb_validator, &clone);
	if (r) {
		DMERR("%s: couldn't read lock superblock clone", __func__);
		return r;
	}

	/*
	 * Whatever happens now we'll commit with no record of the metadata
	 * snap.
	 */
	md->metadata_snap = SUPERBLOCK_LOCATION;

	disk = dm_block_data(clone);
	r = dm_btree_del(&md->writeset_tree_info,
			 le64_to_cpu(disk->writeset_tree_root));
	if (r) {
		DMERR("%s: error deleting writeset tree clone", __func__);
		dm_tm_unlock(md->tm, clone);
		return r;
	}

	r = dm_array_del(&md->era_array_info, le64_to_cpu(disk->era_array_root));
	if (r) {
		DMERR("%s: error deleting era array clone", __func__);
		dm_tm_unlock(md->tm, clone);
		return r;
	}

	location = dm_block_location(clone);
	dm_tm_unlock(md->tm, clone);

	return dm_sm_dec_block(md->sm, location);
}

/* Snapshot of space-map usage and era state, reported via status. */
struct metadata_stats {
	dm_block_t used;	/* metadata blocks in use */
	dm_block_t total;	/* total metadata blocks */
	dm_block_t snap;	/* metadata snapshot location (SUPERBLOCK_LOCATION = none) */
	uint32_t era;		/* current era */
};

/* Fill *(struct metadata_stats *)ptr from the metadata space map. */
static int metadata_get_stats(struct era_metadata *md, void *ptr)
{
	int r;
	struct metadata_stats *s = ptr;
	dm_block_t nr_free, nr_total;

	r = dm_sm_get_nr_free(md->sm, &nr_free);
	if (r) {
		DMERR("dm_sm_get_nr_free returned %d", r);
		return r;
	}

	r = dm_sm_get_nr_blocks(md->sm, &nr_total);
	if (r) {
		/* NOTE(review): message names the wrong function (copy/paste). */
		DMERR("dm_pool_get_metadata_dev_size returned %d", r);
		return r;
	}

	s->used = nr_total - nr_free;
	s->total = nr_total;
	s->snap = md->metadata_snap;
	s->era = md->current_era;

	return 0;
}

/*----------------------------------------------------------------*/

/* Per-target instance state. */
struct era {
	struct dm_target *ti;
	struct dm_target_callbacks callbacks;	/* congestion callback */

	struct dm_dev *metadata_dev;
	struct dm_dev *origin_dev;

	dm_block_t nr_blocks;		/* origin size in era blocks */
	uint32_t sectors_per_block;
	int sectors_per_block_shift;	/* < 0 if block size isn't a power of two */
	struct era_metadata *md;

	struct workqueue_struct *wq;
	struct work_struct worker;

	spinlock_t deferred_lock;	/* protects deferred_bios */
	struct bio_list deferred_bios;

	spinlock_t rpc_lock;		/* protects rpc_calls */
	struct list_head rpc_calls;

	struct digest digest;		/* writeset digestion state (worker only) */
	atomic_t suspended;
};
1148*eec40579SJoe Thornber 1149*eec40579SJoe Thornber struct rpc { 1150*eec40579SJoe Thornber struct list_head list; 1151*eec40579SJoe Thornber 1152*eec40579SJoe Thornber int (*fn0)(struct era_metadata *); 1153*eec40579SJoe Thornber int (*fn1)(struct era_metadata *, void *); 1154*eec40579SJoe Thornber void *arg; 1155*eec40579SJoe Thornber int result; 1156*eec40579SJoe Thornber 1157*eec40579SJoe Thornber struct completion complete; 1158*eec40579SJoe Thornber }; 1159*eec40579SJoe Thornber 1160*eec40579SJoe Thornber /*---------------------------------------------------------------- 1161*eec40579SJoe Thornber * Remapping. 1162*eec40579SJoe Thornber *---------------------------------------------------------------*/ 1163*eec40579SJoe Thornber static bool block_size_is_power_of_two(struct era *era) 1164*eec40579SJoe Thornber { 1165*eec40579SJoe Thornber return era->sectors_per_block_shift >= 0; 1166*eec40579SJoe Thornber } 1167*eec40579SJoe Thornber 1168*eec40579SJoe Thornber static dm_block_t get_block(struct era *era, struct bio *bio) 1169*eec40579SJoe Thornber { 1170*eec40579SJoe Thornber sector_t block_nr = bio->bi_iter.bi_sector; 1171*eec40579SJoe Thornber 1172*eec40579SJoe Thornber if (!block_size_is_power_of_two(era)) 1173*eec40579SJoe Thornber (void) sector_div(block_nr, era->sectors_per_block); 1174*eec40579SJoe Thornber else 1175*eec40579SJoe Thornber block_nr >>= era->sectors_per_block_shift; 1176*eec40579SJoe Thornber 1177*eec40579SJoe Thornber return block_nr; 1178*eec40579SJoe Thornber } 1179*eec40579SJoe Thornber 1180*eec40579SJoe Thornber static void remap_to_origin(struct era *era, struct bio *bio) 1181*eec40579SJoe Thornber { 1182*eec40579SJoe Thornber bio->bi_bdev = era->origin_dev->bdev; 1183*eec40579SJoe Thornber } 1184*eec40579SJoe Thornber 1185*eec40579SJoe Thornber /*---------------------------------------------------------------- 1186*eec40579SJoe Thornber * Worker thread 1187*eec40579SJoe Thornber 
*--------------------------------------------------------------*/ 1188*eec40579SJoe Thornber static void wake_worker(struct era *era) 1189*eec40579SJoe Thornber { 1190*eec40579SJoe Thornber if (!atomic_read(&era->suspended)) 1191*eec40579SJoe Thornber queue_work(era->wq, &era->worker); 1192*eec40579SJoe Thornber } 1193*eec40579SJoe Thornber 1194*eec40579SJoe Thornber static void process_old_eras(struct era *era) 1195*eec40579SJoe Thornber { 1196*eec40579SJoe Thornber int r; 1197*eec40579SJoe Thornber 1198*eec40579SJoe Thornber if (!era->digest.step) 1199*eec40579SJoe Thornber return; 1200*eec40579SJoe Thornber 1201*eec40579SJoe Thornber r = era->digest.step(era->md, &era->digest); 1202*eec40579SJoe Thornber if (r < 0) { 1203*eec40579SJoe Thornber DMERR("%s: digest step failed, stopping digestion", __func__); 1204*eec40579SJoe Thornber era->digest.step = NULL; 1205*eec40579SJoe Thornber 1206*eec40579SJoe Thornber } else if (era->digest.step) 1207*eec40579SJoe Thornber wake_worker(era); 1208*eec40579SJoe Thornber } 1209*eec40579SJoe Thornber 1210*eec40579SJoe Thornber static void process_deferred_bios(struct era *era) 1211*eec40579SJoe Thornber { 1212*eec40579SJoe Thornber int r; 1213*eec40579SJoe Thornber struct bio_list deferred_bios, marked_bios; 1214*eec40579SJoe Thornber struct bio *bio; 1215*eec40579SJoe Thornber bool commit_needed = false; 1216*eec40579SJoe Thornber bool failed = false; 1217*eec40579SJoe Thornber 1218*eec40579SJoe Thornber bio_list_init(&deferred_bios); 1219*eec40579SJoe Thornber bio_list_init(&marked_bios); 1220*eec40579SJoe Thornber 1221*eec40579SJoe Thornber spin_lock(&era->deferred_lock); 1222*eec40579SJoe Thornber bio_list_merge(&deferred_bios, &era->deferred_bios); 1223*eec40579SJoe Thornber bio_list_init(&era->deferred_bios); 1224*eec40579SJoe Thornber spin_unlock(&era->deferred_lock); 1225*eec40579SJoe Thornber 1226*eec40579SJoe Thornber while ((bio = bio_list_pop(&deferred_bios))) { 1227*eec40579SJoe Thornber r = 
writeset_test_and_set(&era->md->bitset_info, 1228*eec40579SJoe Thornber era->md->current_writeset, 1229*eec40579SJoe Thornber get_block(era, bio)); 1230*eec40579SJoe Thornber if (r < 0) { 1231*eec40579SJoe Thornber /* 1232*eec40579SJoe Thornber * This is bad news, we need to rollback. 1233*eec40579SJoe Thornber * FIXME: finish. 1234*eec40579SJoe Thornber */ 1235*eec40579SJoe Thornber failed = true; 1236*eec40579SJoe Thornber 1237*eec40579SJoe Thornber } else if (r == 0) 1238*eec40579SJoe Thornber commit_needed = true; 1239*eec40579SJoe Thornber 1240*eec40579SJoe Thornber bio_list_add(&marked_bios, bio); 1241*eec40579SJoe Thornber } 1242*eec40579SJoe Thornber 1243*eec40579SJoe Thornber if (commit_needed) { 1244*eec40579SJoe Thornber r = metadata_commit(era->md); 1245*eec40579SJoe Thornber if (r) 1246*eec40579SJoe Thornber failed = true; 1247*eec40579SJoe Thornber } 1248*eec40579SJoe Thornber 1249*eec40579SJoe Thornber if (failed) 1250*eec40579SJoe Thornber while ((bio = bio_list_pop(&marked_bios))) 1251*eec40579SJoe Thornber bio_io_error(bio); 1252*eec40579SJoe Thornber else 1253*eec40579SJoe Thornber while ((bio = bio_list_pop(&marked_bios))) 1254*eec40579SJoe Thornber generic_make_request(bio); 1255*eec40579SJoe Thornber } 1256*eec40579SJoe Thornber 1257*eec40579SJoe Thornber static void process_rpc_calls(struct era *era) 1258*eec40579SJoe Thornber { 1259*eec40579SJoe Thornber int r; 1260*eec40579SJoe Thornber bool need_commit = false; 1261*eec40579SJoe Thornber struct list_head calls; 1262*eec40579SJoe Thornber struct rpc *rpc, *tmp; 1263*eec40579SJoe Thornber 1264*eec40579SJoe Thornber INIT_LIST_HEAD(&calls); 1265*eec40579SJoe Thornber spin_lock(&era->rpc_lock); 1266*eec40579SJoe Thornber list_splice_init(&era->rpc_calls, &calls); 1267*eec40579SJoe Thornber spin_unlock(&era->rpc_lock); 1268*eec40579SJoe Thornber 1269*eec40579SJoe Thornber list_for_each_entry_safe(rpc, tmp, &calls, list) { 1270*eec40579SJoe Thornber rpc->result = rpc->fn0 ? 
rpc->fn0(era->md) : rpc->fn1(era->md, rpc->arg); 1271*eec40579SJoe Thornber need_commit = true; 1272*eec40579SJoe Thornber } 1273*eec40579SJoe Thornber 1274*eec40579SJoe Thornber if (need_commit) { 1275*eec40579SJoe Thornber r = metadata_commit(era->md); 1276*eec40579SJoe Thornber if (r) 1277*eec40579SJoe Thornber list_for_each_entry_safe(rpc, tmp, &calls, list) 1278*eec40579SJoe Thornber rpc->result = r; 1279*eec40579SJoe Thornber } 1280*eec40579SJoe Thornber 1281*eec40579SJoe Thornber list_for_each_entry_safe(rpc, tmp, &calls, list) 1282*eec40579SJoe Thornber complete(&rpc->complete); 1283*eec40579SJoe Thornber } 1284*eec40579SJoe Thornber 1285*eec40579SJoe Thornber static void kick_off_digest(struct era *era) 1286*eec40579SJoe Thornber { 1287*eec40579SJoe Thornber if (era->md->archived_writesets) { 1288*eec40579SJoe Thornber era->md->archived_writesets = false; 1289*eec40579SJoe Thornber metadata_digest_start(era->md, &era->digest); 1290*eec40579SJoe Thornber } 1291*eec40579SJoe Thornber } 1292*eec40579SJoe Thornber 1293*eec40579SJoe Thornber static void do_work(struct work_struct *ws) 1294*eec40579SJoe Thornber { 1295*eec40579SJoe Thornber struct era *era = container_of(ws, struct era, worker); 1296*eec40579SJoe Thornber 1297*eec40579SJoe Thornber kick_off_digest(era); 1298*eec40579SJoe Thornber process_old_eras(era); 1299*eec40579SJoe Thornber process_deferred_bios(era); 1300*eec40579SJoe Thornber process_rpc_calls(era); 1301*eec40579SJoe Thornber } 1302*eec40579SJoe Thornber 1303*eec40579SJoe Thornber static void defer_bio(struct era *era, struct bio *bio) 1304*eec40579SJoe Thornber { 1305*eec40579SJoe Thornber spin_lock(&era->deferred_lock); 1306*eec40579SJoe Thornber bio_list_add(&era->deferred_bios, bio); 1307*eec40579SJoe Thornber spin_unlock(&era->deferred_lock); 1308*eec40579SJoe Thornber 1309*eec40579SJoe Thornber wake_worker(era); 1310*eec40579SJoe Thornber } 1311*eec40579SJoe Thornber 1312*eec40579SJoe Thornber /* 1313*eec40579SJoe Thornber * Make an 
rpc call to the worker to change the metadata. 1314*eec40579SJoe Thornber */ 1315*eec40579SJoe Thornber static int perform_rpc(struct era *era, struct rpc *rpc) 1316*eec40579SJoe Thornber { 1317*eec40579SJoe Thornber rpc->result = 0; 1318*eec40579SJoe Thornber init_completion(&rpc->complete); 1319*eec40579SJoe Thornber 1320*eec40579SJoe Thornber spin_lock(&era->rpc_lock); 1321*eec40579SJoe Thornber list_add(&rpc->list, &era->rpc_calls); 1322*eec40579SJoe Thornber spin_unlock(&era->rpc_lock); 1323*eec40579SJoe Thornber 1324*eec40579SJoe Thornber wake_worker(era); 1325*eec40579SJoe Thornber wait_for_completion(&rpc->complete); 1326*eec40579SJoe Thornber 1327*eec40579SJoe Thornber return rpc->result; 1328*eec40579SJoe Thornber } 1329*eec40579SJoe Thornber 1330*eec40579SJoe Thornber static int in_worker0(struct era *era, int (*fn)(struct era_metadata *)) 1331*eec40579SJoe Thornber { 1332*eec40579SJoe Thornber struct rpc rpc; 1333*eec40579SJoe Thornber rpc.fn0 = fn; 1334*eec40579SJoe Thornber rpc.fn1 = NULL; 1335*eec40579SJoe Thornber 1336*eec40579SJoe Thornber return perform_rpc(era, &rpc); 1337*eec40579SJoe Thornber } 1338*eec40579SJoe Thornber 1339*eec40579SJoe Thornber static int in_worker1(struct era *era, 1340*eec40579SJoe Thornber int (*fn)(struct era_metadata *, void *), void *arg) 1341*eec40579SJoe Thornber { 1342*eec40579SJoe Thornber struct rpc rpc; 1343*eec40579SJoe Thornber rpc.fn0 = NULL; 1344*eec40579SJoe Thornber rpc.fn1 = fn; 1345*eec40579SJoe Thornber rpc.arg = arg; 1346*eec40579SJoe Thornber 1347*eec40579SJoe Thornber return perform_rpc(era, &rpc); 1348*eec40579SJoe Thornber } 1349*eec40579SJoe Thornber 1350*eec40579SJoe Thornber static void start_worker(struct era *era) 1351*eec40579SJoe Thornber { 1352*eec40579SJoe Thornber atomic_set(&era->suspended, 0); 1353*eec40579SJoe Thornber } 1354*eec40579SJoe Thornber 1355*eec40579SJoe Thornber static void stop_worker(struct era *era) 1356*eec40579SJoe Thornber { 1357*eec40579SJoe Thornber 
atomic_set(&era->suspended, 1); 1358*eec40579SJoe Thornber flush_workqueue(era->wq); 1359*eec40579SJoe Thornber } 1360*eec40579SJoe Thornber 1361*eec40579SJoe Thornber /*---------------------------------------------------------------- 1362*eec40579SJoe Thornber * Target methods 1363*eec40579SJoe Thornber *--------------------------------------------------------------*/ 1364*eec40579SJoe Thornber static int dev_is_congested(struct dm_dev *dev, int bdi_bits) 1365*eec40579SJoe Thornber { 1366*eec40579SJoe Thornber struct request_queue *q = bdev_get_queue(dev->bdev); 1367*eec40579SJoe Thornber return bdi_congested(&q->backing_dev_info, bdi_bits); 1368*eec40579SJoe Thornber } 1369*eec40579SJoe Thornber 1370*eec40579SJoe Thornber static int era_is_congested(struct dm_target_callbacks *cb, int bdi_bits) 1371*eec40579SJoe Thornber { 1372*eec40579SJoe Thornber struct era *era = container_of(cb, struct era, callbacks); 1373*eec40579SJoe Thornber return dev_is_congested(era->origin_dev, bdi_bits); 1374*eec40579SJoe Thornber } 1375*eec40579SJoe Thornber 1376*eec40579SJoe Thornber static void era_destroy(struct era *era) 1377*eec40579SJoe Thornber { 1378*eec40579SJoe Thornber metadata_close(era->md); 1379*eec40579SJoe Thornber 1380*eec40579SJoe Thornber if (era->wq) 1381*eec40579SJoe Thornber destroy_workqueue(era->wq); 1382*eec40579SJoe Thornber 1383*eec40579SJoe Thornber if (era->origin_dev) 1384*eec40579SJoe Thornber dm_put_device(era->ti, era->origin_dev); 1385*eec40579SJoe Thornber 1386*eec40579SJoe Thornber if (era->metadata_dev) 1387*eec40579SJoe Thornber dm_put_device(era->ti, era->metadata_dev); 1388*eec40579SJoe Thornber 1389*eec40579SJoe Thornber kfree(era); 1390*eec40579SJoe Thornber } 1391*eec40579SJoe Thornber 1392*eec40579SJoe Thornber static dm_block_t calc_nr_blocks(struct era *era) 1393*eec40579SJoe Thornber { 1394*eec40579SJoe Thornber return dm_sector_div_up(era->ti->len, era->sectors_per_block); 1395*eec40579SJoe Thornber } 1396*eec40579SJoe Thornber 
1397*eec40579SJoe Thornber static bool valid_block_size(dm_block_t block_size) 1398*eec40579SJoe Thornber { 1399*eec40579SJoe Thornber bool greater_than_zero = block_size > 0; 1400*eec40579SJoe Thornber bool multiple_of_min_block_size = (block_size & (MIN_BLOCK_SIZE - 1)) == 0; 1401*eec40579SJoe Thornber 1402*eec40579SJoe Thornber return greater_than_zero && multiple_of_min_block_size; 1403*eec40579SJoe Thornber } 1404*eec40579SJoe Thornber 1405*eec40579SJoe Thornber /* 1406*eec40579SJoe Thornber * <metadata dev> <data dev> <data block size (sectors)> 1407*eec40579SJoe Thornber */ 1408*eec40579SJoe Thornber static int era_ctr(struct dm_target *ti, unsigned argc, char **argv) 1409*eec40579SJoe Thornber { 1410*eec40579SJoe Thornber int r; 1411*eec40579SJoe Thornber char dummy; 1412*eec40579SJoe Thornber struct era *era; 1413*eec40579SJoe Thornber struct era_metadata *md; 1414*eec40579SJoe Thornber 1415*eec40579SJoe Thornber if (argc != 3) { 1416*eec40579SJoe Thornber ti->error = "Invalid argument count"; 1417*eec40579SJoe Thornber return -EINVAL; 1418*eec40579SJoe Thornber } 1419*eec40579SJoe Thornber 1420*eec40579SJoe Thornber era = kzalloc(sizeof(*era), GFP_KERNEL); 1421*eec40579SJoe Thornber if (!era) { 1422*eec40579SJoe Thornber ti->error = "Error allocating era structure"; 1423*eec40579SJoe Thornber return -ENOMEM; 1424*eec40579SJoe Thornber } 1425*eec40579SJoe Thornber 1426*eec40579SJoe Thornber era->ti = ti; 1427*eec40579SJoe Thornber 1428*eec40579SJoe Thornber r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &era->metadata_dev); 1429*eec40579SJoe Thornber if (r) { 1430*eec40579SJoe Thornber ti->error = "Error opening metadata device"; 1431*eec40579SJoe Thornber era_destroy(era); 1432*eec40579SJoe Thornber return -EINVAL; 1433*eec40579SJoe Thornber } 1434*eec40579SJoe Thornber 1435*eec40579SJoe Thornber r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &era->origin_dev); 1436*eec40579SJoe Thornber if (r) { 1437*eec40579SJoe Thornber ti->error = 
"Error opening data device"; 1438*eec40579SJoe Thornber era_destroy(era); 1439*eec40579SJoe Thornber return -EINVAL; 1440*eec40579SJoe Thornber } 1441*eec40579SJoe Thornber 1442*eec40579SJoe Thornber r = sscanf(argv[2], "%u%c", &era->sectors_per_block, &dummy); 1443*eec40579SJoe Thornber if (r != 1) { 1444*eec40579SJoe Thornber ti->error = "Error parsing block size"; 1445*eec40579SJoe Thornber era_destroy(era); 1446*eec40579SJoe Thornber return -EINVAL; 1447*eec40579SJoe Thornber } 1448*eec40579SJoe Thornber 1449*eec40579SJoe Thornber r = dm_set_target_max_io_len(ti, era->sectors_per_block); 1450*eec40579SJoe Thornber if (r) { 1451*eec40579SJoe Thornber ti->error = "could not set max io len"; 1452*eec40579SJoe Thornber era_destroy(era); 1453*eec40579SJoe Thornber return -EINVAL; 1454*eec40579SJoe Thornber } 1455*eec40579SJoe Thornber 1456*eec40579SJoe Thornber if (!valid_block_size(era->sectors_per_block)) { 1457*eec40579SJoe Thornber ti->error = "Invalid block size"; 1458*eec40579SJoe Thornber era_destroy(era); 1459*eec40579SJoe Thornber return -EINVAL; 1460*eec40579SJoe Thornber } 1461*eec40579SJoe Thornber if (era->sectors_per_block & (era->sectors_per_block - 1)) 1462*eec40579SJoe Thornber era->sectors_per_block_shift = -1; 1463*eec40579SJoe Thornber else 1464*eec40579SJoe Thornber era->sectors_per_block_shift = __ffs(era->sectors_per_block); 1465*eec40579SJoe Thornber 1466*eec40579SJoe Thornber md = metadata_open(era->metadata_dev->bdev, era->sectors_per_block, true); 1467*eec40579SJoe Thornber if (IS_ERR(md)) { 1468*eec40579SJoe Thornber ti->error = "Error reading metadata"; 1469*eec40579SJoe Thornber era_destroy(era); 1470*eec40579SJoe Thornber return PTR_ERR(md); 1471*eec40579SJoe Thornber } 1472*eec40579SJoe Thornber era->md = md; 1473*eec40579SJoe Thornber 1474*eec40579SJoe Thornber era->nr_blocks = calc_nr_blocks(era); 1475*eec40579SJoe Thornber 1476*eec40579SJoe Thornber r = metadata_resize(era->md, &era->nr_blocks); 1477*eec40579SJoe Thornber if (r) { 
1478*eec40579SJoe Thornber ti->error = "couldn't resize metadata"; 1479*eec40579SJoe Thornber era_destroy(era); 1480*eec40579SJoe Thornber return -ENOMEM; 1481*eec40579SJoe Thornber } 1482*eec40579SJoe Thornber 1483*eec40579SJoe Thornber era->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM); 1484*eec40579SJoe Thornber if (!era->wq) { 1485*eec40579SJoe Thornber ti->error = "could not create workqueue for metadata object"; 1486*eec40579SJoe Thornber era_destroy(era); 1487*eec40579SJoe Thornber return -ENOMEM; 1488*eec40579SJoe Thornber } 1489*eec40579SJoe Thornber INIT_WORK(&era->worker, do_work); 1490*eec40579SJoe Thornber 1491*eec40579SJoe Thornber spin_lock_init(&era->deferred_lock); 1492*eec40579SJoe Thornber bio_list_init(&era->deferred_bios); 1493*eec40579SJoe Thornber 1494*eec40579SJoe Thornber spin_lock_init(&era->rpc_lock); 1495*eec40579SJoe Thornber INIT_LIST_HEAD(&era->rpc_calls); 1496*eec40579SJoe Thornber 1497*eec40579SJoe Thornber ti->private = era; 1498*eec40579SJoe Thornber ti->num_flush_bios = 1; 1499*eec40579SJoe Thornber ti->flush_supported = true; 1500*eec40579SJoe Thornber 1501*eec40579SJoe Thornber ti->num_discard_bios = 1; 1502*eec40579SJoe Thornber ti->discards_supported = true; 1503*eec40579SJoe Thornber era->callbacks.congested_fn = era_is_congested; 1504*eec40579SJoe Thornber dm_table_add_target_callbacks(ti->table, &era->callbacks); 1505*eec40579SJoe Thornber 1506*eec40579SJoe Thornber return 0; 1507*eec40579SJoe Thornber } 1508*eec40579SJoe Thornber 1509*eec40579SJoe Thornber static void era_dtr(struct dm_target *ti) 1510*eec40579SJoe Thornber { 1511*eec40579SJoe Thornber era_destroy(ti->private); 1512*eec40579SJoe Thornber } 1513*eec40579SJoe Thornber 1514*eec40579SJoe Thornber static int era_map(struct dm_target *ti, struct bio *bio) 1515*eec40579SJoe Thornber { 1516*eec40579SJoe Thornber struct era *era = ti->private; 1517*eec40579SJoe Thornber dm_block_t block = get_block(era, bio); 1518*eec40579SJoe Thornber 
1519*eec40579SJoe Thornber /* 1520*eec40579SJoe Thornber * All bios get remapped to the origin device. We do this now, but 1521*eec40579SJoe Thornber * it may not get issued until later. Depending on whether the 1522*eec40579SJoe Thornber * block is marked in this era. 1523*eec40579SJoe Thornber */ 1524*eec40579SJoe Thornber remap_to_origin(era, bio); 1525*eec40579SJoe Thornber 1526*eec40579SJoe Thornber /* 1527*eec40579SJoe Thornber * REQ_FLUSH bios carry no data, so we're not interested in them. 1528*eec40579SJoe Thornber */ 1529*eec40579SJoe Thornber if (!(bio->bi_rw & REQ_FLUSH) && 1530*eec40579SJoe Thornber (bio_data_dir(bio) == WRITE) && 1531*eec40579SJoe Thornber !metadata_current_marked(era->md, block)) { 1532*eec40579SJoe Thornber defer_bio(era, bio); 1533*eec40579SJoe Thornber return DM_MAPIO_SUBMITTED; 1534*eec40579SJoe Thornber } 1535*eec40579SJoe Thornber 1536*eec40579SJoe Thornber return DM_MAPIO_REMAPPED; 1537*eec40579SJoe Thornber } 1538*eec40579SJoe Thornber 1539*eec40579SJoe Thornber static void era_postsuspend(struct dm_target *ti) 1540*eec40579SJoe Thornber { 1541*eec40579SJoe Thornber int r; 1542*eec40579SJoe Thornber struct era *era = ti->private; 1543*eec40579SJoe Thornber 1544*eec40579SJoe Thornber r = in_worker0(era, metadata_era_archive); 1545*eec40579SJoe Thornber if (r) { 1546*eec40579SJoe Thornber DMERR("%s: couldn't archive current era", __func__); 1547*eec40579SJoe Thornber /* FIXME: fail mode */ 1548*eec40579SJoe Thornber } 1549*eec40579SJoe Thornber 1550*eec40579SJoe Thornber stop_worker(era); 1551*eec40579SJoe Thornber } 1552*eec40579SJoe Thornber 1553*eec40579SJoe Thornber static int era_preresume(struct dm_target *ti) 1554*eec40579SJoe Thornber { 1555*eec40579SJoe Thornber int r; 1556*eec40579SJoe Thornber struct era *era = ti->private; 1557*eec40579SJoe Thornber dm_block_t new_size = calc_nr_blocks(era); 1558*eec40579SJoe Thornber 1559*eec40579SJoe Thornber if (era->nr_blocks != new_size) { 1560*eec40579SJoe Thornber r = 
in_worker1(era, metadata_resize, &new_size); 1561*eec40579SJoe Thornber if (r) 1562*eec40579SJoe Thornber return r; 1563*eec40579SJoe Thornber 1564*eec40579SJoe Thornber era->nr_blocks = new_size; 1565*eec40579SJoe Thornber } 1566*eec40579SJoe Thornber 1567*eec40579SJoe Thornber start_worker(era); 1568*eec40579SJoe Thornber 1569*eec40579SJoe Thornber r = in_worker0(era, metadata_new_era); 1570*eec40579SJoe Thornber if (r) { 1571*eec40579SJoe Thornber DMERR("%s: metadata_era_rollover failed", __func__); 1572*eec40579SJoe Thornber return r; 1573*eec40579SJoe Thornber } 1574*eec40579SJoe Thornber 1575*eec40579SJoe Thornber return 0; 1576*eec40579SJoe Thornber } 1577*eec40579SJoe Thornber 1578*eec40579SJoe Thornber /* 1579*eec40579SJoe Thornber * Status format: 1580*eec40579SJoe Thornber * 1581*eec40579SJoe Thornber * <metadata block size> <#used metadata blocks>/<#total metadata blocks> 1582*eec40579SJoe Thornber * <current era> <held metadata root | '-'> 1583*eec40579SJoe Thornber */ 1584*eec40579SJoe Thornber static void era_status(struct dm_target *ti, status_type_t type, 1585*eec40579SJoe Thornber unsigned status_flags, char *result, unsigned maxlen) 1586*eec40579SJoe Thornber { 1587*eec40579SJoe Thornber int r; 1588*eec40579SJoe Thornber struct era *era = ti->private; 1589*eec40579SJoe Thornber ssize_t sz = 0; 1590*eec40579SJoe Thornber struct metadata_stats stats; 1591*eec40579SJoe Thornber char buf[BDEVNAME_SIZE]; 1592*eec40579SJoe Thornber 1593*eec40579SJoe Thornber switch (type) { 1594*eec40579SJoe Thornber case STATUSTYPE_INFO: 1595*eec40579SJoe Thornber r = in_worker1(era, metadata_get_stats, &stats); 1596*eec40579SJoe Thornber if (r) 1597*eec40579SJoe Thornber goto err; 1598*eec40579SJoe Thornber 1599*eec40579SJoe Thornber DMEMIT("%u %llu/%llu %u", 1600*eec40579SJoe Thornber (unsigned) (DM_ERA_METADATA_BLOCK_SIZE >> SECTOR_SHIFT), 1601*eec40579SJoe Thornber (unsigned long long) stats.used, 1602*eec40579SJoe Thornber (unsigned long long) stats.total, 
1603*eec40579SJoe Thornber (unsigned) stats.era); 1604*eec40579SJoe Thornber 1605*eec40579SJoe Thornber if (stats.snap != SUPERBLOCK_LOCATION) 1606*eec40579SJoe Thornber DMEMIT(" %llu", stats.snap); 1607*eec40579SJoe Thornber else 1608*eec40579SJoe Thornber DMEMIT(" -"); 1609*eec40579SJoe Thornber break; 1610*eec40579SJoe Thornber 1611*eec40579SJoe Thornber case STATUSTYPE_TABLE: 1612*eec40579SJoe Thornber format_dev_t(buf, era->metadata_dev->bdev->bd_dev); 1613*eec40579SJoe Thornber DMEMIT("%s ", buf); 1614*eec40579SJoe Thornber format_dev_t(buf, era->origin_dev->bdev->bd_dev); 1615*eec40579SJoe Thornber DMEMIT("%s %u", buf, era->sectors_per_block); 1616*eec40579SJoe Thornber break; 1617*eec40579SJoe Thornber } 1618*eec40579SJoe Thornber 1619*eec40579SJoe Thornber return; 1620*eec40579SJoe Thornber 1621*eec40579SJoe Thornber err: 1622*eec40579SJoe Thornber DMEMIT("Error"); 1623*eec40579SJoe Thornber } 1624*eec40579SJoe Thornber 1625*eec40579SJoe Thornber static int era_message(struct dm_target *ti, unsigned argc, char **argv) 1626*eec40579SJoe Thornber { 1627*eec40579SJoe Thornber struct era *era = ti->private; 1628*eec40579SJoe Thornber 1629*eec40579SJoe Thornber if (argc != 1) { 1630*eec40579SJoe Thornber DMERR("incorrect number of message arguments"); 1631*eec40579SJoe Thornber return -EINVAL; 1632*eec40579SJoe Thornber } 1633*eec40579SJoe Thornber 1634*eec40579SJoe Thornber if (!strcasecmp(argv[0], "checkpoint")) 1635*eec40579SJoe Thornber return in_worker0(era, metadata_checkpoint); 1636*eec40579SJoe Thornber 1637*eec40579SJoe Thornber if (!strcasecmp(argv[0], "take_metadata_snap")) 1638*eec40579SJoe Thornber return in_worker0(era, metadata_take_snap); 1639*eec40579SJoe Thornber 1640*eec40579SJoe Thornber if (!strcasecmp(argv[0], "drop_metadata_snap")) 1641*eec40579SJoe Thornber return in_worker0(era, metadata_drop_snap); 1642*eec40579SJoe Thornber 1643*eec40579SJoe Thornber DMERR("unsupported message '%s'", argv[0]); 1644*eec40579SJoe Thornber return 
-EINVAL; 1645*eec40579SJoe Thornber } 1646*eec40579SJoe Thornber 1647*eec40579SJoe Thornber static sector_t get_dev_size(struct dm_dev *dev) 1648*eec40579SJoe Thornber { 1649*eec40579SJoe Thornber return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; 1650*eec40579SJoe Thornber } 1651*eec40579SJoe Thornber 1652*eec40579SJoe Thornber static int era_iterate_devices(struct dm_target *ti, 1653*eec40579SJoe Thornber iterate_devices_callout_fn fn, void *data) 1654*eec40579SJoe Thornber { 1655*eec40579SJoe Thornber struct era *era = ti->private; 1656*eec40579SJoe Thornber return fn(ti, era->origin_dev, 0, get_dev_size(era->origin_dev), data); 1657*eec40579SJoe Thornber } 1658*eec40579SJoe Thornber 1659*eec40579SJoe Thornber static int era_merge(struct dm_target *ti, struct bvec_merge_data *bvm, 1660*eec40579SJoe Thornber struct bio_vec *biovec, int max_size) 1661*eec40579SJoe Thornber { 1662*eec40579SJoe Thornber struct era *era = ti->private; 1663*eec40579SJoe Thornber struct request_queue *q = bdev_get_queue(era->origin_dev->bdev); 1664*eec40579SJoe Thornber 1665*eec40579SJoe Thornber if (!q->merge_bvec_fn) 1666*eec40579SJoe Thornber return max_size; 1667*eec40579SJoe Thornber 1668*eec40579SJoe Thornber bvm->bi_bdev = era->origin_dev->bdev; 1669*eec40579SJoe Thornber 1670*eec40579SJoe Thornber return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); 1671*eec40579SJoe Thornber } 1672*eec40579SJoe Thornber 1673*eec40579SJoe Thornber static void era_io_hints(struct dm_target *ti, struct queue_limits *limits) 1674*eec40579SJoe Thornber { 1675*eec40579SJoe Thornber struct era *era = ti->private; 1676*eec40579SJoe Thornber uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT; 1677*eec40579SJoe Thornber 1678*eec40579SJoe Thornber /* 1679*eec40579SJoe Thornber * If the system-determined stacked limits are compatible with the 1680*eec40579SJoe Thornber * era device's blocksize (io_opt is a factor) do not override them. 
1681*eec40579SJoe Thornber */ 1682*eec40579SJoe Thornber if (io_opt_sectors < era->sectors_per_block || 1683*eec40579SJoe Thornber do_div(io_opt_sectors, era->sectors_per_block)) { 1684*eec40579SJoe Thornber blk_limits_io_min(limits, 0); 1685*eec40579SJoe Thornber blk_limits_io_opt(limits, era->sectors_per_block << SECTOR_SHIFT); 1686*eec40579SJoe Thornber } 1687*eec40579SJoe Thornber } 1688*eec40579SJoe Thornber 1689*eec40579SJoe Thornber /*----------------------------------------------------------------*/ 1690*eec40579SJoe Thornber 1691*eec40579SJoe Thornber static struct target_type era_target = { 1692*eec40579SJoe Thornber .name = "era", 1693*eec40579SJoe Thornber .version = {1, 0, 0}, 1694*eec40579SJoe Thornber .module = THIS_MODULE, 1695*eec40579SJoe Thornber .ctr = era_ctr, 1696*eec40579SJoe Thornber .dtr = era_dtr, 1697*eec40579SJoe Thornber .map = era_map, 1698*eec40579SJoe Thornber .postsuspend = era_postsuspend, 1699*eec40579SJoe Thornber .preresume = era_preresume, 1700*eec40579SJoe Thornber .status = era_status, 1701*eec40579SJoe Thornber .message = era_message, 1702*eec40579SJoe Thornber .iterate_devices = era_iterate_devices, 1703*eec40579SJoe Thornber .merge = era_merge, 1704*eec40579SJoe Thornber .io_hints = era_io_hints 1705*eec40579SJoe Thornber }; 1706*eec40579SJoe Thornber 1707*eec40579SJoe Thornber static int __init dm_era_init(void) 1708*eec40579SJoe Thornber { 1709*eec40579SJoe Thornber int r; 1710*eec40579SJoe Thornber 1711*eec40579SJoe Thornber r = dm_register_target(&era_target); 1712*eec40579SJoe Thornber if (r) { 1713*eec40579SJoe Thornber DMERR("era target registration failed: %d", r); 1714*eec40579SJoe Thornber return r; 1715*eec40579SJoe Thornber } 1716*eec40579SJoe Thornber 1717*eec40579SJoe Thornber return 0; 1718*eec40579SJoe Thornber } 1719*eec40579SJoe Thornber 1720*eec40579SJoe Thornber static void __exit dm_era_exit(void) 1721*eec40579SJoe Thornber { 1722*eec40579SJoe Thornber dm_unregister_target(&era_target); 
1723*eec40579SJoe Thornber } 1724*eec40579SJoe Thornber 1725*eec40579SJoe Thornber module_init(dm_era_init); 1726*eec40579SJoe Thornber module_exit(dm_era_exit); 1727*eec40579SJoe Thornber 1728*eec40579SJoe Thornber MODULE_DESCRIPTION(DM_NAME " era target"); 1729*eec40579SJoe Thornber MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>"); 1730*eec40579SJoe Thornber MODULE_LICENSE("GPL"); 1731