1cd575ddfSKent Overstreet // SPDX-License-Identifier: GPL-2.0 2cd575ddfSKent Overstreet 3cd575ddfSKent Overstreet /* erasure coding */ 4cd575ddfSKent Overstreet 5cd575ddfSKent Overstreet #include "bcachefs.h" 6cd575ddfSKent Overstreet #include "alloc_foreground.h" 7cd575ddfSKent Overstreet #include "bset.h" 8cd575ddfSKent Overstreet #include "btree_gc.h" 9cd575ddfSKent Overstreet #include "btree_update.h" 10cd575ddfSKent Overstreet #include "buckets.h" 11cd575ddfSKent Overstreet #include "disk_groups.h" 12cd575ddfSKent Overstreet #include "ec.h" 13cd575ddfSKent Overstreet #include "error.h" 14cd575ddfSKent Overstreet #include "io.h" 1561c8d7c8SKent Overstreet #include "journal_io.h" 16cd575ddfSKent Overstreet #include "keylist.h" 17cd575ddfSKent Overstreet #include "super-io.h" 18cd575ddfSKent Overstreet #include "util.h" 19cd575ddfSKent Overstreet 20de5bb710SKent Overstreet #include <linux/sort.h> 21de5bb710SKent Overstreet 22de5bb710SKent Overstreet #ifdef __KERNEL__ 23de5bb710SKent Overstreet 24cd575ddfSKent Overstreet #include <linux/raid/pq.h> 25cd575ddfSKent Overstreet #include <linux/raid/xor.h> 26de5bb710SKent Overstreet 27de5bb710SKent Overstreet static void raid5_recov(unsigned disks, unsigned failed_idx, 28de5bb710SKent Overstreet size_t size, void **data) 29de5bb710SKent Overstreet { 30de5bb710SKent Overstreet unsigned i = 2, nr; 31de5bb710SKent Overstreet 32de5bb710SKent Overstreet BUG_ON(failed_idx >= disks); 33de5bb710SKent Overstreet 34de5bb710SKent Overstreet swap(data[0], data[failed_idx]); 35de5bb710SKent Overstreet memcpy(data[0], data[1], size); 36de5bb710SKent Overstreet 37de5bb710SKent Overstreet while (i < disks) { 38de5bb710SKent Overstreet nr = min_t(unsigned, disks - i, MAX_XOR_BLOCKS); 39de5bb710SKent Overstreet xor_blocks(nr, size, data[0], data + i); 40de5bb710SKent Overstreet i += nr; 41de5bb710SKent Overstreet } 42de5bb710SKent Overstreet 43de5bb710SKent Overstreet swap(data[0], data[failed_idx]); 44de5bb710SKent Overstreet } 45de5bb710SKent Overstreet 46de5bb710SKent Overstreet static void raid_gen(int nd, int np, size_t size, void **v) 47de5bb710SKent Overstreet { 48de5bb710SKent Overstreet if (np >= 1) 49de5bb710SKent Overstreet raid5_recov(nd + np, nd, size, v); 50de5bb710SKent Overstreet if (np >= 2) 51de5bb710SKent Overstreet raid6_call.gen_syndrome(nd + np, size, v); 52de5bb710SKent Overstreet BUG_ON(np > 2); 53de5bb710SKent Overstreet } 54de5bb710SKent Overstreet 55de5bb710SKent Overstreet static void raid_rec(int nr, int *ir, int nd, int np, size_t size, void **v) 56de5bb710SKent Overstreet { 57de5bb710SKent Overstreet switch (nr) { 58de5bb710SKent Overstreet case 0: 59de5bb710SKent Overstreet break; 60de5bb710SKent Overstreet case 1: 61de5bb710SKent Overstreet if (ir[0] < nd + 1) 62de5bb710SKent Overstreet raid5_recov(nd + 1, ir[0], size, v); 63de5bb710SKent Overstreet else 64de5bb710SKent Overstreet raid6_call.gen_syndrome(nd + np, size, v); 65de5bb710SKent Overstreet break; 66de5bb710SKent Overstreet case 2: 67de5bb710SKent Overstreet if (ir[1] < nd) { 68de5bb710SKent Overstreet /* data+data failure. */ 69de5bb710SKent Overstreet raid6_2data_recov(nd + np, size, ir[0], ir[1], v); 70de5bb710SKent Overstreet } else if (ir[0] < nd) { 71de5bb710SKent Overstreet /* data + p/q failure */ 72de5bb710SKent Overstreet 73de5bb710SKent Overstreet if (ir[1] == nd) /* data + p failure */ 74de5bb710SKent Overstreet raid6_datap_recov(nd + np, size, ir[0], v); 75de5bb710SKent Overstreet else { /* data + q failure */ 76de5bb710SKent Overstreet raid5_recov(nd + 1, ir[0], size, v); 77de5bb710SKent Overstreet raid6_call.gen_syndrome(nd + np, size, v); 78de5bb710SKent Overstreet } 79de5bb710SKent Overstreet } else { 80de5bb710SKent Overstreet raid_gen(nd, np, size, v); 81de5bb710SKent Overstreet } 82de5bb710SKent Overstreet break; 83de5bb710SKent Overstreet default: 84de5bb710SKent Overstreet BUG(); 85de5bb710SKent Overstreet } 86de5bb710SKent Overstreet } 87de5bb710SKent Overstreet 88de5bb710SKent Overstreet #else 89de5bb710SKent Overstreet 90de5bb710SKent Overstreet #include <raid/raid.h> 91de5bb710SKent Overstreet 92de5bb710SKent Overstreet #endif 93cd575ddfSKent Overstreet 94cd575ddfSKent Overstreet struct ec_bio { 95cd575ddfSKent Overstreet struct bch_dev *ca; 96cd575ddfSKent Overstreet struct ec_stripe_buf *buf; 97cd575ddfSKent Overstreet size_t idx; 98cd575ddfSKent Overstreet struct bio bio; 99cd575ddfSKent Overstreet }; 100cd575ddfSKent Overstreet 101cd575ddfSKent Overstreet /* Stripes btree keys: */ 102cd575ddfSKent Overstreet 10326609b61SKent Overstreet const char *bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k) 104cd575ddfSKent Overstreet { 10526609b61SKent Overstreet const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; 10626609b61SKent Overstreet 107cd575ddfSKent Overstreet if (k.k->p.inode) 108cd575ddfSKent Overstreet return "invalid stripe key"; 109cd575ddfSKent Overstreet 110cd575ddfSKent Overstreet if (bkey_val_bytes(k.k) < sizeof(*s)) 111cd575ddfSKent Overstreet return "incorrect value size"; 112cd575ddfSKent Overstreet 11376640280SKent Overstreet if (bkey_val_bytes(k.k) < sizeof(*s) || 11476640280SKent Overstreet bkey_val_u64s(k.k) < stripe_val_u64s(s)) 115cd575ddfSKent Overstreet return "incorrect value size"; 116cd575ddfSKent Overstreet 117cd575ddfSKent Overstreet return NULL; 118cd575ddfSKent Overstreet } 119cd575ddfSKent Overstreet 12026609b61SKent Overstreet void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, 121cd575ddfSKent Overstreet struct bkey_s_c k) 122cd575ddfSKent Overstreet { 123cd575ddfSKent Overstreet const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; 124cd575ddfSKent Overstreet unsigned i; 125cd575ddfSKent Overstreet 126cd575ddfSKent Overstreet pr_buf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u", 127cd575ddfSKent Overstreet s->algorithm, 128cd575ddfSKent Overstreet le16_to_cpu(s->sectors), 129cd575ddfSKent Overstreet s->nr_blocks - s->nr_redundant, 130cd575ddfSKent Overstreet s->nr_redundant, 131cd575ddfSKent Overstreet s->csum_type, 132cd575ddfSKent Overstreet 1U << s->csum_granularity_bits); 133cd575ddfSKent Overstreet 134cd575ddfSKent Overstreet for (i = 0; i < s->nr_blocks; i++) 13561c8d7c8SKent Overstreet pr_buf(out, " %u:%llu:%u", s->ptrs[i].dev, 13661c8d7c8SKent Overstreet (u64) s->ptrs[i].offset, 13761c8d7c8SKent Overstreet stripe_blockcount_get(s, i)); 138cd575ddfSKent Overstreet } 139cd575ddfSKent Overstreet 140cd575ddfSKent Overstreet static int ptr_matches_stripe(struct bch_fs *c, 141cd575ddfSKent Overstreet struct bch_stripe *v, 142cd575ddfSKent Overstreet const struct bch_extent_ptr *ptr) 143cd575ddfSKent Overstreet { 144cd575ddfSKent Overstreet unsigned i; 145cd575ddfSKent Overstreet 146cd575ddfSKent Overstreet for (i = 0; i < v->nr_blocks - v->nr_redundant; i++) { 147cd575ddfSKent Overstreet const struct bch_extent_ptr *ptr2 = v->ptrs + i; 148cd575ddfSKent Overstreet 149cd575ddfSKent Overstreet if (ptr->dev == ptr2->dev && 150cd575ddfSKent Overstreet ptr->gen == ptr2->gen && 151cd575ddfSKent Overstreet ptr->offset >= ptr2->offset && 152cd575ddfSKent Overstreet ptr->offset < ptr2->offset + le16_to_cpu(v->sectors)) 153cd575ddfSKent Overstreet return i; 154cd575ddfSKent Overstreet } 155cd575ddfSKent Overstreet 156cd575ddfSKent Overstreet return -1; 157cd575ddfSKent Overstreet } 158cd575ddfSKent Overstreet 159cd575ddfSKent Overstreet static int extent_matches_stripe(struct bch_fs *c, 160cd575ddfSKent Overstreet struct bch_stripe *v, 161cd575ddfSKent Overstreet struct bkey_s_c k) 162cd575ddfSKent Overstreet { 163cd575ddfSKent Overstreet struct bkey_s_c_extent e; 164cd575ddfSKent Overstreet const struct bch_extent_ptr *ptr; 165cd575ddfSKent Overstreet int idx; 166cd575ddfSKent Overstreet 167cd575ddfSKent Overstreet if (!bkey_extent_is_data(k.k)) 168cd575ddfSKent Overstreet return -1; 169cd575ddfSKent Overstreet 170cd575ddfSKent Overstreet e = bkey_s_c_to_extent(k); 171cd575ddfSKent Overstreet 172cd575ddfSKent Overstreet extent_for_each_ptr(e, ptr) { 173cd575ddfSKent Overstreet idx = ptr_matches_stripe(c, v, ptr); 174cd575ddfSKent Overstreet if (idx >= 0) 175cd575ddfSKent Overstreet return idx; 176cd575ddfSKent Overstreet } 177cd575ddfSKent Overstreet 178cd575ddfSKent Overstreet return -1; 179cd575ddfSKent Overstreet } 180cd575ddfSKent Overstreet 181cd575ddfSKent Overstreet static void ec_stripe_key_init(struct bch_fs *c, 182cd575ddfSKent Overstreet struct bkey_i_stripe *s, 183cd575ddfSKent Overstreet struct open_buckets *blocks, 184cd575ddfSKent Overstreet struct open_buckets *parity, 185cd575ddfSKent Overstreet unsigned stripe_size) 186cd575ddfSKent Overstreet { 187cd575ddfSKent Overstreet struct open_bucket *ob; 188cd575ddfSKent Overstreet unsigned i, u64s; 189cd575ddfSKent Overstreet 190cd575ddfSKent Overstreet bkey_stripe_init(&s->k_i); 191cd575ddfSKent Overstreet s->v.sectors = cpu_to_le16(stripe_size); 192cd575ddfSKent Overstreet s->v.algorithm = 0; 193cd575ddfSKent Overstreet s->v.nr_blocks = parity->nr + blocks->nr; 194cd575ddfSKent Overstreet s->v.nr_redundant = parity->nr; 195cd575ddfSKent Overstreet s->v.csum_granularity_bits = ilog2(c->sb.encoded_extent_max); 196cd575ddfSKent Overstreet s->v.csum_type = BCH_CSUM_CRC32C; 197cd575ddfSKent Overstreet s->v.pad = 0; 198cd575ddfSKent Overstreet 199cd575ddfSKent Overstreet open_bucket_for_each(c, blocks, ob, i) 200cd575ddfSKent Overstreet s->v.ptrs[i] = ob->ptr; 201cd575ddfSKent Overstreet 202cd575ddfSKent Overstreet open_bucket_for_each(c, parity, ob, i) 203cd575ddfSKent Overstreet s->v.ptrs[blocks->nr + i] = ob->ptr; 204cd575ddfSKent Overstreet 205cd575ddfSKent Overstreet while ((u64s = stripe_val_u64s(&s->v)) > BKEY_VAL_U64s_MAX) { 206cd575ddfSKent Overstreet BUG_ON(1 << s->v.csum_granularity_bits >= 207cd575ddfSKent Overstreet le16_to_cpu(s->v.sectors) || 208cd575ddfSKent Overstreet s->v.csum_granularity_bits == U8_MAX); 209cd575ddfSKent Overstreet s->v.csum_granularity_bits++; 210cd575ddfSKent Overstreet } 211cd575ddfSKent Overstreet 212cd575ddfSKent Overstreet set_bkey_val_u64s(&s->k, u64s); 213cd575ddfSKent Overstreet } 214cd575ddfSKent Overstreet 215cd575ddfSKent Overstreet /* Checksumming: */ 216cd575ddfSKent Overstreet 217cd575ddfSKent Overstreet static void ec_generate_checksums(struct ec_stripe_buf *buf) 218cd575ddfSKent Overstreet { 219cd575ddfSKent Overstreet struct bch_stripe *v = &buf->key.v; 220cd575ddfSKent Overstreet unsigned csum_granularity = 1 << v->csum_granularity_bits; 221cd575ddfSKent Overstreet unsigned csums_per_device = stripe_csums_per_device(v); 222cd575ddfSKent Overstreet unsigned csum_bytes = bch_crc_bytes[v->csum_type]; 223cd575ddfSKent Overstreet unsigned i, j; 224cd575ddfSKent Overstreet 225cd575ddfSKent Overstreet if (!csum_bytes) 226cd575ddfSKent Overstreet return; 227cd575ddfSKent Overstreet 228cd575ddfSKent Overstreet BUG_ON(buf->offset); 229cd575ddfSKent Overstreet BUG_ON(buf->size != le16_to_cpu(v->sectors)); 230cd575ddfSKent Overstreet 231cd575ddfSKent Overstreet for (i = 0; i < v->nr_blocks; i++) { 232cd575ddfSKent Overstreet for (j = 0; j < csums_per_device; j++) { 233cd575ddfSKent Overstreet unsigned offset = j << v->csum_granularity_bits; 234cd575ddfSKent Overstreet unsigned len = min(csum_granularity, buf->size - offset); 235cd575ddfSKent Overstreet 236cd575ddfSKent Overstreet struct bch_csum csum = 237cd575ddfSKent Overstreet bch2_checksum(NULL, v->csum_type, 238cd575ddfSKent Overstreet null_nonce(), 239cd575ddfSKent Overstreet buf->data[i] + (offset << 9), 240cd575ddfSKent Overstreet len << 9); 241cd575ddfSKent Overstreet 242cd575ddfSKent Overstreet memcpy(stripe_csum(v, i, j), &csum, csum_bytes); 243cd575ddfSKent Overstreet } 244cd575ddfSKent Overstreet } 245cd575ddfSKent Overstreet } 246cd575ddfSKent Overstreet 247cd575ddfSKent Overstreet static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf) 248cd575ddfSKent Overstreet { 249cd575ddfSKent Overstreet struct bch_stripe *v = &buf->key.v; 250cd575ddfSKent Overstreet unsigned csum_granularity = 1 << v->csum_granularity_bits; 251cd575ddfSKent Overstreet unsigned csum_bytes = bch_crc_bytes[v->csum_type]; 252cd575ddfSKent Overstreet unsigned i; 253cd575ddfSKent Overstreet 254cd575ddfSKent Overstreet if (!csum_bytes) 255cd575ddfSKent Overstreet return; 256cd575ddfSKent Overstreet 257cd575ddfSKent Overstreet for (i = 0; i < v->nr_blocks; i++) { 258cd575ddfSKent Overstreet unsigned offset = buf->offset; 259cd575ddfSKent Overstreet unsigned end = buf->offset + buf->size; 260cd575ddfSKent Overstreet 261cd575ddfSKent Overstreet if (!test_bit(i, buf->valid)) 262cd575ddfSKent Overstreet continue; 263cd575ddfSKent Overstreet 264cd575ddfSKent Overstreet while (offset < end) { 265cd575ddfSKent Overstreet unsigned j = offset >> v->csum_granularity_bits; 266cd575ddfSKent Overstreet unsigned len = min(csum_granularity, end - offset); 267cd575ddfSKent Overstreet struct bch_csum csum; 268cd575ddfSKent Overstreet 269cd575ddfSKent Overstreet BUG_ON(offset & (csum_granularity - 1)); 270cd575ddfSKent Overstreet BUG_ON(offset + len != le16_to_cpu(v->sectors) && 271cd575ddfSKent Overstreet ((offset + len) & (csum_granularity - 1))); 272cd575ddfSKent Overstreet 273cd575ddfSKent Overstreet csum = bch2_checksum(NULL, v->csum_type, 274cd575ddfSKent Overstreet null_nonce(), 275cd575ddfSKent Overstreet buf->data[i] + ((offset - buf->offset) << 9), 276cd575ddfSKent Overstreet len << 9); 277cd575ddfSKent Overstreet 278cd575ddfSKent Overstreet if (memcmp(stripe_csum(v, i, j), &csum, csum_bytes)) { 279cd575ddfSKent Overstreet __bcache_io_error(c, 280cd575ddfSKent Overstreet "checksum error while doing reconstruct read (%u:%u)", 281cd575ddfSKent Overstreet i, j); 282cd575ddfSKent Overstreet clear_bit(i, buf->valid); 283cd575ddfSKent Overstreet break; 284cd575ddfSKent Overstreet } 285cd575ddfSKent Overstreet 286cd575ddfSKent Overstreet offset += len; 287cd575ddfSKent Overstreet } 288cd575ddfSKent Overstreet } 289cd575ddfSKent Overstreet } 290cd575ddfSKent Overstreet 291cd575ddfSKent Overstreet /* Erasure coding: */ 292cd575ddfSKent Overstreet 293cd575ddfSKent Overstreet static void ec_generate_ec(struct ec_stripe_buf *buf) 294cd575ddfSKent Overstreet { 295cd575ddfSKent Overstreet struct bch_stripe *v = &buf->key.v; 296cd575ddfSKent Overstreet unsigned nr_data = v->nr_blocks - v->nr_redundant; 297cd575ddfSKent Overstreet unsigned bytes = le16_to_cpu(v->sectors) << 9; 298cd575ddfSKent Overstreet 299de5bb710SKent Overstreet raid_gen(nr_data, v->nr_redundant, bytes, buf->data); 300cd575ddfSKent Overstreet } 301cd575ddfSKent Overstreet 302cd575ddfSKent Overstreet static unsigned __ec_nr_failed(struct ec_stripe_buf *buf, unsigned nr) 303cd575ddfSKent Overstreet { 304cd575ddfSKent Overstreet return nr - bitmap_weight(buf->valid, nr); 305cd575ddfSKent Overstreet } 306cd575ddfSKent Overstreet 307cd575ddfSKent Overstreet static unsigned ec_nr_failed(struct ec_stripe_buf *buf) 308cd575ddfSKent Overstreet { 309cd575ddfSKent Overstreet return __ec_nr_failed(buf, buf->key.v.nr_blocks); 310cd575ddfSKent Overstreet } 311cd575ddfSKent Overstreet 312cd575ddfSKent Overstreet static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf) 313cd575ddfSKent Overstreet { 314cd575ddfSKent Overstreet struct bch_stripe *v = &buf->key.v; 315cd575ddfSKent Overstreet unsigned i, failed[EC_STRIPE_MAX], nr_failed = 0; 316cd575ddfSKent Overstreet unsigned nr_data = v->nr_blocks - v->nr_redundant; 317cd575ddfSKent Overstreet unsigned bytes = buf->size << 9; 318cd575ddfSKent Overstreet 319cd575ddfSKent Overstreet if (ec_nr_failed(buf) > v->nr_redundant) { 320cd575ddfSKent Overstreet __bcache_io_error(c, 321cd575ddfSKent Overstreet "error doing reconstruct read: unable to read enough blocks"); 322cd575ddfSKent Overstreet return -1; 323cd575ddfSKent Overstreet } 324cd575ddfSKent Overstreet 325cd575ddfSKent Overstreet for (i = 0; i < nr_data; i++) 326cd575ddfSKent Overstreet if (!test_bit(i, buf->valid)) 327cd575ddfSKent Overstreet failed[nr_failed++] = i; 328cd575ddfSKent Overstreet 329de5bb710SKent Overstreet raid_rec(nr_failed, failed, nr_data, v->nr_redundant, bytes, buf->data); 330cd575ddfSKent Overstreet return 0; 331cd575ddfSKent Overstreet } 332cd575ddfSKent Overstreet 333cd575ddfSKent Overstreet /* IO: */ 334cd575ddfSKent Overstreet 335cd575ddfSKent Overstreet static void ec_block_endio(struct bio *bio) 336cd575ddfSKent Overstreet { 337cd575ddfSKent Overstreet struct ec_bio *ec_bio = container_of(bio, struct ec_bio, bio); 338cd575ddfSKent Overstreet struct bch_dev *ca = ec_bio->ca; 339cd575ddfSKent Overstreet struct closure *cl = bio->bi_private; 340cd575ddfSKent Overstreet 341cd575ddfSKent Overstreet if (bch2_dev_io_err_on(bio->bi_status, ca, "erasure coding")) 342cd575ddfSKent Overstreet clear_bit(ec_bio->idx, ec_bio->buf->valid); 343cd575ddfSKent Overstreet 344cd575ddfSKent Overstreet bio_put(&ec_bio->bio); 345cd575ddfSKent Overstreet percpu_ref_put(&ca->io_ref); 346cd575ddfSKent Overstreet closure_put(cl); 347cd575ddfSKent Overstreet } 348cd575ddfSKent Overstreet 349cd575ddfSKent Overstreet static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, 350cd575ddfSKent Overstreet unsigned rw, unsigned idx, struct closure *cl) 351cd575ddfSKent Overstreet { 352cd575ddfSKent Overstreet struct bch_stripe *v = &buf->key.v; 353cd575ddfSKent Overstreet unsigned offset = 0, bytes = buf->size << 9; 354cd575ddfSKent Overstreet struct bch_extent_ptr *ptr = &v->ptrs[idx]; 355cd575ddfSKent Overstreet struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); 356cd575ddfSKent Overstreet 357cd575ddfSKent Overstreet if (!bch2_dev_get_ioref(ca, rw)) { 358cd575ddfSKent Overstreet clear_bit(idx, buf->valid); 359cd575ddfSKent Overstreet return; 360cd575ddfSKent Overstreet } 361cd575ddfSKent Overstreet 362cd575ddfSKent Overstreet while (offset < bytes) { 363cd575ddfSKent Overstreet unsigned nr_iovecs = min_t(size_t, BIO_MAX_VECS, 364cd575ddfSKent Overstreet DIV_ROUND_UP(bytes, PAGE_SIZE)); 365cd575ddfSKent Overstreet unsigned b = min_t(size_t, bytes - offset, 366cd575ddfSKent Overstreet nr_iovecs << PAGE_SHIFT); 367cd575ddfSKent Overstreet struct ec_bio *ec_bio; 368cd575ddfSKent Overstreet 369cd575ddfSKent Overstreet ec_bio = container_of(bio_alloc_bioset(ca->disk_sb.bdev, 370cd575ddfSKent Overstreet nr_iovecs, 371cd575ddfSKent Overstreet rw, 372cd575ddfSKent Overstreet GFP_KERNEL, 373cd575ddfSKent Overstreet &c->ec_bioset), 374cd575ddfSKent Overstreet struct ec_bio, bio); 375cd575ddfSKent Overstreet 376cd575ddfSKent Overstreet ec_bio->ca = ca; 377cd575ddfSKent Overstreet ec_bio->buf = buf; 378cd575ddfSKent Overstreet ec_bio->idx = idx; 379cd575ddfSKent Overstreet 380cd575ddfSKent Overstreet ec_bio->bio.bi_iter.bi_sector = ptr->offset + buf->offset + (offset >> 9); 381cd575ddfSKent Overstreet ec_bio->bio.bi_iter.bi_size = b; 382cd575ddfSKent Overstreet ec_bio->bio.bi_end_io = ec_block_endio; 383cd575ddfSKent Overstreet ec_bio->bio.bi_private = cl; 384cd575ddfSKent Overstreet 385cd575ddfSKent Overstreet bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset); 386cd575ddfSKent Overstreet 387cd575ddfSKent Overstreet closure_get(cl); 388cd575ddfSKent Overstreet percpu_ref_get(&ca->io_ref); 389cd575ddfSKent Overstreet 390cd575ddfSKent Overstreet submit_bio(&ec_bio->bio); 391cd575ddfSKent Overstreet 392cd575ddfSKent Overstreet offset += b; 393cd575ddfSKent Overstreet } 394cd575ddfSKent Overstreet 395cd575ddfSKent Overstreet percpu_ref_put(&ca->io_ref); 396cd575ddfSKent Overstreet } 397cd575ddfSKent Overstreet 398cd575ddfSKent Overstreet /* recovery read path: */ 399cd575ddfSKent Overstreet int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) 400cd575ddfSKent Overstreet { 401cd575ddfSKent Overstreet struct btree_iter iter; 402cd575ddfSKent Overstreet struct ec_stripe_buf *buf; 403cd575ddfSKent Overstreet struct closure cl; 404cd575ddfSKent Overstreet struct bkey_s_c k; 405cd575ddfSKent Overstreet struct bch_stripe *v; 406cd575ddfSKent Overstreet unsigned stripe_idx; 407cd575ddfSKent Overstreet unsigned offset, end; 408cd575ddfSKent Overstreet unsigned i, nr_data, csum_granularity; 409cd575ddfSKent Overstreet int ret = 0, idx; 410cd575ddfSKent Overstreet 411cd575ddfSKent Overstreet closure_init_stack(&cl); 412cd575ddfSKent Overstreet 413cd575ddfSKent Overstreet BUG_ON(!rbio->pick.idx || 414cd575ddfSKent Overstreet rbio->pick.idx - 1 >= rbio->pick.ec_nr); 415cd575ddfSKent Overstreet 416cd575ddfSKent Overstreet stripe_idx = rbio->pick.ec[rbio->pick.idx - 1].idx; 417cd575ddfSKent Overstreet 418cd575ddfSKent Overstreet buf = kzalloc(sizeof(*buf), GFP_NOIO); 419cd575ddfSKent Overstreet if (!buf) 420cd575ddfSKent Overstreet return -ENOMEM; 421cd575ddfSKent Overstreet 422cd575ddfSKent Overstreet bch2_btree_iter_init(&iter, c, BTREE_ID_EC, 423cd575ddfSKent Overstreet POS(0, stripe_idx), 424cd575ddfSKent Overstreet BTREE_ITER_SLOTS); 425cd575ddfSKent Overstreet k = bch2_btree_iter_peek_slot(&iter); 42626609b61SKent Overstreet if (btree_iter_err(k) || k.k->type != KEY_TYPE_stripe) { 427cd575ddfSKent Overstreet __bcache_io_error(c, 428cd575ddfSKent Overstreet "error doing reconstruct read: stripe not found"); 429cd575ddfSKent Overstreet kfree(buf); 430cd575ddfSKent Overstreet return bch2_btree_iter_unlock(&iter) ?: -EIO; 431cd575ddfSKent Overstreet } 432cd575ddfSKent Overstreet 433cd575ddfSKent Overstreet bkey_reassemble(&buf->key.k_i, k); 434cd575ddfSKent Overstreet bch2_btree_iter_unlock(&iter); 435cd575ddfSKent Overstreet 436cd575ddfSKent Overstreet v = &buf->key.v; 437cd575ddfSKent Overstreet 438cd575ddfSKent Overstreet nr_data = v->nr_blocks - v->nr_redundant; 439cd575ddfSKent Overstreet 440cd575ddfSKent Overstreet idx = ptr_matches_stripe(c, v, &rbio->pick.ptr); 441cd575ddfSKent Overstreet BUG_ON(idx < 0); 442cd575ddfSKent Overstreet 443cd575ddfSKent Overstreet csum_granularity = 1U << v->csum_granularity_bits; 444cd575ddfSKent Overstreet 445cd575ddfSKent Overstreet offset = rbio->bio.bi_iter.bi_sector - v->ptrs[idx].offset; 446cd575ddfSKent Overstreet end = offset + bio_sectors(&rbio->bio); 447cd575ddfSKent Overstreet 448cd575ddfSKent Overstreet BUG_ON(end > le16_to_cpu(v->sectors)); 449cd575ddfSKent Overstreet 450cd575ddfSKent Overstreet buf->offset = round_down(offset, csum_granularity); 451cd575ddfSKent Overstreet buf->size = min_t(unsigned, le16_to_cpu(v->sectors), 452cd575ddfSKent Overstreet round_up(end, csum_granularity)) - buf->offset; 453cd575ddfSKent Overstreet 454cd575ddfSKent Overstreet for (i = 0; i < v->nr_blocks; i++) { 455cd575ddfSKent Overstreet buf->data[i] = kmalloc(buf->size << 9, GFP_NOIO); 456cd575ddfSKent Overstreet if (!buf->data[i]) { 457cd575ddfSKent Overstreet ret = -ENOMEM; 458cd575ddfSKent Overstreet goto err; 459cd575ddfSKent Overstreet } 460cd575ddfSKent Overstreet } 461cd575ddfSKent Overstreet 462cd575ddfSKent Overstreet memset(buf->valid, 0xFF, sizeof(buf->valid)); 463cd575ddfSKent Overstreet 464cd575ddfSKent Overstreet for (i = 0; i < v->nr_blocks; i++) { 465cd575ddfSKent Overstreet struct bch_extent_ptr *ptr = v->ptrs + i; 466cd575ddfSKent Overstreet struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); 467cd575ddfSKent Overstreet 468cd575ddfSKent Overstreet if (ptr_stale(ca, ptr)) { 469cd575ddfSKent Overstreet __bcache_io_error(c, 470cd575ddfSKent Overstreet "error doing reconstruct read: stale pointer"); 471cd575ddfSKent Overstreet clear_bit(i, buf->valid); 472cd575ddfSKent Overstreet continue; 473cd575ddfSKent Overstreet } 474cd575ddfSKent Overstreet 475cd575ddfSKent Overstreet ec_block_io(c, buf, REQ_OP_READ, i, &cl); 476cd575ddfSKent Overstreet } 477cd575ddfSKent Overstreet 478cd575ddfSKent Overstreet closure_sync(&cl); 479cd575ddfSKent Overstreet 480cd575ddfSKent Overstreet if (ec_nr_failed(buf) > v->nr_redundant) { 481cd575ddfSKent Overstreet __bcache_io_error(c, 482cd575ddfSKent Overstreet "error doing reconstruct read: unable to read enough blocks"); 483cd575ddfSKent Overstreet ret = -EIO; 484cd575ddfSKent Overstreet goto err; 485cd575ddfSKent Overstreet } 486cd575ddfSKent Overstreet 487cd575ddfSKent Overstreet ec_validate_checksums(c, buf); 488cd575ddfSKent Overstreet 489cd575ddfSKent Overstreet ret = ec_do_recov(c, buf); 490cd575ddfSKent Overstreet if (ret) 491cd575ddfSKent Overstreet goto err; 492cd575ddfSKent Overstreet 493cd575ddfSKent Overstreet memcpy_to_bio(&rbio->bio, rbio->bio.bi_iter, 494cd575ddfSKent Overstreet buf->data[idx] + ((offset - buf->offset) << 9)); 495cd575ddfSKent Overstreet err: 496cd575ddfSKent Overstreet for (i = 0; i < v->nr_blocks; i++) 497cd575ddfSKent Overstreet kfree(buf->data[i]); 498cd575ddfSKent Overstreet kfree(buf); 499cd575ddfSKent Overstreet return ret; 500cd575ddfSKent Overstreet } 501cd575ddfSKent Overstreet 502dfe9bfb3SKent Overstreet /* stripe bucket accounting: */ 503cd575ddfSKent Overstreet 504cd575ddfSKent Overstreet static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp) 505cd575ddfSKent Overstreet { 506cd575ddfSKent Overstreet ec_stripes_heap n, *h = &c->ec_stripes_heap; 507cd575ddfSKent Overstreet 508cd575ddfSKent Overstreet if (idx >= h->size) { 509cd575ddfSKent Overstreet if (!init_heap(&n, max(1024UL, roundup_pow_of_two(idx + 1)), gfp)) 510cd575ddfSKent Overstreet return -ENOMEM; 511cd575ddfSKent Overstreet 512cd575ddfSKent Overstreet spin_lock(&c->ec_stripes_heap_lock); 513cd575ddfSKent Overstreet if (n.size > h->size) { 514cd575ddfSKent Overstreet memcpy(n.data, h->data, h->used * sizeof(h->data[0])); 515cd575ddfSKent Overstreet n.used = h->used; 516cd575ddfSKent Overstreet swap(*h, n); 517cd575ddfSKent Overstreet } 518cd575ddfSKent Overstreet spin_unlock(&c->ec_stripes_heap_lock); 519cd575ddfSKent Overstreet 520cd575ddfSKent Overstreet free_heap(&n); 521cd575ddfSKent Overstreet } 522cd575ddfSKent Overstreet 523dfe9bfb3SKent Overstreet if (!genradix_ptr_alloc(&c->stripes[0], idx, gfp)) 524dfe9bfb3SKent Overstreet return -ENOMEM; 525dfe9bfb3SKent Overstreet 526dfe9bfb3SKent Overstreet if (c->gc_pos.phase != GC_PHASE_NOT_RUNNING && 527dfe9bfb3SKent Overstreet !genradix_ptr_alloc(&c->stripes[1], idx, gfp)) 528cd575ddfSKent Overstreet return -ENOMEM; 529cd575ddfSKent Overstreet 530cd575ddfSKent Overstreet return 0; 531cd575ddfSKent Overstreet } 532cd575ddfSKent Overstreet 533cd575ddfSKent Overstreet static int ec_stripe_mem_alloc(struct bch_fs *c, 534cd575ddfSKent Overstreet struct btree_iter *iter) 535cd575ddfSKent Overstreet { 536cd575ddfSKent Overstreet size_t idx = iter->pos.offset; 537cd575ddfSKent Overstreet 538cd575ddfSKent Overstreet if (!__ec_stripe_mem_alloc(c, idx, GFP_NOWAIT|__GFP_NOWARN)) 539cd575ddfSKent Overstreet return 0; 540cd575ddfSKent Overstreet 541cd575ddfSKent Overstreet bch2_btree_iter_unlock(iter); 542cd575ddfSKent Overstreet 543cd575ddfSKent Overstreet if (!__ec_stripe_mem_alloc(c, idx, GFP_KERNEL)) 544cd575ddfSKent Overstreet return -EINTR; 545cd575ddfSKent Overstreet return -ENOMEM; 546cd575ddfSKent Overstreet } 547cd575ddfSKent Overstreet 548cd575ddfSKent Overstreet static ssize_t stripe_idx_to_delete(struct bch_fs *c) 549cd575ddfSKent Overstreet { 550cd575ddfSKent Overstreet ec_stripes_heap *h = &c->ec_stripes_heap; 551cd575ddfSKent Overstreet 552cd575ddfSKent Overstreet return h->data[0].blocks_nonempty == 0 ? h->data[0].idx : -1; 553cd575ddfSKent Overstreet } 554cd575ddfSKent Overstreet 555cd575ddfSKent Overstreet static inline int ec_stripes_heap_cmp(ec_stripes_heap *h, 556cd575ddfSKent Overstreet struct ec_stripe_heap_entry l, 557cd575ddfSKent Overstreet struct ec_stripe_heap_entry r) 558cd575ddfSKent Overstreet { 559cd575ddfSKent Overstreet return ((l.blocks_nonempty > r.blocks_nonempty) - 560cd575ddfSKent Overstreet (l.blocks_nonempty < r.blocks_nonempty)); 561cd575ddfSKent Overstreet } 562cd575ddfSKent Overstreet 563cd575ddfSKent Overstreet static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h, 564cd575ddfSKent Overstreet size_t i) 565cd575ddfSKent Overstreet { 566cd575ddfSKent Overstreet struct bch_fs *c = container_of(h, struct bch_fs, ec_stripes_heap); 567cd575ddfSKent Overstreet 568dfe9bfb3SKent Overstreet genradix_ptr(&c->stripes[0], h->data[i].idx)->heap_idx = i; 569cd575ddfSKent Overstreet } 570cd575ddfSKent Overstreet 571cd575ddfSKent Overstreet static void heap_verify_backpointer(struct bch_fs *c, size_t idx) 572cd575ddfSKent Overstreet { 573cd575ddfSKent Overstreet ec_stripes_heap *h = &c->ec_stripes_heap; 574dfe9bfb3SKent Overstreet struct stripe *m = genradix_ptr(&c->stripes[0], idx); 575cd575ddfSKent Overstreet 576cd575ddfSKent Overstreet BUG_ON(!m->alive); 577cd575ddfSKent Overstreet BUG_ON(m->heap_idx >= h->used); 578cd575ddfSKent Overstreet BUG_ON(h->data[m->heap_idx].idx != idx); 579cd575ddfSKent Overstreet } 580cd575ddfSKent Overstreet 581cd575ddfSKent Overstreet void bch2_stripes_heap_update(struct bch_fs *c, 582dfe9bfb3SKent Overstreet struct stripe *m, size_t idx) 583cd575ddfSKent Overstreet { 584cd575ddfSKent Overstreet ec_stripes_heap *h = &c->ec_stripes_heap; 585cd575ddfSKent Overstreet size_t i; 586cd575ddfSKent Overstreet 587cd575ddfSKent Overstreet heap_verify_backpointer(c, idx); 588cd575ddfSKent Overstreet 58961c8d7c8SKent Overstreet h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty; 590cd575ddfSKent Overstreet 591cd575ddfSKent Overstreet i = m->heap_idx; 592cd575ddfSKent Overstreet heap_sift_up(h, i, ec_stripes_heap_cmp, 593cd575ddfSKent Overstreet ec_stripes_heap_set_backpointer); 594cd575ddfSKent Overstreet heap_sift_down(h, i, ec_stripes_heap_cmp, 595cd575ddfSKent Overstreet ec_stripes_heap_set_backpointer); 596cd575ddfSKent Overstreet 597cd575ddfSKent Overstreet heap_verify_backpointer(c, idx); 598cd575ddfSKent Overstreet 59961c8d7c8SKent Overstreet if (stripe_idx_to_delete(c) >= 0) 600cd575ddfSKent Overstreet schedule_work(&c->ec_stripe_delete_work); 601cd575ddfSKent Overstreet } 602cd575ddfSKent Overstreet 603cd575ddfSKent Overstreet void bch2_stripes_heap_del(struct bch_fs *c, 604dfe9bfb3SKent Overstreet struct stripe *m, size_t idx) 605cd575ddfSKent Overstreet { 606cd575ddfSKent Overstreet heap_verify_backpointer(c, idx); 607cd575ddfSKent Overstreet 608cd575ddfSKent Overstreet m->alive = false; 609cd575ddfSKent Overstreet heap_del(&c->ec_stripes_heap, m->heap_idx, 610cd575ddfSKent Overstreet ec_stripes_heap_cmp, 611cd575ddfSKent Overstreet ec_stripes_heap_set_backpointer); 612cd575ddfSKent Overstreet } 613cd575ddfSKent Overstreet 614cd575ddfSKent Overstreet void bch2_stripes_heap_insert(struct bch_fs *c, 615dfe9bfb3SKent Overstreet struct stripe *m, size_t idx) 616cd575ddfSKent Overstreet { 617cd575ddfSKent Overstreet BUG_ON(heap_full(&c->ec_stripes_heap)); 618cd575ddfSKent Overstreet 619cd575ddfSKent Overstreet heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) { 620cd575ddfSKent Overstreet .idx = idx, 62161c8d7c8SKent Overstreet .blocks_nonempty = m->blocks_nonempty, 622cd575ddfSKent Overstreet }), 623cd575ddfSKent Overstreet ec_stripes_heap_cmp, 624cd575ddfSKent Overstreet ec_stripes_heap_set_backpointer); 625cd575ddfSKent Overstreet m->alive = true; 626cd575ddfSKent Overstreet 627cd575ddfSKent Overstreet heap_verify_backpointer(c, idx); 628cd575ddfSKent Overstreet } 629cd575ddfSKent Overstreet 630dfe9bfb3SKent Overstreet /* stripe deletion */ 631dfe9bfb3SKent Overstreet 632*0564b167SKent Overstreet static int ec_stripe_delete(struct bch_fs *c, size_t idx) 633cd575ddfSKent Overstreet { 634*0564b167SKent Overstreet return bch2_btree_delete_range(c, BTREE_ID_EC, 635cd575ddfSKent Overstreet POS(0, idx), 636*0564b167SKent Overstreet POS(0, idx + 1), 637*0564b167SKent Overstreet NULL); 638cd575ddfSKent Overstreet } 639cd575ddfSKent Overstreet 640cd575ddfSKent Overstreet static void ec_stripe_delete_work(struct work_struct *work) 641cd575ddfSKent Overstreet { 642cd575ddfSKent Overstreet struct bch_fs *c = 643cd575ddfSKent Overstreet container_of(work, struct bch_fs, ec_stripe_delete_work); 644cd575ddfSKent Overstreet ssize_t idx; 645cd575ddfSKent Overstreet 646cd575ddfSKent Overstreet down_read(&c->gc_lock); 647dfe9bfb3SKent Overstreet mutex_lock(&c->ec_stripe_create_lock); 648cd575ddfSKent Overstreet 649cd575ddfSKent Overstreet while (1) { 650cd575ddfSKent Overstreet spin_lock(&c->ec_stripes_heap_lock); 651cd575ddfSKent Overstreet idx = stripe_idx_to_delete(c); 652cd575ddfSKent Overstreet spin_unlock(&c->ec_stripes_heap_lock); 653cd575ddfSKent Overstreet 654cd575ddfSKent Overstreet if (idx < 0) 655cd575ddfSKent Overstreet break; 656cd575ddfSKent Overstreet 657cd575ddfSKent Overstreet ec_stripe_delete(c, idx); 658cd575ddfSKent Overstreet } 659cd575ddfSKent Overstreet 660dfe9bfb3SKent Overstreet mutex_unlock(&c->ec_stripe_create_lock); 661cd575ddfSKent Overstreet up_read(&c->gc_lock); 662cd575ddfSKent Overstreet } 663cd575ddfSKent Overstreet 664dfe9bfb3SKent Overstreet /* stripe creation: */ 665dfe9bfb3SKent Overstreet 666cd575ddfSKent Overstreet static int ec_stripe_bkey_insert(struct bch_fs *c, 667cd575ddfSKent Overstreet struct bkey_i_stripe *stripe) 668cd575ddfSKent Overstreet { 669*0564b167SKent Overstreet struct btree_trans trans; 670*0564b167SKent Overstreet struct btree_iter *iter; 671cd575ddfSKent Overstreet struct bkey_s_c k; 672cd575ddfSKent Overstreet int ret; 673cd575ddfSKent Overstreet 674*0564b167SKent Overstreet bch2_trans_init(&trans, c); 675cd575ddfSKent Overstreet retry: 676*0564b167SKent Overstreet bch2_trans_begin(&trans); 677*0564b167SKent Overstreet 678*0564b167SKent Overstreet /* XXX: start pos hint */ 679*0564b167SKent Overstreet iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS_MIN, 680*0564b167SKent Overstreet BTREE_ITER_SLOTS|BTREE_ITER_INTENT); 681*0564b167SKent Overstreet 682*0564b167SKent Overstreet for_each_btree_key_continue(iter, BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k) { 683*0564b167SKent Overstreet if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0) 684*0564b167SKent Overstreet break; 685cd575ddfSKent Overstreet 686cd575ddfSKent Overstreet if (bkey_deleted(k.k)) 687cd575ddfSKent Overstreet goto found_slot; 688cd575ddfSKent Overstreet } 689cd575ddfSKent Overstreet 690*0564b167SKent Overstreet ret = -ENOSPC; 691*0564b167SKent Overstreet goto out; 692cd575ddfSKent Overstreet found_slot: 693*0564b167SKent Overstreet ret = ec_stripe_mem_alloc(c, iter); 694cd575ddfSKent Overstreet 695cd575ddfSKent Overstreet if (ret == -EINTR) 696cd575ddfSKent Overstreet goto retry; 697cd575ddfSKent Overstreet if (ret) 698cd575ddfSKent Overstreet return ret; 699cd575ddfSKent Overstreet 700*0564b167SKent Overstreet stripe->k.p = iter->pos; 701cd575ddfSKent Overstreet 702*0564b167SKent Overstreet bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &stripe->k_i)); 703*0564b167SKent Overstreet 704*0564b167SKent Overstreet ret = bch2_trans_commit(&trans, NULL, NULL, 705cd575ddfSKent Overstreet BTREE_INSERT_NOFAIL| 706*0564b167SKent Overstreet BTREE_INSERT_USE_RESERVE); 707*0564b167SKent Overstreet out: 708*0564b167SKent Overstreet bch2_trans_exit(&trans); 709cd575ddfSKent Overstreet 710cd575ddfSKent Overstreet return ret; 711cd575ddfSKent Overstreet } 712cd575ddfSKent Overstreet 713cd575ddfSKent Overstreet static void extent_stripe_ptr_add(struct bkey_s_extent e, 714cd575ddfSKent Overstreet struct ec_stripe_buf *s, 715cd575ddfSKent Overstreet struct bch_extent_ptr *ptr, 716cd575ddfSKent Overstreet unsigned block) 717cd575ddfSKent Overstreet { 718cd575ddfSKent Overstreet struct bch_extent_stripe_ptr *dst = (void *) ptr; 719cd575ddfSKent Overstreet union bch_extent_entry *end = extent_entry_last(e); 720cd575ddfSKent Overstreet 721cd575ddfSKent Overstreet memmove_u64s_up(dst + 1, dst, (u64 *) end - (u64 *) dst); 722cd575ddfSKent Overstreet e.k->u64s += sizeof(*dst) / sizeof(u64); 723cd575ddfSKent Overstreet 724cd575ddfSKent Overstreet *dst = (struct bch_extent_stripe_ptr) { 725cd575ddfSKent Overstreet .type = 1 << BCH_EXTENT_ENTRY_stripe_ptr, 726cd575ddfSKent Overstreet .block = block, 727cd575ddfSKent Overstreet .idx = s->key.k.p.offset, 728cd575ddfSKent Overstreet }; 729cd575ddfSKent Overstreet } 730cd575ddfSKent Overstreet 731cd575ddfSKent Overstreet static int ec_stripe_update_ptrs(struct bch_fs *c, 732cd575ddfSKent Overstreet struct ec_stripe_buf *s, 733cd575ddfSKent Overstreet struct bkey *pos) 734cd575ddfSKent Overstreet { 735*0564b167SKent Overstreet struct btree_trans trans; 736*0564b167SKent Overstreet struct btree_iter *iter; 737cd575ddfSKent Overstreet struct bkey_s_c k; 738cd575ddfSKent Overstreet struct bkey_s_extent e; 739cd575ddfSKent Overstreet struct bch_extent_ptr *ptr; 740cd575ddfSKent Overstreet BKEY_PADDED(k) tmp; 741cd575ddfSKent Overstreet int ret = 0, dev, idx; 742cd575ddfSKent Overstreet 743*0564b167SKent Overstreet bch2_trans_init(&trans, c); 744*0564b167SKent Overstreet 745*0564b167SKent Overstreet iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, 746cd575ddfSKent Overstreet bkey_start_pos(pos), 747cd575ddfSKent Overstreet BTREE_ITER_INTENT); 748cd575ddfSKent Overstreet 749*0564b167SKent Overstreet while ((k = bch2_btree_iter_peek(iter)).k && 750*0564b167SKent Overstreet !(ret = btree_iter_err(k)) && 751cd575ddfSKent Overstreet bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) { 752cd575ddfSKent Overstreet idx = extent_matches_stripe(c, &s->key.v, k); 753cd575ddfSKent Overstreet if (idx < 0) { 754*0564b167SKent Overstreet bch2_btree_iter_next(iter); 755cd575ddfSKent Overstreet continue; 756cd575ddfSKent Overstreet } 757cd575ddfSKent Overstreet 758cd575ddfSKent Overstreet dev = s->key.v.ptrs[idx].dev; 759cd575ddfSKent Overstreet 760cd575ddfSKent Overstreet bkey_reassemble(&tmp.k, k); 761cd575ddfSKent Overstreet e = bkey_i_to_s_extent(&tmp.k); 762cd575ddfSKent Overstreet 763cd575ddfSKent Overstreet extent_for_each_ptr(e, ptr) 764cd575ddfSKent Overstreet if (ptr->dev != dev) 765cd575ddfSKent Overstreet ptr->cached = true; 766cd575ddfSKent Overstreet 767cd575ddfSKent Overstreet ptr = (void *) bch2_extent_has_device(e.c, dev); 768cd575ddfSKent Overstreet BUG_ON(!ptr); 769cd575ddfSKent Overstreet 770cd575ddfSKent Overstreet extent_stripe_ptr_add(e, s, ptr, idx); 771cd575ddfSKent Overstreet 772*0564b167SKent Overstreet bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &tmp.k)); 773*0564b167SKent Overstreet 774*0564b167SKent Overstreet ret = bch2_trans_commit(&trans, NULL, NULL, 775cd575ddfSKent Overstreet BTREE_INSERT_ATOMIC| 776cd575ddfSKent Overstreet BTREE_INSERT_NOFAIL| 777*0564b167SKent Overstreet BTREE_INSERT_USE_RESERVE); 778cd575ddfSKent Overstreet if (ret == -EINTR) 779cd575ddfSKent Overstreet ret = 0; 780cd575ddfSKent Overstreet if (ret) 781cd575ddfSKent Overstreet break; 782cd575ddfSKent Overstreet } 783cd575ddfSKent Overstreet 784*0564b167SKent Overstreet bch2_trans_exit(&trans); 785*0564b167SKent Overstreet 786*0564b167SKent Overstreet return ret; 787cd575ddfSKent Overstreet } 788cd575ddfSKent Overstreet 789cd575ddfSKent Overstreet /* 790cd575ddfSKent Overstreet * data buckets of new stripe all written: create the stripe 791cd575ddfSKent Overstreet */ 792cd575ddfSKent Overstreet static void ec_stripe_create(struct ec_stripe_new *s) 793cd575ddfSKent Overstreet { 794cd575ddfSKent Overstreet struct bch_fs *c = s->c; 795cd575ddfSKent Overstreet struct open_bucket *ob; 796cd575ddfSKent Overstreet struct bkey_i *k; 797cd575ddfSKent Overstreet struct bch_stripe *v = &s->stripe.key.v; 798cd575ddfSKent Overstreet unsigned i, nr_data = v->nr_blocks - v->nr_redundant; 799cd575ddfSKent Overstreet struct closure cl; 800cd575ddfSKent Overstreet int ret; 801cd575ddfSKent Overstreet 802cd575ddfSKent Overstreet BUG_ON(s->h->s == s); 803cd575ddfSKent Overstreet 804cd575ddfSKent Overstreet closure_init_stack(&cl); 805cd575ddfSKent Overstreet 806cd575ddfSKent Overstreet if (s->err) { 807cd575ddfSKent Overstreet bch_err(c, "error creating stripe: error writing data buckets"); 808cd575ddfSKent Overstreet goto err; 809cd575ddfSKent Overstreet } 810cd575ddfSKent Overstreet 811cd575ddfSKent Overstreet if (!percpu_ref_tryget(&c->writes)) 812cd575ddfSKent Overstreet goto err; 813cd575ddfSKent Overstreet 814cd575ddfSKent Overstreet BUG_ON(bitmap_weight(s->blocks_allocated, 815cd575ddfSKent Overstreet s->blocks.nr) != s->blocks.nr); 816cd575ddfSKent Overstreet 817cd575ddfSKent Overstreet ec_generate_ec(&s->stripe); 818cd575ddfSKent Overstreet 819cd575ddfSKent Overstreet ec_generate_checksums(&s->stripe); 820cd575ddfSKent Overstreet 821cd575ddfSKent Overstreet /* write p/q: */ 822cd575ddfSKent Overstreet for (i = nr_data; i < v->nr_blocks; i++) 823cd575ddfSKent Overstreet ec_block_io(c, &s->stripe, REQ_OP_WRITE, i, &cl); 824cd575ddfSKent Overstreet 825cd575ddfSKent Overstreet closure_sync(&cl); 826cd575ddfSKent Overstreet 827cd575ddfSKent Overstreet for (i = nr_data; i < v->nr_blocks; i++) 828cd575ddfSKent Overstreet if (!test_bit(i, s->stripe.valid)) { 829cd575ddfSKent Overstreet bch_err(c, "error creating stripe: error writing redundancy buckets"); 830cd575ddfSKent Overstreet goto err_put_writes; 831cd575ddfSKent Overstreet } 832cd575ddfSKent Overstreet 833dfe9bfb3SKent Overstreet mutex_lock(&c->ec_stripe_create_lock); 834dfe9bfb3SKent Overstreet 835cd575ddfSKent Overstreet ret = ec_stripe_bkey_insert(c, &s->stripe.key); 836cd575ddfSKent Overstreet if (ret) { 837cd575ddfSKent Overstreet bch_err(c, "error creating stripe: error creating stripe key"); 838dfe9bfb3SKent Overstreet goto err_unlock; 839cd575ddfSKent Overstreet } 840cd575ddfSKent Overstreet 841cd575ddfSKent Overstreet for_each_keylist_key(&s->keys, k) { 842cd575ddfSKent Overstreet ret = ec_stripe_update_ptrs(c, &s->stripe, &k->k); 843cd575ddfSKent Overstreet if (ret) 844cd575ddfSKent Overstreet break; 845cd575ddfSKent Overstreet } 846cd575ddfSKent Overstreet 847dfe9bfb3SKent Overstreet err_unlock: 848dfe9bfb3SKent Overstreet mutex_unlock(&c->ec_stripe_create_lock); 849cd575ddfSKent Overstreet err_put_writes: 850cd575ddfSKent Overstreet percpu_ref_put(&c->writes); 851cd575ddfSKent Overstreet err: 852cd575ddfSKent Overstreet open_bucket_for_each(c, &s->blocks, ob, i) { 853cd575ddfSKent Overstreet ob->ec = NULL; 854cd575ddfSKent Overstreet __bch2_open_bucket_put(c, ob); 855cd575ddfSKent Overstreet } 856cd575ddfSKent Overstreet 857cd575ddfSKent Overstreet bch2_open_buckets_put(c, &s->parity); 858cd575ddfSKent Overstreet 859cd575ddfSKent Overstreet bch2_keylist_free(&s->keys, s->inline_keys); 860cd575ddfSKent Overstreet 861cd575ddfSKent Overstreet mutex_lock(&s->h->lock); 862cd575ddfSKent Overstreet list_del(&s->list); 863cd575ddfSKent Overstreet mutex_unlock(&s->h->lock); 864cd575ddfSKent Overstreet 865cd575ddfSKent Overstreet for (i = 0; i < s->stripe.key.v.nr_blocks; i++) 866cd575ddfSKent Overstreet kvpfree(s->stripe.data[i], s->stripe.size << 9); 867cd575ddfSKent Overstreet kfree(s); 868cd575ddfSKent Overstreet } 869cd575ddfSKent Overstreet 870cd575ddfSKent Overstreet static struct ec_stripe_new *ec_stripe_set_pending(struct ec_stripe_head *h) 871cd575ddfSKent Overstreet { 872cd575ddfSKent Overstreet struct ec_stripe_new *s = h->s; 873cd575ddfSKent Overstreet 874cd575ddfSKent Overstreet list_add(&s->list, &h->stripes); 875cd575ddfSKent Overstreet h->s = NULL; 876cd575ddfSKent Overstreet 877cd575ddfSKent Overstreet return s; 878cd575ddfSKent Overstreet } 879cd575ddfSKent Overstreet 880cd575ddfSKent Overstreet static void ec_stripe_new_put(struct ec_stripe_new *s) 881cd575ddfSKent Overstreet { 882cd575ddfSKent Overstreet BUG_ON(atomic_read(&s->pin) <= 0); 883cd575ddfSKent Overstreet if (atomic_dec_and_test(&s->pin)) 884cd575ddfSKent Overstreet ec_stripe_create(s); 885cd575ddfSKent Overstreet } 886cd575ddfSKent Overstreet 887cd575ddfSKent Overstreet /* have a full bucket - hand it off to be erasure coded: */ 888cd575ddfSKent Overstreet void bch2_ec_bucket_written(struct bch_fs *c, struct open_bucket *ob) 889cd575ddfSKent Overstreet { 890cd575ddfSKent Overstreet struct ec_stripe_new *s = ob->ec; 891cd575ddfSKent Overstreet 892cd575ddfSKent Overstreet if (ob->sectors_free) 893cd575ddfSKent Overstreet s->err = -1; 894cd575ddfSKent Overstreet 895cd575ddfSKent Overstreet ec_stripe_new_put(s); 896cd575ddfSKent Overstreet } 897cd575ddfSKent Overstreet 898cd575ddfSKent Overstreet void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob) 899cd575ddfSKent Overstreet { 900cd575ddfSKent Overstreet struct ec_stripe_new *s = ob->ec; 901cd575ddfSKent Overstreet 902cd575ddfSKent Overstreet s->err = -EIO; 903cd575ddfSKent Overstreet } 904cd575ddfSKent Overstreet 905cd575ddfSKent Overstreet void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp) 906cd575ddfSKent Overstreet { 907cd575ddfSKent Overstreet struct open_bucket *ob = ec_open_bucket(c, &wp->ptrs); 908cd575ddfSKent Overstreet struct bch_dev *ca; 909cd575ddfSKent Overstreet unsigned offset; 910cd575ddfSKent Overstreet 911cd575ddfSKent Overstreet if (!ob) 912cd575ddfSKent Overstreet return NULL; 913cd575ddfSKent Overstreet 914cd575ddfSKent Overstreet ca = bch_dev_bkey_exists(c, ob->ptr.dev); 915cd575ddfSKent Overstreet offset = ca->mi.bucket_size - ob->sectors_free; 916cd575ddfSKent Overstreet 917cd575ddfSKent Overstreet return ob->ec->stripe.data[ob->ec_idx] + (offset << 9); 918cd575ddfSKent Overstreet } 919cd575ddfSKent Overstreet 920cd575ddfSKent Overstreet void bch2_ec_add_backpointer(struct bch_fs *c, struct write_point *wp, 921cd575ddfSKent Overstreet struct bpos pos, unsigned sectors) 922cd575ddfSKent Overstreet { 923cd575ddfSKent Overstreet struct open_bucket *ob = ec_open_bucket(c, &wp->ptrs); 924cd575ddfSKent Overstreet struct ec_stripe_new *ec; 925cd575ddfSKent Overstreet 926cd575ddfSKent Overstreet if (!ob) 927cd575ddfSKent Overstreet return; 928cd575ddfSKent Overstreet 929cd575ddfSKent Overstreet ec = ob->ec; 930cd575ddfSKent Overstreet mutex_lock(&ec->lock); 931cd575ddfSKent Overstreet 932cd575ddfSKent Overstreet if (bch2_keylist_realloc(&ec->keys, ec->inline_keys, 933cd575ddfSKent Overstreet ARRAY_SIZE(ec->inline_keys), 934cd575ddfSKent Overstreet BKEY_U64s)) { 935cd575ddfSKent Overstreet BUG(); 936cd575ddfSKent Overstreet } 937cd575ddfSKent Overstreet 938cd575ddfSKent Overstreet bkey_init(&ec->keys.top->k); 939cd575ddfSKent Overstreet ec->keys.top->k.p = pos; 940cd575ddfSKent Overstreet bch2_key_resize(&ec->keys.top->k, sectors); 941cd575ddfSKent Overstreet bch2_keylist_push(&ec->keys); 942cd575ddfSKent Overstreet 943cd575ddfSKent Overstreet mutex_unlock(&ec->lock); 944cd575ddfSKent Overstreet } 945cd575ddfSKent Overstreet 946cd575ddfSKent Overstreet static int unsigned_cmp(const void *_l, const void *_r) 947cd575ddfSKent Overstreet { 948cd575ddfSKent Overstreet unsigned l = *((const unsigned *) _l); 949cd575ddfSKent Overstreet unsigned r = *((const unsigned *) _r); 950cd575ddfSKent Overstreet 951cd575ddfSKent Overstreet return (l > r) - (l < r); 952cd575ddfSKent Overstreet } 953cd575ddfSKent Overstreet 954cd575ddfSKent Overstreet /* pick most common bucket size: */ 955cd575ddfSKent Overstreet static unsigned pick_blocksize(struct bch_fs *c, 956cd575ddfSKent Overstreet struct bch_devs_mask *devs) 957cd575ddfSKent Overstreet { 958cd575ddfSKent Overstreet struct bch_dev *ca; 959cd575ddfSKent Overstreet unsigned i, nr = 0, sizes[BCH_SB_MEMBERS_MAX]; 960cd575ddfSKent Overstreet struct { 961cd575ddfSKent Overstreet unsigned nr, size; 962cd575ddfSKent Overstreet } cur = { 0, 0 }, best = { 0, 0 }; 963cd575ddfSKent Overstreet 964cd575ddfSKent Overstreet for_each_member_device_rcu(ca, c, i, devs) 965cd575ddfSKent Overstreet sizes[nr++] = ca->mi.bucket_size; 966cd575ddfSKent Overstreet 967cd575ddfSKent Overstreet sort(sizes, nr, sizeof(unsigned), unsigned_cmp, NULL); 968cd575ddfSKent Overstreet 969cd575ddfSKent Overstreet for (i = 0; i < nr; i++) { 970cd575ddfSKent Overstreet if (sizes[i] != cur.size) { 971cd575ddfSKent Overstreet if (cur.nr > best.nr) 972cd575ddfSKent Overstreet best = cur; 973cd575ddfSKent Overstreet 974cd575ddfSKent Overstreet cur.nr = 0; 975cd575ddfSKent Overstreet cur.size = sizes[i]; 976cd575ddfSKent Overstreet } 977cd575ddfSKent Overstreet 978cd575ddfSKent Overstreet cur.nr++; 979cd575ddfSKent Overstreet } 980cd575ddfSKent Overstreet 981cd575ddfSKent Overstreet if (cur.nr > best.nr) 982cd575ddfSKent Overstreet best = cur; 983cd575ddfSKent Overstreet 984cd575ddfSKent Overstreet return best.size; 985cd575ddfSKent Overstreet } 986cd575ddfSKent Overstreet 987cd575ddfSKent Overstreet int bch2_ec_stripe_new_alloc(struct bch_fs *c, struct ec_stripe_head *h) 988cd575ddfSKent Overstreet { 989cd575ddfSKent Overstreet struct ec_stripe_new *s; 990cd575ddfSKent Overstreet unsigned i; 991cd575ddfSKent Overstreet 992cd575ddfSKent Overstreet BUG_ON(h->parity.nr != h->redundancy); 993cd575ddfSKent Overstreet BUG_ON(!h->blocks.nr); 994cd575ddfSKent Overstreet BUG_ON(h->parity.nr + h->blocks.nr > EC_STRIPE_MAX); 995cd575ddfSKent Overstreet lockdep_assert_held(&h->lock); 996cd575ddfSKent Overstreet 997cd575ddfSKent Overstreet s = kzalloc(sizeof(*s), GFP_KERNEL); 998cd575ddfSKent Overstreet if (!s) 999cd575ddfSKent Overstreet return -ENOMEM; 1000cd575ddfSKent Overstreet 1001cd575ddfSKent Overstreet mutex_init(&s->lock); 1002cd575ddfSKent Overstreet atomic_set(&s->pin, 1); 1003cd575ddfSKent Overstreet s->c = c; 1004cd575ddfSKent Overstreet s->h = h; 1005cd575ddfSKent Overstreet s->blocks = h->blocks; 1006cd575ddfSKent Overstreet s->parity = h->parity; 1007cd575ddfSKent Overstreet 1008cd575ddfSKent Overstreet memset(&h->blocks, 0, sizeof(h->blocks)); 1009cd575ddfSKent Overstreet memset(&h->parity, 0, sizeof(h->parity)); 1010cd575ddfSKent Overstreet 1011cd575ddfSKent Overstreet bch2_keylist_init(&s->keys, s->inline_keys); 1012cd575ddfSKent Overstreet 1013cd575ddfSKent Overstreet s->stripe.offset = 0; 1014cd575ddfSKent Overstreet s->stripe.size = h->blocksize; 1015cd575ddfSKent Overstreet memset(s->stripe.valid, 0xFF, sizeof(s->stripe.valid)); 1016cd575ddfSKent Overstreet 1017cd575ddfSKent Overstreet ec_stripe_key_init(c, &s->stripe.key, 1018cd575ddfSKent Overstreet &s->blocks, &s->parity, 1019cd575ddfSKent Overstreet h->blocksize); 1020cd575ddfSKent Overstreet 1021cd575ddfSKent Overstreet for (i = 0; i < s->stripe.key.v.nr_blocks; i++) { 1022cd575ddfSKent Overstreet s->stripe.data[i] = kvpmalloc(s->stripe.size << 9, GFP_KERNEL); 1023cd575ddfSKent Overstreet if (!s->stripe.data[i]) 1024cd575ddfSKent Overstreet goto err; 1025cd575ddfSKent Overstreet } 1026cd575ddfSKent Overstreet 1027cd575ddfSKent Overstreet h->s = s; 1028cd575ddfSKent Overstreet 1029cd575ddfSKent Overstreet return 0; 1030cd575ddfSKent Overstreet err: 1031cd575ddfSKent Overstreet for (i = 0; i < s->stripe.key.v.nr_blocks; i++) 1032cd575ddfSKent Overstreet kvpfree(s->stripe.data[i], s->stripe.size << 9); 1033cd575ddfSKent Overstreet kfree(s); 1034cd575ddfSKent Overstreet return -ENOMEM; 1035cd575ddfSKent Overstreet } 1036cd575ddfSKent Overstreet 1037cd575ddfSKent Overstreet static struct ec_stripe_head * 1038cd575ddfSKent Overstreet ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target, 1039cd575ddfSKent Overstreet unsigned algo, unsigned redundancy) 1040cd575ddfSKent Overstreet { 1041cd575ddfSKent Overstreet struct ec_stripe_head *h; 1042cd575ddfSKent Overstreet struct bch_dev *ca; 1043cd575ddfSKent Overstreet unsigned i; 1044cd575ddfSKent Overstreet 1045cd575ddfSKent Overstreet h = kzalloc(sizeof(*h), GFP_KERNEL); 1046cd575ddfSKent Overstreet if (!h) 1047cd575ddfSKent Overstreet return NULL; 1048cd575ddfSKent Overstreet 1049cd575ddfSKent Overstreet mutex_init(&h->lock); 1050cd575ddfSKent Overstreet mutex_lock(&h->lock); 1051cd575ddfSKent Overstreet INIT_LIST_HEAD(&h->stripes); 1052cd575ddfSKent Overstreet 1053cd575ddfSKent Overstreet h->target = target; 1054cd575ddfSKent Overstreet h->algo = algo; 1055cd575ddfSKent Overstreet h->redundancy = redundancy; 1056cd575ddfSKent Overstreet 1057cd575ddfSKent Overstreet rcu_read_lock(); 1058cd575ddfSKent Overstreet h->devs = target_rw_devs(c, BCH_DATA_USER, target); 1059cd575ddfSKent Overstreet 1060cd575ddfSKent Overstreet for_each_member_device_rcu(ca, c, i, &h->devs) 1061cd575ddfSKent Overstreet if (!ca->mi.durability) 1062cd575ddfSKent Overstreet __clear_bit(i, h->devs.d); 1063cd575ddfSKent Overstreet 1064cd575ddfSKent Overstreet h->blocksize = pick_blocksize(c, &h->devs); 1065cd575ddfSKent Overstreet 1066cd575ddfSKent Overstreet for_each_member_device_rcu(ca, c, i, &h->devs) 1067cd575ddfSKent Overstreet if (ca->mi.bucket_size == h->blocksize) 1068cd575ddfSKent Overstreet h->nr_active_devs++; 1069cd575ddfSKent Overstreet 1070cd575ddfSKent Overstreet rcu_read_unlock(); 1071cd575ddfSKent Overstreet list_add(&h->list, &c->ec_new_stripe_list); 1072cd575ddfSKent Overstreet return h; 1073cd575ddfSKent Overstreet } 1074cd575ddfSKent Overstreet 1075cd575ddfSKent Overstreet void bch2_ec_stripe_head_put(struct ec_stripe_head *h) 1076cd575ddfSKent Overstreet { 1077cd575ddfSKent Overstreet struct ec_stripe_new *s = NULL; 1078cd575ddfSKent Overstreet 1079cd575ddfSKent Overstreet if (h->s && 1080cd575ddfSKent Overstreet bitmap_weight(h->s->blocks_allocated, 1081cd575ddfSKent Overstreet h->s->blocks.nr) == h->s->blocks.nr) 1082cd575ddfSKent Overstreet s = ec_stripe_set_pending(h); 1083cd575ddfSKent Overstreet 1084cd575ddfSKent Overstreet mutex_unlock(&h->lock); 1085cd575ddfSKent Overstreet 1086cd575ddfSKent Overstreet if (s) 1087cd575ddfSKent Overstreet ec_stripe_new_put(s); 1088cd575ddfSKent Overstreet } 1089cd575ddfSKent Overstreet 1090cd575ddfSKent Overstreet struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, 1091cd575ddfSKent Overstreet unsigned target, 1092cd575ddfSKent Overstreet unsigned algo, 1093cd575ddfSKent Overstreet unsigned redundancy) 1094cd575ddfSKent Overstreet { 1095cd575ddfSKent Overstreet struct ec_stripe_head *h; 1096cd575ddfSKent Overstreet 1097cd575ddfSKent Overstreet if (!redundancy) 1098cd575ddfSKent Overstreet return NULL; 1099cd575ddfSKent Overstreet 1100cd575ddfSKent Overstreet mutex_lock(&c->ec_new_stripe_lock); 1101cd575ddfSKent Overstreet list_for_each_entry(h, &c->ec_new_stripe_list, list) 1102cd575ddfSKent Overstreet if (h->target == target && 1103cd575ddfSKent Overstreet h->algo == algo && 1104cd575ddfSKent Overstreet h->redundancy == redundancy) { 1105cd575ddfSKent Overstreet mutex_lock(&h->lock); 1106cd575ddfSKent Overstreet goto found; 1107cd575ddfSKent Overstreet } 1108cd575ddfSKent Overstreet 1109cd575ddfSKent Overstreet h = ec_new_stripe_head_alloc(c, target, algo, redundancy); 1110cd575ddfSKent Overstreet found: 1111cd575ddfSKent Overstreet mutex_unlock(&c->ec_new_stripe_lock); 1112cd575ddfSKent Overstreet return h; 1113cd575ddfSKent Overstreet } 1114cd575ddfSKent Overstreet 1115cd575ddfSKent Overstreet void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) 1116cd575ddfSKent Overstreet { 1117cd575ddfSKent Overstreet struct ec_stripe_head *h; 1118cd575ddfSKent Overstreet struct open_bucket *ob; 1119cd575ddfSKent Overstreet unsigned i; 1120cd575ddfSKent Overstreet 1121cd575ddfSKent Overstreet mutex_lock(&c->ec_new_stripe_lock); 1122cd575ddfSKent Overstreet list_for_each_entry(h, &c->ec_new_stripe_list, list) { 1123cd575ddfSKent Overstreet struct ec_stripe_new *s = NULL; 1124cd575ddfSKent Overstreet 1125cd575ddfSKent Overstreet mutex_lock(&h->lock); 1126cd575ddfSKent Overstreet bch2_open_buckets_stop_dev(c, ca, 1127cd575ddfSKent Overstreet &h->blocks, 1128cd575ddfSKent Overstreet BCH_DATA_USER); 1129cd575ddfSKent Overstreet bch2_open_buckets_stop_dev(c, ca, 1130cd575ddfSKent Overstreet &h->parity, 1131cd575ddfSKent Overstreet BCH_DATA_USER); 1132cd575ddfSKent Overstreet 1133cd575ddfSKent Overstreet if (!h->s) 1134cd575ddfSKent Overstreet goto unlock; 1135cd575ddfSKent Overstreet 1136cd575ddfSKent Overstreet open_bucket_for_each(c, &h->s->blocks, ob, i) 1137cd575ddfSKent Overstreet if (ob->ptr.dev == ca->dev_idx) 1138cd575ddfSKent Overstreet goto found; 1139cd575ddfSKent Overstreet open_bucket_for_each(c, &h->s->parity, ob, i) 1140cd575ddfSKent Overstreet if (ob->ptr.dev == ca->dev_idx) 1141cd575ddfSKent Overstreet goto found; 1142cd575ddfSKent Overstreet goto unlock; 1143cd575ddfSKent Overstreet found: 1144cd575ddfSKent Overstreet h->s->err = -1; 1145cd575ddfSKent Overstreet s = ec_stripe_set_pending(h); 1146cd575ddfSKent Overstreet unlock: 1147cd575ddfSKent Overstreet mutex_unlock(&h->lock); 1148cd575ddfSKent Overstreet 1149cd575ddfSKent Overstreet if (s) 1150cd575ddfSKent Overstreet ec_stripe_new_put(s); 1151cd575ddfSKent Overstreet } 1152cd575ddfSKent Overstreet mutex_unlock(&c->ec_new_stripe_lock); 1153cd575ddfSKent Overstreet } 1154cd575ddfSKent Overstreet 1155*0564b167SKent Overstreet static int __bch2_stripe_write_key(struct btree_trans *trans, 115661c8d7c8SKent Overstreet struct btree_iter *iter, 115761c8d7c8SKent Overstreet struct stripe *m, 115861c8d7c8SKent Overstreet size_t idx, 115961c8d7c8SKent Overstreet struct bkey_i_stripe *new_key, 116061c8d7c8SKent Overstreet unsigned flags) 116161c8d7c8SKent Overstreet { 1162*0564b167SKent Overstreet struct bch_fs *c = trans->c; 116361c8d7c8SKent Overstreet struct bkey_s_c k; 116461c8d7c8SKent Overstreet unsigned i; 116561c8d7c8SKent Overstreet int ret; 116661c8d7c8SKent Overstreet 116761c8d7c8SKent Overstreet bch2_btree_iter_set_pos(iter, POS(0, idx)); 116861c8d7c8SKent Overstreet 116961c8d7c8SKent Overstreet k = bch2_btree_iter_peek_slot(iter); 117061c8d7c8SKent Overstreet ret = btree_iter_err(k); 117161c8d7c8SKent Overstreet if (ret) 117261c8d7c8SKent Overstreet return ret; 117361c8d7c8SKent Overstreet 117461c8d7c8SKent Overstreet if (k.k->type != KEY_TYPE_stripe) 117561c8d7c8SKent Overstreet return -EIO; 117661c8d7c8SKent Overstreet 117761c8d7c8SKent Overstreet bkey_reassemble(&new_key->k_i, k); 117861c8d7c8SKent Overstreet 117961c8d7c8SKent Overstreet spin_lock(&c->ec_stripes_heap_lock); 118061c8d7c8SKent Overstreet 118161c8d7c8SKent Overstreet for (i = 0; i < new_key->v.nr_blocks; i++) 118261c8d7c8SKent Overstreet stripe_blockcount_set(&new_key->v, i, 118361c8d7c8SKent Overstreet m->block_sectors[i]); 118461c8d7c8SKent Overstreet m->dirty = false; 118561c8d7c8SKent Overstreet 118661c8d7c8SKent Overstreet spin_unlock(&c->ec_stripes_heap_lock); 118761c8d7c8SKent Overstreet 1188*0564b167SKent Overstreet bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &new_key->k_i)); 1189*0564b167SKent Overstreet 1190*0564b167SKent Overstreet return bch2_trans_commit(trans, NULL, NULL, 1191*0564b167SKent Overstreet BTREE_INSERT_NOFAIL|flags); 119261c8d7c8SKent Overstreet } 119361c8d7c8SKent Overstreet 119461c8d7c8SKent Overstreet int bch2_stripes_write(struct bch_fs *c, bool *wrote) 119561c8d7c8SKent Overstreet { 1196*0564b167SKent Overstreet struct btree_trans trans; 1197*0564b167SKent Overstreet struct btree_iter *iter; 119861c8d7c8SKent Overstreet struct genradix_iter giter; 119961c8d7c8SKent Overstreet struct bkey_i_stripe *new_key; 120061c8d7c8SKent Overstreet struct stripe *m; 120161c8d7c8SKent Overstreet int ret = 0; 120261c8d7c8SKent Overstreet 120361c8d7c8SKent Overstreet new_key = kmalloc(255 * sizeof(u64), GFP_KERNEL); 120461c8d7c8SKent Overstreet BUG_ON(!new_key); 120561c8d7c8SKent Overstreet 1206*0564b167SKent Overstreet bch2_trans_init(&trans, c); 1207*0564b167SKent Overstreet 1208*0564b167SKent Overstreet iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS_MIN, 120961c8d7c8SKent Overstreet BTREE_ITER_SLOTS|BTREE_ITER_INTENT); 121061c8d7c8SKent Overstreet 121161c8d7c8SKent Overstreet genradix_for_each(&c->stripes[0], giter, m) { 121261c8d7c8SKent Overstreet if (!m->dirty) 121361c8d7c8SKent Overstreet continue; 121461c8d7c8SKent Overstreet 1215*0564b167SKent Overstreet ret = __bch2_stripe_write_key(&trans, iter, m, giter.pos, 121661c8d7c8SKent Overstreet new_key, BTREE_INSERT_NOCHECK_RW); 121761c8d7c8SKent Overstreet if (ret) 121861c8d7c8SKent Overstreet break; 121961c8d7c8SKent Overstreet 122061c8d7c8SKent Overstreet *wrote = true; 122161c8d7c8SKent Overstreet } 122261c8d7c8SKent Overstreet 1223*0564b167SKent Overstreet bch2_trans_exit(&trans); 122461c8d7c8SKent Overstreet 122561c8d7c8SKent Overstreet kfree(new_key); 122661c8d7c8SKent Overstreet 122761c8d7c8SKent Overstreet return ret; 122861c8d7c8SKent Overstreet } 122961c8d7c8SKent Overstreet 123061c8d7c8SKent Overstreet static void bch2_stripe_read_key(struct bch_fs *c, struct bkey_s_c k) 123161c8d7c8SKent Overstreet { 123261c8d7c8SKent Overstreet 123361c8d7c8SKent Overstreet struct gc_pos pos = { 0 }; 123461c8d7c8SKent Overstreet 123561c8d7c8SKent Overstreet bch2_mark_key(c, k, true, 0, pos, NULL, 0, 0); 123661c8d7c8SKent Overstreet } 123761c8d7c8SKent Overstreet 123861c8d7c8SKent Overstreet int bch2_stripes_read(struct bch_fs *c, struct list_head *journal_replay_list) 123961c8d7c8SKent Overstreet { 124061c8d7c8SKent Overstreet struct journal_replay *r; 124161c8d7c8SKent Overstreet struct btree_iter iter; 124261c8d7c8SKent Overstreet struct bkey_s_c k; 124361c8d7c8SKent Overstreet int ret; 124461c8d7c8SKent Overstreet 124561c8d7c8SKent Overstreet ret = bch2_fs_ec_start(c); 124661c8d7c8SKent Overstreet if (ret) 124761c8d7c8SKent Overstreet return ret; 124861c8d7c8SKent Overstreet 124961c8d7c8SKent Overstreet for_each_btree_key(&iter, c, BTREE_ID_EC, POS_MIN, 0, k) { 125061c8d7c8SKent Overstreet bch2_stripe_read_key(c, k); 125161c8d7c8SKent Overstreet bch2_btree_iter_cond_resched(&iter); 125261c8d7c8SKent Overstreet } 125361c8d7c8SKent Overstreet 125461c8d7c8SKent Overstreet ret = bch2_btree_iter_unlock(&iter); 125561c8d7c8SKent Overstreet if (ret) 125661c8d7c8SKent Overstreet return ret; 125761c8d7c8SKent Overstreet 125861c8d7c8SKent Overstreet list_for_each_entry(r, journal_replay_list, list) { 125961c8d7c8SKent Overstreet struct bkey_i *k, *n; 126061c8d7c8SKent Overstreet struct jset_entry *entry; 126161c8d7c8SKent Overstreet 126261c8d7c8SKent Overstreet for_each_jset_key(k, n, entry, &r->j) 126361c8d7c8SKent Overstreet if (entry->btree_id == BTREE_ID_EC) 126461c8d7c8SKent Overstreet bch2_stripe_read_key(c, bkey_i_to_s_c(k)); 126561c8d7c8SKent Overstreet } 126661c8d7c8SKent Overstreet 126761c8d7c8SKent Overstreet return 0; 126861c8d7c8SKent Overstreet } 126961c8d7c8SKent Overstreet 1270dfe9bfb3SKent Overstreet int bch2_ec_mem_alloc(struct bch_fs *c, bool gc) 1271cd575ddfSKent Overstreet { 1272cd575ddfSKent Overstreet struct btree_iter iter; 1273cd575ddfSKent Overstreet struct bkey_s_c k; 1274cd575ddfSKent Overstreet size_t i, idx = 0; 1275cd575ddfSKent Overstreet int ret = 0; 1276cd575ddfSKent Overstreet 1277cd575ddfSKent Overstreet bch2_btree_iter_init(&iter, c, BTREE_ID_EC, POS(0, U64_MAX), 0); 1278cd575ddfSKent Overstreet 1279cd575ddfSKent Overstreet k = bch2_btree_iter_prev(&iter); 1280cd575ddfSKent Overstreet if (!IS_ERR_OR_NULL(k.k)) 1281cd575ddfSKent Overstreet idx = k.k->p.offset + 1; 1282cd575ddfSKent Overstreet ret = bch2_btree_iter_unlock(&iter); 1283cd575ddfSKent Overstreet if (ret) 1284cd575ddfSKent Overstreet return ret; 1285cd575ddfSKent Overstreet 1286dfe9bfb3SKent Overstreet if (!gc && 1287dfe9bfb3SKent Overstreet !init_heap(&c->ec_stripes_heap, roundup_pow_of_two(idx), 1288cd575ddfSKent Overstreet GFP_KERNEL)) 1289cd575ddfSKent Overstreet return -ENOMEM; 1290cd575ddfSKent Overstreet #if 0 1291dfe9bfb3SKent Overstreet ret = genradix_prealloc(&c->stripes[gc], idx, GFP_KERNEL); 1292cd575ddfSKent Overstreet #else 1293cd575ddfSKent Overstreet for (i = 0; i < idx; i++) 1294dfe9bfb3SKent Overstreet if (!genradix_ptr_alloc(&c->stripes[gc], i, GFP_KERNEL)) 1295cd575ddfSKent Overstreet return -ENOMEM; 1296cd575ddfSKent Overstreet #endif 1297cd575ddfSKent Overstreet return 0; 1298cd575ddfSKent Overstreet } 1299cd575ddfSKent Overstreet 1300dfe9bfb3SKent Overstreet int bch2_fs_ec_start(struct bch_fs *c) 1301dfe9bfb3SKent Overstreet { 1302dfe9bfb3SKent Overstreet return bch2_ec_mem_alloc(c, false); 1303dfe9bfb3SKent Overstreet } 1304dfe9bfb3SKent Overstreet 1305cd575ddfSKent Overstreet void bch2_fs_ec_exit(struct bch_fs *c) 1306cd575ddfSKent Overstreet { 1307cd575ddfSKent Overstreet struct ec_stripe_head *h; 1308cd575ddfSKent Overstreet 1309cd575ddfSKent Overstreet while (1) { 1310cd575ddfSKent Overstreet mutex_lock(&c->ec_new_stripe_lock); 1311cd575ddfSKent Overstreet h = list_first_entry_or_null(&c->ec_new_stripe_list, 1312cd575ddfSKent Overstreet struct ec_stripe_head, list); 1313cd575ddfSKent Overstreet if (h) 1314cd575ddfSKent Overstreet list_del(&h->list); 1315cd575ddfSKent Overstreet mutex_unlock(&c->ec_new_stripe_lock); 1316cd575ddfSKent Overstreet if (!h) 1317cd575ddfSKent Overstreet break; 1318cd575ddfSKent Overstreet 1319cd575ddfSKent Overstreet BUG_ON(h->s); 1320cd575ddfSKent Overstreet BUG_ON(!list_empty(&h->stripes)); 1321cd575ddfSKent Overstreet kfree(h); 1322cd575ddfSKent Overstreet } 1323cd575ddfSKent Overstreet 1324cd575ddfSKent Overstreet free_heap(&c->ec_stripes_heap); 1325dfe9bfb3SKent Overstreet genradix_free(&c->stripes[0]); 1326cd575ddfSKent Overstreet bioset_exit(&c->ec_bioset); 1327cd575ddfSKent Overstreet } 1328cd575ddfSKent Overstreet 1329cd575ddfSKent Overstreet int bch2_fs_ec_init(struct bch_fs *c) 1330cd575ddfSKent Overstreet { 1331cd575ddfSKent Overstreet INIT_WORK(&c->ec_stripe_delete_work, ec_stripe_delete_work); 1332cd575ddfSKent Overstreet 1333cd575ddfSKent Overstreet return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio), 1334cd575ddfSKent Overstreet BIOSET_NEED_BVECS); 1335cd575ddfSKent Overstreet } 1336