xref: /linux/fs/bcachefs/ec.c (revision 42b16d3ac371a2fac9b6f08fd75f23f34ba3955a)
1cd575ddfSKent Overstreet // SPDX-License-Identifier: GPL-2.0
2cd575ddfSKent Overstreet 
/* Erasure coding */
4cd575ddfSKent Overstreet 
5cd575ddfSKent Overstreet #include "bcachefs.h"
6f4f78779SKent Overstreet #include "alloc_background.h"
7cd575ddfSKent Overstreet #include "alloc_foreground.h"
8dea5647eSKent Overstreet #include "backpointers.h"
907a1006aSKent Overstreet #include "bkey_buf.h"
10cd575ddfSKent Overstreet #include "bset.h"
11cd575ddfSKent Overstreet #include "btree_gc.h"
12cd575ddfSKent Overstreet #include "btree_update.h"
13dea5647eSKent Overstreet #include "btree_write_buffer.h"
14cd575ddfSKent Overstreet #include "buckets.h"
151809b8cbSKent Overstreet #include "checksum.h"
161d16c605SKent Overstreet #include "disk_accounting.h"
17cd575ddfSKent Overstreet #include "disk_groups.h"
18cd575ddfSKent Overstreet #include "ec.h"
19cd575ddfSKent Overstreet #include "error.h"
201809b8cbSKent Overstreet #include "io_read.h"
21cd575ddfSKent Overstreet #include "io_write.h"
22d0734356SKent Overstreet #include "keylist.h"
23b547d005SKent Overstreet #include "recovery.h"
24cd575ddfSKent Overstreet #include "replicas.h"
25cd575ddfSKent Overstreet #include "super-io.h"
26cd575ddfSKent Overstreet #include "util.h"
27de5bb710SKent Overstreet 
28de5bb710SKent Overstreet #include <linux/sort.h>
29de5bb710SKent Overstreet 
30de5bb710SKent Overstreet #ifdef __KERNEL__
31cd575ddfSKent Overstreet 
32cd575ddfSKent Overstreet #include <linux/raid/pq.h>
33de5bb710SKent Overstreet #include <linux/raid/xor.h>
34de5bb710SKent Overstreet 
raid5_recov(unsigned disks,unsigned failed_idx,size_t size,void ** data)35de5bb710SKent Overstreet static void raid5_recov(unsigned disks, unsigned failed_idx,
36de5bb710SKent Overstreet 			size_t size, void **data)
37de5bb710SKent Overstreet {
38de5bb710SKent Overstreet 	unsigned i = 2, nr;
39de5bb710SKent Overstreet 
40de5bb710SKent Overstreet 	BUG_ON(failed_idx >= disks);
41de5bb710SKent Overstreet 
42de5bb710SKent Overstreet 	swap(data[0], data[failed_idx]);
43de5bb710SKent Overstreet 	memcpy(data[0], data[1], size);
44de5bb710SKent Overstreet 
45de5bb710SKent Overstreet 	while (i < disks) {
46de5bb710SKent Overstreet 		nr = min_t(unsigned, disks - i, MAX_XOR_BLOCKS);
47de5bb710SKent Overstreet 		xor_blocks(nr, size, data[0], data + i);
48de5bb710SKent Overstreet 		i += nr;
49de5bb710SKent Overstreet 	}
50de5bb710SKent Overstreet 
51de5bb710SKent Overstreet 	swap(data[0], data[failed_idx]);
52de5bb710SKent Overstreet }
53de5bb710SKent Overstreet 
raid_gen(int nd,int np,size_t size,void ** v)54de5bb710SKent Overstreet static void raid_gen(int nd, int np, size_t size, void **v)
55de5bb710SKent Overstreet {
56de5bb710SKent Overstreet 	if (np >= 1)
57de5bb710SKent Overstreet 		raid5_recov(nd + np, nd, size, v);
58de5bb710SKent Overstreet 	if (np >= 2)
59de5bb710SKent Overstreet 		raid6_call.gen_syndrome(nd + np, size, v);
60de5bb710SKent Overstreet 	BUG_ON(np > 2);
61de5bb710SKent Overstreet }
62de5bb710SKent Overstreet 
raid_rec(int nr,int * ir,int nd,int np,size_t size,void ** v)63de5bb710SKent Overstreet static void raid_rec(int nr, int *ir, int nd, int np, size_t size, void **v)
64de5bb710SKent Overstreet {
65de5bb710SKent Overstreet 	switch (nr) {
66de5bb710SKent Overstreet 	case 0:
67de5bb710SKent Overstreet 		break;
68de5bb710SKent Overstreet 	case 1:
69de5bb710SKent Overstreet 		if (ir[0] < nd + 1)
70de5bb710SKent Overstreet 			raid5_recov(nd + 1, ir[0], size, v);
71de5bb710SKent Overstreet 		else
72de5bb710SKent Overstreet 			raid6_call.gen_syndrome(nd + np, size, v);
73de5bb710SKent Overstreet 		break;
74de5bb710SKent Overstreet 	case 2:
75de5bb710SKent Overstreet 		if (ir[1] < nd) {
76de5bb710SKent Overstreet 			/* data+data failure. */
77de5bb710SKent Overstreet 			raid6_2data_recov(nd + np, size, ir[0], ir[1], v);
78de5bb710SKent Overstreet 		} else if (ir[0] < nd) {
79de5bb710SKent Overstreet 			/* data + p/q failure */
80de5bb710SKent Overstreet 
81de5bb710SKent Overstreet 			if (ir[1] == nd) /* data + p failure */
82de5bb710SKent Overstreet 				raid6_datap_recov(nd + np, size, ir[0], v);
83de5bb710SKent Overstreet 			else { /* data + q failure */
84de5bb710SKent Overstreet 				raid5_recov(nd + 1, ir[0], size, v);
85de5bb710SKent Overstreet 				raid6_call.gen_syndrome(nd + np, size, v);
86de5bb710SKent Overstreet 			}
87de5bb710SKent Overstreet 		} else {
88de5bb710SKent Overstreet 			raid_gen(nd, np, size, v);
89de5bb710SKent Overstreet 		}
90de5bb710SKent Overstreet 		break;
91de5bb710SKent Overstreet 	default:
92de5bb710SKent Overstreet 		BUG();
93de5bb710SKent Overstreet 	}
94de5bb710SKent Overstreet }
95de5bb710SKent Overstreet 
96de5bb710SKent Overstreet #else
97de5bb710SKent Overstreet 
98de5bb710SKent Overstreet #include <raid/raid.h>
99de5bb710SKent Overstreet 
100cd575ddfSKent Overstreet #endif
101cd575ddfSKent Overstreet 
102cd575ddfSKent Overstreet struct ec_bio {
103cd575ddfSKent Overstreet 	struct bch_dev		*ca;
104cd575ddfSKent Overstreet 	struct ec_stripe_buf	*buf;
105cd575ddfSKent Overstreet 	size_t			idx;
106cd575ddfSKent Overstreet 	struct bio		bio;
107cd575ddfSKent Overstreet };
108cd575ddfSKent Overstreet 
/* Stripes btree keys: */
110*d97de0d0SKent Overstreet 
bch2_stripe_validate(struct bch_fs * c,struct bkey_s_c k,enum bch_validate_flags flags)111*d97de0d0SKent Overstreet int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k,
112cd575ddfSKent Overstreet 			 enum bch_validate_flags flags)
11326609b61SKent Overstreet {
114b65db750SKent Overstreet 	const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
11526609b61SKent Overstreet 	int ret = 0;
116b65db750SKent Overstreet 
117*d97de0d0SKent Overstreet 	bkey_fsck_err_on(bkey_eq(k.k->p, POS_MIN) ||
118*d97de0d0SKent Overstreet 			 bpos_gt(k.k->p, POS(0, U32_MAX)),
119b65db750SKent Overstreet 			 c, stripe_pos_bad,
1207f4e1d5dSKent Overstreet 			 "stripe at bad pos");
121*d97de0d0SKent Overstreet 
122*d97de0d0SKent Overstreet 	bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s),
123b65db750SKent Overstreet 			 c, stripe_val_size_bad,
124f0ac7df2SKent Overstreet 			 "incorrect value size (%zu < %u)",
125cd575ddfSKent Overstreet 			 bkey_val_u64s(k.k), stripe_val_u64s(s));
126*d97de0d0SKent Overstreet 
127b65db750SKent Overstreet 	ret = bch2_bkey_ptrs_validate(c, k, flags);
128b65db750SKent Overstreet fsck_err:
129cd575ddfSKent Overstreet 	return ret;
130cd575ddfSKent Overstreet }
13126609b61SKent Overstreet 
bch2_stripe_to_text(struct printbuf * out,struct bch_fs * c,struct bkey_s_c k)132cd575ddfSKent Overstreet void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
133cd575ddfSKent Overstreet 			 struct bkey_s_c k)
1342aeed876SKent Overstreet {
1352aeed876SKent Overstreet 	const struct bch_stripe *sp = bkey_s_c_to_stripe(k).v;
1362aeed876SKent Overstreet 	struct bch_stripe s = {};
1372aeed876SKent Overstreet 
1382aeed876SKent Overstreet 	memcpy(&s, sp, min(sizeof(s), bkey_val_bytes(k.k)));
1392aeed876SKent Overstreet 
140cd575ddfSKent Overstreet 	unsigned nr_data = s.nr_blocks - s.nr_redundant;
1419abb6dd7SKent Overstreet 
1422aeed876SKent Overstreet 	prt_printf(out, "algo %u sectors %u blocks %u:%u csum ",
1432aeed876SKent Overstreet 		   s.algorithm,
1440d763863SKent Overstreet 		   le16_to_cpu(s.sectors),
1459abb6dd7SKent Overstreet 		   nr_data,
1469abb6dd7SKent Overstreet 		   s.nr_redundant);
1479abb6dd7SKent Overstreet 	bch2_prt_csum_type(out, s.csum_type);
148cd575ddfSKent Overstreet 	prt_printf(out, " gran %u", 1U << s.csum_granularity_bits);
1492aeed876SKent Overstreet 
1502aeed876SKent Overstreet 	if (s.disk_label) {
1510d763863SKent Overstreet 		prt_str(out, " label");
1522aeed876SKent Overstreet 		bch2_disk_path_to_text(out, c, s.disk_label - 1);
1532aeed876SKent Overstreet 	}
1542aeed876SKent Overstreet 
1552aeed876SKent Overstreet 	for (unsigned i = 0; i < s.nr_blocks; i++) {
1562aeed876SKent Overstreet 		const struct bch_extent_ptr *ptr = sp->ptrs + i;
1572aeed876SKent Overstreet 
1582aeed876SKent Overstreet 		if ((void *) ptr >= bkey_val_end(k))
1592aeed876SKent Overstreet 			break;
1602aeed876SKent Overstreet 
1610d763863SKent Overstreet 		prt_char(out, ' ');
162cd575ddfSKent Overstreet 		bch2_extent_ptr_to_text(out, c, ptr);
163cd575ddfSKent Overstreet 
164f4f78779SKent Overstreet 		if (s.csum_type < BCH_CSUM_NR &&
165f4f78779SKent Overstreet 		    i < nr_data &&
1669cc455d1SKent Overstreet 		    stripe_blockcount_offset(&s, i) < bkey_val_bytes(k.k))
1671f2f92ecSKent Overstreet 			prt_printf(out,  "#%u", stripe_blockcount_get(sp, i));
168f4f78779SKent Overstreet 	}
1699cc455d1SKent Overstreet }
1709cc455d1SKent Overstreet 
/* Triggers: */
17207d7c4daSKent Overstreet 
__mark_stripe_bucket(struct btree_trans * trans,struct bch_dev * ca,struct bkey_s_c_stripe s,unsigned ptr_idx,bool deleting,struct bpos bucket,struct bch_alloc_v4 * a,enum btree_iter_update_trigger_flags flags)173f4f78779SKent Overstreet static int __mark_stripe_bucket(struct btree_trans *trans,
174c4e8db2bSKent Overstreet 				struct bch_dev *ca,
175c4e8db2bSKent Overstreet 				struct bkey_s_c_stripe s,
176c4e8db2bSKent Overstreet 				unsigned ptr_idx, bool deleting,
177c4e8db2bSKent Overstreet 				struct bpos bucket,
178c4e8db2bSKent Overstreet 				struct bch_alloc_v4 *a,
179d9307646SKent Overstreet 				enum btree_iter_update_trigger_flags flags)
180f4f78779SKent Overstreet {
181f4f78779SKent Overstreet 	const struct bch_extent_ptr *ptr = s.v->ptrs + ptr_idx;
18207d7c4daSKent Overstreet 	unsigned nr_data = s.v->nr_blocks - s.v->nr_redundant;
183f4f78779SKent Overstreet 	bool parity = ptr_idx >= nr_data;
184f4f78779SKent Overstreet 	enum bch_data_type data_type = parity ? BCH_DATA_parity : BCH_DATA_stripe;
185f4f78779SKent Overstreet 	s64 sectors = parity ? le16_to_cpu(s.v->sectors) : 0;
186d9307646SKent Overstreet 	struct printbuf buf = PRINTBUF;
1870acf2169SKent Overstreet 	int ret = 0;
1880acf2169SKent Overstreet 
189d9307646SKent Overstreet 	struct bch_fs *c = trans->c;
1900acf2169SKent Overstreet 	if (deleting)
1910acf2169SKent Overstreet 		sectors = -sectors;
1920acf2169SKent Overstreet 
1930acf2169SKent Overstreet 	if (!deleting) {
194d9307646SKent Overstreet 		if (bch2_trans_inconsistent_on(a->stripe ||
195d9307646SKent Overstreet 					       a->stripe_redundancy, trans,
196d9307646SKent Overstreet 				"bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)\n%s",
197d9307646SKent Overstreet 				bucket.inode, bucket.offset, a->gen,
198d9307646SKent Overstreet 				bch2_data_type_str(a->data_type),
1990acf2169SKent Overstreet 				a->dirty_sectors,
200d9307646SKent Overstreet 				a->stripe, s.k->p.offset,
2010acf2169SKent Overstreet 				(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
2020acf2169SKent Overstreet 			ret = -BCH_ERR_mark_stripe;
2030acf2169SKent Overstreet 			goto err;
2040acf2169SKent Overstreet 		}
205d9307646SKent Overstreet 
206d9307646SKent Overstreet 		if (bch2_trans_inconsistent_on(parity && bch2_bucket_sectors_total(*a), trans,
207d9307646SKent Overstreet 				"bucket %llu:%llu gen %u data type %s dirty_sectors %u cached_sectors %u: data already in parity bucket\n%s",
208d9307646SKent Overstreet 				bucket.inode, bucket.offset, a->gen,
209d9307646SKent Overstreet 				bch2_data_type_str(a->data_type),
2100acf2169SKent Overstreet 				a->dirty_sectors,
2110acf2169SKent Overstreet 				a->cached_sectors,
212d9307646SKent Overstreet 				(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
2130acf2169SKent Overstreet 			ret = -BCH_ERR_mark_stripe;
2140acf2169SKent Overstreet 			goto err;
215d9307646SKent Overstreet 		}
216d9307646SKent Overstreet 	} else {
217d9307646SKent Overstreet 		if (bch2_trans_inconsistent_on(a->stripe != s.k->p.offset ||
218d9307646SKent Overstreet 					       a->stripe_redundancy != s.v->nr_redundant, trans,
219d9307646SKent Overstreet 				"bucket %llu:%llu gen %u: not marked as stripe when deleting stripe (got %u)\n%s",
2200acf2169SKent Overstreet 				bucket.inode, bucket.offset, a->gen,
221d9307646SKent Overstreet 				a->stripe,
2220acf2169SKent Overstreet 				(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
2230acf2169SKent Overstreet 			ret = -BCH_ERR_mark_stripe;
224d9307646SKent Overstreet 			goto err;
225d9307646SKent Overstreet 		}
226d9307646SKent Overstreet 
227d9307646SKent Overstreet 		if (bch2_trans_inconsistent_on(a->data_type != data_type, trans,
228d9307646SKent Overstreet 				"bucket %llu:%llu gen %u data type %s: wrong data type when stripe, should be %s\n%s",
229d9307646SKent Overstreet 				bucket.inode, bucket.offset, a->gen,
230d9307646SKent Overstreet 				bch2_data_type_str(a->data_type),
2310acf2169SKent Overstreet 				bch2_data_type_str(data_type),
2320acf2169SKent Overstreet 				(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
233d9307646SKent Overstreet 			ret = -BCH_ERR_mark_stripe;
2340acf2169SKent Overstreet 			goto err;
2350acf2169SKent Overstreet 		}
2360acf2169SKent Overstreet 
237d9307646SKent Overstreet 		if (bch2_trans_inconsistent_on(parity &&
238d9307646SKent Overstreet 					       (a->dirty_sectors != -sectors ||
239d9307646SKent Overstreet 						a->cached_sectors), trans,
240d9307646SKent Overstreet 				"bucket %llu:%llu gen %u dirty_sectors %u cached_sectors %u: wrong sectors when deleting parity block of stripe\n%s",
241d9307646SKent Overstreet 				bucket.inode, bucket.offset, a->gen,
242f4f78779SKent Overstreet 				a->dirty_sectors,
24370e3e039SKent Overstreet 				a->cached_sectors,
24407d7c4daSKent Overstreet 				(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
2450acf2169SKent Overstreet 			ret = -BCH_ERR_mark_stripe;
246f4f78779SKent Overstreet 			goto err;
247f4f78779SKent Overstreet 		}
24870e3e039SKent Overstreet 	}
249d9307646SKent Overstreet 
250f4f78779SKent Overstreet 	if (sectors) {
2510acf2169SKent Overstreet 		ret = bch2_bucket_ref_update(trans, ca, s.s_c, ptr, sectors, data_type,
2520acf2169SKent Overstreet 					     a->gen, a->data_type, &a->dirty_sectors);
253f4f78779SKent Overstreet 		if (ret)
2540acf2169SKent Overstreet 			goto err;
2550acf2169SKent Overstreet 	}
256f4f78779SKent Overstreet 
2570acf2169SKent Overstreet 	if (!deleting) {
2580acf2169SKent Overstreet 		a->stripe		= s.k->p.offset;
259f4f78779SKent Overstreet 		a->stripe_redundancy	= s.v->nr_redundant;
260d9307646SKent Overstreet 	} else {
261f4f78779SKent Overstreet 		a->stripe		= 0;
262f4f78779SKent Overstreet 		a->stripe_redundancy	= 0;
263f4f78779SKent Overstreet 	}
264f4f78779SKent Overstreet 
265d9307646SKent Overstreet 	alloc_data_type_set(a, data_type);
2669cc455d1SKent Overstreet err:
2675dd8c60eSKent Overstreet 	printbuf_exit(&buf);
268f4f78779SKent Overstreet 	return ret;
269f4f78779SKent Overstreet }
270d9307646SKent Overstreet 
mark_stripe_bucket(struct btree_trans * trans,struct bkey_s_c_stripe s,unsigned ptr_idx,bool deleting,enum btree_iter_update_trigger_flags flags)2719432e90dSKent Overstreet static int mark_stripe_bucket(struct btree_trans *trans,
2721f2f92ecSKent Overstreet 			      struct bkey_s_c_stripe s,
2731f2f92ecSKent Overstreet 			      unsigned ptr_idx, bool deleting,
2741f2f92ecSKent Overstreet 			      enum btree_iter_update_trigger_flags flags)
2751f2f92ecSKent Overstreet {
2761f2f92ecSKent Overstreet 	struct bch_fs *c = trans->c;
2771f2f92ecSKent Overstreet 	const struct bch_extent_ptr *ptr = s.v->ptrs + ptr_idx;
2781f2f92ecSKent Overstreet 	struct printbuf buf = PRINTBUF;
2791f2f92ecSKent Overstreet 	int ret = 0;
2801f2f92ecSKent Overstreet 
2811f2f92ecSKent Overstreet 	struct bch_dev *ca = bch2_dev_tryget(c, ptr->dev);
282f4f78779SKent Overstreet 	if (unlikely(!ca)) {
2839cc455d1SKent Overstreet 		if (ptr->dev != BCH_SB_MEMBER_INVALID && !(flags & BTREE_TRIGGER_overwrite))
2849cc455d1SKent Overstreet 			ret = -BCH_ERR_mark_stripe;
285e0d5bc6aSKent Overstreet 		goto err;
2861f2f92ecSKent Overstreet 	}
2871f2f92ecSKent Overstreet 
2889cc455d1SKent Overstreet 	struct bpos bucket = PTR_BUCKET_POS(ca, ptr);
2899cc455d1SKent Overstreet 
2909cc455d1SKent Overstreet 	if (flags & BTREE_TRIGGER_transactional) {
2919cc455d1SKent Overstreet 		struct bkey_i_alloc_v4 *a =
2929cc455d1SKent Overstreet 			bch2_trans_start_alloc_update(trans, bucket, 0);
2939432e90dSKent Overstreet 		ret = PTR_ERR_OR_ZERO(a) ?:
2949432e90dSKent Overstreet 			__mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &a->v, flags);
2959432e90dSKent Overstreet 	}
2969432e90dSKent Overstreet 
2979432e90dSKent Overstreet 	if (flags & BTREE_TRIGGER_gc) {
2989432e90dSKent Overstreet 		percpu_down_read(&c->mark_lock);
2999432e90dSKent Overstreet 		struct bucket *g = gc_bucket(ca, bucket.offset);
3009cc455d1SKent Overstreet 		if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n  %s",
3010acf2169SKent Overstreet 					    ptr->dev,
3021f2f92ecSKent Overstreet 					    (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
3030acf2169SKent Overstreet 			ret = -BCH_ERR_mark_stripe;
3040acf2169SKent Overstreet 			goto err_unlock;
3059432e90dSKent Overstreet 		}
306f4f78779SKent Overstreet 
307fb23d57aSKent Overstreet 		bucket_lock(g);
308fb23d57aSKent Overstreet 		struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old;
309f4f78779SKent Overstreet 		ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags);
3101f2f92ecSKent Overstreet 		alloc_to_bucket(g, new);
3111f2f92ecSKent Overstreet 		bucket_unlock(g);
3129432e90dSKent Overstreet err_unlock:
3131f2f92ecSKent Overstreet 		percpu_up_read(&c->mark_lock);
3149cc455d1SKent Overstreet 		if (!ret)
3159cc455d1SKent Overstreet 			ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
3169cc455d1SKent Overstreet 	}
3179cc455d1SKent Overstreet err:
3189cc455d1SKent Overstreet 	bch2_dev_put(ca);
3199cc455d1SKent Overstreet 	printbuf_exit(&buf);
3209cc455d1SKent Overstreet 	return ret;
3219cc455d1SKent Overstreet }
3229cc455d1SKent Overstreet 
mark_stripe_buckets(struct btree_trans * trans,struct bkey_s_c old,struct bkey_s_c new,enum btree_iter_update_trigger_flags flags)3239cc455d1SKent Overstreet static int mark_stripe_buckets(struct btree_trans *trans,
3249cc455d1SKent Overstreet 			       struct bkey_s_c old, struct bkey_s_c new,
3259cc455d1SKent Overstreet 			       enum btree_iter_update_trigger_flags flags)
3269cc455d1SKent Overstreet {
3279cc455d1SKent Overstreet 	const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe
3289cc455d1SKent Overstreet 		? bkey_s_c_to_stripe(old).v : NULL;
3299cc455d1SKent Overstreet 	const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe
3309cc455d1SKent Overstreet 		? bkey_s_c_to_stripe(new).v : NULL;
3319cc455d1SKent Overstreet 
3329cc455d1SKent Overstreet 	BUG_ON(old_s && new_s && old_s->nr_blocks != new_s->nr_blocks);
3339cc455d1SKent Overstreet 
3349cc455d1SKent Overstreet 	unsigned nr_blocks = new_s ? new_s->nr_blocks : old_s->nr_blocks;
3359cc455d1SKent Overstreet 
3369cc455d1SKent Overstreet 	for (unsigned i = 0; i < nr_blocks; i++) {
3379cc455d1SKent Overstreet 		if (new_s && old_s &&
3389cc455d1SKent Overstreet 		    !memcmp(&new_s->ptrs[i],
3399cc455d1SKent Overstreet 			    &old_s->ptrs[i],
3409cc455d1SKent Overstreet 			    sizeof(new_s->ptrs[i])))
3419cc455d1SKent Overstreet 			continue;
3429cc455d1SKent Overstreet 
3439cc455d1SKent Overstreet 		if (new_s) {
3449cc455d1SKent Overstreet 			int ret = mark_stripe_bucket(trans,
3459cc455d1SKent Overstreet 					bkey_s_c_to_stripe(new), i, false, flags);
3469cc455d1SKent Overstreet 			if (ret)
3479cc455d1SKent Overstreet 				return ret;
3489cc455d1SKent Overstreet 		}
3499cc455d1SKent Overstreet 
3509cc455d1SKent Overstreet 		if (old_s) {
3519cc455d1SKent Overstreet 			int ret = mark_stripe_bucket(trans,
3529cc455d1SKent Overstreet 					bkey_s_c_to_stripe(old), i, true, flags);
3539cc455d1SKent Overstreet 			if (ret)
354e4eb3e5aSKent Overstreet 				return ret;
355f40d13f9SKent Overstreet 		}
356f4f78779SKent Overstreet 	}
3575dd8c60eSKent Overstreet 
358f4f78779SKent Overstreet 	return 0;
359f4f78779SKent Overstreet }
360f4f78779SKent Overstreet 
stripe_to_mem(struct stripe * m,const struct bch_stripe * s)361f4f78779SKent Overstreet static inline void stripe_to_mem(struct stripe *m, const struct bch_stripe *s)
362f4f78779SKent Overstreet {
363f4f78779SKent Overstreet 	m->sectors	= le16_to_cpu(s->sectors);
364f4f78779SKent Overstreet 	m->algorithm	= s->algorithm;
365f4f78779SKent Overstreet 	m->nr_blocks	= s->nr_blocks;
366f4f78779SKent Overstreet 	m->nr_redundant	= s->nr_redundant;
367f40d13f9SKent Overstreet 	m->disk_label	= s->disk_label;
368f40d13f9SKent Overstreet 	m->blocks_nonempty = 0;
369f40d13f9SKent Overstreet 
370fb23d57aSKent Overstreet 	for (unsigned i = 0; i < s->nr_blocks; i++)
371fb23d57aSKent Overstreet 		m->blocks_nonempty += !!stripe_blockcount_get(s, i);
372fb23d57aSKent Overstreet }
373fb23d57aSKent Overstreet 
bch2_trigger_stripe(struct btree_trans * trans,enum btree_id btree,unsigned level,struct bkey_s_c old,struct bkey_s _new,enum btree_iter_update_trigger_flags flags)374fb23d57aSKent Overstreet int bch2_trigger_stripe(struct btree_trans *trans,
375fb23d57aSKent Overstreet 			enum btree_id btree, unsigned level,
376e4eb3e5aSKent Overstreet 			struct bkey_s_c old, struct bkey_s _new,
377e4eb3e5aSKent Overstreet 			enum btree_iter_update_trigger_flags flags)
378e4eb3e5aSKent Overstreet {
379e4eb3e5aSKent Overstreet 	struct bkey_s_c new = _new.s_c;
380e4eb3e5aSKent Overstreet 	struct bch_fs *c = trans->c;
381e4eb3e5aSKent Overstreet 	u64 idx = new.k->p.offset;
382e4eb3e5aSKent Overstreet 	const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe
383e4eb3e5aSKent Overstreet 		? bkey_s_c_to_stripe(old).v : NULL;
384e4eb3e5aSKent Overstreet 	const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe
385f4f78779SKent Overstreet 		? bkey_s_c_to_stripe(new).v : NULL;
386fb23d57aSKent Overstreet 
387fb23d57aSKent Overstreet 	if (unlikely(flags & BTREE_TRIGGER_check_repair))
388fb23d57aSKent Overstreet 		return bch2_check_fix_ptrs(trans, btree, level, _new.s_c, flags);
389fb23d57aSKent Overstreet 
390fb23d57aSKent Overstreet 	BUG_ON(new_s && old_s &&
391fb23d57aSKent Overstreet 	       (new_s->nr_blocks	!= old_s->nr_blocks ||
392fb23d57aSKent Overstreet 		new_s->nr_redundant	!= old_s->nr_redundant));
393fb23d57aSKent Overstreet 
394fb23d57aSKent Overstreet 
395fb23d57aSKent Overstreet 	if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
396fb23d57aSKent Overstreet 		/*
397fb23d57aSKent Overstreet 		 * If the pointers aren't changing, we don't need to do anything:
398fb23d57aSKent Overstreet 		 */
399fb23d57aSKent Overstreet 		if (new_s && old_s &&
400fb23d57aSKent Overstreet 		    new_s->nr_blocks	== old_s->nr_blocks &&
401fb23d57aSKent Overstreet 		    new_s->nr_redundant	== old_s->nr_redundant &&
402fb23d57aSKent Overstreet 		    !memcmp(old_s->ptrs, new_s->ptrs,
403fb23d57aSKent Overstreet 			    new_s->nr_blocks * sizeof(struct bch_extent_ptr)))
404fb23d57aSKent Overstreet 			return 0;
405fb23d57aSKent Overstreet 
406fb23d57aSKent Overstreet 		struct gc_stripe *gc = NULL;
407fb23d57aSKent Overstreet 		if (flags & BTREE_TRIGGER_gc) {
408fb23d57aSKent Overstreet 			gc = genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL);
409fb23d57aSKent Overstreet 			if (!gc) {
410fb23d57aSKent Overstreet 				bch_err(c, "error allocating memory for gc_stripes, idx %llu", idx);
411fb23d57aSKent Overstreet 				return -BCH_ERR_ENOMEM_mark_stripe;
412fb23d57aSKent Overstreet 			}
413fb23d57aSKent Overstreet 
414e4eb3e5aSKent Overstreet 			/*
415e4eb3e5aSKent Overstreet 			 * This will be wrong when we bring back runtime gc: we should
4161d16c605SKent Overstreet 			 * be unmarking the old key and then marking the new key
417e4eb3e5aSKent Overstreet 			 *
4181d16c605SKent Overstreet 			 * Also: when we bring back runtime gc, locking
4191d16c605SKent Overstreet 			 */
4201d16c605SKent Overstreet 			gc->alive	= true;
4211d16c605SKent Overstreet 			gc->sectors	= le16_to_cpu(new_s->sectors);
422fb23d57aSKent Overstreet 			gc->nr_blocks	= new_s->nr_blocks;
423e4eb3e5aSKent Overstreet 			gc->nr_redundant	= new_s->nr_redundant;
424e4eb3e5aSKent Overstreet 
425fb23d57aSKent Overstreet 			for (unsigned i = 0; i < new_s->nr_blocks; i++)
426fb23d57aSKent Overstreet 				gc->ptrs[i] = new_s->ptrs[i];
427fb23d57aSKent Overstreet 
428e4eb3e5aSKent Overstreet 			/*
429e4eb3e5aSKent Overstreet 			 * gc recalculates this field from stripe ptr
430e4eb3e5aSKent Overstreet 			 * references:
4311d16c605SKent Overstreet 			 */
432e4eb3e5aSKent Overstreet 			memset(gc->block_sectors, 0, sizeof(gc->block_sectors));
4331d16c605SKent Overstreet 		}
4341d16c605SKent Overstreet 
4351d16c605SKent Overstreet 		if (new_s) {
4361d16c605SKent Overstreet 			s64 sectors = (u64) le16_to_cpu(new_s->sectors) * new_s->nr_redundant;
437fb23d57aSKent Overstreet 
438e4eb3e5aSKent Overstreet 			struct disk_accounting_pos acc = {
439e4eb3e5aSKent Overstreet 				.type = BCH_DISK_ACCOUNTING_replicas,
440e4eb3e5aSKent Overstreet 			};
441e4eb3e5aSKent Overstreet 			bch2_bkey_to_replicas(&acc.replicas, new);
4429cc455d1SKent Overstreet 			int ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, gc);
443e4eb3e5aSKent Overstreet 			if (ret)
444e4eb3e5aSKent Overstreet 				return ret;
445e4eb3e5aSKent Overstreet 
446e4eb3e5aSKent Overstreet 			if (gc)
4475dd8c60eSKent Overstreet 				memcpy(&gc->r.e, &acc.replicas, replicas_entry_bytes(&acc.replicas));
448f4f78779SKent Overstreet 		}
449f4f78779SKent Overstreet 
450f4f78779SKent Overstreet 		if (old_s) {
451f4f78779SKent Overstreet 			s64 sectors = -((s64) le16_to_cpu(old_s->sectors)) * old_s->nr_redundant;
452f4f78779SKent Overstreet 
453f4f78779SKent Overstreet 			struct disk_accounting_pos acc = {
454f4f78779SKent Overstreet 				.type = BCH_DISK_ACCOUNTING_replicas,
455f4f78779SKent Overstreet 			};
456f4f78779SKent Overstreet 			bch2_bkey_to_replicas(&acc.replicas, old);
457f4f78779SKent Overstreet 			int ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, gc);
458f4f78779SKent Overstreet 			if (ret)
459f4f78779SKent Overstreet 				return ret;
460f4f78779SKent Overstreet 		}
461f4f78779SKent Overstreet 
462f4f78779SKent Overstreet 		int ret = mark_stripe_buckets(trans, old, new, flags);
463f4f78779SKent Overstreet 		if (ret)
464f4f78779SKent Overstreet 			return ret;
465f4f78779SKent Overstreet 	}
466f4f78779SKent Overstreet 
467f4f78779SKent Overstreet 	if (flags & BTREE_TRIGGER_atomic) {
468f4f78779SKent Overstreet 		struct stripe *m = genradix_ptr(&c->stripes, idx);
469f4f78779SKent Overstreet 
470f4f78779SKent Overstreet 		if (!m) {
471f4f78779SKent Overstreet 			struct printbuf buf1 = PRINTBUF;
472f4f78779SKent Overstreet 			struct printbuf buf2 = PRINTBUF;
473f4f78779SKent Overstreet 
474f4f78779SKent Overstreet 			bch2_bkey_val_to_text(&buf1, c, old);
475f4f78779SKent Overstreet 			bch2_bkey_val_to_text(&buf2, c, new);
476e4eb3e5aSKent Overstreet 			bch_err_ratelimited(c, "error marking nonexistent stripe %llu while marking\n"
477f4f78779SKent Overstreet 					    "old %s\n"
478f4f78779SKent Overstreet 					    "new %s", idx, buf1.buf, buf2.buf);
479f4f78779SKent Overstreet 			printbuf_exit(&buf2);
480f4f78779SKent Overstreet 			printbuf_exit(&buf1);
481f4f78779SKent Overstreet 			bch2_inconsistent_error(c);
482f4f78779SKent Overstreet 			return -1;
483f4f78779SKent Overstreet 		}
484e4eb3e5aSKent Overstreet 
485e4eb3e5aSKent Overstreet 		if (!new_s) {
486f4f78779SKent Overstreet 			bch2_stripes_heap_del(c, m, idx);
487f4f78779SKent Overstreet 
488f4f78779SKent Overstreet 			memset(m, 0, sizeof(*m));
4892a3731e3SKent Overstreet 		} else {
490bf0fdb4dSKent Overstreet 			stripe_to_mem(m, new_s);
491bf0fdb4dSKent Overstreet 
492cd575ddfSKent Overstreet 			if (!old_s)
4932a3731e3SKent Overstreet 				bch2_stripes_heap_insert(c, m, idx);
4942a3731e3SKent Overstreet 			else
495cd575ddfSKent Overstreet 				bch2_stripes_heap_update(c, m, idx);
4962a3731e3SKent Overstreet 		}
4972a3731e3SKent Overstreet 	}
498b3b66e30SKent Overstreet 
499bf0fdb4dSKent Overstreet 	return 0;
500bf0fdb4dSKent Overstreet }
501bf0fdb4dSKent Overstreet 
/* Returns the matching pointer in @k; the matched stripe block number is stored in *block: */
bkey_matches_stripe(struct bch_stripe * s,struct bkey_s_c k,unsigned * block)503cd575ddfSKent Overstreet static const struct bch_extent_ptr *bkey_matches_stripe(struct bch_stripe *s,
504bf0fdb4dSKent Overstreet 						struct bkey_s_c k, unsigned *block)
505cd575ddfSKent Overstreet {
506cd575ddfSKent Overstreet 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
50742c7d748SKent Overstreet 	unsigned i, nr_data = s->nr_blocks - s->nr_redundant;
50842c7d748SKent Overstreet 
50999aaf570SKent Overstreet 	bkey_for_each_ptr(ptrs, ptr)
51099aaf570SKent Overstreet 		for (i = 0; i < nr_data; i++)
51199aaf570SKent Overstreet 			if (__bch2_ptr_matches_stripe(&s->ptrs[i], ptr,
51242c7d748SKent Overstreet 						      le16_to_cpu(s->sectors))) {
51342c7d748SKent Overstreet 				*block = i;
51442c7d748SKent Overstreet 				return ptr;
51542c7d748SKent Overstreet 			}
51642c7d748SKent Overstreet 
51742c7d748SKent Overstreet 	return NULL;
51842c7d748SKent Overstreet }
51942c7d748SKent Overstreet 
/*
 * Returns true if @k is an extent containing a stripe_ptr entry that refers
 * to stripe @idx. Keys of any other type never match.
 */
static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
{
	if (k.k->type != KEY_TYPE_extent)
		return false;

	struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
	const union bch_extent_entry *entry;

	extent_for_each_entry(e, entry) {
		if (extent_entry_type(entry) != BCH_EXTENT_ENTRY_stripe_ptr)
			continue;

		if (entry->stripe_ptr.idx == idx)
			return true;
	}

	return false;
}
53981d8599eSKent Overstreet 
/* Stripe bufs: */
54181d8599eSKent Overstreet 
ec_stripe_buf_exit(struct ec_stripe_buf * buf)5425be6a274SKent Overstreet static void ec_stripe_buf_exit(struct ec_stripe_buf *buf)
5432a3731e3SKent Overstreet {
5442a3731e3SKent Overstreet 	if (buf->key.k.type == KEY_TYPE_stripe) {
54581d8599eSKent Overstreet 		struct bkey_i_stripe *s = bkey_i_to_stripe(&buf->key);
546bf5a261cSKent Overstreet 		unsigned i;
5472a3731e3SKent Overstreet 
5482a3731e3SKent Overstreet 		for (i = 0; i < s->v.nr_blocks; i++) {
54981d8599eSKent Overstreet 			kvfree(buf->data[i]);
55081d8599eSKent Overstreet 			buf->data[i] = NULL;
5512a3731e3SKent Overstreet 		}
55281d8599eSKent Overstreet 	}
5532a3731e3SKent Overstreet }
5542a3731e3SKent Overstreet 
/* XXX: this is a non-mempoolified memory allocation: */
ec_stripe_buf_init(struct ec_stripe_buf * buf,unsigned offset,unsigned size)5562a3731e3SKent Overstreet static int ec_stripe_buf_init(struct ec_stripe_buf *buf,
5572a3731e3SKent Overstreet 			      unsigned offset, unsigned size)
5582a3731e3SKent Overstreet {
5592a3731e3SKent Overstreet 	struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
5602a3731e3SKent Overstreet 	unsigned csum_granularity = 1U << v->csum_granularity_bits;
5612a3731e3SKent Overstreet 	unsigned end = offset + size;
562bf5a261cSKent Overstreet 	unsigned i;
563cb6fc943SKent Overstreet 
5642a3731e3SKent Overstreet 	BUG_ON(end > le16_to_cpu(v->sectors));
56581d8599eSKent Overstreet 
56681d8599eSKent Overstreet 	offset	= round_down(offset, csum_granularity);
56781d8599eSKent Overstreet 	end	= min_t(unsigned, le16_to_cpu(v->sectors),
56881d8599eSKent Overstreet 			round_up(end, csum_granularity));
56981d8599eSKent Overstreet 
5702a3731e3SKent Overstreet 	buf->offset	= offset;
5715be6a274SKent Overstreet 	buf->size	= end - offset;
57281d8599eSKent Overstreet 
57381d8599eSKent Overstreet 	memset(buf->valid, 0xFF, sizeof(buf->valid));
574cd575ddfSKent Overstreet 
575cd575ddfSKent Overstreet 	for (i = 0; i < v->nr_blocks; i++) {
5762a3731e3SKent Overstreet 		buf->data[i] = kvmalloc(buf->size << 9, GFP_KERNEL);
5772a3731e3SKent Overstreet 		if (!buf->data[i])
578cd575ddfSKent Overstreet 			goto err;
579bf5a261cSKent Overstreet 	}
580cd575ddfSKent Overstreet 
5812a3731e3SKent Overstreet 	return 0;
5822a3731e3SKent Overstreet err:
583cd575ddfSKent Overstreet 	ec_stripe_buf_exit(buf);
5842a3731e3SKent Overstreet 	return -BCH_ERR_ENOMEM_stripe_buf;
5852a3731e3SKent Overstreet }
5862a3731e3SKent Overstreet 
5872a3731e3SKent Overstreet /* Checksumming: */
5882a3731e3SKent Overstreet 
ec_block_checksum(struct ec_stripe_buf * buf,unsigned block,unsigned offset)5892a3731e3SKent Overstreet static struct bch_csum ec_block_checksum(struct ec_stripe_buf *buf,
5902a3731e3SKent Overstreet 					 unsigned block, unsigned offset)
5912a3731e3SKent Overstreet {
5922a3731e3SKent Overstreet 	struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
5932a3731e3SKent Overstreet 	unsigned csum_granularity = 1 << v->csum_granularity_bits;
5942a3731e3SKent Overstreet 	unsigned end = buf->offset + buf->size;
5952a3731e3SKent Overstreet 	unsigned len = min(csum_granularity, end - offset);
5962a3731e3SKent Overstreet 
5972a3731e3SKent Overstreet 	BUG_ON(offset >= end);
598bf5a261cSKent Overstreet 	BUG_ON(offset <  buf->offset);
5992a3731e3SKent Overstreet 	BUG_ON(offset & (csum_granularity - 1));
6002a3731e3SKent Overstreet 	BUG_ON(offset + len != le16_to_cpu(v->sectors) &&
6012a3731e3SKent Overstreet 	       (len & (csum_granularity - 1)));
602cd575ddfSKent Overstreet 
603cd575ddfSKent Overstreet 	return bch2_checksum(NULL, v->csum_type,
604cd575ddfSKent Overstreet 			     null_nonce(),
605cd575ddfSKent Overstreet 			     buf->data[block] + ((offset - buf->offset) << 9),
606cd575ddfSKent Overstreet 			     len << 9);
6072a3731e3SKent Overstreet }
6082a3731e3SKent Overstreet 
ec_generate_checksums(struct ec_stripe_buf * buf)6092a3731e3SKent Overstreet static void ec_generate_checksums(struct ec_stripe_buf *buf)
6102a3731e3SKent Overstreet {
611cd575ddfSKent Overstreet 	struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
612cd575ddfSKent Overstreet 	unsigned i, j, csums_per_device = stripe_csums_per_device(v);
613cd575ddfSKent Overstreet 
614cd575ddfSKent Overstreet 	if (!v->csum_type)
615bf5a261cSKent Overstreet 		return;
616cd575ddfSKent Overstreet 
617cd575ddfSKent Overstreet 	BUG_ON(buf->offset);
618cd575ddfSKent Overstreet 	BUG_ON(buf->size != le16_to_cpu(v->sectors));
6192a3731e3SKent Overstreet 
620cd575ddfSKent Overstreet 	for (i = 0; i < v->nr_blocks; i++)
621cd575ddfSKent Overstreet 		for (j = 0; j < csums_per_device; j++)
622cd575ddfSKent Overstreet 			stripe_csum_set(v, i, j,
623cd575ddfSKent Overstreet 				ec_block_checksum(buf, i, j << v->csum_granularity_bits));
624cd575ddfSKent Overstreet }
625cd575ddfSKent Overstreet 
ec_validate_checksums(struct bch_fs * c,struct ec_stripe_buf * buf)626cd575ddfSKent Overstreet static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf)
627cd575ddfSKent Overstreet {
628cd575ddfSKent Overstreet 	struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
629cd575ddfSKent Overstreet 	unsigned csum_granularity = 1 << v->csum_granularity_bits;
630cd575ddfSKent Overstreet 	unsigned i;
631cd575ddfSKent Overstreet 
6322a3731e3SKent Overstreet 	if (!v->csum_type)
6332a3731e3SKent Overstreet 		return;
634cd575ddfSKent Overstreet 
6352a3731e3SKent Overstreet 	for (i = 0; i < v->nr_blocks; i++) {
636c387d844SKent Overstreet 		unsigned offset = buf->offset;
637c387d844SKent Overstreet 		unsigned end = buf->offset + buf->size;
638c7046ed0SKent Overstreet 
6397f4e1d5dSKent Overstreet 		if (!test_bit(i, buf->valid))
6409abb6dd7SKent Overstreet 			continue;
6419abb6dd7SKent Overstreet 
642c7046ed0SKent Overstreet 		while (offset < end) {
643c7046ed0SKent Overstreet 			unsigned j = offset >> v->csum_granularity_bits;
644c7046ed0SKent Overstreet 			unsigned len = min(csum_granularity, end - offset);
645c7046ed0SKent Overstreet 			struct bch_csum want = stripe_csum_get(v, i, j);
6467f4e1d5dSKent Overstreet 			struct bch_csum got = ec_block_checksum(buf, i, offset);
647c7046ed0SKent Overstreet 
648c387d844SKent Overstreet 			if (bch2_crc_cmp(want, got)) {
649c387d844SKent Overstreet 				struct bch_dev *ca = bch2_dev_tryget(c, v->ptrs[i].dev);
650c387d844SKent Overstreet 				if (ca) {
651cd575ddfSKent Overstreet 					struct printbuf err = PRINTBUF;
652cd575ddfSKent Overstreet 
653cd575ddfSKent Overstreet 					prt_str(&err, "stripe ");
654cd575ddfSKent Overstreet 					bch2_csum_err_msg(&err, v->csum_type, want, got);
655cd575ddfSKent Overstreet 					prt_printf(&err, "  for %ps at %u of\n  ", (void *) _RET_IP_, i);
656cd575ddfSKent Overstreet 					bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key));
657cd575ddfSKent Overstreet 					bch_err_ratelimited(ca, "%s", err.buf);
658cd575ddfSKent Overstreet 					printbuf_exit(&err);
659cd575ddfSKent Overstreet 
660cd575ddfSKent Overstreet 					bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
661cd575ddfSKent Overstreet 				}
662cd575ddfSKent Overstreet 
663bf5a261cSKent Overstreet 				clear_bit(i, buf->valid);
664cd575ddfSKent Overstreet 				break;
665cd575ddfSKent Overstreet 			}
666cd575ddfSKent Overstreet 
667de5bb710SKent Overstreet 			offset += len;
668cd575ddfSKent Overstreet 		}
669cd575ddfSKent Overstreet 	}
670cd575ddfSKent Overstreet }
671cd575ddfSKent Overstreet 
672bf5a261cSKent Overstreet /* Erasure coding: */
673bf5a261cSKent Overstreet 
ec_generate_ec(struct ec_stripe_buf * buf)674bf5a261cSKent Overstreet static void ec_generate_ec(struct ec_stripe_buf *buf)
675cd575ddfSKent Overstreet {
676cd575ddfSKent Overstreet 	struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
677cd575ddfSKent Overstreet 	unsigned nr_data = v->nr_blocks - v->nr_redundant;
678cd575ddfSKent Overstreet 	unsigned bytes = le16_to_cpu(v->sectors) << 9;
679bf5a261cSKent Overstreet 
680ffb7c3d3SKent Overstreet 	raid_gen(nr_data, v->nr_redundant, bytes, buf->data);
681cd575ddfSKent Overstreet }
682cd575ddfSKent Overstreet 
ec_nr_failed(struct ec_stripe_buf * buf)683cd575ddfSKent Overstreet static unsigned ec_nr_failed(struct ec_stripe_buf *buf)
684cd575ddfSKent Overstreet {
6850fefe8d8SKent Overstreet 	struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
686cd575ddfSKent Overstreet 
687cd575ddfSKent Overstreet 	return v->nr_blocks - bitmap_weight(buf->valid, v->nr_blocks);
688cd575ddfSKent Overstreet }
689cd575ddfSKent Overstreet 
ec_do_recov(struct bch_fs * c,struct ec_stripe_buf * buf)690cd575ddfSKent Overstreet static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf)
691cd575ddfSKent Overstreet {
692cd575ddfSKent Overstreet 	struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
693cd575ddfSKent Overstreet 	unsigned i, failed[BCH_BKEY_PTRS_MAX], nr_failed = 0;
694de5bb710SKent Overstreet 	unsigned nr_data = v->nr_blocks - v->nr_redundant;
695cd575ddfSKent Overstreet 	unsigned bytes = buf->size << 9;
696cd575ddfSKent Overstreet 
697cd575ddfSKent Overstreet 	if (ec_nr_failed(buf) > v->nr_redundant) {
698cd575ddfSKent Overstreet 		bch_err_ratelimited(c,
699cd575ddfSKent Overstreet 			"error doing reconstruct read: unable to read enough blocks");
700cd575ddfSKent Overstreet 		return -1;
701cd575ddfSKent Overstreet 	}
702cd575ddfSKent Overstreet 
703bf5a261cSKent Overstreet 	for (i = 0; i < nr_data; i++)
7047f4e1d5dSKent Overstreet 		if (!test_bit(i, buf->valid))
705cd575ddfSKent Overstreet 			failed[nr_failed++] = i;
706cd575ddfSKent Overstreet 
707cd575ddfSKent Overstreet 	raid_rec(nr_failed, failed, nr_data, v->nr_redundant, bytes, buf->data);
70894119eebSKent Overstreet 	return 0;
70994119eebSKent Overstreet }
71094119eebSKent Overstreet 
71194119eebSKent Overstreet /* IO: */
71294119eebSKent Overstreet 
ec_block_endio(struct bio * bio)713306d40dfSKent Overstreet static void ec_block_endio(struct bio *bio)
71463b214e7SKent Overstreet {
715cd575ddfSKent Overstreet 	struct ec_bio *ec_bio = container_of(bio, struct ec_bio, bio);
716cd575ddfSKent Overstreet 	struct bch_stripe *v = &bkey_i_to_stripe(&ec_bio->buf->key)->v;
7179432e90dSKent Overstreet 	struct bch_extent_ptr *ptr = &v->ptrs[ec_bio->idx];
7189432e90dSKent Overstreet 	struct bch_dev *ca = ec_bio->ca;
7197f4e1d5dSKent Overstreet 	struct closure *cl = bio->bi_private;
7209432e90dSKent Overstreet 
7219432e90dSKent Overstreet 	if (bch2_dev_io_err_on(bio->bi_status, ca,
7229432e90dSKent Overstreet 			       bio_data_dir(bio)
7237f4e1d5dSKent Overstreet 			       ? BCH_MEMBER_ERROR_write
7247f4e1d5dSKent Overstreet 			       : BCH_MEMBER_ERROR_read,
7257f4e1d5dSKent Overstreet 			       "erasure coding %s error: %s",
726cd575ddfSKent Overstreet 			       bio_data_dir(bio) ? "write" : "read",
727cd575ddfSKent Overstreet 			       bch2_blk_status_to_str(bio->bi_status)))
728cd575ddfSKent Overstreet 		clear_bit(ec_bio->idx, ec_bio->buf->valid);
729cd575ddfSKent Overstreet 
730cd575ddfSKent Overstreet 	int stale = dev_ptr_stale(ca, ptr);
731cd575ddfSKent Overstreet 	if (stale) {
73273bd774dSKent Overstreet 		bch_err_ratelimited(ca->fs,
733cd575ddfSKent Overstreet 				    "error %s stripe: stale/invalid pointer (%i) after io",
734bf5a261cSKent Overstreet 				    bio_data_dir(bio) == READ ? "reading from" : "writing to",
735cd575ddfSKent Overstreet 				    stale);
736cd575ddfSKent Overstreet 		clear_bit(ec_bio->idx, ec_bio->buf->valid);
737bf5a261cSKent Overstreet 	}
738af4d05c4SKent Overstreet 
739af4d05c4SKent Overstreet 	bio_put(&ec_bio->bio);
74073bd774dSKent Overstreet 	percpu_ref_put(&ca->io_ref);
741cd575ddfSKent Overstreet 	closure_put(cl);
7422c91ab72SKent Overstreet }
7432c91ab72SKent Overstreet 
/*
 * Issue the IO for block @idx of the stripe in @buf.
 *
 * @opf selects read or write.  The buffered range of the block is split into
 * as many bios as needed; each bio takes its own reference on @cl and on the
 * device's io_ref, both released by ec_block_endio().
 *
 * If no IO reference on the device can be obtained, or the stripe's pointer
 * to the device is stale, the block is cleared from buf->valid and nothing is
 * submitted.  No error is returned: callers inspect buf->valid after syncing
 * @cl.
 */
static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
			blk_opf_t opf, unsigned idx, struct closure *cl)
{
	struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
	unsigned offset = 0, bytes = buf->size << 9;
	struct bch_extent_ptr *ptr = &v->ptrs[idx];
	/* blocks below nr_blocks - nr_redundant hold data, the rest parity */
	enum bch_data_type data_type = idx < v->nr_blocks - v->nr_redundant
		? BCH_DATA_user
		: BCH_DATA_parity;
	int rw = op_is_write(opf);

	struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, rw);
	if (!ca) {
		clear_bit(idx, buf->valid);
		return;
	}

	int stale = dev_ptr_stale(ca, ptr);
	if (stale) {
		bch_err_ratelimited(c,
				    "error %s stripe: stale pointer (%i)",
				    rw == READ ? "reading from" : "writing to",
				    stale);
		clear_bit(idx, buf->valid);
		/*
		 * Drop the reference taken by bch2_dev_get_ioref(): no bio was
		 * submitted, so nothing else will release it.
		 */
		percpu_ref_put(&ca->io_ref);
		return;
	}

	this_cpu_add(ca->io_done->sectors[rw][data_type], buf->size);

	while (offset < bytes) {
		unsigned nr_iovecs = min_t(size_t, BIO_MAX_VECS,
					   DIV_ROUND_UP(bytes, PAGE_SIZE));
		unsigned b = min_t(size_t, bytes - offset,
				   nr_iovecs << PAGE_SHIFT);
		struct ec_bio *ec_bio;

		ec_bio = container_of(bio_alloc_bioset(ca->disk_sb.bdev,
						       nr_iovecs,
						       opf,
						       GFP_KERNEL,
						       &c->ec_bioset),
				      struct ec_bio, bio);

		ec_bio->ca			= ca;
		ec_bio->buf			= buf;
		ec_bio->idx			= idx;

		/* device sector = block start + buffered offset + progress */
		ec_bio->bio.bi_iter.bi_sector	= ptr->offset + buf->offset + (offset >> 9);
		ec_bio->bio.bi_end_io		= ec_block_endio;
		ec_bio->bio.bi_private		= cl;

		bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset, b);

		/* Both references are dropped in ec_block_endio(): */
		closure_get(cl);
		percpu_ref_get(&ca->io_ref);

		submit_bio(&ec_bio->bio);

		offset += b;
	}

	/* Release the reference from bch2_dev_get_ioref(): */
	percpu_ref_put(&ca->io_ref);
}
8082a3731e3SKent Overstreet 
/*
 * Look up the key at position (0, @idx) in the stripes btree and copy it
 * into stripe->key.
 *
 * Returns 0 on success, the lookup error if the key could not be fetched, or
 * -ENOENT if the slot does not contain a stripe key.  The iterator is always
 * released before returning.
 */
static int get_stripe_key_trans(struct btree_trans *trans, u64 idx,
				struct ec_stripe_buf *stripe)
{
	struct btree_iter iter;
	struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_stripes,
					       POS(0, idx), BTREE_ITER_slots);
	int ret = bkey_err(k);

	if (!ret) {
		if (k.k->type == KEY_TYPE_stripe)
			bkey_reassemble(&stripe->key, k);
		else
			ret = -ENOENT;
	}

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
83037954a27SKent Overstreet 
831cd575ddfSKent Overstreet /* recovery read path: */
bch2_ec_read_extent(struct btree_trans * trans,struct bch_read_bio * rbio,struct bkey_s_c orig_k)83219c304beSKent Overstreet int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio,
833cd575ddfSKent Overstreet 			struct bkey_s_c orig_k)
83465d48e35SKent Overstreet {
835cd575ddfSKent Overstreet 	struct bch_fs *c = trans->c;
836aa982665SKent Overstreet 	struct ec_stripe_buf *buf = NULL;
8372a3731e3SKent Overstreet 	struct closure cl;
8380fefe8d8SKent Overstreet 	struct bch_stripe *v;
8392a3731e3SKent Overstreet 	unsigned i, offset;
840cd575ddfSKent Overstreet 	const char *msg = NULL;
8412a3731e3SKent Overstreet 	struct printbuf msgbuf = PRINTBUF;
842cd575ddfSKent Overstreet 	int ret = 0;
843cd575ddfSKent Overstreet 
844bf5a261cSKent Overstreet 	closure_init_stack(&cl);
845cd575ddfSKent Overstreet 
8462a3731e3SKent Overstreet 	BUG_ON(!rbio->pick.has_ec);
8472a3731e3SKent Overstreet 
8482a3731e3SKent Overstreet 	buf = kzalloc(sizeof(*buf), GFP_NOFS);
8492a3731e3SKent Overstreet 	if (!buf)
850cd575ddfSKent Overstreet 		return -BCH_ERR_ENOMEM_ec_read_extent;
851cd575ddfSKent Overstreet 
852cd575ddfSKent Overstreet 	ret = lockrestart_do(trans, get_stripe_key_trans(trans, rbio->pick.ec.idx, buf));
8532a3731e3SKent Overstreet 	if (ret) {
8542a3731e3SKent Overstreet 		msg = "stripe not found";
8550fefe8d8SKent Overstreet 		goto err;
8562a3731e3SKent Overstreet 	}
8572a3731e3SKent Overstreet 
8582a3731e3SKent Overstreet 	v = &bkey_i_to_stripe(&buf->key)->v;
859cd575ddfSKent Overstreet 
860cd575ddfSKent Overstreet 	if (!bch2_ptr_matches_stripe(v, rbio->pick)) {
8612a3731e3SKent Overstreet 		msg = "pointer doesn't match stripe";
8622a3731e3SKent Overstreet 		goto err;
8632a3731e3SKent Overstreet 	}
8642a3731e3SKent Overstreet 
8652a3731e3SKent Overstreet 	offset = rbio->bio.bi_iter.bi_sector - v->ptrs[rbio->pick.ec.block].offset;
866cd575ddfSKent Overstreet 	if (offset + bio_sectors(&rbio->bio) > le16_to_cpu(v->sectors)) {
867cd575ddfSKent Overstreet 		msg = "read is bigger than stripe";
868cd575ddfSKent Overstreet 		goto err;
869cd575ddfSKent Overstreet 	}
870cd575ddfSKent Overstreet 
8710fefe8d8SKent Overstreet 	ret = ec_stripe_buf_init(buf, offset, bio_sectors(&rbio->bio));
872cd575ddfSKent Overstreet 	if (ret) {
873cd575ddfSKent Overstreet 		msg = "-ENOMEM";
874cd575ddfSKent Overstreet 		goto err;
875cd575ddfSKent Overstreet 	}
876cd575ddfSKent Overstreet 
877cd575ddfSKent Overstreet 	for (i = 0; i < v->nr_blocks; i++)
878cd575ddfSKent Overstreet 		ec_block_io(c, buf, REQ_OP_READ, i, &cl);
879cd575ddfSKent Overstreet 
880cd575ddfSKent Overstreet 	closure_sync(&cl);
881cd575ddfSKent Overstreet 
882cd575ddfSKent Overstreet 	if (ec_nr_failed(buf) > v->nr_redundant) {
883cd575ddfSKent Overstreet 		msg = "unable to read enough blocks";
8842a3731e3SKent Overstreet 		goto err;
885cd575ddfSKent Overstreet 	}
8862a3731e3SKent Overstreet 
887cd575ddfSKent Overstreet 	ec_validate_checksums(c, buf);
888cd575ddfSKent Overstreet 
889cd575ddfSKent Overstreet 	ret = ec_do_recov(c, buf);
890cd575ddfSKent Overstreet 	if (ret)
891dfe9bfb3SKent Overstreet 		goto err;
892cd575ddfSKent Overstreet 
893cd575ddfSKent Overstreet 	memcpy_to_bio(&rbio->bio, rbio->bio.bi_iter,
894cd575ddfSKent Overstreet 		      buf->data[rbio->pick.ec.block] + ((offset - buf->offset) << 9));
895cd575ddfSKent Overstreet out:
896cd575ddfSKent Overstreet 	ec_stripe_buf_exit(buf);
897cd575ddfSKent Overstreet 	kfree(buf);
898cd575ddfSKent Overstreet 	return ret;
89965d48e35SKent Overstreet err:
900cd575ddfSKent Overstreet 	bch2_bkey_val_to_text(&msgbuf, c, orig_k);
901627a2312SKent Overstreet 	bch_err_ratelimited(c,
902cd575ddfSKent Overstreet 			    "error doing reconstruct read: %s\n  %s", msg, msgbuf.buf);
9031fcce6b8SKuan-Wei Chiu 	printbuf_exit(&msgbuf);;
9041fcce6b8SKuan-Wei Chiu 	ret = -BCH_ERR_stripe_reconstruct;
905cd575ddfSKent Overstreet 	goto out;
906cd575ddfSKent Overstreet }
907627a2312SKent Overstreet 
908cd575ddfSKent Overstreet /* stripe bucket accounting: */
909cd575ddfSKent Overstreet 
/*
 * Make sure the in-memory structures can hold stripe index @idx:
 *  - grow the stripes heap if @idx does not fit in its current size,
 *  - allocate the c->stripes radix tree slot,
 *  - and, unless gc_pos.phase is GC_PHASE_not_running, the c->gc_stripes
 *    slot too.
 *
 * Returns 0 or -BCH_ERR_ENOMEM_ec_stripe_mem_alloc.
 */
static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
{
	ec_stripes_heap *heap = &c->ec_stripes_heap;

	if (idx >= heap->size) {
		ec_stripes_heap bigger;

		/* The new heap is allocated before taking the heap lock: */
		if (!init_heap(&bigger, max(1024UL, roundup_pow_of_two(idx + 1)), gfp))
			return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;

		mutex_lock(&c->ec_stripes_heap_lock);
		/* Re-checked under the lock; only swap in a larger heap: */
		if (bigger.size > heap->size) {
			memcpy(bigger.data, heap->data, heap->nr * sizeof(heap->data[0]));
			bigger.nr = heap->nr;
			swap(*heap, bigger);
		}
		mutex_unlock(&c->ec_stripes_heap_lock);

		/* Frees whichever heap ended up in the local after the swap: */
		free_heap(&bigger);
	}

	if (!genradix_ptr_alloc(&c->stripes, idx, gfp) ||
	    (c->gc_pos.phase != GC_PHASE_not_running &&
	     !genradix_ptr_alloc(&c->gc_stripes, idx, gfp)))
		return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;

	return 0;
}
9384b1e6699SKent Overstreet 
ec_stripe_mem_alloc(struct btree_trans * trans,struct btree_iter * iter)9394b1e6699SKent Overstreet static int ec_stripe_mem_alloc(struct btree_trans *trans,
9404b1e6699SKent Overstreet 			       struct btree_iter *iter)
9414b1e6699SKent Overstreet {
9424b1e6699SKent Overstreet 	return allocate_dropping_locks_errcode(trans,
9434b1e6699SKent Overstreet 			__ec_stripe_mem_alloc(trans->c, iter->pos.offset, _gfp));
9444b1e6699SKent Overstreet }
9454b1e6699SKent Overstreet 
9464b1e6699SKent Overstreet /*
9474b1e6699SKent Overstreet  * Hash table of open stripes:
9484b1e6699SKent Overstreet  * Stripes that are being created or modified are kept in a hash table, so that
9494b1e6699SKent Overstreet  * stripe deletion can skip them.
9504b1e6699SKent Overstreet  */
9514b1e6699SKent Overstreet 
__bch2_stripe_is_open(struct bch_fs * c,u64 idx)9524b1e6699SKent Overstreet static bool __bch2_stripe_is_open(struct bch_fs *c, u64 idx)
9534b1e6699SKent Overstreet {
9544b1e6699SKent Overstreet 	unsigned hash = hash_64(idx, ilog2(ARRAY_SIZE(c->ec_stripes_new)));
9554b1e6699SKent Overstreet 	struct ec_stripe_new *s;
9564b1e6699SKent Overstreet 
9574b1e6699SKent Overstreet 	hlist_for_each_entry(s, &c->ec_stripes_new[hash], hash)
9584b1e6699SKent Overstreet 		if (s->idx == idx)
9594b1e6699SKent Overstreet 			return true;
9604b1e6699SKent Overstreet 	return false;
9614b1e6699SKent Overstreet }
9624b1e6699SKent Overstreet 
bch2_stripe_is_open(struct bch_fs * c,u64 idx)9634b1e6699SKent Overstreet static bool bch2_stripe_is_open(struct bch_fs *c, u64 idx)
9644b1e6699SKent Overstreet {
9654b1e6699SKent Overstreet 	bool ret = false;
9664b1e6699SKent Overstreet 
9674b1e6699SKent Overstreet 	spin_lock(&c->ec_stripes_new_lock);
9684b1e6699SKent Overstreet 	ret = __bch2_stripe_is_open(c, idx);
9694b1e6699SKent Overstreet 	spin_unlock(&c->ec_stripes_new_lock);
9704b1e6699SKent Overstreet 
9714b1e6699SKent Overstreet 	return ret;
9724b1e6699SKent Overstreet }
9734b1e6699SKent Overstreet 
bch2_try_open_stripe(struct bch_fs * c,struct ec_stripe_new * s,u64 idx)9744b1e6699SKent Overstreet static bool bch2_try_open_stripe(struct bch_fs *c,
9754b1e6699SKent Overstreet 				 struct ec_stripe_new *s,
9764b1e6699SKent Overstreet 				 u64 idx)
9774b1e6699SKent Overstreet {
9784b1e6699SKent Overstreet 	bool ret;
9794b1e6699SKent Overstreet 
9804b1e6699SKent Overstreet 	spin_lock(&c->ec_stripes_new_lock);
9814b1e6699SKent Overstreet 	ret = !__bch2_stripe_is_open(c, idx);
9824b1e6699SKent Overstreet 	if (ret) {
9834b1e6699SKent Overstreet 		unsigned hash = hash_64(idx, ilog2(ARRAY_SIZE(c->ec_stripes_new)));
9844b1e6699SKent Overstreet 
9854b1e6699SKent Overstreet 		s->idx = idx;
9864b1e6699SKent Overstreet 		hlist_add_head(&s->hash, &c->ec_stripes_new[hash]);
9874b1e6699SKent Overstreet 	}
9884b1e6699SKent Overstreet 	spin_unlock(&c->ec_stripes_new_lock);
989ba7c37d3SKent Overstreet 
990cd575ddfSKent Overstreet 	return ret;
991cd575ddfSKent Overstreet }
992cd575ddfSKent Overstreet 
bch2_stripe_close(struct bch_fs * c,struct ec_stripe_new * s)9934b1e6699SKent Overstreet static void bch2_stripe_close(struct bch_fs *c, struct ec_stripe_new *s)
9944b1e6699SKent Overstreet {
9951fcce6b8SKuan-Wei Chiu 	BUG_ON(!s->idx);
996e28ef07eSKent Overstreet 
997e28ef07eSKent Overstreet 	spin_lock(&c->ec_stripes_new_lock);
998e28ef07eSKent Overstreet 	hlist_del_init(&s->hash);
9994b1e6699SKent Overstreet 	spin_unlock(&c->ec_stripes_new_lock);
10004b1e6699SKent Overstreet 
1001cd575ddfSKent Overstreet 	s->idx = 0;
1002cd575ddfSKent Overstreet }
1003cd575ddfSKent Overstreet 
1004cd575ddfSKent Overstreet /* Heap of all existing stripes, ordered by blocks_nonempty */
1005cd575ddfSKent Overstreet 
stripe_idx_to_delete(struct bch_fs * c)1006cd575ddfSKent Overstreet static u64 stripe_idx_to_delete(struct bch_fs *c)
1007cd575ddfSKent Overstreet {
1008990d42d1SKent Overstreet 	ec_stripes_heap *h = &c->ec_stripes_heap;
1009cd575ddfSKent Overstreet 
1010cd575ddfSKent Overstreet 	lockdep_assert_held(&c->ec_stripes_heap_lock);
10111fcce6b8SKuan-Wei Chiu 
10121fcce6b8SKuan-Wei Chiu 	if (h->nr &&
10131fcce6b8SKuan-Wei Chiu 	    h->data[0].blocks_nonempty == 0 &&
10141fcce6b8SKuan-Wei Chiu 	    !bch2_stripe_is_open(c, h->data[0].idx))
10151fcce6b8SKuan-Wei Chiu 		return h->data[0].idx;
10161fcce6b8SKuan-Wei Chiu 
10171fcce6b8SKuan-Wei Chiu 	return 0;
10181fcce6b8SKuan-Wei Chiu }
10191fcce6b8SKuan-Wei Chiu 
/*
 * Record in the in-memory stripe referenced by heap slot @i that it now
 * lives at heap position @i.  @h must be the heap embedded in struct bch_fs,
 * since the filesystem is recovered from it with container_of().
 */
static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h,
						   size_t i)
{
	struct bch_fs *c = container_of(h, struct bch_fs, ec_stripes_heap);
	struct stripe *m = genradix_ptr(&c->stripes, h->data[i].idx);

	m->heap_idx = i;
}
10271fcce6b8SKuan-Wei Chiu 
ec_stripes_heap_cmp(const void * l,const void * r,void __always_unused * args)10281fcce6b8SKuan-Wei Chiu static inline bool ec_stripes_heap_cmp(const void *l, const void *r, void __always_unused *args)
10291fcce6b8SKuan-Wei Chiu {
10301fcce6b8SKuan-Wei Chiu 	struct ec_stripe_heap_entry *_l = (struct ec_stripe_heap_entry *)l;
10311fcce6b8SKuan-Wei Chiu 	struct ec_stripe_heap_entry *_r = (struct ec_stripe_heap_entry *)r;
10321fcce6b8SKuan-Wei Chiu 
10331fcce6b8SKuan-Wei Chiu 	return ((_l->blocks_nonempty > _r->blocks_nonempty) <
1034cd575ddfSKent Overstreet 		(_l->blocks_nonempty < _r->blocks_nonempty));
1035cd575ddfSKent Overstreet }
1036cd575ddfSKent Overstreet 
ec_stripes_heap_swap(void * l,void * r,void * h)1037990d42d1SKent Overstreet static inline void ec_stripes_heap_swap(void *l, void *r, void *h)
1038cd575ddfSKent Overstreet {
10391fcce6b8SKuan-Wei Chiu 	struct ec_stripe_heap_entry *_l = (struct ec_stripe_heap_entry *)l;
1040cd575ddfSKent Overstreet 	struct ec_stripe_heap_entry *_r = (struct ec_stripe_heap_entry *)r;
1041cd575ddfSKent Overstreet 	ec_stripes_heap *_h = (ec_stripes_heap *)h;
1042cd575ddfSKent Overstreet 	size_t i = _l - _h->data;
1043ba6dd1ddSKent Overstreet 	size_t j = _r - _h->data;
1044ba6dd1ddSKent Overstreet 
1045ba6dd1ddSKent Overstreet 	swap(*_l, *_r);
10461fcce6b8SKuan-Wei Chiu 
10471fcce6b8SKuan-Wei Chiu 	ec_stripes_heap_set_backpointer(_h, i);
10481fcce6b8SKuan-Wei Chiu 	ec_stripes_heap_set_backpointer(_h, j);
10491fcce6b8SKuan-Wei Chiu }
10501fcce6b8SKuan-Wei Chiu 
/*
 * Consistency check: the in-memory stripe @idx must point at a live heap
 * slot, and that slot must refer back to @idx.  Violations hit BUG_ON().
 */
static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
{
	struct stripe *s = genradix_ptr(&c->stripes, idx);
	ec_stripes_heap *heap = &c->ec_stripes_heap;

	BUG_ON(s->heap_idx >= heap->nr);
	BUG_ON(heap->data[s->heap_idx].idx != idx);
}
1059ba6dd1ddSKent Overstreet 
/*
 * Remove stripe @idx (in-memory state @m) from the stripes heap.  Runs under
 * c->ec_stripes_heap_lock and verifies the stripe's backpointer first.
 */
void bch2_stripes_heap_del(struct bch_fs *c,
			   struct stripe *m, size_t idx)
{
	const struct min_heap_callbacks heap_cbs = {
		.less = ec_stripes_heap_cmp,
		.swp = ec_stripes_heap_swap,
	};
	ec_stripes_heap *heap = &c->ec_stripes_heap;

	mutex_lock(&c->ec_stripes_heap_lock);

	heap_verify_backpointer(c, idx);
	/* The heap itself is passed as the callback argument: */
	min_heap_del(heap, m->heap_idx, &heap_cbs, heap);

	mutex_unlock(&c->ec_stripes_heap_lock);
}
10741fcce6b8SKuan-Wei Chiu 
/*
 * Add stripe @idx to the stripes heap, keyed by m->blocks_nonempty.  Runs
 * under c->ec_stripes_heap_lock; the heap must have room (BUG_ON otherwise).
 */
void bch2_stripes_heap_insert(struct bch_fs *c,
			      struct stripe *m, size_t idx)
{
	const struct min_heap_callbacks heap_cbs = {
		.less = ec_stripes_heap_cmp,
		.swp = ec_stripes_heap_swap,
	};
	ec_stripes_heap *heap = &c->ec_stripes_heap;
	struct ec_stripe_heap_entry entry;

	mutex_lock(&c->ec_stripes_heap_lock);
	BUG_ON(min_heap_full(heap));

	/*
	 * Point the stripe at the slot the new entry is about to occupy; any
	 * later movement is tracked by the swap callback.
	 */
	genradix_ptr(&c->stripes, idx)->heap_idx = heap->nr;

	entry = (struct ec_stripe_heap_entry) {
		.idx = idx,
		.blocks_nonempty = m->blocks_nonempty,
	};
	min_heap_push(heap, &entry, &heap_cbs, heap);

	heap_verify_backpointer(c, idx);
	mutex_unlock(&c->ec_stripes_heap_lock);
}
1097cd575ddfSKent Overstreet 
/*
 * Refresh the heap entry of stripe @idx after m->blocks_nonempty changed and
 * restore heap order.  If the top of the heap is then a deletable stripe,
 * stripe deletion is kicked off after the heap lock has been dropped.
 */
void bch2_stripes_heap_update(struct bch_fs *c,
			      struct stripe *m, size_t idx)
{
	const struct min_heap_callbacks heap_cbs = {
		.less = ec_stripes_heap_cmp,
		.swp = ec_stripes_heap_swap,
	};
	ec_stripes_heap *heap = &c->ec_stripes_heap;
	bool have_deletable;
	size_t slot;

	mutex_lock(&c->ec_stripes_heap_lock);
	heap_verify_backpointer(c, idx);

	slot = m->heap_idx;
	heap->data[slot].blocks_nonempty = m->blocks_nonempty;

	/* Both sifts start from the slot the entry held before the update: */
	min_heap_sift_up(heap, slot, &heap_cbs, heap);
	min_heap_sift_down(heap, slot, &heap_cbs, heap);

	heap_verify_backpointer(c, idx);

	have_deletable = stripe_idx_to_delete(c) != 0;
	mutex_unlock(&c->ec_stripes_heap_lock);

	if (have_deletable)
		bch2_do_stripe_deletes(c);
}
1126ba7c37d3SKent Overstreet 
1127ba7c37d3SKent Overstreet /* stripe deletion */
1128ba7c37d3SKent Overstreet 
/*
 * Delete the stripe key at index @idx from the stripes btree.
 *
 * Returns the lookup or delete error, or -EINVAL (after flagging the
 * filesystem inconsistent) if the slot holds no stripe key or the stripe
 * still has a non-zero block count for any block.
 */
static int ec_stripe_delete(struct btree_trans *trans, u64 idx)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c_stripe s;
	struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_stripes,
					       POS(0, idx), BTREE_ITER_intent);
	int ret = bkey_err(k);

	if (ret)
		goto err;

	if (k.k->type != KEY_TYPE_stripe) {
		bch2_fs_inconsistent(c, "attempting to delete nonexistent stripe %llu", idx);
		ret = -EINVAL;
		goto err;
	}

	s = bkey_s_c_to_stripe(k);

	for (unsigned block = 0; block < s.v->nr_blocks; block++) {
		struct printbuf buf = PRINTBUF;

		if (!stripe_blockcount_get(s.v, block))
			continue;

		/* A block still in use: refuse to delete the stripe. */
		bch2_bkey_val_to_text(&buf, c, k);
		bch2_fs_inconsistent(c, "attempting to delete nonempty stripe %s", buf.buf);
		printbuf_exit(&buf);
		ret = -EINVAL;
		goto err;
	}

	ret = bch2_btree_delete_at(trans, &iter, 0);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
1166cf904c8dSKent Overstreet 
ec_stripe_delete_work(struct work_struct * work)116797fd13adSKent Overstreet static void ec_stripe_delete_work(struct work_struct *work)
1168cd575ddfSKent Overstreet {
1169dd81a060SKent Overstreet 	struct bch_fs *c =
1170d94189adSKent Overstreet 		container_of(work, struct bch_fs, ec_stripe_delete_work);
1171dd81a060SKent Overstreet 
1172dd81a060SKent Overstreet 	while (1) {
1173dd81a060SKent Overstreet 		mutex_lock(&c->ec_stripes_heap_lock);
1174dd81a060SKent Overstreet 		u64 idx = stripe_idx_to_delete(c);
1175d94189adSKent Overstreet 		mutex_unlock(&c->ec_stripes_heap_lock);
11768bff9875SBrian Foster 
1177d94189adSKent Overstreet 		if (!idx)
1178cd575ddfSKent Overstreet 			break;
1179cd575ddfSKent Overstreet 
1180dfe9bfb3SKent Overstreet 		int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
1181dfe9bfb3SKent Overstreet 					ec_stripe_delete(trans, idx));
11824b1e6699SKent Overstreet 		bch_err_fn(c, ret);
11834b1e6699SKent Overstreet 		if (ret)
11844b1e6699SKent Overstreet 			break;
1185cd575ddfSKent Overstreet 	}
1186990d42d1SKent Overstreet 
118767e0dd8fSKent Overstreet 	bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
1188cd575ddfSKent Overstreet }
1189c6e658eeSKent Overstreet 
bch2_do_stripe_deletes(struct bch_fs * c)1190c6e658eeSKent Overstreet void bch2_do_stripe_deletes(struct bch_fs *c)
1191bcb79a51SKent Overstreet {
11925dd8c60eSKent Overstreet 	if (bch2_write_ref_tryget(c, BCH_WRITE_REF_stripe_delete) &&
1193c6e658eeSKent Overstreet 	    !queue_work(c->write_ref_wq, &c->ec_stripe_delete_work))
1194c6e658eeSKent Overstreet 		bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
1195c6e658eeSKent Overstreet }
1196c6e658eeSKent Overstreet 
11974b1e6699SKent Overstreet /* stripe creation: */
11984b1e6699SKent Overstreet 
ec_stripe_key_update(struct btree_trans * trans,struct bkey_i_stripe * new,bool create)11994b1e6699SKent Overstreet static int ec_stripe_key_update(struct btree_trans *trans,
12004b1e6699SKent Overstreet 				struct bkey_i_stripe *new,
12014b1e6699SKent Overstreet 				bool create)
1202c6e658eeSKent Overstreet {
1203c6e658eeSKent Overstreet 	struct bch_fs *c = trans->c;
1204c6e658eeSKent Overstreet 	struct btree_iter iter;
12054b1e6699SKent Overstreet 	struct bkey_s_c k;
12064b1e6699SKent Overstreet 	int ret;
12074b1e6699SKent Overstreet 
1208c6e658eeSKent Overstreet 	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_stripes,
12094b1e6699SKent Overstreet 			       new->k.p, BTREE_ITER_intent);
12104b1e6699SKent Overstreet 	ret = bkey_err(k);
1211c6e658eeSKent Overstreet 	if (ret)
1212c6e658eeSKent Overstreet 		goto err;
1213c6e658eeSKent Overstreet 
1214c6e658eeSKent Overstreet 	if (k.k->type != (create ? KEY_TYPE_deleted : KEY_TYPE_stripe)) {
12159d32097fSKent Overstreet 		bch2_fs_inconsistent(c, "error %s stripe: got existing key type %s",
12169d32097fSKent Overstreet 				     create ? "creating" : "updating",
12179d32097fSKent Overstreet 				     bch2_bkey_types[k.k->type]);
12189d32097fSKent Overstreet 		ret = -EINVAL;
12199d32097fSKent Overstreet 		goto err;
12209d32097fSKent Overstreet 	}
12219d32097fSKent Overstreet 
12229d32097fSKent Overstreet 	if (k.k->type == KEY_TYPE_stripe) {
12239d32097fSKent Overstreet 		const struct bch_stripe *old = bkey_s_c_to_stripe(k).v;
12249d32097fSKent Overstreet 		unsigned i;
12254b1e6699SKent Overstreet 
1226c6e658eeSKent Overstreet 		if (old->nr_blocks != new->v.nr_blocks) {
122767e0dd8fSKent Overstreet 			bch_err(c, "error updating stripe: nr_blocks does not match");
1228c6e658eeSKent Overstreet 			ret = -EINVAL;
122967e0dd8fSKent Overstreet 			goto err;
1230c6e658eeSKent Overstreet 		}
1231c6e658eeSKent Overstreet 
1232c6e658eeSKent Overstreet 		for (i = 0; i < new->v.nr_blocks; i++) {
12330a515633SKent Overstreet 			unsigned v = stripe_blockcount_get(old, i);
1234633cf069SKent Overstreet 
1235dea5647eSKent Overstreet 			BUG_ON(v &&
1236cd575ddfSKent Overstreet 			       (old->ptrs[i].dev != new->v.ptrs[i].dev ||
123762a03559SKent Overstreet 				old->ptrs[i].gen != new->v.ptrs[i].gen ||
1238cd575ddfSKent Overstreet 				old->ptrs[i].offset != new->v.ptrs[i].offset));
1239bf5a261cSKent Overstreet 
1240dea5647eSKent Overstreet 			stripe_blockcount_set(&new->v, i, v);
1241dea5647eSKent Overstreet 		}
1242dea5647eSKent Overstreet 	}
1243dea5647eSKent Overstreet 
1244bf0fdb4dSKent Overstreet 	ret = bch2_trans_update(trans, &iter, &new->k_i, 0);
12459a768ab7SKent Overstreet err:
124664784adeSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
12470a515633SKent Overstreet 	return ret;
12480a515633SKent Overstreet }
1249aef90ce0SKent Overstreet 
/*
 * Handle one backpointer of @bucket while attaching extents to stripe @s.
 *
 * Fetches the next backpointer via bch2_get_next_backpointer() (which is
 * handed @bp_pos to report the position found), then:
 *  - returns 0 if *bp_pos is SPOS_MAX afterwards (nothing left);
 *  - if the backpointer is for a btree node (bp.level != 0) that still
 *    exists, flags the filesystem inconsistent and returns -EIO;
 *  - returns 0 if the extent no longer exists;
 *  - leaves the extent alone if it already has a stripe pointer for this
 *    stripe, does not match any block of the stripe, or the matching pointer
 *    is cached;
 *  - otherwise queues an updated copy of the extent that keeps only the
 *    pointers to the stripe block's device and has a stripe pointer entry
 *    inserted at that pointer.
 *
 * Returns 0 or an error from the backpointer/btree helpers.
 */
static int ec_stripe_update_extent(struct btree_trans *trans,
				   struct bch_dev *ca,
				   struct bpos bucket, u8 gen,
				   struct ec_stripe_buf *s,
				   struct bpos *bp_pos)
{
	struct bch_fs *c = trans->c;
	struct bch_stripe *stripe = &bkey_i_to_stripe(&s->key)->v;
	struct bch_backpointer bp;
	struct btree_iter iter;
	struct bkey_s_c k;
	const struct bch_extent_ptr *match;
	struct bch_extent_ptr *ec_ptr = NULL;
	struct bch_extent_stripe_ptr stripe_ptr;
	struct bkey_i *update;
	int ret, dev, block;

	ret = bch2_get_next_backpointer(trans, ca, bucket, gen,
					bp_pos, &bp, BTREE_ITER_cached);
	if (ret)
		return ret;
	if (bpos_eq(*bp_pos, SPOS_MAX))
		return 0;

	if (bp.level) {
		struct printbuf buf = PRINTBUF;
		struct btree_iter node_iter;
		struct btree *b = bch2_backpointer_get_node(trans, &node_iter,
							    *bp_pos, bp);

		bch2_trans_iter_exit(trans, &node_iter);

		if (!b)
			return 0;

		/* Btree nodes must never live in an erasure coded bucket. */
		prt_printf(&buf, "found btree node in erasure coded bucket: b=%px\n", b);
		bch2_backpointer_to_text(&buf, &bp);

		bch2_fs_inconsistent(c, "%s", buf.buf);
		printbuf_exit(&buf);
		return -EIO;
	}

	k = bch2_backpointer_get_key(trans, &iter, *bp_pos, bp, BTREE_ITER_intent);
	ret = bkey_err(k);
	if (ret)
		return ret;

	/*
	 * extent no longer exists - we could flush the btree
	 * write buffer and retry to verify, but no need:
	 */
	if (!k.k)
		return 0;

	/* Already points at this stripe: nothing to do. */
	if (extent_has_stripe_ptr(k, s->key.k.p.offset))
		goto out;

	/*
	 * It doesn't generally make sense to erasure code cached ptrs:
	 * XXX: should we be incrementing a counter?
	 */
	match = bkey_matches_stripe(stripe, k, &block);
	if (!match || match->cached)
		goto out;

	dev = stripe->ptrs[block].dev;

	/* Room for a copy of the extent plus the new stripe pointer entry. */
	update = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + sizeof(stripe_ptr));
	ret = PTR_ERR_OR_ZERO(update);
	if (ret)
		goto out;

	bkey_reassemble(update, k);

	/* Keep only the pointers on the stripe block's device. */
	bch2_bkey_drop_ptrs_noerror(bkey_i_to_s(update), ptr, ptr->dev != dev);
	ec_ptr = bch2_bkey_has_device(bkey_i_to_s(update), dev);
	BUG_ON(!ec_ptr);

	stripe_ptr = (struct bch_extent_stripe_ptr) {
		.type		= 1 << BCH_EXTENT_ENTRY_stripe_ptr,
		.block		= block,
		.redundancy	= stripe->nr_redundant,
		.idx		= s->key.k.p.offset,
	};

	__extent_entry_insert(update,
			      (union bch_extent_entry *) ec_ptr,
			      (union bch_extent_entry *) &stripe_ptr);

	ret = bch2_trans_update(trans, &iter, update, 0);
out:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
1345dea5647eSKent Overstreet 
ec_stripe_update_bucket(struct btree_trans * trans,struct ec_stripe_buf * s,unsigned block)1346cb52d23eSKent Overstreet static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_buf *s,
1347cb52d23eSKent Overstreet 				   unsigned block)
1348633cf069SKent Overstreet {
1349dea5647eSKent Overstreet 	struct bch_fs *c = trans->c;
1350dea5647eSKent Overstreet 	struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v;
135162a03559SKent Overstreet 	struct bch_extent_ptr ptr = v->ptrs[block];
1352dea5647eSKent Overstreet 	struct bpos bp_pos = POS_MIN;
1353dea5647eSKent Overstreet 	int ret = 0;
135462a03559SKent Overstreet 
1355dea5647eSKent Overstreet 	struct bch_dev *ca = bch2_dev_tryget(c, ptr.dev);
1356dea5647eSKent Overstreet 	if (!ca)
13571f2f92ecSKent Overstreet 		return -EIO;
1358dea5647eSKent Overstreet 
1359dea5647eSKent Overstreet 	struct bpos bucket_pos = PTR_BUCKET_POS(ca, &ptr);
1360dea5647eSKent Overstreet 
1361dea5647eSKent Overstreet 	while (1) {
1362dea5647eSKent Overstreet 		ret = commit_do(trans, NULL, NULL,
13636bd68ec2SKent Overstreet 				BCH_TRANS_COMMIT_no_check_rw|
1364bf5a261cSKent Overstreet 				BCH_TRANS_COMMIT_no_enospc,
1365dea5647eSKent Overstreet 			ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s, &bp_pos));
1366dea5647eSKent Overstreet 		if (ret)
1367dea5647eSKent Overstreet 			break;
136809caeabeSKent Overstreet 		if (bkey_eq(bp_pos, POS_MAX))
1369dea5647eSKent Overstreet 			break;
1370dea5647eSKent Overstreet 
1371dea5647eSKent Overstreet 		bp_pos = bpos_nosnap_successor(bp_pos);
1372dea5647eSKent Overstreet 	}
13736bd68ec2SKent Overstreet 
1374dea5647eSKent Overstreet 	bch2_dev_put(ca);
1375dea5647eSKent Overstreet 	return ret;
1376dea5647eSKent Overstreet }
1377dea5647eSKent Overstreet 
ec_stripe_update_extents(struct bch_fs * c,struct ec_stripe_buf * s)13786bd68ec2SKent Overstreet static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s)
1379dea5647eSKent Overstreet {
1380dea5647eSKent Overstreet 	struct btree_trans *trans = bch2_trans_get(c);
1381cd575ddfSKent Overstreet 	struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v;
1382cd575ddfSKent Overstreet 	unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
138394bc95c4SKent Overstreet 	int ret = 0;
138494bc95c4SKent Overstreet 
138594bc95c4SKent Overstreet 	ret = bch2_btree_write_buffer_flush_sync(trans);
138694bc95c4SKent Overstreet 	if (ret)
138794bc95c4SKent Overstreet 		goto err;
13882c91ab72SKent Overstreet 
13892c91ab72SKent Overstreet 	for (i = 0; i < nr_data; i++) {
13907c50140fSKent Overstreet 		ret = ec_stripe_update_bucket(trans, s, i);
139194bc95c4SKent Overstreet 		if (ret)
139294bc95c4SKent Overstreet 			break;
139394bc95c4SKent Overstreet 	}
13942c91ab72SKent Overstreet err:
139594bc95c4SKent Overstreet 	bch2_trans_put(trans);
139694bc95c4SKent Overstreet 
139794bc95c4SKent Overstreet 	return ret;
139894bc95c4SKent Overstreet }
13992c91ab72SKent Overstreet 
zero_out_rest_of_ec_bucket(struct bch_fs * c,struct ec_stripe_new * s,unsigned block,struct open_bucket * ob)140094bc95c4SKent Overstreet static void zero_out_rest_of_ec_bucket(struct bch_fs *c,
140194bc95c4SKent Overstreet 				       struct ec_stripe_new *s,
140294bc95c4SKent Overstreet 				       unsigned block,
140394bc95c4SKent Overstreet 				       struct open_bucket *ob)
140494bc95c4SKent Overstreet {
140594bc95c4SKent Overstreet 	struct bch_dev *ca = bch2_dev_get_ioref(c, ob->dev, WRITE);
140694bc95c4SKent Overstreet 	if (!ca) {
140794bc95c4SKent Overstreet 		s->err = -BCH_ERR_erofs_no_writes;
140894bc95c4SKent Overstreet 		return;
140994bc95c4SKent Overstreet 	}
1410fba053d2SKent Overstreet 
1411fba053d2SKent Overstreet 	unsigned offset = ca->mi.bucket_size - ob->sectors_free;
1412fba053d2SKent Overstreet 	memset(s->new_stripe.data[block] + (offset << 9),
1413fba053d2SKent Overstreet 	       0,
1414fba053d2SKent Overstreet 	       ob->sectors_free << 9);
1415fba053d2SKent Overstreet 
1416fba053d2SKent Overstreet 	int ret = blkdev_issue_zeroout(ca->disk_sb.bdev,
1417cd575ddfSKent Overstreet 			ob->bucket * ca->mi.bucket_size + offset,
1418cd575ddfSKent Overstreet 			ob->sectors_free,
1419cd575ddfSKent Overstreet 			GFP_KERNEL, 0);
1420cd575ddfSKent Overstreet 
1421cd575ddfSKent Overstreet 	percpu_ref_put(&ca->io_ref);
1422cd575ddfSKent Overstreet 
1423cd575ddfSKent Overstreet 	if (ret)
1424bf5a261cSKent Overstreet 		s->err = ret;
1425cd575ddfSKent Overstreet }
1426cd575ddfSKent Overstreet 
bch2_ec_stripe_new_free(struct bch_fs * c,struct ec_stripe_new * s)1427cd575ddfSKent Overstreet void bch2_ec_stripe_new_free(struct bch_fs *c, struct ec_stripe_new *s)
1428cd575ddfSKent Overstreet {
1429cd575ddfSKent Overstreet 	if (s->idx)
143081d8599eSKent Overstreet 		bch2_stripe_close(c, s);
1431cd575ddfSKent Overstreet 	kfree(s);
1432aebe7a67SKent Overstreet }
143394bc95c4SKent Overstreet 
143494bc95c4SKent Overstreet /*
143594bc95c4SKent Overstreet  * data buckets of new stripe all written: create the stripe
143694bc95c4SKent Overstreet  */
ec_stripe_create(struct ec_stripe_new * s)143794bc95c4SKent Overstreet static void ec_stripe_create(struct ec_stripe_new *s)
143894bc95c4SKent Overstreet {
143994bc95c4SKent Overstreet 	struct bch_fs *c = s->c;
1440aebe7a67SKent Overstreet 	struct open_bucket *ob;
144194bc95c4SKent Overstreet 	struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
1442cd575ddfSKent Overstreet 	unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
1443858536c7SKent Overstreet 	int ret;
1444cd575ddfSKent Overstreet 
1445cd575ddfSKent Overstreet 	BUG_ON(s->h->s == s);
1446cd575ddfSKent Overstreet 
1447cd575ddfSKent Overstreet 	closure_sync(&s->iodone);
144881d8599eSKent Overstreet 
144981d8599eSKent Overstreet 	if (!s->err) {
145081d8599eSKent Overstreet 		for (i = 0; i < nr_data; i++)
145181d8599eSKent Overstreet 			if (s->blocks[i]) {
145281d8599eSKent Overstreet 				ob = c->open_buckets + s->blocks[i];
145381d8599eSKent Overstreet 
145481d8599eSKent Overstreet 				if (ob->sectors_free)
145581d8599eSKent Overstreet 					zero_out_rest_of_ec_bucket(c, s, i, ob);
145681d8599eSKent Overstreet 			}
1457bf5a261cSKent Overstreet 	}
145881d8599eSKent Overstreet 
145981d8599eSKent Overstreet 	if (s->err) {
146081d8599eSKent Overstreet 		if (!bch2_err_matches(s->err, EROFS))
14612a3731e3SKent Overstreet 			bch_err(c, "error creating stripe: error writing data buckets");
146281d8599eSKent Overstreet 		goto err;
146381d8599eSKent Overstreet 	}
1464f6b94a3bSKent Overstreet 
14655be6a274SKent Overstreet 	if (s->have_existing_stripe) {
1466f6b94a3bSKent Overstreet 		ec_validate_checksums(c, &s->existing_stripe);
146781d8599eSKent Overstreet 
1468cd575ddfSKent Overstreet 		if (ec_do_recov(c, &s->existing_stripe)) {
146981d8599eSKent Overstreet 			bch_err(c, "error creating stripe: error reading existing stripe");
1470cd575ddfSKent Overstreet 			goto err;
1471cd575ddfSKent Overstreet 		}
1472cd575ddfSKent Overstreet 
147381d8599eSKent Overstreet 		for (i = 0; i < nr_data; i++)
147481d8599eSKent Overstreet 			if (stripe_blockcount_get(&bkey_i_to_stripe(&s->existing_stripe.key)->v, i))
1475cd575ddfSKent Overstreet 				swap(s->new_stripe.data[i],
147681d8599eSKent Overstreet 				     s->existing_stripe.data[i]);
1477cd575ddfSKent Overstreet 
1478ebe8bd75SKent Overstreet 		ec_stripe_buf_exit(&s->existing_stripe);
1479cd575ddfSKent Overstreet 	}
1480cd575ddfSKent Overstreet 
1481b40901b0SKent Overstreet 	BUG_ON(!s->allocated);
1482cb52d23eSKent Overstreet 	BUG_ON(!s->idx);
1483cb52d23eSKent Overstreet 
14846bd68ec2SKent Overstreet 	ec_generate_ec(&s->new_stripe);
1485bf5a261cSKent Overstreet 
14864b1e6699SKent Overstreet 	ec_generate_checksums(&s->new_stripe);
1487cf904c8dSKent Overstreet 
1488cd575ddfSKent Overstreet 	/* write p/q: */
1489ebe8bd75SKent Overstreet 	for (i = nr_data; i < v->nr_blocks; i++)
1490cd575ddfSKent Overstreet 		ec_block_io(c, &s->new_stripe, REQ_OP_WRITE, i, &s->iodone);
1491cd575ddfSKent Overstreet 	closure_sync(&s->iodone);
1492dea5647eSKent Overstreet 
1493cf904c8dSKent Overstreet 	if (ec_nr_failed(&s->new_stripe)) {
1494cf904c8dSKent Overstreet 		bch_err(c, "error creating stripe: error writing redundancy buckets");
14954b1e6699SKent Overstreet 		goto err;
1496cd575ddfSKent Overstreet 	}
1497af4d05c4SKent Overstreet 
1498af4d05c4SKent Overstreet 	ret = bch2_trans_do(c, &s->res, NULL,
14996c7585b0SKent Overstreet 			    BCH_TRANS_COMMIT_no_check_rw|
15006c7585b0SKent Overstreet 			    BCH_TRANS_COMMIT_no_enospc,
15016c7585b0SKent Overstreet 			    ec_stripe_key_update(trans,
15026c7585b0SKent Overstreet 					bkey_i_to_stripe(&s->new_stripe.key),
15036c7585b0SKent Overstreet 					!s->have_existing_stripe));
1504cd575ddfSKent Overstreet 	bch_err_msg(c, ret, "creating stripe key");
1505cd575ddfSKent Overstreet 	if (ret) {
15066c7585b0SKent Overstreet 		goto err;
15076c7585b0SKent Overstreet 	}
1508cd575ddfSKent Overstreet 
15096c7585b0SKent Overstreet 	ret = ec_stripe_update_extents(c, &s->new_stripe);
1510cd575ddfSKent Overstreet 	bch_err_msg(c, ret, "error updating extents");
151181c771b2SKent Overstreet 	if (ret)
151281c771b2SKent Overstreet 		goto err;
151381c771b2SKent Overstreet err:
151499a3d398SKent Overstreet 	bch2_disk_reservation_put(c, &s->res);
151581c771b2SKent Overstreet 
15162a3731e3SKent Overstreet 	for (i = 0; i < v->nr_blocks; i++)
15172a3731e3SKent Overstreet 		if (s->blocks[i]) {
151881d8599eSKent Overstreet 			ob = c->open_buckets + s->blocks[i];
1519fba053d2SKent Overstreet 
1520fba053d2SKent Overstreet 			if (i < nr_data) {
1521cd575ddfSKent Overstreet 				ob->ec = NULL;
1522cd575ddfSKent Overstreet 				__bch2_open_bucket_put(c, ob);
1523ebe8bd75SKent Overstreet 			} else {
1524ebe8bd75SKent Overstreet 				bch2_open_bucket_put(c, ob);
1525ebe8bd75SKent Overstreet 			}
1526ebe8bd75SKent Overstreet 		}
1527ebe8bd75SKent Overstreet 
1528ebe8bd75SKent Overstreet 	mutex_lock(&c->ec_stripe_new_lock);
1529fba053d2SKent Overstreet 	list_del(&s->list);
1530ebe8bd75SKent Overstreet 	mutex_unlock(&c->ec_stripe_new_lock);
1531ebe8bd75SKent Overstreet 	wake_up(&c->ec_stripe_new_wait);
1532ebe8bd75SKent Overstreet 
1533ebe8bd75SKent Overstreet 	ec_stripe_buf_exit(&s->existing_stripe);
1534ebe8bd75SKent Overstreet 	ec_stripe_buf_exit(&s->new_stripe);
1535ebe8bd75SKent Overstreet 	closure_debug_destroy(&s->iodone);
1536ebe8bd75SKent Overstreet 
1537ebe8bd75SKent Overstreet 	ec_stripe_new_put(c, s, STRIPE_REF_stripe);
1538703e2a43SKent Overstreet }
1539703e2a43SKent Overstreet 
get_pending_stripe(struct bch_fs * c)1540703e2a43SKent Overstreet static struct ec_stripe_new *get_pending_stripe(struct bch_fs *c)
1541703e2a43SKent Overstreet {
1542ebe8bd75SKent Overstreet 	struct ec_stripe_new *s;
1543ebe8bd75SKent Overstreet 
1544ebe8bd75SKent Overstreet 	mutex_lock(&c->ec_stripe_new_lock);
1545703e2a43SKent Overstreet 	list_for_each_entry(s, &c->ec_stripe_new_list, list)
1546ebe8bd75SKent Overstreet 		if (!atomic_read(&s->ref[STRIPE_REF_io]))
1547ebe8bd75SKent Overstreet 			goto out;
1548703e2a43SKent Overstreet 	s = NULL;
1549ebe8bd75SKent Overstreet out:
1550ebe8bd75SKent Overstreet 	mutex_unlock(&c->ec_stripe_new_lock);
1551ebe8bd75SKent Overstreet 
1552ebe8bd75SKent Overstreet 	return s;
1553ebe8bd75SKent Overstreet }
1554ebe8bd75SKent Overstreet 
ec_stripe_create_work(struct work_struct * work)1555ebe8bd75SKent Overstreet static void ec_stripe_create_work(struct work_struct *work)
1556703e2a43SKent Overstreet {
1557703e2a43SKent Overstreet 	struct bch_fs *c = container_of(work,
1558703e2a43SKent Overstreet 		struct bch_fs, ec_stripe_create_work);
1559cd575ddfSKent Overstreet 	struct ec_stripe_new *s;
1560cd575ddfSKent Overstreet 
1561cd575ddfSKent Overstreet 	while ((s = get_pending_stripe(c)))
1562f6b94a3bSKent Overstreet 		ec_stripe_create(s);
1563f6b94a3bSKent Overstreet 
1564cd575ddfSKent Overstreet 	bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
1565703e2a43SKent Overstreet }
1566cd575ddfSKent Overstreet 
bch2_ec_do_stripe_creates(struct bch_fs * c)1567703e2a43SKent Overstreet void bch2_ec_do_stripe_creates(struct bch_fs *c)
1568703e2a43SKent Overstreet {
1569703e2a43SKent Overstreet 	bch2_write_ref_get(c, BCH_WRITE_REF_stripe_create);
1570cd575ddfSKent Overstreet 
1571fba053d2SKent Overstreet 	if (!queue_work(system_long_wq, &c->ec_stripe_create_work))
1572cd575ddfSKent Overstreet 		bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
1573cd575ddfSKent Overstreet }
1574cd575ddfSKent Overstreet 
ec_stripe_new_set_pending(struct bch_fs * c,struct ec_stripe_head * h)1575cd575ddfSKent Overstreet static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
1576cd575ddfSKent Overstreet {
1577cd575ddfSKent Overstreet 	struct ec_stripe_new *s = h->s;
1578cd575ddfSKent Overstreet 
1579cd575ddfSKent Overstreet 	lockdep_assert_held(&h->lock);
1580cd575ddfSKent Overstreet 
1581cd575ddfSKent Overstreet 	BUG_ON(!s->allocated && !s->err);
1582cd575ddfSKent Overstreet 
1583cd575ddfSKent Overstreet 	h->s		= NULL;
1584cd575ddfSKent Overstreet 	s->pending	= true;
1585cd575ddfSKent Overstreet 
1586cd575ddfSKent Overstreet 	mutex_lock(&c->ec_stripe_new_lock);
15875be6a274SKent Overstreet 	list_add(&s->list, &c->ec_stripe_new_list);
15885be6a274SKent Overstreet 	mutex_unlock(&c->ec_stripe_new_lock);
15898783856aSKent Overstreet 
15908783856aSKent Overstreet 	ec_stripe_new_put(c, s, STRIPE_REF_io);
1591cd575ddfSKent Overstreet }
159281d8599eSKent Overstreet 
ec_stripe_new_cancel(struct bch_fs * c,struct ec_stripe_head * h,int err)1593cd575ddfSKent Overstreet static void ec_stripe_new_cancel(struct bch_fs *c, struct ec_stripe_head *h, int err)
1594cd575ddfSKent Overstreet {
1595cd575ddfSKent Overstreet 	h->s->err = err;
1596cd575ddfSKent Overstreet 	ec_stripe_new_set_pending(c, h);
1597cd575ddfSKent Overstreet }
1598cd575ddfSKent Overstreet 
bch2_ec_bucket_cancel(struct bch_fs * c,struct open_bucket * ob)1599cd575ddfSKent Overstreet void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob)
16003ea2b1e1SKent Overstreet {
1601cd575ddfSKent Overstreet 	struct ec_stripe_new *s = ob->ec;
1602cd575ddfSKent Overstreet 
1603cd575ddfSKent Overstreet 	s->err = -EIO;
1604cd575ddfSKent Overstreet }
1605cd575ddfSKent Overstreet 
bch2_writepoint_ec_buf(struct bch_fs * c,struct write_point * wp)1606cd575ddfSKent Overstreet void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp)
160741b84fb4SKent Overstreet {
1608cd575ddfSKent Overstreet 	struct open_bucket *ob = ec_open_bucket(c, &wp->ptrs);
1609cd575ddfSKent Overstreet 	if (!ob)
1610cd575ddfSKent Overstreet 		return NULL;
1611cd575ddfSKent Overstreet 
161241b84fb4SKent Overstreet 	BUG_ON(!ob->ec->new_stripe.data[ob->ec_idx]);
1613cd575ddfSKent Overstreet 
1614cd575ddfSKent Overstreet 	struct bch_dev *ca	= ob_dev(c, ob);
1615cd575ddfSKent Overstreet 	unsigned offset		= ca->mi.bucket_size - ob->sectors_free;
1616cd575ddfSKent Overstreet 
161741b84fb4SKent Overstreet 	return ob->ec->new_stripe.data[ob->ec_idx] + (offset << 9);
1618cd575ddfSKent Overstreet }
1619cd575ddfSKent Overstreet 
/*
 * Comparison callback for sort(): compares the two unsigned ints that @_l
 * and @_r point to, using cmp_int().
 */
static int unsigned_cmp(const void *_l, const void *_r)
{
	const unsigned *lhs = _l;
	const unsigned *rhs = _r;

	return cmp_int(*lhs, *rhs);
}
1627cd575ddfSKent Overstreet 
1628cd575ddfSKent Overstreet /* pick most common bucket size: */
pick_blocksize(struct bch_fs * c,struct bch_devs_mask * devs)1629cd575ddfSKent Overstreet static unsigned pick_blocksize(struct bch_fs *c,
1630cd575ddfSKent Overstreet 			       struct bch_devs_mask *devs)
1631cd575ddfSKent Overstreet {
1632cd575ddfSKent Overstreet 	unsigned nr = 0, sizes[BCH_SB_MEMBERS_MAX];
1633cd575ddfSKent Overstreet 	struct {
1634cd575ddfSKent Overstreet 		unsigned nr, size;
16350ba95accSKent Overstreet 	} cur = { 0, 0 }, best = { 0, 0 };
16360ba95accSKent Overstreet 
16370ba95accSKent Overstreet 	for_each_member_device_rcu(c, ca, devs)
16380ba95accSKent Overstreet 		sizes[nr++] = ca->mi.bucket_size;
16390ba95accSKent Overstreet 
1640f6b94a3bSKent Overstreet 	sort(sizes, nr, sizeof(unsigned), unsigned_cmp, NULL);
1641bf5a261cSKent Overstreet 
1642f6b94a3bSKent Overstreet 	for (unsigned i = 0; i < nr; i++) {
1643f6b94a3bSKent Overstreet 		if (sizes[i] != cur.size) {
1644f6b94a3bSKent Overstreet 			if (cur.nr > best.nr)
1645f6b94a3bSKent Overstreet 				best = cur;
1646bf5a261cSKent Overstreet 
1647f6b94a3bSKent Overstreet 			cur.nr = 0;
1648f6b94a3bSKent Overstreet 			cur.size = sizes[i];
1649f6b94a3bSKent Overstreet 		}
1650f6b94a3bSKent Overstreet 
1651f6b94a3bSKent Overstreet 		cur.nr++;
1652f6b94a3bSKent Overstreet 	}
1653e4099990SKent Overstreet 
16546404dcc9SKent Overstreet 	if (cur.nr > best.nr)
1655f6b94a3bSKent Overstreet 		best = cur;
1656f6b94a3bSKent Overstreet 
1657f6b94a3bSKent Overstreet 	return best.size;
1658f6b94a3bSKent Overstreet }
1659f6b94a3bSKent Overstreet 
/*
 * Whether a brand new stripe may be created.  Currently hard-wired to
 * false; @c is not consulted.
 */
static bool may_create_new_stripe(struct bch_fs *c)
{
	return false;
}
1664f6b94a3bSKent Overstreet 
ec_stripe_key_init(struct bch_fs * c,struct bkey_i * k,unsigned nr_data,unsigned nr_parity,unsigned stripe_size,unsigned disk_label)1665f6b94a3bSKent Overstreet static void ec_stripe_key_init(struct bch_fs *c,
1666f6b94a3bSKent Overstreet 			       struct bkey_i *k,
1667f6b94a3bSKent Overstreet 			       unsigned nr_data,
1668cd575ddfSKent Overstreet 			       unsigned nr_parity,
1669cd575ddfSKent Overstreet 			       unsigned stripe_size,
1670cd575ddfSKent Overstreet 			       unsigned disk_label)
1671cd575ddfSKent Overstreet {
1672cd575ddfSKent Overstreet 	struct bkey_i_stripe *s = bkey_stripe_init(k);
1673cd575ddfSKent Overstreet 	unsigned u64s;
1674cd575ddfSKent Overstreet 
167565d48e35SKent Overstreet 	s->v.sectors			= cpu_to_le16(stripe_size);
1676cd575ddfSKent Overstreet 	s->v.algorithm			= 0;
1677cd575ddfSKent Overstreet 	s->v.nr_blocks			= nr_data + nr_parity;
167881d8599eSKent Overstreet 	s->v.nr_redundant		= nr_parity;
1679fba053d2SKent Overstreet 	s->v.csum_granularity_bits	= ilog2(c->opts.encoded_extent_max >> 9);
1680fba053d2SKent Overstreet 	s->v.csum_type			= BCH_CSUM_crc32c;
1681cd575ddfSKent Overstreet 	s->v.disk_label			= disk_label;
1682cd575ddfSKent Overstreet 
1683f6b94a3bSKent Overstreet 	while ((u64s = stripe_val_u64s(&s->v)) > BKEY_VAL_U64s_MAX) {
1684ffb7c3d3SKent Overstreet 		BUG_ON(1 << s->v.csum_granularity_bits >=
1685f6b94a3bSKent Overstreet 		       le16_to_cpu(s->v.sectors) ||
1686cd575ddfSKent Overstreet 		       s->v.csum_granularity_bits == U8_MAX);
1687bf5a261cSKent Overstreet 		s->v.csum_granularity_bits++;
1688bf5a261cSKent Overstreet 	}
1689cd575ddfSKent Overstreet 
1690cd575ddfSKent Overstreet 	set_bkey_val_u64s(&s->k, u64s);
1691cd575ddfSKent Overstreet }
1692cd575ddfSKent Overstreet 
ec_new_stripe_alloc(struct bch_fs * c,struct ec_stripe_head * h)1693cd575ddfSKent Overstreet static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
1694cd575ddfSKent Overstreet {
1695cd575ddfSKent Overstreet 	struct ec_stripe_new *s;
16968deed5f4SKent Overstreet 
1697e53a961cSKent Overstreet 	lockdep_assert_held(&h->lock);
1698cd575ddfSKent Overstreet 
1699cd575ddfSKent Overstreet 	s = kzalloc(sizeof(*s), GFP_KERNEL);
1700cd575ddfSKent Overstreet 	if (!s)
1701cd575ddfSKent Overstreet 		return -BCH_ERR_ENOMEM_ec_new_stripe_alloc;
1702cd575ddfSKent Overstreet 
1703cd575ddfSKent Overstreet 	mutex_init(&s->lock);
1704cd575ddfSKent Overstreet 	closure_init(&s->iodone, NULL);
1705cd575ddfSKent Overstreet 	atomic_set(&s->ref[STRIPE_REF_stripe], 1);
170673d86dfdSKent Overstreet 	atomic_set(&s->ref[STRIPE_REF_io], 1);
1707cd575ddfSKent Overstreet 	s->c		= c;
1708cd575ddfSKent Overstreet 	s->h		= h;
1709cd575ddfSKent Overstreet 	s->nr_data	= min_t(unsigned, h->nr_active_devs,
1710cd575ddfSKent Overstreet 				BCH_BKEY_PTRS_MAX) - h->redundancy;
1711e53a961cSKent Overstreet 	s->nr_parity	= h->redundancy;
1712cd575ddfSKent Overstreet 
1713cd575ddfSKent Overstreet 	ec_stripe_key_init(c, &s->new_stripe.key,
171489fd25beSKent Overstreet 			   s->nr_data, s->nr_parity,
1715cd575ddfSKent Overstreet 			   h->blocksize, h->disk_label);
171641b84fb4SKent Overstreet 
1717cd575ddfSKent Overstreet 	h->s = s;
171841b84fb4SKent Overstreet 	h->nr_created++;
1719cd575ddfSKent Overstreet 	return 0;
1720cd575ddfSKent Overstreet }
1721cd575ddfSKent Overstreet 
ec_stripe_head_devs_update(struct bch_fs * c,struct ec_stripe_head * h)172241b84fb4SKent Overstreet static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h)
1723cd575ddfSKent Overstreet {
1724cd575ddfSKent Overstreet 	struct bch_devs_mask devs = h->devs;
1725cd575ddfSKent Overstreet 
1726cd575ddfSKent Overstreet 	rcu_read_lock();
17274d6128dcSKent Overstreet 	h->devs = target_rw_devs(c, BCH_DATA_user, h->disk_label
17284d6128dcSKent Overstreet 				 ? group_to_target(h->disk_label - 1)
17294d6128dcSKent Overstreet 				 : 0);
17304d6128dcSKent Overstreet 	unsigned nr_devs = dev_mask_nr(&h->devs);
17314d6128dcSKent Overstreet 
17324d6128dcSKent Overstreet 	for_each_member_device_rcu(c, ca, &h->devs)
17334d6128dcSKent Overstreet 		if (!ca->mi.durability)
17344d6128dcSKent Overstreet 			__clear_bit(ca->dev_idx, h->devs.d);
17354d6128dcSKent Overstreet 	unsigned nr_devs_with_durability = dev_mask_nr(&h->devs);
1736703e2a43SKent Overstreet 
1737cd575ddfSKent Overstreet 	h->blocksize = pick_blocksize(c, &h->devs);
1738cd575ddfSKent Overstreet 
1739cd575ddfSKent Overstreet 	h->nr_active_devs = 0;
1740703e2a43SKent Overstreet 	for_each_member_device_rcu(c, ca, &h->devs)
1741cd575ddfSKent Overstreet 		if (ca->mi.bucket_size == h->blocksize)
1742cd575ddfSKent Overstreet 			h->nr_active_devs++;
1743f6b94a3bSKent Overstreet 
1744cd575ddfSKent Overstreet 	rcu_read_unlock();
17456c7585b0SKent Overstreet 
1746703e2a43SKent Overstreet 	/*
1747cd575ddfSKent Overstreet 	 * If we only have redundancy + 1 devices, we're better off with just
1748cd575ddfSKent Overstreet 	 * replication:
1749cd575ddfSKent Overstreet 	 */
1750cd575ddfSKent Overstreet 	h->insufficient_devs = h->nr_active_devs < h->redundancy + 2;
175173bd774dSKent Overstreet 
175273bd774dSKent Overstreet 	if (h->insufficient_devs) {
1753cd575ddfSKent Overstreet 		const char *err;
1754cd575ddfSKent Overstreet 
17558deed5f4SKent Overstreet 		if (nr_devs < h->redundancy + 2)
1756e53a961cSKent Overstreet 			err = NULL;
1757cd575ddfSKent Overstreet 		else if (nr_devs_with_durability < h->redundancy + 2)
1758af0ee5bcSKent Overstreet 			err = "cannot use durability=0 devices";
1759cd575ddfSKent Overstreet 		else
1760af0ee5bcSKent Overstreet 			err = "mismatched bucket sizes";
1761cd575ddfSKent Overstreet 
1762cd575ddfSKent Overstreet 		if (err)
1763cd575ddfSKent Overstreet 			bch_err(c, "insufficient devices available to create stripe (have %u, need %u): %s",
1764cd575ddfSKent Overstreet 				h->nr_active_devs, h->redundancy + 2, err);
176573d86dfdSKent Overstreet 	}
176673d86dfdSKent Overstreet 
1767af0ee5bcSKent Overstreet 	struct bch_devs_mask devs_leaving;
1768af0ee5bcSKent Overstreet 	bitmap_andnot(devs_leaving.d, devs.d, h->devs.d, BCH_SB_MEMBERS_MAX);
17693c471b65SKent Overstreet 
17707c50140fSKent Overstreet 	if (h->s && !h->s->allocated && dev_mask_nr(&devs_leaving))
1771b40901b0SKent Overstreet 		ec_stripe_new_cancel(c, h, -EINTR);
1772b40901b0SKent Overstreet 
1773b40901b0SKent Overstreet 	h->rw_devs_change_count = c->rw_devs_change_count;
1774703e2a43SKent Overstreet }
1775cd575ddfSKent Overstreet 
1776cd575ddfSKent Overstreet static struct ec_stripe_head *
ec_new_stripe_head_alloc(struct bch_fs * c,unsigned disk_label,unsigned algo,unsigned redundancy,enum bch_watermark watermark)17778deed5f4SKent Overstreet ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
1778e53a961cSKent Overstreet 			 unsigned algo, unsigned redundancy,
177973d86dfdSKent Overstreet 			 enum bch_watermark watermark)
178073d86dfdSKent Overstreet {
178173d86dfdSKent Overstreet 	struct ec_stripe_head *h;
1782cd575ddfSKent Overstreet 
1783cd575ddfSKent Overstreet 	h = kzalloc(sizeof(*h), GFP_KERNEL);
1784cd575ddfSKent Overstreet 	if (!h)
1785e53a961cSKent Overstreet 		return NULL;
1786cd575ddfSKent Overstreet 
17874d6128dcSKent Overstreet 	mutex_init(&h->lock);
17884d6128dcSKent Overstreet 	BUG_ON(!mutex_trylock(&h->lock));
17894d6128dcSKent Overstreet 
17904d6128dcSKent Overstreet 	h->disk_label	= disk_label;
17914d6128dcSKent Overstreet 	h->algo		= algo;
1792703e2a43SKent Overstreet 	h->redundancy	= redundancy;
1793cd575ddfSKent Overstreet 	h->watermark	= watermark;
1794cd575ddfSKent Overstreet 
1795cd575ddfSKent Overstreet 	list_add(&h->list, &c->ec_stripe_head_list);
17962c7dd446SKent Overstreet 	return h;
1797e53a961cSKent Overstreet }
1798f6b94a3bSKent Overstreet 
bch2_ec_stripe_head_put(struct bch_fs * c,struct ec_stripe_head * h)17992c7dd446SKent Overstreet void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
18006c7585b0SKent Overstreet {
1801f6b94a3bSKent Overstreet 	if (h->s &&
18026c7585b0SKent Overstreet 	    h->s->allocated &&
1803bf5a261cSKent Overstreet 	    bitmap_weight(h->s->blocks_allocated,
18046c7585b0SKent Overstreet 			  h->s->nr_data) == h->s->nr_data)
1805f6b94a3bSKent Overstreet 		ec_stripe_new_set_pending(c, h);
1806fc6c01e2SKent Overstreet 
1807f6b94a3bSKent Overstreet 	mutex_unlock(&h->lock);
1808bf5a261cSKent Overstreet }
1809bf5a261cSKent Overstreet 
18105be6a274SKent Overstreet static struct ec_stripe_head *
__bch2_ec_stripe_head_get(struct btree_trans * trans,unsigned disk_label,unsigned algo,unsigned redundancy,enum bch_watermark watermark)18112caca9fbSKent Overstreet __bch2_ec_stripe_head_get(struct btree_trans *trans,
18122caca9fbSKent Overstreet 			  unsigned disk_label,
18132caca9fbSKent Overstreet 			  unsigned algo,
1814bf5a261cSKent Overstreet 			  unsigned redundancy,
1815bf5a261cSKent Overstreet 			  enum bch_watermark watermark)
18166c7585b0SKent Overstreet {
18176c7585b0SKent Overstreet 	struct bch_fs *c = trans->c;
18186c7585b0SKent Overstreet 	struct ec_stripe_head *h;
18196c7585b0SKent Overstreet 	int ret;
18206c7585b0SKent Overstreet 
1821f6b94a3bSKent Overstreet 	if (!redundancy)
18226c7585b0SKent Overstreet 		return NULL;
18236c7585b0SKent Overstreet 
1824f6b94a3bSKent Overstreet 	ret = bch2_trans_mutex_lock(trans, &c->ec_stripe_head_lock);
18256c7585b0SKent Overstreet 	if (ret)
18266c7585b0SKent Overstreet 		return ERR_PTR(ret);
18272c7dd446SKent Overstreet 
1828f6b94a3bSKent Overstreet 	if (test_bit(BCH_FS_going_ro, &c->flags)) {
1829f6b94a3bSKent Overstreet 		h = ERR_PTR(-BCH_ERR_erofs_no_writes);
18306c7585b0SKent Overstreet 		goto err;
18316c7585b0SKent Overstreet 	}
18327635e1a6SKent Overstreet 
18337635e1a6SKent Overstreet 	list_for_each_entry(h, &c->ec_stripe_head_list, list)
1834e53a961cSKent Overstreet 		if (h->disk_label	== disk_label &&
18352c40a240SKent Overstreet 		    h->algo		== algo &&
18366c7585b0SKent Overstreet 		    h->redundancy	== redundancy &&
18376c7585b0SKent Overstreet 		    h->watermark	== watermark) {
18386c7585b0SKent Overstreet 			ret = bch2_trans_mutex_lock(trans, &h->lock);
18396c7585b0SKent Overstreet 			if (ret) {
18406c7585b0SKent Overstreet 				h = ERR_PTR(ret);
18416c7585b0SKent Overstreet 				goto err;
18426c7585b0SKent Overstreet 			}
18436c7585b0SKent Overstreet 			goto found;
1844bf5a261cSKent Overstreet 		}
18456c7585b0SKent Overstreet 
18466c7585b0SKent Overstreet 	h = ec_new_stripe_head_alloc(c, disk_label, algo, redundancy, watermark);
18476c7585b0SKent Overstreet found:
1848f6b94a3bSKent Overstreet 	if (h->rw_devs_change_count != c->rw_devs_change_count)
1849f25d8215SKent Overstreet 		ec_stripe_head_devs_update(c, h);
1850f6b94a3bSKent Overstreet 
1851f6b94a3bSKent Overstreet 	if (h->insufficient_devs) {
18526c7585b0SKent Overstreet 		mutex_unlock(&h->lock);
18536c7585b0SKent Overstreet 		h = NULL;
18542c7dd446SKent Overstreet 	}
1855f6b94a3bSKent Overstreet err:
1856f6b94a3bSKent Overstreet 	mutex_unlock(&c->ec_stripe_head_lock);
18576c7585b0SKent Overstreet 	return h;
18586c7585b0SKent Overstreet }
18597635e1a6SKent Overstreet 
new_stripe_alloc_buckets(struct btree_trans * trans,struct ec_stripe_head * h,enum bch_watermark watermark,struct closure * cl)18607635e1a6SKent Overstreet static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_head *h,
1861e53a961cSKent Overstreet 				    enum bch_watermark watermark, struct closure *cl)
18622c40a240SKent Overstreet {
18636c7585b0SKent Overstreet 	struct bch_fs *c = trans->c;
18646c7585b0SKent Overstreet 	struct bch_devs_mask devs = h->devs;
18656c7585b0SKent Overstreet 	struct open_bucket *ob;
18666c7585b0SKent Overstreet 	struct open_buckets buckets;
18676c7585b0SKent Overstreet 	struct bch_stripe *v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v;
18686c7585b0SKent Overstreet 	unsigned i, j, nr_have_parity = 0, nr_have_data = 0;
18696c7585b0SKent Overstreet 	bool have_cache = true;
1870bf5a261cSKent Overstreet 	int ret = 0;
18716c7585b0SKent Overstreet 
18726c7585b0SKent Overstreet 	BUG_ON(v->nr_blocks	!= h->s->nr_data + h->s->nr_parity);
18736c7585b0SKent Overstreet 	BUG_ON(v->nr_redundant	!= h->s->nr_parity);
1874f6b94a3bSKent Overstreet 
1875f6b94a3bSKent Overstreet 	/* * We bypass the sector allocator which normally does this: */
1876f6b94a3bSKent Overstreet 	bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX);
1877f6b94a3bSKent Overstreet 
1878f25d8215SKent Overstreet 	for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) {
1879f25d8215SKent Overstreet 		__clear_bit(v->ptrs[i].dev, devs.d);
1880f25d8215SKent Overstreet 		if (i < h->s->nr_data)
18810ba95accSKent Overstreet 			nr_have_data++;
18820ba95accSKent Overstreet 		else
18832a3731e3SKent Overstreet 			nr_have_parity++;
18840ba95accSKent Overstreet 	}
18850ba95accSKent Overstreet 
18860ba95accSKent Overstreet 	BUG_ON(nr_have_data	> h->s->nr_data);
18870ba95accSKent Overstreet 	BUG_ON(nr_have_parity	> h->s->nr_parity);
18880ba95accSKent Overstreet 
18897f4e1d5dSKent Overstreet 	buckets.nr = 0;
18900ba95accSKent Overstreet 	if (nr_have_parity < h->s->nr_parity) {
18910ba95accSKent Overstreet 		ret = bch2_bucket_alloc_set_trans(trans, &buckets,
18920ba95accSKent Overstreet 					    &h->parity_stripe,
18930ba95accSKent Overstreet 					    &devs,
1894627a2312SKent Overstreet 					    h->s->nr_parity,
18951fcce6b8SKuan-Wei Chiu 					    &nr_have_parity,
18967f4e1d5dSKent Overstreet 					    &have_cache, 0,
18970ba95accSKent Overstreet 					    BCH_DATA_parity,
18980ba95accSKent Overstreet 					    watermark,
18990ba95accSKent Overstreet 					    cl);
19000ba95accSKent Overstreet 
19014b1e6699SKent Overstreet 		open_bucket_for_each(c, &buckets, ob, i) {
1902990d42d1SKent Overstreet 			j = find_next_zero_bit(h->s->blocks_gotten,
19030ba95accSKent Overstreet 					       h->s->nr_data + h->s->nr_parity,
19042a3731e3SKent Overstreet 					       h->s->nr_data);
19052a3731e3SKent Overstreet 			BUG_ON(j >= h->s->nr_data + h->s->nr_parity);
19062a3731e3SKent Overstreet 
190727616a31SKent Overstreet 			h->s->blocks[j] = buckets.v[i];
190827616a31SKent Overstreet 			v->ptrs[j] = bch2_ob_ptr(c, ob);
19097f4e1d5dSKent Overstreet 			__set_bit(j, h->s->blocks_gotten);
19107f4e1d5dSKent Overstreet 		}
19110ba95accSKent Overstreet 
19120ba95accSKent Overstreet 		if (ret)
1913627a2312SKent Overstreet 			return ret;
19147f4e1d5dSKent Overstreet 	}
19150ba95accSKent Overstreet 
19160ba95accSKent Overstreet 	buckets.nr = 0;
191770ded998SKent Overstreet 	if (nr_have_data < h->s->nr_data) {
1918f6b94a3bSKent Overstreet 		ret = bch2_bucket_alloc_set_trans(trans, &buckets,
191970ded998SKent Overstreet 					    &h->block_stripe,
1920bf5a261cSKent Overstreet 					    &devs,
1921bf5a261cSKent Overstreet 					    h->s->nr_data,
19226c7585b0SKent Overstreet 					    &nr_have_data,
19230ba95accSKent Overstreet 					    &have_cache, 0,
1924af4d05c4SKent Overstreet 					    BCH_DATA_user,
1925f6b94a3bSKent Overstreet 					    watermark,
19265be6a274SKent Overstreet 					    cl);
19275be6a274SKent Overstreet 
19285be6a274SKent Overstreet 		open_bucket_for_each(c, &buckets, ob, i) {
19295be6a274SKent Overstreet 			j = find_next_zero_bit(h->s->blocks_gotten,
19302a3731e3SKent Overstreet 					       h->s->nr_data, 0);
1931160dff6dSKent Overstreet 			BUG_ON(j >= h->s->nr_data);
19325be6a274SKent Overstreet 
19330ef837a0SRobbie Litchfield 			h->s->blocks[j] = buckets.v[i];
193470ded998SKent Overstreet 			v->ptrs[j] = bch2_ob_ptr(c, ob);
19353ed94062SKent Overstreet 			__set_bit(j, h->s->blocks_gotten);
19363ed94062SKent Overstreet 		}
193781d8599eSKent Overstreet 
19389d32097fSKent Overstreet 		if (ret)
19390ef837a0SRobbie Litchfield 			return ret;
194081d8599eSKent Overstreet 	}
194181d8599eSKent Overstreet 
1942bf5a261cSKent Overstreet 	return 0;
1943bf5a261cSKent Overstreet }
1944bf5a261cSKent Overstreet 
get_existing_stripe(struct bch_fs * c,struct ec_stripe_head * head)1945bf5a261cSKent Overstreet static s64 get_existing_stripe(struct bch_fs *c,
1946bf5a261cSKent Overstreet 			       struct ec_stripe_head *head)
19475be6a274SKent Overstreet {
19485be6a274SKent Overstreet 	ec_stripes_heap *h = &c->ec_stripes_heap;
19495be6a274SKent Overstreet 	struct stripe *m;
19505be6a274SKent Overstreet 	size_t heap_idx;
19515be6a274SKent Overstreet 	u64 stripe_idx;
19520ba95accSKent Overstreet 	s64 ret = -1;
19530ba95accSKent Overstreet 
19546c7585b0SKent Overstreet 	if (may_create_new_stripe(c))
1955bf5a261cSKent Overstreet 		return -1;
19566c7585b0SKent Overstreet 
1957e84face6SKent Overstreet 	mutex_lock(&c->ec_stripes_heap_lock);
1958e84face6SKent Overstreet 	for (heap_idx = 0; heap_idx < h->nr; heap_idx++) {
1959e84face6SKent Overstreet 		/* No blocks worth reusing, stripe will just be deleted: */
1960e84face6SKent Overstreet 		if (!h->data[heap_idx].blocks_nonempty)
1961bf5a261cSKent Overstreet 			continue;
1962e84face6SKent Overstreet 
1963e84face6SKent Overstreet 		stripe_idx = h->data[heap_idx].idx;
1964e84face6SKent Overstreet 
1965e84face6SKent Overstreet 		m = genradix_ptr(&c->stripes, stripe_idx);
1966e84face6SKent Overstreet 
1967e84face6SKent Overstreet 		if (m->disk_label	== head->disk_label &&
1968bf5a261cSKent Overstreet 		    m->algorithm	== head->algo &&
1969bf5a261cSKent Overstreet 		    m->nr_redundant	== head->redundancy &&
19706c7585b0SKent Overstreet 		    m->sectors		== head->blocksize &&
19710ba95accSKent Overstreet 		    m->blocks_nonempty	< m->nr_blocks - m->nr_redundant &&
19726c7585b0SKent Overstreet 		    bch2_try_open_stripe(c, head->s, stripe_idx)) {
197381d8599eSKent Overstreet 			ret = stripe_idx;
197481d8599eSKent Overstreet 			break;
19750ba95accSKent Overstreet 		}
197681d8599eSKent Overstreet 	}
1977bf5a261cSKent Overstreet 	mutex_unlock(&c->ec_stripes_heap_lock);
19789d32097fSKent Overstreet 	return ret;
19790ef837a0SRobbie Litchfield }
19800ef837a0SRobbie Litchfield 
__bch2_ec_stripe_head_reuse(struct btree_trans * trans,struct ec_stripe_head * h)198181d8599eSKent Overstreet static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h)
198281d8599eSKent Overstreet {
19834b1e6699SKent Overstreet 	struct bch_fs *c = trans->c;
19840ef837a0SRobbie Litchfield 	struct bch_stripe *new_v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v;
19854b1e6699SKent Overstreet 	struct bch_stripe *existing_v;
19864b1e6699SKent Overstreet 	unsigned i;
19874b1e6699SKent Overstreet 	s64 idx;
19884b1e6699SKent Overstreet 	int ret;
19894b1e6699SKent Overstreet 
19904b1e6699SKent Overstreet 	/*
19914b1e6699SKent Overstreet 	 * If we can't allocate a new stripe, and there's no stripes with empty
19929d32097fSKent Overstreet 	 * blocks for us to reuse, that means we have to wait on copygc:
19934b1e6699SKent Overstreet 	 */
1994af4d05c4SKent Overstreet 	idx = get_existing_stripe(c, h);
19959d32097fSKent Overstreet 	if (idx < 0)
19969d32097fSKent Overstreet 		return -BCH_ERR_stripe_alloc_blocked;
19974b1e6699SKent Overstreet 
19984b1e6699SKent Overstreet 	ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe);
19999d32097fSKent Overstreet 	bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c,
20004b1e6699SKent Overstreet 			     "reading stripe key: %s", bch2_err_str(ret));
20014b1e6699SKent Overstreet 	if (ret) {
20025dd8c60eSKent Overstreet 		bch2_stripe_close(c, h->s);
20034b1e6699SKent Overstreet 		return ret;
20044b1e6699SKent Overstreet 	}
20054b1e6699SKent Overstreet 
20064b1e6699SKent Overstreet 	existing_v = &bkey_i_to_stripe(&h->s->existing_stripe.key)->v;
20074b1e6699SKent Overstreet 
20084b1e6699SKent Overstreet 	BUG_ON(existing_v->nr_redundant != h->s->nr_parity);
20094b1e6699SKent Overstreet 	h->s->nr_data = existing_v->nr_blocks -
20104b1e6699SKent Overstreet 		existing_v->nr_redundant;
20114b1e6699SKent Overstreet 
20124b1e6699SKent Overstreet 	ret = ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize);
20134b1e6699SKent Overstreet 	if (ret) {
20144b1e6699SKent Overstreet 		bch2_stripe_close(c, h->s);
20154b1e6699SKent Overstreet 		return ret;
20164b1e6699SKent Overstreet 	}
20174b1e6699SKent Overstreet 
20184b1e6699SKent Overstreet 	BUG_ON(h->s->existing_stripe.size != h->blocksize);
20194b1e6699SKent Overstreet 	BUG_ON(h->s->existing_stripe.size != le16_to_cpu(existing_v->sectors));
20204b1e6699SKent Overstreet 
20214b1e6699SKent Overstreet 	/*
20224b1e6699SKent Overstreet 	 * Free buckets we initially allocated - they might conflict with
20234b1e6699SKent Overstreet 	 * blocks from the stripe we're reusing:
20244b1e6699SKent Overstreet 	 */
20254b1e6699SKent Overstreet 	for_each_set_bit(i, h->s->blocks_gotten, new_v->nr_blocks) {
20264b1e6699SKent Overstreet 		bch2_open_bucket_put(c, c->open_buckets + h->s->blocks[i]);
20274b1e6699SKent Overstreet 		h->s->blocks[i] = 0;
20284b1e6699SKent Overstreet 	}
20294b1e6699SKent Overstreet 	memset(h->s->blocks_gotten, 0, sizeof(h->s->blocks_gotten));
20304b1e6699SKent Overstreet 	memset(h->s->blocks_allocated, 0, sizeof(h->s->blocks_allocated));
20314b1e6699SKent Overstreet 
20324b1e6699SKent Overstreet 	for (i = 0; i < existing_v->nr_blocks; i++) {
20334b1e6699SKent Overstreet 		if (stripe_blockcount_get(existing_v, i)) {
20344b1e6699SKent Overstreet 			__set_bit(i, h->s->blocks_gotten);
20354b1e6699SKent Overstreet 			__set_bit(i, h->s->blocks_allocated);
20364b1e6699SKent Overstreet 		}
2037f6b94a3bSKent Overstreet 
2038f6b94a3bSKent Overstreet 		ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
20392c7dd446SKent Overstreet 	}
20400ef837a0SRobbie Litchfield 
20410ef837a0SRobbie Litchfield 	bkey_copy(&h->s->new_stripe.key, &h->s->existing_stripe.key);
20420ef837a0SRobbie Litchfield 	h->s->have_existing_stripe = true;
2043e53a961cSKent Overstreet 
20440ef837a0SRobbie Litchfield 	return 0;
20450ef837a0SRobbie Litchfield }
20462c7dd446SKent Overstreet 
__bch2_ec_stripe_head_reserve(struct btree_trans * trans,struct ec_stripe_head * h)20470ef837a0SRobbie Litchfield static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h)
2048e84face6SKent Overstreet {
20490ef837a0SRobbie Litchfield 	struct bch_fs *c = trans->c;
20500ef837a0SRobbie Litchfield 	struct btree_iter iter;
2051e53a961cSKent Overstreet 	struct bkey_s_c k;
2052af0ee5bcSKent Overstreet 	struct bpos min_pos = POS(0, 1);
2053af0ee5bcSKent Overstreet 	struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint));
20540ef837a0SRobbie Litchfield 	int ret;
2055e84face6SKent Overstreet 
205665d48e35SKent Overstreet 	if (!h->s->res.sectors) {
205765d48e35SKent Overstreet 		ret = bch2_disk_reservation_get(c, &h->s->res,
20580ef837a0SRobbie Litchfield 					h->blocksize,
20590ef837a0SRobbie Litchfield 					h->s->nr_parity,
20600ef837a0SRobbie Litchfield 					BCH_DISK_RESERVATION_NOFAIL);
20610ef837a0SRobbie Litchfield 		if (ret)
20620ef837a0SRobbie Litchfield 			return ret;
2063e84face6SKent Overstreet 	}
2064e84face6SKent Overstreet 
2065e84face6SKent Overstreet 	for_each_btree_key_norestart(trans, iter, BTREE_ID_stripes, start_pos,
2066e84face6SKent Overstreet 			   BTREE_ITER_slots|BTREE_ITER_intent, k, ret) {
2067e84face6SKent Overstreet 		if (bkey_gt(k.k->p, POS(0, U32_MAX))) {
2068e84face6SKent Overstreet 			if (start_pos.offset) {
2069e84face6SKent Overstreet 				start_pos = min_pos;
2070e53a961cSKent Overstreet 				bch2_btree_iter_set_pos(&iter, start_pos);
2071e84face6SKent Overstreet 				continue;
2072e84face6SKent Overstreet 			}
20735be6a274SKent Overstreet 
2074e84face6SKent Overstreet 			ret = -BCH_ERR_ENOSPC_stripe_create;
2075e84face6SKent Overstreet 			break;
2076e84face6SKent Overstreet 		}
2077e84face6SKent Overstreet 
20780ef837a0SRobbie Litchfield 		if (bkey_deleted(k.k) &&
2079e84face6SKent Overstreet 		    bch2_try_open_stripe(c, h->s, k.k->p.offset))
2080e84face6SKent Overstreet 			break;
20810ef837a0SRobbie Litchfield 	}
2082e84face6SKent Overstreet 
208370ded998SKent Overstreet 	c->ec_stripe_hint = iter.pos.offset;
2084e84face6SKent Overstreet 
2085e84face6SKent Overstreet 	if (ret)
2086e84face6SKent Overstreet 		goto err;
20870ef837a0SRobbie Litchfield 
20880ef837a0SRobbie Litchfield 	ret = ec_stripe_mem_alloc(trans, &iter);
2089e53a961cSKent Overstreet 	if (ret) {
2090e53a961cSKent Overstreet 		bch2_stripe_close(c, h->s);
209110d9f7d2SKent Overstreet 		goto err;
209210d9f7d2SKent Overstreet 	}
209310d9f7d2SKent Overstreet 
209410d9f7d2SKent Overstreet 	h->s->new_stripe.key.k.p = iter.pos;
209510d9f7d2SKent Overstreet out:
209610d9f7d2SKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
2097e84face6SKent Overstreet 	return ret;
2098e84face6SKent Overstreet err:
2099e84face6SKent Overstreet 	bch2_disk_reservation_put(c, &h->s->res);
2100e84face6SKent Overstreet 	goto out;
2101e84face6SKent Overstreet }
2102e84face6SKent Overstreet 
bch2_ec_stripe_head_get(struct btree_trans * trans,unsigned target,unsigned algo,unsigned redundancy,enum bch_watermark watermark,struct closure * cl)2103e84face6SKent Overstreet struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
2104e84face6SKent Overstreet 					       unsigned target,
2105e84face6SKent Overstreet 					       unsigned algo,
2106e53a961cSKent Overstreet 					       unsigned redundancy,
2107e84face6SKent Overstreet 					       enum bch_watermark watermark,
2108e84face6SKent Overstreet 					       struct closure *cl)
2109e53a961cSKent Overstreet {
21100ef837a0SRobbie Litchfield 	struct bch_fs *c = trans->c;
21110ef837a0SRobbie Litchfield 	struct ec_stripe_head *h;
21120ef837a0SRobbie Litchfield 	bool waiting = false;
21135be6a274SKent Overstreet 	unsigned disk_label = 0;
21145be6a274SKent Overstreet 	struct target t = target_decode(target);
21155be6a274SKent Overstreet 	int ret;
21165be6a274SKent Overstreet 
21175be6a274SKent Overstreet 	if (t.type == TARGET_GROUP) {
21185be6a274SKent Overstreet 		if (t.group > U8_MAX) {
21195be6a274SKent Overstreet 			bch_err(c, "cannot create a stripe when disk_label > U8_MAX");
21205be6a274SKent Overstreet 			return NULL;
21215be6a274SKent Overstreet 		}
21224b1e6699SKent Overstreet 		disk_label = t.group + 1; /* 0 == no label */
2123f6b94a3bSKent Overstreet 	}
21240ef837a0SRobbie Litchfield 
21250ef837a0SRobbie Litchfield 	h = __bch2_ec_stripe_head_get(trans, disk_label, algo, redundancy, watermark);
2126fc6c01e2SKent Overstreet 	if (IS_ERR_OR_NULL(h))
2127f6b94a3bSKent Overstreet 		return h;
2128f6b94a3bSKent Overstreet 
2129b40901b0SKent Overstreet 	if (!h->s) {
2130cd575ddfSKent Overstreet 		ret = ec_new_stripe_alloc(c, h);
2131cd575ddfSKent Overstreet 		if (ret) {
2132cd575ddfSKent Overstreet 			bch_err(c, "failed to allocate new stripe");
2133cd575ddfSKent Overstreet 			goto err;
2134cd575ddfSKent Overstreet 		}
2135703e2a43SKent Overstreet 	}
2136703e2a43SKent Overstreet 
2137cd575ddfSKent Overstreet 	if (h->s->allocated)
2138cd575ddfSKent Overstreet 		goto allocated;
2139cd575ddfSKent Overstreet 
2140cd575ddfSKent Overstreet 	if (h->s->have_existing_stripe)
2141b40901b0SKent Overstreet 		goto alloc_existing;
2142b40901b0SKent Overstreet 
2143b40901b0SKent Overstreet 	/* First, try to allocate a full stripe: */
2144bf5a261cSKent Overstreet 	ret =   new_stripe_alloc_buckets(trans, h, BCH_WATERMARK_stripe, NULL) ?:
21456c7585b0SKent Overstreet 		__bch2_ec_stripe_head_reserve(trans, h);
21466c7585b0SKent Overstreet 	if (!ret)
21476c7585b0SKent Overstreet 		goto allocate_buf;
21486c7585b0SKent Overstreet 	if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
2149abe19d45SKent Overstreet 	    bch2_err_matches(ret, ENOMEM))
2150cd575ddfSKent Overstreet 		goto err;
21516c7585b0SKent Overstreet 
2152cd575ddfSKent Overstreet 	/*
2153cd575ddfSKent Overstreet 	 * Not enough buckets available for a full stripe: we must reuse an
21547c50140fSKent Overstreet 	 * existing stripe:
2155703e2a43SKent Overstreet 	 */
2156cd575ddfSKent Overstreet 	while (1) {
2157cd575ddfSKent Overstreet 		ret = __bch2_ec_stripe_head_reuse(trans, h);
2158cd575ddfSKent Overstreet 		if (!ret)
2159703e2a43SKent Overstreet 			break;
2160cd575ddfSKent Overstreet 		if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked)
2161cd575ddfSKent Overstreet 			goto err;
2162b40901b0SKent Overstreet 
2163b40901b0SKent Overstreet 		if (watermark == BCH_WATERMARK_copygc) {
2164b40901b0SKent Overstreet 			ret =   new_stripe_alloc_buckets(trans, h, watermark, NULL) ?:
2165b40901b0SKent Overstreet 				__bch2_ec_stripe_head_reserve(trans, h);
2166b40901b0SKent Overstreet 			if (ret)
2167b40901b0SKent Overstreet 				goto err;
2168b40901b0SKent Overstreet 			goto allocate_buf;
2169b40901b0SKent Overstreet 		}
2170b40901b0SKent Overstreet 
2171b40901b0SKent Overstreet 		/* XXX freelist_wait? */
2172b40901b0SKent Overstreet 		closure_wait(&c->freelist_wait, cl);
2173b40901b0SKent Overstreet 		waiting = true;
2174b40901b0SKent Overstreet 	}
2175b40901b0SKent Overstreet 
2176b40901b0SKent Overstreet 	if (waiting)
2177b40901b0SKent Overstreet 		closure_wake_up(&c->freelist_wait);
2178b40901b0SKent Overstreet alloc_existing:
2179b40901b0SKent Overstreet 	/*
2180b40901b0SKent Overstreet 	 * Retry allocating buckets, with the watermark for this
2181b40901b0SKent Overstreet 	 * particular write:
2182b40901b0SKent Overstreet 	 */
2183b40901b0SKent Overstreet 	ret = new_stripe_alloc_buckets(trans, h, watermark, cl);
2184b40901b0SKent Overstreet 	if (ret)
2185b40901b0SKent Overstreet 		goto err;
2186b40901b0SKent Overstreet 
2187b40901b0SKent Overstreet allocate_buf:
21885222a460SKent Overstreet 	ret = ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize);
218961c8d7c8SKent Overstreet 	if (ret)
219080eab7a7SKent Overstreet 		goto err;
21915028b907SKent Overstreet 
21925dd8c60eSKent Overstreet 	h->s->allocated = true;
2193b547d005SKent Overstreet allocated:
21945222a460SKent Overstreet 	BUG_ON(!h->s->idx);
219558e1ea4bSKent Overstreet 	BUG_ON(!h->s->new_stripe.data[0]);
2196b547d005SKent Overstreet 	BUG_ON(trans->restarted);
2197b547d005SKent Overstreet 	return h;
21985222a460SKent Overstreet err:
2199b547d005SKent Overstreet 	bch2_ec_stripe_head_put(c, h);
220080eab7a7SKent Overstreet 	return ERR_PTR(ret);
2201b547d005SKent Overstreet }
220280eab7a7SKent Overstreet 
2203b547d005SKent Overstreet /* device removal */
2204b547d005SKent Overstreet 
bch2_invalidate_stripe_to_dev(struct btree_trans * trans,struct bkey_s_c k_a)2205b547d005SKent Overstreet static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_s_c k_a)
2206b547d005SKent Overstreet {
2207b547d005SKent Overstreet 	struct bch_alloc_v4 a_convert;
2208b547d005SKent Overstreet 	const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k_a, &a_convert);
220980eab7a7SKent Overstreet 
2210990d42d1SKent Overstreet 	if (!a->stripe)
2211b547d005SKent Overstreet 		return 0;
221227616a31SKent Overstreet 
221327b2df98SKent Overstreet 	if (a->stripe_sectors) {
221427b2df98SKent Overstreet 		bch_err(trans->c, "trying to invalidate device in stripe when bucket has stripe data");
22151bb3c2a9SKent Overstreet 		return -BCH_ERR_invalidate_stripe_to_dev;
2216b2930396SKent Overstreet 	}
221761c8d7c8SKent Overstreet 
221861c8d7c8SKent Overstreet 	struct btree_iter iter;
2219ba6dd1ddSKent Overstreet 	struct bkey_i_stripe *s =
2220ba6dd1ddSKent Overstreet 		bch2_bkey_get_mut_typed(trans, &iter, BTREE_ID_stripes, POS(0, a->stripe),
2221ba6dd1ddSKent Overstreet 					BTREE_ITER_slots, stripe);
2222ba6dd1ddSKent Overstreet 	int ret = PTR_ERR_OR_ZERO(s);
2223ba6dd1ddSKent Overstreet 	if (ret)
2224ba6dd1ddSKent Overstreet 		return ret;
2225627a2312SKent Overstreet 
22261fcce6b8SKuan-Wei Chiu 	struct disk_accounting_pos acc = {
2227990d42d1SKent Overstreet 		.type = BCH_DISK_ACCOUNTING_replicas,
2228ba6dd1ddSKent Overstreet 	};
2229fba053d2SKent Overstreet 
2230ba6dd1ddSKent Overstreet 	s64 sectors = 0;
2231ba6dd1ddSKent Overstreet 	for (unsigned i = 0; i < s->v.nr_blocks; i++)
2232ba6dd1ddSKent Overstreet 		sectors -= stripe_blockcount_get(&s->v, i);
2233fba053d2SKent Overstreet 
2234fba053d2SKent Overstreet 	bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i));
2235fba053d2SKent Overstreet 	acc.replicas.data_type = BCH_DATA_user;
2236ba6dd1ddSKent Overstreet 	ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false);
2237627a2312SKent Overstreet 	if (ret)
2238ba6dd1ddSKent Overstreet 		goto err;
2239ba6dd1ddSKent Overstreet 
2240c1e44462SKent Overstreet 	struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(&s->k_i));
2241c1e44462SKent Overstreet 	bkey_for_each_ptr(ptrs, ptr)
2242c1e44462SKent Overstreet 		if (ptr->dev == k_a.k->p.inode)
2243c1e44462SKent Overstreet 			ptr->dev = BCH_SB_MEMBER_INVALID;
2244c1e44462SKent Overstreet 
2245c1e44462SKent Overstreet 	sectors = -sectors;
2246c1e44462SKent Overstreet 
2247c1e44462SKent Overstreet 	bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i));
2248c1e44462SKent Overstreet 	acc.replicas.data_type = BCH_DATA_user;
2249c1e44462SKent Overstreet 	ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false);
2250c1e44462SKent Overstreet 	if (ret)
2251c1e44462SKent Overstreet 		goto err;
2252c1e44462SKent Overstreet err:
2253c1e44462SKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
2254c1e44462SKent Overstreet 	return ret;
2255c1e44462SKent Overstreet }
2256c1e44462SKent Overstreet 
bch2_dev_remove_stripes(struct bch_fs * c,unsigned dev_idx)22577807e143SKent Overstreet int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx)
22587807e143SKent Overstreet {
22597807e143SKent Overstreet 	return bch2_trans_run(c,
22607807e143SKent Overstreet 		for_each_btree_key_upto_commit(trans, iter,
22617807e143SKent Overstreet 				  BTREE_ID_alloc, POS(dev_idx, 0), POS(dev_idx, U64_MAX),
22627807e143SKent Overstreet 				  BTREE_ITER_intent, k,
22637807e143SKent Overstreet 				  NULL, NULL, 0, ({
2264e6539b0aSKent Overstreet 			bch2_invalidate_stripe_to_dev(trans, k);
2265e6539b0aSKent Overstreet 	})));
2266e53a961cSKent Overstreet }
22677807e143SKent Overstreet 
22687807e143SKent Overstreet /* startup/shutdown */
2269c1e44462SKent Overstreet 
__bch2_ec_stop(struct bch_fs * c,struct bch_dev * ca)22707807e143SKent Overstreet static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca)
22717807e143SKent Overstreet {
22727807e143SKent Overstreet 	struct ec_stripe_head *h;
2273e6539b0aSKent Overstreet 	struct open_bucket *ob;
2274e6539b0aSKent Overstreet 	unsigned i;
22757807e143SKent Overstreet 
2276c1e44462SKent Overstreet 	mutex_lock(&c->ec_stripe_head_lock);
2277c1e44462SKent Overstreet 	list_for_each_entry(h, &c->ec_stripe_head_list, list) {
22787807e143SKent Overstreet 		mutex_lock(&h->lock);
22797807e143SKent Overstreet 		if (!h->s)
22807807e143SKent Overstreet 			goto unlock;
2281cd575ddfSKent Overstreet 
2282cd575ddfSKent Overstreet 		if (!ca)
2283cd575ddfSKent Overstreet 			goto found;
2284e3877382SKent Overstreet 
2285cd575ddfSKent Overstreet 		for (i = 0; i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; i++) {
2286cd575ddfSKent Overstreet 			if (!h->s->blocks[i])
2287703e2a43SKent Overstreet 				continue;
2288703e2a43SKent Overstreet 
2289cd575ddfSKent Overstreet 			ob = c->open_buckets + h->s->blocks[i];
2290cd575ddfSKent Overstreet 			if (ob->dev == ca->dev_idx)
2291cd575ddfSKent Overstreet 				goto found;
2292703e2a43SKent Overstreet 		}
2293cd575ddfSKent Overstreet 		goto unlock;
2294cd575ddfSKent Overstreet found:
2295cd575ddfSKent Overstreet 		ec_stripe_new_cancel(c, h, -BCH_ERR_erofs_no_writes);
2296e3877382SKent Overstreet unlock:
2297bf5a261cSKent Overstreet 		mutex_unlock(&h->lock);
2298e3877382SKent Overstreet 	}
2299e3877382SKent Overstreet 	mutex_unlock(&c->ec_stripe_head_lock);
2300e3877382SKent Overstreet }
2301e3877382SKent Overstreet 
/*
 * Device-scoped entry point: forwards to __bch2_ec_stop() with @ca as the
 * device filter.
 */
void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
{
	__bch2_ec_stop(c, ca);
}
2306703e2a43SKent Overstreet 
bch2_fs_ec_stop(struct bch_fs * c)2307cd575ddfSKent Overstreet void bch2_fs_ec_stop(struct bch_fs *c)
2308990d42d1SKent Overstreet {
2309cd575ddfSKent Overstreet 	__bch2_ec_stop(c, NULL);
2310cd575ddfSKent Overstreet }
2311cd575ddfSKent Overstreet 
bch2_fs_ec_flush_done(struct bch_fs * c)231284c72755SKent Overstreet static bool bch2_fs_ec_flush_done(struct bch_fs *c)
2313cd575ddfSKent Overstreet {
2314b40901b0SKent Overstreet 	bool ret;
2315b40901b0SKent Overstreet 
2316b40901b0SKent Overstreet 	mutex_lock(&c->ec_stripe_new_lock);
2317b40901b0SKent Overstreet 	ret = list_empty(&c->ec_stripe_new_list);
2318b40901b0SKent Overstreet 	mutex_unlock(&c->ec_stripe_new_lock);
2319b40901b0SKent Overstreet 
2320b40901b0SKent Overstreet 	return ret;
2321b40901b0SKent Overstreet }
2322b40901b0SKent Overstreet 
bch2_fs_ec_flush(struct bch_fs * c)2323b40901b0SKent Overstreet void bch2_fs_ec_flush(struct bch_fs *c)
2324703e2a43SKent Overstreet {
2325cd575ddfSKent Overstreet 	wait_event(c->ec_stripe_new_wait, bch2_fs_ec_flush_done(c));
232684c72755SKent Overstreet }
2327cd575ddfSKent Overstreet 
bch2_stripes_read(struct bch_fs * c)232884c72755SKent Overstreet int bch2_stripes_read(struct bch_fs *c)
232984c72755SKent Overstreet {
2330cd575ddfSKent Overstreet 	int ret = bch2_trans_run(c,
2331cd575ddfSKent Overstreet 		for_each_btree_key(trans, iter, BTREE_ID_stripes, POS_MIN,
2332cd575ddfSKent Overstreet 				   BTREE_ITER_prefetch, k, ({
2333 			if (k.k->type != KEY_TYPE_stripe)
2334 				continue;
2335 
2336 			ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
2337 			if (ret)
2338 				break;
2339 
2340 			struct stripe *m = genradix_ptr(&c->stripes, k.k->p.offset);
2341 
2342 			stripe_to_mem(m, bkey_s_c_to_stripe(k).v);
2343 
2344 			bch2_stripes_heap_insert(c, m, k.k->p.offset);
2345 			0;
2346 		})));
2347 	bch_err_fn(c, ret);
2348 	return ret;
2349 }
2350 
bch2_stripes_heap_to_text(struct printbuf * out,struct bch_fs * c)2351 void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c)
2352 {
2353 	ec_stripes_heap *h = &c->ec_stripes_heap;
2354 	struct stripe *m;
2355 	size_t i;
2356 
2357 	mutex_lock(&c->ec_stripes_heap_lock);
2358 	for (i = 0; i < min_t(size_t, h->nr, 50); i++) {
2359 		m = genradix_ptr(&c->stripes, h->data[i].idx);
2360 
2361 		prt_printf(out, "%zu %u/%u+%u", h->data[i].idx,
2362 		       h->data[i].blocks_nonempty,
2363 		       m->nr_blocks - m->nr_redundant,
2364 		       m->nr_redundant);
2365 		if (bch2_stripe_is_open(c, h->data[i].idx))
2366 			prt_str(out, " open");
2367 		prt_newline(out);
2368 	}
2369 	mutex_unlock(&c->ec_stripes_heap_lock);
2370 }
2371 
bch2_new_stripe_to_text(struct printbuf * out,struct bch_fs * c,struct ec_stripe_new * s)2372 static void bch2_new_stripe_to_text(struct printbuf *out, struct bch_fs *c,
2373 				    struct ec_stripe_new *s)
2374 {
2375 	prt_printf(out, "\tidx %llu blocks %u+%u allocated %u ref %u %u %s obs",
2376 		   s->idx, s->nr_data, s->nr_parity,
2377 		   bitmap_weight(s->blocks_allocated, s->nr_data),
2378 		   atomic_read(&s->ref[STRIPE_REF_io]),
2379 		   atomic_read(&s->ref[STRIPE_REF_stripe]),
2380 		   bch2_watermarks[s->h->watermark]);
2381 
2382 	struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
2383 	unsigned i;
2384 	for_each_set_bit(i, s->blocks_gotten, v->nr_blocks)
2385 		prt_printf(out, " %u", s->blocks[i]);
2386 	prt_newline(out);
2387 	bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&s->new_stripe.key));
2388 	prt_newline(out);
2389 }
2390 
bch2_new_stripes_to_text(struct printbuf * out,struct bch_fs * c)2391 void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
2392 {
2393 	struct ec_stripe_head *h;
2394 	struct ec_stripe_new *s;
2395 
2396 	mutex_lock(&c->ec_stripe_head_lock);
2397 	list_for_each_entry(h, &c->ec_stripe_head_list, list) {
2398 		prt_printf(out, "disk label %u algo %u redundancy %u %s nr created %llu:\n",
2399 		       h->disk_label, h->algo, h->redundancy,
2400 		       bch2_watermarks[h->watermark],
2401 		       h->nr_created);
2402 
2403 		if (h->s)
2404 			bch2_new_stripe_to_text(out, c, h->s);
2405 	}
2406 	mutex_unlock(&c->ec_stripe_head_lock);
2407 
2408 	prt_printf(out, "in flight:\n");
2409 
2410 	mutex_lock(&c->ec_stripe_new_lock);
2411 	list_for_each_entry(s, &c->ec_stripe_new_list, list)
2412 		bch2_new_stripe_to_text(out, c, s);
2413 	mutex_unlock(&c->ec_stripe_new_lock);
2414 }
2415 
bch2_fs_ec_exit(struct bch_fs * c)2416 void bch2_fs_ec_exit(struct bch_fs *c)
2417 {
2418 	struct ec_stripe_head *h;
2419 	unsigned i;
2420 
2421 	while (1) {
2422 		mutex_lock(&c->ec_stripe_head_lock);
2423 		h = list_first_entry_or_null(&c->ec_stripe_head_list,
2424 					     struct ec_stripe_head, list);
2425 		if (h)
2426 			list_del(&h->list);
2427 		mutex_unlock(&c->ec_stripe_head_lock);
2428 		if (!h)
2429 			break;
2430 
2431 		if (h->s) {
2432 			for (i = 0; i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; i++)
2433 				BUG_ON(h->s->blocks[i]);
2434 
2435 			kfree(h->s);
2436 		}
2437 		kfree(h);
2438 	}
2439 
2440 	BUG_ON(!list_empty(&c->ec_stripe_new_list));
2441 
2442 	free_heap(&c->ec_stripes_heap);
2443 	genradix_free(&c->stripes);
2444 	bioset_exit(&c->ec_bioset);
2445 }
2446 
bch2_fs_ec_init_early(struct bch_fs * c)2447 void bch2_fs_ec_init_early(struct bch_fs *c)
2448 {
2449 	spin_lock_init(&c->ec_stripes_new_lock);
2450 	mutex_init(&c->ec_stripes_heap_lock);
2451 
2452 	INIT_LIST_HEAD(&c->ec_stripe_head_list);
2453 	mutex_init(&c->ec_stripe_head_lock);
2454 
2455 	INIT_LIST_HEAD(&c->ec_stripe_new_list);
2456 	mutex_init(&c->ec_stripe_new_lock);
2457 	init_waitqueue_head(&c->ec_stripe_new_wait);
2458 
2459 	INIT_WORK(&c->ec_stripe_create_work, ec_stripe_create_work);
2460 	INIT_WORK(&c->ec_stripe_delete_work, ec_stripe_delete_work);
2461 }
2462 
bch2_fs_ec_init(struct bch_fs * c)2463 int bch2_fs_ec_init(struct bch_fs *c)
2464 {
2465 	return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),
2466 			   BIOSET_NEED_BVECS);
2467 }
2468