xref: /linux/fs/bcachefs/journal_seq_blacklist.c (revision 1dd7f9d98de0740b42f1ac3f0b1d8af9c76801de)
11c6fdbd8SKent Overstreet // SPDX-License-Identifier: GPL-2.0
21c6fdbd8SKent Overstreet 
31c6fdbd8SKent Overstreet #include "bcachefs.h"
4*1dd7f9d9SKent Overstreet #include "btree_iter.h"
5*1dd7f9d9SKent Overstreet #include "eytzinger.h"
61c6fdbd8SKent Overstreet #include "journal_seq_blacklist.h"
7*1dd7f9d9SKent Overstreet #include "super-io.h"
81c6fdbd8SKent Overstreet 
91c6fdbd8SKent Overstreet /*
101c6fdbd8SKent Overstreet  * journal_seq_blacklist machinery:
111c6fdbd8SKent Overstreet  *
121c6fdbd8SKent Overstreet  * To guarantee order of btree updates after a crash, we need to detect when a
131c6fdbd8SKent Overstreet  * btree node entry (bset) is newer than the newest journal entry that was
141c6fdbd8SKent Overstreet  * successfully written, and ignore it - effectively ignoring any btree updates
151c6fdbd8SKent Overstreet  * that didn't make it into the journal.
161c6fdbd8SKent Overstreet  *
171c6fdbd8SKent Overstreet  * If we didn't do this, we might have two btree nodes, a and b, both with
181c6fdbd8SKent Overstreet  * updates that weren't written to the journal yet: if b was updated after a,
191c6fdbd8SKent Overstreet  * but b was flushed and not a - oops; on recovery we'll find that the updates
201c6fdbd8SKent Overstreet  * to b happened, but not the updates to a that happened before it.
211c6fdbd8SKent Overstreet  *
221c6fdbd8SKent Overstreet  * Ignoring bsets that are newer than the newest journal entry is always safe,
231c6fdbd8SKent Overstreet  * because everything they contain will also have been journalled - and must
241c6fdbd8SKent Overstreet  * still be present in the journal on disk until a journal entry has been
251c6fdbd8SKent Overstreet  * written _after_ that bset was written.
261c6fdbd8SKent Overstreet  *
271c6fdbd8SKent Overstreet  * To accomplish this, bsets record the newest journal sequence number they
281c6fdbd8SKent Overstreet  * contain updates for; then, on startup, the btree code queries the journal
291c6fdbd8SKent Overstreet  * code to ask "Is this sequence number newer than the newest journal entry? If
301c6fdbd8SKent Overstreet  * so, ignore it."
311c6fdbd8SKent Overstreet  *
321c6fdbd8SKent Overstreet  * When this happens, we must blacklist that journal sequence number: the
331c6fdbd8SKent Overstreet  * journal must not write any entries with that sequence number, and it must
341c6fdbd8SKent Overstreet  * record that it was blacklisted so that a) on recovery we don't think we have
351c6fdbd8SKent Overstreet  * missing journal entries and b) so that the btree code continues to ignore
361c6fdbd8SKent Overstreet  * that bset, until that btree node is rewritten.
371c6fdbd8SKent Overstreet  */
381c6fdbd8SKent Overstreet 
39*1dd7f9d9SKent Overstreet static unsigned
40*1dd7f9d9SKent Overstreet blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl)
411c6fdbd8SKent Overstreet {
42*1dd7f9d9SKent Overstreet 	return bl
43*1dd7f9d9SKent Overstreet 		? ((vstruct_end(&bl->field) - (void *) &bl->start[0]) /
44*1dd7f9d9SKent Overstreet 		   sizeof(struct journal_seq_blacklist_entry))
45*1dd7f9d9SKent Overstreet 		: 0;
46*1dd7f9d9SKent Overstreet }
471c6fdbd8SKent Overstreet 
48*1dd7f9d9SKent Overstreet static unsigned sb_blacklist_u64s(unsigned nr)
49*1dd7f9d9SKent Overstreet {
50*1dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl;
511c6fdbd8SKent Overstreet 
52*1dd7f9d9SKent Overstreet 	return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64);
53*1dd7f9d9SKent Overstreet }
54*1dd7f9d9SKent Overstreet 
55*1dd7f9d9SKent Overstreet static struct bch_sb_field_journal_seq_blacklist *
56*1dd7f9d9SKent Overstreet blacklist_entry_try_merge(struct bch_fs *c,
57*1dd7f9d9SKent Overstreet 			  struct bch_sb_field_journal_seq_blacklist *bl,
58*1dd7f9d9SKent Overstreet 			  unsigned i)
59*1dd7f9d9SKent Overstreet {
60*1dd7f9d9SKent Overstreet 	unsigned nr = blacklist_nr_entries(bl);
61*1dd7f9d9SKent Overstreet 
62*1dd7f9d9SKent Overstreet 	if (le64_to_cpu(bl->start[i].end) >=
63*1dd7f9d9SKent Overstreet 	    le64_to_cpu(bl->start[i + 1].start)) {
64*1dd7f9d9SKent Overstreet 		bl->start[i].end = bl->start[i + 1].end;
65*1dd7f9d9SKent Overstreet 		--nr;
66*1dd7f9d9SKent Overstreet 		memmove(&bl->start[i],
67*1dd7f9d9SKent Overstreet 			&bl->start[i + 1],
68*1dd7f9d9SKent Overstreet 			sizeof(bl->start[0]) * (nr - i));
69*1dd7f9d9SKent Overstreet 
70*1dd7f9d9SKent Overstreet 		bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
71*1dd7f9d9SKent Overstreet 							sb_blacklist_u64s(nr));
72*1dd7f9d9SKent Overstreet 		BUG_ON(!bl);
73*1dd7f9d9SKent Overstreet 	}
74*1dd7f9d9SKent Overstreet 
75*1dd7f9d9SKent Overstreet 	return bl;
76*1dd7f9d9SKent Overstreet }
77*1dd7f9d9SKent Overstreet 
78*1dd7f9d9SKent Overstreet int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
79*1dd7f9d9SKent Overstreet {
80*1dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl;
81*1dd7f9d9SKent Overstreet 	unsigned i, nr;
82*1dd7f9d9SKent Overstreet 	int ret = 0;
83*1dd7f9d9SKent Overstreet 
84*1dd7f9d9SKent Overstreet 	mutex_lock(&c->sb_lock);
85*1dd7f9d9SKent Overstreet 	bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
86*1dd7f9d9SKent Overstreet 	nr = blacklist_nr_entries(bl);
87*1dd7f9d9SKent Overstreet 
88*1dd7f9d9SKent Overstreet 	if (bl) {
89*1dd7f9d9SKent Overstreet 		for (i = 0; i < nr; i++) {
90*1dd7f9d9SKent Overstreet 			struct journal_seq_blacklist_entry *e =
91*1dd7f9d9SKent Overstreet 				bl->start + i;
92*1dd7f9d9SKent Overstreet 
93*1dd7f9d9SKent Overstreet 			if (start == le64_to_cpu(e->start) &&
94*1dd7f9d9SKent Overstreet 			    end   == le64_to_cpu(e->end))
95*1dd7f9d9SKent Overstreet 				goto out;
96*1dd7f9d9SKent Overstreet 
97*1dd7f9d9SKent Overstreet 			if (start <= le64_to_cpu(e->start) &&
98*1dd7f9d9SKent Overstreet 			    end   >= le64_to_cpu(e->end)) {
99*1dd7f9d9SKent Overstreet 				e->start = cpu_to_le64(start);
100*1dd7f9d9SKent Overstreet 				e->end	= cpu_to_le64(end);
101*1dd7f9d9SKent Overstreet 
102*1dd7f9d9SKent Overstreet 				if (i + 1 < nr)
103*1dd7f9d9SKent Overstreet 					bl = blacklist_entry_try_merge(c,
104*1dd7f9d9SKent Overstreet 								bl, i);
105*1dd7f9d9SKent Overstreet 				if (i)
106*1dd7f9d9SKent Overstreet 					bl = blacklist_entry_try_merge(c,
107*1dd7f9d9SKent Overstreet 								bl, i - 1);
108*1dd7f9d9SKent Overstreet 				goto out_write_sb;
109*1dd7f9d9SKent Overstreet 			}
110*1dd7f9d9SKent Overstreet 		}
111*1dd7f9d9SKent Overstreet 	}
112*1dd7f9d9SKent Overstreet 
113*1dd7f9d9SKent Overstreet 	bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
114*1dd7f9d9SKent Overstreet 					sb_blacklist_u64s(nr + 1));
115*1dd7f9d9SKent Overstreet 	if (!bl) {
116*1dd7f9d9SKent Overstreet 		ret = -ENOMEM;
117*1dd7f9d9SKent Overstreet 		goto out;
118*1dd7f9d9SKent Overstreet 	}
119*1dd7f9d9SKent Overstreet 
120*1dd7f9d9SKent Overstreet 	bl->start[nr].start	= cpu_to_le64(start);
121*1dd7f9d9SKent Overstreet 	bl->start[nr].end	= cpu_to_le64(end);
122*1dd7f9d9SKent Overstreet out_write_sb:
123*1dd7f9d9SKent Overstreet 	c->disk_sb.sb->features[0] |=
124*1dd7f9d9SKent Overstreet 		1ULL << BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3;
125*1dd7f9d9SKent Overstreet 
126*1dd7f9d9SKent Overstreet 	ret = bch2_write_super(c);
127*1dd7f9d9SKent Overstreet out:
128*1dd7f9d9SKent Overstreet 	mutex_unlock(&c->sb_lock);
129*1dd7f9d9SKent Overstreet 
130*1dd7f9d9SKent Overstreet 	return ret;
131*1dd7f9d9SKent Overstreet }
132*1dd7f9d9SKent Overstreet 
133*1dd7f9d9SKent Overstreet static int journal_seq_blacklist_table_cmp(const void *_l,
134*1dd7f9d9SKent Overstreet 					   const void *_r, size_t size)
135*1dd7f9d9SKent Overstreet {
136*1dd7f9d9SKent Overstreet 	const struct journal_seq_blacklist_table_entry *l = _l;
137*1dd7f9d9SKent Overstreet 	const struct journal_seq_blacklist_table_entry *r = _r;
138*1dd7f9d9SKent Overstreet 
139*1dd7f9d9SKent Overstreet 	return (l->start > r->start) - (l->start < r->start);
140*1dd7f9d9SKent Overstreet }
141*1dd7f9d9SKent Overstreet 
142*1dd7f9d9SKent Overstreet bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq,
143*1dd7f9d9SKent Overstreet 				     bool dirty)
144*1dd7f9d9SKent Overstreet {
145*1dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table;
146*1dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_table_entry search = { .start = seq };
147*1dd7f9d9SKent Overstreet 	int idx;
148*1dd7f9d9SKent Overstreet 
149*1dd7f9d9SKent Overstreet 	if (!t)
150*1dd7f9d9SKent Overstreet 		return false;
151*1dd7f9d9SKent Overstreet 
152*1dd7f9d9SKent Overstreet 	idx = eytzinger0_find_le(t->entries, t->nr,
153*1dd7f9d9SKent Overstreet 				 sizeof(t->entries[0]),
154*1dd7f9d9SKent Overstreet 				 journal_seq_blacklist_table_cmp,
155*1dd7f9d9SKent Overstreet 				 &search);
156*1dd7f9d9SKent Overstreet 	if (idx < 0)
157*1dd7f9d9SKent Overstreet 		return false;
158*1dd7f9d9SKent Overstreet 
159*1dd7f9d9SKent Overstreet 	BUG_ON(t->entries[idx].start > seq);
160*1dd7f9d9SKent Overstreet 
161*1dd7f9d9SKent Overstreet 	if (seq >= t->entries[idx].end)
162*1dd7f9d9SKent Overstreet 		return false;
163*1dd7f9d9SKent Overstreet 
164*1dd7f9d9SKent Overstreet 	if (dirty)
165*1dd7f9d9SKent Overstreet 		t->entries[idx].dirty = true;
166*1dd7f9d9SKent Overstreet 	return true;
167*1dd7f9d9SKent Overstreet }
168*1dd7f9d9SKent Overstreet 
169*1dd7f9d9SKent Overstreet int bch2_blacklist_table_initialize(struct bch_fs *c)
170*1dd7f9d9SKent Overstreet {
171*1dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl =
172*1dd7f9d9SKent Overstreet 		bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
173*1dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_table *t;
174*1dd7f9d9SKent Overstreet 	unsigned i, nr = blacklist_nr_entries(bl);
175*1dd7f9d9SKent Overstreet 
176*1dd7f9d9SKent Overstreet 	BUG_ON(c->journal_seq_blacklist_table);
177*1dd7f9d9SKent Overstreet 
178*1dd7f9d9SKent Overstreet 	if (!bl)
179*1dd7f9d9SKent Overstreet 		return 0;
180*1dd7f9d9SKent Overstreet 
181*1dd7f9d9SKent Overstreet 	t = kzalloc(sizeof(*t) + sizeof(t->entries[0]) * nr,
182*1dd7f9d9SKent Overstreet 		    GFP_KERNEL);
183*1dd7f9d9SKent Overstreet 	if (!t)
184*1dd7f9d9SKent Overstreet 		return -ENOMEM;
185*1dd7f9d9SKent Overstreet 
186*1dd7f9d9SKent Overstreet 	t->nr = nr;
187*1dd7f9d9SKent Overstreet 
188*1dd7f9d9SKent Overstreet 	for (i = 0; i < nr; i++) {
189*1dd7f9d9SKent Overstreet 		t->entries[i].start	= le64_to_cpu(bl->start[i].start);
190*1dd7f9d9SKent Overstreet 		t->entries[i].end	= le64_to_cpu(bl->start[i].end);
191*1dd7f9d9SKent Overstreet 	}
192*1dd7f9d9SKent Overstreet 
193*1dd7f9d9SKent Overstreet 	eytzinger0_sort(t->entries,
194*1dd7f9d9SKent Overstreet 			t->nr,
195*1dd7f9d9SKent Overstreet 			sizeof(t->entries[0]),
196*1dd7f9d9SKent Overstreet 			journal_seq_blacklist_table_cmp,
197*1dd7f9d9SKent Overstreet 			NULL);
198*1dd7f9d9SKent Overstreet 
199*1dd7f9d9SKent Overstreet 	c->journal_seq_blacklist_table = t;
200*1dd7f9d9SKent Overstreet 	return 0;
201*1dd7f9d9SKent Overstreet }
202*1dd7f9d9SKent Overstreet 
203*1dd7f9d9SKent Overstreet static const char *
204*1dd7f9d9SKent Overstreet bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb,
205*1dd7f9d9SKent Overstreet 				       struct bch_sb_field *f)
206*1dd7f9d9SKent Overstreet {
207*1dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl =
208*1dd7f9d9SKent Overstreet 		field_to_type(f, journal_seq_blacklist);
209*1dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_entry *i;
210*1dd7f9d9SKent Overstreet 	unsigned nr = blacklist_nr_entries(bl);
211*1dd7f9d9SKent Overstreet 
212*1dd7f9d9SKent Overstreet 	for (i = bl->start; i < bl->start + nr; i++) {
213*1dd7f9d9SKent Overstreet 		if (le64_to_cpu(i->start) >=
214*1dd7f9d9SKent Overstreet 		    le64_to_cpu(i->end))
215*1dd7f9d9SKent Overstreet 			return "entry start >= end";
216*1dd7f9d9SKent Overstreet 
217*1dd7f9d9SKent Overstreet 		if (i + 1 < bl->start + nr &&
218*1dd7f9d9SKent Overstreet 		    le64_to_cpu(i[0].end) >
219*1dd7f9d9SKent Overstreet 		    le64_to_cpu(i[1].start))
220*1dd7f9d9SKent Overstreet 			return "entries out of order";
221*1dd7f9d9SKent Overstreet 	}
222*1dd7f9d9SKent Overstreet 
223*1dd7f9d9SKent Overstreet 	return NULL;
224*1dd7f9d9SKent Overstreet }
225*1dd7f9d9SKent Overstreet 
226*1dd7f9d9SKent Overstreet static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out,
227*1dd7f9d9SKent Overstreet 						  struct bch_sb *sb,
228*1dd7f9d9SKent Overstreet 						  struct bch_sb_field *f)
229*1dd7f9d9SKent Overstreet {
230*1dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl =
231*1dd7f9d9SKent Overstreet 		field_to_type(f, journal_seq_blacklist);
232*1dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_entry *i;
233*1dd7f9d9SKent Overstreet 	unsigned nr = blacklist_nr_entries(bl);
234*1dd7f9d9SKent Overstreet 
235*1dd7f9d9SKent Overstreet 	for (i = bl->start; i < bl->start + nr; i++) {
236*1dd7f9d9SKent Overstreet 		if (i != bl->start)
237*1dd7f9d9SKent Overstreet 			pr_buf(out, " ");
238*1dd7f9d9SKent Overstreet 
239*1dd7f9d9SKent Overstreet 		pr_buf(out, "%llu-%llu",
240*1dd7f9d9SKent Overstreet 		       le64_to_cpu(i->start),
241*1dd7f9d9SKent Overstreet 		       le64_to_cpu(i->end));
242*1dd7f9d9SKent Overstreet 	}
243*1dd7f9d9SKent Overstreet }
244*1dd7f9d9SKent Overstreet 
245*1dd7f9d9SKent Overstreet const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = {
246*1dd7f9d9SKent Overstreet 	.validate	= bch2_sb_journal_seq_blacklist_validate,
247*1dd7f9d9SKent Overstreet 	.to_text	= bch2_sb_journal_seq_blacklist_to_text
248*1dd7f9d9SKent Overstreet };
249*1dd7f9d9SKent Overstreet 
250*1dd7f9d9SKent Overstreet void bch2_blacklist_entries_gc(struct work_struct *work)
251*1dd7f9d9SKent Overstreet {
252*1dd7f9d9SKent Overstreet 	struct bch_fs *c = container_of(work, struct bch_fs,
253*1dd7f9d9SKent Overstreet 					journal_seq_blacklist_gc_work);
254*1dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_table *t;
255*1dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl;
256*1dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_entry *src, *dst;
257424eb881SKent Overstreet 	struct btree_trans trans;
258*1dd7f9d9SKent Overstreet 	unsigned i, nr, new_nr;
259*1dd7f9d9SKent Overstreet 	int ret;
2601c6fdbd8SKent Overstreet 
261424eb881SKent Overstreet 	bch2_trans_init(&trans, c);
262424eb881SKent Overstreet 
263*1dd7f9d9SKent Overstreet 	for (i = 0; i < BTREE_ID_NR; i++) {
264*1dd7f9d9SKent Overstreet 		struct btree_iter *iter;
265*1dd7f9d9SKent Overstreet 		struct btree *b;
2661c6fdbd8SKent Overstreet 
267*1dd7f9d9SKent Overstreet 		for_each_btree_node(&trans, iter, i, POS_MIN,
268*1dd7f9d9SKent Overstreet 				    BTREE_ITER_PREFETCH, b)
269*1dd7f9d9SKent Overstreet 			if (test_bit(BCH_FS_STOPPING, &c->flags)) {
270424eb881SKent Overstreet 				bch2_trans_exit(&trans);
2711c6fdbd8SKent Overstreet 				return;
2721c6fdbd8SKent Overstreet 			}
273*1dd7f9d9SKent Overstreet 		bch2_trans_iter_free(&trans, iter);
2741c6fdbd8SKent Overstreet 	}
2751c6fdbd8SKent Overstreet 
276*1dd7f9d9SKent Overstreet 	ret = bch2_trans_exit(&trans);
2771c6fdbd8SKent Overstreet 	if (ret)
2781c6fdbd8SKent Overstreet 		return;
2791c6fdbd8SKent Overstreet 
280*1dd7f9d9SKent Overstreet 	mutex_lock(&c->sb_lock);
281*1dd7f9d9SKent Overstreet 	bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
282*1dd7f9d9SKent Overstreet 	if (!bl)
283*1dd7f9d9SKent Overstreet 		goto out;
2841c6fdbd8SKent Overstreet 
285*1dd7f9d9SKent Overstreet 	nr = blacklist_nr_entries(bl);
286*1dd7f9d9SKent Overstreet 	dst = bl->start;
2871c6fdbd8SKent Overstreet 
288*1dd7f9d9SKent Overstreet 	t = c->journal_seq_blacklist_table;
289*1dd7f9d9SKent Overstreet 	BUG_ON(nr != t->nr);
2901c6fdbd8SKent Overstreet 
291*1dd7f9d9SKent Overstreet 	for (src = bl->start, i = eytzinger0_first(t->nr);
292*1dd7f9d9SKent Overstreet 	     src < bl->start + nr;
293*1dd7f9d9SKent Overstreet 	     src++, i = eytzinger0_next(i, nr)) {
294*1dd7f9d9SKent Overstreet 		BUG_ON(t->entries[i].start	!= le64_to_cpu(src->start));
295*1dd7f9d9SKent Overstreet 		BUG_ON(t->entries[i].end	!= le64_to_cpu(src->end));
296*1dd7f9d9SKent Overstreet 
297*1dd7f9d9SKent Overstreet 		if (t->entries[i].dirty)
298*1dd7f9d9SKent Overstreet 			*dst++ = *src;
299*1dd7f9d9SKent Overstreet 	}
300*1dd7f9d9SKent Overstreet 
301*1dd7f9d9SKent Overstreet 	new_nr = dst - bl->start;
302*1dd7f9d9SKent Overstreet 
303*1dd7f9d9SKent Overstreet 	bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr);
304*1dd7f9d9SKent Overstreet 
305*1dd7f9d9SKent Overstreet 	if (new_nr != nr) {
306*1dd7f9d9SKent Overstreet 		bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
307*1dd7f9d9SKent Overstreet 				new_nr ? sb_blacklist_u64s(new_nr) : 0);
308*1dd7f9d9SKent Overstreet 		BUG_ON(new_nr && !bl);
309*1dd7f9d9SKent Overstreet 
310*1dd7f9d9SKent Overstreet 		if (!new_nr)
311*1dd7f9d9SKent Overstreet 			c->disk_sb.sb->features[0] &=
312*1dd7f9d9SKent Overstreet 				~(1ULL << BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3);
313*1dd7f9d9SKent Overstreet 
314*1dd7f9d9SKent Overstreet 		bch2_write_super(c);
315*1dd7f9d9SKent Overstreet 	}
316*1dd7f9d9SKent Overstreet out:
317*1dd7f9d9SKent Overstreet 	mutex_unlock(&c->sb_lock);
3181c6fdbd8SKent Overstreet }
319