xref: /linux/fs/bcachefs/journal_seq_blacklist.c (revision 20bceecb3159bbe06a26fc6747457d9de02ec227)
11c6fdbd8SKent Overstreet // SPDX-License-Identifier: GPL-2.0
21c6fdbd8SKent Overstreet 
31c6fdbd8SKent Overstreet #include "bcachefs.h"
41dd7f9d9SKent Overstreet #include "btree_iter.h"
51dd7f9d9SKent Overstreet #include "eytzinger.h"
61c6fdbd8SKent Overstreet #include "journal_seq_blacklist.h"
71dd7f9d9SKent Overstreet #include "super-io.h"
81c6fdbd8SKent Overstreet 
91c6fdbd8SKent Overstreet /*
101c6fdbd8SKent Overstreet  * journal_seq_blacklist machinery:
111c6fdbd8SKent Overstreet  *
121c6fdbd8SKent Overstreet  * To guarantee order of btree updates after a crash, we need to detect when a
131c6fdbd8SKent Overstreet  * btree node entry (bset) is newer than the newest journal entry that was
141c6fdbd8SKent Overstreet  * successfully written, and ignore it - effectively ignoring any btree updates
151c6fdbd8SKent Overstreet  * that didn't make it into the journal.
161c6fdbd8SKent Overstreet  *
171c6fdbd8SKent Overstreet  * If we didn't do this, we might have two btree nodes, a and b, both with
181c6fdbd8SKent Overstreet  * updates that weren't written to the journal yet: if b was updated after a,
191c6fdbd8SKent Overstreet  * but b was flushed and not a - oops; on recovery we'll find that the updates
201c6fdbd8SKent Overstreet  * to b happened, but not the updates to a that happened before it.
211c6fdbd8SKent Overstreet  *
221c6fdbd8SKent Overstreet  * Ignoring bsets that are newer than the newest journal entry is always safe,
231c6fdbd8SKent Overstreet  * because everything they contain will also have been journalled - and must
241c6fdbd8SKent Overstreet  * still be present in the journal on disk until a journal entry has been
251c6fdbd8SKent Overstreet  * written _after_ that bset was written.
261c6fdbd8SKent Overstreet  *
271c6fdbd8SKent Overstreet  * To accomplish this, bsets record the newest journal sequence number they
281c6fdbd8SKent Overstreet  * contain updates for; then, on startup, the btree code queries the journal
291c6fdbd8SKent Overstreet  * code to ask "Is this sequence number newer than the newest journal entry? If
301c6fdbd8SKent Overstreet  * so, ignore it."
311c6fdbd8SKent Overstreet  *
321c6fdbd8SKent Overstreet  * When this happens, we must blacklist that journal sequence number: the
331c6fdbd8SKent Overstreet  * journal must not write any entries with that sequence number, and it must
341c6fdbd8SKent Overstreet  * record that it was blacklisted so that a) on recovery we don't think we have
351c6fdbd8SKent Overstreet  * missing journal entries and b) so that the btree code continues to ignore
361c6fdbd8SKent Overstreet  * that bset, until that btree node is rewritten.
371c6fdbd8SKent Overstreet  */
381c6fdbd8SKent Overstreet 
391dd7f9d9SKent Overstreet static unsigned
401dd7f9d9SKent Overstreet blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl)
411c6fdbd8SKent Overstreet {
421dd7f9d9SKent Overstreet 	return bl
431dd7f9d9SKent Overstreet 		? ((vstruct_end(&bl->field) - (void *) &bl->start[0]) /
441dd7f9d9SKent Overstreet 		   sizeof(struct journal_seq_blacklist_entry))
451dd7f9d9SKent Overstreet 		: 0;
461dd7f9d9SKent Overstreet }
471c6fdbd8SKent Overstreet 
481dd7f9d9SKent Overstreet static unsigned sb_blacklist_u64s(unsigned nr)
491dd7f9d9SKent Overstreet {
501dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl;
511c6fdbd8SKent Overstreet 
521dd7f9d9SKent Overstreet 	return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64);
531dd7f9d9SKent Overstreet }
541dd7f9d9SKent Overstreet 
551dd7f9d9SKent Overstreet static struct bch_sb_field_journal_seq_blacklist *
561dd7f9d9SKent Overstreet blacklist_entry_try_merge(struct bch_fs *c,
571dd7f9d9SKent Overstreet 			  struct bch_sb_field_journal_seq_blacklist *bl,
581dd7f9d9SKent Overstreet 			  unsigned i)
591dd7f9d9SKent Overstreet {
601dd7f9d9SKent Overstreet 	unsigned nr = blacklist_nr_entries(bl);
611dd7f9d9SKent Overstreet 
621dd7f9d9SKent Overstreet 	if (le64_to_cpu(bl->start[i].end) >=
631dd7f9d9SKent Overstreet 	    le64_to_cpu(bl->start[i + 1].start)) {
641dd7f9d9SKent Overstreet 		bl->start[i].end = bl->start[i + 1].end;
651dd7f9d9SKent Overstreet 		--nr;
661dd7f9d9SKent Overstreet 		memmove(&bl->start[i],
671dd7f9d9SKent Overstreet 			&bl->start[i + 1],
681dd7f9d9SKent Overstreet 			sizeof(bl->start[0]) * (nr - i));
691dd7f9d9SKent Overstreet 
701dd7f9d9SKent Overstreet 		bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
711dd7f9d9SKent Overstreet 							sb_blacklist_u64s(nr));
721dd7f9d9SKent Overstreet 		BUG_ON(!bl);
731dd7f9d9SKent Overstreet 	}
741dd7f9d9SKent Overstreet 
751dd7f9d9SKent Overstreet 	return bl;
761dd7f9d9SKent Overstreet }
771dd7f9d9SKent Overstreet 
781dd7f9d9SKent Overstreet int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
791dd7f9d9SKent Overstreet {
801dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl;
811dd7f9d9SKent Overstreet 	unsigned i, nr;
821dd7f9d9SKent Overstreet 	int ret = 0;
831dd7f9d9SKent Overstreet 
841dd7f9d9SKent Overstreet 	mutex_lock(&c->sb_lock);
851dd7f9d9SKent Overstreet 	bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
861dd7f9d9SKent Overstreet 	nr = blacklist_nr_entries(bl);
871dd7f9d9SKent Overstreet 
881dd7f9d9SKent Overstreet 	if (bl) {
891dd7f9d9SKent Overstreet 		for (i = 0; i < nr; i++) {
901dd7f9d9SKent Overstreet 			struct journal_seq_blacklist_entry *e =
911dd7f9d9SKent Overstreet 				bl->start + i;
921dd7f9d9SKent Overstreet 
931dd7f9d9SKent Overstreet 			if (start == le64_to_cpu(e->start) &&
941dd7f9d9SKent Overstreet 			    end   == le64_to_cpu(e->end))
951dd7f9d9SKent Overstreet 				goto out;
961dd7f9d9SKent Overstreet 
971dd7f9d9SKent Overstreet 			if (start <= le64_to_cpu(e->start) &&
981dd7f9d9SKent Overstreet 			    end   >= le64_to_cpu(e->end)) {
991dd7f9d9SKent Overstreet 				e->start = cpu_to_le64(start);
1001dd7f9d9SKent Overstreet 				e->end	= cpu_to_le64(end);
1011dd7f9d9SKent Overstreet 
1021dd7f9d9SKent Overstreet 				if (i + 1 < nr)
1031dd7f9d9SKent Overstreet 					bl = blacklist_entry_try_merge(c,
1041dd7f9d9SKent Overstreet 								bl, i);
1051dd7f9d9SKent Overstreet 				if (i)
1061dd7f9d9SKent Overstreet 					bl = blacklist_entry_try_merge(c,
1071dd7f9d9SKent Overstreet 								bl, i - 1);
1081dd7f9d9SKent Overstreet 				goto out_write_sb;
1091dd7f9d9SKent Overstreet 			}
1101dd7f9d9SKent Overstreet 		}
1111dd7f9d9SKent Overstreet 	}
1121dd7f9d9SKent Overstreet 
1131dd7f9d9SKent Overstreet 	bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
1141dd7f9d9SKent Overstreet 					sb_blacklist_u64s(nr + 1));
1151dd7f9d9SKent Overstreet 	if (!bl) {
1161dd7f9d9SKent Overstreet 		ret = -ENOMEM;
1171dd7f9d9SKent Overstreet 		goto out;
1181dd7f9d9SKent Overstreet 	}
1191dd7f9d9SKent Overstreet 
1201dd7f9d9SKent Overstreet 	bl->start[nr].start	= cpu_to_le64(start);
1211dd7f9d9SKent Overstreet 	bl->start[nr].end	= cpu_to_le64(end);
1221dd7f9d9SKent Overstreet out_write_sb:
1231dd7f9d9SKent Overstreet 	c->disk_sb.sb->features[0] |=
1241dd7f9d9SKent Overstreet 		1ULL << BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3;
1251dd7f9d9SKent Overstreet 
1261dd7f9d9SKent Overstreet 	ret = bch2_write_super(c);
1271dd7f9d9SKent Overstreet out:
1281dd7f9d9SKent Overstreet 	mutex_unlock(&c->sb_lock);
1291dd7f9d9SKent Overstreet 
1301dd7f9d9SKent Overstreet 	return ret;
1311dd7f9d9SKent Overstreet }
1321dd7f9d9SKent Overstreet 
1331dd7f9d9SKent Overstreet static int journal_seq_blacklist_table_cmp(const void *_l,
1341dd7f9d9SKent Overstreet 					   const void *_r, size_t size)
1351dd7f9d9SKent Overstreet {
1361dd7f9d9SKent Overstreet 	const struct journal_seq_blacklist_table_entry *l = _l;
1371dd7f9d9SKent Overstreet 	const struct journal_seq_blacklist_table_entry *r = _r;
1381dd7f9d9SKent Overstreet 
1393ea2b1e1SKent Overstreet 	return cmp_int(l->start, r->start);
1401dd7f9d9SKent Overstreet }
1411dd7f9d9SKent Overstreet 
1421dd7f9d9SKent Overstreet bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq,
1431dd7f9d9SKent Overstreet 				     bool dirty)
1441dd7f9d9SKent Overstreet {
1451dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table;
1461dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_table_entry search = { .start = seq };
1471dd7f9d9SKent Overstreet 	int idx;
1481dd7f9d9SKent Overstreet 
1491dd7f9d9SKent Overstreet 	if (!t)
1501dd7f9d9SKent Overstreet 		return false;
1511dd7f9d9SKent Overstreet 
1521dd7f9d9SKent Overstreet 	idx = eytzinger0_find_le(t->entries, t->nr,
1531dd7f9d9SKent Overstreet 				 sizeof(t->entries[0]),
1541dd7f9d9SKent Overstreet 				 journal_seq_blacklist_table_cmp,
1551dd7f9d9SKent Overstreet 				 &search);
1561dd7f9d9SKent Overstreet 	if (idx < 0)
1571dd7f9d9SKent Overstreet 		return false;
1581dd7f9d9SKent Overstreet 
1591dd7f9d9SKent Overstreet 	BUG_ON(t->entries[idx].start > seq);
1601dd7f9d9SKent Overstreet 
1611dd7f9d9SKent Overstreet 	if (seq >= t->entries[idx].end)
1621dd7f9d9SKent Overstreet 		return false;
1631dd7f9d9SKent Overstreet 
1641dd7f9d9SKent Overstreet 	if (dirty)
1651dd7f9d9SKent Overstreet 		t->entries[idx].dirty = true;
1661dd7f9d9SKent Overstreet 	return true;
1671dd7f9d9SKent Overstreet }
1681dd7f9d9SKent Overstreet 
1691dd7f9d9SKent Overstreet int bch2_blacklist_table_initialize(struct bch_fs *c)
1701dd7f9d9SKent Overstreet {
1711dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl =
1721dd7f9d9SKent Overstreet 		bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
1731dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_table *t;
1741dd7f9d9SKent Overstreet 	unsigned i, nr = blacklist_nr_entries(bl);
1751dd7f9d9SKent Overstreet 
1761dd7f9d9SKent Overstreet 	BUG_ON(c->journal_seq_blacklist_table);
1771dd7f9d9SKent Overstreet 
1781dd7f9d9SKent Overstreet 	if (!bl)
1791dd7f9d9SKent Overstreet 		return 0;
1801dd7f9d9SKent Overstreet 
1811dd7f9d9SKent Overstreet 	t = kzalloc(sizeof(*t) + sizeof(t->entries[0]) * nr,
1821dd7f9d9SKent Overstreet 		    GFP_KERNEL);
1831dd7f9d9SKent Overstreet 	if (!t)
1841dd7f9d9SKent Overstreet 		return -ENOMEM;
1851dd7f9d9SKent Overstreet 
1861dd7f9d9SKent Overstreet 	t->nr = nr;
1871dd7f9d9SKent Overstreet 
1881dd7f9d9SKent Overstreet 	for (i = 0; i < nr; i++) {
1891dd7f9d9SKent Overstreet 		t->entries[i].start	= le64_to_cpu(bl->start[i].start);
1901dd7f9d9SKent Overstreet 		t->entries[i].end	= le64_to_cpu(bl->start[i].end);
1911dd7f9d9SKent Overstreet 	}
1921dd7f9d9SKent Overstreet 
1931dd7f9d9SKent Overstreet 	eytzinger0_sort(t->entries,
1941dd7f9d9SKent Overstreet 			t->nr,
1951dd7f9d9SKent Overstreet 			sizeof(t->entries[0]),
1961dd7f9d9SKent Overstreet 			journal_seq_blacklist_table_cmp,
1971dd7f9d9SKent Overstreet 			NULL);
1981dd7f9d9SKent Overstreet 
1991dd7f9d9SKent Overstreet 	c->journal_seq_blacklist_table = t;
2001dd7f9d9SKent Overstreet 	return 0;
2011dd7f9d9SKent Overstreet }
2021dd7f9d9SKent Overstreet 
2031dd7f9d9SKent Overstreet static const char *
2041dd7f9d9SKent Overstreet bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb,
2051dd7f9d9SKent Overstreet 				       struct bch_sb_field *f)
2061dd7f9d9SKent Overstreet {
2071dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl =
2081dd7f9d9SKent Overstreet 		field_to_type(f, journal_seq_blacklist);
2091dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_entry *i;
2101dd7f9d9SKent Overstreet 	unsigned nr = blacklist_nr_entries(bl);
2111dd7f9d9SKent Overstreet 
2121dd7f9d9SKent Overstreet 	for (i = bl->start; i < bl->start + nr; i++) {
2131dd7f9d9SKent Overstreet 		if (le64_to_cpu(i->start) >=
2141dd7f9d9SKent Overstreet 		    le64_to_cpu(i->end))
2151dd7f9d9SKent Overstreet 			return "entry start >= end";
2161dd7f9d9SKent Overstreet 
2171dd7f9d9SKent Overstreet 		if (i + 1 < bl->start + nr &&
2181dd7f9d9SKent Overstreet 		    le64_to_cpu(i[0].end) >
2191dd7f9d9SKent Overstreet 		    le64_to_cpu(i[1].start))
2201dd7f9d9SKent Overstreet 			return "entries out of order";
2211dd7f9d9SKent Overstreet 	}
2221dd7f9d9SKent Overstreet 
2231dd7f9d9SKent Overstreet 	return NULL;
2241dd7f9d9SKent Overstreet }
2251dd7f9d9SKent Overstreet 
2261dd7f9d9SKent Overstreet static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out,
2271dd7f9d9SKent Overstreet 						  struct bch_sb *sb,
2281dd7f9d9SKent Overstreet 						  struct bch_sb_field *f)
2291dd7f9d9SKent Overstreet {
2301dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl =
2311dd7f9d9SKent Overstreet 		field_to_type(f, journal_seq_blacklist);
2321dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_entry *i;
2331dd7f9d9SKent Overstreet 	unsigned nr = blacklist_nr_entries(bl);
2341dd7f9d9SKent Overstreet 
2351dd7f9d9SKent Overstreet 	for (i = bl->start; i < bl->start + nr; i++) {
2361dd7f9d9SKent Overstreet 		if (i != bl->start)
2371dd7f9d9SKent Overstreet 			pr_buf(out, " ");
2381dd7f9d9SKent Overstreet 
2391dd7f9d9SKent Overstreet 		pr_buf(out, "%llu-%llu",
2401dd7f9d9SKent Overstreet 		       le64_to_cpu(i->start),
2411dd7f9d9SKent Overstreet 		       le64_to_cpu(i->end));
2421dd7f9d9SKent Overstreet 	}
2431dd7f9d9SKent Overstreet }
2441dd7f9d9SKent Overstreet 
2451dd7f9d9SKent Overstreet const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = {
2461dd7f9d9SKent Overstreet 	.validate	= bch2_sb_journal_seq_blacklist_validate,
2471dd7f9d9SKent Overstreet 	.to_text	= bch2_sb_journal_seq_blacklist_to_text
2481dd7f9d9SKent Overstreet };
2491dd7f9d9SKent Overstreet 
2501dd7f9d9SKent Overstreet void bch2_blacklist_entries_gc(struct work_struct *work)
2511dd7f9d9SKent Overstreet {
2521dd7f9d9SKent Overstreet 	struct bch_fs *c = container_of(work, struct bch_fs,
2531dd7f9d9SKent Overstreet 					journal_seq_blacklist_gc_work);
2541dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_table *t;
2551dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl;
2561dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_entry *src, *dst;
257424eb881SKent Overstreet 	struct btree_trans trans;
2581dd7f9d9SKent Overstreet 	unsigned i, nr, new_nr;
2591dd7f9d9SKent Overstreet 	int ret;
2601c6fdbd8SKent Overstreet 
261*20bceecbSKent Overstreet 	bch2_trans_init(&trans, c, 0, 0);
262424eb881SKent Overstreet 
2631dd7f9d9SKent Overstreet 	for (i = 0; i < BTREE_ID_NR; i++) {
2641dd7f9d9SKent Overstreet 		struct btree_iter *iter;
2651dd7f9d9SKent Overstreet 		struct btree *b;
2661c6fdbd8SKent Overstreet 
2671dd7f9d9SKent Overstreet 		for_each_btree_node(&trans, iter, i, POS_MIN,
2681dd7f9d9SKent Overstreet 				    BTREE_ITER_PREFETCH, b)
2691dd7f9d9SKent Overstreet 			if (test_bit(BCH_FS_STOPPING, &c->flags)) {
270424eb881SKent Overstreet 				bch2_trans_exit(&trans);
2711c6fdbd8SKent Overstreet 				return;
2721c6fdbd8SKent Overstreet 			}
2731dd7f9d9SKent Overstreet 		bch2_trans_iter_free(&trans, iter);
2741c6fdbd8SKent Overstreet 	}
2751c6fdbd8SKent Overstreet 
2761dd7f9d9SKent Overstreet 	ret = bch2_trans_exit(&trans);
2771c6fdbd8SKent Overstreet 	if (ret)
2781c6fdbd8SKent Overstreet 		return;
2791c6fdbd8SKent Overstreet 
2801dd7f9d9SKent Overstreet 	mutex_lock(&c->sb_lock);
2811dd7f9d9SKent Overstreet 	bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
2821dd7f9d9SKent Overstreet 	if (!bl)
2831dd7f9d9SKent Overstreet 		goto out;
2841c6fdbd8SKent Overstreet 
2851dd7f9d9SKent Overstreet 	nr = blacklist_nr_entries(bl);
2861dd7f9d9SKent Overstreet 	dst = bl->start;
2871c6fdbd8SKent Overstreet 
2881dd7f9d9SKent Overstreet 	t = c->journal_seq_blacklist_table;
2891dd7f9d9SKent Overstreet 	BUG_ON(nr != t->nr);
2901c6fdbd8SKent Overstreet 
2911dd7f9d9SKent Overstreet 	for (src = bl->start, i = eytzinger0_first(t->nr);
2921dd7f9d9SKent Overstreet 	     src < bl->start + nr;
2931dd7f9d9SKent Overstreet 	     src++, i = eytzinger0_next(i, nr)) {
2941dd7f9d9SKent Overstreet 		BUG_ON(t->entries[i].start	!= le64_to_cpu(src->start));
2951dd7f9d9SKent Overstreet 		BUG_ON(t->entries[i].end	!= le64_to_cpu(src->end));
2961dd7f9d9SKent Overstreet 
2971dd7f9d9SKent Overstreet 		if (t->entries[i].dirty)
2981dd7f9d9SKent Overstreet 			*dst++ = *src;
2991dd7f9d9SKent Overstreet 	}
3001dd7f9d9SKent Overstreet 
3011dd7f9d9SKent Overstreet 	new_nr = dst - bl->start;
3021dd7f9d9SKent Overstreet 
3031dd7f9d9SKent Overstreet 	bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr);
3041dd7f9d9SKent Overstreet 
3051dd7f9d9SKent Overstreet 	if (new_nr != nr) {
3061dd7f9d9SKent Overstreet 		bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
3071dd7f9d9SKent Overstreet 				new_nr ? sb_blacklist_u64s(new_nr) : 0);
3081dd7f9d9SKent Overstreet 		BUG_ON(new_nr && !bl);
3091dd7f9d9SKent Overstreet 
3101dd7f9d9SKent Overstreet 		if (!new_nr)
3111dd7f9d9SKent Overstreet 			c->disk_sb.sb->features[0] &=
3121dd7f9d9SKent Overstreet 				~(1ULL << BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3);
3131dd7f9d9SKent Overstreet 
3141dd7f9d9SKent Overstreet 		bch2_write_super(c);
3151dd7f9d9SKent Overstreet 	}
3161dd7f9d9SKent Overstreet out:
3171dd7f9d9SKent Overstreet 	mutex_unlock(&c->sb_lock);
3181c6fdbd8SKent Overstreet }
319