xref: /linux/fs/bcachefs/journal_seq_blacklist.c (revision adbcada43fa79197224b5a522b1faaf222b43bcd)
11c6fdbd8SKent Overstreet // SPDX-License-Identifier: GPL-2.0
21c6fdbd8SKent Overstreet 
31c6fdbd8SKent Overstreet #include "bcachefs.h"
41dd7f9d9SKent Overstreet #include "btree_iter.h"
51dd7f9d9SKent Overstreet #include "eytzinger.h"
61c6fdbd8SKent Overstreet #include "journal_seq_blacklist.h"
71dd7f9d9SKent Overstreet #include "super-io.h"
81c6fdbd8SKent Overstreet 
/*
 * journal_seq_blacklist machinery:
 *
 * To guarantee order of btree updates after a crash, we need to detect when a
 * btree node entry (bset) is newer than the newest journal entry that was
 * successfully written, and ignore it - effectively ignoring any btree updates
 * that didn't make it into the journal.
 *
 * If we didn't do this, we might have two btree nodes, a and b, both with
 * updates that weren't written to the journal yet: if b was updated after a,
 * but b was flushed and not a - oops; on recovery we'll find that the updates
 * to b happened, but not the updates to a that happened before it.
 *
 * Ignoring bsets that are newer than the newest journal entry is always safe,
 * because everything they contain will also have been journalled - and must
 * still be present in the journal on disk until a journal entry has been
 * written _after_ that bset was written.
 *
 * To accomplish this, bsets record the newest journal sequence number they
 * contain updates for; then, on startup, the btree code queries the journal
 * code to ask "Is this sequence number newer than the newest journal entry? If
 * so, ignore it."
 *
 * When this happens, we must blacklist that journal sequence number: the
 * journal must not write any entries with that sequence number, and it must
 * record that it was blacklisted so that a) on recovery we don't think we have
 * missing journal entries and b) so that the btree code continues to ignore
 * that bset, until that btree node is rewritten.
 */

381c6fdbd8SKent Overstreet 
391dd7f9d9SKent Overstreet static unsigned sb_blacklist_u64s(unsigned nr)
401dd7f9d9SKent Overstreet {
411dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl;
421c6fdbd8SKent Overstreet 
431dd7f9d9SKent Overstreet 	return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64);
441dd7f9d9SKent Overstreet }
451dd7f9d9SKent Overstreet 
461dd7f9d9SKent Overstreet static struct bch_sb_field_journal_seq_blacklist *
471dd7f9d9SKent Overstreet blacklist_entry_try_merge(struct bch_fs *c,
481dd7f9d9SKent Overstreet 			  struct bch_sb_field_journal_seq_blacklist *bl,
491dd7f9d9SKent Overstreet 			  unsigned i)
501dd7f9d9SKent Overstreet {
511dd7f9d9SKent Overstreet 	unsigned nr = blacklist_nr_entries(bl);
521dd7f9d9SKent Overstreet 
531dd7f9d9SKent Overstreet 	if (le64_to_cpu(bl->start[i].end) >=
541dd7f9d9SKent Overstreet 	    le64_to_cpu(bl->start[i + 1].start)) {
551dd7f9d9SKent Overstreet 		bl->start[i].end = bl->start[i + 1].end;
561dd7f9d9SKent Overstreet 		--nr;
571dd7f9d9SKent Overstreet 		memmove(&bl->start[i],
581dd7f9d9SKent Overstreet 			&bl->start[i + 1],
591dd7f9d9SKent Overstreet 			sizeof(bl->start[0]) * (nr - i));
601dd7f9d9SKent Overstreet 
611dd7f9d9SKent Overstreet 		bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
621dd7f9d9SKent Overstreet 							sb_blacklist_u64s(nr));
631dd7f9d9SKent Overstreet 		BUG_ON(!bl);
641dd7f9d9SKent Overstreet 	}
651dd7f9d9SKent Overstreet 
661dd7f9d9SKent Overstreet 	return bl;
671dd7f9d9SKent Overstreet }
681dd7f9d9SKent Overstreet 
691dd7f9d9SKent Overstreet int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
701dd7f9d9SKent Overstreet {
711dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl;
721dd7f9d9SKent Overstreet 	unsigned i, nr;
731dd7f9d9SKent Overstreet 	int ret = 0;
741dd7f9d9SKent Overstreet 
751dd7f9d9SKent Overstreet 	mutex_lock(&c->sb_lock);
761dd7f9d9SKent Overstreet 	bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
771dd7f9d9SKent Overstreet 	nr = blacklist_nr_entries(bl);
781dd7f9d9SKent Overstreet 
791dd7f9d9SKent Overstreet 	if (bl) {
801dd7f9d9SKent Overstreet 		for (i = 0; i < nr; i++) {
811dd7f9d9SKent Overstreet 			struct journal_seq_blacklist_entry *e =
821dd7f9d9SKent Overstreet 				bl->start + i;
831dd7f9d9SKent Overstreet 
841dd7f9d9SKent Overstreet 			if (start == le64_to_cpu(e->start) &&
851dd7f9d9SKent Overstreet 			    end   == le64_to_cpu(e->end))
861dd7f9d9SKent Overstreet 				goto out;
871dd7f9d9SKent Overstreet 
881dd7f9d9SKent Overstreet 			if (start <= le64_to_cpu(e->start) &&
891dd7f9d9SKent Overstreet 			    end   >= le64_to_cpu(e->end)) {
901dd7f9d9SKent Overstreet 				e->start = cpu_to_le64(start);
911dd7f9d9SKent Overstreet 				e->end	= cpu_to_le64(end);
921dd7f9d9SKent Overstreet 
931dd7f9d9SKent Overstreet 				if (i + 1 < nr)
941dd7f9d9SKent Overstreet 					bl = blacklist_entry_try_merge(c,
951dd7f9d9SKent Overstreet 								bl, i);
961dd7f9d9SKent Overstreet 				if (i)
971dd7f9d9SKent Overstreet 					bl = blacklist_entry_try_merge(c,
981dd7f9d9SKent Overstreet 								bl, i - 1);
991dd7f9d9SKent Overstreet 				goto out_write_sb;
1001dd7f9d9SKent Overstreet 			}
1011dd7f9d9SKent Overstreet 		}
1021dd7f9d9SKent Overstreet 	}
1031dd7f9d9SKent Overstreet 
1041dd7f9d9SKent Overstreet 	bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
1051dd7f9d9SKent Overstreet 					sb_blacklist_u64s(nr + 1));
1061dd7f9d9SKent Overstreet 	if (!bl) {
1071dd7f9d9SKent Overstreet 		ret = -ENOMEM;
1081dd7f9d9SKent Overstreet 		goto out;
1091dd7f9d9SKent Overstreet 	}
1101dd7f9d9SKent Overstreet 
1111dd7f9d9SKent Overstreet 	bl->start[nr].start	= cpu_to_le64(start);
1121dd7f9d9SKent Overstreet 	bl->start[nr].end	= cpu_to_le64(end);
1131dd7f9d9SKent Overstreet out_write_sb:
1141dd7f9d9SKent Overstreet 	c->disk_sb.sb->features[0] |=
1151c3ff72cSKent Overstreet 		1ULL << BCH_FEATURE_journal_seq_blacklist_v3;
1161dd7f9d9SKent Overstreet 
1171dd7f9d9SKent Overstreet 	ret = bch2_write_super(c);
1181dd7f9d9SKent Overstreet out:
1191dd7f9d9SKent Overstreet 	mutex_unlock(&c->sb_lock);
1201dd7f9d9SKent Overstreet 
121*adbcada4SKent Overstreet 	return ret ?: bch2_blacklist_table_initialize(c);
1221dd7f9d9SKent Overstreet }
1231dd7f9d9SKent Overstreet 
1241dd7f9d9SKent Overstreet static int journal_seq_blacklist_table_cmp(const void *_l,
1251dd7f9d9SKent Overstreet 					   const void *_r, size_t size)
1261dd7f9d9SKent Overstreet {
1271dd7f9d9SKent Overstreet 	const struct journal_seq_blacklist_table_entry *l = _l;
1281dd7f9d9SKent Overstreet 	const struct journal_seq_blacklist_table_entry *r = _r;
1291dd7f9d9SKent Overstreet 
1303ea2b1e1SKent Overstreet 	return cmp_int(l->start, r->start);
1311dd7f9d9SKent Overstreet }
1321dd7f9d9SKent Overstreet 
1331dd7f9d9SKent Overstreet bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq,
1341dd7f9d9SKent Overstreet 				     bool dirty)
1351dd7f9d9SKent Overstreet {
1361dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table;
1371dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_table_entry search = { .start = seq };
1381dd7f9d9SKent Overstreet 	int idx;
1391dd7f9d9SKent Overstreet 
1401dd7f9d9SKent Overstreet 	if (!t)
1411dd7f9d9SKent Overstreet 		return false;
1421dd7f9d9SKent Overstreet 
1431dd7f9d9SKent Overstreet 	idx = eytzinger0_find_le(t->entries, t->nr,
1441dd7f9d9SKent Overstreet 				 sizeof(t->entries[0]),
1451dd7f9d9SKent Overstreet 				 journal_seq_blacklist_table_cmp,
1461dd7f9d9SKent Overstreet 				 &search);
1471dd7f9d9SKent Overstreet 	if (idx < 0)
1481dd7f9d9SKent Overstreet 		return false;
1491dd7f9d9SKent Overstreet 
1501dd7f9d9SKent Overstreet 	BUG_ON(t->entries[idx].start > seq);
1511dd7f9d9SKent Overstreet 
1521dd7f9d9SKent Overstreet 	if (seq >= t->entries[idx].end)
1531dd7f9d9SKent Overstreet 		return false;
1541dd7f9d9SKent Overstreet 
1551dd7f9d9SKent Overstreet 	if (dirty)
1561dd7f9d9SKent Overstreet 		t->entries[idx].dirty = true;
1571dd7f9d9SKent Overstreet 	return true;
1581dd7f9d9SKent Overstreet }
1591dd7f9d9SKent Overstreet 
1601dd7f9d9SKent Overstreet int bch2_blacklist_table_initialize(struct bch_fs *c)
1611dd7f9d9SKent Overstreet {
1621dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl =
1631dd7f9d9SKent Overstreet 		bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
1641dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_table *t;
1651dd7f9d9SKent Overstreet 	unsigned i, nr = blacklist_nr_entries(bl);
1661dd7f9d9SKent Overstreet 
1671dd7f9d9SKent Overstreet 	if (!bl)
1681dd7f9d9SKent Overstreet 		return 0;
1691dd7f9d9SKent Overstreet 
1701dd7f9d9SKent Overstreet 	t = kzalloc(sizeof(*t) + sizeof(t->entries[0]) * nr,
1711dd7f9d9SKent Overstreet 		    GFP_KERNEL);
1721dd7f9d9SKent Overstreet 	if (!t)
1731dd7f9d9SKent Overstreet 		return -ENOMEM;
1741dd7f9d9SKent Overstreet 
1751dd7f9d9SKent Overstreet 	t->nr = nr;
1761dd7f9d9SKent Overstreet 
1771dd7f9d9SKent Overstreet 	for (i = 0; i < nr; i++) {
1781dd7f9d9SKent Overstreet 		t->entries[i].start	= le64_to_cpu(bl->start[i].start);
1791dd7f9d9SKent Overstreet 		t->entries[i].end	= le64_to_cpu(bl->start[i].end);
1801dd7f9d9SKent Overstreet 	}
1811dd7f9d9SKent Overstreet 
1821dd7f9d9SKent Overstreet 	eytzinger0_sort(t->entries,
1831dd7f9d9SKent Overstreet 			t->nr,
1841dd7f9d9SKent Overstreet 			sizeof(t->entries[0]),
1851dd7f9d9SKent Overstreet 			journal_seq_blacklist_table_cmp,
1861dd7f9d9SKent Overstreet 			NULL);
1871dd7f9d9SKent Overstreet 
188*adbcada4SKent Overstreet 	kfree(c->journal_seq_blacklist_table);
1891dd7f9d9SKent Overstreet 	c->journal_seq_blacklist_table = t;
1901dd7f9d9SKent Overstreet 	return 0;
1911dd7f9d9SKent Overstreet }
1921dd7f9d9SKent Overstreet 
1931dd7f9d9SKent Overstreet static const char *
1941dd7f9d9SKent Overstreet bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb,
1951dd7f9d9SKent Overstreet 				       struct bch_sb_field *f)
1961dd7f9d9SKent Overstreet {
1971dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl =
1981dd7f9d9SKent Overstreet 		field_to_type(f, journal_seq_blacklist);
1991dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_entry *i;
2001dd7f9d9SKent Overstreet 	unsigned nr = blacklist_nr_entries(bl);
2011dd7f9d9SKent Overstreet 
2021dd7f9d9SKent Overstreet 	for (i = bl->start; i < bl->start + nr; i++) {
2031dd7f9d9SKent Overstreet 		if (le64_to_cpu(i->start) >=
2041dd7f9d9SKent Overstreet 		    le64_to_cpu(i->end))
2051dd7f9d9SKent Overstreet 			return "entry start >= end";
2061dd7f9d9SKent Overstreet 
2071dd7f9d9SKent Overstreet 		if (i + 1 < bl->start + nr &&
2081dd7f9d9SKent Overstreet 		    le64_to_cpu(i[0].end) >
2091dd7f9d9SKent Overstreet 		    le64_to_cpu(i[1].start))
2101dd7f9d9SKent Overstreet 			return "entries out of order";
2111dd7f9d9SKent Overstreet 	}
2121dd7f9d9SKent Overstreet 
2131dd7f9d9SKent Overstreet 	return NULL;
2141dd7f9d9SKent Overstreet }
2151dd7f9d9SKent Overstreet 
2161dd7f9d9SKent Overstreet static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out,
2171dd7f9d9SKent Overstreet 						  struct bch_sb *sb,
2181dd7f9d9SKent Overstreet 						  struct bch_sb_field *f)
2191dd7f9d9SKent Overstreet {
2201dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl =
2211dd7f9d9SKent Overstreet 		field_to_type(f, journal_seq_blacklist);
2221dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_entry *i;
2231dd7f9d9SKent Overstreet 	unsigned nr = blacklist_nr_entries(bl);
2241dd7f9d9SKent Overstreet 
2251dd7f9d9SKent Overstreet 	for (i = bl->start; i < bl->start + nr; i++) {
2261dd7f9d9SKent Overstreet 		if (i != bl->start)
2271dd7f9d9SKent Overstreet 			pr_buf(out, " ");
2281dd7f9d9SKent Overstreet 
2291dd7f9d9SKent Overstreet 		pr_buf(out, "%llu-%llu",
2301dd7f9d9SKent Overstreet 		       le64_to_cpu(i->start),
2311dd7f9d9SKent Overstreet 		       le64_to_cpu(i->end));
2321dd7f9d9SKent Overstreet 	}
2331dd7f9d9SKent Overstreet }
2341dd7f9d9SKent Overstreet 
2351dd7f9d9SKent Overstreet const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = {
2361dd7f9d9SKent Overstreet 	.validate	= bch2_sb_journal_seq_blacklist_validate,
2371dd7f9d9SKent Overstreet 	.to_text	= bch2_sb_journal_seq_blacklist_to_text
2381dd7f9d9SKent Overstreet };
2391dd7f9d9SKent Overstreet 
2401dd7f9d9SKent Overstreet void bch2_blacklist_entries_gc(struct work_struct *work)
2411dd7f9d9SKent Overstreet {
2421dd7f9d9SKent Overstreet 	struct bch_fs *c = container_of(work, struct bch_fs,
2431dd7f9d9SKent Overstreet 					journal_seq_blacklist_gc_work);
2441dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_table *t;
2451dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl;
2461dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_entry *src, *dst;
247424eb881SKent Overstreet 	struct btree_trans trans;
2481dd7f9d9SKent Overstreet 	unsigned i, nr, new_nr;
2491dd7f9d9SKent Overstreet 	int ret;
2501c6fdbd8SKent Overstreet 
25120bceecbSKent Overstreet 	bch2_trans_init(&trans, c, 0, 0);
252424eb881SKent Overstreet 
2531dd7f9d9SKent Overstreet 	for (i = 0; i < BTREE_ID_NR; i++) {
2541dd7f9d9SKent Overstreet 		struct btree_iter *iter;
2551dd7f9d9SKent Overstreet 		struct btree *b;
2561c6fdbd8SKent Overstreet 
2571dd7f9d9SKent Overstreet 		for_each_btree_node(&trans, iter, i, POS_MIN,
2581dd7f9d9SKent Overstreet 				    BTREE_ITER_PREFETCH, b)
2591dd7f9d9SKent Overstreet 			if (test_bit(BCH_FS_STOPPING, &c->flags)) {
260424eb881SKent Overstreet 				bch2_trans_exit(&trans);
2611c6fdbd8SKent Overstreet 				return;
2621c6fdbd8SKent Overstreet 			}
2631dd7f9d9SKent Overstreet 		bch2_trans_iter_free(&trans, iter);
2641c6fdbd8SKent Overstreet 	}
2651c6fdbd8SKent Overstreet 
2661dd7f9d9SKent Overstreet 	ret = bch2_trans_exit(&trans);
2671c6fdbd8SKent Overstreet 	if (ret)
2681c6fdbd8SKent Overstreet 		return;
2691c6fdbd8SKent Overstreet 
2701dd7f9d9SKent Overstreet 	mutex_lock(&c->sb_lock);
2711dd7f9d9SKent Overstreet 	bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
2721dd7f9d9SKent Overstreet 	if (!bl)
2731dd7f9d9SKent Overstreet 		goto out;
2741c6fdbd8SKent Overstreet 
2751dd7f9d9SKent Overstreet 	nr = blacklist_nr_entries(bl);
2761dd7f9d9SKent Overstreet 	dst = bl->start;
2771c6fdbd8SKent Overstreet 
2781dd7f9d9SKent Overstreet 	t = c->journal_seq_blacklist_table;
2791dd7f9d9SKent Overstreet 	BUG_ON(nr != t->nr);
2801c6fdbd8SKent Overstreet 
2811dd7f9d9SKent Overstreet 	for (src = bl->start, i = eytzinger0_first(t->nr);
2821dd7f9d9SKent Overstreet 	     src < bl->start + nr;
2831dd7f9d9SKent Overstreet 	     src++, i = eytzinger0_next(i, nr)) {
2841dd7f9d9SKent Overstreet 		BUG_ON(t->entries[i].start	!= le64_to_cpu(src->start));
2851dd7f9d9SKent Overstreet 		BUG_ON(t->entries[i].end	!= le64_to_cpu(src->end));
2861dd7f9d9SKent Overstreet 
2871dd7f9d9SKent Overstreet 		if (t->entries[i].dirty)
2881dd7f9d9SKent Overstreet 			*dst++ = *src;
2891dd7f9d9SKent Overstreet 	}
2901dd7f9d9SKent Overstreet 
2911dd7f9d9SKent Overstreet 	new_nr = dst - bl->start;
2921dd7f9d9SKent Overstreet 
2931dd7f9d9SKent Overstreet 	bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr);
2941dd7f9d9SKent Overstreet 
2951dd7f9d9SKent Overstreet 	if (new_nr != nr) {
2961dd7f9d9SKent Overstreet 		bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
2971dd7f9d9SKent Overstreet 				new_nr ? sb_blacklist_u64s(new_nr) : 0);
2981dd7f9d9SKent Overstreet 		BUG_ON(new_nr && !bl);
2991dd7f9d9SKent Overstreet 
3001dd7f9d9SKent Overstreet 		if (!new_nr)
3011dd7f9d9SKent Overstreet 			c->disk_sb.sb->features[0] &=
3021c3ff72cSKent Overstreet 				~(1ULL << BCH_FEATURE_journal_seq_blacklist_v3);
3031dd7f9d9SKent Overstreet 
3041dd7f9d9SKent Overstreet 		bch2_write_super(c);
3051dd7f9d9SKent Overstreet 	}
3061dd7f9d9SKent Overstreet out:
3071dd7f9d9SKent Overstreet 	mutex_unlock(&c->sb_lock);
3081c6fdbd8SKent Overstreet }
309