xref: /linux/fs/bcachefs/journal_seq_blacklist.c (revision cd63a278acedc375603820abff11a5414af53769)
11c6fdbd8SKent Overstreet // SPDX-License-Identifier: GPL-2.0
21c6fdbd8SKent Overstreet 
31c6fdbd8SKent Overstreet #include "bcachefs.h"
41dd7f9d9SKent Overstreet #include "eytzinger.h"
5f0415829SKent Overstreet #include "journal.h"
61c6fdbd8SKent Overstreet #include "journal_seq_blacklist.h"
71dd7f9d9SKent Overstreet #include "super-io.h"
81c6fdbd8SKent Overstreet 
/*
 * journal_seq_blacklist machinery:
 *
 * To guarantee order of btree updates after a crash, we need to detect when a
 * btree node entry (bset) is newer than the newest journal entry that was
 * successfully written, and ignore it - effectively ignoring any btree updates
 * that didn't make it into the journal.
 *
 * If we didn't do this, we might have two btree nodes, a and b, both with
 * updates that weren't written to the journal yet: if b was updated after a,
 * but b was flushed and not a - oops; on recovery we'll find that the updates
 * to b happened, but not the updates to a that happened before it.
 *
 * Ignoring bsets that are newer than the newest journal entry is always safe,
 * because everything they contain will also have been journalled - and must
 * still be present in the journal on disk until a journal entry has been
 * written _after_ that bset was written.
 *
 * To accomplish this, bsets record the newest journal sequence number they
 * contain updates for; then, on startup, the btree code queries the journal
 * code to ask "Is this sequence number newer than the newest journal entry? If
 * so, ignore it."
 *
 * When this happens, we must blacklist that journal sequence number: the
 * journal must not write any entries with that sequence number, and it must
 * record that it was blacklisted so that a) on recovery we don't think we have
 * missing journal entries and b) so that the btree code continues to ignore
 * that bset, until that btree node is rewritten.
 */
381c6fdbd8SKent Overstreet 
sb_blacklist_u64s(unsigned nr)391dd7f9d9SKent Overstreet static unsigned sb_blacklist_u64s(unsigned nr)
401dd7f9d9SKent Overstreet {
411dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl;
421c6fdbd8SKent Overstreet 
431dd7f9d9SKent Overstreet 	return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64);
441dd7f9d9SKent Overstreet }
451dd7f9d9SKent Overstreet 
/*
 * bch2_journal_seq_blacklist_add - record a blacklisted range in the superblock
 * @c:		filesystem
 * @start:	first sequence number of the range
 * @end:	end of the range
 *
 * Existing entries that overlap or touch the new range are folded into it and
 * removed, then the (possibly widened) range is inserted at the position where
 * the scan stopped. The journal_seq_blacklist_v3 feature bit is set and the
 * superblock is written, all under c->sb_lock.
 *
 * Returns -BCH_ERR_ENOSPC_sb_journal_seq_blacklist if the superblock field
 * cannot be resized, the error from bch2_write_super() if the write fails,
 * and otherwise the result of bch2_blacklist_table_initialize().
 */
int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
{
	struct bch_sb_field_journal_seq_blacklist *bl;
	unsigned pos, nr;
	int ret;

	mutex_lock(&c->sb_lock);
	bl = bch2_sb_field_get(c->disk_sb.sb, journal_seq_blacklist);
	nr = blacklist_nr_entries(bl);

	for (pos = 0; pos < nr; ) {
		u64 e_start	= le64_to_cpu(bl->start[pos].start);
		u64 e_end	= le64_to_cpu(bl->start[pos].end);

		/* Existing entry lies entirely after the new range: insert here */
		if (end < e_start)
			break;

		/* Existing entry lies entirely before the new range: keep it */
		if (start > e_end) {
			pos++;
			continue;
		}

		/*
		 * Existing entry touches or overlaps the new range: absorb it
		 * into the new range and drop it. pos is deliberately not
		 * advanced, the next entry has just shifted into this slot.
		 */
		start	= min(start,	e_start);
		end	= max(end,	e_end);
		array_remove_item(bl->start, nr, pos);
	}

	bl = bch2_sb_field_resize(&c->disk_sb, journal_seq_blacklist,
				  sb_blacklist_u64s(nr + 1));
	if (bl) {
		array_insert_item(bl->start, nr, pos, ((struct journal_seq_blacklist_entry) {
			.start	= cpu_to_le64(start),
			.end	= cpu_to_le64(end),
		}));
		c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << BCH_FEATURE_journal_seq_blacklist_v3);

		ret = bch2_write_super(c);
	} else {
		ret = -BCH_ERR_ENOSPC_sb_journal_seq_blacklist;
	}

	mutex_unlock(&c->sb_lock);

	if (ret)
		return ret;

	/* Rebuild the in-memory lookup table from the updated superblock */
	return bch2_blacklist_table_initialize(c);
}
971dd7f9d9SKent Overstreet 
journal_seq_blacklist_table_cmp(const void * _l,const void * _r)98ca1e02f7SKent Overstreet static int journal_seq_blacklist_table_cmp(const void *_l, const void *_r)
991dd7f9d9SKent Overstreet {
1001dd7f9d9SKent Overstreet 	const struct journal_seq_blacklist_table_entry *l = _l;
1011dd7f9d9SKent Overstreet 	const struct journal_seq_blacklist_table_entry *r = _r;
1021dd7f9d9SKent Overstreet 
1033ea2b1e1SKent Overstreet 	return cmp_int(l->start, r->start);
1041dd7f9d9SKent Overstreet }
1051dd7f9d9SKent Overstreet 
/*
 * bch2_journal_seq_is_blacklisted - test whether @seq falls in a blacklisted range
 * @c:		filesystem
 * @seq:	journal sequence number to look up
 * @dirty:	if true and @seq is blacklisted, flag the matching table entry
 *		as dirty
 *
 * Looks up the table entry with the greatest start <= @seq; @seq is
 * blacklisted when it is also below that entry's end.
 *
 * Returns false when no table has been built, when no entry starts at or
 * before @seq, or when @seq is at or past the end of the entry found.
 */
bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq,
				     bool dirty)
{
	struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table;
	struct journal_seq_blacklist_table_entry key = { .start = seq };
	struct journal_seq_blacklist_table_entry *hit;
	int idx;

	if (!t)
		return false;

	idx = eytzinger0_find_le(t->entries, t->nr,
				 sizeof(t->entries[0]),
				 journal_seq_blacklist_table_cmp,
				 &key);
	if (idx < 0)
		return false;

	hit = &t->entries[idx];

	/* find_le must never hand back an entry that starts after the key */
	BUG_ON(hit->start > seq);

	if (seq >= hit->end)
		return false;

	if (dirty)
		hit->dirty = true;
	return true;
}
1321dd7f9d9SKent Overstreet 
bch2_blacklist_table_initialize(struct bch_fs * c)1331dd7f9d9SKent Overstreet int bch2_blacklist_table_initialize(struct bch_fs *c)
1341dd7f9d9SKent Overstreet {
1351dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl =
1364637429eSKent Overstreet 		bch2_sb_field_get(c->disk_sb.sb, journal_seq_blacklist);
1371dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_table *t;
1381dd7f9d9SKent Overstreet 	unsigned i, nr = blacklist_nr_entries(bl);
1391dd7f9d9SKent Overstreet 
1401dd7f9d9SKent Overstreet 	if (!bl)
1411dd7f9d9SKent Overstreet 		return 0;
1421dd7f9d9SKent Overstreet 
1433e489998SErick Archer 	t = kzalloc(struct_size(t, entries, nr), GFP_KERNEL);
1441dd7f9d9SKent Overstreet 	if (!t)
14565d48e35SKent Overstreet 		return -BCH_ERR_ENOMEM_blacklist_table_init;
1461dd7f9d9SKent Overstreet 
1471dd7f9d9SKent Overstreet 	t->nr = nr;
1481dd7f9d9SKent Overstreet 
1491dd7f9d9SKent Overstreet 	for (i = 0; i < nr; i++) {
1501dd7f9d9SKent Overstreet 		t->entries[i].start	= le64_to_cpu(bl->start[i].start);
1511dd7f9d9SKent Overstreet 		t->entries[i].end	= le64_to_cpu(bl->start[i].end);
1521dd7f9d9SKent Overstreet 	}
1531dd7f9d9SKent Overstreet 
1541dd7f9d9SKent Overstreet 	eytzinger0_sort(t->entries,
1551dd7f9d9SKent Overstreet 			t->nr,
1561dd7f9d9SKent Overstreet 			sizeof(t->entries[0]),
1571dd7f9d9SKent Overstreet 			journal_seq_blacklist_table_cmp,
1581dd7f9d9SKent Overstreet 			NULL);
1591dd7f9d9SKent Overstreet 
160adbcada4SKent Overstreet 	kfree(c->journal_seq_blacklist_table);
1611dd7f9d9SKent Overstreet 	c->journal_seq_blacklist_table = t;
1621dd7f9d9SKent Overstreet 	return 0;
1631dd7f9d9SKent Overstreet }
1641dd7f9d9SKent Overstreet 
bch2_sb_journal_seq_blacklist_validate(struct bch_sb * sb,struct bch_sb_field * f,enum bch_validate_flags flags,struct printbuf * err)165a5c3e265SKent Overstreet static int bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb, struct bch_sb_field *f,
166a5c3e265SKent Overstreet 				enum bch_validate_flags flags, struct printbuf *err)
1671dd7f9d9SKent Overstreet {
1681dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl =
1691dd7f9d9SKent Overstreet 		field_to_type(f, journal_seq_blacklist);
170efe68e1dSKent Overstreet 	unsigned i, nr = blacklist_nr_entries(bl);
1711dd7f9d9SKent Overstreet 
172efe68e1dSKent Overstreet 	for (i = 0; i < nr; i++) {
173efe68e1dSKent Overstreet 		struct journal_seq_blacklist_entry *e = bl->start + i;
1741dd7f9d9SKent Overstreet 
175efe68e1dSKent Overstreet 		if (le64_to_cpu(e->start) >=
176efe68e1dSKent Overstreet 		    le64_to_cpu(e->end)) {
177401ec4dbSKent Overstreet 			prt_printf(err, "entry %u start >= end (%llu >= %llu)",
178efe68e1dSKent Overstreet 			       i, le64_to_cpu(e->start), le64_to_cpu(e->end));
17978c0b75cSKent Overstreet 			return -BCH_ERR_invalid_sb_journal_seq_blacklist;
1801dd7f9d9SKent Overstreet 		}
1811dd7f9d9SKent Overstreet 
182efe68e1dSKent Overstreet 		if (i + 1 < nr &&
183efe68e1dSKent Overstreet 		    le64_to_cpu(e[0].end) >
184efe68e1dSKent Overstreet 		    le64_to_cpu(e[1].start)) {
185401ec4dbSKent Overstreet 			prt_printf(err, "entry %u out of order with next entry (%llu > %llu)",
186efe68e1dSKent Overstreet 			       i + 1, le64_to_cpu(e[0].end), le64_to_cpu(e[1].start));
18778c0b75cSKent Overstreet 			return -BCH_ERR_invalid_sb_journal_seq_blacklist;
188efe68e1dSKent Overstreet 		}
189efe68e1dSKent Overstreet 	}
190efe68e1dSKent Overstreet 
191efe68e1dSKent Overstreet 	return 0;
1921dd7f9d9SKent Overstreet }
1931dd7f9d9SKent Overstreet 
bch2_sb_journal_seq_blacklist_to_text(struct printbuf * out,struct bch_sb * sb,struct bch_sb_field * f)1941dd7f9d9SKent Overstreet static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out,
1951dd7f9d9SKent Overstreet 						  struct bch_sb *sb,
1961dd7f9d9SKent Overstreet 						  struct bch_sb_field *f)
1971dd7f9d9SKent Overstreet {
1981dd7f9d9SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl =
1991dd7f9d9SKent Overstreet 		field_to_type(f, journal_seq_blacklist);
2001dd7f9d9SKent Overstreet 	struct journal_seq_blacklist_entry *i;
2011dd7f9d9SKent Overstreet 	unsigned nr = blacklist_nr_entries(bl);
2021dd7f9d9SKent Overstreet 
2031dd7f9d9SKent Overstreet 	for (i = bl->start; i < bl->start + nr; i++) {
2041dd7f9d9SKent Overstreet 		if (i != bl->start)
205401ec4dbSKent Overstreet 			prt_printf(out, " ");
2061dd7f9d9SKent Overstreet 
207401ec4dbSKent Overstreet 		prt_printf(out, "%llu-%llu",
2081dd7f9d9SKent Overstreet 		       le64_to_cpu(i->start),
2091dd7f9d9SKent Overstreet 		       le64_to_cpu(i->end));
2101dd7f9d9SKent Overstreet 	}
211401ec4dbSKent Overstreet 	prt_newline(out);
2121dd7f9d9SKent Overstreet }
2131dd7f9d9SKent Overstreet 
2141dd7f9d9SKent Overstreet const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = {
2151dd7f9d9SKent Overstreet 	.validate	= bch2_sb_journal_seq_blacklist_validate,
2161dd7f9d9SKent Overstreet 	.to_text	= bch2_sb_journal_seq_blacklist_to_text
2171dd7f9d9SKent Overstreet };
2189b6e2f1eSKent Overstreet 
bch2_blacklist_entries_gc(struct bch_fs * c)219f0415829SKent Overstreet bool bch2_blacklist_entries_gc(struct bch_fs *c)
2209b6e2f1eSKent Overstreet {
2219b6e2f1eSKent Overstreet 	struct journal_seq_blacklist_entry *src, *dst;
2229b6e2f1eSKent Overstreet 
223f0415829SKent Overstreet 	struct bch_sb_field_journal_seq_blacklist *bl =
224f0415829SKent Overstreet 		bch2_sb_field_get(c->disk_sb.sb, journal_seq_blacklist);
2259b6e2f1eSKent Overstreet 	if (!bl)
226f0415829SKent Overstreet 		return false;
2279b6e2f1eSKent Overstreet 
228f0415829SKent Overstreet 	unsigned nr = blacklist_nr_entries(bl);
2299b6e2f1eSKent Overstreet 	dst = bl->start;
2309b6e2f1eSKent Overstreet 
231f0415829SKent Overstreet 	struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table;
2329b6e2f1eSKent Overstreet 	BUG_ON(nr != t->nr);
2339b6e2f1eSKent Overstreet 
234f0415829SKent Overstreet 	unsigned i;
235*472237b6SPei Li 	for (src = bl->start, i = t->nr == 0 ? 0 : eytzinger0_first(t->nr);
2369b6e2f1eSKent Overstreet 	     src < bl->start + nr;
2379b6e2f1eSKent Overstreet 	     src++, i = eytzinger0_next(i, nr)) {
2389b6e2f1eSKent Overstreet 		BUG_ON(t->entries[i].start	!= le64_to_cpu(src->start));
2399b6e2f1eSKent Overstreet 		BUG_ON(t->entries[i].end	!= le64_to_cpu(src->end));
2409b6e2f1eSKent Overstreet 
241f0415829SKent Overstreet 		if (t->entries[i].dirty || t->entries[i].end >= c->journal.oldest_seq_found_ondisk)
2429b6e2f1eSKent Overstreet 			*dst++ = *src;
2439b6e2f1eSKent Overstreet 	}
2449b6e2f1eSKent Overstreet 
245f0415829SKent Overstreet 	unsigned new_nr = dst - bl->start;
246f0415829SKent Overstreet 	if (new_nr == nr)
247f0415829SKent Overstreet 		return false;
2489b6e2f1eSKent Overstreet 
249f0415829SKent Overstreet 	bch_verbose(c, "nr blacklist entries was %u, now %u", nr, new_nr);
2509b6e2f1eSKent Overstreet 
2514637429eSKent Overstreet 	bl = bch2_sb_field_resize(&c->disk_sb, journal_seq_blacklist,
2529b6e2f1eSKent Overstreet 				  new_nr ? sb_blacklist_u64s(new_nr) : 0);
2539b6e2f1eSKent Overstreet 	BUG_ON(new_nr && !bl);
254f0415829SKent Overstreet 	return true;
2559b6e2f1eSKent Overstreet }
256