11c6fdbd8SKent Overstreet // SPDX-License-Identifier: GPL-2.0 21c6fdbd8SKent Overstreet 31c6fdbd8SKent Overstreet #include "bcachefs.h" 41dd7f9d9SKent Overstreet #include "btree_iter.h" 51dd7f9d9SKent Overstreet #include "eytzinger.h" 61c6fdbd8SKent Overstreet #include "journal_seq_blacklist.h" 71dd7f9d9SKent Overstreet #include "super-io.h" 81c6fdbd8SKent Overstreet 91c6fdbd8SKent Overstreet /* 101c6fdbd8SKent Overstreet * journal_seq_blacklist machinery: 111c6fdbd8SKent Overstreet * 121c6fdbd8SKent Overstreet * To guarantee order of btree updates after a crash, we need to detect when a 131c6fdbd8SKent Overstreet * btree node entry (bset) is newer than the newest journal entry that was 141c6fdbd8SKent Overstreet * successfully written, and ignore it - effectively ignoring any btree updates 151c6fdbd8SKent Overstreet * that didn't make it into the journal. 161c6fdbd8SKent Overstreet * 171c6fdbd8SKent Overstreet * If we didn't do this, we might have two btree nodes, a and b, both with 181c6fdbd8SKent Overstreet * updates that weren't written to the journal yet: if b was updated after a, 191c6fdbd8SKent Overstreet * but b was flushed and not a - oops; on recovery we'll find that the updates 201c6fdbd8SKent Overstreet * to b happened, but not the updates to a that happened before it. 211c6fdbd8SKent Overstreet * 221c6fdbd8SKent Overstreet * Ignoring bsets that are newer than the newest journal entry is always safe, 231c6fdbd8SKent Overstreet * because everything they contain will also have been journalled - and must 241c6fdbd8SKent Overstreet * still be present in the journal on disk until a journal entry has been 251c6fdbd8SKent Overstreet * written _after_ that bset was written. 261c6fdbd8SKent Overstreet * 271c6fdbd8SKent Overstreet * To accomplish this, bsets record the newest journal sequence number they 281c6fdbd8SKent Overstreet * contain updates for; then, on startup, the btree code queries the journal 291c6fdbd8SKent Overstreet * code to ask "Is this sequence number newer than the newest journal entry? If 301c6fdbd8SKent Overstreet * so, ignore it." 311c6fdbd8SKent Overstreet * 321c6fdbd8SKent Overstreet * When this happens, we must blacklist that journal sequence number: the 331c6fdbd8SKent Overstreet * journal must not write any entries with that sequence number, and it must 341c6fdbd8SKent Overstreet * record that it was blacklisted so that a) on recovery we don't think we have 351c6fdbd8SKent Overstreet * missing journal entries and b) so that the btree code continues to ignore 361c6fdbd8SKent Overstreet * that bset, until that btree node is rewritten. 371c6fdbd8SKent Overstreet */ 381c6fdbd8SKent Overstreet 391dd7f9d9SKent Overstreet static unsigned sb_blacklist_u64s(unsigned nr) 401dd7f9d9SKent Overstreet { 411dd7f9d9SKent Overstreet struct bch_sb_field_journal_seq_blacklist *bl; 421c6fdbd8SKent Overstreet 431dd7f9d9SKent Overstreet return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64); 441dd7f9d9SKent Overstreet } 451dd7f9d9SKent Overstreet 461dd7f9d9SKent Overstreet static struct bch_sb_field_journal_seq_blacklist * 471dd7f9d9SKent Overstreet blacklist_entry_try_merge(struct bch_fs *c, 481dd7f9d9SKent Overstreet struct bch_sb_field_journal_seq_blacklist *bl, 491dd7f9d9SKent Overstreet unsigned i) 501dd7f9d9SKent Overstreet { 511dd7f9d9SKent Overstreet unsigned nr = blacklist_nr_entries(bl); 521dd7f9d9SKent Overstreet 531dd7f9d9SKent Overstreet if (le64_to_cpu(bl->start[i].end) >= 541dd7f9d9SKent Overstreet le64_to_cpu(bl->start[i + 1].start)) { 551dd7f9d9SKent Overstreet bl->start[i].end = bl->start[i + 1].end; 561dd7f9d9SKent Overstreet --nr; 571dd7f9d9SKent Overstreet memmove(&bl->start[i], 581dd7f9d9SKent Overstreet &bl->start[i + 1], 591dd7f9d9SKent Overstreet sizeof(bl->start[0]) * (nr - i)); 601dd7f9d9SKent Overstreet 611dd7f9d9SKent Overstreet bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, 621dd7f9d9SKent Overstreet sb_blacklist_u64s(nr)); 631dd7f9d9SKent Overstreet BUG_ON(!bl); 641dd7f9d9SKent Overstreet } 651dd7f9d9SKent Overstreet 661dd7f9d9SKent Overstreet return bl; 671dd7f9d9SKent Overstreet } 681dd7f9d9SKent Overstreet 691dd7f9d9SKent Overstreet int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) 701dd7f9d9SKent Overstreet { 711dd7f9d9SKent Overstreet struct bch_sb_field_journal_seq_blacklist *bl; 721dd7f9d9SKent Overstreet unsigned i, nr; 731dd7f9d9SKent Overstreet int ret = 0; 741dd7f9d9SKent Overstreet 751dd7f9d9SKent Overstreet mutex_lock(&c->sb_lock); 761dd7f9d9SKent Overstreet bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb); 771dd7f9d9SKent Overstreet nr = blacklist_nr_entries(bl); 781dd7f9d9SKent Overstreet 791dd7f9d9SKent Overstreet if (bl) { 801dd7f9d9SKent Overstreet for (i = 0; i < nr; i++) { 811dd7f9d9SKent Overstreet struct journal_seq_blacklist_entry *e = 821dd7f9d9SKent Overstreet bl->start + i; 831dd7f9d9SKent Overstreet 841dd7f9d9SKent Overstreet if (start == le64_to_cpu(e->start) && 851dd7f9d9SKent Overstreet end == le64_to_cpu(e->end)) 861dd7f9d9SKent Overstreet goto out; 871dd7f9d9SKent Overstreet 881dd7f9d9SKent Overstreet if (start <= le64_to_cpu(e->start) && 891dd7f9d9SKent Overstreet end >= le64_to_cpu(e->end)) { 901dd7f9d9SKent Overstreet e->start = cpu_to_le64(start); 911dd7f9d9SKent Overstreet e->end = cpu_to_le64(end); 921dd7f9d9SKent Overstreet 931dd7f9d9SKent Overstreet if (i + 1 < nr) 941dd7f9d9SKent Overstreet bl = blacklist_entry_try_merge(c, 951dd7f9d9SKent Overstreet bl, i); 961dd7f9d9SKent Overstreet if (i) 971dd7f9d9SKent Overstreet bl = blacklist_entry_try_merge(c, 981dd7f9d9SKent Overstreet bl, i - 1); 991dd7f9d9SKent Overstreet goto out_write_sb; 1001dd7f9d9SKent Overstreet } 1011dd7f9d9SKent Overstreet } 1021dd7f9d9SKent Overstreet } 1031dd7f9d9SKent Overstreet 1041dd7f9d9SKent Overstreet bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, 1051dd7f9d9SKent Overstreet sb_blacklist_u64s(nr + 1)); 1061dd7f9d9SKent Overstreet if (!bl) { 1071dd7f9d9SKent Overstreet ret = -ENOMEM; 1081dd7f9d9SKent Overstreet goto out; 1091dd7f9d9SKent Overstreet } 1101dd7f9d9SKent Overstreet 1111dd7f9d9SKent Overstreet bl->start[nr].start = cpu_to_le64(start); 1121dd7f9d9SKent Overstreet bl->start[nr].end = cpu_to_le64(end); 1131dd7f9d9SKent Overstreet out_write_sb: 1141dd7f9d9SKent Overstreet c->disk_sb.sb->features[0] |= 1151c3ff72cSKent Overstreet 1ULL << BCH_FEATURE_journal_seq_blacklist_v3; 1161dd7f9d9SKent Overstreet 1171dd7f9d9SKent Overstreet ret = bch2_write_super(c); 1181dd7f9d9SKent Overstreet out: 1191dd7f9d9SKent Overstreet mutex_unlock(&c->sb_lock); 1201dd7f9d9SKent Overstreet 121*adbcada4SKent Overstreet return ret ?: bch2_blacklist_table_initialize(c); 1221dd7f9d9SKent Overstreet } 1231dd7f9d9SKent Overstreet 1241dd7f9d9SKent Overstreet static int journal_seq_blacklist_table_cmp(const void *_l, 1251dd7f9d9SKent Overstreet const void *_r, size_t size) 1261dd7f9d9SKent Overstreet { 1271dd7f9d9SKent Overstreet const struct journal_seq_blacklist_table_entry *l = _l; 1281dd7f9d9SKent Overstreet const struct journal_seq_blacklist_table_entry *r = _r; 1291dd7f9d9SKent Overstreet 1303ea2b1e1SKent Overstreet return cmp_int(l->start, r->start); 1311dd7f9d9SKent Overstreet } 1321dd7f9d9SKent Overstreet 1331dd7f9d9SKent Overstreet bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq, 1341dd7f9d9SKent Overstreet bool dirty) 1351dd7f9d9SKent Overstreet { 1361dd7f9d9SKent Overstreet struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table; 1371dd7f9d9SKent Overstreet struct journal_seq_blacklist_table_entry search = { .start = seq }; 1381dd7f9d9SKent Overstreet int idx; 1391dd7f9d9SKent Overstreet 1401dd7f9d9SKent Overstreet if (!t) 1411dd7f9d9SKent Overstreet return false; 1421dd7f9d9SKent Overstreet 1431dd7f9d9SKent Overstreet idx = eytzinger0_find_le(t->entries, t->nr, 1441dd7f9d9SKent Overstreet sizeof(t->entries[0]), 1451dd7f9d9SKent Overstreet journal_seq_blacklist_table_cmp, 1461dd7f9d9SKent Overstreet &search); 1471dd7f9d9SKent Overstreet if (idx < 0) 1481dd7f9d9SKent Overstreet return false; 1491dd7f9d9SKent Overstreet 1501dd7f9d9SKent Overstreet BUG_ON(t->entries[idx].start > seq); 1511dd7f9d9SKent Overstreet 1521dd7f9d9SKent Overstreet if (seq >= t->entries[idx].end) 1531dd7f9d9SKent Overstreet return false; 1541dd7f9d9SKent Overstreet 1551dd7f9d9SKent Overstreet if (dirty) 1561dd7f9d9SKent Overstreet t->entries[idx].dirty = true; 1571dd7f9d9SKent Overstreet return true; 1581dd7f9d9SKent Overstreet } 1591dd7f9d9SKent Overstreet 1601dd7f9d9SKent Overstreet int bch2_blacklist_table_initialize(struct bch_fs *c) 1611dd7f9d9SKent Overstreet { 1621dd7f9d9SKent Overstreet struct bch_sb_field_journal_seq_blacklist *bl = 1631dd7f9d9SKent Overstreet bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb); 1641dd7f9d9SKent Overstreet struct journal_seq_blacklist_table *t; 1651dd7f9d9SKent Overstreet unsigned i, nr = blacklist_nr_entries(bl); 1661dd7f9d9SKent Overstreet 1671dd7f9d9SKent Overstreet if (!bl) 1681dd7f9d9SKent Overstreet return 0; 1691dd7f9d9SKent Overstreet 1701dd7f9d9SKent Overstreet t = kzalloc(sizeof(*t) + sizeof(t->entries[0]) * nr, 1711dd7f9d9SKent Overstreet GFP_KERNEL); 1721dd7f9d9SKent Overstreet if (!t) 1731dd7f9d9SKent Overstreet return -ENOMEM; 1741dd7f9d9SKent Overstreet 1751dd7f9d9SKent Overstreet t->nr = nr; 1761dd7f9d9SKent Overstreet 1771dd7f9d9SKent Overstreet for (i = 0; i < nr; i++) { 1781dd7f9d9SKent Overstreet t->entries[i].start = le64_to_cpu(bl->start[i].start); 1791dd7f9d9SKent Overstreet t->entries[i].end = le64_to_cpu(bl->start[i].end); 1801dd7f9d9SKent Overstreet } 1811dd7f9d9SKent Overstreet 1821dd7f9d9SKent Overstreet eytzinger0_sort(t->entries, 1831dd7f9d9SKent Overstreet t->nr, 1841dd7f9d9SKent Overstreet sizeof(t->entries[0]), 1851dd7f9d9SKent Overstreet journal_seq_blacklist_table_cmp, 1861dd7f9d9SKent Overstreet NULL); 1871dd7f9d9SKent Overstreet 188*adbcada4SKent Overstreet kfree(c->journal_seq_blacklist_table); 1891dd7f9d9SKent Overstreet c->journal_seq_blacklist_table = t; 1901dd7f9d9SKent Overstreet return 0; 1911dd7f9d9SKent Overstreet } 1921dd7f9d9SKent Overstreet 1931dd7f9d9SKent Overstreet static const char * 1941dd7f9d9SKent Overstreet bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb, 1951dd7f9d9SKent Overstreet struct bch_sb_field *f) 1961dd7f9d9SKent Overstreet { 1971dd7f9d9SKent Overstreet struct bch_sb_field_journal_seq_blacklist *bl = 1981dd7f9d9SKent Overstreet field_to_type(f, journal_seq_blacklist); 1991dd7f9d9SKent Overstreet struct journal_seq_blacklist_entry *i; 2001dd7f9d9SKent Overstreet unsigned nr = blacklist_nr_entries(bl); 2011dd7f9d9SKent Overstreet 2021dd7f9d9SKent Overstreet for (i = bl->start; i < bl->start + nr; i++) { 2031dd7f9d9SKent Overstreet if (le64_to_cpu(i->start) >= 2041dd7f9d9SKent Overstreet le64_to_cpu(i->end)) 2051dd7f9d9SKent Overstreet return "entry start >= end"; 2061dd7f9d9SKent Overstreet 2071dd7f9d9SKent Overstreet if (i + 1 < bl->start + nr && 2081dd7f9d9SKent Overstreet le64_to_cpu(i[0].end) > 2091dd7f9d9SKent Overstreet le64_to_cpu(i[1].start)) 2101dd7f9d9SKent Overstreet return "entries out of order"; 2111dd7f9d9SKent Overstreet } 2121dd7f9d9SKent Overstreet 2131dd7f9d9SKent Overstreet return NULL; 2141dd7f9d9SKent Overstreet } 2151dd7f9d9SKent Overstreet 2161dd7f9d9SKent Overstreet static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out, 2171dd7f9d9SKent Overstreet struct bch_sb *sb, 2181dd7f9d9SKent Overstreet struct bch_sb_field *f) 2191dd7f9d9SKent Overstreet { 2201dd7f9d9SKent Overstreet struct bch_sb_field_journal_seq_blacklist *bl = 2211dd7f9d9SKent Overstreet field_to_type(f, journal_seq_blacklist); 2221dd7f9d9SKent Overstreet struct journal_seq_blacklist_entry *i; 2231dd7f9d9SKent Overstreet unsigned nr = blacklist_nr_entries(bl); 2241dd7f9d9SKent Overstreet 2251dd7f9d9SKent Overstreet for (i = bl->start; i < bl->start + nr; i++) { 2261dd7f9d9SKent Overstreet if (i != bl->start) 2271dd7f9d9SKent Overstreet pr_buf(out, " "); 2281dd7f9d9SKent Overstreet 2291dd7f9d9SKent Overstreet pr_buf(out, "%llu-%llu", 2301dd7f9d9SKent Overstreet le64_to_cpu(i->start), 2311dd7f9d9SKent Overstreet le64_to_cpu(i->end)); 2321dd7f9d9SKent Overstreet } 2331dd7f9d9SKent Overstreet } 2341dd7f9d9SKent Overstreet 2351dd7f9d9SKent Overstreet const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = { 2361dd7f9d9SKent Overstreet .validate = bch2_sb_journal_seq_blacklist_validate, 2371dd7f9d9SKent Overstreet .to_text = bch2_sb_journal_seq_blacklist_to_text 2381dd7f9d9SKent Overstreet }; 2391dd7f9d9SKent Overstreet 2401dd7f9d9SKent Overstreet void bch2_blacklist_entries_gc(struct work_struct *work) 2411dd7f9d9SKent Overstreet { 2421dd7f9d9SKent Overstreet struct bch_fs *c = container_of(work, struct bch_fs, 2431dd7f9d9SKent Overstreet journal_seq_blacklist_gc_work); 2441dd7f9d9SKent Overstreet struct journal_seq_blacklist_table *t; 2451dd7f9d9SKent Overstreet struct bch_sb_field_journal_seq_blacklist *bl; 2461dd7f9d9SKent Overstreet struct journal_seq_blacklist_entry *src, *dst; 247424eb881SKent Overstreet struct btree_trans trans; 2481dd7f9d9SKent Overstreet unsigned i, nr, new_nr; 2491dd7f9d9SKent Overstreet int ret; 2501c6fdbd8SKent Overstreet 25120bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 252424eb881SKent Overstreet 2531dd7f9d9SKent Overstreet for (i = 0; i < BTREE_ID_NR; i++) { 2541dd7f9d9SKent Overstreet struct btree_iter *iter; 2551dd7f9d9SKent Overstreet struct btree *b; 2561c6fdbd8SKent Overstreet 2571dd7f9d9SKent Overstreet for_each_btree_node(&trans, iter, i, POS_MIN, 2581dd7f9d9SKent Overstreet BTREE_ITER_PREFETCH, b) 2591dd7f9d9SKent Overstreet if (test_bit(BCH_FS_STOPPING, &c->flags)) { 260424eb881SKent Overstreet bch2_trans_exit(&trans); 2611c6fdbd8SKent Overstreet return; 2621c6fdbd8SKent Overstreet } 2631dd7f9d9SKent Overstreet bch2_trans_iter_free(&trans, iter); 2641c6fdbd8SKent Overstreet } 2651c6fdbd8SKent Overstreet 2661dd7f9d9SKent Overstreet ret = bch2_trans_exit(&trans); 2671c6fdbd8SKent Overstreet if (ret) 2681c6fdbd8SKent Overstreet return; 2691c6fdbd8SKent Overstreet 2701dd7f9d9SKent Overstreet mutex_lock(&c->sb_lock); 2711dd7f9d9SKent Overstreet bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb); 2721dd7f9d9SKent Overstreet if (!bl) 2731dd7f9d9SKent Overstreet goto out; 2741c6fdbd8SKent Overstreet 2751dd7f9d9SKent Overstreet nr = blacklist_nr_entries(bl); 2761dd7f9d9SKent Overstreet dst = bl->start; 2771c6fdbd8SKent Overstreet 2781dd7f9d9SKent Overstreet t = c->journal_seq_blacklist_table; 2791dd7f9d9SKent Overstreet BUG_ON(nr != t->nr); 2801c6fdbd8SKent Overstreet 2811dd7f9d9SKent Overstreet for (src = bl->start, i = eytzinger0_first(t->nr); 2821dd7f9d9SKent Overstreet src < bl->start + nr; 2831dd7f9d9SKent Overstreet src++, i = eytzinger0_next(i, nr)) { 2841dd7f9d9SKent Overstreet BUG_ON(t->entries[i].start != le64_to_cpu(src->start)); 2851dd7f9d9SKent Overstreet BUG_ON(t->entries[i].end != le64_to_cpu(src->end)); 2861dd7f9d9SKent Overstreet 2871dd7f9d9SKent Overstreet if (t->entries[i].dirty) 2881dd7f9d9SKent Overstreet *dst++ = *src; 2891dd7f9d9SKent Overstreet } 2901dd7f9d9SKent Overstreet 2911dd7f9d9SKent Overstreet new_nr = dst - bl->start; 2921dd7f9d9SKent Overstreet 2931dd7f9d9SKent Overstreet bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr); 2941dd7f9d9SKent Overstreet 2951dd7f9d9SKent Overstreet if (new_nr != nr) { 2961dd7f9d9SKent Overstreet bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, 2971dd7f9d9SKent Overstreet new_nr ? sb_blacklist_u64s(new_nr) : 0); 2981dd7f9d9SKent Overstreet BUG_ON(new_nr && !bl); 2991dd7f9d9SKent Overstreet 3001dd7f9d9SKent Overstreet if (!new_nr) 3011dd7f9d9SKent Overstreet c->disk_sb.sb->features[0] &= 3021c3ff72cSKent Overstreet ~(1ULL << BCH_FEATURE_journal_seq_blacklist_v3); 3031dd7f9d9SKent Overstreet 3041dd7f9d9SKent Overstreet bch2_write_super(c); 3051dd7f9d9SKent Overstreet } 3061dd7f9d9SKent Overstreet out: 3071dd7f9d9SKent Overstreet mutex_unlock(&c->sb_lock); 3081c6fdbd8SKent Overstreet } 309