11c6fdbd8SKent Overstreet // SPDX-License-Identifier: GPL-2.0 21c6fdbd8SKent Overstreet 31c6fdbd8SKent Overstreet #include "bcachefs.h" 41dd7f9d9SKent Overstreet #include "btree_iter.h" 51dd7f9d9SKent Overstreet #include "eytzinger.h" 61c6fdbd8SKent Overstreet #include "journal_seq_blacklist.h" 71dd7f9d9SKent Overstreet #include "super-io.h" 81c6fdbd8SKent Overstreet 91c6fdbd8SKent Overstreet /* 101c6fdbd8SKent Overstreet * journal_seq_blacklist machinery: 111c6fdbd8SKent Overstreet * 121c6fdbd8SKent Overstreet * To guarantee order of btree updates after a crash, we need to detect when a 131c6fdbd8SKent Overstreet * btree node entry (bset) is newer than the newest journal entry that was 141c6fdbd8SKent Overstreet * successfully written, and ignore it - effectively ignoring any btree updates 151c6fdbd8SKent Overstreet * that didn't make it into the journal. 161c6fdbd8SKent Overstreet * 171c6fdbd8SKent Overstreet * If we didn't do this, we might have two btree nodes, a and b, both with 181c6fdbd8SKent Overstreet * updates that weren't written to the journal yet: if b was updated after a, 191c6fdbd8SKent Overstreet * but b was flushed and not a - oops; on recovery we'll find that the updates 201c6fdbd8SKent Overstreet * to b happened, but not the updates to a that happened before it. 211c6fdbd8SKent Overstreet * 221c6fdbd8SKent Overstreet * Ignoring bsets that are newer than the newest journal entry is always safe, 231c6fdbd8SKent Overstreet * because everything they contain will also have been journalled - and must 241c6fdbd8SKent Overstreet * still be present in the journal on disk until a journal entry has been 251c6fdbd8SKent Overstreet * written _after_ that bset was written. 261c6fdbd8SKent Overstreet * 271c6fdbd8SKent Overstreet * To accomplish this, bsets record the newest journal sequence number they 281c6fdbd8SKent Overstreet * contain updates for; then, on startup, the btree code queries the journal 291c6fdbd8SKent Overstreet * code to ask "Is this sequence number newer than the newest journal entry? If 301c6fdbd8SKent Overstreet * so, ignore it." 311c6fdbd8SKent Overstreet * 321c6fdbd8SKent Overstreet * When this happens, we must blacklist that journal sequence number: the 331c6fdbd8SKent Overstreet * journal must not write any entries with that sequence number, and it must 341c6fdbd8SKent Overstreet * record that it was blacklisted so that a) on recovery we don't think we have 351c6fdbd8SKent Overstreet * missing journal entries and b) so that the btree code continues to ignore 361c6fdbd8SKent Overstreet * that bset, until that btree node is rewritten. 371c6fdbd8SKent Overstreet */ 381c6fdbd8SKent Overstreet 391dd7f9d9SKent Overstreet static unsigned 401dd7f9d9SKent Overstreet blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl) 411c6fdbd8SKent Overstreet { 421dd7f9d9SKent Overstreet return bl 431dd7f9d9SKent Overstreet ? ((vstruct_end(&bl->field) - (void *) &bl->start[0]) / 441dd7f9d9SKent Overstreet sizeof(struct journal_seq_blacklist_entry)) 451dd7f9d9SKent Overstreet : 0; 461dd7f9d9SKent Overstreet } 471c6fdbd8SKent Overstreet 481dd7f9d9SKent Overstreet static unsigned sb_blacklist_u64s(unsigned nr) 491dd7f9d9SKent Overstreet { 501dd7f9d9SKent Overstreet struct bch_sb_field_journal_seq_blacklist *bl; 511c6fdbd8SKent Overstreet 521dd7f9d9SKent Overstreet return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64); 531dd7f9d9SKent Overstreet } 541dd7f9d9SKent Overstreet 551dd7f9d9SKent Overstreet static struct bch_sb_field_journal_seq_blacklist * 561dd7f9d9SKent Overstreet blacklist_entry_try_merge(struct bch_fs *c, 571dd7f9d9SKent Overstreet struct bch_sb_field_journal_seq_blacklist *bl, 581dd7f9d9SKent Overstreet unsigned i) 591dd7f9d9SKent Overstreet { 601dd7f9d9SKent Overstreet unsigned nr = blacklist_nr_entries(bl); 611dd7f9d9SKent Overstreet 621dd7f9d9SKent Overstreet if (le64_to_cpu(bl->start[i].end) >= 631dd7f9d9SKent Overstreet le64_to_cpu(bl->start[i + 1].start)) { 641dd7f9d9SKent Overstreet bl->start[i].end = bl->start[i + 1].end; 651dd7f9d9SKent Overstreet --nr; 661dd7f9d9SKent Overstreet memmove(&bl->start[i], 671dd7f9d9SKent Overstreet &bl->start[i + 1], 681dd7f9d9SKent Overstreet sizeof(bl->start[0]) * (nr - i)); 691dd7f9d9SKent Overstreet 701dd7f9d9SKent Overstreet bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, 711dd7f9d9SKent Overstreet sb_blacklist_u64s(nr)); 721dd7f9d9SKent Overstreet BUG_ON(!bl); 731dd7f9d9SKent Overstreet } 741dd7f9d9SKent Overstreet 751dd7f9d9SKent Overstreet return bl; 761dd7f9d9SKent Overstreet } 771dd7f9d9SKent Overstreet 781dd7f9d9SKent Overstreet int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) 791dd7f9d9SKent Overstreet { 801dd7f9d9SKent Overstreet struct bch_sb_field_journal_seq_blacklist *bl; 811dd7f9d9SKent Overstreet unsigned i, nr; 821dd7f9d9SKent Overstreet int ret = 0; 831dd7f9d9SKent Overstreet 841dd7f9d9SKent Overstreet mutex_lock(&c->sb_lock); 851dd7f9d9SKent Overstreet bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb); 861dd7f9d9SKent Overstreet nr = blacklist_nr_entries(bl); 871dd7f9d9SKent Overstreet 881dd7f9d9SKent Overstreet if (bl) { 891dd7f9d9SKent Overstreet for (i = 0; i < nr; i++) { 901dd7f9d9SKent Overstreet struct journal_seq_blacklist_entry *e = 911dd7f9d9SKent Overstreet bl->start + i; 921dd7f9d9SKent Overstreet 931dd7f9d9SKent Overstreet if (start == le64_to_cpu(e->start) && 941dd7f9d9SKent Overstreet end == le64_to_cpu(e->end)) 951dd7f9d9SKent Overstreet goto out; 961dd7f9d9SKent Overstreet 971dd7f9d9SKent Overstreet if (start <= le64_to_cpu(e->start) && 981dd7f9d9SKent Overstreet end >= le64_to_cpu(e->end)) { 991dd7f9d9SKent Overstreet e->start = cpu_to_le64(start); 1001dd7f9d9SKent Overstreet e->end = cpu_to_le64(end); 1011dd7f9d9SKent Overstreet 1021dd7f9d9SKent Overstreet if (i + 1 < nr) 1031dd7f9d9SKent Overstreet bl = blacklist_entry_try_merge(c, 1041dd7f9d9SKent Overstreet bl, i); 1051dd7f9d9SKent Overstreet if (i) 1061dd7f9d9SKent Overstreet bl = blacklist_entry_try_merge(c, 1071dd7f9d9SKent Overstreet bl, i - 1); 1081dd7f9d9SKent Overstreet goto out_write_sb; 1091dd7f9d9SKent Overstreet } 1101dd7f9d9SKent Overstreet } 1111dd7f9d9SKent Overstreet } 1121dd7f9d9SKent Overstreet 1131dd7f9d9SKent Overstreet bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, 1141dd7f9d9SKent Overstreet sb_blacklist_u64s(nr + 1)); 1151dd7f9d9SKent Overstreet if (!bl) { 1161dd7f9d9SKent Overstreet ret = -ENOMEM; 1171dd7f9d9SKent Overstreet goto out; 1181dd7f9d9SKent Overstreet } 1191dd7f9d9SKent Overstreet 1201dd7f9d9SKent Overstreet bl->start[nr].start = cpu_to_le64(start); 1211dd7f9d9SKent Overstreet bl->start[nr].end = cpu_to_le64(end); 1221dd7f9d9SKent Overstreet out_write_sb: 1231dd7f9d9SKent Overstreet c->disk_sb.sb->features[0] |= 1241dd7f9d9SKent Overstreet 1ULL << BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3; 1251dd7f9d9SKent Overstreet 1261dd7f9d9SKent Overstreet ret = bch2_write_super(c); 1271dd7f9d9SKent Overstreet out: 1281dd7f9d9SKent Overstreet mutex_unlock(&c->sb_lock); 1291dd7f9d9SKent Overstreet 1301dd7f9d9SKent Overstreet return ret; 1311dd7f9d9SKent Overstreet } 1321dd7f9d9SKent Overstreet 1331dd7f9d9SKent Overstreet static int journal_seq_blacklist_table_cmp(const void *_l, 1341dd7f9d9SKent Overstreet const void *_r, size_t size) 1351dd7f9d9SKent Overstreet { 1361dd7f9d9SKent Overstreet const struct journal_seq_blacklist_table_entry *l = _l; 1371dd7f9d9SKent Overstreet const struct journal_seq_blacklist_table_entry *r = _r; 1381dd7f9d9SKent Overstreet 1393ea2b1e1SKent Overstreet return cmp_int(l->start, r->start); 1401dd7f9d9SKent Overstreet } 1411dd7f9d9SKent Overstreet 1421dd7f9d9SKent Overstreet bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq, 1431dd7f9d9SKent Overstreet bool dirty) 1441dd7f9d9SKent Overstreet { 1451dd7f9d9SKent Overstreet struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table; 1461dd7f9d9SKent Overstreet struct journal_seq_blacklist_table_entry search = { .start = seq }; 1471dd7f9d9SKent Overstreet int idx; 1481dd7f9d9SKent Overstreet 1491dd7f9d9SKent Overstreet if (!t) 1501dd7f9d9SKent Overstreet return false; 1511dd7f9d9SKent Overstreet 1521dd7f9d9SKent Overstreet idx = eytzinger0_find_le(t->entries, t->nr, 1531dd7f9d9SKent Overstreet sizeof(t->entries[0]), 1541dd7f9d9SKent Overstreet journal_seq_blacklist_table_cmp, 1551dd7f9d9SKent Overstreet &search); 1561dd7f9d9SKent Overstreet if (idx < 0) 1571dd7f9d9SKent Overstreet return false; 1581dd7f9d9SKent Overstreet 1591dd7f9d9SKent Overstreet BUG_ON(t->entries[idx].start > seq); 1601dd7f9d9SKent Overstreet 1611dd7f9d9SKent Overstreet if (seq >= t->entries[idx].end) 1621dd7f9d9SKent Overstreet return false; 1631dd7f9d9SKent Overstreet 1641dd7f9d9SKent Overstreet if (dirty) 1651dd7f9d9SKent Overstreet t->entries[idx].dirty = true; 1661dd7f9d9SKent Overstreet return true; 1671dd7f9d9SKent Overstreet } 1681dd7f9d9SKent Overstreet 1691dd7f9d9SKent Overstreet int bch2_blacklist_table_initialize(struct bch_fs *c) 1701dd7f9d9SKent Overstreet { 1711dd7f9d9SKent Overstreet struct bch_sb_field_journal_seq_blacklist *bl = 1721dd7f9d9SKent Overstreet bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb); 1731dd7f9d9SKent Overstreet struct journal_seq_blacklist_table *t; 1741dd7f9d9SKent Overstreet unsigned i, nr = blacklist_nr_entries(bl); 1751dd7f9d9SKent Overstreet 1761dd7f9d9SKent Overstreet BUG_ON(c->journal_seq_blacklist_table); 1771dd7f9d9SKent Overstreet 1781dd7f9d9SKent Overstreet if (!bl) 1791dd7f9d9SKent Overstreet return 0; 1801dd7f9d9SKent Overstreet 1811dd7f9d9SKent Overstreet t = kzalloc(sizeof(*t) + sizeof(t->entries[0]) * nr, 1821dd7f9d9SKent Overstreet GFP_KERNEL); 1831dd7f9d9SKent Overstreet if (!t) 1841dd7f9d9SKent Overstreet return -ENOMEM; 1851dd7f9d9SKent Overstreet 1861dd7f9d9SKent Overstreet t->nr = nr; 1871dd7f9d9SKent Overstreet 1881dd7f9d9SKent Overstreet for (i = 0; i < nr; i++) { 1891dd7f9d9SKent Overstreet t->entries[i].start = le64_to_cpu(bl->start[i].start); 1901dd7f9d9SKent Overstreet t->entries[i].end = le64_to_cpu(bl->start[i].end); 1911dd7f9d9SKent Overstreet } 1921dd7f9d9SKent Overstreet 1931dd7f9d9SKent Overstreet eytzinger0_sort(t->entries, 1941dd7f9d9SKent Overstreet t->nr, 1951dd7f9d9SKent Overstreet sizeof(t->entries[0]), 1961dd7f9d9SKent Overstreet journal_seq_blacklist_table_cmp, 1971dd7f9d9SKent Overstreet NULL); 1981dd7f9d9SKent Overstreet 1991dd7f9d9SKent Overstreet c->journal_seq_blacklist_table = t; 2001dd7f9d9SKent Overstreet return 0; 2011dd7f9d9SKent Overstreet } 2021dd7f9d9SKent Overstreet 2031dd7f9d9SKent Overstreet static const char * 2041dd7f9d9SKent Overstreet bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb, 2051dd7f9d9SKent Overstreet struct bch_sb_field *f) 2061dd7f9d9SKent Overstreet { 2071dd7f9d9SKent Overstreet struct bch_sb_field_journal_seq_blacklist *bl = 2081dd7f9d9SKent Overstreet field_to_type(f, journal_seq_blacklist); 2091dd7f9d9SKent Overstreet struct journal_seq_blacklist_entry *i; 2101dd7f9d9SKent Overstreet unsigned nr = blacklist_nr_entries(bl); 2111dd7f9d9SKent Overstreet 2121dd7f9d9SKent Overstreet for (i = bl->start; i < bl->start + nr; i++) { 2131dd7f9d9SKent Overstreet if (le64_to_cpu(i->start) >= 2141dd7f9d9SKent Overstreet le64_to_cpu(i->end)) 2151dd7f9d9SKent Overstreet return "entry start >= end"; 2161dd7f9d9SKent Overstreet 2171dd7f9d9SKent Overstreet if (i + 1 < bl->start + nr && 2181dd7f9d9SKent Overstreet le64_to_cpu(i[0].end) > 2191dd7f9d9SKent Overstreet le64_to_cpu(i[1].start)) 2201dd7f9d9SKent Overstreet return "entries out of order"; 2211dd7f9d9SKent Overstreet } 2221dd7f9d9SKent Overstreet 2231dd7f9d9SKent Overstreet return NULL; 2241dd7f9d9SKent Overstreet } 2251dd7f9d9SKent Overstreet 2261dd7f9d9SKent Overstreet static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out, 2271dd7f9d9SKent Overstreet struct bch_sb *sb, 2281dd7f9d9SKent Overstreet struct bch_sb_field *f) 2291dd7f9d9SKent Overstreet { 2301dd7f9d9SKent Overstreet struct bch_sb_field_journal_seq_blacklist *bl = 2311dd7f9d9SKent Overstreet field_to_type(f, journal_seq_blacklist); 2321dd7f9d9SKent Overstreet struct journal_seq_blacklist_entry *i; 2331dd7f9d9SKent Overstreet unsigned nr = blacklist_nr_entries(bl); 2341dd7f9d9SKent Overstreet 2351dd7f9d9SKent Overstreet for (i = bl->start; i < bl->start + nr; i++) { 2361dd7f9d9SKent Overstreet if (i != bl->start) 2371dd7f9d9SKent Overstreet pr_buf(out, " "); 2381dd7f9d9SKent Overstreet 2391dd7f9d9SKent Overstreet pr_buf(out, "%llu-%llu", 2401dd7f9d9SKent Overstreet le64_to_cpu(i->start), 2411dd7f9d9SKent Overstreet le64_to_cpu(i->end)); 2421dd7f9d9SKent Overstreet } 2431dd7f9d9SKent Overstreet } 2441dd7f9d9SKent Overstreet 2451dd7f9d9SKent Overstreet const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = { 2461dd7f9d9SKent Overstreet .validate = bch2_sb_journal_seq_blacklist_validate, 2471dd7f9d9SKent Overstreet .to_text = bch2_sb_journal_seq_blacklist_to_text 2481dd7f9d9SKent Overstreet }; 2491dd7f9d9SKent Overstreet 2501dd7f9d9SKent Overstreet void bch2_blacklist_entries_gc(struct work_struct *work) 2511dd7f9d9SKent Overstreet { 2521dd7f9d9SKent Overstreet struct bch_fs *c = container_of(work, struct bch_fs, 2531dd7f9d9SKent Overstreet journal_seq_blacklist_gc_work); 2541dd7f9d9SKent Overstreet struct journal_seq_blacklist_table *t; 2551dd7f9d9SKent Overstreet struct bch_sb_field_journal_seq_blacklist *bl; 2561dd7f9d9SKent Overstreet struct journal_seq_blacklist_entry *src, *dst; 257424eb881SKent Overstreet struct btree_trans trans; 2581dd7f9d9SKent Overstreet unsigned i, nr, new_nr; 2591dd7f9d9SKent Overstreet int ret; 2601c6fdbd8SKent Overstreet 261*20bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 262424eb881SKent Overstreet 2631dd7f9d9SKent Overstreet for (i = 0; i < BTREE_ID_NR; i++) { 2641dd7f9d9SKent Overstreet struct btree_iter *iter; 2651dd7f9d9SKent Overstreet struct btree *b; 2661c6fdbd8SKent Overstreet 2671dd7f9d9SKent Overstreet for_each_btree_node(&trans, iter, i, POS_MIN, 2681dd7f9d9SKent Overstreet BTREE_ITER_PREFETCH, b) 2691dd7f9d9SKent Overstreet if (test_bit(BCH_FS_STOPPING, &c->flags)) { 270424eb881SKent Overstreet bch2_trans_exit(&trans); 2711c6fdbd8SKent Overstreet return; 2721c6fdbd8SKent Overstreet } 2731dd7f9d9SKent Overstreet bch2_trans_iter_free(&trans, iter); 2741c6fdbd8SKent Overstreet } 2751c6fdbd8SKent Overstreet 2761dd7f9d9SKent Overstreet ret = bch2_trans_exit(&trans); 2771c6fdbd8SKent Overstreet if (ret) 2781c6fdbd8SKent Overstreet return; 2791c6fdbd8SKent Overstreet 2801dd7f9d9SKent Overstreet mutex_lock(&c->sb_lock); 2811dd7f9d9SKent Overstreet bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb); 2821dd7f9d9SKent Overstreet if (!bl) 2831dd7f9d9SKent Overstreet goto out; 2841c6fdbd8SKent Overstreet 2851dd7f9d9SKent Overstreet nr = blacklist_nr_entries(bl); 2861dd7f9d9SKent Overstreet dst = bl->start; 2871c6fdbd8SKent Overstreet 2881dd7f9d9SKent Overstreet t = c->journal_seq_blacklist_table; 2891dd7f9d9SKent Overstreet BUG_ON(nr != t->nr); 2901c6fdbd8SKent Overstreet 2911dd7f9d9SKent Overstreet for (src = bl->start, i = eytzinger0_first(t->nr); 2921dd7f9d9SKent Overstreet src < bl->start + nr; 2931dd7f9d9SKent Overstreet src++, i = eytzinger0_next(i, nr)) { 2941dd7f9d9SKent Overstreet BUG_ON(t->entries[i].start != le64_to_cpu(src->start)); 2951dd7f9d9SKent Overstreet BUG_ON(t->entries[i].end != le64_to_cpu(src->end)); 2961dd7f9d9SKent Overstreet 2971dd7f9d9SKent Overstreet if (t->entries[i].dirty) 2981dd7f9d9SKent Overstreet *dst++ = *src; 2991dd7f9d9SKent Overstreet } 3001dd7f9d9SKent Overstreet 3011dd7f9d9SKent Overstreet new_nr = dst - bl->start; 3021dd7f9d9SKent Overstreet 3031dd7f9d9SKent Overstreet bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr); 3041dd7f9d9SKent Overstreet 3051dd7f9d9SKent Overstreet if (new_nr != nr) { 3061dd7f9d9SKent Overstreet bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, 3071dd7f9d9SKent Overstreet new_nr ? sb_blacklist_u64s(new_nr) : 0); 3081dd7f9d9SKent Overstreet BUG_ON(new_nr && !bl); 3091dd7f9d9SKent Overstreet 3101dd7f9d9SKent Overstreet if (!new_nr) 3111dd7f9d9SKent Overstreet c->disk_sb.sb->features[0] &= 3121dd7f9d9SKent Overstreet ~(1ULL << BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3); 3131dd7f9d9SKent Overstreet 3141dd7f9d9SKent Overstreet bch2_write_super(c); 3151dd7f9d9SKent Overstreet } 3161dd7f9d9SKent Overstreet out: 3171dd7f9d9SKent Overstreet mutex_unlock(&c->sb_lock); 3181c6fdbd8SKent Overstreet } 319