// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "btree_gc.h"
#include "btree_node_scan.h"
#include "disk_accounting.h"
#include "ec.h"
#include "fsck.h"
#include "inode.h"
#include "journal.h"
#include "lru.h"
#include "logged_ops.h"
#include "movinggc.h"
#include "rebalance.h"
#include "recovery.h"
#include "recovery_passes.h"
#include "snapshot.h"
#include "subvolume.h"
#include "super.h"
#include "super-io.h"

const char * const bch2_recovery_passes[] = {
#define x(_fn, ...)	#_fn,
	BCH_RECOVERY_PASSES()
#undef x
	NULL
};

/* Fake recovery pass, so that scan_for_btree_nodes isn't 0: */
static int bch2_recovery_pass_empty(struct bch_fs *c)
{
	return 0;
}

static int bch2_set_may_go_rw(struct bch_fs *c)
{
	struct journal_keys *keys = &c->journal_keys;

	/*
	 * After we go RW, the journal keys buffer can't be modified (except for
	 * setting journal_key->overwritten): it will be accessed by multiple
	 * threads
	 */
	move_gap(keys, keys->nr);

	set_bit(BCH_FS_may_go_rw, &c->flags);

	if (keys->nr || !c->opts.read_only || c->opts.fsck || !c->sb.clean || c->opts.recovery_passes)
		return bch2_fs_read_write_early(c);
	return 0;
}

struct recovery_pass_fn {
	int		(*fn)(struct bch_fs *);
	unsigned	when;
};

static struct recovery_pass_fn recovery_pass_fns[] = {
#define x(_fn, _id, _when)	{ .fn = bch2_##_fn, .when = _when },
	BCH_RECOVERY_PASSES()
#undef x
};

static const u8 passes_to_stable_map[] = {
#define x(n, id, ...)	[BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
	BCH_RECOVERY_PASSES()
#undef x
};

static enum bch_recovery_pass_stable bch2_recovery_pass_to_stable(enum bch_recovery_pass pass)
{
	return passes_to_stable_map[pass];
}

u64 bch2_recovery_passes_to_stable(u64 v)
{
	u64 ret = 0;
	for (unsigned i = 0; i < ARRAY_SIZE(passes_to_stable_map); i++)
		if (v & BIT_ULL(i))
			ret |= BIT_ULL(passes_to_stable_map[i]);
	return ret;
}

u64 bch2_recovery_passes_from_stable(u64 v)
{
	static const u8 map[] = {
#define x(n, id, ...)	[BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
	BCH_RECOVERY_PASSES()
#undef x
	};

	u64 ret = 0;
	for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
		if (v & BIT_ULL(i))
			ret |= BIT_ULL(map[i]);
	return ret;
}
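
/*
 * Note: the "stable" pass numbering is what gets persisted in the superblock
 * (see ext->recovery_passes_required below), so the in-memory
 * enum bch_recovery_pass can be reordered without changing the on-disk
 * format.  The two maps above are built from the same x-macro list with
 * index and value swapped, so they are inverses of each other.
 *
 * Illustrative sketch (not part of this file): a mask of in-memory passes
 * should round-trip through the stable encoding:
 *
 *	u64 mask   = BIT_ULL(BCH_RECOVERY_PASS_check_snapshots);
 *	u64 stable = bch2_recovery_passes_to_stable(mask);
 *
 *	BUG_ON(bch2_recovery_passes_from_stable(stable) != mask);
 */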

/*
 * For when we need to rewind recovery passes and run a pass we skipped:
 */
static int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
					     enum bch_recovery_pass pass)
{
	if (c->curr_recovery_pass == ARRAY_SIZE(recovery_pass_fns))
		return -BCH_ERR_not_in_recovery;

	if (c->recovery_passes_complete & BIT_ULL(pass))
		return 0;

	bool print = !(c->opts.recovery_passes & BIT_ULL(pass));

	if (pass < BCH_RECOVERY_PASS_set_may_go_rw &&
	    c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) {
		if (print)
			bch_info(c, "need recovery pass %s (%u), but already rw",
				 bch2_recovery_passes[pass], pass);
		return -BCH_ERR_cannot_rewind_recovery;
	}

	if (print)
		bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
			 bch2_recovery_passes[pass], pass,
			 bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);

	c->opts.recovery_passes |= BIT_ULL(pass);

	if (c->curr_recovery_pass > pass) {
		c->next_recovery_pass = pass;
		/* Clear the completed bits for the pass being rewound to and everything after it: */
		c->recovery_passes_complete &= ~(~0ULL << pass);
		return -BCH_ERR_restart_recovery;
	} else {
		return 0;
	}
}

int bch2_run_explicit_recovery_pass(struct bch_fs *c,
				    enum bch_recovery_pass pass)
{
	unsigned long flags;
	spin_lock_irqsave(&c->recovery_pass_lock, flags);
	int ret = __bch2_run_explicit_recovery_pass(c, pass);
	spin_unlock_irqrestore(&c->recovery_pass_lock, flags);
	return ret;
}

int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c,
						      enum bch_recovery_pass pass)
{
	lockdep_assert_held(&c->sb_lock);

	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
	__set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required);

	return bch2_run_explicit_recovery_pass(c, pass);
}

int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
					       enum bch_recovery_pass pass)
{
	enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);

	mutex_lock(&c->sb_lock);
	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	if (!test_bit_le64(s, ext->recovery_passes_required)) {
		__set_bit_le64(s, ext->recovery_passes_required);
		bch2_write_super(c);
	}
	mutex_unlock(&c->sb_lock);

	return bch2_run_explicit_recovery_pass(c, pass);
}

static void bch2_clear_recovery_pass_required(struct bch_fs *c,
					      enum bch_recovery_pass pass)
{
	enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);

	mutex_lock(&c->sb_lock);
	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	if (test_bit_le64(s, ext->recovery_passes_required)) {
		__clear_bit_le64(s, ext->recovery_passes_required);
		bch2_write_super(c);
	}
	mutex_unlock(&c->sb_lock);
}

u64 bch2_fsck_recovery_passes(void)
{
	u64 ret = 0;

	for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
		if (recovery_pass_fns[i].when & PASS_FSCK)
			ret |= BIT_ULL(i);
	return ret;
}

static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
	struct recovery_pass_fn *p = recovery_pass_fns + pass;

	if (c->opts.recovery_passes_exclude & BIT_ULL(pass))
		return false;
	if (c->opts.recovery_passes & BIT_ULL(pass))
		return true;
	if ((p->when & PASS_FSCK) && c->opts.fsck)
		return true;
	if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
		return true;
	if (p->when & PASS_ALWAYS)
		return true;
	return false;
}

static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
	struct recovery_pass_fn *p = recovery_pass_fns + pass;
	int ret;

	if (!(p->when & PASS_SILENT))
		bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
			   bch2_recovery_passes[pass]);
	ret = p->fn(c);
	if (ret)
		return ret;
	if (!(p->when & PASS_SILENT))
		bch2_print(c, KERN_CONT " done\n");

	return 0;
}

int bch2_run_online_recovery_passes(struct bch_fs *c)
{
	for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
		struct recovery_pass_fn *p = recovery_pass_fns + i;

		if (!(p->when & PASS_ONLINE))
			continue;

		int ret = bch2_run_recovery_pass(c, i);
		if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
			i = c->curr_recovery_pass;
			continue;
		}

		if (ret)
			return ret;
	}

	return 0;
}
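
/*
 * The main (offline) recovery loop: passes normally run in order, but
 * bch2_run_explicit_recovery_pass() may rewind us by setting
 * next_recovery_pass to an earlier pass and returning
 * -BCH_ERR_restart_recovery.  Since that call can also come from another
 * thread (e.g. a btree node read completion), the loop below detects a
 * rewind by checking next_recovery_pass < curr_recovery_pass under
 * recovery_pass_lock rather than relying on the error code alone.
 *
 * Illustrative sequence (a sketch, not a trace of real output): while a
 * later pass is running, a btree node read completion requests an earlier
 * pass such as BCH_RECOVERY_PASS_scan_for_btree_nodes; next_recovery_pass
 * is set to that pass, the completed-pass bits at and above it are cleared,
 * and the loop continues from there once the current pass returns.
 */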
int bch2_run_recovery_passes(struct bch_fs *c)
{
	int ret = 0;

	/*
	 * We can't allow set_may_go_rw to be excluded; that would cause us to
	 * use the journal replay keys for updates where it's not expected.
	 */
	c->opts.recovery_passes_exclude &= ~BIT_ULL(BCH_RECOVERY_PASS_set_may_go_rw);

	spin_lock_irq(&c->recovery_pass_lock);

	while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) {
		unsigned prev_done = c->recovery_pass_done;
		unsigned pass = c->curr_recovery_pass;

		c->next_recovery_pass = pass + 1;

		if (c->opts.recovery_pass_last &&
		    c->curr_recovery_pass > c->opts.recovery_pass_last)
			break;

		if (should_run_recovery_pass(c, pass)) {
			spin_unlock_irq(&c->recovery_pass_lock);
			ret =   bch2_run_recovery_pass(c, pass) ?:
				bch2_journal_flush(&c->journal);

			if (!ret && !test_bit(BCH_FS_error, &c->flags))
				bch2_clear_recovery_pass_required(c, pass);
			spin_lock_irq(&c->recovery_pass_lock);

			if (c->next_recovery_pass < c->curr_recovery_pass) {
				/*
				 * bch2_run_explicit_recovery_pass() was called: we
				 * can't always catch -BCH_ERR_restart_recovery because
				 * it may have been called from another thread (btree
				 * node read completion)
				 */
				ret = 0;
				c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass);
			} else {
				c->recovery_passes_complete |= BIT_ULL(pass);
				c->recovery_pass_done = max(c->recovery_pass_done, pass);
			}
		}

		c->curr_recovery_pass = c->next_recovery_pass;

		if (prev_done <= BCH_RECOVERY_PASS_check_snapshots &&
		    c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots) {
			bch2_copygc_wakeup(c);
			bch2_rebalance_wakeup(c);
		}
	}

	spin_unlock_irq(&c->recovery_pass_lock);

	return ret;
}
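
/*
 * Illustrative usage of the explicit-pass API (a sketch, not a quote of any
 * in-tree caller): repair code that finds damage a particular pass can fix
 * requests that pass and lets recovery rewind if needed:
 *
 *	ret = bch2_run_explicit_recovery_pass_persistent(c,
 *				BCH_RECOVERY_PASS_check_snapshots);
 *
 * A -BCH_ERR_restart_recovery return (check with bch2_err_matches()) means
 * the requested pass is earlier than the current one and recovery will
 * rewind to it.  The persistent variant also sets the pass's stable bit in
 * ext->recovery_passes_required before requesting it, so the request
 * survives a crash; bch2_clear_recovery_pass_required() drops the bit once
 * the pass has run cleanly.
 */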