// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "btree_gc.h"
#include "btree_node_scan.h"
#include "disk_accounting.h"
#include "ec.h"
#include "fsck.h"
#include "inode.h"
#include "journal.h"
#include "lru.h"
#include "logged_ops.h"
#include "rebalance.h"
#include "recovery.h"
#include "recovery_passes.h"
#include "snapshot.h"
#include "subvolume.h"
#include "super.h"
#include "super-io.h"

const char * const bch2_recovery_passes[] = {
#define x(_fn, ...)	#_fn,
	BCH_RECOVERY_PASSES()
#undef x
	NULL
};

/* Fake recovery pass, so that scan_for_btree_nodes isn't 0: */
static int bch2_recovery_pass_empty(struct bch_fs *c)
{
	return 0;
}

static int bch2_set_may_go_rw(struct bch_fs *c)
{
	struct journal_keys *keys = &c->journal_keys;

	/*
	 * After we go RW, the journal keys buffer can't be modified (except
	 * for setting journal_key->overwritten): it will be accessed by
	 * multiple threads
	 */
	move_gap(keys, keys->nr);

	set_bit(BCH_FS_may_go_rw, &c->flags);

	if (keys->nr || !c->opts.read_only || c->opts.fsck || !c->sb.clean || c->opts.recovery_passes)
		return bch2_fs_read_write_early(c);
	return 0;
}

struct recovery_pass_fn {
	int		(*fn)(struct bch_fs *);
	unsigned	when;
};

static struct recovery_pass_fn recovery_pass_fns[] = {
#define x(_fn, _id, _when)	{ .fn = bch2_##_fn, .when = _when },
	BCH_RECOVERY_PASSES()
#undef x
};

static const u8 passes_to_stable_map[] = {
#define x(n, id, ...)	[BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
	BCH_RECOVERY_PASSES()
#undef x
};

static enum bch_recovery_pass_stable bch2_recovery_pass_to_stable(enum bch_recovery_pass pass)
{
	return passes_to_stable_map[pass];
}

u64 bch2_recovery_passes_to_stable(u64 v)
{
	u64 ret = 0;
	for (unsigned i = 0; i < ARRAY_SIZE(passes_to_stable_map); i++)
		if (v & BIT_ULL(i))
			ret |= BIT_ULL(passes_to_stable_map[i]);
	return ret;
}

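/*
 * On-disk pass bitmasks (e.g. ext->recovery_passes_required below) use the
 * stable numbering; the helpers above and below translate whole masks
 * between the runtime and stable numberings. Illustrative sketch, not
 * called anywhere: since both lookup tables are generated from
 * BCH_RECOVERY_PASSES(), round-tripping a mask should be the identity for
 * any pass this kernel knows about:
 *
 *	u64 runtime = BIT_ULL(pass);	// pass: some enum bch_recovery_pass
 *	u64 stable  = bch2_recovery_passes_to_stable(runtime);
 *
 *	BUG_ON(bch2_recovery_passes_from_stable(stable) != runtime);
 */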
u64 bch2_recovery_passes_from_stable(u64 v)
{
	static const u8 map[] = {
#define x(n, id, ...)	[BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
	BCH_RECOVERY_PASSES()
#undef x
	};

	u64 ret = 0;
	for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
		if (v & BIT_ULL(i))
			ret |= BIT_ULL(map[i]);
	return ret;
}

/*
 * For when we need to rewind recovery passes and run a pass we skipped:
 */
static int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
					     enum bch_recovery_pass pass)
{
	if (c->curr_recovery_pass == ARRAY_SIZE(recovery_pass_fns))
		return -BCH_ERR_not_in_recovery;

	if (c->recovery_passes_complete & BIT_ULL(pass))
		return 0;

	bool print = !(c->opts.recovery_passes & BIT_ULL(pass));

	if (pass < BCH_RECOVERY_PASS_set_may_go_rw &&
	    c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) {
		if (print)
			bch_info(c, "need recovery pass %s (%u), but already rw",
				 bch2_recovery_passes[pass], pass);
		return -BCH_ERR_cannot_rewind_recovery;
	}

	if (print)
		bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
			 bch2_recovery_passes[pass], pass,
			 bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);

	c->opts.recovery_passes |= BIT_ULL(pass);

	if (c->curr_recovery_pass > pass) {
		c->next_recovery_pass = pass;
		c->recovery_passes_complete &= (1ULL << pass) >> 1;
		return -BCH_ERR_restart_recovery;
	} else {
		return 0;
	}
}

int bch2_run_explicit_recovery_pass(struct bch_fs *c,
				    enum bch_recovery_pass pass)
{
	unsigned long flags;
	spin_lock_irqsave(&c->recovery_pass_lock, flags);
	int ret = __bch2_run_explicit_recovery_pass(c, pass);
	spin_unlock_irqrestore(&c->recovery_pass_lock, flags);
	return ret;
}

int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c,
						       enum bch_recovery_pass pass)
{
	lockdep_assert_held(&c->sb_lock);

	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
	__set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required);

	return bch2_run_explicit_recovery_pass(c, pass);
}

int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
					       enum bch_recovery_pass pass)
{
	enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);

	mutex_lock(&c->sb_lock);
	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	if (!test_bit_le64(s, ext->recovery_passes_required)) {
		__set_bit_le64(s, ext->recovery_passes_required);
		bch2_write_super(c);
	}
	mutex_unlock(&c->sb_lock);

	return bch2_run_explicit_recovery_pass(c, pass);
}

static void bch2_clear_recovery_pass_required(struct bch_fs *c,
					      enum bch_recovery_pass pass)
{
	enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);

	mutex_lock(&c->sb_lock);
	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	if (test_bit_le64(s, ext->recovery_passes_required)) {
		__clear_bit_le64(s, ext->recovery_passes_required);
		bch2_write_super(c);
	}
	mutex_unlock(&c->sb_lock);
}

u64 bch2_fsck_recovery_passes(void)
{
	u64 ret = 0;

	for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
		if (recovery_pass_fns[i].when & PASS_FSCK)
			ret |= BIT_ULL(i);
	return ret;
}

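/*
 * Whether a pass runs during offline recovery: an explicit exclude
 * (opts.recovery_passes_exclude) wins over everything, an explicit request
 * (opts.recovery_passes) wins over the PASS_* flags, and the flags are then
 * checked in turn: PASS_FSCK with the fsck option set, PASS_UNCLEAN after
 * an unclean shutdown, PASS_ALWAYS unconditionally.
 */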
static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
	struct recovery_pass_fn *p = recovery_pass_fns + pass;

	if (c->opts.recovery_passes_exclude & BIT_ULL(pass))
		return false;
	if (c->opts.recovery_passes & BIT_ULL(pass))
		return true;
	if ((p->when & PASS_FSCK) && c->opts.fsck)
		return true;
	if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
		return true;
	if (p->when & PASS_ALWAYS)
		return true;
	return false;
}

static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
	struct recovery_pass_fn *p = recovery_pass_fns + pass;
	int ret;

	if (!(p->when & PASS_SILENT))
		bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
			   bch2_recovery_passes[pass]);
	ret = p->fn(c);
	if (ret)
		return ret;
	if (!(p->when & PASS_SILENT))
		bch2_print(c, KERN_CONT " done\n");

	return 0;
}

int bch2_run_online_recovery_passes(struct bch_fs *c)
{
	int ret = 0;

	down_read(&c->state_lock);

	for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
		struct recovery_pass_fn *p = recovery_pass_fns + i;

		if (!(p->when & PASS_ONLINE))
			continue;

		ret = bch2_run_recovery_pass(c, i);
		if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
			i = c->curr_recovery_pass;
			continue;
		}
		if (ret)
			break;
	}

	up_read(&c->state_lock);

	return ret;
}

int bch2_run_recovery_passes(struct bch_fs *c)
{
	int ret = 0;

	/*
	 * We can't allow set_may_go_rw to be excluded; that would cause us to
	 * use the journal replay keys for updates where it's not expected.
	 */
	c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw;

	while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) {
		c->next_recovery_pass = c->curr_recovery_pass + 1;

		spin_lock_irq(&c->recovery_pass_lock);
		unsigned pass = c->curr_recovery_pass;

		if (c->opts.recovery_pass_last &&
		    c->curr_recovery_pass > c->opts.recovery_pass_last) {
			spin_unlock_irq(&c->recovery_pass_lock);
			break;
		}

		if (!should_run_recovery_pass(c, pass)) {
			c->curr_recovery_pass++;
			c->recovery_pass_done = max(c->recovery_pass_done, pass);
			spin_unlock_irq(&c->recovery_pass_lock);
			continue;
		}
		spin_unlock_irq(&c->recovery_pass_lock);

		ret =   bch2_run_recovery_pass(c, pass) ?:
			bch2_journal_flush(&c->journal);

		if (!ret && !test_bit(BCH_FS_error, &c->flags))
			bch2_clear_recovery_pass_required(c, pass);

		spin_lock_irq(&c->recovery_pass_lock);
		if (c->next_recovery_pass < c->curr_recovery_pass) {
			/*
			 * bch2_run_explicit_recovery_pass() was called: we
			 * can't always catch -BCH_ERR_restart_recovery because
			 * it may have been called from another thread (btree
			 * node read completion)
			 */
			ret = 0;
			c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass);
		} else {
			c->recovery_passes_complete |= BIT_ULL(pass);
			c->recovery_pass_done = max(c->recovery_pass_done, pass);
		}
		c->curr_recovery_pass = c->next_recovery_pass;
		spin_unlock_irq(&c->recovery_pass_lock);
	}

	return ret;
}

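/*
 * Illustrative sketch of how the explicit-pass interface above is meant to
 * be used by callers elsewhere (an assumption about callers, not code in
 * this file): when something detects damage that an earlier pass would
 * repair, it requests that pass, persistently so it still runs if we crash
 * before it completes:
 *
 *	ret = bch2_run_explicit_recovery_pass_persistent(c, pass);
 *
 * If recovery has already moved past @pass, this returns
 * -BCH_ERR_restart_recovery after setting c->next_recovery_pass; the loop
 * in bch2_run_recovery_passes() then rewinds to @pass on its next
 * iteration instead of treating the error as fatal.
 */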