1 // SPDX-License-Identifier: GPL-2.0 2 #include "bcachefs.h" 3 #include "error.h" 4 #include "journal.h" 5 #include "recovery_passes.h" 6 #include "super.h" 7 #include "thread_with_file.h" 8 9 #define FSCK_ERR_RATELIMIT_NR 10 10 11 bool bch2_inconsistent_error(struct bch_fs *c) 12 { 13 set_bit(BCH_FS_error, &c->flags); 14 15 switch (c->opts.errors) { 16 case BCH_ON_ERROR_continue: 17 return false; 18 case BCH_ON_ERROR_fix_safe: 19 case BCH_ON_ERROR_ro: 20 if (bch2_fs_emergency_read_only(c)) 21 bch_err(c, "inconsistency detected - emergency read only at journal seq %llu", 22 journal_cur_seq(&c->journal)); 23 return true; 24 case BCH_ON_ERROR_panic: 25 panic(bch2_fmt(c, "panic after error")); 26 return true; 27 default: 28 BUG(); 29 } 30 } 31 32 int bch2_topology_error(struct bch_fs *c) 33 { 34 set_bit(BCH_FS_topology_error, &c->flags); 35 if (!test_bit(BCH_FS_fsck_running, &c->flags)) { 36 bch2_inconsistent_error(c); 37 return -BCH_ERR_btree_need_topology_repair; 38 } else { 39 return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: 40 -BCH_ERR_btree_node_read_validate_error; 41 } 42 } 43 44 void bch2_fatal_error(struct bch_fs *c) 45 { 46 if (bch2_fs_emergency_read_only(c)) 47 bch_err(c, "fatal error - emergency read only"); 48 } 49 50 void bch2_io_error_work(struct work_struct *work) 51 { 52 struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work); 53 struct bch_fs *c = ca->fs; 54 bool dev; 55 56 down_write(&c->state_lock); 57 dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_ro, 58 BCH_FORCE_IF_DEGRADED); 59 if (dev 60 ? __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, 61 BCH_FORCE_IF_DEGRADED) 62 : bch2_fs_emergency_read_only(c)) 63 bch_err(ca, 64 "too many IO errors, setting %s RO", 65 dev ? "device" : "filesystem"); 66 up_write(&c->state_lock); 67 } 68 69 void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type) 70 { 71 atomic64_inc(&ca->errors[type]); 72 //queue_work(system_long_wq, &ca->io_error_work); 73 } 74 75 enum ask_yn { 76 YN_NO, 77 YN_YES, 78 YN_ALLNO, 79 YN_ALLYES, 80 }; 81 82 static enum ask_yn parse_yn_response(char *buf) 83 { 84 buf = strim(buf); 85 86 if (strlen(buf) == 1) 87 switch (buf[0]) { 88 case 'n': 89 return YN_NO; 90 case 'y': 91 return YN_YES; 92 case 'N': 93 return YN_ALLNO; 94 case 'Y': 95 return YN_ALLYES; 96 } 97 return -1; 98 } 99 100 #ifdef __KERNEL__ 101 static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c) 102 { 103 struct stdio_redirect *stdio = c->stdio; 104 105 if (c->stdio_filter && c->stdio_filter != current) 106 stdio = NULL; 107 108 if (!stdio) 109 return YN_NO; 110 111 char buf[100]; 112 int ret; 113 114 do { 115 bch2_print(c, " (y,n, or Y,N for all errors of this type) "); 116 117 int r = bch2_stdio_redirect_readline(stdio, buf, sizeof(buf) - 1); 118 if (r < 0) 119 return YN_NO; 120 buf[r] = '\0'; 121 } while ((ret = parse_yn_response(buf)) < 0); 122 123 return ret; 124 } 125 #else 126 127 #include "tools-util.h" 128 129 static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c) 130 { 131 char *buf = NULL; 132 size_t buflen = 0; 133 int ret; 134 135 do { 136 fputs(" (y,n, or Y,N for all errors of this type) ", stdout); 137 fflush(stdout); 138 139 if (getline(&buf, &buflen, stdin) < 0) 140 die("error reading from standard input"); 141 } while ((ret = parse_yn_response(buf)) < 0); 142 143 free(buf); 144 return ret; 145 } 146 147 #endif 148 149 static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) 150 { 151 struct fsck_err_state *s; 152 153 if (!test_bit(BCH_FS_fsck_running, &c->flags)) 154 return NULL; 155 156 list_for_each_entry(s, &c->fsck_error_msgs, list) 157 if (s->fmt == fmt) { 158 /* 159 * move it to the head of the list: repeated fsck errors 160 * are common 161 */ 162 list_move(&s->list, &c->fsck_error_msgs); 163 return s; 164 } 165 166 s = kzalloc(sizeof(*s), GFP_NOFS); 167 if (!s) { 168 if (!c->fsck_alloc_msgs_err) 169 bch_err(c, "kmalloc err, cannot ratelimit fsck errs"); 170 c->fsck_alloc_msgs_err = true; 171 return NULL; 172 } 173 174 INIT_LIST_HEAD(&s->list); 175 s->fmt = fmt; 176 list_add(&s->list, &c->fsck_error_msgs); 177 return s; 178 } 179 180 /* s/fix?/fixing/ s/recreate?/recreating/ */ 181 static void prt_actioning(struct printbuf *out, const char *action) 182 { 183 unsigned len = strlen(action); 184 185 BUG_ON(action[len - 1] != '?'); 186 --len; 187 188 if (action[len - 1] == 'e') 189 --len; 190 191 prt_bytes(out, action, len); 192 prt_str(out, "ing"); 193 } 194 195 static const u8 fsck_flags_extra[] = { 196 #define x(t, n, flags) [BCH_FSCK_ERR_##t] = flags, 197 BCH_SB_ERRS() 198 #undef x 199 }; 200 201 int bch2_fsck_err(struct bch_fs *c, 202 enum bch_fsck_flags flags, 203 enum bch_sb_error_id err, 204 const char *fmt, ...) 205 { 206 struct fsck_err_state *s = NULL; 207 va_list args; 208 bool print = true, suppressing = false, inconsistent = false; 209 struct printbuf buf = PRINTBUF, *out = &buf; 210 int ret = -BCH_ERR_fsck_ignore; 211 const char *action_orig = "fix?", *action = action_orig; 212 213 if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) 214 flags |= fsck_flags_extra[err]; 215 216 if ((flags & FSCK_CAN_FIX) && 217 test_bit(err, c->sb.errors_silent)) 218 return -BCH_ERR_fsck_fix; 219 220 bch2_sb_error_count(c, err); 221 222 va_start(args, fmt); 223 prt_vprintf(out, fmt, args); 224 va_end(args); 225 226 /* Custom fix/continue/recreate/etc.? */ 227 if (out->buf[out->pos - 1] == '?') { 228 const char *p = strrchr(out->buf, ','); 229 if (p) { 230 out->pos = p - out->buf; 231 action = kstrdup(p + 2, GFP_KERNEL); 232 if (!action) { 233 ret = -ENOMEM; 234 goto err; 235 } 236 } 237 } 238 239 mutex_lock(&c->fsck_error_msgs_lock); 240 s = fsck_err_get(c, fmt); 241 if (s) { 242 /* 243 * We may be called multiple times for the same error on 244 * transaction restart - this memoizes instead of asking the user 245 * multiple times for the same error: 246 */ 247 if (s->last_msg && !strcmp(buf.buf, s->last_msg)) { 248 ret = s->ret; 249 mutex_unlock(&c->fsck_error_msgs_lock); 250 goto err; 251 } 252 253 kfree(s->last_msg); 254 s->last_msg = kstrdup(buf.buf, GFP_KERNEL); 255 if (!s->last_msg) { 256 mutex_unlock(&c->fsck_error_msgs_lock); 257 ret = -ENOMEM; 258 goto err; 259 } 260 261 if (c->opts.ratelimit_errors && 262 !(flags & FSCK_NO_RATELIMIT) && 263 s->nr >= FSCK_ERR_RATELIMIT_NR) { 264 if (s->nr == FSCK_ERR_RATELIMIT_NR) 265 suppressing = true; 266 else 267 print = false; 268 } 269 270 s->nr++; 271 } 272 273 #ifdef BCACHEFS_LOG_PREFIX 274 if (!strncmp(fmt, "bcachefs:", 9)) 275 prt_printf(out, bch2_log_msg(c, "")); 276 #endif 277 278 if ((flags & FSCK_CAN_FIX) && 279 (flags & FSCK_AUTOFIX) && 280 (c->opts.errors == BCH_ON_ERROR_continue || 281 c->opts.errors == BCH_ON_ERROR_fix_safe)) { 282 prt_str(out, ", "); 283 prt_actioning(out, action); 284 ret = -BCH_ERR_fsck_fix; 285 } else if (!test_bit(BCH_FS_fsck_running, &c->flags)) { 286 if (c->opts.errors != BCH_ON_ERROR_continue || 287 !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { 288 prt_str(out, ", shutting down"); 289 inconsistent = true; 290 ret = -BCH_ERR_fsck_errors_not_fixed; 291 } else if (flags & FSCK_CAN_FIX) { 292 prt_str(out, ", "); 293 prt_actioning(out, action); 294 ret = -BCH_ERR_fsck_fix; 295 } else { 296 prt_str(out, ", continuing"); 297 ret = -BCH_ERR_fsck_ignore; 298 } 299 } else if (c->opts.fix_errors == FSCK_FIX_exit) { 300 prt_str(out, ", exiting"); 301 ret = -BCH_ERR_fsck_errors_not_fixed; 302 } else if (flags & FSCK_CAN_FIX) { 303 int fix = s && s->fix 304 ? s->fix 305 : c->opts.fix_errors; 306 307 if (fix == FSCK_FIX_ask) { 308 prt_str(out, ", "); 309 prt_str(out, action); 310 311 if (bch2_fs_stdio_redirect(c)) 312 bch2_print(c, "%s", out->buf); 313 else 314 bch2_print_string_as_lines(KERN_ERR, out->buf); 315 print = false; 316 317 int ask = bch2_fsck_ask_yn(c); 318 319 if (ask >= YN_ALLNO && s) 320 s->fix = ask == YN_ALLNO 321 ? FSCK_FIX_no 322 : FSCK_FIX_yes; 323 324 ret = ask & 1 325 ? -BCH_ERR_fsck_fix 326 : -BCH_ERR_fsck_ignore; 327 } else if (fix == FSCK_FIX_yes || 328 (c->opts.nochanges && 329 !(flags & FSCK_CAN_IGNORE))) { 330 prt_str(out, ", "); 331 prt_actioning(out, action); 332 ret = -BCH_ERR_fsck_fix; 333 } else { 334 prt_str(out, ", not "); 335 prt_actioning(out, action); 336 } 337 } else if (flags & FSCK_NEED_FSCK) { 338 prt_str(out, " (run fsck to correct)"); 339 } else { 340 prt_str(out, " (repair unimplemented)"); 341 } 342 343 if (ret == -BCH_ERR_fsck_ignore && 344 (c->opts.fix_errors == FSCK_FIX_exit || 345 !(flags & FSCK_CAN_IGNORE))) 346 ret = -BCH_ERR_fsck_errors_not_fixed; 347 348 if (print) { 349 if (bch2_fs_stdio_redirect(c)) 350 bch2_print(c, "%s\n", out->buf); 351 else 352 bch2_print_string_as_lines(KERN_ERR, out->buf); 353 } 354 355 if (test_bit(BCH_FS_fsck_running, &c->flags) && 356 (ret != -BCH_ERR_fsck_fix && 357 ret != -BCH_ERR_fsck_ignore)) 358 bch_err(c, "Unable to continue, halting"); 359 else if (suppressing) 360 bch_err(c, "Ratelimiting new instances of previous error"); 361 362 if (s) 363 s->ret = ret; 364 365 mutex_unlock(&c->fsck_error_msgs_lock); 366 367 if (inconsistent) 368 bch2_inconsistent_error(c); 369 370 if (ret == -BCH_ERR_fsck_fix) { 371 set_bit(BCH_FS_errors_fixed, &c->flags); 372 } else { 373 set_bit(BCH_FS_errors_not_fixed, &c->flags); 374 set_bit(BCH_FS_error, &c->flags); 375 } 376 err: 377 if (action != action_orig) 378 kfree(action); 379 printbuf_exit(&buf); 380 return ret; 381 } 382 383 void bch2_flush_fsck_errs(struct bch_fs *c) 384 { 385 struct fsck_err_state *s, *n; 386 387 mutex_lock(&c->fsck_error_msgs_lock); 388 389 list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { 390 if (s->ratelimited && s->last_msg) 391 bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg); 392 393 list_del(&s->list); 394 kfree(s->last_msg); 395 kfree(s); 396 } 397 398 mutex_unlock(&c->fsck_error_msgs_lock); 399 } 400