1 // SPDX-License-Identifier: GPL-2.0 2 #include "bcachefs.h" 3 #include "error.h" 4 #include "journal.h" 5 #include "recovery_passes.h" 6 #include "super.h" 7 #include "thread_with_file.h" 8 9 #define FSCK_ERR_RATELIMIT_NR 10 10 11 bool bch2_inconsistent_error(struct bch_fs *c) 12 { 13 set_bit(BCH_FS_error, &c->flags); 14 15 switch (c->opts.errors) { 16 case BCH_ON_ERROR_continue: 17 return false; 18 case BCH_ON_ERROR_ro: 19 if (bch2_fs_emergency_read_only(c)) 20 bch_err(c, "inconsistency detected - emergency read only at journal seq %llu", 21 journal_cur_seq(&c->journal)); 22 return true; 23 case BCH_ON_ERROR_panic: 24 panic(bch2_fmt(c, "panic after error")); 25 return true; 26 default: 27 BUG(); 28 } 29 } 30 31 int bch2_topology_error(struct bch_fs *c) 32 { 33 set_bit(BCH_FS_topology_error, &c->flags); 34 if (!test_bit(BCH_FS_fsck_running, &c->flags)) { 35 bch2_inconsistent_error(c); 36 return -BCH_ERR_btree_need_topology_repair; 37 } else { 38 return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: 39 -BCH_ERR_btree_node_read_validate_error; 40 } 41 } 42 43 void bch2_fatal_error(struct bch_fs *c) 44 { 45 if (bch2_fs_emergency_read_only(c)) 46 bch_err(c, "fatal error - emergency read only"); 47 } 48 49 void bch2_io_error_work(struct work_struct *work) 50 { 51 struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work); 52 struct bch_fs *c = ca->fs; 53 bool dev; 54 55 down_write(&c->state_lock); 56 dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_ro, 57 BCH_FORCE_IF_DEGRADED); 58 if (dev 59 ? __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, 60 BCH_FORCE_IF_DEGRADED) 61 : bch2_fs_emergency_read_only(c)) 62 bch_err(ca, 63 "too many IO errors, setting %s RO", 64 dev ? "device" : "filesystem"); 65 up_write(&c->state_lock); 66 } 67 68 void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type) 69 { 70 atomic64_inc(&ca->errors[type]); 71 //queue_work(system_long_wq, &ca->io_error_work); 72 } 73 74 enum ask_yn { 75 YN_NO, 76 YN_YES, 77 YN_ALLNO, 78 YN_ALLYES, 79 }; 80 81 static enum ask_yn parse_yn_response(char *buf) 82 { 83 buf = strim(buf); 84 85 if (strlen(buf) == 1) 86 switch (buf[0]) { 87 case 'n': 88 return YN_NO; 89 case 'y': 90 return YN_YES; 91 case 'N': 92 return YN_ALLNO; 93 case 'Y': 94 return YN_ALLYES; 95 } 96 return -1; 97 } 98 99 #ifdef __KERNEL__ 100 static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c) 101 { 102 struct stdio_redirect *stdio = c->stdio; 103 104 if (c->stdio_filter && c->stdio_filter != current) 105 stdio = NULL; 106 107 if (!stdio) 108 return YN_NO; 109 110 char buf[100]; 111 int ret; 112 113 do { 114 bch2_print(c, " (y,n, or Y,N for all errors of this type) "); 115 116 int r = bch2_stdio_redirect_readline(stdio, buf, sizeof(buf) - 1); 117 if (r < 0) 118 return YN_NO; 119 buf[r] = '\0'; 120 } while ((ret = parse_yn_response(buf)) < 0); 121 122 return ret; 123 } 124 #else 125 126 #include "tools-util.h" 127 128 static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c) 129 { 130 char *buf = NULL; 131 size_t buflen = 0; 132 int ret; 133 134 do { 135 fputs(" (y,n, or Y,N for all errors of this type) ", stdout); 136 fflush(stdout); 137 138 if (getline(&buf, &buflen, stdin) < 0) 139 die("error reading from standard input"); 140 } while ((ret = parse_yn_response(buf)) < 0); 141 142 free(buf); 143 return ret; 144 } 145 146 #endif 147 148 static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) 149 { 150 struct fsck_err_state *s; 151 152 if (!test_bit(BCH_FS_fsck_running, &c->flags)) 153 return NULL; 154 155 list_for_each_entry(s, &c->fsck_error_msgs, list) 156 if (s->fmt == fmt) { 157 /* 158 * move it to the head of the list: repeated fsck errors 159 * are common 160 */ 161 list_move(&s->list, &c->fsck_error_msgs); 162 return s; 163 } 164 165 s = kzalloc(sizeof(*s), GFP_NOFS); 166 if (!s) { 167 if (!c->fsck_alloc_msgs_err) 168 bch_err(c, "kmalloc err, cannot ratelimit fsck errs"); 169 c->fsck_alloc_msgs_err = true; 170 return NULL; 171 } 172 173 INIT_LIST_HEAD(&s->list); 174 s->fmt = fmt; 175 list_add(&s->list, &c->fsck_error_msgs); 176 return s; 177 } 178 179 /* s/fix?/fixing/ s/recreate?/recreating/ */ 180 static void prt_actioning(struct printbuf *out, const char *action) 181 { 182 unsigned len = strlen(action); 183 184 BUG_ON(action[len - 1] != '?'); 185 --len; 186 187 if (action[len - 1] == 'e') 188 --len; 189 190 prt_bytes(out, action, len); 191 prt_str(out, "ing"); 192 } 193 194 int bch2_fsck_err(struct bch_fs *c, 195 enum bch_fsck_flags flags, 196 enum bch_sb_error_id err, 197 const char *fmt, ...) 198 { 199 struct fsck_err_state *s = NULL; 200 va_list args; 201 bool print = true, suppressing = false, inconsistent = false; 202 struct printbuf buf = PRINTBUF, *out = &buf; 203 int ret = -BCH_ERR_fsck_ignore; 204 const char *action_orig = "fix?", *action = action_orig; 205 206 if ((flags & FSCK_CAN_FIX) && 207 test_bit(err, c->sb.errors_silent)) 208 return -BCH_ERR_fsck_fix; 209 210 bch2_sb_error_count(c, err); 211 212 va_start(args, fmt); 213 prt_vprintf(out, fmt, args); 214 va_end(args); 215 216 /* Custom fix/continue/recreate/etc.? */ 217 if (out->buf[out->pos - 1] == '?') { 218 const char *p = strrchr(out->buf, ','); 219 if (p) { 220 out->pos = p - out->buf; 221 action = kstrdup(p + 2, GFP_KERNEL); 222 if (!action) { 223 ret = -ENOMEM; 224 goto err; 225 } 226 } 227 } 228 229 mutex_lock(&c->fsck_error_msgs_lock); 230 s = fsck_err_get(c, fmt); 231 if (s) { 232 /* 233 * We may be called multiple times for the same error on 234 * transaction restart - this memoizes instead of asking the user 235 * multiple times for the same error: 236 */ 237 if (s->last_msg && !strcmp(buf.buf, s->last_msg)) { 238 ret = s->ret; 239 mutex_unlock(&c->fsck_error_msgs_lock); 240 goto err; 241 } 242 243 kfree(s->last_msg); 244 s->last_msg = kstrdup(buf.buf, GFP_KERNEL); 245 if (!s->last_msg) { 246 mutex_unlock(&c->fsck_error_msgs_lock); 247 ret = -ENOMEM; 248 goto err; 249 } 250 251 if (c->opts.ratelimit_errors && 252 !(flags & FSCK_NO_RATELIMIT) && 253 s->nr >= FSCK_ERR_RATELIMIT_NR) { 254 if (s->nr == FSCK_ERR_RATELIMIT_NR) 255 suppressing = true; 256 else 257 print = false; 258 } 259 260 s->nr++; 261 } 262 263 #ifdef BCACHEFS_LOG_PREFIX 264 if (!strncmp(fmt, "bcachefs:", 9)) 265 prt_printf(out, bch2_log_msg(c, "")); 266 #endif 267 268 if (!test_bit(BCH_FS_fsck_running, &c->flags)) { 269 if (c->opts.errors != BCH_ON_ERROR_continue || 270 !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { 271 prt_str(out, ", shutting down"); 272 inconsistent = true; 273 ret = -BCH_ERR_fsck_errors_not_fixed; 274 } else if (flags & FSCK_CAN_FIX) { 275 prt_str(out, ", "); 276 prt_actioning(out, action); 277 ret = -BCH_ERR_fsck_fix; 278 } else { 279 prt_str(out, ", continuing"); 280 ret = -BCH_ERR_fsck_ignore; 281 } 282 } else if (c->opts.fix_errors == FSCK_FIX_exit) { 283 prt_str(out, ", exiting"); 284 ret = -BCH_ERR_fsck_errors_not_fixed; 285 } else if (flags & FSCK_CAN_FIX) { 286 int fix = s && s->fix 287 ? s->fix 288 : c->opts.fix_errors; 289 290 if (fix == FSCK_FIX_ask) { 291 prt_str(out, ", "); 292 prt_str(out, action); 293 294 if (bch2_fs_stdio_redirect(c)) 295 bch2_print(c, "%s", out->buf); 296 else 297 bch2_print_string_as_lines(KERN_ERR, out->buf); 298 print = false; 299 300 int ask = bch2_fsck_ask_yn(c); 301 302 if (ask >= YN_ALLNO && s) 303 s->fix = ask == YN_ALLNO 304 ? FSCK_FIX_no 305 : FSCK_FIX_yes; 306 307 ret = ask & 1 308 ? -BCH_ERR_fsck_fix 309 : -BCH_ERR_fsck_ignore; 310 } else if (fix == FSCK_FIX_yes || 311 (c->opts.nochanges && 312 !(flags & FSCK_CAN_IGNORE))) { 313 prt_str(out, ", "); 314 prt_actioning(out, action); 315 ret = -BCH_ERR_fsck_fix; 316 } else { 317 prt_str(out, ", not "); 318 prt_actioning(out, action); 319 } 320 } else if (flags & FSCK_NEED_FSCK) { 321 prt_str(out, " (run fsck to correct)"); 322 } else { 323 prt_str(out, " (repair unimplemented)"); 324 } 325 326 if (ret == -BCH_ERR_fsck_ignore && 327 (c->opts.fix_errors == FSCK_FIX_exit || 328 !(flags & FSCK_CAN_IGNORE))) 329 ret = -BCH_ERR_fsck_errors_not_fixed; 330 331 if (print) { 332 if (bch2_fs_stdio_redirect(c)) 333 bch2_print(c, "%s\n", out->buf); 334 else 335 bch2_print_string_as_lines(KERN_ERR, out->buf); 336 } 337 338 if (test_bit(BCH_FS_fsck_running, &c->flags) && 339 (ret != -BCH_ERR_fsck_fix && 340 ret != -BCH_ERR_fsck_ignore)) 341 bch_err(c, "Unable to continue, halting"); 342 else if (suppressing) 343 bch_err(c, "Ratelimiting new instances of previous error"); 344 345 if (s) 346 s->ret = ret; 347 348 mutex_unlock(&c->fsck_error_msgs_lock); 349 350 if (inconsistent) 351 bch2_inconsistent_error(c); 352 353 if (ret == -BCH_ERR_fsck_fix) { 354 set_bit(BCH_FS_errors_fixed, &c->flags); 355 } else { 356 set_bit(BCH_FS_errors_not_fixed, &c->flags); 357 set_bit(BCH_FS_error, &c->flags); 358 } 359 err: 360 if (action != action_orig) 361 kfree(action); 362 printbuf_exit(&buf); 363 return ret; 364 } 365 366 void bch2_flush_fsck_errs(struct bch_fs *c) 367 { 368 struct fsck_err_state *s, *n; 369 370 mutex_lock(&c->fsck_error_msgs_lock); 371 372 list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { 373 if (s->ratelimited && s->last_msg) 374 bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg); 375 376 list_del(&s->list); 377 kfree(s->last_msg); 378 kfree(s); 379 } 380 381 mutex_unlock(&c->fsck_error_msgs_lock); 382 } 383