1 // SPDX-License-Identifier: GPL-2.0 2 #include "bcachefs.h" 3 #include "btree_cache.h" 4 #include "btree_iter.h" 5 #include "error.h" 6 #include "journal.h" 7 #include "namei.h" 8 #include "recovery_passes.h" 9 #include "super.h" 10 #include "thread_with_file.h" 11 12 #define FSCK_ERR_RATELIMIT_NR 10 13 14 void bch2_log_msg_start(struct bch_fs *c, struct printbuf *out) 15 { 16 printbuf_indent_add_nextline(out, 2); 17 18 #ifdef BCACHEFS_LOG_PREFIX 19 prt_printf(out, bch2_log_msg(c, "")); 20 #endif 21 } 22 23 bool __bch2_inconsistent_error(struct bch_fs *c, struct printbuf *out) 24 { 25 set_bit(BCH_FS_error, &c->flags); 26 27 switch (c->opts.errors) { 28 case BCH_ON_ERROR_continue: 29 return false; 30 case BCH_ON_ERROR_fix_safe: 31 case BCH_ON_ERROR_ro: 32 if (bch2_fs_emergency_read_only(c)) 33 prt_printf(out, "inconsistency detected - emergency read only at journal seq %llu\n", 34 journal_cur_seq(&c->journal)); 35 return true; 36 case BCH_ON_ERROR_panic: 37 bch2_print_string_as_lines_nonblocking(KERN_ERR, out->buf); 38 panic(bch2_fmt(c, "panic after error")); 39 return true; 40 default: 41 BUG(); 42 } 43 } 44 45 bool bch2_inconsistent_error(struct bch_fs *c) 46 { 47 struct printbuf buf = PRINTBUF; 48 buf.atomic++; 49 50 printbuf_indent_add_nextline(&buf, 2); 51 52 bool ret = __bch2_inconsistent_error(c, &buf); 53 if (ret) 54 bch_err(c, "%s", buf.buf); 55 printbuf_exit(&buf); 56 return ret; 57 } 58 59 __printf(3, 0) 60 static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *trans, 61 const char *fmt, va_list args) 62 { 63 struct printbuf buf = PRINTBUF; 64 buf.atomic++; 65 66 bch2_log_msg_start(c, &buf); 67 68 prt_vprintf(&buf, fmt, args); 69 prt_newline(&buf); 70 71 if (trans) 72 bch2_trans_updates_to_text(&buf, trans); 73 bool ret = __bch2_inconsistent_error(c, &buf); 74 bch2_print_string_as_lines_nonblocking(KERN_ERR, buf.buf); 75 76 printbuf_exit(&buf); 77 return ret; 78 } 79 80 bool bch2_fs_inconsistent(struct bch_fs *c, const char *fmt, ...) 81 { 82 va_list args; 83 va_start(args, fmt); 84 bool ret = bch2_fs_trans_inconsistent(c, NULL, fmt, args); 85 va_end(args); 86 return ret; 87 } 88 89 bool bch2_trans_inconsistent(struct btree_trans *trans, const char *fmt, ...) 90 { 91 va_list args; 92 va_start(args, fmt); 93 bool ret = bch2_fs_trans_inconsistent(trans->c, trans, fmt, args); 94 va_end(args); 95 return ret; 96 } 97 98 int __bch2_topology_error(struct bch_fs *c, struct printbuf *out) 99 { 100 prt_printf(out, "btree topology error: "); 101 102 set_bit(BCH_FS_topology_error, &c->flags); 103 if (!test_bit(BCH_FS_recovery_running, &c->flags)) { 104 __bch2_inconsistent_error(c, out); 105 return -BCH_ERR_btree_need_topology_repair; 106 } else { 107 return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: 108 -BCH_ERR_btree_node_read_validate_error; 109 } 110 } 111 112 int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...) 113 { 114 struct printbuf buf = PRINTBUF; 115 116 bch2_log_msg_start(c, &buf); 117 118 va_list args; 119 va_start(args, fmt); 120 prt_vprintf(&buf, fmt, args); 121 va_end(args); 122 123 int ret = __bch2_topology_error(c, &buf); 124 bch2_print_string_as_lines(KERN_ERR, buf.buf); 125 126 printbuf_exit(&buf); 127 return ret; 128 } 129 130 void bch2_fatal_error(struct bch_fs *c) 131 { 132 if (bch2_fs_emergency_read_only(c)) 133 bch_err(c, "fatal error - emergency read only"); 134 } 135 136 void bch2_io_error_work(struct work_struct *work) 137 { 138 struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work); 139 struct bch_fs *c = ca->fs; 140 141 /* XXX: if it's reads or checksums that are failing, set it to failed */ 142 143 down_write(&c->state_lock); 144 unsigned long write_errors_start = READ_ONCE(ca->write_errors_start); 145 146 if (write_errors_start && 147 time_after(jiffies, 148 write_errors_start + c->opts.write_error_timeout * HZ)) { 149 if (ca->mi.state >= BCH_MEMBER_STATE_ro) 150 goto out; 151 152 bool dev = !__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, 153 BCH_FORCE_IF_DEGRADED); 154 155 bch_err(ca, 156 "writes erroring for %u seconds, setting %s ro", 157 c->opts.write_error_timeout, 158 dev ? "device" : "filesystem"); 159 if (!dev) 160 bch2_fs_emergency_read_only(c); 161 162 } 163 out: 164 up_write(&c->state_lock); 165 } 166 167 void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type) 168 { 169 atomic64_inc(&ca->errors[type]); 170 171 if (type == BCH_MEMBER_ERROR_write && !ca->write_errors_start) 172 ca->write_errors_start = jiffies; 173 174 queue_work(system_long_wq, &ca->io_error_work); 175 } 176 177 enum ask_yn { 178 YN_NO, 179 YN_YES, 180 YN_ALLNO, 181 YN_ALLYES, 182 }; 183 184 static enum ask_yn parse_yn_response(char *buf) 185 { 186 buf = strim(buf); 187 188 if (strlen(buf) == 1) 189 switch (buf[0]) { 190 case 'n': 191 return YN_NO; 192 case 'y': 193 return YN_YES; 194 case 'N': 195 return YN_ALLNO; 196 case 'Y': 197 return YN_ALLYES; 198 } 199 return -1; 200 } 201 202 #ifdef __KERNEL__ 203 static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c, struct btree_trans *trans) 204 { 205 struct stdio_redirect *stdio = c->stdio; 206 207 if (c->stdio_filter && c->stdio_filter != current) 208 stdio = NULL; 209 210 if (!stdio) 211 return YN_NO; 212 213 if (trans) 214 bch2_trans_unlock(trans); 215 216 unsigned long unlock_long_at = trans ? jiffies + HZ * 2 : 0; 217 darray_char line = {}; 218 int ret; 219 220 do { 221 unsigned long t; 222 bch2_print(c, " (y,n, or Y,N for all errors of this type) "); 223 rewait: 224 t = unlock_long_at 225 ? max_t(long, unlock_long_at - jiffies, 0) 226 : MAX_SCHEDULE_TIMEOUT; 227 228 int r = bch2_stdio_redirect_readline_timeout(stdio, &line, t); 229 if (r == -ETIME) { 230 bch2_trans_unlock_long(trans); 231 unlock_long_at = 0; 232 goto rewait; 233 } 234 235 if (r < 0) { 236 ret = YN_NO; 237 break; 238 } 239 240 darray_last(line) = '\0'; 241 } while ((ret = parse_yn_response(line.data)) < 0); 242 243 darray_exit(&line); 244 return ret; 245 } 246 #else 247 248 #include "tools-util.h" 249 250 static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c, struct btree_trans *trans) 251 { 252 char *buf = NULL; 253 size_t buflen = 0; 254 int ret; 255 256 do { 257 fputs(" (y,n, or Y,N for all errors of this type) ", stdout); 258 fflush(stdout); 259 260 if (getline(&buf, &buflen, stdin) < 0) 261 die("error reading from standard input"); 262 } while ((ret = parse_yn_response(buf)) < 0); 263 264 free(buf); 265 return ret; 266 } 267 268 #endif 269 270 static struct fsck_err_state *fsck_err_get(struct bch_fs *c, 271 enum bch_sb_error_id id) 272 { 273 struct fsck_err_state *s; 274 275 if (!test_bit(BCH_FS_fsck_running, &c->flags)) 276 return NULL; 277 278 list_for_each_entry(s, &c->fsck_error_msgs, list) 279 if (s->id == id) { 280 /* 281 * move it to the head of the list: repeated fsck errors 282 * are common 283 */ 284 list_move(&s->list, &c->fsck_error_msgs); 285 return s; 286 } 287 288 s = kzalloc(sizeof(*s), GFP_NOFS); 289 if (!s) { 290 if (!c->fsck_alloc_msgs_err) 291 bch_err(c, "kmalloc err, cannot ratelimit fsck errs"); 292 c->fsck_alloc_msgs_err = true; 293 return NULL; 294 } 295 296 INIT_LIST_HEAD(&s->list); 297 s->id = id; 298 list_add(&s->list, &c->fsck_error_msgs); 299 return s; 300 } 301 302 /* s/fix?/fixing/ s/recreate?/recreating/ */ 303 static void prt_actioning(struct printbuf *out, const char *action) 304 { 305 unsigned len = strlen(action); 306 307 BUG_ON(action[len - 1] != '?'); 308 --len; 309 310 if (action[len - 1] == 'e') 311 --len; 312 313 prt_bytes(out, action, len); 314 prt_str(out, "ing"); 315 } 316 317 static const u8 fsck_flags_extra[] = { 318 #define x(t, n, flags) [BCH_FSCK_ERR_##t] = flags, 319 BCH_SB_ERRS() 320 #undef x 321 }; 322 323 static int do_fsck_ask_yn(struct bch_fs *c, 324 struct btree_trans *trans, 325 struct printbuf *question, 326 const char *action) 327 { 328 prt_str(question, ", "); 329 prt_str(question, action); 330 331 if (bch2_fs_stdio_redirect(c)) 332 bch2_print(c, "%s", question->buf); 333 else 334 bch2_print_string_as_lines(KERN_ERR, question->buf); 335 336 int ask = bch2_fsck_ask_yn(c, trans); 337 338 if (trans) { 339 int ret = bch2_trans_relock(trans); 340 if (ret) 341 return ret; 342 } 343 344 return ask; 345 } 346 347 static struct fsck_err_state *count_fsck_err_locked(struct bch_fs *c, 348 enum bch_sb_error_id id, const char *msg, 349 bool *repeat, bool *print, bool *suppress) 350 { 351 bch2_sb_error_count(c, id); 352 353 struct fsck_err_state *s = fsck_err_get(c, id); 354 if (s) { 355 /* 356 * We may be called multiple times for the same error on 357 * transaction restart - this memoizes instead of asking the user 358 * multiple times for the same error: 359 */ 360 if (s->last_msg && !strcmp(msg, s->last_msg)) { 361 *repeat = true; 362 *print = false; 363 return s; 364 } 365 366 kfree(s->last_msg); 367 s->last_msg = kstrdup(msg, GFP_KERNEL); 368 369 if (c->opts.ratelimit_errors && 370 s->nr >= FSCK_ERR_RATELIMIT_NR) { 371 if (s->nr == FSCK_ERR_RATELIMIT_NR) 372 *suppress = true; 373 else 374 *print = false; 375 } 376 377 s->nr++; 378 } 379 return s; 380 } 381 382 void __bch2_count_fsck_err(struct bch_fs *c, 383 enum bch_sb_error_id id, const char *msg, 384 bool *repeat, bool *print, bool *suppress) 385 { 386 bch2_sb_error_count(c, id); 387 388 mutex_lock(&c->fsck_error_msgs_lock); 389 count_fsck_err_locked(c, id, msg, repeat, print, suppress); 390 mutex_unlock(&c->fsck_error_msgs_lock); 391 } 392 393 int __bch2_fsck_err(struct bch_fs *c, 394 struct btree_trans *trans, 395 enum bch_fsck_flags flags, 396 enum bch_sb_error_id err, 397 const char *fmt, ...) 398 { 399 va_list args; 400 struct printbuf buf = PRINTBUF, *out = &buf; 401 int ret = -BCH_ERR_fsck_ignore; 402 const char *action_orig = "fix?", *action = action_orig; 403 404 might_sleep(); 405 406 if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) 407 flags |= fsck_flags_extra[err]; 408 409 if (!c) 410 c = trans->c; 411 412 /* 413 * Ugly: if there's a transaction in the current task it has to be 414 * passed in to unlock if we prompt for user input. 415 * 416 * But, plumbing a transaction and transaction restarts into 417 * bkey_validate() is problematic. 418 * 419 * So: 420 * - make all bkey errors AUTOFIX, they're simple anyways (we just 421 * delete the key) 422 * - and we don't need to warn if we're not prompting 423 */ 424 WARN_ON((flags & FSCK_CAN_FIX) && 425 !(flags & FSCK_AUTOFIX) && 426 !trans && 427 bch2_current_has_btree_trans(c)); 428 429 if (test_bit(err, c->sb.errors_silent)) 430 return flags & FSCK_CAN_FIX 431 ? -BCH_ERR_fsck_fix 432 : -BCH_ERR_fsck_ignore; 433 434 printbuf_indent_add_nextline(out, 2); 435 436 #ifdef BCACHEFS_LOG_PREFIX 437 if (strncmp(fmt, "bcachefs", 8)) 438 prt_printf(out, bch2_log_msg(c, "")); 439 #endif 440 441 va_start(args, fmt); 442 prt_vprintf(out, fmt, args); 443 va_end(args); 444 445 /* Custom fix/continue/recreate/etc.? */ 446 if (out->buf[out->pos - 1] == '?') { 447 const char *p = strrchr(out->buf, ','); 448 if (p) { 449 out->pos = p - out->buf; 450 action = kstrdup(p + 2, GFP_KERNEL); 451 if (!action) { 452 ret = -ENOMEM; 453 goto err; 454 } 455 } 456 } 457 458 mutex_lock(&c->fsck_error_msgs_lock); 459 bool repeat = false, print = true, suppress = false; 460 bool inconsistent = false, exiting = false; 461 struct fsck_err_state *s = 462 count_fsck_err_locked(c, err, buf.buf, &repeat, &print, &suppress); 463 if (repeat) { 464 ret = s->ret; 465 goto err_unlock; 466 } 467 468 if ((flags & FSCK_AUTOFIX) && 469 (c->opts.errors == BCH_ON_ERROR_continue || 470 c->opts.errors == BCH_ON_ERROR_fix_safe)) { 471 prt_str(out, ", "); 472 if (flags & FSCK_CAN_FIX) { 473 prt_actioning(out, action); 474 ret = -BCH_ERR_fsck_fix; 475 } else { 476 prt_str(out, ", continuing"); 477 ret = -BCH_ERR_fsck_ignore; 478 } 479 480 goto print; 481 } else if (!test_bit(BCH_FS_fsck_running, &c->flags)) { 482 if (c->opts.errors != BCH_ON_ERROR_continue || 483 !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { 484 prt_str(out, ", shutting down"); 485 inconsistent = true; 486 print = true; 487 ret = -BCH_ERR_fsck_errors_not_fixed; 488 } else if (flags & FSCK_CAN_FIX) { 489 prt_str(out, ", "); 490 prt_actioning(out, action); 491 ret = -BCH_ERR_fsck_fix; 492 } else { 493 prt_str(out, ", continuing"); 494 ret = -BCH_ERR_fsck_ignore; 495 } 496 } else if (c->opts.fix_errors == FSCK_FIX_exit) { 497 prt_str(out, ", exiting"); 498 ret = -BCH_ERR_fsck_errors_not_fixed; 499 } else if (flags & FSCK_CAN_FIX) { 500 int fix = s && s->fix 501 ? s->fix 502 : c->opts.fix_errors; 503 504 if (fix == FSCK_FIX_ask) { 505 print = false; 506 507 ret = do_fsck_ask_yn(c, trans, out, action); 508 if (ret < 0) 509 goto err_unlock; 510 511 if (ret >= YN_ALLNO && s) 512 s->fix = ret == YN_ALLNO 513 ? FSCK_FIX_no 514 : FSCK_FIX_yes; 515 516 ret = ret & 1 517 ? -BCH_ERR_fsck_fix 518 : -BCH_ERR_fsck_ignore; 519 } else if (fix == FSCK_FIX_yes || 520 (c->opts.nochanges && 521 !(flags & FSCK_CAN_IGNORE))) { 522 prt_str(out, ", "); 523 prt_actioning(out, action); 524 ret = -BCH_ERR_fsck_fix; 525 } else { 526 prt_str(out, ", not "); 527 prt_actioning(out, action); 528 } 529 } else if (!(flags & FSCK_CAN_IGNORE)) { 530 prt_str(out, " (repair unimplemented)"); 531 } 532 533 if (ret == -BCH_ERR_fsck_ignore && 534 (c->opts.fix_errors == FSCK_FIX_exit || 535 !(flags & FSCK_CAN_IGNORE))) 536 ret = -BCH_ERR_fsck_errors_not_fixed; 537 538 if (test_bit(BCH_FS_fsck_running, &c->flags) && 539 (ret != -BCH_ERR_fsck_fix && 540 ret != -BCH_ERR_fsck_ignore)) { 541 exiting = true; 542 print = true; 543 } 544 print: 545 prt_newline(out); 546 547 if (inconsistent) 548 __bch2_inconsistent_error(c, out); 549 else if (exiting) 550 prt_printf(out, "Unable to continue, halting\n"); 551 else if (suppress) 552 prt_printf(out, "Ratelimiting new instances of previous error\n"); 553 554 if (print) { 555 /* possibly strip an empty line, from printbuf_indent_add */ 556 while (out->pos && out->buf[out->pos - 1] == ' ') 557 --out->pos; 558 printbuf_nul_terminate(out); 559 560 if (bch2_fs_stdio_redirect(c)) 561 bch2_print(c, "%s", out->buf); 562 else 563 bch2_print_string_as_lines(KERN_ERR, out->buf); 564 } 565 566 if (s) 567 s->ret = ret; 568 569 /* 570 * We don't yet track whether the filesystem currently has errors, for 571 * log_fsck_err()s: that would require us to track for every error type 572 * which recovery pass corrects it, to get the fsck exit status correct: 573 */ 574 if (flags & FSCK_CAN_FIX) { 575 if (ret == -BCH_ERR_fsck_fix) { 576 set_bit(BCH_FS_errors_fixed, &c->flags); 577 } else { 578 set_bit(BCH_FS_errors_not_fixed, &c->flags); 579 set_bit(BCH_FS_error, &c->flags); 580 } 581 } 582 err_unlock: 583 mutex_unlock(&c->fsck_error_msgs_lock); 584 err: 585 if (action != action_orig) 586 kfree(action); 587 printbuf_exit(&buf); 588 return ret; 589 } 590 591 static const char * const bch2_bkey_validate_contexts[] = { 592 #define x(n) #n, 593 BKEY_VALIDATE_CONTEXTS() 594 #undef x 595 NULL 596 }; 597 598 int __bch2_bkey_fsck_err(struct bch_fs *c, 599 struct bkey_s_c k, 600 struct bkey_validate_context from, 601 enum bch_sb_error_id err, 602 const char *fmt, ...) 603 { 604 if (from.flags & BCH_VALIDATE_silent) 605 return -BCH_ERR_fsck_delete_bkey; 606 607 unsigned fsck_flags = 0; 608 if (!(from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) { 609 if (test_bit(err, c->sb.errors_silent)) 610 return -BCH_ERR_fsck_delete_bkey; 611 612 fsck_flags |= FSCK_AUTOFIX|FSCK_CAN_FIX; 613 } 614 if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) 615 fsck_flags |= fsck_flags_extra[err]; 616 617 struct printbuf buf = PRINTBUF; 618 prt_printf(&buf, "invalid bkey in %s", 619 bch2_bkey_validate_contexts[from.from]); 620 621 if (from.from == BKEY_VALIDATE_journal) 622 prt_printf(&buf, " journal seq=%llu offset=%u", 623 from.journal_seq, from.journal_offset); 624 625 prt_str(&buf, " btree="); 626 bch2_btree_id_to_text(&buf, from.btree); 627 prt_printf(&buf, " level=%u: ", from.level); 628 629 bch2_bkey_val_to_text(&buf, c, k); 630 prt_newline(&buf); 631 632 va_list args; 633 va_start(args, fmt); 634 prt_vprintf(&buf, fmt, args); 635 va_end(args); 636 637 int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s, delete?", buf.buf); 638 printbuf_exit(&buf); 639 return ret; 640 } 641 642 void bch2_flush_fsck_errs(struct bch_fs *c) 643 { 644 struct fsck_err_state *s, *n; 645 646 mutex_lock(&c->fsck_error_msgs_lock); 647 648 list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { 649 if (s->ratelimited && s->last_msg) 650 bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg); 651 652 list_del(&s->list); 653 kfree(s->last_msg); 654 kfree(s); 655 } 656 657 mutex_unlock(&c->fsck_error_msgs_lock); 658 } 659 660 int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out, 661 subvol_inum inum, u64 offset) 662 { 663 u32 restart_count = trans->restart_count; 664 int ret = 0; 665 666 if (inum.subvol) { 667 ret = bch2_inum_to_path(trans, inum, out); 668 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 669 return ret; 670 } 671 if (!inum.subvol || ret) 672 prt_printf(out, "inum %llu:%llu", inum.subvol, inum.inum); 673 prt_printf(out, " offset %llu: ", offset); 674 675 return trans_was_restarted(trans, restart_count); 676 } 677 678 void bch2_inum_offset_err_msg(struct bch_fs *c, struct printbuf *out, 679 subvol_inum inum, u64 offset) 680 { 681 bch2_trans_do(c, bch2_inum_offset_err_msg_trans(trans, out, inum, offset)); 682 } 683 684 int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out, 685 struct bpos pos) 686 { 687 struct bch_fs *c = trans->c; 688 int ret = 0; 689 690 if (!bch2_snapshot_is_leaf(c, pos.snapshot)) 691 prt_str(out, "(multiple snapshots) "); 692 693 subvol_inum inum = { 694 .subvol = bch2_snapshot_tree_oldest_subvol(c, pos.snapshot), 695 .inum = pos.inode, 696 }; 697 698 if (inum.subvol) { 699 ret = bch2_inum_to_path(trans, inum, out); 700 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 701 return ret; 702 } 703 704 if (!inum.subvol || ret) 705 prt_printf(out, "inum %llu:%u", pos.inode, pos.snapshot); 706 707 prt_printf(out, " offset %llu: ", pos.offset << 8); 708 return 0; 709 } 710 711 void bch2_inum_snap_offset_err_msg(struct bch_fs *c, struct printbuf *out, 712 struct bpos pos) 713 { 714 bch2_trans_do(c, bch2_inum_snap_offset_err_msg_trans(trans, out, pos)); 715 } 716