1 // SPDX-License-Identifier: GPL-2.0 2 #include "bcachefs.h" 3 #include "error.h" 4 #include "journal.h" 5 #include "recovery_passes.h" 6 #include "super.h" 7 #include "thread_with_file.h" 8 9 #define FSCK_ERR_RATELIMIT_NR 10 10 11 bool bch2_inconsistent_error(struct bch_fs *c) 12 { 13 set_bit(BCH_FS_error, &c->flags); 14 15 switch (c->opts.errors) { 16 case BCH_ON_ERROR_continue: 17 return false; 18 case BCH_ON_ERROR_ro: 19 if (bch2_fs_emergency_read_only(c)) 20 bch_err(c, "inconsistency detected - emergency read only at journal seq %llu", 21 journal_cur_seq(&c->journal)); 22 return true; 23 case BCH_ON_ERROR_panic: 24 panic(bch2_fmt(c, "panic after error")); 25 return true; 26 default: 27 BUG(); 28 } 29 } 30 31 int bch2_topology_error(struct bch_fs *c) 32 { 33 set_bit(BCH_FS_topology_error, &c->flags); 34 if (!test_bit(BCH_FS_fsck_running, &c->flags)) { 35 bch2_inconsistent_error(c); 36 return -BCH_ERR_btree_need_topology_repair; 37 } else { 38 return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: 39 -BCH_ERR_btree_node_read_validate_error; 40 } 41 } 42 43 void bch2_fatal_error(struct bch_fs *c) 44 { 45 if (bch2_fs_emergency_read_only(c)) 46 bch_err(c, "fatal error - emergency read only"); 47 } 48 49 void bch2_io_error_work(struct work_struct *work) 50 { 51 struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work); 52 struct bch_fs *c = ca->fs; 53 bool dev; 54 55 down_write(&c->state_lock); 56 dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_ro, 57 BCH_FORCE_IF_DEGRADED); 58 if (dev 59 ? __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, 60 BCH_FORCE_IF_DEGRADED) 61 : bch2_fs_emergency_read_only(c)) 62 bch_err(ca, 63 "too many IO errors, setting %s RO", 64 dev ? "device" : "filesystem"); 65 up_write(&c->state_lock); 66 } 67 68 void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type) 69 { 70 atomic64_inc(&ca->errors[type]); 71 //queue_work(system_long_wq, &ca->io_error_work); 72 } 73 74 enum ask_yn { 75 YN_NO, 76 YN_YES, 77 YN_ALLNO, 78 YN_ALLYES, 79 }; 80 81 static enum ask_yn parse_yn_response(char *buf) 82 { 83 buf = strim(buf); 84 85 if (strlen(buf) == 1) 86 switch (buf[0]) { 87 case 'n': 88 return YN_NO; 89 case 'y': 90 return YN_YES; 91 case 'N': 92 return YN_ALLNO; 93 case 'Y': 94 return YN_ALLYES; 95 } 96 return -1; 97 } 98 99 #ifdef __KERNEL__ 100 static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c) 101 { 102 struct stdio_redirect *stdio = c->stdio; 103 104 if (c->stdio_filter && c->stdio_filter != current) 105 stdio = NULL; 106 107 if (!stdio) 108 return YN_NO; 109 110 char buf[100]; 111 int ret; 112 113 do { 114 bch2_print(c, " (y,n, or Y,N for all errors of this type) "); 115 116 int r = bch2_stdio_redirect_readline(stdio, buf, sizeof(buf) - 1); 117 if (r < 0) 118 return YN_NO; 119 buf[r] = '\0'; 120 } while ((ret = parse_yn_response(buf)) < 0); 121 122 return ret; 123 } 124 #else 125 126 #include "tools-util.h" 127 128 static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c) 129 { 130 char *buf = NULL; 131 size_t buflen = 0; 132 int ret; 133 134 do { 135 fputs(" (y,n, or Y,N for all errors of this type) ", stdout); 136 fflush(stdout); 137 138 if (getline(&buf, &buflen, stdin) < 0) 139 die("error reading from standard input"); 140 } while ((ret = parse_yn_response(buf)) < 0); 141 142 free(buf); 143 return ret; 144 } 145 146 #endif 147 148 static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) 149 { 150 struct fsck_err_state *s; 151 152 if (!test_bit(BCH_FS_fsck_running, &c->flags)) 153 return NULL; 154 155 list_for_each_entry(s, &c->fsck_error_msgs, list) 156 if (s->fmt == fmt) { 157 /* 158 * move it to the head of the list: repeated fsck errors 159 * are common 160 */ 161 list_move(&s->list, &c->fsck_error_msgs); 162 return s; 163 } 164 165 s = kzalloc(sizeof(*s), GFP_NOFS); 166 if (!s) { 167 if (!c->fsck_alloc_msgs_err) 168 bch_err(c, "kmalloc err, cannot ratelimit fsck errs"); 169 c->fsck_alloc_msgs_err = true; 170 return NULL; 171 } 172 173 INIT_LIST_HEAD(&s->list); 174 s->fmt = fmt; 175 list_add(&s->list, &c->fsck_error_msgs); 176 return s; 177 } 178 179 int bch2_fsck_err(struct bch_fs *c, 180 enum bch_fsck_flags flags, 181 enum bch_sb_error_id err, 182 const char *fmt, ...) 183 { 184 struct fsck_err_state *s = NULL; 185 va_list args; 186 bool print = true, suppressing = false, inconsistent = false; 187 struct printbuf buf = PRINTBUF, *out = &buf; 188 int ret = -BCH_ERR_fsck_ignore; 189 190 if ((flags & FSCK_CAN_FIX) && 191 test_bit(err, c->sb.errors_silent)) 192 return -BCH_ERR_fsck_fix; 193 194 bch2_sb_error_count(c, err); 195 196 va_start(args, fmt); 197 prt_vprintf(out, fmt, args); 198 va_end(args); 199 200 mutex_lock(&c->fsck_error_msgs_lock); 201 s = fsck_err_get(c, fmt); 202 if (s) { 203 /* 204 * We may be called multiple times for the same error on 205 * transaction restart - this memoizes instead of asking the user 206 * multiple times for the same error: 207 */ 208 if (s->last_msg && !strcmp(buf.buf, s->last_msg)) { 209 ret = s->ret; 210 mutex_unlock(&c->fsck_error_msgs_lock); 211 printbuf_exit(&buf); 212 return ret; 213 } 214 215 kfree(s->last_msg); 216 s->last_msg = kstrdup(buf.buf, GFP_KERNEL); 217 218 if (c->opts.ratelimit_errors && 219 !(flags & FSCK_NO_RATELIMIT) && 220 s->nr >= FSCK_ERR_RATELIMIT_NR) { 221 if (s->nr == FSCK_ERR_RATELIMIT_NR) 222 suppressing = true; 223 else 224 print = false; 225 } 226 227 s->nr++; 228 } 229 230 #ifdef BCACHEFS_LOG_PREFIX 231 if (!strncmp(fmt, "bcachefs:", 9)) 232 prt_printf(out, bch2_log_msg(c, "")); 233 #endif 234 235 if (!test_bit(BCH_FS_fsck_running, &c->flags)) { 236 if (c->opts.errors != BCH_ON_ERROR_continue || 237 !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { 238 prt_str(out, ", shutting down"); 239 inconsistent = true; 240 ret = -BCH_ERR_fsck_errors_not_fixed; 241 } else if (flags & FSCK_CAN_FIX) { 242 prt_str(out, ", fixing"); 243 ret = -BCH_ERR_fsck_fix; 244 } else { 245 prt_str(out, ", continuing"); 246 ret = -BCH_ERR_fsck_ignore; 247 } 248 } else if (c->opts.fix_errors == FSCK_FIX_exit) { 249 prt_str(out, ", exiting"); 250 ret = -BCH_ERR_fsck_errors_not_fixed; 251 } else if (flags & FSCK_CAN_FIX) { 252 int fix = s && s->fix 253 ? s->fix 254 : c->opts.fix_errors; 255 256 if (fix == FSCK_FIX_ask) { 257 int ask; 258 259 prt_str(out, ": fix?"); 260 if (bch2_fs_stdio_redirect(c)) 261 bch2_print(c, "%s", out->buf); 262 else 263 bch2_print_string_as_lines(KERN_ERR, out->buf); 264 print = false; 265 266 ask = bch2_fsck_ask_yn(c); 267 268 if (ask >= YN_ALLNO && s) 269 s->fix = ask == YN_ALLNO 270 ? FSCK_FIX_no 271 : FSCK_FIX_yes; 272 273 ret = ask & 1 274 ? -BCH_ERR_fsck_fix 275 : -BCH_ERR_fsck_ignore; 276 } else if (fix == FSCK_FIX_yes || 277 (c->opts.nochanges && 278 !(flags & FSCK_CAN_IGNORE))) { 279 prt_str(out, ", fixing"); 280 ret = -BCH_ERR_fsck_fix; 281 } else { 282 prt_str(out, ", not fixing"); 283 } 284 } else if (flags & FSCK_NEED_FSCK) { 285 prt_str(out, " (run fsck to correct)"); 286 } else { 287 prt_str(out, " (repair unimplemented)"); 288 } 289 290 if (ret == -BCH_ERR_fsck_ignore && 291 (c->opts.fix_errors == FSCK_FIX_exit || 292 !(flags & FSCK_CAN_IGNORE))) 293 ret = -BCH_ERR_fsck_errors_not_fixed; 294 295 if (print) { 296 if (bch2_fs_stdio_redirect(c)) 297 bch2_print(c, "%s\n", out->buf); 298 else 299 bch2_print_string_as_lines(KERN_ERR, out->buf); 300 } 301 302 if (test_bit(BCH_FS_fsck_running, &c->flags) && 303 (ret != -BCH_ERR_fsck_fix && 304 ret != -BCH_ERR_fsck_ignore)) 305 bch_err(c, "Unable to continue, halting"); 306 else if (suppressing) 307 bch_err(c, "Ratelimiting new instances of previous error"); 308 309 if (s) 310 s->ret = ret; 311 312 mutex_unlock(&c->fsck_error_msgs_lock); 313 314 printbuf_exit(&buf); 315 316 if (inconsistent) 317 bch2_inconsistent_error(c); 318 319 if (ret == -BCH_ERR_fsck_fix) { 320 set_bit(BCH_FS_errors_fixed, &c->flags); 321 } else { 322 set_bit(BCH_FS_errors_not_fixed, &c->flags); 323 set_bit(BCH_FS_error, &c->flags); 324 } 325 326 return ret; 327 } 328 329 void bch2_flush_fsck_errs(struct bch_fs *c) 330 { 331 struct fsck_err_state *s, *n; 332 333 mutex_lock(&c->fsck_error_msgs_lock); 334 335 list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { 336 if (s->ratelimited && s->last_msg) 337 bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg); 338 339 list_del(&s->list); 340 kfree(s->last_msg); 341 kfree(s); 342 } 343 344 mutex_unlock(&c->fsck_error_msgs_lock); 345 } 346