1 // SPDX-License-Identifier: GPL-2.0 2 #include "bcachefs.h" 3 #include "error.h" 4 #include "recovery.h" 5 #include "super.h" 6 #include "thread_with_file.h" 7 8 #define FSCK_ERR_RATELIMIT_NR 10 9 10 bool bch2_inconsistent_error(struct bch_fs *c) 11 { 12 set_bit(BCH_FS_error, &c->flags); 13 14 switch (c->opts.errors) { 15 case BCH_ON_ERROR_continue: 16 return false; 17 case BCH_ON_ERROR_ro: 18 if (bch2_fs_emergency_read_only(c)) 19 bch_err(c, "inconsistency detected - emergency read only"); 20 return true; 21 case BCH_ON_ERROR_panic: 22 panic(bch2_fmt(c, "panic after error")); 23 return true; 24 default: 25 BUG(); 26 } 27 } 28 29 int bch2_topology_error(struct bch_fs *c) 30 { 31 set_bit(BCH_FS_topology_error, &c->flags); 32 if (!test_bit(BCH_FS_fsck_running, &c->flags)) { 33 bch2_inconsistent_error(c); 34 return -BCH_ERR_btree_need_topology_repair; 35 } else { 36 return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: 37 -BCH_ERR_btree_node_read_validate_error; 38 } 39 } 40 41 void bch2_fatal_error(struct bch_fs *c) 42 { 43 if (bch2_fs_emergency_read_only(c)) 44 bch_err(c, "fatal error - emergency read only"); 45 } 46 47 void bch2_io_error_work(struct work_struct *work) 48 { 49 struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work); 50 struct bch_fs *c = ca->fs; 51 bool dev; 52 53 down_write(&c->state_lock); 54 dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_ro, 55 BCH_FORCE_IF_DEGRADED); 56 if (dev 57 ? __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, 58 BCH_FORCE_IF_DEGRADED) 59 : bch2_fs_emergency_read_only(c)) 60 bch_err(ca, 61 "too many IO errors, setting %s RO", 62 dev ? "device" : "filesystem"); 63 up_write(&c->state_lock); 64 } 65 66 void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type) 67 { 68 atomic64_inc(&ca->errors[type]); 69 //queue_work(system_long_wq, &ca->io_error_work); 70 } 71 72 enum ask_yn { 73 YN_NO, 74 YN_YES, 75 YN_ALLNO, 76 YN_ALLYES, 77 }; 78 79 static enum ask_yn parse_yn_response(char *buf) 80 { 81 buf = strim(buf); 82 83 if (strlen(buf) == 1) 84 switch (buf[0]) { 85 case 'n': 86 return YN_NO; 87 case 'y': 88 return YN_YES; 89 case 'N': 90 return YN_ALLNO; 91 case 'Y': 92 return YN_ALLYES; 93 } 94 return -1; 95 } 96 97 #ifdef __KERNEL__ 98 static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c) 99 { 100 struct stdio_redirect *stdio = c->stdio; 101 102 if (c->stdio_filter && c->stdio_filter != current) 103 stdio = NULL; 104 105 if (!stdio) 106 return YN_NO; 107 108 char buf[100]; 109 int ret; 110 111 do { 112 bch2_print(c, " (y,n, or Y,N for all errors of this type) "); 113 114 int r = bch2_stdio_redirect_readline(stdio, buf, sizeof(buf) - 1); 115 if (r < 0) 116 return YN_NO; 117 buf[r] = '\0'; 118 } while ((ret = parse_yn_response(buf)) < 0); 119 120 return ret; 121 } 122 #else 123 124 #include "tools-util.h" 125 126 static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c) 127 { 128 char *buf = NULL; 129 size_t buflen = 0; 130 int ret; 131 132 do { 133 fputs(" (y,n, or Y,N for all errors of this type) ", stdout); 134 fflush(stdout); 135 136 if (getline(&buf, &buflen, stdin) < 0) 137 die("error reading from standard input"); 138 } while ((ret = parse_yn_response(buf)) < 0); 139 140 free(buf); 141 return ret; 142 } 143 144 #endif 145 146 static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) 147 { 148 struct fsck_err_state *s; 149 150 if (!test_bit(BCH_FS_fsck_running, &c->flags)) 151 return NULL; 152 153 list_for_each_entry(s, &c->fsck_error_msgs, list) 154 if (s->fmt == fmt) { 155 /* 156 * move it to the head of the list: repeated fsck errors 157 * are common 158 */ 159 list_move(&s->list, &c->fsck_error_msgs); 160 return s; 161 } 162 163 s = kzalloc(sizeof(*s), GFP_NOFS); 164 if (!s) { 165 if (!c->fsck_alloc_msgs_err) 166 bch_err(c, "kmalloc err, cannot ratelimit fsck errs"); 167 c->fsck_alloc_msgs_err = true; 168 return NULL; 169 } 170 171 INIT_LIST_HEAD(&s->list); 172 s->fmt = fmt; 173 list_add(&s->list, &c->fsck_error_msgs); 174 return s; 175 } 176 177 int bch2_fsck_err(struct bch_fs *c, 178 enum bch_fsck_flags flags, 179 enum bch_sb_error_id err, 180 const char *fmt, ...) 181 { 182 struct fsck_err_state *s = NULL; 183 va_list args; 184 bool print = true, suppressing = false, inconsistent = false; 185 struct printbuf buf = PRINTBUF, *out = &buf; 186 int ret = -BCH_ERR_fsck_ignore; 187 188 if ((flags & FSCK_CAN_FIX) && 189 test_bit(err, c->sb.errors_silent)) 190 return -BCH_ERR_fsck_fix; 191 192 bch2_sb_error_count(c, err); 193 194 va_start(args, fmt); 195 prt_vprintf(out, fmt, args); 196 va_end(args); 197 198 mutex_lock(&c->fsck_error_msgs_lock); 199 s = fsck_err_get(c, fmt); 200 if (s) { 201 /* 202 * We may be called multiple times for the same error on 203 * transaction restart - this memoizes instead of asking the user 204 * multiple times for the same error: 205 */ 206 if (s->last_msg && !strcmp(buf.buf, s->last_msg)) { 207 ret = s->ret; 208 mutex_unlock(&c->fsck_error_msgs_lock); 209 printbuf_exit(&buf); 210 return ret; 211 } 212 213 kfree(s->last_msg); 214 s->last_msg = kstrdup(buf.buf, GFP_KERNEL); 215 216 if (c->opts.ratelimit_errors && 217 !(flags & FSCK_NO_RATELIMIT) && 218 s->nr >= FSCK_ERR_RATELIMIT_NR) { 219 if (s->nr == FSCK_ERR_RATELIMIT_NR) 220 suppressing = true; 221 else 222 print = false; 223 } 224 225 s->nr++; 226 } 227 228 #ifdef BCACHEFS_LOG_PREFIX 229 if (!strncmp(fmt, "bcachefs:", 9)) 230 prt_printf(out, bch2_log_msg(c, "")); 231 #endif 232 233 if (!test_bit(BCH_FS_fsck_running, &c->flags)) { 234 if (c->opts.errors != BCH_ON_ERROR_continue || 235 !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { 236 prt_str(out, ", shutting down"); 237 inconsistent = true; 238 ret = -BCH_ERR_fsck_errors_not_fixed; 239 } else if (flags & FSCK_CAN_FIX) { 240 prt_str(out, ", fixing"); 241 ret = -BCH_ERR_fsck_fix; 242 } else { 243 prt_str(out, ", continuing"); 244 ret = -BCH_ERR_fsck_ignore; 245 } 246 } else if (c->opts.fix_errors == FSCK_FIX_exit) { 247 prt_str(out, ", exiting"); 248 ret = -BCH_ERR_fsck_errors_not_fixed; 249 } else if (flags & FSCK_CAN_FIX) { 250 int fix = s && s->fix 251 ? s->fix 252 : c->opts.fix_errors; 253 254 if (fix == FSCK_FIX_ask) { 255 int ask; 256 257 prt_str(out, ": fix?"); 258 if (bch2_fs_stdio_redirect(c)) 259 bch2_print(c, "%s", out->buf); 260 else 261 bch2_print_string_as_lines(KERN_ERR, out->buf); 262 print = false; 263 264 ask = bch2_fsck_ask_yn(c); 265 266 if (ask >= YN_ALLNO && s) 267 s->fix = ask == YN_ALLNO 268 ? FSCK_FIX_no 269 : FSCK_FIX_yes; 270 271 ret = ask & 1 272 ? -BCH_ERR_fsck_fix 273 : -BCH_ERR_fsck_ignore; 274 } else if (fix == FSCK_FIX_yes || 275 (c->opts.nochanges && 276 !(flags & FSCK_CAN_IGNORE))) { 277 prt_str(out, ", fixing"); 278 ret = -BCH_ERR_fsck_fix; 279 } else { 280 prt_str(out, ", not fixing"); 281 } 282 } else if (flags & FSCK_NEED_FSCK) { 283 prt_str(out, " (run fsck to correct)"); 284 } else { 285 prt_str(out, " (repair unimplemented)"); 286 } 287 288 if (ret == -BCH_ERR_fsck_ignore && 289 (c->opts.fix_errors == FSCK_FIX_exit || 290 !(flags & FSCK_CAN_IGNORE))) 291 ret = -BCH_ERR_fsck_errors_not_fixed; 292 293 if (print) { 294 if (bch2_fs_stdio_redirect(c)) 295 bch2_print(c, "%s\n", out->buf); 296 else 297 bch2_print_string_as_lines(KERN_ERR, out->buf); 298 } 299 300 if (test_bit(BCH_FS_fsck_running, &c->flags) && 301 (ret != -BCH_ERR_fsck_fix && 302 ret != -BCH_ERR_fsck_ignore)) 303 bch_err(c, "Unable to continue, halting"); 304 else if (suppressing) 305 bch_err(c, "Ratelimiting new instances of previous error"); 306 307 if (s) 308 s->ret = ret; 309 310 mutex_unlock(&c->fsck_error_msgs_lock); 311 312 printbuf_exit(&buf); 313 314 if (inconsistent) 315 bch2_inconsistent_error(c); 316 317 if (ret == -BCH_ERR_fsck_fix) { 318 set_bit(BCH_FS_errors_fixed, &c->flags); 319 } else { 320 set_bit(BCH_FS_errors_not_fixed, &c->flags); 321 set_bit(BCH_FS_error, &c->flags); 322 } 323 324 return ret; 325 } 326 327 void bch2_flush_fsck_errs(struct bch_fs *c) 328 { 329 struct fsck_err_state *s, *n; 330 331 mutex_lock(&c->fsck_error_msgs_lock); 332 333 list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { 334 if (s->ratelimited && s->last_msg) 335 bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg); 336 337 list_del(&s->list); 338 kfree(s->last_msg); 339 kfree(s); 340 } 341 342 mutex_unlock(&c->fsck_error_msgs_lock); 343 } 344