1 // SPDX-License-Identifier: GPL-2.0 2 #include "bcachefs.h" 3 #include "error.h" 4 #include "super.h" 5 #include "thread_with_file.h" 6 7 #define FSCK_ERR_RATELIMIT_NR 10 8 9 bool bch2_inconsistent_error(struct bch_fs *c) 10 { 11 set_bit(BCH_FS_error, &c->flags); 12 13 switch (c->opts.errors) { 14 case BCH_ON_ERROR_continue: 15 return false; 16 case BCH_ON_ERROR_ro: 17 if (bch2_fs_emergency_read_only(c)) 18 bch_err(c, "inconsistency detected - emergency read only"); 19 return true; 20 case BCH_ON_ERROR_panic: 21 panic(bch2_fmt(c, "panic after error")); 22 return true; 23 default: 24 BUG(); 25 } 26 } 27 28 void bch2_topology_error(struct bch_fs *c) 29 { 30 set_bit(BCH_FS_topology_error, &c->flags); 31 if (!test_bit(BCH_FS_fsck_running, &c->flags)) 32 bch2_inconsistent_error(c); 33 } 34 35 void bch2_fatal_error(struct bch_fs *c) 36 { 37 if (bch2_fs_emergency_read_only(c)) 38 bch_err(c, "fatal error - emergency read only"); 39 } 40 41 void bch2_io_error_work(struct work_struct *work) 42 { 43 struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work); 44 struct bch_fs *c = ca->fs; 45 bool dev; 46 47 down_write(&c->state_lock); 48 dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_ro, 49 BCH_FORCE_IF_DEGRADED); 50 if (dev 51 ? __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, 52 BCH_FORCE_IF_DEGRADED) 53 : bch2_fs_emergency_read_only(c)) 54 bch_err(ca, 55 "too many IO errors, setting %s RO", 56 dev ? "device" : "filesystem"); 57 up_write(&c->state_lock); 58 } 59 60 void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type) 61 { 62 atomic64_inc(&ca->errors[type]); 63 //queue_work(system_long_wq, &ca->io_error_work); 64 } 65 66 enum ask_yn { 67 YN_NO, 68 YN_YES, 69 YN_ALLNO, 70 YN_ALLYES, 71 }; 72 73 static enum ask_yn parse_yn_response(char *buf) 74 { 75 buf = strim(buf); 76 77 if (strlen(buf) == 1) 78 switch (buf[0]) { 79 case 'n': 80 return YN_NO; 81 case 'y': 82 return YN_YES; 83 case 'N': 84 return YN_ALLNO; 85 case 'Y': 86 return YN_ALLYES; 87 } 88 return -1; 89 } 90 91 #ifdef __KERNEL__ 92 static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c) 93 { 94 struct stdio_redirect *stdio = c->stdio; 95 96 if (c->stdio_filter && c->stdio_filter != current) 97 stdio = NULL; 98 99 if (!stdio) 100 return YN_NO; 101 102 char buf[100]; 103 int ret; 104 105 do { 106 bch2_print(c, " (y,n, or Y,N for all errors of this type) "); 107 108 int r = bch2_stdio_redirect_readline(stdio, buf, sizeof(buf) - 1); 109 if (r < 0) 110 return YN_NO; 111 buf[r] = '\0'; 112 } while ((ret = parse_yn_response(buf)) < 0); 113 114 return ret; 115 } 116 #else 117 118 #include "tools-util.h" 119 120 static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c) 121 { 122 char *buf = NULL; 123 size_t buflen = 0; 124 int ret; 125 126 do { 127 fputs(" (y,n, or Y,N for all errors of this type) ", stdout); 128 fflush(stdout); 129 130 if (getline(&buf, &buflen, stdin) < 0) 131 die("error reading from standard input"); 132 } while ((ret = parse_yn_response(buf)) < 0); 133 134 free(buf); 135 return ret; 136 } 137 138 #endif 139 140 static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) 141 { 142 struct fsck_err_state *s; 143 144 if (!test_bit(BCH_FS_fsck_running, &c->flags)) 145 return NULL; 146 147 list_for_each_entry(s, &c->fsck_error_msgs, list) 148 if (s->fmt == fmt) { 149 /* 150 * move it to the head of the list: repeated fsck errors 151 * are common 152 */ 153 list_move(&s->list, &c->fsck_error_msgs); 154 return s; 155 } 156 157 s = kzalloc(sizeof(*s), GFP_NOFS); 158 if (!s) { 159 if (!c->fsck_alloc_msgs_err) 160 bch_err(c, "kmalloc err, cannot ratelimit fsck errs"); 161 c->fsck_alloc_msgs_err = true; 162 return NULL; 163 } 164 165 INIT_LIST_HEAD(&s->list); 166 s->fmt = fmt; 167 list_add(&s->list, &c->fsck_error_msgs); 168 return s; 169 } 170 171 int bch2_fsck_err(struct bch_fs *c, 172 enum bch_fsck_flags flags, 173 enum bch_sb_error_id err, 174 const char *fmt, ...) 175 { 176 struct fsck_err_state *s = NULL; 177 va_list args; 178 bool print = true, suppressing = false, inconsistent = false; 179 struct printbuf buf = PRINTBUF, *out = &buf; 180 int ret = -BCH_ERR_fsck_ignore; 181 182 if ((flags & FSCK_CAN_FIX) && 183 test_bit(err, c->sb.errors_silent)) 184 return -BCH_ERR_fsck_fix; 185 186 bch2_sb_error_count(c, err); 187 188 va_start(args, fmt); 189 prt_vprintf(out, fmt, args); 190 va_end(args); 191 192 mutex_lock(&c->fsck_error_msgs_lock); 193 s = fsck_err_get(c, fmt); 194 if (s) { 195 /* 196 * We may be called multiple times for the same error on 197 * transaction restart - this memoizes instead of asking the user 198 * multiple times for the same error: 199 */ 200 if (s->last_msg && !strcmp(buf.buf, s->last_msg)) { 201 ret = s->ret; 202 mutex_unlock(&c->fsck_error_msgs_lock); 203 printbuf_exit(&buf); 204 return ret; 205 } 206 207 kfree(s->last_msg); 208 s->last_msg = kstrdup(buf.buf, GFP_KERNEL); 209 210 if (c->opts.ratelimit_errors && 211 !(flags & FSCK_NO_RATELIMIT) && 212 s->nr >= FSCK_ERR_RATELIMIT_NR) { 213 if (s->nr == FSCK_ERR_RATELIMIT_NR) 214 suppressing = true; 215 else 216 print = false; 217 } 218 219 s->nr++; 220 } 221 222 #ifdef BCACHEFS_LOG_PREFIX 223 if (!strncmp(fmt, "bcachefs:", 9)) 224 prt_printf(out, bch2_log_msg(c, "")); 225 #endif 226 227 if (!test_bit(BCH_FS_fsck_running, &c->flags)) { 228 if (c->opts.errors != BCH_ON_ERROR_continue || 229 !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { 230 prt_str(out, ", shutting down"); 231 inconsistent = true; 232 ret = -BCH_ERR_fsck_errors_not_fixed; 233 } else if (flags & FSCK_CAN_FIX) { 234 prt_str(out, ", fixing"); 235 ret = -BCH_ERR_fsck_fix; 236 } else { 237 prt_str(out, ", continuing"); 238 ret = -BCH_ERR_fsck_ignore; 239 } 240 } else if (c->opts.fix_errors == FSCK_FIX_exit) { 241 prt_str(out, ", exiting"); 242 ret = -BCH_ERR_fsck_errors_not_fixed; 243 } else if (flags & FSCK_CAN_FIX) { 244 int fix = s && s->fix 245 ? s->fix 246 : c->opts.fix_errors; 247 248 if (fix == FSCK_FIX_ask) { 249 int ask; 250 251 prt_str(out, ": fix?"); 252 if (bch2_fs_stdio_redirect(c)) 253 bch2_print(c, "%s", out->buf); 254 else 255 bch2_print_string_as_lines(KERN_ERR, out->buf); 256 print = false; 257 258 ask = bch2_fsck_ask_yn(c); 259 260 if (ask >= YN_ALLNO && s) 261 s->fix = ask == YN_ALLNO 262 ? FSCK_FIX_no 263 : FSCK_FIX_yes; 264 265 ret = ask & 1 266 ? -BCH_ERR_fsck_fix 267 : -BCH_ERR_fsck_ignore; 268 } else if (fix == FSCK_FIX_yes || 269 (c->opts.nochanges && 270 !(flags & FSCK_CAN_IGNORE))) { 271 prt_str(out, ", fixing"); 272 ret = -BCH_ERR_fsck_fix; 273 } else { 274 prt_str(out, ", not fixing"); 275 } 276 } else if (flags & FSCK_NEED_FSCK) { 277 prt_str(out, " (run fsck to correct)"); 278 } else { 279 prt_str(out, " (repair unimplemented)"); 280 } 281 282 if (ret == -BCH_ERR_fsck_ignore && 283 (c->opts.fix_errors == FSCK_FIX_exit || 284 !(flags & FSCK_CAN_IGNORE))) 285 ret = -BCH_ERR_fsck_errors_not_fixed; 286 287 if (print) { 288 if (bch2_fs_stdio_redirect(c)) 289 bch2_print(c, "%s\n", out->buf); 290 else 291 bch2_print_string_as_lines(KERN_ERR, out->buf); 292 } 293 294 if (test_bit(BCH_FS_fsck_running, &c->flags) && 295 (ret != -BCH_ERR_fsck_fix && 296 ret != -BCH_ERR_fsck_ignore)) 297 bch_err(c, "Unable to continue, halting"); 298 else if (suppressing) 299 bch_err(c, "Ratelimiting new instances of previous error"); 300 301 if (s) 302 s->ret = ret; 303 304 mutex_unlock(&c->fsck_error_msgs_lock); 305 306 printbuf_exit(&buf); 307 308 if (inconsistent) 309 bch2_inconsistent_error(c); 310 311 if (ret == -BCH_ERR_fsck_fix) { 312 set_bit(BCH_FS_errors_fixed, &c->flags); 313 } else { 314 set_bit(BCH_FS_errors_not_fixed, &c->flags); 315 set_bit(BCH_FS_error, &c->flags); 316 } 317 318 return ret; 319 } 320 321 void bch2_flush_fsck_errs(struct bch_fs *c) 322 { 323 struct fsck_err_state *s, *n; 324 325 mutex_lock(&c->fsck_error_msgs_lock); 326 327 list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { 328 if (s->ratelimited && s->last_msg) 329 bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg); 330 331 list_del(&s->list); 332 kfree(s->last_msg); 333 kfree(s); 334 } 335 336 mutex_unlock(&c->fsck_error_msgs_lock); 337 } 338