xref: /linux/fs/bcachefs/error.c (revision 6f2a71a99ebd5dfaa7948a2e9c59eae94b741bd8)
1 // SPDX-License-Identifier: GPL-2.0
2 #include "bcachefs.h"
3 #include "btree_cache.h"
4 #include "btree_iter.h"
5 #include "error.h"
6 #include "journal.h"
7 #include "namei.h"
8 #include "recovery_passes.h"
9 #include "super.h"
10 #include "thread_with_file.h"
11 
12 #define FSCK_ERR_RATELIMIT_NR	10
13 
/*
 * Prepare @out for a log message about @fs_or_dev_name.
 *
 * Calls printbuf_indent_add_nextline() with an indent of 2 and, when
 * BCACHEFS_LOG_PREFIX is defined, emits the "bcachefs (<name>): " prefix.
 */
void __bch2_log_msg_start(const char *fs_or_dev_name, struct printbuf *out)
{
	printbuf_indent_add_nextline(out, 2);

#if defined(BCACHEFS_LOG_PREFIX)
	/* Without the prefix option the name is intentionally unused */
	prt_printf(out, "bcachefs (%s): ", fs_or_dev_name);
#endif
}
22 
__bch2_inconsistent_error(struct bch_fs * c,struct printbuf * out)23 bool __bch2_inconsistent_error(struct bch_fs *c, struct printbuf *out)
24 {
25 	set_bit(BCH_FS_error, &c->flags);
26 
27 	switch (c->opts.errors) {
28 	case BCH_ON_ERROR_continue:
29 		return false;
30 	case BCH_ON_ERROR_fix_safe:
31 	case BCH_ON_ERROR_ro:
32 		bch2_fs_emergency_read_only2(c, out);
33 		return true;
34 	case BCH_ON_ERROR_panic:
35 		bch2_print_str(c, KERN_ERR, out->buf);
36 		panic(bch2_fmt(c, "panic after error"));
37 		return true;
38 	default:
39 		BUG();
40 	}
41 }
42 
bch2_inconsistent_error(struct bch_fs * c)43 bool bch2_inconsistent_error(struct bch_fs *c)
44 {
45 	struct printbuf buf = PRINTBUF;
46 	buf.atomic++;
47 
48 	printbuf_indent_add_nextline(&buf, 2);
49 
50 	bool ret = __bch2_inconsistent_error(c, &buf);
51 	if (ret)
52 		bch_err(c, "%s", buf.buf);
53 	printbuf_exit(&buf);
54 	return ret;
55 }
56 
57 __printf(3, 0)
bch2_fs_trans_inconsistent(struct bch_fs * c,struct btree_trans * trans,const char * fmt,va_list args)58 static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *trans,
59 				       const char *fmt, va_list args)
60 {
61 	struct printbuf buf = PRINTBUF;
62 	buf.atomic++;
63 
64 	bch2_log_msg_start(c, &buf);
65 
66 	prt_vprintf(&buf, fmt, args);
67 	prt_newline(&buf);
68 
69 	if (trans)
70 		bch2_trans_updates_to_text(&buf, trans);
71 	bool ret = __bch2_inconsistent_error(c, &buf);
72 	bch2_print_str(c, KERN_ERR, buf.buf);
73 
74 	printbuf_exit(&buf);
75 	return ret;
76 }
77 
/*
 * Report a filesystem inconsistency with a printf-style message, without a
 * btree transaction.
 *
 * Returns the result of bch2_fs_trans_inconsistent().
 */
bool bch2_fs_inconsistent(struct bch_fs *c, const char *fmt, ...)
{
	va_list ap;
	bool halted;

	va_start(ap, fmt);
	halted = bch2_fs_trans_inconsistent(c, NULL, fmt, ap);
	va_end(ap);

	return halted;
}
86 
bch2_trans_inconsistent(struct btree_trans * trans,const char * fmt,...)87 bool bch2_trans_inconsistent(struct btree_trans *trans, const char *fmt, ...)
88 {
89 	va_list args;
90 	va_start(args, fmt);
91 	bool ret = bch2_fs_trans_inconsistent(trans->c, trans, fmt, args);
92 	va_end(args);
93 	return ret;
94 }
95 
__bch2_topology_error(struct bch_fs * c,struct printbuf * out)96 int __bch2_topology_error(struct bch_fs *c, struct printbuf *out)
97 {
98 	prt_printf(out, "btree topology error: ");
99 
100 	set_bit(BCH_FS_topology_error, &c->flags);
101 	if (!test_bit(BCH_FS_in_recovery, &c->flags)) {
102 		__bch2_inconsistent_error(c, out);
103 		return bch_err_throw(c, btree_need_topology_repair);
104 	} else {
105 		return bch2_run_explicit_recovery_pass(c, out, BCH_RECOVERY_PASS_check_topology, 0) ?:
106 			bch_err_throw(c, btree_node_read_validate_error);
107 	}
108 }
109 
bch2_fs_topology_error(struct bch_fs * c,const char * fmt,...)110 int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...)
111 {
112 	struct printbuf buf = PRINTBUF;
113 
114 	bch2_log_msg_start(c, &buf);
115 
116 	va_list args;
117 	va_start(args, fmt);
118 	prt_vprintf(&buf, fmt, args);
119 	va_end(args);
120 
121 	int ret = __bch2_topology_error(c, &buf);
122 	bch2_print_str(c, KERN_ERR, buf.buf);
123 
124 	printbuf_exit(&buf);
125 	return ret;
126 }
127 
/*
 * Fatal error: call bch2_fs_emergency_read_only() and log a message when it
 * returns true.
 */
void bch2_fatal_error(struct bch_fs *c)
{
	if (!bch2_fs_emergency_read_only(c))
		return;

	bch_err(c, "fatal error - emergency read only");
}
133 
bch2_io_error_work(struct work_struct * work)134 void bch2_io_error_work(struct work_struct *work)
135 {
136 	struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work);
137 	struct bch_fs *c = ca->fs;
138 
139 	/* XXX: if it's reads or checksums that are failing, set it to failed */
140 
141 	down_write(&c->state_lock);
142 	unsigned long write_errors_start = READ_ONCE(ca->write_errors_start);
143 
144 	if (write_errors_start &&
145 	    time_after(jiffies,
146 		       write_errors_start + c->opts.write_error_timeout * HZ)) {
147 		if (ca->mi.state >= BCH_MEMBER_STATE_ro)
148 			goto out;
149 
150 		bool dev = !__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro,
151 						 BCH_FORCE_IF_DEGRADED);
152 		struct printbuf buf = PRINTBUF;
153 		__bch2_log_msg_start(ca->name, &buf);
154 
155 		prt_printf(&buf, "writes erroring for %u seconds, setting %s ro",
156 			c->opts.write_error_timeout,
157 			dev ? "device" : "filesystem");
158 		if (!dev)
159 			bch2_fs_emergency_read_only2(c, &buf);
160 
161 		bch2_print_str(c, KERN_ERR, buf.buf);
162 		printbuf_exit(&buf);
163 	}
164 out:
165 	up_write(&c->state_lock);
166 }
167 
bch2_io_error(struct bch_dev * ca,enum bch_member_error_type type)168 void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type)
169 {
170 	atomic64_inc(&ca->errors[type]);
171 
172 	if (type == BCH_MEMBER_ERROR_write && !ca->write_errors_start)
173 		ca->write_errors_start = jiffies;
174 
175 	queue_work(system_long_wq, &ca->io_error_work);
176 }
177 
/*
 * Answers to an interactive fsck prompt.
 *
 * The numeric values matter: __bch2_fsck_err() tests bit 0 for "yes" and
 * treats values >= YN_ALLNO as applying to all errors of the same type.
 */
enum ask_yn {
	YN_NO		= 0,	/* 'n' */
	YN_YES		= 1,	/* 'y' */
	YN_ALLNO	= 2,	/* 'N' */
	YN_ALLYES	= 3,	/* 'Y' */
};
184 
parse_yn_response(char * buf)185 static enum ask_yn parse_yn_response(char *buf)
186 {
187 	buf = strim(buf);
188 
189 	if (strlen(buf) == 1)
190 		switch (buf[0]) {
191 		case 'n':
192 			return YN_NO;
193 		case 'y':
194 			return YN_YES;
195 		case 'N':
196 			return YN_ALLNO;
197 		case 'Y':
198 			return YN_ALLYES;
199 		}
200 	return -1;
201 }
202 
203 #ifdef __KERNEL__
bch2_fsck_ask_yn(struct bch_fs * c,struct btree_trans * trans)204 static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c, struct btree_trans *trans)
205 {
206 	struct stdio_redirect *stdio = c->stdio;
207 
208 	if (c->stdio_filter && c->stdio_filter != current)
209 		stdio = NULL;
210 
211 	if (!stdio)
212 		return YN_NO;
213 
214 	if (trans)
215 		bch2_trans_unlock(trans);
216 
217 	unsigned long unlock_long_at = trans ? jiffies + HZ * 2 : 0;
218 	darray_char line = {};
219 	int ret;
220 
221 	do {
222 		unsigned long t;
223 		bch2_print(c, " (y,n, or Y,N for all errors of this type) ");
224 rewait:
225 		t = unlock_long_at
226 			? max_t(long, unlock_long_at - jiffies, 0)
227 			: MAX_SCHEDULE_TIMEOUT;
228 
229 		int r = bch2_stdio_redirect_readline_timeout(stdio, &line, t);
230 		if (r == -ETIME) {
231 			bch2_trans_unlock_long(trans);
232 			unlock_long_at = 0;
233 			goto rewait;
234 		}
235 
236 		if (r < 0) {
237 			ret = YN_NO;
238 			break;
239 		}
240 
241 		darray_last(line) = '\0';
242 	} while ((ret = parse_yn_response(line.data)) < 0);
243 
244 	darray_exit(&line);
245 	return ret;
246 }
247 #else
248 
249 #include "tools-util.h"
250 
bch2_fsck_ask_yn(struct bch_fs * c,struct btree_trans * trans)251 static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c, struct btree_trans *trans)
252 {
253 	char *buf = NULL;
254 	size_t buflen = 0;
255 	int ret;
256 
257 	do {
258 		fputs(" (y,n, or Y,N for all errors of this type) ", stdout);
259 		fflush(stdout);
260 
261 		if (getline(&buf, &buflen, stdin) < 0)
262 			die("error reading from standard input");
263 	} while ((ret = parse_yn_response(buf)) < 0);
264 
265 	free(buf);
266 	return ret;
267 }
268 
269 #endif
270 
fsck_err_get(struct bch_fs * c,enum bch_sb_error_id id)271 static struct fsck_err_state *fsck_err_get(struct bch_fs *c,
272 					   enum bch_sb_error_id id)
273 {
274 	struct fsck_err_state *s;
275 
276 	list_for_each_entry(s, &c->fsck_error_msgs, list)
277 		if (s->id == id) {
278 			/*
279 			 * move it to the head of the list: repeated fsck errors
280 			 * are common
281 			 */
282 			list_move(&s->list, &c->fsck_error_msgs);
283 			return s;
284 		}
285 
286 	s = kzalloc(sizeof(*s), GFP_NOFS);
287 	if (!s) {
288 		if (!c->fsck_alloc_msgs_err)
289 			bch_err(c, "kmalloc err, cannot ratelimit fsck errs");
290 		c->fsck_alloc_msgs_err = true;
291 		return NULL;
292 	}
293 
294 	INIT_LIST_HEAD(&s->list);
295 	s->id = id;
296 	list_add(&s->list, &c->fsck_error_msgs);
297 	return s;
298 }
299 
/*
 * Append the progressive form of an action prompt to @out:
 * s/fix?/fixing/ s/recreate?/recreating/
 *
 * @action must be non-empty and end in '?'. The '?' is dropped, then one
 * trailing 'e' (if any), and "ing" is appended.
 */
static void prt_actioning(struct printbuf *out, const char *action)
{
	unsigned len = strlen(action);

	/*
	 * Check the length first: indexing action[len - 1] on an empty string
	 * would read before the start of the buffer.
	 */
	BUG_ON(!len || action[len - 1] != '?');
	--len;

	/* A bare "?" leaves nothing to inspect - don't index action[-1] */
	if (len && action[len - 1] == 'e')
		--len;

	prt_bytes(out, action, len);
	prt_str(out, "ing");
}
314 
315 static const u8 fsck_flags_extra[] = {
316 #define x(t, n, flags)		[BCH_FSCK_ERR_##t] = flags,
317 	BCH_SB_ERRS()
318 #undef x
319 };
320 
do_fsck_ask_yn(struct bch_fs * c,struct btree_trans * trans,struct printbuf * question,const char * action)321 static int do_fsck_ask_yn(struct bch_fs *c,
322 			  struct btree_trans *trans,
323 			  struct printbuf *question,
324 			  const char *action)
325 {
326 	prt_str(question, ", ");
327 	prt_str(question, action);
328 
329 	if (bch2_fs_stdio_redirect(c))
330 		bch2_print(c, "%s", question->buf);
331 	else
332 		bch2_print_str(c, KERN_ERR, question->buf);
333 
334 	int ask = bch2_fsck_ask_yn(c, trans);
335 
336 	if (trans) {
337 		int ret = bch2_trans_relock(trans);
338 		if (ret)
339 			return ret;
340 	}
341 
342 	return ask;
343 }
344 
count_fsck_err_locked(struct bch_fs * c,enum bch_sb_error_id id,const char * msg,bool * repeat,bool * print,bool * suppress)345 static struct fsck_err_state *count_fsck_err_locked(struct bch_fs *c,
346 			  enum bch_sb_error_id id, const char *msg,
347 			  bool *repeat, bool *print, bool *suppress)
348 {
349 	bch2_sb_error_count(c, id);
350 
351 	struct fsck_err_state *s = fsck_err_get(c, id);
352 	if (s) {
353 		/*
354 		 * We may be called multiple times for the same error on
355 		 * transaction restart - this memoizes instead of asking the user
356 		 * multiple times for the same error:
357 		 */
358 		if (s->last_msg && !strcmp(msg, s->last_msg)) {
359 			*repeat = true;
360 			*print = false;
361 			return s;
362 		}
363 
364 		kfree(s->last_msg);
365 		s->last_msg = kstrdup(msg, GFP_KERNEL);
366 
367 		if (c->opts.ratelimit_errors &&
368 		    s->nr >= FSCK_ERR_RATELIMIT_NR) {
369 			if (s->nr == FSCK_ERR_RATELIMIT_NR)
370 				*suppress = true;
371 			else
372 				*print = false;
373 		}
374 
375 		s->nr++;
376 	}
377 	return s;
378 }
379 
__bch2_count_fsck_err(struct bch_fs * c,enum bch_sb_error_id id,struct printbuf * msg)380 bool __bch2_count_fsck_err(struct bch_fs *c,
381 			   enum bch_sb_error_id id, struct printbuf *msg)
382 {
383 	bch2_sb_error_count(c, id);
384 
385 	mutex_lock(&c->fsck_error_msgs_lock);
386 	bool print = true, repeat = false, suppress = false;
387 
388 	count_fsck_err_locked(c, id, msg->buf, &repeat, &print, &suppress);
389 	mutex_unlock(&c->fsck_error_msgs_lock);
390 
391 	if (suppress)
392 		prt_printf(msg, "Ratelimiting new instances of previous error\n");
393 
394 	return print && !repeat;
395 }
396 
bch2_fsck_err_opt(struct bch_fs * c,enum bch_fsck_flags flags,enum bch_sb_error_id err)397 int bch2_fsck_err_opt(struct bch_fs *c,
398 		      enum bch_fsck_flags flags,
399 		      enum bch_sb_error_id err)
400 {
401 	if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra)))
402 		flags |= fsck_flags_extra[err];
403 
404 	if (test_bit(BCH_FS_in_fsck, &c->flags)) {
405 		if (!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE)))
406 			return bch_err_throw(c, fsck_repair_unimplemented);
407 
408 		switch (c->opts.fix_errors) {
409 		case FSCK_FIX_exit:
410 			return bch_err_throw(c, fsck_errors_not_fixed);
411 		case FSCK_FIX_yes:
412 			if (flags & FSCK_CAN_FIX)
413 				return bch_err_throw(c, fsck_fix);
414 			fallthrough;
415 		case FSCK_FIX_no:
416 			if (flags & FSCK_CAN_IGNORE)
417 				return bch_err_throw(c, fsck_ignore);
418 			return bch_err_throw(c, fsck_errors_not_fixed);
419 		case FSCK_FIX_ask:
420 			if (flags & FSCK_AUTOFIX)
421 				return bch_err_throw(c, fsck_fix);
422 			return bch_err_throw(c, fsck_ask);
423 		default:
424 			BUG();
425 		}
426 	} else {
427 		if ((flags & FSCK_AUTOFIX) &&
428 		    (c->opts.errors == BCH_ON_ERROR_continue ||
429 		     c->opts.errors == BCH_ON_ERROR_fix_safe))
430 			return bch_err_throw(c, fsck_fix);
431 
432 		if (c->opts.errors == BCH_ON_ERROR_continue &&
433 		    (flags & FSCK_CAN_IGNORE))
434 			return bch_err_throw(c, fsck_ignore);
435 		return bch_err_throw(c, fsck_errors_not_fixed);
436 	}
437 }
438 
__bch2_fsck_err(struct bch_fs * c,struct btree_trans * trans,enum bch_fsck_flags flags,enum bch_sb_error_id err,const char * fmt,...)439 int __bch2_fsck_err(struct bch_fs *c,
440 		  struct btree_trans *trans,
441 		  enum bch_fsck_flags flags,
442 		  enum bch_sb_error_id err,
443 		  const char *fmt, ...)
444 {
445 	va_list args;
446 	struct printbuf buf = PRINTBUF, *out = &buf;
447 	int ret = 0;
448 	const char *action_orig = "fix?", *action = action_orig;
449 
450 	might_sleep();
451 
452 	if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra)))
453 		flags |= fsck_flags_extra[err];
454 
455 	if (!c)
456 		c = trans->c;
457 
458 	/*
459 	 * Ugly: if there's a transaction in the current task it has to be
460 	 * passed in to unlock if we prompt for user input.
461 	 *
462 	 * But, plumbing a transaction and transaction restarts into
463 	 * bkey_validate() is problematic.
464 	 *
465 	 * So:
466 	 * - make all bkey errors AUTOFIX, they're simple anyways (we just
467 	 *   delete the key)
468 	 * - and we don't need to warn if we're not prompting
469 	 */
470 	WARN_ON((flags & FSCK_CAN_FIX) &&
471 		!(flags & FSCK_AUTOFIX) &&
472 		!trans &&
473 		bch2_current_has_btree_trans(c));
474 
475 	if (test_bit(err, c->sb.errors_silent))
476 		return flags & FSCK_CAN_FIX
477 			? bch_err_throw(c, fsck_fix)
478 			: bch_err_throw(c, fsck_ignore);
479 
480 	printbuf_indent_add_nextline(out, 2);
481 
482 #ifdef BCACHEFS_LOG_PREFIX
483 	if (strncmp(fmt, "bcachefs", 8))
484 		prt_printf(out, bch2_log_msg(c, ""));
485 #endif
486 
487 	va_start(args, fmt);
488 	prt_vprintf(out, fmt, args);
489 	va_end(args);
490 
491 	/* Custom fix/continue/recreate/etc.? */
492 	if (out->buf[out->pos - 1] == '?') {
493 		const char *p = strrchr(out->buf, ',');
494 		if (p) {
495 			out->pos = p - out->buf;
496 			action = kstrdup(p + 2, GFP_KERNEL);
497 			if (!action) {
498 				ret = -ENOMEM;
499 				goto err;
500 			}
501 		}
502 	}
503 
504 	mutex_lock(&c->fsck_error_msgs_lock);
505 	bool repeat = false, print = true, suppress = false;
506 	bool inconsistent = false, exiting = false;
507 	struct fsck_err_state *s =
508 		count_fsck_err_locked(c, err, buf.buf, &repeat, &print, &suppress);
509 	if (repeat) {
510 		ret = s->ret;
511 		goto err_unlock;
512 	}
513 
514 	if ((flags & FSCK_AUTOFIX) &&
515 	    (c->opts.errors == BCH_ON_ERROR_continue ||
516 	     c->opts.errors == BCH_ON_ERROR_fix_safe)) {
517 		prt_str(out, ", ");
518 		if (flags & FSCK_CAN_FIX) {
519 			prt_actioning(out, action);
520 			ret = bch_err_throw(c, fsck_fix);
521 		} else {
522 			prt_str(out, ", continuing");
523 			ret = bch_err_throw(c, fsck_ignore);
524 		}
525 
526 		goto print;
527 	} else if (!test_bit(BCH_FS_in_fsck, &c->flags)) {
528 		if (c->opts.errors != BCH_ON_ERROR_continue ||
529 		    !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
530 			prt_str_indented(out, ", shutting down\n"
531 					 "error not marked as autofix and not in fsck\n"
532 					 "run fsck, and forward to devs so error can be marked for self-healing");
533 			inconsistent = true;
534 			print = true;
535 			ret = bch_err_throw(c, fsck_errors_not_fixed);
536 		} else if (flags & FSCK_CAN_FIX) {
537 			prt_str(out, ", ");
538 			prt_actioning(out, action);
539 			ret = bch_err_throw(c, fsck_fix);
540 		} else {
541 			prt_str(out, ", continuing");
542 			ret = bch_err_throw(c, fsck_ignore);
543 		}
544 	} else if (c->opts.fix_errors == FSCK_FIX_exit) {
545 		prt_str(out, ", exiting");
546 		ret = bch_err_throw(c, fsck_errors_not_fixed);
547 	} else if (flags & FSCK_CAN_FIX) {
548 		int fix = s && s->fix
549 			? s->fix
550 			: c->opts.fix_errors;
551 
552 		if (fix == FSCK_FIX_ask) {
553 			print = false;
554 
555 			ret = do_fsck_ask_yn(c, trans, out, action);
556 			if (ret < 0)
557 				goto err_unlock;
558 
559 			if (ret >= YN_ALLNO && s)
560 				s->fix = ret == YN_ALLNO
561 					? FSCK_FIX_no
562 					: FSCK_FIX_yes;
563 
564 			ret = ret & 1
565 				? bch_err_throw(c, fsck_fix)
566 				: bch_err_throw(c, fsck_ignore);
567 		} else if (fix == FSCK_FIX_yes ||
568 			   (c->opts.nochanges &&
569 			    !(flags & FSCK_CAN_IGNORE))) {
570 			prt_str(out, ", ");
571 			prt_actioning(out, action);
572 			ret = bch_err_throw(c, fsck_fix);
573 		} else {
574 			prt_str(out, ", not ");
575 			prt_actioning(out, action);
576 			ret = bch_err_throw(c, fsck_ignore);
577 		}
578 	} else {
579 		if (flags & FSCK_CAN_IGNORE) {
580 			prt_str(out, ", continuing");
581 			ret = bch_err_throw(c, fsck_ignore);
582 		} else {
583 			prt_str(out, " (repair unimplemented)");
584 			ret = bch_err_throw(c, fsck_repair_unimplemented);
585 		}
586 	}
587 
588 	if (bch2_err_matches(ret, BCH_ERR_fsck_ignore) &&
589 	    (c->opts.fix_errors == FSCK_FIX_exit ||
590 	     !(flags & FSCK_CAN_IGNORE)))
591 		ret = bch_err_throw(c, fsck_errors_not_fixed);
592 
593 	if (test_bit(BCH_FS_in_fsck, &c->flags) &&
594 	    (!bch2_err_matches(ret, BCH_ERR_fsck_fix) &&
595 	     !bch2_err_matches(ret, BCH_ERR_fsck_ignore))) {
596 		exiting = true;
597 		print = true;
598 	}
599 print:
600 	prt_newline(out);
601 
602 	if (inconsistent)
603 		__bch2_inconsistent_error(c, out);
604 	else if (exiting)
605 		prt_printf(out, "Unable to continue, halting\n");
606 	else if (suppress)
607 		prt_printf(out, "Ratelimiting new instances of previous error\n");
608 
609 	if (print) {
610 		/* possibly strip an empty line, from printbuf_indent_add */
611 		while (out->pos && out->buf[out->pos - 1] == ' ')
612 			--out->pos;
613 		printbuf_nul_terminate(out);
614 
615 		if (bch2_fs_stdio_redirect(c))
616 			bch2_print(c, "%s", out->buf);
617 		else
618 			bch2_print_str(c, KERN_ERR, out->buf);
619 	}
620 
621 	if (s)
622 		s->ret = ret;
623 
624 	if (trans &&
625 	    !(flags & FSCK_ERR_NO_LOG) &&
626 	    ret == -BCH_ERR_fsck_fix)
627 		ret = bch2_trans_log_str(trans, bch2_sb_error_strs[err]) ?: ret;
628 err_unlock:
629 	mutex_unlock(&c->fsck_error_msgs_lock);
630 err:
631 	/*
632 	 * We don't yet track whether the filesystem currently has errors, for
633 	 * log_fsck_err()s: that would require us to track for every error type
634 	 * which recovery pass corrects it, to get the fsck exit status correct:
635 	 */
636 	if (bch2_err_matches(ret, BCH_ERR_fsck_fix)) {
637 		set_bit(BCH_FS_errors_fixed, &c->flags);
638 	} else {
639 		set_bit(BCH_FS_errors_not_fixed, &c->flags);
640 		set_bit(BCH_FS_error, &c->flags);
641 	}
642 
643 	if (action != action_orig)
644 		kfree(action);
645 	printbuf_exit(&buf);
646 
647 	BUG_ON(!ret);
648 	return ret;
649 }
650 
651 static const char * const bch2_bkey_validate_contexts[] = {
652 #define x(n) #n,
653 	BKEY_VALIDATE_CONTEXTS()
654 #undef x
655 	NULL
656 };
657 
__bch2_bkey_fsck_err(struct bch_fs * c,struct bkey_s_c k,struct bkey_validate_context from,enum bch_sb_error_id err,const char * fmt,...)658 int __bch2_bkey_fsck_err(struct bch_fs *c,
659 			 struct bkey_s_c k,
660 			 struct bkey_validate_context from,
661 			 enum bch_sb_error_id err,
662 			 const char *fmt, ...)
663 {
664 	if (from.flags & BCH_VALIDATE_silent)
665 		return bch_err_throw(c, fsck_delete_bkey);
666 
667 	unsigned fsck_flags = 0;
668 	if (!(from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) {
669 		if (test_bit(err, c->sb.errors_silent))
670 			return bch_err_throw(c, fsck_delete_bkey);
671 
672 		fsck_flags |= FSCK_AUTOFIX|FSCK_CAN_FIX;
673 	}
674 	if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra)))
675 		fsck_flags |= fsck_flags_extra[err];
676 
677 	struct printbuf buf = PRINTBUF;
678 	prt_printf(&buf, "invalid bkey in %s",
679 		   bch2_bkey_validate_contexts[from.from]);
680 
681 	if (from.from == BKEY_VALIDATE_journal)
682 		prt_printf(&buf, " journal seq=%llu offset=%u",
683 			   from.journal_seq, from.journal_offset);
684 
685 	prt_str(&buf, " btree=");
686 	bch2_btree_id_to_text(&buf, from.btree);
687 	prt_printf(&buf, " level=%u: ", from.level);
688 
689 	bch2_bkey_val_to_text(&buf, c, k);
690 	prt_newline(&buf);
691 
692 	va_list args;
693 	va_start(args, fmt);
694 	prt_vprintf(&buf, fmt, args);
695 	va_end(args);
696 
697 	int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s, delete?", buf.buf);
698 	printbuf_exit(&buf);
699 	return ret;
700 }
701 
__bch2_flush_fsck_errs(struct bch_fs * c,bool print)702 static void __bch2_flush_fsck_errs(struct bch_fs *c, bool print)
703 {
704 	struct fsck_err_state *s, *n;
705 
706 	mutex_lock(&c->fsck_error_msgs_lock);
707 
708 	list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) {
709 		if (print && s->ratelimited && s->last_msg)
710 			bch_err(c, "Saw %llu errors like:\n  %s", s->nr, s->last_msg);
711 
712 		list_del(&s->list);
713 		kfree(s->last_msg);
714 		kfree(s);
715 	}
716 
717 	mutex_unlock(&c->fsck_error_msgs_lock);
718 }
719 
/* Free all saved fsck error state, with printing of summaries enabled. */
void bch2_flush_fsck_errs(struct bch_fs *c)
{
	const bool print = true;

	__bch2_flush_fsck_errs(c, print);
}
724 
/* Free all saved fsck error state without printing anything. */
void bch2_free_fsck_errs(struct bch_fs *c)
{
	const bool print = false;

	__bch2_flush_fsck_errs(c, print);
}
729 
/*
 * Append "<file> offset <offset>: " to @out, for use as an error message
 * prefix.
 *
 * The file is identified by its path when @inum has a subvolume and
 * bch2_inum_to_path() succeeds, and as "inum <subvol>:<inum>" otherwise.
 *
 * Returns a transaction restart error straight from the path lookup (with
 * nothing further appended), otherwise the result of trans_was_restarted()
 * against the restart count sampled on entry.
 */
int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
				    subvol_inum inum, u64 offset)
{
	const u32 restart_count = trans->restart_count;
	bool have_path = false;

	if (inum.subvol) {
		int ret = bch2_inum_to_path(trans, inum, out);

		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			return ret;

		/* Any other failure just means we fall back to the raw numbers */
		have_path = !ret;
	}

	if (!have_path)
		prt_printf(out, "inum %llu:%llu", inum.subvol, inum.inum);

	prt_printf(out, " offset %llu: ", offset);

	return trans_was_restarted(trans, restart_count);
}
747 
/*
 * Non-transactional wrapper: runs bch2_inum_offset_err_msg_trans() via
 * bch2_trans_do(). The result is discarded.
 */
void bch2_inum_offset_err_msg(struct bch_fs *c, struct printbuf *out,
			      subvol_inum inum, u64 offset)
{
	bch2_trans_do(c,
		bch2_inum_offset_err_msg_trans(trans, out, inum, offset));
}
753 
/*
 * Append "<path> offset <n>: " to @out for the inode and snapshot in @pos,
 * for use as an error message prefix.
 *
 * Returns 0 on success, or the error from bch2_inum_snapshot_to_path(), in
 * which case the offset is not appended.
 */
int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
					struct bpos pos)
{
	int ret = bch2_inum_snapshot_to_path(trans, pos.inode, pos.snapshot, NULL, out);

	/*
	 * NOTE(review): the offset is printed shifted left by 8; if pos.offset
	 * is in 512-byte sectors a byte offset would be << 9 - confirm the
	 * intended units.
	 */
	if (!ret)
		prt_printf(out, " offset %llu: ", pos.offset << 8);

	return ret;
}
764 
bch2_inum_snap_offset_err_msg(struct bch_fs * c,struct printbuf * out,struct bpos pos)765 void bch2_inum_snap_offset_err_msg(struct bch_fs *c, struct printbuf *out,
766 				  struct bpos pos)
767 {
768 	bch2_trans_do(c, bch2_inum_snap_offset_err_msg_trans(trans, out, pos));
769 }
770