xref: /linux/fs/bcachefs/error.h (revision f694f30e81c4ade358eb8c75273bac1a48f0cb8f)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _BCACHEFS_ERROR_H
3 #define _BCACHEFS_ERROR_H
4 
5 #include <linux/list.h>
6 #include <linux/printk.h>
7 #include "bkey_types.h"
8 #include "sb-errors.h"
9 
10 struct bch_dev;
11 struct bch_fs;
12 struct work_struct;
13 
14 /*
15  * XXX: separate out errors that indicate on disk data is inconsistent, and flag
16  * superblock as such
17  */
18 
19 /* Error messages: */
20 
21 void bch2_log_msg_start(struct bch_fs *, struct printbuf *);
22 
23 /*
24  * Inconsistency errors: The on disk data is inconsistent. If these occur during
25  * initial recovery, they don't indicate a bug in the running code - we walk all
26  * the metadata before modifying anything. If they occur at runtime, they
27  * indicate either a bug in the running code or (less likely) data is being
28  * silently corrupted under us.
29  *
30  * XXX: audit all inconsistent errors and make sure they're all recoverable, in
31  * BCH_ON_ERROR_CONTINUE mode
32  */
33 
34 bool __bch2_inconsistent_error(struct bch_fs *, struct printbuf *);
35 bool bch2_inconsistent_error(struct bch_fs *);
36 __printf(2, 3)
37 bool bch2_fs_inconsistent(struct bch_fs *, const char *, ...);
38 
/*
 * Report a filesystem inconsistency when @cond is true.
 *
 * @cond is evaluated exactly once. When it holds, the remaining arguments are
 * forwarded unchanged to bch2_fs_inconsistent(). The whole expression
 * evaluates to the truth value of @cond, so it can sit directly in an if ().
 */
#define bch2_fs_inconsistent_on(cond, ...)				\
({									\
	bool _inconsistent = unlikely(!!(cond));			\
									\
	if (_inconsistent)						\
		bch2_fs_inconsistent(__VA_ARGS__);			\
									\
	_inconsistent;							\
})
46 
47 __printf(2, 3)
48 bool bch2_trans_inconsistent(struct btree_trans *, const char *, ...);
49 
/*
 * Transaction flavour of the "inconsistent on condition" helper.
 *
 * @cond is evaluated exactly once. When it holds, the remaining arguments are
 * forwarded unchanged to bch2_trans_inconsistent(). The whole expression
 * evaluates to the truth value of @cond.
 */
#define bch2_trans_inconsistent_on(cond, ...)				\
({									\
	bool _inconsistent = unlikely(!!(cond));			\
									\
	if (_inconsistent)						\
		bch2_trans_inconsistent(__VA_ARGS__);			\
									\
	_inconsistent;							\
})
57 
58 int __bch2_topology_error(struct bch_fs *, struct printbuf *);
59 __printf(2, 3)
60 int bch2_fs_topology_error(struct bch_fs *, const char *, ...);
61 
62 /*
63  * Fsck errors: inconsistency errors we detect at mount time, and should ideally
64  * be able to repair:
65  */
66 
/*
 * Bookkeeping record that carries an fsck error ID; records are chained
 * through @list.
 *
 * NOTE(review): nothing in this header reads or writes these fields, so the
 * per-field notes below are inferred from names and types only - confirm
 * against the code that maintains the list.
 */
struct fsck_err_state {
	struct list_head	list;		/* list linkage */
	enum bch_sb_error_id	id;		/* error ID this record carries */
	u64			nr;		/* presumably: occurrences counted so far */
	bool			ratelimited;	/* presumably: output for this ID is ratelimited */
	int			ret;		/* presumably: a saved return code - TODO confirm */
	int			fix;		/* presumably: a saved fix decision - TODO confirm */
	char			*last_msg;	/* presumably: last message text; ownership not visible here */
};
76 
/*
 * Shorthand for bch2_sb_err_count() that takes the error name without its
 * BCH_FSCK_ERR_ prefix.
 */
#define fsck_err_count(_fs, _name)					\
	bch2_sb_err_count(_fs, BCH_FSCK_ERR_##_name)
78 
79 void __bch2_count_fsck_err(struct bch_fs *,
80 			   enum bch_sb_error_id, const char *,
81 			   bool *, bool *, bool *);
/*
 * Shorthand for __bch2_count_fsck_err(): @_name is the error name without
 * its BCH_FSCK_ERR_ prefix, all further arguments are passed through as-is.
 */
#define bch2_count_fsck_err(_fs, _name, ...)				\
	__bch2_count_fsck_err(_fs, BCH_FSCK_ERR_##_name, __VA_ARGS__)
84 
85 __printf(5, 6) __cold
86 int __bch2_fsck_err(struct bch_fs *, struct btree_trans *,
87 		  enum bch_fsck_flags,
88 		  enum bch_sb_error_id,
89 		  const char *, ...);
/*
 * Type-dispatching front end for __bch2_fsck_err().
 *
 * @c may be either a struct bch_fs * or a struct btree_trans *: whichever
 * type matches is passed in the corresponding argument slot of
 * __bch2_fsck_err(), and the other slot receives NULL. @_err is the error
 * name without its BCH_FSCK_ERR_ prefix; the variadic arguments (format
 * string and its parameters) are passed through unchanged.
 */
#define bch2_fsck_err(c, _fsck_flags, _err, ...)			\
	__bch2_fsck_err(type_is(c, struct bch_fs *)			\
				? (struct bch_fs *) c			\
				: NULL,					\
			type_is(c, struct btree_trans *)		\
				? (struct btree_trans *) c		\
				: NULL,					\
			_fsck_flags, BCH_FSCK_ERR_##_err, __VA_ARGS__)
94 
95 void bch2_flush_fsck_errs(struct bch_fs *);
96 
/*
 * Evaluate @_do (an expression yielding an fsck return code) once and
 * classify the result:
 *
 *   -BCH_ERR_fsck_fix:		the expression evaluates to true
 *   -BCH_ERR_fsck_ignore:	the expression evaluates to false
 *   anything else:		the code is stored in the caller's 'ret' and
 *				control jumps to the caller's 'fsck_err' label
 *
 * The enclosing function must therefore provide both an assignable 'ret' and
 * an 'fsck_err' label.
 */
#define fsck_err_wrap(_do)						\
({									\
	int _fsck_ret = _do;						\
									\
	if (!(_fsck_ret == -BCH_ERR_fsck_fix ||				\
	      _fsck_ret == -BCH_ERR_fsck_ignore)) {			\
		ret = _fsck_ret;					\
		goto fsck_err;						\
	}								\
									\
	_fsck_ret == -BCH_ERR_fsck_fix;					\
})

/* bch2_fsck_err() with its result routed through fsck_err_wrap(). */
#define __fsck_err(...)		fsck_err_wrap(bch2_fsck_err(__VA_ARGS__))
110 
111 /* These macros return true if error should be fixed: */
112 
113 /* XXX: mark in superblock that filesystem contains errors, if we ignore: */
114 
/*
 * Conditionally raise an fsck error.
 *
 * might_sleep() is invoked unconditionally, and - when @c is a
 * struct bch_fs * - a warning fires if bch2_current_has_btree_trans()
 * reports true for it. __fsck_err() is only invoked when @cond holds and
 * its value becomes the value of the expression; otherwise the expression is
 * false.
 */
#define __fsck_err_on(cond, c, _fsck_flags, _err, ...)			\
({									\
	int _should_fix = false;					\
									\
	might_sleep();							\
									\
	if (type_is(c, struct bch_fs *))				\
		WARN_ON(bch2_current_has_btree_trans((struct bch_fs *) c));\
									\
	if (unlikely(cond))						\
		_should_fix = __fsck_err(c, _fsck_flags, _err, __VA_ARGS__);\
									\
	_should_fix;							\
})
124 
/*
 * Convenience wrappers around __fsck_err() / __fsck_err_on() that differ only
 * in the fsck flags they pass:
 *
 *   mustfix_fsck_err, mustfix_fsck_err_on:	FSCK_CAN_FIX
 *   fsck_err, fsck_err_on:			FSCK_CAN_FIX|FSCK_CAN_IGNORE
 *   log_fsck_err:				FSCK_CAN_IGNORE
 *
 * The *_on variants take the condition as their first argument.
 */
#define mustfix_fsck_err(_c, _err, ...)					\
	__fsck_err(_c, FSCK_CAN_FIX, _err, __VA_ARGS__)

#define mustfix_fsck_err_on(_cond, _c, _err, ...)			\
	__fsck_err_on(_cond, _c, FSCK_CAN_FIX, _err, __VA_ARGS__)

#define fsck_err(_c, _err, ...)						\
	__fsck_err(_c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err, __VA_ARGS__)

#define fsck_err_on(_cond, _c, _err, ...)				\
	__fsck_err_on(_cond, _c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err, __VA_ARGS__)

#define log_fsck_err(_c, _err, ...)					\
	__fsck_err(_c, FSCK_CAN_IGNORE, _err, __VA_ARGS__)
139 
/*
 * Invoke log_fsck_err() with the remaining arguments when @cond is true.
 *
 * @cond is evaluated exactly once and its truth value is the value of the
 * expression; the result of log_fsck_err() itself is discarded.
 */
#define log_fsck_err_on(cond, ...)					\
({									\
	bool _hit = unlikely(!!(cond));					\
									\
	if (_hit)							\
		log_fsck_err(__VA_ARGS__);				\
									\
	_hit;								\
})
147 
148 enum bch_validate_flags;
149 __printf(5, 6)
150 int __bch2_bkey_fsck_err(struct bch_fs *,
151 			 struct bkey_s_c,
152 			 struct bkey_validate_context from,
153 			 enum bch_sb_error_id,
154 			 const char *, ...);
155 
/*
 * Report a bkey validation error and bail out of the calling validator.
 *
 * For now, bkey fsck errors are always handled by deleting the entire key -
 * this will change at some point. Accordingly the error is reported through
 * __bch2_bkey_fsck_err(), but that call's return code is not propagated:
 * 'ret' is unconditionally set to -BCH_ERR_fsck_delete_bkey before jumping to
 * 'fsck_err'.
 *
 * NOTE(review): an earlier form stored return codes other than
 * -BCH_ERR_fsck_fix / -BCH_ERR_fsck_ignore in 'ret' and then overwrote them
 * on the very next statement. That dead store is gone; if propagating those
 * codes was the real intent it needs an explicit else branch plus an audit of
 * every caller that tests for -BCH_ERR_fsck_delete_bkey.
 *
 * Implicitly uses 'k', 'from', 'ret' and the 'fsck_err' label of the calling
 * function. @_err_type is the error name without its BCH_FSCK_ERR_ prefix.
 */
#define bkey_fsck_err(c, _err_type, _err_msg, ...)			\
do {									\
	(void) __bch2_bkey_fsck_err(c, k, from,				\
				BCH_FSCK_ERR_##_err_type,		\
				_err_msg, ##__VA_ARGS__);		\
	ret = -BCH_ERR_fsck_delete_bkey;				\
	goto fsck_err;							\
} while (0)
171 
/*
 * Conditional form of bkey_fsck_err(): when @cond is true, the remaining
 * arguments are handed to bkey_fsck_err(); otherwise nothing happens.
 */
#define bkey_fsck_err_on(cond, ...)					\
do {									\
	if (unlikely(cond)) {						\
		bkey_fsck_err(__VA_ARGS__);				\
	}								\
} while (0)
177 
178 /*
179  * Fatal errors: these don't indicate a bug, but we can't continue running in RW
180  * mode - pretty much just due to metadata IO errors:
181  */
182 
183 void bch2_fatal_error(struct bch_fs *);
184 
/*
 * Log a fatal error and then call bch2_fatal_error().
 *
 * The message is emitted through bch_err() as "<function>(): fatal error "
 * followed by @_fmt formatted with the variadic arguments. Note that @c
 * appears twice in the expansion.
 */
#define bch2_fs_fatal_error(c, _fmt, ...)				\
do {									\
	bch_err(c, "%s(): fatal error " _fmt,				\
		__func__, ##__VA_ARGS__);				\
	bch2_fatal_error(c);						\
} while (0)
190 
/*
 * Raise a fatal error via bch2_fs_fatal_error() when @cond is true.
 *
 * @cond is evaluated exactly once and its truth value is the value of the
 * expression.
 */
#define bch2_fs_fatal_err_on(cond, c, ...)				\
({									\
	bool _fatal = unlikely(!!(cond));				\
									\
	if (_fatal)							\
		bch2_fs_fatal_error(c, __VA_ARGS__);			\
									\
	_fatal;								\
})
199 
200 /*
201  * IO errors: either recoverable metadata IO (because we have replicas), or data
202  * IO - we need to log it and print out a message, but we don't (necessarily)
203  * want to shut down the fs:
204  */
205 
206 void bch2_io_error_work(struct work_struct *);
207 
208 /* Does the error handling without logging a message */
209 void bch2_io_error(struct bch_dev *, enum bch_member_error_type);
210 
211 #ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
212 void bch2_latency_acct(struct bch_dev *, u64, int);
213 #else
214 static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) {}
215 #endif
216 
217 static inline void bch2_account_io_success_fail(struct bch_dev *ca,
218 						enum bch_member_error_type type,
219 						bool success)
220 {
221 	if (likely(success)) {
222 		if (type == BCH_MEMBER_ERROR_write &&
223 		    ca->write_errors_start)
224 			ca->write_errors_start = 0;
225 	} else {
226 		bch2_io_error(ca, type);
227 	}
228 }
229 
230 static inline void bch2_account_io_completion(struct bch_dev *ca,
231 					      enum bch_member_error_type type,
232 					      u64 submit_time, bool success)
233 {
234 	if (unlikely(!ca))
235 		return;
236 
237 	if (type != BCH_MEMBER_ERROR_checksum)
238 		bch2_latency_acct(ca, submit_time, type);
239 
240 	bch2_account_io_success_fail(ca, type, success);
241 }
242 
243 int bch2_inum_offset_err_msg_trans(struct btree_trans *, struct printbuf *, subvol_inum, u64);
244 
245 void bch2_inum_offset_err_msg(struct bch_fs *, struct printbuf *, subvol_inum, u64);
246 
247 int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *, struct printbuf *, struct bpos);
248 void bch2_inum_snap_offset_err_msg(struct bch_fs *, struct printbuf *, struct bpos);
249 
250 #endif /* _BCACHEFS_ERROR_H */
251