xref: /linux/fs/xfs/libxfs/xfs_health.h (revision eb01fe7abbe2d0b38824d2a93fdb4cc3eaf2ccc1)
1 /* SPDX-License-Identifier: GPL-2.0+ */
2 /*
3  * Copyright (C) 2019 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  */
6 #ifndef __XFS_HEALTH_H__
7 #define __XFS_HEALTH_H__
8 
9 /*
10  * In-Core Filesystem Health Assessments
11  * =====================================
12  *
13  * We'd like to be able to summarize the current health status of the
14  * filesystem so that the administrator knows when it's necessary to schedule
15  * some downtime for repairs.  Until then, we would also like to avoid abrupt
16  * shutdowns due to corrupt metadata.
17  *
18  * The online scrub feature evaluates the health of all filesystem metadata.
19  * When scrub detects corruption in a piece of metadata it will set the
20  * corresponding sickness flag, and repair will clear it if successful.  If
21  * problems remain at unmount time, we can also request manual intervention by
22  * logging a notice to run xfs_repair.
23  *
24  * Each health tracking group uses a pair of fields for reporting.  The
25  * "checked" field tells us if a given piece of metadata has ever been examined,
26  * and the "sick" field tells us if that piece was found to need repairs.
27  * Therefore we can conclude that for a given sick flag value:
28  *
29  *  - checked && sick   => metadata needs repair
30  *  - checked && !sick  => metadata is ok
31  *  - !checked && sick  => errors have been observed during normal operation,
32  *                         but the metadata has not been checked thoroughly
33  *  - !checked && !sick => has not been examined since mount
34  *
35  * Evidence of health problems can be sorted into three basic categories:
36  *
37  * a) Primary evidence, which signals that something is defective within the
38  *    general grouping of metadata.
39  *
40  * b) Secondary evidence, which are side effects of a primary problem but are
41  *    not themselves problems.  These can be forgotten when the primary
42  *    health problems are addressed.
43  *
44  * c) Indirect evidence, which points to something being wrong in another
45  *    group, but we had to release resources and this is all that's left of
46  *    that state.
47  */
48 
/*
 * Forward declarations for every structure that the prototypes in this
 * header take by pointer.  Declaring the tags at file scope keeps them from
 * being given prototype scope (and thus becoming distinct, incompatible
 * types) if this header is included before the real definitions.
 */
struct xfs_mount;
struct xfs_perag;
struct xfs_inode;
struct xfs_fsop_geom;
struct xfs_ag_geometry;
struct xfs_bulkstat;
struct xfs_btree_cur;
struct xfs_da_args;
55 
/*
 * Sickness flags for metadata that spans the entire filesystem.  Each flag
 * is a single distinct bit.
 */
#define XFS_SICK_FS_COUNTERS	0x01	/* summary counters */
#define XFS_SICK_FS_UQUOTA	0x02	/* user quota */
#define XFS_SICK_FS_GQUOTA	0x04	/* group quota */
#define XFS_SICK_FS_PQUOTA	0x08	/* project quota */
#define XFS_SICK_FS_QUOTACHECK	0x10	/* quota counts */
#define XFS_SICK_FS_NLINKS	0x20	/* inode link counts */
63 
/* Sickness flags for realtime volume metadata, one bit per structure. */
#define XFS_SICK_RT_BITMAP	0x1	/* realtime bitmap */
#define XFS_SICK_RT_SUMMARY	0x2	/* realtime summary */
67 
/* Sickness flags for allocation group (AG) metadata, one bit per structure. */
#define XFS_SICK_AG_SB		0x001	/* superblock */
#define XFS_SICK_AG_AGF		0x002	/* AGF header */
#define XFS_SICK_AG_AGFL	0x004	/* AGFL header */
#define XFS_SICK_AG_AGI		0x008	/* AGI header */
#define XFS_SICK_AG_BNOBT	0x010	/* free space by block */
#define XFS_SICK_AG_CNTBT	0x020	/* free space by length */
#define XFS_SICK_AG_INOBT	0x040	/* inode index */
#define XFS_SICK_AG_FINOBT	0x080	/* free inode index */
#define XFS_SICK_AG_RMAPBT	0x100	/* reverse mappings */
#define XFS_SICK_AG_REFCNTBT	0x200	/* reference counts */
#define XFS_SICK_AG_INODES	0x400	/* inactivated bad inodes */
80 
/* Sickness flags for per-inode metadata, one bit per structure. */
#define XFS_SICK_INO_CORE	0x0001	/* inode core */
#define XFS_SICK_INO_BMBTD	0x0002	/* data fork */
#define XFS_SICK_INO_BMBTA	0x0004	/* attr fork */
#define XFS_SICK_INO_BMBTC	0x0008	/* cow fork */
#define XFS_SICK_INO_DIR	0x0010	/* directory */
#define XFS_SICK_INO_XATTR	0x0020	/* extended attributes */
#define XFS_SICK_INO_SYMLINK	0x0040	/* symbolic link remote target */
#define XFS_SICK_INO_PARENT	0x0080	/* parent pointers */

/* Flags recording that a part of the inode has been erased ("zapped"). */
#define XFS_SICK_INO_BMBTD_ZAPPED	0x0100	/* data fork erased */
#define XFS_SICK_INO_BMBTA_ZAPPED	0x0200	/* attr fork erased */
#define XFS_SICK_INO_DIR_ZAPPED		0x0400	/* directory erased */
#define XFS_SICK_INO_SYMLINK_ZAPPED	0x0800	/* symlink erased */

/*
 * When set, the inode's sick status is not propagated to the AG health
 * summary during inactivation.
 */
#define XFS_SICK_INO_FORGET	0x1000
98 
/*
 * Primary evidence masks: for each health tracking group, the union of the
 * flags that signal a defect within that group's own metadata.
 */
#define XFS_SICK_FS_PRIMARY \
	(XFS_SICK_FS_COUNTERS | XFS_SICK_FS_UQUOTA | XFS_SICK_FS_GQUOTA | \
	 XFS_SICK_FS_PQUOTA | XFS_SICK_FS_QUOTACHECK | XFS_SICK_FS_NLINKS)

#define XFS_SICK_RT_PRIMARY \
	(XFS_SICK_RT_BITMAP | XFS_SICK_RT_SUMMARY)

/* XFS_SICK_AG_INODES is not part of this mask. */
#define XFS_SICK_AG_PRIMARY \
	(XFS_SICK_AG_SB | XFS_SICK_AG_AGF | XFS_SICK_AG_AGFL | \
	 XFS_SICK_AG_AGI | XFS_SICK_AG_BNOBT | XFS_SICK_AG_CNTBT | \
	 XFS_SICK_AG_INOBT | XFS_SICK_AG_FINOBT | XFS_SICK_AG_RMAPBT | \
	 XFS_SICK_AG_REFCNTBT)

#define XFS_SICK_INO_PRIMARY \
	(XFS_SICK_INO_CORE | XFS_SICK_INO_BMBTD | XFS_SICK_INO_BMBTA | \
	 XFS_SICK_INO_BMBTC | XFS_SICK_INO_DIR | XFS_SICK_INO_XATTR | \
	 XFS_SICK_INO_SYMLINK | XFS_SICK_INO_PARENT)

/* Union of all the "zapped" (erased) inode flags. */
#define XFS_SICK_INO_ZAPPED \
	(XFS_SICK_INO_BMBTD_ZAPPED | XFS_SICK_INO_BMBTA_ZAPPED | \
	 XFS_SICK_INO_DIR_ZAPPED | XFS_SICK_INO_SYMLINK_ZAPPED)
134 
/*
 * Secondary state masks: flags that are related to health problems without
 * being primary evidence of them.  Only the inode group defines one.
 */
#define XFS_SICK_FS_SECONDARY	0
#define XFS_SICK_RT_SECONDARY	0
#define XFS_SICK_AG_SECONDARY	0
#define XFS_SICK_INO_SECONDARY	(XFS_SICK_INO_FORGET)

/*
 * Indirect evidence masks: flags that point at health problems located
 * elsewhere.  Only the AG group defines one.
 */
#define XFS_SICK_FS_INDIRECT	0
#define XFS_SICK_RT_INDIRECT	0
#define XFS_SICK_AG_INDIRECT	(XFS_SICK_AG_INODES)
#define XFS_SICK_INO_INDIRECT	0
146 
/*
 * Complete masks: every health flag defined for a group, i.e. the union of
 * its primary, secondary and indirect masks (plus the zapped flags for
 * inodes).
 */
#define XFS_SICK_FS_ALL \
	(XFS_SICK_FS_PRIMARY | XFS_SICK_FS_SECONDARY | XFS_SICK_FS_INDIRECT)

#define XFS_SICK_RT_ALL \
	(XFS_SICK_RT_PRIMARY | XFS_SICK_RT_SECONDARY | XFS_SICK_RT_INDIRECT)

#define XFS_SICK_AG_ALL \
	(XFS_SICK_AG_PRIMARY | XFS_SICK_AG_SECONDARY | XFS_SICK_AG_INDIRECT)

#define XFS_SICK_INO_ALL \
	(XFS_SICK_INO_PRIMARY | XFS_SICK_INO_SECONDARY | \
	 XFS_SICK_INO_INDIRECT | XFS_SICK_INO_ZAPPED)
164 
165 /*
166  * These functions must be provided by the xfs implementation.  Function
167  * behavior with respect to the first argument should be as follows:
168  *
169  * xfs_*_mark_sick:        Set the sick flags and do not set checked flags.
170  *                         Runtime code should call this upon encountering
171  *                         a corruption.
172  *
173  * xfs_*_mark_corrupt:     Set the sick and checked flags simultaneously.
174  *                         Fsck tools should call this when corruption is
175  *                         found.
176  *
177  * xfs_*_mark_healthy:     Clear the sick flags and set the checked flags.
178  *                         Fsck tools should call this after correcting errors.
179  *
180  * xfs_*_measure_sickness: Return the sick and checked status in the provided
181  *                         out parameters.
182  */
183 
/*
 * Health state accessors keyed on the mount, for filesystem-wide metadata.
 * The masks are presumably built from XFS_SICK_FS_* flags; confirm against
 * the implementation.
 */
void xfs_fs_mark_sick(struct xfs_mount *mp, unsigned int mask);
void xfs_fs_mark_corrupt(struct xfs_mount *mp, unsigned int mask);
void xfs_fs_mark_healthy(struct xfs_mount *mp, unsigned int mask);
void xfs_fs_measure_sickness(struct xfs_mount *mp,
		unsigned int *sick, unsigned int *checked);
189 
/*
 * Health state accessors keyed on the mount, for realtime volume metadata.
 * The masks are presumably built from XFS_SICK_RT_* flags; confirm against
 * the implementation.
 */
void xfs_rt_mark_sick(struct xfs_mount *mp, unsigned int mask);
void xfs_rt_mark_corrupt(struct xfs_mount *mp, unsigned int mask);
void xfs_rt_mark_healthy(struct xfs_mount *mp, unsigned int mask);
void xfs_rt_measure_sickness(struct xfs_mount *mp,
		unsigned int *sick, unsigned int *checked);
195 
196 void xfs_agno_mark_sick(struct xfs_mount *mp, xfs_agnumber_t agno,
197 		unsigned int mask);
198 void xfs_ag_mark_sick(struct xfs_perag *pag, unsigned int mask);
199 void xfs_ag_mark_corrupt(struct xfs_perag *pag, unsigned int mask);
200 void xfs_ag_mark_healthy(struct xfs_perag *pag, unsigned int mask);
201 void xfs_ag_measure_sickness(struct xfs_perag *pag, unsigned int *sick,
202 		unsigned int *checked);
203 
/*
 * Health state accessors keyed on an in-core inode.  The masks are
 * presumably built from XFS_SICK_INO_* flags; confirm against the
 * implementation.
 */
void xfs_inode_mark_sick(struct xfs_inode *ip, unsigned int mask);
void xfs_inode_mark_corrupt(struct xfs_inode *ip, unsigned int mask);
void xfs_inode_mark_healthy(struct xfs_inode *ip, unsigned int mask);
void xfs_inode_measure_sickness(struct xfs_inode *ip,
		unsigned int *sick, unsigned int *checked);
209 
/* Health hook for unmount; its exact behavior is defined by the implementation. */
void xfs_health_unmount(struct xfs_mount *mp);

/*
 * Mark-sick entry points that take the object on which a problem was seen
 * (an inode fork, a btree cursor, or directory/attr arguments) rather than
 * an explicit flag mask.  NOTE(review): presumably each one derives the
 * proper flags from its argument; confirm against the implementation.
 */
void xfs_bmap_mark_sick(struct xfs_inode *ip, int whichfork);
void xfs_btree_mark_sick(struct xfs_btree_cur *cur);
void xfs_dirattr_mark_sick(struct xfs_inode *ip, int whichfork);
void xfs_da_mark_sick(struct xfs_da_args *args);
215 
216 /* Now some helpers. */
217 
/*
 * Report whether any of the filesystem-wide sickness flags selected by @mask
 * are set.  The checked state is fetched as well but is not consulted.
 */
static inline bool
xfs_fs_has_sickness(struct xfs_mount *mp, unsigned int mask)
{
	unsigned int	sick_flags;
	unsigned int	checked_flags;

	xfs_fs_measure_sickness(mp, &sick_flags, &checked_flags);
	return (sick_flags & mask) != 0;
}
226 
/*
 * Report whether any of the realtime sickness flags selected by @mask are
 * set.  The checked state is fetched as well but is not consulted.
 */
static inline bool
xfs_rt_has_sickness(struct xfs_mount *mp, unsigned int mask)
{
	unsigned int	sick_flags;
	unsigned int	checked_flags;

	xfs_rt_measure_sickness(mp, &sick_flags, &checked_flags);
	return (sick_flags & mask) != 0;
}
235 
/*
 * Report whether any of the AG sickness flags selected by @mask are set for
 * @pag.  The checked state is fetched as well but is not consulted.
 */
static inline bool
xfs_ag_has_sickness(struct xfs_perag *pag, unsigned int mask)
{
	unsigned int	sick_flags;
	unsigned int	checked_flags;

	xfs_ag_measure_sickness(pag, &sick_flags, &checked_flags);
	return (sick_flags & mask) != 0;
}
244 
/*
 * Report whether any of the inode sickness flags selected by @mask are set
 * for @ip.  The checked state is fetched as well but is not consulted.
 */
static inline bool
xfs_inode_has_sickness(struct xfs_inode *ip, unsigned int mask)
{
	unsigned int	sick_flags;
	unsigned int	checked_flags;

	xfs_inode_measure_sickness(ip, &sick_flags, &checked_flags);
	return (sick_flags & mask) != 0;
}
253 
/* The filesystem is healthy when no filesystem-wide sickness bit is set. */
static inline bool
xfs_fs_is_healthy(struct xfs_mount *mp)
{
	const unsigned int	every_bit = ~0U;	/* same value as -1U */

	return !xfs_fs_has_sickness(mp, every_bit);
}
259 
/* The realtime volume is healthy when no realtime sickness bit is set. */
static inline bool
xfs_rt_is_healthy(struct xfs_mount *mp)
{
	const unsigned int	every_bit = ~0U;	/* same value as -1U */

	return !xfs_rt_has_sickness(mp, every_bit);
}
265 
/* An AG is healthy when none of its sickness bits are set. */
static inline bool
xfs_ag_is_healthy(struct xfs_perag *pag)
{
	const unsigned int	every_bit = ~0U;	/* same value as -1U */

	return !xfs_ag_has_sickness(pag, every_bit);
}
271 
/* An inode is healthy when none of its sickness bits are set. */
static inline bool
xfs_inode_is_healthy(struct xfs_inode *ip)
{
	const unsigned int	every_bit = ~0U;	/* same value as -1U */

	return !xfs_inode_has_sickness(ip, every_bit);
}
277 
/*
 * Health reporting hooks, one each for the filesystem geometry, AG geometry
 * and bulkstat structures.  NOTE(review): presumably each one fills in the
 * health fields of the structure passed as the second argument; confirm
 * against the implementation.
 */
void xfs_fsop_geom_health(struct xfs_mount *mp,
		struct xfs_fsop_geom *geo);
void xfs_ag_geom_health(struct xfs_perag *pag,
		struct xfs_ag_geometry *ageo);
void xfs_bulkstat_health(struct xfs_inode *ip,
		struct xfs_bulkstat *bs);
281 
/*
 * Does this error code indicate corrupt metadata?  Evaluates to true for
 * -EFSCORRUPTED and -EFSBADCRC, and the result is hinted as unlikely.
 *
 * The argument is captured in a local first so that it is evaluated exactly
 * once; an expression with side effects is therefore safe to pass.
 */
#define xfs_metadata_is_sick(error) \
({ \
	__typeof__(error) __xfs_sick_err = (error); \
	unlikely(__xfs_sick_err == -EFSCORRUPTED || \
		 __xfs_sick_err == -EFSBADCRC); \
})
284 
285 #endif	/* __XFS_HEALTH_H__ */
286