xref: /linux/fs/xfs/libxfs/xfs_health.h (revision b477ff98d903618a1ab8247861f2ea6e70c0f0f8)
1 /* SPDX-License-Identifier: GPL-2.0+ */
2 /*
3  * Copyright (C) 2019 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  */
6 #ifndef __XFS_HEALTH_H__
7 #define __XFS_HEALTH_H__
8 
9 struct xfs_group;
10 
11 /*
12  * In-Core Filesystem Health Assessments
13  * =====================================
14  *
15  * We'd like to be able to summarize the current health status of the
16  * filesystem so that the administrator knows when it's necessary to schedule
17  * some downtime for repairs.  Until then, we would also like to avoid abrupt
18  * shutdowns due to corrupt metadata.
19  *
20  * The online scrub feature evaluates the health of all filesystem metadata.
21  * When scrub detects corruption in a piece of metadata it will set the
22  * corresponding sickness flag, and repair will clear it if successful.  If
23  * problems remain at unmount time, we can also request manual intervention by
24  * logging a notice to run xfs_repair.
25  *
 * Each health tracking group uses a pair of fields for reporting.  The
 * "checked" field tells us if a given piece of metadata has ever been
 * examined, and the "sick" field tells us if that piece was found to need
 * repairs.  Therefore we can conclude that for a given sick flag value:
30  *
31  *  - checked && sick   => metadata needs repair
32  *  - checked && !sick  => metadata is ok
33  *  - !checked && sick  => errors have been observed during normal operation,
34  *                         but the metadata has not been checked thoroughly
35  *  - !checked && !sick => has not been examined since mount
36  *
37  * Evidence of health problems can be sorted into three basic categories:
38  *
39  * a) Primary evidence, which signals that something is defective within the
40  *    general grouping of metadata.
41  *
 * b) Secondary evidence, which is a side effect of a primary problem but is
 *    not itself a problem.  It can be forgotten when the primary health
 *    problems are addressed.
45  *
46  * c) Indirect evidence, which points to something being wrong in another
47  *    group, but we had to release resources and this is all that's left of
48  *    that state.
49  */
50 
/*
 * Forward declarations for structures that the prototypes in this header
 * only ever take by pointer.  Declaring the tags at file scope keeps them
 * from being scoped to a single prototype when this header is parsed before
 * the headers that define them.
 */
struct xfs_mount;
struct xfs_perag;
struct xfs_inode;
struct xfs_fsop_geom;
struct xfs_btree_cur;
struct xfs_da_args;
struct xfs_rtgroup;
struct xfs_ag_geometry;
struct xfs_rtgroup_geometry;
struct xfs_bulkstat;
58 
/*
 * Sick flags for metadata that spans the entire filesystem.  Every flag is
 * a distinct single bit, so any combination can be OR-ed into one mask.
 */
#define XFS_SICK_FS_COUNTERS	(1 << 0)	/* summary counters */
#define XFS_SICK_FS_UQUOTA	(1 << 1)	/* user quota */
#define XFS_SICK_FS_GQUOTA	(1 << 2)	/* group quota */
#define XFS_SICK_FS_PQUOTA	(1 << 3)	/* project quota */
#define XFS_SICK_FS_QUOTACHECK	(1 << 4)	/* quota counts */
#define XFS_SICK_FS_NLINKS	(1 << 5)	/* inode link counts */
#define XFS_SICK_FS_METADIR	(1 << 6)	/* metadata directory tree */
#define XFS_SICK_FS_METAPATH	(1 << 7)	/* metadata directory tree path */
68 
/*
 * Sick flags for realtime group metadata.  Every flag is a distinct single
 * bit, so any combination can be OR-ed into one mask.
 */
#define XFS_SICK_RG_SUPER	(1 << 0)	/* rt group superblock */
#define XFS_SICK_RG_BITMAP	(1 << 1)	/* rt group bitmap */
#define XFS_SICK_RG_SUMMARY	(1 << 2)	/* rt group summary */
#define XFS_SICK_RG_RMAPBT	(1 << 3)	/* reverse mappings */
#define XFS_SICK_RG_REFCNTBT	(1 << 4)	/* reference counts */
75 
/*
 * Sick flags for allocation group (AG) metadata.  Every flag is a distinct
 * single bit, so any combination can be OR-ed into one mask.
 */
#define XFS_SICK_AG_SB		(1 << 0)	/* superblock */
#define XFS_SICK_AG_AGF		(1 << 1)	/* AGF header */
#define XFS_SICK_AG_AGFL	(1 << 2)	/* AGFL header */
#define XFS_SICK_AG_AGI		(1 << 3)	/* AGI header */
#define XFS_SICK_AG_BNOBT	(1 << 4)	/* free space by block */
#define XFS_SICK_AG_CNTBT	(1 << 5)	/* free space by length */
#define XFS_SICK_AG_INOBT	(1 << 6)	/* inode index */
#define XFS_SICK_AG_FINOBT	(1 << 7)	/* free inode index */
#define XFS_SICK_AG_RMAPBT	(1 << 8)	/* reverse mappings */
#define XFS_SICK_AG_REFCNTBT	(1 << 9)	/* reference counts */
#define XFS_SICK_AG_INODES	(1 << 10)	/* inactivated bad inodes */
88 
/*
 * Sick flags for per-inode metadata.  Every flag is a distinct single bit,
 * so any combination can be OR-ed into one mask.
 */
#define XFS_SICK_INO_CORE	(1 << 0)	/* inode core */
#define XFS_SICK_INO_BMBTD	(1 << 1)	/* data fork */
#define XFS_SICK_INO_BMBTA	(1 << 2)	/* attr fork */
#define XFS_SICK_INO_BMBTC	(1 << 3)	/* cow fork */
#define XFS_SICK_INO_DIR	(1 << 4)	/* directory */
#define XFS_SICK_INO_XATTR	(1 << 5)	/* extended attributes */
#define XFS_SICK_INO_SYMLINK	(1 << 6)	/* symbolic link remote target */
#define XFS_SICK_INO_PARENT	(1 << 7)	/* parent pointers */

/* Flags recording that part of the inode was erased. */
#define XFS_SICK_INO_BMBTD_ZAPPED	(1 << 8)	/* data fork erased */
#define XFS_SICK_INO_BMBTA_ZAPPED	(1 << 9)	/* attr fork erased */
#define XFS_SICK_INO_DIR_ZAPPED		(1 << 10)	/* directory erased */
#define XFS_SICK_INO_SYMLINK_ZAPPED	(1 << 11)	/* symlink erased */

/*
 * When set, the inode's sick status is not propagated to the AG health
 * summary during inactivation.
 */
#define XFS_SICK_INO_FORGET	(1 << 12)

#define XFS_SICK_INO_DIRTREE	(1 << 13)	/* directory tree structure */
107 
/*
 * Masks of the flags that count as primary evidence of health problems,
 * one mask per health tracking group.
 */
#define XFS_SICK_FS_PRIMARY \
	(XFS_SICK_FS_COUNTERS	| \
	 XFS_SICK_FS_UQUOTA	| \
	 XFS_SICK_FS_GQUOTA	| \
	 XFS_SICK_FS_PQUOTA	| \
	 XFS_SICK_FS_QUOTACHECK	| \
	 XFS_SICK_FS_NLINKS	| \
	 XFS_SICK_FS_METADIR	| \
	 XFS_SICK_FS_METAPATH)

#define XFS_SICK_RG_PRIMARY \
	(XFS_SICK_RG_SUPER	| \
	 XFS_SICK_RG_BITMAP	| \
	 XFS_SICK_RG_SUMMARY	| \
	 XFS_SICK_RG_RMAPBT	| \
	 XFS_SICK_RG_REFCNTBT)

/* XFS_SICK_AG_INODES is deliberately absent; it is not part of this mask. */
#define XFS_SICK_AG_PRIMARY \
	(XFS_SICK_AG_SB		| \
	 XFS_SICK_AG_AGF	| \
	 XFS_SICK_AG_AGFL	| \
	 XFS_SICK_AG_AGI	| \
	 XFS_SICK_AG_BNOBT	| \
	 XFS_SICK_AG_CNTBT	| \
	 XFS_SICK_AG_INOBT	| \
	 XFS_SICK_AG_FINOBT	| \
	 XFS_SICK_AG_RMAPBT	| \
	 XFS_SICK_AG_REFCNTBT)

#define XFS_SICK_INO_PRIMARY \
	(XFS_SICK_INO_CORE	| \
	 XFS_SICK_INO_BMBTD	| \
	 XFS_SICK_INO_BMBTA	| \
	 XFS_SICK_INO_BMBTC	| \
	 XFS_SICK_INO_DIR	| \
	 XFS_SICK_INO_XATTR	| \
	 XFS_SICK_INO_SYMLINK	| \
	 XFS_SICK_INO_PARENT	| \
	 XFS_SICK_INO_DIRTREE)

/* Every "erased" inode flag gathered into one mask. */
#define XFS_SICK_INO_ZAPPED \
	(XFS_SICK_INO_BMBTD_ZAPPED	| \
	 XFS_SICK_INO_BMBTA_ZAPPED	| \
	 XFS_SICK_INO_DIR_ZAPPED	| \
	 XFS_SICK_INO_SYMLINK_ZAPPED)
149 
/*
 * Masks of secondary state: flags related to health problems that are not
 * themselves primary evidence.  Only the inode group has such a flag.
 */
#define XFS_SICK_FS_SECONDARY		(0)
#define XFS_SICK_RG_SECONDARY		(0)
#define XFS_SICK_AG_SECONDARY		(0)
#define XFS_SICK_INO_SECONDARY		(XFS_SICK_INO_FORGET)
155 
/*
 * Masks of indirect evidence: flags that point at health problems located
 * elsewhere.  Only the AG group has such a flag.
 */
#define XFS_SICK_FS_INDIRECT		(0)
#define XFS_SICK_RG_INDIRECT		(0)
#define XFS_SICK_AG_INDIRECT		(XFS_SICK_AG_INODES)
#define XFS_SICK_INO_INDIRECT		(0)
161 
/*
 * Complete health masks: the union of the primary, secondary and indirect
 * masks for each group.  The inode mask additionally covers the "erased"
 * flags.
 */
#define XFS_SICK_FS_ALL \
	(XFS_SICK_FS_PRIMARY	| \
	 XFS_SICK_FS_SECONDARY	| \
	 XFS_SICK_FS_INDIRECT)

#define XFS_SICK_RG_ALL \
	(XFS_SICK_RG_PRIMARY	| \
	 XFS_SICK_RG_SECONDARY	| \
	 XFS_SICK_RG_INDIRECT)

#define XFS_SICK_AG_ALL \
	(XFS_SICK_AG_PRIMARY	| \
	 XFS_SICK_AG_SECONDARY	| \
	 XFS_SICK_AG_INDIRECT)

#define XFS_SICK_INO_ALL \
	(XFS_SICK_INO_PRIMARY	| \
	 XFS_SICK_INO_SECONDARY	| \
	 XFS_SICK_INO_INDIRECT	| \
	 XFS_SICK_INO_ZAPPED)
179 
180 /*
181  * These functions must be provided by the xfs implementation.  Function
182  * behavior with respect to the first argument should be as follows:
183  *
184  * xfs_*_mark_sick:        Set the sick flags and do not set checked flags.
185  *                         Runtime code should call this upon encountering
186  *                         a corruption.
187  *
188  * xfs_*_mark_corrupt:     Set the sick and checked flags simultaneously.
189  *                         Fsck tools should call this when corruption is
190  *                         found.
191  *
192  * xfs_*_mark_healthy:     Clear the sick flags and set the checked flags.
193  *                         Fsck tools should call this after correcting errors.
194  *
195  * xfs_*_measure_sickness: Return the sick and check status in the provided
196  *                         out parameters.
197  */
198 
199 void xfs_fs_mark_sick(struct xfs_mount *mp, unsigned int mask);
200 void xfs_fs_mark_corrupt(struct xfs_mount *mp, unsigned int mask);
201 void xfs_fs_mark_healthy(struct xfs_mount *mp, unsigned int mask);
202 void xfs_fs_measure_sickness(struct xfs_mount *mp, unsigned int *sick,
203 		unsigned int *checked);
204 
205 void xfs_rgno_mark_sick(struct xfs_mount *mp, xfs_rgnumber_t rgno,
206 		unsigned int mask);
207 
208 void xfs_agno_mark_sick(struct xfs_mount *mp, xfs_agnumber_t agno,
209 		unsigned int mask);
210 void xfs_group_mark_sick(struct xfs_group *xg, unsigned int mask);
211 #define xfs_ag_mark_sick(pag, mask) \
212 	xfs_group_mark_sick(pag_group(pag), (mask))
213 void xfs_group_mark_corrupt(struct xfs_group *xg, unsigned int mask);
214 void xfs_group_mark_healthy(struct xfs_group *xg, unsigned int mask);
215 void xfs_group_measure_sickness(struct xfs_group *xg, unsigned int *sick,
216 		unsigned int *checked);
217 
218 void xfs_inode_mark_sick(struct xfs_inode *ip, unsigned int mask);
219 void xfs_inode_mark_corrupt(struct xfs_inode *ip, unsigned int mask);
220 void xfs_inode_mark_healthy(struct xfs_inode *ip, unsigned int mask);
221 void xfs_inode_measure_sickness(struct xfs_inode *ip, unsigned int *sick,
222 		unsigned int *checked);
223 
224 void xfs_health_unmount(struct xfs_mount *mp);
225 void xfs_bmap_mark_sick(struct xfs_inode *ip, int whichfork);
226 void xfs_btree_mark_sick(struct xfs_btree_cur *cur);
227 void xfs_dirattr_mark_sick(struct xfs_inode *ip, int whichfork);
228 void xfs_da_mark_sick(struct xfs_da_args *args);
229 
230 /* Now some helpers. */
231 
/*
 * Report whether any of the sick flags selected by @mask is currently set in
 * the filesystem-wide health state of @mp.
 */
static inline bool
xfs_fs_has_sickness(
	struct xfs_mount	*mp,
	unsigned int		mask)
{
	unsigned int		sick_flags;
	unsigned int		checked_flags;	/* filled in, not consulted */

	xfs_fs_measure_sickness(mp, &sick_flags, &checked_flags);
	return (sick_flags & mask) != 0;
}
240 
/*
 * Report whether any of the sick flags selected by @mask is currently set in
 * the health state of group @xg.
 */
static inline bool
xfs_group_has_sickness(struct xfs_group *xg, unsigned int mask)
{
	unsigned int	sick_flags;
	unsigned int	checked_flags;	/* filled in, not consulted */

	xfs_group_measure_sickness(xg, &sick_flags, &checked_flags);
	return (sick_flags & mask) != 0;
}
251 
/*
 * Per-AG wrappers around the generic group query.  "Healthy" means that no
 * sick flag at all is set, hence the all-ones mask.
 */
#define xfs_ag_has_sickness(pag, mask)	\
	xfs_group_has_sickness(pag_group(pag), (mask))
#define xfs_ag_is_healthy(pag)	\
	(!xfs_ag_has_sickness((pag), UINT_MAX))

/* The same pair of wrappers for realtime groups. */
#define xfs_rtgroup_has_sickness(rtg, mask)	\
	xfs_group_has_sickness(rtg_group(rtg), (mask))
#define xfs_rtgroup_is_healthy(rtg)	\
	(!xfs_rtgroup_has_sickness((rtg), UINT_MAX))
261 
/*
 * Report whether any of the sick flags selected by @mask is currently set in
 * the health state of inode @ip.
 */
static inline bool
xfs_inode_has_sickness(
	struct xfs_inode	*ip,
	unsigned int		mask)
{
	unsigned int		sick_flags;
	unsigned int		checked_flags;	/* filled in, not consulted */

	xfs_inode_measure_sickness(ip, &sick_flags, &checked_flags);
	return (sick_flags & mask) != 0;
}
270 
/* Return true when no filesystem-wide sick flag of any kind is set on @mp. */
static inline bool
xfs_fs_is_healthy(
	struct xfs_mount	*mp)
{
	/* An all-ones mask selects every possible sick flag. */
	if (xfs_fs_has_sickness(mp, -1U))
		return false;
	return true;
}
276 
/* Return true when no sick flag of any kind is set on inode @ip. */
static inline bool
xfs_inode_is_healthy(
	struct xfs_inode	*ip)
{
	/* An all-ones mask selects every possible sick flag. */
	if (xfs_inode_has_sickness(ip, -1U))
		return false;
	return true;
}
282 
/*
 * Health reporting entry points; each takes the object being reported on and
 * a geometry or bulkstat structure.
 */
void xfs_fsop_geom_health(struct xfs_mount *mp,
		struct xfs_fsop_geom *geo);
void xfs_ag_geom_health(struct xfs_perag *pag,
		struct xfs_ag_geometry *ageo);
void xfs_rtgroup_geom_health(struct xfs_rtgroup *rtg,
		struct xfs_rtgroup_geometry *rgeo);
void xfs_bulkstat_health(struct xfs_inode *ip,
		struct xfs_bulkstat *bs);
288 
/*
 * True when @error is -EFSCORRUPTED or -EFSBADCRC.  The result is wrapped in
 * unlikely().  Note that @error is evaluated twice, so the argument must not
 * have side effects.
 */
#define xfs_metadata_is_sick(error)			\
	(unlikely((error) == -EFSCORRUPTED ||		\
		  (error) == -EFSBADCRC))
291 
292 #endif	/* __XFS_HEALTH_H__ */
293