xref: /linux/fs/xfs/xfs_healthmon.h (revision c17ee635fd3a482b2ad2bf5e269755c2eae5f25e)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * Copyright (c) 2024-2026 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #ifndef __XFS_HEALTHMON_H__
7 #define __XFS_HEALTHMON_H__
8 
9 struct xfs_healthmon {
10 	/*
11 	 * Weak reference to the xfs filesystem that is being monitored.  It
12 	 * will be set to zero when the filesystem detaches from the monitor.
13 	 * Do not dereference this pointer.
14 	 */
15 	uintptr_t			mount_cookie;
16 
17 	/*
18 	 * Device number of the filesystem being monitored.  This is for
19 	 * consistent tracing even after unmount.
20 	 */
21 	dev_t				dev;
22 
23 	/*
24 	 * Reference count of this structure.  The open healthmon fd holds one
25 	 * ref, the xfs_mount holds another ref if it points to this object,
26 	 * and running event handlers hold their own refs.
27 	 */
28 	refcount_t			ref;
29 
30 	/* lock for event list and event counters */
31 	struct mutex			lock;
32 
33 	/* list of event objects */
34 	struct xfs_healthmon_event	*first_event;
35 	struct xfs_healthmon_event	*last_event;
36 
37 	/* preallocated event for unmount */
38 	struct xfs_healthmon_event	*unmount_event;
39 
40 	/* number of events in the list */
41 	unsigned int			events;
42 
43 	/* do we want all events? */
44 	bool				verbose:1;
45 
46 	/* waiter so read/poll can sleep until the arrival of events */
47 	struct wait_queue_head		wait;
48 
49 	/*
50 	 * Buffer for formatting events for a read_iter call.  Events are
51 	 * formatted into the buffer at bufhead, and buftail determines where
52 	 * to start a copy_iter to get those events to userspace.  All buffer
53 	 * fields are protected by inode_lock.
54 	 */
55 	char				*buffer;
56 	size_t				bufsize;
57 	size_t				bufhead;
58 	size_t				buftail;
59 
60 	/* did we lose previous events? */
61 	unsigned long long		lost_prev_event;
62 
63 	/* total counts of events observed and lost events */
64 	unsigned long long		total_events;
65 	unsigned long long		total_lost;
66 };
67 
/*
 * Unmount-time hook for the health monitor of @mp.  The definition lives
 * outside this header.
 * NOTE(review): presumably detaches the monitor from the mount and queues the
 * preallocated unmount event -- confirm in the implementation.
 */
void
xfs_healthmon_unmount(
	struct xfs_mount	*mp);
69 
/*
 * Kinds of event a health monitor can carry.  The values are written out
 * explicitly; they are the same ones the compiler would assign implicitly.
 */
enum xfs_healthmon_type {
	/* monitor lifecycle */
	XFS_HEALTHMON_RUNNING		= 0,	/* monitor running */
	XFS_HEALTHMON_LOST		= 1,	/* message lost */
	XFS_HEALTHMON_UNMOUNT		= 2,	/* filesystem is unmounting */

	/* filesystem shutdown */
	XFS_HEALTHMON_SHUTDOWN		= 3,

	/* metadata health events */
	XFS_HEALTHMON_SICK		= 4,	/* runtime corruption observed */
	XFS_HEALTHMON_CORRUPT		= 5,	/* fsck reported corruption */
	XFS_HEALTHMON_HEALTHY		= 6,	/* fsck reported healthy structure */

	/* media errors */
	XFS_HEALTHMON_MEDIA_ERROR	= 7,

	/* file range events */
	XFS_HEALTHMON_BUFREAD		= 8,
	XFS_HEALTHMON_BUFWRITE		= 9,
	XFS_HEALTHMON_DIOREAD		= 10,
	XFS_HEALTHMON_DIOWRITE		= 11,
	XFS_HEALTHMON_DATALOST		= 12,
};
93 
/*
 * What part of the filesystem an event concerns.  The values are written out
 * explicitly; they are the same ones the compiler would assign implicitly.
 */
enum xfs_healthmon_domain {
	XFS_HEALTHMON_MOUNT	= 0,	/* affects the whole fs */

	/* metadata health events */
	XFS_HEALTHMON_FS	= 1,	/* main filesystem metadata */
	XFS_HEALTHMON_AG	= 2,	/* allocation group metadata */
	XFS_HEALTHMON_INODE	= 3,	/* inode metadata */
	XFS_HEALTHMON_RTGROUP	= 4,	/* realtime group metadata */

	/* media errors */
	XFS_HEALTHMON_DATADEV	= 5,
	XFS_HEALTHMON_RTDEV	= 6,
	XFS_HEALTHMON_LOGDEV	= 7,

	/* file range events */
	XFS_HEALTHMON_FILERANGE	= 8,
};
111 
112 struct xfs_healthmon_event {
113 	struct xfs_healthmon_event	*next;
114 
115 	enum xfs_healthmon_type		type;
116 	enum xfs_healthmon_domain	domain;
117 
118 	uint64_t			time_ns;
119 
120 	union {
121 		/* lost events */
122 		struct {
123 			uint64_t	lostcount;
124 		};
125 		/* fs/rt metadata */
126 		struct {
127 			/* XFS_SICK_* flags */
128 			unsigned int	fsmask;
129 		};
130 		/* ag/rtgroup metadata */
131 		struct {
132 			/* XFS_SICK_(AG|RG)* flags */
133 			unsigned int	grpmask;
134 			unsigned int	group;
135 		};
136 		/* inode metadata */
137 		struct {
138 			/* XFS_SICK_INO_* flags */
139 			unsigned int	imask;
140 			uint32_t	gen;
141 			xfs_ino_t	ino;
142 		};
143 		/* shutdown */
144 		struct {
145 			unsigned int	flags;
146 		};
147 		/* media errors */
148 		struct {
149 			xfs_daddr_t	daddr;
150 			uint64_t	bbcount;
151 		};
152 		/* file range events */
153 		struct {
154 			xfs_ino_t	fino;
155 			loff_t		fpos;
156 			uint64_t	flen;
157 			uint32_t	fgen;
158 			int		error;
159 		};
160 	};
161 };
162 
/*
 * Report a health event of kind @type for the filesystem @mp, given the
 * health masks before (@old_mask) and after (@new_mask) the change.
 * NOTE(review): the masks presumably carry XFS_SICK_* flags, matching the
 * fsmask event payload -- confirm in the implementation.
 */
void
xfs_healthmon_report_fs(
	struct xfs_mount	*mp,
	enum xfs_healthmon_type	type,
	unsigned int		old_mask,
	unsigned int		new_mask);
/*
 * Report a health event of kind @type for the group @xg, given the health
 * masks before (@old_mask) and after (@new_mask) the change.
 * NOTE(review): the masks presumably carry XFS_SICK_(AG|RG)* flags, matching
 * the grpmask event payload -- confirm in the implementation.
 */
void
xfs_healthmon_report_group(
	struct xfs_group	*xg,
	enum xfs_healthmon_type	type,
	unsigned int		old_mask,
	unsigned int		new_mask);
/*
 * Report a health event of kind @type for the inode @ip, given the health
 * masks before (@old_mask) and after (@new_mask) the change.
 * NOTE(review): the masks presumably carry XFS_SICK_INO_* flags, matching the
 * imask event payload -- confirm in the implementation.
 */
void
xfs_healthmon_report_inode(
	struct xfs_inode	*ip,
	enum xfs_healthmon_type	type,
	unsigned int		old_mask,
	unsigned int		new_mask);
172 
/*
 * Report a shutdown of the filesystem @mp.  The meaning of the @flags bits is
 * defined outside this header.
 */
void
xfs_healthmon_report_shutdown(
	struct xfs_mount	*mp,
	uint32_t		flags);
174 
175 void xfs_healthmon_report_media(struct xfs_mount *mp, enum xfs_device fdev,
176 		xfs_daddr_t daddr, uint64_t bbcount);
177 
/*
 * Report a file I/O error on the inode @ip.  The details come from @p, which
 * is only read (it is const-qualified); its layout is declared elsewhere.
 */
void
xfs_healthmon_report_file_ioerror(
	struct xfs_inode		*ip,
	const struct fserror_event	*p);
180 
181 long xfs_ioc_health_monitor(struct file *file,
182 		struct xfs_health_monitor __user *arg);
183 
184 #endif /* __XFS_HEALTHMON_H__ */
185