xref: /linux/fs/xfs/xfs_healthmon.c (revision bf4afc53b77aeaa48b5409da5c8da6bb4eff7f43)
1a48373e7SDarrick J. Wong // SPDX-License-Identifier: GPL-2.0-or-later
2a48373e7SDarrick J. Wong /*
3a48373e7SDarrick J. Wong  * Copyright (c) 2024-2026 Oracle.  All Rights Reserved.
4a48373e7SDarrick J. Wong  * Author: Darrick J. Wong <djwong@kernel.org>
5a48373e7SDarrick J. Wong  */
604a65666SCarlos Maiolino #include "xfs_platform.h"
7a48373e7SDarrick J. Wong #include "xfs_fs.h"
8a48373e7SDarrick J. Wong #include "xfs_shared.h"
9a48373e7SDarrick J. Wong #include "xfs_format.h"
10a48373e7SDarrick J. Wong #include "xfs_log_format.h"
11a48373e7SDarrick J. Wong #include "xfs_trans_resv.h"
12a48373e7SDarrick J. Wong #include "xfs_mount.h"
13a48373e7SDarrick J. Wong #include "xfs_inode.h"
14a48373e7SDarrick J. Wong #include "xfs_trace.h"
15a48373e7SDarrick J. Wong #include "xfs_ag.h"
16a48373e7SDarrick J. Wong #include "xfs_btree.h"
17a48373e7SDarrick J. Wong #include "xfs_da_format.h"
18a48373e7SDarrick J. Wong #include "xfs_da_btree.h"
19a48373e7SDarrick J. Wong #include "xfs_quota_defs.h"
20a48373e7SDarrick J. Wong #include "xfs_rtgroup.h"
215eb4cb18SDarrick J. Wong #include "xfs_health.h"
22a48373e7SDarrick J. Wong #include "xfs_healthmon.h"
2374c4795eSDarrick J. Wong #include "xfs_fsops.h"
24e76e0e3fSDarrick J. Wong #include "xfs_notify_failure.h"
25dfa8bad3SDarrick J. Wong #include "xfs_file.h"
26c0e719cbSDarrick J. Wong #include "xfs_ioctl.h"
27a48373e7SDarrick J. Wong 
28a48373e7SDarrick J. Wong #include <linux/anon_inodes.h>
29a48373e7SDarrick J. Wong #include <linux/eventpoll.h>
30a48373e7SDarrick J. Wong #include <linux/poll.h>
31dfa8bad3SDarrick J. Wong #include <linux/fserror.h>
32a48373e7SDarrick J. Wong 
33a48373e7SDarrick J. Wong /*
34a48373e7SDarrick J. Wong  * Live Health Monitoring
35a48373e7SDarrick J. Wong  * ======================
36a48373e7SDarrick J. Wong  *
37a48373e7SDarrick J. Wong  * Autonomous self-healing of XFS filesystems requires a means for the kernel
38a48373e7SDarrick J. Wong  * to send filesystem health events to a monitoring daemon in userspace.  To
39a48373e7SDarrick J. Wong  * accomplish this, we establish a thread_with_file kthread object to handle
40a48373e7SDarrick J. Wong  * translating internal events about filesystem health into a format that can
41a48373e7SDarrick J. Wong  * be parsed easily by userspace.  When those internal events occur, the core
42a48373e7SDarrick J. Wong  * filesystem code calls this health monitor to convey the events to userspace.
43a48373e7SDarrick J. Wong  * Userspace reads events from the file descriptor returned by the ioctl.
44a48373e7SDarrick J. Wong  *
45a48373e7SDarrick J. Wong  * The healthmon abstraction has a weak reference to the host filesystem mount
46a48373e7SDarrick J. Wong  * so that the queueing and processing of the events do not pin the mount and
47a48373e7SDarrick J. Wong  * cannot slow down the main filesystem.  The healthmon object can exist past
48a48373e7SDarrick J. Wong  * the end of the filesystem mount.
49a48373e7SDarrick J. Wong  */
50a48373e7SDarrick J. Wong 
/* Cookie value that marks a health monitor as detached from any mount. */
#define DETACHED_MOUNT_COOKIE		((uintptr_t)0)

/* Cap the number of in-memory event objects at 32K worth of events. */
#define XFS_HEALTHMON_MAX_EVENTS \
	(SZ_32K / sizeof(struct xfs_healthmon_event))

/* Upper bound on the size of the output buffer used by read_iter. */
#define XFS_HEALTHMON_MAX_OUTBUF	SZ_64K

/* Serializes updates of the xfs_mount <-> xfs_healthmon pointers. */
static DEFINE_SPINLOCK(xfs_healthmon_lock);
63a48373e7SDarrick J. Wong 
64a48373e7SDarrick J. Wong /* Grab a reference to the healthmon object for a given mount, if any. */
65a48373e7SDarrick J. Wong static struct xfs_healthmon *
66a48373e7SDarrick J. Wong xfs_healthmon_get(
67a48373e7SDarrick J. Wong 	struct xfs_mount		*mp)
68a48373e7SDarrick J. Wong {
69a48373e7SDarrick J. Wong 	struct xfs_healthmon		*hm;
70a48373e7SDarrick J. Wong 
71a48373e7SDarrick J. Wong 	rcu_read_lock();
72a48373e7SDarrick J. Wong 	hm = mp->m_healthmon;
73a48373e7SDarrick J. Wong 	if (hm && !refcount_inc_not_zero(&hm->ref))
74a48373e7SDarrick J. Wong 		hm = NULL;
75a48373e7SDarrick J. Wong 	rcu_read_unlock();
76a48373e7SDarrick J. Wong 
77a48373e7SDarrick J. Wong 	return hm;
78a48373e7SDarrick J. Wong }
79a48373e7SDarrick J. Wong 
80a48373e7SDarrick J. Wong /*
81a48373e7SDarrick J. Wong  * Release the reference to a healthmon object.  If there are no more holders,
82a48373e7SDarrick J. Wong  * free the health monitor after an RCU grace period to eliminate possibility
83a48373e7SDarrick J. Wong  * of races with xfs_healthmon_get.
84a48373e7SDarrick J. Wong  */
85a48373e7SDarrick J. Wong static void
86a48373e7SDarrick J. Wong xfs_healthmon_put(
87a48373e7SDarrick J. Wong 	struct xfs_healthmon		*hm)
88a48373e7SDarrick J. Wong {
89b3a289a2SDarrick J. Wong 	if (refcount_dec_and_test(&hm->ref)) {
90b3a289a2SDarrick J. Wong 		struct xfs_healthmon_event	*event;
91b3a289a2SDarrick J. Wong 		struct xfs_healthmon_event	*next = hm->first_event;
92b3a289a2SDarrick J. Wong 
93b3a289a2SDarrick J. Wong 		while ((event = next) != NULL) {
94b3a289a2SDarrick J. Wong 			trace_xfs_healthmon_drop(hm, event);
95b3a289a2SDarrick J. Wong 			next = event->next;
96b3a289a2SDarrick J. Wong 			kfree(event);
97b3a289a2SDarrick J. Wong 		}
98b3a289a2SDarrick J. Wong 
9925ca57faSDarrick J. Wong 		kfree(hm->unmount_event);
100b3a289a2SDarrick J. Wong 		kfree(hm->buffer);
101b3a289a2SDarrick J. Wong 		mutex_destroy(&hm->lock);
102a48373e7SDarrick J. Wong 		kfree_rcu_mightsleep(hm);
103a48373e7SDarrick J. Wong 	}
104b3a289a2SDarrick J. Wong }
105a48373e7SDarrick J. Wong 
106a48373e7SDarrick J. Wong /* Attach a health monitor to an xfs_mount.  Only one allowed at a time. */
107a48373e7SDarrick J. Wong STATIC int
108a48373e7SDarrick J. Wong xfs_healthmon_attach(
109a48373e7SDarrick J. Wong 	struct xfs_mount	*mp,
110a48373e7SDarrick J. Wong 	struct xfs_healthmon	*hm)
111a48373e7SDarrick J. Wong {
112a48373e7SDarrick J. Wong 	spin_lock(&xfs_healthmon_lock);
113a48373e7SDarrick J. Wong 	if (mp->m_healthmon != NULL) {
114a48373e7SDarrick J. Wong 		spin_unlock(&xfs_healthmon_lock);
115a48373e7SDarrick J. Wong 		return -EEXIST;
116a48373e7SDarrick J. Wong 	}
117a48373e7SDarrick J. Wong 
118a48373e7SDarrick J. Wong 	refcount_inc(&hm->ref);
119a48373e7SDarrick J. Wong 	mp->m_healthmon = hm;
120a48373e7SDarrick J. Wong 	hm->mount_cookie = (uintptr_t)mp->m_super;
121a48373e7SDarrick J. Wong 	spin_unlock(&xfs_healthmon_lock);
122a48373e7SDarrick J. Wong 
123a48373e7SDarrick J. Wong 	return 0;
124a48373e7SDarrick J. Wong }
125a48373e7SDarrick J. Wong 
126a48373e7SDarrick J. Wong /* Detach a xfs mount from a specific healthmon instance. */
127a48373e7SDarrick J. Wong STATIC void
128a48373e7SDarrick J. Wong xfs_healthmon_detach(
129a48373e7SDarrick J. Wong 	struct xfs_healthmon	*hm)
130a48373e7SDarrick J. Wong {
131a48373e7SDarrick J. Wong 	spin_lock(&xfs_healthmon_lock);
132a48373e7SDarrick J. Wong 	if (hm->mount_cookie == DETACHED_MOUNT_COOKIE) {
133a48373e7SDarrick J. Wong 		spin_unlock(&xfs_healthmon_lock);
134a48373e7SDarrick J. Wong 		return;
135a48373e7SDarrick J. Wong 	}
136a48373e7SDarrick J. Wong 
137a48373e7SDarrick J. Wong 	XFS_M((struct super_block *)hm->mount_cookie)->m_healthmon = NULL;
138a48373e7SDarrick J. Wong 	hm->mount_cookie = DETACHED_MOUNT_COOKIE;
139a48373e7SDarrick J. Wong 	spin_unlock(&xfs_healthmon_lock);
140a48373e7SDarrick J. Wong 
141b3a289a2SDarrick J. Wong 	trace_xfs_healthmon_detach(hm);
142a48373e7SDarrick J. Wong 	xfs_healthmon_put(hm);
143a48373e7SDarrick J. Wong }
144a48373e7SDarrick J. Wong 
145b3a289a2SDarrick J. Wong static inline void xfs_healthmon_bump_events(struct xfs_healthmon *hm)
146b3a289a2SDarrick J. Wong {
147b3a289a2SDarrick J. Wong 	hm->events++;
148b3a289a2SDarrick J. Wong 	hm->total_events++;
149b3a289a2SDarrick J. Wong }
150b3a289a2SDarrick J. Wong 
151b3a289a2SDarrick J. Wong static inline void xfs_healthmon_bump_lost(struct xfs_healthmon *hm)
152b3a289a2SDarrick J. Wong {
153b3a289a2SDarrick J. Wong 	hm->lost_prev_event++;
154b3a289a2SDarrick J. Wong 	hm->total_lost++;
155b3a289a2SDarrick J. Wong }
156b3a289a2SDarrick J. Wong 
157b3a289a2SDarrick J. Wong /*
158b3a289a2SDarrick J. Wong  * If possible, merge a new event into an existing event.  Returns whether or
159b3a289a2SDarrick J. Wong  * not it merged anything.
160b3a289a2SDarrick J. Wong  */
161b3a289a2SDarrick J. Wong static bool
162b3a289a2SDarrick J. Wong xfs_healthmon_merge_events(
163b3a289a2SDarrick J. Wong 	struct xfs_healthmon_event		*existing,
164b3a289a2SDarrick J. Wong 	const struct xfs_healthmon_event	*new)
165b3a289a2SDarrick J. Wong {
166b3a289a2SDarrick J. Wong 	if (!existing)
167b3a289a2SDarrick J. Wong 		return false;
168b3a289a2SDarrick J. Wong 
169b3a289a2SDarrick J. Wong 	/* type and domain must match to merge events */
170b3a289a2SDarrick J. Wong 	if (existing->type != new->type ||
171b3a289a2SDarrick J. Wong 	    existing->domain != new->domain)
172b3a289a2SDarrick J. Wong 		return false;
173b3a289a2SDarrick J. Wong 
174b3a289a2SDarrick J. Wong 	switch (existing->type) {
175b3a289a2SDarrick J. Wong 	case XFS_HEALTHMON_RUNNING:
17625ca57faSDarrick J. Wong 	case XFS_HEALTHMON_UNMOUNT:
177b3a289a2SDarrick J. Wong 		/* should only ever be one of these events anyway */
178b3a289a2SDarrick J. Wong 		return false;
179b3a289a2SDarrick J. Wong 
180b3a289a2SDarrick J. Wong 	case XFS_HEALTHMON_LOST:
181b3a289a2SDarrick J. Wong 		existing->lostcount += new->lostcount;
182b3a289a2SDarrick J. Wong 		return true;
1835eb4cb18SDarrick J. Wong 
1845eb4cb18SDarrick J. Wong 	case XFS_HEALTHMON_SICK:
1855eb4cb18SDarrick J. Wong 	case XFS_HEALTHMON_CORRUPT:
1865eb4cb18SDarrick J. Wong 	case XFS_HEALTHMON_HEALTHY:
1875eb4cb18SDarrick J. Wong 		switch (existing->domain) {
1885eb4cb18SDarrick J. Wong 		case XFS_HEALTHMON_FS:
1895eb4cb18SDarrick J. Wong 			existing->fsmask |= new->fsmask;
1905eb4cb18SDarrick J. Wong 			return true;
1915eb4cb18SDarrick J. Wong 		case XFS_HEALTHMON_AG:
1925eb4cb18SDarrick J. Wong 		case XFS_HEALTHMON_RTGROUP:
1935eb4cb18SDarrick J. Wong 			if (existing->group == new->group){
1945eb4cb18SDarrick J. Wong 				existing->grpmask |= new->grpmask;
1955eb4cb18SDarrick J. Wong 				return true;
1965eb4cb18SDarrick J. Wong 			}
1975eb4cb18SDarrick J. Wong 			return false;
1985eb4cb18SDarrick J. Wong 		case XFS_HEALTHMON_INODE:
1995eb4cb18SDarrick J. Wong 			if (existing->ino == new->ino &&
2005eb4cb18SDarrick J. Wong 			    existing->gen == new->gen) {
2015eb4cb18SDarrick J. Wong 				existing->imask |= new->imask;
2025eb4cb18SDarrick J. Wong 				return true;
2035eb4cb18SDarrick J. Wong 			}
2045eb4cb18SDarrick J. Wong 			return false;
2055eb4cb18SDarrick J. Wong 		default:
2065eb4cb18SDarrick J. Wong 			ASSERT(0);
2075eb4cb18SDarrick J. Wong 			return false;
2085eb4cb18SDarrick J. Wong 		}
2095eb4cb18SDarrick J. Wong 		return false;
21074c4795eSDarrick J. Wong 
21174c4795eSDarrick J. Wong 	case XFS_HEALTHMON_SHUTDOWN:
21274c4795eSDarrick J. Wong 		/* yes, we can race to shutdown */
21374c4795eSDarrick J. Wong 		existing->flags |= new->flags;
21474c4795eSDarrick J. Wong 		return true;
215e76e0e3fSDarrick J. Wong 
216e76e0e3fSDarrick J. Wong 	case XFS_HEALTHMON_MEDIA_ERROR:
217e76e0e3fSDarrick J. Wong 		/* physically adjacent errors can merge */
218e76e0e3fSDarrick J. Wong 		if (existing->daddr + existing->bbcount == new->daddr) {
219e76e0e3fSDarrick J. Wong 			existing->bbcount += new->bbcount;
220e76e0e3fSDarrick J. Wong 			return true;
221e76e0e3fSDarrick J. Wong 		}
222e76e0e3fSDarrick J. Wong 		if (new->daddr + new->bbcount == existing->daddr) {
223e76e0e3fSDarrick J. Wong 			existing->daddr = new->daddr;
224e76e0e3fSDarrick J. Wong 			existing->bbcount += new->bbcount;
225e76e0e3fSDarrick J. Wong 			return true;
226e76e0e3fSDarrick J. Wong 		}
227e76e0e3fSDarrick J. Wong 		return false;
228dfa8bad3SDarrick J. Wong 
229dfa8bad3SDarrick J. Wong 	case XFS_HEALTHMON_BUFREAD:
230dfa8bad3SDarrick J. Wong 	case XFS_HEALTHMON_BUFWRITE:
231dfa8bad3SDarrick J. Wong 	case XFS_HEALTHMON_DIOREAD:
232dfa8bad3SDarrick J. Wong 	case XFS_HEALTHMON_DIOWRITE:
233dfa8bad3SDarrick J. Wong 	case XFS_HEALTHMON_DATALOST:
234dfa8bad3SDarrick J. Wong 		/* logically adjacent file ranges can merge */
235dfa8bad3SDarrick J. Wong 		if (existing->fino != new->fino || existing->fgen != new->fgen)
236dfa8bad3SDarrick J. Wong 			return false;
237dfa8bad3SDarrick J. Wong 
238dfa8bad3SDarrick J. Wong 		if (existing->fpos + existing->flen == new->fpos) {
239dfa8bad3SDarrick J. Wong 			existing->flen += new->flen;
240dfa8bad3SDarrick J. Wong 			return true;
241dfa8bad3SDarrick J. Wong 		}
242dfa8bad3SDarrick J. Wong 
243dfa8bad3SDarrick J. Wong 		if (new->fpos + new->flen == existing->fpos) {
244dfa8bad3SDarrick J. Wong 			existing->fpos = new->fpos;
245dfa8bad3SDarrick J. Wong 			existing->flen += new->flen;
246dfa8bad3SDarrick J. Wong 			return true;
247dfa8bad3SDarrick J. Wong 		}
248dfa8bad3SDarrick J. Wong 		return false;
249b3a289a2SDarrick J. Wong 	}
250b3a289a2SDarrick J. Wong 
251b3a289a2SDarrick J. Wong 	return false;
252b3a289a2SDarrick J. Wong }
253b3a289a2SDarrick J. Wong 
254b3a289a2SDarrick J. Wong /* Insert an event onto the start of the queue. */
255b3a289a2SDarrick J. Wong static inline void
256b3a289a2SDarrick J. Wong __xfs_healthmon_insert(
257b3a289a2SDarrick J. Wong 	struct xfs_healthmon		*hm,
258b3a289a2SDarrick J. Wong 	struct xfs_healthmon_event	*event)
259b3a289a2SDarrick J. Wong {
260b3a289a2SDarrick J. Wong 	struct timespec64		now;
261b3a289a2SDarrick J. Wong 
262b3a289a2SDarrick J. Wong 	ktime_get_coarse_real_ts64(&now);
263b3a289a2SDarrick J. Wong 	event->time_ns = (now.tv_sec * NSEC_PER_SEC) + now.tv_nsec;
264b3a289a2SDarrick J. Wong 
265b3a289a2SDarrick J. Wong 	event->next = hm->first_event;
266b3a289a2SDarrick J. Wong 	if (!hm->first_event)
267b3a289a2SDarrick J. Wong 		hm->first_event = event;
268b3a289a2SDarrick J. Wong 	if (!hm->last_event)
269b3a289a2SDarrick J. Wong 		hm->last_event = event;
270b3a289a2SDarrick J. Wong 	xfs_healthmon_bump_events(hm);
271b3a289a2SDarrick J. Wong 	wake_up(&hm->wait);
272b3a289a2SDarrick J. Wong 
273b3a289a2SDarrick J. Wong 	trace_xfs_healthmon_insert(hm, event);
274b3a289a2SDarrick J. Wong }
275b3a289a2SDarrick J. Wong 
276b3a289a2SDarrick J. Wong /* Push an event onto the end of the queue. */
277b3a289a2SDarrick J. Wong static inline void
278b3a289a2SDarrick J. Wong __xfs_healthmon_push(
279b3a289a2SDarrick J. Wong 	struct xfs_healthmon		*hm,
280b3a289a2SDarrick J. Wong 	struct xfs_healthmon_event	*event)
281b3a289a2SDarrick J. Wong {
282b3a289a2SDarrick J. Wong 	struct timespec64		now;
283b3a289a2SDarrick J. Wong 
284b3a289a2SDarrick J. Wong 	ktime_get_coarse_real_ts64(&now);
285b3a289a2SDarrick J. Wong 	event->time_ns = (now.tv_sec * NSEC_PER_SEC) + now.tv_nsec;
286b3a289a2SDarrick J. Wong 
287b3a289a2SDarrick J. Wong 	if (!hm->first_event)
288b3a289a2SDarrick J. Wong 		hm->first_event = event;
289b3a289a2SDarrick J. Wong 	if (hm->last_event)
290b3a289a2SDarrick J. Wong 		hm->last_event->next = event;
291b3a289a2SDarrick J. Wong 	hm->last_event = event;
292b3a289a2SDarrick J. Wong 	event->next = NULL;
293b3a289a2SDarrick J. Wong 	xfs_healthmon_bump_events(hm);
294b3a289a2SDarrick J. Wong 	wake_up(&hm->wait);
295b3a289a2SDarrick J. Wong 
296b3a289a2SDarrick J. Wong 	trace_xfs_healthmon_push(hm, event);
297b3a289a2SDarrick J. Wong }
298b3a289a2SDarrick J. Wong 
299b3a289a2SDarrick J. Wong /* Deal with any previously lost events */
300b3a289a2SDarrick J. Wong static int
301b3a289a2SDarrick J. Wong xfs_healthmon_clear_lost_prev(
302b3a289a2SDarrick J. Wong 	struct xfs_healthmon		*hm)
303b3a289a2SDarrick J. Wong {
304b3a289a2SDarrick J. Wong 	struct xfs_healthmon_event	lost_event = {
305b3a289a2SDarrick J. Wong 		.type			= XFS_HEALTHMON_LOST,
306b3a289a2SDarrick J. Wong 		.domain			= XFS_HEALTHMON_MOUNT,
307b3a289a2SDarrick J. Wong 		.lostcount		= hm->lost_prev_event,
308b3a289a2SDarrick J. Wong 	};
309b3a289a2SDarrick J. Wong 	struct xfs_healthmon_event	*event = NULL;
310b3a289a2SDarrick J. Wong 
311b3a289a2SDarrick J. Wong 	if (xfs_healthmon_merge_events(hm->last_event, &lost_event)) {
312b3a289a2SDarrick J. Wong 		trace_xfs_healthmon_merge(hm, hm->last_event);
313b3a289a2SDarrick J. Wong 		wake_up(&hm->wait);
314b3a289a2SDarrick J. Wong 		goto cleared;
315b3a289a2SDarrick J. Wong 	}
316b3a289a2SDarrick J. Wong 
317b3a289a2SDarrick J. Wong 	if (hm->events < XFS_HEALTHMON_MAX_EVENTS)
318b3a289a2SDarrick J. Wong 		event = kmemdup(&lost_event, sizeof(struct xfs_healthmon_event),
319b3a289a2SDarrick J. Wong 				GFP_NOFS);
320b3a289a2SDarrick J. Wong 	if (!event)
321b3a289a2SDarrick J. Wong 		return -ENOMEM;
322b3a289a2SDarrick J. Wong 
323b3a289a2SDarrick J. Wong 	__xfs_healthmon_push(hm, event);
324b3a289a2SDarrick J. Wong cleared:
325b3a289a2SDarrick J. Wong 	hm->lost_prev_event = 0;
326b3a289a2SDarrick J. Wong 	return 0;
327b3a289a2SDarrick J. Wong }
328b3a289a2SDarrick J. Wong 
329b3a289a2SDarrick J. Wong /*
330b3a289a2SDarrick J. Wong  * Push an event onto the end of the list after dealing with lost events and
331b3a289a2SDarrick J. Wong  * possibly full queues.
332b3a289a2SDarrick J. Wong  */
333b3a289a2SDarrick J. Wong STATIC int
334b3a289a2SDarrick J. Wong xfs_healthmon_push(
335b3a289a2SDarrick J. Wong 	struct xfs_healthmon			*hm,
336b3a289a2SDarrick J. Wong 	const struct xfs_healthmon_event	*template)
337b3a289a2SDarrick J. Wong {
338b3a289a2SDarrick J. Wong 	struct xfs_healthmon_event		*event = NULL;
339b3a289a2SDarrick J. Wong 	int					error = 0;
340b3a289a2SDarrick J. Wong 
341b3a289a2SDarrick J. Wong 	/*
342b3a289a2SDarrick J. Wong 	 * Locklessly check if the health monitor has already detached from the
343b3a289a2SDarrick J. Wong 	 * mount.  If so, ignore the event.  If we race with deactivation,
344b3a289a2SDarrick J. Wong 	 * we'll queue the event but never send it.
345b3a289a2SDarrick J. Wong 	 */
346b3a289a2SDarrick J. Wong 	if (hm->mount_cookie == DETACHED_MOUNT_COOKIE)
347b3a289a2SDarrick J. Wong 		return -ESHUTDOWN;
348b3a289a2SDarrick J. Wong 
349b3a289a2SDarrick J. Wong 	mutex_lock(&hm->lock);
350b3a289a2SDarrick J. Wong 
351b3a289a2SDarrick J. Wong 	/* Report previously lost events before we do anything else */
352b3a289a2SDarrick J. Wong 	if (hm->lost_prev_event) {
353b3a289a2SDarrick J. Wong 		error = xfs_healthmon_clear_lost_prev(hm);
354b3a289a2SDarrick J. Wong 		if (error)
355b3a289a2SDarrick J. Wong 			goto out_unlock;
356b3a289a2SDarrick J. Wong 	}
357b3a289a2SDarrick J. Wong 
358b3a289a2SDarrick J. Wong 	/* Try to merge with the newest event */
359b3a289a2SDarrick J. Wong 	if (xfs_healthmon_merge_events(hm->last_event, template)) {
360b3a289a2SDarrick J. Wong 		trace_xfs_healthmon_merge(hm, hm->last_event);
361b3a289a2SDarrick J. Wong 		wake_up(&hm->wait);
362b3a289a2SDarrick J. Wong 		goto out_unlock;
363b3a289a2SDarrick J. Wong 	}
364b3a289a2SDarrick J. Wong 
365b3a289a2SDarrick J. Wong 	/* Only create a heap event object if we're not already at capacity. */
366b3a289a2SDarrick J. Wong 	if (hm->events < XFS_HEALTHMON_MAX_EVENTS)
367b3a289a2SDarrick J. Wong 		event = kmemdup(template, sizeof(struct xfs_healthmon_event),
368b3a289a2SDarrick J. Wong 				GFP_NOFS);
369b3a289a2SDarrick J. Wong 	if (!event) {
370b3a289a2SDarrick J. Wong 		/* No memory means we lose the event */
371b3a289a2SDarrick J. Wong 		trace_xfs_healthmon_lost_event(hm);
372b3a289a2SDarrick J. Wong 		xfs_healthmon_bump_lost(hm);
373b3a289a2SDarrick J. Wong 		error = -ENOMEM;
374b3a289a2SDarrick J. Wong 		goto out_unlock;
375b3a289a2SDarrick J. Wong 	}
376b3a289a2SDarrick J. Wong 
377b3a289a2SDarrick J. Wong 	__xfs_healthmon_push(hm, event);
378b3a289a2SDarrick J. Wong 
379b3a289a2SDarrick J. Wong out_unlock:
380b3a289a2SDarrick J. Wong 	mutex_unlock(&hm->lock);
381b3a289a2SDarrick J. Wong 	return error;
382b3a289a2SDarrick J. Wong }
383b3a289a2SDarrick J. Wong 
38425ca57faSDarrick J. Wong /*
38525ca57faSDarrick J. Wong  * Report that the filesystem is being unmounted, then detach the xfs mount
38625ca57faSDarrick J. Wong  * from this healthmon instance.
38725ca57faSDarrick J. Wong  */
388a48373e7SDarrick J. Wong void
389a48373e7SDarrick J. Wong xfs_healthmon_unmount(
390a48373e7SDarrick J. Wong 	struct xfs_mount		*mp)
391a48373e7SDarrick J. Wong {
392a48373e7SDarrick J. Wong 	struct xfs_healthmon		*hm = xfs_healthmon_get(mp);
393a48373e7SDarrick J. Wong 
394a48373e7SDarrick J. Wong 	if (!hm)
395a48373e7SDarrick J. Wong 		return;
396a48373e7SDarrick J. Wong 
39725ca57faSDarrick J. Wong 	trace_xfs_healthmon_report_unmount(hm);
39825ca57faSDarrick J. Wong 
39925ca57faSDarrick J. Wong 	/*
40025ca57faSDarrick J. Wong 	 * Insert the unmount notification at the start of the event queue so
40125ca57faSDarrick J. Wong 	 * that userspace knows the filesystem went away as soon as possible.
40225ca57faSDarrick J. Wong 	 * There's nothing actionable for userspace after an unmount.  Once
40325ca57faSDarrick J. Wong 	 * we've inserted the unmount event, hm no longer owns that event.
40425ca57faSDarrick J. Wong 	 */
40525ca57faSDarrick J. Wong 	__xfs_healthmon_insert(hm, hm->unmount_event);
40625ca57faSDarrick J. Wong 	hm->unmount_event = NULL;
40725ca57faSDarrick J. Wong 
408a48373e7SDarrick J. Wong 	xfs_healthmon_detach(hm);
409a48373e7SDarrick J. Wong 	xfs_healthmon_put(hm);
410a48373e7SDarrick J. Wong }
411a48373e7SDarrick J. Wong 
4125eb4cb18SDarrick J. Wong /* Compute the reporting mask for non-unmount metadata health events. */
4135eb4cb18SDarrick J. Wong static inline unsigned int
4145eb4cb18SDarrick J. Wong metadata_event_mask(
4155eb4cb18SDarrick J. Wong 	struct xfs_healthmon		*hm,
4165eb4cb18SDarrick J. Wong 	enum xfs_healthmon_type		type,
4175eb4cb18SDarrick J. Wong 	unsigned int			old_mask,
4185eb4cb18SDarrick J. Wong 	unsigned int			new_mask)
4195eb4cb18SDarrick J. Wong {
4205eb4cb18SDarrick J. Wong 	/* If we want all events, return all events. */
4215eb4cb18SDarrick J. Wong 	if (hm->verbose)
4225eb4cb18SDarrick J. Wong 		return new_mask;
4235eb4cb18SDarrick J. Wong 
4245eb4cb18SDarrick J. Wong 	switch (type) {
4255eb4cb18SDarrick J. Wong 	case XFS_HEALTHMON_SICK:
4265eb4cb18SDarrick J. Wong 		/* Always report runtime corruptions */
4275eb4cb18SDarrick J. Wong 		return new_mask;
4285eb4cb18SDarrick J. Wong 	case XFS_HEALTHMON_CORRUPT:
4295eb4cb18SDarrick J. Wong 		/* Only report new fsck errors */
4305eb4cb18SDarrick J. Wong 		return new_mask & ~old_mask;
4315eb4cb18SDarrick J. Wong 	case XFS_HEALTHMON_HEALTHY:
4325eb4cb18SDarrick J. Wong 		/* Only report healthy metadata that got fixed */
4335eb4cb18SDarrick J. Wong 		return new_mask & old_mask;
4345eb4cb18SDarrick J. Wong 	default:
4355eb4cb18SDarrick J. Wong 		ASSERT(0);
4365eb4cb18SDarrick J. Wong 		break;
4375eb4cb18SDarrick J. Wong 	}
4385eb4cb18SDarrick J. Wong 
4395eb4cb18SDarrick J. Wong 	return 0;
4405eb4cb18SDarrick J. Wong }
4415eb4cb18SDarrick J. Wong 
4425eb4cb18SDarrick J. Wong /* Report XFS_FS_SICK_* events to healthmon */
4435eb4cb18SDarrick J. Wong void
4445eb4cb18SDarrick J. Wong xfs_healthmon_report_fs(
4455eb4cb18SDarrick J. Wong 	struct xfs_mount		*mp,
4465eb4cb18SDarrick J. Wong 	enum xfs_healthmon_type		type,
4475eb4cb18SDarrick J. Wong 	unsigned int			old_mask,
4485eb4cb18SDarrick J. Wong 	unsigned int			new_mask)
4495eb4cb18SDarrick J. Wong {
4505eb4cb18SDarrick J. Wong 	struct xfs_healthmon_event	event = {
4515eb4cb18SDarrick J. Wong 		.type			= type,
4525eb4cb18SDarrick J. Wong 		.domain			= XFS_HEALTHMON_FS,
4535eb4cb18SDarrick J. Wong 	};
4545eb4cb18SDarrick J. Wong 	struct xfs_healthmon		*hm = xfs_healthmon_get(mp);
4555eb4cb18SDarrick J. Wong 
4565eb4cb18SDarrick J. Wong 	if (!hm)
4575eb4cb18SDarrick J. Wong 		return;
4585eb4cb18SDarrick J. Wong 
4595eb4cb18SDarrick J. Wong 	event.fsmask = metadata_event_mask(hm, type, old_mask, new_mask) &
4605eb4cb18SDarrick J. Wong 			~XFS_SICK_FS_SECONDARY;
4615eb4cb18SDarrick J. Wong 	trace_xfs_healthmon_report_fs(hm, old_mask, new_mask, &event);
4625eb4cb18SDarrick J. Wong 
4635eb4cb18SDarrick J. Wong 	if (event.fsmask)
4645eb4cb18SDarrick J. Wong 		xfs_healthmon_push(hm, &event);
4655eb4cb18SDarrick J. Wong 
4665eb4cb18SDarrick J. Wong 	xfs_healthmon_put(hm);
4675eb4cb18SDarrick J. Wong }
4685eb4cb18SDarrick J. Wong 
4695eb4cb18SDarrick J. Wong /* Report XFS_SICK_(AG|RG)* flags to healthmon */
4705eb4cb18SDarrick J. Wong void
4715eb4cb18SDarrick J. Wong xfs_healthmon_report_group(
4725eb4cb18SDarrick J. Wong 	struct xfs_group		*xg,
4735eb4cb18SDarrick J. Wong 	enum xfs_healthmon_type		type,
4745eb4cb18SDarrick J. Wong 	unsigned int			old_mask,
4755eb4cb18SDarrick J. Wong 	unsigned int			new_mask)
4765eb4cb18SDarrick J. Wong {
4775eb4cb18SDarrick J. Wong 	struct xfs_healthmon_event	event = {
4785eb4cb18SDarrick J. Wong 		.type			= type,
4795eb4cb18SDarrick J. Wong 		.group			= xg->xg_gno,
4805eb4cb18SDarrick J. Wong 	};
4815eb4cb18SDarrick J. Wong 	struct xfs_healthmon		*hm = xfs_healthmon_get(xg->xg_mount);
4825eb4cb18SDarrick J. Wong 
4835eb4cb18SDarrick J. Wong 	if (!hm)
4845eb4cb18SDarrick J. Wong 		return;
4855eb4cb18SDarrick J. Wong 
4865eb4cb18SDarrick J. Wong 	switch (xg->xg_type) {
4875eb4cb18SDarrick J. Wong 	case XG_TYPE_RTG:
4885eb4cb18SDarrick J. Wong 		event.domain = XFS_HEALTHMON_RTGROUP;
4895eb4cb18SDarrick J. Wong 		event.grpmask = metadata_event_mask(hm, type, old_mask,
4905eb4cb18SDarrick J. Wong 						    new_mask) &
4915eb4cb18SDarrick J. Wong 				~XFS_SICK_RG_SECONDARY;
4925eb4cb18SDarrick J. Wong 		break;
4935eb4cb18SDarrick J. Wong 	case XG_TYPE_AG:
4945eb4cb18SDarrick J. Wong 		event.domain = XFS_HEALTHMON_AG;
4955eb4cb18SDarrick J. Wong 		event.grpmask = metadata_event_mask(hm, type, old_mask,
4965eb4cb18SDarrick J. Wong 						    new_mask) &
4975eb4cb18SDarrick J. Wong 				~XFS_SICK_AG_SECONDARY;
4985eb4cb18SDarrick J. Wong 		break;
4995eb4cb18SDarrick J. Wong 	default:
5005eb4cb18SDarrick J. Wong 		ASSERT(0);
5015eb4cb18SDarrick J. Wong 		break;
5025eb4cb18SDarrick J. Wong 	}
5035eb4cb18SDarrick J. Wong 
5045eb4cb18SDarrick J. Wong 	trace_xfs_healthmon_report_group(hm, old_mask, new_mask, &event);
5055eb4cb18SDarrick J. Wong 
5065eb4cb18SDarrick J. Wong 	if (event.grpmask)
5075eb4cb18SDarrick J. Wong 		xfs_healthmon_push(hm, &event);
5085eb4cb18SDarrick J. Wong 
5095eb4cb18SDarrick J. Wong 	xfs_healthmon_put(hm);
5105eb4cb18SDarrick J. Wong }
5115eb4cb18SDarrick J. Wong 
5125eb4cb18SDarrick J. Wong /* Report XFS_SICK_INO_* flags to healthmon */
5135eb4cb18SDarrick J. Wong void
5145eb4cb18SDarrick J. Wong xfs_healthmon_report_inode(
5155eb4cb18SDarrick J. Wong 	struct xfs_inode		*ip,
5165eb4cb18SDarrick J. Wong 	enum xfs_healthmon_type		type,
5175eb4cb18SDarrick J. Wong 	unsigned int			old_mask,
5185eb4cb18SDarrick J. Wong 	unsigned int			new_mask)
5195eb4cb18SDarrick J. Wong {
5205eb4cb18SDarrick J. Wong 	struct xfs_healthmon_event	event = {
5215eb4cb18SDarrick J. Wong 		.type			= type,
5225eb4cb18SDarrick J. Wong 		.domain			= XFS_HEALTHMON_INODE,
5235eb4cb18SDarrick J. Wong 		.ino			= ip->i_ino,
5245eb4cb18SDarrick J. Wong 		.gen			= VFS_I(ip)->i_generation,
5255eb4cb18SDarrick J. Wong 	};
5265eb4cb18SDarrick J. Wong 	struct xfs_healthmon		*hm = xfs_healthmon_get(ip->i_mount);
5275eb4cb18SDarrick J. Wong 
5285eb4cb18SDarrick J. Wong 	if (!hm)
5295eb4cb18SDarrick J. Wong 		return;
5305eb4cb18SDarrick J. Wong 
5315eb4cb18SDarrick J. Wong 	event.imask = metadata_event_mask(hm, type, old_mask, new_mask) &
5325eb4cb18SDarrick J. Wong 			~XFS_SICK_INO_SECONDARY;
5335eb4cb18SDarrick J. Wong 	trace_xfs_healthmon_report_inode(hm, old_mask, event.imask, &event);
5345eb4cb18SDarrick J. Wong 
5355eb4cb18SDarrick J. Wong 	if (event.imask)
5365eb4cb18SDarrick J. Wong 		xfs_healthmon_push(hm, &event);
5375eb4cb18SDarrick J. Wong 
5385eb4cb18SDarrick J. Wong 	xfs_healthmon_put(hm);
5395eb4cb18SDarrick J. Wong }
5405eb4cb18SDarrick J. Wong 
54174c4795eSDarrick J. Wong /* Add a shutdown event to the reporting queue. */
54274c4795eSDarrick J. Wong void
54374c4795eSDarrick J. Wong xfs_healthmon_report_shutdown(
54474c4795eSDarrick J. Wong 	struct xfs_mount		*mp,
54574c4795eSDarrick J. Wong 	uint32_t			flags)
54674c4795eSDarrick J. Wong {
54774c4795eSDarrick J. Wong 	struct xfs_healthmon_event	event = {
54874c4795eSDarrick J. Wong 		.type			= XFS_HEALTHMON_SHUTDOWN,
54974c4795eSDarrick J. Wong 		.domain			= XFS_HEALTHMON_MOUNT,
55074c4795eSDarrick J. Wong 		.flags			= flags,
55174c4795eSDarrick J. Wong 	};
55274c4795eSDarrick J. Wong 	struct xfs_healthmon		*hm = xfs_healthmon_get(mp);
55374c4795eSDarrick J. Wong 
55474c4795eSDarrick J. Wong 	if (!hm)
55574c4795eSDarrick J. Wong 		return;
55674c4795eSDarrick J. Wong 
55774c4795eSDarrick J. Wong 	trace_xfs_healthmon_report_shutdown(hm, flags);
55874c4795eSDarrick J. Wong 
55974c4795eSDarrick J. Wong 	xfs_healthmon_push(hm, &event);
56074c4795eSDarrick J. Wong 	xfs_healthmon_put(hm);
56174c4795eSDarrick J. Wong }
56274c4795eSDarrick J. Wong 
563e76e0e3fSDarrick J. Wong static inline enum xfs_healthmon_domain
564e76e0e3fSDarrick J. Wong media_error_domain(
565e76e0e3fSDarrick J. Wong 	enum xfs_device			fdev)
566e76e0e3fSDarrick J. Wong {
567e76e0e3fSDarrick J. Wong 	switch (fdev) {
568e76e0e3fSDarrick J. Wong 	case XFS_DEV_DATA:
569e76e0e3fSDarrick J. Wong 		return XFS_HEALTHMON_DATADEV;
570e76e0e3fSDarrick J. Wong 	case XFS_DEV_LOG:
571e76e0e3fSDarrick J. Wong 		return XFS_HEALTHMON_LOGDEV;
572e76e0e3fSDarrick J. Wong 	case XFS_DEV_RT:
573e76e0e3fSDarrick J. Wong 		return XFS_HEALTHMON_RTDEV;
574e76e0e3fSDarrick J. Wong 	}
575e76e0e3fSDarrick J. Wong 
576e76e0e3fSDarrick J. Wong 	ASSERT(0);
577e76e0e3fSDarrick J. Wong 	return 0;
578e76e0e3fSDarrick J. Wong }
579e76e0e3fSDarrick J. Wong 
580e76e0e3fSDarrick J. Wong /* Add a media error event to the reporting queue. */
581e76e0e3fSDarrick J. Wong void
582e76e0e3fSDarrick J. Wong xfs_healthmon_report_media(
583e76e0e3fSDarrick J. Wong 	struct xfs_mount		*mp,
584e76e0e3fSDarrick J. Wong 	enum xfs_device			fdev,
585e76e0e3fSDarrick J. Wong 	xfs_daddr_t			daddr,
586e76e0e3fSDarrick J. Wong 	uint64_t			bbcount)
587e76e0e3fSDarrick J. Wong {
588e76e0e3fSDarrick J. Wong 	struct xfs_healthmon_event	event = {
589e76e0e3fSDarrick J. Wong 		.type			= XFS_HEALTHMON_MEDIA_ERROR,
590e76e0e3fSDarrick J. Wong 		.domain			= media_error_domain(fdev),
591e76e0e3fSDarrick J. Wong 		.daddr			= daddr,
592e76e0e3fSDarrick J. Wong 		.bbcount		= bbcount,
593e76e0e3fSDarrick J. Wong 	};
594e76e0e3fSDarrick J. Wong 	struct xfs_healthmon		*hm = xfs_healthmon_get(mp);
595e76e0e3fSDarrick J. Wong 
596e76e0e3fSDarrick J. Wong 	if (!hm)
597e76e0e3fSDarrick J. Wong 		return;
598e76e0e3fSDarrick J. Wong 
599e76e0e3fSDarrick J. Wong 	trace_xfs_healthmon_report_media(hm, fdev, &event);
600e76e0e3fSDarrick J. Wong 
601e76e0e3fSDarrick J. Wong 	xfs_healthmon_push(hm, &event);
602e76e0e3fSDarrick J. Wong 	xfs_healthmon_put(hm);
603e76e0e3fSDarrick J. Wong }
604e76e0e3fSDarrick J. Wong 
605dfa8bad3SDarrick J. Wong static inline enum xfs_healthmon_type file_ioerr_type(enum fserror_type action)
606dfa8bad3SDarrick J. Wong {
607dfa8bad3SDarrick J. Wong 	switch (action) {
608dfa8bad3SDarrick J. Wong 	case FSERR_BUFFERED_READ:
609dfa8bad3SDarrick J. Wong 		return XFS_HEALTHMON_BUFREAD;
610dfa8bad3SDarrick J. Wong 	case FSERR_BUFFERED_WRITE:
611dfa8bad3SDarrick J. Wong 		return XFS_HEALTHMON_BUFWRITE;
612dfa8bad3SDarrick J. Wong 	case FSERR_DIRECTIO_READ:
613dfa8bad3SDarrick J. Wong 		return XFS_HEALTHMON_DIOREAD;
614dfa8bad3SDarrick J. Wong 	case FSERR_DIRECTIO_WRITE:
615dfa8bad3SDarrick J. Wong 		return XFS_HEALTHMON_DIOWRITE;
616dfa8bad3SDarrick J. Wong 	case FSERR_DATA_LOST:
617dfa8bad3SDarrick J. Wong 		return XFS_HEALTHMON_DATALOST;
618dfa8bad3SDarrick J. Wong 	case FSERR_METADATA:
619dfa8bad3SDarrick J. Wong 		/* filtered out by xfs_fs_report_error */
620dfa8bad3SDarrick J. Wong 		break;
621dfa8bad3SDarrick J. Wong 	}
622dfa8bad3SDarrick J. Wong 
623dfa8bad3SDarrick J. Wong 	ASSERT(0);
624dfa8bad3SDarrick J. Wong 	return -1;
625dfa8bad3SDarrick J. Wong }
626dfa8bad3SDarrick J. Wong 
627dfa8bad3SDarrick J. Wong /* Add a file io error event to the reporting queue. */
628dfa8bad3SDarrick J. Wong void
629dfa8bad3SDarrick J. Wong xfs_healthmon_report_file_ioerror(
630dfa8bad3SDarrick J. Wong 	struct xfs_inode		*ip,
631dfa8bad3SDarrick J. Wong 	const struct fserror_event	*p)
632dfa8bad3SDarrick J. Wong {
633dfa8bad3SDarrick J. Wong 	struct xfs_healthmon_event	event = {
634dfa8bad3SDarrick J. Wong 		.type			= file_ioerr_type(p->type),
635dfa8bad3SDarrick J. Wong 		.domain			= XFS_HEALTHMON_FILERANGE,
636dfa8bad3SDarrick J. Wong 		.fino			= ip->i_ino,
637dfa8bad3SDarrick J. Wong 		.fgen			= VFS_I(ip)->i_generation,
638dfa8bad3SDarrick J. Wong 		.fpos			= p->pos,
639dfa8bad3SDarrick J. Wong 		.flen			= p->len,
640dfa8bad3SDarrick J. Wong 		/* send positive error number to userspace */
641dfa8bad3SDarrick J. Wong 		.error			= -p->error,
642dfa8bad3SDarrick J. Wong 	};
643dfa8bad3SDarrick J. Wong 	struct xfs_healthmon		*hm = xfs_healthmon_get(ip->i_mount);
644dfa8bad3SDarrick J. Wong 
645dfa8bad3SDarrick J. Wong 	if (!hm)
646dfa8bad3SDarrick J. Wong 		return;
647dfa8bad3SDarrick J. Wong 
648dfa8bad3SDarrick J. Wong 	trace_xfs_healthmon_report_file_ioerror(hm, p);
649dfa8bad3SDarrick J. Wong 
650dfa8bad3SDarrick J. Wong 	xfs_healthmon_push(hm, &event);
651dfa8bad3SDarrick J. Wong 	xfs_healthmon_put(hm);
652dfa8bad3SDarrick J. Wong }
653dfa8bad3SDarrick J. Wong 
654b3a289a2SDarrick J. Wong static inline void
655b3a289a2SDarrick J. Wong xfs_healthmon_reset_outbuf(
656b3a289a2SDarrick J. Wong 	struct xfs_healthmon		*hm)
657b3a289a2SDarrick J. Wong {
658b3a289a2SDarrick J. Wong 	hm->buftail = 0;
659b3a289a2SDarrick J. Wong 	hm->bufhead = 0;
660b3a289a2SDarrick J. Wong }
661b3a289a2SDarrick J. Wong 
/* One entry of a flag translation table. */
struct flags_map {
	/* bit(s) to look for in the input flags */
	unsigned int		in_mask;
	/* bit(s) to set in the output when in_mask matches */
	unsigned int		out_mask;
};
66674c4795eSDarrick J. Wong 
66774c4795eSDarrick J. Wong static const struct flags_map shutdown_map[] = {
66874c4795eSDarrick J. Wong 	{ SHUTDOWN_META_IO_ERROR,	XFS_HEALTH_SHUTDOWN_META_IO_ERROR },
66974c4795eSDarrick J. Wong 	{ SHUTDOWN_LOG_IO_ERROR,	XFS_HEALTH_SHUTDOWN_LOG_IO_ERROR },
67074c4795eSDarrick J. Wong 	{ SHUTDOWN_FORCE_UMOUNT,	XFS_HEALTH_SHUTDOWN_FORCE_UMOUNT },
67174c4795eSDarrick J. Wong 	{ SHUTDOWN_CORRUPT_INCORE,	XFS_HEALTH_SHUTDOWN_CORRUPT_INCORE },
67274c4795eSDarrick J. Wong 	{ SHUTDOWN_CORRUPT_ONDISK,	XFS_HEALTH_SHUTDOWN_CORRUPT_ONDISK },
67374c4795eSDarrick J. Wong 	{ SHUTDOWN_DEVICE_REMOVED,	XFS_HEALTH_SHUTDOWN_DEVICE_REMOVED },
67474c4795eSDarrick J. Wong };
67574c4795eSDarrick J. Wong 
67674c4795eSDarrick J. Wong static inline unsigned int
67774c4795eSDarrick J. Wong __map_flags(
67874c4795eSDarrick J. Wong 	const struct flags_map	*map,
67974c4795eSDarrick J. Wong 	size_t			array_len,
68074c4795eSDarrick J. Wong 	unsigned int		flags)
68174c4795eSDarrick J. Wong {
68274c4795eSDarrick J. Wong 	const struct flags_map	*m;
68374c4795eSDarrick J. Wong 	unsigned int		ret = 0;
68474c4795eSDarrick J. Wong 
68574c4795eSDarrick J. Wong 	for (m = map; m < map + array_len; m++) {
68674c4795eSDarrick J. Wong 		if (flags & m->in_mask)
68774c4795eSDarrick J. Wong 			ret |= m->out_mask;
68874c4795eSDarrick J. Wong 	}
68974c4795eSDarrick J. Wong 
69074c4795eSDarrick J. Wong 	return ret;
69174c4795eSDarrick J. Wong }
69274c4795eSDarrick J. Wong 
/* Translate @flags through @map; @map must be an array so ARRAY_SIZE works. */
#define map_flags(map, flags) \
	__map_flags((map), ARRAY_SIZE(map), (flags))
69474c4795eSDarrick J. Wong 
69574c4795eSDarrick J. Wong static inline unsigned int shutdown_mask(unsigned int in)
69674c4795eSDarrick J. Wong {
69774c4795eSDarrick J. Wong 	return map_flags(shutdown_map, in);
69874c4795eSDarrick J. Wong }
69974c4795eSDarrick J. Wong 
700b3a289a2SDarrick J. Wong static const unsigned int domain_map[] = {
701b3a289a2SDarrick J. Wong 	[XFS_HEALTHMON_MOUNT]		= XFS_HEALTH_MONITOR_DOMAIN_MOUNT,
7025eb4cb18SDarrick J. Wong 	[XFS_HEALTHMON_FS]		= XFS_HEALTH_MONITOR_DOMAIN_FS,
7035eb4cb18SDarrick J. Wong 	[XFS_HEALTHMON_AG]		= XFS_HEALTH_MONITOR_DOMAIN_AG,
7045eb4cb18SDarrick J. Wong 	[XFS_HEALTHMON_INODE]		= XFS_HEALTH_MONITOR_DOMAIN_INODE,
7055eb4cb18SDarrick J. Wong 	[XFS_HEALTHMON_RTGROUP]		= XFS_HEALTH_MONITOR_DOMAIN_RTGROUP,
706e76e0e3fSDarrick J. Wong 	[XFS_HEALTHMON_DATADEV]		= XFS_HEALTH_MONITOR_DOMAIN_DATADEV,
707e76e0e3fSDarrick J. Wong 	[XFS_HEALTHMON_RTDEV]		= XFS_HEALTH_MONITOR_DOMAIN_RTDEV,
708e76e0e3fSDarrick J. Wong 	[XFS_HEALTHMON_LOGDEV]		= XFS_HEALTH_MONITOR_DOMAIN_LOGDEV,
709dfa8bad3SDarrick J. Wong 	[XFS_HEALTHMON_FILERANGE]	= XFS_HEALTH_MONITOR_DOMAIN_FILERANGE,
710b3a289a2SDarrick J. Wong };
711b3a289a2SDarrick J. Wong 
712b3a289a2SDarrick J. Wong static const unsigned int type_map[] = {
713b3a289a2SDarrick J. Wong 	[XFS_HEALTHMON_RUNNING]		= XFS_HEALTH_MONITOR_TYPE_RUNNING,
714b3a289a2SDarrick J. Wong 	[XFS_HEALTHMON_LOST]		= XFS_HEALTH_MONITOR_TYPE_LOST,
7155eb4cb18SDarrick J. Wong 	[XFS_HEALTHMON_SICK]		= XFS_HEALTH_MONITOR_TYPE_SICK,
7165eb4cb18SDarrick J. Wong 	[XFS_HEALTHMON_CORRUPT]		= XFS_HEALTH_MONITOR_TYPE_CORRUPT,
7175eb4cb18SDarrick J. Wong 	[XFS_HEALTHMON_HEALTHY]		= XFS_HEALTH_MONITOR_TYPE_HEALTHY,
7185eb4cb18SDarrick J. Wong 	[XFS_HEALTHMON_UNMOUNT]		= XFS_HEALTH_MONITOR_TYPE_UNMOUNT,
71974c4795eSDarrick J. Wong 	[XFS_HEALTHMON_SHUTDOWN]	= XFS_HEALTH_MONITOR_TYPE_SHUTDOWN,
720e76e0e3fSDarrick J. Wong 	[XFS_HEALTHMON_MEDIA_ERROR]	= XFS_HEALTH_MONITOR_TYPE_MEDIA_ERROR,
721dfa8bad3SDarrick J. Wong 	[XFS_HEALTHMON_BUFREAD]		= XFS_HEALTH_MONITOR_TYPE_BUFREAD,
722dfa8bad3SDarrick J. Wong 	[XFS_HEALTHMON_BUFWRITE]	= XFS_HEALTH_MONITOR_TYPE_BUFWRITE,
723dfa8bad3SDarrick J. Wong 	[XFS_HEALTHMON_DIOREAD]		= XFS_HEALTH_MONITOR_TYPE_DIOREAD,
724dfa8bad3SDarrick J. Wong 	[XFS_HEALTHMON_DIOWRITE]	= XFS_HEALTH_MONITOR_TYPE_DIOWRITE,
725dfa8bad3SDarrick J. Wong 	[XFS_HEALTHMON_DATALOST]	= XFS_HEALTH_MONITOR_TYPE_DATALOST,
726b3a289a2SDarrick J. Wong };
727b3a289a2SDarrick J. Wong 
728b3a289a2SDarrick J. Wong /* Render event as a V0 structure */
729b3a289a2SDarrick J. Wong STATIC int
730b3a289a2SDarrick J. Wong xfs_healthmon_format_v0(
731b3a289a2SDarrick J. Wong 	struct xfs_healthmon		*hm,
732b3a289a2SDarrick J. Wong 	const struct xfs_healthmon_event *event)
733b3a289a2SDarrick J. Wong {
734b3a289a2SDarrick J. Wong 	struct xfs_health_monitor_event	hme = {
735b3a289a2SDarrick J. Wong 		.time_ns		= event->time_ns,
736b3a289a2SDarrick J. Wong 	};
737b3a289a2SDarrick J. Wong 
738b3a289a2SDarrick J. Wong 	trace_xfs_healthmon_format(hm, event);
739b3a289a2SDarrick J. Wong 
740b3a289a2SDarrick J. Wong 	if (event->domain < 0 || event->domain >= ARRAY_SIZE(domain_map) ||
741b3a289a2SDarrick J. Wong 	    event->type < 0   || event->type >= ARRAY_SIZE(type_map))
742b3a289a2SDarrick J. Wong 		return -EFSCORRUPTED;
743b3a289a2SDarrick J. Wong 
744b3a289a2SDarrick J. Wong 	hme.domain = domain_map[event->domain];
745b3a289a2SDarrick J. Wong 	hme.type = type_map[event->type];
746b3a289a2SDarrick J. Wong 
747b3a289a2SDarrick J. Wong 	/* fill in the event-specific details */
748b3a289a2SDarrick J. Wong 	switch (event->domain) {
749b3a289a2SDarrick J. Wong 	case XFS_HEALTHMON_MOUNT:
750b3a289a2SDarrick J. Wong 		switch (event->type) {
751b3a289a2SDarrick J. Wong 		case XFS_HEALTHMON_LOST:
752b3a289a2SDarrick J. Wong 			hme.e.lost.count = event->lostcount;
753b3a289a2SDarrick J. Wong 			break;
75474c4795eSDarrick J. Wong 		case XFS_HEALTHMON_SHUTDOWN:
75574c4795eSDarrick J. Wong 			hme.e.shutdown.reasons = shutdown_mask(event->flags);
75674c4795eSDarrick J. Wong 			break;
757b3a289a2SDarrick J. Wong 		default:
758b3a289a2SDarrick J. Wong 			break;
759b3a289a2SDarrick J. Wong 		}
760b3a289a2SDarrick J. Wong 		break;
7615eb4cb18SDarrick J. Wong 	case XFS_HEALTHMON_FS:
7625eb4cb18SDarrick J. Wong 		hme.e.fs.mask = xfs_healthmon_fs_mask(event->fsmask);
7635eb4cb18SDarrick J. Wong 		break;
7645eb4cb18SDarrick J. Wong 	case XFS_HEALTHMON_RTGROUP:
7655eb4cb18SDarrick J. Wong 		hme.e.group.mask = xfs_healthmon_rtgroup_mask(event->grpmask);
7665eb4cb18SDarrick J. Wong 		hme.e.group.gno = event->group;
7675eb4cb18SDarrick J. Wong 		break;
7685eb4cb18SDarrick J. Wong 	case XFS_HEALTHMON_AG:
7695eb4cb18SDarrick J. Wong 		hme.e.group.mask = xfs_healthmon_perag_mask(event->grpmask);
7705eb4cb18SDarrick J. Wong 		hme.e.group.gno = event->group;
7715eb4cb18SDarrick J. Wong 		break;
7725eb4cb18SDarrick J. Wong 	case XFS_HEALTHMON_INODE:
7735eb4cb18SDarrick J. Wong 		hme.e.inode.mask = xfs_healthmon_inode_mask(event->imask);
7745eb4cb18SDarrick J. Wong 		hme.e.inode.ino = event->ino;
7755eb4cb18SDarrick J. Wong 		hme.e.inode.gen = event->gen;
7765eb4cb18SDarrick J. Wong 		break;
777e76e0e3fSDarrick J. Wong 	case XFS_HEALTHMON_DATADEV:
778e76e0e3fSDarrick J. Wong 	case XFS_HEALTHMON_LOGDEV:
779e76e0e3fSDarrick J. Wong 	case XFS_HEALTHMON_RTDEV:
780e76e0e3fSDarrick J. Wong 		hme.e.media.daddr = event->daddr;
781e76e0e3fSDarrick J. Wong 		hme.e.media.bbcount = event->bbcount;
782e76e0e3fSDarrick J. Wong 		break;
783dfa8bad3SDarrick J. Wong 	case XFS_HEALTHMON_FILERANGE:
784dfa8bad3SDarrick J. Wong 		hme.e.filerange.ino = event->fino;
785dfa8bad3SDarrick J. Wong 		hme.e.filerange.gen = event->fgen;
786dfa8bad3SDarrick J. Wong 		hme.e.filerange.pos = event->fpos;
787dfa8bad3SDarrick J. Wong 		hme.e.filerange.len = event->flen;
788dfa8bad3SDarrick J. Wong 		hme.e.filerange.error = abs(event->error);
789dfa8bad3SDarrick J. Wong 		break;
790b3a289a2SDarrick J. Wong 	default:
791b3a289a2SDarrick J. Wong 		break;
792b3a289a2SDarrick J. Wong 	}
793b3a289a2SDarrick J. Wong 
794b3a289a2SDarrick J. Wong 	ASSERT(hm->bufhead + sizeof(hme) <= hm->bufsize);
795b3a289a2SDarrick J. Wong 
796b3a289a2SDarrick J. Wong 	/* copy formatted object to the outbuf */
797b3a289a2SDarrick J. Wong 	if (hm->bufhead + sizeof(hme) <= hm->bufsize) {
798b3a289a2SDarrick J. Wong 		memcpy(hm->buffer + hm->bufhead, &hme, sizeof(hme));
799b3a289a2SDarrick J. Wong 		hm->bufhead += sizeof(hme);
800b3a289a2SDarrick J. Wong 	}
801b3a289a2SDarrick J. Wong 
802b3a289a2SDarrick J. Wong 	return 0;
803b3a289a2SDarrick J. Wong }
804b3a289a2SDarrick J. Wong 
805b3a289a2SDarrick J. Wong /* How many bytes are waiting in the outbuf to be copied? */
806b3a289a2SDarrick J. Wong static inline size_t
807b3a289a2SDarrick J. Wong xfs_healthmon_outbuf_bytes(
808b3a289a2SDarrick J. Wong 	struct xfs_healthmon	*hm)
809b3a289a2SDarrick J. Wong {
810b3a289a2SDarrick J. Wong 	if (hm->bufhead > hm->buftail)
811b3a289a2SDarrick J. Wong 		return hm->bufhead - hm->buftail;
812b3a289a2SDarrick J. Wong 	return 0;
813b3a289a2SDarrick J. Wong }
814b3a289a2SDarrick J. Wong 
815b3a289a2SDarrick J. Wong /*
816b3a289a2SDarrick J. Wong  * Do we have something for userspace to read?  This can mean unmount events,
817b3a289a2SDarrick J. Wong  * events pending in the queue, or pending bytes in the outbuf.
818b3a289a2SDarrick J. Wong  */
819b3a289a2SDarrick J. Wong static inline bool
820b3a289a2SDarrick J. Wong xfs_healthmon_has_eventdata(
821b3a289a2SDarrick J. Wong 	struct xfs_healthmon	*hm)
822b3a289a2SDarrick J. Wong {
823b3a289a2SDarrick J. Wong 	/*
824b3a289a2SDarrick J. Wong 	 * If the health monitor is already detached from the xfs_mount, we
825b3a289a2SDarrick J. Wong 	 * want reads to return 0 bytes even if there are no events, because
826b3a289a2SDarrick J. Wong 	 * userspace interprets that as EOF.  If we race with deactivation,
827b3a289a2SDarrick J. Wong 	 * read_iter will take the necessary locks to discover that there are
828b3a289a2SDarrick J. Wong 	 * no events to send.
829b3a289a2SDarrick J. Wong 	 */
830b3a289a2SDarrick J. Wong 	if (hm->mount_cookie == DETACHED_MOUNT_COOKIE)
831b3a289a2SDarrick J. Wong 		return true;
832b3a289a2SDarrick J. Wong 
833b3a289a2SDarrick J. Wong 	/*
834b3a289a2SDarrick J. Wong 	 * Either there are events waiting to be formatted into the buffer, or
835b3a289a2SDarrick J. Wong 	 * there's unread bytes in the buffer.
836b3a289a2SDarrick J. Wong 	 */
837b3a289a2SDarrick J. Wong 	return hm->events > 0 || xfs_healthmon_outbuf_bytes(hm) > 0;
838b3a289a2SDarrick J. Wong }
839b3a289a2SDarrick J. Wong 
840b3a289a2SDarrick J. Wong /* Try to copy the rest of the outbuf to the iov iter. */
841b3a289a2SDarrick J. Wong STATIC ssize_t
842b3a289a2SDarrick J. Wong xfs_healthmon_copybuf(
843b3a289a2SDarrick J. Wong 	struct xfs_healthmon	*hm,
844b3a289a2SDarrick J. Wong 	struct iov_iter		*to)
845b3a289a2SDarrick J. Wong {
846b3a289a2SDarrick J. Wong 	size_t			to_copy;
847b3a289a2SDarrick J. Wong 	size_t			w = 0;
848b3a289a2SDarrick J. Wong 
849b3a289a2SDarrick J. Wong 	trace_xfs_healthmon_copybuf(hm, to);
850b3a289a2SDarrick J. Wong 
851b3a289a2SDarrick J. Wong 	to_copy = xfs_healthmon_outbuf_bytes(hm);
852b3a289a2SDarrick J. Wong 	if (to_copy) {
853b3a289a2SDarrick J. Wong 		w = copy_to_iter(hm->buffer + hm->buftail, to_copy, to);
854b3a289a2SDarrick J. Wong 		if (!w)
855b3a289a2SDarrick J. Wong 			return -EFAULT;
856b3a289a2SDarrick J. Wong 
857b3a289a2SDarrick J. Wong 		hm->buftail += w;
858b3a289a2SDarrick J. Wong 	}
859b3a289a2SDarrick J. Wong 
860b3a289a2SDarrick J. Wong 	/*
861b3a289a2SDarrick J. Wong 	 * Nothing left to copy?  Reset the output buffer cursors to the start
862b3a289a2SDarrick J. Wong 	 * since there's no live data in the buffer.
863b3a289a2SDarrick J. Wong 	 */
864b3a289a2SDarrick J. Wong 	if (xfs_healthmon_outbuf_bytes(hm) == 0)
865b3a289a2SDarrick J. Wong 		xfs_healthmon_reset_outbuf(hm);
866b3a289a2SDarrick J. Wong 	return w;
867b3a289a2SDarrick J. Wong }
868b3a289a2SDarrick J. Wong 
869b3a289a2SDarrick J. Wong /*
870b3a289a2SDarrick J. Wong  * Return a health monitoring event for formatting into the output buffer if
871b3a289a2SDarrick J. Wong  * there's enough space in the outbuf and an event waiting for us.  Caller
872b3a289a2SDarrick J. Wong  * must hold i_rwsem on the healthmon file.
873b3a289a2SDarrick J. Wong  */
874b3a289a2SDarrick J. Wong static inline struct xfs_healthmon_event *
875b3a289a2SDarrick J. Wong xfs_healthmon_format_pop(
876b3a289a2SDarrick J. Wong 	struct xfs_healthmon	*hm)
877b3a289a2SDarrick J. Wong {
878b3a289a2SDarrick J. Wong 	struct xfs_healthmon_event *event;
879b3a289a2SDarrick J. Wong 
880b3a289a2SDarrick J. Wong 	if (hm->bufhead + sizeof(*event) > hm->bufsize)
881b3a289a2SDarrick J. Wong 		return NULL;
882b3a289a2SDarrick J. Wong 
883b3a289a2SDarrick J. Wong 	mutex_lock(&hm->lock);
884b3a289a2SDarrick J. Wong 	event = hm->first_event;
885b3a289a2SDarrick J. Wong 	if (event) {
886b3a289a2SDarrick J. Wong 		if (hm->last_event == event)
887b3a289a2SDarrick J. Wong 			hm->last_event = NULL;
888b3a289a2SDarrick J. Wong 		hm->first_event = event->next;
889b3a289a2SDarrick J. Wong 		hm->events--;
890b3a289a2SDarrick J. Wong 
891b3a289a2SDarrick J. Wong 		trace_xfs_healthmon_pop(hm, event);
892b3a289a2SDarrick J. Wong 	}
893b3a289a2SDarrick J. Wong 	mutex_unlock(&hm->lock);
894b3a289a2SDarrick J. Wong 	return event;
895b3a289a2SDarrick J. Wong }
896b3a289a2SDarrick J. Wong 
897b3a289a2SDarrick J. Wong /* Allocate formatting buffer */
898b3a289a2SDarrick J. Wong STATIC int
899b3a289a2SDarrick J. Wong xfs_healthmon_alloc_outbuf(
900b3a289a2SDarrick J. Wong 	struct xfs_healthmon	*hm,
901b3a289a2SDarrick J. Wong 	size_t			user_bufsize)
902b3a289a2SDarrick J. Wong {
903b3a289a2SDarrick J. Wong 	void			*outbuf;
904b3a289a2SDarrick J. Wong 	size_t			bufsize =
905b3a289a2SDarrick J. Wong 		min(XFS_HEALTHMON_MAX_OUTBUF, max(PAGE_SIZE, user_bufsize));
906b3a289a2SDarrick J. Wong 
907b3a289a2SDarrick J. Wong 	outbuf = kzalloc(bufsize, GFP_KERNEL);
908b3a289a2SDarrick J. Wong 	if (!outbuf) {
909b3a289a2SDarrick J. Wong 		if (bufsize == PAGE_SIZE)
910b3a289a2SDarrick J. Wong 			return -ENOMEM;
911b3a289a2SDarrick J. Wong 
912b3a289a2SDarrick J. Wong 		bufsize = PAGE_SIZE;
913b3a289a2SDarrick J. Wong 		outbuf = kzalloc(bufsize, GFP_KERNEL);
914b3a289a2SDarrick J. Wong 		if (!outbuf)
915b3a289a2SDarrick J. Wong 			return -ENOMEM;
916b3a289a2SDarrick J. Wong 	}
917b3a289a2SDarrick J. Wong 
918b3a289a2SDarrick J. Wong 	hm->buffer = outbuf;
919b3a289a2SDarrick J. Wong 	hm->bufsize = bufsize;
920b3a289a2SDarrick J. Wong 	hm->bufhead = 0;
921b3a289a2SDarrick J. Wong 	hm->buftail = 0;
922b3a289a2SDarrick J. Wong 
923b3a289a2SDarrick J. Wong 	return 0;
924b3a289a2SDarrick J. Wong }
925b3a289a2SDarrick J. Wong 
926b3a289a2SDarrick J. Wong /*
927b3a289a2SDarrick J. Wong  * Convey queued event data to userspace.  First copy any remaining bytes in
928b3a289a2SDarrick J. Wong  * the outbuf, then format the oldest event into the outbuf and copy that too.
929b3a289a2SDarrick J. Wong  */
930a48373e7SDarrick J. Wong STATIC ssize_t
931a48373e7SDarrick J. Wong xfs_healthmon_read_iter(
932a48373e7SDarrick J. Wong 	struct kiocb		*iocb,
933a48373e7SDarrick J. Wong 	struct iov_iter		*to)
934a48373e7SDarrick J. Wong {
935b3a289a2SDarrick J. Wong 	struct file		*file = iocb->ki_filp;
936b3a289a2SDarrick J. Wong 	struct inode		*inode = file_inode(file);
937b3a289a2SDarrick J. Wong 	struct xfs_healthmon	*hm = file->private_data;
938b3a289a2SDarrick J. Wong 	struct xfs_healthmon_event *event;
939b3a289a2SDarrick J. Wong 	size_t			copied = 0;
940b3a289a2SDarrick J. Wong 	ssize_t			ret = 0;
941b3a289a2SDarrick J. Wong 
942b3a289a2SDarrick J. Wong 	if (file->f_flags & O_NONBLOCK) {
943b3a289a2SDarrick J. Wong 		if (!xfs_healthmon_has_eventdata(hm) || !inode_trylock(inode))
944b3a289a2SDarrick J. Wong 			return -EAGAIN;
945b3a289a2SDarrick J. Wong 	} else {
946b3a289a2SDarrick J. Wong 		ret = wait_event_interruptible(hm->wait,
947b3a289a2SDarrick J. Wong 				xfs_healthmon_has_eventdata(hm));
948b3a289a2SDarrick J. Wong 		if (ret)
949b3a289a2SDarrick J. Wong 			return ret;
950b3a289a2SDarrick J. Wong 
951b3a289a2SDarrick J. Wong 		inode_lock(inode);
952b3a289a2SDarrick J. Wong 	}
953b3a289a2SDarrick J. Wong 
954b3a289a2SDarrick J. Wong 	if (hm->bufsize == 0) {
955b3a289a2SDarrick J. Wong 		ret = xfs_healthmon_alloc_outbuf(hm, iov_iter_count(to));
956b3a289a2SDarrick J. Wong 		if (ret)
957b3a289a2SDarrick J. Wong 			goto out_unlock;
958b3a289a2SDarrick J. Wong 	}
959b3a289a2SDarrick J. Wong 
960b3a289a2SDarrick J. Wong 	trace_xfs_healthmon_read_start(hm);
961b3a289a2SDarrick J. Wong 
962b3a289a2SDarrick J. Wong 	/*
963b3a289a2SDarrick J. Wong 	 * If there's anything left in the output buffer, copy that before
964b3a289a2SDarrick J. Wong 	 * formatting more events.
965b3a289a2SDarrick J. Wong 	 */
966b3a289a2SDarrick J. Wong 	ret = xfs_healthmon_copybuf(hm, to);
967b3a289a2SDarrick J. Wong 	if (ret < 0)
968b3a289a2SDarrick J. Wong 		goto out_unlock;
969b3a289a2SDarrick J. Wong 	copied += ret;
970b3a289a2SDarrick J. Wong 
971b3a289a2SDarrick J. Wong 	while (iov_iter_count(to) > 0) {
972b3a289a2SDarrick J. Wong 		/* Format the next events into the outbuf until it's full. */
973b3a289a2SDarrick J. Wong 		while ((event = xfs_healthmon_format_pop(hm)) != NULL) {
974b3a289a2SDarrick J. Wong 			ret = xfs_healthmon_format_v0(hm, event);
975b3a289a2SDarrick J. Wong 			kfree(event);
976b3a289a2SDarrick J. Wong 			if (ret)
977b3a289a2SDarrick J. Wong 				goto out_unlock;
978b3a289a2SDarrick J. Wong 		}
979b3a289a2SDarrick J. Wong 
980b3a289a2SDarrick J. Wong 		/* Copy anything formatted into outbuf to userspace */
981b3a289a2SDarrick J. Wong 		ret = xfs_healthmon_copybuf(hm, to);
982b3a289a2SDarrick J. Wong 		if (ret <= 0)
983b3a289a2SDarrick J. Wong 			break;
984b3a289a2SDarrick J. Wong 
985b3a289a2SDarrick J. Wong 		copied += ret;
986b3a289a2SDarrick J. Wong 	}
987b3a289a2SDarrick J. Wong 
988b3a289a2SDarrick J. Wong out_unlock:
989b3a289a2SDarrick J. Wong 	trace_xfs_healthmon_read_finish(hm);
990b3a289a2SDarrick J. Wong 	inode_unlock(inode);
991b3a289a2SDarrick J. Wong 	return copied ?: ret;
992b3a289a2SDarrick J. Wong }
993b3a289a2SDarrick J. Wong 
994b3a289a2SDarrick J. Wong /* Poll for available events. */
995b3a289a2SDarrick J. Wong STATIC __poll_t
996b3a289a2SDarrick J. Wong xfs_healthmon_poll(
997b3a289a2SDarrick J. Wong 	struct file			*file,
998b3a289a2SDarrick J. Wong 	struct poll_table_struct	*wait)
999b3a289a2SDarrick J. Wong {
1000b3a289a2SDarrick J. Wong 	struct xfs_healthmon		*hm = file->private_data;
1001b3a289a2SDarrick J. Wong 	__poll_t			mask = 0;
1002b3a289a2SDarrick J. Wong 
1003b3a289a2SDarrick J. Wong 	poll_wait(file, &hm->wait, wait);
1004b3a289a2SDarrick J. Wong 
1005b3a289a2SDarrick J. Wong 	if (xfs_healthmon_has_eventdata(hm))
1006b3a289a2SDarrick J. Wong 		mask |= EPOLLIN;
1007b3a289a2SDarrick J. Wong 	return mask;
1008a48373e7SDarrick J. Wong }
1009a48373e7SDarrick J. Wong 
1010a48373e7SDarrick J. Wong /* Free the health monitoring information. */
1011a48373e7SDarrick J. Wong STATIC int
1012a48373e7SDarrick J. Wong xfs_healthmon_release(
1013a48373e7SDarrick J. Wong 	struct inode		*inode,
1014a48373e7SDarrick J. Wong 	struct file		*file)
1015a48373e7SDarrick J. Wong {
1016a48373e7SDarrick J. Wong 	struct xfs_healthmon	*hm = file->private_data;
1017a48373e7SDarrick J. Wong 
1018b3a289a2SDarrick J. Wong 	trace_xfs_healthmon_release(hm);
1019b3a289a2SDarrick J. Wong 
1020a48373e7SDarrick J. Wong 	/*
1021a48373e7SDarrick J. Wong 	 * We might be closing the healthmon file before the filesystem
1022a48373e7SDarrick J. Wong 	 * unmounts, because userspace processes can terminate at any time and
1023a48373e7SDarrick J. Wong 	 * for any reason.  Null out xfs_mount::m_healthmon so that another
1024a48373e7SDarrick J. Wong 	 * process can create another health monitor file.
1025a48373e7SDarrick J. Wong 	 */
1026a48373e7SDarrick J. Wong 	xfs_healthmon_detach(hm);
1027a48373e7SDarrick J. Wong 
1028b3a289a2SDarrick J. Wong 	/*
1029b3a289a2SDarrick J. Wong 	 * Wake up any readers that might be left.  There shouldn't be any
1030b3a289a2SDarrick J. Wong 	 * because the only users of the waiter are read and poll.
1031b3a289a2SDarrick J. Wong 	 */
1032b3a289a2SDarrick J. Wong 	wake_up_all(&hm->wait);
1033b3a289a2SDarrick J. Wong 
1034a48373e7SDarrick J. Wong 	xfs_healthmon_put(hm);
1035a48373e7SDarrick J. Wong 	return 0;
1036a48373e7SDarrick J. Wong }
1037a48373e7SDarrick J. Wong 
1038a48373e7SDarrick J. Wong /* Validate ioctl parameters. */
1039a48373e7SDarrick J. Wong static inline bool
1040a48373e7SDarrick J. Wong xfs_healthmon_validate(
1041a48373e7SDarrick J. Wong 	const struct xfs_health_monitor	*hmo)
1042a48373e7SDarrick J. Wong {
1043b3a289a2SDarrick J. Wong 	if (hmo->flags & ~XFS_HEALTH_MONITOR_ALL)
1044a48373e7SDarrick J. Wong 		return false;
1045b3a289a2SDarrick J. Wong 	if (hmo->format != XFS_HEALTH_MONITOR_FMT_V0)
1046a48373e7SDarrick J. Wong 		return false;
1047a48373e7SDarrick J. Wong 	if (memchr_inv(&hmo->pad, 0, sizeof(hmo->pad)))
1048a48373e7SDarrick J. Wong 		return false;
1049a48373e7SDarrick J. Wong 	return true;
1050a48373e7SDarrick J. Wong }
1051a48373e7SDarrick J. Wong 
1052a48373e7SDarrick J. Wong /* Emit some data about the health monitoring fd. */
1053a48373e7SDarrick J. Wong static void
1054a48373e7SDarrick J. Wong xfs_healthmon_show_fdinfo(
1055a48373e7SDarrick J. Wong 	struct seq_file		*m,
1056a48373e7SDarrick J. Wong 	struct file		*file)
1057a48373e7SDarrick J. Wong {
1058a48373e7SDarrick J. Wong 	struct xfs_healthmon	*hm = file->private_data;
1059a48373e7SDarrick J. Wong 
1060b3a289a2SDarrick J. Wong 	mutex_lock(&hm->lock);
1061b3a289a2SDarrick J. Wong 	seq_printf(m, "state:\t%s\ndev:\t%d:%d\nformat:\tv0\nevents:\t%llu\nlost:\t%llu\n",
1062a48373e7SDarrick J. Wong 			hm->mount_cookie == DETACHED_MOUNT_COOKIE ?
1063a48373e7SDarrick J. Wong 				"dead" : "alive",
1064b3a289a2SDarrick J. Wong 			MAJOR(hm->dev), MINOR(hm->dev),
1065b3a289a2SDarrick J. Wong 			hm->total_events,
1066b3a289a2SDarrick J. Wong 			hm->total_lost);
1067b3a289a2SDarrick J. Wong 	mutex_unlock(&hm->lock);
1068a48373e7SDarrick J. Wong }
1069a48373e7SDarrick J. Wong 
1070c0e719cbSDarrick J. Wong /* Reconfigure the health monitor. */
1071c0e719cbSDarrick J. Wong STATIC long
1072c0e719cbSDarrick J. Wong xfs_healthmon_reconfigure(
1073c0e719cbSDarrick J. Wong 	struct file			*file,
1074c0e719cbSDarrick J. Wong 	unsigned int			cmd,
1075c0e719cbSDarrick J. Wong 	void __user			*arg)
1076c0e719cbSDarrick J. Wong {
1077c0e719cbSDarrick J. Wong 	struct xfs_health_monitor	hmo;
1078c0e719cbSDarrick J. Wong 	struct xfs_healthmon		*hm = file->private_data;
1079c0e719cbSDarrick J. Wong 
1080c0e719cbSDarrick J. Wong 	if (copy_from_user(&hmo, arg, sizeof(hmo)))
1081c0e719cbSDarrick J. Wong 		return -EFAULT;
1082c0e719cbSDarrick J. Wong 
1083c0e719cbSDarrick J. Wong 	if (!xfs_healthmon_validate(&hmo))
1084c0e719cbSDarrick J. Wong 		return -EINVAL;
1085c0e719cbSDarrick J. Wong 
1086c0e719cbSDarrick J. Wong 	mutex_lock(&hm->lock);
1087c0e719cbSDarrick J. Wong 	hm->verbose = !!(hmo.flags & XFS_HEALTH_MONITOR_VERBOSE);
1088c0e719cbSDarrick J. Wong 	mutex_unlock(&hm->lock);
1089c0e719cbSDarrick J. Wong 
1090c0e719cbSDarrick J. Wong 	return 0;
1091c0e719cbSDarrick J. Wong }
1092c0e719cbSDarrick J. Wong 
10938b85dc40SDarrick J. Wong /* Does the fd point to the same filesystem as the one we're monitoring? */
10948b85dc40SDarrick J. Wong STATIC long
10958b85dc40SDarrick J. Wong xfs_healthmon_file_on_monitored_fs(
10968b85dc40SDarrick J. Wong 	struct file			*file,
10978b85dc40SDarrick J. Wong 	unsigned int			cmd,
10988b85dc40SDarrick J. Wong 	void __user			*arg)
10998b85dc40SDarrick J. Wong {
11008b85dc40SDarrick J. Wong 	struct xfs_health_file_on_monitored_fs hms;
11018b85dc40SDarrick J. Wong 	struct xfs_healthmon		*hm = file->private_data;
11028b85dc40SDarrick J. Wong 	struct inode			*hms_inode;
11038b85dc40SDarrick J. Wong 
11048b85dc40SDarrick J. Wong 	if (copy_from_user(&hms, arg, sizeof(hms)))
11058b85dc40SDarrick J. Wong 		return -EFAULT;
11068b85dc40SDarrick J. Wong 
11078b85dc40SDarrick J. Wong 	if (hms.flags)
11088b85dc40SDarrick J. Wong 		return -EINVAL;
11098b85dc40SDarrick J. Wong 
11108b85dc40SDarrick J. Wong 	CLASS(fd, hms_fd)(hms.fd);
11118b85dc40SDarrick J. Wong 	if (fd_empty(hms_fd))
11128b85dc40SDarrick J. Wong 		return -EBADF;
11138b85dc40SDarrick J. Wong 
11148b85dc40SDarrick J. Wong 	hms_inode = file_inode(fd_file(hms_fd));
11158b85dc40SDarrick J. Wong 	mutex_lock(&hm->lock);
11168b85dc40SDarrick J. Wong 	if (hm->mount_cookie != (uintptr_t)hms_inode->i_sb) {
11178b85dc40SDarrick J. Wong 		mutex_unlock(&hm->lock);
11188b85dc40SDarrick J. Wong 		return -ESTALE;
11198b85dc40SDarrick J. Wong 	}
11208b85dc40SDarrick J. Wong 
11218b85dc40SDarrick J. Wong 	mutex_unlock(&hm->lock);
11228b85dc40SDarrick J. Wong 	return 0;
11238b85dc40SDarrick J. Wong }
11248b85dc40SDarrick J. Wong 
1125c0e719cbSDarrick J. Wong /* Handle ioctls for the health monitoring thread. */
1126c0e719cbSDarrick J. Wong STATIC long
1127c0e719cbSDarrick J. Wong xfs_healthmon_ioctl(
1128c0e719cbSDarrick J. Wong 	struct file			*file,
1129c0e719cbSDarrick J. Wong 	unsigned int			cmd,
1130c0e719cbSDarrick J. Wong 	unsigned long			p)
1131c0e719cbSDarrick J. Wong {
1132c0e719cbSDarrick J. Wong 	void __user			*arg = (void __user *)p;
1133c0e719cbSDarrick J. Wong 
1134c0e719cbSDarrick J. Wong 	switch (cmd) {
1135c0e719cbSDarrick J. Wong 	case XFS_IOC_HEALTH_MONITOR:
1136c0e719cbSDarrick J. Wong 		return xfs_healthmon_reconfigure(file, cmd, arg);
11378b85dc40SDarrick J. Wong 	case XFS_IOC_HEALTH_FD_ON_MONITORED_FS:
11388b85dc40SDarrick J. Wong 		return xfs_healthmon_file_on_monitored_fs(file, cmd, arg);
1139c0e719cbSDarrick J. Wong 	default:
1140c0e719cbSDarrick J. Wong 		break;
1141c0e719cbSDarrick J. Wong 	}
1142c0e719cbSDarrick J. Wong 
1143c0e719cbSDarrick J. Wong 	return -ENOTTY;
1144c0e719cbSDarrick J. Wong }
1145c0e719cbSDarrick J. Wong 
1146a48373e7SDarrick J. Wong static const struct file_operations xfs_healthmon_fops = {
1147a48373e7SDarrick J. Wong 	.owner		= THIS_MODULE,
1148a48373e7SDarrick J. Wong 	.show_fdinfo	= xfs_healthmon_show_fdinfo,
1149a48373e7SDarrick J. Wong 	.read_iter	= xfs_healthmon_read_iter,
1150b3a289a2SDarrick J. Wong 	.poll		= xfs_healthmon_poll,
1151a48373e7SDarrick J. Wong 	.release	= xfs_healthmon_release,
1152c0e719cbSDarrick J. Wong 	.unlocked_ioctl	= xfs_healthmon_ioctl,
1153a48373e7SDarrick J. Wong };
1154a48373e7SDarrick J. Wong 
1155a48373e7SDarrick J. Wong /*
1156a48373e7SDarrick J. Wong  * Create a health monitoring file.  Returns an index to the fd table or a
1157a48373e7SDarrick J. Wong  * negative errno.
1158a48373e7SDarrick J. Wong  */
1159a48373e7SDarrick J. Wong long
1160a48373e7SDarrick J. Wong xfs_ioc_health_monitor(
1161a48373e7SDarrick J. Wong 	struct file			*file,
1162a48373e7SDarrick J. Wong 	struct xfs_health_monitor __user *arg)
1163a48373e7SDarrick J. Wong {
1164a48373e7SDarrick J. Wong 	struct xfs_health_monitor	hmo;
1165b3a289a2SDarrick J. Wong 	struct xfs_healthmon_event	*running_event;
1166a48373e7SDarrick J. Wong 	struct xfs_healthmon		*hm;
1167a48373e7SDarrick J. Wong 	struct xfs_inode		*ip = XFS_I(file_inode(file));
1168a48373e7SDarrick J. Wong 	struct xfs_mount		*mp = ip->i_mount;
1169a48373e7SDarrick J. Wong 	int				ret;
1170a48373e7SDarrick J. Wong 
1171a48373e7SDarrick J. Wong 	/*
1172a48373e7SDarrick J. Wong 	 * The only intended user of the health monitoring system should be the
1173a48373e7SDarrick J. Wong 	 * xfs_healer daemon running on behalf of the whole filesystem in the
1174a48373e7SDarrick J. Wong 	 * initial user namespace.  IOWs, we don't allow unprivileged userspace
1175a48373e7SDarrick J. Wong 	 * (they can use fsnotify) nor do we allow containers.
1176a48373e7SDarrick J. Wong 	 */
1177a48373e7SDarrick J. Wong 	if (!capable(CAP_SYS_ADMIN))
1178a48373e7SDarrick J. Wong 		return -EPERM;
1179a48373e7SDarrick J. Wong 	if (ip->i_ino != mp->m_sb.sb_rootino)
1180a48373e7SDarrick J. Wong 		return -EPERM;
1181a48373e7SDarrick J. Wong 	if (current_user_ns() != &init_user_ns)
1182a48373e7SDarrick J. Wong 		return -EPERM;
1183a48373e7SDarrick J. Wong 
1184a48373e7SDarrick J. Wong 	if (copy_from_user(&hmo, arg, sizeof(hmo)))
1185a48373e7SDarrick J. Wong 		return -EFAULT;
1186a48373e7SDarrick J. Wong 
1187a48373e7SDarrick J. Wong 	if (!xfs_healthmon_validate(&hmo))
1188a48373e7SDarrick J. Wong 		return -EINVAL;
1189a48373e7SDarrick J. Wong 
1190*bf4afc53SLinus Torvalds 	hm = kzalloc_obj(*hm);
1191a48373e7SDarrick J. Wong 	if (!hm)
1192a48373e7SDarrick J. Wong 		return -ENOMEM;
1193a48373e7SDarrick J. Wong 	hm->dev = mp->m_super->s_dev;
1194a48373e7SDarrick J. Wong 	refcount_set(&hm->ref, 1);
1195a48373e7SDarrick J. Wong 
1196b3a289a2SDarrick J. Wong 	mutex_init(&hm->lock);
1197b3a289a2SDarrick J. Wong 	init_waitqueue_head(&hm->wait);
1198b3a289a2SDarrick J. Wong 
1199b3a289a2SDarrick J. Wong 	if (hmo.flags & XFS_HEALTH_MONITOR_VERBOSE)
1200b3a289a2SDarrick J. Wong 		hm->verbose = true;
1201b3a289a2SDarrick J. Wong 
1202b3a289a2SDarrick J. Wong 	/* Queue up the first event that lets the client know we're running. */
120369050f8dSKees Cook 	running_event = kzalloc_obj(struct xfs_healthmon_event, GFP_NOFS);
1204b3a289a2SDarrick J. Wong 	if (!running_event) {
1205b3a289a2SDarrick J. Wong 		ret = -ENOMEM;
1206b3a289a2SDarrick J. Wong 		goto out_hm;
1207b3a289a2SDarrick J. Wong 	}
1208b3a289a2SDarrick J. Wong 	running_event->type = XFS_HEALTHMON_RUNNING;
1209b3a289a2SDarrick J. Wong 	running_event->domain = XFS_HEALTHMON_MOUNT;
1210b3a289a2SDarrick J. Wong 	__xfs_healthmon_insert(hm, running_event);
1211b3a289a2SDarrick J. Wong 
1212a48373e7SDarrick J. Wong 	/*
121325ca57faSDarrick J. Wong 	 * Preallocate the unmount event so that we can't fail to notify the
121425ca57faSDarrick J. Wong 	 * filesystem later.  This is key for triggering fast exit of the
121525ca57faSDarrick J. Wong 	 * xfs_healer daemon.
121625ca57faSDarrick J. Wong 	 */
121769050f8dSKees Cook 	hm->unmount_event = kzalloc_obj(struct xfs_healthmon_event, GFP_NOFS);
121825ca57faSDarrick J. Wong 	if (!hm->unmount_event) {
121925ca57faSDarrick J. Wong 		ret = -ENOMEM;
122025ca57faSDarrick J. Wong 		goto out_hm;
122125ca57faSDarrick J. Wong 	}
122225ca57faSDarrick J. Wong 	hm->unmount_event->type = XFS_HEALTHMON_UNMOUNT;
122325ca57faSDarrick J. Wong 	hm->unmount_event->domain = XFS_HEALTHMON_MOUNT;
122425ca57faSDarrick J. Wong 
122525ca57faSDarrick J. Wong 	/*
1226a48373e7SDarrick J. Wong 	 * Try to attach this health monitor to the xfs_mount.  The monitor is
1227a48373e7SDarrick J. Wong 	 * considered live and will receive events if this succeeds.
1228a48373e7SDarrick J. Wong 	 */
1229a48373e7SDarrick J. Wong 	ret = xfs_healthmon_attach(mp, hm);
1230a48373e7SDarrick J. Wong 	if (ret)
1231a48373e7SDarrick J. Wong 		goto out_hm;
1232a48373e7SDarrick J. Wong 
1233a48373e7SDarrick J. Wong 	/*
1234a48373e7SDarrick J. Wong 	 * Create the anonymous file and install a fd for it.  If it succeeds,
1235a48373e7SDarrick J. Wong 	 * the file owns hm and can go away at any time, so we must not access
1236a48373e7SDarrick J. Wong 	 * it again.  This must go last because we can't undo a fd table
1237a48373e7SDarrick J. Wong 	 * installation.
1238a48373e7SDarrick J. Wong 	 */
1239a48373e7SDarrick J. Wong 	ret = anon_inode_getfd("xfs_healthmon", &xfs_healthmon_fops, hm,
1240a48373e7SDarrick J. Wong 			O_CLOEXEC | O_RDONLY);
1241a48373e7SDarrick J. Wong 	if (ret < 0)
1242a48373e7SDarrick J. Wong 		goto out_mp;
1243a48373e7SDarrick J. Wong 
1244b3a289a2SDarrick J. Wong 	trace_xfs_healthmon_create(mp->m_super->s_dev, hmo.flags, hmo.format);
1245b3a289a2SDarrick J. Wong 
1246a48373e7SDarrick J. Wong 	return ret;
1247a48373e7SDarrick J. Wong 
1248a48373e7SDarrick J. Wong out_mp:
1249a48373e7SDarrick J. Wong 	xfs_healthmon_detach(hm);
1250a48373e7SDarrick J. Wong out_hm:
1251a48373e7SDarrick J. Wong 	ASSERT(refcount_read(&hm->ref) == 1);
1252a48373e7SDarrick J. Wong 	xfs_healthmon_put(hm);
1253a48373e7SDarrick J. Wong 	return ret;
1254a48373e7SDarrick J. Wong }
1255