xref: /linux/fs/xfs/xfs_healthmon.c (revision 399af66228cfd7df79dc360810b6b673000f8090)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2024-2026 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs_platform.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_inode.h"
14 #include "xfs_trace.h"
15 #include "xfs_ag.h"
16 #include "xfs_btree.h"
17 #include "xfs_da_format.h"
18 #include "xfs_da_btree.h"
19 #include "xfs_quota_defs.h"
20 #include "xfs_rtgroup.h"
21 #include "xfs_health.h"
22 #include "xfs_healthmon.h"
23 #include "xfs_fsops.h"
24 #include "xfs_notify_failure.h"
25 #include "xfs_file.h"
26 #include "xfs_ioctl.h"
27 
28 #include <linux/anon_inodes.h>
29 #include <linux/eventpoll.h>
30 #include <linux/poll.h>
31 #include <linux/fserror.h>
32 
33 /*
34  * Live Health Monitoring
35  * ======================
36  *
37  * Autonomous self-healing of XFS filesystems requires a means for the kernel
38  * to send filesystem health events to a monitoring daemon in userspace.  To
39  * accomplish this, we establish a thread_with_file kthread object to handle
40  * translating internal events about filesystem health into a format that can
41  * be parsed easily by userspace.  When those internal events occur, the core
42  * filesystem code calls this health monitor to convey the events to userspace.
43  * Userspace reads events from the file descriptor returned by the ioctl.
44  *
45  * The healthmon abstraction has a weak reference to the host filesystem mount
46  * so that the queueing and processing of the events do not pin the mount and
47  * cannot slow down the main filesystem.  The healthmon object can exist past
48  * the end of the filesystem mount.
49  */
50 
51 /* sign of a detached health monitor */
52 #define DETACHED_MOUNT_COOKIE		((uintptr_t)0)
53 
54 /* Constrain the number of event objects that can build up in memory. */
55 #define XFS_HEALTHMON_MAX_EVENTS	(SZ_32K / \
56 					 sizeof(struct xfs_healthmon_event))
57 
58 /* Constrain the size of the output buffer for read_iter. */
59 #define XFS_HEALTHMON_MAX_OUTBUF	SZ_64K
60 
61 /* spinlock for atomically updating xfs_mount <-> xfs_healthmon pointers */
62 static DEFINE_SPINLOCK(xfs_healthmon_lock);
63 
/*
 * Grab a reference to the healthmon object for a given mount, if any.
 *
 * Returns the monitor with an elevated refcount, or NULL if no monitor is
 * attached or the monitor is already being torn down.  Caller must drop the
 * reference with xfs_healthmon_put.
 */
static struct xfs_healthmon *
xfs_healthmon_get(
	struct xfs_mount		*mp)
{
	struct xfs_healthmon		*hm;

	/*
	 * The monitor is freed an RCU grace period after the last reference
	 * drops, so the pointer load and the refcount bump must both happen
	 * inside the RCU read-side critical section.
	 */
	rcu_read_lock();
	hm = rcu_dereference(mp->m_healthmon);
	/* A zero refcount means teardown already started; don't resurrect. */
	if (hm && !refcount_inc_not_zero(&hm->ref))
		hm = NULL;
	rcu_read_unlock();

	return hm;
}
79 
/*
 * Release the reference to a healthmon object.  If there are no more holders,
 * free the health monitor after an RCU grace period to eliminate possibility
 * of races with xfs_healthmon_get.
 */
static void
xfs_healthmon_put(
	struct xfs_healthmon		*hm)
{
	if (refcount_dec_and_test(&hm->ref)) {
		struct xfs_healthmon_event	*event;
		struct xfs_healthmon_event	*next = hm->first_event;

		/* Last reference is gone: drop any events still queued. */
		while ((event = next) != NULL) {
			trace_xfs_healthmon_drop(hm, event);
			next = event->next;
			kfree(event);
		}

		/* The preallocated unmount event may never have been sent. */
		kfree(hm->unmount_event);
		kfree(hm->buffer);
		mutex_destroy(&hm->lock);
		/* Defer the free so a racing xfs_healthmon_get stays safe. */
		kfree_rcu_mightsleep(hm);
	}
}
105 
/*
 * Attach a health monitor to an xfs_mount.  Only one allowed at a time.
 * Returns 0 on success or -EEXIST if a monitor is already attached.
 */
STATIC int
xfs_healthmon_attach(
	struct xfs_mount	*mp,
	struct xfs_healthmon	*hm)
{
	spin_lock(&xfs_healthmon_lock);
	if (rcu_access_pointer(mp->m_healthmon) != NULL) {
		spin_unlock(&xfs_healthmon_lock);
		return -EEXIST;
	}

	/* The mount's pointer counts as a reference on the monitor. */
	refcount_inc(&hm->ref);
	rcu_assign_pointer(mp->m_healthmon, hm);
	/*
	 * Record the superblock as an opaque cookie rather than a pointer we
	 * dereference later; the monitor must not pin the mount.
	 */
	hm->mount_cookie = (uintptr_t)mp->m_super;
	spin_unlock(&xfs_healthmon_lock);

	return 0;
}
125 
/*
 * Detach a xfs mount from a specific healthmon instance.  Safe to call more
 * than once; only the first call does the work.  Drops the reference that
 * the mount held on the monitor.
 */
STATIC void
xfs_healthmon_detach(
	struct xfs_healthmon	*hm)
{
	struct xfs_mount	*mp;

	spin_lock(&xfs_healthmon_lock);
	/* Already detached (e.g. unmount vs. fd release race); nothing to do. */
	if (hm->mount_cookie == DETACHED_MOUNT_COOKIE) {
		spin_unlock(&xfs_healthmon_lock);
		return;
	}

	/* The cookie is only dereferenced here, under the global spinlock. */
	mp = XFS_M((struct super_block *)hm->mount_cookie);
	rcu_assign_pointer(mp->m_healthmon, NULL);
	hm->mount_cookie = DETACHED_MOUNT_COOKIE;
	spin_unlock(&xfs_healthmon_lock);

	/*
	 * Wake up any readers that might remain.  This can happen if unmount
	 * races with the healthmon fd owner entering ->read_iter, having
	 * already emptied the event queue.
	 *
	 * In the ->release case there shouldn't be any readers because the
	 * only users of the waiter are read and poll.
	 */
	wake_up_all(&hm->wait);

	trace_xfs_healthmon_detach(hm);
	xfs_healthmon_put(hm);
}
157 
xfs_healthmon_bump_events(struct xfs_healthmon * hm)158 static inline void xfs_healthmon_bump_events(struct xfs_healthmon *hm)
159 {
160 	hm->events++;
161 	hm->total_events++;
162 }
163 
xfs_healthmon_bump_lost(struct xfs_healthmon * hm)164 static inline void xfs_healthmon_bump_lost(struct xfs_healthmon *hm)
165 {
166 	hm->lost_prev_event++;
167 	hm->total_lost++;
168 }
169 
/*
 * If possible, merge a new event into an existing event.  Returns whether or
 * not it merged anything.  Merging keeps the queue short when similar events
 * fire repeatedly.  @existing (usually the queue tail) is modified in place;
 * @new is never modified.
 */
static bool
xfs_healthmon_merge_events(
	struct xfs_healthmon_event		*existing,
	const struct xfs_healthmon_event	*new)
{
	/* Empty queue: nothing to merge into. */
	if (!existing)
		return false;

	/* type and domain must match to merge events */
	if (existing->type != new->type ||
	    existing->domain != new->domain)
		return false;

	switch (existing->type) {
	case XFS_HEALTHMON_RUNNING:
	case XFS_HEALTHMON_UNMOUNT:
		/* should only ever be one of these events anyway */
		return false;

	case XFS_HEALTHMON_LOST:
		/* Lost-event records just accumulate a counter. */
		existing->lostcount += new->lostcount;
		return true;

	case XFS_HEALTHMON_SICK:
	case XFS_HEALTHMON_CORRUPT:
	case XFS_HEALTHMON_HEALTHY:
		/* Metadata health events merge by OR-ing the state masks... */
		switch (existing->domain) {
		case XFS_HEALTHMON_FS:
			existing->fsmask |= new->fsmask;
			return true;
		case XFS_HEALTHMON_AG:
		case XFS_HEALTHMON_RTGROUP:
			/* ...but only within the same group. */
			if (existing->group == new->group){
				existing->grpmask |= new->grpmask;
				return true;
			}
			return false;
		case XFS_HEALTHMON_INODE:
			/* ...and only for the same inode and generation. */
			if (existing->ino == new->ino &&
			    existing->gen == new->gen) {
				existing->imask |= new->imask;
				return true;
			}
			return false;
		default:
			ASSERT(0);
			return false;
		}
		return false;

	case XFS_HEALTHMON_SHUTDOWN:
		/* yes, we can race to shutdown */
		existing->flags |= new->flags;
		return true;

	case XFS_HEALTHMON_MEDIA_ERROR:
		/* physically adjacent errors can merge */
		if (existing->daddr + existing->bbcount == new->daddr) {
			existing->bbcount += new->bbcount;
			return true;
		}
		/* The new range may also immediately precede the existing one. */
		if (new->daddr + new->bbcount == existing->daddr) {
			existing->daddr = new->daddr;
			existing->bbcount += new->bbcount;
			return true;
		}
		return false;

	case XFS_HEALTHMON_BUFREAD:
	case XFS_HEALTHMON_BUFWRITE:
	case XFS_HEALTHMON_DIOREAD:
	case XFS_HEALTHMON_DIOWRITE:
	case XFS_HEALTHMON_DATALOST:
		/* logically adjacent file ranges can merge */
		if (existing->fino != new->fino || existing->fgen != new->fgen)
			return false;

		if (existing->fpos + existing->flen == new->fpos) {
			existing->flen += new->flen;
			return true;
		}

		if (new->fpos + new->flen == existing->fpos) {
			existing->fpos = new->fpos;
			existing->flen += new->flen;
			return true;
		}
		return false;
	}

	return false;
}
266 
267 /* Insert an event onto the start of the queue. */
268 static inline void
__xfs_healthmon_insert(struct xfs_healthmon * hm,struct xfs_healthmon_event * event)269 __xfs_healthmon_insert(
270 	struct xfs_healthmon		*hm,
271 	struct xfs_healthmon_event	*event)
272 {
273 	struct timespec64		now;
274 
275 	ktime_get_coarse_real_ts64(&now);
276 	event->time_ns = (now.tv_sec * NSEC_PER_SEC) + now.tv_nsec;
277 
278 	event->next = hm->first_event;
279 	if (!hm->first_event)
280 		hm->first_event = event;
281 	if (!hm->last_event)
282 		hm->last_event = event;
283 	xfs_healthmon_bump_events(hm);
284 	wake_up(&hm->wait);
285 
286 	trace_xfs_healthmon_insert(hm, event);
287 }
288 
/*
 * Push an event onto the end of the queue.  Caller hands ownership of @event
 * to the queue and must hold hm->lock.
 */
static inline void
__xfs_healthmon_push(
	struct xfs_healthmon		*hm,
	struct xfs_healthmon_event	*event)
{
	struct timespec64		now;

	/* Coarse clock is plenty for event timestamps and much cheaper. */
	ktime_get_coarse_real_ts64(&now);
	event->time_ns = (now.tv_sec * NSEC_PER_SEC) + now.tv_nsec;

	/* Append at the tail; the head moves only if the queue was empty. */
	if (!hm->first_event)
		hm->first_event = event;
	if (hm->last_event)
		hm->last_event->next = event;
	hm->last_event = event;
	event->next = NULL;
	xfs_healthmon_bump_events(hm);
	/* Let sleeping readers and pollers know there's something to read. */
	wake_up(&hm->wait);

	trace_xfs_healthmon_push(hm, event);
}
311 
/*
 * Deal with any previously lost events by queueing a LOST event carrying the
 * count.  Caller must hold hm->lock.  Returns 0 if the loss was recorded, or
 * -ENOMEM if it could not be (the counter is left intact for a retry).
 */
static int
xfs_healthmon_clear_lost_prev(
	struct xfs_healthmon		*hm)
{
	struct xfs_healthmon_event	lost_event = {
		.type			= XFS_HEALTHMON_LOST,
		.domain			= XFS_HEALTHMON_MOUNT,
		.lostcount		= hm->lost_prev_event,
	};
	struct xfs_healthmon_event	*event = NULL;

	/* If the newest queued event is already a LOST record, accumulate. */
	if (xfs_healthmon_merge_events(hm->last_event, &lost_event)) {
		trace_xfs_healthmon_merge(hm, hm->last_event);
		wake_up(&hm->wait);
		goto cleared;
	}

	/* Otherwise allocate a new record, but respect the queue cap. */
	if (hm->events < XFS_HEALTHMON_MAX_EVENTS)
		event = kmemdup(&lost_event, sizeof(struct xfs_healthmon_event),
				GFP_NOFS);
	if (!event)
		return -ENOMEM;

	__xfs_healthmon_push(hm, event);
cleared:
	hm->lost_prev_event = 0;
	return 0;
}
341 
/*
 * Push an event onto the end of the list after dealing with lost events and
 * possibly full queues.
 *
 * @template is copied; the caller retains ownership of it.  Returns 0 on
 * success, -ESHUTDOWN if the monitor is detached, or -ENOMEM if the event
 * had to be dropped (the drop is accounted for later reporting).
 */
STATIC int
xfs_healthmon_push(
	struct xfs_healthmon			*hm,
	const struct xfs_healthmon_event	*template)
{
	struct xfs_healthmon_event		*event = NULL;
	int					error = 0;

	/*
	 * Locklessly check if the health monitor has already detached from the
	 * mount.  If so, ignore the event.  If we race with deactivation,
	 * we'll queue the event but never send it.
	 */
	if (hm->mount_cookie == DETACHED_MOUNT_COOKIE)
		return -ESHUTDOWN;

	mutex_lock(&hm->lock);

	/* Report previously lost events before we do anything else */
	if (hm->lost_prev_event) {
		error = xfs_healthmon_clear_lost_prev(hm);
		if (error)
			goto out_unlock;
	}

	/* Try to merge with the newest event */
	if (xfs_healthmon_merge_events(hm->last_event, template)) {
		trace_xfs_healthmon_merge(hm, hm->last_event);
		/* A merge updates existing data, so readers still need a poke. */
		wake_up(&hm->wait);
		goto out_unlock;
	}

	/* Only create a heap event object if we're not already at capacity. */
	if (hm->events < XFS_HEALTHMON_MAX_EVENTS)
		event = kmemdup(template, sizeof(struct xfs_healthmon_event),
				GFP_NOFS);
	if (!event) {
		/* No memory means we lose the event */
		trace_xfs_healthmon_lost_event(hm);
		xfs_healthmon_bump_lost(hm);
		error = -ENOMEM;
		goto out_unlock;
	}

	__xfs_healthmon_push(hm, event);

out_unlock:
	mutex_unlock(&hm->lock);
	return error;
}
396 
/*
 * Report that the filesystem is being unmounted, then detach the xfs mount
 * from this healthmon instance.
 */
void
xfs_healthmon_unmount(
	struct xfs_mount		*mp)
{
	struct xfs_healthmon		*hm = xfs_healthmon_get(mp);

	/* No monitor attached; nothing to report. */
	if (!hm)
		return;

	trace_xfs_healthmon_report_unmount(hm);

	/*
	 * Insert the unmount notification at the start of the event queue so
	 * that userspace knows the filesystem went away as soon as possible.
	 * There's nothing actionable for userspace after an unmount.  Once
	 * we've inserted the unmount event, hm no longer owns that event.
	 *
	 * NOTE(review): hm->lock is not held around this insert, unlike the
	 * xfs_healthmon_push path -- confirm this cannot race with a
	 * concurrent push or format_pop.
	 */
	__xfs_healthmon_insert(hm, hm->unmount_event);
	hm->unmount_event = NULL;

	xfs_healthmon_detach(hm);
	xfs_healthmon_put(hm);
}
424 
425 /* Compute the reporting mask for non-unmount metadata health events. */
426 static inline unsigned int
metadata_event_mask(struct xfs_healthmon * hm,enum xfs_healthmon_type type,unsigned int old_mask,unsigned int new_mask)427 metadata_event_mask(
428 	struct xfs_healthmon		*hm,
429 	enum xfs_healthmon_type		type,
430 	unsigned int			old_mask,
431 	unsigned int			new_mask)
432 {
433 	/* If we want all events, return all events. */
434 	if (hm->verbose)
435 		return new_mask;
436 
437 	switch (type) {
438 	case XFS_HEALTHMON_SICK:
439 		/* Always report runtime corruptions */
440 		return new_mask;
441 	case XFS_HEALTHMON_CORRUPT:
442 		/* Only report new fsck errors */
443 		return new_mask & ~old_mask;
444 	case XFS_HEALTHMON_HEALTHY:
445 		/* Only report healthy metadata that got fixed */
446 		return new_mask & old_mask;
447 	default:
448 		ASSERT(0);
449 		break;
450 	}
451 
452 	return 0;
453 }
454 
/*
 * Report XFS_FS_SICK_* events to healthmon.  Called from the health tracking
 * code; does nothing if no monitor is attached.
 */
void
xfs_healthmon_report_fs(
	struct xfs_mount		*mp,
	enum xfs_healthmon_type		type,
	unsigned int			old_mask,
	unsigned int			new_mask)
{
	struct xfs_healthmon_event	event = {
		.type			= type,
		.domain			= XFS_HEALTHMON_FS,
	};
	struct xfs_healthmon		*hm = xfs_healthmon_get(mp);

	if (!hm)
		return;

	/* Secondary state bits are internal bookkeeping; never report them. */
	event.fsmask = metadata_event_mask(hm, type, old_mask, new_mask) &
			~XFS_SICK_FS_SECONDARY;
	trace_xfs_healthmon_report_fs(hm, old_mask, new_mask, &event);

	/* Nothing survived the filtering?  Then there's nothing to queue. */
	if (event.fsmask)
		xfs_healthmon_push(hm, &event);

	xfs_healthmon_put(hm);
}
481 
/*
 * Report XFS_SICK_(AG|RG)* flags to healthmon.  The event domain and mask
 * filtering depend on whether @xg is an allocation group or a realtime
 * group.  Does nothing if no monitor is attached.
 */
void
xfs_healthmon_report_group(
	struct xfs_group		*xg,
	enum xfs_healthmon_type		type,
	unsigned int			old_mask,
	unsigned int			new_mask)
{
	struct xfs_healthmon_event	event = {
		.type			= type,
		.group			= xg->xg_gno,
	};
	struct xfs_healthmon		*hm = xfs_healthmon_get(xg->xg_mount);

	if (!hm)
		return;

	switch (xg->xg_type) {
	case XG_TYPE_RTG:
		event.domain = XFS_HEALTHMON_RTGROUP;
		/* Secondary state bits are internal; never report them. */
		event.grpmask = metadata_event_mask(hm, type, old_mask,
						    new_mask) &
				~XFS_SICK_RG_SECONDARY;
		break;
	case XG_TYPE_AG:
		event.domain = XFS_HEALTHMON_AG;
		event.grpmask = metadata_event_mask(hm, type, old_mask,
						    new_mask) &
				~XFS_SICK_AG_SECONDARY;
		break;
	default:
		/* Unknown group type: grpmask stays 0 and nothing is queued. */
		ASSERT(0);
		break;
	}

	trace_xfs_healthmon_report_group(hm, old_mask, new_mask, &event);

	if (event.grpmask)
		xfs_healthmon_push(hm, &event);

	xfs_healthmon_put(hm);
}
524 
/*
 * Report XFS_SICK_INO_* flags to healthmon.  Does nothing if no monitor is
 * attached.
 */
void
xfs_healthmon_report_inode(
	struct xfs_inode		*ip,
	enum xfs_healthmon_type		type,
	unsigned int			old_mask,
	unsigned int			new_mask)
{
	struct xfs_healthmon_event	event = {
		.type			= type,
		.domain			= XFS_HEALTHMON_INODE,
		.ino			= ip->i_ino,
		/* Generation lets userspace detect inode number reuse. */
		.gen			= VFS_I(ip)->i_generation,
	};
	struct xfs_healthmon		*hm = xfs_healthmon_get(ip->i_mount);

	if (!hm)
		return;

	/* Secondary state bits are internal bookkeeping; never report them. */
	event.imask = metadata_event_mask(hm, type, old_mask, new_mask) &
			~XFS_SICK_INO_SECONDARY;
	/*
	 * NOTE(review): this passes the filtered event.imask where
	 * xfs_healthmon_report_fs passes the raw new_mask to its tracepoint;
	 * confirm the asymmetry is intentional.
	 */
	trace_xfs_healthmon_report_inode(hm, old_mask, event.imask, &event);

	if (event.imask)
		xfs_healthmon_push(hm, &event);

	xfs_healthmon_put(hm);
}
553 
/*
 * Add a shutdown event to the reporting queue.  @flags are the in-kernel
 * SHUTDOWN_* reason flags; they are translated to the uapi representation
 * at format time.  Does nothing if no monitor is attached.
 */
void
xfs_healthmon_report_shutdown(
	struct xfs_mount		*mp,
	uint32_t			flags)
{
	struct xfs_healthmon_event	event = {
		.type			= XFS_HEALTHMON_SHUTDOWN,
		.domain			= XFS_HEALTHMON_MOUNT,
		.flags			= flags,
	};
	struct xfs_healthmon		*hm = xfs_healthmon_get(mp);

	if (!hm)
		return;

	trace_xfs_healthmon_report_shutdown(hm, flags);

	/* Shutdowns are always reported; no mask filtering applies. */
	xfs_healthmon_push(hm, &event);
	xfs_healthmon_put(hm);
}
575 
576 static inline enum xfs_healthmon_domain
media_error_domain(enum xfs_device fdev)577 media_error_domain(
578 	enum xfs_device			fdev)
579 {
580 	switch (fdev) {
581 	case XFS_DEV_DATA:
582 		return XFS_HEALTHMON_DATADEV;
583 	case XFS_DEV_LOG:
584 		return XFS_HEALTHMON_LOGDEV;
585 	case XFS_DEV_RT:
586 		return XFS_HEALTHMON_RTDEV;
587 	}
588 
589 	ASSERT(0);
590 	return 0;
591 }
592 
/*
 * Add a media error event to the reporting queue.  @daddr/@bbcount describe
 * the bad range in 512-byte basic blocks on the device identified by @fdev.
 * Does nothing if no monitor is attached.
 */
void
xfs_healthmon_report_media(
	struct xfs_mount		*mp,
	enum xfs_device			fdev,
	xfs_daddr_t			daddr,
	uint64_t			bbcount)
{
	struct xfs_healthmon_event	event = {
		.type			= XFS_HEALTHMON_MEDIA_ERROR,
		.domain			= media_error_domain(fdev),
		.daddr			= daddr,
		.bbcount		= bbcount,
	};
	struct xfs_healthmon		*hm = xfs_healthmon_get(mp);

	if (!hm)
		return;

	trace_xfs_healthmon_report_media(hm, fdev, &event);

	/* Adjacent ranges will be merged with the queue tail by push. */
	xfs_healthmon_push(hm, &event);
	xfs_healthmon_put(hm);
}
617 
file_ioerr_type(enum fserror_type action)618 static inline enum xfs_healthmon_type file_ioerr_type(enum fserror_type action)
619 {
620 	switch (action) {
621 	case FSERR_BUFFERED_READ:
622 		return XFS_HEALTHMON_BUFREAD;
623 	case FSERR_BUFFERED_WRITE:
624 		return XFS_HEALTHMON_BUFWRITE;
625 	case FSERR_DIRECTIO_READ:
626 		return XFS_HEALTHMON_DIOREAD;
627 	case FSERR_DIRECTIO_WRITE:
628 		return XFS_HEALTHMON_DIOWRITE;
629 	case FSERR_DATA_LOST:
630 		return XFS_HEALTHMON_DATALOST;
631 	case FSERR_METADATA:
632 		/* filtered out by xfs_fs_report_error */
633 		break;
634 	}
635 
636 	ASSERT(0);
637 	return -1;
638 }
639 
/*
 * Add a file io error event to the reporting queue.  @p describes the failed
 * IO (action, file range, errno).  Does nothing if no monitor is attached.
 */
void
xfs_healthmon_report_file_ioerror(
	struct xfs_inode		*ip,
	const struct fserror_event	*p)
{
	struct xfs_healthmon_event	event = {
		.type			= file_ioerr_type(p->type),
		.domain			= XFS_HEALTHMON_FILERANGE,
		.fino			= ip->i_ino,
		/* Generation lets userspace detect inode number reuse. */
		.fgen			= VFS_I(ip)->i_generation,
		.fpos			= p->pos,
		.flen			= p->len,
		/* send positive error number to userspace */
		.error			= -p->error,
	};
	struct xfs_healthmon		*hm = xfs_healthmon_get(ip->i_mount);

	if (!hm)
		return;

	trace_xfs_healthmon_report_file_ioerror(hm, p);

	/* Adjacent file ranges will be merged with the queue tail by push. */
	xfs_healthmon_push(hm, &event);
	xfs_healthmon_put(hm);
}
666 
667 static inline void
xfs_healthmon_reset_outbuf(struct xfs_healthmon * hm)668 xfs_healthmon_reset_outbuf(
669 	struct xfs_healthmon		*hm)
670 {
671 	hm->buftail = 0;
672 	hm->bufhead = 0;
673 }
674 
/* Pairing of an in-kernel flag bit with its uapi counterpart. */
struct flags_map {
	unsigned int		in_mask;
	unsigned int		out_mask;
};

/* Translate SHUTDOWN_* reason flags to XFS_HEALTH_SHUTDOWN_* uapi bits. */
static const struct flags_map shutdown_map[] = {
	{ SHUTDOWN_META_IO_ERROR,	XFS_HEALTH_SHUTDOWN_META_IO_ERROR },
	{ SHUTDOWN_LOG_IO_ERROR,	XFS_HEALTH_SHUTDOWN_LOG_IO_ERROR },
	{ SHUTDOWN_FORCE_UMOUNT,	XFS_HEALTH_SHUTDOWN_FORCE_UMOUNT },
	{ SHUTDOWN_CORRUPT_INCORE,	XFS_HEALTH_SHUTDOWN_CORRUPT_INCORE },
	{ SHUTDOWN_CORRUPT_ONDISK,	XFS_HEALTH_SHUTDOWN_CORRUPT_ONDISK },
	{ SHUTDOWN_DEVICE_REMOVED,	XFS_HEALTH_SHUTDOWN_DEVICE_REMOVED },
};
688 
689 static inline unsigned int
__map_flags(const struct flags_map * map,size_t array_len,unsigned int flags)690 __map_flags(
691 	const struct flags_map	*map,
692 	size_t			array_len,
693 	unsigned int		flags)
694 {
695 	const struct flags_map	*m;
696 	unsigned int		ret = 0;
697 
698 	for (m = map; m < map + array_len; m++) {
699 		if (flags & m->in_mask)
700 			ret |= m->out_mask;
701 	}
702 
703 	return ret;
704 }
705 
/* Translate @flags through @map, which must be a real array (not a pointer). */
#define map_flags(map, flags) __map_flags((map), ARRAY_SIZE(map), (flags))

/* Convert in-kernel shutdown reason flags to the uapi representation. */
static inline unsigned int shutdown_mask(unsigned int in)
{
	return map_flags(shutdown_map, in);
}
712 
/* Translate internal event domains to the uapi domain codes. */
static const unsigned int domain_map[] = {
	[XFS_HEALTHMON_MOUNT]		= XFS_HEALTH_MONITOR_DOMAIN_MOUNT,
	[XFS_HEALTHMON_FS]		= XFS_HEALTH_MONITOR_DOMAIN_FS,
	[XFS_HEALTHMON_AG]		= XFS_HEALTH_MONITOR_DOMAIN_AG,
	[XFS_HEALTHMON_INODE]		= XFS_HEALTH_MONITOR_DOMAIN_INODE,
	[XFS_HEALTHMON_RTGROUP]		= XFS_HEALTH_MONITOR_DOMAIN_RTGROUP,
	[XFS_HEALTHMON_DATADEV]		= XFS_HEALTH_MONITOR_DOMAIN_DATADEV,
	[XFS_HEALTHMON_RTDEV]		= XFS_HEALTH_MONITOR_DOMAIN_RTDEV,
	[XFS_HEALTHMON_LOGDEV]		= XFS_HEALTH_MONITOR_DOMAIN_LOGDEV,
	[XFS_HEALTHMON_FILERANGE]	= XFS_HEALTH_MONITOR_DOMAIN_FILERANGE,
};

/* Translate internal event types to the uapi type codes. */
static const unsigned int type_map[] = {
	[XFS_HEALTHMON_RUNNING]		= XFS_HEALTH_MONITOR_TYPE_RUNNING,
	[XFS_HEALTHMON_LOST]		= XFS_HEALTH_MONITOR_TYPE_LOST,
	[XFS_HEALTHMON_SICK]		= XFS_HEALTH_MONITOR_TYPE_SICK,
	[XFS_HEALTHMON_CORRUPT]		= XFS_HEALTH_MONITOR_TYPE_CORRUPT,
	[XFS_HEALTHMON_HEALTHY]		= XFS_HEALTH_MONITOR_TYPE_HEALTHY,
	[XFS_HEALTHMON_UNMOUNT]		= XFS_HEALTH_MONITOR_TYPE_UNMOUNT,
	[XFS_HEALTHMON_SHUTDOWN]	= XFS_HEALTH_MONITOR_TYPE_SHUTDOWN,
	[XFS_HEALTHMON_MEDIA_ERROR]	= XFS_HEALTH_MONITOR_TYPE_MEDIA_ERROR,
	[XFS_HEALTHMON_BUFREAD]		= XFS_HEALTH_MONITOR_TYPE_BUFREAD,
	[XFS_HEALTHMON_BUFWRITE]	= XFS_HEALTH_MONITOR_TYPE_BUFWRITE,
	[XFS_HEALTHMON_DIOREAD]		= XFS_HEALTH_MONITOR_TYPE_DIOREAD,
	[XFS_HEALTHMON_DIOWRITE]	= XFS_HEALTH_MONITOR_TYPE_DIOWRITE,
	[XFS_HEALTHMON_DATALOST]	= XFS_HEALTH_MONITOR_TYPE_DATALOST,
};
740 
/*
 * Render event as a V0 structure and append it to the output buffer.
 * Returns 0 on success or -EFSCORRUPTED if the event carries a type or
 * domain we don't recognize.  Caller must have verified there's room in the
 * outbuf (see xfs_healthmon_format_pop).
 */
STATIC int
xfs_healthmon_format_v0(
	struct xfs_healthmon		*hm,
	const struct xfs_healthmon_event *event)
{
	struct xfs_health_monitor_event	hme = {
		.time_ns		= event->time_ns,
	};

	trace_xfs_healthmon_format(hm, event);

	/* Bounds-check before indexing the translation tables. */
	if (event->domain < 0 || event->domain >= ARRAY_SIZE(domain_map) ||
	    event->type < 0   || event->type >= ARRAY_SIZE(type_map))
		return -EFSCORRUPTED;

	hme.domain = domain_map[event->domain];
	hme.type = type_map[event->type];

	/* fill in the event-specific details */
	switch (event->domain) {
	case XFS_HEALTHMON_MOUNT:
		switch (event->type) {
		case XFS_HEALTHMON_LOST:
			hme.e.lost.count = event->lostcount;
			break;
		case XFS_HEALTHMON_SHUTDOWN:
			/* Translate kernel SHUTDOWN_* flags to uapi bits. */
			hme.e.shutdown.reasons = shutdown_mask(event->flags);
			break;
		default:
			break;
		}
		break;
	case XFS_HEALTHMON_FS:
		hme.e.fs.mask = xfs_healthmon_fs_mask(event->fsmask);
		break;
	case XFS_HEALTHMON_RTGROUP:
		hme.e.group.mask = xfs_healthmon_rtgroup_mask(event->grpmask);
		hme.e.group.gno = event->group;
		break;
	case XFS_HEALTHMON_AG:
		hme.e.group.mask = xfs_healthmon_perag_mask(event->grpmask);
		hme.e.group.gno = event->group;
		break;
	case XFS_HEALTHMON_INODE:
		hme.e.inode.mask = xfs_healthmon_inode_mask(event->imask);
		hme.e.inode.ino = event->ino;
		hme.e.inode.gen = event->gen;
		break;
	case XFS_HEALTHMON_DATADEV:
	case XFS_HEALTHMON_LOGDEV:
	case XFS_HEALTHMON_RTDEV:
		hme.e.media.daddr = event->daddr;
		hme.e.media.bbcount = event->bbcount;
		break;
	case XFS_HEALTHMON_FILERANGE:
		hme.e.filerange.ino = event->fino;
		hme.e.filerange.gen = event->fgen;
		hme.e.filerange.pos = event->fpos;
		hme.e.filerange.len = event->flen;
		/* error was stored negated; userspace gets a positive errno */
		hme.e.filerange.error = abs(event->error);
		break;
	default:
		break;
	}

	/* format_pop guarantees space; this catches protocol violations. */
	ASSERT(hm->bufhead + sizeof(hme) <= hm->bufsize);

	/* copy formatted object to the outbuf */
	if (hm->bufhead + sizeof(hme) <= hm->bufsize) {
		memcpy(hm->buffer + hm->bufhead, &hme, sizeof(hme));
		hm->bufhead += sizeof(hme);
	}

	return 0;
}
817 
818 /* How many bytes are waiting in the outbuf to be copied? */
819 static inline size_t
xfs_healthmon_outbuf_bytes(struct xfs_healthmon * hm)820 xfs_healthmon_outbuf_bytes(
821 	struct xfs_healthmon	*hm)
822 {
823 	if (hm->bufhead > hm->buftail)
824 		return hm->bufhead - hm->buftail;
825 	return 0;
826 }
827 
/*
 * Do we have something for userspace to read?  This can mean unmount events,
 * events pending in the queue, or pending bytes in the outbuf.  Lockless by
 * design; used by poll and by the read_iter wait condition.
 */
static inline bool
xfs_healthmon_has_eventdata(
	struct xfs_healthmon	*hm)
{
	/*
	 * If the health monitor is already detached from the xfs_mount, we
	 * want reads to return 0 bytes even if there are no events, because
	 * userspace interprets that as EOF.  If we race with deactivation,
	 * read_iter will take the necessary locks to discover that there are
	 * no events to send.
	 */
	if (hm->mount_cookie == DETACHED_MOUNT_COOKIE)
		return true;

	/*
	 * Either there are events waiting to be formatted into the buffer, or
	 * there's unread bytes in the buffer.
	 */
	return hm->events > 0 || xfs_healthmon_outbuf_bytes(hm) > 0;
}
852 
/*
 * Try to copy the rest of the outbuf to the iov iter.  Returns the number of
 * bytes copied (possibly zero if the outbuf was empty), or -EFAULT if
 * nothing at all could be copied out.
 */
STATIC ssize_t
xfs_healthmon_copybuf(
	struct xfs_healthmon	*hm,
	struct iov_iter		*to)
{
	size_t			to_copy;
	size_t			w = 0;

	trace_xfs_healthmon_copybuf(hm, to);

	to_copy = xfs_healthmon_outbuf_bytes(hm);
	if (to_copy) {
		w = copy_to_iter(hm->buffer + hm->buftail, to_copy, to);
		if (!w)
			return -EFAULT;

		/* Partial copies are fine; advance past what went out. */
		hm->buftail += w;
	}

	/*
	 * Nothing left to copy?  Reset the output buffer cursors to the start
	 * since there's no live data in the buffer.
	 */
	if (xfs_healthmon_outbuf_bytes(hm) == 0)
		xfs_healthmon_reset_outbuf(hm);
	return w;
}
881 
/*
 * Return a health monitoring event for formatting into the output buffer if
 * there's enough space in the outbuf and an event waiting for us.  Caller
 * must hold i_rwsem on the healthmon file.  Ownership of the returned event
 * passes to the caller, who must kfree it after formatting.
 */
static inline struct xfs_healthmon_event *
xfs_healthmon_format_pop(
	struct xfs_healthmon	*hm)
{
	struct xfs_healthmon_event *event;

	/* Don't dequeue anything the outbuf can't hold. */
	if (hm->bufhead + sizeof(*event) > hm->bufsize)
		return NULL;

	mutex_lock(&hm->lock);
	event = hm->first_event;
	if (event) {
		/* Unlink from the head, fixing the tail if it was the last. */
		if (hm->last_event == event)
			hm->last_event = NULL;
		hm->first_event = event->next;
		hm->events--;

		trace_xfs_healthmon_pop(hm, event);
	}
	mutex_unlock(&hm->lock);
	return event;
}
909 
/*
 * Allocate formatting buffer, sized to the reader's buffer but clamped to
 * [PAGE_SIZE, XFS_HEALTHMON_MAX_OUTBUF].  Falls back to a single page if the
 * larger allocation fails.  Returns 0 or -ENOMEM.
 */
STATIC int
xfs_healthmon_alloc_outbuf(
	struct xfs_healthmon	*hm,
	size_t			user_bufsize)
{
	void			*outbuf;
	size_t			bufsize =
		min(XFS_HEALTHMON_MAX_OUTBUF, max(PAGE_SIZE, user_bufsize));

	outbuf = kzalloc(bufsize, GFP_KERNEL);
	if (!outbuf) {
		/* Already at the minimum size?  Then we're truly out. */
		if (bufsize == PAGE_SIZE)
			return -ENOMEM;

		/* Retry with the smallest buffer we're willing to use. */
		bufsize = PAGE_SIZE;
		outbuf = kzalloc(bufsize, GFP_KERNEL);
		if (!outbuf)
			return -ENOMEM;
	}

	hm->buffer = outbuf;
	hm->bufsize = bufsize;
	hm->bufhead = 0;
	hm->buftail = 0;

	return 0;
}
938 
/*
 * Convey queued event data to userspace.  First copy any remaining bytes in
 * the outbuf, then format the oldest event into the outbuf and copy that too.
 *
 * Nonblocking reads return -EAGAIN when there's nothing ready or the inode
 * lock is contended; blocking reads sleep interruptibly until event data
 * appears or the monitor detaches.  i_rwsem serializes readers against each
 * other and protects the outbuf cursors.
 */
STATIC ssize_t
xfs_healthmon_read_iter(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct file		*file = iocb->ki_filp;
	struct inode		*inode = file_inode(file);
	struct xfs_healthmon	*hm = file->private_data;
	struct xfs_healthmon_event *event;
	size_t			copied = 0;
	ssize_t			ret = 0;

	if (file->f_flags & O_NONBLOCK) {
		if (!xfs_healthmon_has_eventdata(hm) || !inode_trylock(inode))
			return -EAGAIN;
	} else {
		ret = wait_event_interruptible(hm->wait,
				xfs_healthmon_has_eventdata(hm));
		if (ret)
			return ret;

		inode_lock(inode);
	}

	/* Allocate the outbuf lazily on the first read. */
	if (hm->bufsize == 0) {
		ret = xfs_healthmon_alloc_outbuf(hm, iov_iter_count(to));
		if (ret)
			goto out_unlock;
	}

	trace_xfs_healthmon_read_start(hm);

	/*
	 * If there's anything left in the output buffer, copy that before
	 * formatting more events.
	 */
	ret = xfs_healthmon_copybuf(hm, to);
	if (ret < 0)
		goto out_unlock;
	copied += ret;

	while (iov_iter_count(to) > 0) {
		/* Format the next events into the outbuf until it's full. */
		while ((event = xfs_healthmon_format_pop(hm)) != NULL) {
			ret = xfs_healthmon_format_v0(hm, event);
			/* The popped event is ours to free, success or not. */
			kfree(event);
			if (ret)
				goto out_unlock;
		}

		/* Copy anything formatted into outbuf to userspace */
		ret = xfs_healthmon_copybuf(hm, to);
		if (ret <= 0)
			break;

		copied += ret;
	}

out_unlock:
	trace_xfs_healthmon_read_finish(hm);
	inode_unlock(inode);
	/* Report progress if we made any; otherwise surface the error/EOF. */
	return copied ?: ret;
}
1006 
/*
 * Poll for available events.  Reports EPOLLIN when event data is pending or
 * the monitor has detached (so readers come in and observe EOF).
 */
STATIC __poll_t
xfs_healthmon_poll(
	struct file			*file,
	struct poll_table_struct	*wait)
{
	struct xfs_healthmon		*hm = file->private_data;
	__poll_t			mask = 0;

	poll_wait(file, &hm->wait, wait);

	if (xfs_healthmon_has_eventdata(hm))
		mask |= EPOLLIN;
	return mask;
}
1022 
/*
 * Free the health monitoring information.  Called when the last reference
 * to the healthmon file is dropped.
 */
STATIC int
xfs_healthmon_release(
	struct inode		*inode,
	struct file		*file)
{
	struct xfs_healthmon	*hm = file->private_data;

	trace_xfs_healthmon_release(hm);

	/*
	 * We might be closing the healthmon file before the filesystem
	 * unmounts, because userspace processes can terminate at any time and
	 * for any reason.  Null out xfs_mount::m_healthmon so that another
	 * process can create another health monitor file.
	 */
	xfs_healthmon_detach(hm);
	/* Drop the file's reference; this may be the last one. */
	xfs_healthmon_put(hm);
	return 0;
}
1043 
1044 /* Validate ioctl parameters. */
1045 static inline bool
xfs_healthmon_validate(const struct xfs_health_monitor * hmo)1046 xfs_healthmon_validate(
1047 	const struct xfs_health_monitor	*hmo)
1048 {
1049 	if (hmo->flags & ~XFS_HEALTH_MONITOR_ALL)
1050 		return false;
1051 	if (hmo->format != XFS_HEALTH_MONITOR_FMT_V0)
1052 		return false;
1053 	if (memchr_inv(&hmo->pad, 0, sizeof(hmo->pad)))
1054 		return false;
1055 	return true;
1056 }
1057 
1058 /* Emit some data about the health monitoring fd. */
1059 static void
xfs_healthmon_show_fdinfo(struct seq_file * m,struct file * file)1060 xfs_healthmon_show_fdinfo(
1061 	struct seq_file		*m,
1062 	struct file		*file)
1063 {
1064 	struct xfs_healthmon	*hm = file->private_data;
1065 
1066 	mutex_lock(&hm->lock);
1067 	seq_printf(m, "state:\t%s\ndev:\t%d:%d\nformat:\tv0\nevents:\t%llu\nlost:\t%llu\n",
1068 			hm->mount_cookie == DETACHED_MOUNT_COOKIE ?
1069 				"dead" : "alive",
1070 			MAJOR(hm->dev), MINOR(hm->dev),
1071 			hm->total_events,
1072 			hm->total_lost);
1073 	mutex_unlock(&hm->lock);
1074 }
1075 
1076 /* Reconfigure the health monitor. */
1077 STATIC long
xfs_healthmon_reconfigure(struct file * file,unsigned int cmd,void __user * arg)1078 xfs_healthmon_reconfigure(
1079 	struct file			*file,
1080 	unsigned int			cmd,
1081 	void __user			*arg)
1082 {
1083 	struct xfs_health_monitor	hmo;
1084 	struct xfs_healthmon		*hm = file->private_data;
1085 
1086 	if (copy_from_user(&hmo, arg, sizeof(hmo)))
1087 		return -EFAULT;
1088 
1089 	if (!xfs_healthmon_validate(&hmo))
1090 		return -EINVAL;
1091 
1092 	mutex_lock(&hm->lock);
1093 	hm->verbose = !!(hmo.flags & XFS_HEALTH_MONITOR_VERBOSE);
1094 	mutex_unlock(&hm->lock);
1095 
1096 	return 0;
1097 }
1098 
1099 /* Does the fd point to the same filesystem as the one we're monitoring? */
1100 STATIC long
xfs_healthmon_file_on_monitored_fs(struct file * file,unsigned int cmd,void __user * arg)1101 xfs_healthmon_file_on_monitored_fs(
1102 	struct file			*file,
1103 	unsigned int			cmd,
1104 	void __user			*arg)
1105 {
1106 	struct xfs_health_file_on_monitored_fs hms;
1107 	struct xfs_healthmon		*hm = file->private_data;
1108 	struct inode			*hms_inode;
1109 
1110 	if (copy_from_user(&hms, arg, sizeof(hms)))
1111 		return -EFAULT;
1112 
1113 	if (hms.flags)
1114 		return -EINVAL;
1115 
1116 	CLASS(fd, hms_fd)(hms.fd);
1117 	if (fd_empty(hms_fd))
1118 		return -EBADF;
1119 
1120 	hms_inode = file_inode(fd_file(hms_fd));
1121 	mutex_lock(&hm->lock);
1122 	if (hm->mount_cookie != (uintptr_t)hms_inode->i_sb) {
1123 		mutex_unlock(&hm->lock);
1124 		return -ESTALE;
1125 	}
1126 
1127 	mutex_unlock(&hm->lock);
1128 	return 0;
1129 }
1130 
1131 /* Handle ioctls for the health monitoring thread. */
1132 STATIC long
xfs_healthmon_ioctl(struct file * file,unsigned int cmd,unsigned long p)1133 xfs_healthmon_ioctl(
1134 	struct file			*file,
1135 	unsigned int			cmd,
1136 	unsigned long			p)
1137 {
1138 	void __user			*arg = (void __user *)p;
1139 
1140 	switch (cmd) {
1141 	case XFS_IOC_HEALTH_MONITOR:
1142 		return xfs_healthmon_reconfigure(file, cmd, arg);
1143 	case XFS_IOC_HEALTH_FD_ON_MONITORED_FS:
1144 		return xfs_healthmon_file_on_monitored_fs(file, cmd, arg);
1145 	default:
1146 		break;
1147 	}
1148 
1149 	return -ENOTTY;
1150 }
1151 
/* File operations for the anonymous health monitoring fd. */
static const struct file_operations xfs_healthmon_fops = {
	.owner		= THIS_MODULE,
	.show_fdinfo	= xfs_healthmon_show_fdinfo,
	.read_iter	= xfs_healthmon_read_iter,
	.poll		= xfs_healthmon_poll,
	.release	= xfs_healthmon_release,
	.unlocked_ioctl	= xfs_healthmon_ioctl,
};
1160 
/*
 * Create a health monitoring file.  Returns an index to the fd table or a
 * negative errno.
 */
long
xfs_ioc_health_monitor(
	struct file			*file,
	struct xfs_health_monitor __user *arg)
{
	struct xfs_health_monitor	hmo;
	struct xfs_healthmon_event	*running_event;
	struct xfs_healthmon		*hm;
	struct xfs_inode		*ip = XFS_I(file_inode(file));
	struct xfs_mount		*mp = ip->i_mount;
	int				ret;

	/*
	 * The only intended user of the health monitoring system should be the
	 * xfs_healer daemon running on behalf of the whole filesystem in the
	 * initial user namespace.  IOWs, we don't allow unprivileged userspace
	 * (they can use fsnotify) nor do we allow containers.
	 */
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	/* Must be issued against the root inode of the filesystem. */
	if (ip->i_ino != mp->m_sb.sb_rootino)
		return -EPERM;
	if (current_user_ns() != &init_user_ns)
		return -EPERM;

	if (copy_from_user(&hmo, arg, sizeof(hmo)))
		return -EFAULT;

	if (!xfs_healthmon_validate(&hmo))
		return -EINVAL;

	/* One reference for the file we're about to create. */
	hm = kzalloc_obj(*hm);
	if (!hm)
		return -ENOMEM;
	hm->dev = mp->m_super->s_dev;
	refcount_set(&hm->ref, 1);

	mutex_init(&hm->lock);
	init_waitqueue_head(&hm->wait);

	if (hmo.flags & XFS_HEALTH_MONITOR_VERBOSE)
		hm->verbose = true;

	/* Queue up the first event that lets the client know we're running. */
	running_event = kzalloc_obj(struct xfs_healthmon_event, GFP_NOFS);
	if (!running_event) {
		ret = -ENOMEM;
		goto out_hm;
	}
	running_event->type = XFS_HEALTHMON_RUNNING;
	running_event->domain = XFS_HEALTHMON_MOUNT;
	__xfs_healthmon_insert(hm, running_event);

	/*
	 * Preallocate the unmount event so that we can't fail to notify the
	 * filesystem later.  This is key for triggering fast exit of the
	 * xfs_healer daemon.
	 */
	hm->unmount_event = kzalloc_obj(struct xfs_healthmon_event, GFP_NOFS);
	if (!hm->unmount_event) {
		ret = -ENOMEM;
		goto out_hm;
	}
	hm->unmount_event->type = XFS_HEALTHMON_UNMOUNT;
	hm->unmount_event->domain = XFS_HEALTHMON_MOUNT;

	/*
	 * Try to attach this health monitor to the xfs_mount.  The monitor is
	 * considered live and will receive events if this succeeds.
	 */
	ret = xfs_healthmon_attach(mp, hm);
	if (ret)
		goto out_hm;

	/*
	 * Create the anonymous file and install a fd for it.  If it succeeds,
	 * the file owns hm and can go away at any time, so we must not access
	 * it again.  This must go last because we can't undo a fd table
	 * installation.
	 */
	ret = anon_inode_getfd("xfs_healthmon", &xfs_healthmon_fops, hm,
			O_CLOEXEC | O_RDONLY);
	if (ret < 0)
		goto out_mp;

	trace_xfs_healthmon_create(mp->m_super->s_dev, hmo.flags, hmo.format);

	/* Success: ret is the new fd number. */
	return ret;

out_mp:
	/* Undo the attach so the mount stops sending us events. */
	xfs_healthmon_detach(hm);
out_hm:
	/*
	 * On error we still hold the only reference, so the put below frees
	 * hm along with any queued and preallocated events.
	 */
	ASSERT(refcount_read(&hm->ref) == 1);
	xfs_healthmon_put(hm);
	return ret;
}
1261