1a48373e7SDarrick J. Wong // SPDX-License-Identifier: GPL-2.0-or-later 2a48373e7SDarrick J. Wong /* 3a48373e7SDarrick J. Wong * Copyright (c) 2024-2026 Oracle. All Rights Reserved. 4a48373e7SDarrick J. Wong * Author: Darrick J. Wong <djwong@kernel.org> 5a48373e7SDarrick J. Wong */ 604a65666SCarlos Maiolino #include "xfs_platform.h" 7a48373e7SDarrick J. Wong #include "xfs_fs.h" 8a48373e7SDarrick J. Wong #include "xfs_shared.h" 9a48373e7SDarrick J. Wong #include "xfs_format.h" 10a48373e7SDarrick J. Wong #include "xfs_log_format.h" 11a48373e7SDarrick J. Wong #include "xfs_trans_resv.h" 12a48373e7SDarrick J. Wong #include "xfs_mount.h" 13a48373e7SDarrick J. Wong #include "xfs_inode.h" 14a48373e7SDarrick J. Wong #include "xfs_trace.h" 15a48373e7SDarrick J. Wong #include "xfs_ag.h" 16a48373e7SDarrick J. Wong #include "xfs_btree.h" 17a48373e7SDarrick J. Wong #include "xfs_da_format.h" 18a48373e7SDarrick J. Wong #include "xfs_da_btree.h" 19a48373e7SDarrick J. Wong #include "xfs_quota_defs.h" 20a48373e7SDarrick J. Wong #include "xfs_rtgroup.h" 215eb4cb18SDarrick J. Wong #include "xfs_health.h" 22a48373e7SDarrick J. Wong #include "xfs_healthmon.h" 2374c4795eSDarrick J. Wong #include "xfs_fsops.h" 24e76e0e3fSDarrick J. Wong #include "xfs_notify_failure.h" 25dfa8bad3SDarrick J. Wong #include "xfs_file.h" 26c0e719cbSDarrick J. Wong #include "xfs_ioctl.h" 27a48373e7SDarrick J. Wong 28a48373e7SDarrick J. Wong #include <linux/anon_inodes.h> 29a48373e7SDarrick J. Wong #include <linux/eventpoll.h> 30a48373e7SDarrick J. Wong #include <linux/poll.h> 31dfa8bad3SDarrick J. Wong #include <linux/fserror.h> 32a48373e7SDarrick J. Wong 33a48373e7SDarrick J. Wong /* 34a48373e7SDarrick J. Wong * Live Health Monitoring 35a48373e7SDarrick J. Wong * ====================== 36a48373e7SDarrick J. Wong * 37a48373e7SDarrick J. Wong * Autonomous self-healing of XFS filesystems requires a means for the kernel 38a48373e7SDarrick J. Wong * to send filesystem health events to a monitoring daemon in userspace. To 39a48373e7SDarrick J. Wong * accomplish this, we establish a thread_with_file kthread object to handle 40a48373e7SDarrick J. Wong * translating internal events about filesystem health into a format that can 41a48373e7SDarrick J. Wong * be parsed easily by userspace. When those internal events occur, the core 42a48373e7SDarrick J. Wong * filesystem code calls this health monitor to convey the events to userspace. 43a48373e7SDarrick J. Wong * Userspace reads events from the file descriptor returned by the ioctl. 44a48373e7SDarrick J. Wong * 45a48373e7SDarrick J. Wong * The healthmon abstraction has a weak reference to the host filesystem mount 46a48373e7SDarrick J. Wong * so that the queueing and processing of the events do not pin the mount and 47a48373e7SDarrick J. Wong * cannot slow down the main filesystem. The healthmon object can exist past 48a48373e7SDarrick J. Wong * the end of the filesystem mount. 49a48373e7SDarrick J. Wong */ 50a48373e7SDarrick J. Wong 51a48373e7SDarrick J. Wong /* sign of a detached health monitor */ 52a48373e7SDarrick J. Wong #define DETACHED_MOUNT_COOKIE ((uintptr_t)0) 53a48373e7SDarrick J. Wong 54b3a289a2SDarrick J. Wong /* Constrain the number of event objects that can build up in memory. */ 55b3a289a2SDarrick J. Wong #define XFS_HEALTHMON_MAX_EVENTS (SZ_32K / \ 56b3a289a2SDarrick J. Wong sizeof(struct xfs_healthmon_event)) 57b3a289a2SDarrick J. Wong 58b3a289a2SDarrick J. Wong /* Constrain the size of the output buffer for read_iter. */ 59b3a289a2SDarrick J. Wong #define XFS_HEALTHMON_MAX_OUTBUF SZ_64K 60b3a289a2SDarrick J. Wong 61a48373e7SDarrick J. Wong /* spinlock for atomically updating xfs_mount <-> xfs_healthmon pointers */ 62a48373e7SDarrick J. Wong static DEFINE_SPINLOCK(xfs_healthmon_lock); 63a48373e7SDarrick J. Wong 64a48373e7SDarrick J. Wong /* Grab a reference to the healthmon object for a given mount, if any. */ 65a48373e7SDarrick J. Wong static struct xfs_healthmon * 66a48373e7SDarrick J. Wong xfs_healthmon_get( 67a48373e7SDarrick J. Wong struct xfs_mount *mp) 68a48373e7SDarrick J. Wong { 69a48373e7SDarrick J. Wong struct xfs_healthmon *hm; 70a48373e7SDarrick J. Wong 71a48373e7SDarrick J. Wong rcu_read_lock(); 72a48373e7SDarrick J. Wong hm = mp->m_healthmon; 73a48373e7SDarrick J. Wong if (hm && !refcount_inc_not_zero(&hm->ref)) 74a48373e7SDarrick J. Wong hm = NULL; 75a48373e7SDarrick J. Wong rcu_read_unlock(); 76a48373e7SDarrick J. Wong 77a48373e7SDarrick J. Wong return hm; 78a48373e7SDarrick J. Wong } 79a48373e7SDarrick J. Wong 80a48373e7SDarrick J. Wong /* 81a48373e7SDarrick J. Wong * Release the reference to a healthmon object. If there are no more holders, 82a48373e7SDarrick J. Wong * free the health monitor after an RCU grace period to eliminate possibility 83a48373e7SDarrick J. Wong * of races with xfs_healthmon_get. 84a48373e7SDarrick J. Wong */ 85a48373e7SDarrick J. Wong static void 86a48373e7SDarrick J. Wong xfs_healthmon_put( 87a48373e7SDarrick J. Wong struct xfs_healthmon *hm) 88a48373e7SDarrick J. Wong { 89b3a289a2SDarrick J. Wong if (refcount_dec_and_test(&hm->ref)) { 90b3a289a2SDarrick J. Wong struct xfs_healthmon_event *event; 91b3a289a2SDarrick J. Wong struct xfs_healthmon_event *next = hm->first_event; 92b3a289a2SDarrick J. Wong 93b3a289a2SDarrick J. Wong while ((event = next) != NULL) { 94b3a289a2SDarrick J. Wong trace_xfs_healthmon_drop(hm, event); 95b3a289a2SDarrick J. Wong next = event->next; 96b3a289a2SDarrick J. Wong kfree(event); 97b3a289a2SDarrick J. Wong } 98b3a289a2SDarrick J. Wong 9925ca57faSDarrick J. Wong kfree(hm->unmount_event); 100b3a289a2SDarrick J. Wong kfree(hm->buffer); 101b3a289a2SDarrick J. Wong mutex_destroy(&hm->lock); 102a48373e7SDarrick J. Wong kfree_rcu_mightsleep(hm); 103a48373e7SDarrick J. Wong } 104b3a289a2SDarrick J. Wong } 105a48373e7SDarrick J. Wong 106a48373e7SDarrick J. Wong /* Attach a health monitor to an xfs_mount. Only one allowed at a time. */ 107a48373e7SDarrick J. Wong STATIC int 108a48373e7SDarrick J. Wong xfs_healthmon_attach( 109a48373e7SDarrick J. Wong struct xfs_mount *mp, 110a48373e7SDarrick J. Wong struct xfs_healthmon *hm) 111a48373e7SDarrick J. Wong { 112a48373e7SDarrick J. Wong spin_lock(&xfs_healthmon_lock); 113a48373e7SDarrick J. Wong if (mp->m_healthmon != NULL) { 114a48373e7SDarrick J. Wong spin_unlock(&xfs_healthmon_lock); 115a48373e7SDarrick J. Wong return -EEXIST; 116a48373e7SDarrick J. Wong } 117a48373e7SDarrick J. Wong 118a48373e7SDarrick J. Wong refcount_inc(&hm->ref); 119a48373e7SDarrick J. Wong mp->m_healthmon = hm; 120a48373e7SDarrick J. Wong hm->mount_cookie = (uintptr_t)mp->m_super; 121a48373e7SDarrick J. Wong spin_unlock(&xfs_healthmon_lock); 122a48373e7SDarrick J. Wong 123a48373e7SDarrick J. Wong return 0; 124a48373e7SDarrick J. Wong } 125a48373e7SDarrick J. Wong 126a48373e7SDarrick J. Wong /* Detach a xfs mount from a specific healthmon instance. */ 127a48373e7SDarrick J. Wong STATIC void 128a48373e7SDarrick J. Wong xfs_healthmon_detach( 129a48373e7SDarrick J. Wong struct xfs_healthmon *hm) 130a48373e7SDarrick J. Wong { 131a48373e7SDarrick J. Wong spin_lock(&xfs_healthmon_lock); 132a48373e7SDarrick J. Wong if (hm->mount_cookie == DETACHED_MOUNT_COOKIE) { 133a48373e7SDarrick J. Wong spin_unlock(&xfs_healthmon_lock); 134a48373e7SDarrick J. Wong return; 135a48373e7SDarrick J. Wong } 136a48373e7SDarrick J. Wong 137a48373e7SDarrick J. Wong XFS_M((struct super_block *)hm->mount_cookie)->m_healthmon = NULL; 138a48373e7SDarrick J. Wong hm->mount_cookie = DETACHED_MOUNT_COOKIE; 139a48373e7SDarrick J. Wong spin_unlock(&xfs_healthmon_lock); 140a48373e7SDarrick J. Wong 141b3a289a2SDarrick J. Wong trace_xfs_healthmon_detach(hm); 142a48373e7SDarrick J. Wong xfs_healthmon_put(hm); 143a48373e7SDarrick J. Wong } 144a48373e7SDarrick J. Wong 145b3a289a2SDarrick J. Wong static inline void xfs_healthmon_bump_events(struct xfs_healthmon *hm) 146b3a289a2SDarrick J. Wong { 147b3a289a2SDarrick J. Wong hm->events++; 148b3a289a2SDarrick J. Wong hm->total_events++; 149b3a289a2SDarrick J. Wong } 150b3a289a2SDarrick J. Wong 151b3a289a2SDarrick J. Wong static inline void xfs_healthmon_bump_lost(struct xfs_healthmon *hm) 152b3a289a2SDarrick J. Wong { 153b3a289a2SDarrick J. Wong hm->lost_prev_event++; 154b3a289a2SDarrick J. Wong hm->total_lost++; 155b3a289a2SDarrick J. Wong } 156b3a289a2SDarrick J. Wong 157b3a289a2SDarrick J. Wong /* 158b3a289a2SDarrick J. Wong * If possible, merge a new event into an existing event. Returns whether or 159b3a289a2SDarrick J. Wong * not it merged anything. 160b3a289a2SDarrick J. Wong */ 161b3a289a2SDarrick J. Wong static bool 162b3a289a2SDarrick J. Wong xfs_healthmon_merge_events( 163b3a289a2SDarrick J. Wong struct xfs_healthmon_event *existing, 164b3a289a2SDarrick J. Wong const struct xfs_healthmon_event *new) 165b3a289a2SDarrick J. Wong { 166b3a289a2SDarrick J. Wong if (!existing) 167b3a289a2SDarrick J. Wong return false; 168b3a289a2SDarrick J. Wong 169b3a289a2SDarrick J. Wong /* type and domain must match to merge events */ 170b3a289a2SDarrick J. Wong if (existing->type != new->type || 171b3a289a2SDarrick J. Wong existing->domain != new->domain) 172b3a289a2SDarrick J. Wong return false; 173b3a289a2SDarrick J. Wong 174b3a289a2SDarrick J. Wong switch (existing->type) { 175b3a289a2SDarrick J. Wong case XFS_HEALTHMON_RUNNING: 17625ca57faSDarrick J. Wong case XFS_HEALTHMON_UNMOUNT: 177b3a289a2SDarrick J. Wong /* should only ever be one of these events anyway */ 178b3a289a2SDarrick J. Wong return false; 179b3a289a2SDarrick J. Wong 180b3a289a2SDarrick J. Wong case XFS_HEALTHMON_LOST: 181b3a289a2SDarrick J. Wong existing->lostcount += new->lostcount; 182b3a289a2SDarrick J. Wong return true; 1835eb4cb18SDarrick J. Wong 1845eb4cb18SDarrick J. Wong case XFS_HEALTHMON_SICK: 1855eb4cb18SDarrick J. Wong case XFS_HEALTHMON_CORRUPT: 1865eb4cb18SDarrick J. Wong case XFS_HEALTHMON_HEALTHY: 1875eb4cb18SDarrick J. Wong switch (existing->domain) { 1885eb4cb18SDarrick J. Wong case XFS_HEALTHMON_FS: 1895eb4cb18SDarrick J. Wong existing->fsmask |= new->fsmask; 1905eb4cb18SDarrick J. Wong return true; 1915eb4cb18SDarrick J. Wong case XFS_HEALTHMON_AG: 1925eb4cb18SDarrick J. Wong case XFS_HEALTHMON_RTGROUP: 1935eb4cb18SDarrick J. Wong if (existing->group == new->group){ 1945eb4cb18SDarrick J. Wong existing->grpmask |= new->grpmask; 1955eb4cb18SDarrick J. Wong return true; 1965eb4cb18SDarrick J. Wong } 1975eb4cb18SDarrick J. Wong return false; 1985eb4cb18SDarrick J. Wong case XFS_HEALTHMON_INODE: 1995eb4cb18SDarrick J. Wong if (existing->ino == new->ino && 2005eb4cb18SDarrick J. Wong existing->gen == new->gen) { 2015eb4cb18SDarrick J. Wong existing->imask |= new->imask; 2025eb4cb18SDarrick J. Wong return true; 2035eb4cb18SDarrick J. Wong } 2045eb4cb18SDarrick J. Wong return false; 2055eb4cb18SDarrick J. Wong default: 2065eb4cb18SDarrick J. Wong ASSERT(0); 2075eb4cb18SDarrick J. Wong return false; 2085eb4cb18SDarrick J. Wong } 2095eb4cb18SDarrick J. Wong return false; 21074c4795eSDarrick J. Wong 21174c4795eSDarrick J. Wong case XFS_HEALTHMON_SHUTDOWN: 21274c4795eSDarrick J. Wong /* yes, we can race to shutdown */ 21374c4795eSDarrick J. Wong existing->flags |= new->flags; 21474c4795eSDarrick J. Wong return true; 215e76e0e3fSDarrick J. Wong 216e76e0e3fSDarrick J. Wong case XFS_HEALTHMON_MEDIA_ERROR: 217e76e0e3fSDarrick J. Wong /* physically adjacent errors can merge */ 218e76e0e3fSDarrick J. Wong if (existing->daddr + existing->bbcount == new->daddr) { 219e76e0e3fSDarrick J. Wong existing->bbcount += new->bbcount; 220e76e0e3fSDarrick J. Wong return true; 221e76e0e3fSDarrick J. Wong } 222e76e0e3fSDarrick J. Wong if (new->daddr + new->bbcount == existing->daddr) { 223e76e0e3fSDarrick J. Wong existing->daddr = new->daddr; 224e76e0e3fSDarrick J. Wong existing->bbcount += new->bbcount; 225e76e0e3fSDarrick J. Wong return true; 226e76e0e3fSDarrick J. Wong } 227e76e0e3fSDarrick J. Wong return false; 228dfa8bad3SDarrick J. Wong 229dfa8bad3SDarrick J. Wong case XFS_HEALTHMON_BUFREAD: 230dfa8bad3SDarrick J. Wong case XFS_HEALTHMON_BUFWRITE: 231dfa8bad3SDarrick J. Wong case XFS_HEALTHMON_DIOREAD: 232dfa8bad3SDarrick J. Wong case XFS_HEALTHMON_DIOWRITE: 233dfa8bad3SDarrick J. Wong case XFS_HEALTHMON_DATALOST: 234dfa8bad3SDarrick J. Wong /* logically adjacent file ranges can merge */ 235dfa8bad3SDarrick J. Wong if (existing->fino != new->fino || existing->fgen != new->fgen) 236dfa8bad3SDarrick J. Wong return false; 237dfa8bad3SDarrick J. Wong 238dfa8bad3SDarrick J. Wong if (existing->fpos + existing->flen == new->fpos) { 239dfa8bad3SDarrick J. Wong existing->flen += new->flen; 240dfa8bad3SDarrick J. Wong return true; 241dfa8bad3SDarrick J. Wong } 242dfa8bad3SDarrick J. Wong 243dfa8bad3SDarrick J. Wong if (new->fpos + new->flen == existing->fpos) { 244dfa8bad3SDarrick J. Wong existing->fpos = new->fpos; 245dfa8bad3SDarrick J. Wong existing->flen += new->flen; 246dfa8bad3SDarrick J. Wong return true; 247dfa8bad3SDarrick J. Wong } 248dfa8bad3SDarrick J. Wong return false; 249b3a289a2SDarrick J. Wong } 250b3a289a2SDarrick J. Wong 251b3a289a2SDarrick J. Wong return false; 252b3a289a2SDarrick J. Wong } 253b3a289a2SDarrick J. Wong 254b3a289a2SDarrick J. Wong /* Insert an event onto the start of the queue. */ 255b3a289a2SDarrick J. Wong static inline void 256b3a289a2SDarrick J. Wong __xfs_healthmon_insert( 257b3a289a2SDarrick J. Wong struct xfs_healthmon *hm, 258b3a289a2SDarrick J. Wong struct xfs_healthmon_event *event) 259b3a289a2SDarrick J. Wong { 260b3a289a2SDarrick J. Wong struct timespec64 now; 261b3a289a2SDarrick J. Wong 262b3a289a2SDarrick J. Wong ktime_get_coarse_real_ts64(&now); 263b3a289a2SDarrick J. Wong event->time_ns = (now.tv_sec * NSEC_PER_SEC) + now.tv_nsec; 264b3a289a2SDarrick J. Wong 265b3a289a2SDarrick J. Wong event->next = hm->first_event; 266b3a289a2SDarrick J. Wong if (!hm->first_event) 267b3a289a2SDarrick J. Wong hm->first_event = event; 268b3a289a2SDarrick J. Wong if (!hm->last_event) 269b3a289a2SDarrick J. Wong hm->last_event = event; 270b3a289a2SDarrick J. Wong xfs_healthmon_bump_events(hm); 271b3a289a2SDarrick J. Wong wake_up(&hm->wait); 272b3a289a2SDarrick J. Wong 273b3a289a2SDarrick J. Wong trace_xfs_healthmon_insert(hm, event); 274b3a289a2SDarrick J. Wong } 275b3a289a2SDarrick J. Wong 276b3a289a2SDarrick J. Wong /* Push an event onto the end of the queue. */ 277b3a289a2SDarrick J. Wong static inline void 278b3a289a2SDarrick J. Wong __xfs_healthmon_push( 279b3a289a2SDarrick J. Wong struct xfs_healthmon *hm, 280b3a289a2SDarrick J. Wong struct xfs_healthmon_event *event) 281b3a289a2SDarrick J. Wong { 282b3a289a2SDarrick J. Wong struct timespec64 now; 283b3a289a2SDarrick J. Wong 284b3a289a2SDarrick J. Wong ktime_get_coarse_real_ts64(&now); 285b3a289a2SDarrick J. Wong event->time_ns = (now.tv_sec * NSEC_PER_SEC) + now.tv_nsec; 286b3a289a2SDarrick J. Wong 287b3a289a2SDarrick J. Wong if (!hm->first_event) 288b3a289a2SDarrick J. Wong hm->first_event = event; 289b3a289a2SDarrick J. Wong if (hm->last_event) 290b3a289a2SDarrick J. Wong hm->last_event->next = event; 291b3a289a2SDarrick J. Wong hm->last_event = event; 292b3a289a2SDarrick J. Wong event->next = NULL; 293b3a289a2SDarrick J. Wong xfs_healthmon_bump_events(hm); 294b3a289a2SDarrick J. Wong wake_up(&hm->wait); 295b3a289a2SDarrick J. Wong 296b3a289a2SDarrick J. Wong trace_xfs_healthmon_push(hm, event); 297b3a289a2SDarrick J. Wong } 298b3a289a2SDarrick J. Wong 299b3a289a2SDarrick J. Wong /* Deal with any previously lost events */ 300b3a289a2SDarrick J. Wong static int 301b3a289a2SDarrick J. Wong xfs_healthmon_clear_lost_prev( 302b3a289a2SDarrick J. Wong struct xfs_healthmon *hm) 303b3a289a2SDarrick J. Wong { 304b3a289a2SDarrick J. Wong struct xfs_healthmon_event lost_event = { 305b3a289a2SDarrick J. Wong .type = XFS_HEALTHMON_LOST, 306b3a289a2SDarrick J. Wong .domain = XFS_HEALTHMON_MOUNT, 307b3a289a2SDarrick J. Wong .lostcount = hm->lost_prev_event, 308b3a289a2SDarrick J. Wong }; 309b3a289a2SDarrick J. Wong struct xfs_healthmon_event *event = NULL; 310b3a289a2SDarrick J. Wong 311b3a289a2SDarrick J. Wong if (xfs_healthmon_merge_events(hm->last_event, &lost_event)) { 312b3a289a2SDarrick J. Wong trace_xfs_healthmon_merge(hm, hm->last_event); 313b3a289a2SDarrick J. Wong wake_up(&hm->wait); 314b3a289a2SDarrick J. Wong goto cleared; 315b3a289a2SDarrick J. Wong } 316b3a289a2SDarrick J. Wong 317b3a289a2SDarrick J. Wong if (hm->events < XFS_HEALTHMON_MAX_EVENTS) 318b3a289a2SDarrick J. Wong event = kmemdup(&lost_event, sizeof(struct xfs_healthmon_event), 319b3a289a2SDarrick J. Wong GFP_NOFS); 320b3a289a2SDarrick J. Wong if (!event) 321b3a289a2SDarrick J. Wong return -ENOMEM; 322b3a289a2SDarrick J. Wong 323b3a289a2SDarrick J. Wong __xfs_healthmon_push(hm, event); 324b3a289a2SDarrick J. Wong cleared: 325b3a289a2SDarrick J. Wong hm->lost_prev_event = 0; 326b3a289a2SDarrick J. Wong return 0; 327b3a289a2SDarrick J. Wong } 328b3a289a2SDarrick J. Wong 329b3a289a2SDarrick J. Wong /* 330b3a289a2SDarrick J. Wong * Push an event onto the end of the list after dealing with lost events and 331b3a289a2SDarrick J. Wong * possibly full queues. 332b3a289a2SDarrick J. Wong */ 333b3a289a2SDarrick J. Wong STATIC int 334b3a289a2SDarrick J. Wong xfs_healthmon_push( 335b3a289a2SDarrick J. Wong struct xfs_healthmon *hm, 336b3a289a2SDarrick J. Wong const struct xfs_healthmon_event *template) 337b3a289a2SDarrick J. Wong { 338b3a289a2SDarrick J. Wong struct xfs_healthmon_event *event = NULL; 339b3a289a2SDarrick J. Wong int error = 0; 340b3a289a2SDarrick J. Wong 341b3a289a2SDarrick J. Wong /* 342b3a289a2SDarrick J. Wong * Locklessly check if the health monitor has already detached from the 343b3a289a2SDarrick J. Wong * mount. If so, ignore the event. If we race with deactivation, 344b3a289a2SDarrick J. Wong * we'll queue the event but never send it. 345b3a289a2SDarrick J. Wong */ 346b3a289a2SDarrick J. Wong if (hm->mount_cookie == DETACHED_MOUNT_COOKIE) 347b3a289a2SDarrick J. Wong return -ESHUTDOWN; 348b3a289a2SDarrick J. Wong 349b3a289a2SDarrick J. Wong mutex_lock(&hm->lock); 350b3a289a2SDarrick J. Wong 351b3a289a2SDarrick J. Wong /* Report previously lost events before we do anything else */ 352b3a289a2SDarrick J. Wong if (hm->lost_prev_event) { 353b3a289a2SDarrick J. Wong error = xfs_healthmon_clear_lost_prev(hm); 354b3a289a2SDarrick J. Wong if (error) 355b3a289a2SDarrick J. Wong goto out_unlock; 356b3a289a2SDarrick J. Wong } 357b3a289a2SDarrick J. Wong 358b3a289a2SDarrick J. Wong /* Try to merge with the newest event */ 359b3a289a2SDarrick J. Wong if (xfs_healthmon_merge_events(hm->last_event, template)) { 360b3a289a2SDarrick J. Wong trace_xfs_healthmon_merge(hm, hm->last_event); 361b3a289a2SDarrick J. Wong wake_up(&hm->wait); 362b3a289a2SDarrick J. Wong goto out_unlock; 363b3a289a2SDarrick J. Wong } 364b3a289a2SDarrick J. Wong 365b3a289a2SDarrick J. Wong /* Only create a heap event object if we're not already at capacity. */ 366b3a289a2SDarrick J. Wong if (hm->events < XFS_HEALTHMON_MAX_EVENTS) 367b3a289a2SDarrick J. Wong event = kmemdup(template, sizeof(struct xfs_healthmon_event), 368b3a289a2SDarrick J. Wong GFP_NOFS); 369b3a289a2SDarrick J. Wong if (!event) { 370b3a289a2SDarrick J. Wong /* No memory means we lose the event */ 371b3a289a2SDarrick J. Wong trace_xfs_healthmon_lost_event(hm); 372b3a289a2SDarrick J. Wong xfs_healthmon_bump_lost(hm); 373b3a289a2SDarrick J. Wong error = -ENOMEM; 374b3a289a2SDarrick J. Wong goto out_unlock; 375b3a289a2SDarrick J. Wong } 376b3a289a2SDarrick J. Wong 377b3a289a2SDarrick J. Wong __xfs_healthmon_push(hm, event); 378b3a289a2SDarrick J. Wong 379b3a289a2SDarrick J. Wong out_unlock: 380b3a289a2SDarrick J. Wong mutex_unlock(&hm->lock); 381b3a289a2SDarrick J. Wong return error; 382b3a289a2SDarrick J. Wong } 383b3a289a2SDarrick J. Wong 38425ca57faSDarrick J. Wong /* 38525ca57faSDarrick J. Wong * Report that the filesystem is being unmounted, then detach the xfs mount 38625ca57faSDarrick J. Wong * from this healthmon instance. 38725ca57faSDarrick J. Wong */ 388a48373e7SDarrick J. Wong void 389a48373e7SDarrick J. Wong xfs_healthmon_unmount( 390a48373e7SDarrick J. Wong struct xfs_mount *mp) 391a48373e7SDarrick J. Wong { 392a48373e7SDarrick J. Wong struct xfs_healthmon *hm = xfs_healthmon_get(mp); 393a48373e7SDarrick J. Wong 394a48373e7SDarrick J. Wong if (!hm) 395a48373e7SDarrick J. Wong return; 396a48373e7SDarrick J. Wong 39725ca57faSDarrick J. Wong trace_xfs_healthmon_report_unmount(hm); 39825ca57faSDarrick J. Wong 39925ca57faSDarrick J. Wong /* 40025ca57faSDarrick J. Wong * Insert the unmount notification at the start of the event queue so 40125ca57faSDarrick J. Wong * that userspace knows the filesystem went away as soon as possible. 40225ca57faSDarrick J. Wong * There's nothing actionable for userspace after an unmount. Once 40325ca57faSDarrick J. Wong * we've inserted the unmount event, hm no longer owns that event. 40425ca57faSDarrick J. Wong */ 40525ca57faSDarrick J. Wong __xfs_healthmon_insert(hm, hm->unmount_event); 40625ca57faSDarrick J. Wong hm->unmount_event = NULL; 40725ca57faSDarrick J. Wong 408a48373e7SDarrick J. Wong xfs_healthmon_detach(hm); 409a48373e7SDarrick J. Wong xfs_healthmon_put(hm); 410a48373e7SDarrick J. Wong } 411a48373e7SDarrick J. Wong 4125eb4cb18SDarrick J. Wong /* Compute the reporting mask for non-unmount metadata health events. */ 4135eb4cb18SDarrick J. Wong static inline unsigned int 4145eb4cb18SDarrick J. Wong metadata_event_mask( 4155eb4cb18SDarrick J. Wong struct xfs_healthmon *hm, 4165eb4cb18SDarrick J. Wong enum xfs_healthmon_type type, 4175eb4cb18SDarrick J. Wong unsigned int old_mask, 4185eb4cb18SDarrick J. Wong unsigned int new_mask) 4195eb4cb18SDarrick J. Wong { 4205eb4cb18SDarrick J. Wong /* If we want all events, return all events. */ 4215eb4cb18SDarrick J. Wong if (hm->verbose) 4225eb4cb18SDarrick J. Wong return new_mask; 4235eb4cb18SDarrick J. Wong 4245eb4cb18SDarrick J. Wong switch (type) { 4255eb4cb18SDarrick J. Wong case XFS_HEALTHMON_SICK: 4265eb4cb18SDarrick J. Wong /* Always report runtime corruptions */ 4275eb4cb18SDarrick J. Wong return new_mask; 4285eb4cb18SDarrick J. Wong case XFS_HEALTHMON_CORRUPT: 4295eb4cb18SDarrick J. Wong /* Only report new fsck errors */ 4305eb4cb18SDarrick J. Wong return new_mask & ~old_mask; 4315eb4cb18SDarrick J. Wong case XFS_HEALTHMON_HEALTHY: 4325eb4cb18SDarrick J. Wong /* Only report healthy metadata that got fixed */ 4335eb4cb18SDarrick J. Wong return new_mask & old_mask; 4345eb4cb18SDarrick J. Wong default: 4355eb4cb18SDarrick J. Wong ASSERT(0); 4365eb4cb18SDarrick J. Wong break; 4375eb4cb18SDarrick J. Wong } 4385eb4cb18SDarrick J. Wong 4395eb4cb18SDarrick J. Wong return 0; 4405eb4cb18SDarrick J. Wong } 4415eb4cb18SDarrick J. Wong 4425eb4cb18SDarrick J. Wong /* Report XFS_FS_SICK_* events to healthmon */ 4435eb4cb18SDarrick J. Wong void 4445eb4cb18SDarrick J. Wong xfs_healthmon_report_fs( 4455eb4cb18SDarrick J. Wong struct xfs_mount *mp, 4465eb4cb18SDarrick J. Wong enum xfs_healthmon_type type, 4475eb4cb18SDarrick J. Wong unsigned int old_mask, 4485eb4cb18SDarrick J. Wong unsigned int new_mask) 4495eb4cb18SDarrick J. Wong { 4505eb4cb18SDarrick J. Wong struct xfs_healthmon_event event = { 4515eb4cb18SDarrick J. Wong .type = type, 4525eb4cb18SDarrick J. Wong .domain = XFS_HEALTHMON_FS, 4535eb4cb18SDarrick J. Wong }; 4545eb4cb18SDarrick J. Wong struct xfs_healthmon *hm = xfs_healthmon_get(mp); 4555eb4cb18SDarrick J. Wong 4565eb4cb18SDarrick J. Wong if (!hm) 4575eb4cb18SDarrick J. Wong return; 4585eb4cb18SDarrick J. Wong 4595eb4cb18SDarrick J. Wong event.fsmask = metadata_event_mask(hm, type, old_mask, new_mask) & 4605eb4cb18SDarrick J. Wong ~XFS_SICK_FS_SECONDARY; 4615eb4cb18SDarrick J. Wong trace_xfs_healthmon_report_fs(hm, old_mask, new_mask, &event); 4625eb4cb18SDarrick J. Wong 4635eb4cb18SDarrick J. Wong if (event.fsmask) 4645eb4cb18SDarrick J. Wong xfs_healthmon_push(hm, &event); 4655eb4cb18SDarrick J. Wong 4665eb4cb18SDarrick J. Wong xfs_healthmon_put(hm); 4675eb4cb18SDarrick J. Wong } 4685eb4cb18SDarrick J. Wong 4695eb4cb18SDarrick J. Wong /* Report XFS_SICK_(AG|RG)* flags to healthmon */ 4705eb4cb18SDarrick J. Wong void 4715eb4cb18SDarrick J. Wong xfs_healthmon_report_group( 4725eb4cb18SDarrick J. Wong struct xfs_group *xg, 4735eb4cb18SDarrick J. Wong enum xfs_healthmon_type type, 4745eb4cb18SDarrick J. Wong unsigned int old_mask, 4755eb4cb18SDarrick J. Wong unsigned int new_mask) 4765eb4cb18SDarrick J. Wong { 4775eb4cb18SDarrick J. Wong struct xfs_healthmon_event event = { 4785eb4cb18SDarrick J. Wong .type = type, 4795eb4cb18SDarrick J. Wong .group = xg->xg_gno, 4805eb4cb18SDarrick J. Wong }; 4815eb4cb18SDarrick J. Wong struct xfs_healthmon *hm = xfs_healthmon_get(xg->xg_mount); 4825eb4cb18SDarrick J. Wong 4835eb4cb18SDarrick J. Wong if (!hm) 4845eb4cb18SDarrick J. Wong return; 4855eb4cb18SDarrick J. Wong 4865eb4cb18SDarrick J. Wong switch (xg->xg_type) { 4875eb4cb18SDarrick J. Wong case XG_TYPE_RTG: 4885eb4cb18SDarrick J. Wong event.domain = XFS_HEALTHMON_RTGROUP; 4895eb4cb18SDarrick J. Wong event.grpmask = metadata_event_mask(hm, type, old_mask, 4905eb4cb18SDarrick J. Wong new_mask) & 4915eb4cb18SDarrick J. Wong ~XFS_SICK_RG_SECONDARY; 4925eb4cb18SDarrick J. Wong break; 4935eb4cb18SDarrick J. Wong case XG_TYPE_AG: 4945eb4cb18SDarrick J. Wong event.domain = XFS_HEALTHMON_AG; 4955eb4cb18SDarrick J. Wong event.grpmask = metadata_event_mask(hm, type, old_mask, 4965eb4cb18SDarrick J. Wong new_mask) & 4975eb4cb18SDarrick J. Wong ~XFS_SICK_AG_SECONDARY; 4985eb4cb18SDarrick J. Wong break; 4995eb4cb18SDarrick J. Wong default: 5005eb4cb18SDarrick J. Wong ASSERT(0); 5015eb4cb18SDarrick J. Wong break; 5025eb4cb18SDarrick J. Wong } 5035eb4cb18SDarrick J. Wong 5045eb4cb18SDarrick J. Wong trace_xfs_healthmon_report_group(hm, old_mask, new_mask, &event); 5055eb4cb18SDarrick J. Wong 5065eb4cb18SDarrick J. Wong if (event.grpmask) 5075eb4cb18SDarrick J. Wong xfs_healthmon_push(hm, &event); 5085eb4cb18SDarrick J. Wong 5095eb4cb18SDarrick J. Wong xfs_healthmon_put(hm); 5105eb4cb18SDarrick J. Wong } 5115eb4cb18SDarrick J. Wong 5125eb4cb18SDarrick J. Wong /* Report XFS_SICK_INO_* flags to healthmon */ 5135eb4cb18SDarrick J. Wong void 5145eb4cb18SDarrick J. Wong xfs_healthmon_report_inode( 5155eb4cb18SDarrick J. Wong struct xfs_inode *ip, 5165eb4cb18SDarrick J. Wong enum xfs_healthmon_type type, 5175eb4cb18SDarrick J. Wong unsigned int old_mask, 5185eb4cb18SDarrick J. Wong unsigned int new_mask) 5195eb4cb18SDarrick J. Wong { 5205eb4cb18SDarrick J. Wong struct xfs_healthmon_event event = { 5215eb4cb18SDarrick J. Wong .type = type, 5225eb4cb18SDarrick J. Wong .domain = XFS_HEALTHMON_INODE, 5235eb4cb18SDarrick J. Wong .ino = ip->i_ino, 5245eb4cb18SDarrick J. Wong .gen = VFS_I(ip)->i_generation, 5255eb4cb18SDarrick J. Wong }; 5265eb4cb18SDarrick J. Wong struct xfs_healthmon *hm = xfs_healthmon_get(ip->i_mount); 5275eb4cb18SDarrick J. Wong 5285eb4cb18SDarrick J. Wong if (!hm) 5295eb4cb18SDarrick J. Wong return; 5305eb4cb18SDarrick J. Wong 5315eb4cb18SDarrick J. Wong event.imask = metadata_event_mask(hm, type, old_mask, new_mask) & 5325eb4cb18SDarrick J. Wong ~XFS_SICK_INO_SECONDARY; 5335eb4cb18SDarrick J. Wong trace_xfs_healthmon_report_inode(hm, old_mask, event.imask, &event); 5345eb4cb18SDarrick J. Wong 5355eb4cb18SDarrick J. Wong if (event.imask) 5365eb4cb18SDarrick J. Wong xfs_healthmon_push(hm, &event); 5375eb4cb18SDarrick J. Wong 5385eb4cb18SDarrick J. Wong xfs_healthmon_put(hm); 5395eb4cb18SDarrick J. Wong } 5405eb4cb18SDarrick J. Wong 54174c4795eSDarrick J. Wong /* Add a shutdown event to the reporting queue. */ 54274c4795eSDarrick J. Wong void 54374c4795eSDarrick J. Wong xfs_healthmon_report_shutdown( 54474c4795eSDarrick J. Wong struct xfs_mount *mp, 54574c4795eSDarrick J. Wong uint32_t flags) 54674c4795eSDarrick J. Wong { 54774c4795eSDarrick J. Wong struct xfs_healthmon_event event = { 54874c4795eSDarrick J. Wong .type = XFS_HEALTHMON_SHUTDOWN, 54974c4795eSDarrick J. Wong .domain = XFS_HEALTHMON_MOUNT, 55074c4795eSDarrick J. Wong .flags = flags, 55174c4795eSDarrick J. Wong }; 55274c4795eSDarrick J. Wong struct xfs_healthmon *hm = xfs_healthmon_get(mp); 55374c4795eSDarrick J. Wong 55474c4795eSDarrick J. Wong if (!hm) 55574c4795eSDarrick J. Wong return; 55674c4795eSDarrick J. Wong 55774c4795eSDarrick J. Wong trace_xfs_healthmon_report_shutdown(hm, flags); 55874c4795eSDarrick J. Wong 55974c4795eSDarrick J. Wong xfs_healthmon_push(hm, &event); 56074c4795eSDarrick J. Wong xfs_healthmon_put(hm); 56174c4795eSDarrick J. Wong } 56274c4795eSDarrick J. Wong 563e76e0e3fSDarrick J. Wong static inline enum xfs_healthmon_domain 564e76e0e3fSDarrick J. Wong media_error_domain( 565e76e0e3fSDarrick J. Wong enum xfs_device fdev) 566e76e0e3fSDarrick J. Wong { 567e76e0e3fSDarrick J. Wong switch (fdev) { 568e76e0e3fSDarrick J. Wong case XFS_DEV_DATA: 569e76e0e3fSDarrick J. Wong return XFS_HEALTHMON_DATADEV; 570e76e0e3fSDarrick J. Wong case XFS_DEV_LOG: 571e76e0e3fSDarrick J. Wong return XFS_HEALTHMON_LOGDEV; 572e76e0e3fSDarrick J. Wong case XFS_DEV_RT: 573e76e0e3fSDarrick J. Wong return XFS_HEALTHMON_RTDEV; 574e76e0e3fSDarrick J. Wong } 575e76e0e3fSDarrick J. Wong 576e76e0e3fSDarrick J. Wong ASSERT(0); 577e76e0e3fSDarrick J. Wong return 0; 578e76e0e3fSDarrick J. Wong } 579e76e0e3fSDarrick J. Wong 580e76e0e3fSDarrick J. Wong /* Add a media error event to the reporting queue. */ 581e76e0e3fSDarrick J. Wong void 582e76e0e3fSDarrick J. Wong xfs_healthmon_report_media( 583e76e0e3fSDarrick J. Wong struct xfs_mount *mp, 584e76e0e3fSDarrick J. Wong enum xfs_device fdev, 585e76e0e3fSDarrick J. Wong xfs_daddr_t daddr, 586e76e0e3fSDarrick J. Wong uint64_t bbcount) 587e76e0e3fSDarrick J. Wong { 588e76e0e3fSDarrick J. Wong struct xfs_healthmon_event event = { 589e76e0e3fSDarrick J. Wong .type = XFS_HEALTHMON_MEDIA_ERROR, 590e76e0e3fSDarrick J. Wong .domain = media_error_domain(fdev), 591e76e0e3fSDarrick J. Wong .daddr = daddr, 592e76e0e3fSDarrick J. Wong .bbcount = bbcount, 593e76e0e3fSDarrick J. Wong }; 594e76e0e3fSDarrick J. Wong struct xfs_healthmon *hm = xfs_healthmon_get(mp); 595e76e0e3fSDarrick J. Wong 596e76e0e3fSDarrick J. Wong if (!hm) 597e76e0e3fSDarrick J. Wong return; 598e76e0e3fSDarrick J. Wong 599e76e0e3fSDarrick J. Wong trace_xfs_healthmon_report_media(hm, fdev, &event); 600e76e0e3fSDarrick J. Wong 601e76e0e3fSDarrick J. Wong xfs_healthmon_push(hm, &event); 602e76e0e3fSDarrick J. Wong xfs_healthmon_put(hm); 603e76e0e3fSDarrick J. Wong } 604e76e0e3fSDarrick J. Wong 605dfa8bad3SDarrick J. Wong static inline enum xfs_healthmon_type file_ioerr_type(enum fserror_type action) 606dfa8bad3SDarrick J. Wong { 607dfa8bad3SDarrick J. Wong switch (action) { 608dfa8bad3SDarrick J. Wong case FSERR_BUFFERED_READ: 609dfa8bad3SDarrick J. Wong return XFS_HEALTHMON_BUFREAD; 610dfa8bad3SDarrick J. Wong case FSERR_BUFFERED_WRITE: 611dfa8bad3SDarrick J. Wong return XFS_HEALTHMON_BUFWRITE; 612dfa8bad3SDarrick J. Wong case FSERR_DIRECTIO_READ: 613dfa8bad3SDarrick J. Wong return XFS_HEALTHMON_DIOREAD; 614dfa8bad3SDarrick J. Wong case FSERR_DIRECTIO_WRITE: 615dfa8bad3SDarrick J. Wong return XFS_HEALTHMON_DIOWRITE; 616dfa8bad3SDarrick J. Wong case FSERR_DATA_LOST: 617dfa8bad3SDarrick J. Wong return XFS_HEALTHMON_DATALOST; 618dfa8bad3SDarrick J. Wong case FSERR_METADATA: 619dfa8bad3SDarrick J. Wong /* filtered out by xfs_fs_report_error */ 620dfa8bad3SDarrick J. Wong break; 621dfa8bad3SDarrick J. Wong } 622dfa8bad3SDarrick J. Wong 623dfa8bad3SDarrick J. Wong ASSERT(0); 624dfa8bad3SDarrick J. Wong return -1; 625dfa8bad3SDarrick J. Wong } 626dfa8bad3SDarrick J. Wong 627dfa8bad3SDarrick J. Wong /* Add a file io error event to the reporting queue. */ 628dfa8bad3SDarrick J. Wong void 629dfa8bad3SDarrick J. Wong xfs_healthmon_report_file_ioerror( 630dfa8bad3SDarrick J. Wong struct xfs_inode *ip, 631dfa8bad3SDarrick J. Wong const struct fserror_event *p) 632dfa8bad3SDarrick J. Wong { 633dfa8bad3SDarrick J. Wong struct xfs_healthmon_event event = { 634dfa8bad3SDarrick J. Wong .type = file_ioerr_type(p->type), 635dfa8bad3SDarrick J. Wong .domain = XFS_HEALTHMON_FILERANGE, 636dfa8bad3SDarrick J. Wong .fino = ip->i_ino, 637dfa8bad3SDarrick J. Wong .fgen = VFS_I(ip)->i_generation, 638dfa8bad3SDarrick J. Wong .fpos = p->pos, 639dfa8bad3SDarrick J. Wong .flen = p->len, 640dfa8bad3SDarrick J. Wong /* send positive error number to userspace */ 641dfa8bad3SDarrick J. Wong .error = -p->error, 642dfa8bad3SDarrick J. Wong }; 643dfa8bad3SDarrick J. Wong struct xfs_healthmon *hm = xfs_healthmon_get(ip->i_mount); 644dfa8bad3SDarrick J. Wong 645dfa8bad3SDarrick J. Wong if (!hm) 646dfa8bad3SDarrick J. Wong return; 647dfa8bad3SDarrick J. Wong 648dfa8bad3SDarrick J. Wong trace_xfs_healthmon_report_file_ioerror(hm, p); 649dfa8bad3SDarrick J. Wong 650dfa8bad3SDarrick J. Wong xfs_healthmon_push(hm, &event); 651dfa8bad3SDarrick J. Wong xfs_healthmon_put(hm); 652dfa8bad3SDarrick J. Wong } 653dfa8bad3SDarrick J. Wong 654b3a289a2SDarrick J. Wong static inline void 655b3a289a2SDarrick J. Wong xfs_healthmon_reset_outbuf( 656b3a289a2SDarrick J. Wong struct xfs_healthmon *hm) 657b3a289a2SDarrick J. Wong { 658b3a289a2SDarrick J. Wong hm->buftail = 0; 659b3a289a2SDarrick J. Wong hm->bufhead = 0; 660b3a289a2SDarrick J. Wong } 661b3a289a2SDarrick J. Wong 66274c4795eSDarrick J. Wong struct flags_map { 66374c4795eSDarrick J. Wong unsigned int in_mask; 66474c4795eSDarrick J. Wong unsigned int out_mask; 66574c4795eSDarrick J. Wong }; 66674c4795eSDarrick J. Wong 66774c4795eSDarrick J. Wong static const struct flags_map shutdown_map[] = { 66874c4795eSDarrick J. Wong { SHUTDOWN_META_IO_ERROR, XFS_HEALTH_SHUTDOWN_META_IO_ERROR }, 66974c4795eSDarrick J. Wong { SHUTDOWN_LOG_IO_ERROR, XFS_HEALTH_SHUTDOWN_LOG_IO_ERROR }, 67074c4795eSDarrick J. Wong { SHUTDOWN_FORCE_UMOUNT, XFS_HEALTH_SHUTDOWN_FORCE_UMOUNT }, 67174c4795eSDarrick J. Wong { SHUTDOWN_CORRUPT_INCORE, XFS_HEALTH_SHUTDOWN_CORRUPT_INCORE }, 67274c4795eSDarrick J. Wong { SHUTDOWN_CORRUPT_ONDISK, XFS_HEALTH_SHUTDOWN_CORRUPT_ONDISK }, 67374c4795eSDarrick J. Wong { SHUTDOWN_DEVICE_REMOVED, XFS_HEALTH_SHUTDOWN_DEVICE_REMOVED }, 67474c4795eSDarrick J. Wong }; 67574c4795eSDarrick J. Wong 67674c4795eSDarrick J. Wong static inline unsigned int 67774c4795eSDarrick J. Wong __map_flags( 67874c4795eSDarrick J. Wong const struct flags_map *map, 67974c4795eSDarrick J. Wong size_t array_len, 68074c4795eSDarrick J. Wong unsigned int flags) 68174c4795eSDarrick J. Wong { 68274c4795eSDarrick J. Wong const struct flags_map *m; 68374c4795eSDarrick J. Wong unsigned int ret = 0; 68474c4795eSDarrick J. Wong 68574c4795eSDarrick J. Wong for (m = map; m < map + array_len; m++) { 68674c4795eSDarrick J. Wong if (flags & m->in_mask) 68774c4795eSDarrick J. Wong ret |= m->out_mask; 68874c4795eSDarrick J. Wong } 68974c4795eSDarrick J. Wong 69074c4795eSDarrick J. Wong return ret; 69174c4795eSDarrick J. Wong } 69274c4795eSDarrick J. Wong 69374c4795eSDarrick J. Wong #define map_flags(map, flags) __map_flags((map), ARRAY_SIZE(map), (flags)) 69474c4795eSDarrick J. Wong 69574c4795eSDarrick J. Wong static inline unsigned int shutdown_mask(unsigned int in) 69674c4795eSDarrick J. Wong { 69774c4795eSDarrick J. Wong return map_flags(shutdown_map, in); 69874c4795eSDarrick J. Wong } 69974c4795eSDarrick J. Wong 700b3a289a2SDarrick J. Wong static const unsigned int domain_map[] = { 701b3a289a2SDarrick J. Wong [XFS_HEALTHMON_MOUNT] = XFS_HEALTH_MONITOR_DOMAIN_MOUNT, 7025eb4cb18SDarrick J. Wong [XFS_HEALTHMON_FS] = XFS_HEALTH_MONITOR_DOMAIN_FS, 7035eb4cb18SDarrick J. Wong [XFS_HEALTHMON_AG] = XFS_HEALTH_MONITOR_DOMAIN_AG, 7045eb4cb18SDarrick J. Wong [XFS_HEALTHMON_INODE] = XFS_HEALTH_MONITOR_DOMAIN_INODE, 7055eb4cb18SDarrick J. Wong [XFS_HEALTHMON_RTGROUP] = XFS_HEALTH_MONITOR_DOMAIN_RTGROUP, 706e76e0e3fSDarrick J. Wong [XFS_HEALTHMON_DATADEV] = XFS_HEALTH_MONITOR_DOMAIN_DATADEV, 707e76e0e3fSDarrick J. Wong [XFS_HEALTHMON_RTDEV] = XFS_HEALTH_MONITOR_DOMAIN_RTDEV, 708e76e0e3fSDarrick J. Wong [XFS_HEALTHMON_LOGDEV] = XFS_HEALTH_MONITOR_DOMAIN_LOGDEV, 709dfa8bad3SDarrick J. Wong [XFS_HEALTHMON_FILERANGE] = XFS_HEALTH_MONITOR_DOMAIN_FILERANGE, 710b3a289a2SDarrick J. Wong }; 711b3a289a2SDarrick J. Wong 712b3a289a2SDarrick J. Wong static const unsigned int type_map[] = { 713b3a289a2SDarrick J. Wong [XFS_HEALTHMON_RUNNING] = XFS_HEALTH_MONITOR_TYPE_RUNNING, 714b3a289a2SDarrick J. Wong [XFS_HEALTHMON_LOST] = XFS_HEALTH_MONITOR_TYPE_LOST, 7155eb4cb18SDarrick J. Wong [XFS_HEALTHMON_SICK] = XFS_HEALTH_MONITOR_TYPE_SICK, 7165eb4cb18SDarrick J. Wong [XFS_HEALTHMON_CORRUPT] = XFS_HEALTH_MONITOR_TYPE_CORRUPT, 7175eb4cb18SDarrick J. Wong [XFS_HEALTHMON_HEALTHY] = XFS_HEALTH_MONITOR_TYPE_HEALTHY, 7185eb4cb18SDarrick J. Wong [XFS_HEALTHMON_UNMOUNT] = XFS_HEALTH_MONITOR_TYPE_UNMOUNT, 71974c4795eSDarrick J. Wong [XFS_HEALTHMON_SHUTDOWN] = XFS_HEALTH_MONITOR_TYPE_SHUTDOWN, 720e76e0e3fSDarrick J. Wong [XFS_HEALTHMON_MEDIA_ERROR] = XFS_HEALTH_MONITOR_TYPE_MEDIA_ERROR, 721dfa8bad3SDarrick J. Wong [XFS_HEALTHMON_BUFREAD] = XFS_HEALTH_MONITOR_TYPE_BUFREAD, 722dfa8bad3SDarrick J. Wong [XFS_HEALTHMON_BUFWRITE] = XFS_HEALTH_MONITOR_TYPE_BUFWRITE, 723dfa8bad3SDarrick J. Wong [XFS_HEALTHMON_DIOREAD] = XFS_HEALTH_MONITOR_TYPE_DIOREAD, 724dfa8bad3SDarrick J. Wong [XFS_HEALTHMON_DIOWRITE] = XFS_HEALTH_MONITOR_TYPE_DIOWRITE, 725dfa8bad3SDarrick J. Wong [XFS_HEALTHMON_DATALOST] = XFS_HEALTH_MONITOR_TYPE_DATALOST, 726b3a289a2SDarrick J. Wong }; 727b3a289a2SDarrick J. Wong 728b3a289a2SDarrick J. Wong /* Render event as a V0 structure */ 729b3a289a2SDarrick J. Wong STATIC int 730b3a289a2SDarrick J. Wong xfs_healthmon_format_v0( 731b3a289a2SDarrick J. Wong struct xfs_healthmon *hm, 732b3a289a2SDarrick J. Wong const struct xfs_healthmon_event *event) 733b3a289a2SDarrick J. Wong { 734b3a289a2SDarrick J. Wong struct xfs_health_monitor_event hme = { 735b3a289a2SDarrick J. Wong .time_ns = event->time_ns, 736b3a289a2SDarrick J. Wong }; 737b3a289a2SDarrick J. Wong 738b3a289a2SDarrick J. Wong trace_xfs_healthmon_format(hm, event); 739b3a289a2SDarrick J. Wong 740b3a289a2SDarrick J. Wong if (event->domain < 0 || event->domain >= ARRAY_SIZE(domain_map) || 741b3a289a2SDarrick J. Wong event->type < 0 || event->type >= ARRAY_SIZE(type_map)) 742b3a289a2SDarrick J. Wong return -EFSCORRUPTED; 743b3a289a2SDarrick J. Wong 744b3a289a2SDarrick J. Wong hme.domain = domain_map[event->domain]; 745b3a289a2SDarrick J. Wong hme.type = type_map[event->type]; 746b3a289a2SDarrick J. Wong 747b3a289a2SDarrick J. Wong /* fill in the event-specific details */ 748b3a289a2SDarrick J. Wong switch (event->domain) { 749b3a289a2SDarrick J. Wong case XFS_HEALTHMON_MOUNT: 750b3a289a2SDarrick J. Wong switch (event->type) { 751b3a289a2SDarrick J. Wong case XFS_HEALTHMON_LOST: 752b3a289a2SDarrick J. Wong hme.e.lost.count = event->lostcount; 753b3a289a2SDarrick J. Wong break; 75474c4795eSDarrick J. Wong case XFS_HEALTHMON_SHUTDOWN: 75574c4795eSDarrick J. Wong hme.e.shutdown.reasons = shutdown_mask(event->flags); 75674c4795eSDarrick J. Wong break; 757b3a289a2SDarrick J. Wong default: 758b3a289a2SDarrick J. Wong break; 759b3a289a2SDarrick J. Wong } 760b3a289a2SDarrick J. Wong break; 7615eb4cb18SDarrick J. Wong case XFS_HEALTHMON_FS: 7625eb4cb18SDarrick J. Wong hme.e.fs.mask = xfs_healthmon_fs_mask(event->fsmask); 7635eb4cb18SDarrick J. Wong break; 7645eb4cb18SDarrick J. Wong case XFS_HEALTHMON_RTGROUP: 7655eb4cb18SDarrick J. Wong hme.e.group.mask = xfs_healthmon_rtgroup_mask(event->grpmask); 7665eb4cb18SDarrick J. Wong hme.e.group.gno = event->group; 7675eb4cb18SDarrick J. Wong break; 7685eb4cb18SDarrick J. Wong case XFS_HEALTHMON_AG: 7695eb4cb18SDarrick J. Wong hme.e.group.mask = xfs_healthmon_perag_mask(event->grpmask); 7705eb4cb18SDarrick J. Wong hme.e.group.gno = event->group; 7715eb4cb18SDarrick J. Wong break; 7725eb4cb18SDarrick J. Wong case XFS_HEALTHMON_INODE: 7735eb4cb18SDarrick J. Wong hme.e.inode.mask = xfs_healthmon_inode_mask(event->imask); 7745eb4cb18SDarrick J. Wong hme.e.inode.ino = event->ino; 7755eb4cb18SDarrick J. Wong hme.e.inode.gen = event->gen; 7765eb4cb18SDarrick J. Wong break; 777e76e0e3fSDarrick J. Wong case XFS_HEALTHMON_DATADEV: 778e76e0e3fSDarrick J. Wong case XFS_HEALTHMON_LOGDEV: 779e76e0e3fSDarrick J. Wong case XFS_HEALTHMON_RTDEV: 780e76e0e3fSDarrick J. Wong hme.e.media.daddr = event->daddr; 781e76e0e3fSDarrick J. Wong hme.e.media.bbcount = event->bbcount; 782e76e0e3fSDarrick J. Wong break; 783dfa8bad3SDarrick J. Wong case XFS_HEALTHMON_FILERANGE: 784dfa8bad3SDarrick J. Wong hme.e.filerange.ino = event->fino; 785dfa8bad3SDarrick J. Wong hme.e.filerange.gen = event->fgen; 786dfa8bad3SDarrick J. Wong hme.e.filerange.pos = event->fpos; 787dfa8bad3SDarrick J. Wong hme.e.filerange.len = event->flen; 788dfa8bad3SDarrick J. Wong hme.e.filerange.error = abs(event->error); 789dfa8bad3SDarrick J. Wong break; 790b3a289a2SDarrick J. Wong default: 791b3a289a2SDarrick J. Wong break; 792b3a289a2SDarrick J. Wong } 793b3a289a2SDarrick J. Wong 794b3a289a2SDarrick J. Wong ASSERT(hm->bufhead + sizeof(hme) <= hm->bufsize); 795b3a289a2SDarrick J. Wong 796b3a289a2SDarrick J. Wong /* copy formatted object to the outbuf */ 797b3a289a2SDarrick J. Wong if (hm->bufhead + sizeof(hme) <= hm->bufsize) { 798b3a289a2SDarrick J. Wong memcpy(hm->buffer + hm->bufhead, &hme, sizeof(hme)); 799b3a289a2SDarrick J. Wong hm->bufhead += sizeof(hme); 800b3a289a2SDarrick J. Wong } 801b3a289a2SDarrick J. Wong 802b3a289a2SDarrick J. Wong return 0; 803b3a289a2SDarrick J. Wong } 804b3a289a2SDarrick J. Wong 805b3a289a2SDarrick J. Wong /* How many bytes are waiting in the outbuf to be copied? */ 806b3a289a2SDarrick J. Wong static inline size_t 807b3a289a2SDarrick J. Wong xfs_healthmon_outbuf_bytes( 808b3a289a2SDarrick J. Wong struct xfs_healthmon *hm) 809b3a289a2SDarrick J. Wong { 810b3a289a2SDarrick J. Wong if (hm->bufhead > hm->buftail) 811b3a289a2SDarrick J. Wong return hm->bufhead - hm->buftail; 812b3a289a2SDarrick J. Wong return 0; 813b3a289a2SDarrick J. Wong } 814b3a289a2SDarrick J. Wong 815b3a289a2SDarrick J. Wong /* 816b3a289a2SDarrick J. Wong * Do we have something for userspace to read? This can mean unmount events, 817b3a289a2SDarrick J. Wong * events pending in the queue, or pending bytes in the outbuf. 818b3a289a2SDarrick J. Wong */ 819b3a289a2SDarrick J. Wong static inline bool 820b3a289a2SDarrick J. Wong xfs_healthmon_has_eventdata( 821b3a289a2SDarrick J. Wong struct xfs_healthmon *hm) 822b3a289a2SDarrick J. Wong { 823b3a289a2SDarrick J. Wong /* 824b3a289a2SDarrick J. Wong * If the health monitor is already detached from the xfs_mount, we 825b3a289a2SDarrick J. Wong * want reads to return 0 bytes even if there are no events, because 826b3a289a2SDarrick J. Wong * userspace interprets that as EOF. If we race with deactivation, 827b3a289a2SDarrick J. Wong * read_iter will take the necessary locks to discover that there are 828b3a289a2SDarrick J. Wong * no events to send. 829b3a289a2SDarrick J. Wong */ 830b3a289a2SDarrick J. Wong if (hm->mount_cookie == DETACHED_MOUNT_COOKIE) 831b3a289a2SDarrick J. Wong return true; 832b3a289a2SDarrick J. Wong 833b3a289a2SDarrick J. Wong /* 834b3a289a2SDarrick J. Wong * Either there are events waiting to be formatted into the buffer, or 835b3a289a2SDarrick J. Wong * there's unread bytes in the buffer. 836b3a289a2SDarrick J. Wong */ 837b3a289a2SDarrick J. Wong return hm->events > 0 || xfs_healthmon_outbuf_bytes(hm) > 0; 838b3a289a2SDarrick J. Wong } 839b3a289a2SDarrick J. Wong 840b3a289a2SDarrick J. Wong /* Try to copy the rest of the outbuf to the iov iter. */ 841b3a289a2SDarrick J. Wong STATIC ssize_t 842b3a289a2SDarrick J. Wong xfs_healthmon_copybuf( 843b3a289a2SDarrick J. Wong struct xfs_healthmon *hm, 844b3a289a2SDarrick J. Wong struct iov_iter *to) 845b3a289a2SDarrick J. Wong { 846b3a289a2SDarrick J. Wong size_t to_copy; 847b3a289a2SDarrick J. Wong size_t w = 0; 848b3a289a2SDarrick J. Wong 849b3a289a2SDarrick J. Wong trace_xfs_healthmon_copybuf(hm, to); 850b3a289a2SDarrick J. Wong 851b3a289a2SDarrick J. Wong to_copy = xfs_healthmon_outbuf_bytes(hm); 852b3a289a2SDarrick J. Wong if (to_copy) { 853b3a289a2SDarrick J. Wong w = copy_to_iter(hm->buffer + hm->buftail, to_copy, to); 854b3a289a2SDarrick J. Wong if (!w) 855b3a289a2SDarrick J. Wong return -EFAULT; 856b3a289a2SDarrick J. Wong 857b3a289a2SDarrick J. Wong hm->buftail += w; 858b3a289a2SDarrick J. Wong } 859b3a289a2SDarrick J. Wong 860b3a289a2SDarrick J. Wong /* 861b3a289a2SDarrick J. Wong * Nothing left to copy? Reset the output buffer cursors to the start 862b3a289a2SDarrick J. Wong * since there's no live data in the buffer. 863b3a289a2SDarrick J. Wong */ 864b3a289a2SDarrick J. Wong if (xfs_healthmon_outbuf_bytes(hm) == 0) 865b3a289a2SDarrick J. Wong xfs_healthmon_reset_outbuf(hm); 866b3a289a2SDarrick J. Wong return w; 867b3a289a2SDarrick J. Wong } 868b3a289a2SDarrick J. Wong 869b3a289a2SDarrick J. Wong /* 870b3a289a2SDarrick J. Wong * Return a health monitoring event for formatting into the output buffer if 871b3a289a2SDarrick J. Wong * there's enough space in the outbuf and an event waiting for us. Caller 872b3a289a2SDarrick J. Wong * must hold i_rwsem on the healthmon file. 873b3a289a2SDarrick J. Wong */ 874b3a289a2SDarrick J. Wong static inline struct xfs_healthmon_event * 875b3a289a2SDarrick J. Wong xfs_healthmon_format_pop( 876b3a289a2SDarrick J. Wong struct xfs_healthmon *hm) 877b3a289a2SDarrick J. Wong { 878b3a289a2SDarrick J. Wong struct xfs_healthmon_event *event; 879b3a289a2SDarrick J. Wong 880b3a289a2SDarrick J. Wong if (hm->bufhead + sizeof(*event) > hm->bufsize) 881b3a289a2SDarrick J. Wong return NULL; 882b3a289a2SDarrick J. Wong 883b3a289a2SDarrick J. Wong mutex_lock(&hm->lock); 884b3a289a2SDarrick J. Wong event = hm->first_event; 885b3a289a2SDarrick J. Wong if (event) { 886b3a289a2SDarrick J. Wong if (hm->last_event == event) 887b3a289a2SDarrick J. Wong hm->last_event = NULL; 888b3a289a2SDarrick J. Wong hm->first_event = event->next; 889b3a289a2SDarrick J. Wong hm->events--; 890b3a289a2SDarrick J. Wong 891b3a289a2SDarrick J. Wong trace_xfs_healthmon_pop(hm, event); 892b3a289a2SDarrick J. Wong } 893b3a289a2SDarrick J. Wong mutex_unlock(&hm->lock); 894b3a289a2SDarrick J. Wong return event; 895b3a289a2SDarrick J. Wong } 896b3a289a2SDarrick J. Wong 897b3a289a2SDarrick J. Wong /* Allocate formatting buffer */ 898b3a289a2SDarrick J. Wong STATIC int 899b3a289a2SDarrick J. Wong xfs_healthmon_alloc_outbuf( 900b3a289a2SDarrick J. Wong struct xfs_healthmon *hm, 901b3a289a2SDarrick J. Wong size_t user_bufsize) 902b3a289a2SDarrick J. Wong { 903b3a289a2SDarrick J. Wong void *outbuf; 904b3a289a2SDarrick J. Wong size_t bufsize = 905b3a289a2SDarrick J. Wong min(XFS_HEALTHMON_MAX_OUTBUF, max(PAGE_SIZE, user_bufsize)); 906b3a289a2SDarrick J. Wong 907b3a289a2SDarrick J. Wong outbuf = kzalloc(bufsize, GFP_KERNEL); 908b3a289a2SDarrick J. Wong if (!outbuf) { 909b3a289a2SDarrick J. Wong if (bufsize == PAGE_SIZE) 910b3a289a2SDarrick J. Wong return -ENOMEM; 911b3a289a2SDarrick J. Wong 912b3a289a2SDarrick J. Wong bufsize = PAGE_SIZE; 913b3a289a2SDarrick J. Wong outbuf = kzalloc(bufsize, GFP_KERNEL); 914b3a289a2SDarrick J. Wong if (!outbuf) 915b3a289a2SDarrick J. Wong return -ENOMEM; 916b3a289a2SDarrick J. Wong } 917b3a289a2SDarrick J. Wong 918b3a289a2SDarrick J. Wong hm->buffer = outbuf; 919b3a289a2SDarrick J. Wong hm->bufsize = bufsize; 920b3a289a2SDarrick J. Wong hm->bufhead = 0; 921b3a289a2SDarrick J. Wong hm->buftail = 0; 922b3a289a2SDarrick J. Wong 923b3a289a2SDarrick J. Wong return 0; 924b3a289a2SDarrick J. Wong } 925b3a289a2SDarrick J. Wong 926b3a289a2SDarrick J. Wong /* 927b3a289a2SDarrick J. Wong * Convey queued event data to userspace. First copy any remaining bytes in 928b3a289a2SDarrick J. Wong * the outbuf, then format the oldest event into the outbuf and copy that too. 929b3a289a2SDarrick J. Wong */ 930a48373e7SDarrick J. Wong STATIC ssize_t 931a48373e7SDarrick J. Wong xfs_healthmon_read_iter( 932a48373e7SDarrick J. Wong struct kiocb *iocb, 933a48373e7SDarrick J. Wong struct iov_iter *to) 934a48373e7SDarrick J. Wong { 935b3a289a2SDarrick J. Wong struct file *file = iocb->ki_filp; 936b3a289a2SDarrick J. Wong struct inode *inode = file_inode(file); 937b3a289a2SDarrick J. Wong struct xfs_healthmon *hm = file->private_data; 938b3a289a2SDarrick J. Wong struct xfs_healthmon_event *event; 939b3a289a2SDarrick J. Wong size_t copied = 0; 940b3a289a2SDarrick J. Wong ssize_t ret = 0; 941b3a289a2SDarrick J. Wong 942b3a289a2SDarrick J. Wong if (file->f_flags & O_NONBLOCK) { 943b3a289a2SDarrick J. Wong if (!xfs_healthmon_has_eventdata(hm) || !inode_trylock(inode)) 944b3a289a2SDarrick J. Wong return -EAGAIN; 945b3a289a2SDarrick J. Wong } else { 946b3a289a2SDarrick J. Wong ret = wait_event_interruptible(hm->wait, 947b3a289a2SDarrick J. Wong xfs_healthmon_has_eventdata(hm)); 948b3a289a2SDarrick J. Wong if (ret) 949b3a289a2SDarrick J. Wong return ret; 950b3a289a2SDarrick J. Wong 951b3a289a2SDarrick J. Wong inode_lock(inode); 952b3a289a2SDarrick J. Wong } 953b3a289a2SDarrick J. Wong 954b3a289a2SDarrick J. Wong if (hm->bufsize == 0) { 955b3a289a2SDarrick J. Wong ret = xfs_healthmon_alloc_outbuf(hm, iov_iter_count(to)); 956b3a289a2SDarrick J. Wong if (ret) 957b3a289a2SDarrick J. Wong goto out_unlock; 958b3a289a2SDarrick J. Wong } 959b3a289a2SDarrick J. Wong 960b3a289a2SDarrick J. Wong trace_xfs_healthmon_read_start(hm); 961b3a289a2SDarrick J. Wong 962b3a289a2SDarrick J. Wong /* 963b3a289a2SDarrick J. Wong * If there's anything left in the output buffer, copy that before 964b3a289a2SDarrick J. Wong * formatting more events. 965b3a289a2SDarrick J. Wong */ 966b3a289a2SDarrick J. Wong ret = xfs_healthmon_copybuf(hm, to); 967b3a289a2SDarrick J. Wong if (ret < 0) 968b3a289a2SDarrick J. Wong goto out_unlock; 969b3a289a2SDarrick J. Wong copied += ret; 970b3a289a2SDarrick J. Wong 971b3a289a2SDarrick J. Wong while (iov_iter_count(to) > 0) { 972b3a289a2SDarrick J. Wong /* Format the next events into the outbuf until it's full. */ 973b3a289a2SDarrick J. Wong while ((event = xfs_healthmon_format_pop(hm)) != NULL) { 974b3a289a2SDarrick J. Wong ret = xfs_healthmon_format_v0(hm, event); 975b3a289a2SDarrick J. Wong kfree(event); 976b3a289a2SDarrick J. Wong if (ret) 977b3a289a2SDarrick J. Wong goto out_unlock; 978b3a289a2SDarrick J. Wong } 979b3a289a2SDarrick J. Wong 980b3a289a2SDarrick J. Wong /* Copy anything formatted into outbuf to userspace */ 981b3a289a2SDarrick J. Wong ret = xfs_healthmon_copybuf(hm, to); 982b3a289a2SDarrick J. Wong if (ret <= 0) 983b3a289a2SDarrick J. Wong break; 984b3a289a2SDarrick J. Wong 985b3a289a2SDarrick J. Wong copied += ret; 986b3a289a2SDarrick J. Wong } 987b3a289a2SDarrick J. Wong 988b3a289a2SDarrick J. Wong out_unlock: 989b3a289a2SDarrick J. Wong trace_xfs_healthmon_read_finish(hm); 990b3a289a2SDarrick J. Wong inode_unlock(inode); 991b3a289a2SDarrick J. Wong return copied ?: ret; 992b3a289a2SDarrick J. Wong } 993b3a289a2SDarrick J. Wong 994b3a289a2SDarrick J. Wong /* Poll for available events. */ 995b3a289a2SDarrick J. Wong STATIC __poll_t 996b3a289a2SDarrick J. Wong xfs_healthmon_poll( 997b3a289a2SDarrick J. Wong struct file *file, 998b3a289a2SDarrick J. Wong struct poll_table_struct *wait) 999b3a289a2SDarrick J. Wong { 1000b3a289a2SDarrick J. Wong struct xfs_healthmon *hm = file->private_data; 1001b3a289a2SDarrick J. Wong __poll_t mask = 0; 1002b3a289a2SDarrick J. Wong 1003b3a289a2SDarrick J. Wong poll_wait(file, &hm->wait, wait); 1004b3a289a2SDarrick J. Wong 1005b3a289a2SDarrick J. Wong if (xfs_healthmon_has_eventdata(hm)) 1006b3a289a2SDarrick J. Wong mask |= EPOLLIN; 1007b3a289a2SDarrick J. Wong return mask; 1008a48373e7SDarrick J. Wong } 1009a48373e7SDarrick J. Wong 1010a48373e7SDarrick J. Wong /* Free the health monitoring information. */ 1011a48373e7SDarrick J. Wong STATIC int 1012a48373e7SDarrick J. Wong xfs_healthmon_release( 1013a48373e7SDarrick J. Wong struct inode *inode, 1014a48373e7SDarrick J. Wong struct file *file) 1015a48373e7SDarrick J. Wong { 1016a48373e7SDarrick J. Wong struct xfs_healthmon *hm = file->private_data; 1017a48373e7SDarrick J. Wong 1018b3a289a2SDarrick J. Wong trace_xfs_healthmon_release(hm); 1019b3a289a2SDarrick J. Wong 1020a48373e7SDarrick J. Wong /* 1021a48373e7SDarrick J. Wong * We might be closing the healthmon file before the filesystem 1022a48373e7SDarrick J. Wong * unmounts, because userspace processes can terminate at any time and 1023a48373e7SDarrick J. Wong * for any reason. Null out xfs_mount::m_healthmon so that another 1024a48373e7SDarrick J. Wong * process can create another health monitor file. 1025a48373e7SDarrick J. Wong */ 1026a48373e7SDarrick J. Wong xfs_healthmon_detach(hm); 1027a48373e7SDarrick J. Wong 1028b3a289a2SDarrick J. Wong /* 1029b3a289a2SDarrick J. Wong * Wake up any readers that might be left. There shouldn't be any 1030b3a289a2SDarrick J. Wong * because the only users of the waiter are read and poll. 1031b3a289a2SDarrick J. Wong */ 1032b3a289a2SDarrick J. Wong wake_up_all(&hm->wait); 1033b3a289a2SDarrick J. Wong 1034a48373e7SDarrick J. Wong xfs_healthmon_put(hm); 1035a48373e7SDarrick J. Wong return 0; 1036a48373e7SDarrick J. Wong } 1037a48373e7SDarrick J. Wong 1038a48373e7SDarrick J. Wong /* Validate ioctl parameters. */ 1039a48373e7SDarrick J. Wong static inline bool 1040a48373e7SDarrick J. Wong xfs_healthmon_validate( 1041a48373e7SDarrick J. Wong const struct xfs_health_monitor *hmo) 1042a48373e7SDarrick J. Wong { 1043b3a289a2SDarrick J. Wong if (hmo->flags & ~XFS_HEALTH_MONITOR_ALL) 1044a48373e7SDarrick J. Wong return false; 1045b3a289a2SDarrick J. Wong if (hmo->format != XFS_HEALTH_MONITOR_FMT_V0) 1046a48373e7SDarrick J. Wong return false; 1047a48373e7SDarrick J. Wong if (memchr_inv(&hmo->pad, 0, sizeof(hmo->pad))) 1048a48373e7SDarrick J. Wong return false; 1049a48373e7SDarrick J. Wong return true; 1050a48373e7SDarrick J. Wong } 1051a48373e7SDarrick J. Wong 1052a48373e7SDarrick J. Wong /* Emit some data about the health monitoring fd. */ 1053a48373e7SDarrick J. Wong static void 1054a48373e7SDarrick J. Wong xfs_healthmon_show_fdinfo( 1055a48373e7SDarrick J. Wong struct seq_file *m, 1056a48373e7SDarrick J. Wong struct file *file) 1057a48373e7SDarrick J. Wong { 1058a48373e7SDarrick J. Wong struct xfs_healthmon *hm = file->private_data; 1059a48373e7SDarrick J. Wong 1060b3a289a2SDarrick J. Wong mutex_lock(&hm->lock); 1061b3a289a2SDarrick J. Wong seq_printf(m, "state:\t%s\ndev:\t%d:%d\nformat:\tv0\nevents:\t%llu\nlost:\t%llu\n", 1062a48373e7SDarrick J. Wong hm->mount_cookie == DETACHED_MOUNT_COOKIE ? 1063a48373e7SDarrick J. Wong "dead" : "alive", 1064b3a289a2SDarrick J. Wong MAJOR(hm->dev), MINOR(hm->dev), 1065b3a289a2SDarrick J. Wong hm->total_events, 1066b3a289a2SDarrick J. Wong hm->total_lost); 1067b3a289a2SDarrick J. Wong mutex_unlock(&hm->lock); 1068a48373e7SDarrick J. Wong } 1069a48373e7SDarrick J. Wong 1070c0e719cbSDarrick J. Wong /* Reconfigure the health monitor. */ 1071c0e719cbSDarrick J. Wong STATIC long 1072c0e719cbSDarrick J. Wong xfs_healthmon_reconfigure( 1073c0e719cbSDarrick J. Wong struct file *file, 1074c0e719cbSDarrick J. Wong unsigned int cmd, 1075c0e719cbSDarrick J. Wong void __user *arg) 1076c0e719cbSDarrick J. Wong { 1077c0e719cbSDarrick J. Wong struct xfs_health_monitor hmo; 1078c0e719cbSDarrick J. Wong struct xfs_healthmon *hm = file->private_data; 1079c0e719cbSDarrick J. Wong 1080c0e719cbSDarrick J. Wong if (copy_from_user(&hmo, arg, sizeof(hmo))) 1081c0e719cbSDarrick J. Wong return -EFAULT; 1082c0e719cbSDarrick J. Wong 1083c0e719cbSDarrick J. Wong if (!xfs_healthmon_validate(&hmo)) 1084c0e719cbSDarrick J. Wong return -EINVAL; 1085c0e719cbSDarrick J. Wong 1086c0e719cbSDarrick J. Wong mutex_lock(&hm->lock); 1087c0e719cbSDarrick J. Wong hm->verbose = !!(hmo.flags & XFS_HEALTH_MONITOR_VERBOSE); 1088c0e719cbSDarrick J. Wong mutex_unlock(&hm->lock); 1089c0e719cbSDarrick J. Wong 1090c0e719cbSDarrick J. Wong return 0; 1091c0e719cbSDarrick J. Wong } 1092c0e719cbSDarrick J. Wong 10938b85dc40SDarrick J. Wong /* Does the fd point to the same filesystem as the one we're monitoring? */ 10948b85dc40SDarrick J. Wong STATIC long 10958b85dc40SDarrick J. Wong xfs_healthmon_file_on_monitored_fs( 10968b85dc40SDarrick J. Wong struct file *file, 10978b85dc40SDarrick J. Wong unsigned int cmd, 10988b85dc40SDarrick J. Wong void __user *arg) 10998b85dc40SDarrick J. Wong { 11008b85dc40SDarrick J. Wong struct xfs_health_file_on_monitored_fs hms; 11018b85dc40SDarrick J. Wong struct xfs_healthmon *hm = file->private_data; 11028b85dc40SDarrick J. Wong struct inode *hms_inode; 11038b85dc40SDarrick J. Wong 11048b85dc40SDarrick J. Wong if (copy_from_user(&hms, arg, sizeof(hms))) 11058b85dc40SDarrick J. Wong return -EFAULT; 11068b85dc40SDarrick J. Wong 11078b85dc40SDarrick J. Wong if (hms.flags) 11088b85dc40SDarrick J. Wong return -EINVAL; 11098b85dc40SDarrick J. Wong 11108b85dc40SDarrick J. Wong CLASS(fd, hms_fd)(hms.fd); 11118b85dc40SDarrick J. Wong if (fd_empty(hms_fd)) 11128b85dc40SDarrick J. Wong return -EBADF; 11138b85dc40SDarrick J. Wong 11148b85dc40SDarrick J. Wong hms_inode = file_inode(fd_file(hms_fd)); 11158b85dc40SDarrick J. Wong mutex_lock(&hm->lock); 11168b85dc40SDarrick J. Wong if (hm->mount_cookie != (uintptr_t)hms_inode->i_sb) { 11178b85dc40SDarrick J. Wong mutex_unlock(&hm->lock); 11188b85dc40SDarrick J. Wong return -ESTALE; 11198b85dc40SDarrick J. Wong } 11208b85dc40SDarrick J. Wong 11218b85dc40SDarrick J. Wong mutex_unlock(&hm->lock); 11228b85dc40SDarrick J. Wong return 0; 11238b85dc40SDarrick J. Wong } 11248b85dc40SDarrick J. Wong 1125c0e719cbSDarrick J. Wong /* Handle ioctls for the health monitoring thread. */ 1126c0e719cbSDarrick J. Wong STATIC long 1127c0e719cbSDarrick J. Wong xfs_healthmon_ioctl( 1128c0e719cbSDarrick J. Wong struct file *file, 1129c0e719cbSDarrick J. Wong unsigned int cmd, 1130c0e719cbSDarrick J. Wong unsigned long p) 1131c0e719cbSDarrick J. Wong { 1132c0e719cbSDarrick J. Wong void __user *arg = (void __user *)p; 1133c0e719cbSDarrick J. Wong 1134c0e719cbSDarrick J. Wong switch (cmd) { 1135c0e719cbSDarrick J. Wong case XFS_IOC_HEALTH_MONITOR: 1136c0e719cbSDarrick J. Wong return xfs_healthmon_reconfigure(file, cmd, arg); 11378b85dc40SDarrick J. Wong case XFS_IOC_HEALTH_FD_ON_MONITORED_FS: 11388b85dc40SDarrick J. Wong return xfs_healthmon_file_on_monitored_fs(file, cmd, arg); 1139c0e719cbSDarrick J. Wong default: 1140c0e719cbSDarrick J. Wong break; 1141c0e719cbSDarrick J. Wong } 1142c0e719cbSDarrick J. Wong 1143c0e719cbSDarrick J. Wong return -ENOTTY; 1144c0e719cbSDarrick J. Wong } 1145c0e719cbSDarrick J. Wong 1146a48373e7SDarrick J. Wong static const struct file_operations xfs_healthmon_fops = { 1147a48373e7SDarrick J. Wong .owner = THIS_MODULE, 1148a48373e7SDarrick J. Wong .show_fdinfo = xfs_healthmon_show_fdinfo, 1149a48373e7SDarrick J. Wong .read_iter = xfs_healthmon_read_iter, 1150b3a289a2SDarrick J. Wong .poll = xfs_healthmon_poll, 1151a48373e7SDarrick J. Wong .release = xfs_healthmon_release, 1152c0e719cbSDarrick J. Wong .unlocked_ioctl = xfs_healthmon_ioctl, 1153a48373e7SDarrick J. Wong }; 1154a48373e7SDarrick J. Wong 1155a48373e7SDarrick J. Wong /* 1156a48373e7SDarrick J. Wong * Create a health monitoring file. Returns an index to the fd table or a 1157a48373e7SDarrick J. Wong * negative errno. 1158a48373e7SDarrick J. Wong */ 1159a48373e7SDarrick J. Wong long 1160a48373e7SDarrick J. Wong xfs_ioc_health_monitor( 1161a48373e7SDarrick J. Wong struct file *file, 1162a48373e7SDarrick J. Wong struct xfs_health_monitor __user *arg) 1163a48373e7SDarrick J. Wong { 1164a48373e7SDarrick J. Wong struct xfs_health_monitor hmo; 1165b3a289a2SDarrick J. Wong struct xfs_healthmon_event *running_event; 1166a48373e7SDarrick J. Wong struct xfs_healthmon *hm; 1167a48373e7SDarrick J. Wong struct xfs_inode *ip = XFS_I(file_inode(file)); 1168a48373e7SDarrick J. Wong struct xfs_mount *mp = ip->i_mount; 1169a48373e7SDarrick J. Wong int ret; 1170a48373e7SDarrick J. Wong 1171a48373e7SDarrick J. Wong /* 1172a48373e7SDarrick J. Wong * The only intended user of the health monitoring system should be the 1173a48373e7SDarrick J. Wong * xfs_healer daemon running on behalf of the whole filesystem in the 1174a48373e7SDarrick J. Wong * initial user namespace. IOWs, we don't allow unprivileged userspace 1175a48373e7SDarrick J. Wong * (they can use fsnotify) nor do we allow containers. 1176a48373e7SDarrick J. Wong */ 1177a48373e7SDarrick J. Wong if (!capable(CAP_SYS_ADMIN)) 1178a48373e7SDarrick J. Wong return -EPERM; 1179a48373e7SDarrick J. Wong if (ip->i_ino != mp->m_sb.sb_rootino) 1180a48373e7SDarrick J. Wong return -EPERM; 1181a48373e7SDarrick J. Wong if (current_user_ns() != &init_user_ns) 1182a48373e7SDarrick J. Wong return -EPERM; 1183a48373e7SDarrick J. Wong 1184a48373e7SDarrick J. Wong if (copy_from_user(&hmo, arg, sizeof(hmo))) 1185a48373e7SDarrick J. Wong return -EFAULT; 1186a48373e7SDarrick J. Wong 1187a48373e7SDarrick J. Wong if (!xfs_healthmon_validate(&hmo)) 1188a48373e7SDarrick J. Wong return -EINVAL; 1189a48373e7SDarrick J. Wong 1190*bf4afc53SLinus Torvalds hm = kzalloc_obj(*hm); 1191a48373e7SDarrick J. Wong if (!hm) 1192a48373e7SDarrick J. Wong return -ENOMEM; 1193a48373e7SDarrick J. Wong hm->dev = mp->m_super->s_dev; 1194a48373e7SDarrick J. Wong refcount_set(&hm->ref, 1); 1195a48373e7SDarrick J. Wong 1196b3a289a2SDarrick J. Wong mutex_init(&hm->lock); 1197b3a289a2SDarrick J. Wong init_waitqueue_head(&hm->wait); 1198b3a289a2SDarrick J. Wong 1199b3a289a2SDarrick J. Wong if (hmo.flags & XFS_HEALTH_MONITOR_VERBOSE) 1200b3a289a2SDarrick J. Wong hm->verbose = true; 1201b3a289a2SDarrick J. Wong 1202b3a289a2SDarrick J. Wong /* Queue up the first event that lets the client know we're running. */ 120369050f8dSKees Cook running_event = kzalloc_obj(struct xfs_healthmon_event, GFP_NOFS); 1204b3a289a2SDarrick J. Wong if (!running_event) { 1205b3a289a2SDarrick J. Wong ret = -ENOMEM; 1206b3a289a2SDarrick J. Wong goto out_hm; 1207b3a289a2SDarrick J. Wong } 1208b3a289a2SDarrick J. Wong running_event->type = XFS_HEALTHMON_RUNNING; 1209b3a289a2SDarrick J. Wong running_event->domain = XFS_HEALTHMON_MOUNT; 1210b3a289a2SDarrick J. Wong __xfs_healthmon_insert(hm, running_event); 1211b3a289a2SDarrick J. Wong 1212a48373e7SDarrick J. Wong /* 121325ca57faSDarrick J. Wong * Preallocate the unmount event so that we can't fail to notify the 121425ca57faSDarrick J. Wong * filesystem later. This is key for triggering fast exit of the 121525ca57faSDarrick J. Wong * xfs_healer daemon. 121625ca57faSDarrick J. Wong */ 121769050f8dSKees Cook hm->unmount_event = kzalloc_obj(struct xfs_healthmon_event, GFP_NOFS); 121825ca57faSDarrick J. Wong if (!hm->unmount_event) { 121925ca57faSDarrick J. Wong ret = -ENOMEM; 122025ca57faSDarrick J. Wong goto out_hm; 122125ca57faSDarrick J. Wong } 122225ca57faSDarrick J. Wong hm->unmount_event->type = XFS_HEALTHMON_UNMOUNT; 122325ca57faSDarrick J. Wong hm->unmount_event->domain = XFS_HEALTHMON_MOUNT; 122425ca57faSDarrick J. Wong 122525ca57faSDarrick J. Wong /* 1226a48373e7SDarrick J. Wong * Try to attach this health monitor to the xfs_mount. The monitor is 1227a48373e7SDarrick J. Wong * considered live and will receive events if this succeeds. 1228a48373e7SDarrick J. Wong */ 1229a48373e7SDarrick J. Wong ret = xfs_healthmon_attach(mp, hm); 1230a48373e7SDarrick J. Wong if (ret) 1231a48373e7SDarrick J. Wong goto out_hm; 1232a48373e7SDarrick J. Wong 1233a48373e7SDarrick J. Wong /* 1234a48373e7SDarrick J. Wong * Create the anonymous file and install a fd for it. If it succeeds, 1235a48373e7SDarrick J. Wong * the file owns hm and can go away at any time, so we must not access 1236a48373e7SDarrick J. Wong * it again. This must go last because we can't undo a fd table 1237a48373e7SDarrick J. Wong * installation. 1238a48373e7SDarrick J. Wong */ 1239a48373e7SDarrick J. Wong ret = anon_inode_getfd("xfs_healthmon", &xfs_healthmon_fops, hm, 1240a48373e7SDarrick J. Wong O_CLOEXEC | O_RDONLY); 1241a48373e7SDarrick J. Wong if (ret < 0) 1242a48373e7SDarrick J. Wong goto out_mp; 1243a48373e7SDarrick J. Wong 1244b3a289a2SDarrick J. Wong trace_xfs_healthmon_create(mp->m_super->s_dev, hmo.flags, hmo.format); 1245b3a289a2SDarrick J. Wong 1246a48373e7SDarrick J. Wong return ret; 1247a48373e7SDarrick J. Wong 1248a48373e7SDarrick J. Wong out_mp: 1249a48373e7SDarrick J. Wong xfs_healthmon_detach(hm); 1250a48373e7SDarrick J. Wong out_hm: 1251a48373e7SDarrick J. Wong ASSERT(refcount_read(&hm->ref) == 1); 1252a48373e7SDarrick J. Wong xfs_healthmon_put(hm); 1253a48373e7SDarrick J. Wong return ret; 1254a48373e7SDarrick J. Wong } 1255