1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (c) 2025 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include <linux/fs.h> 7 #include <linux/fsnotify.h> 8 #include <linux/mempool.h> 9 #include <linux/fserror.h> 10 11 #define FSERROR_DEFAULT_EVENT_POOL_SIZE (32) 12 13 static struct mempool fserror_events_pool; 14 15 void fserror_mount(struct super_block *sb) 16 { 17 /* 18 * The pending error counter is biased by 1 so that we don't wake_var 19 * until we're actually trying to unmount. 20 */ 21 refcount_set(&sb->s_pending_errors, 1); 22 } 23 24 void fserror_unmount(struct super_block *sb) 25 { 26 /* 27 * If we don't drop the pending error count to zero, then wait for it 28 * to drop below 1, which means that the pending errors cleared and 29 * hopefully we didn't saturate with 1 billion+ concurrent events. 30 */ 31 if (!refcount_dec_and_test(&sb->s_pending_errors)) 32 wait_var_event(&sb->s_pending_errors, 33 refcount_read(&sb->s_pending_errors) < 1); 34 } 35 36 static inline void fserror_pending_dec(struct super_block *sb) 37 { 38 if (refcount_dec_and_test(&sb->s_pending_errors)) 39 wake_up_var(&sb->s_pending_errors); 40 } 41 42 static inline void fserror_free_event(struct fserror_event *event) 43 { 44 fserror_pending_dec(event->sb); 45 mempool_free(event, &fserror_events_pool); 46 } 47 48 static void fserror_worker(struct work_struct *work) 49 { 50 struct fserror_event *event = 51 container_of(work, struct fserror_event, work); 52 struct super_block *sb = event->sb; 53 54 if (sb->s_flags & SB_ACTIVE) { 55 struct fs_error_report report = { 56 /* send positive error number to userspace */ 57 .error = -event->error, 58 .inode = event->inode, 59 .sb = event->sb, 60 }; 61 62 if (sb->s_op->report_error) 63 sb->s_op->report_error(event); 64 65 fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR, NULL, NULL, 66 NULL, 0); 67 } 68 69 iput(event->inode); 70 fserror_free_event(event); 71 } 72 73 static inline struct fserror_event *fserror_alloc_event(struct super_block *sb, 74 gfp_t gfp_flags) 75 { 76 struct fserror_event *event = NULL; 77 78 /* 79 * If pending_errors already reached zero or is no longer active, 80 * the superblock is being deactivated so there's no point in 81 * continuing. 82 * 83 * The order of the check of s_pending_errors and SB_ACTIVE are 84 * mandated by order of accesses in generic_shutdown_super and 85 * fserror_unmount. Barriers are implicitly provided by the refcount 86 * manipulations in this function and fserror_unmount. 87 */ 88 if (!refcount_inc_not_zero(&sb->s_pending_errors)) 89 return NULL; 90 if (!(sb->s_flags & SB_ACTIVE)) 91 goto out_pending; 92 93 event = mempool_alloc(&fserror_events_pool, gfp_flags); 94 if (!event) 95 goto out_pending; 96 97 /* mempool_alloc doesn't support GFP_ZERO */ 98 memset(event, 0, sizeof(*event)); 99 event->sb = sb; 100 INIT_WORK(&event->work, fserror_worker); 101 102 return event; 103 104 out_pending: 105 fserror_pending_dec(sb); 106 return NULL; 107 } 108 109 /** 110 * fserror_report - report a filesystem error of some kind 111 * 112 * @sb: superblock of the filesystem 113 * @inode: inode within that filesystem, if applicable 114 * @type: type of error encountered 115 * @pos: start of inode range affected, if applicable 116 * @len: length of inode range affected, if applicable 117 * @error: error number encountered, must be negative 118 * @gfp: memory allocation flags for conveying the event to a worker, 119 * since this function can be called from atomic contexts 120 * 121 * Report details of a filesystem error to the super_operations::report_error 122 * callback if present; and to fsnotify for distribution to userspace. @sb, 123 * @gfp, @type, and @error must all be specified. For file I/O errors, the 124 * @inode, @pos, and @len fields must also be specified. For file metadata 125 * errors, @inode must be specified. If @inode is not NULL, then @inode->i_sb 126 * must point to @sb. 127 * 128 * Reporting work is deferred to a workqueue to ensure that ->report_error is 129 * called from process context without any locks held. An active reference to 130 * the inode is maintained until event handling is complete, and unmount will 131 * wait for queued events to drain. 132 */ 133 void fserror_report(struct super_block *sb, struct inode *inode, 134 enum fserror_type type, loff_t pos, u64 len, int error, 135 gfp_t gfp) 136 { 137 struct fserror_event *event; 138 139 /* sb and inode must be from the same filesystem */ 140 WARN_ON_ONCE(inode && inode->i_sb != sb); 141 142 /* error number must be negative */ 143 WARN_ON_ONCE(error >= 0); 144 145 event = fserror_alloc_event(sb, gfp); 146 if (!event) 147 goto lost; 148 149 event->type = type; 150 event->pos = pos; 151 event->len = len; 152 event->error = error; 153 154 /* 155 * Can't iput from non-sleeping context, so grabbing another reference 156 * to the inode must be the last thing before submitting the event. 157 */ 158 if (inode) { 159 event->inode = igrab(inode); 160 if (!event->inode) 161 goto lost_event; 162 } 163 164 /* 165 * Use schedule_work here even if we're already in process context so 166 * that fsnotify and super_operations::report_error implementations are 167 * guaranteed to run in process context without any locks held. Since 168 * errors are supposed to be rare, the overhead shouldn't kill us any 169 * more than the failing device will. 170 */ 171 schedule_work(&event->work); 172 return; 173 174 lost_event: 175 fserror_free_event(event); 176 lost: 177 if (inode) 178 pr_err_ratelimited( 179 "%s: lost file I/O error report for ino %lu type %u pos 0x%llx len 0x%llx error %d", 180 sb->s_id, inode->i_ino, type, pos, len, error); 181 else 182 pr_err_ratelimited( 183 "%s: lost filesystem error report for type %u error %d", 184 sb->s_id, type, error); 185 } 186 EXPORT_SYMBOL_GPL(fserror_report); 187 188 static int __init fserror_init(void) 189 { 190 return mempool_init_kmalloc_pool(&fserror_events_pool, 191 FSERROR_DEFAULT_EVENT_POOL_SIZE, 192 sizeof(struct fserror_event)); 193 } 194 fs_initcall(fserror_init); 195