xref: /linux/fs/fserror.c (revision dd466ea0029961ee0ee6e8e468faa1506275c8a9)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2025 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include <linux/fs.h>
7 #include <linux/fsnotify.h>
8 #include <linux/mempool.h>
9 #include <linux/fserror.h>
10 
/*
 * Element count that fserror_init() passes to mempool_init_kmalloc_pool()
 * when setting up fserror_events_pool.
 */
#define FSERROR_DEFAULT_EVENT_POOL_SIZE		(32)

/*
 * Backing pool for struct fserror_event objects: fserror_alloc_event()
 * allocates from it and fserror_free_event() returns events to it.
 */
static struct mempool fserror_events_pool;
14 
15 void fserror_mount(struct super_block *sb)
16 {
17 	/*
18 	 * The pending error counter is biased by 1 so that we don't wake_var
19 	 * until we're actually trying to unmount.
20 	 */
21 	refcount_set(&sb->s_pending_errors, 1);
22 }
23 
24 void fserror_unmount(struct super_block *sb)
25 {
26 	/*
27 	 * If we don't drop the pending error count to zero, then wait for it
28 	 * to drop below 1, which means that the pending errors cleared and
29 	 * hopefully we didn't saturate with 1 billion+ concurrent events.
30 	 */
31 	if (!refcount_dec_and_test(&sb->s_pending_errors))
32 		wait_var_event(&sb->s_pending_errors,
33 			       refcount_read(&sb->s_pending_errors) < 1);
34 }
35 
36 static inline void fserror_pending_dec(struct super_block *sb)
37 {
38 	if (refcount_dec_and_test(&sb->s_pending_errors))
39 		wake_up_var(&sb->s_pending_errors);
40 }
41 
42 static inline void fserror_free_event(struct fserror_event *event)
43 {
44 	fserror_pending_dec(event->sb);
45 	mempool_free(event, &fserror_events_pool);
46 }
47 
48 static void fserror_worker(struct work_struct *work)
49 {
50 	struct fserror_event *event =
51 			container_of(work, struct fserror_event, work);
52 	struct super_block *sb = event->sb;
53 
54 	if (sb->s_flags & SB_ACTIVE) {
55 		struct fs_error_report report = {
56 			/* send positive error number to userspace */
57 			.error = -event->error,
58 			.inode = event->inode,
59 			.sb = event->sb,
60 		};
61 
62 		if (sb->s_op->report_error)
63 			sb->s_op->report_error(event);
64 
65 		fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR, NULL, NULL,
66 			 NULL, 0);
67 	}
68 
69 	iput(event->inode);
70 	fserror_free_event(event);
71 }
72 
73 static inline struct fserror_event *fserror_alloc_event(struct super_block *sb,
74 							gfp_t gfp_flags)
75 {
76 	struct fserror_event *event = NULL;
77 
78 	/*
79 	 * If pending_errors already reached zero or is no longer active,
80 	 * the superblock is being deactivated so there's no point in
81 	 * continuing.
82 	 *
83 	 * The order of the check of s_pending_errors and SB_ACTIVE are
84 	 * mandated by order of accesses in generic_shutdown_super and
85 	 * fserror_unmount.  Barriers are implicitly provided by the refcount
86 	 * manipulations in this function and fserror_unmount.
87 	 */
88 	if (!refcount_inc_not_zero(&sb->s_pending_errors))
89 		return NULL;
90 	if (!(sb->s_flags & SB_ACTIVE))
91 		goto out_pending;
92 
93 	event = mempool_alloc(&fserror_events_pool, gfp_flags);
94 	if (!event)
95 		goto out_pending;
96 
97 	/* mempool_alloc doesn't support GFP_ZERO */
98 	memset(event, 0, sizeof(*event));
99 	event->sb = sb;
100 	INIT_WORK(&event->work, fserror_worker);
101 
102 	return event;
103 
104 out_pending:
105 	fserror_pending_dec(sb);
106 	return NULL;
107 }
108 
109 /**
110  * fserror_report - report a filesystem error of some kind
111  *
112  * @sb:		superblock of the filesystem
113  * @inode:	inode within that filesystem, if applicable
114  * @type:	type of error encountered
115  * @pos:	start of inode range affected, if applicable
116  * @len:	length of inode range affected, if applicable
117  * @error:	error number encountered, must be negative
118  * @gfp:	memory allocation flags for conveying the event to a worker,
119  *		since this function can be called from atomic contexts
120  *
121  * Report details of a filesystem error to the super_operations::report_error
122  * callback if present; and to fsnotify for distribution to userspace.  @sb,
123  * @gfp, @type, and @error must all be specified.  For file I/O errors, the
124  * @inode, @pos, and @len fields must also be specified.  For file metadata
125  * errors, @inode must be specified.  If @inode is not NULL, then @inode->i_sb
126  * must point to @sb.
127  *
128  * Reporting work is deferred to a workqueue to ensure that ->report_error is
129  * called from process context without any locks held.  An active reference to
130  * the inode is maintained until event handling is complete, and unmount will
131  * wait for queued events to drain.
132  */
133 void fserror_report(struct super_block *sb, struct inode *inode,
134 		    enum fserror_type type, loff_t pos, u64 len, int error,
135 		    gfp_t gfp)
136 {
137 	struct fserror_event *event;
138 
139 	/* sb and inode must be from the same filesystem */
140 	WARN_ON_ONCE(inode && inode->i_sb != sb);
141 
142 	/* error number must be negative */
143 	WARN_ON_ONCE(error >= 0);
144 
145 	event = fserror_alloc_event(sb, gfp);
146 	if (!event)
147 		goto lost;
148 
149 	event->type = type;
150 	event->pos = pos;
151 	event->len = len;
152 	event->error = error;
153 
154 	/*
155 	 * Can't iput from non-sleeping context, so grabbing another reference
156 	 * to the inode must be the last thing before submitting the event.
157 	 */
158 	if (inode) {
159 		event->inode = igrab(inode);
160 		if (!event->inode)
161 			goto lost_event;
162 	}
163 
164 	/*
165 	 * Use schedule_work here even if we're already in process context so
166 	 * that fsnotify and super_operations::report_error implementations are
167 	 * guaranteed to run in process context without any locks held.  Since
168 	 * errors are supposed to be rare, the overhead shouldn't kill us any
169 	 * more than the failing device will.
170 	 */
171 	schedule_work(&event->work);
172 	return;
173 
174 lost_event:
175 	fserror_free_event(event);
176 lost:
177 	if (inode)
178 		pr_err_ratelimited(
179  "%s: lost file I/O error report for ino %lu type %u pos 0x%llx len 0x%llx error %d",
180 		       sb->s_id, inode->i_ino, type, pos, len, error);
181 	else
182 		pr_err_ratelimited(
183  "%s: lost filesystem error report for type %u error %d",
184 		       sb->s_id, type, error);
185 }
186 EXPORT_SYMBOL_GPL(fserror_report);
187 
188 static int __init fserror_init(void)
189 {
190 	return mempool_init_kmalloc_pool(&fserror_events_pool,
191 					 FSERROR_DEFAULT_EVENT_POOL_SIZE,
192 					 sizeof(struct fserror_event));
193 }
194 fs_initcall(fserror_init);
195