xref: /linux/fs/fserror.c (revision c17ee635fd3a482b2ad2bf5e269755c2eae5f25e)
1*21945e6cSDarrick J. Wong // SPDX-License-Identifier: GPL-2.0-or-later
2*21945e6cSDarrick J. Wong /*
3*21945e6cSDarrick J. Wong  * Copyright (c) 2025 Oracle.  All Rights Reserved.
4*21945e6cSDarrick J. Wong  * Author: Darrick J. Wong <djwong@kernel.org>
5*21945e6cSDarrick J. Wong  */
6*21945e6cSDarrick J. Wong #include <linux/fs.h>
7*21945e6cSDarrick J. Wong #include <linux/fsnotify.h>
8*21945e6cSDarrick J. Wong #include <linux/mempool.h>
9*21945e6cSDarrick J. Wong #include <linux/fserror.h>
10*21945e6cSDarrick J. Wong 
11*21945e6cSDarrick J. Wong #define FSERROR_DEFAULT_EVENT_POOL_SIZE		(32)
12*21945e6cSDarrick J. Wong 
13*21945e6cSDarrick J. Wong static struct mempool fserror_events_pool;
14*21945e6cSDarrick J. Wong 
15*21945e6cSDarrick J. Wong void fserror_mount(struct super_block *sb)
16*21945e6cSDarrick J. Wong {
17*21945e6cSDarrick J. Wong 	/*
18*21945e6cSDarrick J. Wong 	 * The pending error counter is biased by 1 so that we don't wake_var
19*21945e6cSDarrick J. Wong 	 * until we're actually trying to unmount.
20*21945e6cSDarrick J. Wong 	 */
21*21945e6cSDarrick J. Wong 	refcount_set(&sb->s_pending_errors, 1);
22*21945e6cSDarrick J. Wong }
23*21945e6cSDarrick J. Wong 
24*21945e6cSDarrick J. Wong void fserror_unmount(struct super_block *sb)
25*21945e6cSDarrick J. Wong {
26*21945e6cSDarrick J. Wong 	/*
27*21945e6cSDarrick J. Wong 	 * If we don't drop the pending error count to zero, then wait for it
28*21945e6cSDarrick J. Wong 	 * to drop below 1, which means that the pending errors cleared and
29*21945e6cSDarrick J. Wong 	 * hopefully we didn't saturate with 1 billion+ concurrent events.
30*21945e6cSDarrick J. Wong 	 */
31*21945e6cSDarrick J. Wong 	if (!refcount_dec_and_test(&sb->s_pending_errors))
32*21945e6cSDarrick J. Wong 		wait_var_event(&sb->s_pending_errors,
33*21945e6cSDarrick J. Wong 			       refcount_read(&sb->s_pending_errors) < 1);
34*21945e6cSDarrick J. Wong }
35*21945e6cSDarrick J. Wong 
36*21945e6cSDarrick J. Wong static inline void fserror_pending_dec(struct super_block *sb)
37*21945e6cSDarrick J. Wong {
38*21945e6cSDarrick J. Wong 	if (refcount_dec_and_test(&sb->s_pending_errors))
39*21945e6cSDarrick J. Wong 		wake_up_var(&sb->s_pending_errors);
40*21945e6cSDarrick J. Wong }
41*21945e6cSDarrick J. Wong 
42*21945e6cSDarrick J. Wong static inline void fserror_free_event(struct fserror_event *event)
43*21945e6cSDarrick J. Wong {
44*21945e6cSDarrick J. Wong 	fserror_pending_dec(event->sb);
45*21945e6cSDarrick J. Wong 	mempool_free(event, &fserror_events_pool);
46*21945e6cSDarrick J. Wong }
47*21945e6cSDarrick J. Wong 
48*21945e6cSDarrick J. Wong static void fserror_worker(struct work_struct *work)
49*21945e6cSDarrick J. Wong {
50*21945e6cSDarrick J. Wong 	struct fserror_event *event =
51*21945e6cSDarrick J. Wong 			container_of(work, struct fserror_event, work);
52*21945e6cSDarrick J. Wong 	struct super_block *sb = event->sb;
53*21945e6cSDarrick J. Wong 
54*21945e6cSDarrick J. Wong 	if (sb->s_flags & SB_ACTIVE) {
55*21945e6cSDarrick J. Wong 		struct fs_error_report report = {
56*21945e6cSDarrick J. Wong 			/* send positive error number to userspace */
57*21945e6cSDarrick J. Wong 			.error = -event->error,
58*21945e6cSDarrick J. Wong 			.inode = event->inode,
59*21945e6cSDarrick J. Wong 			.sb = event->sb,
60*21945e6cSDarrick J. Wong 		};
61*21945e6cSDarrick J. Wong 
62*21945e6cSDarrick J. Wong 		if (sb->s_op->report_error)
63*21945e6cSDarrick J. Wong 			sb->s_op->report_error(event);
64*21945e6cSDarrick J. Wong 
65*21945e6cSDarrick J. Wong 		fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR, NULL, NULL,
66*21945e6cSDarrick J. Wong 			 NULL, 0);
67*21945e6cSDarrick J. Wong 	}
68*21945e6cSDarrick J. Wong 
69*21945e6cSDarrick J. Wong 	iput(event->inode);
70*21945e6cSDarrick J. Wong 	fserror_free_event(event);
71*21945e6cSDarrick J. Wong }
72*21945e6cSDarrick J. Wong 
73*21945e6cSDarrick J. Wong static inline struct fserror_event *fserror_alloc_event(struct super_block *sb,
74*21945e6cSDarrick J. Wong 							gfp_t gfp_flags)
75*21945e6cSDarrick J. Wong {
76*21945e6cSDarrick J. Wong 	struct fserror_event *event = NULL;
77*21945e6cSDarrick J. Wong 
78*21945e6cSDarrick J. Wong 	/*
79*21945e6cSDarrick J. Wong 	 * If pending_errors already reached zero or is no longer active,
80*21945e6cSDarrick J. Wong 	 * the superblock is being deactivated so there's no point in
81*21945e6cSDarrick J. Wong 	 * continuing.
82*21945e6cSDarrick J. Wong 	 *
83*21945e6cSDarrick J. Wong 	 * The order of the check of s_pending_errors and SB_ACTIVE are
84*21945e6cSDarrick J. Wong 	 * mandated by order of accesses in generic_shutdown_super and
85*21945e6cSDarrick J. Wong 	 * fserror_unmount.  Barriers are implicitly provided by the refcount
86*21945e6cSDarrick J. Wong 	 * manipulations in this function and fserror_unmount.
87*21945e6cSDarrick J. Wong 	 */
88*21945e6cSDarrick J. Wong 	if (!refcount_inc_not_zero(&sb->s_pending_errors))
89*21945e6cSDarrick J. Wong 		return NULL;
90*21945e6cSDarrick J. Wong 	if (!(sb->s_flags & SB_ACTIVE))
91*21945e6cSDarrick J. Wong 		goto out_pending;
92*21945e6cSDarrick J. Wong 
93*21945e6cSDarrick J. Wong 	event = mempool_alloc(&fserror_events_pool, gfp_flags);
94*21945e6cSDarrick J. Wong 	if (!event)
95*21945e6cSDarrick J. Wong 		goto out_pending;
96*21945e6cSDarrick J. Wong 
97*21945e6cSDarrick J. Wong 	/* mempool_alloc doesn't support GFP_ZERO */
98*21945e6cSDarrick J. Wong 	memset(event, 0, sizeof(*event));
99*21945e6cSDarrick J. Wong 	event->sb = sb;
100*21945e6cSDarrick J. Wong 	INIT_WORK(&event->work, fserror_worker);
101*21945e6cSDarrick J. Wong 
102*21945e6cSDarrick J. Wong 	return event;
103*21945e6cSDarrick J. Wong 
104*21945e6cSDarrick J. Wong out_pending:
105*21945e6cSDarrick J. Wong 	fserror_pending_dec(sb);
106*21945e6cSDarrick J. Wong 	return NULL;
107*21945e6cSDarrick J. Wong }
108*21945e6cSDarrick J. Wong 
109*21945e6cSDarrick J. Wong /**
110*21945e6cSDarrick J. Wong  * fserror_report - report a filesystem error of some kind
111*21945e6cSDarrick J. Wong  *
112*21945e6cSDarrick J. Wong  * @sb:		superblock of the filesystem
113*21945e6cSDarrick J. Wong  * @inode:	inode within that filesystem, if applicable
114*21945e6cSDarrick J. Wong  * @type:	type of error encountered
115*21945e6cSDarrick J. Wong  * @pos:	start of inode range affected, if applicable
116*21945e6cSDarrick J. Wong  * @len:	length of inode range affected, if applicable
117*21945e6cSDarrick J. Wong  * @error:	error number encountered, must be negative
118*21945e6cSDarrick J. Wong  * @gfp:	memory allocation flags for conveying the event to a worker,
119*21945e6cSDarrick J. Wong  *		since this function can be called from atomic contexts
120*21945e6cSDarrick J. Wong  *
121*21945e6cSDarrick J. Wong  * Report details of a filesystem error to the super_operations::report_error
122*21945e6cSDarrick J. Wong  * callback if present; and to fsnotify for distribution to userspace.  @sb,
123*21945e6cSDarrick J. Wong  * @gfp, @type, and @error must all be specified.  For file I/O errors, the
124*21945e6cSDarrick J. Wong  * @inode, @pos, and @len fields must also be specified.  For file metadata
125*21945e6cSDarrick J. Wong  * errors, @inode must be specified.  If @inode is not NULL, then @inode->i_sb
126*21945e6cSDarrick J. Wong  * must point to @sb.
127*21945e6cSDarrick J. Wong  *
128*21945e6cSDarrick J. Wong  * Reporting work is deferred to a workqueue to ensure that ->report_error is
129*21945e6cSDarrick J. Wong  * called from process context without any locks held.  An active reference to
130*21945e6cSDarrick J. Wong  * the inode is maintained until event handling is complete, and unmount will
131*21945e6cSDarrick J. Wong  * wait for queued events to drain.
132*21945e6cSDarrick J. Wong  */
133*21945e6cSDarrick J. Wong void fserror_report(struct super_block *sb, struct inode *inode,
134*21945e6cSDarrick J. Wong 		    enum fserror_type type, loff_t pos, u64 len, int error,
135*21945e6cSDarrick J. Wong 		    gfp_t gfp)
136*21945e6cSDarrick J. Wong {
137*21945e6cSDarrick J. Wong 	struct fserror_event *event;
138*21945e6cSDarrick J. Wong 
139*21945e6cSDarrick J. Wong 	/* sb and inode must be from the same filesystem */
140*21945e6cSDarrick J. Wong 	WARN_ON_ONCE(inode && inode->i_sb != sb);
141*21945e6cSDarrick J. Wong 
142*21945e6cSDarrick J. Wong 	/* error number must be negative */
143*21945e6cSDarrick J. Wong 	WARN_ON_ONCE(error >= 0);
144*21945e6cSDarrick J. Wong 
145*21945e6cSDarrick J. Wong 	event = fserror_alloc_event(sb, gfp);
146*21945e6cSDarrick J. Wong 	if (!event)
147*21945e6cSDarrick J. Wong 		goto lost;
148*21945e6cSDarrick J. Wong 
149*21945e6cSDarrick J. Wong 	event->type = type;
150*21945e6cSDarrick J. Wong 	event->pos = pos;
151*21945e6cSDarrick J. Wong 	event->len = len;
152*21945e6cSDarrick J. Wong 	event->error = error;
153*21945e6cSDarrick J. Wong 
154*21945e6cSDarrick J. Wong 	/*
155*21945e6cSDarrick J. Wong 	 * Can't iput from non-sleeping context, so grabbing another reference
156*21945e6cSDarrick J. Wong 	 * to the inode must be the last thing before submitting the event.
157*21945e6cSDarrick J. Wong 	 */
158*21945e6cSDarrick J. Wong 	if (inode) {
159*21945e6cSDarrick J. Wong 		event->inode = igrab(inode);
160*21945e6cSDarrick J. Wong 		if (!event->inode)
161*21945e6cSDarrick J. Wong 			goto lost_event;
162*21945e6cSDarrick J. Wong 	}
163*21945e6cSDarrick J. Wong 
164*21945e6cSDarrick J. Wong 	/*
165*21945e6cSDarrick J. Wong 	 * Use schedule_work here even if we're already in process context so
166*21945e6cSDarrick J. Wong 	 * that fsnotify and super_operations::report_error implementations are
167*21945e6cSDarrick J. Wong 	 * guaranteed to run in process context without any locks held.  Since
168*21945e6cSDarrick J. Wong 	 * errors are supposed to be rare, the overhead shouldn't kill us any
169*21945e6cSDarrick J. Wong 	 * more than the failing device will.
170*21945e6cSDarrick J. Wong 	 */
171*21945e6cSDarrick J. Wong 	schedule_work(&event->work);
172*21945e6cSDarrick J. Wong 	return;
173*21945e6cSDarrick J. Wong 
174*21945e6cSDarrick J. Wong lost_event:
175*21945e6cSDarrick J. Wong 	fserror_free_event(event);
176*21945e6cSDarrick J. Wong lost:
177*21945e6cSDarrick J. Wong 	if (inode)
178*21945e6cSDarrick J. Wong 		pr_err_ratelimited(
179*21945e6cSDarrick J. Wong  "%s: lost file I/O error report for ino %lu type %u pos 0x%llx len 0x%llx error %d",
180*21945e6cSDarrick J. Wong 		       sb->s_id, inode->i_ino, type, pos, len, error);
181*21945e6cSDarrick J. Wong 	else
182*21945e6cSDarrick J. Wong 		pr_err_ratelimited(
183*21945e6cSDarrick J. Wong  "%s: lost filesystem error report for type %u error %d",
184*21945e6cSDarrick J. Wong 		       sb->s_id, type, error);
185*21945e6cSDarrick J. Wong }
186*21945e6cSDarrick J. Wong EXPORT_SYMBOL_GPL(fserror_report);
187*21945e6cSDarrick J. Wong 
188*21945e6cSDarrick J. Wong static int __init fserror_init(void)
189*21945e6cSDarrick J. Wong {
190*21945e6cSDarrick J. Wong 	return mempool_init_kmalloc_pool(&fserror_events_pool,
191*21945e6cSDarrick J. Wong 					 FSERROR_DEFAULT_EVENT_POOL_SIZE,
192*21945e6cSDarrick J. Wong 					 sizeof(struct fserror_event));
193*21945e6cSDarrick J. Wong }
194*21945e6cSDarrick J. Wong fs_initcall(fserror_init);
195