xref: /linux/fs/xfs/scrub/xfile.c (revision 1fd1dc41724319406b0aff221a352a400b0ddfc5)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs_platform.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "scrub/scrub.h"
14 #include "scrub/xfile.h"
15 #include "scrub/xfarray.h"
16 #include "scrub/trace.h"
17 #include <linux/shmem_fs.h>
18 
19 /*
20  * Swappable Temporary Memory
21  * ==========================
22  *
23  * Online checking sometimes needs to be able to stage a large amount of data
24  * in memory.  This information might not fit in the available memory and it
25  * doesn't all need to be accessible at all times.  In other words, we want an
26  * indexed data buffer to store data that can be paged out.
27  *
28  * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
29  * requirements.  Therefore, the xfile mechanism uses an unlinked shmem file to
30  * store our staging data.  This file is not installed in the file descriptor
31  * table so that user programs cannot access the data, which means that the
32  * xfile must be freed with xfile_destroy.
33  *
34  * xfiles assume that the caller will handle all required concurrency
35  * management; standard vfs locks (freezer and inode) are not taken.  Reads
36  * and writes are satisfied directly from the page cache.
37  */
38 
/*
 * xfiles must not be exposed to userspace and require upper layers to
 * coordinate access to the one handle returned by the constructor, so
 * establish a separate lock class for xfiles to avoid confusing lockdep.
 *
 * xfile_create assigns this class to the i_rwsem of the backing inode, and
 * xfile_destroy switches that lock back to the class of the inode's
 * filesystem type before the file is released.
 */
static struct lock_class_key xfile_i_mutex_key;
45 
46 /*
47  * Create an xfile of the given size.  The description will be used in the
48  * trace output.
49  */
50 int
51 xfile_create(
52 	const char		*description,
53 	loff_t			isize,
54 	struct xfile		**xfilep)
55 {
56 	struct inode		*inode;
57 	struct xfile		*xf;
58 	int			error;
59 
60 	xf = kmalloc_obj(struct xfile, XCHK_GFP_FLAGS);
61 	if (!xf)
62 		return -ENOMEM;
63 
64 	xf->file = shmem_kernel_file_setup(description, isize,
65 					   mk_vma_flags(VMA_NORESERVE_BIT));
66 	if (IS_ERR(xf->file)) {
67 		error = PTR_ERR(xf->file);
68 		goto out_xfile;
69 	}
70 
71 	inode = file_inode(xf->file);
72 	lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key);
73 
74 	/*
75 	 * We don't want to bother with kmapping data during repair, so don't
76 	 * allow highmem pages to back this mapping.
77 	 */
78 	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
79 
80 	trace_xfile_create(xf);
81 
82 	*xfilep = xf;
83 	return 0;
84 out_xfile:
85 	kfree(xf);
86 	return error;
87 }
88 
89 /* Close the file and release all resources. */
90 void
91 xfile_destroy(
92 	struct xfile		*xf)
93 {
94 	struct inode		*inode = file_inode(xf->file);
95 
96 	trace_xfile_destroy(xf);
97 
98 	lockdep_set_class(&inode->i_rwsem, &inode->i_sb->s_type->i_mutex_key);
99 	fput(xf->file);
100 	kfree(xf);
101 }
102 
103 /*
104  * Load an object.  Since we're treating this file as "memory", any error or
105  * short IO is treated as a failure to allocate memory.
106  */
107 int
108 xfile_load(
109 	struct xfile		*xf,
110 	void			*buf,
111 	size_t			count,
112 	loff_t			pos)
113 {
114 	struct inode		*inode = file_inode(xf->file);
115 	unsigned int		pflags;
116 
117 	if (count > MAX_RW_COUNT)
118 		return -ENOMEM;
119 	if (inode->i_sb->s_maxbytes - pos < count)
120 		return -ENOMEM;
121 
122 	trace_xfile_load(xf, pos, count);
123 
124 	pflags = memalloc_nofs_save();
125 	while (count > 0) {
126 		struct folio	*folio;
127 		unsigned int	len;
128 		unsigned int	offset;
129 
130 		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
131 				SGP_READ) < 0)
132 			break;
133 		if (!folio) {
134 			/*
135 			 * No data stored at this offset, just zero the output
136 			 * buffer until the next page boundary.
137 			 */
138 			len = min_t(ssize_t, count,
139 				PAGE_SIZE - offset_in_page(pos));
140 			memset(buf, 0, len);
141 		} else {
142 			if (filemap_check_wb_err(inode->i_mapping, 0)) {
143 				folio_unlock(folio);
144 				folio_put(folio);
145 				break;
146 			}
147 
148 			offset = offset_in_folio(folio, pos);
149 			len = min_t(ssize_t, count, folio_size(folio) - offset);
150 			memcpy(buf, folio_address(folio) + offset, len);
151 
152 			folio_unlock(folio);
153 			folio_put(folio);
154 		}
155 		count -= len;
156 		pos += len;
157 		buf += len;
158 	}
159 	memalloc_nofs_restore(pflags);
160 
161 	if (count)
162 		return -ENOMEM;
163 	return 0;
164 }
165 
166 /*
167  * Store an object.  Since we're treating this file as "memory", any error or
168  * short IO is treated as a failure to allocate memory.
169  */
170 int
171 xfile_store(
172 	struct xfile		*xf,
173 	const void		*buf,
174 	size_t			count,
175 	loff_t			pos)
176 {
177 	struct inode		*inode = file_inode(xf->file);
178 	unsigned int		pflags;
179 
180 	if (count > MAX_RW_COUNT)
181 		return -ENOMEM;
182 	if (inode->i_sb->s_maxbytes - pos < count)
183 		return -ENOMEM;
184 
185 	trace_xfile_store(xf, pos, count);
186 
187 	/*
188 	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE),
189 	 * actually allocates a folio instead of erroring out.
190 	 */
191 	if (pos + count > i_size_read(inode))
192 		i_size_write(inode, pos + count);
193 
194 	pflags = memalloc_nofs_save();
195 	while (count > 0) {
196 		struct folio	*folio;
197 		unsigned int	len;
198 		unsigned int	offset;
199 
200 		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
201 				SGP_CACHE) < 0)
202 			break;
203 		if (filemap_check_wb_err(inode->i_mapping, 0)) {
204 			folio_unlock(folio);
205 			folio_put(folio);
206 			break;
207 		}
208 
209 		offset = offset_in_folio(folio, pos);
210 		len = min_t(ssize_t, count, folio_size(folio) - offset);
211 		memcpy(folio_address(folio) + offset, buf, len);
212 
213 		folio_mark_dirty(folio);
214 		folio_unlock(folio);
215 		folio_put(folio);
216 
217 		count -= len;
218 		pos += len;
219 		buf += len;
220 	}
221 	memalloc_nofs_restore(pflags);
222 
223 	if (count)
224 		return -ENOMEM;
225 	return 0;
226 }
227 
228 /* Find the next written area in the xfile data for a given offset. */
229 loff_t
230 xfile_seek_data(
231 	struct xfile		*xf,
232 	loff_t			pos)
233 {
234 	loff_t			ret;
235 
236 	ret = vfs_llseek(xf->file, pos, SEEK_DATA);
237 	trace_xfile_seek_data(xf, pos, ret);
238 	return ret;
239 }
240 
241 /*
242  * Grab the (locked) folio for a memory object.  The object cannot span a folio
243  * boundary.  Returns the locked folio if successful, NULL if there was no
244  * folio or it didn't cover the range requested, or an ERR_PTR on failure.
245  */
246 struct folio *
247 xfile_get_folio(
248 	struct xfile		*xf,
249 	loff_t			pos,
250 	size_t			len,
251 	unsigned int		flags)
252 {
253 	struct inode		*inode = file_inode(xf->file);
254 	struct folio		*folio = NULL;
255 	unsigned int		pflags;
256 	int			error;
257 
258 	if (inode->i_sb->s_maxbytes - pos < len)
259 		return ERR_PTR(-ENOMEM);
260 
261 	trace_xfile_get_folio(xf, pos, len);
262 
263 	/*
264 	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE),
265 	 * actually allocates a folio instead of erroring out.
266 	 */
267 	if ((flags & XFILE_ALLOC) && pos + len > i_size_read(inode))
268 		i_size_write(inode, pos + len);
269 
270 	pflags = memalloc_nofs_save();
271 	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
272 			(flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ);
273 	memalloc_nofs_restore(pflags);
274 	if (error)
275 		return ERR_PTR(error);
276 
277 	if (!folio)
278 		return NULL;
279 
280 	if (len > folio_size(folio) - offset_in_folio(folio, pos)) {
281 		folio_unlock(folio);
282 		folio_put(folio);
283 		return NULL;
284 	}
285 
286 	if (filemap_check_wb_err(inode->i_mapping, 0)) {
287 		folio_unlock(folio);
288 		folio_put(folio);
289 		return ERR_PTR(-EIO);
290 	}
291 
292 	/*
293 	 * Mark the folio dirty so that it won't be reclaimed once we drop the
294 	 * (potentially last) reference in xfile_put_folio.
295 	 */
296 	if (flags & XFILE_ALLOC)
297 		folio_mark_dirty(folio);
298 	return folio;
299 }
300 
301 /*
302  * Release the (locked) folio for a memory object.
303  */
304 void
305 xfile_put_folio(
306 	struct xfile		*xf,
307 	struct folio		*folio)
308 {
309 	trace_xfile_put_folio(xf, folio_pos(folio), folio_size(folio));
310 
311 	folio_unlock(folio);
312 	folio_put(folio);
313 }
314 
315 /* Discard the page cache that's backing a range of the xfile. */
316 void
317 xfile_discard(
318 	struct xfile		*xf,
319 	loff_t			pos,
320 	u64			count)
321 {
322 	trace_xfile_discard(xf, pos, count);
323 
324 	shmem_truncate_range(file_inode(xf->file), pos, pos + count - 1);
325 }
326