1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs_platform.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "scrub/scrub.h"
14 #include "scrub/xfile.h"
15 #include "scrub/xfarray.h"
16 #include "scrub/trace.h"
17 #include <linux/shmem_fs.h>
18
/*
 * Swappable Temporary Memory
 * ==========================
 *
 * Online checking sometimes needs to be able to stage a large amount of data
 * in memory.  This information might not fit in the available memory and it
 * doesn't all need to be accessible at all times.  In other words, we want an
 * indexed data buffer to store data that can be paged out.
 *
 * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
 * requirements.  Therefore, the xfile mechanism uses an unlinked shmem file to
 * store our staging data.  This file is not installed in the file descriptor
 * table so that user programs cannot access the data, which means that the
 * xfile must be freed with xfile_destroy.
 *
 * xfiles assume that the caller will handle all required concurrency
 * management; standard vfs locks (freezer and inode) are not taken.  Reads
 * and writes are satisfied directly from the page cache.
 */
38
39 /*
40 * xfiles must not be exposed to userspace and require upper layers to
41 * coordinate access to the one handle returned by the constructor, so
42 * establish a separate lock class for xfiles to avoid confusing lockdep.
43 */
44 static struct lock_class_key xfile_i_mutex_key;
45
46 /*
47 * Create an xfile of the given size. The description will be used in the
48 * trace output.
49 */
50 int
xfile_create(const char * description,loff_t isize,struct xfile ** xfilep)51 xfile_create(
52 const char *description,
53 loff_t isize,
54 struct xfile **xfilep)
55 {
56 struct inode *inode;
57 struct xfile *xf;
58 int error;
59
60 xf = kmalloc_obj(struct xfile, XCHK_GFP_FLAGS);
61 if (!xf)
62 return -ENOMEM;
63
64 xf->file = shmem_kernel_file_setup(description, isize,
65 mk_vma_flags(VMA_NORESERVE_BIT));
66 if (IS_ERR(xf->file)) {
67 error = PTR_ERR(xf->file);
68 goto out_xfile;
69 }
70
71 inode = file_inode(xf->file);
72 lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key);
73
74 /*
75 * We don't want to bother with kmapping data during repair, so don't
76 * allow highmem pages to back this mapping.
77 */
78 mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
79
80 trace_xfile_create(xf);
81
82 *xfilep = xf;
83 return 0;
84 out_xfile:
85 kfree(xf);
86 return error;
87 }
88
89 /* Close the file and release all resources. */
90 void
xfile_destroy(struct xfile * xf)91 xfile_destroy(
92 struct xfile *xf)
93 {
94 struct inode *inode = file_inode(xf->file);
95
96 trace_xfile_destroy(xf);
97
98 lockdep_set_class(&inode->i_rwsem, &inode->i_sb->s_type->i_mutex_key);
99 fput(xf->file);
100 kfree(xf);
101 }
102
103 /*
104 * Load an object. Since we're treating this file as "memory", any error or
105 * short IO is treated as a failure to allocate memory.
106 */
107 int
xfile_load(struct xfile * xf,void * buf,size_t count,loff_t pos)108 xfile_load(
109 struct xfile *xf,
110 void *buf,
111 size_t count,
112 loff_t pos)
113 {
114 struct inode *inode = file_inode(xf->file);
115 unsigned int pflags;
116
117 if (count > MAX_RW_COUNT)
118 return -ENOMEM;
119 if (inode->i_sb->s_maxbytes - pos < count)
120 return -ENOMEM;
121
122 trace_xfile_load(xf, pos, count);
123
124 pflags = memalloc_nofs_save();
125 while (count > 0) {
126 struct folio *folio;
127 unsigned int len;
128 unsigned int offset;
129
130 if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
131 SGP_READ) < 0)
132 break;
133 if (!folio) {
134 /*
135 * No data stored at this offset, just zero the output
136 * buffer until the next page boundary.
137 */
138 len = min_t(ssize_t, count,
139 PAGE_SIZE - offset_in_page(pos));
140 memset(buf, 0, len);
141 } else {
142 if (filemap_check_wb_err(inode->i_mapping, 0)) {
143 folio_unlock(folio);
144 folio_put(folio);
145 break;
146 }
147
148 offset = offset_in_folio(folio, pos);
149 len = min_t(ssize_t, count, folio_size(folio) - offset);
150 memcpy(buf, folio_address(folio) + offset, len);
151
152 folio_unlock(folio);
153 folio_put(folio);
154 }
155 count -= len;
156 pos += len;
157 buf += len;
158 }
159 memalloc_nofs_restore(pflags);
160
161 if (count)
162 return -ENOMEM;
163 return 0;
164 }
165
166 /*
167 * Store an object. Since we're treating this file as "memory", any error or
168 * short IO is treated as a failure to allocate memory.
169 */
170 int
xfile_store(struct xfile * xf,const void * buf,size_t count,loff_t pos)171 xfile_store(
172 struct xfile *xf,
173 const void *buf,
174 size_t count,
175 loff_t pos)
176 {
177 struct inode *inode = file_inode(xf->file);
178 unsigned int pflags;
179
180 if (count > MAX_RW_COUNT)
181 return -ENOMEM;
182 if (inode->i_sb->s_maxbytes - pos < count)
183 return -ENOMEM;
184
185 trace_xfile_store(xf, pos, count);
186
187 /*
188 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE),
189 * actually allocates a folio instead of erroring out.
190 */
191 if (pos + count > i_size_read(inode))
192 i_size_write(inode, pos + count);
193
194 pflags = memalloc_nofs_save();
195 while (count > 0) {
196 struct folio *folio;
197 unsigned int len;
198 unsigned int offset;
199
200 if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
201 SGP_CACHE) < 0)
202 break;
203 if (filemap_check_wb_err(inode->i_mapping, 0)) {
204 folio_unlock(folio);
205 folio_put(folio);
206 break;
207 }
208
209 offset = offset_in_folio(folio, pos);
210 len = min_t(ssize_t, count, folio_size(folio) - offset);
211 memcpy(folio_address(folio) + offset, buf, len);
212
213 folio_mark_dirty(folio);
214 folio_unlock(folio);
215 folio_put(folio);
216
217 count -= len;
218 pos += len;
219 buf += len;
220 }
221 memalloc_nofs_restore(pflags);
222
223 if (count)
224 return -ENOMEM;
225 return 0;
226 }
227
228 /* Find the next written area in the xfile data for a given offset. */
229 loff_t
xfile_seek_data(struct xfile * xf,loff_t pos)230 xfile_seek_data(
231 struct xfile *xf,
232 loff_t pos)
233 {
234 loff_t ret;
235
236 ret = vfs_llseek(xf->file, pos, SEEK_DATA);
237 trace_xfile_seek_data(xf, pos, ret);
238 return ret;
239 }
240
241 /*
242 * Grab the (locked) folio for a memory object. The object cannot span a folio
243 * boundary. Returns the locked folio if successful, NULL if there was no
244 * folio or it didn't cover the range requested, or an ERR_PTR on failure.
245 */
246 struct folio *
xfile_get_folio(struct xfile * xf,loff_t pos,size_t len,unsigned int flags)247 xfile_get_folio(
248 struct xfile *xf,
249 loff_t pos,
250 size_t len,
251 unsigned int flags)
252 {
253 struct inode *inode = file_inode(xf->file);
254 struct folio *folio = NULL;
255 unsigned int pflags;
256 int error;
257
258 if (inode->i_sb->s_maxbytes - pos < len)
259 return ERR_PTR(-ENOMEM);
260
261 trace_xfile_get_folio(xf, pos, len);
262
263 /*
264 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE),
265 * actually allocates a folio instead of erroring out.
266 */
267 if ((flags & XFILE_ALLOC) && pos + len > i_size_read(inode))
268 i_size_write(inode, pos + len);
269
270 pflags = memalloc_nofs_save();
271 error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
272 (flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ);
273 memalloc_nofs_restore(pflags);
274 if (error)
275 return ERR_PTR(error);
276
277 if (!folio)
278 return NULL;
279
280 if (len > folio_size(folio) - offset_in_folio(folio, pos)) {
281 folio_unlock(folio);
282 folio_put(folio);
283 return NULL;
284 }
285
286 if (filemap_check_wb_err(inode->i_mapping, 0)) {
287 folio_unlock(folio);
288 folio_put(folio);
289 return ERR_PTR(-EIO);
290 }
291
292 /*
293 * Mark the folio dirty so that it won't be reclaimed once we drop the
294 * (potentially last) reference in xfile_put_folio.
295 */
296 if (flags & XFILE_ALLOC)
297 folio_mark_dirty(folio);
298 return folio;
299 }
300
301 /*
302 * Release the (locked) folio for a memory object.
303 */
304 void
xfile_put_folio(struct xfile * xf,struct folio * folio)305 xfile_put_folio(
306 struct xfile *xf,
307 struct folio *folio)
308 {
309 trace_xfile_put_folio(xf, folio_pos(folio), folio_size(folio));
310
311 folio_unlock(folio);
312 folio_put(folio);
313 }
314
315 /* Discard the page cache that's backing a range of the xfile. */
316 void
xfile_discard(struct xfile * xf,loff_t pos,u64 count)317 xfile_discard(
318 struct xfile *xf,
319 loff_t pos,
320 u64 count)
321 {
322 trace_xfile_discard(xf, pos, count);
323
324 shmem_truncate_range(file_inode(xf->file), pos, pos + count - 1);
325 }
326