// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/trace.h"
#include <linux/shmem_fs.h>

/*
 * Swappable Temporary Memory
 * ==========================
 *
 * Online checking sometimes needs to be able to stage a large amount of data
 * in memory.  This information might not fit in the available memory and it
 * doesn't all need to be accessible at all times.  In other words, we want an
 * indexed data buffer to store data that can be paged out.
 *
 * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
 * requirements.  Therefore, the xfile mechanism uses an unlinked shmem file
 * to store our staging data.  This file is not installed in the file
 * descriptor table so that user programs cannot access the data, which means
 * that the xfile must be freed with xfile_destroy.
 *
 * xfiles assume that the caller will handle all required concurrency
 * management; standard vfs locks (freezer and inode) are not taken.  Reads
 * and writes are satisfied directly from the page cache.
 */

/*
 * xfiles must not be exposed to userspace and require upper layers to
 * coordinate access to the one handle returned by the constructor, so
 * establish a separate lock class for xfiles to avoid confusing lockdep.
 */
static struct lock_class_key xfile_i_mutex_key;

/*
 * Create an xfile of the given size.  The description will be used in the
 * trace output.
 */
int
xfile_create(
	const char		*description,
	loff_t			isize,
	struct xfile		**xfilep)
{
	struct inode		*inode;
	struct xfile		*xf;
	int			error;

	xf = kmalloc(sizeof(struct xfile), XCHK_GFP_FLAGS);
	if (!xf)
		return -ENOMEM;

	xf->file = shmem_kernel_file_setup(description, isize, VM_NORESERVE);
	if (IS_ERR(xf->file)) {
		error = PTR_ERR(xf->file);
		goto out_xfile;
	}

	inode = file_inode(xf->file);
	lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key);

	/*
	 * We don't want to bother with kmapping data during repair, so don't
	 * allow highmem pages to back this mapping.
	 */
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);

	trace_xfile_create(xf);

	*xfilep = xf;
	return 0;
out_xfile:
	kfree(xf);
	return error;
}

/* Close the file and release all resources. */
void
xfile_destroy(
	struct xfile		*xf)
{
	struct inode		*inode = file_inode(xf->file);

	trace_xfile_destroy(xf);

	lockdep_set_class(&inode->i_rwsem, &inode->i_sb->s_type->i_mutex_key);
	fput(xf->file);
	kfree(xf);
}
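
/*
 * Illustrative sketch, not part of this file: the typical xfile lifecycle.
 * The helper name and the byte offset below are hypothetical; only the
 * xfile_* calls come from this API.  Holes in an xfile read back as zeroes,
 * so callers may treat the file as sparse, zero-initialized "memory".
 */
static int
xfile_lifecycle_example(void)
{
	struct xfile		*xf;
	u64			val = 42;
	u64			readback = 0;
	int			error;

	/* Create an empty xfile; the description shows up in trace output. */
	error = xfile_create("example data", 0, &xf);
	if (error)
		return error;

	/* Store an object at an arbitrary byte offset... */
	error = xfile_store(xf, &val, sizeof(val), 65536);
	if (error)
		goto out;

	/* ...and load it back from pageable storage. */
	error = xfile_load(xf, &readback, sizeof(readback), 65536);
out:
	/* The shmem file is unlinked, so this releases everything. */
	xfile_destroy(xf);
	return error;
}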
/*
 * Load an object.  Since we're treating this file as "memory", any error or
 * short IO is treated as a failure to allocate memory.
 */
int
xfile_load(
	struct xfile		*xf,
	void			*buf,
	size_t			count,
	loff_t			pos)
{
	struct inode		*inode = file_inode(xf->file);
	unsigned int		pflags;

	if (count > MAX_RW_COUNT)
		return -ENOMEM;
	if (inode->i_sb->s_maxbytes - pos < count)
		return -ENOMEM;

	trace_xfile_load(xf, pos, count);

	pflags = memalloc_nofs_save();
	while (count > 0) {
		struct folio	*folio;
		unsigned int	len;
		unsigned int	offset;

		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
				SGP_READ) < 0)
			break;
		if (!folio) {
			/*
			 * No data stored at this offset, just zero the output
			 * buffer until the next page boundary.
			 */
			len = min_t(ssize_t, count,
					PAGE_SIZE - offset_in_page(pos));
			memset(buf, 0, len);
		} else {
			if (filemap_check_wb_err(inode->i_mapping, 0)) {
				folio_unlock(folio);
				folio_put(folio);
				break;
			}

			offset = offset_in_folio(folio, pos);
			len = min_t(ssize_t, count,
					folio_size(folio) - offset);
			memcpy(buf, folio_address(folio) + offset, len);

			folio_unlock(folio);
			folio_put(folio);
		}
		count -= len;
		pos += len;
		buf += len;
	}
	memalloc_nofs_restore(pflags);

	if (count)
		return -ENOMEM;
	return 0;
}

/*
 * Store an object.  Since we're treating this file as "memory", any error or
 * short IO is treated as a failure to allocate memory.
 */
int
xfile_store(
	struct xfile		*xf,
	const void		*buf,
	size_t			count,
	loff_t			pos)
{
	struct inode		*inode = file_inode(xf->file);
	unsigned int		pflags;

	if (count > MAX_RW_COUNT)
		return -ENOMEM;
	if (inode->i_sb->s_maxbytes - pos < count)
		return -ENOMEM;

	trace_xfile_store(xf, pos, count);

	/*
	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE)
	 * actually allocates a folio instead of erroring out.
	 */
	if (pos + count > i_size_read(inode))
		i_size_write(inode, pos + count);

	pflags = memalloc_nofs_save();
	while (count > 0) {
		struct folio	*folio;
		unsigned int	len;
		unsigned int	offset;

		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
				SGP_CACHE) < 0)
			break;
		if (filemap_check_wb_err(inode->i_mapping, 0)) {
			folio_unlock(folio);
			folio_put(folio);
			break;
		}

		offset = offset_in_folio(folio, pos);
		len = min_t(ssize_t, count, folio_size(folio) - offset);
		memcpy(folio_address(folio) + offset, buf, len);

		folio_mark_dirty(folio);
		folio_unlock(folio);
		folio_put(folio);

		count -= len;
		pos += len;
		buf += len;
	}
	memalloc_nofs_restore(pflags);

	if (count)
		return -ENOMEM;
	return 0;
}

/* Find the next written area in the xfile data for a given offset. */
loff_t
xfile_seek_data(
	struct xfile		*xf,
	loff_t			pos)
{
	loff_t			ret;

	ret = vfs_llseek(xf->file, pos, SEEK_DATA);
	trace_xfile_seek_data(xf, pos, ret);
	return ret;
}
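
/*
 * Illustrative sketch, not part of this file: scan only the written regions
 * of a sparse xfile.  The helper name and the page-at-a-time advance are
 * assumptions for demonstration; vfs_llseek(SEEK_DATA) reports data at page
 * granularity on shmem and returns -ENXIO once no data remains at or beyond
 * pos, which terminates the loop.
 */
static void
xfile_walk_data_example(
	struct xfile		*xf)
{
	loff_t			pos = 0;

	while ((pos = xfile_seek_data(xf, pos)) >= 0) {
		/* pos now points into a written page; process it here. */

		/* Skip to the next page so the scan makes forward progress. */
		pos = round_up(pos + 1, PAGE_SIZE);
	}
}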
/*
 * Grab the (locked) folio for a memory object.  The object cannot span a
 * folio boundary.  Returns the locked folio if successful, NULL if there was
 * no folio or it didn't cover the range requested, or an ERR_PTR on failure.
 */
struct folio *
xfile_get_folio(
	struct xfile		*xf,
	loff_t			pos,
	size_t			len,
	unsigned int		flags)
{
	struct inode		*inode = file_inode(xf->file);
	struct folio		*folio = NULL;
	unsigned int		pflags;
	int			error;

	if (inode->i_sb->s_maxbytes - pos < len)
		return ERR_PTR(-ENOMEM);

	trace_xfile_get_folio(xf, pos, len);

	/*
	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE)
	 * actually allocates a folio instead of erroring out.
	 */
	if ((flags & XFILE_ALLOC) && pos + len > i_size_read(inode))
		i_size_write(inode, pos + len);

	pflags = memalloc_nofs_save();
	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
			(flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ);
	memalloc_nofs_restore(pflags);
	if (error)
		return ERR_PTR(error);

	if (!folio)
		return NULL;

	if (len > folio_size(folio) - offset_in_folio(folio, pos)) {
		folio_unlock(folio);
		folio_put(folio);
		return NULL;
	}

	if (filemap_check_wb_err(inode->i_mapping, 0)) {
		folio_unlock(folio);
		folio_put(folio);
		return ERR_PTR(-EIO);
	}

	/*
	 * Mark the folio dirty so that it won't be reclaimed once we drop the
	 * (potentially last) reference in xfile_put_folio.
	 */
	if (flags & XFILE_ALLOC)
		folio_set_dirty(folio);
	return folio;
}

/*
 * Release the (locked) folio for a memory object.
 */
void
xfile_put_folio(
	struct xfile		*xf,
	struct folio		*folio)
{
	trace_xfile_put_folio(xf, folio_pos(folio), folio_size(folio));

	folio_unlock(folio);
	folio_put(folio);
}

/* Discard the page cache that's backing a range of the xfile. */
void
xfile_discard(
	struct xfile		*xf,
	loff_t			pos,
	u64			count)
{
	trace_xfile_discard(xf, pos, count);

	shmem_truncate_range(file_inode(xf->file), pos, pos + count - 1);
}
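
/*
 * Illustrative sketch, not part of this file: update an object in place
 * through the folio interface, avoiding the memcpy round trip that
 * xfile_load/xfile_store would do.  The helper name is hypothetical.  Because
 * the object must not span a folio boundary, xfile_get_folio can return NULL
 * even with XFILE_ALLOC set, so both failure forms are handled.
 */
static int
xfile_poke_example(
	struct xfile		*xf,
	loff_t			pos,
	u64			newval)
{
	struct folio		*folio;

	/* XFILE_ALLOC ensures backing memory exists and marks it dirty. */
	folio = xfile_get_folio(xf, pos, sizeof(newval), XFILE_ALLOC);
	if (IS_ERR(folio))
		return PTR_ERR(folio);
	if (!folio)
		return -ENOMEM;

	/* Write the new value directly into the (locked) folio. */
	*(u64 *)(folio_address(folio) + offset_in_folio(folio, pos)) = newval;

	/* Unlock and drop our reference. */
	xfile_put_folio(xf, folio);
	return 0;
}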