// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2023-2024 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_buf.h"
#include "xfs_buf_mem.h"
#include "xfs_trace.h"
#include <linux/shmem_fs.h>
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_error.h"

/*
 * Buffer Cache for In-Memory Files
 * ================================
 *
 * Online fsck wants to create ephemeral ordered recordsets. The existing
 * btree infrastructure can do this, but we need the buffer cache to target
 * memory instead of block devices.
 *
 * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
 * requirements. Therefore, the xmbuf mechanism uses an unlinked shmem file
 * to store our staging data. This file is not installed in the file
 * descriptor table so that user programs cannot access the data, which means
 * that the xmbuf must be freed with xmbuf_free.
 *
 * xmbufs assume that the caller will handle all required concurrency
 * management; standard vfs locks (freezer and inode) are not taken. Reads
 * and writes are satisfied directly from the page cache.
 *
 * The only supported block size is PAGE_SIZE, and we cannot use highmem.
 */

/*
 * shmem files used to back an in-memory buffer cache must not be exposed to
 * userspace. Upper layers must coordinate access to the one handle returned
 * by the constructor, so establish a separate lock class for xmbufs to avoid
 * confusing lockdep.
 */
static struct lock_class_key xmbuf_i_mutex_key;

/*
 * Allocate a buffer cache target for a memory-backed file and set up the
 * buffer target.
 */
int
xmbuf_alloc(
	struct xfs_mount	*mp,
	const char		*descr,
	struct xfs_buftarg	**btpp)
{
	struct file		*file;
	struct inode		*inode;
	struct xfs_buftarg	*btp;
	int			error;

	btp = kzalloc(struct_size(btp, bt_cache, 1), GFP_KERNEL);
	if (!btp)
		return -ENOMEM;

	file = shmem_kernel_file_setup(descr, 0, 0);
	if (IS_ERR(file)) {
		error = PTR_ERR(file);
		goto out_free_btp;
	}
	inode = file_inode(file);

	/* private file, private locking */
	lockdep_set_class(&inode->i_rwsem, &xmbuf_i_mutex_key);

	/*
	 * We don't want to bother with kmapping data during repair, so don't
	 * allow highmem pages to back this mapping.
	 */
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);

	/* ensure all writes are below EOF to avoid pagecache zeroing */
	i_size_write(inode, inode->i_sb->s_maxbytes);

	error = xfs_buf_cache_init(btp->bt_cache);
	if (error)
		goto out_file;

	/* Initialize buffer target */
	btp->bt_mount = mp;
	btp->bt_dev = (dev_t)-1U;
	btp->bt_bdev = NULL; /* in-memory buftargs have no bdev */
	btp->bt_file = file;
	btp->bt_meta_sectorsize = XMBUF_BLOCKSIZE;
	btp->bt_meta_sectormask = XMBUF_BLOCKSIZE - 1;

	error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr);
	if (error)
		goto out_bcache;

	trace_xmbuf_create(btp);

	*btpp = btp;
	return 0;

out_bcache:
	xfs_buf_cache_destroy(btp->bt_cache);
out_file:
	fput(file);
out_free_btp:
	kfree(btp);
	return error;
}
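/*
 * A minimal sketch of the alloc/free lifecycle, assuming a caller that
 * already has a struct xfs_mount *mp; the "xfs-staging" description string
 * is hypothetical and the sketch is illustrative only:
 *
 *	struct xfs_buftarg	*btp;
 *	int			error;
 *
 *	error = xmbuf_alloc(mp, "xfs-staging", &btp);
 *	if (error)
 *		return error;
 *
 *	(stage ephemeral btree records against btp via the buffer cache)
 *
 *	xmbuf_free(btp);
 */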
/* Free a buffer cache target for a memory-backed buffer cache. */
void
xmbuf_free(
	struct xfs_buftarg	*btp)
{
	ASSERT(xfs_buftarg_is_mem(btp));
	ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);

	trace_xmbuf_free(btp);

	xfs_destroy_buftarg(btp);
	xfs_buf_cache_destroy(btp->bt_cache);
	fput(btp->bt_file);
	kfree(btp);
}

/* Directly map a shmem page into the buffer cache. */
int
xmbuf_map_page(
	struct xfs_buf		*bp)
{
	struct inode		*inode = file_inode(bp->b_target->bt_file);
	struct folio		*folio = NULL;
	struct page		*page;
	loff_t			pos = BBTOB(xfs_buf_daddr(bp));
	int			error;

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	if (bp->b_map_count != 1)
		return -ENOMEM;
	if (BBTOB(bp->b_length) != XMBUF_BLOCKSIZE)
		return -ENOMEM;
	if (offset_in_page(pos) != 0) {
		ASSERT(offset_in_page(pos) == 0);
		return -ENOMEM;
	}

	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio, SGP_CACHE);
	if (error)
		return error;

	if (filemap_check_wb_err(inode->i_mapping, 0)) {
		folio_unlock(folio);
		folio_put(folio);
		return -EIO;
	}

	page = folio_file_page(folio, pos >> PAGE_SHIFT);

	/*
	 * Mark the page dirty so that it won't be reclaimed once we drop the
	 * (potentially last) reference in xmbuf_unmap_page.
	 */
	set_page_dirty(page);
	unlock_page(page);

	bp->b_addr = page_address(page);
	bp->b_pages = bp->b_page_array;
	bp->b_pages[0] = page;
	bp->b_page_count = 1;
	return 0;
}

/* Unmap a shmem page that was mapped into the buffer cache. */
void
xmbuf_unmap_page(
	struct xfs_buf		*bp)
{
	struct page		*page = bp->b_pages[0];

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	put_page(page);

	bp->b_addr = NULL;
	bp->b_pages[0] = NULL;
	bp->b_pages = NULL;
	bp->b_page_count = 0;
}

/* Is this a valid daddr within the buftarg? */
bool
xmbuf_verify_daddr(
	struct xfs_buftarg	*btp,
	xfs_daddr_t		daddr)
{
	struct inode		*inode = file_inode(btp->bt_file);

	ASSERT(xfs_buftarg_is_mem(btp));

	return daddr < (inode->i_sb->s_maxbytes >> BBSHIFT);
}

/* Discard the page backing this buffer. */
static void
xmbuf_stale(
	struct xfs_buf		*bp)
{
	struct inode		*inode = file_inode(bp->b_target->bt_file);
	loff_t			pos;

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	pos = BBTOB(xfs_buf_daddr(bp));
	shmem_truncate_range(inode, pos, pos + BBTOB(bp->b_length) - 1);
}

/*
 * Finalize a buffer -- discard the backing page if it's stale, or run the
 * write verifier to detect problems.
 */
int
xmbuf_finalize(
	struct xfs_buf		*bp)
{
	xfs_failaddr_t		fa;
	int			error = 0;

	if (bp->b_flags & XBF_STALE) {
		xmbuf_stale(bp);
		return 0;
	}

	/*
	 * Although this btree is ephemeral, validate the buffer structure so
	 * that we can detect memory corruption errors and software bugs.
	 */
	fa = bp->b_ops->verify_struct(bp);
	if (fa) {
		error = -EFSCORRUPTED;
		xfs_verifier_error(bp, error, fa);
	}

	return error;
}
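/*
 * Worked example of the daddr arithmetic used by xmbuf_map_page,
 * xmbuf_verify_daddr, and xmbuf_stale above, assuming 4096-byte pages
 * (so XMBUF_BLOCKSIZE == PAGE_SIZE == 4096) and 512-byte basic blocks
 * (BBSHIFT == 9):
 *
 *	daddr 8  ->  pos = BBTOB(8) = 8 << 9 = 4096 bytes
 *	         ->  shmem folio index = pos >> PAGE_SHIFT = 1
 *
 * Only daddrs that start on a page boundary (every eighth basic block in
 * this configuration) can be mapped; xmbuf_map_page rejects anything else
 * via the offset_in_page() check.
 */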
/*
 * Detach this xmbuf buffer from the transaction by any means necessary.
 * All buffers are direct-mapped, so they do not need bwrite.
 */
void
xmbuf_trans_bdetach(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bli = bp->b_log_item;

	ASSERT(bli != NULL);

	bli->bli_flags &= ~(XFS_BLI_DIRTY | XFS_BLI_ORDERED |
			    XFS_BLI_LOGGED | XFS_BLI_STALE);
	clear_bit(XFS_LI_DIRTY, &bli->bli_item.li_flags);

	/*
	 * The buffer may have been joined to the transaction more than
	 * once; each detach call releases a single hold on the log item,
	 * so repeat until the buffer is fully detached.
	 */
	while (bp->b_log_item != NULL)
		xfs_trans_bdetach(tp, bp);
}
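/*
 * A minimal sketch of tearing an xmbuf buffer out of a transaction,
 * assuming the caller holds both tp and a locked bp; illustrative only:
 *
 *	if (bp->b_log_item)
 *		xmbuf_trans_bdetach(tp, bp);
 *	xfs_buf_relse(bp);
 *
 * Because xmbuf pages are direct-mapped, the buffer contents already live
 * in the shmem file; no bwrite is needed before releasing the buffer.
 */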