// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2023-2024 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_buf.h"
#include "xfs_buf_mem.h"
#include "xfs_trace.h"
#include <linux/shmem_fs.h>
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_error.h"

/*
 * Buffer Cache for In-Memory Files
 * ================================
 *
 * Online fsck wants to create ephemeral ordered recordsets. The existing
 * btree infrastructure can do this, but we need the buffer cache to target
 * memory instead of block devices.
 *
 * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
 * requirements. Therefore, the xmbuf mechanism uses an unlinked shmem file
 * to store our staging data. This file is not installed in the file
 * descriptor table so that user programs cannot access the data, which means
 * that the xmbuf must be freed with xmbuf_free.
 *
 * xmbufs assume that the caller will handle all required concurrency
 * management; standard vfs locks (freezer and inode) are not taken. Reads
 * and writes are satisfied directly from the page cache.
 *
 * The only supported block size is PAGE_SIZE, and we cannot use highmem.
 */

/*
 * shmem files used to back an in-memory buffer cache must not be exposed to
 * userspace. Upper layers must coordinate access to the one handle returned
 * by the constructor, so establish a separate lock class for xmbufs to avoid
 * confusing lockdep.
 */
static struct lock_class_key xmbuf_i_mutex_key;

/*
 * Allocate a buffer cache target for a memory-backed file and set up the
 * buffer target.
 */
int
xmbuf_alloc(
	struct xfs_mount	*mp,
	const char		*descr,
	struct xfs_buftarg	**btpp)
{
	struct file		*file;
	struct inode		*inode;
	struct xfs_buftarg	*btp;
	int			error;

	btp = kzalloc(struct_size(btp, bt_cache, 1), GFP_KERNEL);
	if (!btp)
		return -ENOMEM;

	file = shmem_kernel_file_setup(descr, 0, 0);
	if (IS_ERR(file)) {
		error = PTR_ERR(file);
		goto out_free_btp;
	}
	inode = file_inode(file);

	/* private file, private locking */
	lockdep_set_class(&inode->i_rwsem, &xmbuf_i_mutex_key);

	/*
	 * We don't want to bother with kmapping data during repair, so don't
	 * allow highmem pages to back this mapping.
	 */
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);

	/* ensure all writes are below EOF to avoid pagecache zeroing */
	i_size_write(inode, inode->i_sb->s_maxbytes);

	error = xfs_buf_cache_init(btp->bt_cache);
	if (error)
		goto out_file;

	/* Initialize buffer target */
	btp->bt_mount = mp;
	btp->bt_dev = (dev_t)-1U;
	btp->bt_bdev = NULL; /* in-memory buftargs have no bdev */
	btp->bt_file = file;
	btp->bt_meta_sectorsize = XMBUF_BLOCKSIZE;
	btp->bt_meta_sectormask = XMBUF_BLOCKSIZE - 1;

	error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr);
	if (error)
		goto out_bcache;

	trace_xmbuf_create(btp);

	*btpp = btp;
	return 0;

out_bcache:
	xfs_buf_cache_destroy(btp->bt_cache);
out_file:
	fput(file);
out_free_btp:
	kfree(btp);
	return error;
}

/* Free a buffer cache target for a memory-backed buffer cache. */
void
xmbuf_free(
	struct xfs_buftarg	*btp)
{
	ASSERT(xfs_buftarg_is_mem(btp));
	ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);

	trace_xmbuf_free(btp);

	xfs_destroy_buftarg(btp);
	xfs_buf_cache_destroy(btp->bt_cache);
	fput(btp->bt_file);
	kfree(btp);
}
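/*
 * A note on the disk-address arithmetic used by xmbuf_map_page() and
 * xmbuf_verify_daddr() below.  This is a worked sketch, assuming 4 KiB pages
 * and that XMBUF_BLOCKSIZE tracks PAGE_SIZE; the concrete numbers are
 * illustrative only:
 *
 *	xfs_daddr_t	daddr = 8;		// 512-byte basic blocks
 *	loff_t		pos = BBTOB(daddr);	// 8 << BBSHIFT (9) = 4096
 *	pgoff_t		index = pos >> PAGE_SHIFT;	// 4096 >> 12 = 1
 *
 * In other words, buffer daddr 8 lands on the second page of the backing
 * shmem file, and a daddr is valid only while its byte position stays below
 * i_sb->s_maxbytes.
 */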
/* Directly map a shmem page into the buffer cache. */
int
xmbuf_map_page(
	struct xfs_buf		*bp)
{
	struct inode		*inode = file_inode(bp->b_target->bt_file);
	struct folio		*folio = NULL;
	struct page		*page;
	loff_t			pos = BBTOB(xfs_buf_daddr(bp));
	int			error;

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	if (bp->b_map_count != 1)
		return -ENOMEM;
	if (BBTOB(bp->b_length) != XMBUF_BLOCKSIZE)
		return -ENOMEM;
	if (offset_in_page(pos) != 0) {
		ASSERT(offset_in_page(pos));
		return -ENOMEM;
	}

	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio, SGP_CACHE);
	if (error)
		return error;

	if (filemap_check_wb_err(inode->i_mapping, 0)) {
		folio_unlock(folio);
		folio_put(folio);
		return -EIO;
	}

	page = folio_file_page(folio, pos >> PAGE_SHIFT);

	/*
	 * Mark the page dirty so that it won't be reclaimed once we drop the
	 * (potentially last) reference in xmbuf_unmap_page.
	 */
	set_page_dirty(page);
	unlock_page(page);

	bp->b_addr = page_address(page);
	bp->b_pages = bp->b_page_array;
	bp->b_pages[0] = page;
	bp->b_page_count = 1;
	return 0;
}

/* Unmap a shmem page that was mapped into the buffer cache. */
void
xmbuf_unmap_page(
	struct xfs_buf		*bp)
{
	struct page		*page = bp->b_pages[0];

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	put_page(page);

	bp->b_addr = NULL;
	bp->b_pages[0] = NULL;
	bp->b_pages = NULL;
	bp->b_page_count = 0;
}

/* Is this a valid daddr within the buftarg? */
bool
xmbuf_verify_daddr(
	struct xfs_buftarg	*btp,
	xfs_daddr_t		daddr)
{
	struct inode		*inode = file_inode(btp->bt_file);

	ASSERT(xfs_buftarg_is_mem(btp));

	return daddr < (inode->i_sb->s_maxbytes >> BBSHIFT);
}

/* Discard the page backing this buffer. */
static void
xmbuf_stale(
	struct xfs_buf		*bp)
{
	struct inode		*inode = file_inode(bp->b_target->bt_file);
	loff_t			pos;

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	pos = BBTOB(xfs_buf_daddr(bp));
	shmem_truncate_range(inode, pos, pos + BBTOB(bp->b_length) - 1);
}

/*
 * Finalize a buffer -- discard the backing page if it's stale, or run the
 * write verifier to detect problems.
 */
int
xmbuf_finalize(
	struct xfs_buf		*bp)
{
	xfs_failaddr_t		fa;
	int			error = 0;

	if (bp->b_flags & XBF_STALE) {
		xmbuf_stale(bp);
		return 0;
	}

	/*
	 * Although this btree is ephemeral, validate the buffer structure so
	 * that we can detect memory corruption errors and software bugs.
	 */
	fa = bp->b_ops->verify_struct(bp);
	if (fa) {
		error = -EFSCORRUPTED;
		xfs_verifier_error(bp, error, fa);
	}

	return error;
}

/*
 * Detach this xmbuf buffer from the transaction by any means necessary.
 * All buffers are direct-mapped, so they do not need bwrite.
 */
void
xmbuf_trans_bdetach(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bli = bp->b_log_item;

	ASSERT(bli != NULL);

	bli->bli_flags &= ~(XFS_BLI_DIRTY | XFS_BLI_ORDERED |
			    XFS_BLI_LOGGED | XFS_BLI_STALE);
	clear_bit(XFS_LI_DIRTY, &bli->bli_item.li_flags);

	while (bp->b_log_item != NULL)
		xfs_trans_bdetach(tp, bp);
}
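/*
 * Illustrative sketch of how a hypothetical caller might stage data in an
 * xmbuf-backed buffer under a transaction and then release it without
 * writeback.  This is not part of this file's API contract: error handling
 * is elided, "btp" is assumed to come from xmbuf_alloc(), and "tp"/"daddr"
 * are stand-ins supplied by the caller.
 *
 *	struct xfs_buf	*bp;
 *	int		error;
 *
 *	error = xfs_trans_get_buf(tp, btp, daddr,
 *			XMBUF_BLOCKSIZE >> BBSHIFT, 0, &bp);
 *	if (!error) {
 *		memset(bp->b_addr, 0, XMBUF_BLOCKSIZE);
 *		xfs_trans_log_buf(tp, bp, 0, XMBUF_BLOCKSIZE - 1);
 *
 *		// run the verifier (or discard a stale page), then drop the
 *		// log item; direct-mapped buffers never need bwrite
 *		error = xmbuf_finalize(bp);
 *		xmbuf_trans_bdetach(tp, bp);
 *		xfs_buf_relse(bp);
 *	}
 */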