// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2023-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_buf.h"
#include "xfs_buf_mem.h"
#include "xfs_trace.h"
#include <linux/shmem_fs.h>

/*
 * Buffer Cache for In-Memory Files
 * ================================
 *
 * Online fsck wants to create ephemeral ordered recordsets.  The existing
 * btree infrastructure can do this, but we need the buffer cache to target
 * memory instead of block devices.
 *
 * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
 * requirements.  Therefore, the xmbuf mechanism uses an unlinked shmem file to
 * store our staging data.  This file is not installed in the file descriptor
 * table so that user programs cannot access the data, which means that the
 * xmbuf must be freed with xmbuf_free.
 *
 * xmbufs assume that the caller will handle all required concurrency
 * management; standard vfs locks (freezer and inode) are not taken.  Reads
 * and writes are satisfied directly from the page cache.
 *
 * The only supported block size is PAGE_SIZE, and we cannot use highmem.
 */

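/*
 * Illustration only (hypothetical helper, not built or called anywhere): a
 * minimal sketch of the lifecycle described above, assuming the caller holds
 * a live mount and serializes all access.  Real users drive xmbufs through
 * the btree and buffer cache code rather than calling xfs_buf_get() by hand.
 */
static inline int
xmbuf_example_lifecycle(
	struct xfs_mount	*mp)
{
	struct xfs_buftarg	*btp;
	struct xfs_buf		*bp;
	int			error;

	error = xmbuf_alloc(mp, "xmbuf_example", &btp);
	if (error)
		return error;

	/* one XMBUF_BLOCKSIZE (== PAGE_SIZE) block at daddr 0 */
	error = xfs_buf_get(btp, 0, BTOBB(XMBUF_BLOCKSIZE), &bp);
	if (error)
		goto out_free;

	memset(bp->b_addr, 0, XMBUF_BLOCKSIZE);
	xfs_buf_relse(bp);

out_free:
	xmbuf_free(btp);
	return error;
}
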
/*
 * shmem files used to back an in-memory buffer cache must not be exposed to
 * userspace.  Upper layers must coordinate access to the one handle returned
 * by the constructor, so establish a separate lock class for xmbufs to avoid
 * confusing lockdep.
 */
static struct lock_class_key xmbuf_i_mutex_key;

/*
 * Allocate a buffer cache target for a memory-backed file and set up the
 * buffer target.
 */
int
xmbuf_alloc(
	struct xfs_mount	*mp,
	const char		*descr,
	struct xfs_buftarg	**btpp)
{
	struct file		*file;
	struct inode		*inode;
	struct xfs_buftarg	*btp;
	int			error;

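	/* bt_cache is a flexible array member; size the allocation for one. */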
	btp = kzalloc(struct_size(btp, bt_cache, 1), GFP_KERNEL);
	if (!btp)
		return -ENOMEM;

	file = shmem_kernel_file_setup(descr, 0, 0);
	if (IS_ERR(file)) {
		error = PTR_ERR(file);
		goto out_free_btp;
	}
	inode = file_inode(file);

	/* private file, private locking */
	lockdep_set_class(&inode->i_rwsem, &xmbuf_i_mutex_key);

	/*
	 * We don't want to bother with kmapping data during repair, so don't
	 * allow highmem pages to back this mapping.
	 */
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);

	/* ensure all writes are below EOF to avoid pagecache zeroing */
	i_size_write(inode, inode->i_sb->s_maxbytes);

	error = xfs_buf_cache_init(btp->bt_cache);
	if (error)
		goto out_file;

	/* Initialize buffer target */
	btp->bt_mount = mp;
	btp->bt_dev = (dev_t)-1U;
	btp->bt_bdev = NULL; /* in-memory buftargs have no bdev */
	btp->bt_file = file;
	btp->bt_meta_sectorsize = XMBUF_BLOCKSIZE;
	btp->bt_meta_sectormask = XMBUF_BLOCKSIZE - 1;

	error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr);
	if (error)
		goto out_bcache;

	/* Don't fire the tracepoint until bt_file and friends are set up. */
	trace_xmbuf_create(btp);

	*btpp = btp;
	return 0;

out_bcache:
	xfs_buf_cache_destroy(btp->bt_cache);
out_file:
	fput(file);
out_free_btp:
	kfree(btp);
	return error;
}

/* Free a buffer cache target for a memory-backed buffer cache. */
void
xmbuf_free(
	struct xfs_buftarg	*btp)
{
	ASSERT(xfs_buftarg_is_mem(btp));
	ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);

	trace_xmbuf_free(btp);

	xfs_destroy_buftarg(btp);
	xfs_buf_cache_destroy(btp->bt_cache);
	fput(btp->bt_file);
	kfree(btp);
}

/* Directly map a shmem page into the buffer cache. */
int
xmbuf_map_page(
	struct xfs_buf		*bp)
{
	struct inode		*inode = file_inode(bp->b_target->bt_file);
	struct folio		*folio = NULL;
	struct page		*page;
	loff_t			pos = BBTOB(xfs_buf_daddr(bp));
	int			error;

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	if (bp->b_map_count != 1)
		return -ENOMEM;
	if (BBTOB(bp->b_length) != XMBUF_BLOCKSIZE)
		return -ENOMEM;
	if (offset_in_page(pos) != 0) {
		ASSERT(offset_in_page(pos) == 0);
		return -ENOMEM;
	}

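	/* shmem_get_folio returns the folio locked and with a reference held */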
	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio, SGP_CACHE);
	if (error)
		return error;

	if (filemap_check_wb_err(inode->i_mapping, 0)) {
		folio_unlock(folio);
		folio_put(folio);
		return -EIO;
	}

	page = folio_file_page(folio, pos >> PAGE_SHIFT);

	/*
	 * Mark the page dirty so that it won't be reclaimed once we drop the
	 * (potentially last) reference in xmbuf_unmap_page.
	 */
	set_page_dirty(page);
	unlock_page(page);

	bp->b_addr = page_address(page);
	bp->b_pages = bp->b_page_array;
	bp->b_pages[0] = page;
	bp->b_page_count = 1;
	return 0;
}

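/*
 * Worked example (illustration only, not called anywhere): with
 * XMBUF_BLOCKSIZE == PAGE_SIZE == 4096, a buffer at daddr 8 sits at byte
 * offset BBTOB(8) == 4096, i.e. page index 1 of the shmem mapping.  This
 * hypothetical helper restates the arithmetic xmbuf_map_page performs.
 */
static inline pgoff_t
xmbuf_example_pgoff(
	struct xfs_buf		*bp)
{
	loff_t			pos = BBTOB(xfs_buf_daddr(bp));

	ASSERT(offset_in_page(pos) == 0);	/* xmbuf I/O is page aligned */
	return pos >> PAGE_SHIFT;
}
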
/* Unmap a shmem page that was mapped into the buffer cache. */
void
xmbuf_unmap_page(
	struct xfs_buf		*bp)
{
	struct page		*page = bp->b_pages[0];

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	put_page(page);

	bp->b_addr = NULL;
	bp->b_pages[0] = NULL;
	bp->b_pages = NULL;
	bp->b_page_count = 0;
}
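
/*
 * Illustration only (hypothetical, not part of this file's API): the buffer
 * cache pairs xmbuf_map_page and xmbuf_unmap_page around the lifetime of a
 * buffer's backing page.  A sketch of that pairing, assuming @bp is a
 * single-block buffer from an in-memory buftarg:
 */
static inline int
xmbuf_example_roundtrip(
	struct xfs_buf		*bp)
{
	int			error;

	error = xmbuf_map_page(bp);
	if (error)
		return error;

	/* b_addr now points at the (dirty, referenced) shmem page */
	memset(bp->b_addr, 0, XMBUF_BLOCKSIZE);

	xmbuf_unmap_page(bp);
	return 0;
}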

/* Is this a valid daddr within the buftarg? */
bool
xmbuf_verify_daddr(
	struct xfs_buftarg	*btp,
	xfs_daddr_t		daddr)
{
	struct inode		*inode = file_inode(btp->bt_file);

	ASSERT(xfs_buftarg_is_mem(btp));

	return daddr < (inode->i_sb->s_maxbytes >> BBSHIFT);
}
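
/*
 * Illustration only: a hypothetical caller validating an untrusted daddr
 * before using it.  shmem sets s_maxbytes to MAX_LFS_FILESIZE, so the check
 * above bounds daddr by the largest byte offset the backing file could ever
 * hold, converted to 512-byte basic blocks.
 */
static inline int
xmbuf_example_check_daddr(
	struct xfs_buftarg	*btp,
	xfs_daddr_t		daddr)
{
	if (!xmbuf_verify_daddr(btp, daddr))
		return -EFSCORRUPTED;
	return 0;
}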
203