xref: /linux/fs/xfs/libxfs/xfs_metafile.c (revision 9fd2da71c301184d98fe37674ca8d017d1ce6600)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2018-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_bit.h"
13 #include "xfs_sb.h"
14 #include "xfs_mount.h"
15 #include "xfs_defer.h"
16 #include "xfs_trans.h"
17 #include "xfs_metafile.h"
18 #include "xfs_trace.h"
19 #include "xfs_inode.h"
20 #include "xfs_quota.h"
21 #include "xfs_errortag.h"
22 #include "xfs_error.h"
23 #include "xfs_alloc.h"
24 #include "xfs_rtgroup.h"
25 #include "xfs_rtrmap_btree.h"
26 #include "xfs_rtrefcount_btree.h"
27 
28 static const struct {
29 	enum xfs_metafile_type	mtype;
30 	const char		*name;
31 } xfs_metafile_type_strs[] = { XFS_METAFILE_TYPE_STR };
32 
33 const char *
34 xfs_metafile_type_str(enum xfs_metafile_type metatype)
35 {
36 	unsigned int	i;
37 
38 	for (i = 0; i < ARRAY_SIZE(xfs_metafile_type_strs); i++) {
39 		if (xfs_metafile_type_strs[i].mtype == metatype)
40 			return xfs_metafile_type_strs[i].name;
41 	}
42 
43 	return NULL;
44 }
45 
46 /* Set up an inode to be recognized as a metadata directory inode. */
47 void
48 xfs_metafile_set_iflag(
49 	struct xfs_trans	*tp,
50 	struct xfs_inode	*ip,
51 	enum xfs_metafile_type	metafile_type)
52 {
53 	VFS_I(ip)->i_mode &= ~0777;
54 	VFS_I(ip)->i_uid = GLOBAL_ROOT_UID;
55 	VFS_I(ip)->i_gid = GLOBAL_ROOT_GID;
56 	if (S_ISDIR(VFS_I(ip)->i_mode))
57 		ip->i_diflags |= XFS_METADIR_DIFLAGS;
58 	else
59 		ip->i_diflags |= XFS_METAFILE_DIFLAGS;
60 	ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
61 	ip->i_diflags2 |= XFS_DIFLAG2_METADATA;
62 	ip->i_metatype = metafile_type;
63 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
64 }
65 
66 /* Clear the metadata directory inode flag. */
67 void
68 xfs_metafile_clear_iflag(
69 	struct xfs_trans	*tp,
70 	struct xfs_inode	*ip)
71 {
72 	ASSERT(xfs_is_metadir_inode(ip));
73 	ASSERT(VFS_I(ip)->i_nlink == 0);
74 
75 	ip->i_diflags2 &= ~XFS_DIFLAG2_METADATA;
76 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
77 }
78 
79 /*
80  * Is the metafile reservations at or beneath a certain threshold?
81  */
82 static inline bool
83 xfs_metafile_resv_can_cover(
84 	struct xfs_mount	*mp,
85 	int64_t			rhs)
86 {
87 	/*
88 	 * The amount of space that can be allocated to this metadata file is
89 	 * the remaining reservation for the particular metadata file + the
90 	 * global free block count.  Take care of the first case to avoid
91 	 * touching the per-cpu counter.
92 	 */
93 	if (mp->m_metafile_resv_avail >= rhs)
94 		return true;
95 
96 	/*
97 	 * There aren't enough blocks left in the inode's reservation, but it
98 	 * isn't critical unless there also isn't enough free space.
99 	 */
100 	return xfs_compare_freecounter(mp, XC_FREE_BLOCKS,
101 			rhs - mp->m_metafile_resv_avail, 2048) >= 0;
102 }
103 
104 /*
105  * Is the metafile reservation critically low on blocks?  For now we'll define
106  * that as the number of blocks we can get our hands on being less than 10% of
107  * what we reserved or less than some arbitrary number (maximum btree height).
108  */
109 bool
110 xfs_metafile_resv_critical(
111 	struct xfs_mount	*mp)
112 {
113 	ASSERT(xfs_has_metadir(mp));
114 
115 	trace_xfs_metafile_resv_critical(mp, 0);
116 
117 	if (!xfs_metafile_resv_can_cover(mp, mp->m_rtbtree_maxlevels))
118 		return true;
119 
120 	if (!xfs_metafile_resv_can_cover(mp,
121 			div_u64(mp->m_metafile_resv_target, 10)))
122 		return true;
123 
124 	return XFS_TEST_ERROR(false, mp, XFS_ERRTAG_METAFILE_RESV_CRITICAL);
125 }
126 
127 /* Allocate a block from the metadata file's reservation. */
128 void
129 xfs_metafile_resv_alloc_space(
130 	struct xfs_inode	*ip,
131 	struct xfs_alloc_arg	*args)
132 {
133 	struct xfs_mount	*mp = ip->i_mount;
134 	int64_t			len = args->len;
135 
136 	ASSERT(xfs_is_metadir_inode(ip));
137 	ASSERT(args->resv == XFS_AG_RESV_METAFILE);
138 
139 	trace_xfs_metafile_resv_alloc_space(mp, args->len);
140 
141 	/*
142 	 * Allocate the blocks from the metadata inode's block reservation
143 	 * and update the ondisk sb counter.
144 	 */
145 	mutex_lock(&mp->m_metafile_resv_lock);
146 	if (mp->m_metafile_resv_avail > 0) {
147 		int64_t		from_resv;
148 
149 		from_resv = min_t(int64_t, len, mp->m_metafile_resv_avail);
150 		mp->m_metafile_resv_avail -= from_resv;
151 		xfs_mod_delalloc(ip, 0, -from_resv);
152 		xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS,
153 				-from_resv);
154 		len -= from_resv;
155 	}
156 
157 	/*
158 	 * Any allocation in excess of the reservation requires in-core and
159 	 * on-disk fdblocks updates.  If we can grab @len blocks from the
160 	 * in-core fdblocks then all we need to do is update the on-disk
161 	 * superblock; if not, then try to steal some from the transaction's
162 	 * block reservation.  Overruns are only expected for rmap btrees.
163 	 */
164 	if (len) {
165 		unsigned int	field;
166 		int		error;
167 
168 		error = xfs_dec_fdblocks(ip->i_mount, len, true);
169 		if (error)
170 			field = XFS_TRANS_SB_FDBLOCKS;
171 		else
172 			field = XFS_TRANS_SB_RES_FDBLOCKS;
173 
174 		xfs_trans_mod_sb(args->tp, field, -len);
175 	}
176 
177 	mp->m_metafile_resv_used += args->len;
178 	mutex_unlock(&mp->m_metafile_resv_lock);
179 
180 	ip->i_nblocks += args->len;
181 	xfs_trans_log_inode(args->tp, ip, XFS_ILOG_CORE);
182 }
183 
184 /* Free a block to the metadata file's reservation. */
185 void
186 xfs_metafile_resv_free_space(
187 	struct xfs_inode	*ip,
188 	struct xfs_trans	*tp,
189 	xfs_filblks_t		len)
190 {
191 	struct xfs_mount	*mp = ip->i_mount;
192 	int64_t			to_resv;
193 
194 	ASSERT(xfs_is_metadir_inode(ip));
195 
196 	trace_xfs_metafile_resv_free_space(mp, len);
197 
198 	ip->i_nblocks -= len;
199 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
200 
201 	mutex_lock(&mp->m_metafile_resv_lock);
202 	mp->m_metafile_resv_used -= len;
203 
204 	/*
205 	 * Add the freed blocks back into the inode's delalloc reservation
206 	 * until it reaches the maximum size.  Update the ondisk fdblocks only.
207 	 */
208 	to_resv = mp->m_metafile_resv_target -
209 		(mp->m_metafile_resv_used + mp->m_metafile_resv_avail);
210 	if (to_resv > 0) {
211 		to_resv = min_t(int64_t, to_resv, len);
212 		mp->m_metafile_resv_avail += to_resv;
213 		xfs_mod_delalloc(ip, 0, to_resv);
214 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, to_resv);
215 		len -= to_resv;
216 	}
217 	mutex_unlock(&mp->m_metafile_resv_lock);
218 
219 	/*
220 	 * Everything else goes back to the filesystem, so update the in-core
221 	 * and on-disk counters.
222 	 */
223 	if (len)
224 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len);
225 }
226 
227 static void
228 __xfs_metafile_resv_free(
229 	struct xfs_mount	*mp)
230 {
231 	if (mp->m_metafile_resv_avail) {
232 		xfs_mod_sb_delalloc(mp, -(int64_t)mp->m_metafile_resv_avail);
233 		xfs_add_fdblocks(mp, mp->m_metafile_resv_avail);
234 	}
235 	mp->m_metafile_resv_avail = 0;
236 	mp->m_metafile_resv_used = 0;
237 	mp->m_metafile_resv_target = 0;
238 }
239 
240 /* Release unused metafile space reservation. */
241 void
242 xfs_metafile_resv_free(
243 	struct xfs_mount	*mp)
244 {
245 	if (!xfs_has_metadir(mp))
246 		return;
247 
248 	trace_xfs_metafile_resv_free(mp, 0);
249 
250 	mutex_lock(&mp->m_metafile_resv_lock);
251 	__xfs_metafile_resv_free(mp);
252 	mutex_unlock(&mp->m_metafile_resv_lock);
253 }
254 
255 /* Set up a metafile space reservation. */
256 int
257 xfs_metafile_resv_init(
258 	struct xfs_mount	*mp)
259 {
260 	struct xfs_rtgroup	*rtg = NULL;
261 	xfs_filblks_t		used = 0, target = 0;
262 	xfs_filblks_t		hidden_space;
263 	xfs_rfsblock_t		dblocks_avail = mp->m_sb.sb_dblocks / 4;
264 	int			error = 0;
265 
266 	if (!xfs_has_metadir(mp))
267 		return 0;
268 
269 	/*
270 	 * Free any previous reservation to have a clean slate.
271 	 */
272 	mutex_lock(&mp->m_metafile_resv_lock);
273 	__xfs_metafile_resv_free(mp);
274 
275 	/*
276 	 * Currently the only btree metafiles that require reservations are the
277 	 * rtrmap and the rtrefcount.  Anything new will have to be added here
278 	 * as well.
279 	 */
280 	while ((rtg = xfs_rtgroup_next(mp, rtg))) {
281 		if (xfs_has_rtrmapbt(mp)) {
282 			used += rtg_rmap(rtg)->i_nblocks;
283 			target += xfs_rtrmapbt_calc_reserves(mp);
284 		}
285 		if (xfs_has_rtreflink(mp)) {
286 			used += rtg_refcount(rtg)->i_nblocks;
287 			target += xfs_rtrefcountbt_calc_reserves(mp);
288 		}
289 	}
290 
291 	if (!target)
292 		goto out_unlock;
293 
294 	/*
295 	 * Space taken by the per-AG metadata btrees are accounted on-disk as
296 	 * used space.  We therefore only hide the space that is reserved but
297 	 * not used by the trees.
298 	 */
299 	if (used > target)
300 		target = used;
301 	else if (target > dblocks_avail)
302 		target = dblocks_avail;
303 	hidden_space = target - used;
304 
305 	error = xfs_dec_fdblocks(mp, hidden_space, true);
306 	if (error) {
307 		trace_xfs_metafile_resv_init_error(mp, 0);
308 		goto out_unlock;
309 	}
310 
311 	xfs_mod_sb_delalloc(mp, hidden_space);
312 
313 	mp->m_metafile_resv_target = target;
314 	mp->m_metafile_resv_used = used;
315 	mp->m_metafile_resv_avail = hidden_space;
316 
317 	trace_xfs_metafile_resv_init(mp, target);
318 
319 out_unlock:
320 	mutex_unlock(&mp->m_metafile_resv_lock);
321 	return error;
322 }
323