xref: /linux/fs/xfs/libxfs/xfs_metafile.c (revision 9270102a00aabbe4d1bbb6890d514b01f1c42989)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2018-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs_platform.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_bit.h"
13 #include "xfs_sb.h"
14 #include "xfs_mount.h"
15 #include "xfs_defer.h"
16 #include "xfs_trans.h"
17 #include "xfs_metafile.h"
18 #include "xfs_trace.h"
19 #include "xfs_inode.h"
20 #include "xfs_quota.h"
21 #include "xfs_errortag.h"
22 #include "xfs_error.h"
23 #include "xfs_alloc.h"
24 #include "xfs_rtgroup.h"
25 #include "xfs_rtrmap_btree.h"
26 #include "xfs_rtrefcount_btree.h"
27 
28 static const struct {
29 	enum xfs_metafile_type	mtype;
30 	const char		*name;
31 } xfs_metafile_type_strs[] = { XFS_METAFILE_TYPE_STR };
32 
33 const char *
34 xfs_metafile_type_str(enum xfs_metafile_type metatype)
35 {
36 	unsigned int	i;
37 
38 	for (i = 0; i < ARRAY_SIZE(xfs_metafile_type_strs); i++) {
39 		if (xfs_metafile_type_strs[i].mtype == metatype)
40 			return xfs_metafile_type_strs[i].name;
41 	}
42 
43 	return NULL;
44 }
45 
46 /* Set up an inode to be recognized as a metadata directory inode. */
47 void
48 xfs_metafile_set_iflag(
49 	struct xfs_trans	*tp,
50 	struct xfs_inode	*ip,
51 	enum xfs_metafile_type	metafile_type)
52 {
53 	VFS_I(ip)->i_mode &= ~0777;
54 	VFS_I(ip)->i_uid = GLOBAL_ROOT_UID;
55 	VFS_I(ip)->i_gid = GLOBAL_ROOT_GID;
56 	if (S_ISDIR(VFS_I(ip)->i_mode))
57 		ip->i_diflags |= XFS_METADIR_DIFLAGS;
58 	else
59 		ip->i_diflags |= XFS_METAFILE_DIFLAGS;
60 	ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
61 	ip->i_diflags2 |= XFS_DIFLAG2_METADATA;
62 	ip->i_metatype = metafile_type;
63 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
64 
65 	XFS_STATS_DEC(ip->i_mount, xs_inodes_active);
66 	XFS_STATS_INC(ip->i_mount, xs_inodes_meta);
67 }
68 
69 /* Clear the metadata directory inode flag. */
70 void
71 xfs_metafile_clear_iflag(
72 	struct xfs_trans	*tp,
73 	struct xfs_inode	*ip)
74 {
75 	ASSERT(xfs_is_metadir_inode(ip));
76 	ASSERT(VFS_I(ip)->i_nlink == 0);
77 
78 	ip->i_diflags2 &= ~XFS_DIFLAG2_METADATA;
79 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
80 	XFS_STATS_INC(ip->i_mount, xs_inodes_active);
81 	XFS_STATS_DEC(ip->i_mount, xs_inodes_meta);
82 }
83 
84 /*
85  * Is the metafile reservations at or beneath a certain threshold?
86  */
87 static inline bool
88 xfs_metafile_resv_can_cover(
89 	struct xfs_mount	*mp,
90 	int64_t			rhs)
91 {
92 	/*
93 	 * The amount of space that can be allocated to this metadata file is
94 	 * the remaining reservation for the particular metadata file + the
95 	 * global free block count.  Take care of the first case to avoid
96 	 * touching the per-cpu counter.
97 	 */
98 	if (mp->m_metafile_resv_avail >= rhs)
99 		return true;
100 
101 	/*
102 	 * There aren't enough blocks left in the inode's reservation, but it
103 	 * isn't critical unless there also isn't enough free space.
104 	 */
105 	return xfs_compare_freecounter(mp, XC_FREE_BLOCKS,
106 			rhs - mp->m_metafile_resv_avail, 2048) >= 0;
107 }
108 
109 /*
110  * Is the metafile reservation critically low on blocks?  For now we'll define
111  * that as the number of blocks we can get our hands on being less than 10% of
112  * what we reserved or less than some arbitrary number (maximum btree height).
113  */
114 bool
115 xfs_metafile_resv_critical(
116 	struct xfs_mount	*mp)
117 {
118 	ASSERT(xfs_has_metadir(mp));
119 
120 	trace_xfs_metafile_resv_critical(mp, 0);
121 
122 	if (!xfs_metafile_resv_can_cover(mp, mp->m_rtbtree_maxlevels))
123 		return true;
124 
125 	if (!xfs_metafile_resv_can_cover(mp,
126 			div_u64(mp->m_metafile_resv_target, 10)))
127 		return true;
128 
129 	return XFS_TEST_ERROR(mp, XFS_ERRTAG_METAFILE_RESV_CRITICAL);
130 }
131 
132 /* Allocate a block from the metadata file's reservation. */
133 void
134 xfs_metafile_resv_alloc_space(
135 	struct xfs_inode	*ip,
136 	struct xfs_alloc_arg	*args)
137 {
138 	struct xfs_mount	*mp = ip->i_mount;
139 	int64_t			len = args->len;
140 
141 	ASSERT(xfs_is_metadir_inode(ip));
142 	ASSERT(args->resv == XFS_AG_RESV_METAFILE);
143 
144 	trace_xfs_metafile_resv_alloc_space(mp, args->len);
145 
146 	/*
147 	 * Allocate the blocks from the metadata inode's block reservation
148 	 * and update the ondisk sb counter.
149 	 */
150 	mutex_lock(&mp->m_metafile_resv_lock);
151 	if (mp->m_metafile_resv_avail > 0) {
152 		int64_t		from_resv;
153 
154 		from_resv = min_t(int64_t, len, mp->m_metafile_resv_avail);
155 		mp->m_metafile_resv_avail -= from_resv;
156 		xfs_mod_delalloc(ip, 0, -from_resv);
157 		xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS,
158 				-from_resv);
159 		len -= from_resv;
160 	}
161 
162 	/*
163 	 * Any allocation in excess of the reservation requires in-core and
164 	 * on-disk fdblocks updates.  If we can grab @len blocks from the
165 	 * in-core fdblocks then all we need to do is update the on-disk
166 	 * superblock; if not, then try to steal some from the transaction's
167 	 * block reservation.  Overruns are only expected for rmap btrees.
168 	 */
169 	if (len) {
170 		unsigned int	field;
171 		int		error;
172 
173 		error = xfs_dec_fdblocks(ip->i_mount, len, true);
174 		if (error)
175 			field = XFS_TRANS_SB_FDBLOCKS;
176 		else
177 			field = XFS_TRANS_SB_RES_FDBLOCKS;
178 
179 		xfs_trans_mod_sb(args->tp, field, -len);
180 	}
181 
182 	mp->m_metafile_resv_used += args->len;
183 	mutex_unlock(&mp->m_metafile_resv_lock);
184 
185 	ip->i_nblocks += args->len;
186 	xfs_trans_log_inode(args->tp, ip, XFS_ILOG_CORE);
187 }
188 
189 /* Free a block to the metadata file's reservation. */
190 void
191 xfs_metafile_resv_free_space(
192 	struct xfs_inode	*ip,
193 	struct xfs_trans	*tp,
194 	xfs_filblks_t		len)
195 {
196 	struct xfs_mount	*mp = ip->i_mount;
197 	int64_t			to_resv;
198 
199 	ASSERT(xfs_is_metadir_inode(ip));
200 
201 	trace_xfs_metafile_resv_free_space(mp, len);
202 
203 	ip->i_nblocks -= len;
204 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
205 
206 	mutex_lock(&mp->m_metafile_resv_lock);
207 	mp->m_metafile_resv_used -= len;
208 
209 	/*
210 	 * Add the freed blocks back into the inode's delalloc reservation
211 	 * until it reaches the maximum size.  Update the ondisk fdblocks only.
212 	 */
213 	to_resv = mp->m_metafile_resv_target -
214 		(mp->m_metafile_resv_used + mp->m_metafile_resv_avail);
215 	if (to_resv > 0) {
216 		to_resv = min_t(int64_t, to_resv, len);
217 		mp->m_metafile_resv_avail += to_resv;
218 		xfs_mod_delalloc(ip, 0, to_resv);
219 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, to_resv);
220 		len -= to_resv;
221 	}
222 	mutex_unlock(&mp->m_metafile_resv_lock);
223 
224 	/*
225 	 * Everything else goes back to the filesystem, so update the in-core
226 	 * and on-disk counters.
227 	 */
228 	if (len)
229 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len);
230 }
231 
232 static void
233 __xfs_metafile_resv_free(
234 	struct xfs_mount	*mp)
235 {
236 	if (mp->m_metafile_resv_avail) {
237 		xfs_mod_sb_delalloc(mp, -(int64_t)mp->m_metafile_resv_avail);
238 		xfs_add_fdblocks(mp, mp->m_metafile_resv_avail);
239 	}
240 	mp->m_metafile_resv_avail = 0;
241 	mp->m_metafile_resv_used = 0;
242 	mp->m_metafile_resv_target = 0;
243 }
244 
245 /* Release unused metafile space reservation. */
246 void
247 xfs_metafile_resv_free(
248 	struct xfs_mount	*mp)
249 {
250 	if (!xfs_has_metadir(mp))
251 		return;
252 
253 	trace_xfs_metafile_resv_free(mp, 0);
254 
255 	mutex_lock(&mp->m_metafile_resv_lock);
256 	__xfs_metafile_resv_free(mp);
257 	mutex_unlock(&mp->m_metafile_resv_lock);
258 }
259 
260 /* Set up a metafile space reservation. */
261 int
262 xfs_metafile_resv_init(
263 	struct xfs_mount	*mp)
264 {
265 	struct xfs_rtgroup	*rtg = NULL;
266 	xfs_filblks_t		used = 0, target = 0;
267 	xfs_filblks_t		hidden_space;
268 	xfs_rfsblock_t		dblocks_avail = mp->m_sb.sb_dblocks / 4;
269 	int			error = 0;
270 
271 	if (!xfs_has_metadir(mp))
272 		return 0;
273 
274 	/*
275 	 * Free any previous reservation to have a clean slate.
276 	 */
277 	mutex_lock(&mp->m_metafile_resv_lock);
278 	__xfs_metafile_resv_free(mp);
279 
280 	/*
281 	 * Currently the only btree metafiles that require reservations are the
282 	 * rtrmap and the rtrefcount.  Anything new will have to be added here
283 	 * as well.
284 	 */
285 	while ((rtg = xfs_rtgroup_next(mp, rtg))) {
286 		if (xfs_has_rtrmapbt(mp)) {
287 			used += rtg_rmap(rtg)->i_nblocks;
288 			target += xfs_rtrmapbt_calc_reserves(mp);
289 		}
290 		if (xfs_has_rtreflink(mp)) {
291 			used += rtg_refcount(rtg)->i_nblocks;
292 			target += xfs_rtrefcountbt_calc_reserves(mp);
293 		}
294 	}
295 
296 	if (!target)
297 		goto out_unlock;
298 
299 	/*
300 	 * Space taken by the per-AG metadata btrees are accounted on-disk as
301 	 * used space.  We therefore only hide the space that is reserved but
302 	 * not used by the trees.
303 	 */
304 	if (used > target)
305 		target = used;
306 	else if (target > dblocks_avail)
307 		target = dblocks_avail;
308 	hidden_space = target - used;
309 
310 	error = xfs_dec_fdblocks(mp, hidden_space, true);
311 	if (error) {
312 		trace_xfs_metafile_resv_init_error(mp, 0);
313 		goto out_unlock;
314 	}
315 
316 	xfs_mod_sb_delalloc(mp, hidden_space);
317 
318 	mp->m_metafile_resv_target = target;
319 	mp->m_metafile_resv_used = used;
320 	mp->m_metafile_resv_avail = hidden_space;
321 
322 	trace_xfs_metafile_resv_init(mp, target);
323 
324 out_unlock:
325 	mutex_unlock(&mp->m_metafile_resv_lock);
326 	return error;
327 }
328