1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_bit.h"
13 #include "xfs_sb.h"
14 #include "xfs_mount.h"
15 #include "xfs_defer.h"
16 #include "xfs_trans.h"
17 #include "xfs_metafile.h"
18 #include "xfs_trace.h"
19 #include "xfs_inode.h"
20 #include "xfs_quota.h"
21 #include "xfs_errortag.h"
22 #include "xfs_error.h"
23 #include "xfs_alloc.h"
24 #include "xfs_rtgroup.h"
25 #include "xfs_rtrmap_btree.h"
26 #include "xfs_rtrefcount_btree.h"
27
/*
 * Lookup table pairing each metafile type with a printable name.  The
 * entries are supplied by the XFS_METAFILE_TYPE_STR initializer list, which
 * is defined outside this file; xfs_metafile_type_str() scans this table.
 */
static const struct {
	enum xfs_metafile_type	mtype;	/* metafile type being named */
	const char		*name;	/* printable name for @mtype */
} xfs_metafile_type_strs[] = { XFS_METAFILE_TYPE_STR };
32
33 const char *
xfs_metafile_type_str(enum xfs_metafile_type metatype)34 xfs_metafile_type_str(enum xfs_metafile_type metatype)
35 {
36 unsigned int i;
37
38 for (i = 0; i < ARRAY_SIZE(xfs_metafile_type_strs); i++) {
39 if (xfs_metafile_type_strs[i].mtype == metatype)
40 return xfs_metafile_type_strs[i].name;
41 }
42
43 return NULL;
44 }
45
46 /* Set up an inode to be recognized as a metadata directory inode. */
47 void
xfs_metafile_set_iflag(struct xfs_trans * tp,struct xfs_inode * ip,enum xfs_metafile_type metafile_type)48 xfs_metafile_set_iflag(
49 struct xfs_trans *tp,
50 struct xfs_inode *ip,
51 enum xfs_metafile_type metafile_type)
52 {
53 VFS_I(ip)->i_mode &= ~0777;
54 VFS_I(ip)->i_uid = GLOBAL_ROOT_UID;
55 VFS_I(ip)->i_gid = GLOBAL_ROOT_GID;
56 if (S_ISDIR(VFS_I(ip)->i_mode))
57 ip->i_diflags |= XFS_METADIR_DIFLAGS;
58 else
59 ip->i_diflags |= XFS_METAFILE_DIFLAGS;
60 ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
61 ip->i_diflags2 |= XFS_DIFLAG2_METADATA;
62 ip->i_metatype = metafile_type;
63 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
64 }
65
66 /* Clear the metadata directory inode flag. */
67 void
xfs_metafile_clear_iflag(struct xfs_trans * tp,struct xfs_inode * ip)68 xfs_metafile_clear_iflag(
69 struct xfs_trans *tp,
70 struct xfs_inode *ip)
71 {
72 ASSERT(xfs_is_metadir_inode(ip));
73 ASSERT(VFS_I(ip)->i_nlink == 0);
74
75 ip->i_diflags2 &= ~XFS_DIFLAG2_METADATA;
76 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
77 }
78
79 /*
80 * Is the metafile reservations at or beneath a certain threshold?
81 */
82 static inline bool
xfs_metafile_resv_can_cover(struct xfs_mount * mp,int64_t rhs)83 xfs_metafile_resv_can_cover(
84 struct xfs_mount *mp,
85 int64_t rhs)
86 {
87 /*
88 * The amount of space that can be allocated to this metadata file is
89 * the remaining reservation for the particular metadata file + the
90 * global free block count. Take care of the first case to avoid
91 * touching the per-cpu counter.
92 */
93 if (mp->m_metafile_resv_avail >= rhs)
94 return true;
95
96 /*
97 * There aren't enough blocks left in the inode's reservation, but it
98 * isn't critical unless there also isn't enough free space.
99 */
100 return xfs_compare_freecounter(mp, XC_FREE_BLOCKS,
101 rhs - mp->m_metafile_resv_avail, 2048) >= 0;
102 }
103
104 /*
105 * Is the metafile reservation critically low on blocks? For now we'll define
106 * that as the number of blocks we can get our hands on being less than 10% of
107 * what we reserved or less than some arbitrary number (maximum btree height).
108 */
109 bool
xfs_metafile_resv_critical(struct xfs_mount * mp)110 xfs_metafile_resv_critical(
111 struct xfs_mount *mp)
112 {
113 ASSERT(xfs_has_metadir(mp));
114
115 trace_xfs_metafile_resv_critical(mp, 0);
116
117 if (!xfs_metafile_resv_can_cover(mp, mp->m_rtbtree_maxlevels))
118 return true;
119
120 if (!xfs_metafile_resv_can_cover(mp,
121 div_u64(mp->m_metafile_resv_target, 10)))
122 return true;
123
124 return XFS_TEST_ERROR(false, mp, XFS_ERRTAG_METAFILE_RESV_CRITICAL);
125 }
126
127 /* Allocate a block from the metadata file's reservation. */
128 void
xfs_metafile_resv_alloc_space(struct xfs_inode * ip,struct xfs_alloc_arg * args)129 xfs_metafile_resv_alloc_space(
130 struct xfs_inode *ip,
131 struct xfs_alloc_arg *args)
132 {
133 struct xfs_mount *mp = ip->i_mount;
134 int64_t len = args->len;
135
136 ASSERT(xfs_is_metadir_inode(ip));
137 ASSERT(args->resv == XFS_AG_RESV_METAFILE);
138
139 trace_xfs_metafile_resv_alloc_space(mp, args->len);
140
141 /*
142 * Allocate the blocks from the metadata inode's block reservation
143 * and update the ondisk sb counter.
144 */
145 mutex_lock(&mp->m_metafile_resv_lock);
146 if (mp->m_metafile_resv_avail > 0) {
147 int64_t from_resv;
148
149 from_resv = min_t(int64_t, len, mp->m_metafile_resv_avail);
150 mp->m_metafile_resv_avail -= from_resv;
151 xfs_mod_delalloc(ip, 0, -from_resv);
152 xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS,
153 -from_resv);
154 len -= from_resv;
155 }
156
157 /*
158 * Any allocation in excess of the reservation requires in-core and
159 * on-disk fdblocks updates. If we can grab @len blocks from the
160 * in-core fdblocks then all we need to do is update the on-disk
161 * superblock; if not, then try to steal some from the transaction's
162 * block reservation. Overruns are only expected for rmap btrees.
163 */
164 if (len) {
165 unsigned int field;
166 int error;
167
168 error = xfs_dec_fdblocks(ip->i_mount, len, true);
169 if (error)
170 field = XFS_TRANS_SB_FDBLOCKS;
171 else
172 field = XFS_TRANS_SB_RES_FDBLOCKS;
173
174 xfs_trans_mod_sb(args->tp, field, -len);
175 }
176
177 mp->m_metafile_resv_used += args->len;
178 mutex_unlock(&mp->m_metafile_resv_lock);
179
180 ip->i_nblocks += args->len;
181 xfs_trans_log_inode(args->tp, ip, XFS_ILOG_CORE);
182 }
183
184 /* Free a block to the metadata file's reservation. */
185 void
xfs_metafile_resv_free_space(struct xfs_inode * ip,struct xfs_trans * tp,xfs_filblks_t len)186 xfs_metafile_resv_free_space(
187 struct xfs_inode *ip,
188 struct xfs_trans *tp,
189 xfs_filblks_t len)
190 {
191 struct xfs_mount *mp = ip->i_mount;
192 int64_t to_resv;
193
194 ASSERT(xfs_is_metadir_inode(ip));
195
196 trace_xfs_metafile_resv_free_space(mp, len);
197
198 ip->i_nblocks -= len;
199 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
200
201 mutex_lock(&mp->m_metafile_resv_lock);
202 mp->m_metafile_resv_used -= len;
203
204 /*
205 * Add the freed blocks back into the inode's delalloc reservation
206 * until it reaches the maximum size. Update the ondisk fdblocks only.
207 */
208 to_resv = mp->m_metafile_resv_target -
209 (mp->m_metafile_resv_used + mp->m_metafile_resv_avail);
210 if (to_resv > 0) {
211 to_resv = min_t(int64_t, to_resv, len);
212 mp->m_metafile_resv_avail += to_resv;
213 xfs_mod_delalloc(ip, 0, to_resv);
214 xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, to_resv);
215 len -= to_resv;
216 }
217 mutex_unlock(&mp->m_metafile_resv_lock);
218
219 /*
220 * Everything else goes back to the filesystem, so update the in-core
221 * and on-disk counters.
222 */
223 if (len)
224 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len);
225 }
226
227 static void
__xfs_metafile_resv_free(struct xfs_mount * mp)228 __xfs_metafile_resv_free(
229 struct xfs_mount *mp)
230 {
231 if (mp->m_metafile_resv_avail) {
232 xfs_mod_sb_delalloc(mp, -(int64_t)mp->m_metafile_resv_avail);
233 xfs_add_fdblocks(mp, mp->m_metafile_resv_avail);
234 }
235 mp->m_metafile_resv_avail = 0;
236 mp->m_metafile_resv_used = 0;
237 mp->m_metafile_resv_target = 0;
238 }
239
240 /* Release unused metafile space reservation. */
241 void
xfs_metafile_resv_free(struct xfs_mount * mp)242 xfs_metafile_resv_free(
243 struct xfs_mount *mp)
244 {
245 if (!xfs_has_metadir(mp))
246 return;
247
248 trace_xfs_metafile_resv_free(mp, 0);
249
250 mutex_lock(&mp->m_metafile_resv_lock);
251 __xfs_metafile_resv_free(mp);
252 mutex_unlock(&mp->m_metafile_resv_lock);
253 }
254
255 /* Set up a metafile space reservation. */
256 int
xfs_metafile_resv_init(struct xfs_mount * mp)257 xfs_metafile_resv_init(
258 struct xfs_mount *mp)
259 {
260 struct xfs_rtgroup *rtg = NULL;
261 xfs_filblks_t used = 0, target = 0;
262 xfs_filblks_t hidden_space;
263 xfs_rfsblock_t dblocks_avail = mp->m_sb.sb_dblocks / 4;
264 int error = 0;
265
266 if (!xfs_has_metadir(mp))
267 return 0;
268
269 /*
270 * Free any previous reservation to have a clean slate.
271 */
272 mutex_lock(&mp->m_metafile_resv_lock);
273 __xfs_metafile_resv_free(mp);
274
275 /*
276 * Currently the only btree metafiles that require reservations are the
277 * rtrmap and the rtrefcount. Anything new will have to be added here
278 * as well.
279 */
280 while ((rtg = xfs_rtgroup_next(mp, rtg))) {
281 if (xfs_has_rtrmapbt(mp)) {
282 used += rtg_rmap(rtg)->i_nblocks;
283 target += xfs_rtrmapbt_calc_reserves(mp);
284 }
285 if (xfs_has_rtreflink(mp)) {
286 used += rtg_refcount(rtg)->i_nblocks;
287 target += xfs_rtrefcountbt_calc_reserves(mp);
288 }
289 }
290
291 if (!target)
292 goto out_unlock;
293
294 /*
295 * Space taken by the per-AG metadata btrees are accounted on-disk as
296 * used space. We therefore only hide the space that is reserved but
297 * not used by the trees.
298 */
299 if (used > target)
300 target = used;
301 else if (target > dblocks_avail)
302 target = dblocks_avail;
303 hidden_space = target - used;
304
305 error = xfs_dec_fdblocks(mp, hidden_space, true);
306 if (error) {
307 trace_xfs_metafile_resv_init_error(mp, 0);
308 goto out_unlock;
309 }
310
311 xfs_mod_sb_delalloc(mp, hidden_space);
312
313 mp->m_metafile_resv_target = target;
314 mp->m_metafile_resv_used = used;
315 mp->m_metafile_resv_avail = hidden_space;
316
317 trace_xfs_metafile_resv_init(mp, target);
318
319 out_unlock:
320 mutex_unlock(&mp->m_metafile_resv_lock);
321 return error;
322 }
323