1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (c) 2018-2024 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_bit.h" 13 #include "xfs_sb.h" 14 #include "xfs_mount.h" 15 #include "xfs_defer.h" 16 #include "xfs_trans.h" 17 #include "xfs_metafile.h" 18 #include "xfs_trace.h" 19 #include "xfs_inode.h" 20 #include "xfs_quota.h" 21 #include "xfs_errortag.h" 22 #include "xfs_error.h" 23 #include "xfs_alloc.h" 24 25 static const struct { 26 enum xfs_metafile_type mtype; 27 const char *name; 28 } xfs_metafile_type_strs[] = { XFS_METAFILE_TYPE_STR }; 29 30 const char * 31 xfs_metafile_type_str(enum xfs_metafile_type metatype) 32 { 33 unsigned int i; 34 35 for (i = 0; i < ARRAY_SIZE(xfs_metafile_type_strs); i++) { 36 if (xfs_metafile_type_strs[i].mtype == metatype) 37 return xfs_metafile_type_strs[i].name; 38 } 39 40 return NULL; 41 } 42 43 /* Set up an inode to be recognized as a metadata directory inode. */ 44 void 45 xfs_metafile_set_iflag( 46 struct xfs_trans *tp, 47 struct xfs_inode *ip, 48 enum xfs_metafile_type metafile_type) 49 { 50 VFS_I(ip)->i_mode &= ~0777; 51 VFS_I(ip)->i_uid = GLOBAL_ROOT_UID; 52 VFS_I(ip)->i_gid = GLOBAL_ROOT_GID; 53 if (S_ISDIR(VFS_I(ip)->i_mode)) 54 ip->i_diflags |= XFS_METADIR_DIFLAGS; 55 else 56 ip->i_diflags |= XFS_METAFILE_DIFLAGS; 57 ip->i_diflags2 &= ~XFS_DIFLAG2_DAX; 58 ip->i_diflags2 |= XFS_DIFLAG2_METADATA; 59 ip->i_metatype = metafile_type; 60 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 61 } 62 63 /* Clear the metadata directory inode flag. */ 64 void 65 xfs_metafile_clear_iflag( 66 struct xfs_trans *tp, 67 struct xfs_inode *ip) 68 { 69 ASSERT(xfs_is_metadir_inode(ip)); 70 ASSERT(VFS_I(ip)->i_nlink == 0); 71 72 ip->i_diflags2 &= ~XFS_DIFLAG2_METADATA; 73 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 74 } 75 76 /* 77 * Is the amount of space that could be allocated towards a given metadata 78 * file at or beneath a certain threshold? 79 */ 80 static inline bool 81 xfs_metafile_resv_can_cover( 82 struct xfs_inode *ip, 83 int64_t rhs) 84 { 85 /* 86 * The amount of space that can be allocated to this metadata file is 87 * the remaining reservation for the particular metadata file + the 88 * global free block count. Take care of the first case to avoid 89 * touching the per-cpu counter. 90 */ 91 if (ip->i_delayed_blks >= rhs) 92 return true; 93 94 /* 95 * There aren't enough blocks left in the inode's reservation, but it 96 * isn't critical unless there also isn't enough free space. 97 */ 98 return __percpu_counter_compare(&ip->i_mount->m_fdblocks, 99 rhs - ip->i_delayed_blks, 2048) >= 0; 100 } 101 102 /* 103 * Is this metadata file critically low on blocks? For now we'll define that 104 * as the number of blocks we can get our hands on being less than 10% of what 105 * we reserved or less than some arbitrary number (maximum btree height). 106 */ 107 bool 108 xfs_metafile_resv_critical( 109 struct xfs_inode *ip) 110 { 111 uint64_t asked_low_water; 112 113 if (!ip) 114 return false; 115 116 ASSERT(xfs_is_metadir_inode(ip)); 117 trace_xfs_metafile_resv_critical(ip, 0); 118 119 if (!xfs_metafile_resv_can_cover(ip, ip->i_mount->m_rtbtree_maxlevels)) 120 return true; 121 122 asked_low_water = div_u64(ip->i_meta_resv_asked, 10); 123 if (!xfs_metafile_resv_can_cover(ip, asked_low_water)) 124 return true; 125 126 return XFS_TEST_ERROR(false, ip->i_mount, 127 XFS_ERRTAG_METAFILE_RESV_CRITICAL); 128 } 129 130 /* Allocate a block from the metadata file's reservation. */ 131 void 132 xfs_metafile_resv_alloc_space( 133 struct xfs_inode *ip, 134 struct xfs_alloc_arg *args) 135 { 136 int64_t len = args->len; 137 138 ASSERT(xfs_is_metadir_inode(ip)); 139 ASSERT(args->resv == XFS_AG_RESV_METAFILE); 140 141 trace_xfs_metafile_resv_alloc_space(ip, args->len); 142 143 /* 144 * Allocate the blocks from the metadata inode's block reservation 145 * and update the ondisk sb counter. 146 */ 147 if (ip->i_delayed_blks > 0) { 148 int64_t from_resv; 149 150 from_resv = min_t(int64_t, len, ip->i_delayed_blks); 151 ip->i_delayed_blks -= from_resv; 152 xfs_mod_delalloc(ip, 0, -from_resv); 153 xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, 154 -from_resv); 155 len -= from_resv; 156 } 157 158 /* 159 * Any allocation in excess of the reservation requires in-core and 160 * on-disk fdblocks updates. If we can grab @len blocks from the 161 * in-core fdblocks then all we need to do is update the on-disk 162 * superblock; if not, then try to steal some from the transaction's 163 * block reservation. Overruns are only expected for rmap btrees. 164 */ 165 if (len) { 166 unsigned int field; 167 int error; 168 169 error = xfs_dec_fdblocks(ip->i_mount, len, true); 170 if (error) 171 field = XFS_TRANS_SB_FDBLOCKS; 172 else 173 field = XFS_TRANS_SB_RES_FDBLOCKS; 174 175 xfs_trans_mod_sb(args->tp, field, -len); 176 } 177 178 ip->i_nblocks += args->len; 179 xfs_trans_log_inode(args->tp, ip, XFS_ILOG_CORE); 180 } 181 182 /* Free a block to the metadata file's reservation. */ 183 void 184 xfs_metafile_resv_free_space( 185 struct xfs_inode *ip, 186 struct xfs_trans *tp, 187 xfs_filblks_t len) 188 { 189 int64_t to_resv; 190 191 ASSERT(xfs_is_metadir_inode(ip)); 192 trace_xfs_metafile_resv_free_space(ip, len); 193 194 ip->i_nblocks -= len; 195 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 196 197 /* 198 * Add the freed blocks back into the inode's delalloc reservation 199 * until it reaches the maximum size. Update the ondisk fdblocks only. 200 */ 201 to_resv = ip->i_meta_resv_asked - (ip->i_nblocks + ip->i_delayed_blks); 202 if (to_resv > 0) { 203 to_resv = min_t(int64_t, to_resv, len); 204 ip->i_delayed_blks += to_resv; 205 xfs_mod_delalloc(ip, 0, to_resv); 206 xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, to_resv); 207 len -= to_resv; 208 } 209 210 /* 211 * Everything else goes back to the filesystem, so update the in-core 212 * and on-disk counters. 213 */ 214 if (len) 215 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len); 216 } 217 218 /* Release a metadata file's space reservation. */ 219 void 220 xfs_metafile_resv_free( 221 struct xfs_inode *ip) 222 { 223 /* Non-btree metadata inodes don't need space reservations. */ 224 if (!ip || !ip->i_meta_resv_asked) 225 return; 226 227 ASSERT(xfs_is_metadir_inode(ip)); 228 trace_xfs_metafile_resv_free(ip, 0); 229 230 if (ip->i_delayed_blks) { 231 xfs_mod_delalloc(ip, 0, -ip->i_delayed_blks); 232 xfs_add_fdblocks(ip->i_mount, ip->i_delayed_blks); 233 ip->i_delayed_blks = 0; 234 } 235 ip->i_meta_resv_asked = 0; 236 } 237 238 /* Set up a metadata file's space reservation. */ 239 int 240 xfs_metafile_resv_init( 241 struct xfs_inode *ip, 242 xfs_filblks_t ask) 243 { 244 xfs_filblks_t hidden_space; 245 xfs_filblks_t used; 246 int error; 247 248 if (!ip || ip->i_meta_resv_asked > 0) 249 return 0; 250 251 ASSERT(xfs_is_metadir_inode(ip)); 252 253 /* 254 * Space taken by all other metadata btrees are accounted on-disk as 255 * used space. We therefore only hide the space that is reserved but 256 * not used by the trees. 257 */ 258 used = ip->i_nblocks; 259 if (used > ask) 260 ask = used; 261 hidden_space = ask - used; 262 263 error = xfs_dec_fdblocks(ip->i_mount, hidden_space, true); 264 if (error) { 265 trace_xfs_metafile_resv_init_error(ip, error, _RET_IP_); 266 return error; 267 } 268 269 xfs_mod_delalloc(ip, 0, hidden_space); 270 ip->i_delayed_blks = hidden_space; 271 ip->i_meta_resv_asked = ask; 272 273 trace_xfs_metafile_resv_init(ip, ask); 274 return 0; 275 } 276