1 /* 2 * Copyright (C) 2016 Oracle. All Rights Reserved. 3 * 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 2 9 * of the License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it would be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 #include "xfs.h" 21 #include "xfs_fs.h" 22 #include "xfs_shared.h" 23 #include "xfs_format.h" 24 #include "xfs_log_format.h" 25 #include "xfs_trans_resv.h" 26 #include "xfs_sb.h" 27 #include "xfs_mount.h" 28 #include "xfs_defer.h" 29 #include "xfs_alloc.h" 30 #include "xfs_error.h" 31 #include "xfs_trace.h" 32 #include "xfs_cksum.h" 33 #include "xfs_trans.h" 34 #include "xfs_bit.h" 35 #include "xfs_bmap.h" 36 #include "xfs_bmap_btree.h" 37 #include "xfs_ag_resv.h" 38 #include "xfs_trans_space.h" 39 #include "xfs_rmap_btree.h" 40 #include "xfs_btree.h" 41 42 /* 43 * Per-AG Block Reservations 44 * 45 * For some kinds of allocation group metadata structures, it is advantageous 46 * to reserve a small number of blocks in each AG so that future expansions of 47 * that data structure do not encounter ENOSPC because errors during a btree 48 * split cause the filesystem to go offline. 
 *
 * Prior to the introduction of reflink, this wasn't an issue because the free
 * space btrees maintain a reserve of space (the AGFL) to handle any expansion
 * that may be necessary; and allocations of other metadata (inodes, BMBT,
 * dir/attr) aren't restricted to a single AG. However, with reflink it is
 * possible to allocate all the space in an AG, have subsequent reflink/CoW
 * activity expand the refcount btree, and discover that there's no space left
 * to handle that expansion. Since we can calculate the maximum size of the
 * refcount btree, we can reserve space for it and avoid ENOSPC.
 *
 * Handling per-AG reservations consists of four changes to the allocator's
 * behavior: First, because these reservations are always needed, we decrease
 * the ag_max_usable counter to reflect the size of the AG after the reserved
 * blocks are taken. Second, the reservations must be reflected in the
 * fdblocks count to maintain proper accounting. Third, each AG must maintain
 * its own reserved block counter so that we can calculate the amount of space
 * that must remain free to maintain the reservations. Fourth, the "remaining
 * reserved blocks" count must be used when calculating the length of the
 * longest free extent in an AG and to clamp maxlen in the per-AG allocation
 * functions. In other words, we maintain a virtual allocation via in-core
 * accounting tricks so that we don't have to clean up after a crash. :)
 *
 * Reserved blocks can be managed by passing one of the enum xfs_ag_resv_type
 * values via struct xfs_alloc_arg or directly to the xfs_free_extent
 * function. It might seem a little funny to maintain a reservoir of blocks
 * to feed another reservoir, but the AGFL only holds enough blocks to get
 * through the next transaction. The per-AG reservation is to ensure (we
 * hope) that each AG never runs out of blocks.
Each data structure wanting
 * to use the reservation system should update ask/used in xfs_ag_resv_init.
 */

/*
 * Report whether an AG is running critically short of blocks for the given
 * reservation type.  "Critical" means the number of blocks we can still get
 * our hands on has dropped below 10% of what was originally asked for, or
 * below XFS_BTREE_MAXLEVELS (an arbitrary floor: the maximum btree height).
 *
 * Only XFS_AG_RESV_METADATA and XFS_AG_RESV_AGFL are handled; any other
 * type trips ASSERT(0) and is reported as not critical.
 */
bool
xfs_ag_resv_critical(
	struct xfs_perag		*pag,
	enum xfs_ag_resv_type		type)
{
	xfs_extlen_t			blocks_left;
	xfs_extlen_t			blocks_asked;

	/*
	 * NOTE(review): if xfs_extlen_t is unsigned (its definition is not
	 * visible here), the subtractions below wrap instead of going
	 * negative when the other pool's reservation exceeds the free
	 * count -- confirm that callers guarantee this cannot happen.
	 */
	if (type == XFS_AG_RESV_METADATA) {
		/* Free blocks, less what the AGFL pool still holds. */
		blocks_asked = pag->pag_meta_resv.ar_asked;
		blocks_left = pag->pagf_freeblks -
				pag->pag_agfl_resv.ar_reserved;
	} else if (type == XFS_AG_RESV_AGFL) {
		/* Free plus freelist blocks, less the metadata pool. */
		blocks_asked = pag->pag_agfl_resv.ar_asked;
		blocks_left = pag->pagf_freeblks + pag->pagf_flcount -
				pag->pag_meta_resv.ar_reserved;
	} else {
		ASSERT(0);
		return false;
	}

	trace_xfs_ag_resv_critical(pag, type, blocks_left);

	/* Below the absolute floor? */
	if (blocks_left < XFS_BTREE_MAXLEVELS)
		return true;

	/* Otherwise critical only if under a tenth of the original ask. */
	return blocks_left < blocks_asked / 10;
}

/*
 * How many blocks are reserved but not used, and therefore must not be
 * allocated away?
117 */ 118 xfs_extlen_t 119 xfs_ag_resv_needed( 120 struct xfs_perag *pag, 121 enum xfs_ag_resv_type type) 122 { 123 xfs_extlen_t len; 124 125 len = pag->pag_meta_resv.ar_reserved + pag->pag_agfl_resv.ar_reserved; 126 switch (type) { 127 case XFS_AG_RESV_METADATA: 128 case XFS_AG_RESV_AGFL: 129 len -= xfs_perag_resv(pag, type)->ar_reserved; 130 break; 131 case XFS_AG_RESV_NONE: 132 /* empty */ 133 break; 134 default: 135 ASSERT(0); 136 } 137 138 trace_xfs_ag_resv_needed(pag, type, len); 139 140 return len; 141 } 142 143 /* Clean out a reservation */ 144 static int 145 __xfs_ag_resv_free( 146 struct xfs_perag *pag, 147 enum xfs_ag_resv_type type) 148 { 149 struct xfs_ag_resv *resv; 150 xfs_extlen_t oldresv; 151 int error; 152 153 trace_xfs_ag_resv_free(pag, type, 0); 154 155 resv = xfs_perag_resv(pag, type); 156 pag->pag_mount->m_ag_max_usable += resv->ar_asked; 157 /* 158 * AGFL blocks are always considered "free", so whatever 159 * was reserved at mount time must be given back at umount. 160 */ 161 if (type == XFS_AG_RESV_AGFL) 162 oldresv = resv->ar_orig_reserved; 163 else 164 oldresv = resv->ar_reserved; 165 error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true); 166 resv->ar_reserved = 0; 167 resv->ar_asked = 0; 168 169 if (error) 170 trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno, 171 error, _RET_IP_); 172 return error; 173 } 174 175 /* Free a per-AG reservation. 
*/ 176 int 177 xfs_ag_resv_free( 178 struct xfs_perag *pag) 179 { 180 int error; 181 int err2; 182 183 error = __xfs_ag_resv_free(pag, XFS_AG_RESV_AGFL); 184 err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA); 185 if (err2 && !error) 186 error = err2; 187 return error; 188 } 189 190 static int 191 __xfs_ag_resv_init( 192 struct xfs_perag *pag, 193 enum xfs_ag_resv_type type, 194 xfs_extlen_t ask, 195 xfs_extlen_t used) 196 { 197 struct xfs_mount *mp = pag->pag_mount; 198 struct xfs_ag_resv *resv; 199 int error; 200 201 resv = xfs_perag_resv(pag, type); 202 if (used > ask) 203 ask = used; 204 resv->ar_asked = ask; 205 resv->ar_reserved = resv->ar_orig_reserved = ask - used; 206 mp->m_ag_max_usable -= ask; 207 208 trace_xfs_ag_resv_init(pag, type, ask); 209 210 error = xfs_mod_fdblocks(mp, -(int64_t)resv->ar_reserved, true); 211 if (error) 212 trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, 213 error, _RET_IP_); 214 215 return error; 216 } 217 218 /* Create a per-AG block reservation. */ 219 int 220 xfs_ag_resv_init( 221 struct xfs_perag *pag) 222 { 223 xfs_extlen_t ask; 224 xfs_extlen_t used; 225 int error = 0; 226 227 /* Create the metadata reservation. */ 228 if (pag->pag_meta_resv.ar_asked == 0) { 229 ask = used = 0; 230 231 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, 232 ask, used); 233 if (error) 234 goto out; 235 } 236 237 /* Create the AGFL metadata reservation */ 238 if (pag->pag_agfl_resv.ar_asked == 0) { 239 ask = used = 0; 240 241 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_AGFL, ask, used); 242 if (error) 243 goto out; 244 } 245 246 out: 247 return error; 248 } 249 250 /* Allocate a block from the reservation. 
*/ 251 void 252 xfs_ag_resv_alloc_extent( 253 struct xfs_perag *pag, 254 enum xfs_ag_resv_type type, 255 struct xfs_alloc_arg *args) 256 { 257 struct xfs_ag_resv *resv; 258 xfs_extlen_t len; 259 uint field; 260 261 trace_xfs_ag_resv_alloc_extent(pag, type, args->len); 262 263 switch (type) { 264 case XFS_AG_RESV_METADATA: 265 case XFS_AG_RESV_AGFL: 266 resv = xfs_perag_resv(pag, type); 267 break; 268 default: 269 ASSERT(0); 270 /* fall through */ 271 case XFS_AG_RESV_NONE: 272 field = args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS : 273 XFS_TRANS_SB_FDBLOCKS; 274 xfs_trans_mod_sb(args->tp, field, -(int64_t)args->len); 275 return; 276 } 277 278 len = min_t(xfs_extlen_t, args->len, resv->ar_reserved); 279 resv->ar_reserved -= len; 280 if (type == XFS_AG_RESV_AGFL) 281 return; 282 /* Allocations of reserved blocks only need on-disk sb updates... */ 283 xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len); 284 /* ...but non-reserved blocks need in-core and on-disk updates. */ 285 if (args->len > len) 286 xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_FDBLOCKS, 287 -((int64_t)args->len - len)); 288 } 289 290 /* Free a block to the reservation. */ 291 void 292 xfs_ag_resv_free_extent( 293 struct xfs_perag *pag, 294 enum xfs_ag_resv_type type, 295 struct xfs_trans *tp, 296 xfs_extlen_t len) 297 { 298 xfs_extlen_t leftover; 299 struct xfs_ag_resv *resv; 300 301 trace_xfs_ag_resv_free_extent(pag, type, len); 302 303 switch (type) { 304 case XFS_AG_RESV_METADATA: 305 case XFS_AG_RESV_AGFL: 306 resv = xfs_perag_resv(pag, type); 307 break; 308 default: 309 ASSERT(0); 310 /* fall through */ 311 case XFS_AG_RESV_NONE: 312 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len); 313 return; 314 } 315 316 leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved); 317 resv->ar_reserved += leftover; 318 if (type == XFS_AG_RESV_AGFL) 319 return; 320 /* Freeing into the reserved pool only requires on-disk update... 
*/ 321 xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len); 322 /* ...but freeing beyond that requires in-core and on-disk update. */ 323 if (len > leftover) 324 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len - leftover); 325 } 326