1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_format.h" 9 #include "xfs_log_format.h" 10 #include "xfs_shared.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_bit.h" 13 #include "xfs_sb.h" 14 #include "xfs_mount.h" 15 #include "xfs_defer.h" 16 #include "xfs_inode.h" 17 #include "xfs_btree.h" 18 #include "xfs_rmap.h" 19 #include "xfs_alloc_btree.h" 20 #include "xfs_alloc.h" 21 #include "xfs_extent_busy.h" 22 #include "xfs_errortag.h" 23 #include "xfs_error.h" 24 #include "xfs_cksum.h" 25 #include "xfs_trace.h" 26 #include "xfs_trans.h" 27 #include "xfs_buf_item.h" 28 #include "xfs_log.h" 29 #include "xfs_ag_resv.h" 30 #include "xfs_bmap.h" 31 32 extern kmem_zone_t *xfs_bmap_free_item_zone; 33 34 struct workqueue_struct *xfs_alloc_wq; 35 36 #define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b))) 37 38 #define XFSA_FIXUP_BNO_OK 1 39 #define XFSA_FIXUP_CNT_OK 2 40 41 STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); 42 STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); 43 STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); 44 STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, 45 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); 46 47 /* 48 * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in 49 * the beginning of the block for a proper header with the location information 50 * and CRC. 51 */ 52 unsigned int 53 xfs_agfl_size( 54 struct xfs_mount *mp) 55 { 56 unsigned int size = mp->m_sb.sb_sectsize; 57 58 if (xfs_sb_version_hascrc(&mp->m_sb)) 59 size -= sizeof(struct xfs_agfl); 60 61 return size / sizeof(xfs_agblock_t); 62 } 63 64 unsigned int 65 xfs_refc_block( 66 struct xfs_mount *mp) 67 { 68 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 69 return XFS_RMAP_BLOCK(mp) + 1; 70 if (xfs_sb_version_hasfinobt(&mp->m_sb)) 71 return XFS_FIBT_BLOCK(mp) + 1; 72 return XFS_IBT_BLOCK(mp) + 1; 73 } 74 75 xfs_extlen_t 76 xfs_prealloc_blocks( 77 struct xfs_mount *mp) 78 { 79 if (xfs_sb_version_hasreflink(&mp->m_sb)) 80 return xfs_refc_block(mp) + 1; 81 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 82 return XFS_RMAP_BLOCK(mp) + 1; 83 if (xfs_sb_version_hasfinobt(&mp->m_sb)) 84 return XFS_FIBT_BLOCK(mp) + 1; 85 return XFS_IBT_BLOCK(mp) + 1; 86 } 87 88 /* 89 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of 90 * AGF buffer (PV 947395), we place constraints on the relationship among 91 * actual allocations for data blocks, freelist blocks, and potential file data 92 * bmap btree blocks. However, these restrictions may result in no actual space 93 * allocated for a delayed extent, for example, a data block in a certain AG is 94 * allocated but there is no additional block for the additional bmap btree 95 * block due to a split of the bmap btree of the file. The result of this may 96 * lead to an infinite loop when the file gets flushed to disk and all delayed 97 * extents need to be actually allocated. To get around this, we explicitly set 98 * aside a few blocks which will not be reserved in delayed allocation. 99 * 100 * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a 101 * potential split of the file's bmap btree. 102 */ 103 unsigned int 104 xfs_alloc_set_aside( 105 struct xfs_mount *mp) 106 { 107 return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4); 108 } 109 110 /* 111 * When deciding how much space to allocate out of an AG, we limit the 112 * allocation maximum size to the size the AG. However, we cannot use all the 113 * blocks in the AG - some are permanently used by metadata. These 114 * blocks are generally: 115 * - the AG superblock, AGF, AGI and AGFL 116 * - the AGF (bno and cnt) and AGI btree root blocks, and optionally 117 * the AGI free inode and rmap btree root blocks. 118 * - blocks on the AGFL according to xfs_alloc_set_aside() limits 119 * - the rmapbt root block 120 * 121 * The AG headers are sector sized, so the amount of space they take up is 122 * dependent on filesystem geometry. The others are all single blocks. 123 */ 124 unsigned int 125 xfs_alloc_ag_max_usable( 126 struct xfs_mount *mp) 127 { 128 unsigned int blocks; 129 130 blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */ 131 blocks += XFS_ALLOC_AGFL_RESERVE; 132 blocks += 3; /* AGF, AGI btree root blocks */ 133 if (xfs_sb_version_hasfinobt(&mp->m_sb)) 134 blocks++; /* finobt root block */ 135 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 136 blocks++; /* rmap root block */ 137 if (xfs_sb_version_hasreflink(&mp->m_sb)) 138 blocks++; /* refcount root block */ 139 140 return mp->m_sb.sb_agblocks - blocks; 141 } 142 143 /* 144 * Lookup the record equal to [bno, len] in the btree given by cur. 145 */ 146 STATIC int /* error */ 147 xfs_alloc_lookup_eq( 148 struct xfs_btree_cur *cur, /* btree cursor */ 149 xfs_agblock_t bno, /* starting block of extent */ 150 xfs_extlen_t len, /* length of extent */ 151 int *stat) /* success/failure */ 152 { 153 cur->bc_rec.a.ar_startblock = bno; 154 cur->bc_rec.a.ar_blockcount = len; 155 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); 156 } 157 158 /* 159 * Lookup the first record greater than or equal to [bno, len] 160 * in the btree given by cur. 161 */ 162 int /* error */ 163 xfs_alloc_lookup_ge( 164 struct xfs_btree_cur *cur, /* btree cursor */ 165 xfs_agblock_t bno, /* starting block of extent */ 166 xfs_extlen_t len, /* length of extent */ 167 int *stat) /* success/failure */ 168 { 169 cur->bc_rec.a.ar_startblock = bno; 170 cur->bc_rec.a.ar_blockcount = len; 171 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); 172 } 173 174 /* 175 * Lookup the first record less than or equal to [bno, len] 176 * in the btree given by cur. 177 */ 178 int /* error */ 179 xfs_alloc_lookup_le( 180 struct xfs_btree_cur *cur, /* btree cursor */ 181 xfs_agblock_t bno, /* starting block of extent */ 182 xfs_extlen_t len, /* length of extent */ 183 int *stat) /* success/failure */ 184 { 185 cur->bc_rec.a.ar_startblock = bno; 186 cur->bc_rec.a.ar_blockcount = len; 187 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); 188 } 189 190 /* 191 * Update the record referred to by cur to the value given 192 * by [bno, len]. 193 * This either works (return 0) or gets an EFSCORRUPTED error. 194 */ 195 STATIC int /* error */ 196 xfs_alloc_update( 197 struct xfs_btree_cur *cur, /* btree cursor */ 198 xfs_agblock_t bno, /* starting block of extent */ 199 xfs_extlen_t len) /* length of extent */ 200 { 201 union xfs_btree_rec rec; 202 203 rec.alloc.ar_startblock = cpu_to_be32(bno); 204 rec.alloc.ar_blockcount = cpu_to_be32(len); 205 return xfs_btree_update(cur, &rec); 206 } 207 208 /* 209 * Get the data from the pointed-to record. 210 */ 211 int /* error */ 212 xfs_alloc_get_rec( 213 struct xfs_btree_cur *cur, /* btree cursor */ 214 xfs_agblock_t *bno, /* output: starting block of extent */ 215 xfs_extlen_t *len, /* output: length of extent */ 216 int *stat) /* output: success/failure */ 217 { 218 struct xfs_mount *mp = cur->bc_mp; 219 xfs_agnumber_t agno = cur->bc_private.a.agno; 220 union xfs_btree_rec *rec; 221 int error; 222 223 error = xfs_btree_get_rec(cur, &rec, stat); 224 if (error || !(*stat)) 225 return error; 226 if (rec->alloc.ar_blockcount == 0) 227 goto out_bad_rec; 228 229 *bno = be32_to_cpu(rec->alloc.ar_startblock); 230 *len = be32_to_cpu(rec->alloc.ar_blockcount); 231 232 /* check for valid extent range, including overflow */ 233 if (!xfs_verify_agbno(mp, agno, *bno)) 234 goto out_bad_rec; 235 if (*bno > *bno + *len) 236 goto out_bad_rec; 237 if (!xfs_verify_agbno(mp, agno, *bno + *len - 1)) 238 goto out_bad_rec; 239 240 return 0; 241 242 out_bad_rec: 243 xfs_warn(mp, 244 "%s Freespace BTree record corruption in AG %d detected!", 245 cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", agno); 246 xfs_warn(mp, 247 "start block 0x%x block count 0x%x", *bno, *len); 248 return -EFSCORRUPTED; 249 } 250 251 /* 252 * Compute aligned version of the found extent. 253 * Takes alignment and min length into account. 254 */ 255 STATIC bool 256 xfs_alloc_compute_aligned( 257 xfs_alloc_arg_t *args, /* allocation argument structure */ 258 xfs_agblock_t foundbno, /* starting block in found extent */ 259 xfs_extlen_t foundlen, /* length in found extent */ 260 xfs_agblock_t *resbno, /* result block number */ 261 xfs_extlen_t *reslen, /* result length */ 262 unsigned *busy_gen) 263 { 264 xfs_agblock_t bno = foundbno; 265 xfs_extlen_t len = foundlen; 266 xfs_extlen_t diff; 267 bool busy; 268 269 /* Trim busy sections out of found extent */ 270 busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen); 271 272 /* 273 * If we have a largish extent that happens to start before min_agbno, 274 * see if we can shift it into range... 275 */ 276 if (bno < args->min_agbno && bno + len > args->min_agbno) { 277 diff = args->min_agbno - bno; 278 if (len > diff) { 279 bno += diff; 280 len -= diff; 281 } 282 } 283 284 if (args->alignment > 1 && len >= args->minlen) { 285 xfs_agblock_t aligned_bno = roundup(bno, args->alignment); 286 287 diff = aligned_bno - bno; 288 289 *resbno = aligned_bno; 290 *reslen = diff >= len ? 0 : len - diff; 291 } else { 292 *resbno = bno; 293 *reslen = len; 294 } 295 296 return busy; 297 } 298 299 /* 300 * Compute best start block and diff for "near" allocations. 301 * freelen >= wantlen already checked by caller. 302 */ 303 STATIC xfs_extlen_t /* difference value (absolute) */ 304 xfs_alloc_compute_diff( 305 xfs_agblock_t wantbno, /* target starting block */ 306 xfs_extlen_t wantlen, /* target length */ 307 xfs_extlen_t alignment, /* target alignment */ 308 int datatype, /* are we allocating data? */ 309 xfs_agblock_t freebno, /* freespace's starting block */ 310 xfs_extlen_t freelen, /* freespace's length */ 311 xfs_agblock_t *newbnop) /* result: best start block from free */ 312 { 313 xfs_agblock_t freeend; /* end of freespace extent */ 314 xfs_agblock_t newbno1; /* return block number */ 315 xfs_agblock_t newbno2; /* other new block number */ 316 xfs_extlen_t newlen1=0; /* length with newbno1 */ 317 xfs_extlen_t newlen2=0; /* length with newbno2 */ 318 xfs_agblock_t wantend; /* end of target extent */ 319 bool userdata = xfs_alloc_is_userdata(datatype); 320 321 ASSERT(freelen >= wantlen); 322 freeend = freebno + freelen; 323 wantend = wantbno + wantlen; 324 /* 325 * We want to allocate from the start of a free extent if it is past 326 * the desired block or if we are allocating user data and the free 327 * extent is before desired block. The second case is there to allow 328 * for contiguous allocation from the remaining free space if the file 329 * grows in the short term. 330 */ 331 if (freebno >= wantbno || (userdata && freeend < wantend)) { 332 if ((newbno1 = roundup(freebno, alignment)) >= freeend) 333 newbno1 = NULLAGBLOCK; 334 } else if (freeend >= wantend && alignment > 1) { 335 newbno1 = roundup(wantbno, alignment); 336 newbno2 = newbno1 - alignment; 337 if (newbno1 >= freeend) 338 newbno1 = NULLAGBLOCK; 339 else 340 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1); 341 if (newbno2 < freebno) 342 newbno2 = NULLAGBLOCK; 343 else 344 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2); 345 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) { 346 if (newlen1 < newlen2 || 347 (newlen1 == newlen2 && 348 XFS_ABSDIFF(newbno1, wantbno) > 349 XFS_ABSDIFF(newbno2, wantbno))) 350 newbno1 = newbno2; 351 } else if (newbno2 != NULLAGBLOCK) 352 newbno1 = newbno2; 353 } else if (freeend >= wantend) { 354 newbno1 = wantbno; 355 } else if (alignment > 1) { 356 newbno1 = roundup(freeend - wantlen, alignment); 357 if (newbno1 > freeend - wantlen && 358 newbno1 - alignment >= freebno) 359 newbno1 -= alignment; 360 else if (newbno1 >= freeend) 361 newbno1 = NULLAGBLOCK; 362 } else 363 newbno1 = freeend - wantlen; 364 *newbnop = newbno1; 365 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno); 366 } 367 368 /* 369 * Fix up the length, based on mod and prod. 370 * len should be k * prod + mod for some k. 371 * If len is too small it is returned unchanged. 372 * If len hits maxlen it is left alone. 373 */ 374 STATIC void 375 xfs_alloc_fix_len( 376 xfs_alloc_arg_t *args) /* allocation argument structure */ 377 { 378 xfs_extlen_t k; 379 xfs_extlen_t rlen; 380 381 ASSERT(args->mod < args->prod); 382 rlen = args->len; 383 ASSERT(rlen >= args->minlen); 384 ASSERT(rlen <= args->maxlen); 385 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen || 386 (args->mod == 0 && rlen < args->prod)) 387 return; 388 k = rlen % args->prod; 389 if (k == args->mod) 390 return; 391 if (k > args->mod) 392 rlen = rlen - (k - args->mod); 393 else 394 rlen = rlen - args->prod + (args->mod - k); 395 /* casts to (int) catch length underflows */ 396 if ((int)rlen < (int)args->minlen) 397 return; 398 ASSERT(rlen >= args->minlen && rlen <= args->maxlen); 399 ASSERT(rlen % args->prod == args->mod); 400 ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >= 401 rlen + args->minleft); 402 args->len = rlen; 403 } 404 405 /* 406 * Update the two btrees, logically removing from freespace the extent 407 * starting at rbno, rlen blocks. The extent is contained within the 408 * actual (current) free extent fbno for flen blocks. 409 * Flags are passed in indicating whether the cursors are set to the 410 * relevant records. 411 */ 412 STATIC int /* error code */ 413 xfs_alloc_fixup_trees( 414 xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */ 415 xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */ 416 xfs_agblock_t fbno, /* starting block of free extent */ 417 xfs_extlen_t flen, /* length of free extent */ 418 xfs_agblock_t rbno, /* starting block of returned extent */ 419 xfs_extlen_t rlen, /* length of returned extent */ 420 int flags) /* flags, XFSA_FIXUP_... */ 421 { 422 int error; /* error code */ 423 int i; /* operation results */ 424 xfs_agblock_t nfbno1; /* first new free startblock */ 425 xfs_agblock_t nfbno2; /* second new free startblock */ 426 xfs_extlen_t nflen1=0; /* first new free length */ 427 xfs_extlen_t nflen2=0; /* second new free length */ 428 struct xfs_mount *mp; 429 430 mp = cnt_cur->bc_mp; 431 432 /* 433 * Look up the record in the by-size tree if necessary. 434 */ 435 if (flags & XFSA_FIXUP_CNT_OK) { 436 #ifdef DEBUG 437 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i))) 438 return error; 439 XFS_WANT_CORRUPTED_RETURN(mp, 440 i == 1 && nfbno1 == fbno && nflen1 == flen); 441 #endif 442 } else { 443 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i))) 444 return error; 445 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 446 } 447 /* 448 * Look up the record in the by-block tree if necessary. 449 */ 450 if (flags & XFSA_FIXUP_BNO_OK) { 451 #ifdef DEBUG 452 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i))) 453 return error; 454 XFS_WANT_CORRUPTED_RETURN(mp, 455 i == 1 && nfbno1 == fbno && nflen1 == flen); 456 #endif 457 } else { 458 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i))) 459 return error; 460 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 461 } 462 463 #ifdef DEBUG 464 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) { 465 struct xfs_btree_block *bnoblock; 466 struct xfs_btree_block *cntblock; 467 468 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]); 469 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); 470 471 XFS_WANT_CORRUPTED_RETURN(mp, 472 bnoblock->bb_numrecs == cntblock->bb_numrecs); 473 } 474 #endif 475 476 /* 477 * Deal with all four cases: the allocated record is contained 478 * within the freespace record, so we can have new freespace 479 * at either (or both) end, or no freespace remaining. 480 */ 481 if (rbno == fbno && rlen == flen) 482 nfbno1 = nfbno2 = NULLAGBLOCK; 483 else if (rbno == fbno) { 484 nfbno1 = rbno + rlen; 485 nflen1 = flen - rlen; 486 nfbno2 = NULLAGBLOCK; 487 } else if (rbno + rlen == fbno + flen) { 488 nfbno1 = fbno; 489 nflen1 = flen - rlen; 490 nfbno2 = NULLAGBLOCK; 491 } else { 492 nfbno1 = fbno; 493 nflen1 = rbno - fbno; 494 nfbno2 = rbno + rlen; 495 nflen2 = (fbno + flen) - nfbno2; 496 } 497 /* 498 * Delete the entry from the by-size btree. 499 */ 500 if ((error = xfs_btree_delete(cnt_cur, &i))) 501 return error; 502 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 503 /* 504 * Add new by-size btree entry(s). 505 */ 506 if (nfbno1 != NULLAGBLOCK) { 507 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) 508 return error; 509 XFS_WANT_CORRUPTED_RETURN(mp, i == 0); 510 if ((error = xfs_btree_insert(cnt_cur, &i))) 511 return error; 512 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 513 } 514 if (nfbno2 != NULLAGBLOCK) { 515 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) 516 return error; 517 XFS_WANT_CORRUPTED_RETURN(mp, i == 0); 518 if ((error = xfs_btree_insert(cnt_cur, &i))) 519 return error; 520 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 521 } 522 /* 523 * Fix up the by-block btree entry(s). 524 */ 525 if (nfbno1 == NULLAGBLOCK) { 526 /* 527 * No remaining freespace, just delete the by-block tree entry. 528 */ 529 if ((error = xfs_btree_delete(bno_cur, &i))) 530 return error; 531 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 532 } else { 533 /* 534 * Update the by-block entry to start later|be shorter. 535 */ 536 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1))) 537 return error; 538 } 539 if (nfbno2 != NULLAGBLOCK) { 540 /* 541 * 2 resulting free entries, need to add one. 542 */ 543 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) 544 return error; 545 XFS_WANT_CORRUPTED_RETURN(mp, i == 0); 546 if ((error = xfs_btree_insert(bno_cur, &i))) 547 return error; 548 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 549 } 550 return 0; 551 } 552 553 static xfs_failaddr_t 554 xfs_agfl_verify( 555 struct xfs_buf *bp) 556 { 557 struct xfs_mount *mp = bp->b_target->bt_mount; 558 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); 559 int i; 560 561 /* 562 * There is no verification of non-crc AGFLs because mkfs does not 563 * initialise the AGFL to zero or NULL. Hence the only valid part of the 564 * AGFL is what the AGF says is active. We can't get to the AGF, so we 565 * can't verify just those entries are valid. 566 */ 567 if (!xfs_sb_version_hascrc(&mp->m_sb)) 568 return NULL; 569 570 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid)) 571 return __this_address; 572 if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC) 573 return __this_address; 574 /* 575 * during growfs operations, the perag is not fully initialised, 576 * so we can't use it for any useful checking. growfs ensures we can't 577 * use it by using uncached buffers that don't have the perag attached 578 * so we can detect and avoid this problem. 579 */ 580 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno) 581 return __this_address; 582 583 for (i = 0; i < xfs_agfl_size(mp); i++) { 584 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK && 585 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) 586 return __this_address; 587 } 588 589 if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn))) 590 return __this_address; 591 return NULL; 592 } 593 594 static void 595 xfs_agfl_read_verify( 596 struct xfs_buf *bp) 597 { 598 struct xfs_mount *mp = bp->b_target->bt_mount; 599 xfs_failaddr_t fa; 600 601 /* 602 * There is no verification of non-crc AGFLs because mkfs does not 603 * initialise the AGFL to zero or NULL. Hence the only valid part of the 604 * AGFL is what the AGF says is active. We can't get to the AGF, so we 605 * can't verify just those entries are valid. 606 */ 607 if (!xfs_sb_version_hascrc(&mp->m_sb)) 608 return; 609 610 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) 611 xfs_verifier_error(bp, -EFSBADCRC, __this_address); 612 else { 613 fa = xfs_agfl_verify(bp); 614 if (fa) 615 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 616 } 617 } 618 619 static void 620 xfs_agfl_write_verify( 621 struct xfs_buf *bp) 622 { 623 struct xfs_mount *mp = bp->b_target->bt_mount; 624 struct xfs_buf_log_item *bip = bp->b_log_item; 625 xfs_failaddr_t fa; 626 627 /* no verification of non-crc AGFLs */ 628 if (!xfs_sb_version_hascrc(&mp->m_sb)) 629 return; 630 631 fa = xfs_agfl_verify(bp); 632 if (fa) { 633 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 634 return; 635 } 636 637 if (bip) 638 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); 639 640 xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF); 641 } 642 643 const struct xfs_buf_ops xfs_agfl_buf_ops = { 644 .name = "xfs_agfl", 645 .verify_read = xfs_agfl_read_verify, 646 .verify_write = xfs_agfl_write_verify, 647 .verify_struct = xfs_agfl_verify, 648 }; 649 650 /* 651 * Read in the allocation group free block array. 652 */ 653 int /* error */ 654 xfs_alloc_read_agfl( 655 xfs_mount_t *mp, /* mount point structure */ 656 xfs_trans_t *tp, /* transaction pointer */ 657 xfs_agnumber_t agno, /* allocation group number */ 658 xfs_buf_t **bpp) /* buffer for the ag free block array */ 659 { 660 xfs_buf_t *bp; /* return value */ 661 int error; 662 663 ASSERT(agno != NULLAGNUMBER); 664 error = xfs_trans_read_buf( 665 mp, tp, mp->m_ddev_targp, 666 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)), 667 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops); 668 if (error) 669 return error; 670 xfs_buf_set_ref(bp, XFS_AGFL_REF); 671 *bpp = bp; 672 return 0; 673 } 674 675 STATIC int 676 xfs_alloc_update_counters( 677 struct xfs_trans *tp, 678 struct xfs_perag *pag, 679 struct xfs_buf *agbp, 680 long len) 681 { 682 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); 683 684 pag->pagf_freeblks += len; 685 be32_add_cpu(&agf->agf_freeblks, len); 686 687 xfs_trans_agblocks_delta(tp, len); 688 if (unlikely(be32_to_cpu(agf->agf_freeblks) > 689 be32_to_cpu(agf->agf_length))) 690 return -EFSCORRUPTED; 691 692 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); 693 return 0; 694 } 695 696 /* 697 * Allocation group level functions. 698 */ 699 700 /* 701 * Allocate a variable extent in the allocation group agno. 702 * Type and bno are used to determine where in the allocation group the 703 * extent will start. 704 * Extent's length (returned in *len) will be between minlen and maxlen, 705 * and of the form k * prod + mod unless there's nothing that large. 706 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 707 */ 708 STATIC int /* error */ 709 xfs_alloc_ag_vextent( 710 xfs_alloc_arg_t *args) /* argument structure for allocation */ 711 { 712 int error=0; 713 714 ASSERT(args->minlen > 0); 715 ASSERT(args->maxlen > 0); 716 ASSERT(args->minlen <= args->maxlen); 717 ASSERT(args->mod < args->prod); 718 ASSERT(args->alignment > 0); 719 720 /* 721 * Branch to correct routine based on the type. 722 */ 723 args->wasfromfl = 0; 724 switch (args->type) { 725 case XFS_ALLOCTYPE_THIS_AG: 726 error = xfs_alloc_ag_vextent_size(args); 727 break; 728 case XFS_ALLOCTYPE_NEAR_BNO: 729 error = xfs_alloc_ag_vextent_near(args); 730 break; 731 case XFS_ALLOCTYPE_THIS_BNO: 732 error = xfs_alloc_ag_vextent_exact(args); 733 break; 734 default: 735 ASSERT(0); 736 /* NOTREACHED */ 737 } 738 739 if (error || args->agbno == NULLAGBLOCK) 740 return error; 741 742 ASSERT(args->len >= args->minlen); 743 ASSERT(args->len <= args->maxlen); 744 ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL); 745 ASSERT(args->agbno % args->alignment == 0); 746 747 /* if not file data, insert new block into the reverse map btree */ 748 if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) { 749 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno, 750 args->agbno, args->len, &args->oinfo); 751 if (error) 752 return error; 753 } 754 755 if (!args->wasfromfl) { 756 error = xfs_alloc_update_counters(args->tp, args->pag, 757 args->agbp, 758 -((long)(args->len))); 759 if (error) 760 return error; 761 762 ASSERT(!xfs_extent_busy_search(args->mp, args->agno, 763 args->agbno, args->len)); 764 } 765 766 xfs_ag_resv_alloc_extent(args->pag, args->resv, args); 767 768 XFS_STATS_INC(args->mp, xs_allocx); 769 XFS_STATS_ADD(args->mp, xs_allocb, args->len); 770 return error; 771 } 772 773 /* 774 * Allocate a variable extent at exactly agno/bno. 775 * Extent's length (returned in *len) will be between minlen and maxlen, 776 * and of the form k * prod + mod unless there's nothing that large. 777 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it. 778 */ 779 STATIC int /* error */ 780 xfs_alloc_ag_vextent_exact( 781 xfs_alloc_arg_t *args) /* allocation argument structure */ 782 { 783 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ 784 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ 785 int error; 786 xfs_agblock_t fbno; /* start block of found extent */ 787 xfs_extlen_t flen; /* length of found extent */ 788 xfs_agblock_t tbno; /* start block of busy extent */ 789 xfs_extlen_t tlen; /* length of busy extent */ 790 xfs_agblock_t tend; /* end block of busy extent */ 791 int i; /* success/failure of operation */ 792 unsigned busy_gen; 793 794 ASSERT(args->alignment == 1); 795 796 /* 797 * Allocate/initialize a cursor for the by-number freespace btree. 798 */ 799 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 800 args->agno, XFS_BTNUM_BNO); 801 802 /* 803 * Lookup bno and minlen in the btree (minlen is irrelevant, really). 804 * Look for the closest free block <= bno, it must contain bno 805 * if any free block does. 806 */ 807 error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i); 808 if (error) 809 goto error0; 810 if (!i) 811 goto not_found; 812 813 /* 814 * Grab the freespace record. 815 */ 816 error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i); 817 if (error) 818 goto error0; 819 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 820 ASSERT(fbno <= args->agbno); 821 822 /* 823 * Check for overlapping busy extents. 824 */ 825 tbno = fbno; 826 tlen = flen; 827 xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen); 828 829 /* 830 * Give up if the start of the extent is busy, or the freespace isn't 831 * long enough for the minimum request. 832 */ 833 if (tbno > args->agbno) 834 goto not_found; 835 if (tlen < args->minlen) 836 goto not_found; 837 tend = tbno + tlen; 838 if (tend < args->agbno + args->minlen) 839 goto not_found; 840 841 /* 842 * End of extent will be smaller of the freespace end and the 843 * maximal requested end. 844 * 845 * Fix the length according to mod and prod if given. 846 */ 847 args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen) 848 - args->agbno; 849 xfs_alloc_fix_len(args); 850 ASSERT(args->agbno + args->len <= tend); 851 852 /* 853 * We are allocating agbno for args->len 854 * Allocate/initialize a cursor for the by-size btree. 855 */ 856 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 857 args->agno, XFS_BTNUM_CNT); 858 ASSERT(args->agbno + args->len <= 859 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 860 error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno, 861 args->len, XFSA_FIXUP_BNO_OK); 862 if (error) { 863 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 864 goto error0; 865 } 866 867 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 868 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 869 870 args->wasfromfl = 0; 871 trace_xfs_alloc_exact_done(args); 872 return 0; 873 874 not_found: 875 /* Didn't find it, return null. */ 876 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 877 args->agbno = NULLAGBLOCK; 878 trace_xfs_alloc_exact_notfound(args); 879 return 0; 880 881 error0: 882 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 883 trace_xfs_alloc_exact_error(args); 884 return error; 885 } 886 887 /* 888 * Search the btree in a given direction via the search cursor and compare 889 * the records found against the good extent we've already found. 890 */ 891 STATIC int 892 xfs_alloc_find_best_extent( 893 struct xfs_alloc_arg *args, /* allocation argument structure */ 894 struct xfs_btree_cur **gcur, /* good cursor */ 895 struct xfs_btree_cur **scur, /* searching cursor */ 896 xfs_agblock_t gdiff, /* difference for search comparison */ 897 xfs_agblock_t *sbno, /* extent found by search */ 898 xfs_extlen_t *slen, /* extent length */ 899 xfs_agblock_t *sbnoa, /* aligned extent found by search */ 900 xfs_extlen_t *slena, /* aligned extent length */ 901 int dir) /* 0 = search right, 1 = search left */ 902 { 903 xfs_agblock_t new; 904 xfs_agblock_t sdiff; 905 int error; 906 int i; 907 unsigned busy_gen; 908 909 /* The good extent is perfect, no need to search. */ 910 if (!gdiff) 911 goto out_use_good; 912 913 /* 914 * Look until we find a better one, run out of space or run off the end. 915 */ 916 do { 917 error = xfs_alloc_get_rec(*scur, sbno, slen, &i); 918 if (error) 919 goto error0; 920 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 921 xfs_alloc_compute_aligned(args, *sbno, *slen, 922 sbnoa, slena, &busy_gen); 923 924 /* 925 * The good extent is closer than this one. 926 */ 927 if (!dir) { 928 if (*sbnoa > args->max_agbno) 929 goto out_use_good; 930 if (*sbnoa >= args->agbno + gdiff) 931 goto out_use_good; 932 } else { 933 if (*sbnoa < args->min_agbno) 934 goto out_use_good; 935 if (*sbnoa <= args->agbno - gdiff) 936 goto out_use_good; 937 } 938 939 /* 940 * Same distance, compare length and pick the best. 941 */ 942 if (*slena >= args->minlen) { 943 args->len = XFS_EXTLEN_MIN(*slena, args->maxlen); 944 xfs_alloc_fix_len(args); 945 946 sdiff = xfs_alloc_compute_diff(args->agbno, args->len, 947 args->alignment, 948 args->datatype, *sbnoa, 949 *slena, &new); 950 951 /* 952 * Choose closer size and invalidate other cursor. 953 */ 954 if (sdiff < gdiff) 955 goto out_use_search; 956 goto out_use_good; 957 } 958 959 if (!dir) 960 error = xfs_btree_increment(*scur, 0, &i); 961 else 962 error = xfs_btree_decrement(*scur, 0, &i); 963 if (error) 964 goto error0; 965 } while (i); 966 967 out_use_good: 968 xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR); 969 *scur = NULL; 970 return 0; 971 972 out_use_search: 973 xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR); 974 *gcur = NULL; 975 return 0; 976 977 error0: 978 /* caller invalidates cursors */ 979 return error; 980 } 981 982 /* 983 * Allocate a variable extent near bno in the allocation group agno. 984 * Extent's length (returned in len) will be between minlen and maxlen, 985 * and of the form k * prod + mod unless there's nothing that large. 986 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 987 */ 988 STATIC int /* error */ 989 xfs_alloc_ag_vextent_near( 990 xfs_alloc_arg_t *args) /* allocation argument structure */ 991 { 992 xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */ 993 xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */ 994 xfs_btree_cur_t *cnt_cur; /* cursor for count btree */ 995 xfs_agblock_t gtbno; /* start bno of right side entry */ 996 xfs_agblock_t gtbnoa; /* aligned ... */ 997 xfs_extlen_t gtdiff; /* difference to right side entry */ 998 xfs_extlen_t gtlen; /* length of right side entry */ 999 xfs_extlen_t gtlena; /* aligned ... */ 1000 xfs_agblock_t gtnew; /* useful start bno of right side */ 1001 int error; /* error code */ 1002 int i; /* result code, temporary */ 1003 int j; /* result code, temporary */ 1004 xfs_agblock_t ltbno; /* start bno of left side entry */ 1005 xfs_agblock_t ltbnoa; /* aligned ... */ 1006 xfs_extlen_t ltdiff; /* difference to left side entry */ 1007 xfs_extlen_t ltlen; /* length of left side entry */ 1008 xfs_extlen_t ltlena; /* aligned ... */ 1009 xfs_agblock_t ltnew; /* useful start bno of left side */ 1010 xfs_extlen_t rlen; /* length of returned extent */ 1011 bool busy; 1012 unsigned busy_gen; 1013 #ifdef DEBUG 1014 /* 1015 * Randomly don't execute the first algorithm. 1016 */ 1017 int dofirst; /* set to do first algorithm */ 1018 1019 dofirst = prandom_u32() & 1; 1020 #endif 1021 1022 /* handle unitialized agbno range so caller doesn't have to */ 1023 if (!args->min_agbno && !args->max_agbno) 1024 args->max_agbno = args->mp->m_sb.sb_agblocks - 1; 1025 ASSERT(args->min_agbno <= args->max_agbno); 1026 1027 /* clamp agbno to the range if it's outside */ 1028 if (args->agbno < args->min_agbno) 1029 args->agbno = args->min_agbno; 1030 if (args->agbno > args->max_agbno) 1031 args->agbno = args->max_agbno; 1032 1033 restart: 1034 bno_cur_lt = NULL; 1035 bno_cur_gt = NULL; 1036 ltlen = 0; 1037 gtlena = 0; 1038 ltlena = 0; 1039 busy = false; 1040 1041 /* 1042 * Get a cursor for the by-size btree. 1043 */ 1044 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1045 args->agno, XFS_BTNUM_CNT); 1046 1047 /* 1048 * See if there are any free extents as big as maxlen. 1049 */ 1050 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i))) 1051 goto error0; 1052 /* 1053 * If none, then pick up the last entry in the tree unless the 1054 * tree is empty. 1055 */ 1056 if (!i) { 1057 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, <bno, 1058 <len, &i))) 1059 goto error0; 1060 if (i == 0 || ltlen == 0) { 1061 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1062 trace_xfs_alloc_near_noentry(args); 1063 return 0; 1064 } 1065 ASSERT(i == 1); 1066 } 1067 args->wasfromfl = 0; 1068 1069 /* 1070 * First algorithm. 1071 * If the requested extent is large wrt the freespaces available 1072 * in this a.g., then the cursor will be pointing to a btree entry 1073 * near the right edge of the tree. If it's in the last btree leaf 1074 * block, then we just examine all the entries in that block 1075 * that are big enough, and pick the best one. 1076 * This is written as a while loop so we can break out of it, 1077 * but we never loop back to the top. 1078 */ 1079 while (xfs_btree_islastblock(cnt_cur, 0)) { 1080 xfs_extlen_t bdiff; 1081 int besti=0; 1082 xfs_extlen_t blen=0; 1083 xfs_agblock_t bnew=0; 1084 1085 #ifdef DEBUG 1086 if (dofirst) 1087 break; 1088 #endif 1089 /* 1090 * Start from the entry that lookup found, sequence through 1091 * all larger free blocks. If we're actually pointing at a 1092 * record smaller than maxlen, go to the start of this block, 1093 * and skip all those smaller than minlen. 1094 */ 1095 if (ltlen || args->alignment > 1) { 1096 cnt_cur->bc_ptrs[0] = 1; 1097 do { 1098 if ((error = xfs_alloc_get_rec(cnt_cur, <bno, 1099 <len, &i))) 1100 goto error0; 1101 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1102 if (ltlen >= args->minlen) 1103 break; 1104 if ((error = xfs_btree_increment(cnt_cur, 0, &i))) 1105 goto error0; 1106 } while (i); 1107 ASSERT(ltlen >= args->minlen); 1108 if (!i) 1109 break; 1110 } 1111 i = cnt_cur->bc_ptrs[0]; 1112 for (j = 1, blen = 0, bdiff = 0; 1113 !error && j && (blen < args->maxlen || bdiff > 0); 1114 error = xfs_btree_increment(cnt_cur, 0, &j)) { 1115 /* 1116 * For each entry, decide if it's better than 1117 * the previous best entry. 1118 */ 1119 if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) 1120 goto error0; 1121 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1122 busy = xfs_alloc_compute_aligned(args, ltbno, ltlen, 1123 <bnoa, <lena, &busy_gen); 1124 if (ltlena < args->minlen) 1125 continue; 1126 if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno) 1127 continue; 1128 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1129 xfs_alloc_fix_len(args); 1130 ASSERT(args->len >= args->minlen); 1131 if (args->len < blen) 1132 continue; 1133 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, 1134 args->alignment, args->datatype, ltbnoa, 1135 ltlena, <new); 1136 if (ltnew != NULLAGBLOCK && 1137 (args->len > blen || ltdiff < bdiff)) { 1138 bdiff = ltdiff; 1139 bnew = ltnew; 1140 blen = args->len; 1141 besti = cnt_cur->bc_ptrs[0]; 1142 } 1143 } 1144 /* 1145 * It didn't work. We COULD be in a case where 1146 * there's a good record somewhere, so try again. 1147 */ 1148 if (blen == 0) 1149 break; 1150 /* 1151 * Point at the best entry, and retrieve it again. 1152 */ 1153 cnt_cur->bc_ptrs[0] = besti; 1154 if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) 1155 goto error0; 1156 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1157 ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 1158 args->len = blen; 1159 1160 /* 1161 * We are allocating starting at bnew for blen blocks. 1162 */ 1163 args->agbno = bnew; 1164 ASSERT(bnew >= ltbno); 1165 ASSERT(bnew + blen <= ltbno + ltlen); 1166 /* 1167 * Set up a cursor for the by-bno tree. 1168 */ 1169 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, 1170 args->agbp, args->agno, XFS_BTNUM_BNO); 1171 /* 1172 * Fix up the btree entries. 1173 */ 1174 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, 1175 ltlen, bnew, blen, XFSA_FIXUP_CNT_OK))) 1176 goto error0; 1177 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1178 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 1179 1180 trace_xfs_alloc_near_first(args); 1181 return 0; 1182 } 1183 /* 1184 * Second algorithm. 1185 * Search in the by-bno tree to the left and to the right 1186 * simultaneously, until in each case we find a space big enough, 1187 * or run into the edge of the tree. When we run into the edge, 1188 * we deallocate that cursor. 1189 * If both searches succeed, we compare the two spaces and pick 1190 * the better one. 1191 * With alignment, it's possible for both to fail; the upper 1192 * level algorithm that picks allocation groups for allocations 1193 * is not supposed to do this. 1194 */ 1195 /* 1196 * Allocate and initialize the cursor for the leftward search. 1197 */ 1198 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1199 args->agno, XFS_BTNUM_BNO); 1200 /* 1201 * Lookup <= bno to find the leftward search's starting point. 1202 */ 1203 if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i))) 1204 goto error0; 1205 if (!i) { 1206 /* 1207 * Didn't find anything; use this cursor for the rightward 1208 * search. 1209 */ 1210 bno_cur_gt = bno_cur_lt; 1211 bno_cur_lt = NULL; 1212 } 1213 /* 1214 * Found something. Duplicate the cursor for the rightward search. 1215 */ 1216 else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt))) 1217 goto error0; 1218 /* 1219 * Increment the cursor, so we will point at the entry just right 1220 * of the leftward entry if any, or to the leftmost entry. 1221 */ 1222 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) 1223 goto error0; 1224 if (!i) { 1225 /* 1226 * It failed, there are no rightward entries. 1227 */ 1228 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR); 1229 bno_cur_gt = NULL; 1230 } 1231 /* 1232 * Loop going left with the leftward cursor, right with the 1233 * rightward cursor, until either both directions give up or 1234 * we find an entry at least as big as minlen. 1235 */ 1236 do { 1237 if (bno_cur_lt) { 1238 if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) 1239 goto error0; 1240 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1241 busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen, 1242 <bnoa, <lena, &busy_gen); 1243 if (ltlena >= args->minlen && ltbnoa >= args->min_agbno) 1244 break; 1245 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) 1246 goto error0; 1247 if (!i || ltbnoa < args->min_agbno) { 1248 xfs_btree_del_cursor(bno_cur_lt, 1249 XFS_BTREE_NOERROR); 1250 bno_cur_lt = NULL; 1251 } 1252 } 1253 if (bno_cur_gt) { 1254 if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) 1255 goto error0; 1256 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1257 busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen, 1258 >bnoa, >lena, &busy_gen); 1259 if (gtlena >= args->minlen && gtbnoa <= args->max_agbno) 1260 break; 1261 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) 1262 goto error0; 1263 if (!i || gtbnoa > args->max_agbno) { 1264 xfs_btree_del_cursor(bno_cur_gt, 1265 XFS_BTREE_NOERROR); 1266 bno_cur_gt = NULL; 1267 } 1268 } 1269 } while (bno_cur_lt || bno_cur_gt); 1270 1271 /* 1272 * Got both cursors still active, need to find better entry. 1273 */ 1274 if (bno_cur_lt && bno_cur_gt) { 1275 if (ltlena >= args->minlen) { 1276 /* 1277 * Left side is good, look for a right side entry. 1278 */ 1279 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1280 xfs_alloc_fix_len(args); 1281 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, 1282 args->alignment, args->datatype, ltbnoa, 1283 ltlena, <new); 1284 1285 error = xfs_alloc_find_best_extent(args, 1286 &bno_cur_lt, &bno_cur_gt, 1287 ltdiff, >bno, >len, 1288 >bnoa, >lena, 1289 0 /* search right */); 1290 } else { 1291 ASSERT(gtlena >= args->minlen); 1292 1293 /* 1294 * Right side is good, look for a left side entry. 1295 */ 1296 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); 1297 xfs_alloc_fix_len(args); 1298 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, 1299 args->alignment, args->datatype, gtbnoa, 1300 gtlena, >new); 1301 1302 error = xfs_alloc_find_best_extent(args, 1303 &bno_cur_gt, &bno_cur_lt, 1304 gtdiff, <bno, <len, 1305 <bnoa, <lena, 1306 1 /* search left */); 1307 } 1308 1309 if (error) 1310 goto error0; 1311 } 1312 1313 /* 1314 * If we couldn't get anything, give up. 1315 */ 1316 if (bno_cur_lt == NULL && bno_cur_gt == NULL) { 1317 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1318 1319 if (busy) { 1320 trace_xfs_alloc_near_busy(args); 1321 xfs_extent_busy_flush(args->mp, args->pag, busy_gen); 1322 goto restart; 1323 } 1324 trace_xfs_alloc_size_neither(args); 1325 args->agbno = NULLAGBLOCK; 1326 return 0; 1327 } 1328 1329 /* 1330 * At this point we have selected a freespace entry, either to the 1331 * left or to the right. If it's on the right, copy all the 1332 * useful variables to the "left" set so we only have one 1333 * copy of this code. 1334 */ 1335 if (bno_cur_gt) { 1336 bno_cur_lt = bno_cur_gt; 1337 bno_cur_gt = NULL; 1338 ltbno = gtbno; 1339 ltbnoa = gtbnoa; 1340 ltlen = gtlen; 1341 ltlena = gtlena; 1342 j = 1; 1343 } else 1344 j = 0; 1345 1346 /* 1347 * Fix up the length and compute the useful address. 1348 */ 1349 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1350 xfs_alloc_fix_len(args); 1351 rlen = args->len; 1352 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, 1353 args->datatype, ltbnoa, ltlena, <new); 1354 ASSERT(ltnew >= ltbno); 1355 ASSERT(ltnew + rlen <= ltbnoa + ltlena); 1356 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 1357 ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno); 1358 args->agbno = ltnew; 1359 1360 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, 1361 ltnew, rlen, XFSA_FIXUP_BNO_OK))) 1362 goto error0; 1363 1364 if (j) 1365 trace_xfs_alloc_near_greater(args); 1366 else 1367 trace_xfs_alloc_near_lesser(args); 1368 1369 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1370 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 1371 return 0; 1372 1373 error0: 1374 trace_xfs_alloc_near_error(args); 1375 if (cnt_cur != NULL) 1376 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1377 if (bno_cur_lt != NULL) 1378 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR); 1379 if (bno_cur_gt != NULL) 1380 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR); 1381 return error; 1382 } 1383 1384 /* 1385 * Allocate a variable extent anywhere in the allocation group agno. 1386 * Extent's length (returned in len) will be between minlen and maxlen, 1387 * and of the form k * prod + mod unless there's nothing that large. 1388 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 1389 */ 1390 STATIC int /* error */ 1391 xfs_alloc_ag_vextent_size( 1392 xfs_alloc_arg_t *args) /* allocation argument structure */ 1393 { 1394 xfs_btree_cur_t *bno_cur; /* cursor for bno btree */ 1395 xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */ 1396 int error; /* error result */ 1397 xfs_agblock_t fbno; /* start of found freespace */ 1398 xfs_extlen_t flen; /* length of found freespace */ 1399 int i; /* temp status variable */ 1400 xfs_agblock_t rbno; /* returned block number */ 1401 xfs_extlen_t rlen; /* length of returned extent */ 1402 bool busy; 1403 unsigned busy_gen; 1404 1405 restart: 1406 /* 1407 * Allocate and initialize a cursor for the by-size btree. 1408 */ 1409 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1410 args->agno, XFS_BTNUM_CNT); 1411 bno_cur = NULL; 1412 busy = false; 1413 1414 /* 1415 * Look for an entry >= maxlen+alignment-1 blocks. 1416 */ 1417 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, 1418 args->maxlen + args->alignment - 1, &i))) 1419 goto error0; 1420 1421 /* 1422 * If none then we have to settle for a smaller extent. In the case that 1423 * there are no large extents, this will return the last entry in the 1424 * tree unless the tree is empty. In the case that there are only busy 1425 * large extents, this will return the largest small extent unless there 1426 * are no smaller extents available. 1427 */ 1428 if (!i) { 1429 error = xfs_alloc_ag_vextent_small(args, cnt_cur, 1430 &fbno, &flen, &i); 1431 if (error) 1432 goto error0; 1433 if (i == 0 || flen == 0) { 1434 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1435 trace_xfs_alloc_size_noentry(args); 1436 return 0; 1437 } 1438 ASSERT(i == 1); 1439 busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno, 1440 &rlen, &busy_gen); 1441 } else { 1442 /* 1443 * Search for a non-busy extent that is large enough. 1444 */ 1445 for (;;) { 1446 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); 1447 if (error) 1448 goto error0; 1449 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1450 1451 busy = xfs_alloc_compute_aligned(args, fbno, flen, 1452 &rbno, &rlen, &busy_gen); 1453 1454 if (rlen >= args->maxlen) 1455 break; 1456 1457 error = xfs_btree_increment(cnt_cur, 0, &i); 1458 if (error) 1459 goto error0; 1460 if (i == 0) { 1461 /* 1462 * Our only valid extents must have been busy. 1463 * Make it unbusy by forcing the log out and 1464 * retrying. 1465 */ 1466 xfs_btree_del_cursor(cnt_cur, 1467 XFS_BTREE_NOERROR); 1468 trace_xfs_alloc_size_busy(args); 1469 xfs_extent_busy_flush(args->mp, 1470 args->pag, busy_gen); 1471 goto restart; 1472 } 1473 } 1474 } 1475 1476 /* 1477 * In the first case above, we got the last entry in the 1478 * by-size btree. Now we check to see if the space hits maxlen 1479 * once aligned; if not, we search left for something better. 1480 * This can't happen in the second case above. 1481 */ 1482 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1483 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 || 1484 (rlen <= flen && rbno + rlen <= fbno + flen), error0); 1485 if (rlen < args->maxlen) { 1486 xfs_agblock_t bestfbno; 1487 xfs_extlen_t bestflen; 1488 xfs_agblock_t bestrbno; 1489 xfs_extlen_t bestrlen; 1490 1491 bestrlen = rlen; 1492 bestrbno = rbno; 1493 bestflen = flen; 1494 bestfbno = fbno; 1495 for (;;) { 1496 if ((error = xfs_btree_decrement(cnt_cur, 0, &i))) 1497 goto error0; 1498 if (i == 0) 1499 break; 1500 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, 1501 &i))) 1502 goto error0; 1503 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1504 if (flen < bestrlen) 1505 break; 1506 busy = xfs_alloc_compute_aligned(args, fbno, flen, 1507 &rbno, &rlen, &busy_gen); 1508 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1509 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 || 1510 (rlen <= flen && rbno + rlen <= fbno + flen), 1511 error0); 1512 if (rlen > bestrlen) { 1513 bestrlen = rlen; 1514 bestrbno = rbno; 1515 bestflen = flen; 1516 bestfbno = fbno; 1517 if (rlen == args->maxlen) 1518 break; 1519 } 1520 } 1521 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen, 1522 &i))) 1523 goto error0; 1524 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1525 rlen = bestrlen; 1526 rbno = bestrbno; 1527 flen = bestflen; 1528 fbno = bestfbno; 1529 } 1530 args->wasfromfl = 0; 1531 /* 1532 * Fix up the length. 1533 */ 1534 args->len = rlen; 1535 if (rlen < args->minlen) { 1536 if (busy) { 1537 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1538 trace_xfs_alloc_size_busy(args); 1539 xfs_extent_busy_flush(args->mp, args->pag, busy_gen); 1540 goto restart; 1541 } 1542 goto out_nominleft; 1543 } 1544 xfs_alloc_fix_len(args); 1545 1546 rlen = args->len; 1547 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0); 1548 /* 1549 * Allocate and initialize a cursor for the by-block tree. 1550 */ 1551 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1552 args->agno, XFS_BTNUM_BNO); 1553 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, 1554 rbno, rlen, XFSA_FIXUP_CNT_OK))) 1555 goto error0; 1556 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1557 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 1558 cnt_cur = bno_cur = NULL; 1559 args->len = rlen; 1560 args->agbno = rbno; 1561 XFS_WANT_CORRUPTED_GOTO(args->mp, 1562 args->agbno + args->len <= 1563 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), 1564 error0); 1565 trace_xfs_alloc_size_done(args); 1566 return 0; 1567 1568 error0: 1569 trace_xfs_alloc_size_error(args); 1570 if (cnt_cur) 1571 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1572 if (bno_cur) 1573 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 1574 return error; 1575 1576 out_nominleft: 1577 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1578 trace_xfs_alloc_size_nominleft(args); 1579 args->agbno = NULLAGBLOCK; 1580 return 0; 1581 } 1582 1583 /* 1584 * Deal with the case where only small freespaces remain. 1585 * Either return the contents of the last freespace record, 1586 * or allocate space from the freelist if there is nothing in the tree. 1587 */ 1588 STATIC int /* error */ 1589 xfs_alloc_ag_vextent_small( 1590 xfs_alloc_arg_t *args, /* allocation argument structure */ 1591 xfs_btree_cur_t *ccur, /* by-size cursor */ 1592 xfs_agblock_t *fbnop, /* result block number */ 1593 xfs_extlen_t *flenp, /* result length */ 1594 int *stat) /* status: 0-freelist, 1-normal/none */ 1595 { 1596 struct xfs_owner_info oinfo; 1597 int error; 1598 xfs_agblock_t fbno; 1599 xfs_extlen_t flen; 1600 int i; 1601 1602 if ((error = xfs_btree_decrement(ccur, 0, &i))) 1603 goto error0; 1604 if (i) { 1605 if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) 1606 goto error0; 1607 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1608 } 1609 /* 1610 * Nothing in the btree, try the freelist. Make sure 1611 * to respect minleft even when pulling from the 1612 * freelist. 1613 */ 1614 else if (args->minlen == 1 && args->alignment == 1 && 1615 args->resv != XFS_AG_RESV_AGFL && 1616 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) 1617 > args->minleft)) { 1618 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); 1619 if (error) 1620 goto error0; 1621 if (fbno != NULLAGBLOCK) { 1622 xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, 1623 xfs_alloc_allow_busy_reuse(args->datatype)); 1624 1625 if (xfs_alloc_is_userdata(args->datatype)) { 1626 xfs_buf_t *bp; 1627 1628 bp = xfs_btree_get_bufs(args->mp, args->tp, 1629 args->agno, fbno, 0); 1630 if (!bp) { 1631 error = -EFSCORRUPTED; 1632 goto error0; 1633 } 1634 xfs_trans_binval(args->tp, bp); 1635 } 1636 args->len = 1; 1637 args->agbno = fbno; 1638 XFS_WANT_CORRUPTED_GOTO(args->mp, 1639 args->agbno + args->len <= 1640 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), 1641 error0); 1642 args->wasfromfl = 1; 1643 trace_xfs_alloc_small_freelist(args); 1644 1645 /* 1646 * If we're feeding an AGFL block to something that 1647 * doesn't live in the free space, we need to clear 1648 * out the OWN_AG rmap. 1649 */ 1650 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); 1651 error = xfs_rmap_free(args->tp, args->agbp, args->agno, 1652 fbno, 1, &oinfo); 1653 if (error) 1654 goto error0; 1655 1656 *stat = 0; 1657 return 0; 1658 } 1659 /* 1660 * Nothing in the freelist. 1661 */ 1662 else 1663 flen = 0; 1664 } 1665 /* 1666 * Can't allocate from the freelist for some reason. 1667 */ 1668 else { 1669 fbno = NULLAGBLOCK; 1670 flen = 0; 1671 } 1672 /* 1673 * Can't do the allocation, give up. 1674 */ 1675 if (flen < args->minlen) { 1676 args->agbno = NULLAGBLOCK; 1677 trace_xfs_alloc_small_notenough(args); 1678 flen = 0; 1679 } 1680 *fbnop = fbno; 1681 *flenp = flen; 1682 *stat = 1; 1683 trace_xfs_alloc_small_done(args); 1684 return 0; 1685 1686 error0: 1687 trace_xfs_alloc_small_error(args); 1688 return error; 1689 } 1690 1691 /* 1692 * Free the extent starting at agno/bno for length. 1693 */ 1694 STATIC int 1695 xfs_free_ag_extent( 1696 xfs_trans_t *tp, 1697 xfs_buf_t *agbp, 1698 xfs_agnumber_t agno, 1699 xfs_agblock_t bno, 1700 xfs_extlen_t len, 1701 struct xfs_owner_info *oinfo, 1702 enum xfs_ag_resv_type type) 1703 { 1704 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */ 1705 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */ 1706 int error; /* error return value */ 1707 xfs_agblock_t gtbno; /* start of right neighbor block */ 1708 xfs_extlen_t gtlen; /* length of right neighbor block */ 1709 int haveleft; /* have a left neighbor block */ 1710 int haveright; /* have a right neighbor block */ 1711 int i; /* temp, result code */ 1712 xfs_agblock_t ltbno; /* start of left neighbor block */ 1713 xfs_extlen_t ltlen; /* length of left neighbor block */ 1714 xfs_mount_t *mp; /* mount point struct for filesystem */ 1715 xfs_agblock_t nbno; /* new starting block of freespace */ 1716 xfs_extlen_t nlen; /* new length of freespace */ 1717 xfs_perag_t *pag; /* per allocation group data */ 1718 1719 bno_cur = cnt_cur = NULL; 1720 mp = tp->t_mountp; 1721 1722 if (!xfs_rmap_should_skip_owner_update(oinfo)) { 1723 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo); 1724 if (error) 1725 goto error0; 1726 } 1727 1728 /* 1729 * Allocate and initialize a cursor for the by-block btree. 1730 */ 1731 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO); 1732 /* 1733 * Look for a neighboring block on the left (lower block numbers) 1734 * that is contiguous with this space. 1735 */ 1736 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft))) 1737 goto error0; 1738 if (haveleft) { 1739 /* 1740 * There is a block to our left. 1741 */ 1742 if ((error = xfs_alloc_get_rec(bno_cur, <bno, <len, &i))) 1743 goto error0; 1744 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1745 /* 1746 * It's not contiguous, though. 1747 */ 1748 if (ltbno + ltlen < bno) 1749 haveleft = 0; 1750 else { 1751 /* 1752 * If this failure happens the request to free this 1753 * space was invalid, it's (partly) already free. 1754 * Very bad. 1755 */ 1756 XFS_WANT_CORRUPTED_GOTO(mp, 1757 ltbno + ltlen <= bno, error0); 1758 } 1759 } 1760 /* 1761 * Look for a neighboring block on the right (higher block numbers) 1762 * that is contiguous with this space. 1763 */ 1764 if ((error = xfs_btree_increment(bno_cur, 0, &haveright))) 1765 goto error0; 1766 if (haveright) { 1767 /* 1768 * There is a block to our right. 1769 */ 1770 if ((error = xfs_alloc_get_rec(bno_cur, >bno, >len, &i))) 1771 goto error0; 1772 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1773 /* 1774 * It's not contiguous, though. 1775 */ 1776 if (bno + len < gtbno) 1777 haveright = 0; 1778 else { 1779 /* 1780 * If this failure happens the request to free this 1781 * space was invalid, it's (partly) already free. 1782 * Very bad. 1783 */ 1784 XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0); 1785 } 1786 } 1787 /* 1788 * Now allocate and initialize a cursor for the by-size tree. 1789 */ 1790 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT); 1791 /* 1792 * Have both left and right contiguous neighbors. 1793 * Merge all three into a single free block. 1794 */ 1795 if (haveleft && haveright) { 1796 /* 1797 * Delete the old by-size entry on the left. 1798 */ 1799 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) 1800 goto error0; 1801 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1802 if ((error = xfs_btree_delete(cnt_cur, &i))) 1803 goto error0; 1804 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1805 /* 1806 * Delete the old by-size entry on the right. 1807 */ 1808 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) 1809 goto error0; 1810 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1811 if ((error = xfs_btree_delete(cnt_cur, &i))) 1812 goto error0; 1813 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1814 /* 1815 * Delete the old by-block entry for the right block. 1816 */ 1817 if ((error = xfs_btree_delete(bno_cur, &i))) 1818 goto error0; 1819 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1820 /* 1821 * Move the by-block cursor back to the left neighbor. 1822 */ 1823 if ((error = xfs_btree_decrement(bno_cur, 0, &i))) 1824 goto error0; 1825 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1826 #ifdef DEBUG 1827 /* 1828 * Check that this is the right record: delete didn't 1829 * mangle the cursor. 1830 */ 1831 { 1832 xfs_agblock_t xxbno; 1833 xfs_extlen_t xxlen; 1834 1835 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen, 1836 &i))) 1837 goto error0; 1838 XFS_WANT_CORRUPTED_GOTO(mp, 1839 i == 1 && xxbno == ltbno && xxlen == ltlen, 1840 error0); 1841 } 1842 #endif 1843 /* 1844 * Update remaining by-block entry to the new, joined block. 1845 */ 1846 nbno = ltbno; 1847 nlen = len + ltlen + gtlen; 1848 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 1849 goto error0; 1850 } 1851 /* 1852 * Have only a left contiguous neighbor. 1853 * Merge it together with the new freespace. 1854 */ 1855 else if (haveleft) { 1856 /* 1857 * Delete the old by-size entry on the left. 1858 */ 1859 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) 1860 goto error0; 1861 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1862 if ((error = xfs_btree_delete(cnt_cur, &i))) 1863 goto error0; 1864 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1865 /* 1866 * Back up the by-block cursor to the left neighbor, and 1867 * update its length. 1868 */ 1869 if ((error = xfs_btree_decrement(bno_cur, 0, &i))) 1870 goto error0; 1871 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1872 nbno = ltbno; 1873 nlen = len + ltlen; 1874 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 1875 goto error0; 1876 } 1877 /* 1878 * Have only a right contiguous neighbor. 1879 * Merge it together with the new freespace. 1880 */ 1881 else if (haveright) { 1882 /* 1883 * Delete the old by-size entry on the right. 1884 */ 1885 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) 1886 goto error0; 1887 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1888 if ((error = xfs_btree_delete(cnt_cur, &i))) 1889 goto error0; 1890 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1891 /* 1892 * Update the starting block and length of the right 1893 * neighbor in the by-block tree. 1894 */ 1895 nbno = bno; 1896 nlen = len + gtlen; 1897 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 1898 goto error0; 1899 } 1900 /* 1901 * No contiguous neighbors. 1902 * Insert the new freespace into the by-block tree. 1903 */ 1904 else { 1905 nbno = bno; 1906 nlen = len; 1907 if ((error = xfs_btree_insert(bno_cur, &i))) 1908 goto error0; 1909 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1910 } 1911 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 1912 bno_cur = NULL; 1913 /* 1914 * In all cases we need to insert the new freespace in the by-size tree. 1915 */ 1916 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) 1917 goto error0; 1918 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0); 1919 if ((error = xfs_btree_insert(cnt_cur, &i))) 1920 goto error0; 1921 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1922 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1923 cnt_cur = NULL; 1924 1925 /* 1926 * Update the freespace totals in the ag and superblock. 1927 */ 1928 pag = xfs_perag_get(mp, agno); 1929 error = xfs_alloc_update_counters(tp, pag, agbp, len); 1930 xfs_ag_resv_free_extent(pag, type, tp, len); 1931 xfs_perag_put(pag); 1932 if (error) 1933 goto error0; 1934 1935 XFS_STATS_INC(mp, xs_freex); 1936 XFS_STATS_ADD(mp, xs_freeb, len); 1937 1938 trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright); 1939 1940 return 0; 1941 1942 error0: 1943 trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1); 1944 if (bno_cur) 1945 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 1946 if (cnt_cur) 1947 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1948 return error; 1949 } 1950 1951 /* 1952 * Visible (exported) allocation/free functions. 1953 * Some of these are used just by xfs_alloc_btree.c and this file. 1954 */ 1955 1956 /* 1957 * Compute and fill in value of m_ag_maxlevels. 1958 */ 1959 void 1960 xfs_alloc_compute_maxlevels( 1961 xfs_mount_t *mp) /* file system mount structure */ 1962 { 1963 mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr, 1964 (mp->m_sb.sb_agblocks + 1) / 2); 1965 } 1966 1967 /* 1968 * Find the length of the longest extent in an AG. The 'need' parameter 1969 * specifies how much space we're going to need for the AGFL and the 1970 * 'reserved' parameter tells us how many blocks in this AG are reserved for 1971 * other callers. 1972 */ 1973 xfs_extlen_t 1974 xfs_alloc_longest_free_extent( 1975 struct xfs_perag *pag, 1976 xfs_extlen_t need, 1977 xfs_extlen_t reserved) 1978 { 1979 xfs_extlen_t delta = 0; 1980 1981 /* 1982 * If the AGFL needs a recharge, we'll have to subtract that from the 1983 * longest extent. 1984 */ 1985 if (need > pag->pagf_flcount) 1986 delta = need - pag->pagf_flcount; 1987 1988 /* 1989 * If we cannot maintain others' reservations with space from the 1990 * not-longest freesp extents, we'll have to subtract /that/ from 1991 * the longest extent too. 1992 */ 1993 if (pag->pagf_freeblks - pag->pagf_longest < reserved) 1994 delta += reserved - (pag->pagf_freeblks - pag->pagf_longest); 1995 1996 /* 1997 * If the longest extent is long enough to satisfy all the 1998 * reservations and AGFL rules in place, we can return this extent. 1999 */ 2000 if (pag->pagf_longest > delta) 2001 return pag->pagf_longest - delta; 2002 2003 /* Otherwise, let the caller try for 1 block if there's space. */ 2004 return pag->pagf_flcount > 0 || pag->pagf_longest > 0; 2005 } 2006 2007 unsigned int 2008 xfs_alloc_min_freelist( 2009 struct xfs_mount *mp, 2010 struct xfs_perag *pag) 2011 { 2012 unsigned int min_free; 2013 2014 /* space needed by-bno freespace btree */ 2015 min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1, 2016 mp->m_ag_maxlevels); 2017 /* space needed by-size freespace btree */ 2018 min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1, 2019 mp->m_ag_maxlevels); 2020 /* space needed reverse mapping used space btree */ 2021 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 2022 min_free += min_t(unsigned int, 2023 pag->pagf_levels[XFS_BTNUM_RMAPi] + 1, 2024 mp->m_rmap_maxlevels); 2025 2026 return min_free; 2027 } 2028 2029 /* 2030 * Check if the operation we are fixing up the freelist for should go ahead or 2031 * not. If we are freeing blocks, we always allow it, otherwise the allocation 2032 * is dependent on whether the size and shape of free space available will 2033 * permit the requested allocation to take place. 2034 */ 2035 static bool 2036 xfs_alloc_space_available( 2037 struct xfs_alloc_arg *args, 2038 xfs_extlen_t min_free, 2039 int flags) 2040 { 2041 struct xfs_perag *pag = args->pag; 2042 xfs_extlen_t alloc_len, longest; 2043 xfs_extlen_t reservation; /* blocks that are still reserved */ 2044 int available; 2045 2046 if (flags & XFS_ALLOC_FLAG_FREEING) 2047 return true; 2048 2049 reservation = xfs_ag_resv_needed(pag, args->resv); 2050 2051 /* do we have enough contiguous free space for the allocation? */ 2052 alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop; 2053 longest = xfs_alloc_longest_free_extent(pag, min_free, reservation); 2054 if (longest < alloc_len) 2055 return false; 2056 2057 /* do we have enough free space remaining for the allocation? */ 2058 available = (int)(pag->pagf_freeblks + pag->pagf_flcount - 2059 reservation - min_free - args->minleft); 2060 if (available < (int)max(args->total, alloc_len)) 2061 return false; 2062 2063 /* 2064 * Clamp maxlen to the amount of free space available for the actual 2065 * extent allocation. 2066 */ 2067 if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) { 2068 args->maxlen = available; 2069 ASSERT(args->maxlen > 0); 2070 ASSERT(args->maxlen >= args->minlen); 2071 } 2072 2073 return true; 2074 } 2075 2076 int 2077 xfs_free_agfl_block( 2078 struct xfs_trans *tp, 2079 xfs_agnumber_t agno, 2080 xfs_agblock_t agbno, 2081 struct xfs_buf *agbp, 2082 struct xfs_owner_info *oinfo) 2083 { 2084 int error; 2085 struct xfs_buf *bp; 2086 2087 error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo, 2088 XFS_AG_RESV_AGFL); 2089 if (error) 2090 return error; 2091 2092 bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno, 0); 2093 if (!bp) 2094 return -EFSCORRUPTED; 2095 xfs_trans_binval(tp, bp); 2096 2097 return 0; 2098 } 2099 2100 /* 2101 * Check the agfl fields of the agf for inconsistency or corruption. The purpose 2102 * is to detect an agfl header padding mismatch between current and early v5 2103 * kernels. This problem manifests as a 1-slot size difference between the 2104 * on-disk flcount and the active [first, last] range of a wrapped agfl. This 2105 * may also catch variants of agfl count corruption unrelated to padding. Either 2106 * way, we'll reset the agfl and warn the user. 2107 * 2108 * Return true if a reset is required before the agfl can be used, false 2109 * otherwise. 2110 */ 2111 static bool 2112 xfs_agfl_needs_reset( 2113 struct xfs_mount *mp, 2114 struct xfs_agf *agf) 2115 { 2116 uint32_t f = be32_to_cpu(agf->agf_flfirst); 2117 uint32_t l = be32_to_cpu(agf->agf_fllast); 2118 uint32_t c = be32_to_cpu(agf->agf_flcount); 2119 int agfl_size = xfs_agfl_size(mp); 2120 int active; 2121 2122 /* no agfl header on v4 supers */ 2123 if (!xfs_sb_version_hascrc(&mp->m_sb)) 2124 return false; 2125 2126 /* 2127 * The agf read verifier catches severe corruption of these fields. 2128 * Repeat some sanity checks to cover a packed -> unpacked mismatch if 2129 * the verifier allows it. 2130 */ 2131 if (f >= agfl_size || l >= agfl_size) 2132 return true; 2133 if (c > agfl_size) 2134 return true; 2135 2136 /* 2137 * Check consistency between the on-disk count and the active range. An 2138 * agfl padding mismatch manifests as an inconsistent flcount. 2139 */ 2140 if (c && l >= f) 2141 active = l - f + 1; 2142 else if (c) 2143 active = agfl_size - f + l + 1; 2144 else 2145 active = 0; 2146 2147 return active != c; 2148 } 2149 2150 /* 2151 * Reset the agfl to an empty state. Ignore/drop any existing blocks since the 2152 * agfl content cannot be trusted. Warn the user that a repair is required to 2153 * recover leaked blocks. 2154 * 2155 * The purpose of this mechanism is to handle filesystems affected by the agfl 2156 * header padding mismatch problem. A reset keeps the filesystem online with a 2157 * relatively minor free space accounting inconsistency rather than suffer the 2158 * inevitable crash from use of an invalid agfl block. 2159 */ 2160 static void 2161 xfs_agfl_reset( 2162 struct xfs_trans *tp, 2163 struct xfs_buf *agbp, 2164 struct xfs_perag *pag) 2165 { 2166 struct xfs_mount *mp = tp->t_mountp; 2167 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); 2168 2169 ASSERT(pag->pagf_agflreset); 2170 trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_); 2171 2172 xfs_warn(mp, 2173 "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. " 2174 "Please unmount and run xfs_repair.", 2175 pag->pag_agno, pag->pagf_flcount); 2176 2177 agf->agf_flfirst = 0; 2178 agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1); 2179 agf->agf_flcount = 0; 2180 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST | 2181 XFS_AGF_FLCOUNT); 2182 2183 pag->pagf_flcount = 0; 2184 pag->pagf_agflreset = false; 2185 } 2186 2187 /* 2188 * Defer an AGFL block free. This is effectively equivalent to 2189 * xfs_bmap_add_free() with some special handling particular to AGFL blocks. 2190 * 2191 * Deferring AGFL frees helps prevent log reservation overruns due to too many 2192 * allocation operations in a transaction. AGFL frees are prone to this problem 2193 * because for one they are always freed one at a time. Further, an immediate 2194 * AGFL block free can cause a btree join and require another block free before 2195 * the real allocation can proceed. Deferring the free disconnects freeing up 2196 * the AGFL slot from freeing the block. 2197 */ 2198 STATIC void 2199 xfs_defer_agfl_block( 2200 struct xfs_mount *mp, 2201 struct xfs_defer_ops *dfops, 2202 xfs_agnumber_t agno, 2203 xfs_fsblock_t agbno, 2204 struct xfs_owner_info *oinfo) 2205 { 2206 struct xfs_extent_free_item *new; /* new element */ 2207 2208 ASSERT(xfs_bmap_free_item_zone != NULL); 2209 ASSERT(oinfo != NULL); 2210 2211 new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP); 2212 new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); 2213 new->xefi_blockcount = 1; 2214 new->xefi_oinfo = *oinfo; 2215 2216 trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); 2217 2218 xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list); 2219 } 2220 2221 /* 2222 * Decide whether to use this allocation group for this allocation. 2223 * If so, fix up the btree freelist's size. 2224 */ 2225 int /* error */ 2226 xfs_alloc_fix_freelist( 2227 struct xfs_alloc_arg *args, /* allocation argument structure */ 2228 int flags) /* XFS_ALLOC_FLAG_... */ 2229 { 2230 struct xfs_mount *mp = args->mp; 2231 struct xfs_perag *pag = args->pag; 2232 struct xfs_trans *tp = args->tp; 2233 struct xfs_buf *agbp = NULL; 2234 struct xfs_buf *agflbp = NULL; 2235 struct xfs_alloc_arg targs; /* local allocation arguments */ 2236 xfs_agblock_t bno; /* freelist block */ 2237 xfs_extlen_t need; /* total blocks needed in freelist */ 2238 int error = 0; 2239 2240 if (!pag->pagf_init) { 2241 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp); 2242 if (error) 2243 goto out_no_agbp; 2244 if (!pag->pagf_init) { 2245 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); 2246 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 2247 goto out_agbp_relse; 2248 } 2249 } 2250 2251 /* 2252 * If this is a metadata preferred pag and we are user data then try 2253 * somewhere else if we are not being asked to try harder at this 2254 * point 2255 */ 2256 if (pag->pagf_metadata && xfs_alloc_is_userdata(args->datatype) && 2257 (flags & XFS_ALLOC_FLAG_TRYLOCK)) { 2258 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 2259 goto out_agbp_relse; 2260 } 2261 2262 need = xfs_alloc_min_freelist(mp, pag); 2263 if (!xfs_alloc_space_available(args, need, flags | 2264 XFS_ALLOC_FLAG_CHECK)) 2265 goto out_agbp_relse; 2266 2267 /* 2268 * Get the a.g. freespace buffer. 2269 * Can fail if we're not blocking on locks, and it's held. 2270 */ 2271 if (!agbp) { 2272 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp); 2273 if (error) 2274 goto out_no_agbp; 2275 if (!agbp) { 2276 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); 2277 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 2278 goto out_no_agbp; 2279 } 2280 } 2281 2282 /* reset a padding mismatched agfl before final free space check */ 2283 if (pag->pagf_agflreset) 2284 xfs_agfl_reset(tp, agbp, pag); 2285 2286 /* If there isn't enough total space or single-extent, reject it. */ 2287 need = xfs_alloc_min_freelist(mp, pag); 2288 if (!xfs_alloc_space_available(args, need, flags)) 2289 goto out_agbp_relse; 2290 2291 /* 2292 * Make the freelist shorter if it's too long. 2293 * 2294 * Note that from this point onwards, we will always release the agf and 2295 * agfl buffers on error. This handles the case where we error out and 2296 * the buffers are clean or may not have been joined to the transaction 2297 * and hence need to be released manually. If they have been joined to 2298 * the transaction, then xfs_trans_brelse() will handle them 2299 * appropriately based on the recursion count and dirty state of the 2300 * buffer. 2301 * 2302 * XXX (dgc): When we have lots of free space, does this buy us 2303 * anything other than extra overhead when we need to put more blocks 2304 * back on the free list? Maybe we should only do this when space is 2305 * getting low or the AGFL is more than half full? 2306 * 2307 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too 2308 * big; the NORMAP flag prevents AGFL expand/shrink operations from 2309 * updating the rmapbt. Both flags are used in xfs_repair while we're 2310 * rebuilding the rmapbt, and neither are used by the kernel. They're 2311 * both required to ensure that rmaps are correctly recorded for the 2312 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and 2313 * repair/rmap.c in xfsprogs for details. 2314 */ 2315 memset(&targs, 0, sizeof(targs)); 2316 if (flags & XFS_ALLOC_FLAG_NORMAP) 2317 xfs_rmap_skip_owner_update(&targs.oinfo); 2318 else 2319 xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG); 2320 while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) { 2321 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0); 2322 if (error) 2323 goto out_agbp_relse; 2324 2325 /* defer agfl frees if dfops is provided */ 2326 if (tp->t_agfl_dfops) { 2327 xfs_defer_agfl_block(mp, tp->t_agfl_dfops, args->agno, 2328 bno, &targs.oinfo); 2329 } else { 2330 error = xfs_free_agfl_block(tp, args->agno, bno, agbp, 2331 &targs.oinfo); 2332 if (error) 2333 goto out_agbp_relse; 2334 } 2335 } 2336 2337 targs.tp = tp; 2338 targs.mp = mp; 2339 targs.agbp = agbp; 2340 targs.agno = args->agno; 2341 targs.alignment = targs.minlen = targs.prod = 1; 2342 targs.type = XFS_ALLOCTYPE_THIS_AG; 2343 targs.pag = pag; 2344 error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp); 2345 if (error) 2346 goto out_agbp_relse; 2347 2348 /* Make the freelist longer if it's too short. */ 2349 while (pag->pagf_flcount < need) { 2350 targs.agbno = 0; 2351 targs.maxlen = need - pag->pagf_flcount; 2352 targs.resv = XFS_AG_RESV_AGFL; 2353 2354 /* Allocate as many blocks as possible at once. */ 2355 error = xfs_alloc_ag_vextent(&targs); 2356 if (error) 2357 goto out_agflbp_relse; 2358 2359 /* 2360 * Stop if we run out. Won't happen if callers are obeying 2361 * the restrictions correctly. Can happen for free calls 2362 * on a completely full ag. 2363 */ 2364 if (targs.agbno == NULLAGBLOCK) { 2365 if (flags & XFS_ALLOC_FLAG_FREEING) 2366 break; 2367 goto out_agflbp_relse; 2368 } 2369 /* 2370 * Put each allocated block on the list. 2371 */ 2372 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) { 2373 error = xfs_alloc_put_freelist(tp, agbp, 2374 agflbp, bno, 0); 2375 if (error) 2376 goto out_agflbp_relse; 2377 } 2378 } 2379 xfs_trans_brelse(tp, agflbp); 2380 args->agbp = agbp; 2381 return 0; 2382 2383 out_agflbp_relse: 2384 xfs_trans_brelse(tp, agflbp); 2385 out_agbp_relse: 2386 if (agbp) 2387 xfs_trans_brelse(tp, agbp); 2388 out_no_agbp: 2389 args->agbp = NULL; 2390 return error; 2391 } 2392 2393 /* 2394 * Get a block from the freelist. 2395 * Returns with the buffer for the block gotten. 2396 */ 2397 int /* error */ 2398 xfs_alloc_get_freelist( 2399 xfs_trans_t *tp, /* transaction pointer */ 2400 xfs_buf_t *agbp, /* buffer containing the agf structure */ 2401 xfs_agblock_t *bnop, /* block address retrieved from freelist */ 2402 int btreeblk) /* destination is a AGF btree */ 2403 { 2404 xfs_agf_t *agf; /* a.g. freespace structure */ 2405 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ 2406 xfs_agblock_t bno; /* block number returned */ 2407 __be32 *agfl_bno; 2408 int error; 2409 int logflags; 2410 xfs_mount_t *mp = tp->t_mountp; 2411 xfs_perag_t *pag; /* per allocation group data */ 2412 2413 /* 2414 * Freelist is empty, give up. 2415 */ 2416 agf = XFS_BUF_TO_AGF(agbp); 2417 if (!agf->agf_flcount) { 2418 *bnop = NULLAGBLOCK; 2419 return 0; 2420 } 2421 /* 2422 * Read the array of free blocks. 2423 */ 2424 error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno), 2425 &agflbp); 2426 if (error) 2427 return error; 2428 2429 2430 /* 2431 * Get the block number and update the data structures. 2432 */ 2433 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); 2434 bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]); 2435 be32_add_cpu(&agf->agf_flfirst, 1); 2436 xfs_trans_brelse(tp, agflbp); 2437 if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp)) 2438 agf->agf_flfirst = 0; 2439 2440 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); 2441 ASSERT(!pag->pagf_agflreset); 2442 be32_add_cpu(&agf->agf_flcount, -1); 2443 xfs_trans_agflist_delta(tp, -1); 2444 pag->pagf_flcount--; 2445 xfs_perag_put(pag); 2446 2447 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT; 2448 if (btreeblk) { 2449 be32_add_cpu(&agf->agf_btreeblks, 1); 2450 pag->pagf_btreeblks++; 2451 logflags |= XFS_AGF_BTREEBLKS; 2452 } 2453 2454 xfs_alloc_log_agf(tp, agbp, logflags); 2455 *bnop = bno; 2456 2457 return 0; 2458 } 2459 2460 /* 2461 * Log the given fields from the agf structure. 2462 */ 2463 void 2464 xfs_alloc_log_agf( 2465 xfs_trans_t *tp, /* transaction pointer */ 2466 xfs_buf_t *bp, /* buffer for a.g. freelist header */ 2467 int fields) /* mask of fields to be logged (XFS_AGF_...) */ 2468 { 2469 int first; /* first byte offset */ 2470 int last; /* last byte offset */ 2471 static const short offsets[] = { 2472 offsetof(xfs_agf_t, agf_magicnum), 2473 offsetof(xfs_agf_t, agf_versionnum), 2474 offsetof(xfs_agf_t, agf_seqno), 2475 offsetof(xfs_agf_t, agf_length), 2476 offsetof(xfs_agf_t, agf_roots[0]), 2477 offsetof(xfs_agf_t, agf_levels[0]), 2478 offsetof(xfs_agf_t, agf_flfirst), 2479 offsetof(xfs_agf_t, agf_fllast), 2480 offsetof(xfs_agf_t, agf_flcount), 2481 offsetof(xfs_agf_t, agf_freeblks), 2482 offsetof(xfs_agf_t, agf_longest), 2483 offsetof(xfs_agf_t, agf_btreeblks), 2484 offsetof(xfs_agf_t, agf_uuid), 2485 offsetof(xfs_agf_t, agf_rmap_blocks), 2486 offsetof(xfs_agf_t, agf_refcount_blocks), 2487 offsetof(xfs_agf_t, agf_refcount_root), 2488 offsetof(xfs_agf_t, agf_refcount_level), 2489 /* needed so that we don't log the whole rest of the structure: */ 2490 offsetof(xfs_agf_t, agf_spare64), 2491 sizeof(xfs_agf_t) 2492 }; 2493 2494 trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_); 2495 2496 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF); 2497 2498 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last); 2499 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last); 2500 } 2501 2502 /* 2503 * Interface for inode allocation to force the pag data to be initialized. 2504 */ 2505 int /* error */ 2506 xfs_alloc_pagf_init( 2507 xfs_mount_t *mp, /* file system mount structure */ 2508 xfs_trans_t *tp, /* transaction pointer */ 2509 xfs_agnumber_t agno, /* allocation group number */ 2510 int flags) /* XFS_ALLOC_FLAGS_... */ 2511 { 2512 xfs_buf_t *bp; 2513 int error; 2514 2515 if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp))) 2516 return error; 2517 if (bp) 2518 xfs_trans_brelse(tp, bp); 2519 return 0; 2520 } 2521 2522 /* 2523 * Put the block on the freelist for the allocation group. 2524 */ 2525 int /* error */ 2526 xfs_alloc_put_freelist( 2527 xfs_trans_t *tp, /* transaction pointer */ 2528 xfs_buf_t *agbp, /* buffer for a.g. freelist header */ 2529 xfs_buf_t *agflbp,/* buffer for a.g. free block array */ 2530 xfs_agblock_t bno, /* block being freed */ 2531 int btreeblk) /* block came from a AGF btree */ 2532 { 2533 xfs_agf_t *agf; /* a.g. freespace structure */ 2534 __be32 *blockp;/* pointer to array entry */ 2535 int error; 2536 int logflags; 2537 xfs_mount_t *mp; /* mount structure */ 2538 xfs_perag_t *pag; /* per allocation group data */ 2539 __be32 *agfl_bno; 2540 int startoff; 2541 2542 agf = XFS_BUF_TO_AGF(agbp); 2543 mp = tp->t_mountp; 2544 2545 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp, 2546 be32_to_cpu(agf->agf_seqno), &agflbp))) 2547 return error; 2548 be32_add_cpu(&agf->agf_fllast, 1); 2549 if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp)) 2550 agf->agf_fllast = 0; 2551 2552 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); 2553 ASSERT(!pag->pagf_agflreset); 2554 be32_add_cpu(&agf->agf_flcount, 1); 2555 xfs_trans_agflist_delta(tp, 1); 2556 pag->pagf_flcount++; 2557 2558 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT; 2559 if (btreeblk) { 2560 be32_add_cpu(&agf->agf_btreeblks, -1); 2561 pag->pagf_btreeblks--; 2562 logflags |= XFS_AGF_BTREEBLKS; 2563 } 2564 xfs_perag_put(pag); 2565 2566 xfs_alloc_log_agf(tp, agbp, logflags); 2567 2568 ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)); 2569 2570 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); 2571 blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)]; 2572 *blockp = cpu_to_be32(bno); 2573 startoff = (char *)blockp - (char *)agflbp->b_addr; 2574 2575 xfs_alloc_log_agf(tp, agbp, logflags); 2576 2577 xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF); 2578 xfs_trans_log_buf(tp, agflbp, startoff, 2579 startoff + sizeof(xfs_agblock_t) - 1); 2580 return 0; 2581 } 2582 2583 static xfs_failaddr_t 2584 xfs_agf_verify( 2585 struct xfs_buf *bp) 2586 { 2587 struct xfs_mount *mp = bp->b_target->bt_mount; 2588 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); 2589 2590 if (xfs_sb_version_hascrc(&mp->m_sb)) { 2591 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) 2592 return __this_address; 2593 if (!xfs_log_check_lsn(mp, 2594 be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn))) 2595 return __this_address; 2596 } 2597 2598 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && 2599 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && 2600 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && 2601 be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) && 2602 be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) && 2603 be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp))) 2604 return __this_address; 2605 2606 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || 2607 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || 2608 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || 2609 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) 2610 return __this_address; 2611 2612 if (xfs_sb_version_hasrmapbt(&mp->m_sb) && 2613 (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || 2614 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)) 2615 return __this_address; 2616 2617 /* 2618 * during growfs operations, the perag is not fully initialised, 2619 * so we can't use it for any useful checking. growfs ensures we can't 2620 * use it by using uncached buffers that don't have the perag attached 2621 * so we can detect and avoid this problem. 2622 */ 2623 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno) 2624 return __this_address; 2625 2626 if (xfs_sb_version_haslazysbcount(&mp->m_sb) && 2627 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length)) 2628 return __this_address; 2629 2630 if (xfs_sb_version_hasreflink(&mp->m_sb) && 2631 (be32_to_cpu(agf->agf_refcount_level) < 1 || 2632 be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)) 2633 return __this_address; 2634 2635 return NULL; 2636 2637 } 2638 2639 static void 2640 xfs_agf_read_verify( 2641 struct xfs_buf *bp) 2642 { 2643 struct xfs_mount *mp = bp->b_target->bt_mount; 2644 xfs_failaddr_t fa; 2645 2646 if (xfs_sb_version_hascrc(&mp->m_sb) && 2647 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) 2648 xfs_verifier_error(bp, -EFSBADCRC, __this_address); 2649 else { 2650 fa = xfs_agf_verify(bp); 2651 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF)) 2652 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 2653 } 2654 } 2655 2656 static void 2657 xfs_agf_write_verify( 2658 struct xfs_buf *bp) 2659 { 2660 struct xfs_mount *mp = bp->b_target->bt_mount; 2661 struct xfs_buf_log_item *bip = bp->b_log_item; 2662 xfs_failaddr_t fa; 2663 2664 fa = xfs_agf_verify(bp); 2665 if (fa) { 2666 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 2667 return; 2668 } 2669 2670 if (!xfs_sb_version_hascrc(&mp->m_sb)) 2671 return; 2672 2673 if (bip) 2674 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); 2675 2676 xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF); 2677 } 2678 2679 const struct xfs_buf_ops xfs_agf_buf_ops = { 2680 .name = "xfs_agf", 2681 .verify_read = xfs_agf_read_verify, 2682 .verify_write = xfs_agf_write_verify, 2683 .verify_struct = xfs_agf_verify, 2684 }; 2685 2686 /* 2687 * Read in the allocation group header (free/alloc section). 2688 */ 2689 int /* error */ 2690 xfs_read_agf( 2691 struct xfs_mount *mp, /* mount point structure */ 2692 struct xfs_trans *tp, /* transaction pointer */ 2693 xfs_agnumber_t agno, /* allocation group number */ 2694 int flags, /* XFS_BUF_ */ 2695 struct xfs_buf **bpp) /* buffer for the ag freelist header */ 2696 { 2697 int error; 2698 2699 trace_xfs_read_agf(mp, agno); 2700 2701 ASSERT(agno != NULLAGNUMBER); 2702 error = xfs_trans_read_buf( 2703 mp, tp, mp->m_ddev_targp, 2704 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), 2705 XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops); 2706 if (error) 2707 return error; 2708 if (!*bpp) 2709 return 0; 2710 2711 ASSERT(!(*bpp)->b_error); 2712 xfs_buf_set_ref(*bpp, XFS_AGF_REF); 2713 return 0; 2714 } 2715 2716 /* 2717 * Read in the allocation group header (free/alloc section). 2718 */ 2719 int /* error */ 2720 xfs_alloc_read_agf( 2721 struct xfs_mount *mp, /* mount point structure */ 2722 struct xfs_trans *tp, /* transaction pointer */ 2723 xfs_agnumber_t agno, /* allocation group number */ 2724 int flags, /* XFS_ALLOC_FLAG_... */ 2725 struct xfs_buf **bpp) /* buffer for the ag freelist header */ 2726 { 2727 struct xfs_agf *agf; /* ag freelist header */ 2728 struct xfs_perag *pag; /* per allocation group data */ 2729 int error; 2730 2731 trace_xfs_alloc_read_agf(mp, agno); 2732 2733 ASSERT(agno != NULLAGNUMBER); 2734 error = xfs_read_agf(mp, tp, agno, 2735 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0, 2736 bpp); 2737 if (error) 2738 return error; 2739 if (!*bpp) 2740 return 0; 2741 ASSERT(!(*bpp)->b_error); 2742 2743 agf = XFS_BUF_TO_AGF(*bpp); 2744 pag = xfs_perag_get(mp, agno); 2745 if (!pag->pagf_init) { 2746 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); 2747 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); 2748 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount); 2749 pag->pagf_longest = be32_to_cpu(agf->agf_longest); 2750 pag->pagf_levels[XFS_BTNUM_BNOi] = 2751 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]); 2752 pag->pagf_levels[XFS_BTNUM_CNTi] = 2753 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); 2754 pag->pagf_levels[XFS_BTNUM_RMAPi] = 2755 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]); 2756 pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level); 2757 spin_lock_init(&pag->pagb_lock); 2758 pag->pagb_count = 0; 2759 pag->pagb_tree = RB_ROOT; 2760 pag->pagf_init = 1; 2761 pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf); 2762 } 2763 #ifdef DEBUG 2764 else if (!XFS_FORCED_SHUTDOWN(mp)) { 2765 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); 2766 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); 2767 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); 2768 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); 2769 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] == 2770 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi])); 2771 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] == 2772 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi])); 2773 } 2774 #endif 2775 xfs_perag_put(pag); 2776 return 0; 2777 } 2778 2779 /* 2780 * Allocate an extent (variable-size). 2781 * Depending on the allocation type, we either look in a single allocation 2782 * group or loop over the allocation groups to find the result. 2783 */ 2784 int /* error */ 2785 xfs_alloc_vextent( 2786 xfs_alloc_arg_t *args) /* allocation argument structure */ 2787 { 2788 xfs_agblock_t agsize; /* allocation group size */ 2789 int error; 2790 int flags; /* XFS_ALLOC_FLAG_... locking flags */ 2791 xfs_mount_t *mp; /* mount structure pointer */ 2792 xfs_agnumber_t sagno; /* starting allocation group number */ 2793 xfs_alloctype_t type; /* input allocation type */ 2794 int bump_rotor = 0; 2795 xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */ 2796 2797 mp = args->mp; 2798 type = args->otype = args->type; 2799 args->agbno = NULLAGBLOCK; 2800 /* 2801 * Just fix this up, for the case where the last a.g. is shorter 2802 * (or there's only one a.g.) and the caller couldn't easily figure 2803 * that out (xfs_bmap_alloc). 2804 */ 2805 agsize = mp->m_sb.sb_agblocks; 2806 if (args->maxlen > agsize) 2807 args->maxlen = agsize; 2808 if (args->alignment == 0) 2809 args->alignment = 1; 2810 ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount); 2811 ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize); 2812 ASSERT(args->minlen <= args->maxlen); 2813 ASSERT(args->minlen <= agsize); 2814 ASSERT(args->mod < args->prod); 2815 if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount || 2816 XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize || 2817 args->minlen > args->maxlen || args->minlen > agsize || 2818 args->mod >= args->prod) { 2819 args->fsbno = NULLFSBLOCK; 2820 trace_xfs_alloc_vextent_badargs(args); 2821 return 0; 2822 } 2823 2824 switch (type) { 2825 case XFS_ALLOCTYPE_THIS_AG: 2826 case XFS_ALLOCTYPE_NEAR_BNO: 2827 case XFS_ALLOCTYPE_THIS_BNO: 2828 /* 2829 * These three force us into a single a.g. 2830 */ 2831 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); 2832 args->pag = xfs_perag_get(mp, args->agno); 2833 error = xfs_alloc_fix_freelist(args, 0); 2834 if (error) { 2835 trace_xfs_alloc_vextent_nofix(args); 2836 goto error0; 2837 } 2838 if (!args->agbp) { 2839 trace_xfs_alloc_vextent_noagbp(args); 2840 break; 2841 } 2842 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); 2843 if ((error = xfs_alloc_ag_vextent(args))) 2844 goto error0; 2845 break; 2846 case XFS_ALLOCTYPE_START_BNO: 2847 /* 2848 * Try near allocation first, then anywhere-in-ag after 2849 * the first a.g. fails. 2850 */ 2851 if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) && 2852 (mp->m_flags & XFS_MOUNT_32BITINODES)) { 2853 args->fsbno = XFS_AGB_TO_FSB(mp, 2854 ((mp->m_agfrotor / rotorstep) % 2855 mp->m_sb.sb_agcount), 0); 2856 bump_rotor = 1; 2857 } 2858 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); 2859 args->type = XFS_ALLOCTYPE_NEAR_BNO; 2860 /* FALLTHROUGH */ 2861 case XFS_ALLOCTYPE_FIRST_AG: 2862 /* 2863 * Rotate through the allocation groups looking for a winner. 2864 */ 2865 if (type == XFS_ALLOCTYPE_FIRST_AG) { 2866 /* 2867 * Start with allocation group given by bno. 2868 */ 2869 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); 2870 args->type = XFS_ALLOCTYPE_THIS_AG; 2871 sagno = 0; 2872 flags = 0; 2873 } else { 2874 /* 2875 * Start with the given allocation group. 2876 */ 2877 args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno); 2878 flags = XFS_ALLOC_FLAG_TRYLOCK; 2879 } 2880 /* 2881 * Loop over allocation groups twice; first time with 2882 * trylock set, second time without. 2883 */ 2884 for (;;) { 2885 args->pag = xfs_perag_get(mp, args->agno); 2886 error = xfs_alloc_fix_freelist(args, flags); 2887 if (error) { 2888 trace_xfs_alloc_vextent_nofix(args); 2889 goto error0; 2890 } 2891 /* 2892 * If we get a buffer back then the allocation will fly. 2893 */ 2894 if (args->agbp) { 2895 if ((error = xfs_alloc_ag_vextent(args))) 2896 goto error0; 2897 break; 2898 } 2899 2900 trace_xfs_alloc_vextent_loopfailed(args); 2901 2902 /* 2903 * Didn't work, figure out the next iteration. 2904 */ 2905 if (args->agno == sagno && 2906 type == XFS_ALLOCTYPE_START_BNO) 2907 args->type = XFS_ALLOCTYPE_THIS_AG; 2908 /* 2909 * For the first allocation, we can try any AG to get 2910 * space. However, if we already have allocated a 2911 * block, we don't want to try AGs whose number is below 2912 * sagno. Otherwise, we may end up with out-of-order 2913 * locking of AGF, which might cause deadlock. 2914 */ 2915 if (++(args->agno) == mp->m_sb.sb_agcount) { 2916 if (args->firstblock != NULLFSBLOCK) 2917 args->agno = sagno; 2918 else 2919 args->agno = 0; 2920 } 2921 /* 2922 * Reached the starting a.g., must either be done 2923 * or switch to non-trylock mode. 2924 */ 2925 if (args->agno == sagno) { 2926 if (flags == 0) { 2927 args->agbno = NULLAGBLOCK; 2928 trace_xfs_alloc_vextent_allfailed(args); 2929 break; 2930 } 2931 2932 flags = 0; 2933 if (type == XFS_ALLOCTYPE_START_BNO) { 2934 args->agbno = XFS_FSB_TO_AGBNO(mp, 2935 args->fsbno); 2936 args->type = XFS_ALLOCTYPE_NEAR_BNO; 2937 } 2938 } 2939 xfs_perag_put(args->pag); 2940 } 2941 if (bump_rotor) { 2942 if (args->agno == sagno) 2943 mp->m_agfrotor = (mp->m_agfrotor + 1) % 2944 (mp->m_sb.sb_agcount * rotorstep); 2945 else 2946 mp->m_agfrotor = (args->agno * rotorstep + 1) % 2947 (mp->m_sb.sb_agcount * rotorstep); 2948 } 2949 break; 2950 default: 2951 ASSERT(0); 2952 /* NOTREACHED */ 2953 } 2954 if (args->agbno == NULLAGBLOCK) 2955 args->fsbno = NULLFSBLOCK; 2956 else { 2957 args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno); 2958 #ifdef DEBUG 2959 ASSERT(args->len >= args->minlen); 2960 ASSERT(args->len <= args->maxlen); 2961 ASSERT(args->agbno % args->alignment == 0); 2962 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), 2963 args->len); 2964 #endif 2965 2966 /* Zero the extent if we were asked to do so */ 2967 if (args->datatype & XFS_ALLOC_USERDATA_ZERO) { 2968 error = xfs_zero_extent(args->ip, args->fsbno, args->len); 2969 if (error) 2970 goto error0; 2971 } 2972 2973 } 2974 xfs_perag_put(args->pag); 2975 return 0; 2976 error0: 2977 xfs_perag_put(args->pag); 2978 return error; 2979 } 2980 2981 /* Ensure that the freelist is at full capacity. */ 2982 int 2983 xfs_free_extent_fix_freelist( 2984 struct xfs_trans *tp, 2985 xfs_agnumber_t agno, 2986 struct xfs_buf **agbp) 2987 { 2988 struct xfs_alloc_arg args; 2989 int error; 2990 2991 memset(&args, 0, sizeof(struct xfs_alloc_arg)); 2992 args.tp = tp; 2993 args.mp = tp->t_mountp; 2994 args.agno = agno; 2995 2996 /* 2997 * validate that the block number is legal - the enables us to detect 2998 * and handle a silent filesystem corruption rather than crashing. 2999 */ 3000 if (args.agno >= args.mp->m_sb.sb_agcount) 3001 return -EFSCORRUPTED; 3002 3003 args.pag = xfs_perag_get(args.mp, args.agno); 3004 ASSERT(args.pag); 3005 3006 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); 3007 if (error) 3008 goto out; 3009 3010 *agbp = args.agbp; 3011 out: 3012 xfs_perag_put(args.pag); 3013 return error; 3014 } 3015 3016 /* 3017 * Free an extent. 3018 * Just break up the extent address and hand off to xfs_free_ag_extent 3019 * after fixing up the freelist. 3020 */ 3021 int /* error */ 3022 __xfs_free_extent( 3023 struct xfs_trans *tp, /* transaction pointer */ 3024 xfs_fsblock_t bno, /* starting block number of extent */ 3025 xfs_extlen_t len, /* length of extent */ 3026 struct xfs_owner_info *oinfo, /* extent owner */ 3027 enum xfs_ag_resv_type type, /* block reservation type */ 3028 bool skip_discard) 3029 { 3030 struct xfs_mount *mp = tp->t_mountp; 3031 struct xfs_buf *agbp; 3032 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno); 3033 xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno); 3034 int error; 3035 unsigned int busy_flags = 0; 3036 3037 ASSERT(len != 0); 3038 ASSERT(type != XFS_AG_RESV_AGFL); 3039 3040 if (XFS_TEST_ERROR(false, mp, 3041 XFS_ERRTAG_FREE_EXTENT)) 3042 return -EIO; 3043 3044 error = xfs_free_extent_fix_freelist(tp, agno, &agbp); 3045 if (error) 3046 return error; 3047 3048 XFS_WANT_CORRUPTED_GOTO(mp, agbno < mp->m_sb.sb_agblocks, err); 3049 3050 /* validate the extent size is legal now we have the agf locked */ 3051 XFS_WANT_CORRUPTED_GOTO(mp, 3052 agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length), 3053 err); 3054 3055 error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type); 3056 if (error) 3057 goto err; 3058 3059 if (skip_discard) 3060 busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD; 3061 xfs_extent_busy_insert(tp, agno, agbno, len, busy_flags); 3062 return 0; 3063 3064 err: 3065 xfs_trans_brelse(tp, agbp); 3066 return error; 3067 } 3068 3069 struct xfs_alloc_query_range_info { 3070 xfs_alloc_query_range_fn fn; 3071 void *priv; 3072 }; 3073 3074 /* Format btree record and pass to our callback. */ 3075 STATIC int 3076 xfs_alloc_query_range_helper( 3077 struct xfs_btree_cur *cur, 3078 union xfs_btree_rec *rec, 3079 void *priv) 3080 { 3081 struct xfs_alloc_query_range_info *query = priv; 3082 struct xfs_alloc_rec_incore irec; 3083 3084 irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock); 3085 irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount); 3086 return query->fn(cur, &irec, query->priv); 3087 } 3088 3089 /* Find all free space within a given range of blocks. */ 3090 int 3091 xfs_alloc_query_range( 3092 struct xfs_btree_cur *cur, 3093 struct xfs_alloc_rec_incore *low_rec, 3094 struct xfs_alloc_rec_incore *high_rec, 3095 xfs_alloc_query_range_fn fn, 3096 void *priv) 3097 { 3098 union xfs_btree_irec low_brec; 3099 union xfs_btree_irec high_brec; 3100 struct xfs_alloc_query_range_info query; 3101 3102 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO); 3103 low_brec.a = *low_rec; 3104 high_brec.a = *high_rec; 3105 query.priv = priv; 3106 query.fn = fn; 3107 return xfs_btree_query_range(cur, &low_brec, &high_brec, 3108 xfs_alloc_query_range_helper, &query); 3109 } 3110 3111 /* Find all free space records. */ 3112 int 3113 xfs_alloc_query_all( 3114 struct xfs_btree_cur *cur, 3115 xfs_alloc_query_range_fn fn, 3116 void *priv) 3117 { 3118 struct xfs_alloc_query_range_info query; 3119 3120 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO); 3121 query.priv = priv; 3122 query.fn = fn; 3123 return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query); 3124 } 3125 3126 /* Is there a record covering a given extent? */ 3127 int 3128 xfs_alloc_has_record( 3129 struct xfs_btree_cur *cur, 3130 xfs_agblock_t bno, 3131 xfs_extlen_t len, 3132 bool *exists) 3133 { 3134 union xfs_btree_irec low; 3135 union xfs_btree_irec high; 3136 3137 memset(&low, 0, sizeof(low)); 3138 low.a.ar_startblock = bno; 3139 memset(&high, 0xFF, sizeof(high)); 3140 high.a.ar_startblock = bno + len - 1; 3141 3142 return xfs_btree_has_record(cur, &low, &high, exists); 3143 } 3144 3145 /* 3146 * Walk all the blocks in the AGFL. The @walk_fn can return any negative 3147 * error code or XFS_BTREE_QUERY_RANGE_ABORT. 3148 */ 3149 int 3150 xfs_agfl_walk( 3151 struct xfs_mount *mp, 3152 struct xfs_agf *agf, 3153 struct xfs_buf *agflbp, 3154 xfs_agfl_walk_fn walk_fn, 3155 void *priv) 3156 { 3157 __be32 *agfl_bno; 3158 unsigned int i; 3159 int error; 3160 3161 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); 3162 i = be32_to_cpu(agf->agf_flfirst); 3163 3164 /* Nothing to walk in an empty AGFL. */ 3165 if (agf->agf_flcount == cpu_to_be32(0)) 3166 return 0; 3167 3168 /* Otherwise, walk from first to last, wrapping as needed. */ 3169 for (;;) { 3170 error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv); 3171 if (error) 3172 return error; 3173 if (i == be32_to_cpu(agf->agf_fllast)) 3174 break; 3175 if (++i == xfs_agfl_size(mp)) 3176 i = 0; 3177 } 3178 3179 return 0; 3180 } 3181