// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_bmap_btree.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trace.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"

static struct kmem_cache	*xfs_bmbt_cur_cache;

void
xfs_bmbt_init_block(
	struct xfs_inode	*ip,
	struct xfs_btree_block	*buf,
	struct xfs_buf		*bp,
	__u16			level,
	__u16			numrecs)
{
	if (bp)
		xfs_btree_init_buf(ip->i_mount, bp, &xfs_bmbt_ops, level,
				numrecs, ip->i_ino);
	else
		xfs_btree_init_block(ip->i_mount, buf, &xfs_bmbt_ops, level,
				numrecs, ip->i_ino);
}

/*
 * Convert on-disk form of btree root to in-memory form.
 */
void
xfs_bmdr_to_bmbt(
	struct xfs_inode	*ip,
	xfs_bmdr_block_t	*dblock,
	int			dblocklen,
	struct xfs_btree_block	*rblock,
	int			rblocklen)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			dmxr;
	xfs_bmbt_key_t		*fkp;
	__be64			*fpp;
	xfs_bmbt_key_t		*tkp;
	__be64			*tpp;

	xfs_bmbt_init_block(ip, rblock, NULL, 0, 0);
	rblock->bb_level = dblock->bb_level;
	ASSERT(be16_to_cpu(rblock->bb_level) > 0);
	rblock->bb_numrecs = dblock->bb_numrecs;
	dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
	fkp = xfs_bmdr_key_addr(dblock, 1);
	tkp = xfs_bmbt_key_addr(mp, rblock, 1);
	fpp = xfs_bmdr_ptr_addr(dblock, 1, dmxr);
	tpp = xfs_bmap_broot_ptr_addr(mp, rblock, 1, rblocklen);
	dmxr = be16_to_cpu(dblock->bb_numrecs);
	memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
	memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
}

void
xfs_bmbt_disk_get_all(
	const struct xfs_bmbt_rec *rec,
	struct xfs_bmbt_irec	*irec)
{
	uint64_t		l0 = get_unaligned_be64(&rec->l0);
	uint64_t		l1 = get_unaligned_be64(&rec->l1);

	irec->br_startoff = (l0 & xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
	irec->br_startblock = ((l0 & xfs_mask64lo(9)) << 43) | (l1 >> 21);
	irec->br_blockcount = l1 & xfs_mask64lo(21);
	if (l0 >> (64 - BMBT_EXNTFLAG_BITLEN))
		irec->br_state = XFS_EXT_UNWRITTEN;
	else
		irec->br_state = XFS_EXT_NORM;
}

/*
 * Extract the blockcount field from an on disk bmap extent record.
 */
xfs_filblks_t
xfs_bmbt_disk_get_blockcount(
	const struct xfs_bmbt_rec	*r)
{
	return (xfs_filblks_t)(be64_to_cpu(r->l1) & xfs_mask64lo(21));
}

/*
 * Extract the startoff field from a disk format bmap extent record.
 */
xfs_fileoff_t
xfs_bmbt_disk_get_startoff(
	const struct xfs_bmbt_rec	*r)
{
	return ((xfs_fileoff_t)be64_to_cpu(r->l0) &
		 xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
}
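
/*
 * Illustrative sketch, excluded from the build: a standalone round-trip of
 * the 128-bit packed extent record that the getters above decode.  The
 * field widths come from the shifts and masks in this file: 1 flag bit and
 * 54 startoff bits in l0, a 52-bit startblock split 9/43 across l0 and l1,
 * and a 21-bit blockcount in l1.  All names here are hypothetical.
 */
#if 0
#include <stdint.h>
#include <assert.h>

struct irec { uint64_t off, block, count; int unwritten; };

static void pack(const struct irec *s, uint64_t *l0, uint64_t *l1)
{
	*l0 = ((uint64_t)s->unwritten << 63) | (s->off << 9) |
	      (s->block >> 43);
	*l1 = (s->block << 21) | (s->count & ((1ULL << 21) - 1));
}

static void unpack(uint64_t l0, uint64_t l1, struct irec *s)
{
	s->off = (l0 & ((1ULL << 63) - 1)) >> 9;
	s->block = ((l0 & ((1ULL << 9) - 1)) << 43) | (l1 >> 21);
	s->count = l1 & ((1ULL << 21) - 1);
	s->unwritten = (int)(l0 >> 63);
}

int main(void)
{
	struct irec in = { .off = 8192, .block = 1234567, .count = 16,
			   .unwritten = 1 }, out;
	uint64_t l0, l1;

	pack(&in, &l0, &l1);
	unpack(l0, l1, &out);
	assert(in.off == out.off && in.block == out.block &&
	       in.count == out.count && in.unwritten == out.unwritten);
	return 0;
}
#endif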

/*
 * Set all the fields in a bmap extent record from the uncompressed form.
 */
void
xfs_bmbt_disk_set_all(
	struct xfs_bmbt_rec	*r,
	struct xfs_bmbt_irec	*s)
{
	int			extent_flag = (s->br_state != XFS_EXT_NORM);

	ASSERT(s->br_state == XFS_EXT_NORM || s->br_state == XFS_EXT_UNWRITTEN);
	ASSERT(!(s->br_startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)));
	ASSERT(!(s->br_blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)));
	ASSERT(!(s->br_startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)));

	put_unaligned_be64(
		((xfs_bmbt_rec_base_t)extent_flag << 63) |
		 ((xfs_bmbt_rec_base_t)s->br_startoff << 9) |
		 ((xfs_bmbt_rec_base_t)s->br_startblock >> 43), &r->l0);
	put_unaligned_be64(
		((xfs_bmbt_rec_base_t)s->br_startblock << 21) |
		 ((xfs_bmbt_rec_base_t)s->br_blockcount &
		  (xfs_bmbt_rec_base_t)xfs_mask64lo(21)), &r->l1);
}

/*
 * Convert in-memory form of btree root to on-disk form.
 */
void
xfs_bmbt_to_bmdr(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*rblock,
	int			rblocklen,
	xfs_bmdr_block_t	*dblock,
	int			dblocklen)
{
	int			dmxr;
	xfs_bmbt_key_t		*fkp;
	__be64			*fpp;
	xfs_bmbt_key_t		*tkp;
	__be64			*tpp;

	if (xfs_has_crc(mp)) {
		ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_CRC_MAGIC));
		ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid,
		       &mp->m_sb.sb_meta_uuid));
		ASSERT(rblock->bb_u.l.bb_blkno ==
		       cpu_to_be64(XFS_BUF_DADDR_NULL));
	} else
		ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
	ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK));
	ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK));
	ASSERT(rblock->bb_level != 0);
	dblock->bb_level = rblock->bb_level;
	dblock->bb_numrecs = rblock->bb_numrecs;
	dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
	fkp = xfs_bmbt_key_addr(mp, rblock, 1);
	tkp = xfs_bmdr_key_addr(dblock, 1);
	fpp = xfs_bmap_broot_ptr_addr(mp, rblock, 1, rblocklen);
	tpp = xfs_bmdr_ptr_addr(dblock, 1, dmxr);
	dmxr = be16_to_cpu(dblock->bb_numrecs);
	memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
	memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
}
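
/*
 * A sketch of the address arithmetic the two root-conversion helpers above
 * rely on: in the on-disk root, the key array immediately follows the
 * xfs_bmdr_block header and the pointer array starts after space for dmxr
 * keys, so the maxrecs value of the smaller block sets the pointer stride.
 * This is believed to mirror xfs_bmdr_key_addr()/xfs_bmdr_ptr_addr(); treat
 * it as illustration, not as the definitions.  Excluded from the build.
 */
#if 0
#include <stddef.h>
#include <stdint.h>

static void *bmdr_key(void *block, size_t hdrlen, unsigned int i)
{
	/* keys are 1-based and butt up against the header */
	return (char *)block + hdrlen + (i - 1) * sizeof(uint64_t);
}

static void *bmdr_ptr(void *block, size_t hdrlen, unsigned int i,
		      unsigned int dmxr)
{
	/* the pointer area begins after room for dmxr keys */
	return (char *)block + hdrlen + dmxr * sizeof(uint64_t) +
			(i - 1) * sizeof(uint64_t);
}
#endif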

STATIC struct xfs_btree_cur *
xfs_bmbt_dup_cursor(
	struct xfs_btree_cur	*cur)
{
	struct xfs_btree_cur	*new;

	new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp,
			cur->bc_ino.ip, cur->bc_ino.whichfork);
	new->bc_flags |= (cur->bc_flags &
		(XFS_BTREE_BMBT_INVALID_OWNER | XFS_BTREE_BMBT_WASDEL));
	return new;
}

STATIC void
xfs_bmbt_update_cursor(
	struct xfs_btree_cur	*src,
	struct xfs_btree_cur	*dst)
{
	ASSERT((dst->bc_tp->t_highest_agno != NULLAGNUMBER) ||
	       (dst->bc_ino.ip->i_diflags & XFS_DIFLAG_REALTIME));

	dst->bc_bmap.allocated += src->bc_bmap.allocated;
	dst->bc_tp->t_highest_agno = src->bc_tp->t_highest_agno;

	src->bc_bmap.allocated = 0;
}

STATIC int
xfs_bmbt_alloc_block(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*start,
	union xfs_btree_ptr		*new,
	int				*stat)
{
	struct xfs_alloc_arg	args;
	int			error;

	memset(&args, 0, sizeof(args));
	args.tp = cur->bc_tp;
	args.mp = cur->bc_mp;
	xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_ino.ip->i_ino,
			cur->bc_ino.whichfork);
	args.minlen = args.maxlen = args.prod = 1;
	args.wasdel = cur->bc_flags & XFS_BTREE_BMBT_WASDEL;
	if (!args.wasdel && args.tp->t_blk_res == 0)
		return -ENOSPC;

	/*
	 * If we are coming here from something like unwritten extent
	 * conversion, no data extent allocation has been done yet, so we
	 * have to ensure that we attempt to locate the entire set of bmbt
	 * allocations in the same AG, as xfs_bmapi_write() would have
	 * reserved.
	 */
	if (cur->bc_tp->t_highest_agno == NULLAGNUMBER)
		args.minleft = xfs_bmapi_minleft(cur->bc_tp, cur->bc_ino.ip,
					cur->bc_ino.whichfork);

	error = xfs_alloc_vextent_start_ag(&args, be64_to_cpu(start->l));
	if (error)
		return error;

	if (args.fsbno == NULLFSBLOCK && args.minleft) {
		/*
		 * Could not find an AG with enough free space to satisfy a
		 * full btree split.  Try again without the minleft
		 * requirement and, if successful, activate the lowspace
		 * algorithm.
		 */
		args.minleft = 0;
		error = xfs_alloc_vextent_start_ag(&args, 0);
		if (error)
			return error;
		cur->bc_tp->t_flags |= XFS_TRANS_LOWMODE;
	}
	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
		*stat = 0;
		return 0;
	}

	ASSERT(args.len == 1);
	cur->bc_bmap.allocated++;
	cur->bc_ino.ip->i_nblocks++;
	xfs_trans_log_inode(args.tp, cur->bc_ino.ip, XFS_ILOG_CORE);
	xfs_trans_mod_dquot_byino(args.tp, cur->bc_ino.ip,
			XFS_TRANS_DQ_BCOUNT, 1L);

	new->l = cpu_to_be64(args.fsbno);

	*stat = 1;
	return 0;
}

STATIC int
xfs_bmbt_free_block(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	struct xfs_inode	*ip = cur->bc_ino.ip;
	struct xfs_trans	*tp = cur->bc_tp;
	xfs_fsblock_t		fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
	struct xfs_owner_info	oinfo;
	int			error;

	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
	error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo,
			XFS_AG_RESV_NONE, 0);
	if (error)
		return error;

	ip->i_nblocks--;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
	return 0;
}

STATIC int
xfs_bmbt_get_minrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	if (level == cur->bc_nlevels - 1) {
		struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);

		return xfs_bmbt_maxrecs(cur->bc_mp,
					ifp->if_broot_bytes, level == 0) / 2;
	}

	return cur->bc_mp->m_bmap_dmnr[level != 0];
}

int
xfs_bmbt_get_maxrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	if (level == cur->bc_nlevels - 1) {
		struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);

		return xfs_bmbt_maxrecs(cur->bc_mp,
					ifp->if_broot_bytes, level == 0);
	}

	return cur->bc_mp->m_bmap_dmxr[level != 0];
}
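
/*
 * Worked example for the two callbacks above, assuming a V5 (CRC)
 * filesystem where the incore root block header is 72 bytes and bmbt
 * records, keys and pointers are 16, 8 and 8 bytes respectively.  For a
 * non-leaf root with if_broot_bytes == 264:
 *
 *	maxrecs = (264 - 72) / (8 + 8) = 12
 *	minrecs = 12 / 2 = 6
 *
 * Every other level uses the precomputed mp->m_bmap_dmxr[] and
 * mp->m_bmap_dmnr[] limits instead.
 */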

/*
 * Get the maximum records we could store in the on-disk format.
 *
 * For non-root nodes this is equivalent to xfs_bmbt_get_maxrecs, but
 * for the root node this checks the available space in the dinode fork
 * so that we can resize the in-memory buffer to match it.  After a
 * resize to the maximum size this function returns the same value
 * as xfs_bmbt_get_maxrecs for the root node, too.
 */
STATIC int
xfs_bmbt_get_dmaxrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	if (level != cur->bc_nlevels - 1)
		return cur->bc_mp->m_bmap_dmxr[level != 0];
	return xfs_bmdr_maxrecs(cur->bc_ino.forksize, level == 0);
}

STATIC void
xfs_bmbt_init_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	key->bmbt.br_startoff =
		cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt));
}

STATIC void
xfs_bmbt_init_high_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	key->bmbt.br_startoff = cpu_to_be64(
			xfs_bmbt_disk_get_startoff(&rec->bmbt) +
			xfs_bmbt_disk_get_blockcount(&rec->bmbt) - 1);
}

STATIC void
xfs_bmbt_init_rec_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b);
}

STATIC int64_t
xfs_bmbt_key_diff(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key)
{
	return (int64_t)be64_to_cpu(key->bmbt.br_startoff) -
			cur->bc_rec.b.br_startoff;
}

STATIC int64_t
xfs_bmbt_diff_two_keys(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2,
	const union xfs_btree_key	*mask)
{
	uint64_t			a = be64_to_cpu(k1->bmbt.br_startoff);
	uint64_t			b = be64_to_cpu(k2->bmbt.br_startoff);

	ASSERT(!mask || mask->bmbt.br_startoff);

	/*
	 * Note: This routine previously cast a and b to int64 and subtracted
	 * them to generate a result.  This led to problems if b was the
	 * "maximum" key value (all ones), which was mishandled as a negative
	 * signed value, hence this somewhat less efficient version.
	 */
	if (a > b)
		return 1;
	if (b > a)
		return -1;
	return 0;
}
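
/*
 * Standalone demonstration, excluded from the build, of the overflow that
 * the comment in xfs_bmbt_diff_two_keys() describes: when b is the
 * all-ones "maximum" key, casting to int64_t flips its sign and a naive
 * subtraction orders the keys backwards.
 */
#if 0
#include <stdint.h>
#include <assert.h>

int main(void)
{
	uint64_t a = 0;
	uint64_t b = UINT64_MAX;	/* the "maximum" key value */

	/* naive: (int64_t)b == -1, so the diff is 1, claiming a > b */
	assert((int64_t)a - (int64_t)b > 0);

	/* the explicit comparison gets it right: a < b */
	assert(a < b);
	return 0;
}
#endif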

static xfs_failaddr_t
xfs_bmbt_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	xfs_failaddr_t		fa;
	unsigned int		level;

	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	if (xfs_has_crc(mp)) {
		/*
		 * XXX: need a better way of verifying the owner here. Right
		 * now just make sure there has been one set.
		 */
		fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
		if (fa)
			return fa;
	}

	/*
	 * numrecs and level verification.
	 *
	 * We don't know what fork we belong to, so just verify that the level
	 * is less than the maximum of the two.  Later checks will be more
	 * precise.
	 */
	level = be16_to_cpu(block->bb_level);
	if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]))
		return __this_address;

	return xfs_btree_fsblock_verify(bp, mp->m_bmap_dmxr[level != 0]);
}

static void
xfs_bmbt_read_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	if (!xfs_btree_fsblock_verify_crc(bp))
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
	else {
		fa = xfs_bmbt_verify(bp);
		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
	}

	if (bp->b_error)
		trace_xfs_btree_corrupt(bp, _RET_IP_);
}

static void
xfs_bmbt_write_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	fa = xfs_bmbt_verify(bp);
	if (fa) {
		trace_xfs_btree_corrupt(bp, _RET_IP_);
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}
	xfs_btree_fsblock_calc_crc(bp);
}

const struct xfs_buf_ops xfs_bmbt_buf_ops = {
	.name = "xfs_bmbt",
	.magic = { cpu_to_be32(XFS_BMAP_MAGIC),
		   cpu_to_be32(XFS_BMAP_CRC_MAGIC) },
	.verify_read = xfs_bmbt_read_verify,
	.verify_write = xfs_bmbt_write_verify,
	.verify_struct = xfs_bmbt_verify,
};

STATIC int
xfs_bmbt_keys_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2)
{
	return be64_to_cpu(k1->bmbt.br_startoff) <
		be64_to_cpu(k2->bmbt.br_startoff);
}

STATIC int
xfs_bmbt_recs_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*r1,
	const union xfs_btree_rec	*r2)
{
	return xfs_bmbt_disk_get_startoff(&r1->bmbt) +
		xfs_bmbt_disk_get_blockcount(&r1->bmbt) <=
		xfs_bmbt_disk_get_startoff(&r2->bmbt);
}

STATIC enum xbtree_key_contig
xfs_bmbt_keys_contiguous(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key1,
	const union xfs_btree_key	*key2,
	const union xfs_btree_key	*mask)
{
	ASSERT(!mask || mask->bmbt.br_startoff);

	return xbtree_key_contig(be64_to_cpu(key1->bmbt.br_startoff),
				 be64_to_cpu(key2->bmbt.br_startoff));
}

static inline void
xfs_bmbt_move_ptrs(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*broot,
	short			old_size,
	size_t			new_size,
	unsigned int		numrecs)
{
	void			*dptr;
	void			*sptr;

	sptr = xfs_bmap_broot_ptr_addr(mp, broot, 1, old_size);
	dptr = xfs_bmap_broot_ptr_addr(mp, broot, 1, new_size);
	memmove(dptr, sptr, numrecs * sizeof(xfs_bmbt_ptr_t));
}
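
/*
 * Why xfs_bmbt_move_ptrs() is needed at all: in the incore root, keys sit
 * immediately after the block header, but the pointer array is laid out as
 * if the key area were sized for the maxrecs of the current buffer size.
 * Resizing the buffer therefore moves where the pointers belong even
 * though the keys stay put (sizes hypothetical):
 *
 *	old: [header][key area for old_size][ptrs]
 *	new: [header][key area for new_size....][ptrs]
 *
 * so the pointers must be slid to the offset matching the new size, which
 * is what the realloc helper below does on both the grow and shrink paths.
 */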

/*
 * Reallocate the space for if_broot based on the number of records.  Move
 * the records and pointers in if_broot to fit the new size.  When shrinking
 * this will eliminate holes between the records and pointers created by the
 * caller.  When growing this will create holes to be filled in by the
 * caller.
 *
 * The caller must not request to add more records than would fit in the
 * on-disk inode root.  If the if_broot is currently NULL, then if we are
 * adding records, one will be allocated.  The caller must also not request
 * that the number of records go below zero, although it can go to zero.
 *
 * ip -- the inode whose if_broot area is changing
 * whichfork -- which inode fork to change
 * new_numrecs -- the new number of records requested for the if_broot array
 *
 * Returns the incore btree root block.
 */
struct xfs_btree_block *
xfs_bmap_broot_realloc(
	struct xfs_inode	*ip,
	int			whichfork,
	unsigned int		new_numrecs)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_btree_block	*broot;
	unsigned int		new_size;
	unsigned int		old_size = ifp->if_broot_bytes;

	/*
	 * Block mapping btrees do not support storing zero records; if this
	 * happens, the fork is being changed to FMT_EXTENTS.  Free the broot
	 * and get out.
	 */
	if (new_numrecs == 0)
		return xfs_broot_realloc(ifp, 0);

	new_size = xfs_bmap_broot_space_calc(mp, new_numrecs);

	/* Handle the nop case quietly. */
	if (new_size == old_size)
		return ifp->if_broot;

	if (new_size > old_size) {
		unsigned int	old_numrecs;

		/*
		 * If there wasn't any memory allocated before, just
		 * allocate it now and get out.
		 */
		if (old_size == 0)
			return xfs_broot_realloc(ifp, new_size);

		/*
		 * If there is already an existing if_broot, then we need
		 * to realloc() it and shift the pointers to their new
		 * location.  The records don't change location because
		 * they are kept butted up against the btree block header.
		 */
		old_numrecs = xfs_bmbt_maxrecs(mp, old_size, false);
		broot = xfs_broot_realloc(ifp, new_size);
		ASSERT(xfs_bmap_bmdr_space(broot) <=
			xfs_inode_fork_size(ip, whichfork));
		xfs_bmbt_move_ptrs(mp, broot, old_size, new_size, old_numrecs);
		return broot;
	}

	/*
	 * We're reducing, but not totally eliminating, numrecs.  In this
	 * case, we are shrinking the if_broot buffer, so it must already
	 * exist.
	 */
	ASSERT(ifp->if_broot != NULL && old_size > 0 && new_size > 0);

	/*
	 * Shrink the btree root by moving the bmbt pointers, since they are
	 * not butted up against the btree block header, then reallocating
	 * broot.
	 */
	xfs_bmbt_move_ptrs(mp, ifp->if_broot, old_size, new_size, new_numrecs);
	broot = xfs_broot_realloc(ifp, new_size);
	ASSERT(xfs_bmap_bmdr_space(broot) <=
	       xfs_inode_fork_size(ip, whichfork));
	return broot;
}

static struct xfs_btree_block *
xfs_bmbt_broot_realloc(
	struct xfs_btree_cur	*cur,
	unsigned int		new_numrecs)
{
	return xfs_bmap_broot_realloc(cur->bc_ino.ip, cur->bc_ino.whichfork,
			new_numrecs);
}

const struct xfs_btree_ops xfs_bmbt_ops = {
	.name			= "bmap",
	.type			= XFS_BTREE_TYPE_INODE,

	.rec_len		= sizeof(xfs_bmbt_rec_t),
	.key_len		= sizeof(xfs_bmbt_key_t),
	.ptr_len		= XFS_BTREE_LONG_PTR_LEN,

	.lru_refs		= XFS_BMAP_BTREE_REF,
	.statoff		= XFS_STATS_CALC_INDEX(xs_bmbt_2),

	.dup_cursor		= xfs_bmbt_dup_cursor,
	.update_cursor		= xfs_bmbt_update_cursor,
	.alloc_block		= xfs_bmbt_alloc_block,
	.free_block		= xfs_bmbt_free_block,
	.get_maxrecs		= xfs_bmbt_get_maxrecs,
	.get_minrecs		= xfs_bmbt_get_minrecs,
	.get_dmaxrecs		= xfs_bmbt_get_dmaxrecs,
	.init_key_from_rec	= xfs_bmbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_bmbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_bmbt_init_rec_from_cur,
	.key_diff		= xfs_bmbt_key_diff,
	.diff_two_keys		= xfs_bmbt_diff_two_keys,
	.buf_ops		= &xfs_bmbt_buf_ops,
	.keys_inorder		= xfs_bmbt_keys_inorder,
	.recs_inorder		= xfs_bmbt_recs_inorder,
	.keys_contiguous	= xfs_bmbt_keys_contiguous,
	.broot_realloc		= xfs_bmbt_broot_realloc,
};
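
/*
 * A sketch of how the generic btree code consumes this table: operations
 * on a bmbt cursor dispatch indirectly through cur->bc_ops, e.g. (the
 * surrounding locals here are hypothetical; excluded from the build):
 */
#if 0
	if (be16_to_cpu(block->bb_numrecs) ==
	    cur->bc_ops->get_maxrecs(cur, level))
		;	/* no room at this level, split the block */
#endif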
661 * 662 * For staging cursors -1 in passed in whichfork. 663 */ 664 struct xfs_btree_cur * 665 xfs_bmbt_init_cursor( 666 struct xfs_mount *mp, 667 struct xfs_trans *tp, 668 struct xfs_inode *ip, 669 int whichfork) 670 { 671 struct xfs_btree_cur *cur; 672 unsigned int maxlevels; 673 674 ASSERT(whichfork != XFS_COW_FORK); 675 676 /* 677 * The Data fork always has larger maxlevel, so use that for staging 678 * cursors. 679 */ 680 switch (whichfork) { 681 case XFS_STAGING_FORK: 682 maxlevels = mp->m_bm_maxlevels[XFS_DATA_FORK]; 683 break; 684 default: 685 maxlevels = mp->m_bm_maxlevels[whichfork]; 686 break; 687 } 688 cur = xfs_btree_alloc_cursor(mp, tp, &xfs_bmbt_ops, maxlevels, 689 xfs_bmbt_cur_cache); 690 cur->bc_ino.ip = ip; 691 cur->bc_ino.whichfork = whichfork; 692 cur->bc_bmap.allocated = 0; 693 if (whichfork != XFS_STAGING_FORK) { 694 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); 695 696 cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1; 697 cur->bc_ino.forksize = xfs_inode_fork_size(ip, whichfork); 698 } 699 return cur; 700 } 701 702 /* Calculate number of records in a block mapping btree block. */ 703 static inline unsigned int 704 xfs_bmbt_block_maxrecs( 705 unsigned int blocklen, 706 bool leaf) 707 { 708 if (leaf) 709 return blocklen / sizeof(xfs_bmbt_rec_t); 710 return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)); 711 } 712 713 /* 714 * Swap in the new inode fork root. Once we pass this point the newly rebuilt 715 * mappings are in place and we have to kill off any old btree blocks. 716 */ 717 void 718 xfs_bmbt_commit_staged_btree( 719 struct xfs_btree_cur *cur, 720 struct xfs_trans *tp, 721 int whichfork) 722 { 723 struct xbtree_ifakeroot *ifake = cur->bc_ino.ifake; 724 struct xfs_ifork *ifp; 725 static const short brootflag[2] = {XFS_ILOG_DBROOT, XFS_ILOG_ABROOT}; 726 static const short extflag[2] = {XFS_ILOG_DEXT, XFS_ILOG_AEXT}; 727 int flags = XFS_ILOG_CORE; 728 729 ASSERT(cur->bc_flags & XFS_BTREE_STAGING); 730 ASSERT(whichfork != XFS_COW_FORK); 731 732 /* 733 * Free any resources hanging off the real fork, then shallow-copy the 734 * staging fork's contents into the real fork to transfer everything 735 * we just built. 736 */ 737 ifp = xfs_ifork_ptr(cur->bc_ino.ip, whichfork); 738 xfs_idestroy_fork(ifp); 739 memcpy(ifp, ifake->if_fork, sizeof(struct xfs_ifork)); 740 741 switch (ifp->if_format) { 742 case XFS_DINODE_FMT_EXTENTS: 743 flags |= extflag[whichfork]; 744 break; 745 case XFS_DINODE_FMT_BTREE: 746 flags |= brootflag[whichfork]; 747 break; 748 default: 749 ASSERT(0); 750 break; 751 } 752 xfs_trans_log_inode(tp, cur->bc_ino.ip, flags); 753 xfs_btree_commit_ifakeroot(cur, tp, whichfork); 754 } 755 756 /* 757 * Calculate number of records in a bmap btree block. 758 */ 759 unsigned int 760 xfs_bmbt_maxrecs( 761 struct xfs_mount *mp, 762 unsigned int blocklen, 763 bool leaf) 764 { 765 blocklen -= xfs_bmbt_block_len(mp); 766 return xfs_bmbt_block_maxrecs(blocklen, leaf); 767 } 768 769 /* 770 * Calculate the maximum possible height of the btree that the on-disk format 771 * supports. This is used for sizing structures large enough to support every 772 * possible configuration of a filesystem that might get mounted. 

/*
 * Calculate the maximum possible height of the btree that the on-disk
 * format supports.  This is used for sizing structures large enough to
 * support every possible configuration of a filesystem that might get
 * mounted.
 */
unsigned int
xfs_bmbt_maxlevels_ondisk(void)
{
	unsigned int		minrecs[2];
	unsigned int		blocklen;

	blocklen = min(XFS_MIN_BLOCKSIZE - XFS_BTREE_SBLOCK_LEN,
		       XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN);

	minrecs[0] = xfs_bmbt_block_maxrecs(blocklen, true) / 2;
	minrecs[1] = xfs_bmbt_block_maxrecs(blocklen, false) / 2;

	/* One extra level for the inode root. */
	return xfs_btree_compute_maxlevels(minrecs,
			XFS_MAX_EXTCNT_DATA_FORK_LARGE) + 1;
}

/*
 * Calculate number of records in a bmap btree inode root.
 */
int
xfs_bmdr_maxrecs(
	int		blocklen,
	int		leaf)
{
	blocklen -= sizeof(xfs_bmdr_block_t);

	if (leaf)
		return blocklen / sizeof(xfs_bmdr_rec_t);
	return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t));
}

/*
 * Change the owner of a btree format fork of the inode passed in.  Change
 * it to the owner that is passed in so that we can change owners before or
 * after we switch forks between inodes.  The operation that the caller is
 * doing will determine whether it needs to change owner before or after
 * the switch.
 *
 * For demand paged transactional modification, the fork switch should be
 * done after reading in all the blocks, modifying them and pinning them in
 * the transaction.  For modification when the buffers are already pinned
 * in memory, the fork switch can be done before changing the owner as we
 * won't need to validate the owner until the btree buffers are unpinned
 * and writes can occur again.
 *
 * For recovery based ownership change, there is no transactional context
 * and so a buffer list must be supplied so that we can record the buffers
 * that we modified for the caller to issue IO on.
 */
int
xfs_bmbt_change_owner(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	xfs_ino_t		new_owner,
	struct list_head	*buffer_list)
{
	struct xfs_btree_cur	*cur;
	int			error;

	ASSERT(tp || buffer_list);
	ASSERT(!(tp && buffer_list));
	ASSERT(xfs_ifork_ptr(ip, whichfork)->if_format == XFS_DINODE_FMT_BTREE);

	cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
	cur->bc_flags |= XFS_BTREE_BMBT_INVALID_OWNER;

	error = xfs_btree_change_owner(cur, new_owner, buffer_list);
	xfs_btree_del_cursor(cur, error);
	return error;
}

/* Calculate the bmap btree size for some records. */
unsigned long long
xfs_bmbt_calc_size(
	struct xfs_mount	*mp,
	unsigned long long	len)
{
	return xfs_btree_calc_size(mp->m_bmap_dmnr, len);
}

int __init
xfs_bmbt_init_cur_cache(void)
{
	xfs_bmbt_cur_cache = kmem_cache_create("xfs_bmbt_cur",
			xfs_btree_cur_sizeof(xfs_bmbt_maxlevels_ondisk()),
			0, 0, NULL);

	if (!xfs_bmbt_cur_cache)
		return -ENOMEM;
	return 0;
}

void
xfs_bmbt_destroy_cur_cache(void)
{
	kmem_cache_destroy(xfs_bmbt_cur_cache);
	xfs_bmbt_cur_cache = NULL;
}
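
/*
 * Standalone sketch, excluded from the build, of the height computation
 * that xfs_bmbt_maxlevels_ondisk() leans on: with at least minrecs[0]
 * records per leaf and minrecs[1] keys per node, keep dividing until a
 * single block suffices.  This approximates what
 * xfs_btree_compute_maxlevels() is expected to do; the names are local to
 * the example.
 */
#if 0
static unsigned int compute_maxlevels(const unsigned int *minrecs,
				      unsigned long long records)
{
	unsigned long long blocks = (records + minrecs[0] - 1) / minrecs[0];
	unsigned int height = 1;

	while (blocks > 1) {
		blocks = (blocks + minrecs[1] - 1) / minrecs[1];
		height++;
	}
	return height;
}
#endif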