1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_bit.h" 13 #include "xfs_mount.h" 14 #include "xfs_inode.h" 15 #include "xfs_trans.h" 16 #include "xfs_alloc.h" 17 #include "xfs_btree.h" 18 #include "xfs_btree_staging.h" 19 #include "xfs_bmap_btree.h" 20 #include "xfs_bmap.h" 21 #include "xfs_error.h" 22 #include "xfs_quota.h" 23 #include "xfs_trace.h" 24 #include "xfs_rmap.h" 25 #include "xfs_ag.h" 26 27 static struct kmem_cache *xfs_bmbt_cur_cache; 28 29 void 30 xfs_bmbt_init_block( 31 struct xfs_inode *ip, 32 struct xfs_btree_block *buf, 33 struct xfs_buf *bp, 34 __u16 level, 35 __u16 numrecs) 36 { 37 if (bp) 38 xfs_btree_init_buf(ip->i_mount, bp, &xfs_bmbt_ops, level, 39 numrecs, ip->i_ino); 40 else 41 xfs_btree_init_block(ip->i_mount, buf, &xfs_bmbt_ops, level, 42 numrecs, ip->i_ino); 43 } 44 45 /* 46 * Convert on-disk form of btree root to in-memory form. 47 */ 48 void 49 xfs_bmdr_to_bmbt( 50 struct xfs_inode *ip, 51 xfs_bmdr_block_t *dblock, 52 int dblocklen, 53 struct xfs_btree_block *rblock, 54 int rblocklen) 55 { 56 struct xfs_mount *mp = ip->i_mount; 57 int dmxr; 58 xfs_bmbt_key_t *fkp; 59 __be64 *fpp; 60 xfs_bmbt_key_t *tkp; 61 __be64 *tpp; 62 63 xfs_bmbt_init_block(ip, rblock, NULL, 0, 0); 64 rblock->bb_level = dblock->bb_level; 65 ASSERT(be16_to_cpu(rblock->bb_level) > 0); 66 rblock->bb_numrecs = dblock->bb_numrecs; 67 dmxr = xfs_bmdr_maxrecs(dblocklen, 0); 68 fkp = xfs_bmdr_key_addr(dblock, 1); 69 tkp = xfs_bmbt_key_addr(mp, rblock, 1); 70 fpp = xfs_bmdr_ptr_addr(dblock, 1, dmxr); 71 tpp = xfs_bmap_broot_ptr_addr(mp, rblock, 1, rblocklen); 72 dmxr = be16_to_cpu(dblock->bb_numrecs); 73 memcpy(tkp, fkp, sizeof(*fkp) * dmxr); 74 memcpy(tpp, fpp, sizeof(*fpp) * dmxr); 75 } 76 77 void 78 xfs_bmbt_disk_get_all( 79 const struct xfs_bmbt_rec *rec, 80 struct xfs_bmbt_irec *irec) 81 { 82 uint64_t l0 = get_unaligned_be64(&rec->l0); 83 uint64_t l1 = get_unaligned_be64(&rec->l1); 84 85 irec->br_startoff = (l0 & xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; 86 irec->br_startblock = ((l0 & xfs_mask64lo(9)) << 43) | (l1 >> 21); 87 irec->br_blockcount = l1 & xfs_mask64lo(21); 88 if (l0 >> (64 - BMBT_EXNTFLAG_BITLEN)) 89 irec->br_state = XFS_EXT_UNWRITTEN; 90 else 91 irec->br_state = XFS_EXT_NORM; 92 } 93 94 /* 95 * Extract the blockcount field from an on disk bmap extent record. 96 */ 97 xfs_filblks_t 98 xfs_bmbt_disk_get_blockcount( 99 const struct xfs_bmbt_rec *r) 100 { 101 return (xfs_filblks_t)(be64_to_cpu(r->l1) & xfs_mask64lo(21)); 102 } 103 104 /* 105 * Extract the startoff field from a disk format bmap extent record. 106 */ 107 xfs_fileoff_t 108 xfs_bmbt_disk_get_startoff( 109 const struct xfs_bmbt_rec *r) 110 { 111 return ((xfs_fileoff_t)be64_to_cpu(r->l0) & 112 xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; 113 } 114 115 /* 116 * Set all the fields in a bmap extent record from the uncompressed form. 117 */ 118 void 119 xfs_bmbt_disk_set_all( 120 struct xfs_bmbt_rec *r, 121 struct xfs_bmbt_irec *s) 122 { 123 int extent_flag = (s->br_state != XFS_EXT_NORM); 124 125 ASSERT(s->br_state == XFS_EXT_NORM || s->br_state == XFS_EXT_UNWRITTEN); 126 ASSERT(!(s->br_startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN))); 127 ASSERT(!(s->br_blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN))); 128 ASSERT(!(s->br_startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN))); 129 130 put_unaligned_be64( 131 ((xfs_bmbt_rec_base_t)extent_flag << 63) | 132 ((xfs_bmbt_rec_base_t)s->br_startoff << 9) | 133 ((xfs_bmbt_rec_base_t)s->br_startblock >> 43), &r->l0); 134 put_unaligned_be64( 135 ((xfs_bmbt_rec_base_t)s->br_startblock << 21) | 136 ((xfs_bmbt_rec_base_t)s->br_blockcount & 137 (xfs_bmbt_rec_base_t)xfs_mask64lo(21)), &r->l1); 138 } 139 140 /* 141 * Convert in-memory form of btree root to on-disk form. 142 */ 143 void 144 xfs_bmbt_to_bmdr( 145 struct xfs_mount *mp, 146 struct xfs_btree_block *rblock, 147 int rblocklen, 148 xfs_bmdr_block_t *dblock, 149 int dblocklen) 150 { 151 int dmxr; 152 xfs_bmbt_key_t *fkp; 153 __be64 *fpp; 154 xfs_bmbt_key_t *tkp; 155 __be64 *tpp; 156 157 if (xfs_has_crc(mp)) { 158 ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_CRC_MAGIC)); 159 ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, 160 &mp->m_sb.sb_meta_uuid)); 161 ASSERT(rblock->bb_u.l.bb_blkno == 162 cpu_to_be64(XFS_BUF_DADDR_NULL)); 163 } else 164 ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC)); 165 ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK)); 166 ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK)); 167 ASSERT(rblock->bb_level != 0); 168 dblock->bb_level = rblock->bb_level; 169 dblock->bb_numrecs = rblock->bb_numrecs; 170 dmxr = xfs_bmdr_maxrecs(dblocklen, 0); 171 fkp = xfs_bmbt_key_addr(mp, rblock, 1); 172 tkp = xfs_bmdr_key_addr(dblock, 1); 173 fpp = xfs_bmap_broot_ptr_addr(mp, rblock, 1, rblocklen); 174 tpp = xfs_bmdr_ptr_addr(dblock, 1, dmxr); 175 dmxr = be16_to_cpu(dblock->bb_numrecs); 176 memcpy(tkp, fkp, sizeof(*fkp) * dmxr); 177 memcpy(tpp, fpp, sizeof(*fpp) * dmxr); 178 } 179 180 STATIC struct xfs_btree_cur * 181 xfs_bmbt_dup_cursor( 182 struct xfs_btree_cur *cur) 183 { 184 struct xfs_btree_cur *new; 185 186 new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp, 187 cur->bc_ino.ip, cur->bc_ino.whichfork); 188 new->bc_flags |= (cur->bc_flags & 189 (XFS_BTREE_BMBT_INVALID_OWNER | XFS_BTREE_BMBT_WASDEL)); 190 return new; 191 } 192 193 STATIC void 194 xfs_bmbt_update_cursor( 195 struct xfs_btree_cur *src, 196 struct xfs_btree_cur *dst) 197 { 198 ASSERT((dst->bc_tp->t_highest_agno != NULLAGNUMBER) || 199 (dst->bc_ino.ip->i_diflags & XFS_DIFLAG_REALTIME)); 200 201 dst->bc_bmap.allocated += src->bc_bmap.allocated; 202 dst->bc_tp->t_highest_agno = src->bc_tp->t_highest_agno; 203 204 src->bc_bmap.allocated = 0; 205 } 206 207 STATIC int 208 xfs_bmbt_alloc_block( 209 struct xfs_btree_cur *cur, 210 const union xfs_btree_ptr *start, 211 union xfs_btree_ptr *new, 212 int *stat) 213 { 214 struct xfs_alloc_arg args; 215 int error; 216 217 memset(&args, 0, sizeof(args)); 218 args.tp = cur->bc_tp; 219 args.mp = cur->bc_mp; 220 xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_ino.ip->i_ino, 221 cur->bc_ino.whichfork); 222 args.minlen = args.maxlen = args.prod = 1; 223 args.wasdel = cur->bc_flags & XFS_BTREE_BMBT_WASDEL; 224 if (!args.wasdel && args.tp->t_blk_res == 0) 225 return -ENOSPC; 226 227 /* 228 * If we are coming here from something like unwritten extent 229 * conversion, there has been no data extent allocation already done, so 230 * we have to ensure that we attempt to locate the entire set of bmbt 231 * allocations in the same AG, as xfs_bmapi_write() would have reserved. 232 */ 233 if (cur->bc_tp->t_highest_agno == NULLAGNUMBER) 234 args.minleft = xfs_bmapi_minleft(cur->bc_tp, cur->bc_ino.ip, 235 cur->bc_ino.whichfork); 236 237 error = xfs_alloc_vextent_start_ag(&args, be64_to_cpu(start->l)); 238 if (error) 239 return error; 240 241 if (args.fsbno == NULLFSBLOCK && args.minleft) { 242 /* 243 * Could not find an AG with enough free space to satisfy 244 * a full btree split. Try again and if 245 * successful activate the lowspace algorithm. 246 */ 247 args.minleft = 0; 248 error = xfs_alloc_vextent_start_ag(&args, 0); 249 if (error) 250 return error; 251 cur->bc_tp->t_flags |= XFS_TRANS_LOWMODE; 252 } 253 if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { 254 *stat = 0; 255 return 0; 256 } 257 258 ASSERT(args.len == 1); 259 cur->bc_bmap.allocated++; 260 cur->bc_ino.ip->i_nblocks++; 261 xfs_trans_log_inode(args.tp, cur->bc_ino.ip, XFS_ILOG_CORE); 262 xfs_trans_mod_dquot_byino(args.tp, cur->bc_ino.ip, 263 XFS_TRANS_DQ_BCOUNT, 1L); 264 265 new->l = cpu_to_be64(args.fsbno); 266 267 *stat = 1; 268 return 0; 269 } 270 271 STATIC int 272 xfs_bmbt_free_block( 273 struct xfs_btree_cur *cur, 274 struct xfs_buf *bp) 275 { 276 struct xfs_mount *mp = cur->bc_mp; 277 struct xfs_inode *ip = cur->bc_ino.ip; 278 struct xfs_trans *tp = cur->bc_tp; 279 xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); 280 struct xfs_owner_info oinfo; 281 int error; 282 283 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); 284 error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo, 285 XFS_AG_RESV_NONE, 0); 286 if (error) 287 return error; 288 289 ip->i_nblocks--; 290 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 291 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); 292 return 0; 293 } 294 295 STATIC int 296 xfs_bmbt_get_minrecs( 297 struct xfs_btree_cur *cur, 298 int level) 299 { 300 if (level == cur->bc_nlevels - 1) { 301 struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur); 302 303 return xfs_bmbt_maxrecs(cur->bc_mp, 304 ifp->if_broot_bytes, level == 0) / 2; 305 } 306 307 return cur->bc_mp->m_bmap_dmnr[level != 0]; 308 } 309 310 int 311 xfs_bmbt_get_maxrecs( 312 struct xfs_btree_cur *cur, 313 int level) 314 { 315 if (level == cur->bc_nlevels - 1) { 316 struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur); 317 318 return xfs_bmbt_maxrecs(cur->bc_mp, 319 ifp->if_broot_bytes, level == 0); 320 } 321 322 return cur->bc_mp->m_bmap_dmxr[level != 0]; 323 324 } 325 326 /* 327 * Get the maximum records we could store in the on-disk format. 328 * 329 * For non-root nodes this is equivalent to xfs_bmbt_get_maxrecs, but 330 * for the root node this checks the available space in the dinode fork 331 * so that we can resize the in-memory buffer to match it. After a 332 * resize to the maximum size this function returns the same value 333 * as xfs_bmbt_get_maxrecs for the root node, too. 334 */ 335 STATIC int 336 xfs_bmbt_get_dmaxrecs( 337 struct xfs_btree_cur *cur, 338 int level) 339 { 340 if (level != cur->bc_nlevels - 1) 341 return cur->bc_mp->m_bmap_dmxr[level != 0]; 342 return xfs_bmdr_maxrecs(cur->bc_ino.forksize, level == 0); 343 } 344 345 STATIC void 346 xfs_bmbt_init_key_from_rec( 347 union xfs_btree_key *key, 348 const union xfs_btree_rec *rec) 349 { 350 key->bmbt.br_startoff = 351 cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt)); 352 } 353 354 STATIC void 355 xfs_bmbt_init_high_key_from_rec( 356 union xfs_btree_key *key, 357 const union xfs_btree_rec *rec) 358 { 359 key->bmbt.br_startoff = cpu_to_be64( 360 xfs_bmbt_disk_get_startoff(&rec->bmbt) + 361 xfs_bmbt_disk_get_blockcount(&rec->bmbt) - 1); 362 } 363 364 STATIC void 365 xfs_bmbt_init_rec_from_cur( 366 struct xfs_btree_cur *cur, 367 union xfs_btree_rec *rec) 368 { 369 xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b); 370 } 371 372 STATIC int 373 xfs_bmbt_cmp_key_with_cur( 374 struct xfs_btree_cur *cur, 375 const union xfs_btree_key *key) 376 { 377 return cmp_int(be64_to_cpu(key->bmbt.br_startoff), 378 cur->bc_rec.b.br_startoff); 379 } 380 381 STATIC int 382 xfs_bmbt_cmp_two_keys( 383 struct xfs_btree_cur *cur, 384 const union xfs_btree_key *k1, 385 const union xfs_btree_key *k2, 386 const union xfs_btree_key *mask) 387 { 388 ASSERT(!mask || mask->bmbt.br_startoff); 389 390 return cmp_int(be64_to_cpu(k1->bmbt.br_startoff), 391 be64_to_cpu(k2->bmbt.br_startoff)); 392 } 393 394 static xfs_failaddr_t 395 xfs_bmbt_verify( 396 struct xfs_buf *bp) 397 { 398 struct xfs_mount *mp = bp->b_mount; 399 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 400 xfs_failaddr_t fa; 401 unsigned int level; 402 403 if (!xfs_verify_magic(bp, block->bb_magic)) 404 return __this_address; 405 406 if (xfs_has_crc(mp)) { 407 /* 408 * XXX: need a better way of verifying the owner here. Right now 409 * just make sure there has been one set. 410 */ 411 fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN); 412 if (fa) 413 return fa; 414 } 415 416 /* 417 * numrecs and level verification. 418 * 419 * We don't know what fork we belong to, so just verify that the level 420 * is less than the maximum of the two. Later checks will be more 421 * precise. 422 */ 423 level = be16_to_cpu(block->bb_level); 424 if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1])) 425 return __this_address; 426 427 return xfs_btree_fsblock_verify(bp, mp->m_bmap_dmxr[level != 0]); 428 } 429 430 static void 431 xfs_bmbt_read_verify( 432 struct xfs_buf *bp) 433 { 434 xfs_failaddr_t fa; 435 436 if (!xfs_btree_fsblock_verify_crc(bp)) 437 xfs_verifier_error(bp, -EFSBADCRC, __this_address); 438 else { 439 fa = xfs_bmbt_verify(bp); 440 if (fa) 441 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 442 } 443 444 if (bp->b_error) 445 trace_xfs_btree_corrupt(bp, _RET_IP_); 446 } 447 448 static void 449 xfs_bmbt_write_verify( 450 struct xfs_buf *bp) 451 { 452 xfs_failaddr_t fa; 453 454 fa = xfs_bmbt_verify(bp); 455 if (fa) { 456 trace_xfs_btree_corrupt(bp, _RET_IP_); 457 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 458 return; 459 } 460 xfs_btree_fsblock_calc_crc(bp); 461 } 462 463 const struct xfs_buf_ops xfs_bmbt_buf_ops = { 464 .name = "xfs_bmbt", 465 .magic = { cpu_to_be32(XFS_BMAP_MAGIC), 466 cpu_to_be32(XFS_BMAP_CRC_MAGIC) }, 467 .verify_read = xfs_bmbt_read_verify, 468 .verify_write = xfs_bmbt_write_verify, 469 .verify_struct = xfs_bmbt_verify, 470 }; 471 472 473 STATIC int 474 xfs_bmbt_keys_inorder( 475 struct xfs_btree_cur *cur, 476 const union xfs_btree_key *k1, 477 const union xfs_btree_key *k2) 478 { 479 return be64_to_cpu(k1->bmbt.br_startoff) < 480 be64_to_cpu(k2->bmbt.br_startoff); 481 } 482 483 STATIC int 484 xfs_bmbt_recs_inorder( 485 struct xfs_btree_cur *cur, 486 const union xfs_btree_rec *r1, 487 const union xfs_btree_rec *r2) 488 { 489 return xfs_bmbt_disk_get_startoff(&r1->bmbt) + 490 xfs_bmbt_disk_get_blockcount(&r1->bmbt) <= 491 xfs_bmbt_disk_get_startoff(&r2->bmbt); 492 } 493 494 STATIC enum xbtree_key_contig 495 xfs_bmbt_keys_contiguous( 496 struct xfs_btree_cur *cur, 497 const union xfs_btree_key *key1, 498 const union xfs_btree_key *key2, 499 const union xfs_btree_key *mask) 500 { 501 ASSERT(!mask || mask->bmbt.br_startoff); 502 503 return xbtree_key_contig(be64_to_cpu(key1->bmbt.br_startoff), 504 be64_to_cpu(key2->bmbt.br_startoff)); 505 } 506 507 static inline void 508 xfs_bmbt_move_ptrs( 509 struct xfs_mount *mp, 510 struct xfs_btree_block *broot, 511 short old_size, 512 size_t new_size, 513 unsigned int numrecs) 514 { 515 void *dptr; 516 void *sptr; 517 518 sptr = xfs_bmap_broot_ptr_addr(mp, broot, 1, old_size); 519 dptr = xfs_bmap_broot_ptr_addr(mp, broot, 1, new_size); 520 memmove(dptr, sptr, numrecs * sizeof(xfs_bmbt_ptr_t)); 521 } 522 523 /* 524 * Reallocate the space for if_broot based on the number of records. Move the 525 * records and pointers in if_broot to fit the new size. When shrinking this 526 * will eliminate holes between the records and pointers created by the caller. 527 * When growing this will create holes to be filled in by the caller. 528 * 529 * The caller must not request to add more records than would fit in the 530 * on-disk inode root. If the if_broot is currently NULL, then if we are 531 * adding records, one will be allocated. The caller must also not request 532 * that the number of records go below zero, although it can go to zero. 533 * 534 * ip -- the inode whose if_broot area is changing 535 * whichfork -- which inode fork to change 536 * new_numrecs -- the new number of records requested for the if_broot array 537 * 538 * Returns the incore btree root block. 539 */ 540 struct xfs_btree_block * 541 xfs_bmap_broot_realloc( 542 struct xfs_inode *ip, 543 int whichfork, 544 unsigned int new_numrecs) 545 { 546 struct xfs_mount *mp = ip->i_mount; 547 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); 548 struct xfs_btree_block *broot; 549 unsigned int new_size; 550 unsigned int old_size = ifp->if_broot_bytes; 551 552 /* 553 * Block mapping btrees do not support storing zero records; if this 554 * happens, the fork is being changed to FMT_EXTENTS. Free the broot 555 * and get out. 556 */ 557 if (new_numrecs == 0) 558 return xfs_broot_realloc(ifp, 0); 559 560 new_size = xfs_bmap_broot_space_calc(mp, new_numrecs); 561 562 /* Handle the nop case quietly. */ 563 if (new_size == old_size) 564 return ifp->if_broot; 565 566 if (new_size > old_size) { 567 unsigned int old_numrecs; 568 569 /* 570 * If there wasn't any memory allocated before, just 571 * allocate it now and get out. 572 */ 573 if (old_size == 0) 574 return xfs_broot_realloc(ifp, new_size); 575 576 /* 577 * If there is already an existing if_broot, then we need 578 * to realloc() it and shift the pointers to their new 579 * location. The records don't change location because 580 * they are kept butted up against the btree block header. 581 */ 582 old_numrecs = xfs_bmbt_maxrecs(mp, old_size, false); 583 broot = xfs_broot_realloc(ifp, new_size); 584 ASSERT(xfs_bmap_bmdr_space(broot) <= 585 xfs_inode_fork_size(ip, whichfork)); 586 xfs_bmbt_move_ptrs(mp, broot, old_size, new_size, old_numrecs); 587 return broot; 588 } 589 590 /* 591 * We're reducing, but not totally eliminating, numrecs. In this case, 592 * we are shrinking the if_broot buffer, so it must already exist. 593 */ 594 ASSERT(ifp->if_broot != NULL && old_size > 0 && new_size > 0); 595 596 /* 597 * Shrink the btree root by moving the bmbt pointers, since they are 598 * not butted up against the btree block header, then reallocating 599 * broot. 600 */ 601 xfs_bmbt_move_ptrs(mp, ifp->if_broot, old_size, new_size, new_numrecs); 602 broot = xfs_broot_realloc(ifp, new_size); 603 ASSERT(xfs_bmap_bmdr_space(broot) <= 604 xfs_inode_fork_size(ip, whichfork)); 605 return broot; 606 } 607 608 static struct xfs_btree_block * 609 xfs_bmbt_broot_realloc( 610 struct xfs_btree_cur *cur, 611 unsigned int new_numrecs) 612 { 613 return xfs_bmap_broot_realloc(cur->bc_ino.ip, cur->bc_ino.whichfork, 614 new_numrecs); 615 } 616 617 const struct xfs_btree_ops xfs_bmbt_ops = { 618 .name = "bmap", 619 .type = XFS_BTREE_TYPE_INODE, 620 621 .rec_len = sizeof(xfs_bmbt_rec_t), 622 .key_len = sizeof(xfs_bmbt_key_t), 623 .ptr_len = XFS_BTREE_LONG_PTR_LEN, 624 625 .lru_refs = XFS_BMAP_BTREE_REF, 626 .statoff = XFS_STATS_CALC_INDEX(xs_bmbt_2), 627 628 .dup_cursor = xfs_bmbt_dup_cursor, 629 .update_cursor = xfs_bmbt_update_cursor, 630 .alloc_block = xfs_bmbt_alloc_block, 631 .free_block = xfs_bmbt_free_block, 632 .get_maxrecs = xfs_bmbt_get_maxrecs, 633 .get_minrecs = xfs_bmbt_get_minrecs, 634 .get_dmaxrecs = xfs_bmbt_get_dmaxrecs, 635 .init_key_from_rec = xfs_bmbt_init_key_from_rec, 636 .init_high_key_from_rec = xfs_bmbt_init_high_key_from_rec, 637 .init_rec_from_cur = xfs_bmbt_init_rec_from_cur, 638 .cmp_key_with_cur = xfs_bmbt_cmp_key_with_cur, 639 .cmp_two_keys = xfs_bmbt_cmp_two_keys, 640 .buf_ops = &xfs_bmbt_buf_ops, 641 .keys_inorder = xfs_bmbt_keys_inorder, 642 .recs_inorder = xfs_bmbt_recs_inorder, 643 .keys_contiguous = xfs_bmbt_keys_contiguous, 644 .broot_realloc = xfs_bmbt_broot_realloc, 645 }; 646 647 /* 648 * Create a new bmap btree cursor. 649 * 650 * For staging cursors -1 in passed in whichfork. 651 */ 652 struct xfs_btree_cur * 653 xfs_bmbt_init_cursor( 654 struct xfs_mount *mp, 655 struct xfs_trans *tp, 656 struct xfs_inode *ip, 657 int whichfork) 658 { 659 struct xfs_btree_cur *cur; 660 unsigned int maxlevels; 661 662 ASSERT(whichfork != XFS_COW_FORK); 663 664 /* 665 * The Data fork always has larger maxlevel, so use that for staging 666 * cursors. 667 */ 668 switch (whichfork) { 669 case XFS_STAGING_FORK: 670 maxlevels = mp->m_bm_maxlevels[XFS_DATA_FORK]; 671 break; 672 default: 673 maxlevels = mp->m_bm_maxlevels[whichfork]; 674 break; 675 } 676 cur = xfs_btree_alloc_cursor(mp, tp, &xfs_bmbt_ops, maxlevels, 677 xfs_bmbt_cur_cache); 678 cur->bc_ino.ip = ip; 679 cur->bc_ino.whichfork = whichfork; 680 cur->bc_bmap.allocated = 0; 681 if (whichfork != XFS_STAGING_FORK) { 682 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); 683 684 cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1; 685 cur->bc_ino.forksize = xfs_inode_fork_size(ip, whichfork); 686 } 687 return cur; 688 } 689 690 /* Calculate number of records in a block mapping btree block. */ 691 static inline unsigned int 692 xfs_bmbt_block_maxrecs( 693 unsigned int blocklen, 694 bool leaf) 695 { 696 if (leaf) 697 return blocklen / sizeof(xfs_bmbt_rec_t); 698 return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)); 699 } 700 701 /* 702 * Swap in the new inode fork root. Once we pass this point the newly rebuilt 703 * mappings are in place and we have to kill off any old btree blocks. 704 */ 705 void 706 xfs_bmbt_commit_staged_btree( 707 struct xfs_btree_cur *cur, 708 struct xfs_trans *tp, 709 int whichfork) 710 { 711 struct xbtree_ifakeroot *ifake = cur->bc_ino.ifake; 712 struct xfs_ifork *ifp; 713 static const short brootflag[2] = {XFS_ILOG_DBROOT, XFS_ILOG_ABROOT}; 714 static const short extflag[2] = {XFS_ILOG_DEXT, XFS_ILOG_AEXT}; 715 int flags = XFS_ILOG_CORE; 716 717 ASSERT(cur->bc_flags & XFS_BTREE_STAGING); 718 ASSERT(whichfork != XFS_COW_FORK); 719 720 /* 721 * Free any resources hanging off the real fork, then shallow-copy the 722 * staging fork's contents into the real fork to transfer everything 723 * we just built. 724 */ 725 ifp = xfs_ifork_ptr(cur->bc_ino.ip, whichfork); 726 xfs_idestroy_fork(ifp); 727 memcpy(ifp, ifake->if_fork, sizeof(struct xfs_ifork)); 728 729 switch (ifp->if_format) { 730 case XFS_DINODE_FMT_EXTENTS: 731 flags |= extflag[whichfork]; 732 break; 733 case XFS_DINODE_FMT_BTREE: 734 flags |= brootflag[whichfork]; 735 break; 736 default: 737 ASSERT(0); 738 break; 739 } 740 xfs_trans_log_inode(tp, cur->bc_ino.ip, flags); 741 xfs_btree_commit_ifakeroot(cur, tp, whichfork); 742 } 743 744 /* 745 * Calculate number of records in a bmap btree block. 746 */ 747 unsigned int 748 xfs_bmbt_maxrecs( 749 struct xfs_mount *mp, 750 unsigned int blocklen, 751 bool leaf) 752 { 753 blocklen -= xfs_bmbt_block_len(mp); 754 return xfs_bmbt_block_maxrecs(blocklen, leaf); 755 } 756 757 /* 758 * Calculate the maximum possible height of the btree that the on-disk format 759 * supports. This is used for sizing structures large enough to support every 760 * possible configuration of a filesystem that might get mounted. 761 */ 762 unsigned int 763 xfs_bmbt_maxlevels_ondisk(void) 764 { 765 unsigned int minrecs[2]; 766 unsigned int blocklen; 767 768 blocklen = min(XFS_MIN_BLOCKSIZE - XFS_BTREE_SBLOCK_LEN, 769 XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN); 770 771 minrecs[0] = xfs_bmbt_block_maxrecs(blocklen, true) / 2; 772 minrecs[1] = xfs_bmbt_block_maxrecs(blocklen, false) / 2; 773 774 /* One extra level for the inode root. */ 775 return xfs_btree_compute_maxlevels(minrecs, 776 XFS_MAX_EXTCNT_DATA_FORK_LARGE) + 1; 777 } 778 779 /* 780 * Calculate number of records in a bmap btree inode root. 781 */ 782 int 783 xfs_bmdr_maxrecs( 784 int blocklen, 785 int leaf) 786 { 787 blocklen -= sizeof(xfs_bmdr_block_t); 788 789 if (leaf) 790 return blocklen / sizeof(xfs_bmdr_rec_t); 791 return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t)); 792 } 793 794 /* 795 * Change the owner of a btree format fork fo the inode passed in. Change it to 796 * the owner of that is passed in so that we can change owners before or after 797 * we switch forks between inodes. The operation that the caller is doing will 798 * determine whether is needs to change owner before or after the switch. 799 * 800 * For demand paged transactional modification, the fork switch should be done 801 * after reading in all the blocks, modifying them and pinning them in the 802 * transaction. For modification when the buffers are already pinned in memory, 803 * the fork switch can be done before changing the owner as we won't need to 804 * validate the owner until the btree buffers are unpinned and writes can occur 805 * again. 806 * 807 * For recovery based ownership change, there is no transactional context and 808 * so a buffer list must be supplied so that we can record the buffers that we 809 * modified for the caller to issue IO on. 810 */ 811 int 812 xfs_bmbt_change_owner( 813 struct xfs_trans *tp, 814 struct xfs_inode *ip, 815 int whichfork, 816 xfs_ino_t new_owner, 817 struct list_head *buffer_list) 818 { 819 struct xfs_btree_cur *cur; 820 int error; 821 822 ASSERT(tp || buffer_list); 823 ASSERT(!(tp && buffer_list)); 824 ASSERT(xfs_ifork_ptr(ip, whichfork)->if_format == XFS_DINODE_FMT_BTREE); 825 826 cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork); 827 cur->bc_flags |= XFS_BTREE_BMBT_INVALID_OWNER; 828 829 error = xfs_btree_change_owner(cur, new_owner, buffer_list); 830 xfs_btree_del_cursor(cur, error); 831 return error; 832 } 833 834 /* Calculate the bmap btree size for some records. */ 835 unsigned long long 836 xfs_bmbt_calc_size( 837 struct xfs_mount *mp, 838 unsigned long long len) 839 { 840 return xfs_btree_calc_size(mp->m_bmap_dmnr, len); 841 } 842 843 int __init 844 xfs_bmbt_init_cur_cache(void) 845 { 846 xfs_bmbt_cur_cache = kmem_cache_create("xfs_bmbt_cur", 847 xfs_btree_cur_sizeof(xfs_bmbt_maxlevels_ondisk()), 848 0, 0, NULL); 849 850 if (!xfs_bmbt_cur_cache) 851 return -ENOMEM; 852 return 0; 853 } 854 855 void 856 xfs_bmbt_destroy_cur_cache(void) 857 { 858 kmem_cache_destroy(xfs_bmbt_cur_cache); 859 xfs_bmbt_cur_cache = NULL; 860 } 861